Diffstat (limited to 'compiler/optimizing')
-rw-r--r--  compiler/optimizing/block_builder.cc | 16
-rw-r--r--  compiler/optimizing/block_builder.h | 5
-rw-r--r--  compiler/optimizing/bounds_check_elimination.cc | 13
-rw-r--r--  compiler/optimizing/code_generation_data.cc | 57
-rw-r--r--  compiler/optimizing/code_generation_data.h | 123
-rw-r--r--  compiler/optimizing/code_generator.cc | 199
-rw-r--r--  compiler/optimizing/code_generator.h | 77
-rw-r--r--  compiler/optimizing/code_generator_arm64.cc | 487
-rw-r--r--  compiler/optimizing/code_generator_arm64.h | 173
-rw-r--r--  compiler/optimizing/code_generator_arm_vixl.cc | 433
-rw-r--r--  compiler/optimizing/code_generator_arm_vixl.h | 43
-rw-r--r--  compiler/optimizing/code_generator_riscv64.cc | 6883
-rw-r--r--  compiler/optimizing/code_generator_riscv64.h | 820
-rw-r--r--  compiler/optimizing/code_generator_vector_arm64_neon.cc | 30
-rw-r--r--  compiler/optimizing/code_generator_vector_arm64_sve.cc | 133
-rw-r--r--  compiler/optimizing/code_generator_vector_arm_vixl.cc | 24
-rw-r--r--  compiler/optimizing/code_generator_vector_x86.cc | 24
-rw-r--r--  compiler/optimizing/code_generator_vector_x86_64.cc | 24
-rw-r--r--  compiler/optimizing/code_generator_x86.cc | 447
-rw-r--r--  compiler/optimizing/code_generator_x86.h | 17
-rw-r--r--  compiler/optimizing/code_generator_x86_64.cc | 518
-rw-r--r--  compiler/optimizing/code_generator_x86_64.h | 30
-rw-r--r--  compiler/optimizing/code_sinking.cc | 65
-rw-r--r--  compiler/optimizing/codegen_test.cc | 18
-rw-r--r--  compiler/optimizing/codegen_test_utils.h | 40
-rw-r--r--  compiler/optimizing/common_arm64.h | 6
-rw-r--r--  compiler/optimizing/constant_folding.cc | 344
-rw-r--r--  compiler/optimizing/constant_folding_test.cc | 2
-rw-r--r--  compiler/optimizing/constructor_fence_redundancy_elimination.cc | 4
-rw-r--r--  compiler/optimizing/critical_native_abi_fixup_arm.cc | 45
-rw-r--r--  compiler/optimizing/critical_native_abi_fixup_riscv64.cc | 71
-rw-r--r--  compiler/optimizing/critical_native_abi_fixup_riscv64.h | 41
-rw-r--r--  compiler/optimizing/dead_code_elimination.cc | 124
-rw-r--r--  compiler/optimizing/dead_code_elimination.h | 11
-rw-r--r--  compiler/optimizing/dead_code_elimination_test.cc | 7
-rw-r--r--  compiler/optimizing/execution_subgraph.cc | 359
-rw-r--r--  compiler/optimizing/execution_subgraph.h | 365
-rw-r--r--  compiler/optimizing/execution_subgraph_test.cc | 975
-rw-r--r--  compiler/optimizing/execution_subgraph_test.h | 40
-rw-r--r--  compiler/optimizing/graph_checker.cc | 241
-rw-r--r--  compiler/optimizing/graph_checker.h | 37
-rw-r--r--  compiler/optimizing/graph_visualizer.cc | 25
-rw-r--r--  compiler/optimizing/induction_var_range.cc | 314
-rw-r--r--  compiler/optimizing/induction_var_range.h | 36
-rw-r--r--  compiler/optimizing/induction_var_range_test.cc | 6
-rw-r--r--  compiler/optimizing/inliner.cc | 104
-rw-r--r--  compiler/optimizing/instruction_builder.cc | 57
-rw-r--r--  compiler/optimizing/instruction_builder.h | 4
-rw-r--r--  compiler/optimizing/instruction_simplifier.cc | 353
-rw-r--r--  compiler/optimizing/instruction_simplifier_arm.cc | 14
-rw-r--r--  compiler/optimizing/instruction_simplifier_arm.h | 11
-rw-r--r--  compiler/optimizing/instruction_simplifier_arm64.cc | 18
-rw-r--r--  compiler/optimizing/instruction_simplifier_arm64.h | 11
-rw-r--r--  compiler/optimizing/instruction_simplifier_shared.cc | 7
-rw-r--r--  compiler/optimizing/instruction_simplifier_shared.h | 7
-rw-r--r--  compiler/optimizing/instruction_simplifier_test.cc | 254
-rw-r--r--  compiler/optimizing/intrinsic_objects.cc | 87
-rw-r--r--  compiler/optimizing/intrinsic_objects.h | 61
-rw-r--r--  compiler/optimizing/intrinsics.cc | 320
-rw-r--r--  compiler/optimizing/intrinsics.h | 56
-rw-r--r--  compiler/optimizing/intrinsics_arm64.cc | 556
-rw-r--r--  compiler/optimizing/intrinsics_arm64.h | 13
-rw-r--r--  compiler/optimizing/intrinsics_arm_vixl.cc | 505
-rw-r--r--  compiler/optimizing/intrinsics_arm_vixl.h | 13
-rw-r--r--  compiler/optimizing/intrinsics_riscv64.cc | 4584
-rw-r--r--  compiler/optimizing/intrinsics_riscv64.h | 87
-rw-r--r--  compiler/optimizing/intrinsics_utils.h | 22
-rw-r--r--  compiler/optimizing/intrinsics_x86.cc | 596
-rw-r--r--  compiler/optimizing/intrinsics_x86.h | 13
-rw-r--r--  compiler/optimizing/intrinsics_x86_64.cc | 638
-rw-r--r--  compiler/optimizing/intrinsics_x86_64.h | 13
-rw-r--r--  compiler/optimizing/jit_patches_arm64.cc | 104
-rw-r--r--  compiler/optimizing/jit_patches_arm64.h | 107
-rw-r--r--  compiler/optimizing/linearize_test.cc | 7
-rw-r--r--  compiler/optimizing/live_ranges_test.cc | 6
-rw-r--r--  compiler/optimizing/liveness_test.cc | 12
-rw-r--r--  compiler/optimizing/load_store_analysis.cc | 106
-rw-r--r--  compiler/optimizing/load_store_analysis.h | 104
-rw-r--r--  compiler/optimizing/load_store_analysis_test.cc | 797
-rw-r--r--  compiler/optimizing/load_store_elimination.cc | 1215
-rw-r--r--  compiler/optimizing/load_store_elimination.h | 11
-rw-r--r--  compiler/optimizing/load_store_elimination_test.cc | 6350
-rw-r--r--  compiler/optimizing/locations.cc | 4
-rw-r--r--  compiler/optimizing/locations.h | 16
-rw-r--r--  compiler/optimizing/loop_analysis.cc | 7
-rw-r--r--  compiler/optimizing/loop_analysis.h | 16
-rw-r--r--  compiler/optimizing/loop_optimization.cc | 857
-rw-r--r--  compiler/optimizing/loop_optimization.h | 210
-rw-r--r--  compiler/optimizing/loop_optimization_test.cc | 10
-rw-r--r--  compiler/optimizing/nodes.cc | 141
-rw-r--r--  compiler/optimizing/nodes.h | 514
-rw-r--r--  compiler/optimizing/nodes_shared.h | 10
-rw-r--r--  compiler/optimizing/nodes_vector.h | 87
-rw-r--r--  compiler/optimizing/nodes_x86.h | 12
-rw-r--r--  compiler/optimizing/optimization.cc | 24
-rw-r--r--  compiler/optimizing/optimization.h | 3
-rw-r--r--  compiler/optimizing/optimizing_cfi_test.cc | 46
-rw-r--r--  compiler/optimizing/optimizing_compiler.cc | 303
-rw-r--r--  compiler/optimizing/optimizing_compiler_stats.h | 3
-rw-r--r--  compiler/optimizing/optimizing_unit_test.h | 1
-rw-r--r--  compiler/optimizing/parallel_move_test.cc | 6
-rw-r--r--  compiler/optimizing/pc_relative_fixups_x86.cc | 13
-rw-r--r--  compiler/optimizing/prepare_for_register_allocation.cc | 5
-rw-r--r--  compiler/optimizing/profiling_info_builder.cc | 90
-rw-r--r--  compiler/optimizing/profiling_info_builder.h | 64
-rw-r--r--  compiler/optimizing/reference_type_propagation.cc | 27
-rw-r--r--  compiler/optimizing/reference_type_propagation_test.cc | 4
-rw-r--r--  compiler/optimizing/register_allocation_resolver.cc | 29
-rw-r--r--  compiler/optimizing/register_allocator.cc | 5
-rw-r--r--  compiler/optimizing/register_allocator_graph_color.cc | 2086
-rw-r--r--  compiler/optimizing/register_allocator_graph_color.h | 195
-rw-r--r--  compiler/optimizing/register_allocator_linear_scan.cc | 13
-rw-r--r--  compiler/optimizing/register_allocator_test.cc | 3
-rw-r--r--  compiler/optimizing/scheduler.cc | 14
-rw-r--r--  compiler/optimizing/scheduler_arm.cc | 29
-rw-r--r--  compiler/optimizing/scheduler_arm.h | 3
-rw-r--r--  compiler/optimizing/scheduler_arm64.cc | 47
-rw-r--r--  compiler/optimizing/scheduler_arm64.h | 2
-rw-r--r--  compiler/optimizing/scheduler_test.cc | 3
-rw-r--r--  compiler/optimizing/select_generator.cc | 3
-rw-r--r--  compiler/optimizing/ssa_builder.cc | 16
-rw-r--r--  compiler/optimizing/ssa_liveness_analysis_test.cc | 9
-rw-r--r--  compiler/optimizing/ssa_phi_elimination.cc | 2
-rw-r--r--  compiler/optimizing/stack_map_stream.cc | 18
-rw-r--r--  compiler/optimizing/stack_map_stream.h | 8
-rw-r--r--  compiler/optimizing/write_barrier_elimination.cc | 9
-rw-r--r--  compiler/optimizing/x86_memory_gen.cc | 2
127 files changed, 19551 insertions, 16393 deletions
diff --git a/compiler/optimizing/block_builder.cc b/compiler/optimizing/block_builder.cc
index 703584c537..9da2bfb8ef 100644
--- a/compiler/optimizing/block_builder.cc
+++ b/compiler/optimizing/block_builder.cc
@@ -20,7 +20,6 @@
#include "dex/bytecode_utils.h"
#include "dex/code_item_accessors-inl.h"
#include "dex/dex_file_exception_helpers.h"
-#include "quicken_info.h"
namespace art HIDDEN {
@@ -40,9 +39,7 @@ HBasicBlockBuilder::HBasicBlockBuilder(HGraph* graph,
local_allocator->Adapter(kArenaAllocGraphBuilder)),
throwing_blocks_(kDefaultNumberOfThrowingBlocks,
local_allocator->Adapter(kArenaAllocGraphBuilder)),
- number_of_branches_(0u),
- quicken_index_for_dex_pc_(std::less<uint32_t>(),
- local_allocator->Adapter(kArenaAllocGraphBuilder)) {}
+ number_of_branches_(0u) {}
HBasicBlock* HBasicBlockBuilder::MaybeCreateBlockAt(uint32_t dex_pc) {
return MaybeCreateBlockAt(dex_pc, dex_pc);
@@ -147,7 +144,6 @@ void HBasicBlockBuilder::ConnectBasicBlocks() {
HBasicBlock* block = graph_->GetEntryBlock();
graph_->AddBlock(block);
- size_t quicken_index = 0;
bool is_throwing_block = false;
// Calculate the quickening index here instead of CreateBranchTargets since it's easier to
// calculate in dex_pc order.
@@ -158,8 +154,6 @@ void HBasicBlockBuilder::ConnectBasicBlocks() {
// Check if this dex_pc address starts a new basic block.
HBasicBlock* next_block = GetBlockAt(dex_pc);
if (next_block != nullptr) {
- // We only need quicken index entries for basic block boundaries.
- quicken_index_for_dex_pc_.Put(dex_pc, quicken_index);
if (block != nullptr) {
// Last instruction did not end its basic block but a new one starts here.
// It must have been a block falling through into the next one.
@@ -169,10 +163,6 @@ void HBasicBlockBuilder::ConnectBasicBlocks() {
is_throwing_block = false;
graph_->AddBlock(block);
}
- // Make sure to increment this before the continues.
- if (QuickenInfoTable::NeedsIndexForInstruction(&instruction)) {
- ++quicken_index;
- }
if (block == nullptr) {
// Ignore dead code.
@@ -483,8 +473,4 @@ void HBasicBlockBuilder::BuildIntrinsic() {
body->AddSuccessor(exit_block);
}
-size_t HBasicBlockBuilder::GetQuickenIndex(uint32_t dex_pc) const {
- return quicken_index_for_dex_pc_.Get(dex_pc);
-}
-
} // namespace art
diff --git a/compiler/optimizing/block_builder.h b/compiler/optimizing/block_builder.h
index 8668ef8221..1aa9375e5a 100644
--- a/compiler/optimizing/block_builder.h
+++ b/compiler/optimizing/block_builder.h
@@ -45,8 +45,6 @@ class HBasicBlockBuilder : public ValueObject {
size_t GetNumberOfBranches() const { return number_of_branches_; }
HBasicBlock* GetBlockAt(uint32_t dex_pc) const { return branch_targets_[dex_pc]; }
- size_t GetQuickenIndex(uint32_t dex_pc) const;
-
private:
// Creates a basic block starting at given `dex_pc`.
HBasicBlock* MaybeCreateBlockAt(uint32_t dex_pc);
@@ -83,9 +81,6 @@ class HBasicBlockBuilder : public ValueObject {
ScopedArenaVector<HBasicBlock*> throwing_blocks_;
size_t number_of_branches_;
- // A table to quickly find the quicken index for the first instruction of a basic block.
- ScopedArenaSafeMap<uint32_t, uint32_t> quicken_index_for_dex_pc_;
-
static constexpr size_t kDefaultNumberOfThrowingBlocks = 2u;
DISALLOW_COPY_AND_ASSIGN(HBasicBlockBuilder);
diff --git a/compiler/optimizing/bounds_check_elimination.cc b/compiler/optimizing/bounds_check_elimination.cc
index 919abfdc49..c0d4c37659 100644
--- a/compiler/optimizing/bounds_check_elimination.cc
+++ b/compiler/optimizing/bounds_check_elimination.cc
@@ -1047,14 +1047,14 @@ class BCEVisitor final : public HGraphVisitor {
HDiv* div = nullptr;
int64_t const_divisor = 0;
- if (HMul* mul = instruction->GetRight()->AsMul()) {
+ if (HMul* mul = instruction->GetRight()->AsMulOrNull()) {
if (!mul->GetLeft()->IsDiv() || !mul->GetRight()->IsConstant()) {
return false;
}
div = mul->GetLeft()->AsDiv();
const_divisor = Int64FromConstant(mul->GetRight()->AsConstant());
- } else if (HAdd* add = instruction->GetRight()->AsAdd()) {
- HShl* shl = add->GetRight()->AsShl();
+ } else if (HAdd* add = instruction->GetRight()->AsAddOrNull()) {
+ HShl* shl = add->GetRight()->AsShlOrNull();
if (!is_needed_shl(shl)) {
return false;
}
@@ -1070,8 +1070,8 @@ class BCEVisitor final : public HGraphVisitor {
return false;
}
const_divisor = (1LL << n) + 1;
- } else if (HSub* sub = instruction->GetRight()->AsSub()) {
- HShl* shl = sub->GetLeft()->AsShl();
+ } else if (HSub* sub = instruction->GetRight()->AsSubOrNull()) {
+ HShl* shl = sub->GetLeft()->AsShlOrNull();
if (!is_needed_shl(shl)) {
return false;
}
@@ -1378,8 +1378,7 @@ class BCEVisitor final : public HGraphVisitor {
HInstruction* array_length,
HInstruction* base,
int32_t min_c, int32_t max_c) {
- HBoundsCheck* bounds_check =
- first_index_bounds_check_map_.Get(array_length->GetId())->AsBoundsCheck();
+ HBoundsCheck* bounds_check = first_index_bounds_check_map_.Get(array_length->GetId());
// Construct deoptimization on single or double bounds on range [base-min_c,base+max_c],
// for example either for a[0]..a[3] just 3 or for a[base-1]..a[base+3] both base-1
// and base+3, since we made the assumption any in between value may occur too.
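
For context on the shape matched in the HAdd branch above (unchanged by this diff apart from the switch to the AsXxxOrNull() accessors): a multiplication by a divisor of the form 2^n + 1 is commonly strength-reduced to an add of a shift, d + (d << n), which is why the visitor reconstructs the constant divisor as (1LL << n) + 1. A minimal standalone check of that identity, assuming nothing beyond the C++ standard library:

    #include <cassert>
    #include <cstdint>

    int main() {
      for (int64_t d = 0; d <= 1000; ++d) {
        for (int n = 1; n < 8; ++n) {
          // d + (d << n) is the strength-reduced form of d * ((1 << n) + 1),
          // the divisor value recovered by the HAdd branch in the hunk above.
          assert(d + (d << n) == d * ((INT64_C(1) << n) + 1));
        }
      }
      return 0;
    }
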
diff --git a/compiler/optimizing/code_generation_data.cc b/compiler/optimizing/code_generation_data.cc
new file mode 100644
index 0000000000..7b23d46dc5
--- /dev/null
+++ b/compiler/optimizing/code_generation_data.cc
@@ -0,0 +1,57 @@
+/*
+ * Copyright (C) 2023 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "class_linker.h"
+#include "code_generation_data.h"
+#include "code_generator.h"
+#include "intern_table.h"
+#include "mirror/object-inl.h"
+#include "runtime.h"
+
+namespace art HIDDEN {
+
+void CodeGenerationData::EmitJitRoots(
+ /*out*/std::vector<Handle<mirror::Object>>* roots) {
+ DCHECK(roots->empty());
+ roots->reserve(GetNumberOfJitRoots());
+ ClassLinker* class_linker = Runtime::Current()->GetClassLinker();
+ size_t index = 0;
+ for (auto& entry : jit_string_roots_) {
+ // Update the `roots` with the string, and replace the address temporarily
+ // stored to the index in the table.
+ uint64_t address = entry.second;
+ roots->emplace_back(reinterpret_cast<StackReference<mirror::Object>*>(address));
+ DCHECK(roots->back() != nullptr);
+ DCHECK(roots->back()->IsString());
+ entry.second = index;
+ // Ensure the string is strongly interned. This is a requirement on how the JIT
+ // handles strings. b/32995596
+ class_linker->GetInternTable()->InternStrong(roots->back()->AsString());
+ ++index;
+ }
+ for (auto& entry : jit_class_roots_) {
+ // Update the `roots` with the class, and replace the address temporarily
+ // stored to the index in the table.
+ uint64_t address = entry.second;
+ roots->emplace_back(reinterpret_cast<StackReference<mirror::Object>*>(address));
+ DCHECK(roots->back() != nullptr);
+ DCHECK(roots->back()->IsClass());
+ entry.second = index;
+ ++index;
+ }
+}
+
+} // namespace art
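
The bookkeeping in EmitJitRoots above repurposes the map values in two phases: ReserveJitStringRoot/ReserveJitClassRoot first store the handle's raw address, and EmitJitRoots later overwrites each stored address with the root's index in the emitted table. A minimal standalone sketch of that two-phase pattern using standard containers (illustrative names only, not ART APIs):

    #include <cstdint>
    #include <map>
    #include <string>
    #include <vector>

    int main() {
      // Phase 1 (reserve): key each root by its reference, store its address.
      std::string alpha = "alpha", beta = "beta";
      std::map<std::string, uint64_t> roots;
      roots["ref_a"] = reinterpret_cast<uint64_t>(&alpha);
      roots["ref_b"] = reinterpret_cast<uint64_t>(&beta);

      // Phase 2 (emit): walk the map in order, collect the objects, and
      // overwrite each stored address with the object's table index.
      std::vector<const std::string*> emitted;
      uint64_t index = 0;
      for (auto& entry : roots) {
        emitted.push_back(reinterpret_cast<const std::string*>(entry.second));
        entry.second = index++;
      }
      return (emitted.size() == 2 && roots["ref_b"] == 1) ? 0 : 1;
    }
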
diff --git a/compiler/optimizing/code_generation_data.h b/compiler/optimizing/code_generation_data.h
new file mode 100644
index 0000000000..e78ba8f574
--- /dev/null
+++ b/compiler/optimizing/code_generation_data.h
@@ -0,0 +1,123 @@
+/*
+ * Copyright (C) 2023 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_COMPILER_OPTIMIZING_CODE_GENERATION_DATA_H_
+#define ART_COMPILER_OPTIMIZING_CODE_GENERATION_DATA_H_
+
+#include <memory>
+
+#include "arch/instruction_set.h"
+#include "base/scoped_arena_allocator.h"
+#include "base/scoped_arena_containers.h"
+#include "code_generator.h"
+#include "dex/string_reference.h"
+#include "dex/type_reference.h"
+#include "handle.h"
+#include "mirror/class.h"
+#include "mirror/object.h"
+#include "mirror/string.h"
+#include "stack_map_stream.h"
+
+namespace art HIDDEN {
+
+class CodeGenerationData : public DeletableArenaObject<kArenaAllocCodeGenerator> {
+ public:
+ static std::unique_ptr<CodeGenerationData> Create(ArenaStack* arena_stack,
+ InstructionSet instruction_set) {
+ ScopedArenaAllocator allocator(arena_stack);
+ void* memory = allocator.Alloc<CodeGenerationData>(kArenaAllocCodeGenerator);
+ return std::unique_ptr<CodeGenerationData>(
+ ::new (memory) CodeGenerationData(std::move(allocator), instruction_set));
+ }
+
+ ScopedArenaAllocator* GetScopedAllocator() {
+ return &allocator_;
+ }
+
+ void AddSlowPath(SlowPathCode* slow_path) {
+ slow_paths_.emplace_back(std::unique_ptr<SlowPathCode>(slow_path));
+ }
+
+ ArrayRef<const std::unique_ptr<SlowPathCode>> GetSlowPaths() const {
+ return ArrayRef<const std::unique_ptr<SlowPathCode>>(slow_paths_);
+ }
+
+ StackMapStream* GetStackMapStream() { return &stack_map_stream_; }
+
+ void ReserveJitStringRoot(StringReference string_reference, Handle<mirror::String> string) {
+ jit_string_roots_.Overwrite(string_reference,
+ reinterpret_cast64<uint64_t>(string.GetReference()));
+ }
+
+ uint64_t GetJitStringRootIndex(StringReference string_reference) const {
+ return jit_string_roots_.Get(string_reference);
+ }
+
+ size_t GetNumberOfJitStringRoots() const {
+ return jit_string_roots_.size();
+ }
+
+ void ReserveJitClassRoot(TypeReference type_reference, Handle<mirror::Class> klass) {
+ jit_class_roots_.Overwrite(type_reference, reinterpret_cast64<uint64_t>(klass.GetReference()));
+ }
+
+ uint64_t GetJitClassRootIndex(TypeReference type_reference) const {
+ return jit_class_roots_.Get(type_reference);
+ }
+
+ size_t GetNumberOfJitClassRoots() const {
+ return jit_class_roots_.size();
+ }
+
+ size_t GetNumberOfJitRoots() const {
+ return GetNumberOfJitStringRoots() + GetNumberOfJitClassRoots();
+ }
+
+ void EmitJitRoots(/*out*/std::vector<Handle<mirror::Object>>* roots)
+ REQUIRES_SHARED(Locks::mutator_lock_);
+
+ private:
+ CodeGenerationData(ScopedArenaAllocator&& allocator, InstructionSet instruction_set)
+ : allocator_(std::move(allocator)),
+ stack_map_stream_(&allocator_, instruction_set),
+ slow_paths_(allocator_.Adapter(kArenaAllocCodeGenerator)),
+ jit_string_roots_(StringReferenceValueComparator(),
+ allocator_.Adapter(kArenaAllocCodeGenerator)),
+ jit_class_roots_(TypeReferenceValueComparator(),
+ allocator_.Adapter(kArenaAllocCodeGenerator)) {
+ slow_paths_.reserve(kDefaultSlowPathsCapacity);
+ }
+
+ static constexpr size_t kDefaultSlowPathsCapacity = 8;
+
+ ScopedArenaAllocator allocator_;
+ StackMapStream stack_map_stream_;
+ ScopedArenaVector<std::unique_ptr<SlowPathCode>> slow_paths_;
+
+ // Maps a StringReference (dex_file, string_index) to the index in the literal table.
+ // Entries are initially added with a pointer in the handle zone, and `EmitJitRoots`
+ // will compute all the indices.
+ ScopedArenaSafeMap<StringReference, uint64_t, StringReferenceValueComparator> jit_string_roots_;
+
+ // Maps a ClassReference (dex_file, type_index) to the index in the literal table.
+ // Entries are initially added with a pointer in the handle zone, and `EmitJitRoots`
+ // will compute all the indices.
+ ScopedArenaSafeMap<TypeReference, uint64_t, TypeReferenceValueComparator> jit_class_roots_;
+};
+
+} // namespace art
+
+#endif // ART_COMPILER_OPTIMIZING_CODE_GENERATION_DATA_H_
diff --git a/compiler/optimizing/code_generator.cc b/compiler/optimizing/code_generator.cc
index c9f42b52f5..b0e07e32ea 100644
--- a/compiler/optimizing/code_generator.cc
+++ b/compiler/optimizing/code_generator.cc
@@ -44,6 +44,7 @@
#include "base/leb128.h"
#include "class_linker.h"
#include "class_root-inl.h"
+#include "code_generation_data.h"
#include "dex/bytecode_utils.h"
#include "dex/code_item_accessors-inl.h"
#include "graph_visualizer.h"
@@ -141,120 +142,20 @@ static bool CheckTypeConsistency(HInstruction* instruction) {
return true;
}
-class CodeGenerator::CodeGenerationData : public DeletableArenaObject<kArenaAllocCodeGenerator> {
- public:
- static std::unique_ptr<CodeGenerationData> Create(ArenaStack* arena_stack,
- InstructionSet instruction_set) {
- ScopedArenaAllocator allocator(arena_stack);
- void* memory = allocator.Alloc<CodeGenerationData>(kArenaAllocCodeGenerator);
- return std::unique_ptr<CodeGenerationData>(
- ::new (memory) CodeGenerationData(std::move(allocator), instruction_set));
- }
-
- ScopedArenaAllocator* GetScopedAllocator() {
- return &allocator_;
- }
-
- void AddSlowPath(SlowPathCode* slow_path) {
- slow_paths_.emplace_back(std::unique_ptr<SlowPathCode>(slow_path));
- }
-
- ArrayRef<const std::unique_ptr<SlowPathCode>> GetSlowPaths() const {
- return ArrayRef<const std::unique_ptr<SlowPathCode>>(slow_paths_);
- }
-
- StackMapStream* GetStackMapStream() { return &stack_map_stream_; }
-
- void ReserveJitStringRoot(StringReference string_reference, Handle<mirror::String> string) {
- jit_string_roots_.Overwrite(string_reference,
- reinterpret_cast64<uint64_t>(string.GetReference()));
- }
-
- uint64_t GetJitStringRootIndex(StringReference string_reference) const {
- return jit_string_roots_.Get(string_reference);
- }
-
- size_t GetNumberOfJitStringRoots() const {
- return jit_string_roots_.size();
- }
-
- void ReserveJitClassRoot(TypeReference type_reference, Handle<mirror::Class> klass) {
- jit_class_roots_.Overwrite(type_reference, reinterpret_cast64<uint64_t>(klass.GetReference()));
- }
-
- uint64_t GetJitClassRootIndex(TypeReference type_reference) const {
- return jit_class_roots_.Get(type_reference);
- }
-
- size_t GetNumberOfJitClassRoots() const {
- return jit_class_roots_.size();
- }
-
- size_t GetNumberOfJitRoots() const {
- return GetNumberOfJitStringRoots() + GetNumberOfJitClassRoots();
- }
+bool CodeGenerator::EmitReadBarrier() const {
+ return GetCompilerOptions().EmitReadBarrier();
+}
- void EmitJitRoots(/*out*/std::vector<Handle<mirror::Object>>* roots)
- REQUIRES_SHARED(Locks::mutator_lock_);
+bool CodeGenerator::EmitBakerReadBarrier() const {
+ return kUseBakerReadBarrier && GetCompilerOptions().EmitReadBarrier();
+}
- private:
- CodeGenerationData(ScopedArenaAllocator&& allocator, InstructionSet instruction_set)
- : allocator_(std::move(allocator)),
- stack_map_stream_(&allocator_, instruction_set),
- slow_paths_(allocator_.Adapter(kArenaAllocCodeGenerator)),
- jit_string_roots_(StringReferenceValueComparator(),
- allocator_.Adapter(kArenaAllocCodeGenerator)),
- jit_class_roots_(TypeReferenceValueComparator(),
- allocator_.Adapter(kArenaAllocCodeGenerator)) {
- slow_paths_.reserve(kDefaultSlowPathsCapacity);
- }
-
- static constexpr size_t kDefaultSlowPathsCapacity = 8;
-
- ScopedArenaAllocator allocator_;
- StackMapStream stack_map_stream_;
- ScopedArenaVector<std::unique_ptr<SlowPathCode>> slow_paths_;
-
- // Maps a StringReference (dex_file, string_index) to the index in the literal table.
- // Entries are intially added with a pointer in the handle zone, and `EmitJitRoots`
- // will compute all the indices.
- ScopedArenaSafeMap<StringReference, uint64_t, StringReferenceValueComparator> jit_string_roots_;
-
- // Maps a ClassReference (dex_file, type_index) to the index in the literal table.
- // Entries are intially added with a pointer in the handle zone, and `EmitJitRoots`
- // will compute all the indices.
- ScopedArenaSafeMap<TypeReference, uint64_t, TypeReferenceValueComparator> jit_class_roots_;
-};
+bool CodeGenerator::EmitNonBakerReadBarrier() const {
+ return !kUseBakerReadBarrier && GetCompilerOptions().EmitReadBarrier();
+}
-void CodeGenerator::CodeGenerationData::EmitJitRoots(
- /*out*/std::vector<Handle<mirror::Object>>* roots) {
- DCHECK(roots->empty());
- roots->reserve(GetNumberOfJitRoots());
- ClassLinker* class_linker = Runtime::Current()->GetClassLinker();
- size_t index = 0;
- for (auto& entry : jit_string_roots_) {
- // Update the `roots` with the string, and replace the address temporarily
- // stored to the index in the table.
- uint64_t address = entry.second;
- roots->emplace_back(reinterpret_cast<StackReference<mirror::Object>*>(address));
- DCHECK(roots->back() != nullptr);
- DCHECK(roots->back()->IsString());
- entry.second = index;
- // Ensure the string is strongly interned. This is a requirement on how the JIT
- // handles strings. b/32995596
- class_linker->GetInternTable()->InternStrong(roots->back()->AsString());
- ++index;
- }
- for (auto& entry : jit_class_roots_) {
- // Update the `roots` with the class, and replace the address temporarily
- // stored to the index in the table.
- uint64_t address = entry.second;
- roots->emplace_back(reinterpret_cast<StackReference<mirror::Object>*>(address));
- DCHECK(roots->back() != nullptr);
- DCHECK(roots->back()->IsClass());
- entry.second = index;
- ++index;
- }
+ReadBarrierOption CodeGenerator::GetCompilerReadBarrierOption() const {
+ return EmitReadBarrier() ? kWithReadBarrier : kWithoutReadBarrier;
}
ScopedArenaAllocator* CodeGenerator::GetScopedAllocator() {
@@ -288,8 +189,8 @@ uint64_t CodeGenerator::GetJitClassRootIndex(TypeReference type_reference) {
return code_generation_data_->GetJitClassRootIndex(type_reference);
}
-void CodeGenerator::EmitJitRootPatches(uint8_t* code ATTRIBUTE_UNUSED,
- const uint8_t* roots_data ATTRIBUTE_UNUSED) {
+void CodeGenerator::EmitJitRootPatches([[maybe_unused]] uint8_t* code,
+ [[maybe_unused]] const uint8_t* roots_data) {
DCHECK(code_generation_data_ != nullptr);
DCHECK_EQ(code_generation_data_->GetNumberOfJitStringRoots(), 0u);
DCHECK_EQ(code_generation_data_->GetNumberOfJitClassRoots(), 0u);
@@ -378,7 +279,7 @@ void CodeGenerator::InitializeCodeGenerationData() {
code_generation_data_ = CodeGenerationData::Create(graph_->GetArenaStack(), GetInstructionSet());
}
-void CodeGenerator::Compile(CodeAllocator* allocator) {
+void CodeGenerator::Compile() {
InitializeCodeGenerationData();
// The register allocator already called `InitializeCodeGeneration`,
@@ -394,7 +295,8 @@ void CodeGenerator::Compile(CodeAllocator* allocator) {
fpu_spill_mask_,
GetGraph()->GetNumberOfVRegs(),
GetGraph()->IsCompilingBaseline(),
- GetGraph()->IsDebuggable());
+ GetGraph()->IsDebuggable(),
+ GetGraph()->HasShouldDeoptimizeFlag());
size_t frame_start = GetAssembler()->CodeSize();
GenerateFrameEntry();
@@ -443,32 +345,28 @@ void CodeGenerator::Compile(CodeAllocator* allocator) {
}
// Finalize instructions in assembler;
- Finalize(allocator);
+ Finalize();
GetStackMapStream()->EndMethod(GetAssembler()->CodeSize());
}
-void CodeGenerator::Finalize(CodeAllocator* allocator) {
- size_t code_size = GetAssembler()->CodeSize();
- uint8_t* buffer = allocator->Allocate(code_size);
-
- MemoryRegion code(buffer, code_size);
- GetAssembler()->FinalizeInstructions(code);
+void CodeGenerator::Finalize() {
+ GetAssembler()->FinalizeCode();
}
void CodeGenerator::EmitLinkerPatches(
- ArenaVector<linker::LinkerPatch>* linker_patches ATTRIBUTE_UNUSED) {
+ [[maybe_unused]] ArenaVector<linker::LinkerPatch>* linker_patches) {
// No linker patches by default.
}
-bool CodeGenerator::NeedsThunkCode(const linker::LinkerPatch& patch ATTRIBUTE_UNUSED) const {
+bool CodeGenerator::NeedsThunkCode([[maybe_unused]] const linker::LinkerPatch& patch) const {
// Code generators that create patches requiring thunk compilation should override this function.
return false;
}
-void CodeGenerator::EmitThunkCode(const linker::LinkerPatch& patch ATTRIBUTE_UNUSED,
- /*out*/ ArenaVector<uint8_t>* code ATTRIBUTE_UNUSED,
- /*out*/ std::string* debug_name ATTRIBUTE_UNUSED) {
+void CodeGenerator::EmitThunkCode([[maybe_unused]] const linker::LinkerPatch& patch,
+ [[maybe_unused]] /*out*/ ArenaVector<uint8_t>* code,
+ [[maybe_unused]] /*out*/ std::string* debug_name) {
// Code generators that create patches requiring thunk compilation should override this function.
LOG(FATAL) << "Unexpected call to EmitThunkCode().";
}
@@ -730,7 +628,7 @@ void CodeGenerator::CreateUnresolvedFieldLocationSummary(
}
// Note that pSetXXStatic/pGetXXStatic always takes/returns an int or int64
- // regardless of the the type. Because of that we forced to special case
+ // regardless of the type. Because of that we forced to special case
// the access to floating point values.
if (is_get) {
if (DataType::IsFloatingPointType(field_type)) {
@@ -745,8 +643,8 @@ void CodeGenerator::CreateUnresolvedFieldLocationSummary(
locations->SetOut(calling_convention.GetReturnLocation(field_type));
}
} else {
- size_t set_index = is_instance ? 1 : 0;
- if (DataType::IsFloatingPointType(field_type)) {
+ size_t set_index = is_instance ? 1 : 0;
+ if (DataType::IsFloatingPointType(field_type)) {
// The set value comes from a float location while the calling convention
// expects it in a regular register location. Allocate a temp for it and
// make the transfer at codegen.
@@ -1028,6 +926,12 @@ std::unique_ptr<CodeGenerator> CodeGenerator::Create(HGraph* graph,
new (allocator) arm64::CodeGeneratorARM64(graph, compiler_options, stats));
}
#endif
+#ifdef ART_ENABLE_CODEGEN_riscv64
+ case InstructionSet::kRiscv64: {
+ return std::unique_ptr<CodeGenerator>(
+ new (allocator) riscv64::CodeGeneratorRISCV64(graph, compiler_options, stats));
+ }
+#endif
#ifdef ART_ENABLE_CODEGEN_x86
case InstructionSet::kX86: {
return std::unique_ptr<CodeGenerator>(
@@ -1736,10 +1640,8 @@ void CodeGenerator::ValidateInvokeRuntime(QuickEntrypointEnum entrypoint,
// When (non-Baker) read barriers are enabled, some instructions
// use a slow path to emit a read barrier, which does not trigger
// GC.
- (gUseReadBarrier &&
- !kUseBakerReadBarrier &&
+ (EmitNonBakerReadBarrier() &&
(instruction->IsInstanceFieldGet() ||
- instruction->IsPredicatedInstanceFieldGet() ||
instruction->IsStaticFieldGet() ||
instruction->IsArrayGet() ||
instruction->IsLoadClass() ||
@@ -1776,11 +1678,11 @@ void CodeGenerator::ValidateInvokeRuntimeWithoutRecordingPcInfo(HInstruction* in
// PC-related information.
DCHECK(kUseBakerReadBarrier);
DCHECK(instruction->IsInstanceFieldGet() ||
- instruction->IsPredicatedInstanceFieldGet() ||
instruction->IsStaticFieldGet() ||
instruction->IsArrayGet() ||
instruction->IsArraySet() ||
instruction->IsLoadClass() ||
+ instruction->IsLoadMethodType() ||
instruction->IsLoadString() ||
instruction->IsInstanceOf() ||
instruction->IsCheckCast() ||
@@ -1831,26 +1733,28 @@ void SlowPathCode::RestoreLiveRegisters(CodeGenerator* codegen, LocationSummary*
}
}
-void CodeGenerator::CreateSystemArrayCopyLocationSummary(HInvoke* invoke) {
+LocationSummary* CodeGenerator::CreateSystemArrayCopyLocationSummary(
+ HInvoke* invoke, int32_t length_threshold, size_t num_temps) {
// Check to see if we have known failures that will cause us to have to bail out
// to the runtime, and just generate the runtime call directly.
- HIntConstant* src_pos = invoke->InputAt(1)->AsIntConstant();
- HIntConstant* dest_pos = invoke->InputAt(3)->AsIntConstant();
+ HIntConstant* src_pos = invoke->InputAt(1)->AsIntConstantOrNull();
+ HIntConstant* dest_pos = invoke->InputAt(3)->AsIntConstantOrNull();
// The positions must be non-negative.
if ((src_pos != nullptr && src_pos->GetValue() < 0) ||
(dest_pos != nullptr && dest_pos->GetValue() < 0)) {
// We will have to fail anyways.
- return;
+ return nullptr;
}
- // The length must be >= 0.
- HIntConstant* length = invoke->InputAt(4)->AsIntConstant();
+ // The length must be >= 0. If a positive `length_threshold` is provided, lengths
+ // greater or equal to the threshold are also handled by the normal implementation.
+ HIntConstant* length = invoke->InputAt(4)->AsIntConstantOrNull();
if (length != nullptr) {
int32_t len = length->GetValue();
- if (len < 0) {
+ if (len < 0 || (length_threshold > 0 && len >= length_threshold)) {
// Just call as normal.
- return;
+ return nullptr;
}
}
@@ -1859,13 +1763,13 @@ void CodeGenerator::CreateSystemArrayCopyLocationSummary(HInvoke* invoke) {
if (optimizations.GetDestinationIsSource()) {
if (src_pos != nullptr && dest_pos != nullptr && src_pos->GetValue() < dest_pos->GetValue()) {
// We only support backward copying if source and destination are the same.
- return;
+ return nullptr;
}
}
if (optimizations.GetDestinationIsPrimitiveArray() || optimizations.GetSourceIsPrimitiveArray()) {
// We currently don't intrinsify primitive copying.
- return;
+ return nullptr;
}
ArenaAllocator* allocator = invoke->GetBlock()->GetGraph()->GetAllocator();
@@ -1879,9 +1783,10 @@ void CodeGenerator::CreateSystemArrayCopyLocationSummary(HInvoke* invoke) {
locations->SetInAt(3, Location::RegisterOrConstant(invoke->InputAt(3)));
locations->SetInAt(4, Location::RegisterOrConstant(invoke->InputAt(4)));
- locations->AddTemp(Location::RequiresRegister());
- locations->AddTemp(Location::RequiresRegister());
- locations->AddTemp(Location::RequiresRegister());
+ if (num_temps != 0u) {
+ locations->AddRegisterTemps(num_temps);
+ }
+ return locations;
}
void CodeGenerator::EmitJitRoots(uint8_t* code,
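
The new length_threshold parameter added to CreateSystemArrayCopyLocationSummary above lets a backend fall back to the plain runtime call not only for negative constant lengths but also for constant lengths at or above a backend-chosen threshold. A small standalone restatement of that guard; the threshold of 128 below is an illustrative value, not one taken from this patch:

    #include <cassert>
    #include <cstdint>

    // Mirrors the constant-length check in the hunk above: fall back to the
    // normal runtime call for negative lengths and, when a positive threshold
    // is given, for lengths greater than or equal to that threshold.
    bool BailOutForConstantLength(int32_t len, int32_t length_threshold) {
      return len < 0 || (length_threshold > 0 && len >= length_threshold);
    }

    int main() {
      assert(BailOutForConstantLength(-1, /*length_threshold=*/0));     // negative length
      assert(BailOutForConstantLength(200, /*length_threshold=*/128));  // above threshold
      assert(!BailOutForConstantLength(64, /*length_threshold=*/128));  // handled inline
      assert(!BailOutForConstantLength(200, /*length_threshold=*/-1));  // no threshold given
      return 0;
    }
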
diff --git a/compiler/optimizing/code_generator.h b/compiler/optimizing/code_generator.h
index 9872efaa4a..de6fc85da4 100644
--- a/compiler/optimizing/code_generator.h
+++ b/compiler/optimizing/code_generator.h
@@ -59,9 +59,6 @@ static int32_t constexpr kPrimIntMax = 0x7fffffff;
// Maximum value for a primitive long.
static int64_t constexpr kPrimLongMax = INT64_C(0x7fffffffffffffff);
-static const ReadBarrierOption gCompilerReadBarrierOption =
- gUseReadBarrier ? kWithReadBarrier : kWithoutReadBarrier;
-
constexpr size_t status_lsb_position = SubtypeCheckBits::BitStructSizeOf();
constexpr size_t status_byte_offset =
mirror::Class::StatusOffset().SizeValue() + (status_lsb_position / kBitsPerByte);
@@ -73,6 +70,7 @@ constexpr uint32_t shifted_initialized_value =
enum_cast<uint32_t>(ClassStatus::kInitialized) << (status_lsb_position % kBitsPerByte);
class Assembler;
+class CodeGenerationData;
class CodeGenerator;
class CompilerOptions;
class StackMapStream;
@@ -82,18 +80,6 @@ namespace linker {
class LinkerPatch;
} // namespace linker
-class CodeAllocator {
- public:
- CodeAllocator() {}
- virtual ~CodeAllocator() {}
-
- virtual uint8_t* Allocate(size_t size) = 0;
- virtual ArrayRef<const uint8_t> GetMemory() const = 0;
-
- private:
- DISALLOW_COPY_AND_ASSIGN(CodeAllocator);
-};
-
class SlowPathCode : public DeletableArenaObject<kArenaAllocSlowPaths> {
public:
explicit SlowPathCode(HInstruction* instruction) : instruction_(instruction) {
@@ -200,7 +186,7 @@ class FieldAccessCallingConvention {
class CodeGenerator : public DeletableArenaObject<kArenaAllocCodeGenerator> {
public:
// Compiles the graph to executable instructions.
- void Compile(CodeAllocator* allocator);
+ void Compile();
static std::unique_ptr<CodeGenerator> Create(HGraph* graph,
const CompilerOptions& compiler_options,
OptimizingCompilerStats* stats = nullptr);
@@ -221,7 +207,7 @@ class CodeGenerator : public DeletableArenaObject<kArenaAllocCodeGenerator> {
}
virtual void Initialize() = 0;
- virtual void Finalize(CodeAllocator* allocator);
+ virtual void Finalize();
virtual void EmitLinkerPatches(ArenaVector<linker::LinkerPatch>* linker_patches);
virtual bool NeedsThunkCode(const linker::LinkerPatch& patch) const;
virtual void EmitThunkCode(const linker::LinkerPatch& patch,
@@ -278,20 +264,10 @@ class CodeGenerator : public DeletableArenaObject<kArenaAllocCodeGenerator> {
fpu_spill_mask_ = allocated_registers_.GetFloatingPointRegisters() & fpu_callee_save_mask_;
}
- static uint32_t ComputeRegisterMask(const int* registers, size_t length) {
- uint32_t mask = 0;
- for (size_t i = 0, e = length; i < e; ++i) {
- mask |= (1 << registers[i]);
- }
- return mask;
- }
-
virtual void DumpCoreRegister(std::ostream& stream, int reg) const = 0;
virtual void DumpFloatingPointRegister(std::ostream& stream, int reg) const = 0;
virtual InstructionSet GetInstructionSet() const = 0;
- const CompilerOptions& GetCompilerOptions() const { return compiler_options_; }
-
// Saves the register in the stack. Returns the size taken on stack.
virtual size_t SaveCoreRegister(size_t stack_index, uint32_t reg_id) = 0;
// Restores the register from the stack. Returns the size taken on stack.
@@ -398,6 +374,12 @@ class CodeGenerator : public DeletableArenaObject<kArenaAllocCodeGenerator> {
// TODO: Replace with a catch-entering instruction that records the environment.
void RecordCatchBlockInfo();
+ const CompilerOptions& GetCompilerOptions() const { return compiler_options_; }
+ bool EmitReadBarrier() const;
+ bool EmitBakerReadBarrier() const;
+ bool EmitNonBakerReadBarrier() const;
+ ReadBarrierOption GetCompilerReadBarrierOption() const;
+
// Get the ScopedArenaAllocator used for codegen memory allocation.
ScopedArenaAllocator* GetScopedAllocator();
@@ -469,7 +451,7 @@ class CodeGenerator : public DeletableArenaObject<kArenaAllocCodeGenerator> {
Location to2,
DataType::Type type2);
- static bool InstanceOfNeedsReadBarrier(HInstanceOf* instance_of) {
+ bool InstanceOfNeedsReadBarrier(HInstanceOf* instance_of) {
// Used only for kExactCheck, kAbstractClassCheck, kClassHierarchyCheck and kArrayObjectCheck.
DCHECK(instance_of->GetTypeCheckKind() == TypeCheckKind::kExactCheck ||
instance_of->GetTypeCheckKind() == TypeCheckKind::kAbstractClassCheck ||
@@ -479,14 +461,14 @@ class CodeGenerator : public DeletableArenaObject<kArenaAllocCodeGenerator> {
// If the target class is in the boot image, it's non-moveable and it doesn't matter
// if we compare it with a from-space or to-space reference, the result is the same.
// It's OK to traverse a class hierarchy jumping between from-space and to-space.
- return gUseReadBarrier && !instance_of->GetTargetClass()->IsInBootImage();
+ return EmitReadBarrier() && !instance_of->GetTargetClass()->IsInBootImage();
}
- static ReadBarrierOption ReadBarrierOptionForInstanceOf(HInstanceOf* instance_of) {
+ ReadBarrierOption ReadBarrierOptionForInstanceOf(HInstanceOf* instance_of) {
return InstanceOfNeedsReadBarrier(instance_of) ? kWithReadBarrier : kWithoutReadBarrier;
}
- static bool IsTypeCheckSlowPathFatal(HCheckCast* check_cast) {
+ bool IsTypeCheckSlowPathFatal(HCheckCast* check_cast) {
switch (check_cast->GetTypeCheckKind()) {
case TypeCheckKind::kExactCheck:
case TypeCheckKind::kAbstractClassCheck:
@@ -494,7 +476,7 @@ class CodeGenerator : public DeletableArenaObject<kArenaAllocCodeGenerator> {
case TypeCheckKind::kArrayObjectCheck:
case TypeCheckKind::kInterfaceCheck: {
bool needs_read_barrier =
- gUseReadBarrier && !check_cast->GetTargetClass()->IsInBootImage();
+ EmitReadBarrier() && !check_cast->GetTargetClass()->IsInBootImage();
// We do not emit read barriers for HCheckCast, so we can get false negatives
// and the slow path shall re-check and simply return if the cast is actually OK.
return !needs_read_barrier;
@@ -509,7 +491,7 @@ class CodeGenerator : public DeletableArenaObject<kArenaAllocCodeGenerator> {
UNREACHABLE();
}
- static LocationSummary::CallKind GetCheckCastCallKind(HCheckCast* check_cast) {
+ LocationSummary::CallKind GetCheckCastCallKind(HCheckCast* check_cast) {
return (IsTypeCheckSlowPathFatal(check_cast) && !check_cast->CanThrowIntoCatchBlock())
? LocationSummary::kNoCall // In fact, call on a fatal (non-returning) slow path.
: LocationSummary::kCallOnSlowPath;
@@ -613,7 +595,7 @@ class CodeGenerator : public DeletableArenaObject<kArenaAllocCodeGenerator> {
if (kIsDebugBuild) {
uint32_t shorty_len;
const char* shorty = GetCriticalNativeShorty(invoke, &shorty_len);
- DCHECK_EQ(GetCriticalNativeDirectCallFrameSize(shorty, shorty_len), out_frame_size);
+ CHECK_EQ(GetCriticalNativeDirectCallFrameSize(shorty, shorty_len), out_frame_size);
}
if (out_frame_size != 0u) {
FinishCriticalNativeFrameSetup(out_frame_size, &parallel_move);
@@ -667,7 +649,8 @@ class CodeGenerator : public DeletableArenaObject<kArenaAllocCodeGenerator> {
static uint32_t GetBootImageOffset(ClassRoot class_root);
static uint32_t GetBootImageOffsetOfIntrinsicDeclaringClass(HInvoke* invoke);
- static void CreateSystemArrayCopyLocationSummary(HInvoke* invoke);
+ static LocationSummary* CreateSystemArrayCopyLocationSummary(
+ HInvoke* invoke, int32_t length_threshold = -1, size_t num_temps = 3);
void SetDisassemblyInformation(DisassemblyInformation* info) { disasm_info_ = info; }
DisassemblyInformation* GetDisassemblyInformation() const { return disasm_info_; }
@@ -687,7 +670,7 @@ class CodeGenerator : public DeletableArenaObject<kArenaAllocCodeGenerator> {
virtual HLoadClass::LoadKind GetSupportedLoadClassKind(
HLoadClass::LoadKind desired_class_load_kind) = 0;
- static LocationSummary::CallKind GetLoadStringCallKind(HLoadString* load) {
+ LocationSummary::CallKind GetLoadStringCallKind(HLoadString* load) {
switch (load->GetLoadKind()) {
case HLoadString::LoadKind::kBssEntry:
DCHECK(load->NeedsEnvironment());
@@ -697,7 +680,7 @@ class CodeGenerator : public DeletableArenaObject<kArenaAllocCodeGenerator> {
return LocationSummary::kCallOnMainOnly;
case HLoadString::LoadKind::kJitTableAddress:
DCHECK(!load->NeedsEnvironment());
- return gUseReadBarrier
+ return EmitReadBarrier()
? LocationSummary::kCallOnSlowPath
: LocationSummary::kNoCall;
break;
@@ -731,6 +714,11 @@ class CodeGenerator : public DeletableArenaObject<kArenaAllocCodeGenerator> {
static QuickEntrypointEnum GetArrayAllocationEntrypoint(HNewArray* new_array);
static ScaleFactor ScaleFactorForType(DataType::Type type);
+ ArrayRef<const uint8_t> GetCode() const {
+ return ArrayRef<const uint8_t>(GetAssembler().CodeBufferBaseAddress(),
+ GetAssembler().CodeSize());
+ }
+
protected:
// Patch info used for recording locations of required linker patches and their targets,
// i.e. target method, string, type or code identified by their dex file and index,
@@ -761,6 +749,15 @@ class CodeGenerator : public DeletableArenaObject<kArenaAllocCodeGenerator> {
virtual HGraphVisitor* GetLocationBuilder() = 0;
virtual HGraphVisitor* GetInstructionVisitor() = 0;
+ template <typename RegType>
+ static uint32_t ComputeRegisterMask(const RegType* registers, size_t length) {
+ uint32_t mask = 0;
+ for (size_t i = 0, e = length; i < e; ++i) {
+ mask |= (1 << registers[i]);
+ }
+ return mask;
+ }
+
// Returns the location of the first spilled entry for floating point registers,
// relative to the stack pointer.
uint32_t GetFpuSpillStart() const {
@@ -814,6 +811,10 @@ class CodeGenerator : public DeletableArenaObject<kArenaAllocCodeGenerator> {
StackMapStream* GetStackMapStream();
+ CodeGenerationData* GetCodeGenerationData() {
+ return code_generation_data_.get();
+ }
+
void ReserveJitStringRoot(StringReference string_reference, Handle<mirror::String> string);
uint64_t GetJitStringRootIndex(StringReference string_reference);
void ReserveJitClassRoot(TypeReference type_reference, Handle<mirror::Class> klass);
@@ -848,8 +849,6 @@ class CodeGenerator : public DeletableArenaObject<kArenaAllocCodeGenerator> {
DisassemblyInformation* disasm_info_;
private:
- class CodeGenerationData;
-
void InitializeCodeGenerationData();
size_t GetStackOffsetOfSavedRegister(size_t index);
void GenerateSlowPaths();
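
The ComputeRegisterMask helper, which the header change above relocates and turns into a template over the register type, simply ORs one bit per register id into a 32-bit mask. A small standalone check of that behavior; the function body is copied verbatim from the hunk above, while the surrounding test harness is illustrative:

    #include <cassert>
    #include <cstddef>
    #include <cstdint>

    template <typename RegType>
    static uint32_t ComputeRegisterMask(const RegType* registers, size_t length) {
      uint32_t mask = 0;
      for (size_t i = 0, e = length; i < e; ++i) {
        mask |= (1 << registers[i]);
      }
      return mask;
    }

    int main() {
      const int regs[] = {0, 3, 5};
      // Bits 0, 3 and 5 set: 0b101001 == 0x29.
      assert(ComputeRegisterMask(regs, 3) == 0x29u);
      return 0;
    }
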
diff --git a/compiler/optimizing/code_generator_arm64.cc b/compiler/optimizing/code_generator_arm64.cc
index 41db9a2542..9027976165 100644
--- a/compiler/optimizing/code_generator_arm64.cc
+++ b/compiler/optimizing/code_generator_arm64.cc
@@ -35,6 +35,7 @@
#include "interpreter/mterp/nterp.h"
#include "intrinsics.h"
#include "intrinsics_arm64.h"
+#include "intrinsics_list.h"
#include "intrinsics_utils.h"
#include "linker/linker_patch.h"
#include "lock_word.h"
@@ -44,7 +45,9 @@
#include "offsets.h"
#include "optimizing/common_arm64.h"
#include "optimizing/nodes.h"
+#include "profiling_info_builder.h"
#include "thread.h"
+#include "trace.h"
#include "utils/arm64/assembler_arm64.h"
#include "utils/assembler.h"
#include "utils/stack_checks.h"
@@ -88,6 +91,9 @@ using helpers::VIXLRegCodeFromART;
using helpers::WRegisterFrom;
using helpers::XRegisterFrom;
+// TODO(mythria): Expand SystemRegister in vixl to include this value.
+uint16_t SYS_CNTVCT_EL0 = SystemRegisterEncoder<1, 3, 14, 0, 2>::value;
+
// The compare/jump sequence will generate about (1.5 * num_entries + 3) instructions. While jump
// table version generates 7 instructions and num_entries literals. Compare/jump sequence will
// generates less code/data with a small num_entries.
@@ -582,7 +588,6 @@ class ReadBarrierForHeapReferenceSlowPathARM64 : public SlowPathCodeARM64 {
obj_(obj),
offset_(offset),
index_(index) {
- DCHECK(gUseReadBarrier);
// If `obj` is equal to `out` or `ref`, it means the initial object
// has been overwritten by (or after) the heap object reference load
// to be instrumented, e.g.:
@@ -597,13 +602,13 @@ class ReadBarrierForHeapReferenceSlowPathARM64 : public SlowPathCodeARM64 {
}
void EmitNativeCode(CodeGenerator* codegen) override {
+ DCHECK(codegen->EmitReadBarrier());
CodeGeneratorARM64* arm64_codegen = down_cast<CodeGeneratorARM64*>(codegen);
LocationSummary* locations = instruction_->GetLocations();
DataType::Type type = DataType::Type::kReference;
DCHECK(locations->CanCall());
DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(out_.reg()));
DCHECK(instruction_->IsInstanceFieldGet() ||
- instruction_->IsPredicatedInstanceFieldGet() ||
instruction_->IsStaticFieldGet() ||
instruction_->IsArrayGet() ||
instruction_->IsInstanceOf() ||
@@ -680,9 +685,9 @@ class ReadBarrierForHeapReferenceSlowPathARM64 : public SlowPathCodeARM64 {
DCHECK(instruction_->IsInvoke()) << instruction_->DebugName();
DCHECK(instruction_->GetLocations()->Intrinsified());
HInvoke* invoke = instruction_->AsInvoke();
- DCHECK(IsUnsafeGetObject(invoke) ||
+ DCHECK(IsUnsafeGetReference(invoke) ||
IsVarHandleGet(invoke) ||
- IsUnsafeCASObject(invoke) ||
+ IsUnsafeCASReference(invoke) ||
IsVarHandleCASFamily(invoke)) << invoke->GetIntrinsic();
DCHECK_EQ(offset_, 0u);
DCHECK(index_.IsRegister());
@@ -761,10 +766,10 @@ class ReadBarrierForRootSlowPathARM64 : public SlowPathCodeARM64 {
public:
ReadBarrierForRootSlowPathARM64(HInstruction* instruction, Location out, Location root)
: SlowPathCodeARM64(instruction), out_(out), root_(root) {
- DCHECK(gUseReadBarrier);
}
void EmitNativeCode(CodeGenerator* codegen) override {
+ DCHECK(codegen->EmitReadBarrier());
LocationSummary* locations = instruction_->GetLocations();
DataType::Type type = DataType::Type::kReference;
DCHECK(locations->CanCall());
@@ -842,12 +847,20 @@ class MethodEntryExitHooksSlowPathARM64 : public SlowPathCodeARM64 {
class CompileOptimizedSlowPathARM64 : public SlowPathCodeARM64 {
public:
- CompileOptimizedSlowPathARM64() : SlowPathCodeARM64(/* instruction= */ nullptr) {}
+ explicit CompileOptimizedSlowPathARM64(Register profiling_info)
+ : SlowPathCodeARM64(/* instruction= */ nullptr),
+ profiling_info_(profiling_info) {}
void EmitNativeCode(CodeGenerator* codegen) override {
uint32_t entrypoint_offset =
GetThreadOffset<kArm64PointerSize>(kQuickCompileOptimized).Int32Value();
__ Bind(GetEntryLabel());
+ CodeGeneratorARM64* arm64_codegen = down_cast<CodeGeneratorARM64*>(codegen);
+ UseScratchRegisterScope temps(arm64_codegen->GetVIXLAssembler());
+ Register counter = temps.AcquireW();
+ __ Mov(counter, ProfilingInfo::GetOptimizeThreshold());
+ __ Strh(counter,
+ MemOperand(profiling_info_, ProfilingInfo::BaselineHotnessCountOffset().Int32Value()));
__ Ldr(lr, MemOperand(tr, entrypoint_offset));
// Note: we don't record the call here (and therefore don't generate a stack
// map), as the entrypoint should never be suspended.
@@ -860,6 +873,10 @@ class CompileOptimizedSlowPathARM64 : public SlowPathCodeARM64 {
}
private:
+ // The register where the profiling info is stored when entering the slow
+ // path.
+ Register profiling_info_;
+
DISALLOW_COPY_AND_ASSIGN(CompileOptimizedSlowPathARM64);
};
@@ -936,6 +953,7 @@ Location CriticalNativeCallingConventionVisitorARM64::GetMethodLocation() const
}
namespace detail {
+
// Mark which intrinsics we don't have handcrafted code for.
template <Intrinsics T>
struct IsUnimplemented {
@@ -950,15 +968,13 @@ struct IsUnimplemented {
UNIMPLEMENTED_INTRINSIC_LIST_ARM64(TRUE_OVERRIDE)
#undef TRUE_OVERRIDE
-#include "intrinsics_list.h"
static constexpr bool kIsIntrinsicUnimplemented[] = {
- false, // kNone
+ false, // kNone
#define IS_UNIMPLEMENTED(Intrinsic, ...) \
- IsUnimplemented<Intrinsics::k##Intrinsic>().is_unimplemented,
- INTRINSICS_LIST(IS_UNIMPLEMENTED)
+ IsUnimplemented<Intrinsics::k##Intrinsic>().is_unimplemented,
+ ART_INTRINSICS_LIST(IS_UNIMPLEMENTED)
#undef IS_UNIMPLEMENTED
};
-#undef INTRINSICS_LIST
} // namespace detail
@@ -995,14 +1011,7 @@ CodeGeneratorARM64::CodeGeneratorARM64(HGraph* graph,
boot_image_other_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
call_entrypoint_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
baker_read_barrier_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
- uint32_literals_(std::less<uint32_t>(),
- graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
- uint64_literals_(std::less<uint64_t>(),
- graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
- jit_string_patches_(StringReferenceValueComparator(),
- graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
- jit_class_patches_(TypeReferenceValueComparator(),
- graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
+ jit_patches_(&assembler_, graph->GetAllocator()),
jit_baker_read_barrier_slow_paths_(std::less<uint32_t>(),
graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)) {
// Save the link register (containing the return address) to mimic Quick.
@@ -1036,7 +1045,7 @@ void CodeGeneratorARM64::EmitJumpTables() {
}
}
-void CodeGeneratorARM64::Finalize(CodeAllocator* allocator) {
+void CodeGeneratorARM64::Finalize() {
EmitJumpTables();
// Emit JIT baker read barrier slow paths.
@@ -1051,11 +1060,11 @@ void CodeGeneratorARM64::Finalize(CodeAllocator* allocator) {
// Ensure we emit the literal pool.
__ FinalizeCode();
- CodeGenerator::Finalize(allocator);
+ CodeGenerator::Finalize();
// Verify Baker read barrier linker patches.
if (kIsDebugBuild) {
- ArrayRef<const uint8_t> code = allocator->GetMemory();
+ ArrayRef<const uint8_t> code(GetCode());
for (const BakerReadBarrierPatchInfo& info : baker_read_barrier_patches_) {
DCHECK(info.label.IsBound());
uint32_t literal_offset = info.label.GetLocation();
@@ -1192,8 +1201,9 @@ void LocationsBuilderARM64::VisitMethodExitHook(HMethodExitHook* method_hook) {
void InstructionCodeGeneratorARM64::GenerateMethodEntryExitHook(HInstruction* instruction) {
MacroAssembler* masm = GetVIXLAssembler();
UseScratchRegisterScope temps(masm);
- Register temp = temps.AcquireX();
- Register value = temps.AcquireW();
+ Register addr = temps.AcquireX();
+ Register index = temps.AcquireX();
+ Register value = index.W();
SlowPathCodeARM64* slow_path =
new (codegen_->GetScopedAllocator()) MethodEntryExitHooksSlowPathARM64(instruction);
@@ -1213,9 +1223,44 @@ void InstructionCodeGeneratorARM64::GenerateMethodEntryExitHook(HInstruction* in
MemberOffset offset = instruction->IsMethodExitHook() ?
instrumentation::Instrumentation::HaveMethodExitListenersOffset() :
instrumentation::Instrumentation::HaveMethodEntryListenersOffset();
- __ Mov(temp, address + offset.Int32Value());
- __ Ldrb(value, MemOperand(temp, 0));
- __ Cbnz(value, slow_path->GetEntryLabel());
+ __ Mov(addr, address + offset.Int32Value());
+ __ Ldrb(value, MemOperand(addr, 0));
+ __ Cmp(value, Operand(instrumentation::Instrumentation::kFastTraceListeners));
+ // Check if there are any method entry / exit listeners. If no, continue.
+ __ B(lt, slow_path->GetExitLabel());
+ // Check if there are any slow (jvmti / trace with thread cpu time) method entry / exit listeners.
+ // If yes, just take the slow path.
+ __ B(gt, slow_path->GetEntryLabel());
+
+ // Check if there is place in the buffer to store a new entry, if no, take slow path.
+ uint32_t trace_buffer_index_offset =
+ Thread::TraceBufferIndexOffset<kArm64PointerSize>().Int32Value();
+ __ Ldr(index, MemOperand(tr, trace_buffer_index_offset));
+ __ Subs(index, index, kNumEntriesForWallClock);
+ __ B(lt, slow_path->GetEntryLabel());
+
+ // Update the index in the `Thread`.
+ __ Str(index, MemOperand(tr, trace_buffer_index_offset));
+ // Calculate the entry address in the buffer.
+ // addr = base_addr + sizeof(void*) * index;
+ __ Ldr(addr, MemOperand(tr, Thread::TraceBufferPtrOffset<kArm64PointerSize>().SizeValue()));
+ __ ComputeAddress(addr, MemOperand(addr, index, LSL, TIMES_8));
+
+ Register tmp = index;
+ // Record method pointer and trace action.
+ __ Ldr(tmp, MemOperand(sp, 0));
+ // Use last two bits to encode trace method action. For MethodEntry it is 0
+ // so no need to set the bits since they are 0 already.
+ if (instruction->IsMethodExitHook()) {
+ DCHECK_GE(ArtMethod::Alignment(kRuntimePointerSize), static_cast<size_t>(4));
+ static_assert(enum_cast<int32_t>(TraceAction::kTraceMethodEnter) == 0);
+ static_assert(enum_cast<int32_t>(TraceAction::kTraceMethodExit) == 1);
+ __ Orr(tmp, tmp, Operand(enum_cast<int32_t>(TraceAction::kTraceMethodExit)));
+ }
+ __ Str(tmp, MemOperand(addr, kMethodOffsetInBytes));
+ // Record the timestamp.
+ __ Mrs(tmp, (SystemRegister)SYS_CNTVCT_EL0);
+ __ Str(tmp, MemOperand(addr, kTimestampOffsetInBytes));
__ Bind(slow_path->GetExitLabel());
}
@@ -1254,21 +1299,21 @@ void CodeGeneratorARM64::MaybeIncrementHotness(bool is_frame_entry) {
}
if (GetGraph()->IsCompilingBaseline() && !Runtime::Current()->IsAotCompiler()) {
- SlowPathCodeARM64* slow_path = new (GetScopedAllocator()) CompileOptimizedSlowPathARM64();
- AddSlowPath(slow_path);
ProfilingInfo* info = GetGraph()->GetProfilingInfo();
DCHECK(info != nullptr);
DCHECK(!HasEmptyFrame());
uint64_t address = reinterpret_cast64<uint64_t>(info);
vixl::aarch64::Label done;
UseScratchRegisterScope temps(masm);
- Register temp = temps.AcquireX();
Register counter = temps.AcquireW();
- __ Ldr(temp, DeduplicateUint64Literal(address));
- __ Ldrh(counter, MemOperand(temp, ProfilingInfo::BaselineHotnessCountOffset().Int32Value()));
+ SlowPathCodeARM64* slow_path =
+ new (GetScopedAllocator()) CompileOptimizedSlowPathARM64(/* profiling_info= */ lr);
+ AddSlowPath(slow_path);
+ __ Ldr(lr, jit_patches_.DeduplicateUint64Literal(address));
+ __ Ldrh(counter, MemOperand(lr, ProfilingInfo::BaselineHotnessCountOffset().Int32Value()));
__ Cbz(counter, slow_path->GetEntryLabel());
__ Add(counter, counter, -1);
- __ Strh(counter, MemOperand(temp, ProfilingInfo::BaselineHotnessCountOffset().Int32Value()));
+ __ Strh(counter, MemOperand(lr, ProfilingInfo::BaselineHotnessCountOffset().Int32Value()));
__ Bind(slow_path->GetExitLabel());
}
}
@@ -1532,15 +1577,15 @@ size_t CodeGeneratorARM64::RestoreCoreRegister(size_t stack_index, uint32_t reg_
return kArm64WordSize;
}
-size_t CodeGeneratorARM64::SaveFloatingPointRegister(size_t stack_index ATTRIBUTE_UNUSED,
- uint32_t reg_id ATTRIBUTE_UNUSED) {
+size_t CodeGeneratorARM64::SaveFloatingPointRegister([[maybe_unused]] size_t stack_index,
+ [[maybe_unused]] uint32_t reg_id) {
LOG(FATAL) << "FP registers shouldn't be saved/restored individually, "
<< "use SaveRestoreLiveRegistersHelper";
UNREACHABLE();
}
-size_t CodeGeneratorARM64::RestoreFloatingPointRegister(size_t stack_index ATTRIBUTE_UNUSED,
- uint32_t reg_id ATTRIBUTE_UNUSED) {
+size_t CodeGeneratorARM64::RestoreFloatingPointRegister([[maybe_unused]] size_t stack_index,
+ [[maybe_unused]] uint32_t reg_id) {
LOG(FATAL) << "FP registers shouldn't be saved/restored individually, "
<< "use SaveRestoreLiveRegistersHelper";
UNREACHABLE();
@@ -2136,14 +2181,10 @@ void LocationsBuilderARM64::HandleBinaryOp(HBinaryOperation* instr) {
void LocationsBuilderARM64::HandleFieldGet(HInstruction* instruction,
const FieldInfo& field_info) {
- DCHECK(instruction->IsInstanceFieldGet() ||
- instruction->IsStaticFieldGet() ||
- instruction->IsPredicatedInstanceFieldGet());
-
- bool is_predicated = instruction->IsPredicatedInstanceFieldGet();
+ DCHECK(instruction->IsInstanceFieldGet() || instruction->IsStaticFieldGet());
bool object_field_get_with_read_barrier =
- gUseReadBarrier && (instruction->GetType() == DataType::Type::kReference);
+ (instruction->GetType() == DataType::Type::kReference) && codegen_->EmitReadBarrier();
LocationSummary* locations =
new (GetGraph()->GetAllocator()) LocationSummary(instruction,
object_field_get_with_read_barrier
@@ -2160,37 +2201,24 @@ void LocationsBuilderARM64::HandleFieldGet(HInstruction* instruction,
}
}
// Input for object receiver.
- locations->SetInAt(is_predicated ? 1 : 0, Location::RequiresRegister());
+ locations->SetInAt(0, Location::RequiresRegister());
if (DataType::IsFloatingPointType(instruction->GetType())) {
- if (is_predicated) {
- locations->SetInAt(0, Location::RequiresFpuRegister());
- locations->SetOut(Location::SameAsFirstInput());
- } else {
- locations->SetOut(Location::RequiresFpuRegister());
- }
+ locations->SetOut(Location::RequiresFpuRegister());
} else {
- if (is_predicated) {
- locations->SetInAt(0, Location::RequiresRegister());
- locations->SetOut(Location::SameAsFirstInput());
- } else {
- // The output overlaps for an object field get when read barriers
- // are enabled: we do not want the load to overwrite the object's
- // location, as we need it to emit the read barrier.
- locations->SetOut(Location::RequiresRegister(),
- object_field_get_with_read_barrier ? Location::kOutputOverlap
- : Location::kNoOutputOverlap);
- }
+ // The output overlaps for an object field get when read barriers
+ // are enabled: we do not want the load to overwrite the object's
+ // location, as we need it to emit the read barrier.
+ locations->SetOut(
+ Location::RequiresRegister(),
+ object_field_get_with_read_barrier ? Location::kOutputOverlap : Location::kNoOutputOverlap);
}
}
void InstructionCodeGeneratorARM64::HandleFieldGet(HInstruction* instruction,
const FieldInfo& field_info) {
- DCHECK(instruction->IsInstanceFieldGet() ||
- instruction->IsStaticFieldGet() ||
- instruction->IsPredicatedInstanceFieldGet());
- bool is_predicated = instruction->IsPredicatedInstanceFieldGet();
+ DCHECK(instruction->IsInstanceFieldGet() || instruction->IsStaticFieldGet());
LocationSummary* locations = instruction->GetLocations();
- uint32_t receiver_input = is_predicated ? 1 : 0;
+ uint32_t receiver_input = 0;
Location base_loc = locations->InAt(receiver_input);
Location out = locations->Out();
uint32_t offset = field_info.GetFieldOffset().Uint32Value();
@@ -2199,8 +2227,7 @@ void InstructionCodeGeneratorARM64::HandleFieldGet(HInstruction* instruction,
MemOperand field =
HeapOperand(InputRegisterAt(instruction, receiver_input), field_info.GetFieldOffset());
- if (gUseReadBarrier && kUseBakerReadBarrier &&
- load_type == DataType::Type::kReference) {
+ if (load_type == DataType::Type::kReference && codegen_->EmitBakerReadBarrier()) {
// Object FieldGet with Baker's read barrier case.
// /* HeapReference<Object> */ out = *(base + offset)
Register base = RegisterFrom(base_loc, DataType::Type::kReference);
@@ -2261,20 +2288,12 @@ void InstructionCodeGeneratorARM64::HandleFieldSet(HInstruction* instruction,
bool value_can_be_null,
WriteBarrierKind write_barrier_kind) {
DCHECK(instruction->IsInstanceFieldSet() || instruction->IsStaticFieldSet());
- bool is_predicated =
- instruction->IsInstanceFieldSet() && instruction->AsInstanceFieldSet()->GetIsPredicatedSet();
Register obj = InputRegisterAt(instruction, 0);
CPURegister value = InputCPURegisterOrZeroRegAt(instruction, 1);
CPURegister source = value;
Offset offset = field_info.GetFieldOffset();
DataType::Type field_type = field_info.GetFieldType();
- std::optional<vixl::aarch64::Label> pred_is_null;
- if (is_predicated) {
- pred_is_null.emplace();
- __ Cbz(obj, &*pred_is_null);
- }
-
{
// We use a block to end the scratch scope before the write barrier, thus
// freeing the temporary registers so they can be used in `MarkGCCard`.
@@ -2306,10 +2325,6 @@ void InstructionCodeGeneratorARM64::HandleFieldSet(HInstruction* instruction,
Register(value),
value_can_be_null && write_barrier_kind == WriteBarrierKind::kEmitWithNullCheck);
}
-
- if (is_predicated) {
- __ Bind(&*pred_is_null);
- }
}
void InstructionCodeGeneratorARM64::HandleBinaryOp(HBinaryOperation* instr) {
@@ -2647,7 +2662,7 @@ void InstructionCodeGeneratorARM64::VisitMultiplyAccumulate(HMultiplyAccumulate*
void LocationsBuilderARM64::VisitArrayGet(HArrayGet* instruction) {
bool object_array_get_with_read_barrier =
- gUseReadBarrier && (instruction->GetType() == DataType::Type::kReference);
+ (instruction->GetType() == DataType::Type::kReference) && codegen_->EmitReadBarrier();
LocationSummary* locations =
new (GetGraph()->GetAllocator()) LocationSummary(instruction,
object_array_get_with_read_barrier
@@ -2703,10 +2718,9 @@ void InstructionCodeGeneratorARM64::VisitArrayGet(HArrayGet* instruction) {
// does not support the HIntermediateAddress instruction.
DCHECK(!((type == DataType::Type::kReference) &&
instruction->GetArray()->IsIntermediateAddress() &&
- gUseReadBarrier &&
- !kUseBakerReadBarrier));
+ codegen_->EmitNonBakerReadBarrier()));
- if (type == DataType::Type::kReference && gUseReadBarrier && kUseBakerReadBarrier) {
+ if (type == DataType::Type::kReference && codegen_->EmitBakerReadBarrier()) {
// Object ArrayGet with Baker's read barrier case.
// Note that a potential implicit null check is handled in the
// CodeGeneratorARM64::GenerateArrayLoadWithBakerReadBarrier call.
@@ -3647,7 +3661,7 @@ void LocationsBuilderARM64::VisitDoubleConstant(HDoubleConstant* constant) {
}
void InstructionCodeGeneratorARM64::VisitDoubleConstant(
- HDoubleConstant* constant ATTRIBUTE_UNUSED) {
+ [[maybe_unused]] HDoubleConstant* constant) {
// Will be generated at use site.
}
@@ -3655,8 +3669,7 @@ void LocationsBuilderARM64::VisitExit(HExit* exit) {
exit->SetLocations(nullptr);
}
-void InstructionCodeGeneratorARM64::VisitExit(HExit* exit ATTRIBUTE_UNUSED) {
-}
+void InstructionCodeGeneratorARM64::VisitExit([[maybe_unused]] HExit* exit) {}
void LocationsBuilderARM64::VisitFloatConstant(HFloatConstant* constant) {
LocationSummary* locations =
@@ -3664,7 +3677,7 @@ void LocationsBuilderARM64::VisitFloatConstant(HFloatConstant* constant) {
locations->SetOut(Location::ConstantLocation(constant));
}
-void InstructionCodeGeneratorARM64::VisitFloatConstant(HFloatConstant* constant ATTRIBUTE_UNUSED) {
+void InstructionCodeGeneratorARM64::VisitFloatConstant([[maybe_unused]] HFloatConstant* constant) {
// Will be generated at use site.
}
@@ -3747,7 +3760,7 @@ void InstructionCodeGeneratorARM64::GenerateTestAndBranch(HInstruction* instruct
// The condition instruction has been materialized, compare the output to 0.
Location cond_val = instruction->GetLocations()->InAt(condition_input_index);
DCHECK(cond_val.IsRegister());
- if (true_target == nullptr) {
+ if (true_target == nullptr) {
__ Cbz(InputRegisterAt(instruction, condition_input_index), false_target);
} else {
__ Cbnz(InputRegisterAt(instruction, condition_input_index), true_target);
@@ -3835,6 +3848,35 @@ void InstructionCodeGeneratorARM64::VisitIf(HIf* if_instr) {
if (codegen_->GoesToNextBlock(if_instr->GetBlock(), false_successor)) {
false_target = nullptr;
}
+ if (IsBooleanValueOrMaterializedCondition(if_instr->InputAt(0))) {
+ if (GetGraph()->IsCompilingBaseline() &&
+ codegen_->GetCompilerOptions().ProfileBranches() &&
+ !Runtime::Current()->IsAotCompiler()) {
+ DCHECK(if_instr->InputAt(0)->IsCondition());
+ ProfilingInfo* info = GetGraph()->GetProfilingInfo();
+ DCHECK(info != nullptr);
+ BranchCache* cache = info->GetBranchCache(if_instr->GetDexPc());
+ // Currently, not all If branches are profiled.
+ if (cache != nullptr) {
+ uint64_t address =
+ reinterpret_cast64<uint64_t>(cache) + BranchCache::FalseOffset().Int32Value();
+ static_assert(
+ BranchCache::TrueOffset().Int32Value() - BranchCache::FalseOffset().Int32Value() == 2,
+ "Unexpected offsets for BranchCache");
+ vixl::aarch64::Label done;
+ UseScratchRegisterScope temps(GetVIXLAssembler());
+ Register temp = temps.AcquireX();
+ Register counter = temps.AcquireW();
+ Register condition = InputRegisterAt(if_instr, 0).X();
+ __ Mov(temp, address);
+ __ Ldrh(counter, MemOperand(temp, condition, LSL, 1));
+ __ Add(counter, counter, 1);
+ __ Tbnz(counter, 16, &done);
+ __ Strh(counter, MemOperand(temp, condition, LSL, 1));
+ __ Bind(&done);
+ }
+ }
+ }
GenerateTestAndBranch(if_instr, /* condition_input_index= */ 0, true_target, false_target);
}
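
For baseline compilations with ProfileBranches() enabled, the block above bumps a 16-bit taken/not-taken counter in the method's BranchCache: the materialized condition (0 or 1) scaled by two bytes selects the false or true slot, and the Tbnz on bit 16 skips the store once the counter would wrap, so it saturates at 0xFFFF. A hedged sketch of that update; the struct is a stand-in whose only relevant property is the two-byte distance between the counters, which the static_assert above verifies:

#include <cstdint>

struct FakeBranchCache {  // stand-in for art::BranchCache
  uint16_t false_count;   // BranchCache::FalseOffset()
  uint16_t true_count;    // BranchCache::TrueOffset() == FalseOffset() + 2
};

void ProfileBranch(FakeBranchCache* cache, bool condition) {
  // MemOperand(temp, condition, LSL, 1): select the false/true slot.
  uint16_t* slot = condition ? &cache->true_count : &cache->false_count;
  uint32_t counter = static_cast<uint32_t>(*slot) + 1u;  // Ldrh; Add counter, counter, 1
  if ((counter & (1u << 16)) == 0) {                     // Tbnz counter, 16, &done
    *slot = static_cast<uint16_t>(counter);              // Strh (skipped once saturated)
  }
}
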
@@ -3876,7 +3918,7 @@ static inline bool IsConditionOnFloatingPointValues(HInstruction* condition) {
}
static inline Condition GetConditionForSelect(HCondition* condition) {
- IfCondition cond = condition->AsCondition()->GetCondition();
+ IfCondition cond = condition->GetCondition();
return IsConditionOnFloatingPointValues(condition) ? ARM64FPCondition(cond, condition->IsGtBias())
: ARM64Condition(cond);
}
@@ -3888,8 +3930,8 @@ void LocationsBuilderARM64::VisitSelect(HSelect* select) {
locations->SetInAt(1, Location::RequiresFpuRegister());
locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
} else {
- HConstant* cst_true_value = select->GetTrueValue()->AsConstant();
- HConstant* cst_false_value = select->GetFalseValue()->AsConstant();
+ HConstant* cst_true_value = select->GetTrueValue()->AsConstantOrNull();
+ HConstant* cst_false_value = select->GetFalseValue()->AsConstantOrNull();
bool is_true_value_constant = cst_true_value != nullptr;
bool is_false_value_constant = cst_false_value != nullptr;
// Ask VIXL whether we should synthesize constants in registers.
@@ -3972,23 +4014,10 @@ void CodeGeneratorARM64::GenerateNop() {
__ Nop();
}
-void LocationsBuilderARM64::VisitPredicatedInstanceFieldGet(
- HPredicatedInstanceFieldGet* instruction) {
- HandleFieldGet(instruction, instruction->GetFieldInfo());
-}
-
void LocationsBuilderARM64::VisitInstanceFieldGet(HInstanceFieldGet* instruction) {
HandleFieldGet(instruction, instruction->GetFieldInfo());
}
-void InstructionCodeGeneratorARM64::VisitPredicatedInstanceFieldGet(
- HPredicatedInstanceFieldGet* instruction) {
- vixl::aarch64::Label finish;
- __ Cbz(InputRegisterAt(instruction, 1), &finish);
- HandleFieldGet(instruction, instruction->GetFieldInfo());
- __ Bind(&finish);
-}
-
void InstructionCodeGeneratorARM64::VisitInstanceFieldGet(HInstanceFieldGet* instruction) {
HandleFieldGet(instruction, instruction->GetFieldInfo());
}
@@ -4005,8 +4034,8 @@ void InstructionCodeGeneratorARM64::VisitInstanceFieldSet(HInstanceFieldSet* ins
}
// Temp is used for read barrier.
-static size_t NumberOfInstanceOfTemps(TypeCheckKind type_check_kind) {
- if (gUseReadBarrier &&
+static size_t NumberOfInstanceOfTemps(bool emit_read_barrier, TypeCheckKind type_check_kind) {
+ if (emit_read_barrier &&
(kUseBakerReadBarrier ||
type_check_kind == TypeCheckKind::kAbstractClassCheck ||
type_check_kind == TypeCheckKind::kClassHierarchyCheck ||
@@ -4019,11 +4048,11 @@ static size_t NumberOfInstanceOfTemps(TypeCheckKind type_check_kind) {
// Interface case has 3 temps, one for holding the number of interfaces, one for the current
// interface pointer, one for loading the current interface.
// The other checks have one temp for loading the object's class.
-static size_t NumberOfCheckCastTemps(TypeCheckKind type_check_kind) {
+static size_t NumberOfCheckCastTemps(bool emit_read_barrier, TypeCheckKind type_check_kind) {
if (type_check_kind == TypeCheckKind::kInterfaceCheck) {
return 3;
}
- return 1 + NumberOfInstanceOfTemps(type_check_kind);
+ return 1 + NumberOfInstanceOfTemps(emit_read_barrier, type_check_kind);
}
void LocationsBuilderARM64::VisitInstanceOf(HInstanceOf* instruction) {
@@ -4035,7 +4064,7 @@ void LocationsBuilderARM64::VisitInstanceOf(HInstanceOf* instruction) {
case TypeCheckKind::kAbstractClassCheck:
case TypeCheckKind::kClassHierarchyCheck:
case TypeCheckKind::kArrayObjectCheck: {
- bool needs_read_barrier = CodeGenerator::InstanceOfNeedsReadBarrier(instruction);
+ bool needs_read_barrier = codegen_->InstanceOfNeedsReadBarrier(instruction);
call_kind = needs_read_barrier ? LocationSummary::kCallOnSlowPath : LocationSummary::kNoCall;
baker_read_barrier_slow_path = kUseBakerReadBarrier && needs_read_barrier;
break;
@@ -4066,7 +4095,8 @@ void LocationsBuilderARM64::VisitInstanceOf(HInstanceOf* instruction) {
// Note that TypeCheckSlowPathARM64 uses this register too.
locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap);
// Add temps if necessary for read barriers.
- locations->AddRegisterTemps(NumberOfInstanceOfTemps(type_check_kind));
+ locations->AddRegisterTemps(
+ NumberOfInstanceOfTemps(codegen_->EmitReadBarrier(), type_check_kind));
}
void InstructionCodeGeneratorARM64::VisitInstanceOf(HInstanceOf* instruction) {
@@ -4079,7 +4109,7 @@ void InstructionCodeGeneratorARM64::VisitInstanceOf(HInstanceOf* instruction) {
: InputRegisterAt(instruction, 1);
Location out_loc = locations->Out();
Register out = OutputRegister(instruction);
- const size_t num_temps = NumberOfInstanceOfTemps(type_check_kind);
+ const size_t num_temps = NumberOfInstanceOfTemps(codegen_->EmitReadBarrier(), type_check_kind);
DCHECK_LE(num_temps, 1u);
Location maybe_temp_loc = (num_temps >= 1) ? locations->GetTemp(0) : Location::NoLocation();
uint32_t class_offset = mirror::Object::ClassOffset().Int32Value();
@@ -4099,7 +4129,7 @@ void InstructionCodeGeneratorARM64::VisitInstanceOf(HInstanceOf* instruction) {
switch (type_check_kind) {
case TypeCheckKind::kExactCheck: {
ReadBarrierOption read_barrier_option =
- CodeGenerator::ReadBarrierOptionForInstanceOf(instruction);
+ codegen_->ReadBarrierOptionForInstanceOf(instruction);
// /* HeapReference<Class> */ out = obj->klass_
GenerateReferenceLoadTwoRegisters(instruction,
out_loc,
@@ -4117,7 +4147,7 @@ void InstructionCodeGeneratorARM64::VisitInstanceOf(HInstanceOf* instruction) {
case TypeCheckKind::kAbstractClassCheck: {
ReadBarrierOption read_barrier_option =
- CodeGenerator::ReadBarrierOptionForInstanceOf(instruction);
+ codegen_->ReadBarrierOptionForInstanceOf(instruction);
// /* HeapReference<Class> */ out = obj->klass_
GenerateReferenceLoadTwoRegisters(instruction,
out_loc,
@@ -4148,7 +4178,7 @@ void InstructionCodeGeneratorARM64::VisitInstanceOf(HInstanceOf* instruction) {
case TypeCheckKind::kClassHierarchyCheck: {
ReadBarrierOption read_barrier_option =
- CodeGenerator::ReadBarrierOptionForInstanceOf(instruction);
+ codegen_->ReadBarrierOptionForInstanceOf(instruction);
// /* HeapReference<Class> */ out = obj->klass_
GenerateReferenceLoadTwoRegisters(instruction,
out_loc,
@@ -4180,7 +4210,7 @@ void InstructionCodeGeneratorARM64::VisitInstanceOf(HInstanceOf* instruction) {
case TypeCheckKind::kArrayObjectCheck: {
ReadBarrierOption read_barrier_option =
- CodeGenerator::ReadBarrierOptionForInstanceOf(instruction);
+ codegen_->ReadBarrierOptionForInstanceOf(instruction);
// /* HeapReference<Class> */ out = obj->klass_
GenerateReferenceLoadTwoRegisters(instruction,
out_loc,
@@ -4297,7 +4327,7 @@ void InstructionCodeGeneratorARM64::VisitInstanceOf(HInstanceOf* instruction) {
void LocationsBuilderARM64::VisitCheckCast(HCheckCast* instruction) {
TypeCheckKind type_check_kind = instruction->GetTypeCheckKind();
- LocationSummary::CallKind call_kind = CodeGenerator::GetCheckCastCallKind(instruction);
+ LocationSummary::CallKind call_kind = codegen_->GetCheckCastCallKind(instruction);
LocationSummary* locations =
new (GetGraph()->GetAllocator()) LocationSummary(instruction, call_kind);
locations->SetInAt(0, Location::RequiresRegister());
@@ -4308,8 +4338,7 @@ void LocationsBuilderARM64::VisitCheckCast(HCheckCast* instruction) {
} else {
locations->SetInAt(1, Location::RequiresRegister());
}
- // Add temps for read barriers and other uses. One is used by TypeCheckSlowPathARM64.
- locations->AddRegisterTemps(NumberOfCheckCastTemps(type_check_kind));
+ locations->AddRegisterTemps(NumberOfCheckCastTemps(codegen_->EmitReadBarrier(), type_check_kind));
}
void InstructionCodeGeneratorARM64::VisitCheckCast(HCheckCast* instruction) {
@@ -4320,7 +4349,7 @@ void InstructionCodeGeneratorARM64::VisitCheckCast(HCheckCast* instruction) {
Register cls = (type_check_kind == TypeCheckKind::kBitstringCheck)
? Register()
: InputRegisterAt(instruction, 1);
- const size_t num_temps = NumberOfCheckCastTemps(type_check_kind);
+ const size_t num_temps = NumberOfCheckCastTemps(codegen_->EmitReadBarrier(), type_check_kind);
DCHECK_GE(num_temps, 1u);
DCHECK_LE(num_temps, 3u);
Location temp_loc = locations->GetTemp(0);
@@ -4336,7 +4365,7 @@ void InstructionCodeGeneratorARM64::VisitCheckCast(HCheckCast* instruction) {
const uint32_t object_array_data_offset =
mirror::Array::DataOffset(kHeapReferenceSize).Uint32Value();
- bool is_type_check_slow_path_fatal = CodeGenerator::IsTypeCheckSlowPathFatal(instruction);
+ bool is_type_check_slow_path_fatal = codegen_->IsTypeCheckSlowPathFatal(instruction);
SlowPathCodeARM64* type_check_slow_path =
new (codegen_->GetScopedAllocator()) TypeCheckSlowPathARM64(
instruction, is_type_check_slow_path_fatal);
@@ -4478,12 +4507,11 @@ void InstructionCodeGeneratorARM64::VisitCheckCast(HCheckCast* instruction) {
kWithoutReadBarrier);
// /* HeapReference<Class> */ temp = temp->iftable_
- GenerateReferenceLoadTwoRegisters(instruction,
- temp_loc,
- temp_loc,
- iftable_offset,
- maybe_temp2_loc,
- kWithoutReadBarrier);
+ GenerateReferenceLoadOneRegister(instruction,
+ temp_loc,
+ iftable_offset,
+ maybe_temp2_loc,
+ kWithoutReadBarrier);
// Iftable is never null.
__ Ldr(WRegisterFrom(maybe_temp2_loc), HeapOperand(temp.W(), array_length_offset));
// Loop through the iftable and check if any class matches.
@@ -4525,7 +4553,7 @@ void LocationsBuilderARM64::VisitIntConstant(HIntConstant* constant) {
locations->SetOut(Location::ConstantLocation(constant));
}
-void InstructionCodeGeneratorARM64::VisitIntConstant(HIntConstant* constant ATTRIBUTE_UNUSED) {
+void InstructionCodeGeneratorARM64::VisitIntConstant([[maybe_unused]] HIntConstant* constant) {
// Will be generated at use site.
}
@@ -4534,7 +4562,7 @@ void LocationsBuilderARM64::VisitNullConstant(HNullConstant* constant) {
locations->SetOut(Location::ConstantLocation(constant));
}
-void InstructionCodeGeneratorARM64::VisitNullConstant(HNullConstant* constant ATTRIBUTE_UNUSED) {
+void InstructionCodeGeneratorARM64::VisitNullConstant([[maybe_unused]] HNullConstant* constant) {
// Will be generated at use site.
}
@@ -4566,24 +4594,26 @@ void LocationsBuilderARM64::VisitInvokeInterface(HInvokeInterface* invoke) {
void CodeGeneratorARM64::MaybeGenerateInlineCacheCheck(HInstruction* instruction,
Register klass) {
DCHECK_EQ(klass.GetCode(), 0u);
- // We know the destination of an intrinsic, so no need to record inline
- // caches.
- if (!instruction->GetLocations()->Intrinsified() &&
- GetGraph()->IsCompilingBaseline() &&
- !Runtime::Current()->IsAotCompiler()) {
- DCHECK(!instruction->GetEnvironment()->IsFromInlinedInvoke());
+ if (ProfilingInfoBuilder::IsInlineCacheUseful(instruction->AsInvoke(), this)) {
ProfilingInfo* info = GetGraph()->GetProfilingInfo();
DCHECK(info != nullptr);
- InlineCache* cache = info->GetInlineCache(instruction->GetDexPc());
- uint64_t address = reinterpret_cast64<uint64_t>(cache);
- vixl::aarch64::Label done;
- __ Mov(x8, address);
- __ Ldr(x9, MemOperand(x8, InlineCache::ClassesOffset().Int32Value()));
- // Fast path for a monomorphic cache.
- __ Cmp(klass, x9);
- __ B(eq, &done);
- InvokeRuntime(kQuickUpdateInlineCache, instruction, instruction->GetDexPc());
- __ Bind(&done);
+ InlineCache* cache = ProfilingInfoBuilder::GetInlineCache(info, instruction->AsInvoke());
+ if (cache != nullptr) {
+ uint64_t address = reinterpret_cast64<uint64_t>(cache);
+ vixl::aarch64::Label done;
+ __ Mov(x8, address);
+ __ Ldr(w9, MemOperand(x8, InlineCache::ClassesOffset().Int32Value()));
+ // Fast path for a monomorphic cache.
+ __ Cmp(klass.W(), w9);
+ __ B(eq, &done);
+ InvokeRuntime(kQuickUpdateInlineCache, instruction, instruction->GetDexPc());
+ __ Bind(&done);
+ } else {
+ // This is unexpected, but we don't guarantee stable compilation across
+      // JIT runs, so just warn about it.
+ ScopedObjectAccess soa(Thread::Current());
+ LOG(WARNING) << "Missing inline cache for " << GetGraph()->GetArtMethod()->PrettyMethod();
+ }
}
}
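
The rewritten check above only emits the inline-cache fast path when ProfilingInfoBuilder considers the cache useful, and it now compares the 32-bit compressed class reference (w9) instead of doing a 64-bit load. Roughly, the emitted fast path behaves like this sketch; the types and the function-pointer parameter are illustrative stand-ins for art::InlineCache and the kQuickUpdateInlineCache entrypoint:

#include <cstdint>

struct FakeInlineCache {  // stand-in for art::InlineCache
  uint32_t classes[5];    // ClassesOffset() points at the first slot; the size is illustrative
};

void MaybeUpdateInlineCache(FakeInlineCache* cache,
                            uint32_t klass,                   // compressed class reference of the receiver
                            void (*update_inline_cache)()) {  // stand-in for the runtime entrypoint
  if (cache->classes[0] == klass) {
    return;  // Monomorphic fast path: the cache already records this class.
  }
  update_inline_cache();  // Slow path: let the runtime record the new receiver class.
}
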
@@ -4709,8 +4739,8 @@ static bool TryGenerateIntrinsicCode(HInvoke* invoke, CodeGeneratorARM64* codege
}
HInvokeStaticOrDirect::DispatchInfo CodeGeneratorARM64::GetSupportedInvokeStaticOrDirectDispatch(
- const HInvokeStaticOrDirect::DispatchInfo& desired_dispatch_info,
- ArtMethod* method ATTRIBUTE_UNUSED) {
+ const HInvokeStaticOrDirect::DispatchInfo& desired_dispatch_info,
+ [[maybe_unused]] ArtMethod* method) {
// On ARM64 we support all dispatch types.
return desired_dispatch_info;
}
@@ -4749,7 +4779,8 @@ void CodeGeneratorARM64::LoadMethod(MethodLoadKind load_kind, Location temp, HIn
case MethodLoadKind::kJitDirectAddress: {
// Load method address from literal pool.
__ Ldr(XRegisterFrom(temp),
- DeduplicateUint64Literal(reinterpret_cast<uint64_t>(invoke->GetResolvedMethod())));
+ jit_patches_.DeduplicateUint64Literal(
+ reinterpret_cast<uint64_t>(invoke->GetResolvedMethod())));
break;
}
case MethodLoadKind::kRuntimeCall: {
@@ -4775,14 +4806,12 @@ void CodeGeneratorARM64::GenerateStaticOrDirectCall(
__ Ldr(XRegisterFrom(temp), MemOperand(tr, offset));
break;
}
- case MethodLoadKind::kRecursive: {
+ case MethodLoadKind::kRecursive:
callee_method = invoke->GetLocations()->InAt(invoke->GetCurrentMethodIndex());
break;
- }
- case MethodLoadKind::kRuntimeCall: {
+ case MethodLoadKind::kRuntimeCall:
GenerateInvokeStaticOrDirectRuntimeCall(invoke, temp, slow_path);
return; // No code pointer retrieval; the runtime performs the call directly.
- }
case MethodLoadKind::kBootImageLinkTimePcRelative:
DCHECK(GetCompilerOptions().IsBootImage() || GetCompilerOptions().IsBootImageExtension());
if (invoke->GetCodePtrLocation() == CodePtrLocation::kCallCriticalNative) {
@@ -4798,10 +4827,9 @@ void CodeGeneratorARM64::GenerateStaticOrDirectCall(
break;
}
FALLTHROUGH_INTENDED;
- default: {
+ default:
LoadMethod(invoke->GetMethodLoadKind(), temp, invoke);
break;
- }
}
auto call_lr = [&]() {
@@ -4906,6 +4934,7 @@ void CodeGeneratorARM64::GenerateVirtualCall(
}
// Instead of simply (possibly) unpoisoning `temp` here, we should
// emit a read barrier for the previous class reference load.
+ // However this is not required in practice, as this is an
// intermediate/temporary reference and because the current
// concurrent copying collector keeps the from-space memory
// intact/accessible until the end of the marking phase (the
@@ -5090,25 +5119,8 @@ vixl::aarch64::Label* CodeGeneratorARM64::NewPcRelativePatch(
return label;
}
-vixl::aarch64::Literal<uint32_t>* CodeGeneratorARM64::DeduplicateBootImageAddressLiteral(
- uint64_t address) {
- return DeduplicateUint32Literal(dchecked_integral_cast<uint32_t>(address));
-}
-
-vixl::aarch64::Literal<uint32_t>* CodeGeneratorARM64::DeduplicateJitStringLiteral(
- const DexFile& dex_file, dex::StringIndex string_index, Handle<mirror::String> handle) {
- ReserveJitStringRoot(StringReference(&dex_file, string_index), handle);
- return jit_string_patches_.GetOrCreate(
- StringReference(&dex_file, string_index),
- [this]() { return __ CreateLiteralDestroyedWithPool<uint32_t>(/* value= */ 0u); });
-}
-
-vixl::aarch64::Literal<uint32_t>* CodeGeneratorARM64::DeduplicateJitClassLiteral(
- const DexFile& dex_file, dex::TypeIndex type_index, Handle<mirror::Class> handle) {
- ReserveJitClassRoot(TypeReference(&dex_file, type_index), handle);
- return jit_class_patches_.GetOrCreate(
- TypeReference(&dex_file, type_index),
- [this]() { return __ CreateLiteralDestroyedWithPool<uint32_t>(/* value= */ 0u); });
+void CodeGeneratorARM64::EmitJitRootPatches(uint8_t* code, const uint8_t* roots_data) {
+ jit_patches_.EmitJitRootPatches(code, roots_data, *GetCodeGenerationData());
}
void CodeGeneratorARM64::EmitAdrpPlaceholder(vixl::aarch64::Label* fixup_label,
@@ -5171,7 +5183,7 @@ void CodeGeneratorARM64::LoadBootImageAddress(vixl::aarch64::Register reg,
void CodeGeneratorARM64::LoadTypeForBootImageIntrinsic(vixl::aarch64::Register reg,
TypeReference target_type) {
- // Load the class the same way as for HLoadClass::LoadKind::kBootImageLinkTimePcRelative.
+ // Load the type the same way as for HLoadClass::LoadKind::kBootImageLinkTimePcRelative.
DCHECK(GetCompilerOptions().IsBootImage() || GetCompilerOptions().IsBootImageExtension());
// Add ADRP with its PC-relative type patch.
vixl::aarch64::Label* adrp_label =
@@ -5332,19 +5344,7 @@ void CodeGeneratorARM64::EmitThunkCode(const linker::LinkerPatch& patch,
assembler.FinalizeCode();
code->resize(assembler.CodeSize());
MemoryRegion code_region(code->data(), code->size());
- assembler.FinalizeInstructions(code_region);
-}
-
-vixl::aarch64::Literal<uint32_t>* CodeGeneratorARM64::DeduplicateUint32Literal(uint32_t value) {
- return uint32_literals_.GetOrCreate(
- value,
- [this, value]() { return __ CreateLiteralDestroyedWithPool<uint32_t>(value); });
-}
-
-vixl::aarch64::Literal<uint64_t>* CodeGeneratorARM64::DeduplicateUint64Literal(uint64_t value) {
- return uint64_literals_.GetOrCreate(
- value,
- [this, value]() { return __ CreateLiteralDestroyedWithPool<uint64_t>(value); });
+ assembler.CopyInstructions(code_region);
}
void InstructionCodeGeneratorARM64::VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) {
@@ -5370,13 +5370,8 @@ void InstructionCodeGeneratorARM64::VisitInvokeVirtual(HInvokeVirtual* invoke) {
return;
}
- {
- // Ensure that between the BLR (emitted by GenerateVirtualCall) and RecordPcInfo there
- // are no pools emitted.
- EmissionCheckScope guard(GetVIXLAssembler(), kInvokeCodeMarginSizeInBytes);
- codegen_->GenerateVirtualCall(invoke, invoke->GetLocations()->GetTemp(0));
- DCHECK(!codegen_->IsLeafMethod());
- }
+ codegen_->GenerateVirtualCall(invoke, invoke->GetLocations()->GetTemp(0));
+ DCHECK(!codegen_->IsLeafMethod());
codegen_->MaybeGenerateMarkingRegisterCheck(/* code= */ __LINE__);
}
@@ -5421,7 +5416,7 @@ void LocationsBuilderARM64::VisitLoadClass(HLoadClass* cls) {
load_kind == HLoadClass::LoadKind::kBssEntryPublic ||
load_kind == HLoadClass::LoadKind::kBssEntryPackage);
- const bool requires_read_barrier = gUseReadBarrier && !cls->IsInBootImage();
+ const bool requires_read_barrier = !cls->IsInBootImage() && codegen_->EmitReadBarrier();
LocationSummary::CallKind call_kind = (cls->NeedsEnvironment() || requires_read_barrier)
? LocationSummary::kCallOnSlowPath
: LocationSummary::kNoCall;
@@ -5434,12 +5429,14 @@ void LocationsBuilderARM64::VisitLoadClass(HLoadClass* cls) {
locations->SetInAt(0, Location::RequiresRegister());
}
locations->SetOut(Location::RequiresRegister());
- if (cls->GetLoadKind() == HLoadClass::LoadKind::kBssEntry) {
- if (!gUseReadBarrier || kUseBakerReadBarrier) {
+ if (load_kind == HLoadClass::LoadKind::kBssEntry ||
+ load_kind == HLoadClass::LoadKind::kBssEntryPublic ||
+ load_kind == HLoadClass::LoadKind::kBssEntryPackage) {
+ if (codegen_->EmitNonBakerReadBarrier()) {
+ // For non-Baker read barrier we have a temp-clobbering call.
+ } else {
// Rely on the type resolution or initialization and marking to save everything we need.
locations->SetCustomSlowPathCallerSaves(OneRegInReferenceOutSaveEverythingCallerSaves());
- } else {
- // For non-Baker read barrier we have a temp-clobbering call.
}
}
}
@@ -5460,9 +5457,8 @@ void InstructionCodeGeneratorARM64::VisitLoadClass(HLoadClass* cls) NO_THREAD_SA
Location out_loc = cls->GetLocations()->Out();
Register out = OutputRegister(cls);
- const ReadBarrierOption read_barrier_option = cls->IsInBootImage()
- ? kWithoutReadBarrier
- : gCompilerReadBarrierOption;
+ const ReadBarrierOption read_barrier_option =
+ cls->IsInBootImage() ? kWithoutReadBarrier : codegen_->GetCompilerReadBarrierOption();
bool generate_null_check = false;
switch (load_kind) {
case HLoadClass::LoadKind::kReferrersClass: {
@@ -5600,7 +5596,7 @@ void LocationsBuilderARM64::VisitClearException(HClearException* clear) {
new (GetGraph()->GetAllocator()) LocationSummary(clear, LocationSummary::kNoCall);
}
-void InstructionCodeGeneratorARM64::VisitClearException(HClearException* clear ATTRIBUTE_UNUSED) {
+void InstructionCodeGeneratorARM64::VisitClearException([[maybe_unused]] HClearException* clear) {
__ Str(wzr, GetExceptionTlsAddress());
}
@@ -5623,7 +5619,7 @@ HLoadString::LoadKind CodeGeneratorARM64::GetSupportedLoadStringKind(
}
void LocationsBuilderARM64::VisitLoadString(HLoadString* load) {
- LocationSummary::CallKind call_kind = CodeGenerator::GetLoadStringCallKind(load);
+ LocationSummary::CallKind call_kind = codegen_->GetLoadStringCallKind(load);
LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(load, call_kind);
if (load->GetLoadKind() == HLoadString::LoadKind::kRuntimeCall) {
InvokeRuntimeCallingConvention calling_convention;
@@ -5631,11 +5627,11 @@ void LocationsBuilderARM64::VisitLoadString(HLoadString* load) {
} else {
locations->SetOut(Location::RequiresRegister());
if (load->GetLoadKind() == HLoadString::LoadKind::kBssEntry) {
- if (!gUseReadBarrier || kUseBakerReadBarrier) {
+ if (codegen_->EmitNonBakerReadBarrier()) {
+ // For non-Baker read barrier we have a temp-clobbering call.
+ } else {
// Rely on the pResolveString and marking to save everything we need.
locations->SetCustomSlowPathCallerSaves(OneRegInReferenceOutSaveEverythingCallerSaves());
- } else {
- // For non-Baker read barrier we have a temp-clobbering call.
}
}
}
@@ -5685,7 +5681,7 @@ void InstructionCodeGeneratorARM64::VisitLoadString(HLoadString* load) NO_THREAD
temp,
/* offset placeholder */ 0u,
ldr_label,
- gCompilerReadBarrierOption);
+ codegen_->GetCompilerReadBarrierOption());
SlowPathCodeARM64* slow_path =
new (codegen_->GetScopedAllocator()) LoadStringSlowPathARM64(load);
codegen_->AddSlowPath(slow_path);
@@ -5709,14 +5705,13 @@ void InstructionCodeGeneratorARM64::VisitLoadString(HLoadString* load) NO_THREAD
out.X(),
/* offset= */ 0,
/* fixup_label= */ nullptr,
- gCompilerReadBarrierOption);
+ codegen_->GetCompilerReadBarrierOption());
return;
}
default:
break;
}
- // TODO: Re-add the compiler code to do string dex cache lookup again.
InvokeRuntimeCallingConvention calling_convention;
DCHECK_EQ(calling_convention.GetRegisterAt(0).GetCode(), out.GetCode());
__ Mov(calling_convention.GetRegisterAt(0).W(), load->GetStringIndex().index_);
@@ -5730,7 +5725,7 @@ void LocationsBuilderARM64::VisitLongConstant(HLongConstant* constant) {
locations->SetOut(Location::ConstantLocation(constant));
}
-void InstructionCodeGeneratorARM64::VisitLongConstant(HLongConstant* constant ATTRIBUTE_UNUSED) {
+void InstructionCodeGeneratorARM64::VisitLongConstant([[maybe_unused]] HLongConstant* constant) {
// Will be generated at use site.
}
@@ -5930,7 +5925,7 @@ void InstructionCodeGeneratorARM64::VisitOr(HOr* instruction) {
HandleBinaryOp(instruction);
}
-void LocationsBuilderARM64::VisitParallelMove(HParallelMove* instruction ATTRIBUTE_UNUSED) {
+void LocationsBuilderARM64::VisitParallelMove([[maybe_unused]] HParallelMove* instruction) {
LOG(FATAL) << "Unreachable";
}
@@ -5957,7 +5952,7 @@ void LocationsBuilderARM64::VisitParameterValue(HParameterValue* instruction) {
}
void InstructionCodeGeneratorARM64::VisitParameterValue(
- HParameterValue* instruction ATTRIBUTE_UNUSED) {
+ [[maybe_unused]] HParameterValue* instruction) {
// Nothing to do, the parameter is already at its location.
}
@@ -5968,7 +5963,7 @@ void LocationsBuilderARM64::VisitCurrentMethod(HCurrentMethod* instruction) {
}
void InstructionCodeGeneratorARM64::VisitCurrentMethod(
- HCurrentMethod* instruction ATTRIBUTE_UNUSED) {
+ [[maybe_unused]] HCurrentMethod* instruction) {
// Nothing to do, the method is already at its location.
}
@@ -5980,7 +5975,7 @@ void LocationsBuilderARM64::VisitPhi(HPhi* instruction) {
locations->SetOut(Location::Any());
}
-void InstructionCodeGeneratorARM64::VisitPhi(HPhi* instruction ATTRIBUTE_UNUSED) {
+void InstructionCodeGeneratorARM64::VisitPhi([[maybe_unused]] HPhi* instruction) {
LOG(FATAL) << "Unreachable";
}
@@ -6175,7 +6170,7 @@ void LocationsBuilderARM64::VisitConstructorFence(HConstructorFence* constructor
}
void InstructionCodeGeneratorARM64::VisitConstructorFence(
- HConstructorFence* constructor_fence ATTRIBUTE_UNUSED) {
+ [[maybe_unused]] HConstructorFence* constructor_fence) {
codegen_->GenerateMemoryBarrier(MemBarrierKind::kStoreStore);
}
@@ -6215,7 +6210,7 @@ void LocationsBuilderARM64::VisitReturnVoid(HReturnVoid* instruction) {
instruction->SetLocations(nullptr);
}
-void InstructionCodeGeneratorARM64::VisitReturnVoid(HReturnVoid* instruction ATTRIBUTE_UNUSED) {
+void InstructionCodeGeneratorARM64::VisitReturnVoid([[maybe_unused]] HReturnVoid* instruction) {
codegen_->GenerateFrameExit();
}
@@ -6353,6 +6348,9 @@ void LocationsBuilderARM64::VisitSuspendCheck(HSuspendCheck* instruction) {
// In suspend check slow path, usually there are no caller-save registers at all.
// If SIMD instructions are present, however, we force spilling all live SIMD
// registers in full width (since the runtime only saves/restores lower part).
+ // Note that only a suspend check can see live SIMD registers. In the
+ // loop optimization, we make sure this does not happen for any other slow
+ // path.
locations->SetCustomSlowPathCallerSaves(
GetGraph()->HasSIMD() ? RegisterSet::AllFpu() : RegisterSet::Empty());
}
@@ -6467,12 +6465,12 @@ void InstructionCodeGeneratorARM64::VisitXor(HXor* instruction) {
HandleBinaryOp(instruction);
}
-void LocationsBuilderARM64::VisitBoundType(HBoundType* instruction ATTRIBUTE_UNUSED) {
+void LocationsBuilderARM64::VisitBoundType([[maybe_unused]] HBoundType* instruction) {
// Nothing to do, this should be removed during prepare for register allocator.
LOG(FATAL) << "Unreachable";
}
-void InstructionCodeGeneratorARM64::VisitBoundType(HBoundType* instruction ATTRIBUTE_UNUSED) {
+void InstructionCodeGeneratorARM64::VisitBoundType([[maybe_unused]] HBoundType* instruction) {
// Nothing to do, this should be removed during prepare for register allocator.
LOG(FATAL) << "Unreachable";
}
@@ -6573,7 +6571,7 @@ void InstructionCodeGeneratorARM64::GenerateReferenceLoadOneRegister(
DataType::Type type = DataType::Type::kReference;
Register out_reg = RegisterFrom(out, type);
if (read_barrier_option == kWithReadBarrier) {
- CHECK(gUseReadBarrier);
+ DCHECK(codegen_->EmitReadBarrier());
if (kUseBakerReadBarrier) {
// Load with fast path based Baker's read barrier.
// /* HeapReference<Object> */ out = *(out + offset)
@@ -6614,7 +6612,7 @@ void InstructionCodeGeneratorARM64::GenerateReferenceLoadTwoRegisters(
Register out_reg = RegisterFrom(out, type);
Register obj_reg = RegisterFrom(obj, type);
if (read_barrier_option == kWithReadBarrier) {
- CHECK(gUseReadBarrier);
+ DCHECK(codegen_->EmitReadBarrier());
if (kUseBakerReadBarrier) {
// Load with fast path based Baker's read barrier.
// /* HeapReference<Object> */ out = *(obj + offset)
@@ -6649,7 +6647,7 @@ void CodeGeneratorARM64::GenerateGcRootFieldLoad(
DCHECK(fixup_label == nullptr || offset == 0u);
Register root_reg = RegisterFrom(root, DataType::Type::kReference);
if (read_barrier_option == kWithReadBarrier) {
- DCHECK(gUseReadBarrier);
+ DCHECK(EmitReadBarrier());
if (kUseBakerReadBarrier) {
// Fast path implementation of art::ReadBarrier::BarrierForRoot when
// Baker's read barrier are used.
@@ -6712,11 +6710,10 @@ void CodeGeneratorARM64::GenerateGcRootFieldLoad(
MaybeGenerateMarkingRegisterCheck(/* code= */ __LINE__);
}
-void CodeGeneratorARM64::GenerateIntrinsicCasMoveWithBakerReadBarrier(
+void CodeGeneratorARM64::GenerateIntrinsicMoveWithBakerReadBarrier(
vixl::aarch64::Register marked_old_value,
vixl::aarch64::Register old_value) {
- DCHECK(gUseReadBarrier);
- DCHECK(kUseBakerReadBarrier);
+ DCHECK(EmitBakerReadBarrier());
// Similar to the Baker RB path in GenerateGcRootFieldLoad(), with a MOV instead of LDR.
uint32_t custom_data = EncodeBakerReadBarrierGcRootData(marked_old_value.GetCode());
@@ -6737,8 +6734,7 @@ void CodeGeneratorARM64::GenerateFieldLoadWithBakerReadBarrier(HInstruction* ins
const vixl::aarch64::MemOperand& src,
bool needs_null_check,
bool use_load_acquire) {
- DCHECK(gUseReadBarrier);
- DCHECK(kUseBakerReadBarrier);
+ DCHECK(EmitBakerReadBarrier());
// Query `art::Thread::Current()->GetIsGcMarking()` (stored in the
// Marking Register) to decide whether we need to enter the slow
@@ -6833,8 +6829,7 @@ void CodeGeneratorARM64::GenerateArrayLoadWithBakerReadBarrier(HArrayGet* instru
uint32_t data_offset,
Location index,
bool needs_null_check) {
- DCHECK(gUseReadBarrier);
- DCHECK(kUseBakerReadBarrier);
+ DCHECK(EmitBakerReadBarrier());
static_assert(
sizeof(mirror::HeapReference<mirror::Object>) == sizeof(int32_t),
@@ -6911,7 +6906,7 @@ void CodeGeneratorARM64::GenerateArrayLoadWithBakerReadBarrier(HArrayGet* instru
void CodeGeneratorARM64::MaybeGenerateMarkingRegisterCheck(int code, Location temp_loc) {
// The following condition is a compile-time one, so it does not have a run-time cost.
- if (kIsDebugBuild && gUseReadBarrier && kUseBakerReadBarrier) {
+ if (kIsDebugBuild && EmitBakerReadBarrier()) {
// The following condition is a run-time one; it is executed after the
// previous compile-time test, to avoid penalizing non-debug builds.
if (GetCompilerOptions().EmitRunTimeChecksInDebugMode()) {
@@ -6940,7 +6935,7 @@ void CodeGeneratorARM64::GenerateReadBarrierSlow(HInstruction* instruction,
Location obj,
uint32_t offset,
Location index) {
- DCHECK(gUseReadBarrier);
+ DCHECK(EmitReadBarrier());
// Insert a slow path based read barrier *after* the reference load.
//
@@ -6965,7 +6960,7 @@ void CodeGeneratorARM64::MaybeGenerateReadBarrierSlow(HInstruction* instruction,
Location obj,
uint32_t offset,
Location index) {
- if (gUseReadBarrier) {
+ if (EmitReadBarrier()) {
// Baker's read barriers shall be handled by the fast path
// (CodeGeneratorARM64::GenerateReferenceLoadWithBakerReadBarrier).
DCHECK(!kUseBakerReadBarrier);
@@ -6980,7 +6975,7 @@ void CodeGeneratorARM64::MaybeGenerateReadBarrierSlow(HInstruction* instruction,
void CodeGeneratorARM64::GenerateReadBarrierForRootSlow(HInstruction* instruction,
Location out,
Location root) {
- DCHECK(gUseReadBarrier);
+ DCHECK(EmitReadBarrier());
// Insert a slow path based read barrier *after* the GC root load.
//
@@ -7018,32 +7013,6 @@ void InstructionCodeGeneratorARM64::VisitClassTableGet(HClassTableGet* instructi
}
}
-static void PatchJitRootUse(uint8_t* code,
- const uint8_t* roots_data,
- vixl::aarch64::Literal<uint32_t>* literal,
- uint64_t index_in_table) {
- uint32_t literal_offset = literal->GetOffset();
- uintptr_t address =
- reinterpret_cast<uintptr_t>(roots_data) + index_in_table * sizeof(GcRoot<mirror::Object>);
- uint8_t* data = code + literal_offset;
- reinterpret_cast<uint32_t*>(data)[0] = dchecked_integral_cast<uint32_t>(address);
-}
-
-void CodeGeneratorARM64::EmitJitRootPatches(uint8_t* code, const uint8_t* roots_data) {
- for (const auto& entry : jit_string_patches_) {
- const StringReference& string_reference = entry.first;
- vixl::aarch64::Literal<uint32_t>* table_entry_literal = entry.second;
- uint64_t index_in_table = GetJitStringRootIndex(string_reference);
- PatchJitRootUse(code, roots_data, table_entry_literal, index_in_table);
- }
- for (const auto& entry : jit_class_patches_) {
- const TypeReference& type_reference = entry.first;
- vixl::aarch64::Literal<uint32_t>* table_entry_literal = entry.second;
- uint64_t index_in_table = GetJitClassRootIndex(type_reference);
- PatchJitRootUse(code, roots_data, table_entry_literal, index_in_table);
- }
-}
-
MemOperand InstructionCodeGeneratorARM64::VecNEONAddress(
HVecMemoryOperation* instruction,
UseScratchRegisterScope* temps_scope,
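
The PatchJitRootUse()/EmitJitRootPatches() pair removed above now lives behind the JitPatchesARM64 helper (see the header diff below). The per-literal patching step it performs is sketched here with plain types standing in for the VIXL literal and GcRoot, so only the address arithmetic is meant to be authoritative:

#include <cstddef>
#include <cstdint>
#include <cstring>

// Write the address of entry `index_in_table` of the JIT roots table into the 32-bit
// literal that was reserved for this root in the generated code.
void PatchJitRootUseSketch(uint8_t* code,
                           const uint8_t* roots_data,
                           uint32_t literal_offset,   // literal->GetOffset()
                           uint64_t index_in_table,
                           size_t sizeof_gc_root) {   // sizeof(GcRoot<mirror::Object>)
  uintptr_t address =
      reinterpret_cast<uintptr_t>(roots_data) + index_in_table * sizeof_gc_root;
  uint32_t value = static_cast<uint32_t>(address);    // dchecked_integral_cast in the original
  std::memcpy(code + literal_offset, &value, sizeof(value));  // patch the literal in place
}
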
diff --git a/compiler/optimizing/code_generator_arm64.h b/compiler/optimizing/code_generator_arm64.h
index 6190364d1d..7ff08f55cb 100644
--- a/compiler/optimizing/code_generator_arm64.h
+++ b/compiler/optimizing/code_generator_arm64.h
@@ -26,6 +26,7 @@
#include "dex/string_reference.h"
#include "dex/type_reference.h"
#include "driver/compiler_options.h"
+#include "jit_patches_arm64.h"
#include "nodes.h"
#include "parallel_move_resolver.h"
#include "utils/arm64/assembler_arm64.h"
@@ -50,30 +51,29 @@ class CodeGeneratorARM64;
// Use a local definition to prevent copying mistakes.
static constexpr size_t kArm64WordSize = static_cast<size_t>(kArm64PointerSize);
-// These constants are used as an approximate margin when emission of veneer and literal pools
+// This constant is used as an approximate margin when emission of veneer and literal pools
// must be blocked.
static constexpr int kMaxMacroInstructionSizeInBytes = 15 * vixl::aarch64::kInstructionSize;
-static constexpr int kInvokeCodeMarginSizeInBytes = 6 * kMaxMacroInstructionSizeInBytes;
static const vixl::aarch64::Register kParameterCoreRegisters[] = {
- vixl::aarch64::x1,
- vixl::aarch64::x2,
- vixl::aarch64::x3,
- vixl::aarch64::x4,
- vixl::aarch64::x5,
- vixl::aarch64::x6,
- vixl::aarch64::x7
+ vixl::aarch64::x1,
+ vixl::aarch64::x2,
+ vixl::aarch64::x3,
+ vixl::aarch64::x4,
+ vixl::aarch64::x5,
+ vixl::aarch64::x6,
+ vixl::aarch64::x7
};
static constexpr size_t kParameterCoreRegistersLength = arraysize(kParameterCoreRegisters);
static const vixl::aarch64::VRegister kParameterFPRegisters[] = {
- vixl::aarch64::d0,
- vixl::aarch64::d1,
- vixl::aarch64::d2,
- vixl::aarch64::d3,
- vixl::aarch64::d4,
- vixl::aarch64::d5,
- vixl::aarch64::d6,
- vixl::aarch64::d7
+ vixl::aarch64::d0,
+ vixl::aarch64::d1,
+ vixl::aarch64::d2,
+ vixl::aarch64::d3,
+ vixl::aarch64::d4,
+ vixl::aarch64::d5,
+ vixl::aarch64::d6,
+ vixl::aarch64::d7
};
static constexpr size_t kParameterFPRegistersLength = arraysize(kParameterFPRegisters);
@@ -116,7 +116,7 @@ const vixl::aarch64::CPURegList callee_saved_core_registers(
vixl::aarch64::CPURegister::kRegister,
vixl::aarch64::kXRegSize,
(kReserveMarkingRegister ? vixl::aarch64::x21.GetCode() : vixl::aarch64::x20.GetCode()),
- vixl::aarch64::x30.GetCode());
+ vixl::aarch64::x30.GetCode());
const vixl::aarch64::CPURegList callee_saved_fp_registers(vixl::aarch64::CPURegister::kVRegister,
vixl::aarch64::kDRegSize,
vixl::aarch64::d8.GetCode(),
@@ -144,19 +144,8 @@ Location ARM64ReturnLocation(DataType::Type return_type);
V(SystemArrayCopyByte) \
V(SystemArrayCopyInt) \
/* 1.8 */ \
- V(UnsafeGetAndAddInt) \
- V(UnsafeGetAndAddLong) \
- V(UnsafeGetAndSetInt) \
- V(UnsafeGetAndSetLong) \
- V(UnsafeGetAndSetObject) \
V(MethodHandleInvokeExact) \
- V(MethodHandleInvoke) \
- /* OpenJDK 11 */ \
- V(JdkUnsafeGetAndAddInt) \
- V(JdkUnsafeGetAndAddLong) \
- V(JdkUnsafeGetAndSetInt) \
- V(JdkUnsafeGetAndSetLong) \
- V(JdkUnsafeGetAndSetObject)
+ V(MethodHandleInvoke)
class SlowPathCodeARM64 : public SlowPathCode {
public:
@@ -192,34 +181,34 @@ class JumpTableARM64 : public DeletableArenaObject<kArenaAllocSwitchTable> {
DISALLOW_COPY_AND_ASSIGN(JumpTableARM64);
};
-static const vixl::aarch64::Register kRuntimeParameterCoreRegisters[] =
- { vixl::aarch64::x0,
- vixl::aarch64::x1,
- vixl::aarch64::x2,
- vixl::aarch64::x3,
- vixl::aarch64::x4,
- vixl::aarch64::x5,
- vixl::aarch64::x6,
- vixl::aarch64::x7 };
+static const vixl::aarch64::Register kRuntimeParameterCoreRegisters[] = {
+ vixl::aarch64::x0,
+ vixl::aarch64::x1,
+ vixl::aarch64::x2,
+ vixl::aarch64::x3,
+ vixl::aarch64::x4,
+ vixl::aarch64::x5,
+ vixl::aarch64::x6,
+ vixl::aarch64::x7
+};
static constexpr size_t kRuntimeParameterCoreRegistersLength =
arraysize(kRuntimeParameterCoreRegisters);
-static const vixl::aarch64::VRegister kRuntimeParameterFpuRegisters[] =
- { vixl::aarch64::d0,
- vixl::aarch64::d1,
- vixl::aarch64::d2,
- vixl::aarch64::d3,
- vixl::aarch64::d4,
- vixl::aarch64::d5,
- vixl::aarch64::d6,
- vixl::aarch64::d7 };
+static const vixl::aarch64::VRegister kRuntimeParameterFpuRegisters[] = {
+ vixl::aarch64::d0,
+ vixl::aarch64::d1,
+ vixl::aarch64::d2,
+ vixl::aarch64::d3,
+ vixl::aarch64::d4,
+ vixl::aarch64::d5,
+ vixl::aarch64::d6,
+ vixl::aarch64::d7
+};
static constexpr size_t kRuntimeParameterFpuRegistersLength =
arraysize(kRuntimeParameterCoreRegisters);
class InvokeRuntimeCallingConvention : public CallingConvention<vixl::aarch64::Register,
vixl::aarch64::VRegister> {
public:
- static constexpr size_t kParameterCoreRegistersLength = arraysize(kParameterCoreRegisters);
-
InvokeRuntimeCallingConvention()
: CallingConvention(kRuntimeParameterCoreRegisters,
kRuntimeParameterCoreRegistersLength,
@@ -304,16 +293,16 @@ class FieldAccessCallingConventionARM64 : public FieldAccessCallingConvention {
Location GetFieldIndexLocation() const override {
return helpers::LocationFrom(vixl::aarch64::x0);
}
- Location GetReturnLocation(DataType::Type type ATTRIBUTE_UNUSED) const override {
+ Location GetReturnLocation([[maybe_unused]] DataType::Type type) const override {
return helpers::LocationFrom(vixl::aarch64::x0);
}
- Location GetSetValueLocation(DataType::Type type ATTRIBUTE_UNUSED,
+ Location GetSetValueLocation([[maybe_unused]] DataType::Type type,
bool is_instance) const override {
return is_instance
? helpers::LocationFrom(vixl::aarch64::x2)
: helpers::LocationFrom(vixl::aarch64::x1);
}
- Location GetFpuLocation(DataType::Type type ATTRIBUTE_UNUSED) const override {
+ Location GetFpuLocation([[maybe_unused]] DataType::Type type) const override {
return helpers::LocationFrom(vixl::aarch64::d0);
}
@@ -551,12 +540,31 @@ class InstructionCodeGeneratorARM64Sve : public InstructionCodeGeneratorARM64 {
// register size (full SIMD register is used).
void ValidateVectorLength(HVecOperation* instr) const;
- // Returns default predicate register which is used as governing vector predicate
- // to implement predicated loop execution.
+ vixl::aarch64::PRegister GetVecGoverningPReg(HVecOperation* instr) {
+ return GetVecPredSetFixedOutPReg(instr->GetGoverningPredicate());
+ }
+
+  // Returns a fixed p-reg for a predicate-setting instruction.
+  //
+  // Currently we only support diamond CF loops for predicated vectorization, and we do not yet
+  // have register allocator support for vector predicates. Thus, as a temporary solution, we use
+  // fixed P-regs for the loop main, True and False predicates.
//
- // TODO: This is a hack to be addressed when register allocator supports SIMD types.
- static vixl::aarch64::PRegister LoopPReg() {
- return vixl::aarch64::p0;
+ // TODO: Support SIMD types and registers in ART.
+ static vixl::aarch64::PRegister GetVecPredSetFixedOutPReg(HVecPredSetOperation* instr) {
+ if (instr->IsVecPredWhile() || instr->IsVecPredSetAll()) {
+ // VecPredWhile and VecPredSetAll live ranges never overlap due to the current vectorization
+      // scheme: the former is only live inside a vectorized loop and the latter is never in a
+      // loop and never spans across loops.
+ return vixl::aarch64::p0;
+ } else if (instr->IsVecPredNot()) {
+ // This relies on the fact that we only use PredNot manually in the autovectorizer,
+ // so there is only one of them in each loop.
+ return vixl::aarch64::p1;
+ } else {
+ DCHECK(instr->IsVecCondition());
+ return vixl::aarch64::p2;
+ }
}
};
@@ -698,7 +706,7 @@ class CodeGeneratorARM64 : public CodeGenerator {
return jump_tables_.back().get();
}
- void Finalize(CodeAllocator* allocator) override;
+ void Finalize() override;
// Code generation helpers.
void MoveConstant(vixl::aarch64::CPURegister destination, HConstant* constant);
@@ -737,9 +745,7 @@ class CodeGeneratorARM64 : public CodeGenerator {
ParallelMoveResolverARM64* GetMoveResolver() override { return &move_resolver_; }
- bool NeedsTwoRegisters(DataType::Type type ATTRIBUTE_UNUSED) const override {
- return false;
- }
+ bool NeedsTwoRegisters([[maybe_unused]] DataType::Type type) const override { return false; }
// Check if the desired_string_load_kind is supported. If it is, return it,
// otherwise return a fall-back kind that should be used instead.
@@ -838,13 +844,21 @@ class CodeGeneratorARM64 : public CodeGenerator {
// the associated patch for AOT or slow path for JIT.
void EmitBakerReadBarrierCbnz(uint32_t custom_data);
- vixl::aarch64::Literal<uint32_t>* DeduplicateBootImageAddressLiteral(uint64_t address);
+ vixl::aarch64::Literal<uint32_t>* DeduplicateBootImageAddressLiteral(uint64_t address) {
+ return jit_patches_.DeduplicateBootImageAddressLiteral(address);
+ }
vixl::aarch64::Literal<uint32_t>* DeduplicateJitStringLiteral(const DexFile& dex_file,
dex::StringIndex string_index,
- Handle<mirror::String> handle);
+ Handle<mirror::String> handle) {
+ return jit_patches_.DeduplicateJitStringLiteral(
+ dex_file, string_index, handle, GetCodeGenerationData());
+ }
vixl::aarch64::Literal<uint32_t>* DeduplicateJitClassLiteral(const DexFile& dex_file,
- dex::TypeIndex string_index,
- Handle<mirror::Class> handle);
+ dex::TypeIndex class_index,
+ Handle<mirror::Class> handle) {
+ return jit_patches_.DeduplicateJitClassLiteral(
+ dex_file, class_index, handle, GetCodeGenerationData());
+ }
void EmitAdrpPlaceholder(vixl::aarch64::Label* fixup_label, vixl::aarch64::Register reg);
void EmitAddPlaceholder(vixl::aarch64::Label* fixup_label,
@@ -879,9 +893,9 @@ class CodeGeneratorARM64 : public CodeGenerator {
uint32_t offset,
vixl::aarch64::Label* fixup_label,
ReadBarrierOption read_barrier_option);
- // Generate MOV for the `old_value` in intrinsic CAS and mark it with Baker read barrier.
- void GenerateIntrinsicCasMoveWithBakerReadBarrier(vixl::aarch64::Register marked_old_value,
- vixl::aarch64::Register old_value);
+  // Generate MOV for the `old_value` in an intrinsic and mark it with a Baker read barrier.
+ void GenerateIntrinsicMoveWithBakerReadBarrier(vixl::aarch64::Register marked_old_value,
+ vixl::aarch64::Register old_value);
// Fast path implementation of ReadBarrier::Barrier for a heap
// reference field load when Baker's read barriers are used.
// Overload suitable for Unsafe.getObject/-Volatile() intrinsic.
@@ -1074,18 +1088,6 @@ class CodeGeneratorARM64 : public CodeGenerator {
uint32_t encoded_data,
/*out*/ std::string* debug_name);
- using Uint64ToLiteralMap = ArenaSafeMap<uint64_t, vixl::aarch64::Literal<uint64_t>*>;
- using Uint32ToLiteralMap = ArenaSafeMap<uint32_t, vixl::aarch64::Literal<uint32_t>*>;
- using StringToLiteralMap = ArenaSafeMap<StringReference,
- vixl::aarch64::Literal<uint32_t>*,
- StringReferenceValueComparator>;
- using TypeToLiteralMap = ArenaSafeMap<TypeReference,
- vixl::aarch64::Literal<uint32_t>*,
- TypeReferenceValueComparator>;
-
- vixl::aarch64::Literal<uint32_t>* DeduplicateUint32Literal(uint32_t value);
- vixl::aarch64::Literal<uint64_t>* DeduplicateUint64Literal(uint64_t value);
-
// The PcRelativePatchInfo is used for PC-relative addressing of methods/strings/types,
// whether through .data.bimg.rel.ro, .bss, or directly in the boot image.
struct PcRelativePatchInfo : PatchInfo<vixl::aarch64::Label> {
@@ -1158,14 +1160,7 @@ class CodeGeneratorARM64 : public CodeGenerator {
// Baker read barrier patch info.
ArenaDeque<BakerReadBarrierPatchInfo> baker_read_barrier_patches_;
- // Deduplication map for 32-bit literals, used for JIT for boot image addresses.
- Uint32ToLiteralMap uint32_literals_;
- // Deduplication map for 64-bit literals, used for JIT for method address or method code.
- Uint64ToLiteralMap uint64_literals_;
- // Patches for string literals in JIT compiled code.
- StringToLiteralMap jit_string_patches_;
- // Patches for class literals in JIT compiled code.
- TypeToLiteralMap jit_class_patches_;
+ JitPatchesARM64 jit_patches_;
// Baker read barrier slow paths, mapping custom data (uint32_t) to label.
// Wrap the label to work around vixl::aarch64::Label being non-copyable
diff --git a/compiler/optimizing/code_generator_arm_vixl.cc b/compiler/optimizing/code_generator_arm_vixl.cc
index d69e77045b..00c14b0b46 100644
--- a/compiler/optimizing/code_generator_arm_vixl.cc
+++ b/compiler/optimizing/code_generator_arm_vixl.cc
@@ -33,13 +33,16 @@
#include "interpreter/mterp/nterp.h"
#include "intrinsics.h"
#include "intrinsics_arm_vixl.h"
+#include "intrinsics_list.h"
#include "intrinsics_utils.h"
#include "linker/linker_patch.h"
#include "mirror/array-inl.h"
#include "mirror/class-inl.h"
#include "mirror/var_handle.h"
+#include "profiling_info_builder.h"
#include "scoped_thread_state_change-inl.h"
#include "thread.h"
+#include "trace.h"
#include "utils/arm/assembler_arm_vixl.h"
#include "utils/arm/managed_register_arm.h"
#include "utils/assembler.h"
@@ -147,7 +150,7 @@ static RegisterSet OneRegInReferenceOutSaveEverythingCallerSaves() {
RegisterSet caller_saves = RegisterSet::Empty();
caller_saves.Add(LocationFrom(calling_convention.GetRegisterAt(0)));
// TODO: Add GetReturnLocation() to the calling convention so that we can DCHECK()
- // that the the kPrimNot result register is the same as the first argument register.
+ // that the kPrimNot result register is the same as the first argument register.
return caller_saves;
}
@@ -295,31 +298,6 @@ static LoadOperandType GetLoadOperandType(DataType::Type type) {
}
}
-static StoreOperandType GetStoreOperandType(DataType::Type type) {
- switch (type) {
- case DataType::Type::kReference:
- return kStoreWord;
- case DataType::Type::kBool:
- case DataType::Type::kUint8:
- case DataType::Type::kInt8:
- return kStoreByte;
- case DataType::Type::kUint16:
- case DataType::Type::kInt16:
- return kStoreHalfword;
- case DataType::Type::kInt32:
- return kStoreWord;
- case DataType::Type::kInt64:
- return kStoreWordPair;
- case DataType::Type::kFloat32:
- return kStoreSWord;
- case DataType::Type::kFloat64:
- return kStoreDWord;
- default:
- LOG(FATAL) << "Unreachable type " << type;
- UNREACHABLE();
- }
-}
-
void SlowPathCodeARMVIXL::SaveLiveRegisters(CodeGenerator* codegen, LocationSummary* locations) {
size_t stack_offset = codegen->GetFirstRegisterSlotInSlowPath();
size_t orig_offset = stack_offset;
@@ -743,7 +721,6 @@ class ReadBarrierForHeapReferenceSlowPathARMVIXL : public SlowPathCodeARMVIXL {
obj_(obj),
offset_(offset),
index_(index) {
- DCHECK(gUseReadBarrier);
// If `obj` is equal to `out` or `ref`, it means the initial object
// has been overwritten by (or after) the heap object reference load
// to be instrumented, e.g.:
@@ -758,13 +735,13 @@ class ReadBarrierForHeapReferenceSlowPathARMVIXL : public SlowPathCodeARMVIXL {
}
void EmitNativeCode(CodeGenerator* codegen) override {
+ DCHECK(codegen->EmitReadBarrier());
CodeGeneratorARMVIXL* arm_codegen = down_cast<CodeGeneratorARMVIXL*>(codegen);
LocationSummary* locations = instruction_->GetLocations();
vixl32::Register reg_out = RegisterFrom(out_);
DCHECK(locations->CanCall());
DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(reg_out.GetCode()));
DCHECK(instruction_->IsInstanceFieldGet() ||
- instruction_->IsPredicatedInstanceFieldGet() ||
instruction_->IsStaticFieldGet() ||
instruction_->IsArrayGet() ||
instruction_->IsInstanceOf() ||
@@ -840,7 +817,9 @@ class ReadBarrierForHeapReferenceSlowPathARMVIXL : public SlowPathCodeARMVIXL {
DCHECK(instruction_->IsInvoke()) << instruction_->DebugName();
DCHECK(instruction_->GetLocations()->Intrinsified());
HInvoke* invoke = instruction_->AsInvoke();
- DCHECK(IsUnsafeGetObject(invoke) || IsVarHandleGet(invoke) || IsVarHandleCASFamily(invoke))
+ DCHECK(IsUnsafeGetReference(invoke) ||
+ IsVarHandleGet(invoke) ||
+ IsVarHandleCASFamily(invoke))
<< invoke->GetIntrinsic();
DCHECK_EQ(offset_, 0U);
// Though UnsafeGet's offset location is a register pair, we only pass the low
@@ -921,10 +900,10 @@ class ReadBarrierForRootSlowPathARMVIXL : public SlowPathCodeARMVIXL {
public:
ReadBarrierForRootSlowPathARMVIXL(HInstruction* instruction, Location out, Location root)
: SlowPathCodeARMVIXL(instruction), out_(out), root_(root) {
- DCHECK(gUseReadBarrier);
}
void EmitNativeCode(CodeGenerator* codegen) override {
+ DCHECK(codegen->EmitReadBarrier());
LocationSummary* locations = instruction_->GetLocations();
vixl32::Register reg_out = RegisterFrom(out_);
DCHECK(locations->CanCall());
@@ -992,12 +971,20 @@ class MethodEntryExitHooksSlowPathARMVIXL : public SlowPathCodeARMVIXL {
class CompileOptimizedSlowPathARMVIXL : public SlowPathCodeARMVIXL {
public:
- CompileOptimizedSlowPathARMVIXL() : SlowPathCodeARMVIXL(/* instruction= */ nullptr) {}
+ explicit CompileOptimizedSlowPathARMVIXL(vixl32::Register profiling_info)
+ : SlowPathCodeARMVIXL(/* instruction= */ nullptr),
+ profiling_info_(profiling_info) {}
void EmitNativeCode(CodeGenerator* codegen) override {
uint32_t entry_point_offset =
GetThreadOffset<kArmPointerSize>(kQuickCompileOptimized).Int32Value();
__ Bind(GetEntryLabel());
+ CodeGeneratorARMVIXL* arm_codegen = down_cast<CodeGeneratorARMVIXL*>(codegen);
+ UseScratchRegisterScope temps(arm_codegen->GetVIXLAssembler());
+ vixl32::Register tmp = temps.Acquire();
+ __ Mov(tmp, ProfilingInfo::GetOptimizeThreshold());
+ __ Strh(tmp,
+ MemOperand(profiling_info_, ProfilingInfo::BaselineHotnessCountOffset().Int32Value()));
__ Ldr(lr, MemOperand(tr, entry_point_offset));
// Note: we don't record the call here (and therefore don't generate a stack
// map), as the entrypoint should never be suspended.
@@ -1010,6 +997,8 @@ class CompileOptimizedSlowPathARMVIXL : public SlowPathCodeARMVIXL {
}
private:
+ vixl32::Register profiling_info_;
+
DISALLOW_COPY_AND_ASSIGN(CompileOptimizedSlowPathARMVIXL);
};
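
Compared with the previous version, the ARM slow path above re-arms the baseline hotness counter (through the profiling_info register passed to the constructor) before calling the compile-optimized entrypoint, so baseline code does not immediately fall back into this path. A small sketch of the run-time effect, with plain parameters standing in for the runtime objects:

#include <cstdint>

void CompileOptimizedSlowPath(uint16_t* baseline_hotness_count,    // profiling_info_ + counter offset
                              uint16_t optimize_threshold,         // ProfilingInfo::GetOptimizeThreshold()
                              void (*compile_optimized_entry)()) {
  *baseline_hotness_count = optimize_threshold;  // Mov tmp, threshold; Strh tmp, [profiling_info_, #off]
  compile_optimized_entry();                     // Ldr lr, [tr, #entry_point_offset]; Blx lr (no stack map)
}
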
@@ -1102,27 +1091,27 @@ static uint32_t ComputeSRegisterListMask(const SRegisterList& regs) {
}
// Saves the register in the stack. Returns the size taken on stack.
-size_t CodeGeneratorARMVIXL::SaveCoreRegister(size_t stack_index ATTRIBUTE_UNUSED,
- uint32_t reg_id ATTRIBUTE_UNUSED) {
+size_t CodeGeneratorARMVIXL::SaveCoreRegister([[maybe_unused]] size_t stack_index,
+ [[maybe_unused]] uint32_t reg_id) {
TODO_VIXL32(FATAL);
UNREACHABLE();
}
// Restores the register from the stack. Returns the size taken on stack.
-size_t CodeGeneratorARMVIXL::RestoreCoreRegister(size_t stack_index ATTRIBUTE_UNUSED,
- uint32_t reg_id ATTRIBUTE_UNUSED) {
+size_t CodeGeneratorARMVIXL::RestoreCoreRegister([[maybe_unused]] size_t stack_index,
+ [[maybe_unused]] uint32_t reg_id) {
TODO_VIXL32(FATAL);
UNREACHABLE();
}
-size_t CodeGeneratorARMVIXL::SaveFloatingPointRegister(size_t stack_index ATTRIBUTE_UNUSED,
- uint32_t reg_id ATTRIBUTE_UNUSED) {
+size_t CodeGeneratorARMVIXL::SaveFloatingPointRegister([[maybe_unused]] size_t stack_index,
+ [[maybe_unused]] uint32_t reg_id) {
TODO_VIXL32(FATAL);
UNREACHABLE();
}
-size_t CodeGeneratorARMVIXL::RestoreFloatingPointRegister(size_t stack_index ATTRIBUTE_UNUSED,
- uint32_t reg_id ATTRIBUTE_UNUSED) {
+size_t CodeGeneratorARMVIXL::RestoreFloatingPointRegister([[maybe_unused]] size_t stack_index,
+ [[maybe_unused]] uint32_t reg_id) {
TODO_VIXL32(FATAL);
UNREACHABLE();
}
@@ -1908,6 +1897,7 @@ vixl32::Label* CodeGeneratorARMVIXL::GetFinalLabel(HInstruction* instruction,
}
namespace detail {
+
// Mark which intrinsics we don't have handcrafted code for.
template <Intrinsics T>
struct IsUnimplemented {
@@ -1922,15 +1912,13 @@ struct IsUnimplemented {
UNIMPLEMENTED_INTRINSIC_LIST_ARM(TRUE_OVERRIDE)
#undef TRUE_OVERRIDE
-#include "intrinsics_list.h"
static constexpr bool kIsIntrinsicUnimplemented[] = {
- false, // kNone
+ false, // kNone
#define IS_UNIMPLEMENTED(Intrinsic, ...) \
- IsUnimplemented<Intrinsics::k##Intrinsic>().is_unimplemented,
- INTRINSICS_LIST(IS_UNIMPLEMENTED)
+ IsUnimplemented<Intrinsics::k##Intrinsic>().is_unimplemented,
+ ART_INTRINSICS_LIST(IS_UNIMPLEMENTED)
#undef IS_UNIMPLEMENTED
};
-#undef INTRINSICS_LIST
} // namespace detail
@@ -2024,7 +2012,7 @@ void CodeGeneratorARMVIXL::FixJumpTables() {
#define __ reinterpret_cast<ArmVIXLAssembler*>(GetAssembler())->GetVIXLAssembler()-> // NOLINT
-void CodeGeneratorARMVIXL::Finalize(CodeAllocator* allocator) {
+void CodeGeneratorARMVIXL::Finalize() {
FixJumpTables();
// Emit JIT baker read barrier slow paths.
@@ -2037,11 +2025,11 @@ void CodeGeneratorARMVIXL::Finalize(CodeAllocator* allocator) {
}
GetAssembler()->FinalizeCode();
- CodeGenerator::Finalize(allocator);
+ CodeGenerator::Finalize();
// Verify Baker read barrier linker patches.
if (kIsDebugBuild) {
- ArrayRef<const uint8_t> code = allocator->GetMemory();
+ ArrayRef<const uint8_t> code(GetCode());
for (const BakerReadBarrierPatchInfo& info : baker_read_barrier_patches_) {
DCHECK(info.label.IsBound());
uint32_t literal_offset = info.label.GetLocation();
@@ -2188,11 +2176,16 @@ void LocationsBuilderARMVIXL::VisitMethodExitHook(HMethodExitHook* method_hook)
LocationSummary* locations = new (GetGraph()->GetAllocator())
LocationSummary(method_hook, LocationSummary::kCallOnSlowPath);
locations->SetInAt(0, parameter_visitor_.GetReturnLocation(method_hook->InputAt(0)->GetType()));
+ // We need three temporary registers, two to load the timestamp counter (64-bit value) and one to
+ // compute the address to store the timestamp counter.
+ locations->AddRegisterTemps(3);
}
void InstructionCodeGeneratorARMVIXL::GenerateMethodEntryExitHook(HInstruction* instruction) {
- UseScratchRegisterScope temps(GetVIXLAssembler());
- vixl32::Register temp = temps.Acquire();
+ LocationSummary* locations = instruction->GetLocations();
+ vixl32::Register addr = RegisterFrom(locations->GetTemp(0));
+ vixl32::Register value = RegisterFrom(locations->GetTemp(1));
+ vixl32::Register tmp = RegisterFrom(locations->GetTemp(2));
SlowPathCodeARMVIXL* slow_path =
new (codegen_->GetScopedAllocator()) MethodEntryExitHooksSlowPathARMVIXL(instruction);
@@ -2204,20 +2197,61 @@ void InstructionCodeGeneratorARMVIXL::GenerateMethodEntryExitHook(HInstruction*
// if it is just non-zero. kCHA bit isn't used in debuggable runtimes as cha optimization is
// disabled in debuggable runtime. The other bit is used when this method itself requires a
// deoptimization due to redefinition. So it is safe to just check for non-zero value here.
- GetAssembler()->LoadFromOffset(kLoadWord,
- temp,
- sp,
- codegen_->GetStackOffsetOfShouldDeoptimizeFlag());
- __ CompareAndBranchIfNonZero(temp, slow_path->GetEntryLabel());
+ GetAssembler()->LoadFromOffset(
+ kLoadWord, value, sp, codegen_->GetStackOffsetOfShouldDeoptimizeFlag());
+ __ CompareAndBranchIfNonZero(value, slow_path->GetEntryLabel());
}
MemberOffset offset = instruction->IsMethodExitHook() ?
instrumentation::Instrumentation::HaveMethodExitListenersOffset() :
instrumentation::Instrumentation::HaveMethodEntryListenersOffset();
uint32_t address = reinterpret_cast32<uint32_t>(Runtime::Current()->GetInstrumentation());
- __ Mov(temp, address + offset.Int32Value());
- __ Ldrb(temp, MemOperand(temp, 0));
- __ CompareAndBranchIfNonZero(temp, slow_path->GetEntryLabel());
+ __ Mov(addr, address + offset.Int32Value());
+ __ Ldrb(value, MemOperand(addr, 0));
+ __ Cmp(value, instrumentation::Instrumentation::kFastTraceListeners);
+ // Check if there are any trace method entry / exit listeners. If not, continue.
+ __ B(lt, slow_path->GetExitLabel());
+ // Check if there are any slow (jvmti / trace with thread cpu time) method entry / exit
+ // listeners. If so, just take the slow path.
+ __ B(gt, slow_path->GetEntryLabel());
+
+ // Check if there is space in the buffer to store a new entry. If not, take the slow path.
+ uint32_t trace_buffer_index_offset =
+ Thread::TraceBufferIndexOffset<kArmPointerSize>().Int32Value();
+ vixl32::Register index = value;
+ __ Ldr(index, MemOperand(tr, trace_buffer_index_offset));
+ __ Subs(index, index, kNumEntriesForWallClock);
+ __ B(lt, slow_path->GetEntryLabel());
+
+ // Update the index in the `Thread`.
+ __ Str(index, MemOperand(tr, trace_buffer_index_offset));
+ // Calculate the entry address in the buffer.
+ // addr = base_addr + sizeof(void*) * index
+ __ Ldr(addr, MemOperand(tr, Thread::TraceBufferPtrOffset<kArmPointerSize>().SizeValue()));
+ __ Add(addr, addr, Operand(index, LSL, TIMES_4));
+
+ // Record method pointer and trace action.
+ __ Ldr(tmp, MemOperand(sp, 0));
+ // Use the last two bits to encode the trace method action. For MethodEntry it is 0,
+ // so there is no need to set the bits since they are already 0.
+ if (instruction->IsMethodExitHook()) {
+ DCHECK_GE(ArtMethod::Alignment(kRuntimePointerSize), static_cast<size_t>(4));
+ static_assert(enum_cast<int32_t>(TraceAction::kTraceMethodEnter) == 0);
+ static_assert(enum_cast<int32_t>(TraceAction::kTraceMethodExit) == 1);
+ __ Orr(tmp, tmp, Operand(enum_cast<int32_t>(TraceAction::kTraceMethodExit)));
+ }
+ __ Str(tmp, MemOperand(addr, kMethodOffsetInBytes));
+
+ vixl32::Register tmp1 = index;
+ // See Architecture Reference Manual ARMv7-A and ARMv7-R edition section B4.1.34.
+ __ Mrrc(/* lower 32-bit */ tmp,
+ /* higher 32-bit */ tmp1,
+ /* coproc= */ 15,
+ /* opc1= */ 1,
+ /* crm= */ 14);
+ static_assert(kHighTimestampOffsetInBytes ==
+ kTimestampOffsetInBytes + static_cast<uint32_t>(kRuntimePointerSize));
+ __ Strd(tmp, tmp1, MemOperand(addr, kTimestampOffsetInBytes));
__ Bind(slow_path->GetExitLabel());
}
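
For readers following the emitted sequence, here is a rough standalone model of what one trace buffer record ends up holding (not part of the patch; the slot order and the 4-byte slot size mirror the kMethodOffsetInBytes / kTimestampOffsetInBytes constants but are assumptions for this example):

  #include <cassert>
  #include <cstdint>

  // The ArtMethod pointer with the trace action encoded in its low bits
  // (methods are at least 4-byte aligned, so the two low bits are free),
  // followed by the 64-bit timestamp split into two 32-bit words
  // (Strd stores the low word first).
  struct TraceRecord {
    uint32_t method_and_action;  // assumed slot 0
    uint32_t timestamp_lo;       // assumed slot 1
    uint32_t timestamp_hi;       // assumed slot 2
  };

  enum TraceAction : uint32_t { kTraceMethodEnter = 0, kTraceMethodExit = 1 };

  TraceRecord MakeRecord(uint32_t method_ptr, TraceAction action, uint64_t timestamp) {
    assert((method_ptr & 0x3u) == 0u);  // alignment keeps the low bits clear
    return TraceRecord{method_ptr | action,
                       static_cast<uint32_t>(timestamp),
                       static_cast<uint32_t>(timestamp >> 32)};
  }

  int main() {
    TraceRecord r = MakeRecord(0x7000ABC0u, kTraceMethodExit, 0x0000000123456789ull);
    assert(r.method_and_action == 0x7000ABC1u);
    assert(r.timestamp_lo == 0x23456789u && r.timestamp_hi == 0x1u);
  }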
@@ -2228,7 +2262,11 @@ void InstructionCodeGeneratorARMVIXL::VisitMethodExitHook(HMethodExitHook* instr
}
void LocationsBuilderARMVIXL::VisitMethodEntryHook(HMethodEntryHook* method_hook) {
- new (GetGraph()->GetAllocator()) LocationSummary(method_hook, LocationSummary::kCallOnSlowPath);
+ LocationSummary* locations = new (GetGraph()->GetAllocator())
+ LocationSummary(method_hook, LocationSummary::kCallOnSlowPath);
+ // We need three temporary registers, two to load the timestamp counter (64-bit value) and one to
+ // compute the address to store the timestamp counter.
+ locations->AddRegisterTemps(3);
}
void InstructionCodeGeneratorARMVIXL::VisitMethodEntryHook(HMethodEntryHook* instruction) {
@@ -2262,14 +2300,15 @@ void CodeGeneratorARMVIXL::MaybeIncrementHotness(bool is_frame_entry) {
}
if (GetGraph()->IsCompilingBaseline() && !Runtime::Current()->IsAotCompiler()) {
- SlowPathCodeARMVIXL* slow_path = new (GetScopedAllocator()) CompileOptimizedSlowPathARMVIXL();
- AddSlowPath(slow_path);
ProfilingInfo* info = GetGraph()->GetProfilingInfo();
DCHECK(info != nullptr);
DCHECK(!HasEmptyFrame());
uint32_t address = reinterpret_cast32<uint32_t>(info);
UseScratchRegisterScope temps(GetVIXLAssembler());
vixl32::Register tmp = temps.Acquire();
+ SlowPathCodeARMVIXL* slow_path =
+ new (GetScopedAllocator()) CompileOptimizedSlowPathARMVIXL(/* profiling_info= */ lr);
+ AddSlowPath(slow_path);
__ Mov(lr, address);
__ Ldrh(tmp, MemOperand(lr, ProfilingInfo::BaselineHotnessCountOffset().Int32Value()));
__ Adds(tmp, tmp, -1);
@@ -2824,8 +2863,7 @@ void LocationsBuilderARMVIXL::VisitExit(HExit* exit) {
exit->SetLocations(nullptr);
}
-void InstructionCodeGeneratorARMVIXL::VisitExit(HExit* exit ATTRIBUTE_UNUSED) {
-}
+void InstructionCodeGeneratorARMVIXL::VisitExit([[maybe_unused]] HExit* exit) {}
void InstructionCodeGeneratorARMVIXL::GenerateCompareTestAndBranch(HCondition* condition,
vixl32::Label* true_target,
@@ -2963,6 +3001,11 @@ void LocationsBuilderARMVIXL::VisitIf(HIf* if_instr) {
LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(if_instr);
if (IsBooleanValueOrMaterializedCondition(if_instr->InputAt(0))) {
locations->SetInAt(0, Location::RequiresRegister());
+ if (GetGraph()->IsCompilingBaseline() &&
+ codegen_->GetCompilerOptions().ProfileBranches() &&
+ !Runtime::Current()->IsAotCompiler()) {
+ locations->AddTemp(Location::RequiresRegister());
+ }
}
}
@@ -2973,6 +3016,36 @@ void InstructionCodeGeneratorARMVIXL::VisitIf(HIf* if_instr) {
nullptr : codegen_->GetLabelOf(true_successor);
vixl32::Label* false_target = codegen_->GoesToNextBlock(if_instr->GetBlock(), false_successor) ?
nullptr : codegen_->GetLabelOf(false_successor);
+ if (IsBooleanValueOrMaterializedCondition(if_instr->InputAt(0))) {
+ if (GetGraph()->IsCompilingBaseline() &&
+ codegen_->GetCompilerOptions().ProfileBranches() &&
+ !Runtime::Current()->IsAotCompiler()) {
+ DCHECK(if_instr->InputAt(0)->IsCondition());
+ ProfilingInfo* info = GetGraph()->GetProfilingInfo();
+ DCHECK(info != nullptr);
+ BranchCache* cache = info->GetBranchCache(if_instr->GetDexPc());
+ // Currently, not all If branches are profiled.
+ if (cache != nullptr) {
+ uint32_t address =
+ reinterpret_cast32<uint32_t>(cache) + BranchCache::FalseOffset().Int32Value();
+ static_assert(
+ BranchCache::TrueOffset().Int32Value() - BranchCache::FalseOffset().Int32Value() == 2,
+ "Unexpected offsets for BranchCache");
+ vixl32::Label done;
+ UseScratchRegisterScope temps(GetVIXLAssembler());
+ vixl32::Register temp = temps.Acquire();
+ vixl32::Register counter = RegisterFrom(if_instr->GetLocations()->GetTemp(0));
+ vixl32::Register condition = InputRegisterAt(if_instr, 0);
+ __ Mov(temp, address);
+ __ Ldrh(counter, MemOperand(temp, condition, LSL, 1));
+ __ Adds(counter, counter, 1);
+ __ Uxth(counter, counter);
+ __ CompareAndBranchIfZero(counter, &done);
+ __ Strh(counter, MemOperand(temp, condition, LSL, 1));
+ __ Bind(&done);
+ }
+ }
+ }
GenerateTestAndBranch(if_instr, /* condition_input_index= */ 0, true_target, false_target);
}
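
A plain-C++ restatement of the counter update emitted here may help; the 16-bit counters and the saturating behaviour follow the Ldrh/Adds/Uxth/Strh sequence above, and the two-element layout follows the FalseOffset/TrueOffset static_assert (sketch only, not ART's BranchCache type):

  #include <cassert>
  #include <cstdint>

  // Two 16-bit counters, "false" then "true", 2 bytes apart, indexed by the
  // materialized condition (0 or 1). The increment saturates: once a counter
  // would wrap to 0 it stays at 0xFFFF, matching the skip-the-store branch.
  struct BranchCounters {
    uint16_t taken[2];  // [0] = condition false, [1] = condition true
  };

  void RecordBranch(BranchCounters* cache, bool condition) {
    uint16_t* counter = &cache->taken[condition ? 1 : 0];
    uint32_t incremented = (static_cast<uint32_t>(*counter) + 1u) & 0xFFFFu;  // Adds + Uxth
    if (incremented != 0u) {  // CompareAndBranchIfZero skips the store on wrap-around.
      *counter = static_cast<uint16_t>(incremented);
    }
  }

  int main() {
    BranchCounters c{{0xFFFE, 0}};
    RecordBranch(&c, /*condition=*/false);
    RecordBranch(&c, /*condition=*/false);  // would wrap, so it stays saturated
    RecordBranch(&c, /*condition=*/true);
    assert(c.taken[0] == 0xFFFF && c.taken[1] == 1);
  }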
@@ -3422,7 +3495,7 @@ void LocationsBuilderARMVIXL::VisitIntConstant(HIntConstant* constant) {
locations->SetOut(Location::ConstantLocation(constant));
}
-void InstructionCodeGeneratorARMVIXL::VisitIntConstant(HIntConstant* constant ATTRIBUTE_UNUSED) {
+void InstructionCodeGeneratorARMVIXL::VisitIntConstant([[maybe_unused]] HIntConstant* constant) {
// Will be generated at use site.
}
@@ -3432,7 +3505,7 @@ void LocationsBuilderARMVIXL::VisitNullConstant(HNullConstant* constant) {
locations->SetOut(Location::ConstantLocation(constant));
}
-void InstructionCodeGeneratorARMVIXL::VisitNullConstant(HNullConstant* constant ATTRIBUTE_UNUSED) {
+void InstructionCodeGeneratorARMVIXL::VisitNullConstant([[maybe_unused]] HNullConstant* constant) {
// Will be generated at use site.
}
@@ -3442,7 +3515,7 @@ void LocationsBuilderARMVIXL::VisitLongConstant(HLongConstant* constant) {
locations->SetOut(Location::ConstantLocation(constant));
}
-void InstructionCodeGeneratorARMVIXL::VisitLongConstant(HLongConstant* constant ATTRIBUTE_UNUSED) {
+void InstructionCodeGeneratorARMVIXL::VisitLongConstant([[maybe_unused]] HLongConstant* constant) {
// Will be generated at use site.
}
@@ -3453,7 +3526,7 @@ void LocationsBuilderARMVIXL::VisitFloatConstant(HFloatConstant* constant) {
}
void InstructionCodeGeneratorARMVIXL::VisitFloatConstant(
- HFloatConstant* constant ATTRIBUTE_UNUSED) {
+ [[maybe_unused]] HFloatConstant* constant) {
// Will be generated at use site.
}
@@ -3464,7 +3537,7 @@ void LocationsBuilderARMVIXL::VisitDoubleConstant(HDoubleConstant* constant) {
}
void InstructionCodeGeneratorARMVIXL::VisitDoubleConstant(
- HDoubleConstant* constant ATTRIBUTE_UNUSED) {
+ [[maybe_unused]] HDoubleConstant* constant) {
// Will be generated at use site.
}
@@ -3473,7 +3546,7 @@ void LocationsBuilderARMVIXL::VisitConstructorFence(HConstructorFence* construct
}
void InstructionCodeGeneratorARMVIXL::VisitConstructorFence(
- HConstructorFence* constructor_fence ATTRIBUTE_UNUSED) {
+ [[maybe_unused]] HConstructorFence* constructor_fence) {
codegen_->GenerateMemoryBarrier(MemBarrierKind::kStoreStore);
}
@@ -3489,7 +3562,7 @@ void LocationsBuilderARMVIXL::VisitReturnVoid(HReturnVoid* ret) {
ret->SetLocations(nullptr);
}
-void InstructionCodeGeneratorARMVIXL::VisitReturnVoid(HReturnVoid* ret ATTRIBUTE_UNUSED) {
+void InstructionCodeGeneratorARMVIXL::VisitReturnVoid([[maybe_unused]] HReturnVoid* ret) {
codegen_->GenerateFrameExit();
}
@@ -3612,26 +3685,27 @@ void LocationsBuilderARMVIXL::VisitInvokeInterface(HInvokeInterface* invoke) {
void CodeGeneratorARMVIXL::MaybeGenerateInlineCacheCheck(HInstruction* instruction,
vixl32::Register klass) {
DCHECK_EQ(r0.GetCode(), klass.GetCode());
- // We know the destination of an intrinsic, so no need to record inline
- // caches.
- if (!instruction->GetLocations()->Intrinsified() &&
- GetGraph()->IsCompilingBaseline() &&
- !Runtime::Current()->IsAotCompiler()) {
- DCHECK(!instruction->GetEnvironment()->IsFromInlinedInvoke());
+ if (ProfilingInfoBuilder::IsInlineCacheUseful(instruction->AsInvoke(), this)) {
ProfilingInfo* info = GetGraph()->GetProfilingInfo();
- DCHECK(info != nullptr);
- InlineCache* cache = info->GetInlineCache(instruction->GetDexPc());
- uint32_t address = reinterpret_cast32<uint32_t>(cache);
- vixl32::Label done;
- UseScratchRegisterScope temps(GetVIXLAssembler());
- temps.Exclude(ip);
- __ Mov(r4, address);
- __ Ldr(ip, MemOperand(r4, InlineCache::ClassesOffset().Int32Value()));
- // Fast path for a monomorphic cache.
- __ Cmp(klass, ip);
- __ B(eq, &done, /* is_far_target= */ false);
- InvokeRuntime(kQuickUpdateInlineCache, instruction, instruction->GetDexPc());
- __ Bind(&done);
+ InlineCache* cache = ProfilingInfoBuilder::GetInlineCache(info, instruction->AsInvoke());
+ if (cache != nullptr) {
+ uint32_t address = reinterpret_cast32<uint32_t>(cache);
+ vixl32::Label done;
+ UseScratchRegisterScope temps(GetVIXLAssembler());
+ temps.Exclude(ip);
+ __ Mov(r4, address);
+ __ Ldr(ip, MemOperand(r4, InlineCache::ClassesOffset().Int32Value()));
+ // Fast path for a monomorphic cache.
+ __ Cmp(klass, ip);
+ __ B(eq, &done, /* is_far_target= */ false);
+ InvokeRuntime(kQuickUpdateInlineCache, instruction, instruction->GetDexPc());
+ __ Bind(&done);
+ } else {
+ // This is unexpected, but we don't guarantee stable compilation across
+ // JIT runs so just warn about it.
+ ScopedObjectAccess soa(Thread::Current());
+ LOG(WARNING) << "Missing inline cache for " << GetGraph()->GetArtMethod()->PrettyMethod();
+ }
}
}
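
The rewritten check keeps the same monomorphic fast path as before; in plain C++ its shape is roughly the following (illustration only, not ART's InlineCache type):

  // Compare the receiver's class against the first cached class; on a hit fall
  // through, otherwise let the runtime update the cache.
  struct InlineCacheSketch {
    const void* classes[1];  // classes[0] is what InlineCache::ClassesOffset() points at
  };

  bool NeedsRuntimeUpdate(const InlineCacheSketch* cache, const void* receiver_class) {
    if (cache->classes[0] == receiver_class) {
      return false;  // monomorphic hit: nothing to record
    }
    return true;  // the generated code calls kQuickUpdateInlineCache here
  }

  int main() {
    int dummy_class = 0;
    InlineCacheSketch cache{{&dummy_class}};
    return NeedsRuntimeUpdate(&cache, &dummy_class) ? 1 : 0;
  }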
@@ -5617,7 +5691,7 @@ void LocationsBuilderARMVIXL::VisitParameterValue(HParameterValue* instruction)
}
void InstructionCodeGeneratorARMVIXL::VisitParameterValue(
- HParameterValue* instruction ATTRIBUTE_UNUSED) {
+ [[maybe_unused]] HParameterValue* instruction) {
// Nothing to do, the parameter is already at its location.
}
@@ -5628,7 +5702,7 @@ void LocationsBuilderARMVIXL::VisitCurrentMethod(HCurrentMethod* instruction) {
}
void InstructionCodeGeneratorARMVIXL::VisitCurrentMethod(
- HCurrentMethod* instruction ATTRIBUTE_UNUSED) {
+ [[maybe_unused]] HCurrentMethod* instruction) {
// Nothing to do, the method is already at its location.
}
@@ -5769,7 +5843,7 @@ void LocationsBuilderARMVIXL::VisitPhi(HPhi* instruction) {
locations->SetOut(Location::Any());
}
-void InstructionCodeGeneratorARMVIXL::VisitPhi(HPhi* instruction ATTRIBUTE_UNUSED) {
+void InstructionCodeGeneratorARMVIXL::VisitPhi([[maybe_unused]] HPhi* instruction) {
LOG(FATAL) << "Unreachable";
}
@@ -5893,10 +5967,7 @@ void InstructionCodeGeneratorARMVIXL::HandleFieldSet(HInstruction* instruction,
LocationSummary* locations = instruction->GetLocations();
vixl32::Register base = InputRegisterAt(instruction, 0);
Location value = locations->InAt(1);
- std::optional<vixl::aarch32::Label> pred_is_null;
- bool is_predicated =
- instruction->IsInstanceFieldSet() && instruction->AsInstanceFieldSet()->GetIsPredicatedSet();
bool is_volatile = field_info.IsVolatile();
bool atomic_ldrd_strd = codegen_->GetInstructionSetFeatures().HasAtomicLdrdAndStrd();
DataType::Type field_type = field_info.GetFieldType();
@@ -5904,11 +5975,6 @@ void InstructionCodeGeneratorARMVIXL::HandleFieldSet(HInstruction* instruction,
bool needs_write_barrier =
CodeGenerator::StoreNeedsWriteBarrier(field_type, instruction->InputAt(1));
- if (is_predicated) {
- pred_is_null.emplace();
- __ CompareAndBranchIfZero(base, &*pred_is_null, /* is_far_target= */ false);
- }
-
if (is_volatile) {
codegen_->GenerateMemoryBarrier(MemBarrierKind::kAnyStore);
}
@@ -6018,21 +6084,14 @@ void InstructionCodeGeneratorARMVIXL::HandleFieldSet(HInstruction* instruction,
if (is_volatile) {
codegen_->GenerateMemoryBarrier(MemBarrierKind::kAnyAny);
}
-
- if (is_predicated) {
- __ Bind(&*pred_is_null);
- }
}
void LocationsBuilderARMVIXL::HandleFieldGet(HInstruction* instruction,
const FieldInfo& field_info) {
- DCHECK(instruction->IsInstanceFieldGet() ||
- instruction->IsStaticFieldGet() ||
- instruction->IsPredicatedInstanceFieldGet());
+ DCHECK(instruction->IsInstanceFieldGet() || instruction->IsStaticFieldGet());
bool object_field_get_with_read_barrier =
- gUseReadBarrier && (field_info.GetFieldType() == DataType::Type::kReference);
- bool is_predicated = instruction->IsPredicatedInstanceFieldGet();
+ (field_info.GetFieldType() == DataType::Type::kReference) && codegen_->EmitReadBarrier();
LocationSummary* locations =
new (GetGraph()->GetAllocator()) LocationSummary(instruction,
object_field_get_with_read_barrier
@@ -6042,7 +6101,7 @@ void LocationsBuilderARMVIXL::HandleFieldGet(HInstruction* instruction,
locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty()); // No caller-save registers.
}
// Input for object receiver.
- locations->SetInAt(is_predicated ? 1 : 0, Location::RequiresRegister());
+ locations->SetInAt(0, Location::RequiresRegister());
bool volatile_for_double = field_info.IsVolatile()
&& (field_info.GetFieldType() == DataType::Type::kFloat64)
@@ -6057,20 +6116,10 @@ void LocationsBuilderARMVIXL::HandleFieldGet(HInstruction* instruction,
object_field_get_with_read_barrier;
if (DataType::IsFloatingPointType(instruction->GetType())) {
- if (is_predicated) {
- locations->SetInAt(0, Location::RequiresFpuRegister());
- locations->SetOut(Location::SameAsFirstInput());
- } else {
- locations->SetOut(Location::RequiresFpuRegister());
- }
+ locations->SetOut(Location::RequiresFpuRegister());
} else {
- if (is_predicated) {
- locations->SetInAt(0, Location::RequiresRegister());
- locations->SetOut(Location::SameAsFirstInput());
- } else {
- locations->SetOut(Location::RequiresRegister(),
- (overlap ? Location::kOutputOverlap : Location::kNoOutputOverlap));
- }
+ locations->SetOut(Location::RequiresRegister(),
+ (overlap ? Location::kOutputOverlap : Location::kNoOutputOverlap));
}
if (volatile_for_double) {
// The ARM encoding has some additional constraints for ldrexd/strexd:
@@ -6104,8 +6153,7 @@ Location LocationsBuilderARMVIXL::ArithmeticZeroOrFpuRegister(HInstruction* inpu
Location LocationsBuilderARMVIXL::ArmEncodableConstantOrRegister(HInstruction* constant,
Opcode opcode) {
DCHECK(!DataType::IsFloatingPointType(constant->GetType()));
- if (constant->IsConstant() &&
- CanEncodeConstantAsImmediate(constant->AsConstant(), opcode)) {
+ if (constant->IsConstant() && CanEncodeConstantAsImmediate(constant->AsConstant(), opcode)) {
return Location::ConstantLocation(constant);
}
return Location::RequiresRegister();
@@ -6171,12 +6219,10 @@ bool LocationsBuilderARMVIXL::CanEncodeConstantAsImmediate(HConstant* input_cst,
void InstructionCodeGeneratorARMVIXL::HandleFieldGet(HInstruction* instruction,
const FieldInfo& field_info) {
- DCHECK(instruction->IsInstanceFieldGet() ||
- instruction->IsStaticFieldGet() ||
- instruction->IsPredicatedInstanceFieldGet());
+ DCHECK(instruction->IsInstanceFieldGet() || instruction->IsStaticFieldGet());
LocationSummary* locations = instruction->GetLocations();
- uint32_t receiver_input = instruction->IsPredicatedInstanceFieldGet() ? 1 : 0;
+ uint32_t receiver_input = 0;
vixl32::Register base = InputRegisterAt(instruction, receiver_input);
Location out = locations->Out();
bool is_volatile = field_info.IsVolatile();
@@ -6202,7 +6248,7 @@ void InstructionCodeGeneratorARMVIXL::HandleFieldGet(HInstruction* instruction,
case DataType::Type::kReference: {
// /* HeapReference<Object> */ out = *(base + offset)
- if (gUseReadBarrier && kUseBakerReadBarrier) {
+ if (codegen_->EmitBakerReadBarrier()) {
Location maybe_temp = (locations->GetTempCount() != 0) ? locations->GetTemp(0) : Location();
// Note that a potential implicit null check is handled in this
// CodeGeneratorARMVIXL::GenerateFieldLoadWithBakerReadBarrier call.
@@ -6299,19 +6345,6 @@ void LocationsBuilderARMVIXL::VisitInstanceFieldGet(HInstanceFieldGet* instructi
HandleFieldGet(instruction, instruction->GetFieldInfo());
}
-void LocationsBuilderARMVIXL::VisitPredicatedInstanceFieldGet(
- HPredicatedInstanceFieldGet* instruction) {
- HandleFieldGet(instruction, instruction->GetFieldInfo());
-}
-
-void InstructionCodeGeneratorARMVIXL::VisitPredicatedInstanceFieldGet(
- HPredicatedInstanceFieldGet* instruction) {
- vixl::aarch32::Label finish;
- __ CompareAndBranchIfZero(InputRegisterAt(instruction, 1), &finish, false);
- HandleFieldGet(instruction, instruction->GetFieldInfo());
- __ Bind(&finish);
-}
-
void InstructionCodeGeneratorARMVIXL::VisitInstanceFieldGet(HInstanceFieldGet* instruction) {
HandleFieldGet(instruction, instruction->GetFieldInfo());
}
@@ -6512,7 +6545,7 @@ void CodeGeneratorARMVIXL::StoreToShiftedRegOffset(DataType::Type type,
void LocationsBuilderARMVIXL::VisitArrayGet(HArrayGet* instruction) {
bool object_array_get_with_read_barrier =
- gUseReadBarrier && (instruction->GetType() == DataType::Type::kReference);
+ (instruction->GetType() == DataType::Type::kReference) && codegen_->EmitReadBarrier();
LocationSummary* locations =
new (GetGraph()->GetAllocator()) LocationSummary(instruction,
object_array_get_with_read_barrier
@@ -6660,14 +6693,14 @@ void InstructionCodeGeneratorARMVIXL::VisitArrayGet(HArrayGet* instruction) {
// The read barrier instrumentation of object ArrayGet
// instructions does not support the HIntermediateAddress
// instruction.
- DCHECK(!(has_intermediate_address && gUseReadBarrier));
+ DCHECK(!(has_intermediate_address && codegen_->EmitReadBarrier()));
static_assert(
sizeof(mirror::HeapReference<mirror::Object>) == sizeof(int32_t),
"art::mirror::HeapReference<art::mirror::Object> and int32_t have different sizes.");
// /* HeapReference<Object> */ out =
// *(obj + data_offset + index * sizeof(HeapReference<Object>))
- if (gUseReadBarrier && kUseBakerReadBarrier) {
+ if (codegen_->EmitBakerReadBarrier()) {
// Note that a potential implicit null check is handled in this
// CodeGeneratorARMVIXL::GenerateArrayLoadWithBakerReadBarrier call.
DCHECK(!instruction->CanDoImplicitNullCheckOn(instruction->InputAt(0)));
@@ -7234,7 +7267,7 @@ void CodeGeneratorARMVIXL::MarkGCCard(vixl32::Register temp,
}
}
-void LocationsBuilderARMVIXL::VisitParallelMove(HParallelMove* instruction ATTRIBUTE_UNUSED) {
+void LocationsBuilderARMVIXL::VisitParallelMove([[maybe_unused]] HParallelMove* instruction) {
LOG(FATAL) << "Unreachable";
}
@@ -7591,7 +7624,7 @@ void LocationsBuilderARMVIXL::VisitLoadClass(HLoadClass* cls) {
load_kind == HLoadClass::LoadKind::kBssEntryPublic ||
load_kind == HLoadClass::LoadKind::kBssEntryPackage);
- const bool requires_read_barrier = gUseReadBarrier && !cls->IsInBootImage();
+ const bool requires_read_barrier = !cls->IsInBootImage() && codegen_->EmitReadBarrier();
LocationSummary::CallKind call_kind = (cls->NeedsEnvironment() || requires_read_barrier)
? LocationSummary::kCallOnSlowPath
: LocationSummary::kNoCall;
@@ -7604,12 +7637,14 @@ void LocationsBuilderARMVIXL::VisitLoadClass(HLoadClass* cls) {
locations->SetInAt(0, Location::RequiresRegister());
}
locations->SetOut(Location::RequiresRegister());
- if (load_kind == HLoadClass::LoadKind::kBssEntry) {
- if (!gUseReadBarrier || kUseBakerReadBarrier) {
+ if (load_kind == HLoadClass::LoadKind::kBssEntry ||
+ load_kind == HLoadClass::LoadKind::kBssEntryPublic ||
+ load_kind == HLoadClass::LoadKind::kBssEntryPackage) {
+ if (codegen_->EmitNonBakerReadBarrier()) {
+ // For non-Baker read barrier we have a temp-clobbering call.
+ } else {
// Rely on the type resolution or initialization and marking to save everything we need.
locations->SetCustomSlowPathCallerSaves(OneRegInReferenceOutSaveEverythingCallerSaves());
- } else {
- // For non-Baker read barrier we have a temp-clobbering call.
}
}
}
@@ -7631,9 +7666,8 @@ void InstructionCodeGeneratorARMVIXL::VisitLoadClass(HLoadClass* cls) NO_THREAD_
Location out_loc = locations->Out();
vixl32::Register out = OutputRegister(cls);
- const ReadBarrierOption read_barrier_option = cls->IsInBootImage()
- ? kWithoutReadBarrier
- : gCompilerReadBarrierOption;
+ const ReadBarrierOption read_barrier_option =
+ cls->IsInBootImage() ? kWithoutReadBarrier : codegen_->GetCompilerReadBarrierOption();
bool generate_null_check = false;
switch (load_kind) {
case HLoadClass::LoadKind::kReferrersClass: {
@@ -7840,7 +7874,7 @@ HLoadString::LoadKind CodeGeneratorARMVIXL::GetSupportedLoadStringKind(
}
void LocationsBuilderARMVIXL::VisitLoadString(HLoadString* load) {
- LocationSummary::CallKind call_kind = CodeGenerator::GetLoadStringCallKind(load);
+ LocationSummary::CallKind call_kind = codegen_->GetLoadStringCallKind(load);
LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(load, call_kind);
HLoadString::LoadKind load_kind = load->GetLoadKind();
if (load_kind == HLoadString::LoadKind::kRuntimeCall) {
@@ -7848,11 +7882,11 @@ void LocationsBuilderARMVIXL::VisitLoadString(HLoadString* load) {
} else {
locations->SetOut(Location::RequiresRegister());
if (load_kind == HLoadString::LoadKind::kBssEntry) {
- if (!gUseReadBarrier || kUseBakerReadBarrier) {
+ if (codegen_->EmitNonBakerReadBarrier()) {
+ // For non-Baker read barrier we have a temp-clobbering call.
+ } else {
// Rely on the pResolveString and marking to save everything we need, including temps.
locations->SetCustomSlowPathCallerSaves(OneRegInReferenceOutSaveEverythingCallerSaves());
- } else {
- // For non-Baker read barrier we have a temp-clobbering call.
}
}
}
@@ -7887,7 +7921,7 @@ void InstructionCodeGeneratorARMVIXL::VisitLoadString(HLoadString* load) NO_THRE
codegen_->EmitMovwMovtPlaceholder(labels, out);
// All aligned loads are implicitly atomic consume operations on ARM.
codegen_->GenerateGcRootFieldLoad(
- load, out_loc, out, /*offset=*/ 0, gCompilerReadBarrierOption);
+ load, out_loc, out, /*offset=*/0, codegen_->GetCompilerReadBarrierOption());
LoadStringSlowPathARMVIXL* slow_path =
new (codegen_->GetScopedAllocator()) LoadStringSlowPathARMVIXL(load);
codegen_->AddSlowPath(slow_path);
@@ -7908,14 +7942,13 @@ void InstructionCodeGeneratorARMVIXL::VisitLoadString(HLoadString* load) NO_THRE
load->GetString()));
// /* GcRoot<mirror::String> */ out = *out
codegen_->GenerateGcRootFieldLoad(
- load, out_loc, out, /*offset=*/ 0, gCompilerReadBarrierOption);
+ load, out_loc, out, /*offset=*/0, codegen_->GetCompilerReadBarrierOption());
return;
}
default:
break;
}
- // TODO: Re-add the compiler code to do string dex cache lookup again.
DCHECK_EQ(load->GetLoadKind(), HLoadString::LoadKind::kRuntimeCall);
InvokeRuntimeCallingConventionARMVIXL calling_convention;
__ Mov(calling_convention.GetRegisterAt(0), load->GetStringIndex().index_);
@@ -7944,7 +7977,7 @@ void LocationsBuilderARMVIXL::VisitClearException(HClearException* clear) {
new (GetGraph()->GetAllocator()) LocationSummary(clear, LocationSummary::kNoCall);
}
-void InstructionCodeGeneratorARMVIXL::VisitClearException(HClearException* clear ATTRIBUTE_UNUSED) {
+void InstructionCodeGeneratorARMVIXL::VisitClearException([[maybe_unused]] HClearException* clear) {
UseScratchRegisterScope temps(GetVIXLAssembler());
vixl32::Register temp = temps.Acquire();
__ Mov(temp, 0);
@@ -7964,8 +7997,8 @@ void InstructionCodeGeneratorARMVIXL::VisitThrow(HThrow* instruction) {
}
// Temp is used for read barrier.
-static size_t NumberOfInstanceOfTemps(TypeCheckKind type_check_kind) {
- if (gUseReadBarrier &&
+static size_t NumberOfInstanceOfTemps(bool emit_read_barrier, TypeCheckKind type_check_kind) {
+ if (emit_read_barrier &&
(kUseBakerReadBarrier ||
type_check_kind == TypeCheckKind::kAbstractClassCheck ||
type_check_kind == TypeCheckKind::kClassHierarchyCheck ||
@@ -7978,11 +8011,11 @@ static size_t NumberOfInstanceOfTemps(TypeCheckKind type_check_kind) {
// Interface case has 3 temps, one for holding the number of interfaces, one for the current
// interface pointer, one for loading the current interface.
// The other checks have one temp for loading the object's class.
-static size_t NumberOfCheckCastTemps(TypeCheckKind type_check_kind) {
+static size_t NumberOfCheckCastTemps(bool emit_read_barrier, TypeCheckKind type_check_kind) {
if (type_check_kind == TypeCheckKind::kInterfaceCheck) {
return 3;
}
- return 1 + NumberOfInstanceOfTemps(type_check_kind);
+ return 1 + NumberOfInstanceOfTemps(emit_read_barrier, type_check_kind);
}
void LocationsBuilderARMVIXL::VisitInstanceOf(HInstanceOf* instruction) {
@@ -7994,7 +8027,7 @@ void LocationsBuilderARMVIXL::VisitInstanceOf(HInstanceOf* instruction) {
case TypeCheckKind::kAbstractClassCheck:
case TypeCheckKind::kClassHierarchyCheck:
case TypeCheckKind::kArrayObjectCheck: {
- bool needs_read_barrier = CodeGenerator::InstanceOfNeedsReadBarrier(instruction);
+ bool needs_read_barrier = codegen_->InstanceOfNeedsReadBarrier(instruction);
call_kind = needs_read_barrier ? LocationSummary::kCallOnSlowPath : LocationSummary::kNoCall;
baker_read_barrier_slow_path = kUseBakerReadBarrier && needs_read_barrier;
break;
@@ -8024,7 +8057,8 @@ void LocationsBuilderARMVIXL::VisitInstanceOf(HInstanceOf* instruction) {
// The "out" register is used as a temporary, so it overlaps with the inputs.
// Note that TypeCheckSlowPathARM uses this register too.
locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap);
- locations->AddRegisterTemps(NumberOfInstanceOfTemps(type_check_kind));
+ locations->AddRegisterTemps(
+ NumberOfInstanceOfTemps(codegen_->EmitReadBarrier(), type_check_kind));
}
void InstructionCodeGeneratorARMVIXL::VisitInstanceOf(HInstanceOf* instruction) {
@@ -8037,7 +8071,7 @@ void InstructionCodeGeneratorARMVIXL::VisitInstanceOf(HInstanceOf* instruction)
: InputRegisterAt(instruction, 1);
Location out_loc = locations->Out();
vixl32::Register out = OutputRegister(instruction);
- const size_t num_temps = NumberOfInstanceOfTemps(type_check_kind);
+ const size_t num_temps = NumberOfInstanceOfTemps(codegen_->EmitReadBarrier(), type_check_kind);
DCHECK_LE(num_temps, 1u);
Location maybe_temp_loc = (num_temps >= 1) ? locations->GetTemp(0) : Location::NoLocation();
uint32_t class_offset = mirror::Object::ClassOffset().Int32Value();
@@ -8059,7 +8093,7 @@ void InstructionCodeGeneratorARMVIXL::VisitInstanceOf(HInstanceOf* instruction)
switch (type_check_kind) {
case TypeCheckKind::kExactCheck: {
ReadBarrierOption read_barrier_option =
- CodeGenerator::ReadBarrierOptionForInstanceOf(instruction);
+ codegen_->ReadBarrierOptionForInstanceOf(instruction);
// /* HeapReference<Class> */ out = obj->klass_
GenerateReferenceLoadTwoRegisters(instruction,
out_loc,
@@ -8094,7 +8128,7 @@ void InstructionCodeGeneratorARMVIXL::VisitInstanceOf(HInstanceOf* instruction)
case TypeCheckKind::kAbstractClassCheck: {
ReadBarrierOption read_barrier_option =
- CodeGenerator::ReadBarrierOptionForInstanceOf(instruction);
+ codegen_->ReadBarrierOptionForInstanceOf(instruction);
// /* HeapReference<Class> */ out = obj->klass_
GenerateReferenceLoadTwoRegisters(instruction,
out_loc,
@@ -8122,7 +8156,7 @@ void InstructionCodeGeneratorARMVIXL::VisitInstanceOf(HInstanceOf* instruction)
case TypeCheckKind::kClassHierarchyCheck: {
ReadBarrierOption read_barrier_option =
- CodeGenerator::ReadBarrierOptionForInstanceOf(instruction);
+ codegen_->ReadBarrierOptionForInstanceOf(instruction);
// /* HeapReference<Class> */ out = obj->klass_
GenerateReferenceLoadTwoRegisters(instruction,
out_loc,
@@ -8178,7 +8212,7 @@ void InstructionCodeGeneratorARMVIXL::VisitInstanceOf(HInstanceOf* instruction)
case TypeCheckKind::kArrayObjectCheck: {
ReadBarrierOption read_barrier_option =
- CodeGenerator::ReadBarrierOptionForInstanceOf(instruction);
+ codegen_->ReadBarrierOptionForInstanceOf(instruction);
// /* HeapReference<Class> */ out = obj->klass_
GenerateReferenceLoadTwoRegisters(instruction,
out_loc,
@@ -8307,7 +8341,7 @@ void InstructionCodeGeneratorARMVIXL::VisitInstanceOf(HInstanceOf* instruction)
void LocationsBuilderARMVIXL::VisitCheckCast(HCheckCast* instruction) {
TypeCheckKind type_check_kind = instruction->GetTypeCheckKind();
- LocationSummary::CallKind call_kind = CodeGenerator::GetCheckCastCallKind(instruction);
+ LocationSummary::CallKind call_kind = codegen_->GetCheckCastCallKind(instruction);
LocationSummary* locations =
new (GetGraph()->GetAllocator()) LocationSummary(instruction, call_kind);
locations->SetInAt(0, Location::RequiresRegister());
@@ -8318,7 +8352,8 @@ void LocationsBuilderARMVIXL::VisitCheckCast(HCheckCast* instruction) {
} else {
locations->SetInAt(1, Location::RequiresRegister());
}
- locations->AddRegisterTemps(NumberOfCheckCastTemps(type_check_kind));
+ locations->AddRegisterTemps(
+ NumberOfCheckCastTemps(codegen_->EmitReadBarrier(), type_check_kind));
}
void InstructionCodeGeneratorARMVIXL::VisitCheckCast(HCheckCast* instruction) {
@@ -8331,7 +8366,7 @@ void InstructionCodeGeneratorARMVIXL::VisitCheckCast(HCheckCast* instruction) {
: InputRegisterAt(instruction, 1);
Location temp_loc = locations->GetTemp(0);
vixl32::Register temp = RegisterFrom(temp_loc);
- const size_t num_temps = NumberOfCheckCastTemps(type_check_kind);
+ const size_t num_temps = NumberOfCheckCastTemps(codegen_->EmitReadBarrier(), type_check_kind);
DCHECK_LE(num_temps, 3u);
Location maybe_temp2_loc = (num_temps >= 2) ? locations->GetTemp(1) : Location::NoLocation();
Location maybe_temp3_loc = (num_temps >= 3) ? locations->GetTemp(2) : Location::NoLocation();
@@ -8344,7 +8379,7 @@ void InstructionCodeGeneratorARMVIXL::VisitCheckCast(HCheckCast* instruction) {
const uint32_t object_array_data_offset =
mirror::Array::DataOffset(kHeapReferenceSize).Uint32Value();
- bool is_type_check_slow_path_fatal = CodeGenerator::IsTypeCheckSlowPathFatal(instruction);
+ bool is_type_check_slow_path_fatal = codegen_->IsTypeCheckSlowPathFatal(instruction);
SlowPathCodeARMVIXL* type_check_slow_path =
new (codegen_->GetScopedAllocator()) TypeCheckSlowPathARMVIXL(
instruction, is_type_check_slow_path_fatal);
@@ -8490,12 +8525,11 @@ void InstructionCodeGeneratorARMVIXL::VisitCheckCast(HCheckCast* instruction) {
kWithoutReadBarrier);
// /* HeapReference<Class> */ temp = temp->iftable_
- GenerateReferenceLoadTwoRegisters(instruction,
- temp_loc,
- temp_loc,
- iftable_offset,
- maybe_temp2_loc,
- kWithoutReadBarrier);
+ GenerateReferenceLoadOneRegister(instruction,
+ temp_loc,
+ iftable_offset,
+ maybe_temp2_loc,
+ kWithoutReadBarrier);
// Iftable is never null.
__ Ldr(RegisterFrom(maybe_temp2_loc), MemOperand(temp, array_length_offset));
// Loop through the iftable and check if any class matches.
@@ -8900,7 +8934,7 @@ void InstructionCodeGeneratorARMVIXL::GenerateReferenceLoadOneRegister(
ReadBarrierOption read_barrier_option) {
vixl32::Register out_reg = RegisterFrom(out);
if (read_barrier_option == kWithReadBarrier) {
- CHECK(gUseReadBarrier);
+ DCHECK(codegen_->EmitReadBarrier());
DCHECK(maybe_temp.IsRegister()) << maybe_temp;
if (kUseBakerReadBarrier) {
// Load with fast path based Baker's read barrier.
@@ -8935,7 +8969,7 @@ void InstructionCodeGeneratorARMVIXL::GenerateReferenceLoadTwoRegisters(
vixl32::Register out_reg = RegisterFrom(out);
vixl32::Register obj_reg = RegisterFrom(obj);
if (read_barrier_option == kWithReadBarrier) {
- CHECK(gUseReadBarrier);
+ DCHECK(codegen_->EmitReadBarrier());
if (kUseBakerReadBarrier) {
DCHECK(maybe_temp.IsRegister()) << maybe_temp;
// Load with fast path based Baker's read barrier.
@@ -8964,7 +8998,7 @@ void CodeGeneratorARMVIXL::GenerateGcRootFieldLoad(
ReadBarrierOption read_barrier_option) {
vixl32::Register root_reg = RegisterFrom(root);
if (read_barrier_option == kWithReadBarrier) {
- DCHECK(gUseReadBarrier);
+ DCHECK(EmitReadBarrier());
if (kUseBakerReadBarrier) {
// Fast path implementation of art::ReadBarrier::BarrierForRoot when
// Baker's read barrier are used.
@@ -9025,11 +9059,10 @@ void CodeGeneratorARMVIXL::GenerateGcRootFieldLoad(
MaybeGenerateMarkingRegisterCheck(/* code= */ 20);
}
-void CodeGeneratorARMVIXL::GenerateIntrinsicCasMoveWithBakerReadBarrier(
+void CodeGeneratorARMVIXL::GenerateIntrinsicMoveWithBakerReadBarrier(
vixl::aarch32::Register marked_old_value,
vixl::aarch32::Register old_value) {
- DCHECK(gUseReadBarrier);
- DCHECK(kUseBakerReadBarrier);
+ DCHECK(EmitBakerReadBarrier());
// Similar to the Baker RB path in GenerateGcRootFieldLoad(), with a MOV instead of LDR.
// For low registers, we can reuse the GC root narrow entrypoint, for high registers
@@ -9062,8 +9095,7 @@ void CodeGeneratorARMVIXL::GenerateFieldLoadWithBakerReadBarrier(HInstruction* i
vixl32::Register obj,
const vixl32::MemOperand& src,
bool needs_null_check) {
- DCHECK(gUseReadBarrier);
- DCHECK(kUseBakerReadBarrier);
+ DCHECK(EmitBakerReadBarrier());
// Query `art::Thread::Current()->GetIsGcMarking()` (stored in the
// Marking Register) to decide whether we need to enter the slow
@@ -9155,8 +9187,7 @@ void CodeGeneratorARMVIXL::GenerateArrayLoadWithBakerReadBarrier(Location ref,
Location index,
Location temp,
bool needs_null_check) {
- DCHECK(gUseReadBarrier);
- DCHECK(kUseBakerReadBarrier);
+ DCHECK(EmitBakerReadBarrier());
static_assert(
sizeof(mirror::HeapReference<mirror::Object>) == sizeof(int32_t),
@@ -9221,7 +9252,7 @@ void CodeGeneratorARMVIXL::GenerateArrayLoadWithBakerReadBarrier(Location ref,
void CodeGeneratorARMVIXL::MaybeGenerateMarkingRegisterCheck(int code, Location temp_loc) {
// The following condition is a compile-time one, so it does not have a run-time cost.
- if (kIsDebugBuild && gUseReadBarrier && kUseBakerReadBarrier) {
+ if (kIsDebugBuild && EmitBakerReadBarrier()) {
// The following condition is a run-time one; it is executed after the
// previous compile-time test, to avoid penalizing non-debug builds.
if (GetCompilerOptions().EmitRunTimeChecksInDebugMode()) {
@@ -9251,7 +9282,7 @@ void CodeGeneratorARMVIXL::GenerateReadBarrierSlow(HInstruction* instruction,
Location obj,
uint32_t offset,
Location index) {
- DCHECK(gUseReadBarrier);
+ DCHECK(EmitReadBarrier());
// Insert a slow path based read barrier *after* the reference load.
//
@@ -9277,7 +9308,7 @@ void CodeGeneratorARMVIXL::MaybeGenerateReadBarrierSlow(HInstruction* instructio
Location obj,
uint32_t offset,
Location index) {
- if (gUseReadBarrier) {
+ if (EmitReadBarrier()) {
// Baker's read barriers shall be handled by the fast path
// (CodeGeneratorARMVIXL::GenerateReferenceLoadWithBakerReadBarrier).
DCHECK(!kUseBakerReadBarrier);
@@ -9292,7 +9323,7 @@ void CodeGeneratorARMVIXL::MaybeGenerateReadBarrierSlow(HInstruction* instructio
void CodeGeneratorARMVIXL::GenerateReadBarrierForRootSlow(HInstruction* instruction,
Location out,
Location root) {
- DCHECK(gUseReadBarrier);
+ DCHECK(EmitReadBarrier());
// Insert a slow path based read barrier *after* the GC root load.
//
@@ -9667,7 +9698,7 @@ void CodeGeneratorARMVIXL::LoadBootImageAddress(vixl32::Register reg,
void CodeGeneratorARMVIXL::LoadTypeForBootImageIntrinsic(vixl::aarch32::Register reg,
TypeReference target_type) {
- // Load the class the same way as for HLoadClass::LoadKind::kBootImageLinkTimePcRelative.
+ // Load the type the same way as for HLoadClass::LoadKind::kBootImageLinkTimePcRelative.
DCHECK(GetCompilerOptions().IsBootImage() || GetCompilerOptions().IsBootImageExtension());
PcRelativePatchInfo* labels =
NewBootImageTypePatch(*target_type.dex_file, target_type.TypeIndex());
@@ -9828,7 +9859,7 @@ void CodeGeneratorARMVIXL::EmitThunkCode(const linker::LinkerPatch& patch,
assembler.FinalizeCode();
code->resize(assembler.CodeSize());
MemoryRegion code_region(code->data(), code->size());
- assembler.FinalizeInstructions(code_region);
+ assembler.CopyInstructions(code_region);
}
VIXLUInt32Literal* CodeGeneratorARMVIXL::DeduplicateUint32Literal(
@@ -9867,12 +9898,12 @@ void InstructionCodeGeneratorARMVIXL::VisitMultiplyAccumulate(HMultiplyAccumulat
}
}
-void LocationsBuilderARMVIXL::VisitBoundType(HBoundType* instruction ATTRIBUTE_UNUSED) {
+void LocationsBuilderARMVIXL::VisitBoundType([[maybe_unused]] HBoundType* instruction) {
// Nothing to do, this should be removed during prepare for register allocator.
LOG(FATAL) << "Unreachable";
}
-void InstructionCodeGeneratorARMVIXL::VisitBoundType(HBoundType* instruction ATTRIBUTE_UNUSED) {
+void InstructionCodeGeneratorARMVIXL::VisitBoundType([[maybe_unused]] HBoundType* instruction) {
// Nothing to do, this should be removed during prepare for register allocator.
LOG(FATAL) << "Unreachable";
}
diff --git a/compiler/optimizing/code_generator_arm_vixl.h b/compiler/optimizing/code_generator_arm_vixl.h
index f5abe6951a..00e0bfa399 100644
--- a/compiler/optimizing/code_generator_arm_vixl.h
+++ b/compiler/optimizing/code_generator_arm_vixl.h
@@ -162,22 +162,37 @@ using VIXLUInt32Literal = vixl::aarch32::Literal<uint32_t>;
/* 1.8 */ \
V(MathFmaDouble) \
V(MathFmaFloat) \
- V(UnsafeGetAndAddInt) \
- V(UnsafeGetAndAddLong) \
- V(UnsafeGetAndSetInt) \
- V(UnsafeGetAndSetLong) \
- V(UnsafeGetAndSetObject) \
V(MethodHandleInvokeExact) \
V(MethodHandleInvoke) \
/* OpenJDK 11 */ \
V(JdkUnsafeCASLong) /* High register pressure */ \
- V(JdkUnsafeGetAndAddInt) \
- V(JdkUnsafeGetAndAddLong) \
- V(JdkUnsafeGetAndSetInt) \
- V(JdkUnsafeGetAndSetLong) \
- V(JdkUnsafeGetAndSetObject) \
V(JdkUnsafeCompareAndSetLong)
+ALWAYS_INLINE inline StoreOperandType GetStoreOperandType(DataType::Type type) {
+ switch (type) {
+ case DataType::Type::kReference:
+ return kStoreWord;
+ case DataType::Type::kBool:
+ case DataType::Type::kUint8:
+ case DataType::Type::kInt8:
+ return kStoreByte;
+ case DataType::Type::kUint16:
+ case DataType::Type::kInt16:
+ return kStoreHalfword;
+ case DataType::Type::kInt32:
+ return kStoreWord;
+ case DataType::Type::kInt64:
+ return kStoreWordPair;
+ case DataType::Type::kFloat32:
+ return kStoreSWord;
+ case DataType::Type::kFloat64:
+ return kStoreDWord;
+ default:
+ LOG(FATAL) << "Unreachable type " << type;
+ UNREACHABLE();
+ }
+}
+
class JumpTableARMVIXL : public DeletableArenaObject<kArenaAllocSwitchTable> {
public:
explicit JumpTableARMVIXL(HPackedSwitch* switch_instr)
@@ -620,7 +635,7 @@ class CodeGeneratorARMVIXL : public CodeGenerator {
block_labels_.resize(GetGraph()->GetBlocks().size());
}
- void Finalize(CodeAllocator* allocator) override;
+ void Finalize() override;
bool NeedsTwoRegisters(DataType::Type type) const override {
return type == DataType::Type::kFloat64 || type == DataType::Type::kInt64;
@@ -725,9 +740,9 @@ class CodeGeneratorARMVIXL : public CodeGenerator {
vixl::aarch32::Register obj,
uint32_t offset,
ReadBarrierOption read_barrier_option);
- // Generate MOV for an intrinsic CAS to mark the old value with Baker read barrier.
- void GenerateIntrinsicCasMoveWithBakerReadBarrier(vixl::aarch32::Register marked_old_value,
- vixl::aarch32::Register old_value);
+ // Generate MOV for an intrinsic to mark the old value with Baker read barrier.
+ void GenerateIntrinsicMoveWithBakerReadBarrier(vixl::aarch32::Register marked_old_value,
+ vixl::aarch32::Register old_value);
// Fast path implementation of ReadBarrier::Barrier for a heap
// reference field load when Baker's read barriers are used.
// Overload suitable for Unsafe.getObject/-Volatile() intrinsic.
diff --git a/compiler/optimizing/code_generator_riscv64.cc b/compiler/optimizing/code_generator_riscv64.cc
new file mode 100644
index 0000000000..182c1d4d05
--- /dev/null
+++ b/compiler/optimizing/code_generator_riscv64.cc
@@ -0,0 +1,6883 @@
+/*
+ * Copyright (C) 2023 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "code_generator_riscv64.h"
+
+#include "android-base/logging.h"
+#include "android-base/macros.h"
+#include "arch/riscv64/jni_frame_riscv64.h"
+#include "arch/riscv64/registers_riscv64.h"
+#include "base/arena_containers.h"
+#include "base/macros.h"
+#include "class_root-inl.h"
+#include "code_generator_utils.h"
+#include "dwarf/register.h"
+#include "gc/heap.h"
+#include "gc/space/image_space.h"
+#include "heap_poisoning.h"
+#include "intrinsics_list.h"
+#include "intrinsics_riscv64.h"
+#include "jit/profiling_info.h"
+#include "linker/linker_patch.h"
+#include "mirror/class-inl.h"
+#include "optimizing/nodes.h"
+#include "optimizing/profiling_info_builder.h"
+#include "runtime.h"
+#include "scoped_thread_state_change-inl.h"
+#include "stack_map_stream.h"
+#include "trace.h"
+#include "utils/label.h"
+#include "utils/riscv64/assembler_riscv64.h"
+#include "utils/stack_checks.h"
+
+namespace art HIDDEN {
+namespace riscv64 {
+
+// Placeholder values embedded in instructions, patched at link time.
+constexpr uint32_t kLinkTimeOffsetPlaceholderHigh = 0x12345;
+constexpr uint32_t kLinkTimeOffsetPlaceholderLow = 0x678;
+
+// Compare-and-jump packed switch generates approx. 3 + 1.5 * N 32-bit
+// instructions for N cases.
+// Table-based packed switch generates approx. 10 32-bit instructions
+// and N 32-bit data words for N cases.
+// We switch to the table-based method starting with 6 entries.
+static constexpr uint32_t kPackedSwitchCompareJumpThreshold = 6;
+
+static constexpr XRegister kCoreCalleeSaves[] = {
+ // S1(TR) is excluded as the ART thread register.
+ S0, S2, S3, S4, S5, S6, S7, S8, S9, S10, S11, RA
+};
+
+static constexpr FRegister kFpuCalleeSaves[] = {
+ FS0, FS1, FS2, FS3, FS4, FS5, FS6, FS7, FS8, FS9, FS10, FS11
+};
+
+#define QUICK_ENTRY_POINT(x) QUICK_ENTRYPOINT_OFFSET(kRiscv64PointerSize, x).Int32Value()
+
+Location RegisterOrZeroBitPatternLocation(HInstruction* instruction) {
+ DCHECK(!DataType::IsFloatingPointType(instruction->GetType()));
+ return IsZeroBitPattern(instruction)
+ ? Location::ConstantLocation(instruction)
+ : Location::RequiresRegister();
+}
+
+Location FpuRegisterOrZeroBitPatternLocation(HInstruction* instruction) {
+ DCHECK(DataType::IsFloatingPointType(instruction->GetType()));
+ return IsZeroBitPattern(instruction)
+ ? Location::ConstantLocation(instruction)
+ : Location::RequiresFpuRegister();
+}
+
+XRegister InputXRegisterOrZero(Location location) {
+ if (location.IsConstant()) {
+ DCHECK(location.GetConstant()->IsZeroBitPattern());
+ return Zero;
+ } else {
+ return location.AsRegister<XRegister>();
+ }
+}
+
+Location ValueLocationForStore(HInstruction* value) {
+ if (IsZeroBitPattern(value)) {
+ return Location::ConstantLocation(value);
+ } else if (DataType::IsFloatingPointType(value->GetType())) {
+ return Location::RequiresFpuRegister();
+ } else {
+ return Location::RequiresRegister();
+ }
+}
+
+Location Riscv64ReturnLocation(DataType::Type return_type) {
+ switch (return_type) {
+ case DataType::Type::kBool:
+ case DataType::Type::kUint8:
+ case DataType::Type::kInt8:
+ case DataType::Type::kUint16:
+ case DataType::Type::kInt16:
+ case DataType::Type::kUint32:
+ case DataType::Type::kInt32:
+ case DataType::Type::kReference:
+ case DataType::Type::kUint64:
+ case DataType::Type::kInt64:
+ return Location::RegisterLocation(A0);
+
+ case DataType::Type::kFloat32:
+ case DataType::Type::kFloat64:
+ return Location::FpuRegisterLocation(FA0);
+
+ case DataType::Type::kVoid:
+ return Location::NoLocation();
+ }
+ UNREACHABLE();
+}
+
+static RegisterSet OneRegInReferenceOutSaveEverythingCallerSaves() {
+ InvokeRuntimeCallingConvention calling_convention;
+ RegisterSet caller_saves = RegisterSet::Empty();
+ caller_saves.Add(Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
+ DCHECK_EQ(
+ calling_convention.GetRegisterAt(0),
+ calling_convention.GetReturnLocation(DataType::Type::kReference).AsRegister<XRegister>());
+ return caller_saves;
+}
+
+template <ClassStatus kStatus>
+static constexpr int64_t ShiftedSignExtendedClassStatusValue() {
+ // This is used only for status values that have the highest bit set.
+ static_assert(CLZ(enum_cast<uint32_t>(kStatus)) == status_lsb_position);
+ constexpr uint32_t kShiftedStatusValue = enum_cast<uint32_t>(kStatus) << status_lsb_position;
+ static_assert(kShiftedStatusValue >= 0x80000000u);
+ return static_cast<int64_t>(kShiftedStatusValue) - (INT64_C(1) << 32);
+}
+
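
A worked instance of the arithmetic in ShiftedSignExtendedClassStatusValue() may be useful. The sketch below assumes status_lsb_position == 28 (the class status living in the top four bits of a 32-bit field); subtracting 1 << 32 makes the 64-bit constant equal to what a sign-extending 32-bit load of the shifted status would produce:

  #include <cstdint>

  constexpr uint32_t kStatusLsbPosition = 28;  // assumption for the example

  constexpr int64_t ShiftedSignExtended(uint32_t status) {
    uint32_t shifted = status << kStatusLsbPosition;  // highest bit set for the statuses used here
    return static_cast<int64_t>(shifted) - (int64_t{1} << 32);
  }

  // For a status value of 14: 0xE << 28 == 0xE0000000, which sign-extends to -0x20000000.
  static_assert(ShiftedSignExtended(0xE) == -INT64_C(0x20000000), "worked example");

  int main() { return 0; }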
+// Split a 64-bit address used by JIT into the nearest 4KiB-aligned base address and a 12-bit
+// signed offset. It is usually cheaper to materialize the aligned address than the full address.
+std::pair<uint64_t, int32_t> SplitJitAddress(uint64_t address) {
+ uint64_t bits0_11 = address & UINT64_C(0xfff);
+ uint64_t bit11 = address & UINT64_C(0x800);
+ // Round the address to the nearest 4KiB-aligned address because `imm12` has range [-0x800, 0x800).
+ uint64_t base_address = (address & ~UINT64_C(0xfff)) + (bit11 << 1);
+ int32_t imm12 = dchecked_integral_cast<int32_t>(bits0_11) -
+ dchecked_integral_cast<int32_t>(bit11 << 1);
+ return {base_address, imm12};
+}
+
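
To make the rounding in SplitJitAddress() concrete, here is a standalone restatement with one worked value (the same bit manipulation, outside ART's helpers):

  #include <cassert>
  #include <cstdint>
  #include <utility>

  // When bit 11 of the address is set, the base rounds up to the next 4 KiB
  // boundary so that the 12-bit signed offset (range [-0x800, 0x800)) can
  // still reach back to the original address.
  std::pair<uint64_t, int32_t> Split(uint64_t address) {
    uint64_t bits0_11 = address & UINT64_C(0xfff);
    uint64_t bit11 = address & UINT64_C(0x800);
    uint64_t base = (address & ~UINT64_C(0xfff)) + (bit11 << 1);
    int32_t imm12 = static_cast<int32_t>(bits0_11) - static_cast<int32_t>(bit11 << 1);
    return {base, imm12};
  }

  int main() {
    auto [base, imm12] = Split(UINT64_C(0x12345ABC));  // bit 11 set -> round up
    assert(base == UINT64_C(0x12346000));
    assert(imm12 == -0x544);
    assert(base + imm12 == UINT64_C(0x12345ABC));
  }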
+int32_t ReadBarrierMarkEntrypointOffset(Location ref) {
+ DCHECK(ref.IsRegister());
+ int reg = ref.reg();
+ DCHECK(T0 <= reg && reg <= T6 && reg != TR) << reg;
+ // Note: Entrypoints for registers X30 (T5) and X31 (T6) are stored in entries
+ // for X0 (Zero) and X1 (RA) because these are not valid registers for marking
+ // and we currently have slots only up to register 29.
+ int entry_point_number = (reg >= 30) ? reg - 30 : reg;
+ return Thread::ReadBarrierMarkEntryPointsOffset<kRiscv64PointerSize>(entry_point_number);
+}
+
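
The register-to-slot remapping above can be summarized with a tiny sketch (register numbers follow the RISC-V encoding: X10 is A0, X30 is T5, X31 is T6; the base offset of the entrypoint table inside Thread is deliberately left out):

  #include <cassert>

  // T5 and T6 reuse the slots of X0 (Zero) and X1 (RA), which can never hold a
  // reference that needs marking, because the table only has slots up to X29.
  constexpr int SlotNumber(int reg) { return reg >= 30 ? reg - 30 : reg; }

  int main() {
    assert(SlotNumber(10) == 10);  // A0 keeps its own slot
    assert(SlotNumber(30) == 0);   // T5 -> slot of X0
    assert(SlotNumber(31) == 1);   // T6 -> slot of X1
  }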
+Location InvokeRuntimeCallingConvention::GetReturnLocation(DataType::Type return_type) {
+ return Riscv64ReturnLocation(return_type);
+}
+
+Location InvokeDexCallingConventionVisitorRISCV64::GetReturnLocation(DataType::Type type) const {
+ return Riscv64ReturnLocation(type);
+}
+
+Location InvokeDexCallingConventionVisitorRISCV64::GetMethodLocation() const {
+ return Location::RegisterLocation(kArtMethodRegister);
+}
+
+Location InvokeDexCallingConventionVisitorRISCV64::GetNextLocation(DataType::Type type) {
+ Location next_location;
+ if (type == DataType::Type::kVoid) {
+ LOG(FATAL) << "Unexpected parameter type " << type;
+ }
+
+ // Note: Unlike the RISC-V C/C++ calling convention, the managed ABI does not use
+ // GPRs to pass FP args when we run out of FPRs.
+ if (DataType::IsFloatingPointType(type) &&
+ float_index_ < calling_convention.GetNumberOfFpuRegisters()) {
+ next_location =
+ Location::FpuRegisterLocation(calling_convention.GetFpuRegisterAt(float_index_++));
+ } else if (!DataType::IsFloatingPointType(type) &&
+ (gp_index_ < calling_convention.GetNumberOfRegisters())) {
+ next_location = Location::RegisterLocation(calling_convention.GetRegisterAt(gp_index_++));
+ } else {
+ size_t stack_offset = calling_convention.GetStackOffsetOf(stack_index_);
+ next_location = DataType::Is64BitType(type) ? Location::DoubleStackSlot(stack_offset) :
+ Location::StackSlot(stack_offset);
+ }
+
+ // Space on the stack is reserved for all arguments.
+ stack_index_ += DataType::Is64BitType(type) ? 2 : 1;
+
+ return next_location;
+}
+
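
A toy model of the managed-ABI argument assignment spelled out above (register counts are assumptions for the example, not ART's actual constants): FP args use FPRs only, integral args use GPRs only, everything else spills, and every argument still consumes its stack slot(s) so register arguments keep a home location.

  #include <cstdio>
  #include <vector>

  struct Arg { bool is_fp; bool is_64bit; };

  void AssignManagedArgs(const std::vector<Arg>& args, int num_gprs, int num_fprs) {
    int gpr = 0, fpr = 0, stack_index = 0;
    for (const Arg& a : args) {
      if (a.is_fp && fpr < num_fprs) {
        std::printf("FPR %d (home slot %d)\n", fpr++, stack_index);
      } else if (!a.is_fp && gpr < num_gprs) {
        std::printf("GPR %d (home slot %d)\n", gpr++, stack_index);
      } else {
        std::printf("stack slot %d\n", stack_index);
      }
      stack_index += a.is_64bit ? 2 : 1;  // stack space is reserved for every argument
    }
  }

  int main() {
    // (long, double, int) -> GPR 0, FPR 0, GPR 1, with home slots 0, 2 and 4 reserved.
    AssignManagedArgs({{false, true}, {true, true}, {false, false}},
                      /*num_gprs=*/7, /*num_fprs=*/8);
  }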
+Location CriticalNativeCallingConventionVisitorRiscv64::GetNextLocation(DataType::Type type) {
+ DCHECK_NE(type, DataType::Type::kReference);
+
+ Location location = Location::NoLocation();
+ if (DataType::IsFloatingPointType(type)) {
+ if (fpr_index_ < kParameterFpuRegistersLength) {
+ location = Location::FpuRegisterLocation(kParameterFpuRegisters[fpr_index_]);
+ ++fpr_index_;
+ } else {
+ // The native ABI allows passing excess FP args in GPRs. This is facilitated by
+ // `CriticalNativeAbiFixupRiscv64` inserting fake conversion intrinsic calls
+ // (`Double.doubleToRawLongBits()` or `Float.floatToRawIntBits()`).
+ // Any remaining FP args are passed on the stack.
+ CHECK_EQ(gpr_index_, kRuntimeParameterCoreRegistersLength);
+ }
+ } else {
+ // Native ABI uses the same core registers as a runtime call.
+ if (gpr_index_ < kRuntimeParameterCoreRegistersLength) {
+ location = Location::RegisterLocation(kRuntimeParameterCoreRegisters[gpr_index_]);
+ ++gpr_index_;
+ }
+ }
+ if (location.IsInvalid()) {
+ // Only a `float` gets a single slot. Integral args need to be sign-extended to 64 bits.
+ if (type == DataType::Type::kFloat32) {
+ location = Location::StackSlot(stack_offset_);
+ } else {
+ location = Location::DoubleStackSlot(stack_offset_);
+ }
+ stack_offset_ += kFramePointerSize;
+
+ if (for_register_allocation_) {
+ location = Location::Any();
+ }
+ }
+ return location;
+}
+
+Location CriticalNativeCallingConventionVisitorRiscv64::GetReturnLocation(
+ DataType::Type type) const {
+ // The result is returned the same way in native ABI and managed ABI. No result conversion is
+ // needed, see comments in `Riscv64JniCallingConvention::RequiresSmallResultTypeExtension()`.
+ InvokeDexCallingConventionVisitorRISCV64 dex_calling_convention;
+ return dex_calling_convention.GetReturnLocation(type);
+}
+
+Location CriticalNativeCallingConventionVisitorRiscv64::GetMethodLocation() const {
+ // Pass the method in the hidden argument T0.
+ return Location::RegisterLocation(T0);
+}
+
+#define __ down_cast<CodeGeneratorRISCV64*>(codegen)->GetAssembler()-> // NOLINT
+
+void LocationsBuilderRISCV64::HandleInvoke(HInvoke* instruction) {
+ InvokeDexCallingConventionVisitorRISCV64 calling_convention_visitor;
+ CodeGenerator::CreateCommonInvokeLocationSummary(instruction, &calling_convention_visitor);
+}
+
+class CompileOptimizedSlowPathRISCV64 : public SlowPathCodeRISCV64 {
+ public:
+ CompileOptimizedSlowPathRISCV64(XRegister base, int32_t imm12)
+ : SlowPathCodeRISCV64(/*instruction=*/ nullptr),
+ base_(base),
+ imm12_(imm12) {}
+
+ void EmitNativeCode(CodeGenerator* codegen) override {
+ uint32_t entrypoint_offset =
+ GetThreadOffset<kRiscv64PointerSize>(kQuickCompileOptimized).Int32Value();
+ __ Bind(GetEntryLabel());
+ CodeGeneratorRISCV64* riscv64_codegen = down_cast<CodeGeneratorRISCV64*>(codegen);
+ riscv64::ScratchRegisterScope srs(riscv64_codegen->GetAssembler());
+ XRegister counter = srs.AllocateXRegister();
+ __ LoadConst32(counter, ProfilingInfo::GetOptimizeThreshold());
+ __ Sh(counter, base_, imm12_);
+ __ Loadd(RA, TR, entrypoint_offset);
+ // Note: we don't record the call here (and therefore don't generate a stack
+ // map), as the entrypoint should never be suspended.
+ __ Jalr(RA);
+ __ J(GetExitLabel());
+ }
+
+ const char* GetDescription() const override { return "CompileOptimizedSlowPath"; }
+
+ private:
+ XRegister base_;
+ const int32_t imm12_;
+
+ DISALLOW_COPY_AND_ASSIGN(CompileOptimizedSlowPathRISCV64);
+};
+
+class SuspendCheckSlowPathRISCV64 : public SlowPathCodeRISCV64 {
+ public:
+ SuspendCheckSlowPathRISCV64(HSuspendCheck* instruction, HBasicBlock* successor)
+ : SlowPathCodeRISCV64(instruction), successor_(successor) {}
+
+ void EmitNativeCode(CodeGenerator* codegen) override {
+ LocationSummary* locations = instruction_->GetLocations();
+ CodeGeneratorRISCV64* riscv64_codegen = down_cast<CodeGeneratorRISCV64*>(codegen);
+ __ Bind(GetEntryLabel());
+ SaveLiveRegisters(codegen, locations); // Only saves live vector registers for SIMD.
+ riscv64_codegen->InvokeRuntime(kQuickTestSuspend, instruction_, instruction_->GetDexPc(), this);
+ CheckEntrypointTypes<kQuickTestSuspend, void, void>();
+ RestoreLiveRegisters(codegen, locations); // Only restores live vector registers for SIMD.
+ if (successor_ == nullptr) {
+ __ J(GetReturnLabel());
+ } else {
+ __ J(riscv64_codegen->GetLabelOf(successor_));
+ }
+ }
+
+ Riscv64Label* GetReturnLabel() {
+ DCHECK(successor_ == nullptr);
+ return &return_label_;
+ }
+
+ const char* GetDescription() const override { return "SuspendCheckSlowPathRISCV64"; }
+
+ HBasicBlock* GetSuccessor() const { return successor_; }
+
+ private:
+ // If not null, the block to branch to after the suspend check.
+ HBasicBlock* const successor_;
+
+ // If `successor_` is null, the label to branch to after the suspend check.
+ Riscv64Label return_label_;
+
+ DISALLOW_COPY_AND_ASSIGN(SuspendCheckSlowPathRISCV64);
+};
+
+class NullCheckSlowPathRISCV64 : public SlowPathCodeRISCV64 {
+ public:
+ explicit NullCheckSlowPathRISCV64(HNullCheck* instr) : SlowPathCodeRISCV64(instr) {}
+
+ void EmitNativeCode(CodeGenerator* codegen) override {
+ CodeGeneratorRISCV64* riscv64_codegen = down_cast<CodeGeneratorRISCV64*>(codegen);
+ __ Bind(GetEntryLabel());
+ if (instruction_->CanThrowIntoCatchBlock()) {
+ // Live registers will be restored in the catch block if caught.
+ SaveLiveRegisters(codegen, instruction_->GetLocations());
+ }
+ riscv64_codegen->InvokeRuntime(
+ kQuickThrowNullPointer, instruction_, instruction_->GetDexPc(), this);
+ CheckEntrypointTypes<kQuickThrowNullPointer, void, void>();
+ }
+
+ bool IsFatal() const override { return true; }
+
+ const char* GetDescription() const override { return "NullCheckSlowPathRISCV64"; }
+
+ private:
+ DISALLOW_COPY_AND_ASSIGN(NullCheckSlowPathRISCV64);
+};
+
+class BoundsCheckSlowPathRISCV64 : public SlowPathCodeRISCV64 {
+ public:
+ explicit BoundsCheckSlowPathRISCV64(HBoundsCheck* instruction)
+ : SlowPathCodeRISCV64(instruction) {}
+
+ void EmitNativeCode(CodeGenerator* codegen) override {
+ LocationSummary* locations = instruction_->GetLocations();
+ CodeGeneratorRISCV64* riscv64_codegen = down_cast<CodeGeneratorRISCV64*>(codegen);
+ __ Bind(GetEntryLabel());
+ if (instruction_->CanThrowIntoCatchBlock()) {
+ // Live registers will be restored in the catch block if caught.
+ SaveLiveRegisters(codegen, instruction_->GetLocations());
+ }
+ // We're moving two locations to locations that could overlap, so we need a parallel
+ // move resolver.
+ InvokeRuntimeCallingConvention calling_convention;
+ codegen->EmitParallelMoves(locations->InAt(0),
+ Location::RegisterLocation(calling_convention.GetRegisterAt(0)),
+ DataType::Type::kInt32,
+ locations->InAt(1),
+ Location::RegisterLocation(calling_convention.GetRegisterAt(1)),
+ DataType::Type::kInt32);
+ QuickEntrypointEnum entrypoint = instruction_->AsBoundsCheck()->IsStringCharAt() ?
+ kQuickThrowStringBounds :
+ kQuickThrowArrayBounds;
+ riscv64_codegen->InvokeRuntime(entrypoint, instruction_, instruction_->GetDexPc(), this);
+ CheckEntrypointTypes<kQuickThrowStringBounds, void, int32_t, int32_t>();
+ CheckEntrypointTypes<kQuickThrowArrayBounds, void, int32_t, int32_t>();
+ }
+
+ bool IsFatal() const override { return true; }
+
+ const char* GetDescription() const override { return "BoundsCheckSlowPathRISCV64"; }
+
+ private:
+ DISALLOW_COPY_AND_ASSIGN(BoundsCheckSlowPathRISCV64);
+};
+
+class LoadClassSlowPathRISCV64 : public SlowPathCodeRISCV64 {
+ public:
+ LoadClassSlowPathRISCV64(HLoadClass* cls, HInstruction* at) : SlowPathCodeRISCV64(at), cls_(cls) {
+ DCHECK(at->IsLoadClass() || at->IsClinitCheck());
+ DCHECK_EQ(instruction_->IsLoadClass(), cls_ == instruction_);
+ }
+
+ void EmitNativeCode(CodeGenerator* codegen) override {
+ LocationSummary* locations = instruction_->GetLocations();
+ Location out = locations->Out();
+ const uint32_t dex_pc = instruction_->GetDexPc();
+ bool must_resolve_type = instruction_->IsLoadClass() && cls_->MustResolveTypeOnSlowPath();
+ bool must_do_clinit = instruction_->IsClinitCheck() || cls_->MustGenerateClinitCheck();
+
+ CodeGeneratorRISCV64* riscv64_codegen = down_cast<CodeGeneratorRISCV64*>(codegen);
+ __ Bind(GetEntryLabel());
+ SaveLiveRegisters(codegen, locations);
+
+ InvokeRuntimeCallingConvention calling_convention;
+ if (must_resolve_type) {
+ DCHECK(IsSameDexFile(cls_->GetDexFile(), riscv64_codegen->GetGraph()->GetDexFile()) ||
+ riscv64_codegen->GetCompilerOptions().WithinOatFile(&cls_->GetDexFile()) ||
+ ContainsElement(Runtime::Current()->GetClassLinker()->GetBootClassPath(),
+ &cls_->GetDexFile()));
+ dex::TypeIndex type_index = cls_->GetTypeIndex();
+ __ LoadConst32(calling_convention.GetRegisterAt(0), type_index.index_);
+ if (cls_->NeedsAccessCheck()) {
+ CheckEntrypointTypes<kQuickResolveTypeAndVerifyAccess, void*, uint32_t>();
+ riscv64_codegen->InvokeRuntime(
+ kQuickResolveTypeAndVerifyAccess, instruction_, dex_pc, this);
+ } else {
+ CheckEntrypointTypes<kQuickResolveType, void*, uint32_t>();
+ riscv64_codegen->InvokeRuntime(kQuickResolveType, instruction_, dex_pc, this);
+ }
+ // If we also must_do_clinit, the resolved type is now in the correct register.
+ } else {
+ DCHECK(must_do_clinit);
+ Location source = instruction_->IsLoadClass() ? out : locations->InAt(0);
+ riscv64_codegen->MoveLocation(
+ Location::RegisterLocation(calling_convention.GetRegisterAt(0)), source, cls_->GetType());
+ }
+ if (must_do_clinit) {
+ riscv64_codegen->InvokeRuntime(kQuickInitializeStaticStorage, instruction_, dex_pc, this);
+ CheckEntrypointTypes<kQuickInitializeStaticStorage, void*, mirror::Class*>();
+ }
+
+ // Move the class to the desired location.
+ if (out.IsValid()) {
+ DCHECK(out.IsRegister() && !locations->GetLiveRegisters()->ContainsCoreRegister(out.reg()));
+ DataType::Type type = DataType::Type::kReference;
+ DCHECK_EQ(type, instruction_->GetType());
+ riscv64_codegen->MoveLocation(out, calling_convention.GetReturnLocation(type), type);
+ }
+ RestoreLiveRegisters(codegen, locations);
+
+ __ J(GetExitLabel());
+ }
+
+ const char* GetDescription() const override { return "LoadClassSlowPathRISCV64"; }
+
+ private:
+ // The class this slow path will load.
+ HLoadClass* const cls_;
+
+ DISALLOW_COPY_AND_ASSIGN(LoadClassSlowPathRISCV64);
+};
+
+class DeoptimizationSlowPathRISCV64 : public SlowPathCodeRISCV64 {
+ public:
+ explicit DeoptimizationSlowPathRISCV64(HDeoptimize* instruction)
+ : SlowPathCodeRISCV64(instruction) {}
+
+ void EmitNativeCode(CodeGenerator* codegen) override {
+ CodeGeneratorRISCV64* riscv64_codegen = down_cast<CodeGeneratorRISCV64*>(codegen);
+ __ Bind(GetEntryLabel());
+ LocationSummary* locations = instruction_->GetLocations();
+ SaveLiveRegisters(codegen, locations);
+ InvokeRuntimeCallingConvention calling_convention;
+ __ LoadConst32(calling_convention.GetRegisterAt(0),
+ static_cast<uint32_t>(instruction_->AsDeoptimize()->GetDeoptimizationKind()));
+ riscv64_codegen->InvokeRuntime(kQuickDeoptimize, instruction_, instruction_->GetDexPc(), this);
+ CheckEntrypointTypes<kQuickDeoptimize, void, DeoptimizationKind>();
+ }
+
+ const char* GetDescription() const override { return "DeoptimizationSlowPathRISCV64"; }
+
+ private:
+ DISALLOW_COPY_AND_ASSIGN(DeoptimizationSlowPathRISCV64);
+};
+
+// Slow path generating a read barrier for a GC root.
+class ReadBarrierForRootSlowPathRISCV64 : public SlowPathCodeRISCV64 {
+ public:
+ ReadBarrierForRootSlowPathRISCV64(HInstruction* instruction, Location out, Location root)
+ : SlowPathCodeRISCV64(instruction), out_(out), root_(root) {
+ }
+
+ void EmitNativeCode(CodeGenerator* codegen) override {
+ DCHECK(codegen->EmitReadBarrier());
+ LocationSummary* locations = instruction_->GetLocations();
+ DataType::Type type = DataType::Type::kReference;
+ XRegister reg_out = out_.AsRegister<XRegister>();
+ DCHECK(locations->CanCall());
+ DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(reg_out));
+ DCHECK(instruction_->IsLoadClass() ||
+ instruction_->IsLoadString() ||
+ (instruction_->IsInvoke() && instruction_->GetLocations()->Intrinsified()))
+ << "Unexpected instruction in read barrier for GC root slow path: "
+ << instruction_->DebugName();
+
+ __ Bind(GetEntryLabel());
+ SaveLiveRegisters(codegen, locations);
+
+ InvokeRuntimeCallingConvention calling_convention;
+ CodeGeneratorRISCV64* riscv64_codegen = down_cast<CodeGeneratorRISCV64*>(codegen);
+ riscv64_codegen->MoveLocation(Location::RegisterLocation(calling_convention.GetRegisterAt(0)),
+ root_,
+ DataType::Type::kReference);
+ riscv64_codegen->InvokeRuntime(kQuickReadBarrierForRootSlow,
+ instruction_,
+ instruction_->GetDexPc(),
+ this);
+ CheckEntrypointTypes<kQuickReadBarrierForRootSlow, mirror::Object*, GcRoot<mirror::Object>*>();
+ riscv64_codegen->MoveLocation(out_, calling_convention.GetReturnLocation(type), type);
+
+ RestoreLiveRegisters(codegen, locations);
+ __ J(GetExitLabel());
+ }
+
+ const char* GetDescription() const override { return "ReadBarrierForRootSlowPathRISCV64"; }
+
+ private:
+ const Location out_;
+ const Location root_;
+
+ DISALLOW_COPY_AND_ASSIGN(ReadBarrierForRootSlowPathRISCV64);
+};
+
+class MethodEntryExitHooksSlowPathRISCV64 : public SlowPathCodeRISCV64 {
+ public:
+ explicit MethodEntryExitHooksSlowPathRISCV64(HInstruction* instruction)
+ : SlowPathCodeRISCV64(instruction) {}
+
+ void EmitNativeCode(CodeGenerator* codegen) override {
+ LocationSummary* locations = instruction_->GetLocations();
+ QuickEntrypointEnum entry_point =
+ (instruction_->IsMethodEntryHook()) ? kQuickMethodEntryHook : kQuickMethodExitHook;
+ CodeGeneratorRISCV64* riscv64_codegen = down_cast<CodeGeneratorRISCV64*>(codegen);
+ __ Bind(GetEntryLabel());
+ SaveLiveRegisters(codegen, locations);
+ if (instruction_->IsMethodExitHook()) {
+ __ Li(A4, riscv64_codegen->GetFrameSize());
+ }
+ riscv64_codegen->InvokeRuntime(entry_point, instruction_, instruction_->GetDexPc(), this);
+ RestoreLiveRegisters(codegen, locations);
+ __ J(GetExitLabel());
+ }
+
+ const char* GetDescription() const override {
+ return "MethodEntryExitHooksSlowPathRISCV64";
+ }
+
+ private:
+ DISALLOW_COPY_AND_ASSIGN(MethodEntryExitHooksSlowPathRISCV64);
+};
+
+class ArraySetSlowPathRISCV64 : public SlowPathCodeRISCV64 {
+ public:
+ explicit ArraySetSlowPathRISCV64(HInstruction* instruction) : SlowPathCodeRISCV64(instruction) {}
+
+ void EmitNativeCode(CodeGenerator* codegen) override {
+ LocationSummary* locations = instruction_->GetLocations();
+ __ Bind(GetEntryLabel());
+ SaveLiveRegisters(codegen, locations);
+
+ InvokeRuntimeCallingConvention calling_convention;
+ HParallelMove parallel_move(codegen->GetGraph()->GetAllocator());
+ parallel_move.AddMove(
+ locations->InAt(0),
+ Location::RegisterLocation(calling_convention.GetRegisterAt(0)),
+ DataType::Type::kReference,
+ nullptr);
+ parallel_move.AddMove(
+ locations->InAt(1),
+ Location::RegisterLocation(calling_convention.GetRegisterAt(1)),
+ DataType::Type::kInt32,
+ nullptr);
+ parallel_move.AddMove(
+ locations->InAt(2),
+ Location::RegisterLocation(calling_convention.GetRegisterAt(2)),
+ DataType::Type::kReference,
+ nullptr);
+ codegen->GetMoveResolver()->EmitNativeCode(&parallel_move);
+
+ CodeGeneratorRISCV64* riscv64_codegen = down_cast<CodeGeneratorRISCV64*>(codegen);
+ riscv64_codegen->InvokeRuntime(kQuickAputObject, instruction_, instruction_->GetDexPc(), this);
+ CheckEntrypointTypes<kQuickAputObject, void, mirror::Array*, int32_t, mirror::Object*>();
+ RestoreLiveRegisters(codegen, locations);
+ __ J(GetExitLabel());
+ }
+
+ const char* GetDescription() const override { return "ArraySetSlowPathRISCV64"; }
+
+ private:
+ DISALLOW_COPY_AND_ASSIGN(ArraySetSlowPathRISCV64);
+};
+
+class TypeCheckSlowPathRISCV64 : public SlowPathCodeRISCV64 {
+ public:
+ explicit TypeCheckSlowPathRISCV64(HInstruction* instruction, bool is_fatal)
+ : SlowPathCodeRISCV64(instruction), is_fatal_(is_fatal) {}
+
+ void EmitNativeCode(CodeGenerator* codegen) override {
+ LocationSummary* locations = instruction_->GetLocations();
+
+ uint32_t dex_pc = instruction_->GetDexPc();
+ DCHECK(instruction_->IsCheckCast()
+ || !locations->GetLiveRegisters()->ContainsCoreRegister(locations->Out().reg()));
+ CodeGeneratorRISCV64* riscv64_codegen = down_cast<CodeGeneratorRISCV64*>(codegen);
+
+ __ Bind(GetEntryLabel());
+ if (!is_fatal_ || instruction_->CanThrowIntoCatchBlock()) {
+ SaveLiveRegisters(codegen, locations);
+ }
+
+ // We're moving two locations to locations that could overlap, so we need a parallel
+ // move resolver.
+ InvokeRuntimeCallingConvention calling_convention;
+ codegen->EmitParallelMoves(locations->InAt(0),
+ Location::RegisterLocation(calling_convention.GetRegisterAt(0)),
+ DataType::Type::kReference,
+ locations->InAt(1),
+ Location::RegisterLocation(calling_convention.GetRegisterAt(1)),
+ DataType::Type::kReference);
+ if (instruction_->IsInstanceOf()) {
+ riscv64_codegen->InvokeRuntime(kQuickInstanceofNonTrivial, instruction_, dex_pc, this);
+ CheckEntrypointTypes<kQuickInstanceofNonTrivial, size_t, mirror::Object*, mirror::Class*>();
+ DataType::Type ret_type = instruction_->GetType();
+ Location ret_loc = calling_convention.GetReturnLocation(ret_type);
+ riscv64_codegen->MoveLocation(locations->Out(), ret_loc, ret_type);
+ } else {
+ DCHECK(instruction_->IsCheckCast());
+ riscv64_codegen->InvokeRuntime(kQuickCheckInstanceOf, instruction_, dex_pc, this);
+ CheckEntrypointTypes<kQuickCheckInstanceOf, void, mirror::Object*, mirror::Class*>();
+ }
+
+ if (!is_fatal_) {
+ RestoreLiveRegisters(codegen, locations);
+ __ J(GetExitLabel());
+ }
+ }
+
+ const char* GetDescription() const override { return "TypeCheckSlowPathRISCV64"; }
+
+ bool IsFatal() const override { return is_fatal_; }
+
+ private:
+ const bool is_fatal_;
+
+ DISALLOW_COPY_AND_ASSIGN(TypeCheckSlowPathRISCV64);
+};
+
+class DivZeroCheckSlowPathRISCV64 : public SlowPathCodeRISCV64 {
+ public:
+ explicit DivZeroCheckSlowPathRISCV64(HDivZeroCheck* instruction)
+ : SlowPathCodeRISCV64(instruction) {}
+
+ void EmitNativeCode(CodeGenerator* codegen) override {
+ CodeGeneratorRISCV64* riscv64_codegen = down_cast<CodeGeneratorRISCV64*>(codegen);
+ __ Bind(GetEntryLabel());
+ riscv64_codegen->InvokeRuntime(
+ kQuickThrowDivZero, instruction_, instruction_->GetDexPc(), this);
+ CheckEntrypointTypes<kQuickThrowDivZero, void, void>();
+ }
+
+ bool IsFatal() const override { return true; }
+
+ const char* GetDescription() const override { return "DivZeroCheckSlowPathRISCV64"; }
+
+ private:
+ DISALLOW_COPY_AND_ASSIGN(DivZeroCheckSlowPathRISCV64);
+};
+
+class ReadBarrierMarkSlowPathRISCV64 : public SlowPathCodeRISCV64 {
+ public:
+ ReadBarrierMarkSlowPathRISCV64(HInstruction* instruction, Location ref, Location entrypoint)
+ : SlowPathCodeRISCV64(instruction), ref_(ref), entrypoint_(entrypoint) {
+ DCHECK(entrypoint.IsRegister());
+ }
+
+ const char* GetDescription() const override { return "ReadBarrierMarkSlowPathRISCV64"; }
+
+ void EmitNativeCode(CodeGenerator* codegen) override {
+ DCHECK(codegen->EmitReadBarrier());
+ LocationSummary* locations = instruction_->GetLocations();
+ XRegister ref_reg = ref_.AsRegister<XRegister>();
+ DCHECK(locations->CanCall());
+ DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(ref_reg)) << ref_reg;
+ DCHECK(instruction_->IsInstanceFieldGet() ||
+ instruction_->IsStaticFieldGet() ||
+ instruction_->IsArrayGet() ||
+ instruction_->IsArraySet() ||
+ instruction_->IsLoadClass() ||
+ instruction_->IsLoadString() ||
+ instruction_->IsInstanceOf() ||
+ instruction_->IsCheckCast() ||
+ (instruction_->IsInvoke() && instruction_->GetLocations()->Intrinsified()))
+ << "Unexpected instruction in read barrier marking slow path: "
+ << instruction_->DebugName();
+
+ __ Bind(GetEntryLabel());
+ // No need to save live registers; it's taken care of by the
+ // entrypoint. Also, there is no need to update the stack mask,
+ // as this runtime call will not trigger a garbage collection.
+ CodeGeneratorRISCV64* riscv64_codegen = down_cast<CodeGeneratorRISCV64*>(codegen);
+ DCHECK(ref_reg >= T0 && ref_reg != TR);
+
+ // "Compact" slow path, saving two moves.
+ //
+ // Instead of using the standard runtime calling convention (input
+ // and output in A0 and V0 respectively):
+ //
+ // A0 <- ref
+ // V0 <- ReadBarrierMark(A0)
+ // ref <- V0
+ //
+ // we just use rX (the register containing `ref`) as input and output
+ // of a dedicated entrypoint:
+ //
+ // rX <- ReadBarrierMarkRegX(rX)
+ //
+ riscv64_codegen->ValidateInvokeRuntimeWithoutRecordingPcInfo(instruction_, this);
+ DCHECK_NE(entrypoint_.AsRegister<XRegister>(), TMP); // A taken branch can clobber `TMP`.
+ __ Jalr(entrypoint_.AsRegister<XRegister>()); // Clobbers `RA` (used as the `entrypoint_`).
+ __ J(GetExitLabel());
+ }
+
+ private:
+ // The location (register) of the marked object reference.
+ const Location ref_;
+
+ // The location of the already loaded entrypoint.
+ const Location entrypoint_;
+
+ DISALLOW_COPY_AND_ASSIGN(ReadBarrierMarkSlowPathRISCV64);
+};
+
+class LoadStringSlowPathRISCV64 : public SlowPathCodeRISCV64 {
+ public:
+ explicit LoadStringSlowPathRISCV64(HLoadString* instruction)
+ : SlowPathCodeRISCV64(instruction) {}
+
+ void EmitNativeCode(CodeGenerator* codegen) override {
+ DCHECK(instruction_->IsLoadString());
+ DCHECK_EQ(instruction_->AsLoadString()->GetLoadKind(), HLoadString::LoadKind::kBssEntry);
+ LocationSummary* locations = instruction_->GetLocations();
+ DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(locations->Out().reg()));
+ const dex::StringIndex string_index = instruction_->AsLoadString()->GetStringIndex();
+ CodeGeneratorRISCV64* riscv64_codegen = down_cast<CodeGeneratorRISCV64*>(codegen);
+ InvokeRuntimeCallingConvention calling_convention;
+ __ Bind(GetEntryLabel());
+ SaveLiveRegisters(codegen, locations);
+
+ __ LoadConst32(calling_convention.GetRegisterAt(0), string_index.index_);
+ riscv64_codegen->InvokeRuntime(
+ kQuickResolveString, instruction_, instruction_->GetDexPc(), this);
+ CheckEntrypointTypes<kQuickResolveString, void*, uint32_t>();
+
+ DataType::Type type = DataType::Type::kReference;
+ DCHECK_EQ(type, instruction_->GetType());
+ riscv64_codegen->MoveLocation(
+ locations->Out(), calling_convention.GetReturnLocation(type), type);
+ RestoreLiveRegisters(codegen, locations);
+
+ __ J(GetExitLabel());
+ }
+
+ const char* GetDescription() const override { return "LoadStringSlowPathRISCV64"; }
+
+ private:
+ DISALLOW_COPY_AND_ASSIGN(LoadStringSlowPathRISCV64);
+};
+
+#undef __
+#define __ down_cast<Riscv64Assembler*>(GetAssembler())-> // NOLINT
+
+template <typename Reg,
+ void (Riscv64Assembler::*opS)(Reg, FRegister, FRegister),
+ void (Riscv64Assembler::*opD)(Reg, FRegister, FRegister)>
+inline void InstructionCodeGeneratorRISCV64::FpBinOp(
+ Reg rd, FRegister rs1, FRegister rs2, DataType::Type type) {
+ Riscv64Assembler* assembler = down_cast<CodeGeneratorRISCV64*>(codegen_)->GetAssembler();
+ if (type == DataType::Type::kFloat32) {
+ (assembler->*opS)(rd, rs1, rs2);
+ } else {
+ DCHECK_EQ(type, DataType::Type::kFloat64);
+ (assembler->*opD)(rd, rs1, rs2);
+ }
+}
+
+void InstructionCodeGeneratorRISCV64::FAdd(
+ FRegister rd, FRegister rs1, FRegister rs2, DataType::Type type) {
+ FpBinOp<FRegister, &Riscv64Assembler::FAddS, &Riscv64Assembler::FAddD>(rd, rs1, rs2, type);
+}
+
+inline void InstructionCodeGeneratorRISCV64::FSub(
+ FRegister rd, FRegister rs1, FRegister rs2, DataType::Type type) {
+ FpBinOp<FRegister, &Riscv64Assembler::FSubS, &Riscv64Assembler::FSubD>(rd, rs1, rs2, type);
+}
+
+inline void InstructionCodeGeneratorRISCV64::FDiv(
+ FRegister rd, FRegister rs1, FRegister rs2, DataType::Type type) {
+ FpBinOp<FRegister, &Riscv64Assembler::FDivS, &Riscv64Assembler::FDivD>(rd, rs1, rs2, type);
+}
+
+inline void InstructionCodeGeneratorRISCV64::FMul(
+ FRegister rd, FRegister rs1, FRegister rs2, DataType::Type type) {
+ FpBinOp<FRegister, &Riscv64Assembler::FMulS, &Riscv64Assembler::FMulD>(rd, rs1, rs2, type);
+}
+
+inline void InstructionCodeGeneratorRISCV64::FMin(
+ FRegister rd, FRegister rs1, FRegister rs2, DataType::Type type) {
+ FpBinOp<FRegister, &Riscv64Assembler::FMinS, &Riscv64Assembler::FMinD>(rd, rs1, rs2, type);
+}
+
+inline void InstructionCodeGeneratorRISCV64::FMax(
+ FRegister rd, FRegister rs1, FRegister rs2, DataType::Type type) {
+ FpBinOp<FRegister, &Riscv64Assembler::FMaxS, &Riscv64Assembler::FMaxD>(rd, rs1, rs2, type);
+}
+
+inline void InstructionCodeGeneratorRISCV64::FEq(
+ XRegister rd, FRegister rs1, FRegister rs2, DataType::Type type) {
+ FpBinOp<XRegister, &Riscv64Assembler::FEqS, &Riscv64Assembler::FEqD>(rd, rs1, rs2, type);
+}
+
+inline void InstructionCodeGeneratorRISCV64::FLt(
+ XRegister rd, FRegister rs1, FRegister rs2, DataType::Type type) {
+ FpBinOp<XRegister, &Riscv64Assembler::FLtS, &Riscv64Assembler::FLtD>(rd, rs1, rs2, type);
+}
+
+inline void InstructionCodeGeneratorRISCV64::FLe(
+ XRegister rd, FRegister rs1, FRegister rs2, DataType::Type type) {
+ FpBinOp<XRegister, &Riscv64Assembler::FLeS, &Riscv64Assembler::FLeD>(rd, rs1, rs2, type);
+}
+
+template <typename Reg,
+ void (Riscv64Assembler::*opS)(Reg, FRegister),
+ void (Riscv64Assembler::*opD)(Reg, FRegister)>
+inline void InstructionCodeGeneratorRISCV64::FpUnOp(
+ Reg rd, FRegister rs1, DataType::Type type) {
+ Riscv64Assembler* assembler = down_cast<CodeGeneratorRISCV64*>(codegen_)->GetAssembler();
+ if (type == DataType::Type::kFloat32) {
+ (assembler->*opS)(rd, rs1);
+ } else {
+ DCHECK_EQ(type, DataType::Type::kFloat64);
+ (assembler->*opD)(rd, rs1);
+ }
+}
+
+inline void InstructionCodeGeneratorRISCV64::FAbs(
+ FRegister rd, FRegister rs1, DataType::Type type) {
+ FpUnOp<FRegister, &Riscv64Assembler::FAbsS, &Riscv64Assembler::FAbsD>(rd, rs1, type);
+}
+
+inline void InstructionCodeGeneratorRISCV64::FNeg(
+ FRegister rd, FRegister rs1, DataType::Type type) {
+ FpUnOp<FRegister, &Riscv64Assembler::FNegS, &Riscv64Assembler::FNegD>(rd, rs1, type);
+}
+
+inline void InstructionCodeGeneratorRISCV64::FMv(
+ FRegister rd, FRegister rs1, DataType::Type type) {
+ FpUnOp<FRegister, &Riscv64Assembler::FMvS, &Riscv64Assembler::FMvD>(rd, rs1, type);
+}
+
+inline void InstructionCodeGeneratorRISCV64::FMvX(
+ XRegister rd, FRegister rs1, DataType::Type type) {
+ FpUnOp<XRegister, &Riscv64Assembler::FMvXW, &Riscv64Assembler::FMvXD>(rd, rs1, type);
+}
+
+void InstructionCodeGeneratorRISCV64::FClass(
+ XRegister rd, FRegister rs1, DataType::Type type) {
+ FpUnOp<XRegister, &Riscv64Assembler::FClassS, &Riscv64Assembler::FClassD>(rd, rs1, type);
+}
+
+void InstructionCodeGeneratorRISCV64::Load(
+ Location out, XRegister rs1, int32_t offset, DataType::Type type) {
+ switch (type) {
+ case DataType::Type::kBool:
+ case DataType::Type::kUint8:
+ __ Loadbu(out.AsRegister<XRegister>(), rs1, offset);
+ break;
+ case DataType::Type::kInt8:
+ __ Loadb(out.AsRegister<XRegister>(), rs1, offset);
+ break;
+ case DataType::Type::kUint16:
+ __ Loadhu(out.AsRegister<XRegister>(), rs1, offset);
+ break;
+ case DataType::Type::kInt16:
+ __ Loadh(out.AsRegister<XRegister>(), rs1, offset);
+ break;
+ case DataType::Type::kInt32:
+ __ Loadw(out.AsRegister<XRegister>(), rs1, offset);
+ break;
+ case DataType::Type::kInt64:
+ __ Loadd(out.AsRegister<XRegister>(), rs1, offset);
+ break;
+ case DataType::Type::kReference:
+ __ Loadwu(out.AsRegister<XRegister>(), rs1, offset);
+ break;
+ case DataType::Type::kFloat32:
+ __ FLoadw(out.AsFpuRegister<FRegister>(), rs1, offset);
+ break;
+ case DataType::Type::kFloat64:
+ __ FLoadd(out.AsFpuRegister<FRegister>(), rs1, offset);
+ break;
+ case DataType::Type::kUint32:
+ case DataType::Type::kUint64:
+ case DataType::Type::kVoid:
+ LOG(FATAL) << "Unreachable type " << type;
+ UNREACHABLE();
+ }
+}
+
+void InstructionCodeGeneratorRISCV64::Store(
+ Location value, XRegister rs1, int32_t offset, DataType::Type type) {
+ DCHECK_IMPLIES(value.IsConstant(), IsZeroBitPattern(value.GetConstant()));
+ if (kPoisonHeapReferences && type == DataType::Type::kReference && !value.IsConstant()) {
+ riscv64::ScratchRegisterScope srs(GetAssembler());
+ XRegister tmp = srs.AllocateXRegister();
+ __ Mv(tmp, value.AsRegister<XRegister>());
+ codegen_->PoisonHeapReference(tmp);
+ __ Storew(tmp, rs1, offset);
+ return;
+ }
+ switch (type) {
+ case DataType::Type::kBool:
+ case DataType::Type::kUint8:
+ case DataType::Type::kInt8:
+ __ Storeb(InputXRegisterOrZero(value), rs1, offset);
+ break;
+ case DataType::Type::kUint16:
+ case DataType::Type::kInt16:
+ __ Storeh(InputXRegisterOrZero(value), rs1, offset);
+ break;
+ case DataType::Type::kFloat32:
+ if (!value.IsConstant()) {
+ __ FStorew(value.AsFpuRegister<FRegister>(), rs1, offset);
+ break;
+ }
+ FALLTHROUGH_INTENDED;
+ case DataType::Type::kInt32:
+ case DataType::Type::kReference:
+ __ Storew(InputXRegisterOrZero(value), rs1, offset);
+ break;
+ case DataType::Type::kFloat64:
+ if (!value.IsConstant()) {
+ __ FStored(value.AsFpuRegister<FRegister>(), rs1, offset);
+ break;
+ }
+ FALLTHROUGH_INTENDED;
+ case DataType::Type::kInt64:
+ __ Stored(InputXRegisterOrZero(value), rs1, offset);
+ break;
+ case DataType::Type::kUint32:
+ case DataType::Type::kUint64:
+ case DataType::Type::kVoid:
+ LOG(FATAL) << "Unreachable type " << type;
+ UNREACHABLE();
+ }
+}
+
+void InstructionCodeGeneratorRISCV64::StoreSeqCst(Location value,
+ XRegister rs1,
+ int32_t offset,
+ DataType::Type type,
+ HInstruction* instruction) {
+ if (DataType::Size(type) >= 4u) {
+ // Use AMOSWAP for 32-bit and 64-bit data types.
+ ScratchRegisterScope srs(GetAssembler());
+ XRegister swap_src = kNoXRegister;
+ if (kPoisonHeapReferences && type == DataType::Type::kReference && !value.IsConstant()) {
+ swap_src = srs.AllocateXRegister();
+ __ Mv(swap_src, value.AsRegister<XRegister>());
+ codegen_->PoisonHeapReference(swap_src);
+ } else if (DataType::IsFloatingPointType(type) && !value.IsConstant()) {
+ swap_src = srs.AllocateXRegister();
+ FMvX(swap_src, value.AsFpuRegister<FRegister>(), type);
+ } else {
+ swap_src = InputXRegisterOrZero(value);
+ }
+ XRegister addr = rs1;
+ if (offset != 0) {
+ addr = srs.AllocateXRegister();
+ __ AddConst64(addr, rs1, offset);
+ }
+ if (DataType::Is64BitType(type)) {
+ __ AmoSwapD(Zero, swap_src, addr, AqRl::kRelease);
+ } else {
+ __ AmoSwapW(Zero, swap_src, addr, AqRl::kRelease);
+ }
+ if (instruction != nullptr) {
+ codegen_->MaybeRecordImplicitNullCheck(instruction);
+ }
+ } else {
+ // Use fences for smaller data types.
+ codegen_->GenerateMemoryBarrier(MemBarrierKind::kAnyStore);
+ Store(value, rs1, offset, type);
+ if (instruction != nullptr) {
+ codegen_->MaybeRecordImplicitNullCheck(instruction);
+ }
+ codegen_->GenerateMemoryBarrier(MemBarrierKind::kAnyAny);
+ }
+}
+
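+// Emits `rd = (rs1 << DataType::SizeShift(type)) + rs2`, using the Zba shift-and-add
+// instructions (SH1ADD/SH2ADD/SH3ADD) when the element size requires a shift; used,
+// for example, when computing array element addresses.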
+void InstructionCodeGeneratorRISCV64::ShNAdd(
+ XRegister rd, XRegister rs1, XRegister rs2, DataType::Type type) {
+ switch (type) {
+ case DataType::Type::kBool:
+ case DataType::Type::kUint8:
+ case DataType::Type::kInt8:
+ DCHECK_EQ(DataType::SizeShift(type), 0u);
+ __ Add(rd, rs1, rs2);
+ break;
+ case DataType::Type::kUint16:
+ case DataType::Type::kInt16:
+ DCHECK_EQ(DataType::SizeShift(type), 1u);
+ __ Sh1Add(rd, rs1, rs2);
+ break;
+ case DataType::Type::kInt32:
+ case DataType::Type::kReference:
+ case DataType::Type::kFloat32:
+ DCHECK_EQ(DataType::SizeShift(type), 2u);
+ __ Sh2Add(rd, rs1, rs2);
+ break;
+ case DataType::Type::kInt64:
+ case DataType::Type::kFloat64:
+ DCHECK_EQ(DataType::SizeShift(type), 3u);
+ __ Sh3Add(rd, rs1, rs2);
+ break;
+ case DataType::Type::kUint32:
+ case DataType::Type::kUint64:
+ case DataType::Type::kVoid:
+ LOG(FATAL) << "Unreachable type " << type;
+ UNREACHABLE();
+ }
+}
+
+Riscv64Assembler* ParallelMoveResolverRISCV64::GetAssembler() const {
+ return codegen_->GetAssembler();
+}
+
+void ParallelMoveResolverRISCV64::EmitMove(size_t index) {
+ MoveOperands* move = moves_[index];
+ codegen_->MoveLocation(move->GetDestination(), move->GetSource(), move->GetType());
+}
+
+void ParallelMoveResolverRISCV64::EmitSwap(size_t index) {
+ MoveOperands* move = moves_[index];
+ codegen_->SwapLocations(move->GetDestination(), move->GetSource(), move->GetType());
+}
+
+void ParallelMoveResolverRISCV64::SpillScratch([[maybe_unused]] int reg) {
+ LOG(FATAL) << "Unimplemented";
+ UNREACHABLE();
+}
+
+void ParallelMoveResolverRISCV64::RestoreScratch([[maybe_unused]] int reg) {
+ LOG(FATAL) << "Unimplemented";
+ UNREACHABLE();
+}
+
+void ParallelMoveResolverRISCV64::Exchange(int index1, int index2, bool double_slot) {
+ // We have 2 scratch X registers and 1 scratch F register that we can use. We prefer
+ // to use X registers for the swap but if both offsets are too big, we need to reserve
+ // one of the X registers for address adjustment and use an F register.
+ bool use_fp_tmp2 = false;
+ if (!IsInt<12>(index2)) {
+ if (!IsInt<12>(index1)) {
+ use_fp_tmp2 = true;
+ } else {
+ std::swap(index1, index2);
+ }
+ }
+ DCHECK_IMPLIES(!IsInt<12>(index2), use_fp_tmp2);
+
+ Location loc1(double_slot ? Location::DoubleStackSlot(index1) : Location::StackSlot(index1));
+ Location loc2(double_slot ? Location::DoubleStackSlot(index2) : Location::StackSlot(index2));
+ riscv64::ScratchRegisterScope srs(GetAssembler());
+ Location tmp = Location::RegisterLocation(srs.AllocateXRegister());
+ DataType::Type tmp_type = double_slot ? DataType::Type::kInt64 : DataType::Type::kInt32;
+ Location tmp2 = use_fp_tmp2
+ ? Location::FpuRegisterLocation(srs.AllocateFRegister())
+ : Location::RegisterLocation(srs.AllocateXRegister());
+ DataType::Type tmp2_type = use_fp_tmp2
+ ? (double_slot ? DataType::Type::kFloat64 : DataType::Type::kFloat32)
+ : tmp_type;
+
+ codegen_->MoveLocation(tmp, loc1, tmp_type);
+ codegen_->MoveLocation(tmp2, loc2, tmp2_type);
+ if (use_fp_tmp2) {
+ codegen_->MoveLocation(loc2, tmp, tmp_type);
+ } else {
+ // We cannot use `Stored()` or `Storew()` via `MoveLocation()` because we have
+ // no more scratch registers available. Use `Sd()` or `Sw()` explicitly.
+ DCHECK(IsInt<12>(index2));
+ if (double_slot) {
+ __ Sd(tmp.AsRegister<XRegister>(), SP, index2);
+ } else {
+ __ Sw(tmp.AsRegister<XRegister>(), SP, index2);
+ }
+ srs.FreeXRegister(tmp.AsRegister<XRegister>()); // Free a temporary for `MoveLocation()`.
+ }
+ codegen_->MoveLocation(loc1, tmp2, tmp2_type);
+}
+
+InstructionCodeGeneratorRISCV64::InstructionCodeGeneratorRISCV64(HGraph* graph,
+ CodeGeneratorRISCV64* codegen)
+ : InstructionCodeGenerator(graph, codegen),
+ assembler_(codegen->GetAssembler()),
+ codegen_(codegen) {}
+
+void InstructionCodeGeneratorRISCV64::GenerateClassInitializationCheck(
+ SlowPathCodeRISCV64* slow_path, XRegister class_reg) {
+ ScratchRegisterScope srs(GetAssembler());
+ XRegister tmp = srs.AllocateXRegister();
+ XRegister tmp2 = srs.AllocateXRegister();
+
+ // We shall load the full 32-bit status word with sign-extension and compare as unsigned
+ // to a sign-extended shifted status value. This yields the same comparison as loading and
+ // materializing unsigned but the constant is materialized with a single LUI instruction.
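+ // (Comparing the two sign-extended values as unsigned is equivalent to the original 32-bit
+ // unsigned comparison, since sign-extension preserves that ordering.)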
+ __ Loadw(tmp, class_reg, mirror::Class::StatusOffset().SizeValue()); // Sign-extended.
+ __ Li(tmp2, ShiftedSignExtendedClassStatusValue<ClassStatus::kVisiblyInitialized>());
+ __ Bltu(tmp, tmp2, slow_path->GetEntryLabel());
+ __ Bind(slow_path->GetExitLabel());
+}
+
+void InstructionCodeGeneratorRISCV64::GenerateBitstringTypeCheckCompare(
+ HTypeCheckInstruction* instruction, XRegister temp) {
+ UNUSED(instruction);
+ UNUSED(temp);
+ LOG(FATAL) << "Unimplemented";
+}
+
+void InstructionCodeGeneratorRISCV64::GenerateSuspendCheck(HSuspendCheck* instruction,
+ HBasicBlock* successor) {
+ if (instruction->IsNoOp()) {
+ if (successor != nullptr) {
+ __ J(codegen_->GetLabelOf(successor));
+ }
+ return;
+ }
+
+ if (codegen_->CanUseImplicitSuspendCheck()) {
+ LOG(FATAL) << "Unimplemented ImplicitSuspendCheck";
+ return;
+ }
+
+ SuspendCheckSlowPathRISCV64* slow_path =
+ down_cast<SuspendCheckSlowPathRISCV64*>(instruction->GetSlowPath());
+
+ if (slow_path == nullptr) {
+ slow_path =
+ new (codegen_->GetScopedAllocator()) SuspendCheckSlowPathRISCV64(instruction, successor);
+ instruction->SetSlowPath(slow_path);
+ codegen_->AddSlowPath(slow_path);
+ if (successor != nullptr) {
+ DCHECK(successor->IsLoopHeader());
+ }
+ } else {
+ DCHECK_EQ(slow_path->GetSuccessor(), successor);
+ }
+
+ ScratchRegisterScope srs(GetAssembler());
+ XRegister tmp = srs.AllocateXRegister();
+ __ Loadw(tmp, TR, Thread::ThreadFlagsOffset<kRiscv64PointerSize>().Int32Value());
+ static_assert(Thread::SuspendOrCheckpointRequestFlags() != std::numeric_limits<uint32_t>::max());
+ static_assert(IsPowerOfTwo(Thread::SuspendOrCheckpointRequestFlags() + 1u));
+ // Shift out other bits. Use an instruction that can be 16-bit with the "C" Standard Extension.
+ __ Slli(tmp, tmp, CLZ(static_cast<uint64_t>(Thread::SuspendOrCheckpointRequestFlags())));
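+ // The flag mask is contiguous from bit 0 (see the static_asserts above), so shifting left
+ // by CLZ(mask) parks the flag bits at the top of the register and discards all other
+ // thread flags; e.g., a mask of 0x3 would give a shift amount of 62.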
+ if (successor == nullptr) {
+ __ Bnez(tmp, slow_path->GetEntryLabel());
+ __ Bind(slow_path->GetReturnLabel());
+ } else {
+ __ Beqz(tmp, codegen_->GetLabelOf(successor));
+ __ J(slow_path->GetEntryLabel());
+ // slow_path will return to GetLabelOf(successor).
+ }
+}
+
+void InstructionCodeGeneratorRISCV64::GenerateReferenceLoadOneRegister(
+ HInstruction* instruction,
+ Location out,
+ uint32_t offset,
+ Location maybe_temp,
+ ReadBarrierOption read_barrier_option) {
+ XRegister out_reg = out.AsRegister<XRegister>();
+ if (read_barrier_option == kWithReadBarrier) {
+ DCHECK(codegen_->EmitReadBarrier());
+ if (kUseBakerReadBarrier) {
+ // Load with fast path based Baker's read barrier.
+ // /* HeapReference<Object> */ out = *(out + offset)
+ codegen_->GenerateFieldLoadWithBakerReadBarrier(instruction,
+ out,
+ out_reg,
+ offset,
+ maybe_temp,
+ /* needs_null_check= */ false);
+ } else {
+ // Load with slow path based read barrier.
+ // Save the value of `out` into `maybe_temp` before overwriting it
+ // in the following move operation, as we will need it for the
+ // read barrier below.
+ __ Mv(maybe_temp.AsRegister<XRegister>(), out_reg);
+ // /* HeapReference<Object> */ out = *(out + offset)
+ __ Loadwu(out_reg, out_reg, offset);
+ codegen_->GenerateReadBarrierSlow(instruction, out, out, maybe_temp, offset);
+ }
+ } else {
+ // Plain load with no read barrier.
+ // /* HeapReference<Object> */ out = *(out + offset)
+ __ Loadwu(out_reg, out_reg, offset);
+ codegen_->MaybeUnpoisonHeapReference(out_reg);
+ }
+}
+
+void InstructionCodeGeneratorRISCV64::GenerateReferenceLoadTwoRegisters(
+ HInstruction* instruction,
+ Location out,
+ Location obj,
+ uint32_t offset,
+ Location maybe_temp,
+ ReadBarrierOption read_barrier_option) {
+ XRegister out_reg = out.AsRegister<XRegister>();
+ XRegister obj_reg = obj.AsRegister<XRegister>();
+ if (read_barrier_option == kWithReadBarrier) {
+ DCHECK(codegen_->EmitReadBarrier());
+ if (kUseBakerReadBarrier) {
+ // Load with fast path based Baker's read barrier.
+ // /* HeapReference<Object> */ out = *(obj + offset)
+ codegen_->GenerateFieldLoadWithBakerReadBarrier(instruction,
+ out,
+ obj_reg,
+ offset,
+ maybe_temp,
+ /* needs_null_check= */ false);
+ } else {
+ // Load with slow path based read barrier.
+ // /* HeapReference<Object> */ out = *(obj + offset)
+ __ Loadwu(out_reg, obj_reg, offset);
+ codegen_->GenerateReadBarrierSlow(instruction, out, out, obj, offset);
+ }
+ } else {
+ // Plain load with no read barrier.
+ // /* HeapReference<Object> */ out = *(obj + offset)
+ __ Loadwu(out_reg, obj_reg, offset);
+ codegen_->MaybeUnpoisonHeapReference(out_reg);
+ }
+}
+
+SlowPathCodeRISCV64* CodeGeneratorRISCV64::AddGcRootBakerBarrierBarrierSlowPath(
+ HInstruction* instruction, Location root, Location temp) {
+ SlowPathCodeRISCV64* slow_path =
+ new (GetScopedAllocator()) ReadBarrierMarkSlowPathRISCV64(instruction, root, temp);
+ AddSlowPath(slow_path);
+ return slow_path;
+}
+
+void CodeGeneratorRISCV64::EmitBakerReadBarierMarkingCheck(
+ SlowPathCodeRISCV64* slow_path, Location root, Location temp) {
+ const int32_t entry_point_offset = ReadBarrierMarkEntrypointOffset(root);
+ // Loading the entrypoint does not require a load acquire since it is only changed when
+ // threads are suspended or running a checkpoint.
+ __ Loadd(temp.AsRegister<XRegister>(), TR, entry_point_offset);
+ __ Bnez(temp.AsRegister<XRegister>(), slow_path->GetEntryLabel());
+ __ Bind(slow_path->GetExitLabel());
+}
+
+void CodeGeneratorRISCV64::GenerateGcRootFieldLoad(HInstruction* instruction,
+ Location root,
+ XRegister obj,
+ uint32_t offset,
+ ReadBarrierOption read_barrier_option,
+ Riscv64Label* label_low) {
+ DCHECK_IMPLIES(label_low != nullptr, offset == kLinkTimeOffsetPlaceholderLow) << offset;
+ XRegister root_reg = root.AsRegister<XRegister>();
+ if (read_barrier_option == kWithReadBarrier) {
+ DCHECK(EmitReadBarrier());
+ if (kUseBakerReadBarrier) {
+ // Note that we do not actually check the value of `GetIsGcMarking()`
+ // to decide whether to mark the loaded GC root or not. Instead, we
+ // load into `temp` (RA) the read barrier mark entry point corresponding
+ // to register `root`. If `temp` is null, it means that `GetIsGcMarking()`
+ // is false, and vice versa.
+ //
+ // GcRoot<mirror::Object> root = *(obj+offset); // Original reference load.
+ // temp = Thread::Current()->pReadBarrierMarkReg ## root.reg()
+ // if (temp != null) {
+ // root = temp(root)
+ // }
+ //
+ // TODO(riscv64): Introduce a "marking register" that holds the pointer to one of the
+ // register marking entrypoints if marking (null if not marking) and make sure that
+ // marking entrypoints for other registers are at known offsets, so that we can call
+ // them using the "marking register" plus the offset embedded in the JALR instruction.
+
+ if (label_low != nullptr) {
+ __ Bind(label_low);
+ }
+ // /* GcRoot<mirror::Object> */ root = *(obj + offset)
+ __ Loadwu(root_reg, obj, offset);
+ static_assert(
+ sizeof(mirror::CompressedReference<mirror::Object>) == sizeof(GcRoot<mirror::Object>),
+ "art::mirror::CompressedReference<mirror::Object> and art::GcRoot<mirror::Object> "
+ "have different sizes.");
+ static_assert(sizeof(mirror::CompressedReference<mirror::Object>) == sizeof(int32_t),
+ "art::mirror::CompressedReference<mirror::Object> and int32_t "
+ "have different sizes.");
+
+ // Use RA as temp. It is clobbered in the slow path anyway.
+ Location temp = Location::RegisterLocation(RA);
+ SlowPathCodeRISCV64* slow_path =
+ AddGcRootBakerBarrierBarrierSlowPath(instruction, root, temp);
+ EmitBakerReadBarierMarkingCheck(slow_path, root, temp);
+ } else {
+ // GC root loaded through a slow path for read barriers other
+ // than Baker's.
+ // /* GcRoot<mirror::Object>* */ root = obj + offset
+ if (label_low != nullptr) {
+ __ Bind(label_low);
+ }
+ __ AddConst32(root_reg, obj, offset);
+ // /* mirror::Object* */ root = root->Read()
+ GenerateReadBarrierForRootSlow(instruction, root, root);
+ }
+ } else {
+ // Plain GC root load with no read barrier.
+ // /* GcRoot<mirror::Object> */ root = *(obj + offset)
+ if (label_low != nullptr) {
+ __ Bind(label_low);
+ }
+ __ Loadwu(root_reg, obj, offset);
+ // Note that GC roots are not affected by heap poisoning, thus we
+ // do not have to unpoison `root_reg` here.
+ }
+}
+
+void InstructionCodeGeneratorRISCV64::GenerateTestAndBranch(HInstruction* instruction,
+ size_t condition_input_index,
+ Riscv64Label* true_target,
+ Riscv64Label* false_target) {
+ HInstruction* cond = instruction->InputAt(condition_input_index);
+
+ if (true_target == nullptr && false_target == nullptr) {
+ // Nothing to do. The code always falls through.
+ return;
+ } else if (cond->IsIntConstant()) {
+ // Constant condition, statically compared against "true" (integer value 1).
+ if (cond->AsIntConstant()->IsTrue()) {
+ if (true_target != nullptr) {
+ __ J(true_target);
+ }
+ } else {
+ DCHECK(cond->AsIntConstant()->IsFalse()) << cond->AsIntConstant()->GetValue();
+ if (false_target != nullptr) {
+ __ J(false_target);
+ }
+ }
+ return;
+ }
+
+ // The following code generates these patterns:
+ // (1) true_target == nullptr && false_target != nullptr
+ // - opposite condition true => branch to false_target
+ // (2) true_target != nullptr && false_target == nullptr
+ // - condition true => branch to true_target
+ // (3) true_target != nullptr && false_target != nullptr
+ // - condition true => branch to true_target
+ // - branch to false_target
+ if (IsBooleanValueOrMaterializedCondition(cond)) {
+ // The condition instruction has been materialized, compare the output to 0.
+ Location cond_val = instruction->GetLocations()->InAt(condition_input_index);
+ DCHECK(cond_val.IsRegister());
+ if (true_target == nullptr) {
+ __ Beqz(cond_val.AsRegister<XRegister>(), false_target);
+ } else {
+ __ Bnez(cond_val.AsRegister<XRegister>(), true_target);
+ }
+ } else {
+ // The condition instruction has not been materialized, use its inputs as
+ // the comparison and its condition as the branch condition.
+ HCondition* condition = cond->AsCondition();
+ DataType::Type type = condition->InputAt(0)->GetType();
+ LocationSummary* locations = condition->GetLocations();
+ IfCondition if_cond = condition->GetCondition();
+ Riscv64Label* branch_target = true_target;
+
+ if (true_target == nullptr) {
+ if_cond = condition->GetOppositeCondition();
+ branch_target = false_target;
+ }
+
+ switch (type) {
+ case DataType::Type::kFloat32:
+ case DataType::Type::kFloat64:
+ GenerateFpCondition(if_cond, condition->IsGtBias(), type, locations, branch_target);
+ break;
+ default:
+ // Integral types and reference equality.
+ GenerateIntLongCompareAndBranch(if_cond, locations, branch_target);
+ break;
+ }
+ }
+
+ // If neither branch falls through (case 3), the conditional branch to `true_target`
+ // was already emitted (case 2) and we need to emit a jump to `false_target`.
+ if (true_target != nullptr && false_target != nullptr) {
+ __ J(false_target);
+ }
+}
+
+void InstructionCodeGeneratorRISCV64::DivRemOneOrMinusOne(HBinaryOperation* instruction) {
+ DCHECK(instruction->IsDiv() || instruction->IsRem());
+ DataType::Type type = instruction->GetResultType();
+
+ LocationSummary* locations = instruction->GetLocations();
+ Location second = locations->InAt(1);
+ DCHECK(second.IsConstant());
+
+ XRegister out = locations->Out().AsRegister<XRegister>();
+ XRegister dividend = locations->InAt(0).AsRegister<XRegister>();
+ int64_t imm = Int64FromConstant(second.GetConstant());
+ DCHECK(imm == 1 || imm == -1);
+
+ if (instruction->IsRem()) {
+ __ Mv(out, Zero);
+ } else {
+ if (imm == -1) {
+ if (type == DataType::Type::kInt32) {
+ __ Subw(out, Zero, dividend);
+ } else {
+ DCHECK_EQ(type, DataType::Type::kInt64);
+ __ Sub(out, Zero, dividend);
+ }
+ } else if (out != dividend) {
+ __ Mv(out, dividend);
+ }
+ }
+}
+
+void InstructionCodeGeneratorRISCV64::DivRemByPowerOfTwo(HBinaryOperation* instruction) {
+ DCHECK(instruction->IsDiv() || instruction->IsRem());
+ DataType::Type type = instruction->GetResultType();
+ DCHECK(type == DataType::Type::kInt32 || type == DataType::Type::kInt64) << type;
+
+ LocationSummary* locations = instruction->GetLocations();
+ Location second = locations->InAt(1);
+ DCHECK(second.IsConstant());
+
+ XRegister out = locations->Out().AsRegister<XRegister>();
+ XRegister dividend = locations->InAt(0).AsRegister<XRegister>();
+ int64_t imm = Int64FromConstant(second.GetConstant());
+ int64_t abs_imm = static_cast<uint64_t>(AbsOrMin(imm));
+ int ctz_imm = CTZ(abs_imm);
+ DCHECK_GE(ctz_imm, 1); // Division by +/-1 is handled by `DivRemOneOrMinusOne()`.
+
+ ScratchRegisterScope srs(GetAssembler());
+ XRegister tmp = srs.AllocateXRegister();
+ // Calculate the negative dividend adjustment `tmp = dividend < 0 ? abs_imm - 1 : 0`.
+ // This adjustment is needed for rounding the division result towards zero.
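+ // Illustrative example (kInt32, imm = 4, dividend = -7): tmp = 3 (taken from the sign
+ // bits), then dividend + tmp = -4; the division computes -4 >> 2 = -1 and the remainder
+ // computes -7 - ((-4) & -4) = -3, both correctly rounded towards zero.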
+ if (type == DataType::Type::kInt32 || ctz_imm == 1) {
+ // A 32-bit dividend is sign-extended to 64-bit, so we can use the upper bits.
+ // And for a 64-bit division by +/-2, we need just the sign bit.
+ DCHECK_IMPLIES(type == DataType::Type::kInt32, ctz_imm < 32);
+ __ Srli(tmp, dividend, 64 - ctz_imm);
+ } else {
+ // For other 64-bit divisions, we need to replicate the sign bit.
+ __ Srai(tmp, dividend, 63);
+ __ Srli(tmp, tmp, 64 - ctz_imm);
+ }
+ // The rest of the calculation can use 64-bit operations even for 32-bit div/rem.
+ __ Add(tmp, tmp, dividend);
+ if (instruction->IsDiv()) {
+ __ Srai(out, tmp, ctz_imm);
+ if (imm < 0) {
+ __ Neg(out, out);
+ }
+ } else {
+ if (ctz_imm <= 11) {
+ __ Andi(tmp, tmp, -abs_imm);
+ } else {
+ ScratchRegisterScope srs2(GetAssembler());
+ XRegister tmp2 = srs2.AllocateXRegister();
+ __ Li(tmp2, -abs_imm);
+ __ And(tmp, tmp, tmp2);
+ }
+ __ Sub(out, dividend, tmp);
+ }
+}
+
+void InstructionCodeGeneratorRISCV64::GenerateDivRemWithAnyConstant(HBinaryOperation* instruction) {
+ DCHECK(instruction->IsDiv() || instruction->IsRem());
+ LocationSummary* locations = instruction->GetLocations();
+ XRegister dividend = locations->InAt(0).AsRegister<XRegister>();
+ XRegister out = locations->Out().AsRegister<XRegister>();
+ Location second = locations->InAt(1);
+ int64_t imm = Int64FromConstant(second.GetConstant());
+ DataType::Type type = instruction->GetResultType();
+ ScratchRegisterScope srs(GetAssembler());
+ XRegister tmp = srs.AllocateXRegister();
+
+ // TODO: optimize with constant.
+ __ LoadConst64(tmp, imm);
+ if (instruction->IsDiv()) {
+ if (type == DataType::Type::kInt32) {
+ __ Divw(out, dividend, tmp);
+ } else {
+ __ Div(out, dividend, tmp);
+ }
+ } else {
+ if (type == DataType::Type::kInt32) {
+ __ Remw(out, dividend, tmp);
+ } else {
+ __ Rem(out, dividend, tmp);
+ }
+ }
+}
+
+void InstructionCodeGeneratorRISCV64::GenerateDivRemIntegral(HBinaryOperation* instruction) {
+ DCHECK(instruction->IsDiv() || instruction->IsRem());
+ DataType::Type type = instruction->GetResultType();
+ DCHECK(type == DataType::Type::kInt32 || type == DataType::Type::kInt64) << type;
+
+ LocationSummary* locations = instruction->GetLocations();
+ XRegister out = locations->Out().AsRegister<XRegister>();
+ Location second = locations->InAt(1);
+
+ if (second.IsConstant()) {
+ int64_t imm = Int64FromConstant(second.GetConstant());
+ if (imm == 0) {
+ // Do not generate anything. DivZeroCheck would prevent any code to be executed.
+ } else if (imm == 1 || imm == -1) {
+ DivRemOneOrMinusOne(instruction);
+ } else if (IsPowerOfTwo(AbsOrMin(imm))) {
+ DivRemByPowerOfTwo(instruction);
+ } else {
+ DCHECK(imm <= -2 || imm >= 2);
+ GenerateDivRemWithAnyConstant(instruction);
+ }
+ } else {
+ XRegister dividend = locations->InAt(0).AsRegister<XRegister>();
+ XRegister divisor = second.AsRegister<XRegister>();
+ if (instruction->IsDiv()) {
+ if (type == DataType::Type::kInt32) {
+ __ Divw(out, dividend, divisor);
+ } else {
+ __ Div(out, dividend, divisor);
+ }
+ } else {
+ if (type == DataType::Type::kInt32) {
+ __ Remw(out, dividend, divisor);
+ } else {
+ __ Rem(out, dividend, divisor);
+ }
+ }
+ }
+}
+
+void InstructionCodeGeneratorRISCV64::GenerateIntLongCondition(IfCondition cond,
+ LocationSummary* locations) {
+ XRegister rd = locations->Out().AsRegister<XRegister>();
+ GenerateIntLongCondition(cond, locations, rd, /*to_all_bits=*/ false);
+}
+
+void InstructionCodeGeneratorRISCV64::GenerateIntLongCondition(IfCondition cond,
+ LocationSummary* locations,
+ XRegister rd,
+ bool to_all_bits) {
+ XRegister rs1 = locations->InAt(0).AsRegister<XRegister>();
+ Location rs2_location = locations->InAt(1);
+ bool use_imm = rs2_location.IsConstant();
+ int64_t imm = use_imm ? CodeGenerator::GetInt64ValueOf(rs2_location.GetConstant()) : 0;
+ XRegister rs2 = use_imm ? kNoXRegister : rs2_location.AsRegister<XRegister>();
+ bool reverse_condition = false;
+ switch (cond) {
+ case kCondEQ:
+ case kCondNE:
+ if (!use_imm) {
+ __ Sub(rd, rs1, rs2); // SUB is OK here even for 32-bit comparison.
+ } else if (imm != 0) {
+ DCHECK(IsInt<12>(-imm));
+ __ Addi(rd, rs1, -imm); // ADDI is OK here even for 32-bit comparison.
+ } // else test `rs1` directly without subtraction for `use_imm && imm == 0`.
+ if (cond == kCondEQ) {
+ __ Seqz(rd, (use_imm && imm == 0) ? rs1 : rd);
+ } else {
+ __ Snez(rd, (use_imm && imm == 0) ? rs1 : rd);
+ }
+ break;
+
+ case kCondLT:
+ case kCondGE:
+ if (use_imm) {
+ DCHECK(IsInt<12>(imm));
+ __ Slti(rd, rs1, imm);
+ } else {
+ __ Slt(rd, rs1, rs2);
+ }
+ // Calculate `rs1 >= rhs` as `!(rs1 < rhs)` since there's only the SLT but no SGE.
+ reverse_condition = (cond == kCondGE);
+ break;
+
+ case kCondLE:
+ case kCondGT:
+ if (use_imm) {
+ // Calculate `rs1 <= imm` as `rs1 < imm + 1`.
+ DCHECK(IsInt<12>(imm + 1)); // The value that overflows would fail this check.
+ __ Slti(rd, rs1, imm + 1);
+ } else {
+ __ Slt(rd, rs2, rs1);
+ }
+ // Calculate `rs1 > imm` as `!(rs1 < imm + 1)` and calculate
+ // `rs1 <= rs2` as `!(rs2 < rs1)` since there's only the SLT but no SGE.
+ reverse_condition = ((cond == kCondGT) == use_imm);
+ break;
+
+ case kCondB:
+ case kCondAE:
+ if (use_imm) {
+ // Sltiu sign-extends its 12-bit immediate operand before the comparison
+ // and thus lets us compare directly with unsigned values in the ranges
+ // [0, 0x7ff] and [0x[ffffffff]fffff800, 0x[ffffffff]ffffffff].
+ DCHECK(IsInt<12>(imm));
+ __ Sltiu(rd, rs1, imm);
+ } else {
+ __ Sltu(rd, rs1, rs2);
+ }
+ // Calculate `rs1 AE rhs` as `!(rs1 B rhs)` since there's only the SLTU but no SGEU.
+ reverse_condition = (cond == kCondAE);
+ break;
+
+ case kCondBE:
+ case kCondA:
+ if (use_imm) {
+ // Calculate `rs1 BE imm` as `rs1 B imm + 1`.
+ // Sltiu sign-extends its 12-bit immediate operand before the comparison
+ // and thus lets us compare directly with unsigned values in the ranges
+ // [0, 0x7ff] and [0x[ffffffff]fffff800, 0x[ffffffff]ffffffff].
+ DCHECK(IsInt<12>(imm + 1)); // The value that overflows would fail this check.
+ __ Sltiu(rd, rs1, imm + 1);
+ } else {
+ __ Sltu(rd, rs2, rs1);
+ }
+ // Calculate `rs1 A imm` as `!(rs1 B imm + 1)` and calculate
+ // `rs1 BE rs2` as `!(rs2 B rs1)` since there's only the SLTU but no SGEU.
+ reverse_condition = ((cond == kCondA) == use_imm);
+ break;
+ }
+ if (to_all_bits) {
+ // Store the result to all bits; in other words, "true" is represented by -1.
+ if (reverse_condition) {
+ __ Addi(rd, rd, -1); // 0 -> -1, 1 -> 0
+ } else {
+ __ Neg(rd, rd); // 0 -> 0, 1 -> -1
+ }
+ } else {
+ if (reverse_condition) {
+ __ Xori(rd, rd, 1);
+ }
+ }
+}
+
+void InstructionCodeGeneratorRISCV64::GenerateIntLongCompareAndBranch(IfCondition cond,
+ LocationSummary* locations,
+ Riscv64Label* label) {
+ XRegister left = locations->InAt(0).AsRegister<XRegister>();
+ Location right_location = locations->InAt(1);
+ if (right_location.IsConstant()) {
+ DCHECK_EQ(CodeGenerator::GetInt64ValueOf(right_location.GetConstant()), 0);
+ switch (cond) {
+ case kCondEQ:
+ case kCondBE: // Unsigned <= 0; true only for zero.
+ __ Beqz(left, label);
+ break;
+ case kCondNE:
+ case kCondA: // Unsigned > 0; true for any non-zero value.
+ __ Bnez(left, label);
+ break;
+ case kCondLT:
+ __ Bltz(left, label);
+ break;
+ case kCondGE:
+ __ Bgez(left, label);
+ break;
+ case kCondLE:
+ __ Blez(left, label);
+ break;
+ case kCondGT:
+ __ Bgtz(left, label);
+ break;
+ case kCondB: // Unsigned < 0; always false.
+ break;
+ case kCondAE: // Unsigned >= 0; always true.
+ __ J(label);
+ break;
+ }
+ } else {
+ XRegister right_reg = right_location.AsRegister<XRegister>();
+ switch (cond) {
+ case kCondEQ:
+ __ Beq(left, right_reg, label);
+ break;
+ case kCondNE:
+ __ Bne(left, right_reg, label);
+ break;
+ case kCondLT:
+ __ Blt(left, right_reg, label);
+ break;
+ case kCondGE:
+ __ Bge(left, right_reg, label);
+ break;
+ case kCondLE:
+ __ Ble(left, right_reg, label);
+ break;
+ case kCondGT:
+ __ Bgt(left, right_reg, label);
+ break;
+ case kCondB:
+ __ Bltu(left, right_reg, label);
+ break;
+ case kCondAE:
+ __ Bgeu(left, right_reg, label);
+ break;
+ case kCondBE:
+ __ Bleu(left, right_reg, label);
+ break;
+ case kCondA:
+ __ Bgtu(left, right_reg, label);
+ break;
+ }
+ }
+}
+
+void InstructionCodeGeneratorRISCV64::GenerateFpCondition(IfCondition cond,
+ bool gt_bias,
+ DataType::Type type,
+ LocationSummary* locations,
+ Riscv64Label* label) {
+ DCHECK_EQ(label != nullptr, locations->Out().IsInvalid());
+ ScratchRegisterScope srs(GetAssembler());
+ XRegister rd =
+ (label != nullptr) ? srs.AllocateXRegister() : locations->Out().AsRegister<XRegister>();
+ GenerateFpCondition(cond, gt_bias, type, locations, label, rd, /*to_all_bits=*/ false);
+}
+
+void InstructionCodeGeneratorRISCV64::GenerateFpCondition(IfCondition cond,
+ bool gt_bias,
+ DataType::Type type,
+ LocationSummary* locations,
+ Riscv64Label* label,
+ XRegister rd,
+ bool to_all_bits) {
+ // RISC-V FP compare instructions yield the following values:
+ // l<r l=r l>r Unordered
+ // FEQ l,r 0 1 0 0
+ // FLT l,r 1 0 0 0
+ // FLT r,l 0 0 1 0
+ // FLE l,r 1 1 0 0
+ // FLE r,l 0 1 1 0
+ //
+ // We can calculate the `Compare` results using the following formulas:
+ // l<r l=r l>r Unordered
+ // Compare/gt_bias -1 0 1 1 = ((FLE l,r) ^ 1) - (FLT l,r)
+ // Compare/lt_bias -1 0 1 -1 = ((FLE r,l) - 1) + (FLT r,l)
+ // These are emitted in `VisitCompare()`.
+ //
+ // This function emits a fused `Condition(Compare(., .), 0)`. If we compare the
+ // `Compare` results above with 0, we get the following values and formulas:
+ // l<r l=r l>r Unordered
+ // CondEQ/- 0 1 0 0 = (FEQ l, r)
+ // CondNE/- 1 0 1 1 = (FEQ l, r) ^ 1
+ // CondLT/gt_bias 1 0 0 0 = (FLT l,r)
+ // CondLT/lt_bias 1 0 0 1 = (FLE r,l) ^ 1
+ // CondLE/gt_bias 1 1 0 0 = (FLE l,r)
+ // CondLE/lt_bias 1 1 0 1 = (FLT r,l) ^ 1
+ // CondGT/gt_bias 0 0 1 1 = (FLE l,r) ^ 1
+ // CondGT/lt_bias 0 0 1 0 = (FLT r,l)
+ // CondGE/gt_bias 0 1 1 1 = (FLT l,r) ^ 1
+ // CondGE/lt_bias 0 1 1 0 = (FLE r,l)
+ // (CondEQ/CondNE comparison with zero yields the same result with gt_bias and lt_bias.)
+ //
+ // If the condition is not materialized, the `^ 1` is not emitted,
+ // instead the condition is reversed by emitting BEQZ instead of BNEZ.
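+ // For example, CondLT with gt_bias emits just FLT l,r; with lt_bias it emits FLE r,l and
+ // then either flips the materialized result with XORI or branches with BEQZ instead of BNEZ.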
+
+ FRegister rs1 = locations->InAt(0).AsFpuRegister<FRegister>();
+ FRegister rs2 = locations->InAt(1).AsFpuRegister<FRegister>();
+
+ bool reverse_condition = false;
+ switch (cond) {
+ case kCondEQ:
+ FEq(rd, rs1, rs2, type);
+ break;
+ case kCondNE:
+ FEq(rd, rs1, rs2, type);
+ reverse_condition = true;
+ break;
+ case kCondLT:
+ if (gt_bias) {
+ FLt(rd, rs1, rs2, type);
+ } else {
+ FLe(rd, rs2, rs1, type);
+ reverse_condition = true;
+ }
+ break;
+ case kCondLE:
+ if (gt_bias) {
+ FLe(rd, rs1, rs2, type);
+ } else {
+ FLt(rd, rs2, rs1, type);
+ reverse_condition = true;
+ }
+ break;
+ case kCondGT:
+ if (gt_bias) {
+ FLe(rd, rs1, rs2, type);
+ reverse_condition = true;
+ } else {
+ FLt(rd, rs2, rs1, type);
+ }
+ break;
+ case kCondGE:
+ if (gt_bias) {
+ FLt(rd, rs1, rs2, type);
+ reverse_condition = true;
+ } else {
+ FLe(rd, rs2, rs1, type);
+ }
+ break;
+ default:
+ LOG(FATAL) << "Unexpected floating-point condition " << cond;
+ UNREACHABLE();
+ }
+
+ if (label != nullptr) {
+ if (reverse_condition) {
+ __ Beqz(rd, label);
+ } else {
+ __ Bnez(rd, label);
+ }
+ } else if (to_all_bits) {
+ // Store the result to all bits; in other words, "true" is represented by -1.
+ if (reverse_condition) {
+ __ Addi(rd, rd, -1); // 0 -> -1, 1 -> 0
+ } else {
+ __ Neg(rd, rd); // 0 -> 0, 1 -> -1
+ }
+ } else {
+ if (reverse_condition) {
+ __ Xori(rd, rd, 1);
+ }
+ }
+}
+
+void CodeGeneratorRISCV64::GenerateFieldLoadWithBakerReadBarrier(HInstruction* instruction,
+ Location ref,
+ XRegister obj,
+ uint32_t offset,
+ Location temp,
+ bool needs_null_check) {
+ GenerateReferenceLoadWithBakerReadBarrier(
+ instruction, ref, obj, offset, /*index=*/ Location::NoLocation(), temp, needs_null_check);
+}
+
+void CodeGeneratorRISCV64::GenerateArrayLoadWithBakerReadBarrier(HInstruction* instruction,
+ Location ref,
+ XRegister obj,
+ uint32_t data_offset,
+ Location index,
+ Location temp,
+ bool needs_null_check) {
+ GenerateReferenceLoadWithBakerReadBarrier(
+ instruction, ref, obj, data_offset, index, temp, needs_null_check);
+}
+
+void CodeGeneratorRISCV64::GenerateReferenceLoadWithBakerReadBarrier(HInstruction* instruction,
+ Location ref,
+ XRegister obj,
+ uint32_t offset,
+ Location index,
+ Location temp,
+ bool needs_null_check) {
+ // For now, use the same approach as for GC roots plus unpoison the reference if needed.
+ // TODO(riscv64): Implement checking if the holder is black.
+ UNUSED(temp);
+
+ DCHECK(EmitBakerReadBarrier());
+ XRegister reg = ref.AsRegister<XRegister>();
+ if (index.IsValid()) {
+ DCHECK(!needs_null_check);
+ DCHECK(index.IsRegister());
+ DataType::Type type = DataType::Type::kReference;
+ DCHECK_EQ(type, instruction->GetType());
+ if (instruction->IsArrayGet()) {
+ // /* HeapReference<Object> */ ref = *(obj + index * element_size + offset)
+ instruction_visitor_.ShNAdd(reg, index.AsRegister<XRegister>(), obj, type);
+ } else {
+ // /* HeapReference<Object> */ ref = *(obj + index + offset)
+ DCHECK(instruction->IsInvoke());
+ DCHECK(instruction->GetLocations()->Intrinsified());
+ __ Add(reg, index.AsRegister<XRegister>(), obj);
+ }
+ __ Loadwu(reg, reg, offset);
+ } else {
+ // /* HeapReference<Object> */ ref = *(obj + offset)
+ __ Loadwu(reg, obj, offset);
+ if (needs_null_check) {
+ MaybeRecordImplicitNullCheck(instruction);
+ }
+ }
+ MaybeUnpoisonHeapReference(reg);
+
+ // Slow path marking the reference.
+ XRegister tmp = RA; // Use RA as temp. It is clobbered in the slow path anyway.
+ SlowPathCodeRISCV64* slow_path = new (GetScopedAllocator()) ReadBarrierMarkSlowPathRISCV64(
+ instruction, ref, Location::RegisterLocation(tmp));
+ AddSlowPath(slow_path);
+
+ const int32_t entry_point_offset = ReadBarrierMarkEntrypointOffset(ref);
+ // Loading the entrypoint does not require a load acquire since it is only changed when
+ // threads are suspended or running a checkpoint.
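+  // The entrypoint is non-null only while the GC is in the marking phase, so a non-zero value
+  // means the loaded reference may need to be marked by the slow path.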
+ __ Loadd(tmp, TR, entry_point_offset);
+ __ Bnez(tmp, slow_path->GetEntryLabel());
+ __ Bind(slow_path->GetExitLabel());
+}
+
+SlowPathCodeRISCV64* CodeGeneratorRISCV64::AddReadBarrierSlowPath(HInstruction* instruction,
+ Location out,
+ Location ref,
+ Location obj,
+ uint32_t offset,
+ Location index) {
+ UNUSED(instruction);
+ UNUSED(out);
+ UNUSED(ref);
+ UNUSED(obj);
+ UNUSED(offset);
+ UNUSED(index);
+ LOG(FATAL) << "Unimplemented";
+ UNREACHABLE();
+}
+
+void CodeGeneratorRISCV64::GenerateReadBarrierSlow(HInstruction* instruction,
+ Location out,
+ Location ref,
+ Location obj,
+ uint32_t offset,
+ Location index) {
+ UNUSED(instruction);
+ UNUSED(out);
+ UNUSED(ref);
+ UNUSED(obj);
+ UNUSED(offset);
+ UNUSED(index);
+ LOG(FATAL) << "Unimplemented";
+}
+
+void CodeGeneratorRISCV64::MaybeGenerateReadBarrierSlow(HInstruction* instruction,
+ Location out,
+ Location ref,
+ Location obj,
+ uint32_t offset,
+ Location index) {
+ if (EmitReadBarrier()) {
+ // Baker's read barriers shall be handled by the fast path
+ // (CodeGeneratorRISCV64::GenerateReferenceLoadWithBakerReadBarrier).
+ DCHECK(!kUseBakerReadBarrier);
+ // If heap poisoning is enabled, unpoisoning will be taken care of
+ // by the runtime within the slow path.
+ GenerateReadBarrierSlow(instruction, out, ref, obj, offset, index);
+ } else if (kPoisonHeapReferences) {
+ UnpoisonHeapReference(out.AsRegister<XRegister>());
+ }
+}
+
+void CodeGeneratorRISCV64::GenerateReadBarrierForRootSlow(HInstruction* instruction,
+ Location out,
+ Location root) {
+ DCHECK(EmitReadBarrier());
+
+ // Insert a slow path based read barrier *after* the GC root load.
+ //
+ // Note that GC roots are not affected by heap poisoning, so we do
+ // not need to do anything special for this here.
+ SlowPathCodeRISCV64* slow_path =
+ new (GetScopedAllocator()) ReadBarrierForRootSlowPathRISCV64(instruction, out, root);
+ AddSlowPath(slow_path);
+
+ __ J(slow_path->GetEntryLabel());
+ __ Bind(slow_path->GetExitLabel());
+}
+
+void InstructionCodeGeneratorRISCV64::HandleGoto(HInstruction* instruction,
+ HBasicBlock* successor) {
+ if (successor->IsExitBlock()) {
+ DCHECK(instruction->GetPrevious()->AlwaysThrows());
+ return; // no code needed
+ }
+
+ HBasicBlock* block = instruction->GetBlock();
+ HInstruction* previous = instruction->GetPrevious();
+ HLoopInformation* info = block->GetLoopInformation();
+
+ if (info != nullptr && info->IsBackEdge(*block) && info->HasSuspendCheck()) {
+ codegen_->MaybeIncrementHotness(/*is_frame_entry=*/ false);
+ GenerateSuspendCheck(info->GetSuspendCheck(), successor);
+ return; // `GenerateSuspendCheck()` emitted the jump.
+ }
+ if (block->IsEntryBlock() && previous != nullptr && previous->IsSuspendCheck()) {
+ GenerateSuspendCheck(previous->AsSuspendCheck(), nullptr);
+ }
+ if (!codegen_->GoesToNextBlock(block, successor)) {
+ __ J(codegen_->GetLabelOf(successor));
+ }
+}
+
+void InstructionCodeGeneratorRISCV64::GenPackedSwitchWithCompares(XRegister adjusted,
+ XRegister temp,
+ uint32_t num_entries,
+ HBasicBlock* switch_block) {
+ // Note: The `adjusted` register holds `value - lower_bound`. If the `lower_bound` is 0,
+ // `adjusted` is the original `value` register and we must not clobber it. Otherwise,
+ // `adjusted` is the `temp`. The caller already emitted the `adjusted < num_entries` check.
+
+ // Create a set of compare/jumps.
+ ArrayRef<HBasicBlock* const> successors(switch_block->GetSuccessors());
+ uint32_t index = 0;
+ for (; num_entries - index >= 2u; index += 2u) {
+ // Jump to `successors[index]` if `value == lower_bound + index`.
+ // Note that `adjusted` holds `value - lower_bound - index`.
+ __ Beqz(adjusted, codegen_->GetLabelOf(successors[index]));
+ if (num_entries - index == 2u) {
+ break; // The last entry shall match, so the branch shall be unconditional.
+ }
+ // Jump to `successors[index + 1]` if `value == lower_bound + index + 1`.
+ // Modify `adjusted` to hold `value - lower_bound - index - 2` for this comparison.
+ __ Addi(temp, adjusted, -2);
+ adjusted = temp;
+ __ Bltz(adjusted, codegen_->GetLabelOf(successors[index + 1]));
+ }
+ // For the last entry, unconditionally jump to `successors[num_entries - 1]`.
+ __ J(codegen_->GetLabelOf(successors[num_entries - 1u]));
+}
+
+void InstructionCodeGeneratorRISCV64::GenTableBasedPackedSwitch(XRegister adjusted,
+ XRegister temp,
+ uint32_t num_entries,
+ HBasicBlock* switch_block) {
+ // Note: The `adjusted` register holds `value - lower_bound`. If the `lower_bound` is 0,
+ // `adjusted` is the original `value` register and we must not clobber it. Otherwise,
+ // `adjusted` is the `temp`. The caller already emitted the `adjusted < num_entries` check.
+
+ // Create a jump table.
+ ArenaVector<Riscv64Label*> labels(num_entries,
+ __ GetAllocator()->Adapter(kArenaAllocSwitchTable));
+ const ArenaVector<HBasicBlock*>& successors = switch_block->GetSuccessors();
+ for (uint32_t i = 0; i < num_entries; i++) {
+ labels[i] = codegen_->GetLabelOf(successors[i]);
+ }
+ JumpTable* table = __ CreateJumpTable(std::move(labels));
+
+ // Load the address of the jump table.
+ // Note: The `LoadLabelAddress()` emits AUIPC+ADD. It is possible to avoid the ADD and
+ // instead embed that offset in the LW below as well as all jump table entries but
+ // that would need some invasive changes in the jump table handling in the assembler.
+ ScratchRegisterScope srs(GetAssembler());
+ XRegister table_base = srs.AllocateXRegister();
+ __ LoadLabelAddress(table_base, table->GetLabel());
+
+ // Load the PC difference from the jump table.
+ // TODO(riscv64): Use SH2ADD from the Zba extension.
+ __ Slli(temp, adjusted, 2);
+ __ Add(temp, temp, table_base);
+ __ Lw(temp, temp, 0);
+
+ // Compute the absolute target address by adding the table start address
+ // (the table contains offsets to targets relative to its start).
+ __ Add(temp, temp, table_base);
+ // And jump.
+ __ Jr(temp);
+}
+
+int32_t InstructionCodeGeneratorRISCV64::VecAddress(LocationSummary* locations,
+ size_t size,
+ /*out*/ XRegister* adjusted_base) {
+ UNUSED(locations);
+ UNUSED(size);
+ UNUSED(adjusted_base);
+ LOG(FATAL) << "Unimplemented";
+ UNREACHABLE();
+}
+
+void LocationsBuilderRISCV64::HandleBinaryOp(HBinaryOperation* instruction) {
+ DCHECK_EQ(instruction->InputCount(), 2u);
+ LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(instruction);
+ DataType::Type type = instruction->GetResultType();
+ switch (type) {
+ case DataType::Type::kInt32:
+ case DataType::Type::kInt64: {
+ locations->SetInAt(0, Location::RequiresRegister());
+ HInstruction* right = instruction->InputAt(1);
+ bool can_use_imm = false;
+ if (instruction->IsMin() || instruction->IsMax()) {
+        can_use_imm = IsZeroBitPattern(right);
+ } else if (right->IsConstant()) {
+ int64_t imm = CodeGenerator::GetInt64ValueOf(right->AsConstant());
+ can_use_imm = IsInt<12>(instruction->IsSub() ? -imm : imm);
+ }
+ if (can_use_imm) {
+ locations->SetInAt(1, Location::ConstantLocation(right));
+ } else {
+ locations->SetInAt(1, Location::RequiresRegister());
+ }
+ locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
+ break;
+ }
+
+ case DataType::Type::kFloat32:
+ case DataType::Type::kFloat64:
+ locations->SetInAt(0, Location::RequiresFpuRegister());
+ locations->SetInAt(1, Location::RequiresFpuRegister());
+ if (instruction->IsMin() || instruction->IsMax()) {
+ locations->SetOut(Location::RequiresFpuRegister(), Location::kOutputOverlap);
+ } else {
+ locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
+ }
+ break;
+
+ default:
+ LOG(FATAL) << "Unexpected " << instruction->DebugName() << " type " << type;
+ UNREACHABLE();
+ }
+}
+
+void InstructionCodeGeneratorRISCV64::HandleBinaryOp(HBinaryOperation* instruction) {
+ DataType::Type type = instruction->GetType();
+ LocationSummary* locations = instruction->GetLocations();
+
+ switch (type) {
+ case DataType::Type::kInt32:
+ case DataType::Type::kInt64: {
+ XRegister rd = locations->Out().AsRegister<XRegister>();
+ XRegister rs1 = locations->InAt(0).AsRegister<XRegister>();
+ Location rs2_location = locations->InAt(1);
+
+ bool use_imm = rs2_location.IsConstant();
+ XRegister rs2 = use_imm ? kNoXRegister : rs2_location.AsRegister<XRegister>();
+ int64_t imm = use_imm ? CodeGenerator::GetInt64ValueOf(rs2_location.GetConstant()) : 0;
+
+ if (instruction->IsAnd()) {
+ if (use_imm) {
+ __ Andi(rd, rs1, imm);
+ } else {
+ __ And(rd, rs1, rs2);
+ }
+ } else if (instruction->IsOr()) {
+ if (use_imm) {
+ __ Ori(rd, rs1, imm);
+ } else {
+ __ Or(rd, rs1, rs2);
+ }
+ } else if (instruction->IsXor()) {
+ if (use_imm) {
+ __ Xori(rd, rs1, imm);
+ } else {
+ __ Xor(rd, rs1, rs2);
+ }
+ } else if (instruction->IsAdd() || instruction->IsSub()) {
+ if (type == DataType::Type::kInt32) {
+ if (use_imm) {
+ __ Addiw(rd, rs1, instruction->IsSub() ? -imm : imm);
+ } else if (instruction->IsAdd()) {
+ __ Addw(rd, rs1, rs2);
+ } else {
+ DCHECK(instruction->IsSub());
+ __ Subw(rd, rs1, rs2);
+ }
+ } else {
+ if (use_imm) {
+ __ Addi(rd, rs1, instruction->IsSub() ? -imm : imm);
+ } else if (instruction->IsAdd()) {
+ __ Add(rd, rs1, rs2);
+ } else {
+ DCHECK(instruction->IsSub());
+ __ Sub(rd, rs1, rs2);
+ }
+ }
+ } else if (instruction->IsMin()) {
+ DCHECK_IMPLIES(use_imm, imm == 0);
+ __ Min(rd, rs1, use_imm ? Zero : rs2);
+ } else {
+ DCHECK(instruction->IsMax());
+ DCHECK_IMPLIES(use_imm, imm == 0);
+ __ Max(rd, rs1, use_imm ? Zero : rs2);
+ }
+ break;
+ }
+ case DataType::Type::kFloat32:
+ case DataType::Type::kFloat64: {
+ FRegister rd = locations->Out().AsFpuRegister<FRegister>();
+ FRegister rs1 = locations->InAt(0).AsFpuRegister<FRegister>();
+ FRegister rs2 = locations->InAt(1).AsFpuRegister<FRegister>();
+ if (instruction->IsAdd()) {
+ FAdd(rd, rs1, rs2, type);
+ } else if (instruction->IsSub()) {
+ FSub(rd, rs1, rs2, type);
+ } else {
+ DCHECK(instruction->IsMin() || instruction->IsMax());
+ // If one of the operands is NaN and the other is not, riscv64 instructions FMIN/FMAX
+ // return the other operand while we want to return the NaN operand.
+ DCHECK_NE(rd, rs1); // Requested `Location::kOutputOverlap`.
+ DCHECK_NE(rd, rs2); // Requested `Location::kOutputOverlap`.
+ ScratchRegisterScope srs(GetAssembler());
+ XRegister tmp = srs.AllocateXRegister();
+ XRegister tmp2 = srs.AllocateXRegister();
+ Riscv64Label done;
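+        // FCLASS yields a one-hot class mask where the two NaN classes occupy the highest bits,
+        // so any NaN input produces a value of at least `kFClassNaNMinValue`.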
+ // Return `rs1` if it's NaN.
+ FClass(tmp, rs1, type);
+ __ Li(tmp2, kFClassNaNMinValue);
+ FMv(rd, rs1, type);
+ __ Bgeu(tmp, tmp2, &done);
+ // Return `rs2` if it's NaN.
+ FClass(tmp, rs2, type);
+ FMv(rd, rs2, type);
+ __ Bgeu(tmp, tmp2, &done);
+ // Calculate Min/Max for non-NaN arguments.
+ if (instruction->IsMin()) {
+ FMin(rd, rs1, rs2, type);
+ } else {
+ FMax(rd, rs1, rs2, type);
+ }
+ __ Bind(&done);
+ }
+ break;
+ }
+ default:
+ LOG(FATAL) << "Unexpected binary operation type " << type;
+ UNREACHABLE();
+ }
+}
+
+void LocationsBuilderRISCV64::HandleCondition(HCondition* instruction) {
+ LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(instruction);
+ switch (instruction->InputAt(0)->GetType()) {
+ case DataType::Type::kFloat32:
+ case DataType::Type::kFloat64:
+ locations->SetInAt(0, Location::RequiresFpuRegister());
+ locations->SetInAt(1, Location::RequiresFpuRegister());
+ break;
+
+ default: {
+ locations->SetInAt(0, Location::RequiresRegister());
+ HInstruction* rhs = instruction->InputAt(1);
+ bool use_imm = false;
+ if (rhs->IsConstant()) {
+ int64_t imm = CodeGenerator::GetInt64ValueOf(rhs->AsConstant());
+ if (instruction->IsEmittedAtUseSite()) {
+ // For `HIf`, materialize all non-zero constants with an `HParallelMove`.
+ // Note: For certain constants and conditions, the code could be improved.
+ // For example, 2048 takes two instructions to materialize but the negative
+ // -2048 could be embedded in ADDI for EQ/NE comparison.
+ use_imm = (imm == 0);
+ } else {
+ // Constants that cannot be embedded in an instruction's 12-bit immediate shall be
+ // materialized with an `HParallelMove`. This simplifies the code and avoids cases
+ // with arithmetic overflow. Adjust the `imm` if needed for a particular instruction.
+ switch (instruction->GetCondition()) {
+ case kCondEQ:
+ case kCondNE:
+ imm = -imm; // ADDI with negative immediate (there is no SUBI).
+ break;
+ case kCondLE:
+ case kCondGT:
+ case kCondBE:
+ case kCondA:
+ imm += 1; // SLTI/SLTIU with adjusted immediate (there is no SLEI/SLEIU).
+ break;
+ default:
+ break;
+ }
+ use_imm = IsInt<12>(imm);
+ }
+ }
+ if (use_imm) {
+ locations->SetInAt(1, Location::ConstantLocation(rhs));
+ } else {
+ locations->SetInAt(1, Location::RequiresRegister());
+ }
+ break;
+ }
+ }
+ if (!instruction->IsEmittedAtUseSite()) {
+ locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
+ }
+}
+
+void InstructionCodeGeneratorRISCV64::HandleCondition(HCondition* instruction) {
+ if (instruction->IsEmittedAtUseSite()) {
+ return;
+ }
+
+ DataType::Type type = instruction->InputAt(0)->GetType();
+ LocationSummary* locations = instruction->GetLocations();
+ switch (type) {
+ case DataType::Type::kFloat32:
+ case DataType::Type::kFloat64:
+ GenerateFpCondition(instruction->GetCondition(), instruction->IsGtBias(), type, locations);
+ return;
+ default:
+ // Integral types and reference equality.
+ GenerateIntLongCondition(instruction->GetCondition(), locations);
+ return;
+ }
+}
+
+void LocationsBuilderRISCV64::HandleShift(HBinaryOperation* instruction) {
+ DCHECK(instruction->IsShl() ||
+ instruction->IsShr() ||
+ instruction->IsUShr() ||
+ instruction->IsRor());
+
+ LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(instruction);
+ DataType::Type type = instruction->GetResultType();
+ switch (type) {
+ case DataType::Type::kInt32:
+ case DataType::Type::kInt64: {
+ locations->SetInAt(0, Location::RequiresRegister());
+ locations->SetInAt(1, Location::RegisterOrConstant(instruction->InputAt(1)));
+ locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
+ break;
+ }
+ default:
+ LOG(FATAL) << "Unexpected shift type " << type;
+ UNREACHABLE();
+ }
+}
+
+void InstructionCodeGeneratorRISCV64::HandleShift(HBinaryOperation* instruction) {
+ DCHECK(instruction->IsShl() ||
+ instruction->IsShr() ||
+ instruction->IsUShr() ||
+ instruction->IsRor());
+ LocationSummary* locations = instruction->GetLocations();
+ DataType::Type type = instruction->GetType();
+
+ switch (type) {
+ case DataType::Type::kInt32:
+ case DataType::Type::kInt64: {
+ XRegister rd = locations->Out().AsRegister<XRegister>();
+ XRegister rs1 = locations->InAt(0).AsRegister<XRegister>();
+ Location rs2_location = locations->InAt(1);
+
+ if (rs2_location.IsConstant()) {
+ int64_t imm = CodeGenerator::GetInt64ValueOf(rs2_location.GetConstant());
+ uint32_t shamt =
+ imm & (type == DataType::Type::kInt32 ? kMaxIntShiftDistance : kMaxLongShiftDistance);
+
+ if (shamt == 0) {
+ if (rd != rs1) {
+ __ Mv(rd, rs1);
+ }
+ } else if (type == DataType::Type::kInt32) {
+ if (instruction->IsShl()) {
+ __ Slliw(rd, rs1, shamt);
+ } else if (instruction->IsShr()) {
+ __ Sraiw(rd, rs1, shamt);
+ } else if (instruction->IsUShr()) {
+ __ Srliw(rd, rs1, shamt);
+ } else {
+ DCHECK(instruction->IsRor());
+ __ Roriw(rd, rs1, shamt);
+ }
+ } else {
+ if (instruction->IsShl()) {
+ __ Slli(rd, rs1, shamt);
+ } else if (instruction->IsShr()) {
+ __ Srai(rd, rs1, shamt);
+ } else if (instruction->IsUShr()) {
+ __ Srli(rd, rs1, shamt);
+ } else {
+ DCHECK(instruction->IsRor());
+ __ Rori(rd, rs1, shamt);
+ }
+ }
+ } else {
+ XRegister rs2 = rs2_location.AsRegister<XRegister>();
+ if (type == DataType::Type::kInt32) {
+ if (instruction->IsShl()) {
+ __ Sllw(rd, rs1, rs2);
+ } else if (instruction->IsShr()) {
+ __ Sraw(rd, rs1, rs2);
+ } else if (instruction->IsUShr()) {
+ __ Srlw(rd, rs1, rs2);
+ } else {
+ DCHECK(instruction->IsRor());
+ __ Rorw(rd, rs1, rs2);
+ }
+ } else {
+ if (instruction->IsShl()) {
+ __ Sll(rd, rs1, rs2);
+ } else if (instruction->IsShr()) {
+ __ Sra(rd, rs1, rs2);
+ } else if (instruction->IsUShr()) {
+ __ Srl(rd, rs1, rs2);
+ } else {
+ DCHECK(instruction->IsRor());
+ __ Ror(rd, rs1, rs2);
+ }
+ }
+ }
+ break;
+ }
+ default:
+ LOG(FATAL) << "Unexpected shift operation type " << type;
+ }
+}
+
+void CodeGeneratorRISCV64::MarkGCCard(XRegister object,
+ XRegister value,
+ bool value_can_be_null) {
+ Riscv64Label done;
+ ScratchRegisterScope srs(GetAssembler());
+ XRegister card = srs.AllocateXRegister();
+ XRegister temp = srs.AllocateXRegister();
+ if (value_can_be_null) {
+ __ Beqz(value, &done);
+ }
+ // Load the address of the card table into `card`.
+ __ Loadd(card, TR, Thread::CardTableOffset<kRiscv64PointerSize>().Int32Value());
+
+ // Calculate the address of the card corresponding to `object`.
+ __ Srli(temp, object, gc::accounting::CardTable::kCardShift);
+ __ Add(temp, card, temp);
+ // Write the `art::gc::accounting::CardTable::kCardDirty` value into the
+ // `object`'s card.
+ //
+ // Register `card` contains the address of the card table. Note that the card
+ // table's base is biased during its creation so that it always starts at an
+ // address whose least-significant byte is equal to `kCardDirty` (see
+ // art::gc::accounting::CardTable::Create). Therefore the SB instruction
+ // below writes the `kCardDirty` (byte) value into the `object`'s card
+ // (located at `card + object >> kCardShift`).
+ //
+ // This dual use of the value in register `card` (1. to calculate the location
+ // of the card to mark; and 2. to load the `kCardDirty` value) saves a load
+ // (no need to explicitly load `kCardDirty` as an immediate value).
+ __ Sb(card, temp, 0); // No scratch register left for `Storeb()`.
+ if (value_can_be_null) {
+ __ Bind(&done);
+ }
+}
+
+void LocationsBuilderRISCV64::HandleFieldSet(HInstruction* instruction) {
+ LocationSummary* locations =
+ new (GetGraph()->GetAllocator()) LocationSummary(instruction, LocationSummary::kNoCall);
+ locations->SetInAt(0, Location::RequiresRegister());
+ locations->SetInAt(1, ValueLocationForStore(instruction->InputAt(1)));
+}
+
+void InstructionCodeGeneratorRISCV64::HandleFieldSet(HInstruction* instruction,
+ const FieldInfo& field_info,
+ bool value_can_be_null,
+ WriteBarrierKind write_barrier_kind) {
+ DataType::Type type = field_info.GetFieldType();
+ LocationSummary* locations = instruction->GetLocations();
+ XRegister obj = locations->InAt(0).AsRegister<XRegister>();
+ Location value = locations->InAt(1);
+ DCHECK_IMPLIES(value.IsConstant(), IsZeroBitPattern(value.GetConstant()));
+ bool is_volatile = field_info.IsVolatile();
+ uint32_t offset = field_info.GetFieldOffset().Uint32Value();
+
+ if (is_volatile) {
+ StoreSeqCst(value, obj, offset, type, instruction);
+ } else {
+ Store(value, obj, offset, type);
+ codegen_->MaybeRecordImplicitNullCheck(instruction);
+ }
+
+ if (CodeGenerator::StoreNeedsWriteBarrier(type, instruction->InputAt(1)) &&
+ write_barrier_kind != WriteBarrierKind::kDontEmit) {
+ codegen_->MarkGCCard(
+ obj,
+ value.AsRegister<XRegister>(),
+ value_can_be_null && write_barrier_kind == WriteBarrierKind::kEmitWithNullCheck);
+ }
+}
+
+void LocationsBuilderRISCV64::HandleFieldGet(HInstruction* instruction) {
+ DCHECK(instruction->IsInstanceFieldGet() || instruction->IsStaticFieldGet());
+
+ bool object_field_get_with_read_barrier =
+ (instruction->GetType() == DataType::Type::kReference) && codegen_->EmitReadBarrier();
+ LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(
+ instruction,
+ object_field_get_with_read_barrier
+ ? LocationSummary::kCallOnSlowPath
+ : LocationSummary::kNoCall);
+
+ // Input for object receiver.
+ locations->SetInAt(0, Location::RequiresRegister());
+
+ if (DataType::IsFloatingPointType(instruction->GetType())) {
+ locations->SetOut(Location::RequiresFpuRegister());
+ } else {
+ // The output overlaps for an object field get when read barriers
+ // are enabled: we do not want the load to overwrite the object's
+ // location, as we need it to emit the read barrier.
+ locations->SetOut(
+ Location::RequiresRegister(),
+ object_field_get_with_read_barrier ? Location::kOutputOverlap : Location::kNoOutputOverlap);
+ }
+
+ if (object_field_get_with_read_barrier && kUseBakerReadBarrier) {
+ locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty()); // No caller-save registers.
+ // We need a temporary register for the read barrier marking slow
+ // path in CodeGeneratorRISCV64::GenerateFieldLoadWithBakerReadBarrier.
+ locations->AddTemp(Location::RequiresRegister());
+ }
+}
+
+void InstructionCodeGeneratorRISCV64::HandleFieldGet(HInstruction* instruction,
+ const FieldInfo& field_info) {
+ DCHECK(instruction->IsInstanceFieldGet() || instruction->IsStaticFieldGet());
+ DCHECK_EQ(DataType::Size(field_info.GetFieldType()), DataType::Size(instruction->GetType()));
+ DataType::Type type = instruction->GetType();
+ LocationSummary* locations = instruction->GetLocations();
+ Location obj_loc = locations->InAt(0);
+ XRegister obj = obj_loc.AsRegister<XRegister>();
+ Location dst_loc = locations->Out();
+ bool is_volatile = field_info.IsVolatile();
+ uint32_t offset = field_info.GetFieldOffset().Uint32Value();
+
+ if (is_volatile) {
+ codegen_->GenerateMemoryBarrier(MemBarrierKind::kAnyAny);
+ }
+
+ if (type == DataType::Type::kReference && codegen_->EmitBakerReadBarrier()) {
+ // /* HeapReference<Object> */ dst = *(obj + offset)
+ Location temp_loc = locations->GetTemp(0);
+ // Note that a potential implicit null check is handled in this
+ // CodeGeneratorRISCV64::GenerateFieldLoadWithBakerReadBarrier call.
+ codegen_->GenerateFieldLoadWithBakerReadBarrier(instruction,
+ dst_loc,
+ obj,
+ offset,
+ temp_loc,
+ /* needs_null_check= */ true);
+ } else {
+ Load(dst_loc, obj, offset, type);
+ codegen_->MaybeRecordImplicitNullCheck(instruction);
+ }
+
+ if (is_volatile) {
+ codegen_->GenerateMemoryBarrier(MemBarrierKind::kLoadAny);
+ }
+
+ if (type == DataType::Type::kReference && !codegen_->EmitBakerReadBarrier()) {
+ // If read barriers are enabled, emit read barriers other than
+ // Baker's using a slow path (and also unpoison the loaded
+ // reference, if heap poisoning is enabled).
+ codegen_->MaybeGenerateReadBarrierSlow(instruction, dst_loc, dst_loc, obj_loc, offset);
+ }
+}
+
+void InstructionCodeGeneratorRISCV64::GenerateMethodEntryExitHook(HInstruction* instruction) {
+ SlowPathCodeRISCV64* slow_path =
+ new (codegen_->GetScopedAllocator()) MethodEntryExitHooksSlowPathRISCV64(instruction);
+ codegen_->AddSlowPath(slow_path);
+
+ ScratchRegisterScope temps(GetAssembler());
+ XRegister tmp = temps.AllocateXRegister();
+
+ if (instruction->IsMethodExitHook()) {
+    // Check whether we are required to check if the caller needs a deoptimization. Strictly
+    // speaking, it would be sufficient to check if the CheckCallerForDeopt bit is set, but it
+    // is faster to check for any non-zero value. The kCHA bit isn't used in debuggable runtimes
+    // because CHA optimization is disabled there. The other bit is used when this method itself
+    // requires a deoptimization due to redefinition, so it is safe to check for non-zero here.
+ __ Loadwu(tmp, SP, codegen_->GetStackOffsetOfShouldDeoptimizeFlag());
+ __ Bnez(tmp, slow_path->GetEntryLabel());
+ }
+
+ uint64_t hook_offset = instruction->IsMethodExitHook() ?
+ instrumentation::Instrumentation::HaveMethodExitListenersOffset().SizeValue() :
+ instrumentation::Instrumentation::HaveMethodEntryListenersOffset().SizeValue();
+ auto [base_hook_address, hook_imm12] = SplitJitAddress(
+ reinterpret_cast64<uint64_t>(Runtime::Current()->GetInstrumentation()) + hook_offset);
+ __ LoadConst64(tmp, base_hook_address);
+ __ Lbu(tmp, tmp, hook_imm12);
+ // Check if there are any method entry / exit listeners. If no, continue.
+ __ Beqz(tmp, slow_path->GetExitLabel());
+ // Check if there are any slow (jvmti / trace with thread cpu time) method entry / exit listeners.
+ // If yes, just take the slow path.
+ static_assert(instrumentation::Instrumentation::kFastTraceListeners == 1u);
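+  // With `kFastTraceListeners == 1`, zero after the subtraction means that only fast trace
+  // listeners are installed; any other value indicates slow listeners, so take the slow path.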
+ __ Addi(tmp, tmp, -1);
+ __ Bnez(tmp, slow_path->GetEntryLabel());
+
+  // Check if there is space in the buffer to store a new entry; if not, take the slow path.
+ int32_t trace_buffer_index_offset =
+ Thread::TraceBufferIndexOffset<kRiscv64PointerSize>().Int32Value();
+ __ Loadd(tmp, TR, trace_buffer_index_offset);
+ __ Addi(tmp, tmp, -dchecked_integral_cast<int32_t>(kNumEntriesForWallClock));
+ __ Bltz(tmp, slow_path->GetEntryLabel());
+
+ // Update the index in the `Thread`.
+ __ Stored(tmp, TR, trace_buffer_index_offset);
+
+  // Allocate a second core scratch register. We can no longer use `Stored()` and similar
+  // macro instructions because there is no free core scratch register left.
+ XRegister tmp2 = temps.AllocateXRegister();
+
+ // Calculate the entry address in the buffer.
+ // /*addr*/ tmp = TR->GetMethodTraceBuffer() + sizeof(void*) * /*index*/ tmp;
+ __ Loadd(tmp2, TR, Thread::TraceBufferPtrOffset<kRiscv64PointerSize>().SizeValue());
+ __ Sh3Add(tmp, tmp, tmp2);
+
+ // Record method pointer and trace action.
+ __ Ld(tmp2, SP, 0);
+ // Use last two bits to encode trace method action. For MethodEntry it is 0
+ // so no need to set the bits since they are 0 already.
+ DCHECK_GE(ArtMethod::Alignment(kRuntimePointerSize), static_cast<size_t>(4));
+ static_assert(enum_cast<int32_t>(TraceAction::kTraceMethodEnter) == 0);
+ static_assert(enum_cast<int32_t>(TraceAction::kTraceMethodExit) == 1);
+ if (instruction->IsMethodExitHook()) {
+ __ Ori(tmp2, tmp2, enum_cast<int32_t>(TraceAction::kTraceMethodExit));
+ }
+ static_assert(IsInt<12>(kMethodOffsetInBytes)); // No free scratch register for `Stored()`.
+ __ Sd(tmp2, tmp, kMethodOffsetInBytes);
+
+ // Record the timestamp.
+ __ RdTime(tmp2);
+ static_assert(IsInt<12>(kTimestampOffsetInBytes)); // No free scratch register for `Stored()`.
+ __ Sd(tmp2, tmp, kTimestampOffsetInBytes);
+
+ __ Bind(slow_path->GetExitLabel());
+}
+
+void LocationsBuilderRISCV64::VisitAbove(HAbove* instruction) {
+ HandleCondition(instruction);
+}
+
+void InstructionCodeGeneratorRISCV64::VisitAbove(HAbove* instruction) {
+ HandleCondition(instruction);
+}
+
+void LocationsBuilderRISCV64::VisitAboveOrEqual(HAboveOrEqual* instruction) {
+ HandleCondition(instruction);
+}
+
+void InstructionCodeGeneratorRISCV64::VisitAboveOrEqual(HAboveOrEqual* instruction) {
+ HandleCondition(instruction);
+}
+
+void LocationsBuilderRISCV64::VisitAbs(HAbs* abs) {
+ LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(abs);
+ switch (abs->GetResultType()) {
+ case DataType::Type::kInt32:
+ case DataType::Type::kInt64:
+ locations->SetInAt(0, Location::RequiresRegister());
+ locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
+ break;
+ case DataType::Type::kFloat32:
+ case DataType::Type::kFloat64:
+ locations->SetInAt(0, Location::RequiresFpuRegister());
+ locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
+ break;
+ default:
+ LOG(FATAL) << "Unexpected abs type " << abs->GetResultType();
+ }
+}
+
+void InstructionCodeGeneratorRISCV64::VisitAbs(HAbs* abs) {
+ LocationSummary* locations = abs->GetLocations();
+ switch (abs->GetResultType()) {
+ case DataType::Type::kInt32: {
+ XRegister in = locations->InAt(0).AsRegister<XRegister>();
+ XRegister out = locations->Out().AsRegister<XRegister>();
+ ScratchRegisterScope srs(GetAssembler());
+ XRegister tmp = srs.AllocateXRegister();
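+      // Branchless abs: `tmp` is all zeros for a non-negative input and all ones for a negative
+      // one, so `(in ^ tmp) - tmp` yields `in` or `-in`, respectively.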
+ __ Sraiw(tmp, in, 31);
+ __ Xor(out, in, tmp);
+ __ Subw(out, out, tmp);
+ break;
+ }
+ case DataType::Type::kInt64: {
+ XRegister in = locations->InAt(0).AsRegister<XRegister>();
+ XRegister out = locations->Out().AsRegister<XRegister>();
+ ScratchRegisterScope srs(GetAssembler());
+ XRegister tmp = srs.AllocateXRegister();
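+      // Same branchless abs as the 32-bit case above, using the 64-bit arithmetic shift.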
+ __ Srai(tmp, in, 63);
+ __ Xor(out, in, tmp);
+ __ Sub(out, out, tmp);
+ break;
+ }
+ case DataType::Type::kFloat32:
+ case DataType::Type::kFloat64:
+ FAbs(locations->Out().AsFpuRegister<FRegister>(),
+ locations->InAt(0).AsFpuRegister<FRegister>(),
+ abs->GetResultType());
+ break;
+ default:
+ LOG(FATAL) << "Unexpected abs type " << abs->GetResultType();
+ }
+}
+
+void LocationsBuilderRISCV64::VisitAdd(HAdd* instruction) {
+ HandleBinaryOp(instruction);
+}
+
+void InstructionCodeGeneratorRISCV64::VisitAdd(HAdd* instruction) {
+ HandleBinaryOp(instruction);
+}
+
+void LocationsBuilderRISCV64::VisitAnd(HAnd* instruction) {
+ HandleBinaryOp(instruction);
+}
+
+void InstructionCodeGeneratorRISCV64::VisitAnd(HAnd* instruction) {
+ HandleBinaryOp(instruction);
+}
+
+void LocationsBuilderRISCV64::VisitArrayGet(HArrayGet* instruction) {
+ DataType::Type type = instruction->GetType();
+ bool object_array_get_with_read_barrier =
+ (type == DataType::Type::kReference) && codegen_->EmitReadBarrier();
+ LocationSummary* locations = new (GetGraph()->GetAllocator())
+ LocationSummary(instruction,
+ object_array_get_with_read_barrier ? LocationSummary::kCallOnSlowPath :
+ LocationSummary::kNoCall);
+ locations->SetInAt(0, Location::RequiresRegister());
+ locations->SetInAt(1, Location::RegisterOrConstant(instruction->InputAt(1)));
+ if (DataType::IsFloatingPointType(type)) {
+ locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
+ } else {
+ // The output overlaps in the case of an object array get with
+ // read barriers enabled: we do not want the move to overwrite the
+ // array's location, as we need it to emit the read barrier.
+ locations->SetOut(
+ Location::RequiresRegister(),
+ object_array_get_with_read_barrier ? Location::kOutputOverlap : Location::kNoOutputOverlap);
+ }
+ if (object_array_get_with_read_barrier && kUseBakerReadBarrier) {
+ locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty()); // No caller-save registers.
+ // We need a temporary register for the read barrier marking slow
+ // path in CodeGeneratorRISCV64::GenerateArrayLoadWithBakerReadBarrier.
+ locations->AddTemp(Location::RequiresRegister());
+ }
+}
+
+void InstructionCodeGeneratorRISCV64::VisitArrayGet(HArrayGet* instruction) {
+ LocationSummary* locations = instruction->GetLocations();
+ Location obj_loc = locations->InAt(0);
+ XRegister obj = obj_loc.AsRegister<XRegister>();
+ Location out_loc = locations->Out();
+ Location index = locations->InAt(1);
+ uint32_t data_offset = CodeGenerator::GetArrayDataOffset(instruction);
+ DataType::Type type = instruction->GetType();
+ const bool maybe_compressed_char_at =
+ mirror::kUseStringCompression && instruction->IsStringCharAt();
+
+ Riscv64Label string_char_at_done;
+ if (maybe_compressed_char_at) {
+ DCHECK_EQ(type, DataType::Type::kUint16);
+ uint32_t count_offset = mirror::String::CountOffset().Uint32Value();
+ Riscv64Label uncompressed_load;
+ {
+ ScratchRegisterScope srs(GetAssembler());
+ XRegister tmp = srs.AllocateXRegister();
+ __ Loadw(tmp, obj, count_offset);
+ codegen_->MaybeRecordImplicitNullCheck(instruction);
+ __ Andi(tmp, tmp, 0x1);
+ static_assert(static_cast<uint32_t>(mirror::StringCompressionFlag::kCompressed) == 0u,
+ "Expecting 0=compressed, 1=uncompressed");
+ __ Bnez(tmp, &uncompressed_load);
+ }
+ XRegister out = out_loc.AsRegister<XRegister>();
+ if (index.IsConstant()) {
+ int32_t const_index = index.GetConstant()->AsIntConstant()->GetValue();
+ __ Loadbu(out, obj, data_offset + const_index);
+ } else {
+ __ Add(out, obj, index.AsRegister<XRegister>());
+ __ Loadbu(out, out, data_offset);
+ }
+ __ J(&string_char_at_done);
+ __ Bind(&uncompressed_load);
+ }
+
+ if (type == DataType::Type::kReference && codegen_->EmitBakerReadBarrier()) {
+ static_assert(
+ sizeof(mirror::HeapReference<mirror::Object>) == sizeof(int32_t),
+ "art::mirror::HeapReference<art::mirror::Object> and int32_t have different sizes.");
+ // /* HeapReference<Object> */ out =
+ // *(obj + data_offset + index * sizeof(HeapReference<Object>))
+ // Note that a potential implicit null check could be handled in these
+ // `CodeGeneratorRISCV64::Generate{Array,Field}LoadWithBakerReadBarrier()` calls
+ // but we currently do not support implicit null checks on `HArrayGet`.
+ DCHECK(!instruction->CanDoImplicitNullCheckOn(instruction->InputAt(0)));
+ Location temp = locations->GetTemp(0);
+ if (index.IsConstant()) {
+ // Array load with a constant index can be treated as a field load.
+ static constexpr size_t shift = DataType::SizeShift(DataType::Type::kReference);
+ size_t offset = (index.GetConstant()->AsIntConstant()->GetValue() << shift) + data_offset;
+ codegen_->GenerateFieldLoadWithBakerReadBarrier(instruction,
+ out_loc,
+ obj,
+ offset,
+ temp,
+ /* needs_null_check= */ false);
+ } else {
+ codegen_->GenerateArrayLoadWithBakerReadBarrier(instruction,
+ out_loc,
+ obj,
+ data_offset,
+ index,
+ temp,
+ /* needs_null_check= */ false);
+ }
+ } else if (index.IsConstant()) {
+ int32_t const_index = index.GetConstant()->AsIntConstant()->GetValue();
+ int32_t offset = data_offset + (const_index << DataType::SizeShift(type));
+ Load(out_loc, obj, offset, type);
+ if (!maybe_compressed_char_at) {
+ codegen_->MaybeRecordImplicitNullCheck(instruction);
+ }
+ if (type == DataType::Type::kReference) {
+ DCHECK(!codegen_->EmitBakerReadBarrier());
+ // If read barriers are enabled, emit read barriers other than Baker's using
+ // a slow path (and also unpoison the loaded reference, if heap poisoning is enabled).
+ codegen_->MaybeGenerateReadBarrierSlow(instruction, out_loc, out_loc, obj_loc, offset);
+ }
+ } else {
+ ScratchRegisterScope srs(GetAssembler());
+ XRegister tmp = srs.AllocateXRegister();
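+    // tmp = obj + (index << size shift); the data offset is applied by the `Load()` below.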
+ ShNAdd(tmp, index.AsRegister<XRegister>(), obj, type);
+ Load(out_loc, tmp, data_offset, type);
+ if (!maybe_compressed_char_at) {
+ codegen_->MaybeRecordImplicitNullCheck(instruction);
+ }
+ if (type == DataType::Type::kReference) {
+ DCHECK(!codegen_->EmitBakerReadBarrier());
+ // If read barriers are enabled, emit read barriers other than Baker's using
+ // a slow path (and also unpoison the loaded reference, if heap poisoning is enabled).
+ codegen_->MaybeGenerateReadBarrierSlow(
+ instruction, out_loc, out_loc, obj_loc, data_offset, index);
+ }
+ }
+
+ if (maybe_compressed_char_at) {
+ __ Bind(&string_char_at_done);
+ }
+}
+
+void LocationsBuilderRISCV64::VisitArrayLength(HArrayLength* instruction) {
+ LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(instruction);
+ locations->SetInAt(0, Location::RequiresRegister());
+ locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
+}
+
+void InstructionCodeGeneratorRISCV64::VisitArrayLength(HArrayLength* instruction) {
+ LocationSummary* locations = instruction->GetLocations();
+ uint32_t offset = CodeGenerator::GetArrayLengthOffset(instruction);
+ XRegister obj = locations->InAt(0).AsRegister<XRegister>();
+ XRegister out = locations->Out().AsRegister<XRegister>();
+ __ Loadwu(out, obj, offset); // Unsigned for string length; does not matter for other arrays.
+ codegen_->MaybeRecordImplicitNullCheck(instruction);
+ // Mask out compression flag from String's array length.
+ if (mirror::kUseStringCompression && instruction->IsStringLength()) {
+ __ Srli(out, out, 1u);
+ }
+}
+
+void LocationsBuilderRISCV64::VisitArraySet(HArraySet* instruction) {
+ bool needs_type_check = instruction->NeedsTypeCheck();
+ LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(
+ instruction,
+ needs_type_check ? LocationSummary::kCallOnSlowPath : LocationSummary::kNoCall);
+ locations->SetInAt(0, Location::RequiresRegister());
+ locations->SetInAt(1, Location::RegisterOrConstant(instruction->InputAt(1)));
+ locations->SetInAt(2, ValueLocationForStore(instruction->GetValue()));
+}
+
+void InstructionCodeGeneratorRISCV64::VisitArraySet(HArraySet* instruction) {
+ LocationSummary* locations = instruction->GetLocations();
+ XRegister array = locations->InAt(0).AsRegister<XRegister>();
+ Location index = locations->InAt(1);
+ Location value = locations->InAt(2);
+ DataType::Type value_type = instruction->GetComponentType();
+ bool needs_type_check = instruction->NeedsTypeCheck();
+ bool needs_write_barrier =
+ CodeGenerator::StoreNeedsWriteBarrier(value_type, instruction->GetValue());
+ size_t data_offset = mirror::Array::DataOffset(DataType::Size(value_type)).Uint32Value();
+ SlowPathCodeRISCV64* slow_path = nullptr;
+
+ if (needs_write_barrier) {
+ DCHECK_EQ(value_type, DataType::Type::kReference);
+ DCHECK(!value.IsConstant());
+ Riscv64Label do_store;
+
+ bool can_value_be_null = instruction->GetValueCanBeNull();
+ if (can_value_be_null) {
+ __ Beqz(value.AsRegister<XRegister>(), &do_store);
+ }
+
+ if (needs_type_check) {
+ slow_path = new (codegen_->GetScopedAllocator()) ArraySetSlowPathRISCV64(instruction);
+ codegen_->AddSlowPath(slow_path);
+
+ uint32_t class_offset = mirror::Object::ClassOffset().Int32Value();
+ uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value();
+ uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value();
+
+ ScratchRegisterScope srs(GetAssembler());
+ XRegister temp1 = srs.AllocateXRegister();
+ XRegister temp2 = srs.AllocateXRegister();
+
+ // Note that when read barriers are enabled, the type checks are performed
+ // without read barriers. This is fine, even in the case where a class object
+ // is in the from-space after the flip, as a comparison involving such a type
+ // would not produce a false positive; it may of course produce a false
+ // negative, in which case we would take the ArraySet slow path.
+
+ // /* HeapReference<Class> */ temp1 = array->klass_
+ __ Loadwu(temp1, array, class_offset);
+ codegen_->MaybeRecordImplicitNullCheck(instruction);
+ codegen_->MaybeUnpoisonHeapReference(temp1);
+
+ // /* HeapReference<Class> */ temp2 = temp1->component_type_
+ __ Loadwu(temp2, temp1, component_offset);
+ // /* HeapReference<Class> */ temp1 = value->klass_
+ __ Loadwu(temp1, value.AsRegister<XRegister>(), class_offset);
+ // If heap poisoning is enabled, no need to unpoison `temp1`
+ // nor `temp2`, as we are comparing two poisoned references.
+ if (instruction->StaticTypeOfArrayIsObjectArray()) {
+ Riscv64Label do_put;
+ __ Beq(temp1, temp2, &do_put);
+ // If heap poisoning is enabled, the `temp2` reference has
+ // not been unpoisoned yet; unpoison it now.
+ codegen_->MaybeUnpoisonHeapReference(temp2);
+
+ // /* HeapReference<Class> */ temp1 = temp2->super_class_
+ __ Loadwu(temp1, temp2, super_offset);
+ // If heap poisoning is enabled, no need to unpoison
+ // `temp1`, as we are comparing against null below.
+ __ Bnez(temp1, slow_path->GetEntryLabel());
+ __ Bind(&do_put);
+ } else {
+ __ Bne(temp1, temp2, slow_path->GetEntryLabel());
+ }
+ }
+
+ if (instruction->GetWriteBarrierKind() != WriteBarrierKind::kDontEmit) {
+ DCHECK_EQ(instruction->GetWriteBarrierKind(), WriteBarrierKind::kEmitNoNullCheck)
+ << " Already null checked so we shouldn't do it again.";
+ codegen_->MarkGCCard(array, value.AsRegister<XRegister>(), /* value_can_be_null= */ false);
+ }
+
+ if (can_value_be_null) {
+ __ Bind(&do_store);
+ }
+ }
+
+ if (index.IsConstant()) {
+ int32_t const_index = index.GetConstant()->AsIntConstant()->GetValue();
+ int32_t offset = data_offset + (const_index << DataType::SizeShift(value_type));
+ Store(value, array, offset, value_type);
+ } else {
+ ScratchRegisterScope srs(GetAssembler());
+ XRegister tmp = srs.AllocateXRegister();
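+    // tmp = array + (index << size shift); the data offset is applied by the `Store()` below.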
+ ShNAdd(tmp, index.AsRegister<XRegister>(), array, value_type);
+ Store(value, tmp, data_offset, value_type);
+ }
+ // There must be no instructions between the `Store()` and the `MaybeRecordImplicitNullCheck()`.
+  // We can skip recording the implicit null check below if the type check above has already
+  // performed the null check unconditionally, i.e. a type check was emitted and the value
+  // cannot be null.
+ DCHECK_IMPLIES(needs_type_check, needs_write_barrier);
+ if (!(needs_type_check && !instruction->GetValueCanBeNull())) {
+ codegen_->MaybeRecordImplicitNullCheck(instruction);
+ }
+
+ if (slow_path != nullptr) {
+ __ Bind(slow_path->GetExitLabel());
+ }
+}
+
+void LocationsBuilderRISCV64::VisitBelow(HBelow* instruction) {
+ HandleCondition(instruction);
+}
+
+void InstructionCodeGeneratorRISCV64::VisitBelow(HBelow* instruction) {
+ HandleCondition(instruction);
+}
+
+void LocationsBuilderRISCV64::VisitBelowOrEqual(HBelowOrEqual* instruction) {
+ HandleCondition(instruction);
+}
+
+void InstructionCodeGeneratorRISCV64::VisitBelowOrEqual(HBelowOrEqual* instruction) {
+ HandleCondition(instruction);
+}
+
+void LocationsBuilderRISCV64::VisitBooleanNot(HBooleanNot* instruction) {
+ LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(instruction);
+ locations->SetInAt(0, Location::RequiresRegister());
+ locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
+}
+
+void InstructionCodeGeneratorRISCV64::VisitBooleanNot(HBooleanNot* instruction) {
+ LocationSummary* locations = instruction->GetLocations();
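+  // Booleans are materialized as 0 or 1, so flipping the lowest bit implements the negation.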
+ __ Xori(locations->Out().AsRegister<XRegister>(), locations->InAt(0).AsRegister<XRegister>(), 1);
+}
+
+void LocationsBuilderRISCV64::VisitBoundsCheck(HBoundsCheck* instruction) {
+ RegisterSet caller_saves = RegisterSet::Empty();
+ InvokeRuntimeCallingConvention calling_convention;
+ caller_saves.Add(Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
+ caller_saves.Add(Location::RegisterLocation(calling_convention.GetRegisterAt(1)));
+ LocationSummary* locations = codegen_->CreateThrowingSlowPathLocations(instruction, caller_saves);
+
+ HInstruction* index = instruction->InputAt(0);
+ HInstruction* length = instruction->InputAt(1);
+
+ bool const_index = false;
+ bool const_length = false;
+
+ if (length->IsConstant()) {
+ if (index->IsConstant()) {
+ const_index = true;
+ const_length = true;
+ } else {
+ int32_t length_value = length->AsIntConstant()->GetValue();
+ if (length_value == 0 || length_value == 1) {
+ const_length = true;
+ }
+ }
+ } else if (index->IsConstant()) {
+ int32_t index_value = index->AsIntConstant()->GetValue();
+ if (index_value <= 0) {
+ const_index = true;
+ }
+ }
+
+ locations->SetInAt(
+ 0,
+ const_index ? Location::ConstantLocation(index) : Location::RequiresRegister());
+ locations->SetInAt(
+ 1,
+ const_length ? Location::ConstantLocation(length) : Location::RequiresRegister());
+}
+
+void InstructionCodeGeneratorRISCV64::VisitBoundsCheck(HBoundsCheck* instruction) {
+ LocationSummary* locations = instruction->GetLocations();
+ Location index_loc = locations->InAt(0);
+ Location length_loc = locations->InAt(1);
+
+ if (length_loc.IsConstant()) {
+ int32_t length = length_loc.GetConstant()->AsIntConstant()->GetValue();
+ if (index_loc.IsConstant()) {
+ int32_t index = index_loc.GetConstant()->AsIntConstant()->GetValue();
+ if (index < 0 || index >= length) {
+ BoundsCheckSlowPathRISCV64* slow_path =
+ new (codegen_->GetScopedAllocator()) BoundsCheckSlowPathRISCV64(instruction);
+ codegen_->AddSlowPath(slow_path);
+ __ J(slow_path->GetEntryLabel());
+ } else {
+ // Nothing to be done.
+ }
+ return;
+ }
+
+ BoundsCheckSlowPathRISCV64* slow_path =
+ new (codegen_->GetScopedAllocator()) BoundsCheckSlowPathRISCV64(instruction);
+ codegen_->AddSlowPath(slow_path);
+ XRegister index = index_loc.AsRegister<XRegister>();
+ if (length == 0) {
+ __ J(slow_path->GetEntryLabel());
+ } else {
+ DCHECK_EQ(length, 1);
+ __ Bnez(index, slow_path->GetEntryLabel());
+ }
+ } else {
+ XRegister length = length_loc.AsRegister<XRegister>();
+ BoundsCheckSlowPathRISCV64* slow_path =
+ new (codegen_->GetScopedAllocator()) BoundsCheckSlowPathRISCV64(instruction);
+ codegen_->AddSlowPath(slow_path);
+ if (index_loc.IsConstant()) {
+ int32_t index = index_loc.GetConstant()->AsIntConstant()->GetValue();
+ if (index < 0) {
+ __ J(slow_path->GetEntryLabel());
+ } else {
+ DCHECK_EQ(index, 0);
+ __ Blez(length, slow_path->GetEntryLabel());
+ }
+ } else {
+ XRegister index = index_loc.AsRegister<XRegister>();
+ __ Bgeu(index, length, slow_path->GetEntryLabel());
+ }
+ }
+}
+
+void LocationsBuilderRISCV64::VisitBoundType([[maybe_unused]] HBoundType* instruction) {
+ // Nothing to do, this should be removed during prepare for register allocator.
+ LOG(FATAL) << "Unreachable";
+}
+
+void InstructionCodeGeneratorRISCV64::VisitBoundType([[maybe_unused]] HBoundType* instruction) {
+ // Nothing to do, this should be removed during prepare for register allocator.
+ LOG(FATAL) << "Unreachable";
+}
+
+// Temp is used for read barrier.
+static size_t NumberOfInstanceOfTemps(bool emit_read_barrier, TypeCheckKind type_check_kind) {
+ if (emit_read_barrier &&
+ (kUseBakerReadBarrier ||
+ type_check_kind == TypeCheckKind::kAbstractClassCheck ||
+ type_check_kind == TypeCheckKind::kClassHierarchyCheck ||
+ type_check_kind == TypeCheckKind::kArrayObjectCheck)) {
+ return 1;
+ }
+ return 0;
+}
+
+// Interface case has 3 temps, one for holding the number of interfaces, one for the current
+// interface pointer, one for loading the current interface.
+// The other checks have one temp for loading the object's class and maybe a temp for read barrier.
+static size_t NumberOfCheckCastTemps(bool emit_read_barrier, TypeCheckKind type_check_kind) {
+ if (type_check_kind == TypeCheckKind::kInterfaceCheck) {
+ return 3;
+ }
+ return 1 + NumberOfInstanceOfTemps(emit_read_barrier, type_check_kind);
+}
+
+void LocationsBuilderRISCV64::VisitCheckCast(HCheckCast* instruction) {
+ TypeCheckKind type_check_kind = instruction->GetTypeCheckKind();
+ LocationSummary::CallKind call_kind = codegen_->GetCheckCastCallKind(instruction);
+ LocationSummary* locations =
+ new (GetGraph()->GetAllocator()) LocationSummary(instruction, call_kind);
+ locations->SetInAt(0, Location::RequiresRegister());
+ if (type_check_kind == TypeCheckKind::kBitstringCheck) {
+ locations->SetInAt(1, Location::ConstantLocation(instruction->InputAt(1)));
+ locations->SetInAt(2, Location::ConstantLocation(instruction->InputAt(2)));
+ locations->SetInAt(3, Location::ConstantLocation(instruction->InputAt(3)));
+ } else {
+ locations->SetInAt(1, Location::RequiresRegister());
+ }
+ locations->AddRegisterTemps(NumberOfCheckCastTemps(codegen_->EmitReadBarrier(), type_check_kind));
+}
+
+void InstructionCodeGeneratorRISCV64::VisitCheckCast(HCheckCast* instruction) {
+  TypeCheckKind type_check_kind = instruction->GetTypeCheckKind();
+ LocationSummary* locations = instruction->GetLocations();
+ Location obj_loc = locations->InAt(0);
+ XRegister obj = obj_loc.AsRegister<XRegister>();
+ Location cls = (type_check_kind == TypeCheckKind::kBitstringCheck)
+ ? Location::NoLocation()
+ : locations->InAt(1);
+ Location temp_loc = locations->GetTemp(0);
+ XRegister temp = temp_loc.AsRegister<XRegister>();
+ const size_t num_temps = NumberOfCheckCastTemps(codegen_->EmitReadBarrier(), type_check_kind);
+ DCHECK_GE(num_temps, 1u);
+ DCHECK_LE(num_temps, 3u);
+ Location maybe_temp2_loc = (num_temps >= 2) ? locations->GetTemp(1) : Location::NoLocation();
+ Location maybe_temp3_loc = (num_temps >= 3) ? locations->GetTemp(2) : Location::NoLocation();
+ const uint32_t class_offset = mirror::Object::ClassOffset().Int32Value();
+ const uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value();
+ const uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value();
+ const uint32_t primitive_offset = mirror::Class::PrimitiveTypeOffset().Int32Value();
+ const uint32_t iftable_offset = mirror::Class::IfTableOffset().Uint32Value();
+ const uint32_t array_length_offset = mirror::Array::LengthOffset().Uint32Value();
+ const uint32_t object_array_data_offset =
+ mirror::Array::DataOffset(kHeapReferenceSize).Uint32Value();
+ Riscv64Label done;
+
+ bool is_type_check_slow_path_fatal = codegen_->IsTypeCheckSlowPathFatal(instruction);
+ SlowPathCodeRISCV64* slow_path =
+ new (codegen_->GetScopedAllocator()) TypeCheckSlowPathRISCV64(
+ instruction, is_type_check_slow_path_fatal);
+ codegen_->AddSlowPath(slow_path);
+
+ // Avoid this check if we know `obj` is not null.
+ if (instruction->MustDoNullCheck()) {
+ __ Beqz(obj, &done);
+ }
+
+ switch (type_check_kind) {
+ case TypeCheckKind::kExactCheck:
+ case TypeCheckKind::kArrayCheck: {
+ // /* HeapReference<Class> */ temp = obj->klass_
+ GenerateReferenceLoadTwoRegisters(instruction,
+ temp_loc,
+ obj_loc,
+ class_offset,
+ maybe_temp2_loc,
+ kWithoutReadBarrier);
+ // Jump to slow path for throwing the exception or doing a
+ // more involved array check.
+ __ Bne(temp, cls.AsRegister<XRegister>(), slow_path->GetEntryLabel());
+ break;
+ }
+
+ case TypeCheckKind::kAbstractClassCheck: {
+ // /* HeapReference<Class> */ temp = obj->klass_
+ GenerateReferenceLoadTwoRegisters(instruction,
+ temp_loc,
+ obj_loc,
+ class_offset,
+ maybe_temp2_loc,
+ kWithoutReadBarrier);
+ // If the class is abstract, we eagerly fetch the super class of the
+ // object to avoid doing a comparison we know will fail.
+ Riscv64Label loop;
+ __ Bind(&loop);
+ // /* HeapReference<Class> */ temp = temp->super_class_
+ GenerateReferenceLoadOneRegister(instruction,
+ temp_loc,
+ super_offset,
+ maybe_temp2_loc,
+ kWithoutReadBarrier);
+ // If the class reference currently in `temp` is null, jump to the slow path to throw the
+ // exception.
+ __ Beqz(temp, slow_path->GetEntryLabel());
+ // Otherwise, compare the classes.
+ __ Bne(temp, cls.AsRegister<XRegister>(), &loop);
+ break;
+ }
+
+ case TypeCheckKind::kClassHierarchyCheck: {
+ // /* HeapReference<Class> */ temp = obj->klass_
+ GenerateReferenceLoadTwoRegisters(instruction,
+ temp_loc,
+ obj_loc,
+ class_offset,
+ maybe_temp2_loc,
+ kWithoutReadBarrier);
+ // Walk over the class hierarchy to find a match.
+ Riscv64Label loop;
+ __ Bind(&loop);
+ __ Beq(temp, cls.AsRegister<XRegister>(), &done);
+ // /* HeapReference<Class> */ temp = temp->super_class_
+ GenerateReferenceLoadOneRegister(instruction,
+ temp_loc,
+ super_offset,
+ maybe_temp2_loc,
+ kWithoutReadBarrier);
+ // If the class reference currently in `temp` is null, jump to the slow path to throw the
+ // exception. Otherwise, jump to the beginning of the loop.
+ __ Bnez(temp, &loop);
+ __ J(slow_path->GetEntryLabel());
+ break;
+ }
+
+ case TypeCheckKind::kArrayObjectCheck: {
+ // /* HeapReference<Class> */ temp = obj->klass_
+ GenerateReferenceLoadTwoRegisters(instruction,
+ temp_loc,
+ obj_loc,
+ class_offset,
+ maybe_temp2_loc,
+ kWithoutReadBarrier);
+ // Do an exact check.
+ __ Beq(temp, cls.AsRegister<XRegister>(), &done);
+ // Otherwise, we need to check that the object's class is a non-primitive array.
+ // /* HeapReference<Class> */ temp = temp->component_type_
+ GenerateReferenceLoadOneRegister(instruction,
+ temp_loc,
+ component_offset,
+ maybe_temp2_loc,
+ kWithoutReadBarrier);
+ // If the component type is null, jump to the slow path to throw the exception.
+ __ Beqz(temp, slow_path->GetEntryLabel());
+ // Otherwise, the object is indeed an array, further check that this component
+ // type is not a primitive type.
+ __ Loadhu(temp, temp, primitive_offset);
+ static_assert(Primitive::kPrimNot == 0, "Expected 0 for kPrimNot");
+ __ Bnez(temp, slow_path->GetEntryLabel());
+ break;
+ }
+
+ case TypeCheckKind::kUnresolvedCheck:
+ // We always go into the type check slow path for the unresolved check case.
+ // We cannot directly call the CheckCast runtime entry point
+ // without resorting to a type checking slow path here (i.e. by
+ // calling InvokeRuntime directly), as it would require to
+ // assign fixed registers for the inputs of this HInstanceOf
+ // instruction (following the runtime calling convention), which
+ // might be cluttered by the potential first read barrier
+ // emission at the beginning of this method.
+ __ J(slow_path->GetEntryLabel());
+ break;
+
+ case TypeCheckKind::kInterfaceCheck: {
+      // Avoid read barriers to improve the performance of the fast path. We cannot get false
+      // positives by doing this. False negatives are handled by the slow path.
+ // /* HeapReference<Class> */ temp = obj->klass_
+ GenerateReferenceLoadTwoRegisters(instruction,
+ temp_loc,
+ obj_loc,
+ class_offset,
+ maybe_temp2_loc,
+ kWithoutReadBarrier);
+ // /* HeapReference<Class> */ temp = temp->iftable_
+ GenerateReferenceLoadOneRegister(instruction,
+ temp_loc,
+ iftable_offset,
+ maybe_temp2_loc,
+ kWithoutReadBarrier);
+ XRegister temp2 = maybe_temp2_loc.AsRegister<XRegister>();
+ XRegister temp3 = maybe_temp3_loc.AsRegister<XRegister>();
+ // Iftable is never null.
+ __ Loadw(temp2, temp, array_length_offset);
+ // Loop through the iftable and check if any class matches.
+ Riscv64Label loop;
+ __ Bind(&loop);
+ __ Beqz(temp2, slow_path->GetEntryLabel());
+ __ Lwu(temp3, temp, object_array_data_offset);
+ codegen_->MaybeUnpoisonHeapReference(temp3);
+ // Go to next interface.
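+      // Each iftable entry is an (interface class, method array) pair, hence the stride of two
+      // heap references and the decrement of the element count by two.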
+ __ Addi(temp, temp, 2 * kHeapReferenceSize);
+ __ Addi(temp2, temp2, -2);
+ // Compare the classes and continue the loop if they do not match.
+ __ Bne(temp3, cls.AsRegister<XRegister>(), &loop);
+ break;
+ }
+
+ case TypeCheckKind::kBitstringCheck: {
+ // /* HeapReference<Class> */ temp = obj->klass_
+ GenerateReferenceLoadTwoRegisters(instruction,
+ temp_loc,
+ obj_loc,
+ class_offset,
+ maybe_temp2_loc,
+ kWithoutReadBarrier);
+
+ GenerateBitstringTypeCheckCompare(instruction, temp);
+ __ Bnez(temp, slow_path->GetEntryLabel());
+ break;
+ }
+ }
+
+ __ Bind(&done);
+ __ Bind(slow_path->GetExitLabel());
+}
+
+void LocationsBuilderRISCV64::VisitClassTableGet(HClassTableGet* instruction) {
+ LocationSummary* locations =
+ new (GetGraph()->GetAllocator()) LocationSummary(instruction, LocationSummary::kNoCall);
+ locations->SetInAt(0, Location::RequiresRegister());
+ locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
+}
+
+void InstructionCodeGeneratorRISCV64::VisitClassTableGet(HClassTableGet* instruction) {
+ LocationSummary* locations = instruction->GetLocations();
+ XRegister in = locations->InAt(0).AsRegister<XRegister>();
+ XRegister out = locations->Out().AsRegister<XRegister>();
+ if (instruction->GetTableKind() == HClassTableGet::TableKind::kVTable) {
+ MemberOffset method_offset =
+ mirror::Class::EmbeddedVTableEntryOffset(instruction->GetIndex(), kRiscv64PointerSize);
+ __ Loadd(out, in, method_offset.SizeValue());
+ } else {
+ uint32_t method_offset = dchecked_integral_cast<uint32_t>(
+ ImTable::OffsetOfElement(instruction->GetIndex(), kRiscv64PointerSize));
+ __ Loadd(out, in, mirror::Class::ImtPtrOffset(kRiscv64PointerSize).Uint32Value());
+ __ Loadd(out, out, method_offset);
+ }
+}
+
+static int32_t GetExceptionTlsOffset() {
+ return Thread::ExceptionOffset<kRiscv64PointerSize>().Int32Value();
+}
+
+void LocationsBuilderRISCV64::VisitClearException(HClearException* instruction) {
+ new (GetGraph()->GetAllocator()) LocationSummary(instruction, LocationSummary::kNoCall);
+}
+
+void InstructionCodeGeneratorRISCV64::VisitClearException(
+ [[maybe_unused]] HClearException* instruction) {
+ __ Stored(Zero, TR, GetExceptionTlsOffset());
+}
+
+void LocationsBuilderRISCV64::VisitClinitCheck(HClinitCheck* instruction) {
+ LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(
+ instruction, LocationSummary::kCallOnSlowPath);
+ locations->SetInAt(0, Location::RequiresRegister());
+ if (instruction->HasUses()) {
+ locations->SetOut(Location::SameAsFirstInput());
+ }
+ // Rely on the type initialization to save everything we need.
+ locations->SetCustomSlowPathCallerSaves(OneRegInReferenceOutSaveEverythingCallerSaves());
+}
+
+void InstructionCodeGeneratorRISCV64::VisitClinitCheck(HClinitCheck* instruction) {
+ // We assume the class is not null.
+ SlowPathCodeRISCV64* slow_path = new (codegen_->GetScopedAllocator()) LoadClassSlowPathRISCV64(
+ instruction->GetLoadClass(), instruction);
+ codegen_->AddSlowPath(slow_path);
+ GenerateClassInitializationCheck(slow_path,
+ instruction->GetLocations()->InAt(0).AsRegister<XRegister>());
+}
+
+void LocationsBuilderRISCV64::VisitCompare(HCompare* instruction) {
+ DataType::Type in_type = instruction->InputAt(0)->GetType();
+
+ LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(instruction);
+
+ switch (in_type) {
+ case DataType::Type::kBool:
+ case DataType::Type::kUint8:
+ case DataType::Type::kInt8:
+ case DataType::Type::kUint16:
+ case DataType::Type::kInt16:
+ case DataType::Type::kInt32:
+ case DataType::Type::kInt64:
+ locations->SetInAt(0, Location::RequiresRegister());
+ locations->SetInAt(1, RegisterOrZeroBitPatternLocation(instruction->InputAt(1)));
+ locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
+ break;
+
+ case DataType::Type::kFloat32:
+ case DataType::Type::kFloat64:
+ locations->SetInAt(0, Location::RequiresFpuRegister());
+ locations->SetInAt(1, Location::RequiresFpuRegister());
+ locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
+ break;
+
+ default:
+ LOG(FATAL) << "Unexpected type for compare operation " << in_type;
+ UNREACHABLE();
+ }
+}
+
+void InstructionCodeGeneratorRISCV64::VisitCompare(HCompare* instruction) {
+ LocationSummary* locations = instruction->GetLocations();
+ XRegister result = locations->Out().AsRegister<XRegister>();
+ DataType::Type in_type = instruction->InputAt(0)->GetType();
+
+ // 0 if: left == right
+ // 1 if: left > right
+ // -1 if: left < right
+ switch (in_type) {
+ case DataType::Type::kBool:
+ case DataType::Type::kUint8:
+ case DataType::Type::kInt8:
+ case DataType::Type::kUint16:
+ case DataType::Type::kInt16:
+ case DataType::Type::kInt32:
+ case DataType::Type::kInt64: {
+ XRegister left = locations->InAt(0).AsRegister<XRegister>();
+ XRegister right = InputXRegisterOrZero(locations->InAt(1));
+ ScratchRegisterScope srs(GetAssembler());
+ XRegister tmp = srs.AllocateXRegister();
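+ // For illustration, the Slt/Slt/Sub idiom below yields the -1/0/1 result:
+ // left=3, right=5: tmp=1, result=0, 0-1 = -1;
+ // left=5, right=3: tmp=0, result=1, 1-0 = 1;
+ // left==right: both 0, 0-0 = 0.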
+ __ Slt(tmp, left, right);
+ __ Slt(result, right, left);
+ __ Sub(result, result, tmp);
+ break;
+ }
+
+ case DataType::Type::kFloat32:
+ case DataType::Type::kFloat64: {
+ FRegister left = locations->InAt(0).AsFpuRegister<FRegister>();
+ FRegister right = locations->InAt(1).AsFpuRegister<FRegister>();
+ ScratchRegisterScope srs(GetAssembler());
+ XRegister tmp = srs.AllocateXRegister();
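+ // Illustrative note: FLE/FLT write 0 when either input is NaN, so with
+ // gt bias an unordered compare yields (0 ^ 1) - 0 = 1 and with lt bias
+ // (0 - 1) + 0 = -1, which is exactly the bias requested by HCompare.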
+ if (instruction->IsGtBias()) {
+ // ((FLE l,r) ^ 1) - (FLT l,r); see `GenerateFpCondition()`.
+ FLe(tmp, left, right, in_type);
+ FLt(result, left, right, in_type);
+ __ Xori(tmp, tmp, 1);
+ __ Sub(result, tmp, result);
+ } else {
+ // ((FLE r,l) - 1) + (FLT r,l); see `GenerateFpCondition()`.
+ FLe(tmp, right, left, in_type);
+ FLt(result, right, left, in_type);
+ __ Addi(tmp, tmp, -1);
+ __ Add(result, result, tmp);
+ }
+ break;
+ }
+
+ default:
+ LOG(FATAL) << "Unimplemented compare type " << in_type;
+ }
+}
+
+void LocationsBuilderRISCV64::VisitConstructorFence(HConstructorFence* instruction) {
+ instruction->SetLocations(nullptr);
+}
+
+void InstructionCodeGeneratorRISCV64::VisitConstructorFence(
+ [[maybe_unused]] HConstructorFence* instruction) {
+ codegen_->GenerateMemoryBarrier(MemBarrierKind::kStoreStore);
+}
+
+void LocationsBuilderRISCV64::VisitCurrentMethod(HCurrentMethod* instruction) {
+ LocationSummary* locations =
+ new (GetGraph()->GetAllocator()) LocationSummary(instruction, LocationSummary::kNoCall);
+ locations->SetOut(Location::RegisterLocation(kArtMethodRegister));
+}
+
+void InstructionCodeGeneratorRISCV64::VisitCurrentMethod(
+ [[maybe_unused]] HCurrentMethod* instruction) {
+ // Nothing to do, the method is already at its location.
+}
+
+void LocationsBuilderRISCV64::VisitShouldDeoptimizeFlag(HShouldDeoptimizeFlag* instruction) {
+ LocationSummary* locations =
+ new (GetGraph()->GetAllocator()) LocationSummary(instruction, LocationSummary::kNoCall);
+ locations->SetOut(Location::RequiresRegister());
+}
+
+void InstructionCodeGeneratorRISCV64::VisitShouldDeoptimizeFlag(
+ HShouldDeoptimizeFlag* instruction) {
+ __ Loadw(instruction->GetLocations()->Out().AsRegister<XRegister>(),
+ SP,
+ codegen_->GetStackOffsetOfShouldDeoptimizeFlag());
+}
+
+void LocationsBuilderRISCV64::VisitDeoptimize(HDeoptimize* instruction) {
+ LocationSummary* locations = new (GetGraph()->GetAllocator())
+ LocationSummary(instruction, LocationSummary::kCallOnSlowPath);
+ InvokeRuntimeCallingConvention calling_convention;
+ RegisterSet caller_saves = RegisterSet::Empty();
+ caller_saves.Add(Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
+ locations->SetCustomSlowPathCallerSaves(caller_saves);
+ if (IsBooleanValueOrMaterializedCondition(instruction->InputAt(0))) {
+ locations->SetInAt(0, Location::RequiresRegister());
+ }
+}
+
+void InstructionCodeGeneratorRISCV64::VisitDeoptimize(HDeoptimize* instruction) {
+ SlowPathCodeRISCV64* slow_path =
+ deopt_slow_paths_.NewSlowPath<DeoptimizationSlowPathRISCV64>(instruction);
+ GenerateTestAndBranch(instruction,
+ /* condition_input_index= */ 0,
+ slow_path->GetEntryLabel(),
+ /* false_target= */ nullptr);
+}
+
+void LocationsBuilderRISCV64::VisitDiv(HDiv* instruction) {
+ LocationSummary* locations =
+ new (GetGraph()->GetAllocator()) LocationSummary(instruction, LocationSummary::kNoCall);
+ switch (instruction->GetResultType()) {
+ case DataType::Type::kInt32:
+ case DataType::Type::kInt64:
+ locations->SetInAt(0, Location::RequiresRegister());
+ locations->SetInAt(1, Location::RegisterOrConstant(instruction->InputAt(1)));
+ locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
+ break;
+
+ case DataType::Type::kFloat32:
+ case DataType::Type::kFloat64:
+ locations->SetInAt(0, Location::RequiresFpuRegister());
+ locations->SetInAt(1, Location::RequiresFpuRegister());
+ locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
+ break;
+
+ default:
+ LOG(FATAL) << "Unexpected div type " << instruction->GetResultType();
+ UNREACHABLE();
+ }
+}
+
+void InstructionCodeGeneratorRISCV64::VisitDiv(HDiv* instruction) {
+ DataType::Type type = instruction->GetType();
+ LocationSummary* locations = instruction->GetLocations();
+
+ switch (type) {
+ case DataType::Type::kInt32:
+ case DataType::Type::kInt64:
+ GenerateDivRemIntegral(instruction);
+ break;
+ case DataType::Type::kFloat32:
+ case DataType::Type::kFloat64: {
+ FRegister dst = locations->Out().AsFpuRegister<FRegister>();
+ FRegister lhs = locations->InAt(0).AsFpuRegister<FRegister>();
+ FRegister rhs = locations->InAt(1).AsFpuRegister<FRegister>();
+ FDiv(dst, lhs, rhs, type);
+ break;
+ }
+ default:
+ LOG(FATAL) << "Unexpected div type " << type;
+ UNREACHABLE();
+ }
+}
+
+void LocationsBuilderRISCV64::VisitDivZeroCheck(HDivZeroCheck* instruction) {
+ LocationSummary* locations = codegen_->CreateThrowingSlowPathLocations(instruction);
+ locations->SetInAt(0, Location::RegisterOrConstant(instruction->InputAt(0)));
+}
+
+void InstructionCodeGeneratorRISCV64::VisitDivZeroCheck(HDivZeroCheck* instruction) {
+ SlowPathCodeRISCV64* slow_path =
+ new (codegen_->GetScopedAllocator()) DivZeroCheckSlowPathRISCV64(instruction);
+ codegen_->AddSlowPath(slow_path);
+ Location value = instruction->GetLocations()->InAt(0);
+
+ DataType::Type type = instruction->GetType();
+
+ if (!DataType::IsIntegralType(type)) {
+ LOG(FATAL) << "Unexpected type " << type << " for DivZeroCheck.";
+ UNREACHABLE();
+ }
+
+ if (value.IsConstant()) {
+ int64_t divisor = codegen_->GetInt64ValueOf(value.GetConstant()->AsConstant());
+ if (divisor == 0) {
+ __ J(slow_path->GetEntryLabel());
+ } else {
+ // A division by a non-zero constant is valid. We don't need to perform
+ // any check, so simply fall through.
+ }
+ } else {
+ __ Beqz(value.AsRegister<XRegister>(), slow_path->GetEntryLabel());
+ }
+}
+
+void LocationsBuilderRISCV64::VisitDoubleConstant(HDoubleConstant* instruction) {
+ LocationSummary* locations =
+ new (GetGraph()->GetAllocator()) LocationSummary(instruction, LocationSummary::kNoCall);
+ locations->SetOut(Location::ConstantLocation(instruction));
+}
+
+void InstructionCodeGeneratorRISCV64::VisitDoubleConstant(
+ [[maybe_unused]] HDoubleConstant* instruction) {
+ // Will be generated at use site.
+}
+
+void LocationsBuilderRISCV64::VisitEqual(HEqual* instruction) {
+ HandleCondition(instruction);
+}
+
+void InstructionCodeGeneratorRISCV64::VisitEqual(HEqual* instruction) {
+ HandleCondition(instruction);
+}
+
+void LocationsBuilderRISCV64::VisitExit(HExit* instruction) {
+ instruction->SetLocations(nullptr);
+}
+
+void InstructionCodeGeneratorRISCV64::VisitExit([[maybe_unused]] HExit* instruction) {}
+
+void LocationsBuilderRISCV64::VisitFloatConstant(HFloatConstant* instruction) {
+ LocationSummary* locations =
+ new (GetGraph()->GetAllocator()) LocationSummary(instruction, LocationSummary::kNoCall);
+ locations->SetOut(Location::ConstantLocation(instruction));
+}
+
+void InstructionCodeGeneratorRISCV64::VisitFloatConstant(
+ [[maybe_unused]] HFloatConstant* instruction) {
+ // Will be generated at use site.
+}
+
+void LocationsBuilderRISCV64::VisitGoto(HGoto* instruction) {
+ instruction->SetLocations(nullptr);
+}
+
+void InstructionCodeGeneratorRISCV64::VisitGoto(HGoto* instruction) {
+ HandleGoto(instruction, instruction->GetSuccessor());
+}
+
+void LocationsBuilderRISCV64::VisitGreaterThan(HGreaterThan* instruction) {
+ HandleCondition(instruction);
+}
+
+void InstructionCodeGeneratorRISCV64::VisitGreaterThan(HGreaterThan* instruction) {
+ HandleCondition(instruction);
+}
+
+void LocationsBuilderRISCV64::VisitGreaterThanOrEqual(HGreaterThanOrEqual* instruction) {
+ HandleCondition(instruction);
+}
+
+void InstructionCodeGeneratorRISCV64::VisitGreaterThanOrEqual(HGreaterThanOrEqual* instruction) {
+ HandleCondition(instruction);
+}
+
+void LocationsBuilderRISCV64::VisitIf(HIf* instruction) {
+ LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(instruction);
+ if (IsBooleanValueOrMaterializedCondition(instruction->InputAt(0))) {
+ locations->SetInAt(0, Location::RequiresRegister());
+ if (GetGraph()->IsCompilingBaseline() &&
+ codegen_->GetCompilerOptions().ProfileBranches() &&
+ !Runtime::Current()->IsAotCompiler()) {
+ DCHECK(instruction->InputAt(0)->IsCondition());
+ ProfilingInfo* info = GetGraph()->GetProfilingInfo();
+ DCHECK(info != nullptr);
+ BranchCache* cache = info->GetBranchCache(instruction->GetDexPc());
+ if (cache != nullptr) {
+ locations->AddTemp(Location::RequiresRegister());
+ }
+ }
+ }
+}
+
+void InstructionCodeGeneratorRISCV64::VisitIf(HIf* instruction) {
+ HBasicBlock* true_successor = instruction->IfTrueSuccessor();
+ HBasicBlock* false_successor = instruction->IfFalseSuccessor();
+ Riscv64Label* true_target = codegen_->GoesToNextBlock(instruction->GetBlock(), true_successor)
+ ? nullptr
+ : codegen_->GetLabelOf(true_successor);
+ Riscv64Label* false_target = codegen_->GoesToNextBlock(instruction->GetBlock(), false_successor)
+ ? nullptr
+ : codegen_->GetLabelOf(false_successor);
+ if (IsBooleanValueOrMaterializedCondition(instruction->InputAt(0))) {
+ if (GetGraph()->IsCompilingBaseline() &&
+ codegen_->GetCompilerOptions().ProfileBranches() &&
+ !Runtime::Current()->IsAotCompiler()) {
+ DCHECK(instruction->InputAt(0)->IsCondition());
+ ProfilingInfo* info = GetGraph()->GetProfilingInfo();
+ DCHECK(info != nullptr);
+ BranchCache* cache = info->GetBranchCache(instruction->GetDexPc());
+ // Currently, not all If branches are profiled.
+ if (cache != nullptr) {
+ uint64_t address =
+ reinterpret_cast64<uint64_t>(cache) + BranchCache::FalseOffset().Int32Value();
+ static_assert(
+ BranchCache::TrueOffset().Int32Value() - BranchCache::FalseOffset().Int32Value() == 2,
+ "Unexpected offsets for BranchCache");
+ Riscv64Label done;
+ XRegister condition = instruction->GetLocations()->InAt(0).AsRegister<XRegister>();
+ XRegister temp = instruction->GetLocations()->GetTemp(0).AsRegister<XRegister>();
+ __ LoadConst64(temp, address);
+ __ Sh1Add(temp, condition, temp);
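+ // Illustrative note: Sh1Add yields temp + (condition << 1), i.e. the
+ // address of the false counter for condition == 0 and of the adjacent
+ // true counter (2 bytes away, per the static_assert) for condition == 1.
+ // The Srli/Bnez pair below skips the store once the 16-bit counter
+ // would wrap, so the profile saturates at 0xFFFF.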
+ ScratchRegisterScope srs(GetAssembler());
+ XRegister counter = srs.AllocateXRegister();
+ __ Loadhu(counter, temp, 0);
+ __ Addi(counter, counter, 1);
+ {
+ ScratchRegisterScope srs2(GetAssembler());
+ XRegister overflow = srs2.AllocateXRegister();
+ __ Srli(overflow, counter, 16);
+ __ Bnez(overflow, &done);
+ }
+ __ Storeh(counter, temp, 0);
+ __ Bind(&done);
+ }
+ }
+ }
+ GenerateTestAndBranch(instruction, /* condition_input_index= */ 0, true_target, false_target);
+}
+
+void LocationsBuilderRISCV64::VisitInstanceFieldGet(HInstanceFieldGet* instruction) {
+ HandleFieldGet(instruction);
+}
+
+void InstructionCodeGeneratorRISCV64::VisitInstanceFieldGet(HInstanceFieldGet* instruction) {
+ HandleFieldGet(instruction, instruction->GetFieldInfo());
+}
+
+void LocationsBuilderRISCV64::VisitInstanceFieldSet(HInstanceFieldSet* instruction) {
+ HandleFieldSet(instruction);
+}
+
+void InstructionCodeGeneratorRISCV64::VisitInstanceFieldSet(HInstanceFieldSet* instruction) {
+ HandleFieldSet(instruction,
+ instruction->GetFieldInfo(),
+ instruction->GetValueCanBeNull(),
+ instruction->GetWriteBarrierKind());
+}
+
+void LocationsBuilderRISCV64::VisitInstanceOf(HInstanceOf* instruction) {
+ LocationSummary::CallKind call_kind = LocationSummary::kNoCall;
+ TypeCheckKind type_check_kind = instruction->GetTypeCheckKind();
+ bool baker_read_barrier_slow_path = false;
+ switch (type_check_kind) {
+ case TypeCheckKind::kExactCheck:
+ case TypeCheckKind::kAbstractClassCheck:
+ case TypeCheckKind::kClassHierarchyCheck:
+ case TypeCheckKind::kArrayObjectCheck: {
+ bool needs_read_barrier = codegen_->InstanceOfNeedsReadBarrier(instruction);
+ call_kind = needs_read_barrier ? LocationSummary::kCallOnSlowPath : LocationSummary::kNoCall;
+ baker_read_barrier_slow_path = kUseBakerReadBarrier && needs_read_barrier;
+ break;
+ }
+ case TypeCheckKind::kArrayCheck:
+ case TypeCheckKind::kUnresolvedCheck:
+ case TypeCheckKind::kInterfaceCheck:
+ call_kind = LocationSummary::kCallOnSlowPath;
+ break;
+ case TypeCheckKind::kBitstringCheck:
+ break;
+ }
+
+ LocationSummary* locations =
+ new (GetGraph()->GetAllocator()) LocationSummary(instruction, call_kind);
+ if (baker_read_barrier_slow_path) {
+ locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty()); // No caller-save registers.
+ }
+ locations->SetInAt(0, Location::RequiresRegister());
+ if (type_check_kind == TypeCheckKind::kBitstringCheck) {
+ locations->SetInAt(1, Location::ConstantLocation(instruction->InputAt(1)));
+ locations->SetInAt(2, Location::ConstantLocation(instruction->InputAt(2)));
+ locations->SetInAt(3, Location::ConstantLocation(instruction->InputAt(3)));
+ } else {
+ locations->SetInAt(1, Location::RequiresRegister());
+ }
+ // The output overlaps the inputs.
+ // Note that TypeCheckSlowPathRISCV64 uses this register too.
+ locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap);
+ locations->AddRegisterTemps(
+ NumberOfInstanceOfTemps(codegen_->EmitReadBarrier(), type_check_kind));
+}
+
+void InstructionCodeGeneratorRISCV64::VisitInstanceOf(HInstanceOf* instruction) {
+ TypeCheckKind type_check_kind = instruction->GetTypeCheckKind();
+ LocationSummary* locations = instruction->GetLocations();
+ Location obj_loc = locations->InAt(0);
+ XRegister obj = obj_loc.AsRegister<XRegister>();
+ Location cls = (type_check_kind == TypeCheckKind::kBitstringCheck)
+ ? Location::NoLocation()
+ : locations->InAt(1);
+ Location out_loc = locations->Out();
+ XRegister out = out_loc.AsRegister<XRegister>();
+ const size_t num_temps = NumberOfInstanceOfTemps(codegen_->EmitReadBarrier(), type_check_kind);
+ DCHECK_LE(num_temps, 1u);
+ Location maybe_temp_loc = (num_temps >= 1) ? locations->GetTemp(0) : Location::NoLocation();
+ uint32_t class_offset = mirror::Object::ClassOffset().Int32Value();
+ uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value();
+ uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value();
+ uint32_t primitive_offset = mirror::Class::PrimitiveTypeOffset().Int32Value();
+ Riscv64Label done;
+ SlowPathCodeRISCV64* slow_path = nullptr;
+
+ // Return 0 if `obj` is null.
+ // Avoid this check if we know `obj` is not null.
+ if (instruction->MustDoNullCheck()) {
+ __ Mv(out, Zero);
+ __ Beqz(obj, &done);
+ }
+
+ switch (type_check_kind) {
+ case TypeCheckKind::kExactCheck: {
+ ReadBarrierOption read_barrier_option =
+ codegen_->ReadBarrierOptionForInstanceOf(instruction);
+ // /* HeapReference<Class> */ out = obj->klass_
+ GenerateReferenceLoadTwoRegisters(
+ instruction, out_loc, obj_loc, class_offset, maybe_temp_loc, read_barrier_option);
+ // Classes must be equal for the instanceof to succeed.
+ __ Xor(out, out, cls.AsRegister<XRegister>());
+ __ Seqz(out, out);
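+ // Illustrative note: Xor/Seqz materializes the equality as a boolean,
+ // i.e. out = (obj->klass_ == cls) ? 1 : 0.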
+ break;
+ }
+
+ case TypeCheckKind::kAbstractClassCheck: {
+ ReadBarrierOption read_barrier_option =
+ codegen_->ReadBarrierOptionForInstanceOf(instruction);
+ // /* HeapReference<Class> */ out = obj->klass_
+ GenerateReferenceLoadTwoRegisters(
+ instruction, out_loc, obj_loc, class_offset, maybe_temp_loc, read_barrier_option);
+ // If the class is abstract, we eagerly fetch the super class of the
+ // object to avoid doing a comparison we know will fail.
+ Riscv64Label loop;
+ __ Bind(&loop);
+ // /* HeapReference<Class> */ out = out->super_class_
+ GenerateReferenceLoadOneRegister(
+ instruction, out_loc, super_offset, maybe_temp_loc, read_barrier_option);
+ // If `out` is null, we use it for the result, and jump to `done`.
+ __ Beqz(out, &done);
+ __ Bne(out, cls.AsRegister<XRegister>(), &loop);
+ __ LoadConst32(out, 1);
+ break;
+ }
+
+ case TypeCheckKind::kClassHierarchyCheck: {
+ ReadBarrierOption read_barrier_option =
+ codegen_->ReadBarrierOptionForInstanceOf(instruction);
+ // /* HeapReference<Class> */ out = obj->klass_
+ GenerateReferenceLoadTwoRegisters(
+ instruction, out_loc, obj_loc, class_offset, maybe_temp_loc, read_barrier_option);
+ // Walk over the class hierarchy to find a match.
+ Riscv64Label loop, success;
+ __ Bind(&loop);
+ __ Beq(out, cls.AsRegister<XRegister>(), &success);
+ // /* HeapReference<Class> */ out = out->super_class_
+ GenerateReferenceLoadOneRegister(
+ instruction, out_loc, super_offset, maybe_temp_loc, read_barrier_option);
+ __ Bnez(out, &loop);
+ // If `out` is null, we use it for the result, and jump to `done`.
+ __ J(&done);
+ __ Bind(&success);
+ __ LoadConst32(out, 1);
+ break;
+ }
+
+ case TypeCheckKind::kArrayObjectCheck: {
+ ReadBarrierOption read_barrier_option =
+ codegen_->ReadBarrierOptionForInstanceOf(instruction);
+ // FIXME(riscv64): We currently have marking entrypoints for 29 registers.
+ // We need to either store entrypoint for register `N` in entry `N-A` where
+ // `A` can be up to 5 (Zero, RA, SP, GP, TP are not valid registers for
+ // marking), or define two more entrypoints, or request an additional temp
+ // from the register allocator instead of using a scratch register.
+ ScratchRegisterScope srs(GetAssembler());
+ Location tmp = Location::RegisterLocation(srs.AllocateXRegister());
+ // /* HeapReference<Class> */ tmp = obj->klass_
+ GenerateReferenceLoadTwoRegisters(
+ instruction, tmp, obj_loc, class_offset, maybe_temp_loc, read_barrier_option);
+ // Do an exact check.
+ __ LoadConst32(out, 1);
+ __ Beq(tmp.AsRegister<XRegister>(), cls.AsRegister<XRegister>(), &done);
+ // Otherwise, we need to check that the object's class is a non-primitive array.
+ // /* HeapReference<Class> */ out = tmp->component_type_
+ GenerateReferenceLoadTwoRegisters(
+ instruction, out_loc, tmp, component_offset, maybe_temp_loc, read_barrier_option);
+ // If `out` is null, we use it for the result, and jump to `done`.
+ __ Beqz(out, &done);
+ __ Loadhu(out, out, primitive_offset);
+ static_assert(Primitive::kPrimNot == 0, "Expected 0 for kPrimNot");
+ __ Seqz(out, out);
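+ // Illustrative note: at this point `out` becomes 1 only when the component
+ // type exists and its primitive type is kPrimNot (0), i.e. when the object
+ // is a non-primitive (reference) array.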
+ break;
+ }
+
+ case TypeCheckKind::kArrayCheck: {
+ // No read barrier since the slow path will retry upon failure.
+ // /* HeapReference<Class> */ out = obj->klass_
+ GenerateReferenceLoadTwoRegisters(
+ instruction, out_loc, obj_loc, class_offset, maybe_temp_loc, kWithoutReadBarrier);
+ DCHECK(locations->OnlyCallsOnSlowPath());
+ slow_path = new (codegen_->GetScopedAllocator())
+ TypeCheckSlowPathRISCV64(instruction, /* is_fatal= */ false);
+ codegen_->AddSlowPath(slow_path);
+ __ Bne(out, cls.AsRegister<XRegister>(), slow_path->GetEntryLabel());
+ __ LoadConst32(out, 1);
+ break;
+ }
+
+ case TypeCheckKind::kUnresolvedCheck:
+ case TypeCheckKind::kInterfaceCheck: {
+ // Note that we indeed only call on slow path, but we always go
+ // into the slow path for the unresolved and interface check
+ // cases.
+ //
+ // We cannot directly call the InstanceofNonTrivial runtime
+ // entry point without resorting to a type checking slow path
+ // here (i.e. by calling InvokeRuntime directly), as it would
+ // require assigning fixed registers for the inputs of this
+ // HInstanceOf instruction (following the runtime calling
+ // convention), which might be cluttered by the potential first
+ // read barrier emission at the beginning of this method.
+ //
+ // TODO: Introduce a new runtime entry point taking the object
+ // to test (instead of its class) as argument, and let it deal
+ // with the read barrier issues. This will let us refactor this
+ // case of the `switch` code as it was previously (with a direct
+ // call to the runtime not using a type checking slow path).
+ // This should also be beneficial for the other cases above.
+ DCHECK(locations->OnlyCallsOnSlowPath());
+ slow_path = new (codegen_->GetScopedAllocator()) TypeCheckSlowPathRISCV64(
+ instruction, /* is_fatal= */ false);
+ codegen_->AddSlowPath(slow_path);
+ __ J(slow_path->GetEntryLabel());
+ break;
+ }
+
+ case TypeCheckKind::kBitstringCheck: {
+ // /* HeapReference<Class> */ out = obj->klass_
+ GenerateReferenceLoadTwoRegisters(
+ instruction, out_loc, obj_loc, class_offset, maybe_temp_loc, kWithoutReadBarrier);
+
+ GenerateBitstringTypeCheckCompare(instruction, out);
+ __ Seqz(out, out);
+ break;
+ }
+ }
+
+ __ Bind(&done);
+
+ if (slow_path != nullptr) {
+ __ Bind(slow_path->GetExitLabel());
+ }
+}
+
+void LocationsBuilderRISCV64::VisitIntConstant(HIntConstant* instruction) {
+ LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(instruction);
+ locations->SetOut(Location::ConstantLocation(instruction));
+}
+
+void InstructionCodeGeneratorRISCV64::VisitIntConstant([[maybe_unused]] HIntConstant* instruction) {
+ // Will be generated at use site.
+}
+
+void LocationsBuilderRISCV64::VisitIntermediateAddress(HIntermediateAddress* instruction) {
+ UNUSED(instruction);
+ LOG(FATAL) << "Unimplemented";
+}
+
+void InstructionCodeGeneratorRISCV64::VisitIntermediateAddress(HIntermediateAddress* instruction) {
+ UNUSED(instruction);
+ LOG(FATAL) << "Unimplemented";
+}
+
+void LocationsBuilderRISCV64::VisitInvokeUnresolved(HInvokeUnresolved* instruction) {
+ // The trampoline uses the same calling convention as dex calling conventions, except
+ // instead of loading arg0/A0 with the target Method*, arg0/A0 will contain the method_idx.
+ HandleInvoke(instruction);
+}
+
+void InstructionCodeGeneratorRISCV64::VisitInvokeUnresolved(HInvokeUnresolved* instruction) {
+ codegen_->GenerateInvokeUnresolvedRuntimeCall(instruction);
+}
+
+void LocationsBuilderRISCV64::VisitInvokeInterface(HInvokeInterface* instruction) {
+ HandleInvoke(instruction);
+ // Use T0 as the hidden argument for `art_quick_imt_conflict_trampoline`.
+ if (instruction->GetHiddenArgumentLoadKind() == MethodLoadKind::kRecursive) {
+ instruction->GetLocations()->SetInAt(instruction->GetNumberOfArguments() - 1,
+ Location::RegisterLocation(T0));
+ } else {
+ instruction->GetLocations()->AddTemp(Location::RegisterLocation(T0));
+ }
+}
+
+void InstructionCodeGeneratorRISCV64::VisitInvokeInterface(HInvokeInterface* instruction) {
+ LocationSummary* locations = instruction->GetLocations();
+ XRegister temp = locations->GetTemp(0).AsRegister<XRegister>();
+ XRegister receiver = locations->InAt(0).AsRegister<XRegister>();
+ int32_t class_offset = mirror::Object::ClassOffset().Int32Value();
+ Offset entry_point = ArtMethod::EntryPointFromQuickCompiledCodeOffset(kRiscv64PointerSize);
+
+ // /* HeapReference<Class> */ temp = receiver->klass_
+ __ Loadwu(temp, receiver, class_offset);
+ codegen_->MaybeRecordImplicitNullCheck(instruction);
+ // Instead of simply (possibly) unpoisoning `temp` here, we should
+ // emit a read barrier for the previous class reference load.
+ // However this is not required in practice, as this is an
+ // intermediate/temporary reference and because the current
+ // concurrent copying collector keeps the from-space memory
+ // intact/accessible until the end of the marking phase (future
+ // collectors may not).
+ codegen_->MaybeUnpoisonHeapReference(temp);
+
+ // If we're compiling baseline, update the inline cache.
+ codegen_->MaybeGenerateInlineCacheCheck(instruction, temp);
+
+ // The register T0 is required to be used for the hidden argument in
+ // `art_quick_imt_conflict_trampoline`.
+ if (instruction->GetHiddenArgumentLoadKind() != MethodLoadKind::kRecursive &&
+ instruction->GetHiddenArgumentLoadKind() != MethodLoadKind::kRuntimeCall) {
+ Location hidden_reg = instruction->GetLocations()->GetTemp(1);
+ // Load the resolved interface method in the hidden argument register T0.
+ DCHECK_EQ(T0, hidden_reg.AsRegister<XRegister>());
+ codegen_->LoadMethod(instruction->GetHiddenArgumentLoadKind(), hidden_reg, instruction);
+ }
+
+ __ Loadd(temp, temp, mirror::Class::ImtPtrOffset(kRiscv64PointerSize).Uint32Value());
+ uint32_t method_offset = static_cast<uint32_t>(ImTable::OffsetOfElement(
+ instruction->GetImtIndex(), kRiscv64PointerSize));
+ // temp = temp->GetImtEntryAt(method_offset);
+ __ Loadd(temp, temp, method_offset);
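+ // Illustrative note: GetImtIndex() hashes the interface method into one of
+ // the fixed IMT slots; if several methods share a slot, the conflict stub
+ // stored there resolves the call using the hidden argument in T0.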
+ if (instruction->GetHiddenArgumentLoadKind() == MethodLoadKind::kRuntimeCall) {
+ // We pass the method from the IMT in case of a conflict. This will ensure
+ // we go into the runtime to resolve the actual method.
+ Location hidden_reg = instruction->GetLocations()->GetTemp(1);
+ DCHECK_EQ(T0, hidden_reg.AsRegister<XRegister>());
+ __ Mv(hidden_reg.AsRegister<XRegister>(), temp);
+ }
+ // RA = temp->GetEntryPoint();
+ __ Loadd(RA, temp, entry_point.Int32Value());
+
+ // RA();
+ __ Jalr(RA);
+ DCHECK(!codegen_->IsLeafMethod());
+ codegen_->RecordPcInfo(instruction, instruction->GetDexPc());
+}
+
+void LocationsBuilderRISCV64::VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* instruction) {
+ // Explicit clinit checks triggered by static invokes must have been pruned by
+ // art::PrepareForRegisterAllocation.
+ DCHECK(!instruction->IsStaticWithExplicitClinitCheck());
+
+ IntrinsicLocationsBuilderRISCV64 intrinsic(GetGraph()->GetAllocator(), codegen_);
+ if (intrinsic.TryDispatch(instruction)) {
+ return;
+ }
+
+ if (instruction->GetCodePtrLocation() == CodePtrLocation::kCallCriticalNative) {
+ CriticalNativeCallingConventionVisitorRiscv64 calling_convention_visitor(
+ /*for_register_allocation=*/ true);
+ CodeGenerator::CreateCommonInvokeLocationSummary(instruction, &calling_convention_visitor);
+ } else {
+ HandleInvoke(instruction);
+ }
+}
+
+static bool TryGenerateIntrinsicCode(HInvoke* invoke, CodeGeneratorRISCV64* codegen) {
+ if (invoke->GetLocations()->Intrinsified()) {
+ IntrinsicCodeGeneratorRISCV64 intrinsic(codegen);
+ intrinsic.Dispatch(invoke);
+ return true;
+ }
+ return false;
+}
+
+void InstructionCodeGeneratorRISCV64::VisitInvokeStaticOrDirect(
+ HInvokeStaticOrDirect* instruction) {
+ // Explicit clinit checks triggered by static invokes must have been pruned by
+ // art::PrepareForRegisterAllocation.
+ DCHECK(!instruction->IsStaticWithExplicitClinitCheck());
+
+ if (TryGenerateIntrinsicCode(instruction, codegen_)) {
+ return;
+ }
+
+ LocationSummary* locations = instruction->GetLocations();
+ codegen_->GenerateStaticOrDirectCall(
+ instruction, locations->HasTemps() ? locations->GetTemp(0) : Location::NoLocation());
+}
+
+void LocationsBuilderRISCV64::VisitInvokeVirtual(HInvokeVirtual* instruction) {
+ IntrinsicLocationsBuilderRISCV64 intrinsic(GetGraph()->GetAllocator(), codegen_);
+ if (intrinsic.TryDispatch(instruction)) {
+ return;
+ }
+
+ HandleInvoke(instruction);
+}
+
+void InstructionCodeGeneratorRISCV64::VisitInvokeVirtual(HInvokeVirtual* instruction) {
+ if (TryGenerateIntrinsicCode(instruction, codegen_)) {
+ return;
+ }
+
+ codegen_->GenerateVirtualCall(instruction, instruction->GetLocations()->GetTemp(0));
+ DCHECK(!codegen_->IsLeafMethod());
+}
+
+void LocationsBuilderRISCV64::VisitInvokePolymorphic(HInvokePolymorphic* instruction) {
+ IntrinsicLocationsBuilderRISCV64 intrinsic(GetGraph()->GetAllocator(), codegen_);
+ if (intrinsic.TryDispatch(instruction)) {
+ return;
+ }
+ HandleInvoke(instruction);
+}
+
+void InstructionCodeGeneratorRISCV64::VisitInvokePolymorphic(HInvokePolymorphic* instruction) {
+ if (TryGenerateIntrinsicCode(instruction, codegen_)) {
+ return;
+ }
+ codegen_->GenerateInvokePolymorphicCall(instruction);
+}
+
+void LocationsBuilderRISCV64::VisitInvokeCustom(HInvokeCustom* instruction) {
+ HandleInvoke(instruction);
+}
+
+void InstructionCodeGeneratorRISCV64::VisitInvokeCustom(HInvokeCustom* instruction) {
+ codegen_->GenerateInvokeCustomCall(instruction);
+}
+
+void LocationsBuilderRISCV64::VisitLessThan(HLessThan* instruction) {
+ HandleCondition(instruction);
+}
+
+void InstructionCodeGeneratorRISCV64::VisitLessThan(HLessThan* instruction) {
+ HandleCondition(instruction);
+}
+
+void LocationsBuilderRISCV64::VisitLessThanOrEqual(HLessThanOrEqual* instruction) {
+ HandleCondition(instruction);
+}
+
+void InstructionCodeGeneratorRISCV64::VisitLessThanOrEqual(HLessThanOrEqual* instruction) {
+ HandleCondition(instruction);
+}
+
+void LocationsBuilderRISCV64::VisitLoadClass(HLoadClass* instruction) {
+ HLoadClass::LoadKind load_kind = instruction->GetLoadKind();
+ if (load_kind == HLoadClass::LoadKind::kRuntimeCall) {
+ InvokeRuntimeCallingConvention calling_convention;
+ Location loc = Location::RegisterLocation(calling_convention.GetRegisterAt(0));
+ DCHECK_EQ(DataType::Type::kReference, instruction->GetType());
+ DCHECK(loc.Equals(calling_convention.GetReturnLocation(DataType::Type::kReference)));
+ CodeGenerator::CreateLoadClassRuntimeCallLocationSummary(instruction, loc, loc);
+ return;
+ }
+ DCHECK_EQ(instruction->NeedsAccessCheck(),
+ load_kind == HLoadClass::LoadKind::kBssEntryPublic ||
+ load_kind == HLoadClass::LoadKind::kBssEntryPackage);
+
+ const bool requires_read_barrier = !instruction->IsInBootImage() && codegen_->EmitReadBarrier();
+ LocationSummary::CallKind call_kind = (instruction->NeedsEnvironment() || requires_read_barrier)
+ ? LocationSummary::kCallOnSlowPath
+ : LocationSummary::kNoCall;
+ LocationSummary* locations =
+ new (GetGraph()->GetAllocator()) LocationSummary(instruction, call_kind);
+ if (kUseBakerReadBarrier && requires_read_barrier && !instruction->NeedsEnvironment()) {
+ locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty()); // No caller-save registers.
+ }
+ if (load_kind == HLoadClass::LoadKind::kReferrersClass) {
+ locations->SetInAt(0, Location::RequiresRegister());
+ }
+ locations->SetOut(Location::RequiresRegister());
+ if (load_kind == HLoadClass::LoadKind::kBssEntry ||
+ load_kind == HLoadClass::LoadKind::kBssEntryPublic ||
+ load_kind == HLoadClass::LoadKind::kBssEntryPackage) {
+ if (codegen_->EmitNonBakerReadBarrier()) {
+ // For non-Baker read barriers we have a temp-clobbering call.
+ } else {
+ // Rely on the type resolution or initialization and marking to save everything we need.
+ locations->SetCustomSlowPathCallerSaves(OneRegInReferenceOutSaveEverythingCallerSaves());
+ }
+ }
+}
+
+// NO_THREAD_SAFETY_ANALYSIS as we manipulate handles whose internal object we know does not
+// move.
+void InstructionCodeGeneratorRISCV64::VisitLoadClass(HLoadClass* instruction)
+ NO_THREAD_SAFETY_ANALYSIS {
+ HLoadClass::LoadKind load_kind = instruction->GetLoadKind();
+ if (load_kind == HLoadClass::LoadKind::kRuntimeCall) {
+ codegen_->GenerateLoadClassRuntimeCall(instruction);
+ return;
+ }
+ DCHECK_EQ(instruction->NeedsAccessCheck(),
+ load_kind == HLoadClass::LoadKind::kBssEntryPublic ||
+ load_kind == HLoadClass::LoadKind::kBssEntryPackage);
+
+ LocationSummary* locations = instruction->GetLocations();
+ Location out_loc = locations->Out();
+ XRegister out = out_loc.AsRegister<XRegister>();
+ const ReadBarrierOption read_barrier_option =
+ instruction->IsInBootImage() ? kWithoutReadBarrier : codegen_->GetCompilerReadBarrierOption();
+ bool generate_null_check = false;
+ switch (load_kind) {
+ case HLoadClass::LoadKind::kReferrersClass: {
+ DCHECK(!instruction->CanCallRuntime());
+ DCHECK(!instruction->MustGenerateClinitCheck());
+ // /* GcRoot<mirror::Class> */ out = current_method->declaring_class_
+ XRegister current_method = locations->InAt(0).AsRegister<XRegister>();
+ codegen_->GenerateGcRootFieldLoad(instruction,
+ out_loc,
+ current_method,
+ ArtMethod::DeclaringClassOffset().Int32Value(),
+ read_barrier_option);
+ break;
+ }
+ case HLoadClass::LoadKind::kBootImageLinkTimePcRelative: {
+ DCHECK(codegen_->GetCompilerOptions().IsBootImage() ||
+ codegen_->GetCompilerOptions().IsBootImageExtension());
+ DCHECK_EQ(read_barrier_option, kWithoutReadBarrier);
+ CodeGeneratorRISCV64::PcRelativePatchInfo* info_high =
+ codegen_->NewBootImageTypePatch(instruction->GetDexFile(), instruction->GetTypeIndex());
+ codegen_->EmitPcRelativeAuipcPlaceholder(info_high, out);
+ CodeGeneratorRISCV64::PcRelativePatchInfo* info_low =
+ codegen_->NewBootImageTypePatch(
+ instruction->GetDexFile(), instruction->GetTypeIndex(), info_high);
+ codegen_->EmitPcRelativeAddiPlaceholder(info_low, out, out);
+ break;
+ }
+ case HLoadClass::LoadKind::kBootImageRelRo: {
+ DCHECK(!codegen_->GetCompilerOptions().IsBootImage());
+ uint32_t boot_image_offset = codegen_->GetBootImageOffset(instruction);
+ codegen_->LoadBootImageRelRoEntry(out, boot_image_offset);
+ break;
+ }
+ case HLoadClass::LoadKind::kBssEntry:
+ case HLoadClass::LoadKind::kBssEntryPublic:
+ case HLoadClass::LoadKind::kBssEntryPackage: {
+ CodeGeneratorRISCV64::PcRelativePatchInfo* bss_info_high =
+ codegen_->NewTypeBssEntryPatch(instruction);
+ codegen_->EmitPcRelativeAuipcPlaceholder(bss_info_high, out);
+ CodeGeneratorRISCV64::PcRelativePatchInfo* info_low = codegen_->NewTypeBssEntryPatch(
+ instruction, bss_info_high);
+ codegen_->GenerateGcRootFieldLoad(instruction,
+ out_loc,
+ out,
+ /* offset= */ kLinkTimeOffsetPlaceholderLow,
+ read_barrier_option,
+ &info_low->label);
+ generate_null_check = true;
+ break;
+ }
+ case HLoadClass::LoadKind::kJitBootImageAddress: {
+ DCHECK_EQ(read_barrier_option, kWithoutReadBarrier);
+ uint32_t address = reinterpret_cast32<uint32_t>(instruction->GetClass().Get());
+ DCHECK_NE(address, 0u);
+ __ Loadwu(out, codegen_->DeduplicateBootImageAddressLiteral(address));
+ break;
+ }
+ case HLoadClass::LoadKind::kJitTableAddress:
+ __ Loadwu(out, codegen_->DeduplicateJitClassLiteral(instruction->GetDexFile(),
+ instruction->GetTypeIndex(),
+ instruction->GetClass()));
+ codegen_->GenerateGcRootFieldLoad(
+ instruction, out_loc, out, /* offset= */ 0, read_barrier_option);
+ break;
+ case HLoadClass::LoadKind::kRuntimeCall:
+ case HLoadClass::LoadKind::kInvalid:
+ LOG(FATAL) << "UNREACHABLE";
+ UNREACHABLE();
+ }
+
+ if (generate_null_check || instruction->MustGenerateClinitCheck()) {
+ DCHECK(instruction->CanCallRuntime());
+ SlowPathCodeRISCV64* slow_path =
+ new (codegen_->GetScopedAllocator()) LoadClassSlowPathRISCV64(instruction, instruction);
+ codegen_->AddSlowPath(slow_path);
+ if (generate_null_check) {
+ __ Beqz(out, slow_path->GetEntryLabel());
+ }
+ if (instruction->MustGenerateClinitCheck()) {
+ GenerateClassInitializationCheck(slow_path, out);
+ } else {
+ __ Bind(slow_path->GetExitLabel());
+ }
+ }
+}
+
+void LocationsBuilderRISCV64::VisitLoadException(HLoadException* instruction) {
+ LocationSummary* locations =
+ new (GetGraph()->GetAllocator()) LocationSummary(instruction, LocationSummary::kNoCall);
+ locations->SetOut(Location::RequiresRegister());
+}
+
+void InstructionCodeGeneratorRISCV64::VisitLoadException(HLoadException* instruction) {
+ XRegister out = instruction->GetLocations()->Out().AsRegister<XRegister>();
+ __ Loadwu(out, TR, GetExceptionTlsOffset());
+}
+
+void LocationsBuilderRISCV64::VisitLoadMethodHandle(HLoadMethodHandle* instruction) {
+ InvokeRuntimeCallingConvention calling_convention;
+ Location loc = Location::RegisterLocation(calling_convention.GetRegisterAt(0));
+ CodeGenerator::CreateLoadMethodHandleRuntimeCallLocationSummary(instruction, loc, loc);
+}
+
+void InstructionCodeGeneratorRISCV64::VisitLoadMethodHandle(HLoadMethodHandle* instruction) {
+ codegen_->GenerateLoadMethodHandleRuntimeCall(instruction);
+}
+
+void LocationsBuilderRISCV64::VisitLoadMethodType(HLoadMethodType* instruction) {
+ InvokeRuntimeCallingConvention calling_convention;
+ Location loc = Location::RegisterLocation(calling_convention.GetRegisterAt(0));
+ CodeGenerator::CreateLoadMethodTypeRuntimeCallLocationSummary(instruction, loc, loc);
+}
+
+void InstructionCodeGeneratorRISCV64::VisitLoadMethodType(HLoadMethodType* instruction) {
+ codegen_->GenerateLoadMethodTypeRuntimeCall(instruction);
+}
+
+void LocationsBuilderRISCV64::VisitLoadString(HLoadString* instruction) {
+ HLoadString::LoadKind load_kind = instruction->GetLoadKind();
+ LocationSummary::CallKind call_kind = codegen_->GetLoadStringCallKind(instruction);
+ LocationSummary* locations =
+ new (GetGraph()->GetAllocator()) LocationSummary(instruction, call_kind);
+ if (load_kind == HLoadString::LoadKind::kRuntimeCall) {
+ InvokeRuntimeCallingConvention calling_convention;
+ DCHECK_EQ(DataType::Type::kReference, instruction->GetType());
+ locations->SetOut(calling_convention.GetReturnLocation(DataType::Type::kReference));
+ } else {
+ locations->SetOut(Location::RequiresRegister());
+ if (load_kind == HLoadString::LoadKind::kBssEntry) {
+ if (codegen_->EmitNonBakerReadBarrier()) {
+ // For non-Baker read barriers we have a temp-clobbering call.
+ } else {
+ // Rely on the pResolveString and marking to save everything we need.
+ locations->SetCustomSlowPathCallerSaves(OneRegInReferenceOutSaveEverythingCallerSaves());
+ }
+ }
+ }
+}
+
+// NO_THREAD_SAFETY_ANALYSIS as we manipulate handles whose internal object we know does not
+// move.
+void InstructionCodeGeneratorRISCV64::VisitLoadString(HLoadString* instruction)
+ NO_THREAD_SAFETY_ANALYSIS {
+ HLoadString::LoadKind load_kind = instruction->GetLoadKind();
+ LocationSummary* locations = instruction->GetLocations();
+ Location out_loc = locations->Out();
+ XRegister out = out_loc.AsRegister<XRegister>();
+
+ switch (load_kind) {
+ case HLoadString::LoadKind::kBootImageLinkTimePcRelative: {
+ DCHECK(codegen_->GetCompilerOptions().IsBootImage() ||
+ codegen_->GetCompilerOptions().IsBootImageExtension());
+ CodeGeneratorRISCV64::PcRelativePatchInfo* info_high = codegen_->NewBootImageStringPatch(
+ instruction->GetDexFile(), instruction->GetStringIndex());
+ codegen_->EmitPcRelativeAuipcPlaceholder(info_high, out);
+ CodeGeneratorRISCV64::PcRelativePatchInfo* info_low = codegen_->NewBootImageStringPatch(
+ instruction->GetDexFile(), instruction->GetStringIndex(), info_high);
+ codegen_->EmitPcRelativeAddiPlaceholder(info_low, out, out);
+ return;
+ }
+ case HLoadString::LoadKind::kBootImageRelRo: {
+ DCHECK(!codegen_->GetCompilerOptions().IsBootImage());
+ uint32_t boot_image_offset = codegen_->GetBootImageOffset(instruction);
+ codegen_->LoadBootImageRelRoEntry(out, boot_image_offset);
+ return;
+ }
+ case HLoadString::LoadKind::kBssEntry: {
+ CodeGeneratorRISCV64::PcRelativePatchInfo* info_high = codegen_->NewStringBssEntryPatch(
+ instruction->GetDexFile(), instruction->GetStringIndex());
+ codegen_->EmitPcRelativeAuipcPlaceholder(info_high, out);
+ CodeGeneratorRISCV64::PcRelativePatchInfo* info_low = codegen_->NewStringBssEntryPatch(
+ instruction->GetDexFile(), instruction->GetStringIndex(), info_high);
+ codegen_->GenerateGcRootFieldLoad(instruction,
+ out_loc,
+ out,
+ /* offset= */ kLinkTimeOffsetPlaceholderLow,
+ codegen_->GetCompilerReadBarrierOption(),
+ &info_low->label);
+ SlowPathCodeRISCV64* slow_path =
+ new (codegen_->GetScopedAllocator()) LoadStringSlowPathRISCV64(instruction);
+ codegen_->AddSlowPath(slow_path);
+ __ Beqz(out, slow_path->GetEntryLabel());
+ __ Bind(slow_path->GetExitLabel());
+ return;
+ }
+ case HLoadString::LoadKind::kJitBootImageAddress: {
+ uint32_t address = reinterpret_cast32<uint32_t>(instruction->GetString().Get());
+ DCHECK_NE(address, 0u);
+ __ Loadwu(out, codegen_->DeduplicateBootImageAddressLiteral(address));
+ return;
+ }
+ case HLoadString::LoadKind::kJitTableAddress:
+ __ Loadwu(
+ out,
+ codegen_->DeduplicateJitStringLiteral(
+ instruction->GetDexFile(), instruction->GetStringIndex(), instruction->GetString()));
+ codegen_->GenerateGcRootFieldLoad(
+ instruction, out_loc, out, 0, codegen_->GetCompilerReadBarrierOption());
+ return;
+ default:
+ break;
+ }
+
+ DCHECK(load_kind == HLoadString::LoadKind::kRuntimeCall);
+ InvokeRuntimeCallingConvention calling_convention;
+ DCHECK(calling_convention.GetReturnLocation(DataType::Type::kReference).Equals(out_loc));
+ __ LoadConst32(calling_convention.GetRegisterAt(0), instruction->GetStringIndex().index_);
+ codegen_->InvokeRuntime(kQuickResolveString, instruction, instruction->GetDexPc());
+ CheckEntrypointTypes<kQuickResolveString, void*, uint32_t>();
+}
+
+void LocationsBuilderRISCV64::VisitLongConstant(HLongConstant* instruction) {
+ LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(instruction);
+ locations->SetOut(Location::ConstantLocation(instruction));
+}
+
+void InstructionCodeGeneratorRISCV64::VisitLongConstant(
+ [[maybe_unused]] HLongConstant* instruction) {
+ // Will be generated at use site.
+}
+
+void LocationsBuilderRISCV64::VisitMax(HMax* instruction) {
+ HandleBinaryOp(instruction);
+}
+
+void InstructionCodeGeneratorRISCV64::VisitMax(HMax* instruction) {
+ HandleBinaryOp(instruction);
+}
+
+void LocationsBuilderRISCV64::VisitMemoryBarrier(HMemoryBarrier* instruction) {
+ instruction->SetLocations(nullptr);
+}
+
+void InstructionCodeGeneratorRISCV64::VisitMemoryBarrier(HMemoryBarrier* instruction) {
+ codegen_->GenerateMemoryBarrier(instruction->GetBarrierKind());
+}
+
+void LocationsBuilderRISCV64::VisitMethodEntryHook(HMethodEntryHook* instruction) {
+ new (GetGraph()->GetAllocator()) LocationSummary(instruction, LocationSummary::kCallOnSlowPath);
+}
+
+void InstructionCodeGeneratorRISCV64::VisitMethodEntryHook(HMethodEntryHook* instruction) {
+ DCHECK(codegen_->GetCompilerOptions().IsJitCompiler() && GetGraph()->IsDebuggable());
+ DCHECK(codegen_->RequiresCurrentMethod());
+ GenerateMethodEntryExitHook(instruction);
+}
+
+void LocationsBuilderRISCV64::VisitMethodExitHook(HMethodExitHook* instruction) {
+ LocationSummary* locations = new (GetGraph()->GetAllocator())
+ LocationSummary(instruction, LocationSummary::kCallOnSlowPath);
+ DataType::Type return_type = instruction->InputAt(0)->GetType();
+ locations->SetInAt(0, Riscv64ReturnLocation(return_type));
+}
+
+void InstructionCodeGeneratorRISCV64::VisitMethodExitHook(HMethodExitHook* instruction) {
+ DCHECK(codegen_->GetCompilerOptions().IsJitCompiler() && GetGraph()->IsDebuggable());
+ DCHECK(codegen_->RequiresCurrentMethod());
+ GenerateMethodEntryExitHook(instruction);
+}
+
+void LocationsBuilderRISCV64::VisitMin(HMin* instruction) {
+ HandleBinaryOp(instruction);
+}
+
+void InstructionCodeGeneratorRISCV64::VisitMin(HMin* instruction) {
+ HandleBinaryOp(instruction);
+}
+
+void LocationsBuilderRISCV64::VisitMonitorOperation(HMonitorOperation* instruction) {
+ LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(
+ instruction, LocationSummary::kCallOnMainOnly);
+ InvokeRuntimeCallingConvention calling_convention;
+ locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
+}
+
+void InstructionCodeGeneratorRISCV64::VisitMonitorOperation(HMonitorOperation* instruction) {
+ codegen_->InvokeRuntime(instruction->IsEnter() ? kQuickLockObject : kQuickUnlockObject,
+ instruction,
+ instruction->GetDexPc());
+ if (instruction->IsEnter()) {
+ CheckEntrypointTypes<kQuickLockObject, void, mirror::Object*>();
+ } else {
+ CheckEntrypointTypes<kQuickUnlockObject, void, mirror::Object*>();
+ }
+}
+
+void LocationsBuilderRISCV64::VisitMul(HMul* instruction) {
+ LocationSummary* locations =
+ new (GetGraph()->GetAllocator()) LocationSummary(instruction, LocationSummary::kNoCall);
+ switch (instruction->GetResultType()) {
+ case DataType::Type::kInt32:
+ case DataType::Type::kInt64:
+ locations->SetInAt(0, Location::RequiresRegister());
+ locations->SetInAt(1, Location::RequiresRegister());
+ locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
+ break;
+
+ case DataType::Type::kFloat32:
+ case DataType::Type::kFloat64:
+ locations->SetInAt(0, Location::RequiresFpuRegister());
+ locations->SetInAt(1, Location::RequiresFpuRegister());
+ locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
+ break;
+
+ default:
+ LOG(FATAL) << "Unexpected mul type " << instruction->GetResultType();
+ }
+}
+
+void InstructionCodeGeneratorRISCV64::VisitMul(HMul* instruction) {
+ LocationSummary* locations = instruction->GetLocations();
+ switch (instruction->GetResultType()) {
+ case DataType::Type::kInt32:
+ __ Mulw(locations->Out().AsRegister<XRegister>(),
+ locations->InAt(0).AsRegister<XRegister>(),
+ locations->InAt(1).AsRegister<XRegister>());
+ break;
+
+ case DataType::Type::kInt64:
+ __ Mul(locations->Out().AsRegister<XRegister>(),
+ locations->InAt(0).AsRegister<XRegister>(),
+ locations->InAt(1).AsRegister<XRegister>());
+ break;
+
+ case DataType::Type::kFloat32:
+ case DataType::Type::kFloat64:
+ FMul(locations->Out().AsFpuRegister<FRegister>(),
+ locations->InAt(0).AsFpuRegister<FRegister>(),
+ locations->InAt(1).AsFpuRegister<FRegister>(),
+ instruction->GetResultType());
+ break;
+
+ default:
+ LOG(FATAL) << "Unexpected mul type " << instruction->GetResultType();
+ }
+}
+
+void LocationsBuilderRISCV64::VisitNeg(HNeg* instruction) {
+ LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(instruction);
+ switch (instruction->GetResultType()) {
+ case DataType::Type::kInt32:
+ case DataType::Type::kInt64:
+ locations->SetInAt(0, Location::RequiresRegister());
+ locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
+ break;
+
+ case DataType::Type::kFloat32:
+ case DataType::Type::kFloat64:
+ locations->SetInAt(0, Location::RequiresFpuRegister());
+ locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
+ break;
+
+ default:
+ LOG(FATAL) << "Unexpected neg type " << instruction->GetResultType();
+ UNREACHABLE();
+ }
+}
+
+void InstructionCodeGeneratorRISCV64::VisitNeg(HNeg* instruction) {
+ LocationSummary* locations = instruction->GetLocations();
+ switch (instruction->GetResultType()) {
+ case DataType::Type::kInt32:
+ __ NegW(locations->Out().AsRegister<XRegister>(), locations->InAt(0).AsRegister<XRegister>());
+ break;
+
+ case DataType::Type::kInt64:
+ __ Neg(locations->Out().AsRegister<XRegister>(), locations->InAt(0).AsRegister<XRegister>());
+ break;
+
+ case DataType::Type::kFloat32:
+ case DataType::Type::kFloat64:
+ FNeg(locations->Out().AsFpuRegister<FRegister>(),
+ locations->InAt(0).AsFpuRegister<FRegister>(),
+ instruction->GetResultType());
+ break;
+
+ default:
+ LOG(FATAL) << "Unexpected neg type " << instruction->GetResultType();
+ UNREACHABLE();
+ }
+}
+
+void LocationsBuilderRISCV64::VisitNewArray(HNewArray* instruction) {
+ LocationSummary* locations = new (GetGraph()->GetAllocator())
+ LocationSummary(instruction, LocationSummary::kCallOnMainOnly);
+ InvokeRuntimeCallingConvention calling_convention;
+ locations->SetOut(calling_convention.GetReturnLocation(DataType::Type::kReference));
+ locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
+ locations->SetInAt(1, Location::RegisterLocation(calling_convention.GetRegisterAt(1)));
+}
+
+void InstructionCodeGeneratorRISCV64::VisitNewArray(HNewArray* instruction) {
+ QuickEntrypointEnum entrypoint = CodeGenerator::GetArrayAllocationEntrypoint(instruction);
+ codegen_->InvokeRuntime(entrypoint, instruction, instruction->GetDexPc());
+ CheckEntrypointTypes<kQuickAllocArrayResolved, void*, mirror::Class*, int32_t>();
+ DCHECK(!codegen_->IsLeafMethod());
+}
+
+void LocationsBuilderRISCV64::VisitNewInstance(HNewInstance* instruction) {
+ LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(
+ instruction, LocationSummary::kCallOnMainOnly);
+ InvokeRuntimeCallingConvention calling_convention;
+ locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
+ locations->SetOut(calling_convention.GetReturnLocation(DataType::Type::kReference));
+}
+
+void InstructionCodeGeneratorRISCV64::VisitNewInstance(HNewInstance* instruction) {
+ codegen_->InvokeRuntime(instruction->GetEntrypoint(), instruction, instruction->GetDexPc());
+ CheckEntrypointTypes<kQuickAllocObjectWithChecks, void*, mirror::Class*>();
+}
+
+void LocationsBuilderRISCV64::VisitNop(HNop* instruction) {
+ new (GetGraph()->GetAllocator()) LocationSummary(instruction);
+}
+
+void InstructionCodeGeneratorRISCV64::VisitNop([[maybe_unused]] HNop* instruction) {
+ // The environment recording already happened in CodeGenerator::Compile.
+}
+
+void LocationsBuilderRISCV64::VisitNot(HNot* instruction) {
+ LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(instruction);
+ locations->SetInAt(0, Location::RequiresRegister());
+ locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
+}
+
+void InstructionCodeGeneratorRISCV64::VisitNot(HNot* instruction) {
+ LocationSummary* locations = instruction->GetLocations();
+ switch (instruction->GetResultType()) {
+ case DataType::Type::kInt32:
+ case DataType::Type::kInt64:
+ __ Not(locations->Out().AsRegister<XRegister>(), locations->InAt(0).AsRegister<XRegister>());
+ break;
+
+ default:
+ LOG(FATAL) << "Unexpected type for not operation " << instruction->GetResultType();
+ UNREACHABLE();
+ }
+}
+
+void LocationsBuilderRISCV64::VisitNotEqual(HNotEqual* instruction) {
+ HandleCondition(instruction);
+}
+
+void InstructionCodeGeneratorRISCV64::VisitNotEqual(HNotEqual* instruction) {
+ HandleCondition(instruction);
+}
+
+void LocationsBuilderRISCV64::VisitNullConstant(HNullConstant* instruction) {
+ LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(instruction);
+ locations->SetOut(Location::ConstantLocation(instruction));
+}
+
+void InstructionCodeGeneratorRISCV64::VisitNullConstant(
+ [[maybe_unused]] HNullConstant* instruction) {
+ // Will be generated at use site.
+}
+
+void LocationsBuilderRISCV64::VisitNullCheck(HNullCheck* instruction) {
+ LocationSummary* locations = codegen_->CreateThrowingSlowPathLocations(instruction);
+ locations->SetInAt(0, Location::RequiresRegister());
+}
+
+void InstructionCodeGeneratorRISCV64::VisitNullCheck(HNullCheck* instruction) {
+ codegen_->GenerateNullCheck(instruction);
+}
+
+void LocationsBuilderRISCV64::VisitOr(HOr* instruction) {
+ HandleBinaryOp(instruction);
+}
+
+void InstructionCodeGeneratorRISCV64::VisitOr(HOr* instruction) {
+ HandleBinaryOp(instruction);
+}
+
+void LocationsBuilderRISCV64::VisitPackedSwitch(HPackedSwitch* instruction) {
+ LocationSummary* locations =
+ new (GetGraph()->GetAllocator()) LocationSummary(instruction, LocationSummary::kNoCall);
+ locations->SetInAt(0, Location::RequiresRegister());
+}
+
+void InstructionCodeGeneratorRISCV64::VisitPackedSwitch(HPackedSwitch* instruction) {
+ int32_t lower_bound = instruction->GetStartValue();
+ uint32_t num_entries = instruction->GetNumEntries();
+ LocationSummary* locations = instruction->GetLocations();
+ XRegister value = locations->InAt(0).AsRegister<XRegister>();
+ HBasicBlock* switch_block = instruction->GetBlock();
+ HBasicBlock* default_block = instruction->GetDefaultBlock();
+
+ // Prepare a temporary register and an adjusted zero-based value.
+ ScratchRegisterScope srs(GetAssembler());
+ XRegister temp = srs.AllocateXRegister();
+ XRegister adjusted = value;
+ if (lower_bound != 0) {
+ adjusted = temp;
+ __ AddConst32(temp, value, -lower_bound);
+ }
+
+ // Jump to the default block if the index is out of the packed switch value range.
+ // Note: We could save one instruction for `num_entries == 1` with BNEZ but the
+ // `HInstructionBuilder` transforms that case to an `HIf`, so let's keep the code simple.
+ CHECK_NE(num_entries, 0u); // `HInstructionBuilder` creates a `HGoto` for empty packed-switch.
+ {
+ ScratchRegisterScope srs2(GetAssembler());
+ XRegister temp2 = srs2.AllocateXRegister();
+ __ LoadConst32(temp2, num_entries);
+ __ Bgeu(adjusted, temp2, codegen_->GetLabelOf(default_block)); // Can clobber `TMP` if taken.
+ }
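+ // Illustrative note: the comparison above is unsigned, so an input below
+ // `lower_bound` becomes a large unsigned value after the bias and is also
+ // routed to the default block by the single Bgeu.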
+
+ if (num_entries >= kPackedSwitchCompareJumpThreshold) {
+ GenTableBasedPackedSwitch(adjusted, temp, num_entries, switch_block);
+ } else {
+ GenPackedSwitchWithCompares(adjusted, temp, num_entries, switch_block);
+ }
+}
+
+void LocationsBuilderRISCV64::VisitParallelMove([[maybe_unused]] HParallelMove* instruction) {
+ LOG(FATAL) << "Unreachable";
+}
+
+void InstructionCodeGeneratorRISCV64::VisitParallelMove(HParallelMove* instruction) {
+ if (instruction->GetNext()->IsSuspendCheck() &&
+ instruction->GetBlock()->GetLoopInformation() != nullptr) {
+ HSuspendCheck* suspend_check = instruction->GetNext()->AsSuspendCheck();
+ // The back edge will generate the suspend check.
+ codegen_->ClearSpillSlotsFromLoopPhisInStackMap(suspend_check, instruction);
+ }
+
+ codegen_->GetMoveResolver()->EmitNativeCode(instruction);
+}
+
+void LocationsBuilderRISCV64::VisitParameterValue(HParameterValue* instruction) {
+ LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(instruction);
+ Location location = parameter_visitor_.GetNextLocation(instruction->GetType());
+ if (location.IsStackSlot()) {
+ location = Location::StackSlot(location.GetStackIndex() + codegen_->GetFrameSize());
+ } else if (location.IsDoubleStackSlot()) {
+ location = Location::DoubleStackSlot(location.GetStackIndex() + codegen_->GetFrameSize());
+ }
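+ // Illustrative note: stack-passed parameters live in the caller's
+ // outgoing-argument area, so their slots are rebased by this method's
+ // frame size to address them relative to the callee's SP.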
+ locations->SetOut(location);
+}
+
+void InstructionCodeGeneratorRISCV64::VisitParameterValue(
+ [[maybe_unused]] HParameterValue* instruction) {
+ // Nothing to do, the parameter is already at its location.
+}
+
+void LocationsBuilderRISCV64::VisitPhi(HPhi* instruction) {
+ LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(instruction);
+ for (size_t i = 0, e = locations->GetInputCount(); i < e; ++i) {
+ locations->SetInAt(i, Location::Any());
+ }
+ locations->SetOut(Location::Any());
+}
+
+void InstructionCodeGeneratorRISCV64::VisitPhi([[maybe_unused]] HPhi* instruction) {
+ LOG(FATAL) << "Unreachable";
+}
+
+void LocationsBuilderRISCV64::VisitRem(HRem* instruction) {
+ DataType::Type type = instruction->GetResultType();
+ LocationSummary::CallKind call_kind =
+ DataType::IsFloatingPointType(type) ? LocationSummary::kCallOnMainOnly
+ : LocationSummary::kNoCall;
+ LocationSummary* locations =
+ new (GetGraph()->GetAllocator()) LocationSummary(instruction, call_kind);
+
+ switch (type) {
+ case DataType::Type::kInt32:
+ case DataType::Type::kInt64:
+ locations->SetInAt(0, Location::RequiresRegister());
+ locations->SetInAt(1, Location::RegisterOrConstant(instruction->InputAt(1)));
+ locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
+ break;
+
+ case DataType::Type::kFloat32:
+ case DataType::Type::kFloat64: {
+ InvokeRuntimeCallingConvention calling_convention;
+ locations->SetInAt(0, Location::FpuRegisterLocation(calling_convention.GetFpuRegisterAt(0)));
+ locations->SetInAt(1, Location::FpuRegisterLocation(calling_convention.GetFpuRegisterAt(1)));
+ locations->SetOut(calling_convention.GetReturnLocation(type));
+ break;
+ }
+
+ default:
+ LOG(FATAL) << "Unexpected rem type " << type;
+ UNREACHABLE();
+ }
+}
+
+void InstructionCodeGeneratorRISCV64::VisitRem(HRem* instruction) {
+ DataType::Type type = instruction->GetType();
+
+ switch (type) {
+ case DataType::Type::kInt32:
+ case DataType::Type::kInt64:
+ GenerateDivRemIntegral(instruction);
+ break;
+
+ case DataType::Type::kFloat32:
+ case DataType::Type::kFloat64: {
+ QuickEntrypointEnum entrypoint =
+ (type == DataType::Type::kFloat32) ? kQuickFmodf : kQuickFmod;
+ codegen_->InvokeRuntime(entrypoint, instruction, instruction->GetDexPc());
+ if (type == DataType::Type::kFloat32) {
+ CheckEntrypointTypes<kQuickFmodf, float, float, float>();
+ } else {
+ CheckEntrypointTypes<kQuickFmod, double, double, double>();
+ }
+ break;
+ }
+ default:
+ LOG(FATAL) << "Unexpected rem type " << type;
+ UNREACHABLE();
+ }
+}
+
+void LocationsBuilderRISCV64::VisitReturn(HReturn* instruction) {
+ LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(instruction);
+ DataType::Type return_type = instruction->InputAt(0)->GetType();
+ DCHECK_NE(return_type, DataType::Type::kVoid);
+ locations->SetInAt(0, Riscv64ReturnLocation(return_type));
+}
+
+void InstructionCodeGeneratorRISCV64::VisitReturn(HReturn* instruction) {
+ if (GetGraph()->IsCompilingOsr()) {
+ // To simplify callers of an OSR method, we put a floating point return value
+ // in both floating point and core return registers.
+ DataType::Type type = instruction->InputAt(0)->GetType();
+ if (DataType::IsFloatingPointType(type)) {
+ FMvX(A0, FA0, type);
+ }
+ }
+ codegen_->GenerateFrameExit();
+}
+
+void LocationsBuilderRISCV64::VisitReturnVoid(HReturnVoid* instruction) {
+ instruction->SetLocations(nullptr);
+}
+
+void InstructionCodeGeneratorRISCV64::VisitReturnVoid([[maybe_unused]] HReturnVoid* instruction) {
+ codegen_->GenerateFrameExit();
+}
+
+void LocationsBuilderRISCV64::VisitRor(HRor* instruction) {
+ HandleShift(instruction);
+}
+
+void InstructionCodeGeneratorRISCV64::VisitRor(HRor* instruction) {
+ HandleShift(instruction);
+}
+
+void LocationsBuilderRISCV64::VisitShl(HShl* instruction) {
+ HandleShift(instruction);
+}
+
+void InstructionCodeGeneratorRISCV64::VisitShl(HShl* instruction) {
+ HandleShift(instruction);
+}
+
+void LocationsBuilderRISCV64::VisitShr(HShr* instruction) {
+ HandleShift(instruction);
+}
+
+void InstructionCodeGeneratorRISCV64::VisitShr(HShr* instruction) {
+ HandleShift(instruction);
+}
+
+void LocationsBuilderRISCV64::VisitStaticFieldGet(HStaticFieldGet* instruction) {
+ HandleFieldGet(instruction);
+}
+
+void InstructionCodeGeneratorRISCV64::VisitStaticFieldGet(HStaticFieldGet* instruction) {
+ HandleFieldGet(instruction, instruction->GetFieldInfo());
+}
+
+void LocationsBuilderRISCV64::VisitStaticFieldSet(HStaticFieldSet* instruction) {
+ HandleFieldSet(instruction);
+}
+
+void InstructionCodeGeneratorRISCV64::VisitStaticFieldSet(HStaticFieldSet* instruction) {
+ HandleFieldSet(instruction,
+ instruction->GetFieldInfo(),
+ instruction->GetValueCanBeNull(),
+ instruction->GetWriteBarrierKind());
+}
+
+void LocationsBuilderRISCV64::VisitStringBuilderAppend(HStringBuilderAppend* instruction) {
+ codegen_->CreateStringBuilderAppendLocations(instruction, Location::RegisterLocation(A0));
+}
+
+void InstructionCodeGeneratorRISCV64::VisitStringBuilderAppend(HStringBuilderAppend* instruction) {
+ __ LoadConst32(A0, instruction->GetFormat()->GetValue());
+ codegen_->InvokeRuntime(kQuickStringBuilderAppend, instruction, instruction->GetDexPc());
+}
+
+void LocationsBuilderRISCV64::VisitUnresolvedInstanceFieldGet(
+ HUnresolvedInstanceFieldGet* instruction) {
+ FieldAccessCallingConventionRISCV64 calling_convention;
+ codegen_->CreateUnresolvedFieldLocationSummary(
+ instruction, instruction->GetFieldType(), calling_convention);
+}
+
+void InstructionCodeGeneratorRISCV64::VisitUnresolvedInstanceFieldGet(
+ HUnresolvedInstanceFieldGet* instruction) {
+ FieldAccessCallingConventionRISCV64 calling_convention;
+ codegen_->GenerateUnresolvedFieldAccess(instruction,
+ instruction->GetFieldType(),
+ instruction->GetFieldIndex(),
+ instruction->GetDexPc(),
+ calling_convention);
+}
+
+void LocationsBuilderRISCV64::VisitUnresolvedInstanceFieldSet(
+ HUnresolvedInstanceFieldSet* instruction) {
+ FieldAccessCallingConventionRISCV64 calling_convention;
+ codegen_->CreateUnresolvedFieldLocationSummary(
+ instruction, instruction->GetFieldType(), calling_convention);
+}
+
+void InstructionCodeGeneratorRISCV64::VisitUnresolvedInstanceFieldSet(
+ HUnresolvedInstanceFieldSet* instruction) {
+ FieldAccessCallingConventionRISCV64 calling_convention;
+ codegen_->GenerateUnresolvedFieldAccess(instruction,
+ instruction->GetFieldType(),
+ instruction->GetFieldIndex(),
+ instruction->GetDexPc(),
+ calling_convention);
+}
+
+void LocationsBuilderRISCV64::VisitUnresolvedStaticFieldGet(
+ HUnresolvedStaticFieldGet* instruction) {
+ FieldAccessCallingConventionRISCV64 calling_convention;
+ codegen_->CreateUnresolvedFieldLocationSummary(
+ instruction, instruction->GetFieldType(), calling_convention);
+}
+
+void InstructionCodeGeneratorRISCV64::VisitUnresolvedStaticFieldGet(
+ HUnresolvedStaticFieldGet* instruction) {
+ FieldAccessCallingConventionRISCV64 calling_convention;
+ codegen_->GenerateUnresolvedFieldAccess(instruction,
+ instruction->GetFieldType(),
+ instruction->GetFieldIndex(),
+ instruction->GetDexPc(),
+ calling_convention);
+}
+
+void LocationsBuilderRISCV64::VisitUnresolvedStaticFieldSet(
+ HUnresolvedStaticFieldSet* instruction) {
+ FieldAccessCallingConventionRISCV64 calling_convention;
+ codegen_->CreateUnresolvedFieldLocationSummary(
+ instruction, instruction->GetFieldType(), calling_convention);
+}
+
+void InstructionCodeGeneratorRISCV64::VisitUnresolvedStaticFieldSet(
+ HUnresolvedStaticFieldSet* instruction) {
+ FieldAccessCallingConventionRISCV64 calling_convention;
+ codegen_->GenerateUnresolvedFieldAccess(instruction,
+ instruction->GetFieldType(),
+ instruction->GetFieldIndex(),
+ instruction->GetDexPc(),
+ calling_convention);
+}
+
+void LocationsBuilderRISCV64::VisitSelect(HSelect* instruction) {
+ LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(instruction);
+ if (DataType::IsFloatingPointType(instruction->GetType())) {
+ locations->SetInAt(0, FpuRegisterOrZeroBitPatternLocation(instruction->GetFalseValue()));
+ locations->SetInAt(1, FpuRegisterOrZeroBitPatternLocation(instruction->GetTrueValue()));
+ locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
+ if (!locations->InAt(0).IsConstant() && !locations->InAt(1).IsConstant()) {
+ locations->AddTemp(Location::RequiresRegister());
+ }
+ } else {
+ locations->SetInAt(0, RegisterOrZeroBitPatternLocation(instruction->GetFalseValue()));
+ locations->SetInAt(1, RegisterOrZeroBitPatternLocation(instruction->GetTrueValue()));
+ locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap);
+ }
+
+ if (IsBooleanValueOrMaterializedCondition(instruction->GetCondition())) {
+ locations->SetInAt(2, Location::RequiresRegister());
+ }
+}
+
+void InstructionCodeGeneratorRISCV64::VisitSelect(HSelect* instruction) {
+ LocationSummary* locations = instruction->GetLocations();
+ HInstruction* cond = instruction->GetCondition();
+ ScratchRegisterScope srs(GetAssembler());
+ XRegister tmp = srs.AllocateXRegister();
+ if (!IsBooleanValueOrMaterializedCondition(cond)) {
+ DataType::Type cond_type = cond->InputAt(0)->GetType();
+ IfCondition if_cond = cond->AsCondition()->GetCondition();
+ if (DataType::IsFloatingPointType(cond_type)) {
+ GenerateFpCondition(if_cond,
+ cond->AsCondition()->IsGtBias(),
+ cond_type,
+ cond->GetLocations(),
+ /*label=*/ nullptr,
+ tmp,
+ /*to_all_bits=*/ true);
+ } else {
+ GenerateIntLongCondition(if_cond, cond->GetLocations(), tmp, /*to_all_bits=*/ true);
+ }
+ } else {
+ // TODO(riscv64): Remove the normalizing SNEZ when we can ensure that booleans
+ // have only values 0 and 1. b/279302742
+ __ Snez(tmp, locations->InAt(2).AsRegister<XRegister>());
+ __ Neg(tmp, tmp);
+ }
+
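+  // Floating-point selects are performed on integer bit patterns: the inputs are
+  // moved to GPRs, the branch-free bitwise select below is applied, and the result
+  // is moved back to an FPR at the end.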
+ XRegister true_reg, false_reg, xor_reg, out_reg;
+ DataType::Type type = instruction->GetType();
+ if (DataType::IsFloatingPointType(type)) {
+ if (locations->InAt(0).IsConstant()) {
+ DCHECK(locations->InAt(0).GetConstant()->IsZeroBitPattern());
+ false_reg = Zero;
+ } else {
+ false_reg = srs.AllocateXRegister();
+ FMvX(false_reg, locations->InAt(0).AsFpuRegister<FRegister>(), type);
+ }
+ if (locations->InAt(1).IsConstant()) {
+ DCHECK(locations->InAt(1).GetConstant()->IsZeroBitPattern());
+ true_reg = Zero;
+ } else {
+ true_reg = (false_reg == Zero) ? srs.AllocateXRegister()
+ : locations->GetTemp(0).AsRegister<XRegister>();
+ FMvX(true_reg, locations->InAt(1).AsFpuRegister<FRegister>(), type);
+ }
+ // We can clobber the "true value" with the XOR result.
+ // Note: The XOR is not emitted if `true_reg == Zero`, see below.
+ xor_reg = true_reg;
+ out_reg = tmp;
+ } else {
+ false_reg = InputXRegisterOrZero(locations->InAt(0));
+ true_reg = InputXRegisterOrZero(locations->InAt(1));
+ xor_reg = srs.AllocateXRegister();
+ out_reg = locations->Out().AsRegister<XRegister>();
+ }
+
+ // We use a branch-free implementation of `HSelect`.
+ // With `tmp` initialized to 0 for `false` and -1 for `true`:
+ // xor xor_reg, false_reg, true_reg
+ // and tmp, tmp, xor_reg
+ // xor out_reg, tmp, false_reg
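+  // For example, when the condition is true (`tmp` == -1), the AND keeps
+  // `false_reg ^ true_reg` and the final XOR with `false_reg` yields `true_reg`;
+  // when the condition is false (`tmp` == 0), the AND yields 0 and the final XOR
+  // reproduces `false_reg`.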
+ if (false_reg == Zero) {
+ xor_reg = true_reg;
+ } else if (true_reg == Zero) {
+ xor_reg = false_reg;
+ } else {
+ DCHECK_NE(xor_reg, Zero);
+ __ Xor(xor_reg, false_reg, true_reg);
+ }
+ __ And(tmp, tmp, xor_reg);
+ __ Xor(out_reg, tmp, false_reg);
+
+ if (type == DataType::Type::kFloat64) {
+ __ FMvDX(locations->Out().AsFpuRegister<FRegister>(), out_reg);
+ } else if (type == DataType::Type::kFloat32) {
+ __ FMvWX(locations->Out().AsFpuRegister<FRegister>(), out_reg);
+ }
+}
+
+void LocationsBuilderRISCV64::VisitSub(HSub* instruction) {
+ HandleBinaryOp(instruction);
+}
+
+void InstructionCodeGeneratorRISCV64::VisitSub(HSub* instruction) {
+ HandleBinaryOp(instruction);
+}
+
+void LocationsBuilderRISCV64::VisitSuspendCheck(HSuspendCheck* instruction) {
+ LocationSummary* locations = new (GetGraph()->GetAllocator())
+ LocationSummary(instruction, LocationSummary::kCallOnSlowPath);
+  // In the suspend check slow path, there are usually no caller-save registers at all.
+  // If SIMD instructions are present, however, we force spilling all live SIMD
+  // registers in full width, since the runtime only saves/restores their lower part.
+ locations->SetCustomSlowPathCallerSaves(GetGraph()->HasSIMD() ? RegisterSet::AllFpu() :
+ RegisterSet::Empty());
+}
+
+void InstructionCodeGeneratorRISCV64::VisitSuspendCheck(HSuspendCheck* instruction) {
+ HBasicBlock* block = instruction->GetBlock();
+ if (block->GetLoopInformation() != nullptr) {
+ DCHECK(block->GetLoopInformation()->GetSuspendCheck() == instruction);
+ // The back edge will generate the suspend check.
+ return;
+ }
+ if (block->IsEntryBlock() && instruction->GetNext()->IsGoto()) {
+ // The goto will generate the suspend check.
+ return;
+ }
+ GenerateSuspendCheck(instruction, nullptr);
+}
+
+void LocationsBuilderRISCV64::VisitThrow(HThrow* instruction) {
+ LocationSummary* locations = new (GetGraph()->GetAllocator())
+ LocationSummary(instruction, LocationSummary::kCallOnMainOnly);
+ InvokeRuntimeCallingConvention calling_convention;
+ locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
+}
+
+void InstructionCodeGeneratorRISCV64::VisitThrow(HThrow* instruction) {
+ codegen_->InvokeRuntime(kQuickDeliverException, instruction, instruction->GetDexPc());
+ CheckEntrypointTypes<kQuickDeliverException, void, mirror::Object*>();
+}
+
+void LocationsBuilderRISCV64::VisitTryBoundary(HTryBoundary* instruction) {
+ instruction->SetLocations(nullptr);
+}
+
+void InstructionCodeGeneratorRISCV64::VisitTryBoundary(HTryBoundary* instruction) {
+ HBasicBlock* successor = instruction->GetNormalFlowSuccessor();
+ if (!successor->IsExitBlock()) {
+ HandleGoto(instruction, successor);
+ }
+}
+
+void LocationsBuilderRISCV64::VisitTypeConversion(HTypeConversion* instruction) {
+ DataType::Type input_type = instruction->GetInputType();
+ DataType::Type result_type = instruction->GetResultType();
+ DCHECK(!DataType::IsTypeConversionImplicit(input_type, result_type))
+ << input_type << " -> " << result_type;
+
+ if ((input_type == DataType::Type::kReference) || (input_type == DataType::Type::kVoid) ||
+ (result_type == DataType::Type::kReference) || (result_type == DataType::Type::kVoid)) {
+ LOG(FATAL) << "Unexpected type conversion from " << input_type << " to " << result_type;
+ }
+
+ LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(instruction);
+
+ if (DataType::IsFloatingPointType(input_type)) {
+ locations->SetInAt(0, Location::RequiresFpuRegister());
+ } else {
+ locations->SetInAt(0, Location::RequiresRegister());
+ }
+
+ if (DataType::IsFloatingPointType(result_type)) {
+ locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
+ } else {
+ locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
+ }
+}
+
+void InstructionCodeGeneratorRISCV64::VisitTypeConversion(HTypeConversion* instruction) {
+ LocationSummary* locations = instruction->GetLocations();
+ DataType::Type result_type = instruction->GetResultType();
+ DataType::Type input_type = instruction->GetInputType();
+
+ DCHECK(!DataType::IsTypeConversionImplicit(input_type, result_type))
+ << input_type << " -> " << result_type;
+
+ if (DataType::IsIntegralType(result_type) && DataType::IsIntegralType(input_type)) {
+ XRegister dst = locations->Out().AsRegister<XRegister>();
+ XRegister src = locations->InAt(0).AsRegister<XRegister>();
+ switch (result_type) {
+ case DataType::Type::kUint8:
+ __ ZextB(dst, src);
+ break;
+ case DataType::Type::kInt8:
+ __ SextB(dst, src);
+ break;
+ case DataType::Type::kUint16:
+ __ ZextH(dst, src);
+ break;
+ case DataType::Type::kInt16:
+ __ SextH(dst, src);
+ break;
+ case DataType::Type::kInt32:
+ case DataType::Type::kInt64:
+        // Sign-extend the low 32 bits into bits 32 through 63 for int-to-long and
+        // long-to-int conversions. The only case where nothing needs to be done is an
+        // int-to-long conversion with the same input and output register, as the
+        // 32-bit value is already kept sign-extended in the register.
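+        // Note: `addiw dst, src, 0` is the canonical `sext.w` idiom on RV64.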
+ if ((input_type == DataType::Type::kInt64) || (dst != src)) {
+ __ Addiw(dst, src, 0);
+ }
+ break;
+
+ default:
+ LOG(FATAL) << "Unexpected type conversion from " << input_type
+ << " to " << result_type;
+ UNREACHABLE();
+ }
+ } else if (DataType::IsFloatingPointType(result_type) && DataType::IsIntegralType(input_type)) {
+ FRegister dst = locations->Out().AsFpuRegister<FRegister>();
+ XRegister src = locations->InAt(0).AsRegister<XRegister>();
+ if (input_type == DataType::Type::kInt64) {
+ if (result_type == DataType::Type::kFloat32) {
+ __ FCvtSL(dst, src, FPRoundingMode::kRNE);
+ } else {
+ __ FCvtDL(dst, src, FPRoundingMode::kRNE);
+ }
+ } else {
+ if (result_type == DataType::Type::kFloat32) {
+ __ FCvtSW(dst, src, FPRoundingMode::kRNE);
+ } else {
+ __ FCvtDW(dst, src); // No rounding.
+ }
+ }
+ } else if (DataType::IsIntegralType(result_type) && DataType::IsFloatingPointType(input_type)) {
+ CHECK(result_type == DataType::Type::kInt32 || result_type == DataType::Type::kInt64);
+ XRegister dst = locations->Out().AsRegister<XRegister>();
+ FRegister src = locations->InAt(0).AsFpuRegister<FRegister>();
+ if (result_type == DataType::Type::kInt64) {
+ if (input_type == DataType::Type::kFloat32) {
+ __ FCvtLS(dst, src, FPRoundingMode::kRTZ);
+ } else {
+ __ FCvtLD(dst, src, FPRoundingMode::kRTZ);
+ }
+ } else {
+ if (input_type == DataType::Type::kFloat32) {
+ __ FCvtWS(dst, src, FPRoundingMode::kRTZ);
+ } else {
+ __ FCvtWD(dst, src, FPRoundingMode::kRTZ);
+ }
+ }
+    // For NaN inputs we need to return 0; RISC-V FCVT produces the largest positive
+    // integer for NaN, whereas Java requires 0.
+ ScratchRegisterScope srs(GetAssembler());
+ XRegister tmp = srs.AllocateXRegister();
+ FClass(tmp, src, input_type);
+ __ Sltiu(tmp, tmp, kFClassNaNMinValue); // 0 for NaN, 1 otherwise.
+ __ Neg(tmp, tmp); // 0 for NaN, -1 otherwise.
+ __ And(dst, dst, tmp); // Cleared for NaN.
+ } else if (DataType::IsFloatingPointType(result_type) &&
+ DataType::IsFloatingPointType(input_type)) {
+ FRegister dst = locations->Out().AsFpuRegister<FRegister>();
+ FRegister src = locations->InAt(0).AsFpuRegister<FRegister>();
+ if (result_type == DataType::Type::kFloat32) {
+ __ FCvtSD(dst, src);
+ } else {
+ __ FCvtDS(dst, src);
+ }
+ } else {
+ LOG(FATAL) << "Unexpected or unimplemented type conversion from " << input_type
+ << " to " << result_type;
+ UNREACHABLE();
+ }
+}
+
+void LocationsBuilderRISCV64::VisitUShr(HUShr* instruction) {
+ HandleShift(instruction);
+}
+
+void InstructionCodeGeneratorRISCV64::VisitUShr(HUShr* instruction) {
+ HandleShift(instruction);
+}
+
+void LocationsBuilderRISCV64::VisitXor(HXor* instruction) {
+ HandleBinaryOp(instruction);
+}
+
+void InstructionCodeGeneratorRISCV64::VisitXor(HXor* instruction) {
+ HandleBinaryOp(instruction);
+}
+
+void LocationsBuilderRISCV64::VisitVecReplicateScalar(HVecReplicateScalar* instruction) {
+ UNUSED(instruction);
+ LOG(FATAL) << "Unimplemented";
+}
+
+void InstructionCodeGeneratorRISCV64::VisitVecReplicateScalar(HVecReplicateScalar* instruction) {
+ UNUSED(instruction);
+ LOG(FATAL) << "Unimplemented";
+}
+
+void LocationsBuilderRISCV64::VisitVecExtractScalar(HVecExtractScalar* instruction) {
+ UNUSED(instruction);
+ LOG(FATAL) << "Unimplemented";
+}
+
+void InstructionCodeGeneratorRISCV64::VisitVecExtractScalar(HVecExtractScalar* instruction) {
+ UNUSED(instruction);
+ LOG(FATAL) << "Unimplemented";
+}
+
+void LocationsBuilderRISCV64::VisitVecReduce(HVecReduce* instruction) {
+ UNUSED(instruction);
+ LOG(FATAL) << "Unimplemented";
+}
+
+void InstructionCodeGeneratorRISCV64::VisitVecReduce(HVecReduce* instruction) {
+ UNUSED(instruction);
+ LOG(FATAL) << "Unimplemented";
+}
+
+void LocationsBuilderRISCV64::VisitVecCnv(HVecCnv* instruction) {
+ UNUSED(instruction);
+ LOG(FATAL) << "Unimplemented";
+}
+
+void InstructionCodeGeneratorRISCV64::VisitVecCnv(HVecCnv* instruction) {
+ UNUSED(instruction);
+ LOG(FATAL) << "Unimplemented";
+}
+
+void LocationsBuilderRISCV64::VisitVecNeg(HVecNeg* instruction) {
+ UNUSED(instruction);
+ LOG(FATAL) << "Unimplemented";
+}
+
+void InstructionCodeGeneratorRISCV64::VisitVecNeg(HVecNeg* instruction) {
+ UNUSED(instruction);
+ LOG(FATAL) << "Unimplemented";
+}
+
+void LocationsBuilderRISCV64::VisitVecAbs(HVecAbs* instruction) {
+ UNUSED(instruction);
+ LOG(FATAL) << "Unimplemented";
+}
+
+void InstructionCodeGeneratorRISCV64::VisitVecAbs(HVecAbs* instruction) {
+ UNUSED(instruction);
+ LOG(FATAL) << "Unimplemented";
+}
+
+void LocationsBuilderRISCV64::VisitVecNot(HVecNot* instruction) {
+ UNUSED(instruction);
+ LOG(FATAL) << "Unimplemented";
+}
+
+void InstructionCodeGeneratorRISCV64::VisitVecNot(HVecNot* instruction) {
+ UNUSED(instruction);
+ LOG(FATAL) << "Unimplemented";
+}
+
+void LocationsBuilderRISCV64::VisitVecAdd(HVecAdd* instruction) {
+ UNUSED(instruction);
+ LOG(FATAL) << "Unimplemented";
+}
+
+void InstructionCodeGeneratorRISCV64::VisitVecAdd(HVecAdd* instruction) {
+ UNUSED(instruction);
+ LOG(FATAL) << "Unimplemented";
+}
+
+void LocationsBuilderRISCV64::VisitVecHalvingAdd(HVecHalvingAdd* instruction) {
+ UNUSED(instruction);
+ LOG(FATAL) << "Unimplemented";
+}
+
+void InstructionCodeGeneratorRISCV64::VisitVecHalvingAdd(HVecHalvingAdd* instruction) {
+ UNUSED(instruction);
+ LOG(FATAL) << "Unimplemented";
+}
+
+void LocationsBuilderRISCV64::VisitVecSub(HVecSub* instruction) {
+ UNUSED(instruction);
+ LOG(FATAL) << "Unimplemented";
+}
+
+void InstructionCodeGeneratorRISCV64::VisitVecSub(HVecSub* instruction) {
+ UNUSED(instruction);
+ LOG(FATAL) << "Unimplemented";
+}
+
+void LocationsBuilderRISCV64::VisitVecMul(HVecMul* instruction) {
+ UNUSED(instruction);
+ LOG(FATAL) << "Unimplemented";
+}
+
+void InstructionCodeGeneratorRISCV64::VisitVecMul(HVecMul* instruction) {
+ UNUSED(instruction);
+ LOG(FATAL) << "Unimplemented";
+}
+
+void LocationsBuilderRISCV64::VisitVecDiv(HVecDiv* instruction) {
+ UNUSED(instruction);
+ LOG(FATAL) << "Unimplemented";
+}
+
+void InstructionCodeGeneratorRISCV64::VisitVecDiv(HVecDiv* instruction) {
+ UNUSED(instruction);
+ LOG(FATAL) << "Unimplemented";
+}
+
+void LocationsBuilderRISCV64::VisitVecMin(HVecMin* instruction) {
+ UNUSED(instruction);
+ LOG(FATAL) << "Unimplemented";
+}
+
+void InstructionCodeGeneratorRISCV64::VisitVecMin(HVecMin* instruction) {
+ UNUSED(instruction);
+ LOG(FATAL) << "Unimplemented";
+}
+
+void LocationsBuilderRISCV64::VisitVecMax(HVecMax* instruction) {
+ UNUSED(instruction);
+ LOG(FATAL) << "Unimplemented";
+}
+
+void InstructionCodeGeneratorRISCV64::VisitVecMax(HVecMax* instruction) {
+ UNUSED(instruction);
+ LOG(FATAL) << "Unimplemented";
+}
+
+void LocationsBuilderRISCV64::VisitVecAnd(HVecAnd* instruction) {
+ UNUSED(instruction);
+ LOG(FATAL) << "Unimplemented";
+}
+
+void InstructionCodeGeneratorRISCV64::VisitVecAnd(HVecAnd* instruction) {
+ UNUSED(instruction);
+ LOG(FATAL) << "Unimplemented";
+}
+
+void LocationsBuilderRISCV64::VisitVecAndNot(HVecAndNot* instruction) {
+ UNUSED(instruction);
+ LOG(FATAL) << "Unimplemented";
+}
+
+void InstructionCodeGeneratorRISCV64::VisitVecAndNot(HVecAndNot* instruction) {
+ UNUSED(instruction);
+ LOG(FATAL) << "Unimplemented";
+}
+
+void LocationsBuilderRISCV64::VisitVecOr(HVecOr* instruction) {
+ UNUSED(instruction);
+ LOG(FATAL) << "Unimplemented";
+}
+
+void InstructionCodeGeneratorRISCV64::VisitVecOr(HVecOr* instruction) {
+ UNUSED(instruction);
+ LOG(FATAL) << "Unimplemented";
+}
+
+void LocationsBuilderRISCV64::VisitVecXor(HVecXor* instruction) {
+ UNUSED(instruction);
+ LOG(FATAL) << "Unimplemented";
+}
+
+void InstructionCodeGeneratorRISCV64::VisitVecXor(HVecXor* instruction) {
+ UNUSED(instruction);
+ LOG(FATAL) << "Unimplemented";
+}
+
+void LocationsBuilderRISCV64::VisitVecSaturationAdd(HVecSaturationAdd* instruction) {
+ UNUSED(instruction);
+ LOG(FATAL) << "Unimplemented";
+}
+
+void InstructionCodeGeneratorRISCV64::VisitVecSaturationAdd(HVecSaturationAdd* instruction) {
+ UNUSED(instruction);
+ LOG(FATAL) << "Unimplemented";
+}
+
+void LocationsBuilderRISCV64::VisitVecSaturationSub(HVecSaturationSub* instruction) {
+ UNUSED(instruction);
+ LOG(FATAL) << "Unimplemented";
+}
+
+void InstructionCodeGeneratorRISCV64::VisitVecSaturationSub(HVecSaturationSub* instruction) {
+ UNUSED(instruction);
+ LOG(FATAL) << "Unimplemented";
+}
+
+void LocationsBuilderRISCV64::VisitVecShl(HVecShl* instruction) {
+ UNUSED(instruction);
+ LOG(FATAL) << "Unimplemented";
+}
+
+void InstructionCodeGeneratorRISCV64::VisitVecShl(HVecShl* instruction) {
+ UNUSED(instruction);
+ LOG(FATAL) << "Unimplemented";
+}
+
+void LocationsBuilderRISCV64::VisitVecShr(HVecShr* instruction) {
+ UNUSED(instruction);
+ LOG(FATAL) << "Unimplemented";
+}
+
+void InstructionCodeGeneratorRISCV64::VisitVecShr(HVecShr* instruction) {
+ UNUSED(instruction);
+ LOG(FATAL) << "Unimplemented";
+}
+
+void LocationsBuilderRISCV64::VisitVecUShr(HVecUShr* instruction) {
+ UNUSED(instruction);
+ LOG(FATAL) << "Unimplemented";
+}
+
+void InstructionCodeGeneratorRISCV64::VisitVecUShr(HVecUShr* instruction) {
+ UNUSED(instruction);
+ LOG(FATAL) << "Unimplemented";
+}
+
+void LocationsBuilderRISCV64::VisitVecSetScalars(HVecSetScalars* instruction) {
+ UNUSED(instruction);
+ LOG(FATAL) << "Unimplemented";
+}
+
+void InstructionCodeGeneratorRISCV64::VisitVecSetScalars(HVecSetScalars* instruction) {
+ UNUSED(instruction);
+ LOG(FATAL) << "Unimplemented";
+}
+
+void LocationsBuilderRISCV64::VisitVecMultiplyAccumulate(HVecMultiplyAccumulate* instruction) {
+ UNUSED(instruction);
+ LOG(FATAL) << "Unimplemented";
+}
+
+void InstructionCodeGeneratorRISCV64::VisitVecMultiplyAccumulate(
+ HVecMultiplyAccumulate* instruction) {
+ UNUSED(instruction);
+ LOG(FATAL) << "Unimplemented";
+}
+
+void LocationsBuilderRISCV64::VisitVecSADAccumulate(HVecSADAccumulate* instruction) {
+ UNUSED(instruction);
+ LOG(FATAL) << "Unimplemented";
+}
+
+void InstructionCodeGeneratorRISCV64::VisitVecSADAccumulate(HVecSADAccumulate* instruction) {
+ UNUSED(instruction);
+ LOG(FATAL) << "Unimplemented";
+}
+
+void LocationsBuilderRISCV64::VisitVecDotProd(HVecDotProd* instruction) {
+ UNUSED(instruction);
+ LOG(FATAL) << "Unimplemented";
+}
+
+void InstructionCodeGeneratorRISCV64::VisitVecDotProd(HVecDotProd* instruction) {
+ UNUSED(instruction);
+ LOG(FATAL) << "Unimplemented";
+}
+
+void LocationsBuilderRISCV64::VisitVecLoad(HVecLoad* instruction) {
+ UNUSED(instruction);
+ LOG(FATAL) << "Unimplemented";
+}
+
+void InstructionCodeGeneratorRISCV64::VisitVecLoad(HVecLoad* instruction) {
+ UNUSED(instruction);
+ LOG(FATAL) << "Unimplemented";
+}
+
+void LocationsBuilderRISCV64::VisitVecStore(HVecStore* instruction) {
+ UNUSED(instruction);
+ LOG(FATAL) << "Unimplemented";
+}
+
+void InstructionCodeGeneratorRISCV64::VisitVecStore(HVecStore* instruction) {
+ UNUSED(instruction);
+ LOG(FATAL) << "Unimplemented";
+}
+
+void LocationsBuilderRISCV64::VisitVecPredSetAll(HVecPredSetAll* instruction) {
+ UNUSED(instruction);
+ LOG(FATAL) << "Unimplemented";
+}
+
+void InstructionCodeGeneratorRISCV64::VisitVecPredSetAll(HVecPredSetAll* instruction) {
+ UNUSED(instruction);
+ LOG(FATAL) << "Unimplemented";
+}
+
+void LocationsBuilderRISCV64::VisitVecPredWhile(HVecPredWhile* instruction) {
+ UNUSED(instruction);
+ LOG(FATAL) << "Unimplemented";
+}
+
+void InstructionCodeGeneratorRISCV64::VisitVecPredWhile(HVecPredWhile* instruction) {
+ UNUSED(instruction);
+ LOG(FATAL) << "Unimplemented";
+}
+
+void LocationsBuilderRISCV64::VisitVecPredToBoolean(HVecPredToBoolean* instruction) {
+ UNUSED(instruction);
+ LOG(FATAL) << "Unimplemented";
+}
+
+void InstructionCodeGeneratorRISCV64::VisitVecPredToBoolean(HVecPredToBoolean* instruction) {
+ UNUSED(instruction);
+ LOG(FATAL) << "Unimplemented";
+}
+
+void LocationsBuilderRISCV64::VisitVecCondition(HVecCondition* instruction) {
+ UNUSED(instruction);
+ LOG(FATAL) << "Unimplemented";
+}
+
+void InstructionCodeGeneratorRISCV64::VisitVecCondition(HVecCondition* instruction) {
+ UNUSED(instruction);
+ LOG(FATAL) << "Unimplemented";
+}
+
+void LocationsBuilderRISCV64::VisitVecPredNot(HVecPredNot* instruction) {
+ UNUSED(instruction);
+ LOG(FATAL) << "Unimplemented";
+}
+
+void InstructionCodeGeneratorRISCV64::VisitVecPredNot(HVecPredNot* instruction) {
+ UNUSED(instruction);
+ LOG(FATAL) << "Unimplemented";
+}
+
+namespace detail {
+
+// Mark which intrinsics we don't have handcrafted code for.
+template <Intrinsics T>
+struct IsUnimplemented {
+ bool is_unimplemented = false;
+};
+
+#define TRUE_OVERRIDE(Name) \
+ template <> \
+ struct IsUnimplemented<Intrinsics::k##Name> { \
+ bool is_unimplemented = true; \
+ };
+UNIMPLEMENTED_INTRINSIC_LIST_RISCV64(TRUE_OVERRIDE)
+#undef TRUE_OVERRIDE
+
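+// Table indexed by the Intrinsics enum (slot 0 is kNone); an entry is true when there
+// is no handcrafted RISC-V 64 implementation for that intrinsic.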
+static constexpr bool kIsIntrinsicUnimplemented[] = {
+ false, // kNone
+#define IS_UNIMPLEMENTED(Intrinsic, ...) \
+ IsUnimplemented<Intrinsics::k##Intrinsic>().is_unimplemented,
+ ART_INTRINSICS_LIST(IS_UNIMPLEMENTED)
+#undef IS_UNIMPLEMENTED
+};
+
+} // namespace detail
+
+CodeGeneratorRISCV64::CodeGeneratorRISCV64(HGraph* graph,
+ const CompilerOptions& compiler_options,
+ OptimizingCompilerStats* stats)
+ : CodeGenerator(graph,
+ kNumberOfXRegisters,
+ kNumberOfFRegisters,
+ /*number_of_register_pairs=*/ 0u,
+ ComputeRegisterMask(kCoreCalleeSaves, arraysize(kCoreCalleeSaves)),
+ ComputeRegisterMask(kFpuCalleeSaves, arraysize(kFpuCalleeSaves)),
+ compiler_options,
+ stats,
+ ArrayRef<const bool>(detail::kIsIntrinsicUnimplemented)),
+ assembler_(graph->GetAllocator(),
+ compiler_options.GetInstructionSetFeatures()->AsRiscv64InstructionSetFeatures()),
+ location_builder_(graph, this),
+ instruction_visitor_(graph, this),
+ block_labels_(nullptr),
+ move_resolver_(graph->GetAllocator(), this),
+ uint32_literals_(std::less<uint32_t>(),
+ graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
+ uint64_literals_(std::less<uint64_t>(),
+ graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
+ boot_image_method_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
+ method_bss_entry_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
+ boot_image_type_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
+ type_bss_entry_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
+ public_type_bss_entry_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
+ package_type_bss_entry_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
+ boot_image_string_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
+ string_bss_entry_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
+ boot_image_jni_entrypoint_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
+ boot_image_other_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
+ jit_string_patches_(StringReferenceValueComparator(),
+ graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
+ jit_class_patches_(TypeReferenceValueComparator(),
+ graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)) {
+ // Always mark the RA register to be saved.
+ AddAllocatedRegister(Location::RegisterLocation(RA));
+}
+
+void CodeGeneratorRISCV64::MaybeIncrementHotness(bool is_frame_entry) {
+ if (GetCompilerOptions().CountHotnessInCompiledCode()) {
+ ScratchRegisterScope srs(GetAssembler());
+ XRegister method = is_frame_entry ? kArtMethodRegister : srs.AllocateXRegister();
+ if (!is_frame_entry) {
+ __ Loadd(method, SP, 0);
+ }
+ XRegister counter = srs.AllocateXRegister();
+ __ Loadhu(counter, method, ArtMethod::HotnessCountOffset().Int32Value());
+ Riscv64Label done;
+ DCHECK_EQ(0u, interpreter::kNterpHotnessValue);
+ __ Beqz(counter, &done); // Can clobber `TMP` if taken.
+ __ Addi(counter, counter, -1);
+    // We may not have another scratch register available for `Storeh()`,
+    // so we must use the `Sh()` function directly.
+ static_assert(IsInt<12>(ArtMethod::HotnessCountOffset().Int32Value()));
+ __ Sh(counter, method, ArtMethod::HotnessCountOffset().Int32Value());
+ __ Bind(&done);
+ }
+
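+  // For baseline compilation, the hotness counter lives in the method's ProfilingInfo;
+  // once it reaches zero, the CompileOptimized slow path is taken to request optimized
+  // (re)compilation.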
+ if (GetGraph()->IsCompilingBaseline() && !Runtime::Current()->IsAotCompiler()) {
+ ProfilingInfo* info = GetGraph()->GetProfilingInfo();
+ DCHECK(info != nullptr);
+ DCHECK(!HasEmptyFrame());
+ uint64_t address = reinterpret_cast64<uint64_t>(info) +
+ ProfilingInfo::BaselineHotnessCountOffset().SizeValue();
+ auto [base_address, imm12] = SplitJitAddress(address);
+ ScratchRegisterScope srs(GetAssembler());
+ XRegister counter = srs.AllocateXRegister();
+ XRegister tmp = RA;
+ __ LoadConst64(tmp, base_address);
+ SlowPathCodeRISCV64* slow_path =
+ new (GetScopedAllocator()) CompileOptimizedSlowPathRISCV64(tmp, imm12);
+ AddSlowPath(slow_path);
+ __ Lhu(counter, tmp, imm12);
+ __ Beqz(counter, slow_path->GetEntryLabel()); // Can clobber `TMP` if taken.
+ __ Addi(counter, counter, -1);
+ __ Sh(counter, tmp, imm12);
+ __ Bind(slow_path->GetExitLabel());
+ }
+}
+
+bool CodeGeneratorRISCV64::CanUseImplicitSuspendCheck() const {
+ // TODO(riscv64): Implement implicit suspend checks to reduce code size.
+ return false;
+}
+
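+// Memory barrier kinds map directly onto RISC-V FENCE predecessor/successor sets,
+// e.g. kAnyAny emits `fence rw, rw` and kStoreStore emits `fence w, w`.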
+void CodeGeneratorRISCV64::GenerateMemoryBarrier(MemBarrierKind kind) {
+ switch (kind) {
+ case MemBarrierKind::kAnyAny:
+ __ Fence(/*pred=*/ kFenceRead | kFenceWrite, /*succ=*/ kFenceRead | kFenceWrite);
+ break;
+ case MemBarrierKind::kAnyStore:
+ __ Fence(/*pred=*/ kFenceRead | kFenceWrite, /*succ=*/ kFenceWrite);
+ break;
+ case MemBarrierKind::kLoadAny:
+ __ Fence(/*pred=*/ kFenceRead, /*succ=*/ kFenceRead | kFenceWrite);
+ break;
+ case MemBarrierKind::kStoreStore:
+ __ Fence(/*pred=*/ kFenceWrite, /*succ=*/ kFenceWrite);
+ break;
+
+ default:
+ LOG(FATAL) << "Unexpected memory barrier " << kind;
+ UNREACHABLE();
+ }
+}
+
+void CodeGeneratorRISCV64::GenerateFrameEntry() {
+ // Check if we need to generate the clinit check. We will jump to the
+ // resolution stub if the class is not initialized and the executing thread is
+ // not the thread initializing it.
+ // We do this before constructing the frame to get the correct stack trace if
+ // an exception is thrown.
+ if (GetCompilerOptions().ShouldCompileWithClinitCheck(GetGraph()->GetArtMethod())) {
+ Riscv64Label resolution;
+ Riscv64Label memory_barrier;
+
+ ScratchRegisterScope srs(GetAssembler());
+ XRegister tmp = srs.AllocateXRegister();
+ XRegister tmp2 = srs.AllocateXRegister();
+
+ // We don't emit a read barrier here to save on code size. We rely on the
+ // resolution trampoline to do a clinit check before re-entering this code.
+ __ Loadwu(tmp2, kArtMethodRegister, ArtMethod::DeclaringClassOffset().Int32Value());
+
+    // We load the full 32-bit status word with sign-extension and compare it, as
+    // unsigned, against sign-extended shifted status values. This yields the same
+    // comparison as loading and materializing the values unsigned, but the constants
+    // can be materialized with a single LUI instruction.
+ __ Loadw(tmp, tmp2, mirror::Class::StatusOffset().SizeValue()); // Sign-extended.
+
+ // Check if we're visibly initialized.
+ __ Li(tmp2, ShiftedSignExtendedClassStatusValue<ClassStatus::kVisiblyInitialized>());
+ __ Bgeu(tmp, tmp2, &frame_entry_label_); // Can clobber `TMP` if taken.
+
+ // Check if we're initialized and jump to code that does a memory barrier if so.
+ __ Li(tmp2, ShiftedSignExtendedClassStatusValue<ClassStatus::kInitialized>());
+ __ Bgeu(tmp, tmp2, &memory_barrier); // Can clobber `TMP` if taken.
+
+ // Check if we're initializing and the thread initializing is the one
+ // executing the code.
+ __ Li(tmp2, ShiftedSignExtendedClassStatusValue<ClassStatus::kInitializing>());
+ __ Bltu(tmp, tmp2, &resolution); // Can clobber `TMP` if taken.
+
+ __ Loadwu(tmp2, kArtMethodRegister, ArtMethod::DeclaringClassOffset().Int32Value());
+ __ Loadw(tmp, tmp2, mirror::Class::ClinitThreadIdOffset().Int32Value());
+ __ Loadw(tmp2, TR, Thread::TidOffset<kRiscv64PointerSize>().Int32Value());
+ __ Beq(tmp, tmp2, &frame_entry_label_);
+ __ Bind(&resolution);
+
+ // Jump to the resolution stub.
+ ThreadOffset64 entrypoint_offset =
+ GetThreadOffset<kRiscv64PointerSize>(kQuickQuickResolutionTrampoline);
+ __ Loadd(tmp, TR, entrypoint_offset.Int32Value());
+ __ Jr(tmp);
+
+ __ Bind(&memory_barrier);
+ GenerateMemoryBarrier(MemBarrierKind::kAnyAny);
+ }
+ __ Bind(&frame_entry_label_);
+
+ bool do_overflow_check =
+ FrameNeedsStackCheck(GetFrameSize(), InstructionSet::kRiscv64) || !IsLeafMethod();
+
+ if (do_overflow_check) {
+ DCHECK(GetCompilerOptions().GetImplicitStackOverflowChecks());
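+    // Probe the stack by loading from SP minus the reserved overflow region; a fault
+    // in the guard area is intercepted by the runtime and turned into a
+    // StackOverflowError.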
+ __ Loadw(
+ Zero, SP, -static_cast<int32_t>(GetStackOverflowReservedBytes(InstructionSet::kRiscv64)));
+ RecordPcInfo(nullptr, 0);
+ }
+
+ if (!HasEmptyFrame()) {
+ // Make sure the frame size isn't unreasonably large.
+ if (GetFrameSize() > GetStackOverflowReservedBytes(InstructionSet::kRiscv64)) {
+ LOG(FATAL) << "Stack frame larger than "
+ << GetStackOverflowReservedBytes(InstructionSet::kRiscv64) << " bytes";
+ }
+
+ // Spill callee-saved registers.
+
+ uint32_t frame_size = GetFrameSize();
+
+ IncreaseFrame(frame_size);
+
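+    // Frame layout sketch after this point (highest address first): the caller's
+    // frame, then spilled core callee-saved registers, then spilled FP callee-saved
+    // registers, then spill slots and outgoing arguments, with the ArtMethod* at SP.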
+ uint32_t offset = frame_size;
+ for (size_t i = arraysize(kCoreCalleeSaves); i != 0; ) {
+ --i;
+ XRegister reg = kCoreCalleeSaves[i];
+ if (allocated_registers_.ContainsCoreRegister(reg)) {
+ offset -= kRiscv64DoublewordSize;
+ __ Stored(reg, SP, offset);
+ __ cfi().RelOffset(dwarf::Reg::Riscv64Core(reg), offset);
+ }
+ }
+
+ for (size_t i = arraysize(kFpuCalleeSaves); i != 0; ) {
+ --i;
+ FRegister reg = kFpuCalleeSaves[i];
+ if (allocated_registers_.ContainsFloatingPointRegister(reg)) {
+ offset -= kRiscv64DoublewordSize;
+ __ FStored(reg, SP, offset);
+ __ cfi().RelOffset(dwarf::Reg::Riscv64Fp(reg), offset);
+ }
+ }
+
+ // Save the current method if we need it. Note that we do not
+ // do this in HCurrentMethod, as the instruction might have been removed
+ // in the SSA graph.
+ if (RequiresCurrentMethod()) {
+ __ Stored(kArtMethodRegister, SP, 0);
+ }
+
+ if (GetGraph()->HasShouldDeoptimizeFlag()) {
+ // Initialize should_deoptimize flag to 0.
+ __ Storew(Zero, SP, GetStackOffsetOfShouldDeoptimizeFlag());
+ }
+ }
+ MaybeIncrementHotness(/*is_frame_entry=*/ true);
+}
+
+void CodeGeneratorRISCV64::GenerateFrameExit() {
+ __ cfi().RememberState();
+
+ if (!HasEmptyFrame()) {
+ // Restore callee-saved registers.
+
+    // For better instruction scheduling, restore RA before the other registers.
+ uint32_t offset = GetFrameSize();
+ for (size_t i = arraysize(kCoreCalleeSaves); i != 0; ) {
+ --i;
+ XRegister reg = kCoreCalleeSaves[i];
+ if (allocated_registers_.ContainsCoreRegister(reg)) {
+ offset -= kRiscv64DoublewordSize;
+ __ Loadd(reg, SP, offset);
+ __ cfi().Restore(dwarf::Reg::Riscv64Core(reg));
+ }
+ }
+
+ for (size_t i = arraysize(kFpuCalleeSaves); i != 0; ) {
+ --i;
+ FRegister reg = kFpuCalleeSaves[i];
+ if (allocated_registers_.ContainsFloatingPointRegister(reg)) {
+ offset -= kRiscv64DoublewordSize;
+ __ FLoadd(reg, SP, offset);
+ __ cfi().Restore(dwarf::Reg::Riscv64Fp(reg));
+ }
+ }
+
+ DecreaseFrame(GetFrameSize());
+ }
+
+ __ Jr(RA);
+
+ __ cfi().RestoreState();
+ __ cfi().DefCFAOffset(GetFrameSize());
+}
+
+void CodeGeneratorRISCV64::Bind(HBasicBlock* block) { __ Bind(GetLabelOf(block)); }
+
+void CodeGeneratorRISCV64::MoveConstant(Location destination, int32_t value) {
+ DCHECK(destination.IsRegister());
+ __ LoadConst32(destination.AsRegister<XRegister>(), value);
+}
+
+void CodeGeneratorRISCV64::MoveLocation(Location destination,
+ Location source,
+ DataType::Type dst_type) {
+ if (source.Equals(destination)) {
+ return;
+ }
+
+ // A valid move type can always be inferred from the destination and source locations.
+ // When moving from and to a register, the `dst_type` can be used to generate 32-bit instead
+ // of 64-bit moves but it's generally OK to use 64-bit moves for 32-bit values in registers.
+ bool unspecified_type = (dst_type == DataType::Type::kVoid);
+ // TODO(riscv64): Is the destination type known in all cases?
+ // TODO(riscv64): Can unspecified `dst_type` move 32-bit GPR to FPR without NaN-boxing?
+ CHECK(!unspecified_type);
+
+ if (destination.IsRegister() || destination.IsFpuRegister()) {
+ if (unspecified_type) {
+ HConstant* src_cst = source.IsConstant() ? source.GetConstant() : nullptr;
+ if (source.IsStackSlot() ||
+ (src_cst != nullptr &&
+ (src_cst->IsIntConstant() || src_cst->IsFloatConstant() || src_cst->IsNullConstant()))) {
+ // For stack slots and 32-bit constants, a 32-bit type is appropriate.
+ dst_type = destination.IsRegister() ? DataType::Type::kInt32 : DataType::Type::kFloat32;
+ } else {
+ // If the source is a double stack slot or a 64-bit constant, a 64-bit type
+ // is appropriate. Else the source is a register, and since the type has not
+ // been specified, we chose a 64-bit type to force a 64-bit move.
+ dst_type = destination.IsRegister() ? DataType::Type::kInt64 : DataType::Type::kFloat64;
+ }
+ }
+ DCHECK((destination.IsFpuRegister() && DataType::IsFloatingPointType(dst_type)) ||
+ (destination.IsRegister() && !DataType::IsFloatingPointType(dst_type)));
+
+ if (source.IsStackSlot() || source.IsDoubleStackSlot()) {
+ // Move to GPR/FPR from stack
+ if (DataType::IsFloatingPointType(dst_type)) {
+ if (DataType::Is64BitType(dst_type)) {
+ __ FLoadd(destination.AsFpuRegister<FRegister>(), SP, source.GetStackIndex());
+ } else {
+ __ FLoadw(destination.AsFpuRegister<FRegister>(), SP, source.GetStackIndex());
+ }
+ } else {
+ if (DataType::Is64BitType(dst_type)) {
+ __ Loadd(destination.AsRegister<XRegister>(), SP, source.GetStackIndex());
+ } else if (dst_type == DataType::Type::kReference) {
+ __ Loadwu(destination.AsRegister<XRegister>(), SP, source.GetStackIndex());
+ } else {
+ __ Loadw(destination.AsRegister<XRegister>(), SP, source.GetStackIndex());
+ }
+ }
+ } else if (source.IsConstant()) {
+ // Move to GPR/FPR from constant
+ // TODO(riscv64): Consider using literals for difficult-to-materialize 64-bit constants.
+ int64_t value = GetInt64ValueOf(source.GetConstant()->AsConstant());
+ ScratchRegisterScope srs(GetAssembler());
+ XRegister gpr = DataType::IsFloatingPointType(dst_type)
+ ? srs.AllocateXRegister()
+ : destination.AsRegister<XRegister>();
+ if (DataType::IsFloatingPointType(dst_type) && value == 0) {
+ gpr = Zero; // Note: The scratch register allocated above shall not be used.
+ } else {
+ // Note: For `float` we load the sign-extended value here as it can sometimes yield
+ // a shorter instruction sequence. The higher 32 bits shall be ignored during the
+ // transfer to FP reg and the result shall be correctly NaN-boxed.
+ __ LoadConst64(gpr, value);
+ }
+ if (dst_type == DataType::Type::kFloat32) {
+ __ FMvWX(destination.AsFpuRegister<FRegister>(), gpr);
+ } else if (dst_type == DataType::Type::kFloat64) {
+ __ FMvDX(destination.AsFpuRegister<FRegister>(), gpr);
+ }
+ } else if (source.IsRegister()) {
+ if (destination.IsRegister()) {
+ // Move to GPR from GPR
+ __ Mv(destination.AsRegister<XRegister>(), source.AsRegister<XRegister>());
+ } else {
+ DCHECK(destination.IsFpuRegister());
+ if (DataType::Is64BitType(dst_type)) {
+ __ FMvDX(destination.AsFpuRegister<FRegister>(), source.AsRegister<XRegister>());
+ } else {
+ __ FMvWX(destination.AsFpuRegister<FRegister>(), source.AsRegister<XRegister>());
+ }
+ }
+ } else if (source.IsFpuRegister()) {
+ if (destination.IsFpuRegister()) {
+ if (GetGraph()->HasSIMD()) {
+ LOG(FATAL) << "Vector extension is unsupported";
+ UNREACHABLE();
+ } else {
+ // Move to FPR from FPR
+ if (dst_type == DataType::Type::kFloat32) {
+ __ FMvS(destination.AsFpuRegister<FRegister>(), source.AsFpuRegister<FRegister>());
+ } else {
+ DCHECK_EQ(dst_type, DataType::Type::kFloat64);
+ __ FMvD(destination.AsFpuRegister<FRegister>(), source.AsFpuRegister<FRegister>());
+ }
+ }
+ } else {
+ DCHECK(destination.IsRegister());
+ if (DataType::Is64BitType(dst_type)) {
+ __ FMvXD(destination.AsRegister<XRegister>(), source.AsFpuRegister<FRegister>());
+ } else {
+ __ FMvXW(destination.AsRegister<XRegister>(), source.AsFpuRegister<FRegister>());
+ }
+ }
+ }
+ } else if (destination.IsSIMDStackSlot()) {
+ LOG(FATAL) << "SIMD is unsupported";
+ UNREACHABLE();
+ } else { // The destination is not a register. It must be a stack slot.
+ DCHECK(destination.IsStackSlot() || destination.IsDoubleStackSlot());
+ if (source.IsRegister() || source.IsFpuRegister()) {
+ if (unspecified_type) {
+ if (source.IsRegister()) {
+ dst_type = destination.IsStackSlot() ? DataType::Type::kInt32 : DataType::Type::kInt64;
+ } else {
+ dst_type =
+ destination.IsStackSlot() ? DataType::Type::kFloat32 : DataType::Type::kFloat64;
+ }
+ }
+ DCHECK_EQ(source.IsFpuRegister(), DataType::IsFloatingPointType(dst_type));
+ // For direct @CriticalNative calls, we need to sign-extend narrow integral args
+ // to 64 bits, so widening integral values is allowed. Narrowing is forbidden.
+ DCHECK_IMPLIES(DataType::IsFloatingPointType(dst_type) || destination.IsStackSlot(),
+ destination.IsDoubleStackSlot() == DataType::Is64BitType(dst_type));
+ // Move to stack from GPR/FPR
+ if (destination.IsDoubleStackSlot()) {
+ if (source.IsRegister()) {
+ __ Stored(source.AsRegister<XRegister>(), SP, destination.GetStackIndex());
+ } else {
+ __ FStored(source.AsFpuRegister<FRegister>(), SP, destination.GetStackIndex());
+ }
+ } else {
+ if (source.IsRegister()) {
+ __ Storew(source.AsRegister<XRegister>(), SP, destination.GetStackIndex());
+ } else {
+ __ FStorew(source.AsFpuRegister<FRegister>(), SP, destination.GetStackIndex());
+ }
+ }
+ } else if (source.IsConstant()) {
+ // Move to stack from constant
+ int64_t value = GetInt64ValueOf(source.GetConstant());
+ ScratchRegisterScope srs(GetAssembler());
+ XRegister gpr = (value != 0) ? srs.AllocateXRegister() : Zero;
+ if (value != 0) {
+ __ LoadConst64(gpr, value);
+ }
+ if (destination.IsStackSlot()) {
+ __ Storew(gpr, SP, destination.GetStackIndex());
+ } else {
+ DCHECK(destination.IsDoubleStackSlot());
+ __ Stored(gpr, SP, destination.GetStackIndex());
+ }
+ } else {
+ DCHECK(source.IsStackSlot() || source.IsDoubleStackSlot());
+ // For direct @CriticalNative calls, we need to sign-extend narrow integral args
+ // to 64 bits, so widening move is allowed. Narrowing move is forbidden.
+ DCHECK_IMPLIES(destination.IsStackSlot(), source.IsStackSlot());
+ // Move to stack from stack
+ ScratchRegisterScope srs(GetAssembler());
+ XRegister tmp = srs.AllocateXRegister();
+ if (source.IsStackSlot()) {
+ __ Loadw(tmp, SP, source.GetStackIndex());
+ } else {
+ __ Loadd(tmp, SP, source.GetStackIndex());
+ }
+ if (destination.IsStackSlot()) {
+ __ Storew(tmp, SP, destination.GetStackIndex());
+ } else {
+ __ Stored(tmp, SP, destination.GetStackIndex());
+ }
+ }
+ }
+}
+
+void CodeGeneratorRISCV64::AddLocationAsTemp(Location location, LocationSummary* locations) {
+ if (location.IsRegister()) {
+ locations->AddTemp(location);
+ } else {
+ UNIMPLEMENTED(FATAL) << "AddLocationAsTemp not implemented for location " << location;
+ }
+}
+
+void CodeGeneratorRISCV64::SetupBlockedRegisters() const {
+ // ZERO, GP, SP, RA, TP and TR(S1) are reserved and can't be allocated.
+ blocked_core_registers_[Zero] = true;
+ blocked_core_registers_[GP] = true;
+ blocked_core_registers_[SP] = true;
+ blocked_core_registers_[RA] = true;
+ blocked_core_registers_[TP] = true;
+ blocked_core_registers_[TR] = true; // ART Thread register.
+
+ // TMP(T6), TMP2(T5) and FTMP(FT11) are used as temporary/scratch registers.
+ blocked_core_registers_[TMP] = true;
+ blocked_core_registers_[TMP2] = true;
+ blocked_fpu_registers_[FTMP] = true;
+
+ if (GetGraph()->IsDebuggable()) {
+ // Stubs do not save callee-save floating point registers. If the graph
+ // is debuggable, we need to deal with these registers differently. For
+ // now, just block them.
+ for (size_t i = 0; i < arraysize(kFpuCalleeSaves); ++i) {
+ blocked_fpu_registers_[kFpuCalleeSaves[i]] = true;
+ }
+ }
+}
+
+size_t CodeGeneratorRISCV64::SaveCoreRegister(size_t stack_index, uint32_t reg_id) {
+ __ Stored(XRegister(reg_id), SP, stack_index);
+ return kRiscv64DoublewordSize;
+}
+
+size_t CodeGeneratorRISCV64::RestoreCoreRegister(size_t stack_index, uint32_t reg_id) {
+ __ Loadd(XRegister(reg_id), SP, stack_index);
+ return kRiscv64DoublewordSize;
+}
+
+size_t CodeGeneratorRISCV64::SaveFloatingPointRegister(size_t stack_index, uint32_t reg_id) {
+ if (GetGraph()->HasSIMD()) {
+ // TODO(riscv64): RISC-V vector extension.
+ UNIMPLEMENTED(FATAL) << "Vector extension is unsupported";
+ UNREACHABLE();
+ }
+ __ FStored(FRegister(reg_id), SP, stack_index);
+ return kRiscv64FloatRegSizeInBytes;
+}
+
+size_t CodeGeneratorRISCV64::RestoreFloatingPointRegister(size_t stack_index, uint32_t reg_id) {
+ if (GetGraph()->HasSIMD()) {
+ // TODO(riscv64): RISC-V vector extension.
+ UNIMPLEMENTED(FATAL) << "Vector extension is unsupported";
+ UNREACHABLE();
+ }
+ __ FLoadd(FRegister(reg_id), SP, stack_index);
+ return kRiscv64FloatRegSizeInBytes;
+}
+
+void CodeGeneratorRISCV64::DumpCoreRegister(std::ostream& stream, int reg) const {
+ stream << XRegister(reg);
+}
+
+void CodeGeneratorRISCV64::DumpFloatingPointRegister(std::ostream& stream, int reg) const {
+ stream << FRegister(reg);
+}
+
+const Riscv64InstructionSetFeatures& CodeGeneratorRISCV64::GetInstructionSetFeatures() const {
+ return *GetCompilerOptions().GetInstructionSetFeatures()->AsRiscv64InstructionSetFeatures();
+}
+
+void CodeGeneratorRISCV64::Finalize() {
+ // Ensure that we fix up branches and literal loads and emit the literal pool.
+ __ FinalizeCode();
+
+ // Adjust native pc offsets in stack maps.
+ StackMapStream* stack_map_stream = GetStackMapStream();
+ for (size_t i = 0, num = stack_map_stream->GetNumberOfStackMaps(); i != num; ++i) {
+ uint32_t old_position = stack_map_stream->GetStackMapNativePcOffset(i);
+ uint32_t new_position = __ GetAdjustedPosition(old_position);
+ DCHECK_GE(new_position, old_position);
+ stack_map_stream->SetStackMapNativePcOffset(i, new_position);
+ }
+
+ // Adjust pc offsets for the disassembly information.
+ if (disasm_info_ != nullptr) {
+ GeneratedCodeInterval* frame_entry_interval = disasm_info_->GetFrameEntryInterval();
+ frame_entry_interval->start = __ GetAdjustedPosition(frame_entry_interval->start);
+ frame_entry_interval->end = __ GetAdjustedPosition(frame_entry_interval->end);
+ for (auto& entry : *disasm_info_->GetInstructionIntervals()) {
+ entry.second.start = __ GetAdjustedPosition(entry.second.start);
+ entry.second.end = __ GetAdjustedPosition(entry.second.end);
+ }
+ for (auto& entry : *disasm_info_->GetSlowPathIntervals()) {
+ entry.code_interval.start = __ GetAdjustedPosition(entry.code_interval.start);
+ entry.code_interval.end = __ GetAdjustedPosition(entry.code_interval.end);
+ }
+ }
+}
+
+// Generate code to invoke a runtime entry point.
+void CodeGeneratorRISCV64::InvokeRuntime(QuickEntrypointEnum entrypoint,
+ HInstruction* instruction,
+ uint32_t dex_pc,
+ SlowPathCode* slow_path) {
+ ValidateInvokeRuntime(entrypoint, instruction, slow_path);
+
+ ThreadOffset64 entrypoint_offset = GetThreadOffset<kRiscv64PointerSize>(entrypoint);
+
+ // TODO(riscv64): Reduce code size for AOT by using shared trampolines for slow path
+ // runtime calls across the entire oat file.
+ __ Loadd(RA, TR, entrypoint_offset.Int32Value());
+ __ Jalr(RA);
+ if (EntrypointRequiresStackMap(entrypoint)) {
+ RecordPcInfo(instruction, dex_pc, slow_path);
+ }
+}
+
+// Generate code to invoke a runtime entry point, but do not record
+// PC-related information in a stack map.
+void CodeGeneratorRISCV64::InvokeRuntimeWithoutRecordingPcInfo(int32_t entry_point_offset,
+ HInstruction* instruction,
+ SlowPathCode* slow_path) {
+ ValidateInvokeRuntimeWithoutRecordingPcInfo(instruction, slow_path);
+ __ Loadd(RA, TR, entry_point_offset);
+ __ Jalr(RA);
+}
+
+void CodeGeneratorRISCV64::IncreaseFrame(size_t adjustment) {
+ int32_t adjustment32 = dchecked_integral_cast<int32_t>(adjustment);
+ __ AddConst64(SP, SP, -adjustment32);
+ GetAssembler()->cfi().AdjustCFAOffset(adjustment32);
+}
+
+void CodeGeneratorRISCV64::DecreaseFrame(size_t adjustment) {
+ int32_t adjustment32 = dchecked_integral_cast<int32_t>(adjustment);
+ __ AddConst64(SP, SP, adjustment32);
+ GetAssembler()->cfi().AdjustCFAOffset(-adjustment32);
+}
+
+void CodeGeneratorRISCV64::GenerateNop() {
+ __ Nop();
+}
+
+void CodeGeneratorRISCV64::GenerateImplicitNullCheck(HNullCheck* instruction) {
+ if (CanMoveNullCheckToUser(instruction)) {
+ return;
+ }
+ Location obj = instruction->GetLocations()->InAt(0);
+
+ __ Lw(Zero, obj.AsRegister<XRegister>(), 0);
+ RecordPcInfo(instruction, instruction->GetDexPc());
+}
+
+void CodeGeneratorRISCV64::GenerateExplicitNullCheck(HNullCheck* instruction) {
+ SlowPathCodeRISCV64* slow_path = new (GetScopedAllocator()) NullCheckSlowPathRISCV64(instruction);
+ AddSlowPath(slow_path);
+
+ Location obj = instruction->GetLocations()->InAt(0);
+
+ __ Beqz(obj.AsRegister<XRegister>(), slow_path->GetEntryLabel());
+}
+
+HLoadString::LoadKind CodeGeneratorRISCV64::GetSupportedLoadStringKind(
+ HLoadString::LoadKind desired_string_load_kind) {
+ switch (desired_string_load_kind) {
+ case HLoadString::LoadKind::kBootImageLinkTimePcRelative:
+ case HLoadString::LoadKind::kBootImageRelRo:
+ case HLoadString::LoadKind::kBssEntry:
+ DCHECK(!Runtime::Current()->UseJitCompilation());
+ break;
+ case HLoadString::LoadKind::kJitBootImageAddress:
+ case HLoadString::LoadKind::kJitTableAddress:
+ DCHECK(Runtime::Current()->UseJitCompilation());
+ break;
+ case HLoadString::LoadKind::kRuntimeCall:
+ break;
+ }
+ return desired_string_load_kind;
+}
+
+HLoadClass::LoadKind CodeGeneratorRISCV64::GetSupportedLoadClassKind(
+ HLoadClass::LoadKind desired_class_load_kind) {
+ switch (desired_class_load_kind) {
+ case HLoadClass::LoadKind::kInvalid:
+ LOG(FATAL) << "UNREACHABLE";
+ UNREACHABLE();
+ case HLoadClass::LoadKind::kReferrersClass:
+ break;
+ case HLoadClass::LoadKind::kBootImageLinkTimePcRelative:
+ case HLoadClass::LoadKind::kBootImageRelRo:
+ case HLoadClass::LoadKind::kBssEntry:
+ case HLoadClass::LoadKind::kBssEntryPublic:
+ case HLoadClass::LoadKind::kBssEntryPackage:
+ DCHECK(!Runtime::Current()->UseJitCompilation());
+ break;
+ case HLoadClass::LoadKind::kJitBootImageAddress:
+ case HLoadClass::LoadKind::kJitTableAddress:
+ DCHECK(Runtime::Current()->UseJitCompilation());
+ break;
+ case HLoadClass::LoadKind::kRuntimeCall:
+ break;
+ }
+ return desired_class_load_kind;
+}
+
+HInvokeStaticOrDirect::DispatchInfo CodeGeneratorRISCV64::GetSupportedInvokeStaticOrDirectDispatch(
+ const HInvokeStaticOrDirect::DispatchInfo& desired_dispatch_info, ArtMethod* method) {
+ UNUSED(method);
+ // On RISCV64 we support all dispatch types.
+ return desired_dispatch_info;
+}
+
+CodeGeneratorRISCV64::PcRelativePatchInfo* CodeGeneratorRISCV64::NewBootImageIntrinsicPatch(
+ uint32_t intrinsic_data, const PcRelativePatchInfo* info_high) {
+ return NewPcRelativePatch(
+ /* dex_file= */ nullptr, intrinsic_data, info_high, &boot_image_other_patches_);
+}
+
+CodeGeneratorRISCV64::PcRelativePatchInfo* CodeGeneratorRISCV64::NewBootImageRelRoPatch(
+ uint32_t boot_image_offset, const PcRelativePatchInfo* info_high) {
+ return NewPcRelativePatch(
+ /* dex_file= */ nullptr, boot_image_offset, info_high, &boot_image_other_patches_);
+}
+
+CodeGeneratorRISCV64::PcRelativePatchInfo* CodeGeneratorRISCV64::NewBootImageMethodPatch(
+ MethodReference target_method, const PcRelativePatchInfo* info_high) {
+ return NewPcRelativePatch(
+ target_method.dex_file, target_method.index, info_high, &boot_image_method_patches_);
+}
+
+CodeGeneratorRISCV64::PcRelativePatchInfo* CodeGeneratorRISCV64::NewMethodBssEntryPatch(
+ MethodReference target_method, const PcRelativePatchInfo* info_high) {
+ return NewPcRelativePatch(
+ target_method.dex_file, target_method.index, info_high, &method_bss_entry_patches_);
+}
+
+CodeGeneratorRISCV64::PcRelativePatchInfo* CodeGeneratorRISCV64::NewBootImageTypePatch(
+ const DexFile& dex_file, dex::TypeIndex type_index, const PcRelativePatchInfo* info_high) {
+ return NewPcRelativePatch(&dex_file, type_index.index_, info_high, &boot_image_type_patches_);
+}
+
+CodeGeneratorRISCV64::PcRelativePatchInfo* CodeGeneratorRISCV64::NewBootImageJniEntrypointPatch(
+ MethodReference target_method, const PcRelativePatchInfo* info_high) {
+ return NewPcRelativePatch(
+ target_method.dex_file, target_method.index, info_high, &boot_image_jni_entrypoint_patches_);
+}
+
+CodeGeneratorRISCV64::PcRelativePatchInfo* CodeGeneratorRISCV64::NewTypeBssEntryPatch(
+ HLoadClass* load_class,
+ const PcRelativePatchInfo* info_high) {
+ const DexFile& dex_file = load_class->GetDexFile();
+ dex::TypeIndex type_index = load_class->GetTypeIndex();
+ ArenaDeque<PcRelativePatchInfo>* patches = nullptr;
+ switch (load_class->GetLoadKind()) {
+ case HLoadClass::LoadKind::kBssEntry:
+ patches = &type_bss_entry_patches_;
+ break;
+ case HLoadClass::LoadKind::kBssEntryPublic:
+ patches = &public_type_bss_entry_patches_;
+ break;
+ case HLoadClass::LoadKind::kBssEntryPackage:
+ patches = &package_type_bss_entry_patches_;
+ break;
+ default:
+ LOG(FATAL) << "Unexpected load kind: " << load_class->GetLoadKind();
+ UNREACHABLE();
+ }
+ return NewPcRelativePatch(&dex_file, type_index.index_, info_high, patches);
+}
+
+CodeGeneratorRISCV64::PcRelativePatchInfo* CodeGeneratorRISCV64::NewBootImageStringPatch(
+ const DexFile& dex_file, dex::StringIndex string_index, const PcRelativePatchInfo* info_high) {
+ return NewPcRelativePatch(&dex_file, string_index.index_, info_high, &boot_image_string_patches_);
+}
+
+CodeGeneratorRISCV64::PcRelativePatchInfo* CodeGeneratorRISCV64::NewStringBssEntryPatch(
+ const DexFile& dex_file, dex::StringIndex string_index, const PcRelativePatchInfo* info_high) {
+ return NewPcRelativePatch(&dex_file, string_index.index_, info_high, &string_bss_entry_patches_);
+}
+
+CodeGeneratorRISCV64::PcRelativePatchInfo* CodeGeneratorRISCV64::NewPcRelativePatch(
+ const DexFile* dex_file,
+ uint32_t offset_or_index,
+ const PcRelativePatchInfo* info_high,
+ ArenaDeque<PcRelativePatchInfo>* patches) {
+ patches->emplace_back(dex_file, offset_or_index, info_high);
+ return &patches->back();
+}
+
+Literal* CodeGeneratorRISCV64::DeduplicateUint32Literal(uint32_t value) {
+ return uint32_literals_.GetOrCreate(value,
+ [this, value]() { return __ NewLiteral<uint32_t>(value); });
+}
+
+Literal* CodeGeneratorRISCV64::DeduplicateUint64Literal(uint64_t value) {
+ return uint64_literals_.GetOrCreate(value,
+ [this, value]() { return __ NewLiteral<uint64_t>(value); });
+}
+
+Literal* CodeGeneratorRISCV64::DeduplicateBootImageAddressLiteral(uint64_t address) {
+ return DeduplicateUint32Literal(dchecked_integral_cast<uint32_t>(address));
+}
+
+Literal* CodeGeneratorRISCV64::DeduplicateJitStringLiteral(const DexFile& dex_file,
+ dex::StringIndex string_index,
+ Handle<mirror::String> handle) {
+ ReserveJitStringRoot(StringReference(&dex_file, string_index), handle);
+ return jit_string_patches_.GetOrCreate(
+ StringReference(&dex_file, string_index),
+ [this]() { return __ NewLiteral<uint32_t>(/* value= */ 0u); });
+}
+
+Literal* CodeGeneratorRISCV64::DeduplicateJitClassLiteral(const DexFile& dex_file,
+ dex::TypeIndex type_index,
+ Handle<mirror::Class> handle) {
+ ReserveJitClassRoot(TypeReference(&dex_file, type_index), handle);
+ return jit_class_patches_.GetOrCreate(
+ TypeReference(&dex_file, type_index),
+ [this]() { return __ NewLiteral<uint32_t>(/* value= */ 0u); });
+}
+
+void CodeGeneratorRISCV64::PatchJitRootUse(uint8_t* code,
+ const uint8_t* roots_data,
+ const Literal* literal,
+ uint64_t index_in_table) const {
+ uint32_t literal_offset = GetAssembler().GetLabelLocation(literal->GetLabel());
+ uintptr_t address =
+ reinterpret_cast<uintptr_t>(roots_data) + index_in_table * sizeof(GcRoot<mirror::Object>);
+ reinterpret_cast<uint32_t*>(code + literal_offset)[0] = dchecked_integral_cast<uint32_t>(address);
+}
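
A worked example of the computation above, with hypothetical values and assuming `GcRoot<mirror::Object>` is a 4-byte compressed reference: for `roots_data` = 0x70001000 and `index_in_table` = 3, the 32-bit word at the literal's code location is patched to 0x70001000 + 3 * 4 = 0x7000100C, i.e. the address of that root's slot in the JIT roots table (checked to fit in 32 bits by `dchecked_integral_cast`).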
+
+void CodeGeneratorRISCV64::EmitJitRootPatches(uint8_t* code, const uint8_t* roots_data) {
+ for (const auto& entry : jit_string_patches_) {
+ const StringReference& string_reference = entry.first;
+ Literal* table_entry_literal = entry.second;
+ uint64_t index_in_table = GetJitStringRootIndex(string_reference);
+ PatchJitRootUse(code, roots_data, table_entry_literal, index_in_table);
+ }
+ for (const auto& entry : jit_class_patches_) {
+ const TypeReference& type_reference = entry.first;
+ Literal* table_entry_literal = entry.second;
+ uint64_t index_in_table = GetJitClassRootIndex(type_reference);
+ PatchJitRootUse(code, roots_data, table_entry_literal, index_in_table);
+ }
+}
+
+void CodeGeneratorRISCV64::EmitPcRelativeAuipcPlaceholder(PcRelativePatchInfo* info_high,
+ XRegister out) {
+ DCHECK(info_high->pc_insn_label == &info_high->label);
+ __ Bind(&info_high->label);
+ __ Auipc(out, /*imm20=*/ kLinkTimeOffsetPlaceholderHigh);
+}
+
+void CodeGeneratorRISCV64::EmitPcRelativeAddiPlaceholder(PcRelativePatchInfo* info_low,
+ XRegister rd,
+ XRegister rs1) {
+ DCHECK(info_low->pc_insn_label != &info_low->label);
+ __ Bind(&info_low->label);
+ __ Addi(rd, rs1, /*imm12=*/ kLinkTimeOffsetPlaceholderLow);
+}
+
+void CodeGeneratorRISCV64::EmitPcRelativeLwuPlaceholder(PcRelativePatchInfo* info_low,
+ XRegister rd,
+ XRegister rs1) {
+ DCHECK(info_low->pc_insn_label != &info_low->label);
+ __ Bind(&info_low->label);
+ __ Lwu(rd, rs1, /*offset=*/ kLinkTimeOffsetPlaceholderLow);
+}
+
+void CodeGeneratorRISCV64::EmitPcRelativeLdPlaceholder(PcRelativePatchInfo* info_low,
+ XRegister rd,
+ XRegister rs1) {
+ DCHECK(info_low->pc_insn_label != &info_low->label);
+ __ Bind(&info_low->label);
+ __ Ld(rd, rs1, /*offset=*/ kLinkTimeOffsetPlaceholderLow);
+}
+
+template <linker::LinkerPatch (*Factory)(size_t, const DexFile*, uint32_t, uint32_t)>
+inline void CodeGeneratorRISCV64::EmitPcRelativeLinkerPatches(
+ const ArenaDeque<PcRelativePatchInfo>& infos,
+ ArenaVector<linker::LinkerPatch>* linker_patches) {
+ for (const PcRelativePatchInfo& info : infos) {
+ linker_patches->push_back(Factory(__ GetLabelLocation(&info.label),
+ info.target_dex_file,
+ __ GetLabelLocation(info.pc_insn_label),
+ info.offset_or_index));
+ }
+}
+
+template <linker::LinkerPatch (*Factory)(size_t, uint32_t, uint32_t)>
+linker::LinkerPatch NoDexFileAdapter(size_t literal_offset,
+ const DexFile* target_dex_file,
+ uint32_t pc_insn_offset,
+ uint32_t boot_image_offset) {
+ DCHECK(target_dex_file == nullptr); // Unused for these patches, should be null.
+ return Factory(literal_offset, pc_insn_offset, boot_image_offset);
+}
+
+void CodeGeneratorRISCV64::EmitLinkerPatches(ArenaVector<linker::LinkerPatch>* linker_patches) {
+ DCHECK(linker_patches->empty());
+ size_t size =
+ boot_image_method_patches_.size() +
+ method_bss_entry_patches_.size() +
+ boot_image_type_patches_.size() +
+ type_bss_entry_patches_.size() +
+ public_type_bss_entry_patches_.size() +
+ package_type_bss_entry_patches_.size() +
+ boot_image_string_patches_.size() +
+ string_bss_entry_patches_.size() +
+ boot_image_jni_entrypoint_patches_.size() +
+ boot_image_other_patches_.size();
+ linker_patches->reserve(size);
+ if (GetCompilerOptions().IsBootImage() || GetCompilerOptions().IsBootImageExtension()) {
+ EmitPcRelativeLinkerPatches<linker::LinkerPatch::RelativeMethodPatch>(
+ boot_image_method_patches_, linker_patches);
+ EmitPcRelativeLinkerPatches<linker::LinkerPatch::RelativeTypePatch>(
+ boot_image_type_patches_, linker_patches);
+ EmitPcRelativeLinkerPatches<linker::LinkerPatch::RelativeStringPatch>(
+ boot_image_string_patches_, linker_patches);
+ } else {
+ DCHECK(boot_image_method_patches_.empty());
+ DCHECK(boot_image_type_patches_.empty());
+ DCHECK(boot_image_string_patches_.empty());
+ }
+ if (GetCompilerOptions().IsBootImage()) {
+ EmitPcRelativeLinkerPatches<NoDexFileAdapter<linker::LinkerPatch::IntrinsicReferencePatch>>(
+ boot_image_other_patches_, linker_patches);
+ } else {
+ EmitPcRelativeLinkerPatches<NoDexFileAdapter<linker::LinkerPatch::DataBimgRelRoPatch>>(
+ boot_image_other_patches_, linker_patches);
+ }
+ EmitPcRelativeLinkerPatches<linker::LinkerPatch::MethodBssEntryPatch>(
+ method_bss_entry_patches_, linker_patches);
+ EmitPcRelativeLinkerPatches<linker::LinkerPatch::TypeBssEntryPatch>(
+ type_bss_entry_patches_, linker_patches);
+ EmitPcRelativeLinkerPatches<linker::LinkerPatch::PublicTypeBssEntryPatch>(
+ public_type_bss_entry_patches_, linker_patches);
+ EmitPcRelativeLinkerPatches<linker::LinkerPatch::PackageTypeBssEntryPatch>(
+ package_type_bss_entry_patches_, linker_patches);
+ EmitPcRelativeLinkerPatches<linker::LinkerPatch::StringBssEntryPatch>(
+ string_bss_entry_patches_, linker_patches);
+ EmitPcRelativeLinkerPatches<linker::LinkerPatch::RelativeJniEntrypointPatch>(
+ boot_image_jni_entrypoint_patches_, linker_patches);
+ DCHECK_EQ(size, linker_patches->size());
+}
+
+void CodeGeneratorRISCV64::LoadTypeForBootImageIntrinsic(XRegister dest,
+ TypeReference target_type) {
+ // Load the type the same way as for HLoadClass::LoadKind::kBootImageLinkTimePcRelative.
+ DCHECK(GetCompilerOptions().IsBootImage() || GetCompilerOptions().IsBootImageExtension());
+ PcRelativePatchInfo* info_high =
+ NewBootImageTypePatch(*target_type.dex_file, target_type.TypeIndex());
+ EmitPcRelativeAuipcPlaceholder(info_high, dest);
+ PcRelativePatchInfo* info_low =
+ NewBootImageTypePatch(*target_type.dex_file, target_type.TypeIndex(), info_high);
+ EmitPcRelativeAddiPlaceholder(info_low, dest, dest);
+}
+
+void CodeGeneratorRISCV64::LoadBootImageRelRoEntry(XRegister dest, uint32_t boot_image_offset) {
+ PcRelativePatchInfo* info_high = NewBootImageRelRoPatch(boot_image_offset);
+ EmitPcRelativeAuipcPlaceholder(info_high, dest);
+ PcRelativePatchInfo* info_low = NewBootImageRelRoPatch(boot_image_offset, info_high);
+ // Note: Boot image is in the low 4GiB and the entry is always 32-bit, so emit a 32-bit load.
+ EmitPcRelativeLwuPlaceholder(info_low, dest, dest);
+}
+
+void CodeGeneratorRISCV64::LoadBootImageAddress(XRegister dest, uint32_t boot_image_reference) {
+ if (GetCompilerOptions().IsBootImage()) {
+ PcRelativePatchInfo* info_high = NewBootImageIntrinsicPatch(boot_image_reference);
+ EmitPcRelativeAuipcPlaceholder(info_high, dest);
+ PcRelativePatchInfo* info_low = NewBootImageIntrinsicPatch(boot_image_reference, info_high);
+ EmitPcRelativeAddiPlaceholder(info_low, dest, dest);
+ } else if (GetCompilerOptions().GetCompilePic()) {
+ LoadBootImageRelRoEntry(dest, boot_image_reference);
+ } else {
+ DCHECK(GetCompilerOptions().IsJitCompiler());
+ gc::Heap* heap = Runtime::Current()->GetHeap();
+ DCHECK(!heap->GetBootImageSpaces().empty());
+ const uint8_t* address = heap->GetBootImageSpaces()[0]->Begin() + boot_image_reference;
+ // Note: Boot image is in the low 4GiB (usually the low 2GiB, requiring just LUI+ADDI).
+ // We may not have an available scratch register for `LoadConst64()` but it never
+ // emits better code than `Li()` for 32-bit unsigned constants anyway.
+ __ Li(dest, reinterpret_cast32<uint32_t>(address));
+ }
+}
+
+void CodeGeneratorRISCV64::LoadIntrinsicDeclaringClass(XRegister dest, HInvoke* invoke) {
+ DCHECK_NE(invoke->GetIntrinsic(), Intrinsics::kNone);
+ if (GetCompilerOptions().IsBootImage()) {
+ MethodReference target_method = invoke->GetResolvedMethodReference();
+ dex::TypeIndex type_idx = target_method.dex_file->GetMethodId(target_method.index).class_idx_;
+ LoadTypeForBootImageIntrinsic(dest, TypeReference(target_method.dex_file, type_idx));
+ } else {
+ uint32_t boot_image_offset = GetBootImageOffsetOfIntrinsicDeclaringClass(invoke);
+ LoadBootImageAddress(dest, boot_image_offset);
+ }
+}
+
+void CodeGeneratorRISCV64::LoadClassRootForIntrinsic(XRegister dest, ClassRoot class_root) {
+ if (GetCompilerOptions().IsBootImage()) {
+ ScopedObjectAccess soa(Thread::Current());
+ ObjPtr<mirror::Class> klass = GetClassRoot(class_root);
+ TypeReference target_type(&klass->GetDexFile(), klass->GetDexTypeIndex());
+ LoadTypeForBootImageIntrinsic(dest, target_type);
+ } else {
+ uint32_t boot_image_offset = GetBootImageOffset(class_root);
+ LoadBootImageAddress(dest, boot_image_offset);
+ }
+}
+
+void CodeGeneratorRISCV64::LoadMethod(MethodLoadKind load_kind, Location temp, HInvoke* invoke) {
+ switch (load_kind) {
+ case MethodLoadKind::kBootImageLinkTimePcRelative: {
+ DCHECK(GetCompilerOptions().IsBootImage() || GetCompilerOptions().IsBootImageExtension());
+ CodeGeneratorRISCV64::PcRelativePatchInfo* info_high =
+ NewBootImageMethodPatch(invoke->GetResolvedMethodReference());
+ EmitPcRelativeAuipcPlaceholder(info_high, temp.AsRegister<XRegister>());
+ CodeGeneratorRISCV64::PcRelativePatchInfo* info_low =
+ NewBootImageMethodPatch(invoke->GetResolvedMethodReference(), info_high);
+ EmitPcRelativeAddiPlaceholder(
+ info_low, temp.AsRegister<XRegister>(), temp.AsRegister<XRegister>());
+ break;
+ }
+ case MethodLoadKind::kBootImageRelRo: {
+ uint32_t boot_image_offset = GetBootImageOffset(invoke);
+ LoadBootImageRelRoEntry(temp.AsRegister<XRegister>(), boot_image_offset);
+ break;
+ }
+ case MethodLoadKind::kBssEntry: {
+ PcRelativePatchInfo* info_high = NewMethodBssEntryPatch(invoke->GetMethodReference());
+ EmitPcRelativeAuipcPlaceholder(info_high, temp.AsRegister<XRegister>());
+ PcRelativePatchInfo* info_low =
+ NewMethodBssEntryPatch(invoke->GetMethodReference(), info_high);
+ EmitPcRelativeLdPlaceholder(
+ info_low, temp.AsRegister<XRegister>(), temp.AsRegister<XRegister>());
+ break;
+ }
+ case MethodLoadKind::kJitDirectAddress: {
+ __ LoadConst64(temp.AsRegister<XRegister>(),
+ reinterpret_cast<uint64_t>(invoke->GetResolvedMethod()));
+ break;
+ }
+ case MethodLoadKind::kRuntimeCall: {
+ // Test situation, don't do anything.
+ break;
+ }
+ default: {
+ LOG(FATAL) << "Load kind should have already been handled " << load_kind;
+ UNREACHABLE();
+ }
+ }
+}
+
+void CodeGeneratorRISCV64::GenerateStaticOrDirectCall(HInvokeStaticOrDirect* invoke,
+ Location temp,
+ SlowPathCode* slow_path) {
+ // All registers are assumed to be correctly set up per the calling convention.
+ Location callee_method = temp; // For all kinds except kRecursive, callee will be in temp.
+
+ switch (invoke->GetMethodLoadKind()) {
+ case MethodLoadKind::kStringInit: {
+ // temp = thread->string_init_entrypoint
+ uint32_t offset =
+ GetThreadOffset<kRiscv64PointerSize>(invoke->GetStringInitEntryPoint()).Int32Value();
+ __ Loadd(temp.AsRegister<XRegister>(), TR, offset);
+ break;
+ }
+ case MethodLoadKind::kRecursive:
+ callee_method = invoke->GetLocations()->InAt(invoke->GetCurrentMethodIndex());
+ break;
+ case MethodLoadKind::kRuntimeCall:
+ GenerateInvokeStaticOrDirectRuntimeCall(invoke, temp, slow_path);
+ return; // No code pointer retrieval; the runtime performs the call directly.
+ case MethodLoadKind::kBootImageLinkTimePcRelative:
+ DCHECK(GetCompilerOptions().IsBootImage() || GetCompilerOptions().IsBootImageExtension());
+ if (invoke->GetCodePtrLocation() == CodePtrLocation::kCallCriticalNative) {
+ // Do not materialize the method pointer, load directly the entrypoint.
+ CodeGeneratorRISCV64::PcRelativePatchInfo* info_high =
+ NewBootImageJniEntrypointPatch(invoke->GetResolvedMethodReference());
+ EmitPcRelativeAuipcPlaceholder(info_high, RA);
+ CodeGeneratorRISCV64::PcRelativePatchInfo* info_low =
+ NewBootImageJniEntrypointPatch(invoke->GetResolvedMethodReference(), info_high);
+ EmitPcRelativeLdPlaceholder(info_low, RA, RA);
+ break;
+ }
+ FALLTHROUGH_INTENDED;
+ default:
+ LoadMethod(invoke->GetMethodLoadKind(), temp, invoke);
+ break;
+ }
+
+ switch (invoke->GetCodePtrLocation()) {
+ case CodePtrLocation::kCallSelf:
+ DCHECK(!GetGraph()->HasShouldDeoptimizeFlag());
+ __ Jal(&frame_entry_label_);
+ RecordPcInfo(invoke, invoke->GetDexPc(), slow_path);
+ break;
+ case CodePtrLocation::kCallArtMethod:
+ // RA = callee_method->entry_point_from_quick_compiled_code_;
+ __ Loadd(RA,
+ callee_method.AsRegister<XRegister>(),
+ ArtMethod::EntryPointFromQuickCompiledCodeOffset(kRiscv64PointerSize).Int32Value());
+ // RA()
+ __ Jalr(RA);
+ RecordPcInfo(invoke, invoke->GetDexPc(), slow_path);
+ break;
+ case CodePtrLocation::kCallCriticalNative: {
+ size_t out_frame_size =
+ PrepareCriticalNativeCall<CriticalNativeCallingConventionVisitorRiscv64,
+ kNativeStackAlignment,
+ GetCriticalNativeDirectCallFrameSize>(invoke);
+ if (invoke->GetMethodLoadKind() == MethodLoadKind::kBootImageLinkTimePcRelative) {
+ // Entrypoint is already loaded in RA.
+ } else {
+ // RA = callee_method->ptr_sized_fields_.data_; // EntryPointFromJni
+ MemberOffset offset = ArtMethod::EntryPointFromJniOffset(kRiscv64PointerSize);
+ __ Loadd(RA, callee_method.AsRegister<XRegister>(), offset.Int32Value());
+ }
+ __ Jalr(RA);
+ RecordPcInfo(invoke, invoke->GetDexPc(), slow_path);
+ // The result is returned the same way in native ABI and managed ABI. No result conversion is
+ // needed, see comments in `Riscv64JniCallingConvention::RequiresSmallResultTypeExtension()`.
+ if (out_frame_size != 0u) {
+ DecreaseFrame(out_frame_size);
+ }
+ break;
+ }
+ }
+
+ DCHECK(!IsLeafMethod());
+}
+
+void CodeGeneratorRISCV64::MaybeGenerateInlineCacheCheck(HInstruction* instruction,
+ XRegister klass) {
+ if (ProfilingInfoBuilder::IsInlineCacheUseful(instruction->AsInvoke(), this)) {
+ ProfilingInfo* info = GetGraph()->GetProfilingInfo();
+ DCHECK(info != nullptr);
+ InlineCache* cache = ProfilingInfoBuilder::GetInlineCache(info, instruction->AsInvoke());
+ if (cache != nullptr) {
+ uint64_t address = reinterpret_cast64<uint64_t>(cache);
+ Riscv64Label done;
+      // The `art_quick_update_inline_cache` entrypoint expects the inline cache in T5.
+ XRegister ic_reg = T5;
+ ScratchRegisterScope srs(GetAssembler());
+ DCHECK_EQ(srs.AvailableXRegisters(), 2u);
+ srs.ExcludeXRegister(ic_reg);
+ DCHECK_EQ(srs.AvailableXRegisters(), 1u);
+ __ LoadConst64(ic_reg, address);
+ {
+ ScratchRegisterScope srs2(GetAssembler());
+ XRegister tmp = srs2.AllocateXRegister();
+ __ Loadd(tmp, ic_reg, InlineCache::ClassesOffset().Int32Value());
+ // Fast path for a monomorphic cache.
+ __ Beq(klass, tmp, &done);
+ }
+ InvokeRuntime(kQuickUpdateInlineCache, instruction, instruction->GetDexPc());
+ __ Bind(&done);
+ } else {
+ // This is unexpected, but we don't guarantee stable compilation across
+      // JIT runs, so just warn about it.
+ ScopedObjectAccess soa(Thread::Current());
+ LOG(WARNING) << "Missing inline cache for " << GetGraph()->GetArtMethod()->PrettyMethod();
+ }
+ }
+}
+
+void CodeGeneratorRISCV64::GenerateVirtualCall(HInvokeVirtual* invoke,
+ Location temp_location,
+ SlowPathCode* slow_path) {
+ // Use the calling convention instead of the location of the receiver, as
+ // intrinsics may have put the receiver in a different register. In the intrinsics
+ // slow path, the arguments have been moved to the right place, so here we are
+ // guaranteed that the receiver is the first register of the calling convention.
+ InvokeDexCallingConvention calling_convention;
+ XRegister receiver = calling_convention.GetRegisterAt(0);
+ XRegister temp = temp_location.AsRegister<XRegister>();
+ MemberOffset method_offset =
+ mirror::Class::EmbeddedVTableEntryOffset(invoke->GetVTableIndex(), kRiscv64PointerSize);
+ MemberOffset class_offset = mirror::Object::ClassOffset();
+ Offset entry_point = ArtMethod::EntryPointFromQuickCompiledCodeOffset(kRiscv64PointerSize);
+
+ // temp = object->GetClass();
+ __ Loadwu(temp, receiver, class_offset.Int32Value());
+ MaybeRecordImplicitNullCheck(invoke);
+ // Instead of simply (possibly) unpoisoning `temp` here, we should
+ // emit a read barrier for the previous class reference load.
+ // However this is not required in practice, as this is an
+ // intermediate/temporary reference and because the current
+  // concurrent copying collector keeps the from-space memory
+  // intact/accessible until the end of the marking phase (though
+  // it may not do so in the future).
+ MaybeUnpoisonHeapReference(temp);
+
+ // If we're compiling baseline, update the inline cache.
+ MaybeGenerateInlineCacheCheck(invoke, temp);
+
+ // temp = temp->GetMethodAt(method_offset);
+ __ Loadd(temp, temp, method_offset.Int32Value());
+ // RA = temp->GetEntryPoint();
+ __ Loadd(RA, temp, entry_point.Int32Value());
+ // RA();
+ __ Jalr(RA);
+ RecordPcInfo(invoke, invoke->GetDexPc(), slow_path);
+}
+
+void CodeGeneratorRISCV64::MoveFromReturnRegister(Location trg, DataType::Type type) {
+ if (!trg.IsValid()) {
+ DCHECK_EQ(type, DataType::Type::kVoid);
+ return;
+ }
+
+ DCHECK_NE(type, DataType::Type::kVoid);
+
+ if (DataType::IsIntegralType(type) || type == DataType::Type::kReference) {
+ XRegister trg_reg = trg.AsRegister<XRegister>();
+ XRegister res_reg = Riscv64ReturnLocation(type).AsRegister<XRegister>();
+ if (trg_reg != res_reg) {
+ __ Mv(trg_reg, res_reg);
+ }
+ } else {
+ FRegister trg_reg = trg.AsFpuRegister<FRegister>();
+ FRegister res_reg = Riscv64ReturnLocation(type).AsFpuRegister<FRegister>();
+ if (trg_reg != res_reg) {
+ __ FMvD(trg_reg, res_reg); // 64-bit move is OK also for `float`.
+ }
+ }
+}
+
+void CodeGeneratorRISCV64::PoisonHeapReference(XRegister reg) {
+ __ Sub(reg, Zero, reg); // Negate the ref.
+ __ ZextW(reg, reg); // Zero-extend the 32-bit ref.
+}
+
+void CodeGeneratorRISCV64::UnpoisonHeapReference(XRegister reg) {
+ __ Sub(reg, Zero, reg); // Negate the ref.
+ __ ZextW(reg, reg); // Zero-extend the 32-bit ref.
+}
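
Both helpers emit the same two instructions by design: heap-reference poisoning here is 32-bit negation, and negation is its own inverse modulo 2^32, so un-poisoning simply negates (and zero-extends) again.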
+
+void CodeGeneratorRISCV64::MaybePoisonHeapReference(XRegister reg) {
+ if (kPoisonHeapReferences) {
+ PoisonHeapReference(reg);
+ }
+}
+
+void CodeGeneratorRISCV64::MaybeUnpoisonHeapReference(XRegister reg) {
+ if (kPoisonHeapReferences) {
+ UnpoisonHeapReference(reg);
+ }
+}
+
+void CodeGeneratorRISCV64::SwapLocations(Location loc1, Location loc2, DataType::Type type) {
+ DCHECK(!loc1.IsConstant());
+ DCHECK(!loc2.IsConstant());
+
+ if (loc1.Equals(loc2)) {
+ return;
+ }
+
+ bool is_slot1 = loc1.IsStackSlot() || loc1.IsDoubleStackSlot();
+ bool is_slot2 = loc2.IsStackSlot() || loc2.IsDoubleStackSlot();
+ bool is_simd1 = loc1.IsSIMDStackSlot();
+ bool is_simd2 = loc2.IsSIMDStackSlot();
+ bool is_fp_reg1 = loc1.IsFpuRegister();
+ bool is_fp_reg2 = loc2.IsFpuRegister();
+
+ if ((is_slot1 != is_slot2) ||
+ (loc2.IsRegister() && loc1.IsRegister()) ||
+ (is_fp_reg2 && is_fp_reg1)) {
+ if ((is_fp_reg2 && is_fp_reg1) && GetGraph()->HasSIMD()) {
+ LOG(FATAL) << "Unsupported";
+ UNREACHABLE();
+ }
+ ScratchRegisterScope srs(GetAssembler());
+ Location tmp = (is_fp_reg2 || is_fp_reg1)
+ ? Location::FpuRegisterLocation(srs.AllocateFRegister())
+ : Location::RegisterLocation(srs.AllocateXRegister());
+ MoveLocation(tmp, loc1, type);
+ MoveLocation(loc1, loc2, type);
+ MoveLocation(loc2, tmp, type);
+ } else if (is_slot1 && is_slot2) {
+ move_resolver_.Exchange(loc1.GetStackIndex(), loc2.GetStackIndex(), loc1.IsDoubleStackSlot());
+ } else if (is_simd1 && is_simd2) {
+ // TODO(riscv64): Add VECTOR/SIMD later.
+ UNIMPLEMENTED(FATAL) << "Vector extension is unsupported";
+ } else if ((is_fp_reg1 && is_simd2) || (is_fp_reg2 && is_simd1)) {
+ // TODO(riscv64): Add VECTOR/SIMD later.
+ UNIMPLEMENTED(FATAL) << "Vector extension is unsupported";
+ } else {
+ LOG(FATAL) << "Unimplemented swap between locations " << loc1 << " and " << loc2;
+ }
+}
+
+} // namespace riscv64
+} // namespace art
diff --git a/compiler/optimizing/code_generator_riscv64.h b/compiler/optimizing/code_generator_riscv64.h
index 405b39aa0a..1e0eb51258 100644
--- a/compiler/optimizing/code_generator_riscv64.h
+++ b/compiler/optimizing/code_generator_riscv64.h
@@ -17,7 +17,827 @@
#ifndef ART_COMPILER_OPTIMIZING_CODE_GENERATOR_RISCV64_H_
#define ART_COMPILER_OPTIMIZING_CODE_GENERATOR_RISCV64_H_
+#include "android-base/logging.h"
+#include "arch/riscv64/registers_riscv64.h"
+#include "base/macros.h"
#include "code_generator.h"
#include "driver/compiler_options.h"
+#include "intrinsics_list.h"
+#include "optimizing/locations.h"
+#include "parallel_move_resolver.h"
+#include "utils/riscv64/assembler_riscv64.h"
+
+namespace art HIDDEN {
+namespace riscv64 {
+
+// InvokeDexCallingConvention registers
+static constexpr XRegister kParameterCoreRegisters[] = {A1, A2, A3, A4, A5, A6, A7};
+static constexpr size_t kParameterCoreRegistersLength = arraysize(kParameterCoreRegisters);
+
+static constexpr FRegister kParameterFpuRegisters[] = {FA0, FA1, FA2, FA3, FA4, FA5, FA6, FA7};
+static constexpr size_t kParameterFpuRegistersLength = arraysize(kParameterFpuRegisters);
+
+// InvokeRuntimeCallingConvention registers
+static constexpr XRegister kRuntimeParameterCoreRegisters[] = {A0, A1, A2, A3, A4, A5, A6, A7};
+static constexpr size_t kRuntimeParameterCoreRegistersLength =
+ arraysize(kRuntimeParameterCoreRegisters);
+
+static constexpr FRegister kRuntimeParameterFpuRegisters[] = {
+ FA0, FA1, FA2, FA3, FA4, FA5, FA6, FA7
+};
+static constexpr size_t kRuntimeParameterFpuRegistersLength =
+ arraysize(kRuntimeParameterFpuRegisters);
+
+// FCLASS returns a 10-bit classification mask with the two highest bits marking NaNs
+// (signaling and quiet). To detect a NaN, compare the result with `kFClassNaNMinValue`
+// using either BGE or BGEU (the sign bit is always clear).
+static_assert(kSignalingNaN == 0x100);
+static_assert(kQuietNaN == 0x200);
+static constexpr int32_t kFClassNaNMinValue = 0x100;
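
A minimal sketch of the intended NaN test, as it might appear inside `InstructionCodeGeneratorRISCV64` (illustrative only: `tmp`, `tmp2`, `src` and `is_nan` are stand-ins, and the standard RV64I `Li`/`Bgeu` emitters are assumed alongside the `FClass()` helper declared further down in this header):

    FClass(tmp, src, DataType::Type::kFloat64);  // tmp = one-hot 10-bit classification of `src`.
    __ Li(tmp2, kFClassNaNMinValue);             // 0x100, the lowest mask value denoting a NaN.
    __ Bgeu(tmp, tmp2, &is_nan);                 // All non-NaN classes occupy bits 0-7 (< 0x100).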
+
+#define UNIMPLEMENTED_INTRINSIC_LIST_RISCV64(V) \
+ V(SystemArrayCopyByte) \
+ V(SystemArrayCopyChar) \
+ V(SystemArrayCopyInt) \
+ V(FP16Ceil) \
+ V(FP16Compare) \
+ V(FP16Floor) \
+ V(FP16Rint) \
+ V(FP16ToFloat) \
+ V(FP16ToHalf) \
+ V(FP16Greater) \
+ V(FP16GreaterEquals) \
+ V(FP16Less) \
+ V(FP16LessEquals) \
+ V(FP16Min) \
+ V(FP16Max) \
+ V(StringCompareTo) \
+ V(StringEquals) \
+ V(StringGetCharsNoCheck) \
+ V(StringStringIndexOf) \
+ V(StringStringIndexOfAfter) \
+ V(StringNewStringFromBytes) \
+ V(StringNewStringFromChars) \
+ V(StringNewStringFromString) \
+ V(StringBufferAppend) \
+ V(StringBufferLength) \
+ V(StringBufferToString) \
+ V(StringBuilderAppendObject) \
+ V(StringBuilderAppendString) \
+ V(StringBuilderAppendCharSequence) \
+ V(StringBuilderAppendCharArray) \
+ V(StringBuilderAppendBoolean) \
+ V(StringBuilderAppendChar) \
+ V(StringBuilderAppendInt) \
+ V(StringBuilderAppendLong) \
+ V(StringBuilderAppendFloat) \
+ V(StringBuilderAppendDouble) \
+ V(StringBuilderLength) \
+ V(StringBuilderToString) \
+ V(ThreadInterrupted) \
+ V(CRC32Update) \
+ V(CRC32UpdateBytes) \
+ V(CRC32UpdateByteBuffer) \
+ V(MethodHandleInvokeExact) \
+ V(MethodHandleInvoke)
+
+// Method register on invoke.
+static const XRegister kArtMethodRegister = A0;
+
+// Helper functions used by codegen as well as intrinsics.
+XRegister InputXRegisterOrZero(Location location);
+int32_t ReadBarrierMarkEntrypointOffset(Location ref);
+
+class CodeGeneratorRISCV64;
+
+class InvokeRuntimeCallingConvention : public CallingConvention<XRegister, FRegister> {
+ public:
+ InvokeRuntimeCallingConvention()
+ : CallingConvention(kRuntimeParameterCoreRegisters,
+ kRuntimeParameterCoreRegistersLength,
+ kRuntimeParameterFpuRegisters,
+ kRuntimeParameterFpuRegistersLength,
+ kRiscv64PointerSize) {}
+
+ Location GetReturnLocation(DataType::Type return_type);
+
+ private:
+ DISALLOW_COPY_AND_ASSIGN(InvokeRuntimeCallingConvention);
+};
+
+class InvokeDexCallingConvention : public CallingConvention<XRegister, FRegister> {
+ public:
+ InvokeDexCallingConvention()
+ : CallingConvention(kParameterCoreRegisters,
+ kParameterCoreRegistersLength,
+ kParameterFpuRegisters,
+ kParameterFpuRegistersLength,
+ kRiscv64PointerSize) {}
+
+ private:
+ DISALLOW_COPY_AND_ASSIGN(InvokeDexCallingConvention);
+};
+
+class InvokeDexCallingConventionVisitorRISCV64 : public InvokeDexCallingConventionVisitor {
+ public:
+ InvokeDexCallingConventionVisitorRISCV64() {}
+ virtual ~InvokeDexCallingConventionVisitorRISCV64() {}
+
+ Location GetNextLocation(DataType::Type type) override;
+ Location GetReturnLocation(DataType::Type type) const override;
+ Location GetMethodLocation() const override;
+
+ private:
+ InvokeDexCallingConvention calling_convention;
+
+ DISALLOW_COPY_AND_ASSIGN(InvokeDexCallingConventionVisitorRISCV64);
+};
+
+class CriticalNativeCallingConventionVisitorRiscv64 : public InvokeDexCallingConventionVisitor {
+ public:
+ explicit CriticalNativeCallingConventionVisitorRiscv64(bool for_register_allocation)
+ : for_register_allocation_(for_register_allocation) {}
+
+ virtual ~CriticalNativeCallingConventionVisitorRiscv64() {}
+
+ Location GetNextLocation(DataType::Type type) override;
+ Location GetReturnLocation(DataType::Type type) const override;
+ Location GetMethodLocation() const override;
+
+ size_t GetStackOffset() const { return stack_offset_; }
+
+ private:
+ // Register allocator does not support adjusting frame size, so we cannot provide final locations
+ // of stack arguments for register allocation. We ask the register allocator for any location and
+ // move these arguments to the right place after adjusting the SP when generating the call.
+ const bool for_register_allocation_;
+ size_t gpr_index_ = 0u;
+ size_t fpr_index_ = 0u;
+ size_t stack_offset_ = 0u;
+
+ DISALLOW_COPY_AND_ASSIGN(CriticalNativeCallingConventionVisitorRiscv64);
+};
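
For how these provisional locations are consumed, see the `kCallCriticalNative` case in `GenerateStaticOrDirectCall()` in the .cc above: `PrepareCriticalNativeCall<...>()` computes `out_frame_size` and moves the stack arguments into place after the SP adjustment, and `DecreaseFrame(out_frame_size)` restores the stack after the call.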
+
+class SlowPathCodeRISCV64 : public SlowPathCode {
+ public:
+ explicit SlowPathCodeRISCV64(HInstruction* instruction)
+ : SlowPathCode(instruction), entry_label_(), exit_label_() {}
+
+ Riscv64Label* GetEntryLabel() { return &entry_label_; }
+ Riscv64Label* GetExitLabel() { return &exit_label_; }
+
+ private:
+ Riscv64Label entry_label_;
+ Riscv64Label exit_label_;
+
+ DISALLOW_COPY_AND_ASSIGN(SlowPathCodeRISCV64);
+};
+
+class ParallelMoveResolverRISCV64 : public ParallelMoveResolverWithSwap {
+ public:
+ ParallelMoveResolverRISCV64(ArenaAllocator* allocator, CodeGeneratorRISCV64* codegen)
+ : ParallelMoveResolverWithSwap(allocator), codegen_(codegen) {}
+
+ void EmitMove(size_t index) override;
+ void EmitSwap(size_t index) override;
+ void SpillScratch(int reg) override;
+ void RestoreScratch(int reg) override;
+
+ void Exchange(int index1, int index2, bool double_slot);
+
+ Riscv64Assembler* GetAssembler() const;
+
+ private:
+ CodeGeneratorRISCV64* const codegen_;
+
+ DISALLOW_COPY_AND_ASSIGN(ParallelMoveResolverRISCV64);
+};
+
+class FieldAccessCallingConventionRISCV64 : public FieldAccessCallingConvention {
+ public:
+ FieldAccessCallingConventionRISCV64() {}
+
+ Location GetObjectLocation() const override {
+ return Location::RegisterLocation(A1);
+ }
+ Location GetFieldIndexLocation() const override {
+ return Location::RegisterLocation(A0);
+ }
+ Location GetReturnLocation(DataType::Type type ATTRIBUTE_UNUSED) const override {
+ return Location::RegisterLocation(A0);
+ }
+ Location GetSetValueLocation(DataType::Type type ATTRIBUTE_UNUSED,
+ bool is_instance) const override {
+ return is_instance
+ ? Location::RegisterLocation(A2)
+ : Location::RegisterLocation(A1);
+ }
+ Location GetFpuLocation(DataType::Type type ATTRIBUTE_UNUSED) const override {
+ return Location::FpuRegisterLocation(FA0);
+ }
+
+ private:
+ DISALLOW_COPY_AND_ASSIGN(FieldAccessCallingConventionRISCV64);
+};
+
+class LocationsBuilderRISCV64 : public HGraphVisitor {
+ public:
+ LocationsBuilderRISCV64(HGraph* graph, CodeGeneratorRISCV64* codegen)
+ : HGraphVisitor(graph), codegen_(codegen) {}
+
+#define DECLARE_VISIT_INSTRUCTION(name, super) void Visit##name(H##name* instr) override;
+
+ FOR_EACH_CONCRETE_INSTRUCTION_COMMON(DECLARE_VISIT_INSTRUCTION)
+ FOR_EACH_CONCRETE_INSTRUCTION_RISCV64(DECLARE_VISIT_INSTRUCTION)
+
+#undef DECLARE_VISIT_INSTRUCTION
+
+ void VisitInstruction(HInstruction* instruction) override {
+ LOG(FATAL) << "Unreachable instruction " << instruction->DebugName() << " (id "
+ << instruction->GetId() << ")";
+ }
+
+ protected:
+ void HandleInvoke(HInvoke* invoke);
+ void HandleBinaryOp(HBinaryOperation* operation);
+ void HandleCondition(HCondition* instruction);
+ void HandleShift(HBinaryOperation* operation);
+ void HandleFieldSet(HInstruction* instruction);
+ void HandleFieldGet(HInstruction* instruction);
+
+ InvokeDexCallingConventionVisitorRISCV64 parameter_visitor_;
+
+ CodeGeneratorRISCV64* const codegen_;
+
+ DISALLOW_COPY_AND_ASSIGN(LocationsBuilderRISCV64);
+};
+
+class InstructionCodeGeneratorRISCV64 : public InstructionCodeGenerator {
+ public:
+ InstructionCodeGeneratorRISCV64(HGraph* graph, CodeGeneratorRISCV64* codegen);
+
+#define DECLARE_VISIT_INSTRUCTION(name, super) void Visit##name(H##name* instr) override;
+
+ FOR_EACH_CONCRETE_INSTRUCTION_COMMON(DECLARE_VISIT_INSTRUCTION)
+ FOR_EACH_CONCRETE_INSTRUCTION_RISCV64(DECLARE_VISIT_INSTRUCTION)
+
+#undef DECLARE_VISIT_INSTRUCTION
+
+ void VisitInstruction(HInstruction* instruction) override {
+ LOG(FATAL) << "Unreachable instruction " << instruction->DebugName() << " (id "
+ << instruction->GetId() << ")";
+ }
+
+ Riscv64Assembler* GetAssembler() const { return assembler_; }
+
+ void GenerateMemoryBarrier(MemBarrierKind kind);
+
+ void FAdd(FRegister rd, FRegister rs1, FRegister rs2, DataType::Type type);
+ void FClass(XRegister rd, FRegister rs1, DataType::Type type);
+
+ void Load(Location out, XRegister rs1, int32_t offset, DataType::Type type);
+ void Store(Location value, XRegister rs1, int32_t offset, DataType::Type type);
+
+ // Sequentially consistent store. Used for volatile fields and intrinsics.
+ // The `instruction` argument is for recording an implicit null check stack map with the
+ // store instruction which may not be the last instruction emitted by `StoreSeqCst()`.
+ void StoreSeqCst(Location value,
+ XRegister rs1,
+ int32_t offset,
+ DataType::Type type,
+ HInstruction* instruction = nullptr);
+
+ void ShNAdd(XRegister rd, XRegister rs1, XRegister rs2, DataType::Type type);
+
+ protected:
+ void GenerateClassInitializationCheck(SlowPathCodeRISCV64* slow_path, XRegister class_reg);
+ void GenerateBitstringTypeCheckCompare(HTypeCheckInstruction* check, XRegister temp);
+ void GenerateSuspendCheck(HSuspendCheck* check, HBasicBlock* successor);
+ void HandleBinaryOp(HBinaryOperation* operation);
+ void HandleCondition(HCondition* instruction);
+ void HandleShift(HBinaryOperation* operation);
+ void HandleFieldSet(HInstruction* instruction,
+ const FieldInfo& field_info,
+ bool value_can_be_null,
+ WriteBarrierKind write_barrier_kind);
+ void HandleFieldGet(HInstruction* instruction, const FieldInfo& field_info);
+
+ // Generate a heap reference load using one register `out`:
+ //
+ // out <- *(out + offset)
+ //
+ // while honoring heap poisoning and/or read barriers (if any).
+ //
+ // Location `maybe_temp` is used when generating a read barrier and
+ // shall be a register in that case; it may be an invalid location
+ // otherwise.
+ void GenerateReferenceLoadOneRegister(HInstruction* instruction,
+ Location out,
+ uint32_t offset,
+ Location maybe_temp,
+ ReadBarrierOption read_barrier_option);
+ // Generate a heap reference load using two different registers
+ // `out` and `obj`:
+ //
+ // out <- *(obj + offset)
+ //
+ // while honoring heap poisoning and/or read barriers (if any).
+ //
+ // Location `maybe_temp` is used when generating a Baker's (fast
+ // path) read barrier and shall be a register in that case; it may
+ // be an invalid location otherwise.
+ void GenerateReferenceLoadTwoRegisters(HInstruction* instruction,
+ Location out,
+ Location obj,
+ uint32_t offset,
+ Location maybe_temp,
+ ReadBarrierOption read_barrier_option);
+
+ void GenerateTestAndBranch(HInstruction* instruction,
+ size_t condition_input_index,
+ Riscv64Label* true_target,
+ Riscv64Label* false_target);
+ void DivRemOneOrMinusOne(HBinaryOperation* instruction);
+ void DivRemByPowerOfTwo(HBinaryOperation* instruction);
+ void GenerateDivRemWithAnyConstant(HBinaryOperation* instruction);
+ void GenerateDivRemIntegral(HBinaryOperation* instruction);
+ void GenerateIntLongCondition(IfCondition cond, LocationSummary* locations);
+ void GenerateIntLongCondition(IfCondition cond,
+ LocationSummary* locations,
+ XRegister rd,
+ bool to_all_bits);
+ void GenerateIntLongCompareAndBranch(IfCondition cond,
+ LocationSummary* locations,
+ Riscv64Label* label);
+ void GenerateFpCondition(IfCondition cond,
+ bool gt_bias,
+ DataType::Type type,
+ LocationSummary* locations,
+ Riscv64Label* label = nullptr);
+ void GenerateFpCondition(IfCondition cond,
+ bool gt_bias,
+ DataType::Type type,
+ LocationSummary* locations,
+ Riscv64Label* label,
+ XRegister rd,
+ bool to_all_bits);
+ void GenerateMethodEntryExitHook(HInstruction* instruction);
+ void HandleGoto(HInstruction* got, HBasicBlock* successor);
+ void GenPackedSwitchWithCompares(XRegister adjusted,
+ XRegister temp,
+ uint32_t num_entries,
+ HBasicBlock* switch_block);
+ void GenTableBasedPackedSwitch(XRegister adjusted,
+ XRegister temp,
+ uint32_t num_entries,
+ HBasicBlock* switch_block);
+ int32_t VecAddress(LocationSummary* locations,
+ size_t size,
+ /*out*/ XRegister* adjusted_base);
+
+ template <typename Reg,
+ void (Riscv64Assembler::*opS)(Reg, FRegister, FRegister),
+ void (Riscv64Assembler::*opD)(Reg, FRegister, FRegister)>
+ void FpBinOp(Reg rd, FRegister rs1, FRegister rs2, DataType::Type type);
+ void FSub(FRegister rd, FRegister rs1, FRegister rs2, DataType::Type type);
+ void FDiv(FRegister rd, FRegister rs1, FRegister rs2, DataType::Type type);
+ void FMul(FRegister rd, FRegister rs1, FRegister rs2, DataType::Type type);
+ void FMin(FRegister rd, FRegister rs1, FRegister rs2, DataType::Type type);
+ void FMax(FRegister rd, FRegister rs1, FRegister rs2, DataType::Type type);
+ void FEq(XRegister rd, FRegister rs1, FRegister rs2, DataType::Type type);
+ void FLt(XRegister rd, FRegister rs1, FRegister rs2, DataType::Type type);
+ void FLe(XRegister rd, FRegister rs1, FRegister rs2, DataType::Type type);
+
+ template <typename Reg,
+ void (Riscv64Assembler::*opS)(Reg, FRegister),
+ void (Riscv64Assembler::*opD)(Reg, FRegister)>
+ void FpUnOp(Reg rd, FRegister rs1, DataType::Type type);
+ void FAbs(FRegister rd, FRegister rs1, DataType::Type type);
+ void FNeg(FRegister rd, FRegister rs1, DataType::Type type);
+ void FMv(FRegister rd, FRegister rs1, DataType::Type type);
+ void FMvX(XRegister rd, FRegister rs1, DataType::Type type);
+
+ Riscv64Assembler* const assembler_;
+ CodeGeneratorRISCV64* const codegen_;
+
+ DISALLOW_COPY_AND_ASSIGN(InstructionCodeGeneratorRISCV64);
+};
+
+class CodeGeneratorRISCV64 : public CodeGenerator {
+ public:
+ CodeGeneratorRISCV64(HGraph* graph,
+ const CompilerOptions& compiler_options,
+ OptimizingCompilerStats* stats = nullptr);
+ virtual ~CodeGeneratorRISCV64() {}
+
+ void GenerateFrameEntry() override;
+ void GenerateFrameExit() override;
+
+ void Bind(HBasicBlock* block) override;
+
+ size_t GetWordSize() const override {
+ // The "word" for the compiler is the core register size (64-bit for riscv64) while the
+ // riscv64 assembler uses "word" for 32-bit values and "double word" for 64-bit values.
+ return kRiscv64DoublewordSize;
+ }
+
+ bool SupportsPredicatedSIMD() const override {
+ // TODO(riscv64): Check the vector extension.
+ return false;
+ }
+
+ // Get FP register width in bytes for spilling/restoring in the slow paths.
+ //
+ // Note: In SIMD graphs this should return SIMD register width as all FP and SIMD registers
+ // alias and live SIMD registers are forced to be spilled in full size in the slow paths.
+ size_t GetSlowPathFPWidth() const override {
+ // Default implementation.
+ return GetCalleePreservedFPWidth();
+ }
+
+ size_t GetCalleePreservedFPWidth() const override {
+ return kRiscv64FloatRegSizeInBytes;
+ };
+
+ size_t GetSIMDRegisterWidth() const override {
+ // TODO(riscv64): Implement SIMD with the Vector extension.
+ // Note: HLoopOptimization calls this function even for an ISA without SIMD support.
+ return kRiscv64FloatRegSizeInBytes;
+ };
+
+ uintptr_t GetAddressOf(HBasicBlock* block) override {
+ return assembler_.GetLabelLocation(GetLabelOf(block));
+ };
+
+ Riscv64Label* GetLabelOf(HBasicBlock* block) const {
+ return CommonGetLabelOf<Riscv64Label>(block_labels_, block);
+ }
+
+ void Initialize() override { block_labels_ = CommonInitializeLabels<Riscv64Label>(); }
+
+ void MoveConstant(Location destination, int32_t value) override;
+ void MoveLocation(Location destination, Location source, DataType::Type dst_type) override;
+ void AddLocationAsTemp(Location location, LocationSummary* locations) override;
+
+ Riscv64Assembler* GetAssembler() override { return &assembler_; }
+ const Riscv64Assembler& GetAssembler() const override { return assembler_; }
+
+ HGraphVisitor* GetLocationBuilder() override { return &location_builder_; }
+
+ InstructionCodeGeneratorRISCV64* GetInstructionVisitor() override {
+ return &instruction_visitor_;
+ }
+
+ void MaybeGenerateInlineCacheCheck(HInstruction* instruction, XRegister klass);
+
+ void SetupBlockedRegisters() const override;
+
+ size_t SaveCoreRegister(size_t stack_index, uint32_t reg_id) override;
+ size_t RestoreCoreRegister(size_t stack_index, uint32_t reg_id) override;
+ size_t SaveFloatingPointRegister(size_t stack_index, uint32_t reg_id) override;
+ size_t RestoreFloatingPointRegister(size_t stack_index, uint32_t reg_id) override;
+
+ void DumpCoreRegister(std::ostream& stream, int reg) const override;
+ void DumpFloatingPointRegister(std::ostream& stream, int reg) const override;
+
+ InstructionSet GetInstructionSet() const override { return InstructionSet::kRiscv64; }
+
+ const Riscv64InstructionSetFeatures& GetInstructionSetFeatures() const;
+
+ uint32_t GetPreferredSlotsAlignment() const override {
+ return static_cast<uint32_t>(kRiscv64PointerSize);
+ }
+
+ void Finalize() override;
+
+ // Generate code to invoke a runtime entry point.
+ void InvokeRuntime(QuickEntrypointEnum entrypoint,
+ HInstruction* instruction,
+ uint32_t dex_pc,
+ SlowPathCode* slow_path = nullptr) override;
+
+ // Generate code to invoke a runtime entry point, but do not record
+ // PC-related information in a stack map.
+ void InvokeRuntimeWithoutRecordingPcInfo(int32_t entry_point_offset,
+ HInstruction* instruction,
+ SlowPathCode* slow_path);
+
+ ParallelMoveResolver* GetMoveResolver() override { return &move_resolver_; }
+
+ bool NeedsTwoRegisters([[maybe_unused]] DataType::Type type) const override { return false; }
+
+ void IncreaseFrame(size_t adjustment) override;
+ void DecreaseFrame(size_t adjustment) override;
+
+ void GenerateNop() override;
+
+ void GenerateImplicitNullCheck(HNullCheck* instruction) override;
+ void GenerateExplicitNullCheck(HNullCheck* instruction) override;
+
+ // Check if the desired_string_load_kind is supported. If it is, return it,
+ // otherwise return a fall-back kind that should be used instead.
+ HLoadString::LoadKind GetSupportedLoadStringKind(
+ HLoadString::LoadKind desired_string_load_kind) override;
+
+ // Check if the desired_class_load_kind is supported. If it is, return it,
+ // otherwise return a fall-back kind that should be used instead.
+ HLoadClass::LoadKind GetSupportedLoadClassKind(
+ HLoadClass::LoadKind desired_class_load_kind) override;
+
+ // Check if the desired_dispatch_info is supported. If it is, return it,
+ // otherwise return a fall-back info that should be used instead.
+ HInvokeStaticOrDirect::DispatchInfo GetSupportedInvokeStaticOrDirectDispatch(
+ const HInvokeStaticOrDirect::DispatchInfo& desired_dispatch_info, ArtMethod* method) override;
+
+ // The PcRelativePatchInfo is used for PC-relative addressing of methods/strings/types,
+ // whether through .data.bimg.rel.ro, .bss, or directly in the boot image.
+ //
+ // The 20-bit and 12-bit parts of the 32-bit PC-relative offset are patched separately,
+ // necessitating two patches/infos. There can be more than two patches/infos if the
+ // instruction supplying the high part is shared with e.g. a slow path, while the low
+ // part is supplied by separate instructions, e.g.:
+ // auipc r1, high // patch
+ // lwu r2, low(r1) // patch
+ // beqz r2, slow_path
+ // back:
+ // ...
+ // slow_path:
+ // ...
+ // sw r2, low(r1) // patch
+ // j back
+ struct PcRelativePatchInfo : PatchInfo<Riscv64Label> {
+ PcRelativePatchInfo(const DexFile* dex_file,
+ uint32_t off_or_idx,
+ const PcRelativePatchInfo* info_high)
+ : PatchInfo<Riscv64Label>(dex_file, off_or_idx),
+ pc_insn_label(info_high != nullptr ? &info_high->label : &label) {
+ DCHECK_IMPLIES(info_high != nullptr, info_high->pc_insn_label == &info_high->label);
+ }
+
+    // Label of the anchor (AUIPC) instruction: the high-part patch's label, or this
+    // patch's own `label` if this is the high-part patch info.
+ const Riscv64Label* pc_insn_label;
+
+ private:
+ PcRelativePatchInfo(PcRelativePatchInfo&& other) = delete;
+ DISALLOW_COPY_AND_ASSIGN(PcRelativePatchInfo);
+ };
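
A typical pairing of high/low infos, mirroring `LoadTypeForBootImageIntrinsic()` in the .cc above (sketch only; `dex_file`, `type_index` and `dest` stand for whatever the caller has at hand):

    PcRelativePatchInfo* info_high = NewBootImageTypePatch(dex_file, type_index);
    EmitPcRelativeAuipcPlaceholder(info_high, dest);      // AUIPC; 20-bit high part patched later.
    PcRelativePatchInfo* info_low = NewBootImageTypePatch(dex_file, type_index, info_high);
    EmitPcRelativeAddiPlaceholder(info_low, dest, dest);  // ADDI; 12-bit low part patched later.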
+
+ PcRelativePatchInfo* NewBootImageIntrinsicPatch(uint32_t intrinsic_data,
+ const PcRelativePatchInfo* info_high = nullptr);
+ PcRelativePatchInfo* NewBootImageRelRoPatch(uint32_t boot_image_offset,
+ const PcRelativePatchInfo* info_high = nullptr);
+ PcRelativePatchInfo* NewBootImageMethodPatch(MethodReference target_method,
+ const PcRelativePatchInfo* info_high = nullptr);
+ PcRelativePatchInfo* NewMethodBssEntryPatch(MethodReference target_method,
+ const PcRelativePatchInfo* info_high = nullptr);
+ PcRelativePatchInfo* NewBootImageJniEntrypointPatch(
+ MethodReference target_method, const PcRelativePatchInfo* info_high = nullptr);
+
+ PcRelativePatchInfo* NewBootImageTypePatch(const DexFile& dex_file,
+ dex::TypeIndex type_index,
+ const PcRelativePatchInfo* info_high = nullptr);
+ PcRelativePatchInfo* NewTypeBssEntryPatch(HLoadClass* load_class,
+ const PcRelativePatchInfo* info_high = nullptr);
+ PcRelativePatchInfo* NewBootImageStringPatch(const DexFile& dex_file,
+ dex::StringIndex string_index,
+ const PcRelativePatchInfo* info_high = nullptr);
+ PcRelativePatchInfo* NewStringBssEntryPatch(const DexFile& dex_file,
+ dex::StringIndex string_index,
+ const PcRelativePatchInfo* info_high = nullptr);
+
+ void EmitPcRelativeAuipcPlaceholder(PcRelativePatchInfo* info_high, XRegister out);
+ void EmitPcRelativeAddiPlaceholder(PcRelativePatchInfo* info_low, XRegister rd, XRegister rs1);
+ void EmitPcRelativeLwuPlaceholder(PcRelativePatchInfo* info_low, XRegister rd, XRegister rs1);
+ void EmitPcRelativeLdPlaceholder(PcRelativePatchInfo* info_low, XRegister rd, XRegister rs1);
+
+ void EmitLinkerPatches(ArenaVector<linker::LinkerPatch>* linker_patches) override;
+
+ Literal* DeduplicateBootImageAddressLiteral(uint64_t address);
+ void PatchJitRootUse(uint8_t* code,
+ const uint8_t* roots_data,
+ const Literal* literal,
+ uint64_t index_in_table) const;
+ Literal* DeduplicateJitStringLiteral(const DexFile& dex_file,
+ dex::StringIndex string_index,
+ Handle<mirror::String> handle);
+ Literal* DeduplicateJitClassLiteral(const DexFile& dex_file,
+ dex::TypeIndex type_index,
+ Handle<mirror::Class> handle);
+ void EmitJitRootPatches(uint8_t* code, const uint8_t* roots_data) override;
+
+ void LoadTypeForBootImageIntrinsic(XRegister dest, TypeReference target_type);
+ void LoadBootImageRelRoEntry(XRegister dest, uint32_t boot_image_offset);
+ void LoadBootImageAddress(XRegister dest, uint32_t boot_image_reference);
+ void LoadIntrinsicDeclaringClass(XRegister dest, HInvoke* invoke);
+ void LoadClassRootForIntrinsic(XRegister dest, ClassRoot class_root);
+
+ void LoadMethod(MethodLoadKind load_kind, Location temp, HInvoke* invoke);
+ void GenerateStaticOrDirectCall(HInvokeStaticOrDirect* invoke,
+ Location temp,
+ SlowPathCode* slow_path = nullptr) override;
+ void GenerateVirtualCall(HInvokeVirtual* invoke,
+ Location temp,
+ SlowPathCode* slow_path = nullptr) override;
+ void MoveFromReturnRegister(Location trg, DataType::Type type) override;
+
+ void GenerateMemoryBarrier(MemBarrierKind kind);
+
+ void MaybeIncrementHotness(bool is_frame_entry);
+
+ bool CanUseImplicitSuspendCheck() const;
+
+
+ // Create slow path for a Baker read barrier for a GC root load within `instruction`.
+ SlowPathCodeRISCV64* AddGcRootBakerBarrierBarrierSlowPath(
+ HInstruction* instruction, Location root, Location temp);
+
+ // Emit marking check for a Baker read barrier for a GC root load within `instruction`.
+ void EmitBakerReadBarierMarkingCheck(
+ SlowPathCodeRISCV64* slow_path, Location root, Location temp);
+
+ // Generate a GC root reference load:
+ //
+ // root <- *(obj + offset)
+ //
+ // while honoring read barriers (if any).
+ void GenerateGcRootFieldLoad(HInstruction* instruction,
+ Location root,
+ XRegister obj,
+ uint32_t offset,
+ ReadBarrierOption read_barrier_option,
+ Riscv64Label* label_low = nullptr);
+
+ // Fast path implementation of ReadBarrier::Barrier for a heap
+ // reference field load when Baker's read barriers are used.
+ void GenerateFieldLoadWithBakerReadBarrier(HInstruction* instruction,
+ Location ref,
+ XRegister obj,
+ uint32_t offset,
+ Location temp,
+ bool needs_null_check);
+ // Fast path implementation of ReadBarrier::Barrier for a heap
+ // reference array load when Baker's read barriers are used.
+ void GenerateArrayLoadWithBakerReadBarrier(HInstruction* instruction,
+ Location ref,
+ XRegister obj,
+ uint32_t data_offset,
+ Location index,
+ Location temp,
+ bool needs_null_check);
+ // Factored implementation, used by GenerateFieldLoadWithBakerReadBarrier,
+ // GenerateArrayLoadWithBakerReadBarrier and intrinsics.
+ void GenerateReferenceLoadWithBakerReadBarrier(HInstruction* instruction,
+ Location ref,
+ XRegister obj,
+ uint32_t offset,
+ Location index,
+ Location temp,
+ bool needs_null_check);
+
+ // Create slow path for a read barrier for a heap reference within `instruction`.
+ //
+ // This is a helper function for GenerateReadBarrierSlow() that has the same
+ // arguments. The creation and adding of the slow path is exposed for intrinsics
+ // that cannot use GenerateReadBarrierSlow() from their own slow paths.
+ SlowPathCodeRISCV64* AddReadBarrierSlowPath(HInstruction* instruction,
+ Location out,
+ Location ref,
+ Location obj,
+ uint32_t offset,
+ Location index);
+
+ // Generate a read barrier for a heap reference within `instruction`
+ // using a slow path.
+ //
+ // A read barrier for an object reference read from the heap is
+ // implemented as a call to the artReadBarrierSlow runtime entry
+ // point, which is passed the values in locations `ref`, `obj`, and
+ // `offset`:
+ //
+ // mirror::Object* artReadBarrierSlow(mirror::Object* ref,
+ // mirror::Object* obj,
+ // uint32_t offset);
+ //
+ // The `out` location contains the value returned by
+ // artReadBarrierSlow.
+ //
+ // When `index` is provided (i.e. for array accesses), the offset
+ // value passed to artReadBarrierSlow is adjusted to take `index`
+ // into account.
+ void GenerateReadBarrierSlow(HInstruction* instruction,
+ Location out,
+ Location ref,
+ Location obj,
+ uint32_t offset,
+ Location index = Location::NoLocation());
+
+ // If read barriers are enabled, generate a read barrier for a heap
+ // reference using a slow path. If heap poisoning is enabled, also
+ // unpoison the reference in `out`.
+ void MaybeGenerateReadBarrierSlow(HInstruction* instruction,
+ Location out,
+ Location ref,
+ Location obj,
+ uint32_t offset,
+ Location index = Location::NoLocation());
+
+ // Generate a read barrier for a GC root within `instruction` using
+ // a slow path.
+ //
+ // A read barrier for an object reference GC root is implemented as
+ // a call to the artReadBarrierForRootSlow runtime entry point,
+ // which is passed the value in location `root`:
+ //
+ // mirror::Object* artReadBarrierForRootSlow(GcRoot<mirror::Object>* root);
+ //
+ // The `out` location contains the value returned by
+ // artReadBarrierForRootSlow.
+ void GenerateReadBarrierForRootSlow(HInstruction* instruction, Location out, Location root);
+
+ void MarkGCCard(XRegister object, XRegister value, bool value_can_be_null);
+
+ //
+ // Heap poisoning.
+ //
+
+ // Poison a heap reference contained in `reg`.
+ void PoisonHeapReference(XRegister reg);
+
+ // Unpoison a heap reference contained in `reg`.
+ void UnpoisonHeapReference(XRegister reg);
+
+ // Poison a heap reference contained in `reg` if heap poisoning is enabled.
+ void MaybePoisonHeapReference(XRegister reg);
+
+ // Unpoison a heap reference contained in `reg` if heap poisoning is enabled.
+ void MaybeUnpoisonHeapReference(XRegister reg);
+
+ void SwapLocations(Location loc1, Location loc2, DataType::Type type);
+
+ private:
+ using Uint32ToLiteralMap = ArenaSafeMap<uint32_t, Literal*>;
+ using Uint64ToLiteralMap = ArenaSafeMap<uint64_t, Literal*>;
+ using StringToLiteralMap =
+ ArenaSafeMap<StringReference, Literal*, StringReferenceValueComparator>;
+ using TypeToLiteralMap = ArenaSafeMap<TypeReference, Literal*, TypeReferenceValueComparator>;
+
+ Literal* DeduplicateUint32Literal(uint32_t value);
+ Literal* DeduplicateUint64Literal(uint64_t value);
+
+ PcRelativePatchInfo* NewPcRelativePatch(const DexFile* dex_file,
+ uint32_t offset_or_index,
+ const PcRelativePatchInfo* info_high,
+ ArenaDeque<PcRelativePatchInfo>* patches);
+
+ template <linker::LinkerPatch (*Factory)(size_t, const DexFile*, uint32_t, uint32_t)>
+ void EmitPcRelativeLinkerPatches(const ArenaDeque<PcRelativePatchInfo>& infos,
+ ArenaVector<linker::LinkerPatch>* linker_patches);
+
+ Riscv64Assembler assembler_;
+ LocationsBuilderRISCV64 location_builder_;
+ InstructionCodeGeneratorRISCV64 instruction_visitor_;
+ Riscv64Label frame_entry_label_;
+
+ // Labels for each block that will be compiled.
+ Riscv64Label* block_labels_; // Indexed by block id.
+
+ ParallelMoveResolverRISCV64 move_resolver_;
+
+ // Deduplication map for 32-bit literals, used for non-patchable boot image addresses.
+ Uint32ToLiteralMap uint32_literals_;
+ // Deduplication map for 64-bit literals, used for non-patchable method address or method code
+ // address.
+ Uint64ToLiteralMap uint64_literals_;
+
+ // PC-relative method patch info for kBootImageLinkTimePcRelative.
+ ArenaDeque<PcRelativePatchInfo> boot_image_method_patches_;
+ // PC-relative method patch info for kBssEntry.
+ ArenaDeque<PcRelativePatchInfo> method_bss_entry_patches_;
+ // PC-relative type patch info for kBootImageLinkTimePcRelative.
+ ArenaDeque<PcRelativePatchInfo> boot_image_type_patches_;
+ // PC-relative type patch info for kBssEntry.
+ ArenaDeque<PcRelativePatchInfo> type_bss_entry_patches_;
+ // PC-relative public type patch info for kBssEntryPublic.
+ ArenaDeque<PcRelativePatchInfo> public_type_bss_entry_patches_;
+ // PC-relative package type patch info for kBssEntryPackage.
+ ArenaDeque<PcRelativePatchInfo> package_type_bss_entry_patches_;
+ // PC-relative String patch info for kBootImageLinkTimePcRelative.
+ ArenaDeque<PcRelativePatchInfo> boot_image_string_patches_;
+ // PC-relative String patch info for kBssEntry.
+ ArenaDeque<PcRelativePatchInfo> string_bss_entry_patches_;
+ // PC-relative method patch info for kBootImageLinkTimePcRelative+kCallCriticalNative.
+ ArenaDeque<PcRelativePatchInfo> boot_image_jni_entrypoint_patches_;
+ // PC-relative patch info for IntrinsicObjects for the boot image,
+ // and for method/type/string patches for kBootImageRelRo otherwise.
+ ArenaDeque<PcRelativePatchInfo> boot_image_other_patches_;
+
+ // Patches for string root accesses in JIT compiled code.
+ StringToLiteralMap jit_string_patches_;
+ // Patches for class root accesses in JIT compiled code.
+ TypeToLiteralMap jit_class_patches_;
+};
+
+} // namespace riscv64
+} // namespace art
#endif // ART_COMPILER_OPTIMIZING_CODE_GENERATOR_RISCV64_H_
diff --git a/compiler/optimizing/code_generator_vector_arm64_neon.cc b/compiler/optimizing/code_generator_vector_arm64_neon.cc
index 6b6e25cf0c..848b5e7567 100644
--- a/compiler/optimizing/code_generator_vector_arm64_neon.cc
+++ b/compiler/optimizing/code_generator_vector_arm64_neon.cc
@@ -61,10 +61,8 @@ inline bool NEONCanEncodeConstantAsImmediate(HConstant* constant, HInstruction*
// - constant location - if 'constant' is an actual constant and its value can be
// encoded into the instruction.
// - register location otherwise.
-inline Location NEONEncodableConstantOrRegister(HInstruction* constant,
- HInstruction* instr) {
- if (constant->IsConstant()
- && NEONCanEncodeConstantAsImmediate(constant->AsConstant(), instr)) {
+inline Location NEONEncodableConstantOrRegister(HInstruction* constant, HInstruction* instr) {
+ if (constant->IsConstant() && NEONCanEncodeConstantAsImmediate(constant->AsConstant(), instr)) {
return Location::ConstantLocation(constant);
}
@@ -1533,12 +1531,32 @@ void InstructionCodeGeneratorARM64Neon::VisitVecPredWhile(HVecPredWhile* instruc
UNREACHABLE();
}
-void LocationsBuilderARM64Neon::VisitVecPredCondition(HVecPredCondition* instruction) {
+void LocationsBuilderARM64Neon::VisitVecPredToBoolean(HVecPredToBoolean* instruction) {
LOG(FATAL) << "No SIMD for " << instruction->GetId();
UNREACHABLE();
}
-void InstructionCodeGeneratorARM64Neon::VisitVecPredCondition(HVecPredCondition* instruction) {
+void InstructionCodeGeneratorARM64Neon::VisitVecPredToBoolean(HVecPredToBoolean* instruction) {
+ LOG(FATAL) << "No SIMD for " << instruction->GetId();
+ UNREACHABLE();
+}
+
+void LocationsBuilderARM64Neon::VisitVecCondition(HVecCondition* instruction) {
+ LOG(FATAL) << "No SIMD for " << instruction->GetId();
+ UNREACHABLE();
+}
+
+void InstructionCodeGeneratorARM64Neon::VisitVecCondition(HVecCondition* instruction) {
+ LOG(FATAL) << "No SIMD for " << instruction->GetId();
+ UNREACHABLE();
+}
+
+void LocationsBuilderARM64Neon::VisitVecPredNot(HVecPredNot* instruction) {
+ LOG(FATAL) << "No SIMD for " << instruction->GetId();
+ UNREACHABLE();
+}
+
+void InstructionCodeGeneratorARM64Neon::VisitVecPredNot(HVecPredNot* instruction) {
LOG(FATAL) << "No SIMD for " << instruction->GetId();
UNREACHABLE();
}
diff --git a/compiler/optimizing/code_generator_vector_arm64_sve.cc b/compiler/optimizing/code_generator_vector_arm64_sve.cc
index fe15791d3f..ef79932899 100644
--- a/compiler/optimizing/code_generator_vector_arm64_sve.cc
+++ b/compiler/optimizing/code_generator_vector_arm64_sve.cc
@@ -62,8 +62,7 @@ static bool SVECanEncodeConstantAsImmediate(HConstant* constant, HInstruction* i
// encoded into the instruction.
// - register location otherwise.
inline Location SVEEncodableConstantOrRegister(HInstruction* constant, HInstruction* instr) {
- if (constant->IsConstant()
- && SVECanEncodeConstantAsImmediate(constant->AsConstant(), instr)) {
+ if (constant->IsConstant() && SVECanEncodeConstantAsImmediate(constant->AsConstant(), instr)) {
return Location::ConstantLocation(constant);
}
@@ -246,7 +245,7 @@ void InstructionCodeGeneratorARM64Sve::VisitVecReduce(HVecReduce* instruction) {
LocationSummary* locations = instruction->GetLocations();
const ZRegister src = ZRegisterFrom(locations->InAt(0));
const VRegister dst = DRegisterFrom(locations->Out());
- const PRegister p_reg = LoopPReg();
+ const PRegister p_reg = GetVecGoverningPReg(instruction);
ValidateVectorLength(instruction);
switch (instruction->GetPackedType()) {
case DataType::Type::kInt32:
@@ -284,7 +283,7 @@ void InstructionCodeGeneratorARM64Sve::VisitVecCnv(HVecCnv* instruction) {
LocationSummary* locations = instruction->GetLocations();
const ZRegister src = ZRegisterFrom(locations->InAt(0));
const ZRegister dst = ZRegisterFrom(locations->Out());
- const PRegisterM p_reg = LoopPReg().Merging();
+ const PRegisterM p_reg = GetVecGoverningPReg(instruction).Merging();
DataType::Type from = instruction->GetInputType();
DataType::Type to = instruction->GetResultType();
ValidateVectorLength(instruction);
@@ -304,7 +303,7 @@ void InstructionCodeGeneratorARM64Sve::VisitVecNeg(HVecNeg* instruction) {
LocationSummary* locations = instruction->GetLocations();
const ZRegister src = ZRegisterFrom(locations->InAt(0));
const ZRegister dst = ZRegisterFrom(locations->Out());
- const PRegisterM p_reg = LoopPReg().Merging();
+ const PRegisterM p_reg = GetVecGoverningPReg(instruction).Merging();
ValidateVectorLength(instruction);
switch (instruction->GetPackedType()) {
case DataType::Type::kUint8:
@@ -342,7 +341,7 @@ void InstructionCodeGeneratorARM64Sve::VisitVecAbs(HVecAbs* instruction) {
LocationSummary* locations = instruction->GetLocations();
const ZRegister src = ZRegisterFrom(locations->InAt(0));
const ZRegister dst = ZRegisterFrom(locations->Out());
- const PRegisterM p_reg = LoopPReg().Merging();
+ const PRegisterM p_reg = GetVecGoverningPReg(instruction).Merging();
ValidateVectorLength(instruction);
switch (instruction->GetPackedType()) {
case DataType::Type::kInt8:
@@ -378,7 +377,7 @@ void InstructionCodeGeneratorARM64Sve::VisitVecNot(HVecNot* instruction) {
LocationSummary* locations = instruction->GetLocations();
const ZRegister src = ZRegisterFrom(locations->InAt(0));
const ZRegister dst = ZRegisterFrom(locations->Out());
- const PRegisterM p_reg = LoopPReg().Merging();
+ const PRegisterM p_reg = GetVecGoverningPReg(instruction).Merging();
ValidateVectorLength(instruction);
switch (instruction->GetPackedType()) {
case DataType::Type::kBool: // special case boolean-not
@@ -438,7 +437,7 @@ void InstructionCodeGeneratorARM64Sve::VisitVecAdd(HVecAdd* instruction) {
const ZRegister lhs = ZRegisterFrom(locations->InAt(0));
const ZRegister rhs = ZRegisterFrom(locations->InAt(1));
const ZRegister dst = ZRegisterFrom(locations->Out());
- const PRegisterM p_reg = LoopPReg().Merging();
+ const PRegisterM p_reg = GetVecGoverningPReg(instruction).Merging();
ValidateVectorLength(instruction);
switch (instruction->GetPackedType()) {
case DataType::Type::kUint8:
@@ -497,7 +496,7 @@ void InstructionCodeGeneratorARM64Sve::VisitVecSub(HVecSub* instruction) {
const ZRegister lhs = ZRegisterFrom(locations->InAt(0));
const ZRegister rhs = ZRegisterFrom(locations->InAt(1));
const ZRegister dst = ZRegisterFrom(locations->Out());
- const PRegisterM p_reg = LoopPReg().Merging();
+ const PRegisterM p_reg = GetVecGoverningPReg(instruction).Merging();
ValidateVectorLength(instruction);
switch (instruction->GetPackedType()) {
case DataType::Type::kUint8:
@@ -546,7 +545,7 @@ void InstructionCodeGeneratorARM64Sve::VisitVecMul(HVecMul* instruction) {
const ZRegister lhs = ZRegisterFrom(locations->InAt(0));
const ZRegister rhs = ZRegisterFrom(locations->InAt(1));
const ZRegister dst = ZRegisterFrom(locations->Out());
- const PRegisterM p_reg = LoopPReg().Merging();
+ const PRegisterM p_reg = GetVecGoverningPReg(instruction).Merging();
ValidateVectorLength(instruction);
switch (instruction->GetPackedType()) {
case DataType::Type::kUint8:
@@ -585,7 +584,7 @@ void InstructionCodeGeneratorARM64Sve::VisitVecDiv(HVecDiv* instruction) {
const ZRegister lhs = ZRegisterFrom(locations->InAt(0));
const ZRegister rhs = ZRegisterFrom(locations->InAt(1));
const ZRegister dst = ZRegisterFrom(locations->Out());
- const PRegisterM p_reg = LoopPReg().Merging();
+ const PRegisterM p_reg = GetVecGoverningPReg(instruction).Merging();
ValidateVectorLength(instruction);
// Note: VIXL guarantees StrictNaNPropagation for Fdiv.
@@ -633,7 +632,7 @@ void InstructionCodeGeneratorARM64Sve::VisitVecAnd(HVecAnd* instruction) {
const ZRegister lhs = ZRegisterFrom(locations->InAt(0));
const ZRegister rhs = ZRegisterFrom(locations->InAt(1));
const ZRegister dst = ZRegisterFrom(locations->Out());
- const PRegisterM p_reg = LoopPReg().Merging();
+ const PRegisterM p_reg = GetVecGoverningPReg(instruction).Merging();
ValidateVectorLength(instruction);
switch (instruction->GetPackedType()) {
case DataType::Type::kBool:
@@ -678,7 +677,7 @@ void InstructionCodeGeneratorARM64Sve::VisitVecOr(HVecOr* instruction) {
const ZRegister lhs = ZRegisterFrom(locations->InAt(0));
const ZRegister rhs = ZRegisterFrom(locations->InAt(1));
const ZRegister dst = ZRegisterFrom(locations->Out());
- const PRegisterM p_reg = LoopPReg().Merging();
+ const PRegisterM p_reg = GetVecGoverningPReg(instruction).Merging();
ValidateVectorLength(instruction);
switch (instruction->GetPackedType()) {
case DataType::Type::kBool:
@@ -714,7 +713,7 @@ void InstructionCodeGeneratorARM64Sve::VisitVecXor(HVecXor* instruction) {
const ZRegister lhs = ZRegisterFrom(locations->InAt(0));
const ZRegister rhs = ZRegisterFrom(locations->InAt(1));
const ZRegister dst = ZRegisterFrom(locations->Out());
- const PRegisterM p_reg = LoopPReg().Merging();
+ const PRegisterM p_reg = GetVecGoverningPReg(instruction).Merging();
ValidateVectorLength(instruction);
switch (instruction->GetPackedType()) {
case DataType::Type::kBool:
@@ -769,7 +768,7 @@ void InstructionCodeGeneratorARM64Sve::VisitVecShl(HVecShl* instruction) {
LocationSummary* locations = instruction->GetLocations();
const ZRegister lhs = ZRegisterFrom(locations->InAt(0));
const ZRegister dst = ZRegisterFrom(locations->Out());
- const PRegisterM p_reg = LoopPReg().Merging();
+ const PRegisterM p_reg = GetVecGoverningPReg(instruction).Merging();
int32_t value = locations->InAt(1).GetConstant()->AsIntConstant()->GetValue();
ValidateVectorLength(instruction);
switch (instruction->GetPackedType()) {
@@ -802,7 +801,7 @@ void InstructionCodeGeneratorARM64Sve::VisitVecShr(HVecShr* instruction) {
LocationSummary* locations = instruction->GetLocations();
const ZRegister lhs = ZRegisterFrom(locations->InAt(0));
const ZRegister dst = ZRegisterFrom(locations->Out());
- const PRegisterM p_reg = LoopPReg().Merging();
+ const PRegisterM p_reg = GetVecGoverningPReg(instruction).Merging();
int32_t value = locations->InAt(1).GetConstant()->AsIntConstant()->GetValue();
ValidateVectorLength(instruction);
switch (instruction->GetPackedType()) {
@@ -835,7 +834,7 @@ void InstructionCodeGeneratorARM64Sve::VisitVecUShr(HVecUShr* instruction) {
LocationSummary* locations = instruction->GetLocations();
const ZRegister lhs = ZRegisterFrom(locations->InAt(0));
const ZRegister dst = ZRegisterFrom(locations->Out());
- const PRegisterM p_reg = LoopPReg().Merging();
+ const PRegisterM p_reg = GetVecGoverningPReg(instruction).Merging();
int32_t value = locations->InAt(1).GetConstant()->AsIntConstant()->GetValue();
ValidateVectorLength(instruction);
switch (instruction->GetPackedType()) {
@@ -966,7 +965,7 @@ void InstructionCodeGeneratorARM64Sve::VisitVecMultiplyAccumulate(
const ZRegister acc = ZRegisterFrom(locations->InAt(0));
const ZRegister left = ZRegisterFrom(locations->InAt(1));
const ZRegister right = ZRegisterFrom(locations->InAt(2));
- const PRegisterM p_reg = LoopPReg().Merging();
+ const PRegisterM p_reg = GetVecGoverningPReg(instruction).Merging();
DCHECK(locations->InAt(0).Equals(locations->Out()));
ValidateVectorLength(instruction);
@@ -1029,7 +1028,7 @@ void InstructionCodeGeneratorARM64Sve::VisitVecDotProd(HVecDotProd* instruction)
const ZRegister acc = ZRegisterFrom(locations->InAt(0));
const ZRegister left = ZRegisterFrom(locations->InAt(1));
const ZRegister right = ZRegisterFrom(locations->InAt(2));
- const PRegisterM p_reg = LoopPReg().Merging();
+ const PRegisterM p_reg = GetVecGoverningPReg(instruction).Merging();
HVecOperation* a = instruction->InputAt(1)->AsVecOperation();
HVecOperation* b = instruction->InputAt(2)->AsVecOperation();
DCHECK_EQ(HVecOperation::ToSignedType(a->GetPackedType()),
@@ -1099,7 +1098,7 @@ void InstructionCodeGeneratorARM64Sve::VisitVecLoad(HVecLoad* instruction) {
const ZRegister reg = ZRegisterFrom(locations->Out());
UseScratchRegisterScope temps(GetVIXLAssembler());
Register scratch;
- const PRegisterZ p_reg = LoopPReg().Zeroing();
+ const PRegisterZ p_reg = GetVecGoverningPReg(instruction).Zeroing();
ValidateVectorLength(instruction);
switch (instruction->GetPackedType()) {
@@ -1141,7 +1140,7 @@ void InstructionCodeGeneratorARM64Sve::VisitVecStore(HVecStore* instruction) {
const ZRegister reg = ZRegisterFrom(locations->InAt(2));
UseScratchRegisterScope temps(GetVIXLAssembler());
Register scratch;
- const PRegisterZ p_reg = LoopPReg().Zeroing();
+ const PRegisterZ p_reg = GetVecGoverningPReg(instruction).Zeroing();
ValidateVectorLength(instruction);
switch (instruction->GetPackedType()) {
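A minimal scalar model (not ART code, names invented for illustration) of the change running through the hunks above: the single fixed LoopPReg() is replaced by GetVecGoverningPReg(instruction), so each predicated SVE operation uses its own governing predicate. The merging form (".Merging()") used by the arithmetic visitors behaves like this:

#include <array>
#include <cstddef>

// Scalar model of a merging SVE operation: active lanes are computed,
// inactive lanes keep the previous destination value.
template <typename T, std::size_t N>
void PredicatedAdd(std::array<T, N>& dst,
                   const std::array<T, N>& lhs,
                   const std::array<T, N>& rhs,
                   const std::array<bool, N>& governing_predicate) {
  for (std::size_t lane = 0; lane < N; ++lane) {
    if (governing_predicate[lane]) {
      dst[lane] = lhs[lane] + rhs[lane];
    }
  }
}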
@@ -1182,25 +1181,25 @@ void LocationsBuilderARM64Sve::VisitVecPredSetAll(HVecPredSetAll* instruction) {
void InstructionCodeGeneratorARM64Sve::VisitVecPredSetAll(HVecPredSetAll* instruction) {
// Instruction is not predicated, see nodes_vector.h
DCHECK(!instruction->IsPredicated());
- const PRegister p_reg = LoopPReg();
+ const PRegister output_p_reg = GetVecPredSetFixedOutPReg(instruction);
switch (instruction->GetPackedType()) {
case DataType::Type::kBool:
case DataType::Type::kUint8:
case DataType::Type::kInt8:
- __ Ptrue(p_reg.VnB(), vixl::aarch64::SVE_ALL);
+ __ Ptrue(output_p_reg.VnB(), vixl::aarch64::SVE_ALL);
break;
case DataType::Type::kUint16:
case DataType::Type::kInt16:
- __ Ptrue(p_reg.VnH(), vixl::aarch64::SVE_ALL);
+ __ Ptrue(output_p_reg.VnH(), vixl::aarch64::SVE_ALL);
break;
case DataType::Type::kInt32:
case DataType::Type::kFloat32:
- __ Ptrue(p_reg.VnS(), vixl::aarch64::SVE_ALL);
+ __ Ptrue(output_p_reg.VnS(), vixl::aarch64::SVE_ALL);
break;
case DataType::Type::kInt64:
case DataType::Type::kFloat64:
- __ Ptrue(p_reg.VnD(), vixl::aarch64::SVE_ALL);
+ __ Ptrue(output_p_reg.VnD(), vixl::aarch64::SVE_ALL);
break;
default:
LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
@@ -1208,6 +1207,67 @@ void InstructionCodeGeneratorARM64Sve::VisitVecPredSetAll(HVecPredSetAll* instru
}
}
+void LocationsBuilderARM64Sve::VisitVecCondition(HVecCondition* instruction) {
+ LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(instruction);
+ locations->SetInAt(0, Location::RequiresFpuRegister());
+ locations->SetInAt(1, Location::RequiresFpuRegister());
+ locations->SetOut(Location::RequiresRegister());
+}
+
+void InstructionCodeGeneratorARM64Sve::VisitVecCondition(HVecCondition* instruction) {
+ DCHECK(instruction->IsPredicated());
+ LocationSummary* locations = instruction->GetLocations();
+ const ZRegister left = ZRegisterFrom(locations->InAt(0));
+ const ZRegister right = ZRegisterFrom(locations->InAt(1));
+ const PRegisterZ p_reg = GetVecGoverningPReg(instruction).Zeroing();
+ const PRegister output_p_reg = GetVecPredSetFixedOutPReg(instruction);
+
+ HVecOperation* a = instruction->InputAt(0)->AsVecOperation();
+ HVecOperation* b = instruction->InputAt(1)->AsVecOperation();
+ DCHECK_EQ(HVecOperation::ToSignedType(a->GetPackedType()),
+ HVecOperation::ToSignedType(b->GetPackedType()));
+ ValidateVectorLength(instruction);
+
+ // TODO: Support other condition OPs and types.
+ switch (instruction->GetPackedType()) {
+ case DataType::Type::kUint8:
+ case DataType::Type::kInt8:
+ __ Cmpeq(output_p_reg.VnB(), p_reg, left.VnB(), right.VnB());
+ break;
+ case DataType::Type::kUint16:
+ case DataType::Type::kInt16:
+ __ Cmpeq(output_p_reg.VnH(), p_reg, left.VnH(), right.VnH());
+ break;
+ case DataType::Type::kInt32:
+ __ Cmpeq(output_p_reg.VnS(), p_reg, left.VnS(), right.VnS());
+ break;
+ case DataType::Type::kInt64:
+ __ Cmpeq(output_p_reg.VnD(), p_reg, left.VnD(), right.VnD());
+ break;
+ default:
+ LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
+ UNREACHABLE();
+ }
+}
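A scalar model (not ART code, names invented) of the Cmpeq form emitted above: the comparison runs only on lanes active in the governing predicate, and the output predicate is zeroed elsewhere.

#include <array>
#include <cstddef>

template <typename T, std::size_t N>
std::array<bool, N> VecEqualUnderPredicate(const std::array<T, N>& lhs,
                                           const std::array<T, N>& rhs,
                                           const std::array<bool, N>& governing_predicate) {
  std::array<bool, N> out{};  // zeroing: inactive lanes stay false
  for (std::size_t lane = 0; lane < N; ++lane) {
    if (governing_predicate[lane]) {
      out[lane] = (lhs[lane] == rhs[lane]);
    }
  }
  return out;
}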
+
+void LocationsBuilderARM64Sve::VisitVecPredNot(HVecPredNot* instruction) {
+ LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(instruction);
+ DCHECK(instruction->InputAt(0)->IsVecPredSetOperation());
+ locations->SetInAt(0, Location::NoLocation());
+ locations->SetOut(Location::RequiresRegister());
+}
+
+void InstructionCodeGeneratorARM64Sve::VisitVecPredNot(HVecPredNot* instruction) {
+ DCHECK(instruction->IsPredicated());
+
+ const PRegister input_p_reg = GetVecPredSetFixedOutPReg(
+ instruction->InputAt(0)->AsVecPredSetOperation());
+ const PRegister control_p_reg = GetVecGoverningPReg(instruction);
+ const PRegister output_p_reg = GetVecPredSetFixedOutPReg(instruction);
+
+ __ Not(output_p_reg.VnB(), control_p_reg.Zeroing(), input_p_reg.VnB());
+}
+
void LocationsBuilderARM64Sve::VisitVecPredWhile(HVecPredWhile* instruction) {
LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(instruction);
locations->SetInAt(0, Location::RequiresRegister());
@@ -1218,8 +1278,8 @@ void LocationsBuilderARM64Sve::VisitVecPredWhile(HVecPredWhile* instruction) {
// Semantically, the out location of this instruction and predicate inputs locations of
// its users should be a fixed predicate register (similar to
// Location::RegisterLocation(int reg)). But the register allocator (RA) doesn't support
- // SIMD regs (e.g. predicate), so LoopPReg() is used explicitly without exposing it
- // to the RA.
+ // SIMD regs (e.g. predicate), so fixed registers are used explicitly without exposing them
+ // to the RA (through GetVecPredSetFixedOutPReg()).
//
// To make the RA happy Location::NoLocation() was used for all the vector instructions
// predicate inputs; but for the PredSetOperations (e.g. VecPredWhile) Location::NoLocation()
@@ -1241,21 +1301,22 @@ void InstructionCodeGeneratorARM64Sve::VisitVecPredWhile(HVecPredWhile* instruct
DCHECK(instruction->GetCondKind() == HVecPredWhile::CondKind::kLO);
Register left = InputRegisterAt(instruction, 0);
Register right = InputRegisterAt(instruction, 1);
+ const PRegister output_p_reg = GetVecPredSetFixedOutPReg(instruction);
DCHECK_EQ(codegen_->GetSIMDRegisterWidth() % instruction->GetVectorLength(), 0u);
switch (codegen_->GetSIMDRegisterWidth() / instruction->GetVectorLength()) {
case 1u:
- __ Whilelo(LoopPReg().VnB(), left, right);
+ __ Whilelo(output_p_reg.VnB(), left, right);
break;
case 2u:
- __ Whilelo(LoopPReg().VnH(), left, right);
+ __ Whilelo(output_p_reg.VnH(), left, right);
break;
case 4u:
- __ Whilelo(LoopPReg().VnS(), left, right);
+ __ Whilelo(output_p_reg.VnS(), left, right);
break;
case 8u:
- __ Whilelo(LoopPReg().VnD(), left, right);
+ __ Whilelo(output_p_reg.VnD(), left, right);
break;
default:
LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
@@ -1263,20 +1324,20 @@ void InstructionCodeGeneratorARM64Sve::VisitVecPredWhile(HVecPredWhile* instruct
}
}
-void LocationsBuilderARM64Sve::VisitVecPredCondition(HVecPredCondition* instruction) {
+void LocationsBuilderARM64Sve::VisitVecPredToBoolean(HVecPredToBoolean* instruction) {
LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(instruction);
locations->SetInAt(0, Location::NoLocation());
// Result of the operation - a boolean value in a core register.
locations->SetOut(Location::RequiresRegister());
}
-void InstructionCodeGeneratorARM64Sve::VisitVecPredCondition(HVecPredCondition* instruction) {
+void InstructionCodeGeneratorARM64Sve::VisitVecPredToBoolean(HVecPredToBoolean* instruction) {
// Instruction is not predicated, see nodes_vector.h
DCHECK(!instruction->IsPredicated());
Register reg = OutputRegister(instruction);
- // Currently VecPredCondition is only used as part of vectorized loop check condition
+ // Currently VecPredToBoolean is only used as part of vectorized loop check condition
// evaluation.
- DCHECK(instruction->GetPCondKind() == HVecPredCondition::PCondKind::kNFirst);
+ DCHECK(instruction->GetPCondKind() == HVecPredToBoolean::PCondKind::kNFirst);
__ Cset(reg, pl);
}
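The renamed HVecPredToBoolean reads the condition flags left by the preceding predicate-generating instruction; for kNFirst, Cset(reg, pl) yields 1 exactly when the first lane of that predicate is inactive. A scalar model (not ART code), assuming this reading of the flags:

#include <array>
#include <cstddef>

// kNFirst: "the first lane is not active", i.e. the vectorized loop has
// no more work and should exit.
template <std::size_t N>
bool PredNotFirst(const std::array<bool, N>& predicate) {
  return !predicate[0];
}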
diff --git a/compiler/optimizing/code_generator_vector_arm_vixl.cc b/compiler/optimizing/code_generator_vector_arm_vixl.cc
index e8ecf28386..70f22af17b 100644
--- a/compiler/optimizing/code_generator_vector_arm_vixl.cc
+++ b/compiler/optimizing/code_generator_vector_arm_vixl.cc
@@ -1069,12 +1069,32 @@ void InstructionCodeGeneratorARMVIXL::VisitVecPredWhile(HVecPredWhile* instructi
UNREACHABLE();
}
-void LocationsBuilderARMVIXL::VisitVecPredCondition(HVecPredCondition* instruction) {
+void LocationsBuilderARMVIXL::VisitVecPredToBoolean(HVecPredToBoolean* instruction) {
LOG(FATAL) << "No SIMD for " << instruction->GetId();
UNREACHABLE();
}
-void InstructionCodeGeneratorARMVIXL::VisitVecPredCondition(HVecPredCondition* instruction) {
+void InstructionCodeGeneratorARMVIXL::VisitVecPredToBoolean(HVecPredToBoolean* instruction) {
+ LOG(FATAL) << "No SIMD for " << instruction->GetId();
+ UNREACHABLE();
+}
+
+void LocationsBuilderARMVIXL::VisitVecCondition(HVecCondition* instruction) {
+ LOG(FATAL) << "No SIMD for " << instruction->GetId();
+ UNREACHABLE();
+}
+
+void InstructionCodeGeneratorARMVIXL::VisitVecCondition(HVecCondition* instruction) {
+ LOG(FATAL) << "No SIMD for " << instruction->GetId();
+ UNREACHABLE();
+}
+
+void LocationsBuilderARMVIXL::VisitVecPredNot(HVecPredNot* instruction) {
+ LOG(FATAL) << "No SIMD for " << instruction->GetId();
+ UNREACHABLE();
+}
+
+void InstructionCodeGeneratorARMVIXL::VisitVecPredNot(HVecPredNot* instruction) {
LOG(FATAL) << "No SIMD for " << instruction->GetId();
UNREACHABLE();
}
diff --git a/compiler/optimizing/code_generator_vector_x86.cc b/compiler/optimizing/code_generator_vector_x86.cc
index 343a6e1af4..1f9b2578ac 100644
--- a/compiler/optimizing/code_generator_vector_x86.cc
+++ b/compiler/optimizing/code_generator_vector_x86.cc
@@ -1401,12 +1401,32 @@ void InstructionCodeGeneratorX86::VisitVecPredWhile(HVecPredWhile* instruction)
UNREACHABLE();
}
-void LocationsBuilderX86::VisitVecPredCondition(HVecPredCondition* instruction) {
+void LocationsBuilderX86::VisitVecPredToBoolean(HVecPredToBoolean* instruction) {
LOG(FATAL) << "No SIMD for " << instruction->GetId();
UNREACHABLE();
}
-void InstructionCodeGeneratorX86::VisitVecPredCondition(HVecPredCondition* instruction) {
+void InstructionCodeGeneratorX86::VisitVecPredToBoolean(HVecPredToBoolean* instruction) {
+ LOG(FATAL) << "No SIMD for " << instruction->GetId();
+ UNREACHABLE();
+}
+
+void LocationsBuilderX86::VisitVecCondition(HVecCondition* instruction) {
+ LOG(FATAL) << "No SIMD for " << instruction->GetId();
+ UNREACHABLE();
+}
+
+void InstructionCodeGeneratorX86::VisitVecCondition(HVecCondition* instruction) {
+ LOG(FATAL) << "No SIMD for " << instruction->GetId();
+ UNREACHABLE();
+}
+
+void LocationsBuilderX86::VisitVecPredNot(HVecPredNot* instruction) {
+ LOG(FATAL) << "No SIMD for " << instruction->GetId();
+ UNREACHABLE();
+}
+
+void InstructionCodeGeneratorX86::VisitVecPredNot(HVecPredNot* instruction) {
LOG(FATAL) << "No SIMD for " << instruction->GetId();
UNREACHABLE();
}
diff --git a/compiler/optimizing/code_generator_vector_x86_64.cc b/compiler/optimizing/code_generator_vector_x86_64.cc
index fb6e4e753f..47afa3b4a1 100644
--- a/compiler/optimizing/code_generator_vector_x86_64.cc
+++ b/compiler/optimizing/code_generator_vector_x86_64.cc
@@ -1374,12 +1374,32 @@ void InstructionCodeGeneratorX86_64::VisitVecPredWhile(HVecPredWhile* instructio
UNREACHABLE();
}
-void LocationsBuilderX86_64::VisitVecPredCondition(HVecPredCondition* instruction) {
+void LocationsBuilderX86_64::VisitVecPredToBoolean(HVecPredToBoolean* instruction) {
LOG(FATAL) << "No SIMD for " << instruction->GetId();
UNREACHABLE();
}
-void InstructionCodeGeneratorX86_64::VisitVecPredCondition(HVecPredCondition* instruction) {
+void InstructionCodeGeneratorX86_64::VisitVecPredToBoolean(HVecPredToBoolean* instruction) {
+ LOG(FATAL) << "No SIMD for " << instruction->GetId();
+ UNREACHABLE();
+}
+
+void LocationsBuilderX86_64::VisitVecCondition(HVecCondition* instruction) {
+ LOG(FATAL) << "No SIMD for " << instruction->GetId();
+ UNREACHABLE();
+}
+
+void InstructionCodeGeneratorX86_64::VisitVecCondition(HVecCondition* instruction) {
+ LOG(FATAL) << "No SIMD for " << instruction->GetId();
+ UNREACHABLE();
+}
+
+void LocationsBuilderX86_64::VisitVecPredNot(HVecPredNot* instruction) {
+ LOG(FATAL) << "No SIMD for " << instruction->GetId();
+ UNREACHABLE();
+}
+
+void InstructionCodeGeneratorX86_64::VisitVecPredNot(HVecPredNot* instruction) {
LOG(FATAL) << "No SIMD for " << instruction->GetId();
UNREACHABLE();
}
diff --git a/compiler/optimizing/code_generator_x86.cc b/compiler/optimizing/code_generator_x86.cc
index cb1cecc45a..71db5c99af 100644
--- a/compiler/optimizing/code_generator_x86.cc
+++ b/compiler/optimizing/code_generator_x86.cc
@@ -27,6 +27,7 @@
#include "heap_poisoning.h"
#include "interpreter/mterp/nterp.h"
#include "intrinsics.h"
+#include "intrinsics_list.h"
#include "intrinsics_utils.h"
#include "intrinsics_x86.h"
#include "jit/profiling_info.h"
@@ -36,8 +37,10 @@
#include "mirror/class-inl.h"
#include "mirror/var_handle.h"
#include "optimizing/nodes.h"
+#include "profiling_info_builder.h"
#include "scoped_thread_state_change-inl.h"
#include "thread.h"
+#include "trace.h"
#include "utils/assembler.h"
#include "utils/stack_checks.h"
#include "utils/x86/assembler_x86.h"
@@ -66,7 +69,7 @@ static RegisterSet OneRegInReferenceOutSaveEverythingCallerSaves() {
RegisterSet caller_saves = RegisterSet::Empty();
caller_saves.Add(Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
// TODO: Add GetReturnLocation() to the calling convention so that we can DCHECK()
- // that the the kPrimNot result register is the same as the first argument register.
+ // that the kPrimNot result register is the same as the first argument register.
return caller_saves;
}
@@ -503,18 +506,17 @@ class ReadBarrierMarkSlowPathX86 : public SlowPathCode {
: SlowPathCode(instruction),
ref_(ref),
unpoison_ref_before_marking_(unpoison_ref_before_marking) {
- DCHECK(gUseReadBarrier);
}
const char* GetDescription() const override { return "ReadBarrierMarkSlowPathX86"; }
void EmitNativeCode(CodeGenerator* codegen) override {
+ DCHECK(codegen->EmitReadBarrier());
LocationSummary* locations = instruction_->GetLocations();
Register ref_reg = ref_.AsRegister<Register>();
DCHECK(locations->CanCall());
DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(ref_reg)) << ref_reg;
DCHECK(instruction_->IsInstanceFieldGet() ||
- instruction_->IsPredicatedInstanceFieldGet() ||
instruction_->IsStaticFieldGet() ||
instruction_->IsArrayGet() ||
instruction_->IsArraySet() ||
@@ -590,12 +592,12 @@ class ReadBarrierMarkAndUpdateFieldSlowPathX86 : public SlowPathCode {
field_addr_(field_addr),
unpoison_ref_before_marking_(unpoison_ref_before_marking),
temp_(temp) {
- DCHECK(gUseReadBarrier);
}
const char* GetDescription() const override { return "ReadBarrierMarkAndUpdateFieldSlowPathX86"; }
void EmitNativeCode(CodeGenerator* codegen) override {
+ DCHECK(codegen->EmitReadBarrier());
LocationSummary* locations = instruction_->GetLocations();
Register ref_reg = ref_.AsRegister<Register>();
DCHECK(locations->CanCall());
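The read-barrier slow paths above drop the gUseReadBarrier assertion from their constructors and instead assert codegen->EmitReadBarrier() inside EmitNativeCode, where a code generator is available; the property becomes per compilation rather than a process-wide flag. A minimal sketch of the pattern, with invented type names:

#include <cassert>

struct CodeGeneratorModel {
  bool emit_read_barrier = false;
  bool EmitReadBarrier() const { return emit_read_barrier; }
};

struct SlowPathModel {
  // No global flag to check at construction time any more; the check moves
  // to code emission, where the owning code generator is passed in.
  void EmitNativeCode(CodeGeneratorModel* codegen) {
    assert(codegen->EmitReadBarrier());
    // ... emit the marking / read-barrier code ...
  }
};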
@@ -604,7 +606,9 @@ class ReadBarrierMarkAndUpdateFieldSlowPathX86 : public SlowPathCode {
<< "Unexpected instruction in read barrier marking and field updating slow path: "
<< instruction_->DebugName();
HInvoke* invoke = instruction_->AsInvoke();
- DCHECK(IsUnsafeCASObject(invoke) || IsVarHandleCASFamily(invoke)) << invoke->GetIntrinsic();
+ DCHECK(IsUnsafeCASReference(invoke) ||
+ IsUnsafeGetAndSetReference(invoke) ||
+ IsVarHandleCASFamily(invoke)) << invoke->GetIntrinsic();
__ Bind(GetEntryLabel());
if (unpoison_ref_before_marking_) {
@@ -650,7 +654,7 @@ class ReadBarrierMarkAndUpdateFieldSlowPathX86 : public SlowPathCode {
__ cmpl(temp_, ref_reg);
__ j(kEqual, &done);
- // Update the the holder's field atomically. This may fail if
+ // Update the holder's field atomically. This may fail if
// mutator updates before us, but it's OK. This is achieved
// using a strong compare-and-set (CAS) operation with relaxed
// memory synchronization ordering, where the expected value is
@@ -744,7 +748,6 @@ class ReadBarrierForHeapReferenceSlowPathX86 : public SlowPathCode {
obj_(obj),
offset_(offset),
index_(index) {
- DCHECK(gUseReadBarrier);
// If `obj` is equal to `out` or `ref`, it means the initial object
// has been overwritten by (or after) the heap object reference load
// to be instrumented, e.g.:
@@ -759,13 +762,13 @@ class ReadBarrierForHeapReferenceSlowPathX86 : public SlowPathCode {
}
void EmitNativeCode(CodeGenerator* codegen) override {
+ DCHECK(codegen->EmitReadBarrier());
CodeGeneratorX86* x86_codegen = down_cast<CodeGeneratorX86*>(codegen);
LocationSummary* locations = instruction_->GetLocations();
Register reg_out = out_.AsRegister<Register>();
DCHECK(locations->CanCall());
DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(reg_out));
DCHECK(instruction_->IsInstanceFieldGet() ||
- instruction_->IsPredicatedInstanceFieldGet() ||
instruction_->IsStaticFieldGet() ||
instruction_->IsArrayGet() ||
instruction_->IsInstanceOf() ||
@@ -838,9 +841,11 @@ class ReadBarrierForHeapReferenceSlowPathX86 : public SlowPathCode {
DCHECK(instruction_->GetLocations()->Intrinsified());
DCHECK((instruction_->AsInvoke()->GetIntrinsic() == Intrinsics::kUnsafeGetObject) ||
(instruction_->AsInvoke()->GetIntrinsic() == Intrinsics::kUnsafeGetObjectVolatile) ||
- (instruction_->AsInvoke()->GetIntrinsic() == Intrinsics::kJdkUnsafeGetObject) ||
- (instruction_->AsInvoke()->GetIntrinsic() == Intrinsics::kJdkUnsafeGetObjectVolatile) ||
- (instruction_->AsInvoke()->GetIntrinsic() == Intrinsics::kJdkUnsafeGetObjectAcquire))
+ (instruction_->AsInvoke()->GetIntrinsic() == Intrinsics::kJdkUnsafeGetReference) ||
+ (instruction_->AsInvoke()->GetIntrinsic() ==
+ Intrinsics::kJdkUnsafeGetReferenceVolatile) ||
+ (instruction_->AsInvoke()->GetIntrinsic() ==
+ Intrinsics::kJdkUnsafeGetReferenceAcquire))
<< instruction_->AsInvoke()->GetIntrinsic();
DCHECK_EQ(offset_, 0U);
DCHECK(index_.IsRegisterPair());
@@ -918,10 +923,10 @@ class ReadBarrierForRootSlowPathX86 : public SlowPathCode {
public:
ReadBarrierForRootSlowPathX86(HInstruction* instruction, Location out, Location root)
: SlowPathCode(instruction), out_(out), root_(root) {
- DCHECK(gUseReadBarrier);
}
void EmitNativeCode(CodeGenerator* codegen) override {
+ DCHECK(codegen->EmitReadBarrier());
LocationSummary* locations = instruction_->GetLocations();
Register reg_out = out_.AsRegister<Register>();
DCHECK(locations->CanCall());
@@ -985,11 +990,14 @@ class MethodEntryExitHooksSlowPathX86 : public SlowPathCode {
class CompileOptimizedSlowPathX86 : public SlowPathCode {
public:
- CompileOptimizedSlowPathX86() : SlowPathCode(/* instruction= */ nullptr) {}
+ explicit CompileOptimizedSlowPathX86(uint32_t counter_address)
+ : SlowPathCode(/* instruction= */ nullptr),
+ counter_address_(counter_address) {}
void EmitNativeCode(CodeGenerator* codegen) override {
CodeGeneratorX86* x86_codegen = down_cast<CodeGeneratorX86*>(codegen);
__ Bind(GetEntryLabel());
+ __ movw(Address::Absolute(counter_address_), Immediate(ProfilingInfo::GetOptimizeThreshold()));
x86_codegen->GenerateInvokeRuntime(
GetThreadOffset<kX86PointerSize>(kQuickCompileOptimized).Int32Value());
__ jmp(GetExitLabel());
@@ -1000,6 +1008,8 @@ class CompileOptimizedSlowPathX86 : public SlowPathCode {
}
private:
+ uint32_t counter_address_;
+
DISALLOW_COPY_AND_ASSIGN(CompileOptimizedSlowPathX86);
};
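The slow path now receives the counter's address and rewinds it to ProfilingInfo::GetOptimizeThreshold() before calling kQuickCompileOptimized, so a hot method does not immediately re-enter the slow path. A conceptual sketch (not ART code; helper names and the threshold value are invented) of the protocol this relies on:

#include <cstdint>

constexpr uint16_t kOptimizeThreshold = 0xffff;  // illustrative value only

uint16_t hotness_counter = kOptimizeThreshold;   // lives in ProfilingInfo

void RequestOptimizedCompilation() { /* stand-in for kQuickCompileOptimized */ }

void OnHotnessCheck() {
  // Baseline code decrements the counter and branches to the slow path
  // when it reaches zero.
  if (--hotness_counter == 0) {
    hotness_counter = kOptimizeThreshold;  // the movw added to the slow path
    RequestOptimizedCompilation();
  }
}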
@@ -1107,6 +1117,7 @@ void CodeGeneratorX86::GenerateInvokeRuntime(int32_t entry_point_offset) {
}
namespace detail {
+
// Mark which intrinsics we don't have handcrafted code for.
template <Intrinsics T>
struct IsUnimplemented {
@@ -1121,15 +1132,13 @@ struct IsUnimplemented {
UNIMPLEMENTED_INTRINSIC_LIST_X86(TRUE_OVERRIDE)
#undef TRUE_OVERRIDE
-#include "intrinsics_list.h"
static constexpr bool kIsIntrinsicUnimplemented[] = {
- false, // kNone
+ false, // kNone
#define IS_UNIMPLEMENTED(Intrinsic, ...) \
- IsUnimplemented<Intrinsics::k##Intrinsic>().is_unimplemented,
- INTRINSICS_LIST(IS_UNIMPLEMENTED)
+ IsUnimplemented<Intrinsics::k##Intrinsic>().is_unimplemented,
+ ART_INTRINSICS_LIST(IS_UNIMPLEMENTED)
#undef IS_UNIMPLEMENTED
};
-#undef INTRINSICS_LIST
} // namespace detail
@@ -1140,8 +1149,7 @@ CodeGeneratorX86::CodeGeneratorX86(HGraph* graph,
kNumberOfCpuRegisters,
kNumberOfXmmRegisters,
kNumberOfRegisterPairs,
- ComputeRegisterMask(reinterpret_cast<const int*>(kCoreCalleeSaves),
- arraysize(kCoreCalleeSaves))
+ ComputeRegisterMask(kCoreCalleeSaves, arraysize(kCoreCalleeSaves))
| (1 << kFakeReturnRegister),
0,
compiler_options,
@@ -1221,12 +1229,18 @@ void LocationsBuilderX86::VisitMethodExitHook(HMethodExitHook* method_hook) {
LocationSummary* locations = new (GetGraph()->GetAllocator())
LocationSummary(method_hook, LocationSummary::kCallOnSlowPath);
SetInForReturnValue(method_hook, locations);
+ // We use rdtsc to obtain a timestamp for tracing. rdtsc returns the results in EAX + EDX.
+ locations->AddTemp(Location::RegisterLocation(EAX));
+ locations->AddTemp(Location::RegisterLocation(EDX));
+ // An additional temporary register to hold the address at which to store the timestamp.

+ locations->AddTemp(Location::RequiresRegister());
}
void InstructionCodeGeneratorX86::GenerateMethodEntryExitHook(HInstruction* instruction) {
SlowPathCode* slow_path =
new (codegen_->GetScopedAllocator()) MethodEntryExitHooksSlowPathX86(instruction);
codegen_->AddSlowPath(slow_path);
+ LocationSummary* locations = instruction->GetLocations();
if (instruction->IsMethodExitHook()) {
// Check if we are required to check if the caller needs a deoptimization. Strictly speaking it
@@ -1242,8 +1256,51 @@ void InstructionCodeGeneratorX86::GenerateMethodEntryExitHook(HInstruction* inst
MemberOffset offset = instruction->IsMethodExitHook() ?
instrumentation::Instrumentation::HaveMethodExitListenersOffset() :
instrumentation::Instrumentation::HaveMethodEntryListenersOffset();
- __ cmpb(Address::Absolute(address + offset.Int32Value()), Immediate(0));
- __ j(kNotEqual, slow_path->GetEntryLabel());
+ __ cmpb(Address::Absolute(address + offset.Int32Value()),
+ Immediate(instrumentation::Instrumentation::kFastTraceListeners));
+ // Check if there are any trace method entry / exit listeners. If not, continue.
+ __ j(kLess, slow_path->GetExitLabel());
+ // Check if there are any slow (jvmti / trace with thread cpu time) method entry / exit listeners.
+ // If yes, just take the slow path.
+ __ j(kGreater, slow_path->GetEntryLabel());
+
+ // For entry_addr use the first temp that isn't EAX or EDX. We need this after
+ // rdtsc which returns values in EAX + EDX.
+ Register entry_addr = locations->GetTemp(2).AsRegister<Register>();
+ Register index = locations->GetTemp(1).AsRegister<Register>();
+
+ // Check if there is space in the buffer for a new entry; if not, take the slow path.
+ uint32_t trace_buffer_ptr = Thread::TraceBufferPtrOffset<kX86PointerSize>().Int32Value();
+ uint64_t trace_buffer_index_offset =
+ Thread::TraceBufferIndexOffset<kX86PointerSize>().Int32Value();
+
+ __ fs()->movl(index, Address::Absolute(trace_buffer_index_offset));
+ __ subl(index, Immediate(kNumEntriesForWallClock));
+ __ j(kLess, slow_path->GetEntryLabel());
+
+ // Update the index in the `Thread`.
+ __ fs()->movl(Address::Absolute(trace_buffer_index_offset), index);
+ // Calculate the entry address in the buffer.
+ // entry_addr = base_addr + sizeof(void*) * index
+ __ fs()->movl(entry_addr, Address::Absolute(trace_buffer_ptr));
+ __ leal(entry_addr, Address(entry_addr, index, TIMES_4, 0));
+
+ // Record method pointer and trace action.
+ Register method = index;
+ __ movl(method, Address(ESP, kCurrentMethodStackOffset));
+ // Use the last two bits to encode the trace method action. For MethodEntry it is 0,
+ // so there is no need to set the bits since they are already 0.
+ if (instruction->IsMethodExitHook()) {
+ DCHECK_GE(ArtMethod::Alignment(kRuntimePointerSize), static_cast<size_t>(4));
+ static_assert(enum_cast<int32_t>(TraceAction::kTraceMethodEnter) == 0);
+ static_assert(enum_cast<int32_t>(TraceAction::kTraceMethodExit) == 1);
+ __ orl(method, Immediate(enum_cast<int32_t>(TraceAction::kTraceMethodExit)));
+ }
+ __ movl(Address(entry_addr, kMethodOffsetInBytes), method);
+ // Get the timestamp. rdtsc returns timestamp in EAX + EDX.
+ __ rdtsc();
+ __ movl(Address(entry_addr, kTimestampOffsetInBytes), EAX);
+ __ movl(Address(entry_addr, kHighTimestampOffsetInBytes), EDX);
__ Bind(slow_path->GetExitLabel());
}
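A plain-C++ model (not ART code) of the fast path emitted above: reserve room in the per-thread trace buffer by moving the index down, bail to the slow path when it would go negative, then store the method pointer (with the trace action in its low bits) and the two rdtsc timestamp halves. The entry size and field order here are illustrative stand-ins for kNumEntriesForWallClock, kMethodOffsetInBytes and the related constants:

#include <cstdint>

constexpr int32_t kWordsPerEntry = 3;  // assumed: method + low/high timestamp

// Returns false when the buffer is full, i.e. where the generated code
// branches to the slow path to flush it.
bool RecordMethodEvent(uint32_t* buffer, int32_t& index,
                       uint32_t method, bool is_exit,
                       uint32_t tsc_lo, uint32_t tsc_hi) {
  int32_t new_index = index - kWordsPerEntry;  // subl(index, ...)
  if (new_index < 0) {                         // j(kLess, slow_path)
    return false;
  }
  index = new_index;                           // write the index back to the Thread
  uint32_t* entry = buffer + new_index;        // leal(entry_addr, base + 4 * index)
  entry[0] = method | (is_exit ? 1u : 0u);     // kTraceMethodExit == 1 in the low bits
  entry[1] = tsc_lo;                           // EAX after rdtsc
  entry[2] = tsc_hi;                           // EDX after rdtsc
  return true;
}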
@@ -1254,7 +1311,13 @@ void InstructionCodeGeneratorX86::VisitMethodExitHook(HMethodExitHook* instructi
}
void LocationsBuilderX86::VisitMethodEntryHook(HMethodEntryHook* method_hook) {
- new (GetGraph()->GetAllocator()) LocationSummary(method_hook, LocationSummary::kCallOnSlowPath);
+ LocationSummary* locations = new (GetGraph()->GetAllocator())
+ LocationSummary(method_hook, LocationSummary::kCallOnSlowPath);
+ // We use rdtsc to obtain a timestamp for tracing. rdtsc returns the results in EAX + EDX.
+ locations->AddTemp(Location::RegisterLocation(EAX));
+ locations->AddTemp(Location::RegisterLocation(EDX));
+ // An additional temporary register to hold the address at which to store the timestamp.
+ locations->AddTemp(Location::RequiresRegister());
}
void InstructionCodeGeneratorX86::VisitMethodEntryHook(HMethodEntryHook* instruction) {
@@ -1286,13 +1349,13 @@ void CodeGeneratorX86::MaybeIncrementHotness(bool is_frame_entry) {
}
if (GetGraph()->IsCompilingBaseline() && !Runtime::Current()->IsAotCompiler()) {
- SlowPathCode* slow_path = new (GetScopedAllocator()) CompileOptimizedSlowPathX86();
- AddSlowPath(slow_path);
ProfilingInfo* info = GetGraph()->GetProfilingInfo();
DCHECK(info != nullptr);
uint32_t address = reinterpret_cast32<uint32_t>(info) +
ProfilingInfo::BaselineHotnessCountOffset().Int32Value();
DCHECK(!HasEmptyFrame());
+ SlowPathCode* slow_path = new (GetScopedAllocator()) CompileOptimizedSlowPathX86(address);
+ AddSlowPath(slow_path);
// With multiple threads, this can overflow. This is OK, we will eventually get to see
// it reaching 0. Also, at this point we have no register available to look
// at the counter directly.
@@ -1700,7 +1763,7 @@ void CodeGeneratorX86::LoadFromMemoryNoBarrier(DataType::Type dst_type,
__ movsd(dst.AsFpuRegister<XmmRegister>(), src);
break;
case DataType::Type::kReference:
- DCHECK(!gUseReadBarrier);
+ DCHECK(!EmitReadBarrier());
__ movl(dst.AsRegister<Register>(), src);
__ MaybeUnpoisonHeapReference(dst.AsRegister<Register>());
break;
@@ -1865,8 +1928,7 @@ void LocationsBuilderX86::VisitExit(HExit* exit) {
exit->SetLocations(nullptr);
}
-void InstructionCodeGeneratorX86::VisitExit(HExit* exit ATTRIBUTE_UNUSED) {
-}
+void InstructionCodeGeneratorX86::VisitExit([[maybe_unused]] HExit* exit) {}
template<class LabelType>
void InstructionCodeGeneratorX86::GenerateFPJumps(HCondition* cond,
@@ -1981,7 +2043,7 @@ void InstructionCodeGeneratorX86::GenerateFPCompare(Location lhs,
Location rhs,
HInstruction* insn,
bool is_double) {
- HX86LoadFromConstantTable* const_area = insn->InputAt(1)->AsX86LoadFromConstantTable();
+ HX86LoadFromConstantTable* const_area = insn->InputAt(1)->AsX86LoadFromConstantTableOrNull();
if (is_double) {
if (rhs.IsFpuRegister()) {
__ ucomisd(lhs.AsFpuRegister<XmmRegister>(), rhs.AsFpuRegister<XmmRegister>());
@@ -2053,14 +2115,18 @@ void InstructionCodeGeneratorX86::GenerateCompareTestAndBranch(HCondition* condi
}
}
-static bool AreEflagsSetFrom(HInstruction* cond, HInstruction* branch) {
+static bool AreEflagsSetFrom(HInstruction* cond,
+ HInstruction* branch,
+ const CompilerOptions& compiler_options) {
// Moves may affect the eflags register (move zero uses xorl), so the EFLAGS
// are set only strictly before `branch`. We can't use the eflags on long/FP
// conditions if they are materialized due to the complex branching.
return cond->IsCondition() &&
cond->GetNext() == branch &&
cond->InputAt(0)->GetType() != DataType::Type::kInt64 &&
- !DataType::IsFloatingPointType(cond->InputAt(0)->GetType());
+ !DataType::IsFloatingPointType(cond->InputAt(0)->GetType()) &&
+ !(cond->GetBlock()->GetGraph()->IsCompilingBaseline() &&
+ compiler_options.ProfileBranches());
}
template<class LabelType>
@@ -2097,7 +2163,7 @@ void InstructionCodeGeneratorX86::GenerateTestAndBranch(HInstruction* instructio
// - condition true => branch to true_target
// - branch to false_target
if (IsBooleanValueOrMaterializedCondition(cond)) {
- if (AreEflagsSetFrom(cond, instruction)) {
+ if (AreEflagsSetFrom(cond, instruction, codegen_->GetCompilerOptions())) {
if (true_target == nullptr) {
__ j(X86Condition(cond->AsCondition()->GetOppositeCondition()), false_target);
} else {
@@ -2151,7 +2217,15 @@ void InstructionCodeGeneratorX86::GenerateTestAndBranch(HInstruction* instructio
void LocationsBuilderX86::VisitIf(HIf* if_instr) {
LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(if_instr);
if (IsBooleanValueOrMaterializedCondition(if_instr->InputAt(0))) {
- locations->SetInAt(0, Location::Any());
+ if (GetGraph()->IsCompilingBaseline() &&
+ codegen_->GetCompilerOptions().ProfileBranches() &&
+ !Runtime::Current()->IsAotCompiler()) {
+ locations->SetInAt(0, Location::RequiresRegister());
+ locations->AddTemp(Location::RequiresRegister());
+ locations->AddTemp(Location::RequiresRegister());
+ } else {
+ locations->SetInAt(0, Location::Any());
+ }
}
}
@@ -2162,6 +2236,34 @@ void InstructionCodeGeneratorX86::VisitIf(HIf* if_instr) {
nullptr : codegen_->GetLabelOf(true_successor);
Label* false_target = codegen_->GoesToNextBlock(if_instr->GetBlock(), false_successor) ?
nullptr : codegen_->GetLabelOf(false_successor);
+ if (IsBooleanValueOrMaterializedCondition(if_instr->InputAt(0))) {
+ if (GetGraph()->IsCompilingBaseline() &&
+ codegen_->GetCompilerOptions().ProfileBranches() &&
+ !Runtime::Current()->IsAotCompiler()) {
+ DCHECK(if_instr->InputAt(0)->IsCondition());
+ Register temp = if_instr->GetLocations()->GetTemp(0).AsRegister<Register>();
+ Register counter = if_instr->GetLocations()->GetTemp(1).AsRegister<Register>();
+ ProfilingInfo* info = GetGraph()->GetProfilingInfo();
+ DCHECK(info != nullptr);
+ BranchCache* cache = info->GetBranchCache(if_instr->GetDexPc());
+ // Currently, not all If branches are profiled.
+ if (cache != nullptr) {
+ uint64_t address =
+ reinterpret_cast64<uint64_t>(cache) + BranchCache::FalseOffset().Int32Value();
+ static_assert(
+ BranchCache::TrueOffset().Int32Value() - BranchCache::FalseOffset().Int32Value() == 2,
+ "Unexpected offsets for BranchCache");
+ NearLabel done;
+ Location lhs = if_instr->GetLocations()->InAt(0);
+ __ movl(temp, Immediate(address));
+ __ movzxw(counter, Address(temp, lhs.AsRegister<Register>(), TIMES_2, 0));
+ __ addw(counter, Immediate(1));
+ __ j(kEqual, &done);
+ __ movw(Address(temp, lhs.AsRegister<Register>(), TIMES_2, 0), counter);
+ __ Bind(&done);
+ }
+ }
+ }
GenerateTestAndBranch(if_instr, /* condition_input_index= */ 0, true_target, false_target);
}
@@ -2257,7 +2359,7 @@ void InstructionCodeGeneratorX86::VisitSelect(HSelect* select) {
if (!condition->IsEmittedAtUseSite()) {
// This was a previously materialized condition.
// Can we use the existing condition code?
- if (AreEflagsSetFrom(condition, select)) {
+ if (AreEflagsSetFrom(condition, select, codegen_->GetCompilerOptions())) {
// Materialization was the previous instruction. Condition codes are right.
cond = X86Condition(condition->GetCondition());
} else {
@@ -2506,7 +2608,7 @@ void LocationsBuilderX86::VisitIntConstant(HIntConstant* constant) {
locations->SetOut(Location::ConstantLocation(constant));
}
-void InstructionCodeGeneratorX86::VisitIntConstant(HIntConstant* constant ATTRIBUTE_UNUSED) {
+void InstructionCodeGeneratorX86::VisitIntConstant([[maybe_unused]] HIntConstant* constant) {
// Will be generated at use site.
}
@@ -2516,7 +2618,7 @@ void LocationsBuilderX86::VisitNullConstant(HNullConstant* constant) {
locations->SetOut(Location::ConstantLocation(constant));
}
-void InstructionCodeGeneratorX86::VisitNullConstant(HNullConstant* constant ATTRIBUTE_UNUSED) {
+void InstructionCodeGeneratorX86::VisitNullConstant([[maybe_unused]] HNullConstant* constant) {
// Will be generated at use site.
}
@@ -2526,7 +2628,7 @@ void LocationsBuilderX86::VisitLongConstant(HLongConstant* constant) {
locations->SetOut(Location::ConstantLocation(constant));
}
-void InstructionCodeGeneratorX86::VisitLongConstant(HLongConstant* constant ATTRIBUTE_UNUSED) {
+void InstructionCodeGeneratorX86::VisitLongConstant([[maybe_unused]] HLongConstant* constant) {
// Will be generated at use site.
}
@@ -2536,7 +2638,7 @@ void LocationsBuilderX86::VisitFloatConstant(HFloatConstant* constant) {
locations->SetOut(Location::ConstantLocation(constant));
}
-void InstructionCodeGeneratorX86::VisitFloatConstant(HFloatConstant* constant ATTRIBUTE_UNUSED) {
+void InstructionCodeGeneratorX86::VisitFloatConstant([[maybe_unused]] HFloatConstant* constant) {
// Will be generated at use site.
}
@@ -2546,7 +2648,7 @@ void LocationsBuilderX86::VisitDoubleConstant(HDoubleConstant* constant) {
locations->SetOut(Location::ConstantLocation(constant));
}
-void InstructionCodeGeneratorX86::VisitDoubleConstant(HDoubleConstant* constant ATTRIBUTE_UNUSED) {
+void InstructionCodeGeneratorX86::VisitDoubleConstant([[maybe_unused]] HDoubleConstant* constant) {
// Will be generated at use site.
}
@@ -2555,7 +2657,7 @@ void LocationsBuilderX86::VisitConstructorFence(HConstructorFence* constructor_f
}
void InstructionCodeGeneratorX86::VisitConstructorFence(
- HConstructorFence* constructor_fence ATTRIBUTE_UNUSED) {
+ [[maybe_unused]] HConstructorFence* constructor_fence) {
codegen_->GenerateMemoryBarrier(MemBarrierKind::kStoreStore);
}
@@ -2571,7 +2673,7 @@ void LocationsBuilderX86::VisitReturnVoid(HReturnVoid* ret) {
ret->SetLocations(nullptr);
}
-void InstructionCodeGeneratorX86::VisitReturnVoid(HReturnVoid* ret ATTRIBUTE_UNUSED) {
+void InstructionCodeGeneratorX86::VisitReturnVoid([[maybe_unused]] HReturnVoid* ret) {
codegen_->GenerateFrameExit();
}
@@ -2697,7 +2799,7 @@ void LocationsBuilderX86::VisitInvokeVirtual(HInvokeVirtual* invoke) {
HandleInvoke(invoke);
- if (GetGraph()->IsCompilingBaseline() && !Runtime::Current()->IsAotCompiler()) {
+ if (ProfilingInfoBuilder::IsInlineCacheUseful(invoke, codegen_)) {
// Add one temporary for inline cache update.
invoke->GetLocations()->AddTemp(Location::RegisterLocation(EBP));
}
@@ -2725,7 +2827,7 @@ void LocationsBuilderX86::VisitInvokeInterface(HInvokeInterface* invoke) {
// Add the hidden argument.
invoke->GetLocations()->AddTemp(Location::FpuRegisterLocation(XMM7));
- if (GetGraph()->IsCompilingBaseline() && !Runtime::Current()->IsAotCompiler()) {
+ if (ProfilingInfoBuilder::IsInlineCacheUseful(invoke, codegen_)) {
// Add one temporary for inline cache update.
invoke->GetLocations()->AddTemp(Location::RegisterLocation(EBP));
}
@@ -2743,29 +2845,30 @@ void LocationsBuilderX86::VisitInvokeInterface(HInvokeInterface* invoke) {
void CodeGeneratorX86::MaybeGenerateInlineCacheCheck(HInstruction* instruction, Register klass) {
DCHECK_EQ(EAX, klass);
- // We know the destination of an intrinsic, so no need to record inline
- // caches (also the intrinsic location builder doesn't request an additional
- // temporary).
- if (!instruction->GetLocations()->Intrinsified() &&
- GetGraph()->IsCompilingBaseline() &&
- !Runtime::Current()->IsAotCompiler()) {
- DCHECK(!instruction->GetEnvironment()->IsFromInlinedInvoke());
+ if (ProfilingInfoBuilder::IsInlineCacheUseful(instruction->AsInvoke(), this)) {
ProfilingInfo* info = GetGraph()->GetProfilingInfo();
DCHECK(info != nullptr);
- InlineCache* cache = info->GetInlineCache(instruction->GetDexPc());
- uint32_t address = reinterpret_cast32<uint32_t>(cache);
- if (kIsDebugBuild) {
- uint32_t temp_index = instruction->GetLocations()->GetTempCount() - 1u;
- CHECK_EQ(EBP, instruction->GetLocations()->GetTemp(temp_index).AsRegister<Register>());
+ InlineCache* cache = ProfilingInfoBuilder::GetInlineCache(info, instruction->AsInvoke());
+ if (cache != nullptr) {
+ uint32_t address = reinterpret_cast32<uint32_t>(cache);
+ if (kIsDebugBuild) {
+ uint32_t temp_index = instruction->GetLocations()->GetTempCount() - 1u;
+ CHECK_EQ(EBP, instruction->GetLocations()->GetTemp(temp_index).AsRegister<Register>());
+ }
+ Register temp = EBP;
+ NearLabel done;
+ __ movl(temp, Immediate(address));
+ // Fast path for a monomorphic cache.
+ __ cmpl(klass, Address(temp, InlineCache::ClassesOffset().Int32Value()));
+ __ j(kEqual, &done);
+ GenerateInvokeRuntime(GetThreadOffset<kX86PointerSize>(kQuickUpdateInlineCache).Int32Value());
+ __ Bind(&done);
+ } else {
+ // This is unexpected, but we don't guarantee stable compilation across
+ // JIT runs, so just warn about it.
+ ScopedObjectAccess soa(Thread::Current());
+ LOG(WARNING) << "Missing inline cache for " << GetGraph()->GetArtMethod()->PrettyMethod();
}
- Register temp = EBP;
- NearLabel done;
- __ movl(temp, Immediate(address));
- // Fast path for a monomorphic cache.
- __ cmpl(klass, Address(temp, InlineCache::ClassesOffset().Int32Value()));
- __ j(kEqual, &done);
- GenerateInvokeRuntime(GetThreadOffset<kX86PointerSize>(kQuickUpdateInlineCache).Int32Value());
- __ Bind(&done);
}
}
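A conceptual model (not ART code, names invented) of the monomorphic inline-cache fast path now guarded by ProfilingInfoBuilder::IsInlineCacheUseful and a possibly missing cache: only the first class slot is compared inline, and anything else falls through to the kQuickUpdateInlineCache entrypoint.

#include <cstdint>

struct InlineCacheModel {
  uintptr_t classes[1];  // the generated code compares only the first slot
};

// Stand-in for the runtime entrypoint; details of how it records the class
// are omitted here.
void UpdateInlineCacheRuntime(InlineCacheModel* /*cache*/, uintptr_t /*receiver_class*/) {}

void MaybeRecordReceiverClass(InlineCacheModel* cache, uintptr_t receiver_class) {
  if (cache->classes[0] == receiver_class) {  // cmpl(klass, classes[0]): fast hit
    return;
  }
  UpdateInlineCacheRuntime(cache, receiver_class);  // kQuickUpdateInlineCache
}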
@@ -2954,10 +3057,10 @@ void InstructionCodeGeneratorX86::VisitX86FPNeg(HX86FPNeg* neg) {
constant_area));
__ xorps(out.AsFpuRegister<XmmRegister>(), mask);
} else {
- __ movsd(mask, codegen_->LiteralInt64Address(INT64_C(0x8000000000000000),
- neg->GetBaseMethodAddress(),
- constant_area));
- __ xorpd(out.AsFpuRegister<XmmRegister>(), mask);
+ __ movsd(mask, codegen_->LiteralInt64Address(INT64_C(0x8000000000000000),
+ neg->GetBaseMethodAddress(),
+ constant_area));
+ __ xorpd(out.AsFpuRegister<XmmRegister>(), mask);
}
}
@@ -5086,8 +5189,7 @@ void LocationsBuilderX86::VisitParameterValue(HParameterValue* instruction) {
}
void InstructionCodeGeneratorX86::VisitParameterValue(
- HParameterValue* instruction ATTRIBUTE_UNUSED) {
-}
+ [[maybe_unused]] HParameterValue* instruction) {}
void LocationsBuilderX86::VisitCurrentMethod(HCurrentMethod* instruction) {
LocationSummary* locations =
@@ -5095,7 +5197,7 @@ void LocationsBuilderX86::VisitCurrentMethod(HCurrentMethod* instruction) {
locations->SetOut(Location::RegisterLocation(kMethodRegisterArgument));
}
-void InstructionCodeGeneratorX86::VisitCurrentMethod(HCurrentMethod* instruction ATTRIBUTE_UNUSED) {
+void InstructionCodeGeneratorX86::VisitCurrentMethod([[maybe_unused]] HCurrentMethod* instruction) {
}
void LocationsBuilderX86::VisitClassTableGet(HClassTableGet* instruction) {
@@ -5294,7 +5396,7 @@ void LocationsBuilderX86::VisitPhi(HPhi* instruction) {
locations->SetOut(Location::Any());
}
-void InstructionCodeGeneratorX86::VisitPhi(HPhi* instruction ATTRIBUTE_UNUSED) {
+void InstructionCodeGeneratorX86::VisitPhi([[maybe_unused]] HPhi* instruction) {
LOG(FATAL) << "Unreachable";
}
@@ -5323,8 +5425,8 @@ void CodeGeneratorX86::GenerateMemoryBarrier(MemBarrierKind kind) {
}
HInvokeStaticOrDirect::DispatchInfo CodeGeneratorX86::GetSupportedInvokeStaticOrDirectDispatch(
- const HInvokeStaticOrDirect::DispatchInfo& desired_dispatch_info,
- ArtMethod* method ATTRIBUTE_UNUSED) {
+ const HInvokeStaticOrDirect::DispatchInfo& desired_dispatch_info,
+ [[maybe_unused]] ArtMethod* method) {
return desired_dispatch_info;
}
@@ -5679,7 +5781,7 @@ void CodeGeneratorX86::LoadBootImageAddress(Register reg,
void CodeGeneratorX86::LoadIntrinsicDeclaringClass(Register reg, HInvokeStaticOrDirect* invoke) {
DCHECK_NE(invoke->GetIntrinsic(), Intrinsics::kNone);
if (GetCompilerOptions().IsBootImage()) {
- // Load the class the same way as for HLoadClass::LoadKind::kBootImageLinkTimePcRelative.
+ // Load the type the same way as for HLoadClass::LoadKind::kBootImageLinkTimePcRelative.
HX86ComputeBaseMethodAddress* method_address =
invoke->InputAt(invoke->GetSpecialInputIndex())->AsX86ComputeBaseMethodAddress();
DCHECK(method_address != nullptr);
@@ -5804,45 +5906,33 @@ void CodeGeneratorX86::MarkGCCard(
}
void LocationsBuilderX86::HandleFieldGet(HInstruction* instruction, const FieldInfo& field_info) {
- DCHECK(instruction->IsInstanceFieldGet() ||
- instruction->IsStaticFieldGet() ||
- instruction->IsPredicatedInstanceFieldGet());
+ DCHECK(instruction->IsInstanceFieldGet() || instruction->IsStaticFieldGet());
bool object_field_get_with_read_barrier =
- gUseReadBarrier && (instruction->GetType() == DataType::Type::kReference);
- bool is_predicated = instruction->IsPredicatedInstanceFieldGet();
+ (instruction->GetType() == DataType::Type::kReference) && codegen_->EmitReadBarrier();
LocationSummary* locations =
new (GetGraph()->GetAllocator()) LocationSummary(instruction,
- gUseReadBarrier
+ codegen_->EmitReadBarrier()
? LocationSummary::kCallOnSlowPath
: LocationSummary::kNoCall);
if (object_field_get_with_read_barrier && kUseBakerReadBarrier) {
locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty()); // No caller-save registers.
}
// receiver_input
- locations->SetInAt(is_predicated ? 1 : 0, Location::RequiresRegister());
- if (is_predicated) {
- if (DataType::IsFloatingPointType(instruction->GetType())) {
- locations->SetInAt(0, Location::RequiresFpuRegister());
- } else {
- locations->SetInAt(0, Location::RequiresRegister());
- }
- }
+ locations->SetInAt(0, Location::RequiresRegister());
if (DataType::IsFloatingPointType(instruction->GetType())) {
- locations->SetOut(is_predicated ? Location::SameAsFirstInput()
- : Location::RequiresFpuRegister());
+ locations->SetOut(Location::RequiresFpuRegister());
} else {
// The output overlaps in case of long: we don't want the low move
// to overwrite the object's location. Likewise, in the case of
// an object field get with read barriers enabled, we do not want
// the move to overwrite the object's location, as we need it to emit
// the read barrier.
- locations->SetOut(is_predicated ? Location::SameAsFirstInput() : Location::RequiresRegister(),
- (object_field_get_with_read_barrier ||
- instruction->GetType() == DataType::Type::kInt64 ||
- is_predicated)
- ? Location::kOutputOverlap
- : Location::kNoOutputOverlap);
+ locations->SetOut(
+ Location::RequiresRegister(),
+ (object_field_get_with_read_barrier || instruction->GetType() == DataType::Type::kInt64)
+ ? Location::kOutputOverlap
+ : Location::kNoOutputOverlap);
}
if (field_info.IsVolatile() && (field_info.GetFieldType() == DataType::Type::kInt64)) {
@@ -5856,12 +5946,10 @@ void LocationsBuilderX86::HandleFieldGet(HInstruction* instruction, const FieldI
void InstructionCodeGeneratorX86::HandleFieldGet(HInstruction* instruction,
const FieldInfo& field_info) {
- DCHECK(instruction->IsInstanceFieldGet() ||
- instruction->IsStaticFieldGet() ||
- instruction->IsPredicatedInstanceFieldGet());
+ DCHECK(instruction->IsInstanceFieldGet() || instruction->IsStaticFieldGet());
LocationSummary* locations = instruction->GetLocations();
- Location base_loc = locations->InAt(instruction->IsPredicatedInstanceFieldGet() ? 1 : 0);
+ Location base_loc = locations->InAt(0);
Register base = base_loc.AsRegister<Register>();
Location out = locations->Out();
bool is_volatile = field_info.IsVolatile();
@@ -5871,7 +5959,7 @@ void InstructionCodeGeneratorX86::HandleFieldGet(HInstruction* instruction,
if (load_type == DataType::Type::kReference) {
// /* HeapReference<Object> */ out = *(base + offset)
- if (gUseReadBarrier && kUseBakerReadBarrier) {
+ if (codegen_->EmitBakerReadBarrier()) {
// Note that a potential implicit null check is handled in this
// CodeGeneratorX86::GenerateFieldLoadWithBakerReadBarrier call.
codegen_->GenerateFieldLoadWithBakerReadBarrier(
@@ -6099,17 +6187,8 @@ void InstructionCodeGeneratorX86::HandleFieldSet(HInstruction* instruction,
bool is_volatile = field_info.IsVolatile();
DataType::Type field_type = field_info.GetFieldType();
uint32_t offset = field_info.GetFieldOffset().Uint32Value();
- bool is_predicated =
- instruction->IsInstanceFieldSet() && instruction->AsInstanceFieldSet()->GetIsPredicatedSet();
-
Address field_addr(base, offset);
- NearLabel pred_is_null;
- if (is_predicated) {
- __ testl(base, base);
- __ j(kEqual, &pred_is_null);
- }
-
HandleFieldSet(instruction,
/* value_index= */ 1,
field_type,
@@ -6118,10 +6197,6 @@ void InstructionCodeGeneratorX86::HandleFieldSet(HInstruction* instruction,
is_volatile,
value_can_be_null,
write_barrier_kind);
-
- if (is_predicated) {
- __ Bind(&pred_is_null);
- }
}
void LocationsBuilderX86::VisitStaticFieldGet(HStaticFieldGet* instruction) {
@@ -6154,25 +6229,10 @@ void InstructionCodeGeneratorX86::VisitInstanceFieldSet(HInstanceFieldSet* instr
instruction->GetWriteBarrierKind());
}
-void LocationsBuilderX86::VisitPredicatedInstanceFieldGet(
- HPredicatedInstanceFieldGet* instruction) {
- HandleFieldGet(instruction, instruction->GetFieldInfo());
-}
-
void LocationsBuilderX86::VisitInstanceFieldGet(HInstanceFieldGet* instruction) {
HandleFieldGet(instruction, instruction->GetFieldInfo());
}
-void InstructionCodeGeneratorX86::VisitPredicatedInstanceFieldGet(
- HPredicatedInstanceFieldGet* instruction) {
- NearLabel finish;
- LocationSummary* locations = instruction->GetLocations();
- Register recv = locations->InAt(1).AsRegister<Register>();
- __ testl(recv, recv);
- __ j(kZero, &finish);
- HandleFieldGet(instruction, instruction->GetFieldInfo());
- __ Bind(&finish);
-}
void InstructionCodeGeneratorX86::VisitInstanceFieldGet(HInstanceFieldGet* instruction) {
HandleFieldGet(instruction, instruction->GetFieldInfo());
}
@@ -6299,7 +6359,7 @@ void InstructionCodeGeneratorX86::VisitNullCheck(HNullCheck* instruction) {
void LocationsBuilderX86::VisitArrayGet(HArrayGet* instruction) {
bool object_array_get_with_read_barrier =
- gUseReadBarrier && (instruction->GetType() == DataType::Type::kReference);
+ (instruction->GetType() == DataType::Type::kReference) && codegen_->EmitReadBarrier();
LocationSummary* locations =
new (GetGraph()->GetAllocator()) LocationSummary(instruction,
object_array_get_with_read_barrier
@@ -6341,7 +6401,7 @@ void InstructionCodeGeneratorX86::VisitArrayGet(HArrayGet* instruction) {
"art::mirror::HeapReference<art::mirror::Object> and int32_t have different sizes.");
// /* HeapReference<Object> */ out =
// *(obj + data_offset + index * sizeof(HeapReference<Object>))
- if (gUseReadBarrier && kUseBakerReadBarrier) {
+ if (codegen_->EmitBakerReadBarrier()) {
// Note that a potential implicit null check is handled in this
// CodeGeneratorX86::GenerateArrayLoadWithBakerReadBarrier call.
codegen_->GenerateArrayLoadWithBakerReadBarrier(
@@ -6749,7 +6809,7 @@ void InstructionCodeGeneratorX86::VisitBoundsCheck(HBoundsCheck* instruction) {
}
}
-void LocationsBuilderX86::VisitParallelMove(HParallelMove* instruction ATTRIBUTE_UNUSED) {
+void LocationsBuilderX86::VisitParallelMove([[maybe_unused]] HParallelMove* instruction) {
LOG(FATAL) << "Unreachable";
}
@@ -7163,7 +7223,7 @@ void LocationsBuilderX86::VisitLoadClass(HLoadClass* cls) {
load_kind == HLoadClass::LoadKind::kBssEntryPublic ||
load_kind == HLoadClass::LoadKind::kBssEntryPackage);
- const bool requires_read_barrier = gUseReadBarrier && !cls->IsInBootImage();
+ const bool requires_read_barrier = !cls->IsInBootImage() && codegen_->EmitReadBarrier();
LocationSummary::CallKind call_kind = (cls->NeedsEnvironment() || requires_read_barrier)
? LocationSummary::kCallOnSlowPath
: LocationSummary::kNoCall;
@@ -7177,11 +7237,11 @@ void LocationsBuilderX86::VisitLoadClass(HLoadClass* cls) {
}
locations->SetOut(Location::RequiresRegister());
if (call_kind == LocationSummary::kCallOnSlowPath && cls->HasPcRelativeLoadKind()) {
- if (!gUseReadBarrier || kUseBakerReadBarrier) {
+ if (codegen_->EmitNonBakerReadBarrier()) {
+ // For non-Baker read barrier we have a temp-clobbering call.
+ } else {
// Rely on the type resolution and/or initialization to save everything.
locations->SetCustomSlowPathCallerSaves(OneRegInReferenceOutSaveEverythingCallerSaves());
- } else {
- // For non-Baker read barrier we have a temp-clobbering call.
}
}
}
@@ -7213,9 +7273,8 @@ void InstructionCodeGeneratorX86::VisitLoadClass(HLoadClass* cls) NO_THREAD_SAFE
Register out = out_loc.AsRegister<Register>();
bool generate_null_check = false;
- const ReadBarrierOption read_barrier_option = cls->IsInBootImage()
- ? kWithoutReadBarrier
- : gCompilerReadBarrierOption;
+ const ReadBarrierOption read_barrier_option =
+ cls->IsInBootImage() ? kWithoutReadBarrier : codegen_->GetCompilerReadBarrierOption();
switch (load_kind) {
case HLoadClass::LoadKind::kReferrersClass: {
DCHECK(!cls->CanCallRuntime());
@@ -7383,7 +7442,7 @@ HLoadString::LoadKind CodeGeneratorX86::GetSupportedLoadStringKind(
}
void LocationsBuilderX86::VisitLoadString(HLoadString* load) {
- LocationSummary::CallKind call_kind = CodeGenerator::GetLoadStringCallKind(load);
+ LocationSummary::CallKind call_kind = codegen_->GetLoadStringCallKind(load);
LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(load, call_kind);
HLoadString::LoadKind load_kind = load->GetLoadKind();
if (load_kind == HLoadString::LoadKind::kBootImageLinkTimePcRelative ||
@@ -7396,11 +7455,11 @@ void LocationsBuilderX86::VisitLoadString(HLoadString* load) {
} else {
locations->SetOut(Location::RequiresRegister());
if (load_kind == HLoadString::LoadKind::kBssEntry) {
- if (!gUseReadBarrier || kUseBakerReadBarrier) {
+ if (codegen_->EmitNonBakerReadBarrier()) {
+ // For non-Baker read barrier we have a temp-clobbering call.
+ } else {
// Rely on the pResolveString to save everything.
locations->SetCustomSlowPathCallerSaves(OneRegInReferenceOutSaveEverythingCallerSaves());
- } else {
- // For non-Baker read barrier we have a temp-clobbering call.
}
}
}
@@ -7445,7 +7504,8 @@ void InstructionCodeGeneratorX86::VisitLoadString(HLoadString* load) NO_THREAD_S
Address address = Address(method_address, CodeGeneratorX86::kPlaceholder32BitOffset);
Label* fixup_label = codegen_->NewStringBssEntryPatch(load);
// /* GcRoot<mirror::String> */ out = *address /* PC-relative */
- GenerateGcRootFieldLoad(load, out_loc, address, fixup_label, gCompilerReadBarrierOption);
+ GenerateGcRootFieldLoad(
+ load, out_loc, address, fixup_label, codegen_->GetCompilerReadBarrierOption());
// No need for memory fence, thanks to the x86 memory model.
SlowPathCode* slow_path = new (codegen_->GetScopedAllocator()) LoadStringSlowPathX86(load);
codegen_->AddSlowPath(slow_path);
@@ -7465,14 +7525,14 @@ void InstructionCodeGeneratorX86::VisitLoadString(HLoadString* load) NO_THREAD_S
Label* fixup_label = codegen_->NewJitRootStringPatch(
load->GetDexFile(), load->GetStringIndex(), load->GetString());
// /* GcRoot<mirror::String> */ out = *address
- GenerateGcRootFieldLoad(load, out_loc, address, fixup_label, gCompilerReadBarrierOption);
+ GenerateGcRootFieldLoad(
+ load, out_loc, address, fixup_label, codegen_->GetCompilerReadBarrierOption());
return;
}
default:
break;
}
- // TODO: Re-add the compiler code to do string dex cache lookup again.
InvokeRuntimeCallingConvention calling_convention;
DCHECK_EQ(calling_convention.GetRegisterAt(0), out);
__ movl(calling_convention.GetRegisterAt(0), Immediate(load->GetStringIndex().index_));
@@ -7498,7 +7558,7 @@ void LocationsBuilderX86::VisitClearException(HClearException* clear) {
new (GetGraph()->GetAllocator()) LocationSummary(clear, LocationSummary::kNoCall);
}
-void InstructionCodeGeneratorX86::VisitClearException(HClearException* clear ATTRIBUTE_UNUSED) {
+void InstructionCodeGeneratorX86::VisitClearException([[maybe_unused]] HClearException* clear) {
__ fs()->movl(GetExceptionTlsAddress(), Immediate(0));
}
@@ -7515,8 +7575,8 @@ void InstructionCodeGeneratorX86::VisitThrow(HThrow* instruction) {
}
// Temp is used for read barrier.
-static size_t NumberOfInstanceOfTemps(TypeCheckKind type_check_kind) {
- if (gUseReadBarrier &&
+static size_t NumberOfInstanceOfTemps(bool emit_read_barrier, TypeCheckKind type_check_kind) {
+ if (emit_read_barrier &&
!kUseBakerReadBarrier &&
(type_check_kind == TypeCheckKind::kAbstractClassCheck ||
type_check_kind == TypeCheckKind::kClassHierarchyCheck ||
@@ -7529,11 +7589,11 @@ static size_t NumberOfInstanceOfTemps(TypeCheckKind type_check_kind) {
// Interface case has 2 temps, one for holding the number of interfaces, one for the current
// interface pointer, the current interface is compared in memory.
// The other checks have one temp for loading the object's class.
-static size_t NumberOfCheckCastTemps(TypeCheckKind type_check_kind) {
+static size_t NumberOfCheckCastTemps(bool emit_read_barrier, TypeCheckKind type_check_kind) {
if (type_check_kind == TypeCheckKind::kInterfaceCheck) {
return 2;
}
- return 1 + NumberOfInstanceOfTemps(type_check_kind);
+ return 1 + NumberOfInstanceOfTemps(emit_read_barrier, type_check_kind);
}
void LocationsBuilderX86::VisitInstanceOf(HInstanceOf* instruction) {
@@ -7545,7 +7605,7 @@ void LocationsBuilderX86::VisitInstanceOf(HInstanceOf* instruction) {
case TypeCheckKind::kAbstractClassCheck:
case TypeCheckKind::kClassHierarchyCheck:
case TypeCheckKind::kArrayObjectCheck: {
- bool needs_read_barrier = CodeGenerator::InstanceOfNeedsReadBarrier(instruction);
+ bool needs_read_barrier = codegen_->InstanceOfNeedsReadBarrier(instruction);
call_kind = needs_read_barrier ? LocationSummary::kCallOnSlowPath : LocationSummary::kNoCall;
baker_read_barrier_slow_path = kUseBakerReadBarrier && needs_read_barrier;
break;
@@ -7575,7 +7635,8 @@ void LocationsBuilderX86::VisitInstanceOf(HInstanceOf* instruction) {
// Note that TypeCheckSlowPathX86 uses this "out" register too.
locations->SetOut(Location::RequiresRegister());
// When read barriers are enabled, we need a temporary register for some cases.
- locations->AddRegisterTemps(NumberOfInstanceOfTemps(type_check_kind));
+ locations->AddRegisterTemps(
+ NumberOfInstanceOfTemps(codegen_->EmitReadBarrier(), type_check_kind));
}
void InstructionCodeGeneratorX86::VisitInstanceOf(HInstanceOf* instruction) {
@@ -7586,7 +7647,7 @@ void InstructionCodeGeneratorX86::VisitInstanceOf(HInstanceOf* instruction) {
Location cls = locations->InAt(1);
Location out_loc = locations->Out();
Register out = out_loc.AsRegister<Register>();
- const size_t num_temps = NumberOfInstanceOfTemps(type_check_kind);
+ const size_t num_temps = NumberOfInstanceOfTemps(codegen_->EmitReadBarrier(), type_check_kind);
DCHECK_LE(num_temps, 1u);
Location maybe_temp_loc = (num_temps >= 1) ? locations->GetTemp(0) : Location::NoLocation();
uint32_t class_offset = mirror::Object::ClassOffset().Int32Value();
@@ -7606,7 +7667,7 @@ void InstructionCodeGeneratorX86::VisitInstanceOf(HInstanceOf* instruction) {
switch (type_check_kind) {
case TypeCheckKind::kExactCheck: {
ReadBarrierOption read_barrier_option =
- CodeGenerator::ReadBarrierOptionForInstanceOf(instruction);
+ codegen_->ReadBarrierOptionForInstanceOf(instruction);
// /* HeapReference<Class> */ out = obj->klass_
GenerateReferenceLoadTwoRegisters(instruction,
out_loc,
@@ -7629,7 +7690,7 @@ void InstructionCodeGeneratorX86::VisitInstanceOf(HInstanceOf* instruction) {
case TypeCheckKind::kAbstractClassCheck: {
ReadBarrierOption read_barrier_option =
- CodeGenerator::ReadBarrierOptionForInstanceOf(instruction);
+ codegen_->ReadBarrierOptionForInstanceOf(instruction);
// /* HeapReference<Class> */ out = obj->klass_
GenerateReferenceLoadTwoRegisters(instruction,
out_loc,
@@ -7665,7 +7726,7 @@ void InstructionCodeGeneratorX86::VisitInstanceOf(HInstanceOf* instruction) {
case TypeCheckKind::kClassHierarchyCheck: {
ReadBarrierOption read_barrier_option =
- CodeGenerator::ReadBarrierOptionForInstanceOf(instruction);
+ codegen_->ReadBarrierOptionForInstanceOf(instruction);
// /* HeapReference<Class> */ out = obj->klass_
GenerateReferenceLoadTwoRegisters(instruction,
out_loc,
@@ -7702,7 +7763,7 @@ void InstructionCodeGeneratorX86::VisitInstanceOf(HInstanceOf* instruction) {
case TypeCheckKind::kArrayObjectCheck: {
ReadBarrierOption read_barrier_option =
- CodeGenerator::ReadBarrierOptionForInstanceOf(instruction);
+ codegen_->ReadBarrierOptionForInstanceOf(instruction);
// /* HeapReference<Class> */ out = obj->klass_
GenerateReferenceLoadTwoRegisters(instruction,
out_loc,
@@ -7825,7 +7886,7 @@ void InstructionCodeGeneratorX86::VisitInstanceOf(HInstanceOf* instruction) {
void LocationsBuilderX86::VisitCheckCast(HCheckCast* instruction) {
TypeCheckKind type_check_kind = instruction->GetTypeCheckKind();
- LocationSummary::CallKind call_kind = CodeGenerator::GetCheckCastCallKind(instruction);
+ LocationSummary::CallKind call_kind = codegen_->GetCheckCastCallKind(instruction);
LocationSummary* locations =
new (GetGraph()->GetAllocator()) LocationSummary(instruction, call_kind);
locations->SetInAt(0, Location::RequiresRegister());
@@ -7840,8 +7901,7 @@ void LocationsBuilderX86::VisitCheckCast(HCheckCast* instruction) {
} else {
locations->SetInAt(1, Location::Any());
}
- // Add temps for read barriers and other uses. One is used by TypeCheckSlowPathX86.
- locations->AddRegisterTemps(NumberOfCheckCastTemps(type_check_kind));
+ locations->AddRegisterTemps(NumberOfCheckCastTemps(codegen_->EmitReadBarrier(), type_check_kind));
}
void InstructionCodeGeneratorX86::VisitCheckCast(HCheckCast* instruction) {
@@ -7852,7 +7912,7 @@ void InstructionCodeGeneratorX86::VisitCheckCast(HCheckCast* instruction) {
Location cls = locations->InAt(1);
Location temp_loc = locations->GetTemp(0);
Register temp = temp_loc.AsRegister<Register>();
- const size_t num_temps = NumberOfCheckCastTemps(type_check_kind);
+ const size_t num_temps = NumberOfCheckCastTemps(codegen_->EmitReadBarrier(), type_check_kind);
DCHECK_GE(num_temps, 1u);
DCHECK_LE(num_temps, 2u);
Location maybe_temp2_loc = (num_temps >= 2) ? locations->GetTemp(1) : Location::NoLocation();
@@ -7865,7 +7925,7 @@ void InstructionCodeGeneratorX86::VisitCheckCast(HCheckCast* instruction) {
const uint32_t object_array_data_offset =
mirror::Array::DataOffset(kHeapReferenceSize).Uint32Value();
- bool is_type_check_slow_path_fatal = CodeGenerator::IsTypeCheckSlowPathFatal(instruction);
+ bool is_type_check_slow_path_fatal = codegen_->IsTypeCheckSlowPathFatal(instruction);
SlowPathCode* type_check_slow_path =
new (codegen_->GetScopedAllocator()) TypeCheckSlowPathX86(
instruction, is_type_check_slow_path_fatal);
@@ -8028,11 +8088,11 @@ void InstructionCodeGeneratorX86::VisitCheckCast(HCheckCast* instruction) {
kWithoutReadBarrier);
// /* HeapReference<Class> */ temp = temp->iftable_
- GenerateReferenceLoadTwoRegisters(instruction,
- temp_loc,
- temp_loc,
- iftable_offset,
- kWithoutReadBarrier);
+ GenerateReferenceLoadOneRegister(instruction,
+ temp_loc,
+ iftable_offset,
+ maybe_temp2_loc,
+ kWithoutReadBarrier);
// Iftable is never null.
__ movl(maybe_temp2_loc.AsRegister<Register>(), Address(temp, array_length_offset));
// Maybe poison the `cls` for direct comparison with memory.
@@ -8288,7 +8348,7 @@ void InstructionCodeGeneratorX86::GenerateReferenceLoadOneRegister(
ReadBarrierOption read_barrier_option) {
Register out_reg = out.AsRegister<Register>();
if (read_barrier_option == kWithReadBarrier) {
- CHECK(gUseReadBarrier);
+ DCHECK(codegen_->EmitReadBarrier());
if (kUseBakerReadBarrier) {
// Load with fast path based Baker's read barrier.
// /* HeapReference<Object> */ out = *(out + offset)
@@ -8322,7 +8382,7 @@ void InstructionCodeGeneratorX86::GenerateReferenceLoadTwoRegisters(
Register out_reg = out.AsRegister<Register>();
Register obj_reg = obj.AsRegister<Register>();
if (read_barrier_option == kWithReadBarrier) {
- CHECK(gUseReadBarrier);
+ DCHECK(codegen_->EmitReadBarrier());
if (kUseBakerReadBarrier) {
// Load with fast path based Baker's read barrier.
// /* HeapReference<Object> */ out = *(obj + offset)
@@ -8350,7 +8410,7 @@ void InstructionCodeGeneratorX86::GenerateGcRootFieldLoad(
ReadBarrierOption read_barrier_option) {
Register root_reg = root.AsRegister<Register>();
if (read_barrier_option == kWithReadBarrier) {
- DCHECK(gUseReadBarrier);
+ DCHECK(codegen_->EmitReadBarrier());
if (kUseBakerReadBarrier) {
// Fast path implementation of art::ReadBarrier::BarrierForRoot when
// Baker's read barrier are used:
@@ -8414,8 +8474,7 @@ void CodeGeneratorX86::GenerateFieldLoadWithBakerReadBarrier(HInstruction* instr
Register obj,
uint32_t offset,
bool needs_null_check) {
- DCHECK(gUseReadBarrier);
- DCHECK(kUseBakerReadBarrier);
+ DCHECK(EmitBakerReadBarrier());
// /* HeapReference<Object> */ ref = *(obj + offset)
Address src(obj, offset);
@@ -8428,8 +8487,7 @@ void CodeGeneratorX86::GenerateArrayLoadWithBakerReadBarrier(HInstruction* instr
uint32_t data_offset,
Location index,
bool needs_null_check) {
- DCHECK(gUseReadBarrier);
- DCHECK(kUseBakerReadBarrier);
+ DCHECK(EmitBakerReadBarrier());
static_assert(
sizeof(mirror::HeapReference<mirror::Object>) == sizeof(int32_t),
@@ -8447,8 +8505,7 @@ void CodeGeneratorX86::GenerateReferenceLoadWithBakerReadBarrier(HInstruction* i
bool needs_null_check,
bool always_update_field,
Register* temp) {
- DCHECK(gUseReadBarrier);
- DCHECK(kUseBakerReadBarrier);
+ DCHECK(EmitBakerReadBarrier());
// In slow path based read barriers, the read barrier call is
// inserted after the original load. However, in fast path based
@@ -8528,7 +8585,7 @@ void CodeGeneratorX86::GenerateReadBarrierSlow(HInstruction* instruction,
Location obj,
uint32_t offset,
Location index) {
- DCHECK(gUseReadBarrier);
+ DCHECK(EmitReadBarrier());
// Insert a slow path based read barrier *after* the reference load.
//
@@ -8555,7 +8612,7 @@ void CodeGeneratorX86::MaybeGenerateReadBarrierSlow(HInstruction* instruction,
Location obj,
uint32_t offset,
Location index) {
- if (gUseReadBarrier) {
+ if (EmitReadBarrier()) {
// Baker's read barriers shall be handled by the fast path
// (CodeGeneratorX86::GenerateReferenceLoadWithBakerReadBarrier).
DCHECK(!kUseBakerReadBarrier);
@@ -8570,7 +8627,7 @@ void CodeGeneratorX86::MaybeGenerateReadBarrierSlow(HInstruction* instruction,
void CodeGeneratorX86::GenerateReadBarrierForRootSlow(HInstruction* instruction,
Location out,
Location root) {
- DCHECK(gUseReadBarrier);
+ DCHECK(EmitReadBarrier());
// Insert a slow path based read barrier *after* the GC root load.
//
@@ -8584,12 +8641,12 @@ void CodeGeneratorX86::GenerateReadBarrierForRootSlow(HInstruction* instruction,
__ Bind(slow_path->GetExitLabel());
}
-void LocationsBuilderX86::VisitBoundType(HBoundType* instruction ATTRIBUTE_UNUSED) {
+void LocationsBuilderX86::VisitBoundType([[maybe_unused]] HBoundType* instruction) {
// Nothing to do, this should be removed during prepare for register allocator.
LOG(FATAL) << "Unreachable";
}
-void InstructionCodeGeneratorX86::VisitBoundType(HBoundType* instruction ATTRIBUTE_UNUSED) {
+void InstructionCodeGeneratorX86::VisitBoundType([[maybe_unused]] HBoundType* instruction) {
// Nothing to do, this should be removed during prepare for register allocator.
LOG(FATAL) << "Unreachable";
}
@@ -8782,13 +8839,15 @@ void InstructionCodeGeneratorX86::VisitX86LoadFromConstantTable(HX86LoadFromCons
case DataType::Type::kFloat32:
__ movss(out.AsFpuRegister<XmmRegister>(),
codegen_->LiteralFloatAddress(
- value->AsFloatConstant()->GetValue(), insn->GetBaseMethodAddress(), const_area));
+ value->AsFloatConstant()->GetValue(), insn->GetBaseMethodAddress(), const_area));
break;
case DataType::Type::kFloat64:
__ movsd(out.AsFpuRegister<XmmRegister>(),
codegen_->LiteralDoubleAddress(
- value->AsDoubleConstant()->GetValue(), insn->GetBaseMethodAddress(), const_area));
+ value->AsDoubleConstant()->GetValue(),
+ insn->GetBaseMethodAddress(),
+ const_area));
break;
case DataType::Type::kInt32:
@@ -8877,7 +8936,7 @@ class JumpTableRIPFixup : public RIPFixup {
const HX86PackedSwitch* switch_instr_;
};
-void CodeGeneratorX86::Finalize(CodeAllocator* allocator) {
+void CodeGeneratorX86::Finalize() {
// Generate the constant area if needed.
X86Assembler* assembler = GetAssembler();
@@ -8897,7 +8956,7 @@ void CodeGeneratorX86::Finalize(CodeAllocator* allocator) {
}
// And finish up.
- CodeGenerator::Finalize(allocator);
+ CodeGenerator::Finalize();
}
Address CodeGeneratorX86::LiteralDoubleAddress(double v,
@@ -8968,9 +9027,9 @@ Address CodeGeneratorX86::ArrayAddress(Register obj,
Location index,
ScaleFactor scale,
uint32_t data_offset) {
- return index.IsConstant() ?
- Address(obj, (index.GetConstant()->AsIntConstant()->GetValue() << scale) + data_offset) :
- Address(obj, index.AsRegister<Register>(), scale, data_offset);
+ return index.IsConstant()
+ ? Address(obj, (index.GetConstant()->AsIntConstant()->GetValue() << scale) + data_offset)
+ : Address(obj, index.AsRegister<Register>(), scale, data_offset);
}
Address CodeGeneratorX86::LiteralCaseTable(HX86PackedSwitch* switch_instr,
@@ -9025,7 +9084,7 @@ void CodeGeneratorX86::PatchJitRootUse(uint8_t* code,
reinterpret_cast<uintptr_t>(roots_data) + index_in_table * sizeof(GcRoot<mirror::Object>);
using unaligned_uint32_t __attribute__((__aligned__(1))) = uint32_t;
reinterpret_cast<unaligned_uint32_t*>(code + code_offset)[0] =
- dchecked_integral_cast<uint32_t>(address);
+ dchecked_integral_cast<uint32_t>(address);
}
void CodeGeneratorX86::EmitJitRootPatches(uint8_t* code, const uint8_t* roots_data) {
@@ -9042,13 +9101,13 @@ void CodeGeneratorX86::EmitJitRootPatches(uint8_t* code, const uint8_t* roots_da
}
}
-void LocationsBuilderX86::VisitIntermediateAddress(HIntermediateAddress* instruction
- ATTRIBUTE_UNUSED) {
+void LocationsBuilderX86::VisitIntermediateAddress(
+ [[maybe_unused]] HIntermediateAddress* instruction) {
LOG(FATAL) << "Unreachable";
}
-void InstructionCodeGeneratorX86::VisitIntermediateAddress(HIntermediateAddress* instruction
- ATTRIBUTE_UNUSED) {
+void InstructionCodeGeneratorX86::VisitIntermediateAddress(
+ [[maybe_unused]] HIntermediateAddress* instruction) {
LOG(FATAL) << "Unreachable";
}
diff --git a/compiler/optimizing/code_generator_x86.h b/compiler/optimizing/code_generator_x86.h
index d27155f31d..5b59bfc7e3 100644
--- a/compiler/optimizing/code_generator_x86.h
+++ b/compiler/optimizing/code_generator_x86.h
@@ -89,19 +89,8 @@ static constexpr size_t kRuntimeParameterFpuRegistersLength =
V(StringBuilderLength) \
V(StringBuilderToString) \
/* 1.8 */ \
- V(UnsafeGetAndAddInt) \
- V(UnsafeGetAndAddLong) \
- V(UnsafeGetAndSetInt) \
- V(UnsafeGetAndSetLong) \
- V(UnsafeGetAndSetObject) \
V(MethodHandleInvokeExact) \
- V(MethodHandleInvoke) \
- /* OpenJDK 11 */ \
- V(JdkUnsafeGetAndAddInt) \
- V(JdkUnsafeGetAndAddLong) \
- V(JdkUnsafeGetAndSetInt) \
- V(JdkUnsafeGetAndSetLong) \
- V(JdkUnsafeGetAndSetObject)
+ V(MethodHandleInvoke)
class InvokeRuntimeCallingConvention : public CallingConvention<Register, XmmRegister> {
public:
@@ -196,7 +185,7 @@ class FieldAccessCallingConventionX86 : public FieldAccessCallingConvention {
? Location::RegisterLocation(EDX)
: Location::RegisterLocation(ECX));
}
- Location GetFpuLocation(DataType::Type type ATTRIBUTE_UNUSED) const override {
+ Location GetFpuLocation([[maybe_unused]] DataType::Type type) const override {
return Location::FpuRegisterLocation(XMM0);
}
@@ -635,7 +624,7 @@ class CodeGeneratorX86 : public CodeGenerator {
Address LiteralCaseTable(HX86PackedSwitch* switch_instr, Register reg, Register value);
- void Finalize(CodeAllocator* allocator) override;
+ void Finalize() override;
// Fast path implementation of ReadBarrier::Barrier for a heap
// reference field load when Baker's read barriers are used.
diff --git a/compiler/optimizing/code_generator_x86_64.cc b/compiler/optimizing/code_generator_x86_64.cc
index eea6b204fa..9d010190f7 100644
--- a/compiler/optimizing/code_generator_x86_64.cc
+++ b/compiler/optimizing/code_generator_x86_64.cc
@@ -27,6 +27,7 @@
#include "heap_poisoning.h"
#include "interpreter/mterp/nterp.h"
#include "intrinsics.h"
+#include "intrinsics_list.h"
#include "intrinsics_utils.h"
#include "intrinsics_x86_64.h"
#include "jit/profiling_info.h"
@@ -37,8 +38,10 @@
#include "mirror/object_reference.h"
#include "mirror/var_handle.h"
#include "optimizing/nodes.h"
+#include "profiling_info_builder.h"
#include "scoped_thread_state_change-inl.h"
#include "thread.h"
+#include "trace.h"
#include "utils/assembler.h"
#include "utils/stack_checks.h"
#include "utils/x86_64/assembler_x86_64.h"
@@ -267,6 +270,38 @@ class BoundsCheckSlowPathX86_64 : public SlowPathCode {
DISALLOW_COPY_AND_ASSIGN(BoundsCheckSlowPathX86_64);
};
+class LoadMethodTypeSlowPathX86_64 : public SlowPathCode {
+ public:
+ explicit LoadMethodTypeSlowPathX86_64(HLoadMethodType* mt) : SlowPathCode(mt) {}
+
+ void EmitNativeCode(CodeGenerator* codegen) override {
+ LocationSummary* locations = instruction_->GetLocations();
+ DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(locations->Out().reg()));
+
+ CodeGeneratorX86_64* x86_64_codegen = down_cast<CodeGeneratorX86_64*>(codegen);
+ __ Bind(GetEntryLabel());
+ SaveLiveRegisters(codegen, locations);
+
+ const dex::ProtoIndex proto_index = instruction_->AsLoadMethodType()->GetProtoIndex();
+ // Custom calling convention: RAX serves as both input and output.
+ __ movl(CpuRegister(RAX), Immediate(proto_index.index_));
+ x86_64_codegen->InvokeRuntime(kQuickResolveMethodType,
+ instruction_,
+ instruction_->GetDexPc(),
+ this);
+ CheckEntrypointTypes<kQuickResolveMethodType, void*, uint32_t>();
+ x86_64_codegen->Move(locations->Out(), Location::RegisterLocation(RAX));
+ RestoreLiveRegisters(codegen, locations);
+
+ __ jmp(GetExitLabel());
+ }
+
+ const char* GetDescription() const override { return "LoadMethodTypeSlowPathX86_64"; }
+
+ private:
+ DISALLOW_COPY_AND_ASSIGN(LoadMethodTypeSlowPathX86_64);
+};
+
class LoadClassSlowPathX86_64 : public SlowPathCode {
public:
LoadClassSlowPathX86_64(HLoadClass* cls, HInstruction* at)
@@ -510,23 +545,23 @@ class ReadBarrierMarkSlowPathX86_64 : public SlowPathCode {
: SlowPathCode(instruction),
ref_(ref),
unpoison_ref_before_marking_(unpoison_ref_before_marking) {
- DCHECK(gUseReadBarrier);
}
const char* GetDescription() const override { return "ReadBarrierMarkSlowPathX86_64"; }
void EmitNativeCode(CodeGenerator* codegen) override {
+ DCHECK(codegen->EmitReadBarrier());
LocationSummary* locations = instruction_->GetLocations();
CpuRegister ref_cpu_reg = ref_.AsRegister<CpuRegister>();
Register ref_reg = ref_cpu_reg.AsRegister();
DCHECK(locations->CanCall());
DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(ref_reg)) << ref_reg;
DCHECK(instruction_->IsInstanceFieldGet() ||
- instruction_->IsPredicatedInstanceFieldGet() ||
instruction_->IsStaticFieldGet() ||
instruction_->IsArrayGet() ||
instruction_->IsArraySet() ||
instruction_->IsLoadClass() ||
+ instruction_->IsLoadMethodType() ||
instruction_->IsLoadString() ||
instruction_->IsInstanceOf() ||
instruction_->IsCheckCast() ||
@@ -601,7 +636,6 @@ class ReadBarrierMarkAndUpdateFieldSlowPathX86_64 : public SlowPathCode {
unpoison_ref_before_marking_(unpoison_ref_before_marking),
temp1_(temp1),
temp2_(temp2) {
- DCHECK(gUseReadBarrier);
}
const char* GetDescription() const override {
@@ -609,6 +643,7 @@ class ReadBarrierMarkAndUpdateFieldSlowPathX86_64 : public SlowPathCode {
}
void EmitNativeCode(CodeGenerator* codegen) override {
+ DCHECK(codegen->EmitReadBarrier());
LocationSummary* locations = instruction_->GetLocations();
CpuRegister ref_cpu_reg = ref_.AsRegister<CpuRegister>();
Register ref_reg = ref_cpu_reg.AsRegister();
@@ -618,7 +653,9 @@ class ReadBarrierMarkAndUpdateFieldSlowPathX86_64 : public SlowPathCode {
<< "Unexpected instruction in read barrier marking and field updating slow path: "
<< instruction_->DebugName();
HInvoke* invoke = instruction_->AsInvoke();
- DCHECK(IsUnsafeCASObject(invoke) || IsVarHandleCASFamily(invoke)) << invoke->GetIntrinsic();
+ DCHECK(IsUnsafeCASReference(invoke) ||
+ IsUnsafeGetAndSetReference(invoke) ||
+ IsVarHandleCASFamily(invoke)) << invoke->GetIntrinsic();
__ Bind(GetEntryLabel());
if (unpoison_ref_before_marking_) {
@@ -665,7 +702,7 @@ class ReadBarrierMarkAndUpdateFieldSlowPathX86_64 : public SlowPathCode {
__ cmpl(temp1_, ref_cpu_reg);
__ j(kEqual, &done);
- // Update the the holder's field atomically. This may fail if
+ // Update the holder's field atomically. This may fail if
// mutator updates before us, but it's OK. This is achieved
// using a strong compare-and-set (CAS) operation with relaxed
// memory synchronization ordering, where the expected value is
@@ -761,7 +798,6 @@ class ReadBarrierForHeapReferenceSlowPathX86_64 : public SlowPathCode {
obj_(obj),
offset_(offset),
index_(index) {
- DCHECK(gUseReadBarrier);
// If `obj` is equal to `out` or `ref`, it means the initial
// object has been overwritten by (or after) the heap object
// reference load to be instrumented, e.g.:
@@ -776,13 +812,13 @@ class ReadBarrierForHeapReferenceSlowPathX86_64 : public SlowPathCode {
}
void EmitNativeCode(CodeGenerator* codegen) override {
+ DCHECK(codegen->EmitReadBarrier());
CodeGeneratorX86_64* x86_64_codegen = down_cast<CodeGeneratorX86_64*>(codegen);
LocationSummary* locations = instruction_->GetLocations();
CpuRegister reg_out = out_.AsRegister<CpuRegister>();
DCHECK(locations->CanCall());
DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(reg_out.AsRegister())) << out_;
DCHECK(instruction_->IsInstanceFieldGet() ||
- instruction_->IsPredicatedInstanceFieldGet() ||
instruction_->IsStaticFieldGet() ||
instruction_->IsArrayGet() ||
instruction_->IsInstanceOf() ||
@@ -855,9 +891,11 @@ class ReadBarrierForHeapReferenceSlowPathX86_64 : public SlowPathCode {
DCHECK(instruction_->GetLocations()->Intrinsified());
DCHECK((instruction_->AsInvoke()->GetIntrinsic() == Intrinsics::kUnsafeGetObject) ||
(instruction_->AsInvoke()->GetIntrinsic() == Intrinsics::kUnsafeGetObjectVolatile) ||
- (instruction_->AsInvoke()->GetIntrinsic() == Intrinsics::kJdkUnsafeGetObject) ||
- (instruction_->AsInvoke()->GetIntrinsic() == Intrinsics::kJdkUnsafeGetObjectVolatile) ||
- (instruction_->AsInvoke()->GetIntrinsic() == Intrinsics::kJdkUnsafeGetObjectAcquire))
+ (instruction_->AsInvoke()->GetIntrinsic() == Intrinsics::kJdkUnsafeGetReference) ||
+ (instruction_->AsInvoke()->GetIntrinsic() ==
+ Intrinsics::kJdkUnsafeGetReferenceVolatile) ||
+ (instruction_->AsInvoke()->GetIntrinsic() ==
+ Intrinsics::kJdkUnsafeGetReferenceAcquire))
<< instruction_->AsInvoke()->GetIntrinsic();
DCHECK_EQ(offset_, 0U);
DCHECK(index_.IsRegister());
@@ -937,10 +975,10 @@ class ReadBarrierForRootSlowPathX86_64 : public SlowPathCode {
public:
ReadBarrierForRootSlowPathX86_64(HInstruction* instruction, Location out, Location root)
: SlowPathCode(instruction), out_(out), root_(root) {
- DCHECK(gUseReadBarrier);
}
void EmitNativeCode(CodeGenerator* codegen) override {
+ DCHECK(codegen->EmitReadBarrier());
LocationSummary* locations = instruction_->GetLocations();
DCHECK(locations->CanCall());
DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(out_.reg()));
@@ -1005,11 +1043,15 @@ class MethodEntryExitHooksSlowPathX86_64 : public SlowPathCode {
class CompileOptimizedSlowPathX86_64 : public SlowPathCode {
public:
- CompileOptimizedSlowPathX86_64() : SlowPathCode(/* instruction= */ nullptr) {}
+ explicit CompileOptimizedSlowPathX86_64(uint64_t counter_address)
+ : SlowPathCode(/* instruction= */ nullptr),
+ counter_address_(counter_address) {}
void EmitNativeCode(CodeGenerator* codegen) override {
CodeGeneratorX86_64* x86_64_codegen = down_cast<CodeGeneratorX86_64*>(codegen);
__ Bind(GetEntryLabel());
+ __ movq(CpuRegister(TMP), Immediate(counter_address_));
+ __ movw(Address(CpuRegister(TMP), 0), Immediate(ProfilingInfo::GetOptimizeThreshold()));
x86_64_codegen->GenerateInvokeRuntime(
GetThreadOffset<kX86_64PointerSize>(kQuickCompileOptimized).Int32Value());
__ jmp(GetExitLabel());
@@ -1020,6 +1062,8 @@ class CompileOptimizedSlowPathX86_64 : public SlowPathCode {
}
private:
+ uint64_t counter_address_;
+
DISALLOW_COPY_AND_ASSIGN(CompileOptimizedSlowPathX86_64);
};
@@ -1070,8 +1114,8 @@ void CodeGeneratorX86_64::BlockNonVolatileXmmRegisters(LocationSummary* location
}
HInvokeStaticOrDirect::DispatchInfo CodeGeneratorX86_64::GetSupportedInvokeStaticOrDirectDispatch(
- const HInvokeStaticOrDirect::DispatchInfo& desired_dispatch_info,
- ArtMethod* method ATTRIBUTE_UNUSED) {
+ const HInvokeStaticOrDirect::DispatchInfo& desired_dispatch_info,
+ [[maybe_unused]] ArtMethod* method) {
return desired_dispatch_info;
}
@@ -1308,6 +1352,12 @@ Label* CodeGeneratorX86_64::NewStringBssEntryPatch(HLoadString* load_string) {
return &string_bss_entry_patches_.back().label;
}
+Label* CodeGeneratorX86_64::NewMethodTypeBssEntryPatch(HLoadMethodType* load_method_type) {
+ method_type_bss_entry_patches_.emplace_back(
+ &load_method_type->GetDexFile(), load_method_type->GetProtoIndex().index_);
+ return &method_type_bss_entry_patches_.back().label;
+}
+
void CodeGeneratorX86_64::RecordBootImageJniEntrypointPatch(HInvokeStaticOrDirect* invoke) {
boot_image_jni_entrypoint_patches_.emplace_back(invoke->GetResolvedMethodReference().dex_file,
invoke->GetResolvedMethodReference().index);
@@ -1335,7 +1385,7 @@ void CodeGeneratorX86_64::LoadBootImageAddress(CpuRegister reg, uint32_t boot_im
void CodeGeneratorX86_64::LoadIntrinsicDeclaringClass(CpuRegister reg, HInvoke* invoke) {
DCHECK_NE(invoke->GetIntrinsic(), Intrinsics::kNone);
if (GetCompilerOptions().IsBootImage()) {
- // Load the class the same way as for HLoadClass::LoadKind::kBootImageLinkTimePcRelative.
+ // Load the type the same way as for HLoadClass::LoadKind::kBootImageLinkTimePcRelative.
__ leal(reg,
Address::Absolute(CodeGeneratorX86_64::kPlaceholder32BitOffset, /* no_rip= */ false));
MethodReference target_method = invoke->GetResolvedMethodReference();
@@ -1395,6 +1445,7 @@ void CodeGeneratorX86_64::EmitLinkerPatches(ArenaVector<linker::LinkerPatch>* li
package_type_bss_entry_patches_.size() +
boot_image_string_patches_.size() +
string_bss_entry_patches_.size() +
+ method_type_bss_entry_patches_.size() +
boot_image_jni_entrypoint_patches_.size() +
boot_image_other_patches_.size();
linker_patches->reserve(size);
@@ -1427,6 +1478,8 @@ void CodeGeneratorX86_64::EmitLinkerPatches(ArenaVector<linker::LinkerPatch>* li
package_type_bss_entry_patches_, linker_patches);
EmitPcRelativeLinkerPatches<linker::LinkerPatch::StringBssEntryPatch>(
string_bss_entry_patches_, linker_patches);
+ EmitPcRelativeLinkerPatches<linker::LinkerPatch::MethodTypeBssEntryPatch>(
+ method_type_bss_entry_patches_, linker_patches);
EmitPcRelativeLinkerPatches<linker::LinkerPatch::RelativeJniEntrypointPatch>(
boot_image_jni_entrypoint_patches_, linker_patches);
DCHECK_EQ(size, linker_patches->size());
@@ -1495,6 +1548,7 @@ void CodeGeneratorX86_64::GenerateInvokeRuntime(int32_t entry_point_offset) {
}
namespace detail {
+
// Mark which intrinsics we don't have handcrafted code for.
template <Intrinsics T>
struct IsUnimplemented {
@@ -1509,15 +1563,13 @@ struct IsUnimplemented {
UNIMPLEMENTED_INTRINSIC_LIST_X86_64(TRUE_OVERRIDE)
#undef TRUE_OVERRIDE
-#include "intrinsics_list.h"
static constexpr bool kIsIntrinsicUnimplemented[] = {
- false, // kNone
+ false, // kNone
#define IS_UNIMPLEMENTED(Intrinsic, ...) \
- IsUnimplemented<Intrinsics::k##Intrinsic>().is_unimplemented,
- INTRINSICS_LIST(IS_UNIMPLEMENTED)
+ IsUnimplemented<Intrinsics::k##Intrinsic>().is_unimplemented,
+ ART_INTRINSICS_LIST(IS_UNIMPLEMENTED)
#undef IS_UNIMPLEMENTED
};
-#undef INTRINSICS_LIST
} // namespace detail
@@ -1531,11 +1583,9 @@ CodeGeneratorX86_64::CodeGeneratorX86_64(HGraph* graph,
kNumberOfCpuRegisters,
kNumberOfFloatRegisters,
kNumberOfCpuRegisterPairs,
- ComputeRegisterMask(reinterpret_cast<const int*>(kCoreCalleeSaves),
- arraysize(kCoreCalleeSaves))
+ ComputeRegisterMask(kCoreCalleeSaves, arraysize(kCoreCalleeSaves))
| (1 << kFakeReturnRegister),
- ComputeRegisterMask(reinterpret_cast<const int*>(kFpuCalleeSaves),
- arraysize(kFpuCalleeSaves)),
+ ComputeRegisterMask(kFpuCalleeSaves, arraysize(kFpuCalleeSaves)),
compiler_options,
stats,
ArrayRef<const bool>(detail::kIsIntrinsicUnimplemented)),
@@ -1554,6 +1604,7 @@ CodeGeneratorX86_64::CodeGeneratorX86_64(HGraph* graph,
package_type_bss_entry_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
boot_image_string_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
string_bss_entry_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
+ method_type_bss_entry_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
boot_image_jni_entrypoint_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
boot_image_other_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
jit_string_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
@@ -1585,12 +1636,18 @@ static dwarf::Reg DWARFReg(FloatRegister reg) {
}
void LocationsBuilderX86_64::VisitMethodEntryHook(HMethodEntryHook* method_hook) {
- new (GetGraph()->GetAllocator()) LocationSummary(method_hook, LocationSummary::kCallOnSlowPath);
+ LocationSummary* locations = new (GetGraph()->GetAllocator())
+ LocationSummary(method_hook, LocationSummary::kCallOnSlowPath);
+ // We use rdtsc to record the timestamp for method profiling. rdtsc returns
+ // two 32-bit values in EAX + EDX even on 64-bit architectures.
+ locations->AddTemp(Location::RegisterLocation(RAX));
+ locations->AddTemp(Location::RegisterLocation(RDX));
}
void InstructionCodeGeneratorX86_64::GenerateMethodEntryExitHook(HInstruction* instruction) {
SlowPathCode* slow_path =
new (codegen_->GetScopedAllocator()) MethodEntryExitHooksSlowPathX86_64(instruction);
+ LocationSummary* locations = instruction->GetLocations();
codegen_->AddSlowPath(slow_path);
if (instruction->IsMethodExitHook()) {
@@ -1609,8 +1666,51 @@ void InstructionCodeGeneratorX86_64::GenerateMethodEntryExitHook(HInstruction* i
instrumentation::Instrumentation::HaveMethodExitListenersOffset()
: instrumentation::Instrumentation::HaveMethodEntryListenersOffset();
__ movq(CpuRegister(TMP), Immediate(address + offset.Int32Value()));
- __ cmpb(Address(CpuRegister(TMP), 0), Immediate(0));
- __ j(kNotEqual, slow_path->GetEntryLabel());
+ __ cmpb(Address(CpuRegister(TMP), 0),
+ Immediate(instrumentation::Instrumentation::kFastTraceListeners));
+ // Check if there are any method entry / exit listeners. If no, continue with execution.
+ __ j(kLess, slow_path->GetExitLabel());
+ // Check if there are any slow method entry / exit listeners. If yes, take the slow path.
+ __ j(kGreater, slow_path->GetEntryLabel());
+
+ // Check if there is place in the buffer for a new entry, if no, take slow path.
+ CpuRegister index = locations->GetTemp(0).AsRegister<CpuRegister>();
+ CpuRegister entry_addr = CpuRegister(TMP);
+ uint64_t trace_buffer_index_offset =
+ Thread::TraceBufferIndexOffset<kX86_64PointerSize>().SizeValue();
+ __ gs()->movq(CpuRegister(index),
+ Address::Absolute(trace_buffer_index_offset, /* no_rip= */ true));
+ __ subq(CpuRegister(index), Immediate(kNumEntriesForWallClock));
+ __ j(kLess, slow_path->GetEntryLabel());
+
+ // Update the index in the `Thread`.
+ __ gs()->movq(Address::Absolute(trace_buffer_index_offset, /* no_rip= */ true),
+ CpuRegister(index));
+ // Calculate the entry address in the buffer.
+ // entry_addr = base_addr + sizeof(void*) * index
+ __ gs()->movq(entry_addr,
+ Address::Absolute(Thread::TraceBufferPtrOffset<kX86_64PointerSize>().SizeValue(),
+ /* no_rip= */ true));
+ __ leaq(CpuRegister(entry_addr),
+ Address(CpuRegister(entry_addr), CpuRegister(index), TIMES_8, 0));
+
+ // Record method pointer and action.
+ CpuRegister method = index;
+ __ movq(CpuRegister(method), Address(CpuRegister(RSP), kCurrentMethodStackOffset));
+ // Use last two bits to encode trace method action. For MethodEntry it is 0
+ // so no need to set the bits since they are 0 already.
+ if (instruction->IsMethodExitHook()) {
+ DCHECK_GE(ArtMethod::Alignment(kRuntimePointerSize), static_cast<size_t>(4));
+ static_assert(enum_cast<int32_t>(TraceAction::kTraceMethodEnter) == 0);
+ static_assert(enum_cast<int32_t>(TraceAction::kTraceMethodExit) == 1);
+ __ orq(method, Immediate(enum_cast<int32_t>(TraceAction::kTraceMethodExit)));
+ }
+ __ movq(Address(entry_addr, kMethodOffsetInBytes), CpuRegister(method));
+  // Get the timestamp. rdtsc returns the timestamp in RAX + RDX even on 64-bit architectures.
+ __ rdtsc();
+ __ shlq(CpuRegister(RDX), Immediate(32));
+ __ orq(CpuRegister(RAX), CpuRegister(RDX));
+ __ movq(Address(entry_addr, kTimestampOffsetInBytes), CpuRegister(RAX));
__ Bind(slow_path->GetExitLabel());
}
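For readers less familiar with the trace buffer format used here: the fast path above reserves kNumEntriesForWallClock slots by decrementing the per-thread buffer index, then stores the method pointer (with the trace action encoded in its low bits) and a 64-bit timestamp assembled from the two rdtsc halves. A C++-level sketch of that packing; the struct and names below are illustrative assumptions, not ART declarations:

#include <cstdint>

// Illustrative layout only; the packing (action in the low bits of the
// method pointer, rdtsc halves combined into one 64-bit value) mirrors the
// shlq/orq sequence emitted above.
struct TraceEntrySketch {
  uint64_t method_and_action;  // Method pointer with kTraceMethodExit (1) ORed in for exits.
  uint64_t timestamp;          // Combined rdtsc result.
};

inline uint64_t CombineRdtscHalves(uint32_t eax, uint32_t edx) {
  // rdtsc places the low 32 bits in EAX and the high 32 bits in EDX.
  return (static_cast<uint64_t>(edx) << 32) | eax;
}

inline void RecordTraceEntry(TraceEntrySketch* entry,
                             uint64_t method,
                             bool is_exit,
                             uint32_t eax,
                             uint32_t edx) {
  entry->method_and_action = method | (is_exit ? 1u : 0u);
  entry->timestamp = CombineRdtscHalves(eax, edx);
}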
@@ -1651,6 +1751,10 @@ void LocationsBuilderX86_64::VisitMethodExitHook(HMethodExitHook* method_hook) {
LocationSummary* locations = new (GetGraph()->GetAllocator())
LocationSummary(method_hook, LocationSummary::kCallOnSlowPath);
SetInForReturnValue(method_hook, locations);
+ // We use rdtsc to record the timestamp for method profiling. rdtsc returns
+ // two 32-bit values in EAX + EDX even on 64-bit architectures.
+ locations->AddTemp(Location::RegisterLocation(RAX));
+ locations->AddTemp(Location::RegisterLocation(RDX));
}
void InstructionCodeGeneratorX86_64::VisitMethodExitHook(HMethodExitHook* instruction) {
@@ -1677,20 +1781,20 @@ void CodeGeneratorX86_64::MaybeIncrementHotness(bool is_frame_entry) {
}
if (GetGraph()->IsCompilingBaseline() && !Runtime::Current()->IsAotCompiler()) {
- SlowPathCode* slow_path = new (GetScopedAllocator()) CompileOptimizedSlowPathX86_64();
- AddSlowPath(slow_path);
ProfilingInfo* info = GetGraph()->GetProfilingInfo();
DCHECK(info != nullptr);
CHECK(!HasEmptyFrame());
- uint64_t address = reinterpret_cast64<uint64_t>(info);
+ uint64_t address = reinterpret_cast64<uint64_t>(info) +
+ ProfilingInfo::BaselineHotnessCountOffset().Int32Value();
+ SlowPathCode* slow_path = new (GetScopedAllocator()) CompileOptimizedSlowPathX86_64(address);
+ AddSlowPath(slow_path);
// Note: if the address was in the 32bit range, we could use
// Address::Absolute and avoid this movq.
__ movq(CpuRegister(TMP), Immediate(address));
// With multiple threads, this can overflow. This is OK, we will eventually get to see
// it reaching 0. Also, at this point we have no register available to look
// at the counter directly.
- __ addw(Address(CpuRegister(TMP), ProfilingInfo::BaselineHotnessCountOffset().Int32Value()),
- Immediate(-1));
+ __ addw(Address(CpuRegister(TMP), 0), Immediate(-1));
__ j(kEqual, slow_path->GetEntryLabel());
__ Bind(slow_path->GetExitLabel());
}
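The change above folds the counter field offset into the address materialized with movq, so the hot path is a single 16-bit addw on the baseline hotness counter, and the slow path (CompileOptimizedSlowPathX86_64 earlier in this diff) now rearms the counter with the optimize threshold before calling the runtime. Roughly, in C++, with the threshold constant and the entrypoint written as stand-ins:

#include <cstdint>

// Sketch under assumptions: kOptimizeThresholdSketch stands in for
// ProfilingInfo::GetOptimizeThreshold(), and compile_optimized for the
// kQuickCompileOptimized entrypoint invoked from the slow path.
constexpr uint16_t kOptimizeThresholdSketch = 0xffff;

inline void MaybeCompileOptimized(uint16_t* hotness_counter,
                                  void (*compile_optimized)()) {
  // The emitted fast path is `addw -1` on the counter; racy decrements from
  // other threads are tolerated because zero is eventually observed.
  if (--(*hotness_counter) == 0) {
    *hotness_counter = kOptimizeThresholdSketch;  // Rearmed by the slow path.
    compile_optimized();
  }
}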
@@ -1949,8 +2053,9 @@ void CodeGeneratorX86_64::MoveConstant(Location location, int32_t value) {
Load64BitValue(location.AsRegister<CpuRegister>(), static_cast<int64_t>(value));
}
-void CodeGeneratorX86_64::MoveLocation(
- Location dst, Location src, DataType::Type dst_type ATTRIBUTE_UNUSED) {
+void CodeGeneratorX86_64::MoveLocation(Location dst,
+ Location src,
+ [[maybe_unused]] DataType::Type dst_type) {
Move(dst, src);
}
@@ -2009,8 +2114,7 @@ void LocationsBuilderX86_64::VisitExit(HExit* exit) {
exit->SetLocations(nullptr);
}
-void InstructionCodeGeneratorX86_64::VisitExit(HExit* exit ATTRIBUTE_UNUSED) {
-}
+void InstructionCodeGeneratorX86_64::VisitExit([[maybe_unused]] HExit* exit) {}
template<class LabelType>
void InstructionCodeGeneratorX86_64::GenerateFPJumps(HCondition* cond,
@@ -2051,7 +2155,7 @@ void InstructionCodeGeneratorX86_64::GenerateCompareTest(HCondition* condition)
} else if (right.IsConstant()) {
__ ucomiss(left.AsFpuRegister<XmmRegister>(),
codegen_->LiteralFloatAddress(
- right.GetConstant()->AsFloatConstant()->GetValue()));
+ right.GetConstant()->AsFloatConstant()->GetValue()));
} else {
DCHECK(right.IsStackSlot());
__ ucomiss(left.AsFpuRegister<XmmRegister>(),
@@ -2065,7 +2169,7 @@ void InstructionCodeGeneratorX86_64::GenerateCompareTest(HCondition* condition)
} else if (right.IsConstant()) {
__ ucomisd(left.AsFpuRegister<XmmRegister>(),
codegen_->LiteralDoubleAddress(
- right.GetConstant()->AsDoubleConstant()->GetValue()));
+ right.GetConstant()->AsDoubleConstant()->GetValue()));
} else {
DCHECK(right.IsDoubleStackSlot());
__ ucomisd(left.AsFpuRegister<XmmRegister>(),
@@ -2119,13 +2223,17 @@ void InstructionCodeGeneratorX86_64::GenerateCompareTestAndBranch(HCondition* co
}
}
-static bool AreEflagsSetFrom(HInstruction* cond, HInstruction* branch) {
+static bool AreEflagsSetFrom(HInstruction* cond,
+ HInstruction* branch,
+ const CompilerOptions& compiler_options) {
// Moves may affect the eflags register (move zero uses xorl), so the EFLAGS
// are set only strictly before `branch`. We can't use the eflags on long
// conditions if they are materialized due to the complex branching.
return cond->IsCondition() &&
cond->GetNext() == branch &&
- !DataType::IsFloatingPointType(cond->InputAt(0)->GetType());
+ !DataType::IsFloatingPointType(cond->InputAt(0)->GetType()) &&
+ !(cond->GetBlock()->GetGraph()->IsCompilingBaseline() &&
+ compiler_options.ProfileBranches());
}
template<class LabelType>
@@ -2162,7 +2270,7 @@ void InstructionCodeGeneratorX86_64::GenerateTestAndBranch(HInstruction* instruc
// - condition true => branch to true_target
// - branch to false_target
if (IsBooleanValueOrMaterializedCondition(cond)) {
- if (AreEflagsSetFrom(cond, instruction)) {
+ if (AreEflagsSetFrom(cond, instruction, codegen_->GetCompilerOptions())) {
if (true_target == nullptr) {
__ j(X86_64IntegerCondition(cond->AsCondition()->GetOppositeCondition()), false_target);
} else {
@@ -2215,7 +2323,14 @@ void InstructionCodeGeneratorX86_64::GenerateTestAndBranch(HInstruction* instruc
void LocationsBuilderX86_64::VisitIf(HIf* if_instr) {
LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(if_instr);
if (IsBooleanValueOrMaterializedCondition(if_instr->InputAt(0))) {
- locations->SetInAt(0, Location::Any());
+ if (GetGraph()->IsCompilingBaseline() &&
+ codegen_->GetCompilerOptions().ProfileBranches() &&
+ !Runtime::Current()->IsAotCompiler()) {
+ locations->SetInAt(0, Location::RequiresRegister());
+ locations->AddTemp(Location::RequiresRegister());
+ } else {
+ locations->SetInAt(0, Location::Any());
+ }
}
}
@@ -2226,6 +2341,33 @@ void InstructionCodeGeneratorX86_64::VisitIf(HIf* if_instr) {
nullptr : codegen_->GetLabelOf(true_successor);
Label* false_target = codegen_->GoesToNextBlock(if_instr->GetBlock(), false_successor) ?
nullptr : codegen_->GetLabelOf(false_successor);
+ if (IsBooleanValueOrMaterializedCondition(if_instr->InputAt(0))) {
+ if (GetGraph()->IsCompilingBaseline() &&
+ codegen_->GetCompilerOptions().ProfileBranches() &&
+ !Runtime::Current()->IsAotCompiler()) {
+ DCHECK(if_instr->InputAt(0)->IsCondition());
+ CpuRegister temp = if_instr->GetLocations()->GetTemp(0).AsRegister<CpuRegister>();
+ ProfilingInfo* info = GetGraph()->GetProfilingInfo();
+ DCHECK(info != nullptr);
+ BranchCache* cache = info->GetBranchCache(if_instr->GetDexPc());
+ // Currently, not all If branches are profiled.
+ if (cache != nullptr) {
+ uint64_t address =
+ reinterpret_cast64<uint64_t>(cache) + BranchCache::FalseOffset().Int32Value();
+ static_assert(
+ BranchCache::TrueOffset().Int32Value() - BranchCache::FalseOffset().Int32Value() == 2,
+ "Unexpected offsets for BranchCache");
+ NearLabel done;
+ Location lhs = if_instr->GetLocations()->InAt(0);
+ __ movq(CpuRegister(TMP), Immediate(address));
+ __ movzxw(temp, Address(CpuRegister(TMP), lhs.AsRegister<CpuRegister>(), TIMES_2, 0));
+ __ addw(temp, Immediate(1));
+ __ j(kZero, &done);
+ __ movw(Address(CpuRegister(TMP), lhs.AsRegister<CpuRegister>(), TIMES_2, 0), temp);
+ __ Bind(&done);
+ }
+ }
+ }
GenerateTestAndBranch(if_instr, /* condition_input_index= */ 0, true_target, false_target);
}
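The baseline branch-profiling block inserted above loads a 16-bit counter at `cache + 2 * condition`, increments it, and skips the write-back when the increment wraps to zero, so the counter saturates rather than overflowing. A sketch of the same update; the BranchCache layout here is an assumption consistent with the static_assert:

#include <cstdint>

// Assumed layout: the false counter sits 2 bytes before the true counter,
// so the condition value (0 or 1) selects the slot, mirroring the TIMES_2
// scaled addressing in the emitted code.
struct BranchCacheSketch {
  uint16_t false_count;
  uint16_t true_count;
};

inline void ProfileBranch(BranchCacheSketch* cache, bool condition) {
  uint16_t* counter = condition ? &cache->true_count : &cache->false_count;
  uint16_t incremented = static_cast<uint16_t>(*counter + 1);
  if (incremented != 0) {  // j kZero skips the store: the counter saturates.
    *counter = incremented;
  }
}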
@@ -2318,7 +2460,7 @@ void InstructionCodeGeneratorX86_64::VisitSelect(HSelect* select) {
if (!condition->IsEmittedAtUseSite()) {
// This was a previously materialized condition.
// Can we use the existing condition code?
- if (AreEflagsSetFrom(condition, select)) {
+ if (AreEflagsSetFrom(condition, select, codegen_->GetCompilerOptions())) {
// Materialization was the previous instruction. Condition codes are right.
cond = X86_64IntegerCondition(condition->GetCondition());
} else {
@@ -2657,7 +2799,7 @@ void LocationsBuilderX86_64::VisitIntConstant(HIntConstant* constant) {
locations->SetOut(Location::ConstantLocation(constant));
}
-void InstructionCodeGeneratorX86_64::VisitIntConstant(HIntConstant* constant ATTRIBUTE_UNUSED) {
+void InstructionCodeGeneratorX86_64::VisitIntConstant([[maybe_unused]] HIntConstant* constant) {
// Will be generated at use site.
}
@@ -2667,7 +2809,7 @@ void LocationsBuilderX86_64::VisitNullConstant(HNullConstant* constant) {
locations->SetOut(Location::ConstantLocation(constant));
}
-void InstructionCodeGeneratorX86_64::VisitNullConstant(HNullConstant* constant ATTRIBUTE_UNUSED) {
+void InstructionCodeGeneratorX86_64::VisitNullConstant([[maybe_unused]] HNullConstant* constant) {
// Will be generated at use site.
}
@@ -2677,7 +2819,7 @@ void LocationsBuilderX86_64::VisitLongConstant(HLongConstant* constant) {
locations->SetOut(Location::ConstantLocation(constant));
}
-void InstructionCodeGeneratorX86_64::VisitLongConstant(HLongConstant* constant ATTRIBUTE_UNUSED) {
+void InstructionCodeGeneratorX86_64::VisitLongConstant([[maybe_unused]] HLongConstant* constant) {
// Will be generated at use site.
}
@@ -2687,7 +2829,7 @@ void LocationsBuilderX86_64::VisitFloatConstant(HFloatConstant* constant) {
locations->SetOut(Location::ConstantLocation(constant));
}
-void InstructionCodeGeneratorX86_64::VisitFloatConstant(HFloatConstant* constant ATTRIBUTE_UNUSED) {
+void InstructionCodeGeneratorX86_64::VisitFloatConstant([[maybe_unused]] HFloatConstant* constant) {
// Will be generated at use site.
}
@@ -2698,7 +2840,7 @@ void LocationsBuilderX86_64::VisitDoubleConstant(HDoubleConstant* constant) {
}
void InstructionCodeGeneratorX86_64::VisitDoubleConstant(
- HDoubleConstant* constant ATTRIBUTE_UNUSED) {
+ [[maybe_unused]] HDoubleConstant* constant) {
// Will be generated at use site.
}
@@ -2707,7 +2849,7 @@ void LocationsBuilderX86_64::VisitConstructorFence(HConstructorFence* constructo
}
void InstructionCodeGeneratorX86_64::VisitConstructorFence(
- HConstructorFence* constructor_fence ATTRIBUTE_UNUSED) {
+ [[maybe_unused]] HConstructorFence* constructor_fence) {
codegen_->GenerateMemoryBarrier(MemBarrierKind::kStoreStore);
}
@@ -2723,7 +2865,7 @@ void LocationsBuilderX86_64::VisitReturnVoid(HReturnVoid* ret) {
ret->SetLocations(nullptr);
}
-void InstructionCodeGeneratorX86_64::VisitReturnVoid(HReturnVoid* ret ATTRIBUTE_UNUSED) {
+void InstructionCodeGeneratorX86_64::VisitReturnVoid([[maybe_unused]] HReturnVoid* ret) {
codegen_->GenerateFrameExit();
}
@@ -2996,23 +3138,26 @@ void LocationsBuilderX86_64::VisitInvokeInterface(HInvokeInterface* invoke) {
void CodeGeneratorX86_64::MaybeGenerateInlineCacheCheck(HInstruction* instruction,
CpuRegister klass) {
DCHECK_EQ(RDI, klass.AsRegister());
- // We know the destination of an intrinsic, so no need to record inline
- // caches.
- if (!instruction->GetLocations()->Intrinsified() &&
- GetGraph()->IsCompilingBaseline() &&
- !Runtime::Current()->IsAotCompiler()) {
+ if (ProfilingInfoBuilder::IsInlineCacheUseful(instruction->AsInvoke(), this)) {
ProfilingInfo* info = GetGraph()->GetProfilingInfo();
DCHECK(info != nullptr);
- InlineCache* cache = info->GetInlineCache(instruction->GetDexPc());
- uint64_t address = reinterpret_cast64<uint64_t>(cache);
- NearLabel done;
- __ movq(CpuRegister(TMP), Immediate(address));
- // Fast path for a monomorphic cache.
- __ cmpl(Address(CpuRegister(TMP), InlineCache::ClassesOffset().Int32Value()), klass);
- __ j(kEqual, &done);
- GenerateInvokeRuntime(
- GetThreadOffset<kX86_64PointerSize>(kQuickUpdateInlineCache).Int32Value());
- __ Bind(&done);
+ InlineCache* cache = ProfilingInfoBuilder::GetInlineCache(info, instruction->AsInvoke());
+ if (cache != nullptr) {
+ uint64_t address = reinterpret_cast64<uint64_t>(cache);
+ NearLabel done;
+ __ movq(CpuRegister(TMP), Immediate(address));
+ // Fast path for a monomorphic cache.
+ __ cmpl(Address(CpuRegister(TMP), InlineCache::ClassesOffset().Int32Value()), klass);
+ __ j(kEqual, &done);
+ GenerateInvokeRuntime(
+ GetThreadOffset<kX86_64PointerSize>(kQuickUpdateInlineCache).Int32Value());
+ __ Bind(&done);
+ } else {
+ // This is unexpected, but we don't guarantee stable compilation across
+ // JIT runs so just warn about it.
+ ScopedObjectAccess soa(Thread::Current());
+ LOG(WARNING) << "Missing inline cache for " << GetGraph()->GetArtMethod()->PrettyMethod();
+ }
}
}
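The rewrite above delegates the should-we-profile decision to ProfilingInfoBuilder::IsInlineCacheUseful and tolerates a missing cache with a warning, but the fast path is unchanged: compare the receiver's class against the first cached class and call the update entrypoint only on a miss. In outline, with the cache shape and size below being assumptions and the runtime update modeled as a callback:

#include <cstdint>

struct InlineCacheSketch {
  uintptr_t classes[5];  // classes[0] is the monomorphic fast-path entry.
};

inline void MaybeUpdateInlineCache(InlineCacheSketch* cache,
                                   uintptr_t receiver_class,
                                   void (*update_slow_path)(InlineCacheSketch*,
                                                            uintptr_t)) {
  if (cache->classes[0] == receiver_class) {
    return;  // Monomorphic hit: the cmpl / j kEqual fast path above.
  }
  update_slow_path(cache, receiver_class);  // Stand-in for kQuickUpdateInlineCache.
}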
@@ -4972,7 +5117,7 @@ void LocationsBuilderX86_64::VisitParameterValue(HParameterValue* instruction) {
}
void InstructionCodeGeneratorX86_64::VisitParameterValue(
- HParameterValue* instruction ATTRIBUTE_UNUSED) {
+ [[maybe_unused]] HParameterValue* instruction) {
// Nothing to do, the parameter is already at its location.
}
@@ -4983,7 +5128,7 @@ void LocationsBuilderX86_64::VisitCurrentMethod(HCurrentMethod* instruction) {
}
void InstructionCodeGeneratorX86_64::VisitCurrentMethod(
- HCurrentMethod* instruction ATTRIBUTE_UNUSED) {
+ [[maybe_unused]] HCurrentMethod* instruction) {
// Nothing to do, the method is already at its location.
}
@@ -5062,7 +5207,7 @@ void LocationsBuilderX86_64::VisitPhi(HPhi* instruction) {
locations->SetOut(Location::Any());
}
-void InstructionCodeGeneratorX86_64::VisitPhi(HPhi* instruction ATTRIBUTE_UNUSED) {
+void InstructionCodeGeneratorX86_64::VisitPhi([[maybe_unused]] HPhi* instruction) {
LOG(FATAL) << "Unimplemented";
}
@@ -5091,13 +5236,10 @@ void CodeGeneratorX86_64::GenerateMemoryBarrier(MemBarrierKind kind) {
}
void LocationsBuilderX86_64::HandleFieldGet(HInstruction* instruction) {
- DCHECK(instruction->IsInstanceFieldGet() ||
- instruction->IsStaticFieldGet() ||
- instruction->IsPredicatedInstanceFieldGet());
+ DCHECK(instruction->IsInstanceFieldGet() || instruction->IsStaticFieldGet());
bool object_field_get_with_read_barrier =
- gUseReadBarrier && (instruction->GetType() == DataType::Type::kReference);
- bool is_predicated = instruction->IsPredicatedInstanceFieldGet();
+ (instruction->GetType() == DataType::Type::kReference) && codegen_->EmitReadBarrier();
LocationSummary* locations =
new (GetGraph()->GetAllocator()) LocationSummary(instruction,
object_field_get_with_read_barrier
@@ -5107,37 +5249,26 @@ void LocationsBuilderX86_64::HandleFieldGet(HInstruction* instruction) {
locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty()); // No caller-save registers.
}
// receiver_input
- locations->SetInAt(is_predicated ? 1 : 0, Location::RequiresRegister());
- if (is_predicated) {
- if (DataType::IsFloatingPointType(instruction->GetType())) {
- locations->SetInAt(0, Location::RequiresFpuRegister());
- } else {
- locations->SetInAt(0, Location::RequiresRegister());
- }
- }
+ locations->SetInAt(0, Location::RequiresRegister());
if (DataType::IsFloatingPointType(instruction->GetType())) {
- locations->SetOut(is_predicated ? Location::SameAsFirstInput()
- : Location::RequiresFpuRegister());
+ locations->SetOut(Location::RequiresFpuRegister());
} else {
// The output overlaps for an object field get when read barriers are
// enabled: we do not want the move to overwrite the object's location, as
// we need it to emit the read barrier.
- locations->SetOut(is_predicated ? Location::SameAsFirstInput() : Location::RequiresRegister(),
- object_field_get_with_read_barrier || is_predicated
- ? Location::kOutputOverlap
- : Location::kNoOutputOverlap);
+ locations->SetOut(
+ Location::RequiresRegister(),
+ object_field_get_with_read_barrier ? Location::kOutputOverlap : Location::kNoOutputOverlap);
}
}
void InstructionCodeGeneratorX86_64::HandleFieldGet(HInstruction* instruction,
const FieldInfo& field_info) {
- DCHECK(instruction->IsInstanceFieldGet() ||
- instruction->IsStaticFieldGet() ||
- instruction->IsPredicatedInstanceFieldGet());
+ DCHECK(instruction->IsInstanceFieldGet() || instruction->IsStaticFieldGet());
LocationSummary* locations = instruction->GetLocations();
- Location base_loc = locations->InAt(instruction->IsPredicatedInstanceFieldGet() ? 1 : 0);
+ Location base_loc = locations->InAt(0);
CpuRegister base = base_loc.AsRegister<CpuRegister>();
Location out = locations->Out();
bool is_volatile = field_info.IsVolatile();
@@ -5147,7 +5278,7 @@ void InstructionCodeGeneratorX86_64::HandleFieldGet(HInstruction* instruction,
if (load_type == DataType::Type::kReference) {
// /* HeapReference<Object> */ out = *(base + offset)
- if (gUseReadBarrier && kUseBakerReadBarrier) {
+ if (codegen_->EmitBakerReadBarrier()) {
// Note that a potential implicit null check is handled in this
// CodeGeneratorX86_64::GenerateFieldLoadWithBakerReadBarrier call.
codegen_->GenerateFieldLoadWithBakerReadBarrier(
@@ -5413,14 +5544,6 @@ void InstructionCodeGeneratorX86_64::HandleFieldSet(HInstruction* instruction,
bool is_volatile = field_info.IsVolatile();
DataType::Type field_type = field_info.GetFieldType();
uint32_t offset = field_info.GetFieldOffset().Uint32Value();
- bool is_predicated =
- instruction->IsInstanceFieldSet() && instruction->AsInstanceFieldSet()->GetIsPredicatedSet();
-
- NearLabel pred_is_null;
- if (is_predicated) {
- __ testl(base, base);
- __ j(kZero, &pred_is_null);
- }
HandleFieldSet(instruction,
/*value_index=*/ 1,
@@ -5433,10 +5556,6 @@ void InstructionCodeGeneratorX86_64::HandleFieldSet(HInstruction* instruction,
value_can_be_null,
/*byte_swap=*/ false,
write_barrier_kind);
-
- if (is_predicated) {
- __ Bind(&pred_is_null);
- }
}
void LocationsBuilderX86_64::VisitInstanceFieldSet(HInstanceFieldSet* instruction) {
@@ -5450,26 +5569,10 @@ void InstructionCodeGeneratorX86_64::VisitInstanceFieldSet(HInstanceFieldSet* in
instruction->GetWriteBarrierKind());
}
-void LocationsBuilderX86_64::VisitPredicatedInstanceFieldGet(
- HPredicatedInstanceFieldGet* instruction) {
- HandleFieldGet(instruction);
-}
-
void LocationsBuilderX86_64::VisitInstanceFieldGet(HInstanceFieldGet* instruction) {
HandleFieldGet(instruction);
}
-void InstructionCodeGeneratorX86_64::VisitPredicatedInstanceFieldGet(
- HPredicatedInstanceFieldGet* instruction) {
- NearLabel finish;
- LocationSummary* locations = instruction->GetLocations();
- CpuRegister target = locations->InAt(1).AsRegister<CpuRegister>();
- __ testl(target, target);
- __ j(kZero, &finish);
- HandleFieldGet(instruction, instruction->GetFieldInfo());
- __ Bind(&finish);
-}
-
void InstructionCodeGeneratorX86_64::VisitInstanceFieldGet(HInstanceFieldGet* instruction) {
HandleFieldGet(instruction, instruction->GetFieldInfo());
}
@@ -5615,7 +5718,7 @@ void InstructionCodeGeneratorX86_64::VisitNullCheck(HNullCheck* instruction) {
void LocationsBuilderX86_64::VisitArrayGet(HArrayGet* instruction) {
bool object_array_get_with_read_barrier =
- gUseReadBarrier && (instruction->GetType() == DataType::Type::kReference);
+ (instruction->GetType() == DataType::Type::kReference) && codegen_->EmitReadBarrier();
LocationSummary* locations =
new (GetGraph()->GetAllocator()) LocationSummary(instruction,
object_array_get_with_read_barrier
@@ -5653,7 +5756,7 @@ void InstructionCodeGeneratorX86_64::VisitArrayGet(HArrayGet* instruction) {
"art::mirror::HeapReference<art::mirror::Object> and int32_t have different sizes.");
// /* HeapReference<Object> */ out =
// *(obj + data_offset + index * sizeof(HeapReference<Object>))
- if (gUseReadBarrier && kUseBakerReadBarrier) {
+ if (codegen_->EmitBakerReadBarrier()) {
// Note that a potential implicit null check is handled in this
// CodeGeneratorX86_64::GenerateArrayLoadWithBakerReadBarrier call.
codegen_->GenerateArrayLoadWithBakerReadBarrier(
@@ -5930,8 +6033,7 @@ void InstructionCodeGeneratorX86_64::VisitArraySet(HArraySet* instruction) {
__ movsd(address, value.AsFpuRegister<XmmRegister>());
codegen_->MaybeRecordImplicitNullCheck(instruction);
} else {
- int64_t v =
- bit_cast<int64_t, double>(value.GetConstant()->AsDoubleConstant()->GetValue());
+ int64_t v = bit_cast<int64_t, double>(value.GetConstant()->AsDoubleConstant()->GetValue());
Address address_high =
CodeGeneratorX86_64::ArrayAddress(array, index, TIMES_8, offset + sizeof(int32_t));
codegen_->MoveInt64ToAddress(address, address_high, v, instruction);
@@ -6084,7 +6186,7 @@ void CodeGeneratorX86_64::MarkGCCard(CpuRegister temp,
}
}
-void LocationsBuilderX86_64::VisitParallelMove(HParallelMove* instruction ATTRIBUTE_UNUSED) {
+void LocationsBuilderX86_64::VisitParallelMove([[maybe_unused]] HParallelMove* instruction) {
LOG(FATAL) << "Unimplemented";
}
@@ -6458,7 +6560,7 @@ void LocationsBuilderX86_64::VisitLoadClass(HLoadClass* cls) {
load_kind == HLoadClass::LoadKind::kBssEntryPublic ||
load_kind == HLoadClass::LoadKind::kBssEntryPackage);
- const bool requires_read_barrier = gUseReadBarrier && !cls->IsInBootImage();
+ const bool requires_read_barrier = !cls->IsInBootImage() && codegen_->EmitReadBarrier();
LocationSummary::CallKind call_kind = (cls->NeedsEnvironment() || requires_read_barrier)
? LocationSummary::kCallOnSlowPath
: LocationSummary::kNoCall;
@@ -6471,12 +6573,14 @@ void LocationsBuilderX86_64::VisitLoadClass(HLoadClass* cls) {
locations->SetInAt(0, Location::RequiresRegister());
}
locations->SetOut(Location::RequiresRegister());
- if (load_kind == HLoadClass::LoadKind::kBssEntry) {
- if (!gUseReadBarrier || kUseBakerReadBarrier) {
+ if (load_kind == HLoadClass::LoadKind::kBssEntry ||
+ load_kind == HLoadClass::LoadKind::kBssEntryPublic ||
+ load_kind == HLoadClass::LoadKind::kBssEntryPackage) {
+ if (codegen_->EmitNonBakerReadBarrier()) {
+ // For non-Baker read barrier we have a temp-clobbering call.
+ } else {
// Rely on the type resolution and/or initialization to save everything.
locations->SetCustomSlowPathCallerSaves(OneRegInReferenceOutSaveEverythingCallerSaves());
- } else {
- // For non-Baker read barrier we have a temp-clobbering call.
}
}
}
@@ -6507,9 +6611,8 @@ void InstructionCodeGeneratorX86_64::VisitLoadClass(HLoadClass* cls) NO_THREAD_S
Location out_loc = locations->Out();
CpuRegister out = out_loc.AsRegister<CpuRegister>();
- const ReadBarrierOption read_barrier_option = cls->IsInBootImage()
- ? kWithoutReadBarrier
- : gCompilerReadBarrierOption;
+ const ReadBarrierOption read_barrier_option =
+ cls->IsInBootImage() ? kWithoutReadBarrier : codegen_->GetCompilerReadBarrierOption();
bool generate_null_check = false;
switch (load_kind) {
case HLoadClass::LoadKind::kReferrersClass: {
@@ -6612,13 +6715,50 @@ void InstructionCodeGeneratorX86_64::VisitLoadMethodHandle(HLoadMethodHandle* lo
}
void LocationsBuilderX86_64::VisitLoadMethodType(HLoadMethodType* load) {
- // Custom calling convention: RAX serves as both input and output.
- Location location = Location::RegisterLocation(RAX);
- CodeGenerator::CreateLoadMethodTypeRuntimeCallLocationSummary(load, location, location);
+ LocationSummary* locations =
+ new (GetGraph()->GetAllocator()) LocationSummary(load, LocationSummary::kCallOnSlowPath);
+ if (load->GetLoadKind() == HLoadMethodType::LoadKind::kRuntimeCall) {
+ Location location = Location::RegisterLocation(RAX);
+ CodeGenerator::CreateLoadMethodTypeRuntimeCallLocationSummary(load, location, location);
+ } else {
+ DCHECK_EQ(load->GetLoadKind(), HLoadMethodType::LoadKind::kBssEntry);
+ locations->SetOut(Location::RequiresRegister());
+ if (codegen_->EmitNonBakerReadBarrier()) {
+ // For non-Baker read barrier we have a temp-clobbering call.
+ } else {
+ // Rely on the pResolveMethodType to save everything.
+ locations->SetCustomSlowPathCallerSaves(OneRegInReferenceOutSaveEverythingCallerSaves());
+ }
+ }
}
void InstructionCodeGeneratorX86_64::VisitLoadMethodType(HLoadMethodType* load) {
- codegen_->GenerateLoadMethodTypeRuntimeCall(load);
+ LocationSummary* locations = load->GetLocations();
+ Location out_loc = locations->Out();
+ CpuRegister out = out_loc.AsRegister<CpuRegister>();
+
+ switch (load->GetLoadKind()) {
+ case HLoadMethodType::LoadKind::kBssEntry: {
+ Address address = Address::Absolute(CodeGeneratorX86_64::kPlaceholder32BitOffset,
+ /* no_rip= */ false);
+ Label* fixup_label = codegen_->NewMethodTypeBssEntryPatch(load);
+ // /* GcRoot<mirror::MethodType> */ out = *address /* PC-relative */
+ GenerateGcRootFieldLoad(
+ load, out_loc, address, fixup_label, codegen_->GetCompilerReadBarrierOption());
+ // No need for memory fence, thanks to the x86-64 memory model.
+ SlowPathCode* slow_path =
+ new (codegen_->GetScopedAllocator()) LoadMethodTypeSlowPathX86_64(load);
+ codegen_->AddSlowPath(slow_path);
+ __ testl(out, out);
+ __ j(kEqual, slow_path->GetEntryLabel());
+ __ Bind(slow_path->GetExitLabel());
+ return;
+ }
+ default:
+ DCHECK_EQ(load->GetLoadKind(), HLoadMethodType::LoadKind::kRuntimeCall);
+ codegen_->GenerateLoadMethodTypeRuntimeCall(load);
+ break;
+ }
}
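The new kBssEntry path for HLoadMethodType mirrors the existing kBssEntry handling for classes and strings: load the GC root from a PC-relative .bss slot, and branch to a slow path that resolves the MethodType and fills the slot when the cached value is still null. A minimal standalone sketch of that caching pattern follows; ResolveFromRuntime() is a hypothetical stand-in for the pResolveMethodType entrypoint, and the acquire/release pair is the portable analogue of the ordering the generated x86-64 code gets for free from the hardware memory model.

#include <atomic>
#include <cstdint>
#include <cstdio>

// Hypothetical stand-in for the pResolveMethodType runtime entrypoint.
static void* ResolveFromRuntime(uint32_t proto_index) {
  static int dummy[16];
  return &dummy[proto_index % 16];
}

// One .bss-style slot per proto index: the fast path is a single load; the
// slow path resolves the entry and publishes it for subsequent fast loads.
static void* LoadMethodTypeBssEntry(std::atomic<void*>& bss_slot, uint32_t proto_index) {
  void* cached = bss_slot.load(std::memory_order_acquire);  // corresponds to the testl/j above
  if (cached != nullptr) {
    return cached;                                          // fast path: slot already resolved
  }
  void* resolved = ResolveFromRuntime(proto_index);         // slow path call
  bss_slot.store(resolved, std::memory_order_release);      // publish for future fast loads
  return resolved;
}

int main() {
  std::atomic<void*> slot{nullptr};
  std::printf("%p\n", LoadMethodTypeBssEntry(slot, 3));  // takes the slow path
  std::printf("%p\n", LoadMethodTypeBssEntry(slot, 3));  // hits the cached slot
  return 0;
}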
void InstructionCodeGeneratorX86_64::VisitClinitCheck(HClinitCheck* check) {
@@ -6649,18 +6789,18 @@ HLoadString::LoadKind CodeGeneratorX86_64::GetSupportedLoadStringKind(
}
void LocationsBuilderX86_64::VisitLoadString(HLoadString* load) {
- LocationSummary::CallKind call_kind = CodeGenerator::GetLoadStringCallKind(load);
+ LocationSummary::CallKind call_kind = codegen_->GetLoadStringCallKind(load);
LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(load, call_kind);
if (load->GetLoadKind() == HLoadString::LoadKind::kRuntimeCall) {
locations->SetOut(Location::RegisterLocation(RAX));
} else {
locations->SetOut(Location::RequiresRegister());
if (load->GetLoadKind() == HLoadString::LoadKind::kBssEntry) {
- if (!gUseReadBarrier || kUseBakerReadBarrier) {
+ if (codegen_->EmitNonBakerReadBarrier()) {
+ // For non-Baker read barrier we have a temp-clobbering call.
+ } else {
// Rely on the pResolveString to save everything.
locations->SetCustomSlowPathCallerSaves(OneRegInReferenceOutSaveEverythingCallerSaves());
- } else {
- // For non-Baker read barrier we have a temp-clobbering call.
}
}
}
@@ -6704,7 +6844,8 @@ void InstructionCodeGeneratorX86_64::VisitLoadString(HLoadString* load) NO_THREA
/* no_rip= */ false);
Label* fixup_label = codegen_->NewStringBssEntryPatch(load);
// /* GcRoot<mirror::Class> */ out = *address /* PC-relative */
- GenerateGcRootFieldLoad(load, out_loc, address, fixup_label, gCompilerReadBarrierOption);
+ GenerateGcRootFieldLoad(
+ load, out_loc, address, fixup_label, codegen_->GetCompilerReadBarrierOption());
// No need for memory fence, thanks to the x86-64 memory model.
SlowPathCode* slow_path = new (codegen_->GetScopedAllocator()) LoadStringSlowPathX86_64(load);
codegen_->AddSlowPath(slow_path);
@@ -6725,14 +6866,14 @@ void InstructionCodeGeneratorX86_64::VisitLoadString(HLoadString* load) NO_THREA
Label* fixup_label = codegen_->NewJitRootStringPatch(
load->GetDexFile(), load->GetStringIndex(), load->GetString());
// /* GcRoot<mirror::String> */ out = *address
- GenerateGcRootFieldLoad(load, out_loc, address, fixup_label, gCompilerReadBarrierOption);
+ GenerateGcRootFieldLoad(
+ load, out_loc, address, fixup_label, codegen_->GetCompilerReadBarrierOption());
return;
}
default:
break;
}
- // TODO: Re-add the compiler code to do string dex cache lookup again.
// Custom calling convention: RAX serves as both input and output.
__ movl(CpuRegister(RAX), Immediate(load->GetStringIndex().index_));
codegen_->InvokeRuntime(kQuickResolveString,
@@ -6760,7 +6901,7 @@ void LocationsBuilderX86_64::VisitClearException(HClearException* clear) {
new (GetGraph()->GetAllocator()) LocationSummary(clear, LocationSummary::kNoCall);
}
-void InstructionCodeGeneratorX86_64::VisitClearException(HClearException* clear ATTRIBUTE_UNUSED) {
+void InstructionCodeGeneratorX86_64::VisitClearException([[maybe_unused]] HClearException* clear) {
__ gs()->movl(GetExceptionTlsAddress(), Immediate(0));
}
@@ -6777,8 +6918,8 @@ void InstructionCodeGeneratorX86_64::VisitThrow(HThrow* instruction) {
}
// Temp is used for read barrier.
-static size_t NumberOfInstanceOfTemps(TypeCheckKind type_check_kind) {
- if (gUseReadBarrier &&
+static size_t NumberOfInstanceOfTemps(bool emit_read_barrier, TypeCheckKind type_check_kind) {
+ if (emit_read_barrier &&
!kUseBakerReadBarrier &&
(type_check_kind == TypeCheckKind::kAbstractClassCheck ||
type_check_kind == TypeCheckKind::kClassHierarchyCheck ||
@@ -6791,11 +6932,11 @@ static size_t NumberOfInstanceOfTemps(TypeCheckKind type_check_kind) {
// Interface case has 2 temps, one for holding the number of interfaces, one for the current
// interface pointer, the current interface is compared in memory.
// The other checks have one temp for loading the object's class.
-static size_t NumberOfCheckCastTemps(TypeCheckKind type_check_kind) {
+static size_t NumberOfCheckCastTemps(bool emit_read_barrier, TypeCheckKind type_check_kind) {
if (type_check_kind == TypeCheckKind::kInterfaceCheck) {
return 2;
}
- return 1 + NumberOfInstanceOfTemps(type_check_kind);
+ return 1 + NumberOfInstanceOfTemps(emit_read_barrier, type_check_kind);
}
void LocationsBuilderX86_64::VisitInstanceOf(HInstanceOf* instruction) {
@@ -6807,7 +6948,7 @@ void LocationsBuilderX86_64::VisitInstanceOf(HInstanceOf* instruction) {
case TypeCheckKind::kAbstractClassCheck:
case TypeCheckKind::kClassHierarchyCheck:
case TypeCheckKind::kArrayObjectCheck: {
- bool needs_read_barrier = CodeGenerator::InstanceOfNeedsReadBarrier(instruction);
+ bool needs_read_barrier = codegen_->InstanceOfNeedsReadBarrier(instruction);
call_kind = needs_read_barrier ? LocationSummary::kCallOnSlowPath : LocationSummary::kNoCall;
baker_read_barrier_slow_path = kUseBakerReadBarrier && needs_read_barrier;
break;
@@ -6836,7 +6977,8 @@ void LocationsBuilderX86_64::VisitInstanceOf(HInstanceOf* instruction) {
}
// Note that TypeCheckSlowPathX86_64 uses this "out" register too.
locations->SetOut(Location::RequiresRegister());
- locations->AddRegisterTemps(NumberOfInstanceOfTemps(type_check_kind));
+ locations->AddRegisterTemps(
+ NumberOfInstanceOfTemps(codegen_->EmitReadBarrier(), type_check_kind));
}
void InstructionCodeGeneratorX86_64::VisitInstanceOf(HInstanceOf* instruction) {
@@ -6847,7 +6989,7 @@ void InstructionCodeGeneratorX86_64::VisitInstanceOf(HInstanceOf* instruction) {
Location cls = locations->InAt(1);
Location out_loc = locations->Out();
CpuRegister out = out_loc.AsRegister<CpuRegister>();
- const size_t num_temps = NumberOfInstanceOfTemps(type_check_kind);
+ const size_t num_temps = NumberOfInstanceOfTemps(codegen_->EmitReadBarrier(), type_check_kind);
DCHECK_LE(num_temps, 1u);
Location maybe_temp_loc = (num_temps >= 1u) ? locations->GetTemp(0) : Location::NoLocation();
uint32_t class_offset = mirror::Object::ClassOffset().Int32Value();
@@ -6867,7 +7009,7 @@ void InstructionCodeGeneratorX86_64::VisitInstanceOf(HInstanceOf* instruction) {
switch (type_check_kind) {
case TypeCheckKind::kExactCheck: {
ReadBarrierOption read_barrier_option =
- CodeGenerator::ReadBarrierOptionForInstanceOf(instruction);
+ codegen_->ReadBarrierOptionForInstanceOf(instruction);
// /* HeapReference<Class> */ out = obj->klass_
GenerateReferenceLoadTwoRegisters(instruction,
out_loc,
@@ -6895,7 +7037,7 @@ void InstructionCodeGeneratorX86_64::VisitInstanceOf(HInstanceOf* instruction) {
case TypeCheckKind::kAbstractClassCheck: {
ReadBarrierOption read_barrier_option =
- CodeGenerator::ReadBarrierOptionForInstanceOf(instruction);
+ codegen_->ReadBarrierOptionForInstanceOf(instruction);
// /* HeapReference<Class> */ out = obj->klass_
GenerateReferenceLoadTwoRegisters(instruction,
out_loc,
@@ -6931,7 +7073,7 @@ void InstructionCodeGeneratorX86_64::VisitInstanceOf(HInstanceOf* instruction) {
case TypeCheckKind::kClassHierarchyCheck: {
ReadBarrierOption read_barrier_option =
- CodeGenerator::ReadBarrierOptionForInstanceOf(instruction);
+ codegen_->ReadBarrierOptionForInstanceOf(instruction);
// /* HeapReference<Class> */ out = obj->klass_
GenerateReferenceLoadTwoRegisters(instruction,
out_loc,
@@ -6968,7 +7110,7 @@ void InstructionCodeGeneratorX86_64::VisitInstanceOf(HInstanceOf* instruction) {
case TypeCheckKind::kArrayObjectCheck: {
ReadBarrierOption read_barrier_option =
- CodeGenerator::ReadBarrierOptionForInstanceOf(instruction);
+ codegen_->ReadBarrierOptionForInstanceOf(instruction);
// /* HeapReference<Class> */ out = obj->klass_
GenerateReferenceLoadTwoRegisters(instruction,
out_loc,
@@ -7097,7 +7239,7 @@ void InstructionCodeGeneratorX86_64::VisitInstanceOf(HInstanceOf* instruction) {
void LocationsBuilderX86_64::VisitCheckCast(HCheckCast* instruction) {
TypeCheckKind type_check_kind = instruction->GetTypeCheckKind();
- LocationSummary::CallKind call_kind = CodeGenerator::GetCheckCastCallKind(instruction);
+ LocationSummary::CallKind call_kind = codegen_->GetCheckCastCallKind(instruction);
LocationSummary* locations =
new (GetGraph()->GetAllocator()) LocationSummary(instruction, call_kind);
locations->SetInAt(0, Location::RequiresRegister());
@@ -7112,8 +7254,7 @@ void LocationsBuilderX86_64::VisitCheckCast(HCheckCast* instruction) {
} else {
locations->SetInAt(1, Location::Any());
}
- // Add temps for read barriers and other uses. One is used by TypeCheckSlowPathX86.
- locations->AddRegisterTemps(NumberOfCheckCastTemps(type_check_kind));
+ locations->AddRegisterTemps(NumberOfCheckCastTemps(codegen_->EmitReadBarrier(), type_check_kind));
}
void InstructionCodeGeneratorX86_64::VisitCheckCast(HCheckCast* instruction) {
@@ -7124,7 +7265,7 @@ void InstructionCodeGeneratorX86_64::VisitCheckCast(HCheckCast* instruction) {
Location cls = locations->InAt(1);
Location temp_loc = locations->GetTemp(0);
CpuRegister temp = temp_loc.AsRegister<CpuRegister>();
- const size_t num_temps = NumberOfCheckCastTemps(type_check_kind);
+ const size_t num_temps = NumberOfCheckCastTemps(codegen_->EmitReadBarrier(), type_check_kind);
DCHECK_GE(num_temps, 1u);
DCHECK_LE(num_temps, 2u);
Location maybe_temp2_loc = (num_temps >= 2u) ? locations->GetTemp(1) : Location::NoLocation();
@@ -7137,7 +7278,7 @@ void InstructionCodeGeneratorX86_64::VisitCheckCast(HCheckCast* instruction) {
const uint32_t object_array_data_offset =
mirror::Array::DataOffset(kHeapReferenceSize).Uint32Value();
- bool is_type_check_slow_path_fatal = CodeGenerator::IsTypeCheckSlowPathFatal(instruction);
+ bool is_type_check_slow_path_fatal = codegen_->IsTypeCheckSlowPathFatal(instruction);
SlowPathCode* type_check_slow_path =
new (codegen_->GetScopedAllocator()) TypeCheckSlowPathX86_64(
instruction, is_type_check_slow_path_fatal);
@@ -7301,11 +7442,11 @@ void InstructionCodeGeneratorX86_64::VisitCheckCast(HCheckCast* instruction) {
kWithoutReadBarrier);
// /* HeapReference<Class> */ temp = temp->iftable_
- GenerateReferenceLoadTwoRegisters(instruction,
- temp_loc,
- temp_loc,
- iftable_offset,
- kWithoutReadBarrier);
+ GenerateReferenceLoadOneRegister(instruction,
+ temp_loc,
+ iftable_offset,
+ maybe_temp2_loc,
+ kWithoutReadBarrier);
// Iftable is never null.
__ movl(maybe_temp2_loc.AsRegister<CpuRegister>(), Address(temp, array_length_offset));
// Maybe poison the `cls` for direct comparison with memory.
@@ -7532,7 +7673,7 @@ void InstructionCodeGeneratorX86_64::GenerateReferenceLoadOneRegister(
ReadBarrierOption read_barrier_option) {
CpuRegister out_reg = out.AsRegister<CpuRegister>();
if (read_barrier_option == kWithReadBarrier) {
- CHECK(gUseReadBarrier);
+ DCHECK(codegen_->EmitReadBarrier());
if (kUseBakerReadBarrier) {
// Load with fast path based Baker's read barrier.
// /* HeapReference<Object> */ out = *(out + offset)
@@ -7566,7 +7707,7 @@ void InstructionCodeGeneratorX86_64::GenerateReferenceLoadTwoRegisters(
CpuRegister out_reg = out.AsRegister<CpuRegister>();
CpuRegister obj_reg = obj.AsRegister<CpuRegister>();
if (read_barrier_option == kWithReadBarrier) {
- CHECK(gUseReadBarrier);
+ DCHECK(codegen_->EmitReadBarrier());
if (kUseBakerReadBarrier) {
// Load with fast path based Baker's read barrier.
// /* HeapReference<Object> */ out = *(obj + offset)
@@ -7594,7 +7735,7 @@ void InstructionCodeGeneratorX86_64::GenerateGcRootFieldLoad(
ReadBarrierOption read_barrier_option) {
CpuRegister root_reg = root.AsRegister<CpuRegister>();
if (read_barrier_option == kWithReadBarrier) {
- DCHECK(gUseReadBarrier);
+ DCHECK(codegen_->EmitReadBarrier());
if (kUseBakerReadBarrier) {
// Fast path implementation of art::ReadBarrier::BarrierForRoot when
// Baker's read barrier are used:
@@ -7658,8 +7799,7 @@ void CodeGeneratorX86_64::GenerateFieldLoadWithBakerReadBarrier(HInstruction* in
CpuRegister obj,
uint32_t offset,
bool needs_null_check) {
- DCHECK(gUseReadBarrier);
- DCHECK(kUseBakerReadBarrier);
+ DCHECK(EmitBakerReadBarrier());
// /* HeapReference<Object> */ ref = *(obj + offset)
Address src(obj, offset);
@@ -7672,8 +7812,7 @@ void CodeGeneratorX86_64::GenerateArrayLoadWithBakerReadBarrier(HInstruction* in
uint32_t data_offset,
Location index,
bool needs_null_check) {
- DCHECK(gUseReadBarrier);
- DCHECK(kUseBakerReadBarrier);
+ DCHECK(EmitBakerReadBarrier());
static_assert(
sizeof(mirror::HeapReference<mirror::Object>) == sizeof(int32_t),
@@ -7692,8 +7831,7 @@ void CodeGeneratorX86_64::GenerateReferenceLoadWithBakerReadBarrier(HInstruction
bool always_update_field,
CpuRegister* temp1,
CpuRegister* temp2) {
- DCHECK(gUseReadBarrier);
- DCHECK(kUseBakerReadBarrier);
+ DCHECK(EmitBakerReadBarrier());
// In slow path based read barriers, the read barrier call is
// inserted after the original load. However, in fast path based
@@ -7774,7 +7912,7 @@ void CodeGeneratorX86_64::GenerateReadBarrierSlow(HInstruction* instruction,
Location obj,
uint32_t offset,
Location index) {
- DCHECK(gUseReadBarrier);
+ DCHECK(EmitReadBarrier());
// Insert a slow path based read barrier *after* the reference load.
//
@@ -7801,7 +7939,7 @@ void CodeGeneratorX86_64::MaybeGenerateReadBarrierSlow(HInstruction* instruction
Location obj,
uint32_t offset,
Location index) {
- if (gUseReadBarrier) {
+ if (EmitReadBarrier()) {
// Baker's read barriers shall be handled by the fast path
// (CodeGeneratorX86_64::GenerateReferenceLoadWithBakerReadBarrier).
DCHECK(!kUseBakerReadBarrier);
@@ -7816,7 +7954,7 @@ void CodeGeneratorX86_64::MaybeGenerateReadBarrierSlow(HInstruction* instruction
void CodeGeneratorX86_64::GenerateReadBarrierForRootSlow(HInstruction* instruction,
Location out,
Location root) {
- DCHECK(gUseReadBarrier);
+ DCHECK(EmitReadBarrier());
// Insert a slow path based read barrier *after* the GC root load.
//
@@ -7830,12 +7968,12 @@ void CodeGeneratorX86_64::GenerateReadBarrierForRootSlow(HInstruction* instructi
__ Bind(slow_path->GetExitLabel());
}
-void LocationsBuilderX86_64::VisitBoundType(HBoundType* instruction ATTRIBUTE_UNUSED) {
+void LocationsBuilderX86_64::VisitBoundType([[maybe_unused]] HBoundType* instruction) {
// Nothing to do, this should be removed during prepare for register allocator.
LOG(FATAL) << "Unreachable";
}
-void InstructionCodeGeneratorX86_64::VisitBoundType(HBoundType* instruction ATTRIBUTE_UNUSED) {
+void InstructionCodeGeneratorX86_64::VisitBoundType([[maybe_unused]] HBoundType* instruction) {
// Nothing to do, this should be removed during prepare for register allocator.
LOG(FATAL) << "Unreachable";
}
@@ -7930,13 +8068,13 @@ void InstructionCodeGeneratorX86_64::VisitPackedSwitch(HPackedSwitch* switch_ins
__ jmp(temp_reg);
}
-void LocationsBuilderX86_64::VisitIntermediateAddress(HIntermediateAddress* instruction
- ATTRIBUTE_UNUSED) {
+void LocationsBuilderX86_64::VisitIntermediateAddress(
+ [[maybe_unused]] HIntermediateAddress* instruction) {
LOG(FATAL) << "Unreachable";
}
-void InstructionCodeGeneratorX86_64::VisitIntermediateAddress(HIntermediateAddress* instruction
- ATTRIBUTE_UNUSED) {
+void InstructionCodeGeneratorX86_64::VisitIntermediateAddress(
+ [[maybe_unused]] HIntermediateAddress* instruction) {
LOG(FATAL) << "Unreachable";
}
@@ -8037,9 +8175,9 @@ Address CodeGeneratorX86_64::ArrayAddress(CpuRegister obj,
Location index,
ScaleFactor scale,
uint32_t data_offset) {
- return index.IsConstant() ?
- Address(obj, (index.GetConstant()->AsIntConstant()->GetValue() << scale) + data_offset) :
- Address(obj, index.AsRegister<CpuRegister>(), scale, data_offset);
+ return index.IsConstant()
+ ? Address(obj, (index.GetConstant()->AsIntConstant()->GetValue() << scale) + data_offset)
+ : Address(obj, index.AsRegister<CpuRegister>(), scale, data_offset);
}
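ArrayAddress picks between the two x86-64 addressing forms depending on whether the index is a compile-time constant. A tiny sketch of the effective-address arithmetic behind those two forms (a plain arithmetic model, not the actual Address encoding):

#include <cstdint>
#include <optional>

// Effective address of obj[index] at `data_offset`, scaled by 1 << scale.
// A constant index folds into the displacement; otherwise base + reg * scale + disp is used.
static uint64_t ArrayElementAddressSketch(uint64_t obj_base,
                                          std::optional<int32_t> constant_index,
                                          uint64_t index_register_value,
                                          int scale,
                                          uint32_t data_offset) {
  if (constant_index.has_value()) {
    // Address(obj, (constant << scale) + data_offset)
    return obj_base + (static_cast<uint64_t>(*constant_index) << scale) + data_offset;
  }
  // Address(obj, index, scale, data_offset)
  return obj_base + (index_register_value << scale) + data_offset;
}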
void CodeGeneratorX86_64::Store64BitValueToStack(Location dest, int64_t value) {
@@ -8119,7 +8257,7 @@ class JumpTableRIPFixup : public RIPFixup {
const HPackedSwitch* switch_instr_;
};
-void CodeGeneratorX86_64::Finalize(CodeAllocator* allocator) {
+void CodeGeneratorX86_64::Finalize() {
// Generate the constant area if needed.
X86_64Assembler* assembler = GetAssembler();
if (!assembler->IsConstantAreaEmpty() || !fixups_to_jump_tables_.empty()) {
@@ -8137,7 +8275,7 @@ void CodeGeneratorX86_64::Finalize(CodeAllocator* allocator) {
}
// And finish up.
- CodeGenerator::Finalize(allocator);
+ CodeGenerator::Finalize();
}
Address CodeGeneratorX86_64::LiteralDoubleAddress(double v) {
@@ -8217,7 +8355,7 @@ void CodeGeneratorX86_64::PatchJitRootUse(uint8_t* code,
reinterpret_cast<uintptr_t>(roots_data) + index_in_table * sizeof(GcRoot<mirror::Object>);
using unaligned_uint32_t __attribute__((__aligned__(1))) = uint32_t;
reinterpret_cast<unaligned_uint32_t*>(code + code_offset)[0] =
- dchecked_integral_cast<uint32_t>(address);
+ dchecked_integral_cast<uint32_t>(address);
}
void CodeGeneratorX86_64::EmitJitRootPatches(uint8_t* code, const uint8_t* roots_data) {
diff --git a/compiler/optimizing/code_generator_x86_64.h b/compiler/optimizing/code_generator_x86_64.h
index dff2e799e0..e4d3eac6bc 100644
--- a/compiler/optimizing/code_generator_x86_64.h
+++ b/compiler/optimizing/code_generator_x86_64.h
@@ -87,19 +87,8 @@ static constexpr FloatRegister non_volatile_xmm_regs[] = { XMM12, XMM13, XMM14,
V(StringBuilderLength) \
V(StringBuilderToString) \
/* 1.8 */ \
- V(UnsafeGetAndAddInt) \
- V(UnsafeGetAndAddLong) \
- V(UnsafeGetAndSetInt) \
- V(UnsafeGetAndSetLong) \
- V(UnsafeGetAndSetObject) \
V(MethodHandleInvokeExact) \
- V(MethodHandleInvoke) \
- /* OpenJDK 11 */ \
- V(JdkUnsafeGetAndAddInt) \
- V(JdkUnsafeGetAndAddLong) \
- V(JdkUnsafeGetAndSetInt) \
- V(JdkUnsafeGetAndSetLong) \
- V(JdkUnsafeGetAndSetObject)
+ V(MethodHandleInvoke)
class InvokeRuntimeCallingConvention : public CallingConvention<Register, FloatRegister> {
public:
@@ -162,16 +151,16 @@ class FieldAccessCallingConventionX86_64 : public FieldAccessCallingConvention {
Location GetFieldIndexLocation() const override {
return Location::RegisterLocation(RDI);
}
- Location GetReturnLocation(DataType::Type type ATTRIBUTE_UNUSED) const override {
+ Location GetReturnLocation([[maybe_unused]] DataType::Type type) const override {
return Location::RegisterLocation(RAX);
}
- Location GetSetValueLocation(DataType::Type type ATTRIBUTE_UNUSED, bool is_instance)
- const override {
+ Location GetSetValueLocation([[maybe_unused]] DataType::Type type,
+ bool is_instance) const override {
return is_instance
? Location::RegisterLocation(RDX)
: Location::RegisterLocation(RSI);
}
- Location GetFpuLocation(DataType::Type type ATTRIBUTE_UNUSED) const override {
+ Location GetFpuLocation([[maybe_unused]] DataType::Type type) const override {
return Location::FpuRegisterLocation(XMM0);
}
@@ -468,7 +457,7 @@ class CodeGeneratorX86_64 : public CodeGenerator {
void SetupBlockedRegisters() const override;
void DumpCoreRegister(std::ostream& stream, int reg) const override;
void DumpFloatingPointRegister(std::ostream& stream, int reg) const override;
- void Finalize(CodeAllocator* allocator) override;
+ void Finalize() override;
InstructionSet GetInstructionSet() const override {
return InstructionSet::kX86_64;
@@ -502,9 +491,7 @@ class CodeGeneratorX86_64 : public CodeGenerator {
block_labels_ = CommonInitializeLabels<Label>();
}
- bool NeedsTwoRegisters(DataType::Type type ATTRIBUTE_UNUSED) const override {
- return false;
- }
+ bool NeedsTwoRegisters([[maybe_unused]] DataType::Type type) const override { return false; }
// Check if the desired_string_load_kind is supported. If it is, return it,
// otherwise return a fall-back kind that should be used instead.
@@ -536,6 +523,7 @@ class CodeGeneratorX86_64 : public CodeGenerator {
Label* NewTypeBssEntryPatch(HLoadClass* load_class);
void RecordBootImageStringPatch(HLoadString* load_string);
Label* NewStringBssEntryPatch(HLoadString* load_string);
+ Label* NewMethodTypeBssEntryPatch(HLoadMethodType* load_method_type);
void RecordBootImageJniEntrypointPatch(HInvokeStaticOrDirect* invoke);
Label* NewJitRootStringPatch(const DexFile& dex_file,
dex::StringIndex string_index,
@@ -748,6 +736,8 @@ class CodeGeneratorX86_64 : public CodeGenerator {
ArenaDeque<PatchInfo<Label>> boot_image_string_patches_;
// PC-relative String patch info for kBssEntry.
ArenaDeque<PatchInfo<Label>> string_bss_entry_patches_;
+ // PC-relative MethodType patch info for kBssEntry.
+ ArenaDeque<PatchInfo<Label>> method_type_bss_entry_patches_;
// PC-relative method patch info for kBootImageLinkTimePcRelative+kCallCriticalNative.
ArenaDeque<PatchInfo<Label>> boot_image_jni_entrypoint_patches_;
// PC-relative patch info for IntrinsicObjects for the boot image,
diff --git a/compiler/optimizing/code_sinking.cc b/compiler/optimizing/code_sinking.cc
index d759a16f48..a2371817ee 100644
--- a/compiler/optimizing/code_sinking.cc
+++ b/compiler/optimizing/code_sinking.cc
@@ -16,6 +16,9 @@
#include "code_sinking.h"
+#include <sstream>
+
+#include "android-base/logging.h"
#include "base/arena_bit_vector.h"
#include "base/array_ref.h"
#include "base/bit_vector-inl.h"
@@ -134,7 +137,6 @@ static bool IsInterestingInstruction(HInstruction* instruction) {
// hard to test, as LSE removes them.
if (instruction->IsStaticFieldGet() ||
instruction->IsInstanceFieldGet() ||
- instruction->IsPredicatedInstanceFieldGet() ||
instruction->IsArrayGet()) {
return false;
}
@@ -335,10 +337,6 @@ void CodeSinking::SinkCodeToUncommonBranch(HBasicBlock* end_block) {
processed_instructions.ClearAllBits();
ArenaBitVector post_dominated(&allocator, graph_->GetBlocks().size(), /* expandable= */ false);
post_dominated.ClearAllBits();
- ArenaBitVector instructions_that_can_move(
- &allocator, number_of_instructions, /* expandable= */ false);
- instructions_that_can_move.ClearAllBits();
- ScopedArenaVector<HInstruction*> move_in_order(allocator.Adapter(kArenaAllocMisc));
// Step (1): Visit post order to get a subset of blocks post dominated by `end_block`.
// TODO(ngeoffray): Getting the full set of post-dominated should be done by
@@ -411,6 +409,13 @@ void CodeSinking::SinkCodeToUncommonBranch(HBasicBlock* end_block) {
HBasicBlock* common_dominator = finder.Get();
// Step (2): iterate over the worklist to find sinking candidates.
+ ArenaBitVector instructions_that_can_move(
+ &allocator, number_of_instructions, /* expandable= */ false);
+ instructions_that_can_move.ClearAllBits();
+ ScopedArenaVector<ScopedArenaVector<HInstruction*>> instructions_to_move(
+ graph_->GetBlocks().size(),
+ ScopedArenaVector<HInstruction*>(allocator.Adapter(kArenaAllocMisc)),
+ allocator.Adapter(kArenaAllocMisc));
while (!worklist.empty()) {
HInstruction* instruction = worklist.back();
if (processed_instructions.IsBitSet(instruction->GetId())) {
@@ -467,7 +472,7 @@ void CodeSinking::SinkCodeToUncommonBranch(HBasicBlock* end_block) {
// Instruction is a candidate for being sunk. Mark it as such, remove it from the
// work list, and add its inputs to the work list.
instructions_that_can_move.SetBit(instruction->GetId());
- move_in_order.push_back(instruction);
+ instructions_to_move[instruction->GetBlock()->GetBlockId()].push_back(instruction);
processed_instructions.SetBit(instruction->GetId());
worklist.pop_back();
AddInputs(instruction, processed_instructions, post_dominated, &worklist);
@@ -493,14 +498,50 @@ void CodeSinking::SinkCodeToUncommonBranch(HBasicBlock* end_block) {
}
}
- // Make sure we process instructions in dominated order. This is required for heap
- // stores.
- std::sort(move_in_order.begin(), move_in_order.end(), [](HInstruction* a, HInstruction* b) {
- return b->StrictlyDominates(a);
- });
+  // We want to process the instructions in reverse dominated order. This is required for heap
+  // stores. Since StrictlyDominates is not a strict weak ordering (incomparability is not
+  // transitive), we cannot simply std::sort the whole vector; instead we do some extra
+  // bookkeeping, sorting per block and concatenating the blocks in post order.
+ ScopedArenaVector<HInstruction*> instructions_to_move_sorted(allocator.Adapter(kArenaAllocMisc));
+ for (HBasicBlock* block : graph_->GetPostOrder()) {
+ const int block_id = block->GetBlockId();
+
+ // Order the block itself first.
+ std::sort(instructions_to_move[block_id].begin(),
+ instructions_to_move[block_id].end(),
+ [&block](HInstruction* a, HInstruction* b) {
+ return block->GetInstructions().FoundBefore(b, a);
+ });
+
+ for (HInstruction* instruction : instructions_to_move[block_id]) {
+ instructions_to_move_sorted.push_back(instruction);
+ }
+ }
+
+ if (kIsDebugBuild) {
+    // We should have ordered the instructions in reverse dominated order. This means that
+    // an instruction should never dominate an instruction that comes after it in the vector.
+ for (size_t i = 0; i < instructions_to_move_sorted.size(); ++i) {
+ for (size_t j = i + 1; j < instructions_to_move_sorted.size(); ++j) {
+ if (instructions_to_move_sorted[i]->StrictlyDominates(instructions_to_move_sorted[j])) {
+ std::stringstream ss;
+ graph_->Dump(ss, nullptr);
+ ss << "\n"
+ << "{";
+ for (HInstruction* instr : instructions_to_move_sorted) {
+ ss << *instr << " in block: " << instr->GetBlock() << ", ";
+ }
+ ss << "}\n";
+        ss << "i = " << i << " which is " << *instructions_to_move_sorted[i]
+           << " strictly dominates j = " << j << " which is " << *instructions_to_move_sorted[j]
+ << "\n";
+ LOG(FATAL) << "Unexpected ordering of code sinking instructions: " << ss.str();
+ }
+ }
+ }
+ }
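Because StrictlyDominates is not a strict weak ordering across unrelated blocks, the pass now sorts each block's candidates by their position within the block and then concatenates the blocks in post order, which yields exactly the reverse dominated order the debug check above verifies. A standalone sketch of that two-level ordering, using plain integers in place of instructions and assuming the outer vector is already in the graph's post order:

#include <algorithm>
#include <cstddef>
#include <vector>

struct Candidate {
  size_t block_post_order_rank;  // position of the owning block in the graph's post order
  size_t index_in_block;         // position of the instruction within its block
};

// Sort each block's group so that later instructions come first, then append the
// groups in post order. Dominators end up after the instructions they dominate.
static std::vector<Candidate> OrderForSinking(std::vector<std::vector<Candidate>> per_block) {
  std::vector<Candidate> result;
  for (std::vector<Candidate>& block : per_block) {  // blocks already in post order
    std::sort(block.begin(), block.end(), [](const Candidate& a, const Candidate& b) {
      return a.index_in_block > b.index_in_block;    // FoundBefore(b, a): reversed block order
    });
    result.insert(result.end(), block.begin(), block.end());
  }
  return result;
}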
// Step (3): Try to move sinking candidates.
- for (HInstruction* instruction : move_in_order) {
+ for (HInstruction* instruction : instructions_to_move_sorted) {
HInstruction* position = nullptr;
if (instruction->IsArraySet()
|| instruction->IsInstanceFieldSet()
diff --git a/compiler/optimizing/codegen_test.cc b/compiler/optimizing/codegen_test.cc
index 2d9acc49b3..c72d3ea24a 100644
--- a/compiler/optimizing/codegen_test.cc
+++ b/compiler/optimizing/codegen_test.cc
@@ -733,8 +733,7 @@ TEST_F(CodegenTest, ARMVIXLParallelMoveResolver) {
move->AddMove(Location::StackSlot(8192), Location::StackSlot(0), DataType::Type::kInt32, nullptr);
codegen.GetMoveResolver()->EmitNativeCode(move);
- InternalCodeAllocator code_allocator;
- codegen.Finalize(&code_allocator);
+ codegen.Finalize();
}
#endif
@@ -785,8 +784,7 @@ TEST_F(CodegenTest, ARM64ParallelMoveResolverB34760542) {
nullptr);
codegen.GetMoveResolver()->EmitNativeCode(move);
- InternalCodeAllocator code_allocator;
- codegen.Finalize(&code_allocator);
+ codegen.Finalize();
}
// Check that ParallelMoveResolver works fine for ARM64 for both cases when SIMD is on and off.
@@ -798,7 +796,7 @@ TEST_F(CodegenTest, ARM64ParallelMoveResolverSIMD) {
codegen.Initialize();
- graph->SetHasSIMD(true);
+ graph->SetHasTraditionalSIMD(true);
for (int i = 0; i < 2; i++) {
HParallelMove* move = new (graph->GetAllocator()) HParallelMove(graph->GetAllocator());
move->AddMove(Location::SIMDStackSlot(0),
@@ -818,11 +816,10 @@ TEST_F(CodegenTest, ARM64ParallelMoveResolverSIMD) {
DataType::Type::kFloat64,
nullptr);
codegen.GetMoveResolver()->EmitNativeCode(move);
- graph->SetHasSIMD(false);
+ graph->SetHasTraditionalSIMD(false);
}
- InternalCodeAllocator code_allocator;
- codegen.Finalize(&code_allocator);
+ codegen.Finalize();
}
// Check that ART ISA Features are propagated to VIXL for arm64 (using cortex-a75 as example).
@@ -867,7 +864,7 @@ TEST_F(CodegenTest, ARM64FrameSizeSIMD) {
arm64::CodeGeneratorARM64 codegen(graph, *compiler_options);
codegen.Initialize();
- graph->SetHasSIMD(true);
+ graph->SetHasTraditionalSIMD(true);
DCHECK_EQ(arm64::callee_saved_fp_registers.GetCount(), 8);
vixl::aarch64::CPURegList reg_list = arm64::callee_saved_fp_registers;
@@ -887,7 +884,8 @@ TEST_F(CodegenTest, ARM64FrameSizeNoSIMD) {
arm64::CodeGeneratorARM64 codegen(graph, *compiler_options);
codegen.Initialize();
- graph->SetHasSIMD(false);
+ graph->SetHasTraditionalSIMD(false);
+ graph->SetHasPredicatedSIMD(false);
DCHECK_EQ(arm64::callee_saved_fp_registers.GetCount(), 8);
vixl::aarch64::CPURegList reg_list = arm64::callee_saved_fp_registers;
diff --git a/compiler/optimizing/codegen_test_utils.h b/compiler/optimizing/codegen_test_utils.h
index 7af9d0f44c..a8425c9915 100644
--- a/compiler/optimizing/codegen_test_utils.h
+++ b/compiler/optimizing/codegen_test_utils.h
@@ -103,8 +103,8 @@ class TestCodeGeneratorARMVIXL : public arm::CodeGeneratorARMVIXL {
blocked_core_registers_[arm::R7] = false;
}
- void MaybeGenerateMarkingRegisterCheck(int code ATTRIBUTE_UNUSED,
- Location temp_loc ATTRIBUTE_UNUSED) override {
+ void MaybeGenerateMarkingRegisterCheck([[maybe_unused]] int code,
+ [[maybe_unused]] Location temp_loc) override {
// When turned on, the marking register checks in
// CodeGeneratorARMVIXL::MaybeGenerateMarkingRegisterCheck expects the
// Thread Register and the Marking Register to be set to
@@ -135,8 +135,8 @@ class TestCodeGeneratorARM64 : public arm64::CodeGeneratorARM64 {
TestCodeGeneratorARM64(HGraph* graph, const CompilerOptions& compiler_options)
: arm64::CodeGeneratorARM64(graph, compiler_options) {}
- void MaybeGenerateMarkingRegisterCheck(int codem ATTRIBUTE_UNUSED,
- Location temp_loc ATTRIBUTE_UNUSED) override {
+ void MaybeGenerateMarkingRegisterCheck([[maybe_unused]] int codem,
+ [[maybe_unused]] Location temp_loc) override {
// When turned on, the marking register checks in
// CodeGeneratorARM64::MaybeGenerateMarkingRegisterCheck expect the
// Thread Register and the Marking Register to be set to
@@ -167,28 +167,6 @@ class TestCodeGeneratorX86 : public x86::CodeGeneratorX86 {
};
#endif
-class InternalCodeAllocator : public CodeAllocator {
- public:
- InternalCodeAllocator() : size_(0) { }
-
- uint8_t* Allocate(size_t size) override {
- size_ = size;
- memory_.reset(new uint8_t[size]);
- return memory_.get();
- }
-
- size_t GetSize() const { return size_; }
- ArrayRef<const uint8_t> GetMemory() const override {
- return ArrayRef<const uint8_t>(memory_.get(), size_);
- }
-
- private:
- size_t size_;
- std::unique_ptr<uint8_t[]> memory_;
-
- DISALLOW_COPY_AND_ASSIGN(InternalCodeAllocator);
-};
-
static bool CanExecuteOnHardware(InstructionSet target_isa) {
return (target_isa == kRuntimeISA)
// Handle the special case of ARM, with two instructions sets (ARM32 and Thumb-2).
@@ -247,8 +225,7 @@ static void VerifyGeneratedCode(InstructionSet target_isa,
}
template <typename Expected>
-static void Run(const InternalCodeAllocator& allocator,
- const CodeGenerator& codegen,
+static void Run(const CodeGenerator& codegen,
bool has_result,
Expected expected) {
InstructionSet target_isa = codegen.GetInstructionSet();
@@ -260,7 +237,7 @@ static void Run(const InternalCodeAllocator& allocator,
};
CodeHolder code_holder;
const void* method_code =
- code_holder.MakeExecutable(allocator.GetMemory(), ArrayRef<const uint8_t>(), target_isa);
+ code_holder.MakeExecutable(codegen.GetCode(), ArrayRef<const uint8_t>(), target_isa);
using fptr = Expected (*)();
fptr f = reinterpret_cast<fptr>(reinterpret_cast<uintptr_t>(method_code));
@@ -294,9 +271,8 @@ static void RunCodeNoCheck(CodeGenerator* codegen,
register_allocator->AllocateRegisters();
}
hook_before_codegen(graph);
- InternalCodeAllocator allocator;
- codegen->Compile(&allocator);
- Run(allocator, *codegen, has_result, expected);
+ codegen->Compile();
+ Run(*codegen, has_result, expected);
}
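With CodeAllocator gone, the tests drive emission through the code generator itself: Compile() writes into storage the codegen owns and GetCode() exposes it for MakeExecutable(). A rough model of that ownership change, using a hypothetical ToyCodeGenerator rather than the real CodeGenerator API:

#include <cstdint>
#include <vector>

// Minimal model of the new ownership: the "code generator" owns the emitted
// bytes and hands out a read-only view, instead of writing into an allocator
// supplied by the caller.
class ToyCodeGenerator {
 public:
  void Compile() {                         // was Compile(CodeAllocator*)
    static const uint8_t kRet[] = {0xc3};  // x86 `ret`, just placeholder bytes
    code_.assign(kRet, kRet + sizeof(kRet));
  }
  const std::vector<uint8_t>& GetCode() const { return code_; }  // was allocator.GetMemory()

 private:
  std::vector<uint8_t> code_;
};

int main() {
  ToyCodeGenerator codegen;
  codegen.Compile();
  return codegen.GetCode().empty() ? 1 : 0;  // the tests hand GetCode() to MakeExecutable()
}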
template <typename Expected>
diff --git a/compiler/optimizing/common_arm64.h b/compiler/optimizing/common_arm64.h
index 20b0e38af5..e2ef8d52f2 100644
--- a/compiler/optimizing/common_arm64.h
+++ b/compiler/optimizing/common_arm64.h
@@ -311,10 +311,8 @@ inline bool Arm64CanEncodeConstantAsImmediate(HConstant* constant, HInstruction*
}
}
-inline Location ARM64EncodableConstantOrRegister(HInstruction* constant,
- HInstruction* instr) {
- if (constant->IsConstant()
- && Arm64CanEncodeConstantAsImmediate(constant->AsConstant(), instr)) {
+inline Location ARM64EncodableConstantOrRegister(HInstruction* constant, HInstruction* instr) {
+ if (constant->IsConstant() && Arm64CanEncodeConstantAsImmediate(constant->AsConstant(), instr)) {
return Location::ConstantLocation(constant);
}
diff --git a/compiler/optimizing/constant_folding.cc b/compiler/optimizing/constant_folding.cc
index 06d19e3f29..66bbf548bb 100644
--- a/compiler/optimizing/constant_folding.cc
+++ b/compiler/optimizing/constant_folding.cc
@@ -18,7 +18,11 @@
#include <algorithm>
+#include "base/bit_utils.h"
+#include "base/casts.h"
+#include "base/logging.h"
#include "dex/dex_file-inl.h"
+#include "intrinsics_enum.h"
#include "optimizing/data_type.h"
#include "optimizing/nodes.h"
@@ -37,13 +41,31 @@ class HConstantFoldingVisitor final : public HGraphDelegateVisitor {
void VisitUnaryOperation(HUnaryOperation* inst) override;
void VisitBinaryOperation(HBinaryOperation* inst) override;
+ // Tries to replace constants in binary operations like:
+ // * BinaryOp(Select(false_constant, true_constant, condition), other_constant), or
+ // * BinaryOp(other_constant, Select(false_constant, true_constant, condition))
+ // with consolidated constants. For example, Add(Select(10, 20, condition), 5) can be replaced
+ // with Select(15, 25, condition).
+ bool TryRemoveBinaryOperationViaSelect(HBinaryOperation* inst);
+
void VisitArrayLength(HArrayLength* inst) override;
void VisitDivZeroCheck(HDivZeroCheck* inst) override;
void VisitIf(HIf* inst) override;
+ void VisitInvoke(HInvoke* inst) override;
void VisitTypeConversion(HTypeConversion* inst) override;
void PropagateValue(HBasicBlock* starting_block, HInstruction* variable, HConstant* constant);
+ // Intrinsics foldings
+ void FoldReverseIntrinsic(HInvoke* invoke);
+ void FoldReverseBytesIntrinsic(HInvoke* invoke);
+ void FoldBitCountIntrinsic(HInvoke* invoke);
+ void FoldDivideUnsignedIntrinsic(HInvoke* invoke);
+ void FoldHighestOneBitIntrinsic(HInvoke* invoke);
+ void FoldLowestOneBitIntrinsic(HInvoke* invoke);
+ void FoldNumberOfLeadingZerosIntrinsic(HInvoke* invoke);
+ void FoldNumberOfTrailingZerosIntrinsic(HInvoke* invoke);
+
// Use all optimizations without restrictions.
bool use_all_optimizations_;
@@ -113,7 +135,67 @@ void HConstantFoldingVisitor::VisitUnaryOperation(HUnaryOperation* inst) {
if (constant != nullptr) {
inst->ReplaceWith(constant);
inst->GetBlock()->RemoveInstruction(inst);
+ } else if (inst->InputAt(0)->IsSelect() && inst->InputAt(0)->HasOnlyOneNonEnvironmentUse()) {
+ // Try to replace the select's inputs in Select+UnaryOperation cases. We can do this if both
+ // inputs to the select are constants, and this is the only use of the select.
+ HSelect* select = inst->InputAt(0)->AsSelect();
+ HConstant* false_constant = inst->TryStaticEvaluation(select->GetFalseValue());
+ if (false_constant == nullptr) {
+ return;
+ }
+ HConstant* true_constant = inst->TryStaticEvaluation(select->GetTrueValue());
+ if (true_constant == nullptr) {
+ return;
+ }
+ DCHECK_EQ(select->InputAt(0), select->GetFalseValue());
+ DCHECK_EQ(select->InputAt(1), select->GetTrueValue());
+ select->ReplaceInput(false_constant, 0);
+ select->ReplaceInput(true_constant, 1);
+ select->UpdateType();
+ inst->ReplaceWith(select);
+ inst->GetBlock()->RemoveInstruction(inst);
+ }
+}
+
+bool HConstantFoldingVisitor::TryRemoveBinaryOperationViaSelect(HBinaryOperation* inst) {
+ if (inst->GetLeft()->IsSelect() == inst->GetRight()->IsSelect()) {
+ // If both of them are constants, VisitBinaryOperation already tried the static evaluation. If
+ // both of them are selects, then we can't simplify.
+    // TODO(solanes): Technically, if both of them are selects we could simplify iff both selects'
+    // conditions are equal, e.g. Add(Select(1, 2, cond), Select(3, 4, cond)) could be replaced with
+ // Select(4, 6, cond). This seems very unlikely to happen so we don't implement it.
+ return false;
+ }
+
+ const bool left_is_select = inst->GetLeft()->IsSelect();
+ HSelect* select = left_is_select ? inst->GetLeft()->AsSelect() : inst->GetRight()->AsSelect();
+ HInstruction* maybe_constant = left_is_select ? inst->GetRight() : inst->GetLeft();
+
+ if (select->HasOnlyOneNonEnvironmentUse()) {
+ // Try to replace the select's inputs in Select+BinaryOperation. We can do this if both
+ // inputs to the select are constants, and this is the only use of the select.
+ HConstant* false_constant =
+ inst->TryStaticEvaluation(left_is_select ? select->GetFalseValue() : maybe_constant,
+ left_is_select ? maybe_constant : select->GetFalseValue());
+ if (false_constant == nullptr) {
+ return false;
+ }
+ HConstant* true_constant =
+ inst->TryStaticEvaluation(left_is_select ? select->GetTrueValue() : maybe_constant,
+ left_is_select ? maybe_constant : select->GetTrueValue());
+ if (true_constant == nullptr) {
+ return false;
+ }
+ DCHECK_EQ(select->InputAt(0), select->GetFalseValue());
+ DCHECK_EQ(select->InputAt(1), select->GetTrueValue());
+ select->ReplaceInput(false_constant, 0);
+ select->ReplaceInput(true_constant, 1);
+ select->UpdateType();
+ inst->ReplaceWith(select);
+ inst->GetBlock()->RemoveInstruction(inst);
+ return true;
}
+ return false;
}
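TryRemoveBinaryOperationViaSelect effectively distributes a binary operation with one constant operand over a select whose branches are both constants, turning Add(Select(10, 20, cond), 5) into Select(15, 25, cond). A small standalone sketch of that transformation on a toy IR, assuming integer constants and addition only:

#include <cstdint>
#include <optional>

// Toy select over two integer constants; the condition stays symbolic.
struct ToySelect {
  int64_t false_value;
  int64_t true_value;
};

// Folds Add(select, constant) or Add(constant, select) into a select over the
// two folded constants, mirroring TryRemoveBinaryOperationViaSelect above.
static std::optional<ToySelect> FoldAddViaSelect(std::optional<ToySelect> left_select,
                                                 std::optional<int64_t> left_constant,
                                                 std::optional<ToySelect> right_select,
                                                 std::optional<int64_t> right_constant) {
  if (left_select.has_value() == right_select.has_value()) {
    return std::nullopt;  // both selects or neither: nothing to fold here
  }
  const bool left_is_select = left_select.has_value();
  const std::optional<int64_t>& constant = left_is_select ? right_constant : left_constant;
  if (!constant.has_value()) {
    return std::nullopt;  // the non-select side must itself be a constant
  }
  const ToySelect& select = left_is_select ? *left_select : *right_select;
  return ToySelect{select.false_value + *constant, select.true_value + *constant};
}

// FoldAddViaSelect(ToySelect{10, 20}, std::nullopt, std::nullopt, 5) yields
// ToySelect{15, 25}, i.e. Select(15, 25, cond). For non-commutative operations
// the operand order must be preserved, which the pass does by passing the
// operands to TryStaticEvaluation in their original order.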
void HConstantFoldingVisitor::VisitBinaryOperation(HBinaryOperation* inst) {
@@ -123,6 +205,8 @@ void HConstantFoldingVisitor::VisitBinaryOperation(HBinaryOperation* inst) {
if (constant != nullptr) {
inst->ReplaceWith(constant);
inst->GetBlock()->RemoveInstruction(inst);
+ } else if (TryRemoveBinaryOperationViaSelect(inst)) {
+ // Already replaced inside TryRemoveBinaryOperationViaSelect.
} else {
InstructionWithAbsorbingInputSimplifier simplifier(GetGraph());
inst->Accept(&simplifier);
@@ -281,6 +365,245 @@ void HConstantFoldingVisitor::VisitIf(HIf* inst) {
}
}
+void HConstantFoldingVisitor::VisitInvoke(HInvoke* inst) {
+ switch (inst->GetIntrinsic()) {
+ case Intrinsics::kIntegerReverse:
+ case Intrinsics::kLongReverse:
+ FoldReverseIntrinsic(inst);
+ break;
+ case Intrinsics::kIntegerReverseBytes:
+ case Intrinsics::kLongReverseBytes:
+ case Intrinsics::kShortReverseBytes:
+ FoldReverseBytesIntrinsic(inst);
+ break;
+ case Intrinsics::kIntegerBitCount:
+ case Intrinsics::kLongBitCount:
+ FoldBitCountIntrinsic(inst);
+ break;
+ case Intrinsics::kIntegerDivideUnsigned:
+ case Intrinsics::kLongDivideUnsigned:
+ FoldDivideUnsignedIntrinsic(inst);
+ break;
+ case Intrinsics::kIntegerHighestOneBit:
+ case Intrinsics::kLongHighestOneBit:
+ FoldHighestOneBitIntrinsic(inst);
+ break;
+ case Intrinsics::kIntegerLowestOneBit:
+ case Intrinsics::kLongLowestOneBit:
+ FoldLowestOneBitIntrinsic(inst);
+ break;
+ case Intrinsics::kIntegerNumberOfLeadingZeros:
+ case Intrinsics::kLongNumberOfLeadingZeros:
+ FoldNumberOfLeadingZerosIntrinsic(inst);
+ break;
+ case Intrinsics::kIntegerNumberOfTrailingZeros:
+ case Intrinsics::kLongNumberOfTrailingZeros:
+ FoldNumberOfTrailingZerosIntrinsic(inst);
+ break;
+ default:
+ break;
+ }
+}
+
+void HConstantFoldingVisitor::FoldReverseIntrinsic(HInvoke* inst) {
+ DCHECK(inst->GetIntrinsic() == Intrinsics::kIntegerReverse ||
+ inst->GetIntrinsic() == Intrinsics::kLongReverse);
+
+ HInstruction* input = inst->InputAt(0);
+ if (!input->IsConstant()) {
+ return;
+ }
+
+ // Integer and Long intrinsics have different return types.
+ if (inst->GetIntrinsic() == Intrinsics::kIntegerReverse) {
+ DCHECK(input->IsIntConstant());
+ inst->ReplaceWith(
+ GetGraph()->GetIntConstant(ReverseBits32(input->AsIntConstant()->GetValue())));
+ } else {
+ DCHECK(input->IsLongConstant());
+ inst->ReplaceWith(
+ GetGraph()->GetLongConstant(ReverseBits64(input->AsLongConstant()->GetValue())));
+ }
+ inst->GetBlock()->RemoveInstruction(inst);
+}
+
+void HConstantFoldingVisitor::FoldReverseBytesIntrinsic(HInvoke* inst) {
+ DCHECK(inst->GetIntrinsic() == Intrinsics::kIntegerReverseBytes ||
+ inst->GetIntrinsic() == Intrinsics::kLongReverseBytes ||
+ inst->GetIntrinsic() == Intrinsics::kShortReverseBytes);
+
+ HInstruction* input = inst->InputAt(0);
+ if (!input->IsConstant()) {
+ return;
+ }
+
+ // Integer, Long, and Short intrinsics have different return types.
+ if (inst->GetIntrinsic() == Intrinsics::kIntegerReverseBytes) {
+ DCHECK(input->IsIntConstant());
+ inst->ReplaceWith(GetGraph()->GetIntConstant(BSWAP(input->AsIntConstant()->GetValue())));
+ } else if (inst->GetIntrinsic() == Intrinsics::kLongReverseBytes) {
+ DCHECK(input->IsLongConstant());
+ inst->ReplaceWith(GetGraph()->GetLongConstant(BSWAP(input->AsLongConstant()->GetValue())));
+ } else {
+ DCHECK(input->IsIntConstant());
+ inst->ReplaceWith(GetGraph()->GetIntConstant(
+ BSWAP(dchecked_integral_cast<int16_t>(input->AsIntConstant()->GetValue()))));
+ }
+ inst->GetBlock()->RemoveInstruction(inst);
+}
+
+void HConstantFoldingVisitor::FoldBitCountIntrinsic(HInvoke* inst) {
+ DCHECK(inst->GetIntrinsic() == Intrinsics::kIntegerBitCount ||
+ inst->GetIntrinsic() == Intrinsics::kLongBitCount);
+
+ HInstruction* input = inst->InputAt(0);
+ if (!input->IsConstant()) {
+ return;
+ }
+
+ DCHECK_IMPLIES(inst->GetIntrinsic() == Intrinsics::kIntegerBitCount, input->IsIntConstant());
+ DCHECK_IMPLIES(inst->GetIntrinsic() == Intrinsics::kLongBitCount, input->IsLongConstant());
+
+ // Note that both the Integer and Long intrinsics return an int as a result.
+ int result = inst->GetIntrinsic() == Intrinsics::kIntegerBitCount ?
+ POPCOUNT(input->AsIntConstant()->GetValue()) :
+ POPCOUNT(input->AsLongConstant()->GetValue());
+ inst->ReplaceWith(GetGraph()->GetIntConstant(result));
+ inst->GetBlock()->RemoveInstruction(inst);
+}
+
+void HConstantFoldingVisitor::FoldDivideUnsignedIntrinsic(HInvoke* inst) {
+ DCHECK(inst->GetIntrinsic() == Intrinsics::kIntegerDivideUnsigned ||
+ inst->GetIntrinsic() == Intrinsics::kLongDivideUnsigned);
+
+ HInstruction* divisor = inst->InputAt(1);
+ if (!divisor->IsConstant()) {
+ return;
+ }
+ DCHECK_IMPLIES(inst->GetIntrinsic() == Intrinsics::kIntegerDivideUnsigned,
+ divisor->IsIntConstant());
+ DCHECK_IMPLIES(inst->GetIntrinsic() == Intrinsics::kLongDivideUnsigned,
+ divisor->IsLongConstant());
+ const bool is_int_intrinsic = inst->GetIntrinsic() == Intrinsics::kIntegerDivideUnsigned;
+ if ((is_int_intrinsic && divisor->AsIntConstant()->IsArithmeticZero()) ||
+ (!is_int_intrinsic && divisor->AsLongConstant()->IsArithmeticZero())) {
+ // We will be throwing, don't constant fold.
+ inst->SetAlwaysThrows(true);
+ GetGraph()->SetHasAlwaysThrowingInvokes(true);
+ return;
+ }
+
+ HInstruction* dividend = inst->InputAt(0);
+ if (!dividend->IsConstant()) {
+ return;
+ }
+ DCHECK_IMPLIES(inst->GetIntrinsic() == Intrinsics::kIntegerDivideUnsigned,
+ dividend->IsIntConstant());
+ DCHECK_IMPLIES(inst->GetIntrinsic() == Intrinsics::kLongDivideUnsigned,
+ dividend->IsLongConstant());
+
+ if (is_int_intrinsic) {
+ uint32_t dividend_val =
+ dchecked_integral_cast<uint32_t>(dividend->AsIntConstant()->GetValueAsUint64());
+ uint32_t divisor_val =
+ dchecked_integral_cast<uint32_t>(divisor->AsIntConstant()->GetValueAsUint64());
+ inst->ReplaceWith(GetGraph()->GetIntConstant(static_cast<int32_t>(dividend_val / divisor_val)));
+ } else {
+ uint64_t dividend_val = dividend->AsLongConstant()->GetValueAsUint64();
+ uint64_t divisor_val = divisor->AsLongConstant()->GetValueAsUint64();
+ inst->ReplaceWith(
+ GetGraph()->GetLongConstant(static_cast<int64_t>(dividend_val / divisor_val)));
+ }
+
+ inst->GetBlock()->RemoveInstruction(inst);
+}
+
+void HConstantFoldingVisitor::FoldHighestOneBitIntrinsic(HInvoke* inst) {
+ DCHECK(inst->GetIntrinsic() == Intrinsics::kIntegerHighestOneBit ||
+ inst->GetIntrinsic() == Intrinsics::kLongHighestOneBit);
+
+ HInstruction* input = inst->InputAt(0);
+ if (!input->IsConstant()) {
+ return;
+ }
+
+ // Integer and Long intrinsics have different return types.
+ if (inst->GetIntrinsic() == Intrinsics::kIntegerHighestOneBit) {
+ DCHECK(input->IsIntConstant());
+ inst->ReplaceWith(
+ GetGraph()->GetIntConstant(HighestOneBitValue(input->AsIntConstant()->GetValue())));
+ } else {
+ DCHECK(input->IsLongConstant());
+ inst->ReplaceWith(
+ GetGraph()->GetLongConstant(HighestOneBitValue(input->AsLongConstant()->GetValue())));
+ }
+ inst->GetBlock()->RemoveInstruction(inst);
+}
+
+void HConstantFoldingVisitor::FoldLowestOneBitIntrinsic(HInvoke* inst) {
+ DCHECK(inst->GetIntrinsic() == Intrinsics::kIntegerLowestOneBit ||
+ inst->GetIntrinsic() == Intrinsics::kLongLowestOneBit);
+
+ HInstruction* input = inst->InputAt(0);
+ if (!input->IsConstant()) {
+ return;
+ }
+
+ // Integer and Long intrinsics have different return types.
+ if (inst->GetIntrinsic() == Intrinsics::kIntegerLowestOneBit) {
+ DCHECK(input->IsIntConstant());
+ inst->ReplaceWith(
+ GetGraph()->GetIntConstant(LowestOneBitValue(input->AsIntConstant()->GetValue())));
+ } else {
+ DCHECK(input->IsLongConstant());
+ inst->ReplaceWith(
+ GetGraph()->GetLongConstant(LowestOneBitValue(input->AsLongConstant()->GetValue())));
+ }
+ inst->GetBlock()->RemoveInstruction(inst);
+}
+
+void HConstantFoldingVisitor::FoldNumberOfLeadingZerosIntrinsic(HInvoke* inst) {
+ DCHECK(inst->GetIntrinsic() == Intrinsics::kIntegerNumberOfLeadingZeros ||
+ inst->GetIntrinsic() == Intrinsics::kLongNumberOfLeadingZeros);
+
+ HInstruction* input = inst->InputAt(0);
+ if (!input->IsConstant()) {
+ return;
+ }
+
+ DCHECK_IMPLIES(inst->GetIntrinsic() == Intrinsics::kIntegerNumberOfLeadingZeros,
+ input->IsIntConstant());
+ DCHECK_IMPLIES(inst->GetIntrinsic() == Intrinsics::kLongNumberOfLeadingZeros,
+ input->IsLongConstant());
+
+ // Note that both the Integer and Long intrinsics return an int as a result.
+ int result = input->IsIntConstant() ? JAVASTYLE_CLZ(input->AsIntConstant()->GetValue()) :
+ JAVASTYLE_CLZ(input->AsLongConstant()->GetValue());
+ inst->ReplaceWith(GetGraph()->GetIntConstant(result));
+ inst->GetBlock()->RemoveInstruction(inst);
+}
+
+void HConstantFoldingVisitor::FoldNumberOfTrailingZerosIntrinsic(HInvoke* inst) {
+ DCHECK(inst->GetIntrinsic() == Intrinsics::kIntegerNumberOfTrailingZeros ||
+ inst->GetIntrinsic() == Intrinsics::kLongNumberOfTrailingZeros);
+
+ HInstruction* input = inst->InputAt(0);
+ if (!input->IsConstant()) {
+ return;
+ }
+
+ DCHECK_IMPLIES(inst->GetIntrinsic() == Intrinsics::kIntegerNumberOfTrailingZeros,
+ input->IsIntConstant());
+ DCHECK_IMPLIES(inst->GetIntrinsic() == Intrinsics::kLongNumberOfTrailingZeros,
+ input->IsLongConstant());
+
+ // Note that both the Integer and Long intrinsics return an int as a result.
+ int result = input->IsIntConstant() ? JAVASTYLE_CTZ(input->AsIntConstant()->GetValue()) :
+ JAVASTYLE_CTZ(input->AsLongConstant()->GetValue());
+ inst->ReplaceWith(GetGraph()->GetIntConstant(result));
+ inst->GetBlock()->RemoveInstruction(inst);
+}
+
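The divideUnsigned folding is the one intrinsic here with a throwing corner case: a constant zero divisor marks the invoke as always-throwing instead of folding, and otherwise the division is performed on the unsigned reinterpretation of the operands. A minimal sketch of the 32-bit case, assuming plain int32_t constants:

#include <cstdint>
#include <optional>

// Mirrors Integer.divideUnsigned(dividend, divisor) constant folding:
// returns nullopt when the divisor is zero (the call would throw instead).
static std::optional<int32_t> FoldIntegerDivideUnsigned(int32_t dividend, int32_t divisor) {
  if (divisor == 0) {
    return std::nullopt;  // the pass marks the invoke as always throwing
  }
  uint32_t dividend_val = static_cast<uint32_t>(dividend);
  uint32_t divisor_val = static_cast<uint32_t>(divisor);
  return static_cast<int32_t>(dividend_val / divisor_val);
}

// Example: FoldIntegerDivideUnsigned(-2, 3) == 1431655764, since -2 is treated
// as 4294967294 when reinterpreted as unsigned.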
void HConstantFoldingVisitor::VisitArrayLength(HArrayLength* inst) {
HInstruction* input = inst->InputAt(0);
if (input->IsLoadString()) {
@@ -299,6 +622,25 @@ void HConstantFoldingVisitor::VisitTypeConversion(HTypeConversion* inst) {
if (constant != nullptr) {
inst->ReplaceWith(constant);
inst->GetBlock()->RemoveInstruction(inst);
+ } else if (inst->InputAt(0)->IsSelect() && inst->InputAt(0)->HasOnlyOneNonEnvironmentUse()) {
+ // Try to replace the select's inputs in Select+TypeConversion. We can do this if both
+ // inputs to the select are constants, and this is the only use of the select.
+ HSelect* select = inst->InputAt(0)->AsSelect();
+ HConstant* false_constant = inst->TryStaticEvaluation(select->GetFalseValue());
+ if (false_constant == nullptr) {
+ return;
+ }
+ HConstant* true_constant = inst->TryStaticEvaluation(select->GetTrueValue());
+ if (true_constant == nullptr) {
+ return;
+ }
+ DCHECK_EQ(select->InputAt(0), select->GetFalseValue());
+ DCHECK_EQ(select->InputAt(1), select->GetTrueValue());
+ select->ReplaceInput(false_constant, 0);
+ select->ReplaceInput(true_constant, 1);
+ select->UpdateType();
+ inst->ReplaceWith(select);
+ inst->GetBlock()->RemoveInstruction(inst);
}
}
@@ -583,7 +925,7 @@ void InstructionWithAbsorbingInputSimplifier::VisitRem(HRem* instruction) {
block->RemoveInstruction(instruction);
}
- HConstant* cst_right = instruction->GetRight()->AsConstant();
+ HConstant* cst_right = instruction->GetRight()->AsConstantOrNull();
if (((cst_right != nullptr) &&
(cst_right->IsOne() || cst_right->IsMinusOne())) ||
(instruction->GetLeft() == instruction->GetRight())) {
diff --git a/compiler/optimizing/constant_folding_test.cc b/compiler/optimizing/constant_folding_test.cc
index 741fd3f822..acdc8e6d3c 100644
--- a/compiler/optimizing/constant_folding_test.cc
+++ b/compiler/optimizing/constant_folding_test.cc
@@ -551,7 +551,7 @@ TEST_F(ConstantFoldingTest, LongConstantFoldingOnSubtraction) {
*
* The intent of this test is to ensure that all constant expressions
* are actually evaluated at compile-time, thanks to the reverse
- * (forward) post-order traversal of the the dominator tree.
+ * (forward) post-order traversal of the dominator tree.
*
* 16-bit
* offset
diff --git a/compiler/optimizing/constructor_fence_redundancy_elimination.cc b/compiler/optimizing/constructor_fence_redundancy_elimination.cc
index d9b7652f32..48635cfd15 100644
--- a/compiler/optimizing/constructor_fence_redundancy_elimination.cc
+++ b/compiler/optimizing/constructor_fence_redundancy_elimination.cc
@@ -78,7 +78,7 @@ class CFREVisitor final : public HGraphVisitor {
VisitSetLocation(instruction, value);
}
- void VisitDeoptimize(HDeoptimize* instruction ATTRIBUTE_UNUSED) override {
+ void VisitDeoptimize([[maybe_unused]] HDeoptimize* instruction) override {
// Pessimize: Merge all fences.
MergeCandidateFences();
}
@@ -151,7 +151,7 @@ class CFREVisitor final : public HGraphVisitor {
}
}
- void VisitSetLocation(HInstruction* inst ATTRIBUTE_UNUSED, HInstruction* store_input) {
+ void VisitSetLocation([[maybe_unused]] HInstruction* inst, HInstruction* store_input) {
// An object is considered "published" if it's stored onto the heap.
// Sidenote: A later "LSE" pass can still remove the fence if it proves the
// object doesn't actually escape.
diff --git a/compiler/optimizing/critical_native_abi_fixup_arm.cc b/compiler/optimizing/critical_native_abi_fixup_arm.cc
index 77e156608b..4b1dec05b5 100644
--- a/compiler/optimizing/critical_native_abi_fixup_arm.cc
+++ b/compiler/optimizing/critical_native_abi_fixup_arm.cc
@@ -16,12 +16,8 @@
#include "critical_native_abi_fixup_arm.h"
-#include "art_method-inl.h"
#include "intrinsics.h"
-#include "jni/jni_internal.h"
#include "nodes.h"
-#include "scoped_thread_state_change-inl.h"
-#include "well_known_classes.h"
namespace art HIDDEN {
namespace arm {
@@ -43,46 +39,7 @@ static void FixUpArguments(HInvokeStaticOrDirect* invoke) {
break; // Remaining arguments are passed on stack.
}
if (DataType::IsFloatingPointType(input_type)) {
- bool is_double = (input_type == DataType::Type::kFloat64);
- DataType::Type converted_type = is_double ? DataType::Type::kInt64 : DataType::Type::kInt32;
- ArtMethod* resolved_method = is_double
- ? WellKnownClasses::java_lang_Double_doubleToRawLongBits
- : WellKnownClasses::java_lang_Float_floatToRawIntBits;
- DCHECK(resolved_method != nullptr);
- DCHECK(resolved_method->IsIntrinsic());
- MethodReference target_method(nullptr, 0);
- {
- ScopedObjectAccess soa(Thread::Current());
- target_method =
- MethodReference(resolved_method->GetDexFile(), resolved_method->GetDexMethodIndex());
- }
- // Use arbitrary dispatch info that does not require the method argument.
- HInvokeStaticOrDirect::DispatchInfo dispatch_info = {
- MethodLoadKind::kBssEntry,
- CodePtrLocation::kCallArtMethod,
- /*method_load_data=*/ 0u
- };
- HBasicBlock* block = invoke->GetBlock();
- ArenaAllocator* allocator = block->GetGraph()->GetAllocator();
- HInvokeStaticOrDirect* new_input = new (allocator) HInvokeStaticOrDirect(
- allocator,
- /*number_of_arguments=*/ 1u,
- converted_type,
- invoke->GetDexPc(),
- /*method_reference=*/ MethodReference(nullptr, dex::kDexNoIndex),
- resolved_method,
- dispatch_info,
- kStatic,
- target_method,
- HInvokeStaticOrDirect::ClinitCheckRequirement::kNone,
- !block->GetGraph()->IsDebuggable());
- // The intrinsic has no side effects and does not need environment or dex cache on ARM.
- new_input->SetSideEffects(SideEffects::None());
- IntrinsicOptimizations opt(new_input);
- opt.SetDoesNotNeedEnvironment();
- new_input->SetRawInputAt(0u, input);
- block->InsertInstructionBefore(new_input, invoke);
- invoke->ReplaceInput(new_input, i);
+ InsertFpToIntegralIntrinsic(invoke, i);
}
reg = next_reg;
}
diff --git a/compiler/optimizing/critical_native_abi_fixup_riscv64.cc b/compiler/optimizing/critical_native_abi_fixup_riscv64.cc
new file mode 100644
index 0000000000..c2c98d1df9
--- /dev/null
+++ b/compiler/optimizing/critical_native_abi_fixup_riscv64.cc
@@ -0,0 +1,71 @@
+/*
+ * Copyright (C) 2023 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "critical_native_abi_fixup_riscv64.h"
+
+#include "arch/riscv64/jni_frame_riscv64.h"
+#include "intrinsics.h"
+#include "nodes.h"
+
+namespace art HIDDEN {
+namespace riscv64 {
+
+// Fix up FP arguments passed in core registers for call to @CriticalNative by inserting fake calls
+// to Float.floatToRawIntBits() or Double.doubleToRawLongBits() to satisfy type consistency checks.
+static void FixUpArguments(HInvokeStaticOrDirect* invoke) {
+ DCHECK_EQ(invoke->GetCodePtrLocation(), CodePtrLocation::kCallCriticalNative);
+ size_t core_reg = 0u;
+ size_t fp_reg = 0u;
+ for (size_t i = 0, num_args = invoke->GetNumberOfArguments(); i != num_args; ++i) {
+ if (core_reg == kMaxIntLikeArgumentRegisters) {
+ break; // Remaining arguments are passed in FP regs or on the stack.
+ }
+ HInstruction* input = invoke->InputAt(i);
+ DataType::Type input_type = input->GetType();
+ if (DataType::IsFloatingPointType(input_type)) {
+ if (fp_reg < kMaxFloatOrDoubleArgumentRegisters) {
+ ++fp_reg;
+ } else {
+ DCHECK_LT(core_reg, kMaxIntLikeArgumentRegisters);
+ InsertFpToIntegralIntrinsic(invoke, i);
+ ++core_reg;
+ }
+ } else {
+ ++core_reg;
+ }
+ }
+}
+
+bool CriticalNativeAbiFixupRiscv64::Run() {
+ if (!graph_->HasDirectCriticalNativeCall()) {
+ return false;
+ }
+
+ for (HBasicBlock* block : graph_->GetReversePostOrder()) {
+ for (HInstructionIterator it(block->GetInstructions()); !it.Done(); it.Advance()) {
+ HInstruction* instruction = it.Current();
+ if (instruction->IsInvokeStaticOrDirect() &&
+ instruction->AsInvokeStaticOrDirect()->GetCodePtrLocation() ==
+ CodePtrLocation::kCallCriticalNative) {
+ FixUpArguments(instruction->AsInvokeStaticOrDirect());
+ }
+ }
+ }
+ return true;
+}
+
+} // namespace riscv64
+} // namespace art
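On riscv64 the FP arguments travel in FP registers until those run out; only the overflow spills into the remaining integer argument registers, and only those spilled arguments need the floatToRawIntBits/doubleToRawLongBits wrapping. A standalone sketch of that classification; the register counts are assumptions mirroring the constants pulled in from jni_frame_riscv64.h:

#include <cstddef>
#include <vector>

constexpr size_t kIntLikeArgRegs = 8u;  // assumed a0-a7
constexpr size_t kFpArgRegs = 8u;       // assumed fa0-fa7

// Returns the argument indices that need an FP->integral bitcast inserted,
// i.e. FP arguments that no longer fit in FP registers but still fit in
// integer registers for a @CriticalNative call.
static std::vector<size_t> ArgsNeedingFpToIntegralFixup(const std::vector<bool>& is_fp_arg) {
  std::vector<size_t> result;
  size_t core_reg = 0u;
  size_t fp_reg = 0u;
  for (size_t i = 0; i < is_fp_arg.size() && core_reg < kIntLikeArgRegs; ++i) {
    if (is_fp_arg[i]) {
      if (fp_reg < kFpArgRegs) {
        ++fp_reg;             // still fits in an FP register, nothing to do
      } else {
        result.push_back(i);  // spills into a core register: insert the bitcast
        ++core_reg;
      }
    } else {
      ++core_reg;             // integer-like argument consumes a core register
    }
  }
  return result;  // remaining arguments go on the stack and need no fixup
}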
diff --git a/compiler/optimizing/critical_native_abi_fixup_riscv64.h b/compiler/optimizing/critical_native_abi_fixup_riscv64.h
new file mode 100644
index 0000000000..dc76cff2b8
--- /dev/null
+++ b/compiler/optimizing/critical_native_abi_fixup_riscv64.h
@@ -0,0 +1,41 @@
+/*
+ * Copyright (C) 2023 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_COMPILER_OPTIMIZING_CRITICAL_NATIVE_ABI_FIXUP_RISCV64_H_
+#define ART_COMPILER_OPTIMIZING_CRITICAL_NATIVE_ABI_FIXUP_RISCV64_H_
+
+#include "base/macros.h"
+#include "nodes.h"
+#include "optimization.h"
+
+namespace art HIDDEN {
+namespace riscv64 {
+
+class CriticalNativeAbiFixupRiscv64 : public HOptimization {
+ public:
+ CriticalNativeAbiFixupRiscv64(HGraph* graph, OptimizingCompilerStats* stats)
+ : HOptimization(graph, kCriticalNativeAbiFixupRiscv64PassName, stats) {}
+
+ static constexpr const char* kCriticalNativeAbiFixupRiscv64PassName =
+ "critical_native_abi_fixup_riscv64";
+
+ bool Run() override;
+};
+
+} // namespace riscv64
+} // namespace art
+
+#endif // ART_COMPILER_OPTIMIZING_CRITICAL_NATIVE_ABI_FIXUP_RISCV64_H_
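
The header keeps the pass a plain HOptimization, so exercising it in isolation would look roughly like the hypothetical snippet below; the graph and stats pointers are assumed to come from the surrounding compilation, and this is an illustration rather than code lifted from the ART pass manager.

// Hypothetical driver, assuming an already-built HGraph* graph and an
// OptimizingCompilerStats* stats are in scope.
riscv64::CriticalNativeAbiFixupRiscv64 fixup(graph, stats);
bool changed = fixup.Run();  // False when the graph has no direct @CriticalNative call.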
diff --git a/compiler/optimizing/dead_code_elimination.cc b/compiler/optimizing/dead_code_elimination.cc
index cf49e39849..5b420db5be 100644
--- a/compiler/optimizing/dead_code_elimination.cc
+++ b/compiler/optimizing/dead_code_elimination.cc
@@ -24,6 +24,7 @@
#include "base/scoped_arena_containers.h"
#include "base/stl_util.h"
#include "optimizing/nodes.h"
+#include "optimizing/nodes_vector.h"
#include "ssa_phi_elimination.h"
namespace art HIDDEN {
@@ -311,9 +312,7 @@ bool HDeadCodeElimination::SimplifyAlwaysThrows() {
// We need to re-analyze the graph in order to run DCE afterwards.
if (rerun_dominance_and_loop_analysis) {
- graph_->ClearLoopInformation();
- graph_->ClearDominanceInformation();
- graph_->BuildDominatorTree();
+ graph_->RecomputeDominatorTree();
return true;
}
return false;
@@ -437,9 +436,7 @@ bool HDeadCodeElimination::SimplifyIfs() {
// We need to re-analyze the graph in order to run DCE afterwards.
if (simplified_one_or_more_ifs) {
if (rerun_dominance_and_loop_analysis) {
- graph_->ClearLoopInformation();
- graph_->ClearDominanceInformation();
- graph_->BuildDominatorTree();
+ graph_->RecomputeDominatorTree();
} else {
graph_->ClearDominanceInformation();
// We have introduced critical edges, remove them.
@@ -773,6 +770,93 @@ bool HDeadCodeElimination::RemoveUnneededTries() {
}
}
+bool HDeadCodeElimination::RemoveEmptyIfs() {
+ bool did_opt = false;
+ for (HBasicBlock* block : graph_->GetPostOrder()) {
+ if (!block->EndsWithIf()) {
+ continue;
+ }
+
+ HIf* if_instr = block->GetLastInstruction()->AsIf();
+ HBasicBlock* true_block = if_instr->IfTrueSuccessor();
+ HBasicBlock* false_block = if_instr->IfFalseSuccessor();
+
+ // We can use `visited_blocks` to detect cases like
+ // 1
+ // / \
+ // 2 3
+ // \ /
+ // 4 ...
+ // | /
+ // 5
+ // where 2, 3, and 4 are single HGoto blocks, and block 5 has Phis.
+ ScopedArenaAllocator allocator(graph_->GetArenaStack());
+ ScopedArenaHashSet<HBasicBlock*> visited_blocks(allocator.Adapter(kArenaAllocDCE));
+ HBasicBlock* merge_true = true_block;
+ visited_blocks.insert(merge_true);
+ while (merge_true->IsSingleGoto()) {
+ merge_true = merge_true->GetSuccessors()[0];
+ visited_blocks.insert(merge_true);
+ }
+
+ HBasicBlock* merge_false = false_block;
+ while (visited_blocks.find(merge_false) == visited_blocks.end() &&
+ merge_false->IsSingleGoto()) {
+ merge_false = merge_false->GetSuccessors()[0];
+ }
+
+ if (visited_blocks.find(merge_false) == visited_blocks.end() ||
+ !merge_false->GetPhis().IsEmpty()) {
+ // TODO(solanes): We could allow Phis iff both branches have the same value for all Phis. This
+ // may not be covered by SsaRedundantPhiElimination in cases like `HPhi[A,A,B]` where the Phi
+ // itself is not redundant for the general case but it is for a pair of branches.
+ continue;
+ }
+
+ // Data structures to help remove now-dead instructions.
+ ScopedArenaQueue<HInstruction*> maybe_remove(allocator.Adapter(kArenaAllocDCE));
+ ScopedArenaHashSet<HInstruction*> visited(allocator.Adapter(kArenaAllocDCE));
+ maybe_remove.push(if_instr->InputAt(0));
+
+ // Swap HIf with HGoto
+ block->ReplaceAndRemoveInstructionWith(
+ if_instr, new (graph_->GetAllocator()) HGoto(if_instr->GetDexPc()));
+
+ // Reconnect blocks
+ block->RemoveSuccessor(true_block);
+ block->RemoveSuccessor(false_block);
+ true_block->RemovePredecessor(block);
+ false_block->RemovePredecessor(block);
+ block->AddSuccessor(merge_false);
+
+    // Remove now-dead instructions, e.g. comparisons that are only used as input to the if
+    // instruction. This can allow further removal of other empty ifs.
+ while (!maybe_remove.empty()) {
+ HInstruction* instr = maybe_remove.front();
+ maybe_remove.pop();
+ if (visited.find(instr) != visited.end()) {
+ continue;
+ }
+ visited.insert(instr);
+ if (instr->IsDeadAndRemovable()) {
+ for (HInstruction* input : instr->GetInputs()) {
+ maybe_remove.push(input);
+ }
+ instr->GetBlock()->RemoveInstructionOrPhi(instr);
+ MaybeRecordStat(stats_, MethodCompilationStat::kRemovedDeadInstruction);
+ }
+ }
+
+ did_opt = true;
+ }
+
+ if (did_opt) {
+ graph_->RecomputeDominatorTree();
+ }
+
+ return did_opt;
+}
+
bool HDeadCodeElimination::RemoveDeadBlocks(bool force_recomputation,
bool force_loop_recomputation) {
DCHECK_IMPLIES(force_loop_recomputation, force_recomputation);
@@ -807,9 +891,7 @@ bool HDeadCodeElimination::RemoveDeadBlocks(bool force_recomputation,
// dominator tree and try block membership.
if (removed_one_or_more_blocks || force_recomputation) {
if (rerun_dominance_and_loop_analysis || force_loop_recomputation) {
- graph_->ClearLoopInformation();
- graph_->ClearDominanceInformation();
- graph_->BuildDominatorTree();
+ graph_->RecomputeDominatorTree();
} else {
graph_->ClearDominanceInformation();
graph_->ComputeDominanceInformation();
@@ -837,12 +919,23 @@ void HDeadCodeElimination::RemoveDeadInstructions() {
MaybeRecordStat(stats_, MethodCompilationStat::kRemovedDeadInstruction);
}
}
+
+ // Same for Phis.
+ for (HBackwardInstructionIterator phi_it(block->GetPhis()); !phi_it.Done(); phi_it.Advance()) {
+ DCHECK(phi_it.Current()->IsPhi());
+ HPhi* phi = phi_it.Current()->AsPhi();
+ if (phi->IsDeadAndRemovable()) {
+ block->RemovePhi(phi);
+ MaybeRecordStat(stats_, MethodCompilationStat::kRemovedDeadPhi);
+ }
+ }
}
}
void HDeadCodeElimination::UpdateGraphFlags() {
bool has_monitor_operations = false;
- bool has_simd = false;
+ bool has_traditional_simd = false;
+ bool has_predicated_simd = false;
bool has_bounds_checks = false;
bool has_always_throwing_invokes = false;
@@ -852,7 +945,12 @@ void HDeadCodeElimination::UpdateGraphFlags() {
if (instruction->IsMonitorOperation()) {
has_monitor_operations = true;
} else if (instruction->IsVecOperation()) {
- has_simd = true;
+ HVecOperation* vec_instruction = instruction->AsVecOperation();
+ if (vec_instruction->IsPredicated()) {
+ has_predicated_simd = true;
+ } else {
+ has_traditional_simd = true;
+ }
} else if (instruction->IsBoundsCheck()) {
has_bounds_checks = true;
} else if (instruction->IsInvoke() && instruction->AsInvoke()->AlwaysThrows()) {
@@ -862,7 +960,8 @@ void HDeadCodeElimination::UpdateGraphFlags() {
}
graph_->SetHasMonitorOperations(has_monitor_operations);
- graph_->SetHasSIMD(has_simd);
+ graph_->SetHasTraditionalSIMD(has_traditional_simd);
+ graph_->SetHasPredicatedSIMD(has_predicated_simd);
graph_->SetHasBoundsChecks(has_bounds_checks);
graph_->SetHasAlwaysThrowingInvokes(has_always_throwing_invokes);
}
@@ -877,6 +976,7 @@ bool HDeadCodeElimination::Run() {
bool did_any_simplification = false;
did_any_simplification |= SimplifyAlwaysThrows();
did_any_simplification |= SimplifyIfs();
+ did_any_simplification |= RemoveEmptyIfs();
did_any_simplification |= RemoveDeadBlocks();
// We call RemoveDeadBlocks before RemoveUnneededTries to remove the dead blocks from the
// previous optimizations. Otherwise, we might detect that a try has throwing instructions but
diff --git a/compiler/optimizing/dead_code_elimination.h b/compiler/optimizing/dead_code_elimination.h
index ddd01f7103..789962f93c 100644
--- a/compiler/optimizing/dead_code_elimination.h
+++ b/compiler/optimizing/dead_code_elimination.h
@@ -40,6 +40,17 @@ class HDeadCodeElimination : public HOptimization {
private:
void MaybeRecordDeadBlock(HBasicBlock* block);
void MaybeRecordSimplifyIf();
+  // Detects and removes ifs that are empty, e.g. it turns
+ // 1
+ // / \
+ // 2 3
+ // \ /
+ // 4
+  // where 2 and 3 are single goto blocks and 4 doesn't contain a Phi, into:
+ // 1
+ // |
+ // 4
+ bool RemoveEmptyIfs();
// If `force_recomputation` is true, we will recompute the dominance information even when we
// didn't delete any blocks. `force_loop_recomputation` is similar but it also forces the loop
// information recomputation.
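
The merge-finding step described in the RemoveEmptyIfs comment above can be illustrated with a toy block model; the sketch below is not ART's HBasicBlock API, but it follows the same idea: walk single-goto chains from both successors of the if and accept the diamond only when the false chain lands on a block already seen on the true chain and that block carries no phis.

// Standalone sketch with a hypothetical Block struct; `only_contains_goto`
// stands in for HBasicBlock::IsSingleGoto(). Assumes the goto chains
// terminate, as they do in a well-formed graph.
#include <unordered_set>
#include <vector>

struct Block {
  std::vector<Block*> successors;
  bool has_phis = false;
  bool only_contains_goto = false;
};

// Returns the common merge block when the diamond is empty, nullptr otherwise.
Block* FindEmptyIfMerge(Block* true_succ, Block* false_succ) {
  std::unordered_set<Block*> on_true_chain;
  Block* merge_true = true_succ;
  on_true_chain.insert(merge_true);
  while (merge_true->only_contains_goto) {
    merge_true = merge_true->successors[0];
    on_true_chain.insert(merge_true);
  }
  Block* merge_false = false_succ;
  while (on_true_chain.count(merge_false) == 0 && merge_false->only_contains_goto) {
    merge_false = merge_false->successors[0];
  }
  if (on_true_chain.count(merge_false) == 0 || merge_false->has_phis) {
    return nullptr;  // The branches do not meet, or the merge needs phis.
  }
  return merge_false;
}

When a merge is found, the actual pass (see the dead_code_elimination.cc hunk above) additionally replaces the HIf with an HGoto, rewires the block's successor to the merge, and queues the if's condition so that newly dead comparisons can be removed as well.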
diff --git a/compiler/optimizing/dead_code_elimination_test.cc b/compiler/optimizing/dead_code_elimination_test.cc
index b789434add..4082ec58fc 100644
--- a/compiler/optimizing/dead_code_elimination_test.cc
+++ b/compiler/optimizing/dead_code_elimination_test.cc
@@ -99,8 +99,9 @@ TEST_F(DeadCodeEliminationTest, AdditionAndConditionalJump) {
// Expected difference after dead code elimination.
diff_t expected_diff = {
- { " 3: IntConstant [9, 8, 5]\n", " 3: IntConstant [8, 5]\n" },
- { " 8: Phi(4, 3) [9]\n", " 8: Phi(4, 3)\n" },
+ { " 3: IntConstant [9, 8, 5]\n", " 3: IntConstant [5]\n" },
+ { " 4: IntConstant [8, 5]\n", " 4: IntConstant [5]\n" },
+ { " 8: Phi(4, 3) [9]\n", removed },
{ " 9: Add(8, 3)\n", removed }
};
std::string expected_after = Patch(expected_before, expected_diff);
@@ -114,7 +115,7 @@ TEST_F(DeadCodeEliminationTest, AdditionAndConditionalJump) {
*
* The intent of this test is to ensure that all dead instructions are
* actually pruned at compile-time, thanks to the (backward)
- * post-order traversal of the the dominator tree.
+ * post-order traversal of the dominator tree.
*
* 16-bit
* offset
diff --git a/compiler/optimizing/execution_subgraph.cc b/compiler/optimizing/execution_subgraph.cc
deleted file mode 100644
index 06aabbe040..0000000000
--- a/compiler/optimizing/execution_subgraph.cc
+++ /dev/null
@@ -1,359 +0,0 @@
-/*
- * Copyright (C) 2020 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "execution_subgraph.h"
-
-#include <algorithm>
-#include <unordered_set>
-
-#include "android-base/macros.h"
-#include "base/arena_allocator.h"
-#include "base/arena_bit_vector.h"
-#include "base/globals.h"
-#include "base/scoped_arena_allocator.h"
-#include "nodes.h"
-
-namespace art HIDDEN {
-
-ExecutionSubgraph::ExecutionSubgraph(HGraph* graph, ScopedArenaAllocator* allocator)
- : graph_(graph),
- allocator_(allocator),
- allowed_successors_(graph_->GetBlocks().size(),
- ~(std::bitset<kMaxFilterableSuccessors> {}),
- allocator_->Adapter(kArenaAllocLSA)),
- unreachable_blocks_(
- allocator_, graph_->GetBlocks().size(), /*expandable=*/ false, kArenaAllocLSA),
- valid_(true),
- needs_prune_(false),
- finalized_(false) {
- if (valid_) {
- DCHECK(std::all_of(graph->GetBlocks().begin(), graph->GetBlocks().end(), [](HBasicBlock* it) {
- return it == nullptr || it->GetSuccessors().size() <= kMaxFilterableSuccessors;
- }));
- }
-}
-
-void ExecutionSubgraph::RemoveBlock(const HBasicBlock* to_remove) {
- if (!valid_) {
- return;
- }
- uint32_t id = to_remove->GetBlockId();
- if (unreachable_blocks_.IsBitSet(id)) {
- if (kIsDebugBuild) {
- // This isn't really needed but it's good to have this so it functions as
- // a DCHECK that we always call Prune after removing any block.
- needs_prune_ = true;
- }
- return;
- }
- unreachable_blocks_.SetBit(id);
- for (HBasicBlock* pred : to_remove->GetPredecessors()) {
- std::bitset<kMaxFilterableSuccessors> allowed_successors {};
- // ZipCount iterates over both the successors and the index of them at the same time.
- for (auto [succ, i] : ZipCount(MakeIterationRange(pred->GetSuccessors()))) {
- if (succ != to_remove) {
- allowed_successors.set(i);
- }
- }
- LimitBlockSuccessors(pred, allowed_successors);
- }
-}
-
-// Removes sink nodes.
-void ExecutionSubgraph::Prune() {
- if (UNLIKELY(!valid_)) {
- return;
- }
- needs_prune_ = false;
- // This is the record of the edges that were both (1) explored and (2) reached
- // the exit node.
- {
- // Allocator for temporary values.
- ScopedArenaAllocator temporaries(graph_->GetArenaStack());
- ScopedArenaVector<std::bitset<kMaxFilterableSuccessors>> results(
- graph_->GetBlocks().size(), temporaries.Adapter(kArenaAllocLSA));
- unreachable_blocks_.ClearAllBits();
- // Fills up the 'results' map with what we need to add to update
- // allowed_successors in order to prune sink nodes.
- bool start_reaches_end = false;
- // This is basically a DFS of the graph with some edges skipped.
- {
- const size_t num_blocks = graph_->GetBlocks().size();
- constexpr ssize_t kUnvisitedSuccIdx = -1;
- ArenaBitVector visiting(&temporaries, num_blocks, false, kArenaAllocLSA);
- // How many of the successors of each block we have already examined. This
- // has three states.
- // (1) kUnvisitedSuccIdx: we have not examined any edges,
- // (2) 0 <= val < # of successors: we have examined 'val' successors/are
- // currently examining successors_[val],
- // (3) kMaxFilterableSuccessors: We have examined all of the successors of
- // the block (the 'result' is final).
- ScopedArenaVector<ssize_t> last_succ_seen(
- num_blocks, kUnvisitedSuccIdx, temporaries.Adapter(kArenaAllocLSA));
- // A stack of which blocks we are visiting in this DFS traversal. Does not
- // include the current-block. Used with last_succ_seen to figure out which
- // bits to set if we find a path to the end/loop.
- ScopedArenaVector<uint32_t> current_path(temporaries.Adapter(kArenaAllocLSA));
- // Just ensure we have enough space. The allocator will be cleared shortly
- // anyway so this is fast.
- current_path.reserve(num_blocks);
- // Current block we are examining. Modified only by 'push_block' and 'pop_block'
- const HBasicBlock* cur_block = graph_->GetEntryBlock();
- // Used to note a recur where we will start iterating on 'blk' and save
- // where we are. We must 'continue' immediately after this.
- auto push_block = [&](const HBasicBlock* blk) {
- DCHECK(std::find(current_path.cbegin(), current_path.cend(), cur_block->GetBlockId()) ==
- current_path.end());
- if (kIsDebugBuild) {
- std::for_each(current_path.cbegin(), current_path.cend(), [&](auto id) {
- DCHECK_GT(last_succ_seen[id], kUnvisitedSuccIdx) << id;
- DCHECK_LT(last_succ_seen[id], static_cast<ssize_t>(kMaxFilterableSuccessors)) << id;
- });
- }
- current_path.push_back(cur_block->GetBlockId());
- visiting.SetBit(cur_block->GetBlockId());
- cur_block = blk;
- };
- // Used to note that we have fully explored a block and should return back
- // up. Sets cur_block appropriately. We must 'continue' immediately after
- // calling this.
- auto pop_block = [&]() {
- if (UNLIKELY(current_path.empty())) {
- // Should only happen if entry-blocks successors are exhausted.
- DCHECK_GE(last_succ_seen[graph_->GetEntryBlock()->GetBlockId()],
- static_cast<ssize_t>(graph_->GetEntryBlock()->GetSuccessors().size()));
- cur_block = nullptr;
- } else {
- const HBasicBlock* last = graph_->GetBlocks()[current_path.back()];
- visiting.ClearBit(current_path.back());
- current_path.pop_back();
- cur_block = last;
- }
- };
- // Mark the current path as a path to the end. This is in contrast to paths
- // that end in (eg) removed blocks.
- auto propagate_true = [&]() {
- for (uint32_t id : current_path) {
- DCHECK_GT(last_succ_seen[id], kUnvisitedSuccIdx);
- DCHECK_LT(last_succ_seen[id], static_cast<ssize_t>(kMaxFilterableSuccessors));
- results[id].set(last_succ_seen[id]);
- }
- };
- ssize_t num_entry_succ = graph_->GetEntryBlock()->GetSuccessors().size();
- // As long as the entry-block has not explored all successors we still have
- // work to do.
- const uint32_t entry_block_id = graph_->GetEntryBlock()->GetBlockId();
- while (num_entry_succ > last_succ_seen[entry_block_id]) {
- DCHECK(cur_block != nullptr);
- uint32_t id = cur_block->GetBlockId();
- DCHECK((current_path.empty() && cur_block == graph_->GetEntryBlock()) ||
- current_path.front() == graph_->GetEntryBlock()->GetBlockId())
- << "current path size: " << current_path.size()
- << " cur_block id: " << cur_block->GetBlockId() << " entry id "
- << graph_->GetEntryBlock()->GetBlockId();
- if (visiting.IsBitSet(id)) {
- // TODO We should support infinite loops as well.
- start_reaches_end = false;
- break;
- }
- std::bitset<kMaxFilterableSuccessors>& result = results[id];
- if (cur_block == graph_->GetExitBlock()) {
- start_reaches_end = true;
- propagate_true();
- pop_block();
- continue;
- } else if (last_succ_seen[id] == kMaxFilterableSuccessors) {
- // Already fully explored.
- if (result.any()) {
- propagate_true();
- }
- pop_block();
- continue;
- }
- // NB This is a pointer. Modifications modify the last_succ_seen.
- ssize_t* cur_succ = &last_succ_seen[id];
- std::bitset<kMaxFilterableSuccessors> succ_bitmap = GetAllowedSuccessors(cur_block);
- // Get next successor allowed.
- while (++(*cur_succ) < static_cast<ssize_t>(kMaxFilterableSuccessors) &&
- !succ_bitmap.test(*cur_succ)) {
- DCHECK_GE(*cur_succ, 0);
- }
- if (*cur_succ >= static_cast<ssize_t>(cur_block->GetSuccessors().size())) {
- // No more successors. Mark that we've checked everything. Later visits
- // to this node can use the existing data.
- DCHECK_LE(*cur_succ, static_cast<ssize_t>(kMaxFilterableSuccessors));
- *cur_succ = kMaxFilterableSuccessors;
- pop_block();
- continue;
- }
- const HBasicBlock* nxt = cur_block->GetSuccessors()[*cur_succ];
- DCHECK(nxt != nullptr) << "id: " << *cur_succ
- << " max: " << cur_block->GetSuccessors().size();
- if (visiting.IsBitSet(nxt->GetBlockId())) {
- // This is a loop. Mark it and continue on. Mark allowed-successor on
- // this block's results as well.
- result.set(*cur_succ);
- propagate_true();
- } else {
- // Not a loop yet. Recur.
- push_block(nxt);
- }
- }
- }
- // If we can't reach the end then there is no path through the graph without
- // hitting excluded blocks
- if (UNLIKELY(!start_reaches_end)) {
- valid_ = false;
- return;
- }
- // Mark blocks we didn't see in the ReachesEnd flood-fill
- for (const HBasicBlock* blk : graph_->GetBlocks()) {
- if (blk != nullptr &&
- results[blk->GetBlockId()].none() &&
- blk != graph_->GetExitBlock() &&
- blk != graph_->GetEntryBlock()) {
- // We never visited this block, must be unreachable.
- unreachable_blocks_.SetBit(blk->GetBlockId());
- }
- }
- // write the new data.
- memcpy(allowed_successors_.data(),
- results.data(),
- results.size() * sizeof(std::bitset<kMaxFilterableSuccessors>));
- }
- RecalculateExcludedCohort();
-}
-
-void ExecutionSubgraph::RemoveConcavity() {
- if (UNLIKELY(!valid_)) {
- return;
- }
- DCHECK(!needs_prune_);
- for (const HBasicBlock* blk : graph_->GetBlocks()) {
- if (blk == nullptr || unreachable_blocks_.IsBitSet(blk->GetBlockId())) {
- continue;
- }
- uint32_t blkid = blk->GetBlockId();
- if (std::any_of(unreachable_blocks_.Indexes().begin(),
- unreachable_blocks_.Indexes().end(),
- [&](uint32_t skipped) { return graph_->PathBetween(skipped, blkid); }) &&
- std::any_of(unreachable_blocks_.Indexes().begin(),
- unreachable_blocks_.Indexes().end(),
- [&](uint32_t skipped) { return graph_->PathBetween(blkid, skipped); })) {
- RemoveBlock(blk);
- }
- }
- Prune();
-}
-
-void ExecutionSubgraph::RecalculateExcludedCohort() {
- DCHECK(!needs_prune_);
- excluded_list_.emplace(allocator_->Adapter(kArenaAllocLSA));
- ScopedArenaVector<ExcludedCohort>& res = excluded_list_.value();
- // Make a copy of unreachable_blocks_;
- ArenaBitVector unreachable(allocator_, graph_->GetBlocks().size(), false, kArenaAllocLSA);
- unreachable.Copy(&unreachable_blocks_);
- // Split cohorts with union-find
- while (unreachable.IsAnyBitSet()) {
- res.emplace_back(allocator_, graph_);
- ExcludedCohort& cohort = res.back();
- // We don't allocate except for the queue beyond here so create another arena to save memory.
- ScopedArenaAllocator alloc(graph_->GetArenaStack());
- ScopedArenaQueue<const HBasicBlock*> worklist(alloc.Adapter(kArenaAllocLSA));
- // Select an arbitrary node
- const HBasicBlock* first = graph_->GetBlocks()[unreachable.GetHighestBitSet()];
- worklist.push(first);
- do {
- // Flood-fill both forwards and backwards.
- const HBasicBlock* cur = worklist.front();
- worklist.pop();
- if (!unreachable.IsBitSet(cur->GetBlockId())) {
- // Already visited or reachable somewhere else.
- continue;
- }
- unreachable.ClearBit(cur->GetBlockId());
- cohort.blocks_.SetBit(cur->GetBlockId());
- // don't bother filtering here, it's done next go-around
- for (const HBasicBlock* pred : cur->GetPredecessors()) {
- worklist.push(pred);
- }
- for (const HBasicBlock* succ : cur->GetSuccessors()) {
- worklist.push(succ);
- }
- } while (!worklist.empty());
- }
- // Figure out entry & exit nodes.
- for (ExcludedCohort& cohort : res) {
- DCHECK(cohort.blocks_.IsAnyBitSet());
- auto is_external = [&](const HBasicBlock* ext) -> bool {
- return !cohort.blocks_.IsBitSet(ext->GetBlockId());
- };
- for (const HBasicBlock* blk : cohort.Blocks()) {
- const auto& preds = blk->GetPredecessors();
- const auto& succs = blk->GetSuccessors();
- if (std::any_of(preds.cbegin(), preds.cend(), is_external)) {
- cohort.entry_blocks_.SetBit(blk->GetBlockId());
- }
- if (std::any_of(succs.cbegin(), succs.cend(), is_external)) {
- cohort.exit_blocks_.SetBit(blk->GetBlockId());
- }
- }
- }
-}
-
-std::ostream& operator<<(std::ostream& os, const ExecutionSubgraph::ExcludedCohort& ex) {
- ex.Dump(os);
- return os;
-}
-
-void ExecutionSubgraph::ExcludedCohort::Dump(std::ostream& os) const {
- auto dump = [&](BitVecBlockRange arr) {
- os << "[";
- bool first = true;
- for (const HBasicBlock* b : arr) {
- if (!first) {
- os << ", ";
- }
- first = false;
- os << b->GetBlockId();
- }
- os << "]";
- };
- auto dump_blocks = [&]() {
- os << "[";
- bool first = true;
- for (const HBasicBlock* b : Blocks()) {
- if (!entry_blocks_.IsBitSet(b->GetBlockId()) && !exit_blocks_.IsBitSet(b->GetBlockId())) {
- if (!first) {
- os << ", ";
- }
- first = false;
- os << b->GetBlockId();
- }
- }
- os << "]";
- };
-
- os << "{ entry: ";
- dump(EntryBlocks());
- os << ", interior: ";
- dump_blocks();
- os << ", exit: ";
- dump(ExitBlocks());
- os << "}";
-}
-
-} // namespace art
diff --git a/compiler/optimizing/execution_subgraph.h b/compiler/optimizing/execution_subgraph.h
deleted file mode 100644
index 5ddf17de60..0000000000
--- a/compiler/optimizing/execution_subgraph.h
+++ /dev/null
@@ -1,365 +0,0 @@
-/*
- * Copyright (C) 2020 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef ART_COMPILER_OPTIMIZING_EXECUTION_SUBGRAPH_H_
-#define ART_COMPILER_OPTIMIZING_EXECUTION_SUBGRAPH_H_
-
-#include <algorithm>
-#include <sstream>
-
-#include "base/arena_allocator.h"
-#include "base/arena_bit_vector.h"
-#include "base/arena_containers.h"
-#include "base/array_ref.h"
-#include "base/bit_vector-inl.h"
-#include "base/globals.h"
-#include "base/iteration_range.h"
-#include "base/macros.h"
-#include "base/mutex.h"
-#include "base/scoped_arena_allocator.h"
-#include "base/scoped_arena_containers.h"
-#include "base/stl_util.h"
-#include "base/transform_iterator.h"
-#include "nodes.h"
-
-namespace art HIDDEN {
-
-// Helper for transforming blocks to block_ids.
-class BlockToBlockIdTransformer {
- public:
- BlockToBlockIdTransformer(BlockToBlockIdTransformer&&) = default;
- BlockToBlockIdTransformer(const BlockToBlockIdTransformer&) = default;
- BlockToBlockIdTransformer() {}
-
- inline uint32_t operator()(const HBasicBlock* b) const {
- return b->GetBlockId();
- }
-};
-
-// Helper for transforming block ids to blocks.
-class BlockIdToBlockTransformer {
- public:
- BlockIdToBlockTransformer(BlockIdToBlockTransformer&&) = default;
- BlockIdToBlockTransformer(const BlockIdToBlockTransformer&) = default;
- explicit BlockIdToBlockTransformer(const HGraph* graph) : graph_(graph) {}
-
- inline const HGraph* GetGraph() const {
- return graph_;
- }
-
- inline HBasicBlock* GetBlock(uint32_t id) const {
- DCHECK_LT(id, graph_->GetBlocks().size()) << graph_->PrettyMethod();
- HBasicBlock* blk = graph_->GetBlocks()[id];
- DCHECK(blk != nullptr);
- return blk;
- }
-
- inline HBasicBlock* operator()(uint32_t id) const {
- return GetBlock(id);
- }
-
- private:
- const HGraph* const graph_;
-};
-
-class BlockIdFilterThunk {
- public:
- explicit BlockIdFilterThunk(const BitVector& i) : inner_(i) {}
- BlockIdFilterThunk(BlockIdFilterThunk&& other) noexcept = default;
- BlockIdFilterThunk(const BlockIdFilterThunk&) = default;
-
- bool operator()(const HBasicBlock* b) const {
- return inner_.IsBitSet(b->GetBlockId());
- }
-
- private:
- const BitVector& inner_;
-};
-
-// A representation of a particular section of the graph. The graph is split
-// into an excluded and included area and is used to track escapes.
-//
-// This object is a view of the graph and is not updated as the graph is
-// changed.
-//
-// This is implemented by removing various escape points from the subgraph using
-// the 'RemoveBlock' function. Once all required blocks are removed one will
-// 'Finalize' the subgraph. This will extend the removed area to include:
-// (1) Any block which inevitably leads to (post-dominates) a removed block
-// (2) any block which is between 2 removed blocks
-//
-// This allows us to create a set of 'ExcludedCohorts' which are the
-// well-connected subsets of the graph made up of removed blocks. These cohorts
-// have a set of entry and exit blocks which act as the boundary of the cohort.
-// Since we removed blocks between 2 excluded blocks it is impossible for any
-// cohort-exit block to reach any cohort-entry block. This means we can use the
-// boundary between the cohort and the rest of the graph to insert
-// materialization blocks for partial LSE.
-//
-// TODO We really should expand this to take into account where the object
-// allocation takes place directly. Currently we always act as though it were
-// allocated in the entry block. This is a massively simplifying assumption but
-// means we can't partially remove objects that are repeatedly allocated in a
-// loop.
-class ExecutionSubgraph : public DeletableArenaObject<kArenaAllocLSA> {
- public:
- using BitVecBlockRange =
- IterationRange<TransformIterator<BitVector::IndexIterator, BlockIdToBlockTransformer>>;
- using FilteredBitVecBlockRange = IterationRange<
- FilterIterator<ArenaVector<HBasicBlock*>::const_iterator, BlockIdFilterThunk>>;
-
- // A set of connected blocks which are connected and removed from the
- // ExecutionSubgraph. See above comment for explanation.
- class ExcludedCohort : public ArenaObject<kArenaAllocLSA> {
- public:
- ExcludedCohort(ExcludedCohort&&) = default;
- ExcludedCohort(const ExcludedCohort&) = delete;
- explicit ExcludedCohort(ScopedArenaAllocator* allocator, HGraph* graph)
- : graph_(graph),
- entry_blocks_(allocator, graph_->GetBlocks().size(), false, kArenaAllocLSA),
- exit_blocks_(allocator, graph_->GetBlocks().size(), false, kArenaAllocLSA),
- blocks_(allocator, graph_->GetBlocks().size(), false, kArenaAllocLSA) {}
-
- ~ExcludedCohort() = default;
-
- // All blocks in the cohort.
- BitVecBlockRange Blocks() const {
- return BlockIterRange(blocks_);
- }
-
- // Blocks that have predecessors outside of the cohort. These blocks will
- // need to have PHIs/control-flow added to create the escaping value.
- BitVecBlockRange EntryBlocks() const {
- return BlockIterRange(entry_blocks_);
- }
-
- FilteredBitVecBlockRange EntryBlocksReversePostOrder() const {
- return Filter(MakeIterationRange(graph_->GetReversePostOrder()),
- BlockIdFilterThunk(entry_blocks_));
- }
-
- bool IsEntryBlock(const HBasicBlock* blk) const {
- return entry_blocks_.IsBitSet(blk->GetBlockId());
- }
-
- // Blocks that have successors outside of the cohort. The successors of
- // these blocks will need to have PHI's to restore state.
- BitVecBlockRange ExitBlocks() const {
- return BlockIterRange(exit_blocks_);
- }
-
- bool operator==(const ExcludedCohort& other) const {
- return blocks_.Equal(&other.blocks_);
- }
-
- bool ContainsBlock(const HBasicBlock* blk) const {
- return blocks_.IsBitSet(blk->GetBlockId());
- }
-
- // Returns true if there is a path from 'blk' to any block in this cohort.
- // NB blocks contained within the cohort are not considered to be succeeded
- // by the cohort (i.e. this function will return false).
- bool SucceedsBlock(const HBasicBlock* blk) const {
- if (ContainsBlock(blk)) {
- return false;
- }
- auto idxs = entry_blocks_.Indexes();
- return std::any_of(idxs.begin(), idxs.end(), [&](uint32_t entry) -> bool {
- return blk->GetGraph()->PathBetween(blk->GetBlockId(), entry);
- });
- }
-
- // Returns true if there is a path from any block in this cohort to 'blk'.
- // NB blocks contained within the cohort are not considered to be preceded
- // by the cohort (i.e. this function will return false).
- bool PrecedesBlock(const HBasicBlock* blk) const {
- if (ContainsBlock(blk)) {
- return false;
- }
- auto idxs = exit_blocks_.Indexes();
- return std::any_of(idxs.begin(), idxs.end(), [&](uint32_t exit) -> bool {
- return blk->GetGraph()->PathBetween(exit, blk->GetBlockId());
- });
- }
-
- void Dump(std::ostream& os) const;
-
- private:
- BitVecBlockRange BlockIterRange(const ArenaBitVector& bv) const {
- auto indexes = bv.Indexes();
- BitVecBlockRange res = MakeTransformRange(indexes, BlockIdToBlockTransformer(graph_));
- return res;
- }
-
- ExcludedCohort() = delete;
-
- HGraph* graph_;
- ArenaBitVector entry_blocks_;
- ArenaBitVector exit_blocks_;
- ArenaBitVector blocks_;
-
- friend class ExecutionSubgraph;
- friend class LoadStoreAnalysisTest;
- };
-
- // The number of successors we can track on a single block. Graphs which
- // contain a block with a branching factor greater than this will not be
- // analysed. This is used to both limit the memory usage of analysis to
- // reasonable levels and ensure that the analysis will complete in a
- // reasonable amount of time. It also simplifies the implementation somewhat
- // to have a constant branching factor.
- static constexpr uint32_t kMaxFilterableSuccessors = 8;
-
- // Instantiate a subgraph. The subgraph can be instantiated only if partial-escape
- // analysis is desired (eg not when being used for instruction scheduling) and
- // when the branching factor in the graph is not too high. These conditions
- // are determined once and passed down for performance reasons.
- ExecutionSubgraph(HGraph* graph, ScopedArenaAllocator* allocator);
-
- void Invalidate() {
- valid_ = false;
- }
-
- // A block is contained by the ExecutionSubgraph if it is reachable. This
- // means it has not been removed explicitly or via pruning/concavity removal.
- // Finalization is needed to call this function.
- // See RemoveConcavity and Prune for more information.
- bool ContainsBlock(const HBasicBlock* blk) const {
- DCHECK_IMPLIES(finalized_, !needs_prune_);
- if (!valid_) {
- return false;
- }
- return !unreachable_blocks_.IsBitSet(blk->GetBlockId());
- }
-
- // Mark the block as removed from the subgraph.
- void RemoveBlock(const HBasicBlock* to_remove);
-
- // Called when no more updates will be done to the subgraph. Calculate the
- // final subgraph
- void Finalize() {
- Prune();
- RemoveConcavity();
- finalized_ = true;
- }
-
- BitVecBlockRange UnreachableBlocks() const {
- auto idxs = unreachable_blocks_.Indexes();
- return MakeTransformRange(idxs, BlockIdToBlockTransformer(graph_));
- }
-
- // Returns true if all allowed execution paths from start eventually reach the
- // graph's exit block (or diverge).
- bool IsValid() const {
- return valid_;
- }
-
- ArrayRef<const ExcludedCohort> GetExcludedCohorts() const {
- DCHECK_IMPLIES(valid_, !needs_prune_);
- if (!valid_ || !unreachable_blocks_.IsAnyBitSet()) {
- return ArrayRef<const ExcludedCohort>();
- } else {
- return ArrayRef<const ExcludedCohort>(*excluded_list_);
- }
- }
-
- // Helper class to create reachable blocks iterator.
- class ContainsFunctor {
- public:
- bool operator()(HBasicBlock* blk) const {
- return subgraph_->ContainsBlock(blk);
- }
-
- private:
- explicit ContainsFunctor(const ExecutionSubgraph* subgraph) : subgraph_(subgraph) {}
- const ExecutionSubgraph* const subgraph_;
- friend class ExecutionSubgraph;
- };
- // Returns an iterator over reachable blocks (filtered as we go). This is primarilly for testing.
- IterationRange<
- FilterIterator<typename ArenaVector<HBasicBlock*>::const_iterator, ContainsFunctor>>
- ReachableBlocks() const {
- return Filter(MakeIterationRange(graph_->GetBlocks()), ContainsFunctor(this));
- }
-
- static bool CanAnalyse(HGraph* graph) {
- // If there are any blocks with more than kMaxFilterableSuccessors we can't
- // analyse the graph. We avoid this case to prevent excessive memory and
- // time usage while allowing a simpler algorithm with a fixed-width
- // branching factor.
- return std::all_of(graph->GetBlocks().begin(), graph->GetBlocks().end(), [](HBasicBlock* blk) {
- return blk == nullptr || blk->GetSuccessors().size() <= kMaxFilterableSuccessors;
- });
- }
-
- private:
- std::bitset<kMaxFilterableSuccessors> GetAllowedSuccessors(const HBasicBlock* blk) const {
- DCHECK(valid_);
- return allowed_successors_[blk->GetBlockId()];
- }
-
- void LimitBlockSuccessors(const HBasicBlock* block,
- std::bitset<kMaxFilterableSuccessors> allowed) {
- needs_prune_ = true;
- allowed_successors_[block->GetBlockId()] &= allowed;
- }
-
- // Remove nodes which both precede and follow any exclusions. This ensures we don't need to deal
- // with only conditionally materializing objects depending on if we already materialized them
- // Ensure that for all blocks A, B, C: Unreachable(A) && Unreachable(C) && PathBetween(A, B) &&
- // PathBetween(A, C) implies Unreachable(B). This simplifies later transforms since it ensures
- // that no execution can leave and then re-enter any exclusion.
- void RemoveConcavity();
-
- // Removes sink nodes. Sink nodes are nodes where there is no execution which
- // avoids all removed nodes.
- void Prune();
-
- void RecalculateExcludedCohort();
-
- HGraph* graph_;
- ScopedArenaAllocator* allocator_;
- // The map from block_id -> allowed-successors.
- // This is the canonical representation of this subgraph. If a bit in the
- // bitset is not set then the corresponding outgoing edge of that block is not
- // considered traversable.
- ScopedArenaVector<std::bitset<kMaxFilterableSuccessors>> allowed_successors_;
- // Helper that holds which blocks we are able to reach. Only valid if
- // 'needs_prune_ == false'.
- ArenaBitVector unreachable_blocks_;
- // A list of the excluded-cohorts of this subgraph. This is only valid when
- // 'needs_prune_ == false'
- std::optional<ScopedArenaVector<ExcludedCohort>> excluded_list_;
- // Bool to hold if there is at least one known path from the start block to
- // the end in this graph. Used to short-circuit computation.
- bool valid_;
- // True if the subgraph is consistent and can be queried. Modifying the
- // subgraph clears this and requires a prune to restore.
- bool needs_prune_;
- // True if no more modification of the subgraph is permitted.
- bool finalized_;
-
- friend class ExecutionSubgraphTest;
- friend class LoadStoreAnalysisTest;
-
- DISALLOW_COPY_AND_ASSIGN(ExecutionSubgraph);
-};
-
-std::ostream& operator<<(std::ostream& os, const ExecutionSubgraph::ExcludedCohort& ex);
-
-} // namespace art
-
-#endif // ART_COMPILER_OPTIMIZING_EXECUTION_SUBGRAPH_H_
diff --git a/compiler/optimizing/execution_subgraph_test.cc b/compiler/optimizing/execution_subgraph_test.cc
deleted file mode 100644
index 921ef056ba..0000000000
--- a/compiler/optimizing/execution_subgraph_test.cc
+++ /dev/null
@@ -1,975 +0,0 @@
-/*
- * Copyright (C) 2020 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "execution_subgraph_test.h"
-
-#include <array>
-#include <sstream>
-#include <string_view>
-#include <unordered_map>
-#include <unordered_set>
-
-#include "base/scoped_arena_allocator.h"
-#include "base/stl_util.h"
-#include "class_root.h"
-#include "dex/dex_file_types.h"
-#include "dex/method_reference.h"
-#include "entrypoints/quick/quick_entrypoints_enum.h"
-#include "execution_subgraph.h"
-#include "gtest/gtest.h"
-#include "handle.h"
-#include "handle_scope.h"
-#include "nodes.h"
-#include "optimizing/data_type.h"
-#include "optimizing_unit_test.h"
-#include "scoped_thread_state_change.h"
-
-namespace art HIDDEN {
-
-using BlockSet = std::unordered_set<const HBasicBlock*>;
-
-// Helper that checks validity directly.
-bool ExecutionSubgraphTestHelper::CalculateValidity(HGraph* graph, const ExecutionSubgraph* esg) {
- bool reached_end = false;
- std::queue<const HBasicBlock*> worklist;
- std::unordered_set<const HBasicBlock*> visited;
- worklist.push(graph->GetEntryBlock());
- while (!worklist.empty()) {
- const HBasicBlock* cur = worklist.front();
- worklist.pop();
- if (visited.find(cur) != visited.end()) {
- continue;
- } else {
- visited.insert(cur);
- }
- if (cur == graph->GetExitBlock()) {
- reached_end = true;
- continue;
- }
- bool has_succ = false;
- for (const HBasicBlock* succ : cur->GetSuccessors()) {
- DCHECK(succ != nullptr) << "Bad successors on block " << cur->GetBlockId();
- if (!esg->ContainsBlock(succ)) {
- continue;
- }
- has_succ = true;
- worklist.push(succ);
- }
- if (!has_succ) {
- // We aren't at the end and have nowhere to go so fail.
- return false;
- }
- }
- return reached_end;
-}
-
-class ExecutionSubgraphTest : public OptimizingUnitTest {
- public:
- ExecutionSubgraphTest() : graph_(CreateGraph()) {}
-
- AdjacencyListGraph SetupFromAdjacencyList(const std::string_view entry_name,
- const std::string_view exit_name,
- const std::vector<AdjacencyListGraph::Edge>& adj) {
- return AdjacencyListGraph(graph_, GetAllocator(), entry_name, exit_name, adj);
- }
-
- bool IsValidSubgraph(const ExecutionSubgraph* esg) {
- return ExecutionSubgraphTestHelper::CalculateValidity(graph_, esg);
- }
-
- bool IsValidSubgraph(const ExecutionSubgraph& esg) {
- return ExecutionSubgraphTestHelper::CalculateValidity(graph_, &esg);
- }
-
- HGraph* graph_;
-};
-
-// Some comparators used by these tests to avoid having to deal with various set types.
-template <typename BLKS, typename = std::enable_if_t<!std::is_same_v<BlockSet, BLKS>>>
-bool operator==(const BlockSet& bs, const BLKS& sas) {
- std::unordered_set<const HBasicBlock*> us(sas.begin(), sas.end());
- return bs == us;
-}
-template <typename BLKS, typename = std::enable_if_t<!std::is_same_v<BlockSet, BLKS>>>
-bool operator==(const BLKS& sas, const BlockSet& bs) {
- return bs == sas;
-}
-template <typename BLKS, typename = std::enable_if_t<!std::is_same_v<BlockSet, BLKS>>>
-bool operator!=(const BlockSet& bs, const BLKS& sas) {
- return !(bs == sas);
-}
-template <typename BLKS, typename = std::enable_if_t<!std::is_same_v<BlockSet, BLKS>>>
-bool operator!=(const BLKS& sas, const BlockSet& bs) {
- return !(bs == sas);
-}
-
-// +-------+ +-------+
-// | right | <-- | entry |
-// +-------+ +-------+
-// | |
-// | |
-// | v
-// | + - - - - - +
-// | ' removed '
-// | ' '
-// | ' +-------+ '
-// | ' | left | '
-// | ' +-------+ '
-// | ' '
-// | + - - - - - +
-// | |
-// | |
-// | v
-// | +-------+
-// +---------> | exit |
-// +-------+
-TEST_F(ExecutionSubgraphTest, Basic) {
- AdjacencyListGraph blks(SetupFromAdjacencyList(
- "entry",
- "exit",
- { { "entry", "left" }, { "entry", "right" }, { "left", "exit" }, { "right", "exit" } }));
- ASSERT_TRUE(ExecutionSubgraph::CanAnalyse(graph_));
- ExecutionSubgraph esg(graph_, GetScopedAllocator());
- esg.RemoveBlock(blks.Get("left"));
- esg.Finalize();
- ASSERT_TRUE(esg.IsValid());
- ASSERT_TRUE(IsValidSubgraph(esg));
- std::unordered_set<const HBasicBlock*> contents(esg.ReachableBlocks().begin(),
- esg.ReachableBlocks().end());
-
- ASSERT_EQ(contents.size(), 3u);
- ASSERT_TRUE(contents.find(blks.Get("left")) == contents.end());
-
- ASSERT_TRUE(contents.find(blks.Get("right")) != contents.end());
- ASSERT_TRUE(contents.find(blks.Get("entry")) != contents.end());
- ASSERT_TRUE(contents.find(blks.Get("exit")) != contents.end());
- esg.RemoveBlock(blks.Get("right"));
- esg.Finalize();
- std::unordered_set<const HBasicBlock*> contents_2(esg.ReachableBlocks().begin(),
- esg.ReachableBlocks().end());
- ASSERT_EQ(contents_2.size(), 0u);
-}
-
-// +-------+ +-------+
-// | right | <-- | entry |
-// +-------+ +-------+
-// | |
-// | |
-// | v
-// | + - - - - - - - - - - - - - - - - - - - -+
-// | ' indirectly_removed '
-// | ' '
-// | ' +-------+ +-----+ '
-// | ' | l1 | -------------------> | l1r | '
-// | ' +-------+ +-----+ '
-// | ' | | '
-// | ' | | '
-// | ' v | '
-// | ' +-------+ | '
-// | ' | l1l | | '
-// | ' +-------+ | '
-// | ' | | '
-// | ' | | '
-// | ' | | '
-// + - - - - - - - -+ | +- - - | | '
-// ' ' | +- v | '
-// ' +-----+ | +----------------+ | '
-// ' | l2r | <---------+-------------- | l2 (removed) | <-------------+ '
-// ' +-----+ | +----------------+ '
-// ' | ' | +- | '
-// ' | - - -+ | +- - - | - - - - - - - - - - - - - -+
-// ' | ' | ' | '
-// ' | ' | ' | '
-// ' | ' | ' v '
-// ' | ' | ' +-------+ '
-// ' | ' | ' | l2l | '
-// ' | ' | ' +-------+ '
-// ' | ' | ' | '
-// ' | ' | ' | '
-// ' | ' | ' | '
-// ' | - - -+ | +- - - | '
-// ' | ' | +- v '
-// ' | | +-------+ '
-// ' +---------------+-------------> | l3 | '
-// ' | +-------+ '
-// ' ' | +- '
-// + - - - - - - - -+ | +- - - - - - - - - +
-// | |
-// | |
-// | v
-// | +-------+
-// +-----------> | exit |
-// +-------+
-TEST_F(ExecutionSubgraphTest, Propagation) {
- AdjacencyListGraph blks(SetupFromAdjacencyList("entry",
- "exit",
- { { "entry", "l1" },
- { "l1", "l1l" },
- { "l1", "l1r" },
- { "l1l", "l2" },
- { "l1r", "l2" },
- { "l2", "l2l" },
- { "l2", "l2r" },
- { "l2l", "l3" },
- { "l2r", "l3" },
- { "l3", "exit" },
- { "entry", "right" },
- { "right", "exit" } }));
- ASSERT_TRUE(ExecutionSubgraph::CanAnalyse(graph_));
- ExecutionSubgraph esg(graph_, GetScopedAllocator());
- esg.RemoveBlock(blks.Get("l2"));
- esg.Finalize();
- ASSERT_TRUE(esg.IsValid());
- ASSERT_TRUE(IsValidSubgraph(esg));
- std::unordered_set<const HBasicBlock*> contents(esg.ReachableBlocks().begin(),
- esg.ReachableBlocks().end());
-
- // ASSERT_EQ(contents.size(), 3u);
- // Not present, no path through.
- ASSERT_TRUE(contents.find(blks.Get("l1")) == contents.end());
- ASSERT_TRUE(contents.find(blks.Get("l2")) == contents.end());
- ASSERT_TRUE(contents.find(blks.Get("l3")) == contents.end());
- ASSERT_TRUE(contents.find(blks.Get("l1l")) == contents.end());
- ASSERT_TRUE(contents.find(blks.Get("l1r")) == contents.end());
- ASSERT_TRUE(contents.find(blks.Get("l2l")) == contents.end());
- ASSERT_TRUE(contents.find(blks.Get("l2r")) == contents.end());
-
- // present, path through.
- ASSERT_TRUE(contents.find(blks.Get("right")) != contents.end());
- ASSERT_TRUE(contents.find(blks.Get("entry")) != contents.end());
- ASSERT_TRUE(contents.find(blks.Get("exit")) != contents.end());
-}
-
-// +------------------------------------+
-// | |
-// | +-------+ +-------+ |
-// | | right | <-- | entry | |
-// | +-------+ +-------+ |
-// | | | |
-// | | | |
-// | | v |
-// | | +-------+ +--------+
-// +----+---------> | l1 | --> | l1loop |
-// | +-------+ +--------+
-// | |
-// | |
-// | v
-// | +- - - - - -+
-// | ' removed '
-// | ' '
-// | ' +-------+ '
-// | ' | l2 | '
-// | ' +-------+ '
-// | ' '
-// | +- - - - - -+
-// | |
-// | |
-// | v
-// | +-------+
-// +---------> | exit |
-// +-------+
-TEST_F(ExecutionSubgraphTest, PropagationLoop) {
- AdjacencyListGraph blks(SetupFromAdjacencyList("entry",
- "exit",
- { { "entry", "l1" },
- { "l1", "l2" },
- { "l1", "l1loop" },
- { "l1loop", "l1" },
- { "l2", "exit" },
- { "entry", "right" },
- { "right", "exit" } }));
- ASSERT_TRUE(ExecutionSubgraph::CanAnalyse(graph_));
- ExecutionSubgraph esg(graph_, GetScopedAllocator());
- esg.RemoveBlock(blks.Get("l2"));
- esg.Finalize();
- ASSERT_TRUE(esg.IsValid());
- ASSERT_TRUE(IsValidSubgraph(esg));
- std::unordered_set<const HBasicBlock*> contents(esg.ReachableBlocks().begin(),
- esg.ReachableBlocks().end());
-
- ASSERT_EQ(contents.size(), 5u);
-
- // Not present, no path through.
- ASSERT_TRUE(contents.find(blks.Get("l2")) == contents.end());
-
- // present, path through.
- // Since the loop can diverge we should leave it in the execution subgraph.
- ASSERT_TRUE(contents.find(blks.Get("l1")) != contents.end());
- ASSERT_TRUE(contents.find(blks.Get("l1loop")) != contents.end());
- ASSERT_TRUE(contents.find(blks.Get("right")) != contents.end());
- ASSERT_TRUE(contents.find(blks.Get("entry")) != contents.end());
- ASSERT_TRUE(contents.find(blks.Get("exit")) != contents.end());
-}
-
-// +--------------------------------+
-// | |
-// | +-------+ +-------+ |
-// | | right | <-- | entry | |
-// | +-------+ +-------+ |
-// | | | |
-// | | | |
-// | | v |
-// | | +-------+ +--------+
-// +----+---------> | l1 | --> | l1loop |
-// | +-------+ +--------+
-// | |
-// | |
-// | v
-// | +-------+
-// | | l2 |
-// | +-------+
-// | |
-// | |
-// | v
-// | +-------+
-// +---------> | exit |
-// +-------+
-TEST_F(ExecutionSubgraphTest, PropagationLoop2) {
- AdjacencyListGraph blks(SetupFromAdjacencyList("entry",
- "exit",
- { { "entry", "l1" },
- { "l1", "l2" },
- { "l1", "l1loop" },
- { "l1loop", "l1" },
- { "l2", "exit" },
- { "entry", "right" },
- { "right", "exit" } }));
- ASSERT_TRUE(ExecutionSubgraph::CanAnalyse(graph_));
- ExecutionSubgraph esg(graph_, GetScopedAllocator());
- esg.RemoveBlock(blks.Get("l1"));
- esg.Finalize();
- ASSERT_TRUE(esg.IsValid());
- ASSERT_TRUE(IsValidSubgraph(esg));
- std::unordered_set<const HBasicBlock*> contents(esg.ReachableBlocks().begin(),
- esg.ReachableBlocks().end());
-
- ASSERT_EQ(contents.size(), 3u);
-
- // Not present, no path through.
- ASSERT_TRUE(contents.find(blks.Get("l1")) == contents.end());
- ASSERT_TRUE(contents.find(blks.Get("l1loop")) == contents.end());
- ASSERT_TRUE(contents.find(blks.Get("l2")) == contents.end());
-
- // present, path through.
- ASSERT_TRUE(contents.find(blks.Get("right")) != contents.end());
- ASSERT_TRUE(contents.find(blks.Get("entry")) != contents.end());
- ASSERT_TRUE(contents.find(blks.Get("exit")) != contents.end());
-}
-
-// +--------------------------------+
-// | |
-// | +-------+ +-------+ |
-// | | right | <-- | entry | |
-// | +-------+ +-------+ |
-// | | | |
-// | | | |
-// | | v |
-// | | +-------+ +--------+
-// +----+---------> | l1 | --> | l1loop |
-// | +-------+ +--------+
-// | |
-// | |
-// | v
-// | +-------+
-// | | l2 |
-// | +-------+
-// | |
-// | |
-// | v
-// | +-------+
-// +---------> | exit |
-// +-------+
-TEST_F(ExecutionSubgraphTest, PropagationLoop3) {
- AdjacencyListGraph blks(SetupFromAdjacencyList("entry",
- "exit",
- { { "entry", "l1" },
- { "l1", "l2" },
- { "l1", "l1loop" },
- { "l1loop", "l1" },
- { "l2", "exit" },
- { "entry", "right" },
- { "right", "exit" } }));
- ASSERT_TRUE(ExecutionSubgraph::CanAnalyse(graph_));
- ExecutionSubgraph esg(graph_, GetScopedAllocator());
- esg.RemoveBlock(blks.Get("l1loop"));
- esg.Finalize();
- ASSERT_TRUE(esg.IsValid());
- ASSERT_TRUE(IsValidSubgraph(esg));
- std::unordered_set<const HBasicBlock*> contents(esg.ReachableBlocks().begin(),
- esg.ReachableBlocks().end());
-
- ASSERT_EQ(contents.size(), 3u);
-
- // Not present, no path through. If we got to l1 loop then we must merge back
- // with l1 and l2 so they're bad too.
- ASSERT_TRUE(contents.find(blks.Get("l1loop")) == contents.end());
- ASSERT_TRUE(contents.find(blks.Get("l1")) == contents.end());
- ASSERT_TRUE(contents.find(blks.Get("l2")) == contents.end());
-
- // present, path through.
- ASSERT_TRUE(contents.find(blks.Get("right")) != contents.end());
- ASSERT_TRUE(contents.find(blks.Get("entry")) != contents.end());
- ASSERT_TRUE(contents.find(blks.Get("exit")) != contents.end());
-}
-
-// ┌───────┐ ┌──────────────┐
-// │ right │ ◀── │ entry │
-// └───────┘ └──────────────┘
-// │ │
-// │ │
-// ▼ ▼
-// ┌────┐ ┌───────┐ ┌──────────────┐
-// │ l2 │ ──▶ │ exit │ ┌─ │ l1 │ ◀┐
-// └────┘ └───────┘ │ └──────────────┘ │
-// ▲ │ │ │
-// └───────────────────┘ │ │
-// ▼ │
-// ┌──────────────┐ │ ┌──────────────┐
-// ┌─ │ l1loop │ │ │ l1loop_right │ ◀┐
-// │ └──────────────┘ │ └──────────────┘ │
-// │ │ │ │ │
-// │ │ │ │ │
-// │ ▼ │ │ │
-// │ ┌−−−−−−−−−−−−−−−−−−┐ │ │ │
-// │ ╎ removed ╎ │ │ │
-// │ ╎ ╎ │ │ │
-// │ ╎ ┌──────────────┐ ╎ │ │ │
-// │ ╎ │ l1loop_left │ ╎ │ │ │
-// │ ╎ └──────────────┘ ╎ │ │ │
-// │ ╎ ╎ │ │ │
-// │ └−−−−−−−−−−−−−−−−−−┘ │ │ │
-// │ │ │ │ │
-// │ │ │ │ │
-// │ ▼ │ │ │
-// │ ┌──────────────┐ │ │ │
-// │ │ l1loop_merge │ ─┘ │ │
-// │ └──────────────┘ │ │
-// │ ▲ │ │
-// │ └──────────────────────┘ │
-// │ │
-// │ │
-// └─────────────────────────────────────────────┘
-
-TEST_F(ExecutionSubgraphTest, PropagationLoop4) {
- AdjacencyListGraph blks(SetupFromAdjacencyList("entry",
- "exit",
- {{"entry", "l1"},
- {"l1", "l2"},
- {"l1", "l1loop"},
- {"l1loop", "l1loop_left"},
- {"l1loop", "l1loop_right"},
- {"l1loop_left", "l1loop_merge"},
- {"l1loop_right", "l1loop_merge"},
- {"l1loop_merge", "l1"},
- {"l2", "exit"},
- {"entry", "right"},
- {"right", "exit"}}));
- ASSERT_TRUE(ExecutionSubgraph::CanAnalyse(graph_));
- ExecutionSubgraph esg(graph_, GetScopedAllocator());
- esg.RemoveBlock(blks.Get("l1loop_left"));
- esg.Finalize();
- ASSERT_TRUE(esg.IsValid());
- ASSERT_TRUE(IsValidSubgraph(esg));
- std::unordered_set<const HBasicBlock*> contents(esg.ReachableBlocks().begin(),
- esg.ReachableBlocks().end());
-
- ASSERT_EQ(contents.size(), 3u);
-
- // Not present, no path through. If we got to l1 loop then we must merge back
- // with l1 and l2 so they're bad too.
- ASSERT_TRUE(contents.find(blks.Get("l1loop")) == contents.end());
- ASSERT_TRUE(contents.find(blks.Get("l1")) == contents.end());
- ASSERT_TRUE(contents.find(blks.Get("l1loop_left")) == contents.end());
- ASSERT_TRUE(contents.find(blks.Get("l1loop_right")) == contents.end());
- ASSERT_TRUE(contents.find(blks.Get("l1loop_merge")) == contents.end());
- ASSERT_TRUE(contents.find(blks.Get("l2")) == contents.end());
-
- // present, path through.
- ASSERT_TRUE(contents.find(blks.Get("right")) != contents.end());
- ASSERT_TRUE(contents.find(blks.Get("entry")) != contents.end());
- ASSERT_TRUE(contents.find(blks.Get("exit")) != contents.end());
-}
-
-// +------------------------------------------------------+
-// | |
-// | +--------------+ +-------------+ |
-// | | right | <-- | entry | |
-// | +--------------+ +-------------+ |
-// | | | |
-// | | | |
-// | v v |
-// | +--------------+ +--------------------+ +----+
-// +> | exit | +> | l1 | --> | l2 |
-// +--------------+ | +--------------------+ +----+
-// | | ^
-// +---------------+ | |
-// | v |
-// +--------------+ +-------------+ |
-// | l1loop_right | <-- | l1loop | |
-// +--------------+ +-------------+ |
-// | |
-// | |
-// v |
-// + - - - - - - - - + |
-// ' removed ' |
-// ' ' |
-// ' +-------------+ ' |
-// ' | l1loop_left | ' -+
-// ' +-------------+ '
-// ' '
-// + - - - - - - - - +
-TEST_F(ExecutionSubgraphTest, PropagationLoop5) {
- AdjacencyListGraph blks(SetupFromAdjacencyList("entry",
- "exit",
- {{"entry", "l1"},
- {"l1", "l2"},
- {"l1", "l1loop"},
- {"l1loop", "l1loop_left"},
- {"l1loop", "l1loop_right"},
- {"l1loop_left", "l1"},
- {"l1loop_right", "l1"},
- {"l2", "exit"},
- {"entry", "right"},
- {"right", "exit"}}));
- ASSERT_TRUE(ExecutionSubgraph::CanAnalyse(graph_));
- ExecutionSubgraph esg(graph_, GetScopedAllocator());
- esg.RemoveBlock(blks.Get("l1loop_left"));
- esg.Finalize();
- ASSERT_TRUE(esg.IsValid());
- ASSERT_TRUE(IsValidSubgraph(esg));
- std::unordered_set<const HBasicBlock*> contents(esg.ReachableBlocks().begin(),
- esg.ReachableBlocks().end());
-
- ASSERT_EQ(contents.size(), 3u);
-
- // Not present, no path through. If we got to l1 loop then we must merge back
- // with l1 and l2 so they're bad too.
- ASSERT_TRUE(contents.find(blks.Get("l1loop")) == contents.end());
- ASSERT_TRUE(contents.find(blks.Get("l1")) == contents.end());
- ASSERT_TRUE(contents.find(blks.Get("l1loop_left")) == contents.end());
- ASSERT_TRUE(contents.find(blks.Get("l1loop_right")) == contents.end());
- ASSERT_TRUE(contents.find(blks.Get("l2")) == contents.end());
-
- // present, path through.
- ASSERT_TRUE(contents.find(blks.Get("right")) != contents.end());
- ASSERT_TRUE(contents.find(blks.Get("entry")) != contents.end());
- ASSERT_TRUE(contents.find(blks.Get("exit")) != contents.end());
-}
-
-TEST_F(ExecutionSubgraphTest, Invalid) {
- AdjacencyListGraph blks(SetupFromAdjacencyList(
- "entry",
- "exit",
- { { "entry", "left" }, { "entry", "right" }, { "left", "exit" }, { "right", "exit" } }));
- ASSERT_TRUE(ExecutionSubgraph::CanAnalyse(graph_));
- ExecutionSubgraph esg(graph_, GetScopedAllocator());
- esg.RemoveBlock(blks.Get("left"));
- esg.RemoveBlock(blks.Get("right"));
- esg.Finalize();
-
- ASSERT_FALSE(esg.IsValid());
- ASSERT_FALSE(IsValidSubgraph(esg));
- std::unordered_set<const HBasicBlock*> contents(esg.ReachableBlocks().begin(),
- esg.ReachableBlocks().end());
-
- ASSERT_EQ(contents.size(), 0u);
-}
-// Sibling branches are disconnected.
-TEST_F(ExecutionSubgraphTest, Exclusions) {
- AdjacencyListGraph blks(SetupFromAdjacencyList("entry",
- "exit",
- { { "entry", "a" },
- { "entry", "b" },
- { "entry", "c" },
- { "a", "exit" },
- { "b", "exit" },
- { "c", "exit" } }));
- ASSERT_TRUE(ExecutionSubgraph::CanAnalyse(graph_));
- ExecutionSubgraph esg(graph_, GetScopedAllocator());
- esg.RemoveBlock(blks.Get("a"));
- esg.RemoveBlock(blks.Get("c"));
- esg.Finalize();
- ASSERT_TRUE(esg.IsValid());
- ASSERT_TRUE(IsValidSubgraph(esg));
- std::unordered_set<const HBasicBlock*> contents(esg.ReachableBlocks().begin(),
- esg.ReachableBlocks().end());
-
- ASSERT_EQ(contents.size(), 3u);
- // Not present, no path through.
- ASSERT_TRUE(contents.find(blks.Get("a")) == contents.end());
- ASSERT_TRUE(contents.find(blks.Get("c")) == contents.end());
-
- // present, path through.
- ASSERT_TRUE(contents.find(blks.Get("entry")) != contents.end());
- ASSERT_TRUE(contents.find(blks.Get("exit")) != contents.end());
- ASSERT_TRUE(contents.find(blks.Get("b")) != contents.end());
-
- ArrayRef<const ExecutionSubgraph::ExcludedCohort> exclusions(esg.GetExcludedCohorts());
- ASSERT_EQ(exclusions.size(), 2u);
- std::unordered_set<const HBasicBlock*> exclude_a({ blks.Get("a") });
- std::unordered_set<const HBasicBlock*> exclude_c({ blks.Get("c") });
- ASSERT_TRUE(std::find_if(exclusions.cbegin(),
- exclusions.cend(),
- [&](const ExecutionSubgraph::ExcludedCohort& it) {
- return it.Blocks() == exclude_a;
- }) != exclusions.cend());
- ASSERT_TRUE(std::find_if(exclusions.cbegin(),
- exclusions.cend(),
- [&](const ExecutionSubgraph::ExcludedCohort& it) {
- return it.Blocks() == exclude_c;
- }) != exclusions.cend());
-}
-
-// Sibling branches are disconnected.
-// +- - - - - - - - - - - - - - - - - - - - - - +
-// ' remove_c '
-// ' '
-// ' +-----------+ '
-// ' | c_begin_2 | -------------------------+ '
-// ' +-----------+ | '
-// ' | '
-// +- - - - - - - - - - - - - - - - - - | '
-// ^ ' | '
-// | ' | '
-// | ' | '
-// + - - - - - -+ ' | '
-// ' remove_a ' ' | '
-// ' ' ' | '
-// ' +--------+ ' +-----------+ +---+' | '
-// ' | **a** | ' <-- | entry | --> | b |' | '
-// ' +--------+ ' +-----------+ +---+' | '
-// ' ' ' | '
-// + - - - - - -+ ' | '
-// | | | ' | '
-// | | | ' | '
-// | v | ' | '
-// | +- - - - - - - -+ | ' | '
-// | ' ' | ' | '
-// | ' +-----------+ ' | ' | '
-// | ' | c_begin_1 | ' | ' | '
-// | ' +-----------+ ' | ' | '
-// | ' | ' | ' | '
-// | ' | ' | ' | '
-// | ' | ' | ' | '
-// + - - - - - - - - -+ | + - - - | - - - - - - - + | ' | '
-// ' ' | + v ' | + | '
-// ' +---------+ | +-----------+ | | '
-// ' | c_end_2 | <-------+--------------- | **c_mid** | <-----------------+------+ '
-// ' +---------+ | +-----------+ | '
-// ' ' | + | ' | + '
-// + - - - - - - - - -+ | + - - - | - - - - - - - + | + - - - +
-// | | ' | ' |
-// | | ' | ' |
-// | | ' v ' |
-// | | ' +-----------+ ' |
-// | | ' | c_end_1 | ' |
-// | | ' +-----------+ ' |
-// | | ' ' |
-// | | +- - - - - - - -+ |
-// | | | |
-// | | | |
-// | | v v
-// | | +---------------------------------+
-// | +------------> | exit |
-// | +---------------------------------+
-// | ^
-// +------------------------------------+
-TEST_F(ExecutionSubgraphTest, ExclusionExtended) {
- AdjacencyListGraph blks(SetupFromAdjacencyList("entry",
- "exit",
- { { "entry", "a" },
- { "entry", "b" },
- { "entry", "c_begin_1" },
- { "entry", "c_begin_2" },
- { "c_begin_1", "c_mid" },
- { "c_begin_2", "c_mid" },
- { "c_mid", "c_end_1" },
- { "c_mid", "c_end_2" },
- { "a", "exit" },
- { "b", "exit" },
- { "c_end_1", "exit" },
- { "c_end_2", "exit" } }));
- ASSERT_TRUE(ExecutionSubgraph::CanAnalyse(graph_));
- ExecutionSubgraph esg(graph_, GetScopedAllocator());
- esg.RemoveBlock(blks.Get("a"));
- esg.RemoveBlock(blks.Get("c_mid"));
- esg.Finalize();
- ASSERT_TRUE(esg.IsValid());
- ASSERT_TRUE(IsValidSubgraph(esg));
- std::unordered_set<const HBasicBlock*> contents(esg.ReachableBlocks().begin(),
- esg.ReachableBlocks().end());
-
- ASSERT_EQ(contents.size(), 3u);
- // Not present, no path through.
- ASSERT_TRUE(contents.find(blks.Get("a")) == contents.end());
- ASSERT_TRUE(contents.find(blks.Get("c_begin_1")) == contents.end());
- ASSERT_TRUE(contents.find(blks.Get("c_begin_2")) == contents.end());
- ASSERT_TRUE(contents.find(blks.Get("c_mid")) == contents.end());
- ASSERT_TRUE(contents.find(blks.Get("c_end_1")) == contents.end());
- ASSERT_TRUE(contents.find(blks.Get("c_end_2")) == contents.end());
-
- // present, path through.
- ASSERT_TRUE(contents.find(blks.Get("entry")) != contents.end());
- ASSERT_TRUE(contents.find(blks.Get("exit")) != contents.end());
- ASSERT_TRUE(contents.find(blks.Get("b")) != contents.end());
-
- ArrayRef<const ExecutionSubgraph::ExcludedCohort> exclusions(esg.GetExcludedCohorts());
- ASSERT_EQ(exclusions.size(), 2u);
- BlockSet exclude_a({ blks.Get("a") });
- BlockSet exclude_c({ blks.Get("c_begin_1"),
- blks.Get("c_begin_2"),
- blks.Get("c_mid"),
- blks.Get("c_end_1"),
- blks.Get("c_end_2") });
- ASSERT_TRUE(std::find_if(exclusions.cbegin(),
- exclusions.cend(),
- [&](const ExecutionSubgraph::ExcludedCohort& it) {
- return it.Blocks() == exclude_a;
- }) != exclusions.cend());
- ASSERT_TRUE(
- std::find_if(
- exclusions.cbegin(), exclusions.cend(), [&](const ExecutionSubgraph::ExcludedCohort& it) {
- return it.Blocks() == exclude_c &&
- BlockSet({ blks.Get("c_begin_1"), blks.Get("c_begin_2") }) == it.EntryBlocks() &&
- BlockSet({ blks.Get("c_end_1"), blks.Get("c_end_2") }) == it.ExitBlocks();
- }) != exclusions.cend());
-}
-
-// ┌───────┐ ┌────────────┐
-// ┌─ │ right │ ◀── │ entry │
-// │ └───────┘ └────────────┘
-// │ │
-// │ │
-// │ ▼
-// │ ┌────────────┐
-// │ │ esc_top │
-// │ └────────────┘
-// │ │
-// │ │
-// │ ▼
-// │ ┌────────────┐
-// └──────────────▶ │ middle │ ─┐
-// └────────────┘ │
-// │ │
-// │ │
-// ▼ │
-// ┌────────────┐ │
-// │ esc_bottom │ │
-// └────────────┘ │
-// │ │
-// │ │
-// ▼ │
-// ┌────────────┐ │
-// │ exit │ ◀┘
-// └────────────┘
-TEST_F(ExecutionSubgraphTest, InAndOutEscape) {
- AdjacencyListGraph blks(SetupFromAdjacencyList("entry",
- "exit",
- { { "entry", "esc_top" },
- { "entry", "right" },
- { "esc_top", "middle" },
- { "right", "middle" },
- { "middle", "exit" },
- { "middle", "esc_bottom" },
- { "esc_bottom", "exit" } }));
-
- ASSERT_TRUE(ExecutionSubgraph::CanAnalyse(graph_));
- ExecutionSubgraph esg(graph_, GetScopedAllocator());
- esg.RemoveBlock(blks.Get("esc_top"));
- esg.RemoveBlock(blks.Get("esc_bottom"));
- esg.Finalize();
-
- std::unordered_set<const HBasicBlock*> contents(esg.ReachableBlocks().begin(),
- esg.ReachableBlocks().end());
- ASSERT_EQ(contents.size(), 0u);
- ASSERT_FALSE(esg.IsValid());
- ASSERT_FALSE(IsValidSubgraph(esg));
-
- ASSERT_EQ(contents.size(), 0u);
-}
-
-// Test with max number of successors and no removals.
-TEST_F(ExecutionSubgraphTest, BigNodes) {
- std::vector<std::string> mid_blocks;
- for (auto i : Range(ExecutionSubgraph::kMaxFilterableSuccessors)) {
- std::ostringstream oss;
- oss << "blk" << i;
- mid_blocks.push_back(oss.str().c_str());
- }
- ASSERT_EQ(mid_blocks.size(), ExecutionSubgraph::kMaxFilterableSuccessors);
- std::vector<AdjacencyListGraph::Edge> edges;
- for (const auto& mid : mid_blocks) {
- edges.emplace_back("entry", mid);
- edges.emplace_back(mid, "exit");
- }
- AdjacencyListGraph blks(SetupFromAdjacencyList("entry", "exit", edges));
- ASSERT_TRUE(ExecutionSubgraph::CanAnalyse(graph_));
- ExecutionSubgraph esg(graph_, GetScopedAllocator());
- esg.Finalize();
- ASSERT_TRUE(esg.IsValid());
- ASSERT_TRUE(IsValidSubgraph(esg));
- std::unordered_set<const HBasicBlock*> contents(esg.ReachableBlocks().begin(),
- esg.ReachableBlocks().end());
-
- for (const auto& mid : mid_blocks) {
- EXPECT_TRUE(contents.find(blks.Get(mid)) != contents.end()) << mid;
- }
- // + 2 for entry and exit nodes.
- ASSERT_EQ(contents.size(), ExecutionSubgraph::kMaxFilterableSuccessors + 2);
-}
-
-// Test with max number of successors and some removals.
-TEST_F(ExecutionSubgraphTest, BigNodesMissing) {
- std::vector<std::string> mid_blocks;
- for (auto i : Range(ExecutionSubgraph::kMaxFilterableSuccessors)) {
- std::ostringstream oss;
- oss << "blk" << i;
- mid_blocks.push_back(oss.str());
- }
- std::vector<AdjacencyListGraph::Edge> edges;
- for (const auto& mid : mid_blocks) {
- edges.emplace_back("entry", mid);
- edges.emplace_back(mid, "exit");
- }
- AdjacencyListGraph blks(SetupFromAdjacencyList("entry", "exit", edges));
- ASSERT_TRUE(ExecutionSubgraph::CanAnalyse(graph_));
- ExecutionSubgraph esg(graph_, GetScopedAllocator());
- esg.RemoveBlock(blks.Get("blk2"));
- esg.RemoveBlock(blks.Get("blk4"));
- esg.Finalize();
- ASSERT_TRUE(esg.IsValid());
- ASSERT_TRUE(IsValidSubgraph(esg));
- std::unordered_set<const HBasicBlock*> contents(esg.ReachableBlocks().begin(),
- esg.ReachableBlocks().end());
-
- ASSERT_EQ(contents.size(), ExecutionSubgraph::kMaxFilterableSuccessors + 2 - 2);
-
- // Not present, no path through.
- ASSERT_TRUE(contents.find(blks.Get("blk2")) == contents.end());
- ASSERT_TRUE(contents.find(blks.Get("blk4")) == contents.end());
-}
-
-// Test with max number of successors and all successors removed.
-TEST_F(ExecutionSubgraphTest, BigNodesNoPath) {
- std::vector<std::string> mid_blocks;
- for (auto i : Range(ExecutionSubgraph::kMaxFilterableSuccessors)) {
- std::ostringstream oss;
- oss << "blk" << i;
- mid_blocks.push_back(oss.str());
- }
- std::vector<AdjacencyListGraph::Edge> edges;
- for (const auto& mid : mid_blocks) {
- edges.emplace_back("entry", mid);
- edges.emplace_back(mid, "exit");
- }
- AdjacencyListGraph blks(SetupFromAdjacencyList("entry", "exit", edges));
- ASSERT_TRUE(ExecutionSubgraph::CanAnalyse(graph_));
- ExecutionSubgraph esg(graph_, GetScopedAllocator());
- for (const auto& mid : mid_blocks) {
- esg.RemoveBlock(blks.Get(mid));
- }
- esg.Finalize();
- ASSERT_FALSE(esg.IsValid());
- ASSERT_FALSE(IsValidSubgraph(esg));
-}
-
-// Test with max number of successors
-TEST_F(ExecutionSubgraphTest, CanAnalyseBig) {
- // Make an absurdly huge and well connected graph. This should be pretty worst-case scenario.
- constexpr size_t kNumBlocks = ExecutionSubgraph::kMaxFilterableSuccessors + 1000;
- std::vector<std::string> mid_blocks;
- for (auto i : Range(kNumBlocks)) {
- std::ostringstream oss;
- oss << "blk" << i;
- mid_blocks.push_back(oss.str());
- }
- std::vector<AdjacencyListGraph::Edge> edges;
- for (auto cur : Range(kNumBlocks)) {
- for (auto nxt :
- Range(cur + 1,
- std::min(cur + ExecutionSubgraph::kMaxFilterableSuccessors + 1, kNumBlocks))) {
- edges.emplace_back(mid_blocks[cur], mid_blocks[nxt]);
- }
- }
- AdjacencyListGraph blks(SetupFromAdjacencyList(mid_blocks.front(), mid_blocks.back(), edges));
- ASSERT_TRUE(ExecutionSubgraph::CanAnalyse(graph_));
-
- ExecutionSubgraph esg(graph_, GetScopedAllocator());
- esg.Finalize();
- ASSERT_TRUE(esg.IsValid());
- ASSERT_TRUE(IsValidSubgraph(esg));
- std::unordered_set<const HBasicBlock*> contents(esg.ReachableBlocks().begin(),
- esg.ReachableBlocks().end());
-
- ASSERT_EQ(contents.size(), kNumBlocks);
-}
-
-// Test with many successors
-TEST_F(ExecutionSubgraphTest, CanAnalyseBig2) {
- // Make an absurdly huge and well connected graph. This should be pretty worst-case scenario.
- constexpr size_t kNumBlocks = ExecutionSubgraph::kMaxFilterableSuccessors + 1000;
- constexpr size_t kTestMaxSuccessors = ExecutionSubgraph::kMaxFilterableSuccessors - 1;
- std::vector<std::string> mid_blocks;
- for (auto i : Range(kNumBlocks)) {
- std::ostringstream oss;
- oss << "blk" << i;
- mid_blocks.push_back(oss.str());
- }
- std::vector<AdjacencyListGraph::Edge> edges;
- for (auto cur : Range(kNumBlocks)) {
- for (auto nxt : Range(cur + 1, std::min(cur + 1 + kTestMaxSuccessors, kNumBlocks))) {
- edges.emplace_back(mid_blocks[cur], mid_blocks[nxt]);
- }
- }
- edges.emplace_back(mid_blocks.front(), mid_blocks.back());
- AdjacencyListGraph blks(SetupFromAdjacencyList(mid_blocks.front(), mid_blocks.back(), edges));
- ASSERT_TRUE(ExecutionSubgraph::CanAnalyse(graph_));
- ExecutionSubgraph esg(graph_, GetScopedAllocator());
- constexpr size_t kToRemoveIdx = kNumBlocks / 2;
- HBasicBlock* remove_implicit = blks.Get(mid_blocks[kToRemoveIdx]);
- for (HBasicBlock* pred : remove_implicit->GetPredecessors()) {
- esg.RemoveBlock(pred);
- }
- esg.Finalize();
- EXPECT_TRUE(esg.IsValid());
- EXPECT_TRUE(IsValidSubgraph(esg));
- std::unordered_set<const HBasicBlock*> contents(esg.ReachableBlocks().begin(),
- esg.ReachableBlocks().end());
-
- // Only entry and exit. The middle ones should eliminate everything else.
- EXPECT_EQ(contents.size(), 2u);
- EXPECT_TRUE(contents.find(remove_implicit) == contents.end());
- EXPECT_TRUE(contents.find(blks.Get(mid_blocks.front())) != contents.end());
- EXPECT_TRUE(contents.find(blks.Get(mid_blocks.back())) != contents.end());
-}
-
-// Test with too many successors
-TEST_F(ExecutionSubgraphTest, CanNotAnalyseBig) {
- std::vector<std::string> mid_blocks;
- for (auto i : Range(ExecutionSubgraph::kMaxFilterableSuccessors + 4)) {
- std::ostringstream oss;
- oss << "blk" << i;
- mid_blocks.push_back(oss.str());
- }
- std::vector<AdjacencyListGraph::Edge> edges;
- for (const auto& mid : mid_blocks) {
- edges.emplace_back("entry", mid);
- edges.emplace_back(mid, "exit");
- }
- AdjacencyListGraph blks(SetupFromAdjacencyList("entry", "exit", edges));
- ASSERT_FALSE(ExecutionSubgraph::CanAnalyse(graph_));
-}
-} // namespace art
diff --git a/compiler/optimizing/execution_subgraph_test.h b/compiler/optimizing/execution_subgraph_test.h
deleted file mode 100644
index cee105a045..0000000000
--- a/compiler/optimizing/execution_subgraph_test.h
+++ /dev/null
@@ -1,40 +0,0 @@
-/*
- * Copyright (C) 2020 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef ART_COMPILER_OPTIMIZING_EXECUTION_SUBGRAPH_TEST_H_
-#define ART_COMPILER_OPTIMIZING_EXECUTION_SUBGRAPH_TEST_H_
-
-#include "android-base/macros.h"
-
-#include "base/macros.h"
-
-namespace art HIDDEN {
-
-class HGraph;
-class ExecutionSubgraph;
-
-class ExecutionSubgraphTestHelper {
- public:
- static bool CalculateValidity(HGraph* graph, const ExecutionSubgraph* subgraph);
-
- private:
- ExecutionSubgraphTestHelper() = delete;
-
- DISALLOW_COPY_AND_ASSIGN(ExecutionSubgraphTestHelper);
-};
-} // namespace art
-
-#endif // ART_COMPILER_OPTIMIZING_EXECUTION_SUBGRAPH_TEST_H_
diff --git a/compiler/optimizing/graph_checker.cc b/compiler/optimizing/graph_checker.cc
index 190b362145..e8c94dd6b4 100644
--- a/compiler/optimizing/graph_checker.cc
+++ b/compiler/optimizing/graph_checker.cc
@@ -28,6 +28,7 @@
#include "code_generator.h"
#include "handle.h"
#include "mirror/class.h"
+#include "nodes.h"
#include "obj_ptr-inl.h"
#include "scoped_thread_state_change-inl.h"
#include "subtype_check.h"
@@ -168,52 +169,68 @@ void GraphChecker::CheckGraphFlags() {
void GraphChecker::VisitBasicBlock(HBasicBlock* block) {
current_block_ = block;
- // Use local allocator for allocating memory.
- ScopedArenaAllocator allocator(GetGraph()->GetArenaStack());
-
- // Check consistency with respect to predecessors of `block`.
- // Note: Counting duplicates with a sorted vector uses up to 6x less memory
- // than ArenaSafeMap<HBasicBlock*, size_t> and also allows storage reuse.
- ScopedArenaVector<HBasicBlock*> sorted_predecessors(allocator.Adapter(kArenaAllocGraphChecker));
- sorted_predecessors.assign(block->GetPredecessors().begin(), block->GetPredecessors().end());
- std::sort(sorted_predecessors.begin(), sorted_predecessors.end());
- for (auto it = sorted_predecessors.begin(), end = sorted_predecessors.end(); it != end; ) {
- HBasicBlock* p = *it++;
- size_t p_count_in_block_predecessors = 1u;
- for (; it != end && *it == p; ++it) {
- ++p_count_in_block_predecessors;
- }
- size_t block_count_in_p_successors =
- std::count(p->GetSuccessors().begin(), p->GetSuccessors().end(), block);
- if (p_count_in_block_predecessors != block_count_in_p_successors) {
- AddError(StringPrintf(
- "Block %d lists %zu occurrences of block %d in its predecessors, whereas "
- "block %d lists %zu occurrences of block %d in its successors.",
- block->GetBlockId(), p_count_in_block_predecessors, p->GetBlockId(),
- p->GetBlockId(), block_count_in_p_successors, block->GetBlockId()));
- }
- }
+ {
+    // Use a local allocator for allocating memory. We use C++ scopes (i.e. `{}`) to limit the
+    // lifetime of this `ScopedArenaAllocator` and reclaim its memory as soon as possible.
+ ScopedArenaAllocator allocator(GetGraph()->GetArenaStack());
- // Check consistency with respect to successors of `block`.
- // Note: Counting duplicates with a sorted vector uses up to 6x less memory
- // than ArenaSafeMap<HBasicBlock*, size_t> and also allows storage reuse.
- ScopedArenaVector<HBasicBlock*> sorted_successors(allocator.Adapter(kArenaAllocGraphChecker));
- sorted_successors.assign(block->GetSuccessors().begin(), block->GetSuccessors().end());
- std::sort(sorted_successors.begin(), sorted_successors.end());
- for (auto it = sorted_successors.begin(), end = sorted_successors.end(); it != end; ) {
- HBasicBlock* s = *it++;
- size_t s_count_in_block_successors = 1u;
- for (; it != end && *it == s; ++it) {
- ++s_count_in_block_successors;
+ {
+ // Check consistency with respect to predecessors of `block`.
+ // Note: Counting duplicates with a sorted vector uses up to 6x less memory
+ // than ArenaSafeMap<HBasicBlock*, size_t> and also allows storage reuse.
+ ScopedArenaVector<HBasicBlock*> sorted_predecessors(
+ allocator.Adapter(kArenaAllocGraphChecker));
+ sorted_predecessors.assign(block->GetPredecessors().begin(), block->GetPredecessors().end());
+ std::sort(sorted_predecessors.begin(), sorted_predecessors.end());
+ for (auto it = sorted_predecessors.begin(), end = sorted_predecessors.end(); it != end;) {
+ HBasicBlock* p = *it++;
+ size_t p_count_in_block_predecessors = 1u;
+ for (; it != end && *it == p; ++it) {
+ ++p_count_in_block_predecessors;
+ }
+ size_t block_count_in_p_successors =
+ std::count(p->GetSuccessors().begin(), p->GetSuccessors().end(), block);
+ if (p_count_in_block_predecessors != block_count_in_p_successors) {
+ AddError(StringPrintf(
+ "Block %d lists %zu occurrences of block %d in its predecessors, whereas "
+ "block %d lists %zu occurrences of block %d in its successors.",
+ block->GetBlockId(),
+ p_count_in_block_predecessors,
+ p->GetBlockId(),
+ p->GetBlockId(),
+ block_count_in_p_successors,
+ block->GetBlockId()));
+ }
+ }
}
- size_t block_count_in_s_predecessors =
- std::count(s->GetPredecessors().begin(), s->GetPredecessors().end(), block);
- if (s_count_in_block_successors != block_count_in_s_predecessors) {
- AddError(StringPrintf(
- "Block %d lists %zu occurrences of block %d in its successors, whereas "
- "block %d lists %zu occurrences of block %d in its predecessors.",
- block->GetBlockId(), s_count_in_block_successors, s->GetBlockId(),
- s->GetBlockId(), block_count_in_s_predecessors, block->GetBlockId()));
+
+ {
+ // Check consistency with respect to successors of `block`.
+ // Note: Counting duplicates with a sorted vector uses up to 6x less memory
+ // than ArenaSafeMap<HBasicBlock*, size_t> and also allows storage reuse.
+ ScopedArenaVector<HBasicBlock*> sorted_successors(allocator.Adapter(kArenaAllocGraphChecker));
+ sorted_successors.assign(block->GetSuccessors().begin(), block->GetSuccessors().end());
+ std::sort(sorted_successors.begin(), sorted_successors.end());
+ for (auto it = sorted_successors.begin(), end = sorted_successors.end(); it != end;) {
+ HBasicBlock* s = *it++;
+ size_t s_count_in_block_successors = 1u;
+ for (; it != end && *it == s; ++it) {
+ ++s_count_in_block_successors;
+ }
+ size_t block_count_in_s_predecessors =
+ std::count(s->GetPredecessors().begin(), s->GetPredecessors().end(), block);
+ if (s_count_in_block_successors != block_count_in_s_predecessors) {
+ AddError(
+ StringPrintf("Block %d lists %zu occurrences of block %d in its successors, whereas "
+ "block %d lists %zu occurrences of block %d in its predecessors.",
+ block->GetBlockId(),
+ s_count_in_block_successors,
+ s->GetBlockId(),
+ s->GetBlockId(),
+ block_count_in_s_predecessors,
+ block->GetBlockId()));
+ }
+ }
}
}
@@ -506,6 +523,26 @@ void GraphChecker::VisitMonitorOperation(HMonitorOperation* monitor_op) {
flag_info_.seen_monitor_operation = true;
}
+bool GraphChecker::ContainedInItsBlockList(HInstruction* instruction) {
+ HBasicBlock* block = instruction->GetBlock();
+ ScopedArenaSafeMap<HBasicBlock*, ScopedArenaHashSet<HInstruction*>>& instruction_set =
+ instruction->IsPhi() ? phis_per_block_ : instructions_per_block_;
+ auto map_it = instruction_set.find(block);
+ if (map_it == instruction_set.end()) {
+ // Populate extra bookkeeping.
+ map_it = instruction_set.insert(
+ {block, ScopedArenaHashSet<HInstruction*>(allocator_.Adapter(kArenaAllocGraphChecker))})
+ .first;
+ const HInstructionList& instruction_list = instruction->IsPhi() ?
+ instruction->GetBlock()->GetPhis() :
+ instruction->GetBlock()->GetInstructions();
+ for (HInstructionIterator list_it(instruction_list); !list_it.Done(); list_it.Advance()) {
+ map_it->second.insert(list_it.Current());
+ }
+ }
+ return map_it->second.find(instruction) != map_it->second.end();
+}
+
void GraphChecker::VisitInstruction(HInstruction* instruction) {
if (seen_ids_.IsBitSet(instruction->GetId())) {
AddError(StringPrintf("Instruction id %d is duplicate in graph.",
@@ -528,23 +565,19 @@ void GraphChecker::VisitInstruction(HInstruction* instruction) {
instruction->GetBlock()->GetBlockId()));
}
- // Ensure the inputs of `instruction` are defined in a block of the graph.
+ // Ensure the inputs of `instruction` are defined in a block of the graph, and the entry in the
+ // use list is consistent.
for (HInstruction* input : instruction->GetInputs()) {
if (input->GetBlock() == nullptr) {
AddError(StringPrintf("Input %d of instruction %d is not in any "
"basic block of the control-flow graph.",
input->GetId(),
instruction->GetId()));
- } else {
- const HInstructionList& list = input->IsPhi()
- ? input->GetBlock()->GetPhis()
- : input->GetBlock()->GetInstructions();
- if (!list.Contains(input)) {
+ } else if (!ContainedInItsBlockList(input)) {
AddError(StringPrintf("Input %d of instruction %d is not defined "
"in a basic block of the control-flow graph.",
input->GetId(),
instruction->GetId()));
- }
}
}
@@ -552,10 +585,7 @@ void GraphChecker::VisitInstruction(HInstruction* instruction) {
// and the entry in the use list is consistent.
for (const HUseListNode<HInstruction*>& use : instruction->GetUses()) {
HInstruction* user = use.GetUser();
- const HInstructionList& list = user->IsPhi()
- ? user->GetBlock()->GetPhis()
- : user->GetBlock()->GetInstructions();
- if (!list.Contains(user)) {
+ if (!ContainedInItsBlockList(user)) {
AddError(StringPrintf("User %s:%d of instruction %d is not defined "
"in a basic block of the control-flow graph.",
user->DebugName(),
@@ -587,21 +617,38 @@ void GraphChecker::VisitInstruction(HInstruction* instruction) {
}
// Ensure 'instruction' has pointers to its inputs' use entries.
- auto&& input_records = instruction->GetInputRecords();
- for (size_t i = 0; i < input_records.size(); ++i) {
- const HUserRecord<HInstruction*>& input_record = input_records[i];
- HInstruction* input = input_record.GetInstruction();
- if ((input_record.GetBeforeUseNode() == input->GetUses().end()) ||
- (input_record.GetUseNode() == input->GetUses().end()) ||
- !input->GetUses().ContainsNode(*input_record.GetUseNode()) ||
- (input_record.GetUseNode()->GetIndex() != i)) {
- AddError(StringPrintf("Instruction %s:%d has an invalid iterator before use entry "
- "at input %u (%s:%d).",
- instruction->DebugName(),
- instruction->GetId(),
- static_cast<unsigned>(i),
- input->DebugName(),
- input->GetId()));
+ {
+ auto&& input_records = instruction->GetInputRecords();
+ for (size_t i = 0; i < input_records.size(); ++i) {
+ const HUserRecord<HInstruction*>& input_record = input_records[i];
+ HInstruction* input = input_record.GetInstruction();
+
+ // Populate bookkeeping, if needed. See comment in graph_checker.h for uses_per_instruction_.
+ auto it = uses_per_instruction_.find(input->GetId());
+ if (it == uses_per_instruction_.end()) {
+ it = uses_per_instruction_
+ .insert({input->GetId(),
+ ScopedArenaSet<const art::HUseListNode<art::HInstruction*>*>(
+ allocator_.Adapter(kArenaAllocGraphChecker))})
+ .first;
+ for (auto&& use : input->GetUses()) {
+ it->second.insert(std::addressof(use));
+ }
+ }
+
+ if ((input_record.GetBeforeUseNode() == input->GetUses().end()) ||
+ (input_record.GetUseNode() == input->GetUses().end()) ||
+ (it->second.find(std::addressof(*input_record.GetUseNode())) == it->second.end()) ||
+ (input_record.GetUseNode()->GetIndex() != i)) {
+ AddError(
+ StringPrintf("Instruction %s:%d has an invalid iterator before use entry "
+ "at input %u (%s:%d).",
+ instruction->DebugName(),
+ instruction->GetId(),
+ static_cast<unsigned>(i),
+ input->DebugName(),
+ input->GetId()));
+ }
}
}
@@ -688,10 +735,59 @@ void GraphChecker::VisitInvoke(HInvoke* invoke) {
}
flag_info_.seen_always_throwing_invokes = true;
}
+
+ // Check for intrinsics which should have been replaced by intermediate representation in the
+ // instruction builder.
+ switch (invoke->GetIntrinsic()) {
+ case Intrinsics::kIntegerRotateRight:
+ case Intrinsics::kLongRotateRight:
+ case Intrinsics::kIntegerRotateLeft:
+ case Intrinsics::kLongRotateLeft:
+ case Intrinsics::kIntegerCompare:
+ case Intrinsics::kLongCompare:
+ case Intrinsics::kIntegerSignum:
+ case Intrinsics::kLongSignum:
+ case Intrinsics::kFloatIsNaN:
+ case Intrinsics::kDoubleIsNaN:
+ case Intrinsics::kStringIsEmpty:
+ case Intrinsics::kUnsafeLoadFence:
+ case Intrinsics::kUnsafeStoreFence:
+ case Intrinsics::kUnsafeFullFence:
+ case Intrinsics::kJdkUnsafeLoadFence:
+ case Intrinsics::kJdkUnsafeStoreFence:
+ case Intrinsics::kJdkUnsafeFullFence:
+ case Intrinsics::kVarHandleFullFence:
+ case Intrinsics::kVarHandleAcquireFence:
+ case Intrinsics::kVarHandleReleaseFence:
+ case Intrinsics::kVarHandleLoadLoadFence:
+ case Intrinsics::kVarHandleStoreStoreFence:
+ case Intrinsics::kMathMinIntInt:
+ case Intrinsics::kMathMinLongLong:
+ case Intrinsics::kMathMinFloatFloat:
+ case Intrinsics::kMathMinDoubleDouble:
+ case Intrinsics::kMathMaxIntInt:
+ case Intrinsics::kMathMaxLongLong:
+ case Intrinsics::kMathMaxFloatFloat:
+ case Intrinsics::kMathMaxDoubleDouble:
+ case Intrinsics::kMathAbsInt:
+ case Intrinsics::kMathAbsLong:
+ case Intrinsics::kMathAbsFloat:
+ case Intrinsics::kMathAbsDouble:
+ AddError(
+          StringPrintf("The graph contains an intrinsic which should have been replaced in the "
+ "instruction builder: %s:%d in block %d.",
+ invoke->DebugName(),
+ invoke->GetId(),
+ invoke->GetBlock()->GetBlockId()));
+ break;
+ default:
+ break;
+ }
}
void GraphChecker::VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) {
- // We call VisitInvoke and not VisitInstruction to de-duplicate the always throwing code check.
+ // We call VisitInvoke and not VisitInstruction to de-duplicate the common code: always throwing
+  // and intrinsic checks.
VisitInvoke(invoke);
if (invoke->IsStaticWithExplicitClinitCheck()) {
@@ -944,8 +1040,7 @@ static bool IsSameSizeConstant(const HInstruction* insn1, const HInstruction* in
static bool IsConstantEquivalent(const HInstruction* insn1,
const HInstruction* insn2,
BitVector* visited) {
- if (insn1->IsPhi() &&
- insn1->AsPhi()->IsVRegEquivalentOf(insn2)) {
+ if (insn1->IsPhi() && insn1->AsPhi()->IsVRegEquivalentOf(insn2)) {
HConstInputsRef insn1_inputs = insn1->GetInputs();
HConstInputsRef insn2_inputs = insn2->GetInputs();
if (insn1_inputs.size() != insn2_inputs.size()) {
diff --git a/compiler/optimizing/graph_checker.h b/compiler/optimizing/graph_checker.h
index d6644f3b50..38e2d7ced9 100644
--- a/compiler/optimizing/graph_checker.h
+++ b/compiler/optimizing/graph_checker.h
@@ -22,7 +22,7 @@
#include "base/arena_bit_vector.h"
#include "base/bit_vector-inl.h"
#include "base/macros.h"
-#include "base/scoped_arena_allocator.h"
+#include "base/scoped_arena_containers.h"
#include "nodes.h"
namespace art HIDDEN {
@@ -35,12 +35,15 @@ class GraphChecker : public HGraphDelegateVisitor {
explicit GraphChecker(HGraph* graph,
CodeGenerator* codegen = nullptr,
const char* dump_prefix = "art::GraphChecker: ")
- : HGraphDelegateVisitor(graph),
- errors_(graph->GetAllocator()->Adapter(kArenaAllocGraphChecker)),
- dump_prefix_(dump_prefix),
- allocator_(graph->GetArenaStack()),
- seen_ids_(&allocator_, graph->GetCurrentInstructionId(), false, kArenaAllocGraphChecker),
- codegen_(codegen) {
+ : HGraphDelegateVisitor(graph),
+ errors_(graph->GetAllocator()->Adapter(kArenaAllocGraphChecker)),
+ dump_prefix_(dump_prefix),
+ allocator_(graph->GetArenaStack()),
+ seen_ids_(&allocator_, graph->GetCurrentInstructionId(), false, kArenaAllocGraphChecker),
+ uses_per_instruction_(allocator_.Adapter(kArenaAllocGraphChecker)),
+ instructions_per_block_(allocator_.Adapter(kArenaAllocGraphChecker)),
+ phis_per_block_(allocator_.Adapter(kArenaAllocGraphChecker)),
+ codegen_(codegen) {
seen_ids_.ClearAllBits();
}
@@ -107,7 +110,7 @@ class GraphChecker : public HGraphDelegateVisitor {
}
}
- protected:
+ private:
// Report a new error.
void AddError(const std::string& error) {
errors_.push_back(error);
@@ -118,17 +121,33 @@ class GraphChecker : public HGraphDelegateVisitor {
// Errors encountered while checking the graph.
ArenaVector<std::string> errors_;
- private:
void VisitReversePostOrder();
// Checks that the graph's flags are set correctly.
void CheckGraphFlags();
+  // Checks if `instruction` is in its block's instruction/phi list. To do so, it searches
+  // instructions_per_block_/phis_per_block_, which mirror those lists as sets. If the set to
+  // check hasn't been populated yet, it is populated on demand.
+ bool ContainedInItsBlockList(HInstruction* instruction);
+
// String displayed before dumped errors.
const char* const dump_prefix_;
ScopedArenaAllocator allocator_;
ArenaBitVector seen_ids_;
+ // As part of VisitInstruction, we verify that the instruction's input_record is present in the
+ // corresponding input's GetUses. If an instruction is used in many places (e.g. 200K+ uses), the
+ // linear search through GetUses is too slow. We can use bookkeeping to search in a set, instead
+ // of a list.
+ ScopedArenaSafeMap<int, ScopedArenaSet<const art::HUseListNode<art::HInstruction*>*>>
+ uses_per_instruction_;
+
+ // Extra bookkeeping to increase GraphChecker's speed while asking if an instruction is contained
+ // in a list of instructions/phis.
+ ScopedArenaSafeMap<HBasicBlock*, ScopedArenaHashSet<HInstruction*>> instructions_per_block_;
+ ScopedArenaSafeMap<HBasicBlock*, ScopedArenaHashSet<HInstruction*>> phis_per_block_;
+
// Used to access target information.
CodeGenerator* codegen_;
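
The bookkeeping added above replaces repeated linear walks of a block's instruction/phi lists (and of an input's use list) with lazily built hash sets. A minimal standalone sketch of the same lazy-caching pattern, using illustrative stand-in types rather than ART's HBasicBlock/HInstruction and arena containers:

    #include <unordered_map>
    #include <unordered_set>
    #include <vector>

    struct Instruction;  // Opaque node type; only pointer identity matters here.

    struct Block {
      std::vector<Instruction*> instructions;  // Stand-in for a block's instruction list.
    };

    class MembershipCache {
     public:
      // Returns whether `instruction` belongs to `block`, building the per-block
      // set on the first query so later lookups avoid a linear list walk.
      bool Contains(Block* block, Instruction* instruction) {
        auto it = per_block_.find(block);
        if (it == per_block_.end()) {
          std::unordered_set<Instruction*> set(block->instructions.begin(),
                                               block->instructions.end());
          it = per_block_.emplace(block, std::move(set)).first;
        }
        return it->second.count(instruction) != 0;
      }

     private:
      std::unordered_map<Block*, std::unordered_set<Instruction*>> per_block_;
    };

The first query for a given block pays the one-time cost of building the set; every later membership check is an amortized O(1) lookup, which is what makes the very large use lists mentioned in the comment above tractable.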
diff --git a/compiler/optimizing/graph_visualizer.cc b/compiler/optimizing/graph_visualizer.cc
index 73bdd1e223..b7f7a0f550 100644
--- a/compiler/optimizing/graph_visualizer.cc
+++ b/compiler/optimizing/graph_visualizer.cc
@@ -115,7 +115,9 @@ std::ostream& operator<<(std::ostream& os, const StringList& list) {
}
}
-#ifndef ART_STATIC_LIBART_COMPILER
+// On target: load `libart-disassembler` only when required (to save on memory).
+// On host: `libart-disassembler` should be linked directly (either as a static or dynamic lib)
+#ifdef ART_TARGET
using create_disasm_prototype = Disassembler*(InstructionSet, DisassemblerOptions*);
#endif
@@ -125,7 +127,7 @@ class HGraphVisualizerDisassembler {
const uint8_t* base_address,
const uint8_t* end_address)
: instruction_set_(instruction_set), disassembler_(nullptr) {
-#ifndef ART_STATIC_LIBART_COMPILER
+#ifdef ART_TARGET
constexpr const char* libart_disassembler_so_name =
kIsDebugBuild ? "libartd-disassembler.so" : "libart-disassembler.so";
libart_disassembler_handle_ = dlopen(libart_disassembler_so_name, RTLD_NOW);
@@ -159,7 +161,7 @@ class HGraphVisualizerDisassembler {
~HGraphVisualizerDisassembler() {
// We need to call ~Disassembler() before we close the library.
disassembler_.reset();
-#ifndef ART_STATIC_LIBART_COMPILER
+#ifdef ART_TARGET
if (libart_disassembler_handle_ != nullptr) {
dlclose(libart_disassembler_handle_);
}
@@ -184,7 +186,7 @@ class HGraphVisualizerDisassembler {
InstructionSet instruction_set_;
std::unique_ptr<Disassembler> disassembler_;
-#ifndef ART_STATIC_LIBART_COMPILER
+#ifdef ART_TARGET
void* libart_disassembler_handle_;
#endif
};
@@ -494,6 +496,11 @@ class HGraphVisualizerPrinter : public HGraphDelegateVisitor {
StartAttributeStream("bias") << condition->GetBias();
}
+ void VisitIf(HIf* if_instr) override {
+ StartAttributeStream("true_count") << if_instr->GetTrueCount();
+ StartAttributeStream("false_count") << if_instr->GetFalseCount();
+ }
+
void VisitInvoke(HInvoke* invoke) override {
StartAttributeStream("dex_file_index") << invoke->GetMethodReference().index;
ArtMethod* method = invoke->GetResolvedMethod();
@@ -538,13 +545,6 @@ class HGraphVisualizerPrinter : public HGraphDelegateVisitor {
StartAttributeStream("invoke_type") << "InvokePolymorphic";
}
- void VisitPredicatedInstanceFieldGet(HPredicatedInstanceFieldGet* iget) override {
- StartAttributeStream("field_name") <<
- iget->GetFieldInfo().GetDexFile().PrettyField(iget->GetFieldInfo().GetFieldIndex(),
- /* with type */ false);
- StartAttributeStream("field_type") << iget->GetFieldType();
- }
-
void VisitInstanceFieldGet(HInstanceFieldGet* iget) override {
StartAttributeStream("field_name") <<
iget->GetFieldInfo().GetDexFile().PrettyField(iget->GetFieldInfo().GetFieldIndex(),
@@ -557,8 +557,6 @@ class HGraphVisualizerPrinter : public HGraphDelegateVisitor {
iset->GetFieldInfo().GetDexFile().PrettyField(iset->GetFieldInfo().GetFieldIndex(),
/* with type */ false);
StartAttributeStream("field_type") << iset->GetFieldType();
- StartAttributeStream("predicated")
- << std::boolalpha << iset->GetIsPredicatedSet() << std::noboolalpha;
StartAttributeStream("write_barrier_kind") << iset->GetWriteBarrierKind();
}
@@ -610,6 +608,7 @@ class HGraphVisualizerPrinter : public HGraphDelegateVisitor {
}
void VisitVecMemoryOperation(HVecMemoryOperation* vec_mem_operation) override {
+ VisitVecOperation(vec_mem_operation);
StartAttributeStream("alignment") << vec_mem_operation->GetAlignment().ToString();
}
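
For context on the ART_TARGET switch above: on target the disassembler is loaded on demand through dlopen/dlsym so the memory cost is only paid when visualization actually runs, while host builds link libart-disassembler directly. The sketch below shows only the general lazy-loading pattern; the library name, symbol name, and factory signature are simplified placeholders, not libart-disassembler's real interface (which uses the create_disasm_prototype shown in the diff):

    #include <dlfcn.h>
    #include <cstdio>

    // Hypothetical factory signature; the real create_disasm_prototype takes an
    // InstructionSet and a DisassemblerOptions*.
    using CreateFn = void* (*)();

    void* LoadPluginOnDemand(const char* so_name, const char* factory_symbol) {
      void* handle = dlopen(so_name, RTLD_NOW);
      if (handle == nullptr) {
        std::fprintf(stderr, "dlopen(%s) failed: %s\n", so_name, dlerror());
        return nullptr;
      }
      void* sym = dlsym(handle, factory_symbol);
      if (sym == nullptr) {
        std::fprintf(stderr, "dlsym(%s) failed: %s\n", factory_symbol, dlerror());
        dlclose(handle);
        return nullptr;
      }
      // The handle must outlive everything the factory creates; the visualizer
      // destroys its Disassembler before calling dlclose for the same reason.
      return reinterpret_cast<CreateFn>(sym)();
    }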
diff --git a/compiler/optimizing/induction_var_range.cc b/compiler/optimizing/induction_var_range.cc
index 9b78699ead..8568062933 100644
--- a/compiler/optimizing/induction_var_range.cc
+++ b/compiler/optimizing/induction_var_range.cc
@@ -255,8 +255,8 @@ bool InductionVarRange::CanGenerateRange(const HBasicBlock* context,
nullptr, // nothing generated yet
&stride_value,
needs_finite_test,
- needs_taken_test)
- && (stride_value == -1 ||
+ needs_taken_test) &&
+ (stride_value == -1 ||
stride_value == 0 ||
stride_value == 1); // avoid arithmetic wrap-around anomalies.
}
@@ -280,7 +280,10 @@ void InductionVarRange::GenerateRange(const HBasicBlock* context,
nullptr,
&stride_value,
&b1,
- &b2)) {
+ &b2) ||
+ (stride_value != -1 &&
+ stride_value != 0 &&
+ stride_value != 1)) {
LOG(FATAL) << "Failed precondition: CanGenerateRange()";
}
}
@@ -303,7 +306,10 @@ HInstruction* InductionVarRange::GenerateTakenTest(HInstruction* loop_control,
&taken_test,
&stride_value,
&b1,
- &b2)) {
+ &b2) ||
+ (stride_value != -1 &&
+ stride_value != 0 &&
+ stride_value != 1)) {
LOG(FATAL) << "Failed precondition: CanGenerateRange()";
}
return taken_test;
@@ -336,7 +342,8 @@ HInstruction* InductionVarRange::GenerateLastValue(HInstruction* instruction,
HInstruction* last_value = nullptr;
bool is_last_value = true;
int64_t stride_value = 0;
- bool b1, b2; // unused
+ bool needs_finite_test = false;
+ bool needs_taken_test = false;
if (!GenerateRangeOrLastValue(context,
instruction,
is_last_value,
@@ -346,8 +353,10 @@ HInstruction* InductionVarRange::GenerateLastValue(HInstruction* instruction,
&last_value,
nullptr,
&stride_value,
- &b1,
- &b2)) {
+ &needs_finite_test,
+ &needs_taken_test) ||
+ needs_finite_test ||
+ needs_taken_test) {
LOG(FATAL) << "Failed precondition: CanGenerateLastValue()";
}
return last_value;
@@ -1066,11 +1075,11 @@ bool InductionVarRange::GenerateRangeOrLastValue(const HBasicBlock* context,
if (*stride_value > 0) {
lower = nullptr;
return GenerateLastValueLinear(
- context, loop, info, trip, graph, block, /*is_min=*/false, upper);
+ context, loop, info, trip, graph, block, /*is_min=*/false, upper, needs_taken_test);
} else {
upper = nullptr;
return GenerateLastValueLinear(
- context, loop, info, trip, graph, block, /*is_min=*/true, lower);
+ context, loop, info, trip, graph, block, /*is_min=*/true, lower, needs_taken_test);
}
case HInductionVarAnalysis::kPolynomial:
return GenerateLastValuePolynomial(context, loop, info, trip, graph, block, lower);
@@ -1124,7 +1133,8 @@ bool InductionVarRange::GenerateLastValueLinear(const HBasicBlock* context,
HGraph* graph,
HBasicBlock* block,
bool is_min,
- /*out*/ HInstruction** result) const {
+ /*out*/ HInstruction** result,
+ /*inout*/ bool* needs_taken_test) const {
DataType::Type type = info->type;
// Avoid any narrowing linear induction or any type mismatch between the linear induction and the
// trip count expression.
@@ -1132,18 +1142,27 @@ bool InductionVarRange::GenerateLastValueLinear(const HBasicBlock* context,
return false;
}
- // Stride value must be a known constant that fits into int32.
+ // Stride value must be a known constant that fits into int32. The stride will be the `i` in `a *
+ // i + b`.
int64_t stride_value = 0;
if (!IsConstant(context, loop, info->op_a, kExact, &stride_value) ||
!CanLongValueFitIntoInt(stride_value)) {
return false;
}
- // We require `a` to be a constant value that didn't overflow.
+ // We require the calculation of `a` to not overflow.
const bool is_min_a = stride_value >= 0 ? is_min : !is_min;
- Value val_a = GetVal(context, loop, trip, trip, is_min_a);
+ HInstruction* opa;
HInstruction* opb;
- if (!IsConstantValue(val_a) ||
+ if (!GenerateCode(context,
+ loop,
+ trip,
+ trip,
+ graph,
+ block,
+ is_min_a,
+ &opa,
+ /*allow_potential_overflow=*/false) ||
!GenerateCode(context, loop, info->op_b, trip, graph, block, is_min, &opb)) {
return false;
}
@@ -1151,7 +1170,8 @@ bool InductionVarRange::GenerateLastValueLinear(const HBasicBlock* context,
if (graph != nullptr) {
ArenaAllocator* allocator = graph->GetAllocator();
HInstruction* oper;
- HInstruction* opa = graph->GetConstant(type, val_a.b_constant);
+    // Emit instructions for `a * i + b`. These are fine to overflow since they would also have
+    // overflowed if we had kept the loop.
if (stride_value == 1) {
oper = new (allocator) HAdd(type, opa, opb);
} else if (stride_value == -1) {
@@ -1162,6 +1182,15 @@ bool InductionVarRange::GenerateLastValueLinear(const HBasicBlock* context,
}
*result = Insert(block, oper);
}
+
+ if (*needs_taken_test) {
+ if (TryGenerateTakenTest(context, loop, trip->op_b, graph, block, result, opb)) {
+ *needs_taken_test = false; // taken care of
+ } else {
+ return false;
+ }
+ }
+
return true;
}
@@ -1298,8 +1327,8 @@ bool InductionVarRange::GenerateLastValuePeriodic(const HBasicBlock* context,
HInductionVarAnalysis::InductionInfo* trip,
HGraph* graph,
HBasicBlock* block,
- /*out*/HInstruction** result,
- /*out*/bool* needs_taken_test) const {
+ /*out*/ HInstruction** result,
+ /*inout*/ bool* needs_taken_test) const {
DCHECK(info != nullptr);
DCHECK_EQ(info->induction_class, HInductionVarAnalysis::kPeriodic);
// Count period and detect all-invariants.
@@ -1339,6 +1368,15 @@ bool InductionVarRange::GenerateLastValuePeriodic(const HBasicBlock* context,
HInstruction* x = nullptr;
HInstruction* y = nullptr;
HInstruction* t = nullptr;
+
+ // Overflows when the stride is equal to `1` are fine since the periodicity is
+  // `2` and the lowest bit is the same. Similarly for `-1`.
+ auto allow_potential_overflow = [&]() {
+ int64_t stride_value = 0;
+ return IsConstant(context, loop, trip->op_a->op_b, kExact, &stride_value) &&
+ (stride_value == 1 || stride_value == -1);
+ };
+
if (period == 2 &&
GenerateCode(context,
loop,
@@ -1363,7 +1401,8 @@ bool InductionVarRange::GenerateLastValuePeriodic(const HBasicBlock* context,
graph,
block,
/*is_min=*/ false,
- graph ? &t : nullptr)) {
+ graph ? &t : nullptr,
+ allow_potential_overflow())) {
// During actual code generation (graph != nullptr), generate is_even ? x : y.
if (graph != nullptr) {
DataType::Type type = trip->type;
@@ -1374,21 +1413,9 @@ bool InductionVarRange::GenerateLastValuePeriodic(const HBasicBlock* context,
Insert(block, new (allocator) HEqual(msk, graph->GetConstant(type, 0), kNoDexPc));
*result = Insert(block, new (graph->GetAllocator()) HSelect(is_even, x, y, kNoDexPc));
}
- // Guard select with taken test if needed.
+
if (*needs_taken_test) {
- HInstruction* is_taken = nullptr;
- if (GenerateCode(context,
- loop,
- trip->op_b,
- /*trip=*/ nullptr,
- graph,
- block,
- /*is_min=*/ false,
- graph ? &is_taken : nullptr)) {
- if (graph != nullptr) {
- ArenaAllocator* allocator = graph->GetAllocator();
- *result = Insert(block, new (allocator) HSelect(is_taken, *result, x, kNoDexPc));
- }
+ if (TryGenerateTakenTest(context, loop, trip->op_b, graph, block, result, x)) {
*needs_taken_test = false; // taken care of
} else {
return false;
@@ -1406,7 +1433,8 @@ bool InductionVarRange::GenerateCode(const HBasicBlock* context,
HGraph* graph, // when set, code is generated
HBasicBlock* block,
bool is_min,
- /*out*/HInstruction** result) const {
+ /*out*/ HInstruction** result,
+ bool allow_potential_overflow) const {
if (info != nullptr) {
// If during codegen, the result is not needed (nullptr), simply return success.
if (graph != nullptr && result == nullptr) {
@@ -1431,8 +1459,41 @@ bool InductionVarRange::GenerateCode(const HBasicBlock* context,
case HInductionVarAnalysis::kLE:
case HInductionVarAnalysis::kGT:
case HInductionVarAnalysis::kGE:
- if (GenerateCode(context, loop, info->op_a, trip, graph, block, is_min, &opa) &&
- GenerateCode(context, loop, info->op_b, trip, graph, block, is_min, &opb)) {
+ if (GenerateCode(context,
+ loop,
+ info->op_a,
+ trip,
+ graph,
+ block,
+ is_min,
+ &opa,
+ allow_potential_overflow) &&
+ GenerateCode(context,
+ loop,
+ info->op_b,
+ trip,
+ graph,
+ block,
+ is_min,
+ &opb,
+ allow_potential_overflow)) {
+ // Check for potentially invalid operations.
+ if (!allow_potential_overflow) {
+ switch (info->operation) {
+ case HInductionVarAnalysis::kAdd:
+ return TryGenerateAddWithoutOverflow(
+ context, loop, info, graph, opa, opb, result);
+ case HInductionVarAnalysis::kSub:
+ return TryGenerateSubWithoutOverflow(context, loop, info, graph, opa, result);
+ default:
+ // The rest of the operations are not relevant in the cases where
+ // `allow_potential_overflow` is false. Fall through to the allowed overflow
+ // case.
+ break;
+ }
+ }
+
+ // Overflows here are accepted.
if (graph != nullptr) {
HInstruction* operation = nullptr;
switch (info->operation) {
@@ -1465,7 +1526,15 @@ bool InductionVarRange::GenerateCode(const HBasicBlock* context,
}
break;
case HInductionVarAnalysis::kNeg:
- if (GenerateCode(context, loop, info->op_b, trip, graph, block, !is_min, &opb)) {
+ if (GenerateCode(context,
+ loop,
+ info->op_b,
+ trip,
+ graph,
+ block,
+ !is_min,
+ &opb,
+ allow_potential_overflow)) {
if (graph != nullptr) {
*result = Insert(block, new (graph->GetAllocator()) HNeg(type, opb));
}
@@ -1481,8 +1550,15 @@ bool InductionVarRange::GenerateCode(const HBasicBlock* context,
case HInductionVarAnalysis::kTripCountInLoopUnsafe:
if (UseFullTripCount(context, loop, is_min)) {
// Generate the full trip count (do not subtract 1 as we do in loop body).
- return GenerateCode(
- context, loop, info->op_a, trip, graph, block, /*is_min=*/ false, result);
+ return GenerateCode(context,
+ loop,
+ info->op_a,
+ trip,
+ graph,
+ block,
+ /*is_min=*/false,
+ result,
+ allow_potential_overflow);
}
FALLTHROUGH_INTENDED;
case HInductionVarAnalysis::kTripCountInBody:
@@ -1492,12 +1568,31 @@ bool InductionVarRange::GenerateCode(const HBasicBlock* context,
*result = graph->GetConstant(type, 0);
}
return true;
- } else if (IsContextInBody(context, loop)) {
- if (GenerateCode(context, loop, info->op_a, trip, graph, block, is_min, &opb)) {
+ } else if (IsContextInBody(context, loop) ||
+ (context == loop->GetHeader() && !allow_potential_overflow)) {
+ if (GenerateCode(context,
+ loop,
+ info->op_a,
+ trip,
+ graph,
+ block,
+ is_min,
+ &opb,
+ allow_potential_overflow)) {
if (graph != nullptr) {
- ArenaAllocator* allocator = graph->GetAllocator();
- *result =
- Insert(block, new (allocator) HSub(type, opb, graph->GetConstant(type, 1)));
+ if (IsContextInBody(context, loop)) {
+ ArenaAllocator* allocator = graph->GetAllocator();
+ *result =
+ Insert(block, new (allocator) HSub(type, opb, graph->GetConstant(type, 1)));
+ } else {
+ // We want to generate the full trip count since we want the last value. This
+ // will be combined with an `is_taken` test so we don't want to subtract one.
+ DCHECK(context == loop->GetHeader());
+ // TODO(solanes): Remove the !allow_potential_overflow restriction and allow
+ // other parts e.g. BCE to take advantage of this.
+ DCHECK(!allow_potential_overflow);
+ *result = opb;
+ }
}
return true;
}
@@ -1519,8 +1614,24 @@ bool InductionVarRange::GenerateCode(const HBasicBlock* context,
if (IsConstant(context, loop, info->op_a, kExact, &stride_value) &&
CanLongValueFitIntoInt(stride_value)) {
const bool is_min_a = stride_value >= 0 ? is_min : !is_min;
- if (GenerateCode(context, loop, trip, trip, graph, block, is_min_a, &opa) &&
- GenerateCode(context, loop, info->op_b, trip, graph, block, is_min, &opb)) {
+ if (GenerateCode(context,
+ loop,
+ trip,
+ trip,
+ graph,
+ block,
+ is_min_a,
+ &opa,
+ allow_potential_overflow) &&
+ GenerateCode(context,
+ loop,
+ info->op_b,
+ trip,
+ graph,
+ block,
+ is_min,
+ &opb,
+ allow_potential_overflow)) {
if (graph != nullptr) {
ArenaAllocator* allocator = graph->GetAllocator();
HInstruction* oper;
@@ -1562,6 +1673,119 @@ bool InductionVarRange::GenerateCode(const HBasicBlock* context,
return false;
}
+bool InductionVarRange::TryGenerateAddWithoutOverflow(const HBasicBlock* context,
+ const HLoopInformation* loop,
+ HInductionVarAnalysis::InductionInfo* info,
+ HGraph* graph,
+ /*in*/ HInstruction* opa,
+ /*in*/ HInstruction* opb,
+ /*out*/ HInstruction** result) const {
+ // Calculate `a + b` making sure we can't overflow.
+ int64_t val_a;
+ const bool a_is_const = IsConstant(context, loop, info->op_a, kExact, &val_a);
+ int64_t val_b;
+ const bool b_is_const = IsConstant(context, loop, info->op_b, kExact, &val_b);
+ if (a_is_const && b_is_const) {
+ // Calculate `a + b` and use that. Note that even when the values are known,
+ // their addition can still overflow.
+ Value add_val = AddValue(Value(val_a), Value(val_b));
+ if (add_val.is_known) {
+ DCHECK(IsConstantValue(add_val));
+ // Known value not overflowing.
+ if (graph != nullptr) {
+ *result = graph->GetConstant(info->type, add_val.b_constant);
+ }
+ return true;
+ }
+ }
+
+ // When `a` is `0`, we can just use `b`.
+ if (a_is_const && val_a == 0) {
+ if (graph != nullptr) {
+ *result = opb;
+ }
+ return true;
+ }
+
+ if (b_is_const && val_b == 0) {
+ if (graph != nullptr) {
+ *result = opa;
+ }
+ return true;
+ }
+
+ // Couldn't safely calculate the addition.
+ return false;
+}
+
+bool InductionVarRange::TryGenerateSubWithoutOverflow(const HBasicBlock* context,
+ const HLoopInformation* loop,
+ HInductionVarAnalysis::InductionInfo* info,
+ HGraph* graph,
+ /*in*/ HInstruction* opa,
+ /*out*/ HInstruction** result) const {
+ // Calculate `a - b` making sure we can't overflow.
+ int64_t val_b;
+ if (!IsConstant(context, loop, info->op_b, kExact, &val_b)) {
+ // If b is unknown, a - b can potentially overflow for any value of a since b
+ // can be Integer.MIN_VALUE.
+ return false;
+ }
+
+ int64_t val_a;
+ if (IsConstant(context, loop, info->op_a, kExact, &val_a)) {
+ // Calculate `a - b` and use that. Note that even when the values are known,
+ // their subtraction can still overflow.
+ Value sub_val = SubValue(Value(val_a), Value(val_b));
+ if (sub_val.is_known) {
+ DCHECK(IsConstantValue(sub_val));
+ // Known value not overflowing.
+ if (graph != nullptr) {
+ *result = graph->GetConstant(info->type, sub_val.b_constant);
+ }
+ return true;
+ }
+ }
+
+ // When `b` is `0`, we can just use `a`.
+ if (val_b == 0) {
+ if (graph != nullptr) {
+ *result = opa;
+ }
+ return true;
+ }
+
+ // Couldn't safely calculate the subtraction.
+ return false;
+}
+
+bool InductionVarRange::TryGenerateTakenTest(const HBasicBlock* context,
+ const HLoopInformation* loop,
+ HInductionVarAnalysis::InductionInfo* info,
+ HGraph* graph,
+ HBasicBlock* block,
+ /*inout*/ HInstruction** result,
+ /*inout*/ HInstruction* not_taken_result) const {
+ HInstruction* is_taken = nullptr;
+ if (GenerateCode(context,
+ loop,
+ info,
+ /*trip=*/nullptr,
+ graph,
+ block,
+ /*is_min=*/false,
+ graph != nullptr ? &is_taken : nullptr)) {
+ if (graph != nullptr) {
+ ArenaAllocator* allocator = graph->GetAllocator();
+ *result =
+ Insert(block, new (allocator) HSelect(is_taken, *result, not_taken_result, kNoDexPc));
+ }
+ return true;
+ } else {
+ return false;
+ }
+}
+
void InductionVarRange::ReplaceInduction(HInductionVarAnalysis::InductionInfo* info,
HInstruction* fetch,
HInstruction* replacement) {
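
The new TryGenerateAddWithoutOverflow/TryGenerateSubWithoutOverflow helpers above only fold constants when the arithmetic provably cannot wrap, and otherwise fall back to reusing an operand (the `+ 0` / `- 0` cases) or bail out. A minimal sketch of the wrap checks themselves, expressed with GCC/Clang builtins rather than ART's AddValue/SubValue machinery (illustrative only, not ART code):

    #include <cstdint>

    // Returns true and writes `*out` only when `a + b` does not wrap around int64_t.
    bool TryAddNoOverflow(int64_t a, int64_t b, int64_t* out) {
      return !__builtin_add_overflow(a, b, out);
    }

    // Returns true and writes `*out` only when `a - b` does not wrap around int64_t.
    bool TrySubNoOverflow(int64_t a, int64_t b, int64_t* out) {
      return !__builtin_sub_overflow(a, b, out);
    }

When an operand is unknown, only the zero shortcuts remain safe, which matches the bail-out logic in the new code.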
diff --git a/compiler/optimizing/induction_var_range.h b/compiler/optimizing/induction_var_range.h
index 3e1212bec8..a81227b41b 100644
--- a/compiler/optimizing/induction_var_range.h
+++ b/compiler/optimizing/induction_var_range.h
@@ -325,7 +325,8 @@ class InductionVarRange {
HGraph* graph,
HBasicBlock* block,
bool is_min,
- /*out*/ HInstruction** result) const;
+ /*out*/ HInstruction** result,
+ /*inout*/ bool* needs_taken_test) const;
bool GenerateLastValuePolynomial(const HBasicBlock* context,
const HLoopInformation* loop,
@@ -357,8 +358,8 @@ class InductionVarRange {
HInductionVarAnalysis::InductionInfo* trip,
HGraph* graph,
HBasicBlock* block,
- /*out*/HInstruction** result,
- /*out*/ bool* needs_taken_test) const;
+ /*out*/ HInstruction** result,
+ /*inout*/ bool* needs_taken_test) const;
bool GenerateCode(const HBasicBlock* context,
const HLoopInformation* loop,
@@ -367,7 +368,34 @@ class InductionVarRange {
HGraph* graph,
HBasicBlock* block,
bool is_min,
- /*out*/ HInstruction** result) const;
+ /*out*/ HInstruction** result,
+ // TODO(solanes): Remove default value when all cases have been assessed.
+ bool allow_potential_overflow = true) const;
+
+ bool TryGenerateAddWithoutOverflow(const HBasicBlock* context,
+ const HLoopInformation* loop,
+ HInductionVarAnalysis::InductionInfo* info,
+ HGraph* graph,
+ /*in*/ HInstruction* opa,
+ /*in*/ HInstruction* opb,
+ /*out*/ HInstruction** result) const;
+
+ bool TryGenerateSubWithoutOverflow(const HBasicBlock* context,
+ const HLoopInformation* loop,
+ HInductionVarAnalysis::InductionInfo* info,
+ HGraph* graph,
+ /*in*/ HInstruction* opa,
+ /*out*/ HInstruction** result) const;
+
+ // Try to guard the taken test with an HSelect instruction. Returns true if it can generate the
+  // code, or false otherwise. The caller is responsible for updating `needs_taken_test`.
+ bool TryGenerateTakenTest(const HBasicBlock* context,
+ const HLoopInformation* loop,
+ HInductionVarAnalysis::InductionInfo* info,
+ HGraph* graph,
+ HBasicBlock* block,
+ /*inout*/ HInstruction** result,
+ /*inout*/ HInstruction* not_taken_result) const;
void ReplaceInduction(HInductionVarAnalysis::InductionInfo* info,
HInstruction* fetch,
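
One more note on the GenerateLastValuePeriodic change in induction_var_range.cc above: for a period-2 induction the generated `is_even ? x : y` select depends only on the lowest bit of the trip count, which is why a wrapping trip-count computation is tolerated when the stride is +/-1. A tiny illustrative model (not ART code):

    #include <cstdint>

    // Last value of a period-2 induction that alternates x, y, x, y, ... starting
    // at x: after `trip` iterations the value is x when `trip` is even, y when odd.
    // Only the lowest bit of `trip` matters, so wrap-around in the trip-count
    // arithmetic cannot change which value is selected.
    int64_t LastValuePeriod2(int64_t trip, int64_t x, int64_t y) {
      return ((trip & 1) == 0) ? x : y;
    }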
diff --git a/compiler/optimizing/induction_var_range_test.cc b/compiler/optimizing/induction_var_range_test.cc
index d879897959..40fb0d6092 100644
--- a/compiler/optimizing/induction_var_range_test.cc
+++ b/compiler/optimizing/induction_var_range_test.cc
@@ -1061,11 +1061,13 @@ TEST_F(InductionVarRangeTest, ConstantTripCountDown) {
range_.CanGenerateRange(exit->GetBlock(), exit, &needs_finite_test, &needs_taken_test));
EXPECT_FALSE(range_.CanGenerateLastValue(exit));
- // Last value (unsimplified).
+ // Last value (unsimplified). We expect Sub(1000, Neg(-1000)) which is equivalent to Sub(1000,
+ // 1000) aka 0.
HInstruction* last = range_.GenerateLastValue(phi, graph_, loop_preheader_);
ASSERT_TRUE(last->IsSub());
ExpectInt(1000, last->InputAt(0));
- ExpectInt(1000, last->InputAt(1));
+ ASSERT_TRUE(last->InputAt(1)->IsNeg());
+ ExpectInt(-1000, last->InputAt(1)->AsNeg()->InputAt(0));
// Loop logic.
int64_t tc = 0;
diff --git a/compiler/optimizing/inliner.cc b/compiler/optimizing/inliner.cc
index 5a4478dc14..35582297f3 100644
--- a/compiler/optimizing/inliner.cc
+++ b/compiler/optimizing/inliner.cc
@@ -144,11 +144,6 @@ bool HInliner::Run() {
}
bool did_inline = false;
- // The inliner is the only phase that sets invokes as `always throwing`, and since we only run the
- // inliner once per graph this value should always be false at the beginning of the inlining
- // phase. This is important since we use `HasAlwaysThrowingInvokes` to know whether the inliner
- // phase performed a relevant change in the graph.
- DCHECK(!graph_->HasAlwaysThrowingInvokes());
// Initialize the number of instructions for the method being compiled. Recursive calls
// to HInliner::Run have already updated the instruction count.
@@ -180,7 +175,7 @@ bool HInliner::Run() {
for (HBasicBlock* block : blocks) {
for (HInstruction* instruction = block->GetFirstInstruction(); instruction != nullptr;) {
HInstruction* next = instruction->GetNext();
- HInvoke* call = instruction->AsInvoke();
+ HInvoke* call = instruction->AsInvokeOrNull();
// As long as the call is not intrinsified, it is worth trying to inline.
if (call != nullptr && !codegen_->IsImplementedIntrinsic(call)) {
if (honor_noinline_directives) {
@@ -210,6 +205,16 @@ bool HInliner::Run() {
// We return true if we either inlined at least one method, or we marked one of our methods as
// always throwing.
+ // To check if we added an always throwing method we can either:
+ // 1) Pass a boolean throughout the pipeline and get an accurate result, or
+ // 2) Just check that the `HasAlwaysThrowingInvokes()` flag is true now. This is not 100%
+ // accurate but the only other part where we set `HasAlwaysThrowingInvokes` is constant
+ // folding the DivideUnsigned intrinsics for when the divisor is known to be 0. This case is
+  //      rare enough that changing the pipeline for this is not worth it. In the false positive
+  //      case (i.e. (A) we didn't inline at all, (B) the graph already had an always throwing
+  //      invoke, and (C) we didn't set any new always throwing invokes), we will run constant
+  //      folding, the instruction simplifier, and dead code elimination one more time even though
+  //      they shouldn't change anything. There's no false negative case.
return did_inline || graph_->HasAlwaysThrowingInvokes();
}
@@ -223,7 +228,7 @@ static bool IsMethodOrDeclaringClassFinal(ArtMethod* method)
* the actual runtime target of an interface or virtual call.
* Return nullptr if the runtime target cannot be proven.
*/
-static ArtMethod* FindVirtualOrInterfaceTarget(HInvoke* invoke)
+static ArtMethod* FindVirtualOrInterfaceTarget(HInvoke* invoke, ReferenceTypeInfo info)
REQUIRES_SHARED(Locks::mutator_lock_) {
ArtMethod* resolved_method = invoke->GetResolvedMethod();
if (IsMethodOrDeclaringClassFinal(resolved_method)) {
@@ -231,20 +236,7 @@ static ArtMethod* FindVirtualOrInterfaceTarget(HInvoke* invoke)
return resolved_method;
}
- HInstruction* receiver = invoke->InputAt(0);
- if (receiver->IsNullCheck()) {
- // Due to multiple levels of inlining within the same pass, it might be that
- // null check does not have the reference type of the actual receiver.
- receiver = receiver->InputAt(0);
- }
- ReferenceTypeInfo info = receiver->GetReferenceTypeInfo();
- DCHECK(info.IsValid()) << "Invalid RTI for " << receiver->DebugName();
- if (!info.IsExact()) {
- // We currently only support inlining with known receivers.
- // TODO: Remove this check, we should be able to inline final methods
- // on unknown receivers.
- return nullptr;
- } else if (info.GetTypeHandle()->IsInterface()) {
+ if (info.GetTypeHandle()->IsInterface()) {
// Statically knowing that the receiver has an interface type cannot
// help us find what is the target method.
return nullptr;
@@ -336,8 +328,8 @@ static dex::TypeIndex FindClassIndexIn(ObjPtr<mirror::Class> cls,
HInliner::InlineCacheType HInliner::GetInlineCacheType(
const StackHandleScope<InlineCache::kIndividualCacheSize>& classes) {
- DCHECK_EQ(classes.NumberOfReferences(), InlineCache::kIndividualCacheSize);
- uint8_t number_of_types = InlineCache::kIndividualCacheSize - classes.RemainingSlots();
+ DCHECK_EQ(classes.Capacity(), InlineCache::kIndividualCacheSize);
+ uint8_t number_of_types = classes.Size();
if (number_of_types == 0) {
return kInlineCacheUninitialized;
} else if (number_of_types == 1) {
@@ -472,15 +464,31 @@ bool HInliner::TryInline(HInvoke* invoke_instruction) {
return false;
}
- ArtMethod* actual_method = invoke_instruction->IsInvokeStaticOrDirect()
- ? invoke_instruction->GetResolvedMethod()
- : FindVirtualOrInterfaceTarget(invoke_instruction);
+ ArtMethod* actual_method = nullptr;
+ ReferenceTypeInfo receiver_info = ReferenceTypeInfo::CreateInvalid();
+ if (invoke_instruction->GetInvokeType() == kStatic) {
+ actual_method = invoke_instruction->GetResolvedMethod();
+ } else {
+ HInstruction* receiver = invoke_instruction->InputAt(0);
+ while (receiver->IsNullCheck()) {
+ // Due to multiple levels of inlining within the same pass, it might be that
+ // null check does not have the reference type of the actual receiver.
+ receiver = receiver->InputAt(0);
+ }
+ receiver_info = receiver->GetReferenceTypeInfo();
+ DCHECK(receiver_info.IsValid()) << "Invalid RTI for " << receiver->DebugName();
+ if (invoke_instruction->IsInvokeStaticOrDirect()) {
+ actual_method = invoke_instruction->GetResolvedMethod();
+ } else {
+ actual_method = FindVirtualOrInterfaceTarget(invoke_instruction, receiver_info);
+ }
+ }
if (actual_method != nullptr) {
// Single target.
bool result = TryInlineAndReplace(invoke_instruction,
actual_method,
- ReferenceTypeInfo::CreateInvalid(),
+ receiver_info,
/* do_rtp= */ true,
/* is_speculative= */ false);
if (result) {
@@ -541,9 +549,10 @@ bool HInliner::TryInlineFromCHA(HInvoke* invoke_instruction) {
uint32_t dex_pc = invoke_instruction->GetDexPc();
HInstruction* cursor = invoke_instruction->GetPrevious();
HBasicBlock* bb_cursor = invoke_instruction->GetBlock();
+ Handle<mirror::Class> cls = graph_->GetHandleCache()->NewHandle(method->GetDeclaringClass());
if (!TryInlineAndReplace(invoke_instruction,
method,
- ReferenceTypeInfo::CreateInvalid(),
+ ReferenceTypeInfo::Create(cls),
/* do_rtp= */ true,
/* is_speculative= */ true)) {
return false;
@@ -660,17 +669,23 @@ HInliner::InlineCacheType HInliner::GetInlineCacheJIT(
return kInlineCacheNoData;
}
- Runtime::Current()->GetJit()->GetCodeCache()->CopyInlineCacheInto(
- *profiling_info->GetInlineCache(invoke_instruction->GetDexPc()),
- classes);
+ InlineCache* cache = profiling_info->GetInlineCache(invoke_instruction->GetDexPc());
+ if (cache == nullptr) {
+ // This shouldn't happen, but we don't guarantee that method resolution
+ // between baseline compilation and optimizing compilation is identical. Be robust,
+ // warn about it, and return that we don't have any inline cache data.
+ LOG(WARNING) << "No inline cache found for " << caller->PrettyMethod();
+ return kInlineCacheNoData;
+ }
+ Runtime::Current()->GetJit()->GetCodeCache()->CopyInlineCacheInto(*cache, classes);
return GetInlineCacheType(*classes);
}
HInliner::InlineCacheType HInliner::GetInlineCacheAOT(
HInvoke* invoke_instruction,
/*out*/StackHandleScope<InlineCache::kIndividualCacheSize>* classes) {
- DCHECK_EQ(classes->NumberOfReferences(), InlineCache::kIndividualCacheSize);
- DCHECK_EQ(classes->RemainingSlots(), InlineCache::kIndividualCacheSize);
+ DCHECK_EQ(classes->Capacity(), InlineCache::kIndividualCacheSize);
+ DCHECK_EQ(classes->Size(), 0u);
const ProfileCompilationInfo* pci = codegen_->GetCompilerOptions().GetProfileCompilationInfo();
if (pci == nullptr) {
@@ -702,19 +717,21 @@ HInliner::InlineCacheType HInliner::GetInlineCacheAOT(
// Walk over the class descriptors and look up the actual classes.
// If we cannot find a type we return kInlineCacheMissingTypes.
ClassLinker* class_linker = caller_compilation_unit_.GetClassLinker();
+ Thread* self = Thread::Current();
for (const dex::TypeIndex& type_index : dex_pc_data.classes) {
const DexFile* dex_file = caller_compilation_unit_.GetDexFile();
const char* descriptor = pci->GetTypeDescriptor(dex_file, type_index);
- ObjPtr<mirror::ClassLoader> class_loader = caller_compilation_unit_.GetClassLoader().Get();
- ObjPtr<mirror::Class> clazz = class_linker->LookupResolvedType(descriptor, class_loader);
+ ObjPtr<mirror::Class> clazz =
+ class_linker->FindClass(self, descriptor, caller_compilation_unit_.GetClassLoader());
if (clazz == nullptr) {
+ self->ClearException(); // Clean up the exception left by type resolution.
VLOG(compiler) << "Could not find class from inline cache in AOT mode "
<< invoke_instruction->GetMethodReference().PrettyMethod()
<< " : "
<< descriptor;
return kInlineCacheMissingTypes;
}
- DCHECK_NE(classes->RemainingSlots(), 0u);
+ DCHECK_LT(classes->Size(), classes->Capacity());
classes->NewHandle(clazz);
}
@@ -965,8 +982,8 @@ bool HInliner::TryInlinePolymorphicCall(
bool all_targets_inlined = true;
bool one_target_inlined = false;
- DCHECK_EQ(classes.NumberOfReferences(), InlineCache::kIndividualCacheSize);
- uint8_t number_of_types = InlineCache::kIndividualCacheSize - classes.RemainingSlots();
+ DCHECK_EQ(classes.Capacity(), InlineCache::kIndividualCacheSize);
+ uint8_t number_of_types = classes.Size();
for (size_t i = 0; i != number_of_types; ++i) {
DCHECK(classes.GetReference(i) != nullptr);
Handle<mirror::Class> handle =
@@ -1152,8 +1169,8 @@ bool HInliner::TryInlinePolymorphicCallToSameTarget(
// Check whether we are actually calling the same method among
// the different types seen.
- DCHECK_EQ(classes.NumberOfReferences(), InlineCache::kIndividualCacheSize);
- uint8_t number_of_types = InlineCache::kIndividualCacheSize - classes.RemainingSlots();
+ DCHECK_EQ(classes.Capacity(), InlineCache::kIndividualCacheSize);
+ uint8_t number_of_types = classes.Size();
for (size_t i = 0; i != number_of_types; ++i) {
DCHECK(classes.GetReference(i) != nullptr);
ArtMethod* new_method = nullptr;
@@ -1184,9 +1201,11 @@ bool HInliner::TryInlinePolymorphicCallToSameTarget(
HBasicBlock* bb_cursor = invoke_instruction->GetBlock();
HInstruction* return_replacement = nullptr;
+ Handle<mirror::Class> cls =
+ graph_->GetHandleCache()->NewHandle(actual_method->GetDeclaringClass());
if (!TryBuildAndInline(invoke_instruction,
actual_method,
- ReferenceTypeInfo::CreateInvalid(),
+ ReferenceTypeInfo::Create(cls),
&return_replacement,
/* is_speculative= */ true)) {
return false;
@@ -2062,7 +2081,8 @@ bool HInliner::TryBuildAndInlineHelper(HInvoke* invoke_instruction,
ReferenceTypeInfo receiver_type,
HInstruction** return_replacement,
bool is_speculative) {
- DCHECK(!(resolved_method->IsStatic() && receiver_type.IsValid()));
+ DCHECK_IMPLIES(resolved_method->IsStatic(), !receiver_type.IsValid());
+ DCHECK_IMPLIES(!resolved_method->IsStatic(), receiver_type.IsValid());
const dex::CodeItem* code_item = resolved_method->GetCodeItem();
const DexFile& callee_dex_file = *resolved_method->GetDexFile();
uint32_t method_index = resolved_method->GetDexMethodIndex();
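The two DCHECK_IMPLIES above together assert an if-and-only-if relationship between a static callee and an invalid receiver type, where the old single DCHECK only ruled out one combination. A standalone sketch of the same invariant, using plain assert rather than ART's DCHECK macros (illustrative only):

  #include <cassert>

  // is_static <=> !receiver_type_valid, expressed as two implications.
  void CheckReceiverTypeInvariant(bool is_static, bool receiver_type_valid) {
    assert(!is_static || !receiver_type_valid);  // static callee  => no receiver type
    assert(is_static || receiver_type_valid);    // virtual callee => receiver type required
  }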
diff --git a/compiler/optimizing/instruction_builder.cc b/compiler/optimizing/instruction_builder.cc
index fee9091145..fe0f3fe319 100644
--- a/compiler/optimizing/instruction_builder.cc
+++ b/compiler/optimizing/instruction_builder.cc
@@ -665,22 +665,31 @@ void HInstructionBuilder::InitializeParameters() {
}
}
-template<typename T>
-void HInstructionBuilder::If_22t(const Instruction& instruction, uint32_t dex_pc) {
- HInstruction* first = LoadLocal(instruction.VRegA(), DataType::Type::kInt32);
- HInstruction* second = LoadLocal(instruction.VRegB(), DataType::Type::kInt32);
- T* comparison = new (allocator_) T(first, second, dex_pc);
- AppendInstruction(comparison);
- AppendInstruction(new (allocator_) HIf(comparison, dex_pc));
- current_block_ = nullptr;
-}
-
-template<typename T>
-void HInstructionBuilder::If_21t(const Instruction& instruction, uint32_t dex_pc) {
+template<typename T, bool kCompareWithZero>
+void HInstructionBuilder::If_21_22t(const Instruction& instruction, uint32_t dex_pc) {
HInstruction* value = LoadLocal(instruction.VRegA(), DataType::Type::kInt32);
- T* comparison = new (allocator_) T(value, graph_->GetIntConstant(0, dex_pc), dex_pc);
+ T* comparison = nullptr;
+ if (kCompareWithZero) {
+ comparison = new (allocator_) T(value, graph_->GetIntConstant(0, dex_pc), dex_pc);
+ } else {
+ HInstruction* second = LoadLocal(instruction.VRegB(), DataType::Type::kInt32);
+ comparison = new (allocator_) T(value, second, dex_pc);
+ }
AppendInstruction(comparison);
- AppendInstruction(new (allocator_) HIf(comparison, dex_pc));
+ HIf* if_instr = new (allocator_) HIf(comparison, dex_pc);
+
+ ProfilingInfo* info = graph_->GetProfilingInfo();
+ if (info != nullptr && !graph_->IsCompilingBaseline()) {
+ BranchCache* cache = info->GetBranchCache(dex_pc);
+ if (cache != nullptr) {
+ if_instr->SetTrueCount(cache->GetTrue());
+ if_instr->SetFalseCount(cache->GetFalse());
+ }
+ }
+
+ // Append after setting true/false count, so that the builder knows if the
+ // instruction needs an environment.
+ AppendInstruction(if_instr);
current_block_ = nullptr;
}
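The former If_21t/If_22t pair is folded into one builder whose template parameter decides whether the right-hand operand is the constant zero or a second register. A minimal sketch of that shape, with made-up names rather than the real builder API:

  // kCompareWithZero is a compile-time constant, so the untaken branch is
  // discarded and each instantiation stays as small as the old separate functions.
  template <bool kCompareWithZero>
  int SelectRightOperand(int vreg_b_value) {
    if constexpr (kCompareWithZero) {
      return 0;              // IF_xxZ variants compare against zero.
    } else {
      return vreg_b_value;   // IF_xx variants compare two registers.
    }
  }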
@@ -1364,8 +1373,7 @@ bool HInstructionBuilder::BuildInvokePolymorphic(uint32_t dex_pc,
method_reference,
resolved_method,
resolved_method_reference,
- proto_idx,
- !graph_->IsDebuggable());
+ proto_idx);
if (!HandleInvoke(invoke, operands, shorty, /* is_unresolved= */ false)) {
return false;
}
@@ -2365,9 +2373,9 @@ void HInstructionBuilder::BuildCheckedDivRem(uint16_t out_vreg,
second = LoadLocal(second_vreg_or_constant, type);
}
- if (!second_is_constant
- || (type == DataType::Type::kInt32 && second->AsIntConstant()->GetValue() == 0)
- || (type == DataType::Type::kInt64 && second->AsLongConstant()->GetValue() == 0)) {
+ if (!second_is_constant ||
+ (type == DataType::Type::kInt32 && second->AsIntConstant()->GetValue() == 0) ||
+ (type == DataType::Type::kInt64 && second->AsLongConstant()->GetValue() == 0)) {
second = new (allocator_) HDivZeroCheck(second, dex_pc);
AppendInstruction(second);
}
@@ -2691,6 +2699,9 @@ void HInstructionBuilder::BuildLoadMethodType(dex::ProtoIndex proto_index, uint3
const DexFile& dex_file = *dex_compilation_unit_->GetDexFile();
HLoadMethodType* load_method_type =
new (allocator_) HLoadMethodType(graph_->GetCurrentMethod(), proto_index, dex_file, dex_pc);
+ if (!code_generator_->GetCompilerOptions().IsJitCompiler()) {
+ load_method_type->SetLoadKind(HLoadMethodType::LoadKind::kBssEntry);
+ }
AppendInstruction(load_method_type);
}
@@ -2880,8 +2891,12 @@ bool HInstructionBuilder::ProcessDexInstruction(const Instruction& instruction,
}
#define IF_XX(comparison, cond) \
- case Instruction::IF_##cond: If_22t<comparison>(instruction, dex_pc); break; \
- case Instruction::IF_##cond##Z: If_21t<comparison>(instruction, dex_pc); break
+ case Instruction::IF_##cond: \
+ If_21_22t<comparison, /* kCompareWithZero= */ false>(instruction, dex_pc); \
+ break; \
+ case Instruction::IF_##cond##Z: \
+ If_21_22t<comparison, /* kCompareWithZero= */ true>(instruction, dex_pc); \
+ break;
IF_XX(HEqual, EQ);
IF_XX(HNotEqual, NE);
diff --git a/compiler/optimizing/instruction_builder.h b/compiler/optimizing/instruction_builder.h
index 3d65d8fb54..5c165d7bf9 100644
--- a/compiler/optimizing/instruction_builder.h
+++ b/compiler/optimizing/instruction_builder.h
@@ -116,8 +116,8 @@ class HInstructionBuilder : public ValueObject {
template<typename T>
void Binop_22s(const Instruction& instruction, bool reverse, uint32_t dex_pc);
- template<typename T> void If_21t(const Instruction& instruction, uint32_t dex_pc);
- template<typename T> void If_22t(const Instruction& instruction, uint32_t dex_pc);
+ template<typename T, bool kCompareWithZero>
+ void If_21_22t(const Instruction& instruction, uint32_t dex_pc);
void Conversion_12x(const Instruction& instruction,
DataType::Type input_type,
diff --git a/compiler/optimizing/instruction_simplifier.cc b/compiler/optimizing/instruction_simplifier.cc
index 0c2fd5de56..5d552411db 100644
--- a/compiler/optimizing/instruction_simplifier.cc
+++ b/compiler/optimizing/instruction_simplifier.cc
@@ -22,6 +22,7 @@
#include "data_type-inl.h"
#include "driver/compiler_options.h"
#include "escape.h"
+#include "intrinsic_objects.h"
#include "intrinsics.h"
#include "intrinsics_utils.h"
#include "mirror/class-inl.h"
@@ -30,6 +31,7 @@
#include "scoped_thread_state_change-inl.h"
#include "sharpening.h"
#include "string_builder_append.h"
+#include "well_known_classes.h"
namespace art HIDDEN {
@@ -113,7 +115,7 @@ class InstructionSimplifierVisitor final : public HGraphDelegateVisitor {
void VisitInvoke(HInvoke* invoke) override;
void VisitDeoptimize(HDeoptimize* deoptimize) override;
void VisitVecMul(HVecMul* instruction) override;
- void VisitPredicatedInstanceFieldGet(HPredicatedInstanceFieldGet* instruction) override;
+ void SimplifyBoxUnbox(HInvoke* instruction, ArtField* field, DataType::Type type);
void SimplifySystemArrayCopy(HInvoke* invoke);
void SimplifyStringEquals(HInvoke* invoke);
void SimplifyFP2Int(HInvoke* invoke);
@@ -947,67 +949,6 @@ static HInstruction* AllowInMinMax(IfCondition cmp,
return nullptr;
}
-// TODO This should really be done by LSE itself since there is significantly
-// more information available there.
-void InstructionSimplifierVisitor::VisitPredicatedInstanceFieldGet(
- HPredicatedInstanceFieldGet* pred_get) {
- HInstruction* target = pred_get->GetTarget();
- HInstruction* default_val = pred_get->GetDefaultValue();
- if (target->IsNullConstant()) {
- pred_get->ReplaceWith(default_val);
- pred_get->GetBlock()->RemoveInstruction(pred_get);
- RecordSimplification();
- return;
- } else if (!target->CanBeNull()) {
- HInstruction* replace_with = new (GetGraph()->GetAllocator())
- HInstanceFieldGet(pred_get->GetTarget(),
- pred_get->GetFieldInfo().GetField(),
- pred_get->GetFieldType(),
- pred_get->GetFieldOffset(),
- pred_get->IsVolatile(),
- pred_get->GetFieldInfo().GetFieldIndex(),
- pred_get->GetFieldInfo().GetDeclaringClassDefIndex(),
- pred_get->GetFieldInfo().GetDexFile(),
- pred_get->GetDexPc());
- if (pred_get->GetType() == DataType::Type::kReference) {
- replace_with->SetReferenceTypeInfoIfValid(pred_get->GetReferenceTypeInfo());
- }
- pred_get->GetBlock()->InsertInstructionBefore(replace_with, pred_get);
- pred_get->ReplaceWith(replace_with);
- pred_get->GetBlock()->RemoveInstruction(pred_get);
- RecordSimplification();
- return;
- }
- if (!target->IsPhi() || !default_val->IsPhi() || default_val->GetBlock() != target->GetBlock()) {
- // The iget has already been reduced. We know the target or the phi
- // selection will differ between the target and default.
- return;
- }
- DCHECK_EQ(default_val->InputCount(), target->InputCount());
- // In the same block both phis only one non-null we can remove the phi from default_val.
- HInstruction* single_value = nullptr;
- auto inputs = target->GetInputs();
- for (auto [input, idx] : ZipCount(MakeIterationRange(inputs))) {
- if (input->CanBeNull()) {
- if (single_value == nullptr) {
- single_value = default_val->InputAt(idx);
- } else if (single_value != default_val->InputAt(idx) &&
- !single_value->Equals(default_val->InputAt(idx))) {
- // Multiple values are associated with potential nulls, can't combine.
- return;
- }
- }
- }
- DCHECK(single_value != nullptr) << "All target values are non-null but the phi as a whole still"
- << " can be null? This should not be possible." << std::endl
- << pred_get->DumpWithArgs();
- if (single_value->StrictlyDominates(pred_get)) {
- // Combine all the maybe null values into one.
- pred_get->ReplaceInput(single_value, 0);
- RecordSimplification();
- }
-}
-
void InstructionSimplifierVisitor::VisitSelect(HSelect* select) {
HInstruction* replace_with = nullptr;
HInstruction* condition = select->GetCondition();
@@ -1050,51 +991,60 @@ void InstructionSimplifierVisitor::VisitSelect(HSelect* select) {
HInstruction* b = condition->InputAt(1);
DataType::Type t_type = true_value->GetType();
DataType::Type f_type = false_value->GetType();
- // Here we have a <cmp> b ? true_value : false_value.
- // Test if both values are compatible integral types (resulting MIN/MAX/ABS
- // type will be int or long, like the condition). Replacements are general,
- // but assume conditions prefer constants on the right.
if (DataType::IsIntegralType(t_type) && DataType::Kind(t_type) == DataType::Kind(f_type)) {
- // Allow a < 100 ? max(a, -100) : ..
- // or a > -100 ? min(a, 100) : ..
- // to use min/max instead of a to detect nested min/max expressions.
- HInstruction* new_a = AllowInMinMax(cmp, a, b, true_value);
- if (new_a != nullptr) {
- a = new_a;
- }
- // Try to replace typical integral MIN/MAX/ABS constructs.
- if ((cmp == kCondLT || cmp == kCondLE || cmp == kCondGT || cmp == kCondGE) &&
- ((a == true_value && b == false_value) ||
- (b == true_value && a == false_value))) {
- // Found a < b ? a : b (MIN) or a < b ? b : a (MAX)
- // or a > b ? a : b (MAX) or a > b ? b : a (MIN).
- bool is_min = (cmp == kCondLT || cmp == kCondLE) == (a == true_value);
- replace_with = NewIntegralMinMax(GetGraph()->GetAllocator(), a, b, select, is_min);
- } else if (((cmp == kCondLT || cmp == kCondLE) && true_value->IsNeg()) ||
- ((cmp == kCondGT || cmp == kCondGE) && false_value->IsNeg())) {
- bool negLeft = (cmp == kCondLT || cmp == kCondLE);
- HInstruction* the_negated = negLeft ? true_value->InputAt(0) : false_value->InputAt(0);
- HInstruction* not_negated = negLeft ? false_value : true_value;
- if (a == the_negated && a == not_negated && IsInt64Value(b, 0)) {
- // Found a < 0 ? -a : a
- // or a > 0 ? a : -a
- // which can be replaced by ABS(a).
- replace_with = NewIntegralAbs(GetGraph()->GetAllocator(), a, select);
+ if (cmp == kCondEQ || cmp == kCondNE) {
+ // Turns
+ // * Select[a, b, EQ(a,b)] / Select[a, b, EQ(b,a)] into a
+ // * Select[a, b, NE(a,b)] / Select[a, b, NE(b,a)] into b
+ // Note that the order in EQ/NE is irrelevant.
+ if ((a == true_value && b == false_value) || (a == false_value && b == true_value)) {
+ replace_with = cmp == kCondEQ ? false_value : true_value;
+ }
+ } else {
+ // Test if both values are compatible integral types (resulting MIN/MAX/ABS
+ // type will be int or long, like the condition). Replacements are general,
+ // but assume conditions prefer constants on the right.
+
+ // Allow a < 100 ? max(a, -100) : ..
+ // or a > -100 ? min(a, 100) : ..
+ // to use min/max instead of a to detect nested min/max expressions.
+ HInstruction* new_a = AllowInMinMax(cmp, a, b, true_value);
+ if (new_a != nullptr) {
+ a = new_a;
}
- } else if (true_value->IsSub() && false_value->IsSub()) {
- HInstruction* true_sub1 = true_value->InputAt(0);
- HInstruction* true_sub2 = true_value->InputAt(1);
- HInstruction* false_sub1 = false_value->InputAt(0);
- HInstruction* false_sub2 = false_value->InputAt(1);
- if ((((cmp == kCondGT || cmp == kCondGE) &&
- (a == true_sub1 && b == true_sub2 && a == false_sub2 && b == false_sub1)) ||
- ((cmp == kCondLT || cmp == kCondLE) &&
- (a == true_sub2 && b == true_sub1 && a == false_sub1 && b == false_sub2))) &&
- AreLowerPrecisionArgs(t_type, a, b)) {
- // Found a > b ? a - b : b - a
- // or a < b ? b - a : a - b
- // which can be replaced by ABS(a - b) for lower precision operands a, b.
- replace_with = NewIntegralAbs(GetGraph()->GetAllocator(), true_value, select);
+ // Try to replace typical integral MIN/MAX/ABS constructs.
+ if ((cmp == kCondLT || cmp == kCondLE || cmp == kCondGT || cmp == kCondGE) &&
+ ((a == true_value && b == false_value) || (b == true_value && a == false_value))) {
+ // Found a < b ? a : b (MIN) or a < b ? b : a (MAX)
+ // or a > b ? a : b (MAX) or a > b ? b : a (MIN).
+ bool is_min = (cmp == kCondLT || cmp == kCondLE) == (a == true_value);
+ replace_with = NewIntegralMinMax(GetGraph()->GetAllocator(), a, b, select, is_min);
+ } else if (((cmp == kCondLT || cmp == kCondLE) && true_value->IsNeg()) ||
+ ((cmp == kCondGT || cmp == kCondGE) && false_value->IsNeg())) {
+ bool negLeft = (cmp == kCondLT || cmp == kCondLE);
+ HInstruction* the_negated = negLeft ? true_value->InputAt(0) : false_value->InputAt(0);
+ HInstruction* not_negated = negLeft ? false_value : true_value;
+ if (a == the_negated && a == not_negated && IsInt64Value(b, 0)) {
+ // Found a < 0 ? -a : a
+ // or a > 0 ? a : -a
+ // which can be replaced by ABS(a).
+ replace_with = NewIntegralAbs(GetGraph()->GetAllocator(), a, select);
+ }
+ } else if (true_value->IsSub() && false_value->IsSub()) {
+ HInstruction* true_sub1 = true_value->InputAt(0);
+ HInstruction* true_sub2 = true_value->InputAt(1);
+ HInstruction* false_sub1 = false_value->InputAt(0);
+ HInstruction* false_sub2 = false_value->InputAt(1);
+ if ((((cmp == kCondGT || cmp == kCondGE) &&
+ (a == true_sub1 && b == true_sub2 && a == false_sub2 && b == false_sub1)) ||
+ ((cmp == kCondLT || cmp == kCondLE) &&
+ (a == true_sub2 && b == true_sub1 && a == false_sub1 && b == false_sub2))) &&
+ AreLowerPrecisionArgs(t_type, a, b)) {
+ // Found a > b ? a - b : b - a
+ // or a < b ? b - a : a - b
+ // which can be replaced by ABS(a - b) for lower precision operands a, b.
+ replace_with = NewIntegralAbs(GetGraph()->GetAllocator(), true_value, select);
+ }
}
}
}
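The new EQ/NE arm rests on a simple identity: when a select chooses between exactly the two values being compared, its result no longer depends on the comparison. A standalone check of that identity (illustration only):

  #include <cassert>

  int SelectOnEq(int x, int y) { return (x == y) ? x : y; }  // always yields y
  int SelectOnNe(int x, int y) { return (x != y) ? x : y; }  // always yields x

  int main() {
    assert(SelectOnEq(3, 7) == 7);
    assert(SelectOnEq(5, 5) == 5);
    assert(SelectOnNe(3, 7) == 3);
    assert(SelectOnNe(5, 5) == 5);
    return 0;
  }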
@@ -1222,9 +1172,6 @@ static inline bool TryReplaceFieldOrArrayGetType(HInstruction* maybe_get, DataTy
if (maybe_get->IsInstanceFieldGet()) {
maybe_get->AsInstanceFieldGet()->SetType(new_type);
return true;
- } else if (maybe_get->IsPredicatedInstanceFieldGet()) {
- maybe_get->AsPredicatedInstanceFieldGet()->SetType(new_type);
- return true;
} else if (maybe_get->IsStaticFieldGet()) {
maybe_get->AsStaticFieldGet()->SetType(new_type);
return true;
@@ -1456,24 +1403,26 @@ void InstructionSimplifierVisitor::VisitAdd(HAdd* instruction) {
}
}
- HNeg* neg = left_is_neg ? left->AsNeg() : right->AsNeg();
- if (left_is_neg != right_is_neg && neg->HasOnlyOneNonEnvironmentUse()) {
- // Replace code looking like
- // NEG tmp, b
- // ADD dst, a, tmp
- // with
- // SUB dst, a, b
- // We do not perform the optimization if the input negation has environment
- // uses or multiple non-environment uses as it could lead to worse code. In
- // particular, we do not want the live range of `b` to be extended if we are
- // not sure the initial 'NEG' instruction can be removed.
- HInstruction* other = left_is_neg ? right : left;
- HSub* sub =
- new(GetGraph()->GetAllocator()) HSub(instruction->GetType(), other, neg->GetInput());
- instruction->GetBlock()->ReplaceAndRemoveInstructionWith(instruction, sub);
- RecordSimplification();
- neg->GetBlock()->RemoveInstruction(neg);
- return;
+ if (left_is_neg != right_is_neg) {
+ HNeg* neg = left_is_neg ? left->AsNeg() : right->AsNeg();
+ if (neg->HasOnlyOneNonEnvironmentUse()) {
+ // Replace code looking like
+ // NEG tmp, b
+ // ADD dst, a, tmp
+ // with
+ // SUB dst, a, b
+ // We do not perform the optimization if the input negation has environment
+ // uses or multiple non-environment uses as it could lead to worse code. In
+ // particular, we do not want the live range of `b` to be extended if we are
+ // not sure the initial 'NEG' instruction can be removed.
+ HInstruction* other = left_is_neg ? right : left;
+ HSub* sub =
+ new(GetGraph()->GetAllocator()) HSub(instruction->GetType(), other, neg->GetInput());
+ instruction->GetBlock()->ReplaceAndRemoveInstructionWith(instruction, sub);
+ RecordSimplification();
+ neg->GetBlock()->RemoveInstruction(neg);
+ return;
+ }
}
if (TryReplaceWithRotate(instruction)) {
@@ -1676,7 +1625,7 @@ static bool RecognizeAndSimplifyClassCheck(HCondition* condition) {
HInstruction* input_two = condition->InputAt(1);
HLoadClass* load_class = input_one->IsLoadClass()
? input_one->AsLoadClass()
- : input_two->AsLoadClass();
+ : input_two->AsLoadClassOrNull();
if (load_class == nullptr) {
return false;
}
@@ -1688,8 +1637,8 @@ static bool RecognizeAndSimplifyClassCheck(HCondition* condition) {
}
HInstanceFieldGet* field_get = (load_class == input_one)
- ? input_two->AsInstanceFieldGet()
- : input_one->AsInstanceFieldGet();
+ ? input_two->AsInstanceFieldGetOrNull()
+ : input_one->AsInstanceFieldGetOrNull();
if (field_get == nullptr) {
return false;
}
@@ -2240,6 +2189,7 @@ void InstructionSimplifierVisitor::VisitSub(HSub* instruction) {
}
if (left->IsAdd()) {
+ // Cases (x + y) - y = x, and (x + y) - x = y.
// Replace code patterns looking like
// ADD dst1, x, y ADD dst1, x, y
// SUB dst2, dst1, y SUB dst2, dst1, x
@@ -2248,14 +2198,75 @@ void InstructionSimplifierVisitor::VisitSub(HSub* instruction) {
// The SUB instruction is not needed in this case; we may use
// one of the inputs of the ADD instead.
// It is applicable to integral types only.
+ HAdd* add = left->AsAdd();
DCHECK(DataType::IsIntegralType(type));
- if (left->InputAt(1) == right) {
- instruction->ReplaceWith(left->InputAt(0));
+ if (add->GetRight() == right) {
+ instruction->ReplaceWith(add->GetLeft());
RecordSimplification();
instruction->GetBlock()->RemoveInstruction(instruction);
return;
- } else if (left->InputAt(0) == right) {
- instruction->ReplaceWith(left->InputAt(1));
+ } else if (add->GetLeft() == right) {
+ instruction->ReplaceWith(add->GetRight());
+ RecordSimplification();
+ instruction->GetBlock()->RemoveInstruction(instruction);
+ return;
+ }
+ } else if (right->IsAdd()) {
+ // Cases y - (x + y) = -x, and x - (x + y) = -y.
+ // Replace code patterns looking like
+ // ADD dst1, x, y ADD dst1, x, y
+ // SUB dst2, y, dst1 SUB dst2, x, dst1
+ // with
+ // ADD dst1, x, y ADD dst1, x, y
+ // NEG x NEG y
+ // The SUB instruction is not needed in this case; we may use
+ // one of the inputs of the ADD instead, combined with a NEG.
+ // It is applicable to integral types only.
+ HAdd* add = right->AsAdd();
+ DCHECK(DataType::IsIntegralType(type));
+ if (add->GetRight() == left) {
+ HNeg* neg = new (GetGraph()->GetAllocator()) HNeg(add->GetType(), add->GetLeft());
+ instruction->GetBlock()->ReplaceAndRemoveInstructionWith(instruction, neg);
+ RecordSimplification();
+ return;
+ } else if (add->GetLeft() == left) {
+ HNeg* neg = new (GetGraph()->GetAllocator()) HNeg(add->GetType(), add->GetRight());
+ instruction->GetBlock()->ReplaceAndRemoveInstructionWith(instruction, neg);
+ RecordSimplification();
+ return;
+ }
+ } else if (left->IsSub()) {
+ // Case (x - y) - x = -y.
+ // Replace code patterns looking like
+ // SUB dst1, x, y
+ // SUB dst2, dst1, x
+ // with
+ // SUB dst1, x, y
+ // NEG y
+ // The second SUB is not needed in this case; we may use the second input of the first SUB
+ // instead, combined with a NEG.
+ // It is applicable to integral types only.
+ HSub* sub = left->AsSub();
+ DCHECK(DataType::IsIntegralType(type));
+ if (sub->GetLeft() == right) {
+ HNeg* neg = new (GetGraph()->GetAllocator()) HNeg(sub->GetType(), sub->GetRight());
+ instruction->GetBlock()->ReplaceAndRemoveInstructionWith(instruction, neg);
+ RecordSimplification();
+ return;
+ }
+ } else if (right->IsSub()) {
+ // Case x - (x - y) = y.
+ // Replace code patterns looking like
+ // SUB dst1, x, y
+ // SUB dst2, x, dst1
+ // with
+ // SUB dst1, x, y
+ // The second SUB is not needed in this case; we may use the second input of the first SUB.
+ // It is applicable to integral types only.
+ HSub* sub = right->AsSub();
+ DCHECK(DataType::IsIntegralType(type));
+ if (sub->GetLeft() == left) {
+ instruction->ReplaceWith(sub->GetRight());
RecordSimplification();
instruction->GetBlock()->RemoveInstruction(instruction);
return;
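The added Sub patterns all follow from wrap-around integer arithmetic, so they hold even when the intermediate ADD or SUB overflows. The identities, checked in a few standalone lines (illustration only):

  #include <cassert>
  #include <cstdint>

  int main() {
    uint32_t x = 0xFFFFFFF0u;  // chosen so that x + y wraps around
    uint32_t y = 0x40u;
    assert((x + y) - y == x);                          // Sub(Add(x, y), y) -> x
    assert(x - (x + y) == static_cast<uint32_t>(-y));  // Sub(x, Add(x, y)) -> Neg(y)
    assert((x - y) - x == static_cast<uint32_t>(-y));  // Sub(Sub(x, y), x) -> Neg(y)
    assert(x - (x - y) == y);                          // Sub(x, Sub(x, y)) -> y
    return 0;
  }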
@@ -2334,6 +2345,29 @@ void InstructionSimplifierVisitor::VisitXor(HXor* instruction) {
TryHandleAssociativeAndCommutativeOperation(instruction);
}
+void InstructionSimplifierVisitor::SimplifyBoxUnbox(
+ HInvoke* instruction, ArtField* field, DataType::Type type) {
+ DCHECK(instruction->GetIntrinsic() == Intrinsics::kByteValueOf ||
+ instruction->GetIntrinsic() == Intrinsics::kShortValueOf ||
+ instruction->GetIntrinsic() == Intrinsics::kCharacterValueOf ||
+ instruction->GetIntrinsic() == Intrinsics::kIntegerValueOf);
+ const HUseList<HInstruction*>& uses = instruction->GetUses();
+ for (auto it = uses.begin(), end = uses.end(); it != end;) {
+ HInstruction* user = it->GetUser();
+ ++it; // Increment the iterator before we potentially remove the node from the list.
+ if (user->IsInstanceFieldGet() &&
+ user->AsInstanceFieldGet()->GetFieldInfo().GetField() == field &&
+ // Note: Due to other simplifications, we may have an `HInstanceFieldGet` with
+ // a different type (Int8 vs. Uint8, Int16 vs. Uint16) for the same field.
+ // Do not optimize that case for now. (We would need to insert a `HTypeConversion`.)
+ user->GetType() == type) {
+ user->ReplaceWith(instruction->InputAt(0));
+ RecordSimplification();
+ // Do not remove `user` while we're iterating over the block's instructions. Let DCE do it.
+ }
+ }
+}
+
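SimplifyBoxUnbox walks the use list of the valueOf call and rewrites matching field gets while the walk is in progress, which is why the iterator is advanced before the current entry is touched. The same idiom in a self-contained form, using std::list as a stand-in for ART's intrusive use list (not the real HUseList API):

  #include <list>

  struct Use { int user_id; };

  void DropMatchingUses(std::list<Use>& uses, int target_user) {
    for (auto it = uses.begin(), end = uses.end(); it != end;) {
      auto current = it++;    // advance first; mutating `current` cannot invalidate `it`
      if (current->user_id == target_user) {
        uses.erase(current);  // stands in for the rewrite that unlinks this use
      }
    }
  }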
void InstructionSimplifierVisitor::SimplifyStringEquals(HInvoke* instruction) {
HInstruction* argument = instruction->InputAt(1);
HInstruction* receiver = instruction->InputAt(0);
@@ -2372,7 +2406,9 @@ static bool IsArrayLengthOf(HInstruction* potential_length, HInstruction* potent
void InstructionSimplifierVisitor::SimplifySystemArrayCopy(HInvoke* instruction) {
HInstruction* source = instruction->InputAt(0);
+ HInstruction* source_pos = instruction->InputAt(1);
HInstruction* destination = instruction->InputAt(2);
+ HInstruction* destination_pos = instruction->InputAt(3);
HInstruction* count = instruction->InputAt(4);
SystemArrayCopyOptimizations optimizations(instruction);
if (CanEnsureNotNullAt(source, instruction)) {
@@ -2385,6 +2421,10 @@ void InstructionSimplifierVisitor::SimplifySystemArrayCopy(HInvoke* instruction)
optimizations.SetDestinationIsSource();
}
+ if (source_pos == destination_pos) {
+ optimizations.SetSourcePositionIsDestinationPosition();
+ }
+
if (IsArrayLengthOf(count, source)) {
optimizations.SetCountIsSourceLength();
}
@@ -2985,6 +3025,12 @@ bool InstructionSimplifierVisitor::CanUseKnownBootImageVarHandle(HInvoke* invoke
void InstructionSimplifierVisitor::VisitInvoke(HInvoke* instruction) {
switch (instruction->GetIntrinsic()) {
+#define SIMPLIFY_BOX_UNBOX(name, low, high, type, start_index) \
+ case Intrinsics::k ## name ## ValueOf: \
+ SimplifyBoxUnbox(instruction, WellKnownClasses::java_lang_##name##_value, type); \
+ break;
+ BOXED_TYPES(SIMPLIFY_BOX_UNBOX)
+#undef SIMPLIFY_BOX_UNBOX
case Intrinsics::kStringEquals:
SimplifyStringEquals(instruction);
break;
@@ -3063,43 +3109,6 @@ void InstructionSimplifierVisitor::VisitInvoke(HInvoke* instruction) {
case Intrinsics::kVarHandleWeakCompareAndSetRelease:
SimplifyVarHandleIntrinsic(instruction);
break;
- case Intrinsics::kIntegerRotateRight:
- case Intrinsics::kLongRotateRight:
- case Intrinsics::kIntegerRotateLeft:
- case Intrinsics::kLongRotateLeft:
- case Intrinsics::kIntegerCompare:
- case Intrinsics::kLongCompare:
- case Intrinsics::kIntegerSignum:
- case Intrinsics::kLongSignum:
- case Intrinsics::kFloatIsNaN:
- case Intrinsics::kDoubleIsNaN:
- case Intrinsics::kStringIsEmpty:
- case Intrinsics::kUnsafeLoadFence:
- case Intrinsics::kUnsafeStoreFence:
- case Intrinsics::kUnsafeFullFence:
- case Intrinsics::kJdkUnsafeLoadFence:
- case Intrinsics::kJdkUnsafeStoreFence:
- case Intrinsics::kJdkUnsafeFullFence:
- case Intrinsics::kVarHandleFullFence:
- case Intrinsics::kVarHandleAcquireFence:
- case Intrinsics::kVarHandleReleaseFence:
- case Intrinsics::kVarHandleLoadLoadFence:
- case Intrinsics::kVarHandleStoreStoreFence:
- case Intrinsics::kMathMinIntInt:
- case Intrinsics::kMathMinLongLong:
- case Intrinsics::kMathMinFloatFloat:
- case Intrinsics::kMathMinDoubleDouble:
- case Intrinsics::kMathMaxIntInt:
- case Intrinsics::kMathMaxLongLong:
- case Intrinsics::kMathMaxFloatFloat:
- case Intrinsics::kMathMaxDoubleDouble:
- case Intrinsics::kMathAbsInt:
- case Intrinsics::kMathAbsLong:
- case Intrinsics::kMathAbsFloat:
- case Intrinsics::kMathAbsDouble:
- // These are replaced by intermediate representation in the instruction builder.
- LOG(FATAL) << "Unexpected " << static_cast<Intrinsics>(instruction->GetIntrinsic());
- UNREACHABLE();
default:
break;
}
@@ -3215,7 +3224,7 @@ bool InstructionSimplifierVisitor::TrySubtractionChainSimplification(
HInstruction* left = instruction->GetLeft();
HInstruction* right = instruction->GetRight();
// Variable names as described above.
- HConstant* const2 = right->IsConstant() ? right->AsConstant() : left->AsConstant();
+ HConstant* const2 = right->IsConstant() ? right->AsConstant() : left->AsConstantOrNull();
if (const2 == nullptr) {
return false;
}
@@ -3231,7 +3240,7 @@ bool InstructionSimplifierVisitor::TrySubtractionChainSimplification(
}
left = y->GetLeft();
- HConstant* const1 = left->IsConstant() ? left->AsConstant() : y->GetRight()->AsConstant();
+ HConstant* const1 = left->IsConstant() ? left->AsConstant() : y->GetRight()->AsConstantOrNull();
if (const1 == nullptr) {
return false;
}
diff --git a/compiler/optimizing/instruction_simplifier_arm.cc b/compiler/optimizing/instruction_simplifier_arm.cc
index 05a518d544..be4371f734 100644
--- a/compiler/optimizing/instruction_simplifier_arm.cc
+++ b/compiler/optimizing/instruction_simplifier_arm.cc
@@ -33,8 +33,9 @@ namespace arm {
class InstructionSimplifierArmVisitor final : public HGraphVisitor {
public:
- InstructionSimplifierArmVisitor(HGraph* graph, OptimizingCompilerStats* stats)
- : HGraphVisitor(graph), stats_(stats) {}
+ InstructionSimplifierArmVisitor(
+ HGraph* graph, CodeGenerator* codegen, OptimizingCompilerStats* stats)
+ : HGraphVisitor(graph), codegen_(codegen), stats_(stats) {}
private:
void RecordSimplification() {
@@ -78,6 +79,7 @@ class InstructionSimplifierArmVisitor final : public HGraphVisitor {
void VisitTypeConversion(HTypeConversion* instruction) override;
void VisitUShr(HUShr* instruction) override;
+ CodeGenerator* codegen_;
OptimizingCompilerStats* stats_;
};
@@ -217,7 +219,8 @@ void InstructionSimplifierArmVisitor::VisitArrayGet(HArrayGet* instruction) {
return;
}
- if (TryExtractArrayAccessAddress(instruction,
+ if (TryExtractArrayAccessAddress(codegen_,
+ instruction,
instruction->GetArray(),
instruction->GetIndex(),
data_offset)) {
@@ -238,7 +241,8 @@ void InstructionSimplifierArmVisitor::VisitArraySet(HArraySet* instruction) {
return;
}
- if (TryExtractArrayAccessAddress(instruction,
+ if (TryExtractArrayAccessAddress(codegen_,
+ instruction,
instruction->GetArray(),
instruction->GetIndex(),
data_offset)) {
@@ -300,7 +304,7 @@ void InstructionSimplifierArmVisitor::VisitUShr(HUShr* instruction) {
}
bool InstructionSimplifierArm::Run() {
- InstructionSimplifierArmVisitor visitor(graph_, stats_);
+ InstructionSimplifierArmVisitor visitor(graph_, codegen_, stats_);
visitor.VisitReversePostOrder();
return true;
}
diff --git a/compiler/optimizing/instruction_simplifier_arm.h b/compiler/optimizing/instruction_simplifier_arm.h
index 0517e4f49e..25cea7c829 100644
--- a/compiler/optimizing/instruction_simplifier_arm.h
+++ b/compiler/optimizing/instruction_simplifier_arm.h
@@ -22,16 +22,23 @@
#include "optimization.h"
namespace art HIDDEN {
+
+class CodeGenerator;
+
namespace arm {
class InstructionSimplifierArm : public HOptimization {
public:
- InstructionSimplifierArm(HGraph* graph, OptimizingCompilerStats* stats)
- : HOptimization(graph, kInstructionSimplifierArmPassName, stats) {}
+ InstructionSimplifierArm(HGraph* graph, CodeGenerator* codegen, OptimizingCompilerStats* stats)
+ : HOptimization(graph, kInstructionSimplifierArmPassName, stats),
+ codegen_(codegen) {}
static constexpr const char* kInstructionSimplifierArmPassName = "instruction_simplifier_arm";
bool Run() override;
+
+ private:
+ CodeGenerator* codegen_;
};
} // namespace arm
diff --git a/compiler/optimizing/instruction_simplifier_arm64.cc b/compiler/optimizing/instruction_simplifier_arm64.cc
index 671900bd9d..2c191dc3f4 100644
--- a/compiler/optimizing/instruction_simplifier_arm64.cc
+++ b/compiler/optimizing/instruction_simplifier_arm64.cc
@@ -33,8 +33,9 @@ using helpers::ShifterOperandSupportsExtension;
class InstructionSimplifierArm64Visitor final : public HGraphVisitor {
public:
- InstructionSimplifierArm64Visitor(HGraph* graph, OptimizingCompilerStats* stats)
- : HGraphVisitor(graph), stats_(stats) {}
+ InstructionSimplifierArm64Visitor(
+ HGraph* graph, CodeGenerator* codegen, OptimizingCompilerStats* stats)
+ : HGraphVisitor(graph), codegen_(codegen), stats_(stats) {}
private:
void RecordSimplification() {
@@ -84,6 +85,7 @@ class InstructionSimplifierArm64Visitor final : public HGraphVisitor {
void VisitVecLoad(HVecLoad* instruction) override;
void VisitVecStore(HVecStore* instruction) override;
+ CodeGenerator* codegen_;
OptimizingCompilerStats* stats_;
};
@@ -198,7 +200,8 @@ void InstructionSimplifierArm64Visitor::VisitAnd(HAnd* instruction) {
void InstructionSimplifierArm64Visitor::VisitArrayGet(HArrayGet* instruction) {
size_t data_offset = CodeGenerator::GetArrayDataOffset(instruction);
- if (TryExtractArrayAccessAddress(instruction,
+ if (TryExtractArrayAccessAddress(codegen_,
+ instruction,
instruction->GetArray(),
instruction->GetIndex(),
data_offset)) {
@@ -209,7 +212,8 @@ void InstructionSimplifierArm64Visitor::VisitArrayGet(HArrayGet* instruction) {
void InstructionSimplifierArm64Visitor::VisitArraySet(HArraySet* instruction) {
size_t access_size = DataType::Size(instruction->GetComponentType());
size_t data_offset = mirror::Array::DataOffset(access_size).Uint32Value();
- if (TryExtractArrayAccessAddress(instruction,
+ if (TryExtractArrayAccessAddress(codegen_,
+ instruction,
instruction->GetArray(),
instruction->GetIndex(),
data_offset)) {
@@ -284,7 +288,7 @@ void InstructionSimplifierArm64Visitor::VisitVecLoad(HVecLoad* instruction) {
size_t size = DataType::Size(instruction->GetPackedType());
size_t offset = mirror::Array::DataOffset(size).Uint32Value();
if (TryExtractArrayAccessAddress(
- instruction, instruction->GetArray(), instruction->GetIndex(), offset)) {
+ codegen_, instruction, instruction->GetArray(), instruction->GetIndex(), offset)) {
RecordSimplification();
}
}
@@ -298,14 +302,14 @@ void InstructionSimplifierArm64Visitor::VisitVecStore(HVecStore* instruction) {
size_t size = DataType::Size(instruction->GetPackedType());
size_t offset = mirror::Array::DataOffset(size).Uint32Value();
if (TryExtractArrayAccessAddress(
- instruction, instruction->GetArray(), instruction->GetIndex(), offset)) {
+ codegen_, instruction, instruction->GetArray(), instruction->GetIndex(), offset)) {
RecordSimplification();
}
}
}
bool InstructionSimplifierArm64::Run() {
- InstructionSimplifierArm64Visitor visitor(graph_, stats_);
+ InstructionSimplifierArm64Visitor visitor(graph_, codegen_, stats_);
visitor.VisitReversePostOrder();
return true;
}
diff --git a/compiler/optimizing/instruction_simplifier_arm64.h b/compiler/optimizing/instruction_simplifier_arm64.h
index 374638ab9e..5c57484b24 100644
--- a/compiler/optimizing/instruction_simplifier_arm64.h
+++ b/compiler/optimizing/instruction_simplifier_arm64.h
@@ -22,16 +22,23 @@
#include "optimization.h"
namespace art HIDDEN {
+
+class CodeGenerator;
+
namespace arm64 {
class InstructionSimplifierArm64 : public HOptimization {
public:
- InstructionSimplifierArm64(HGraph* graph, OptimizingCompilerStats* stats)
- : HOptimization(graph, kInstructionSimplifierArm64PassName, stats) {}
+ InstructionSimplifierArm64(HGraph* graph, CodeGenerator* codegen, OptimizingCompilerStats* stats)
+ : HOptimization(graph, kInstructionSimplifierArm64PassName, stats),
+ codegen_(codegen) {}
static constexpr const char* kInstructionSimplifierArm64PassName = "instruction_simplifier_arm64";
bool Run() override;
+
+ private:
+ CodeGenerator* codegen_;
};
} // namespace arm64
diff --git a/compiler/optimizing/instruction_simplifier_shared.cc b/compiler/optimizing/instruction_simplifier_shared.cc
index 34daae21ee..50ea2b929b 100644
--- a/compiler/optimizing/instruction_simplifier_shared.cc
+++ b/compiler/optimizing/instruction_simplifier_shared.cc
@@ -16,6 +16,7 @@
#include "instruction_simplifier_shared.h"
+#include "code_generator.h"
#include "mirror/array-inl.h"
namespace art HIDDEN {
@@ -229,7 +230,8 @@ bool TryMergeNegatedInput(HBinaryOperation* op) {
}
-bool TryExtractArrayAccessAddress(HInstruction* access,
+bool TryExtractArrayAccessAddress(CodeGenerator* codegen,
+ HInstruction* access,
HInstruction* array,
HInstruction* index,
size_t data_offset) {
@@ -244,8 +246,7 @@ bool TryExtractArrayAccessAddress(HInstruction* access,
// The access may require a runtime call or the original array pointer.
return false;
}
- if (gUseReadBarrier &&
- !kUseBakerReadBarrier &&
+ if (codegen->EmitNonBakerReadBarrier() &&
access->IsArrayGet() &&
access->GetType() == DataType::Type::kReference) {
// For object arrays, the non-Baker read barrier instrumentation requires
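TryExtractArrayAccessAddress now asks the code generator about the read-barrier configuration instead of reading the gUseReadBarrier/kUseBakerReadBarrier globals directly, so the decision follows the per-compilation settings. A toy sketch of that shape; the names and the body of the query are illustrative assumptions, not the real CodeGenerator implementation:

  struct ToyCodegen {
    bool emit_read_barrier;       // assumed per-compilation flag
    bool use_baker_read_barrier;  // assumed per-compilation flag
    bool EmitNonBakerReadBarrier() const {
      return emit_read_barrier && !use_baker_read_barrier;
    }
  };

  // Mirrors the rewritten condition above: only reference loads under a
  // non-Baker read barrier need to keep the original array pointer.
  bool NeedsOriginalArrayPointer(const ToyCodegen& codegen, bool is_reference_array_get) {
    return codegen.EmitNonBakerReadBarrier() && is_reference_array_get;
  }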
diff --git a/compiler/optimizing/instruction_simplifier_shared.h b/compiler/optimizing/instruction_simplifier_shared.h
index ddc3a867b8..68148cff7e 100644
--- a/compiler/optimizing/instruction_simplifier_shared.h
+++ b/compiler/optimizing/instruction_simplifier_shared.h
@@ -22,6 +22,8 @@
namespace art HIDDEN {
+class CodeGenerator;
+
namespace helpers {
inline bool CanFitInShifterOperand(HInstruction* instruction) {
@@ -54,7 +56,7 @@ inline bool HasShifterOperand(HInstruction* instr, InstructionSet isa) {
// t3 = Sub(*, t2)
inline bool IsSubRightSubLeftShl(HSub *sub) {
HInstruction* right = sub->GetRight();
- return right->IsSub() && right->AsSub()->GetLeft()->IsShl();;
+ return right->IsSub() && right->AsSub()->GetLeft()->IsShl();
}
} // namespace helpers
@@ -64,7 +66,8 @@ bool TryCombineMultiplyAccumulate(HMul* mul, InstructionSet isa);
// a negated bitwise instruction.
bool TryMergeNegatedInput(HBinaryOperation* op);
-bool TryExtractArrayAccessAddress(HInstruction* access,
+bool TryExtractArrayAccessAddress(CodeGenerator* codegen,
+ HInstruction* access,
HInstruction* array,
HInstruction* index,
size_t data_offset);
diff --git a/compiler/optimizing/instruction_simplifier_test.cc b/compiler/optimizing/instruction_simplifier_test.cc
index 966f5b91cf..9f47995cf5 100644
--- a/compiler/optimizing/instruction_simplifier_test.cc
+++ b/compiler/optimizing/instruction_simplifier_test.cc
@@ -134,260 +134,6 @@ class InstanceOfInstructionSimplifierTestGroup
};
// // ENTRY
-// switch (param) {
-// case 1:
-// obj1 = param2; break;
-// case 2:
-// obj1 = param3; break;
-// default:
-// obj2 = new Obj();
-// }
-// val_phi = PHI[3,4,10]
-// target_phi = PHI[param2, param3, obj2]
-// return PredFieldGet[val_phi, target_phi] => PredFieldGet[val_phi, target_phi]
-TEST_F(InstructionSimplifierTest, SimplifyPredicatedFieldGetNoMerge) {
- ScopedObjectAccess soa(Thread::Current());
- VariableSizedHandleScope vshs(soa.Self());
- CreateGraph(&vshs);
- AdjacencyListGraph blks(SetupFromAdjacencyList("entry",
- "exit",
- {{"entry", "case1"},
- {"entry", "case2"},
- {"entry", "case3"},
- {"case1", "breturn"},
- {"case2", "breturn"},
- {"case3", "breturn"},
- {"breturn", "exit"}}));
-#define GET_BLOCK(name) HBasicBlock* name = blks.Get(#name)
- GET_BLOCK(entry);
- GET_BLOCK(exit);
- GET_BLOCK(case1);
- GET_BLOCK(case2);
- GET_BLOCK(case3);
- GET_BLOCK(breturn);
-#undef GET_BLOCK
-
- HInstruction* bool_value = MakeParam(DataType::Type::kInt32);
- HInstruction* obj1_param = MakeParam(DataType::Type::kReference);
- HInstruction* obj2_param = MakeParam(DataType::Type::kReference);
- HInstruction* c3 = graph_->GetIntConstant(3);
- HInstruction* c4 = graph_->GetIntConstant(4);
- HInstruction* c10 = graph_->GetIntConstant(10);
-
- HInstruction* cls = MakeClassLoad();
- HInstruction* switch_inst = new (GetAllocator()) HPackedSwitch(0, 2, bool_value);
- entry->AddInstruction(cls);
- entry->AddInstruction(switch_inst);
- ManuallyBuildEnvFor(cls, {});
-
- HInstruction* goto_c1 = new (GetAllocator()) HGoto();
- case1->AddInstruction(goto_c1);
-
- HInstruction* goto_c2 = new (GetAllocator()) HGoto();
- case2->AddInstruction(goto_c2);
-
- HInstruction* obj3 = MakeNewInstance(cls);
- HInstruction* goto_c3 = new (GetAllocator()) HGoto();
- case3->AddInstruction(obj3);
- case3->AddInstruction(goto_c3);
-
- HPhi* val_phi = MakePhi({c3, c4, c10});
- HPhi* obj_phi = MakePhi({obj1_param, obj2_param, obj3});
- HPredicatedInstanceFieldGet* read_end =
- new (GetAllocator()) HPredicatedInstanceFieldGet(obj_phi,
- nullptr,
- val_phi,
- val_phi->GetType(),
- MemberOffset(10),
- false,
- 42,
- 0,
- graph_->GetDexFile(),
- 0);
- HInstruction* return_exit = new (GetAllocator()) HReturn(read_end);
- breturn->AddPhi(val_phi);
- breturn->AddPhi(obj_phi);
- breturn->AddInstruction(read_end);
- breturn->AddInstruction(return_exit);
-
- SetupExit(exit);
-
- PerformSimplification(blks);
-
- EXPECT_INS_RETAINED(read_end);
-
- EXPECT_INS_EQ(read_end->GetTarget(), obj_phi);
- EXPECT_INS_EQ(read_end->GetDefaultValue(), val_phi);
-}
-
-// // ENTRY
-// switch (param) {
-// case 1:
-// obj1 = param2; break;
-// case 2:
-// obj1 = param3; break;
-// default:
-// obj2 = new Obj();
-// }
-// val_phi = PHI[3,3,10]
-// target_phi = PHI[param2, param3, obj2]
-// return PredFieldGet[val_phi, target_phi] => PredFieldGet[3, target_phi]
-TEST_F(InstructionSimplifierTest, SimplifyPredicatedFieldGetMerge) {
- ScopedObjectAccess soa(Thread::Current());
- VariableSizedHandleScope vshs(soa.Self());
- CreateGraph(&vshs);
- AdjacencyListGraph blks(SetupFromAdjacencyList("entry",
- "exit",
- {{"entry", "case1"},
- {"entry", "case2"},
- {"entry", "case3"},
- {"case1", "breturn"},
- {"case2", "breturn"},
- {"case3", "breturn"},
- {"breturn", "exit"}}));
-#define GET_BLOCK(name) HBasicBlock* name = blks.Get(#name)
- GET_BLOCK(entry);
- GET_BLOCK(exit);
- GET_BLOCK(case1);
- GET_BLOCK(case2);
- GET_BLOCK(case3);
- GET_BLOCK(breturn);
-#undef GET_BLOCK
-
- HInstruction* bool_value = MakeParam(DataType::Type::kInt32);
- HInstruction* obj1_param = MakeParam(DataType::Type::kReference);
- HInstruction* obj2_param = MakeParam(DataType::Type::kReference);
- HInstruction* c3 = graph_->GetIntConstant(3);
- HInstruction* c10 = graph_->GetIntConstant(10);
-
- HInstruction* cls = MakeClassLoad();
- HInstruction* switch_inst = new (GetAllocator()) HPackedSwitch(0, 2, bool_value);
- entry->AddInstruction(cls);
- entry->AddInstruction(switch_inst);
- ManuallyBuildEnvFor(cls, {});
-
- HInstruction* goto_c1 = new (GetAllocator()) HGoto();
- case1->AddInstruction(goto_c1);
-
- HInstruction* goto_c2 = new (GetAllocator()) HGoto();
- case2->AddInstruction(goto_c2);
-
- HInstruction* obj3 = MakeNewInstance(cls);
- HInstruction* goto_c3 = new (GetAllocator()) HGoto();
- case3->AddInstruction(obj3);
- case3->AddInstruction(goto_c3);
-
- HPhi* val_phi = MakePhi({c3, c3, c10});
- HPhi* obj_phi = MakePhi({obj1_param, obj2_param, obj3});
- HPredicatedInstanceFieldGet* read_end =
- new (GetAllocator()) HPredicatedInstanceFieldGet(obj_phi,
- nullptr,
- val_phi,
- val_phi->GetType(),
- MemberOffset(10),
- false,
- 42,
- 0,
- graph_->GetDexFile(),
- 0);
- HInstruction* return_exit = new (GetAllocator()) HReturn(read_end);
- breturn->AddPhi(val_phi);
- breturn->AddPhi(obj_phi);
- breturn->AddInstruction(read_end);
- breturn->AddInstruction(return_exit);
-
- SetupExit(exit);
-
- PerformSimplification(blks);
-
- EXPECT_FALSE(obj3->CanBeNull());
- EXPECT_INS_RETAINED(read_end);
-
- EXPECT_INS_EQ(read_end->GetTarget(), obj_phi);
- EXPECT_INS_EQ(read_end->GetDefaultValue(), c3);
-}
-
-// // ENTRY
-// if (param) {
-// obj1 = new Obj();
-// } else {
-// obj2 = new Obj();
-// }
-// val_phi = PHI[3,10]
-// target_phi = PHI[obj1, obj2]
-// return PredFieldGet[val_phi, target_phi] => FieldGet[target_phi]
-TEST_F(InstructionSimplifierTest, SimplifyPredicatedFieldGetNoNull) {
- ScopedObjectAccess soa(Thread::Current());
- VariableSizedHandleScope vshs(soa.Self());
- CreateGraph(&vshs);
- AdjacencyListGraph blks(SetupFromAdjacencyList("entry",
- "exit",
- {{"entry", "left"},
- {"entry", "right"},
- {"left", "breturn"},
- {"right", "breturn"},
- {"breturn", "exit"}}));
-#define GET_BLOCK(name) HBasicBlock* name = blks.Get(#name)
- GET_BLOCK(entry);
- GET_BLOCK(exit);
- GET_BLOCK(left);
- GET_BLOCK(right);
- GET_BLOCK(breturn);
-#undef GET_BLOCK
-
- HInstruction* bool_value = MakeParam(DataType::Type::kBool);
- HInstruction* c3 = graph_->GetIntConstant(3);
- HInstruction* c10 = graph_->GetIntConstant(10);
-
- HInstruction* cls = MakeClassLoad();
- HInstruction* if_inst = new (GetAllocator()) HIf(bool_value);
- entry->AddInstruction(cls);
- entry->AddInstruction(if_inst);
- ManuallyBuildEnvFor(cls, {});
-
- HInstruction* obj1 = MakeNewInstance(cls);
- HInstruction* goto_left = new (GetAllocator()) HGoto();
- left->AddInstruction(obj1);
- left->AddInstruction(goto_left);
-
- HInstruction* obj2 = MakeNewInstance(cls);
- HInstruction* goto_right = new (GetAllocator()) HGoto();
- right->AddInstruction(obj2);
- right->AddInstruction(goto_right);
-
- HPhi* val_phi = MakePhi({c3, c10});
- HPhi* obj_phi = MakePhi({obj1, obj2});
- obj_phi->SetCanBeNull(false);
- HInstruction* read_end = new (GetAllocator()) HPredicatedInstanceFieldGet(obj_phi,
- nullptr,
- val_phi,
- val_phi->GetType(),
- MemberOffset(10),
- false,
- 42,
- 0,
- graph_->GetDexFile(),
- 0);
- HInstruction* return_exit = new (GetAllocator()) HReturn(read_end);
- breturn->AddPhi(val_phi);
- breturn->AddPhi(obj_phi);
- breturn->AddInstruction(read_end);
- breturn->AddInstruction(return_exit);
-
- SetupExit(exit);
-
- PerformSimplification(blks);
-
- EXPECT_FALSE(obj1->CanBeNull());
- EXPECT_FALSE(obj2->CanBeNull());
- EXPECT_INS_REMOVED(read_end);
-
- HInstanceFieldGet* ifget = FindSingleInstruction<HInstanceFieldGet>(graph_, breturn);
- ASSERT_NE(ifget, nullptr);
- EXPECT_INS_EQ(ifget->InputAt(0), obj_phi);
-}
-
-// // ENTRY
// obj = new Obj();
// // Make sure this graph isn't broken
// if (obj instanceof <other>) {
diff --git a/compiler/optimizing/intrinsic_objects.cc b/compiler/optimizing/intrinsic_objects.cc
index 7e542117a9..cf49f50d91 100644
--- a/compiler/optimizing/intrinsic_objects.cc
+++ b/compiler/optimizing/intrinsic_objects.cc
@@ -20,28 +20,54 @@
#include "base/casts.h"
#include "base/logging.h"
#include "image.h"
+#include "intrinsics.h"
#include "obj_ptr-inl.h"
+#include "well_known_classes.h"
namespace art HIDDEN {
static constexpr size_t kIntrinsicObjectsOffset =
enum_cast<size_t>(ImageHeader::kIntrinsicObjectsStart);
-ObjPtr<mirror::ObjectArray<mirror::Object>> IntrinsicObjects::LookupIntegerCache(
- Thread* self, ClassLinker* class_linker) {
- ObjPtr<mirror::Class> integer_cache_class = class_linker->LookupClass(
- self, "Ljava/lang/Integer$IntegerCache;", /* class_loader= */ nullptr);
- if (integer_cache_class == nullptr || !integer_cache_class->IsInitialized()) {
- return nullptr;
- }
- ArtField* cache_field =
- integer_cache_class->FindDeclaredStaticField("cache", "[Ljava/lang/Integer;");
- CHECK(cache_field != nullptr);
- ObjPtr<mirror::ObjectArray<mirror::Object>> integer_cache =
+template <typename T>
+static int32_t FillIntrinsicsObjects(
+ ArtField* cache_field,
+ ObjPtr<mirror::ObjectArray<mirror::Object>> live_objects,
+ int32_t expected_low,
+ int32_t expected_high,
+ T type_check,
+ int32_t index)
+ REQUIRES_SHARED(Locks::mutator_lock_) {
+ ObjPtr<mirror::ObjectArray<mirror::Object>> cache =
ObjPtr<mirror::ObjectArray<mirror::Object>>::DownCast(
- cache_field->GetObject(integer_cache_class));
- CHECK(integer_cache != nullptr);
- return integer_cache;
+ cache_field->GetObject(cache_field->GetDeclaringClass()));
+ int32_t length = expected_high - expected_low + 1;
+ DCHECK_EQ(length, cache->GetLength());
+ for (int32_t i = 0; i != length; ++i) {
+ ObjPtr<mirror::Object> value = cache->GetWithoutChecks(i);
+ live_objects->Set(index + i, value);
+ type_check(value, expected_low + i);
+ }
+ return index + length;
+}
+
+void IntrinsicObjects::FillIntrinsicObjects(
+ ObjPtr<mirror::ObjectArray<mirror::Object>> boot_image_live_objects, size_t start_index) {
+ DCHECK_EQ(start_index, ImageHeader::kIntrinsicObjectsStart);
+ int32_t index = dchecked_integral_cast<int32_t>(start_index);
+#define FILL_OBJECTS(name, low, high, type, offset) \
+ index = FillIntrinsicsObjects( \
+ WellKnownClasses::java_lang_ ##name ##_ ##name ##Cache_cache, \
+ boot_image_live_objects, \
+ low, \
+ high, \
+ [](ObjPtr<mirror::Object> obj, int32_t expected) REQUIRES_SHARED(Locks::mutator_lock_) { \
+ CHECK_EQ(expected, WellKnownClasses::java_lang_ ##name ##_value->Get ##name(obj)); \
+ }, \
+ index);
+ BOXED_TYPES(FILL_OBJECTS)
+#undef FILL_OBJECTS
+ DCHECK_EQ(dchecked_integral_cast<size_t>(index), start_index + GetNumberOfIntrinsicObjects());
}
static bool HasIntrinsicObjects(
@@ -53,43 +79,26 @@ static bool HasIntrinsicObjects(
return length != kIntrinsicObjectsOffset;
}
-ObjPtr<mirror::ObjectArray<mirror::Object>> IntrinsicObjects::GetIntegerValueOfCache(
- ObjPtr<mirror::ObjectArray<mirror::Object>> boot_image_live_objects) {
- if (!HasIntrinsicObjects(boot_image_live_objects)) {
- return nullptr; // No intrinsic objects.
- }
- // No need for read barrier for boot image object or for verifying the value that was just stored.
- ObjPtr<mirror::Object> result =
- boot_image_live_objects->GetWithoutChecks<kVerifyNone, kWithoutReadBarrier>(
- kIntrinsicObjectsOffset);
- DCHECK(result != nullptr);
- DCHECK(result->IsObjectArray());
- DCHECK(result->GetClass()->DescriptorEquals("[Ljava/lang/Integer;"));
- return ObjPtr<mirror::ObjectArray<mirror::Object>>::DownCast(result);
-}
-
-ObjPtr<mirror::Object> IntrinsicObjects::GetIntegerValueOfObject(
+ObjPtr<mirror::Object> IntrinsicObjects::GetValueOfObject(
ObjPtr<mirror::ObjectArray<mirror::Object>> boot_image_live_objects,
+ size_t start_index,
uint32_t index) {
DCHECK(HasIntrinsicObjects(boot_image_live_objects));
- DCHECK_LT(index,
- static_cast<uint32_t>(GetIntegerValueOfCache(boot_image_live_objects)->GetLength()));
-
// No need for read barrier for boot image object or for verifying the value that was just stored.
ObjPtr<mirror::Object> result =
boot_image_live_objects->GetWithoutChecks<kVerifyNone, kWithoutReadBarrier>(
- kIntrinsicObjectsOffset + /* skip the IntegerCache.cache */ 1u + index);
+ kIntrinsicObjectsOffset + start_index + index);
DCHECK(result != nullptr);
- DCHECK(result->GetClass()->DescriptorEquals("Ljava/lang/Integer;"));
return result;
}
-MemberOffset IntrinsicObjects::GetIntegerValueOfArrayDataOffset(
- ObjPtr<mirror::ObjectArray<mirror::Object>> boot_image_live_objects) {
+MemberOffset IntrinsicObjects::GetValueOfArrayDataOffset(
+ ObjPtr<mirror::ObjectArray<mirror::Object>> boot_image_live_objects,
+ size_t start_index) {
DCHECK(HasIntrinsicObjects(boot_image_live_objects));
MemberOffset result =
- mirror::ObjectArray<mirror::Object>::OffsetOfElement(kIntrinsicObjectsOffset + 1u);
- DCHECK_EQ(GetIntegerValueOfObject(boot_image_live_objects, 0u),
+ mirror::ObjectArray<mirror::Object>::OffsetOfElement(kIntrinsicObjectsOffset + start_index);
+ DCHECK_EQ(GetValueOfObject(boot_image_live_objects, start_index, 0u),
(boot_image_live_objects
->GetFieldObject<mirror::Object, kVerifyNone, kWithoutReadBarrier>(result)));
return result;
diff --git a/compiler/optimizing/intrinsic_objects.h b/compiler/optimizing/intrinsic_objects.h
index d750f2934b..52a6b81f0e 100644
--- a/compiler/optimizing/intrinsic_objects.h
+++ b/compiler/optimizing/intrinsic_objects.h
@@ -21,11 +21,12 @@
#include "base/bit_utils.h"
#include "base/macros.h"
#include "base/mutex.h"
+#include "obj_ptr.h"
+#include "offsets.h"
namespace art HIDDEN {
class ClassLinker;
-template <class MirrorType> class ObjPtr;
class MemberOffset;
class Thread;
@@ -34,17 +35,30 @@ class Object;
template <class T> class ObjectArray;
} // namespace mirror
+#define BOXED_TYPES(V) \
+ V(Byte, -128, 127, DataType::Type::kInt8, 0) \
+ V(Short, -128, 127, DataType::Type::kInt16, kByteCacheLastIndex) \
+ V(Character, 0, 127, DataType::Type::kUint16, kShortCacheLastIndex) \
+ V(Integer, -128, 127, DataType::Type::kInt32, kCharacterCacheLastIndex)
+
+#define DEFINE_BOXED_CONSTANTS(name, low, high, unused, start_index) \
+ static constexpr size_t k ##name ##CacheLastIndex = start_index + (high - low + 1); \
+ static constexpr size_t k ##name ##CacheFirstIndex = start_index;
+ BOXED_TYPES(DEFINE_BOXED_CONSTANTS)
+
+ static constexpr size_t kNumberOfBoxedCaches = kIntegerCacheLastIndex;
+#undef DEFINE_BOXED_CONSTANTS
+
class IntrinsicObjects {
public:
enum class PatchType {
- kIntegerValueOfObject,
- kIntegerValueOfArray,
+ kValueOfObject,
+ kValueOfArray,
- kLast = kIntegerValueOfArray
+ kLast = kValueOfArray
};
static uint32_t EncodePatch(PatchType patch_type, uint32_t index = 0u) {
- DCHECK(patch_type == PatchType::kIntegerValueOfObject || index == 0u);
return PatchTypeField::Encode(static_cast<uint32_t>(patch_type)) | IndexField::Encode(index);
}
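The BOXED_TYPES X-macro above chains each boxed cache's start index off the previous cache's last index, so the per-type ranges pack into one contiguous slice of the intrinsic objects table. The chaining can be seen in a reduced form with made-up names (illustration only):

  #define TOY_BOXED_TYPES(V)                \
    V(Byte, -128, 127, 0)                   \
    V(Short, -128, 127, kToyByteLastIndex)

  #define TOY_DEFINE_CONSTANTS(name, low, high, start_index)          \
    static constexpr int kToy##name##FirstIndex = start_index;        \
    static constexpr int kToy##name##LastIndex = start_index + ((high) - (low) + 1);
  TOY_BOXED_TYPES(TOY_DEFINE_CONSTANTS)
  #undef TOY_DEFINE_CONSTANTS
  #undef TOY_BOXED_TYPES

  // kToyByteFirstIndex == 0,    kToyByteLastIndex == 256,
  // kToyShortFirstIndex == 256, kToyShortLastIndex == 512.
  static_assert(kToyShortFirstIndex == kToyByteLastIndex);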
@@ -56,18 +70,37 @@ class IntrinsicObjects {
return IndexField::Decode(intrinsic_data);
}
- // Functions for retrieving data for Integer.valueOf().
- EXPORT static ObjPtr<mirror::ObjectArray<mirror::Object>> LookupIntegerCache(
- Thread* self, ClassLinker* class_linker) REQUIRES_SHARED(Locks::mutator_lock_);
- EXPORT static ObjPtr<mirror::ObjectArray<mirror::Object>> GetIntegerValueOfCache(
- ObjPtr<mirror::ObjectArray<mirror::Object>> boot_image_live_objects)
+ // Helpers returning addresses of objects, suitable for embedding in generated code.
+#define DEFINE_BOXED_ACCESSES(name, unused1, unused2, unused3, start_index) \
+ static ObjPtr<mirror::Object> Get ##name ##ValueOfObject( \
+ ObjPtr<mirror::ObjectArray<mirror::Object>> boot_image_live_objects, \
+ uint32_t index) REQUIRES_SHARED(Locks::mutator_lock_) { \
+ return GetValueOfObject(boot_image_live_objects, k ##name ##CacheFirstIndex, index); \
+ } \
+ static MemberOffset Get ##name ##ValueOfArrayDataOffset( \
+ ObjPtr<mirror::ObjectArray<mirror::Object>> boot_image_live_objects) \
+ REQUIRES_SHARED(Locks::mutator_lock_) { \
+ return GetValueOfArrayDataOffset(boot_image_live_objects, k ##name ##CacheFirstIndex); \
+ }
+ BOXED_TYPES(DEFINE_BOXED_ACCESSES)
+#undef DEFINE_BOXED_ACCESSES
+
+ EXPORT static void FillIntrinsicObjects(
+ ObjPtr<mirror::ObjectArray<mirror::Object>> boot_image_live_objects, size_t start_index)
REQUIRES_SHARED(Locks::mutator_lock_);
- EXPORT static ObjPtr<mirror::Object> GetIntegerValueOfObject(
+
+ static constexpr size_t GetNumberOfIntrinsicObjects() {
+ return kNumberOfBoxedCaches;
+ }
+
+ EXPORT static ObjPtr<mirror::Object> GetValueOfObject(
ObjPtr<mirror::ObjectArray<mirror::Object>> boot_image_live_objects,
+ size_t start_index,
uint32_t index) REQUIRES_SHARED(Locks::mutator_lock_);
- EXPORT static MemberOffset GetIntegerValueOfArrayDataOffset(
- ObjPtr<mirror::ObjectArray<mirror::Object>> boot_image_live_objects)
- REQUIRES_SHARED(Locks::mutator_lock_);
+
+ EXPORT static MemberOffset GetValueOfArrayDataOffset(
+ ObjPtr<mirror::ObjectArray<mirror::Object>> boot_image_live_objects,
+ size_t start_index) REQUIRES_SHARED(Locks::mutator_lock_);
private:
static constexpr size_t kPatchTypeBits =
diff --git a/compiler/optimizing/intrinsics.cc b/compiler/optimizing/intrinsics.cc
index 774deec438..8330a973ff 100644
--- a/compiler/optimizing/intrinsics.cc
+++ b/compiler/optimizing/intrinsics.cc
@@ -27,10 +27,12 @@
#include "gc/space/image_space.h"
#include "image-inl.h"
#include "intrinsic_objects.h"
+#include "intrinsics_list.h"
#include "nodes.h"
#include "obj_ptr-inl.h"
#include "scoped_thread_state_change-inl.h"
#include "thread-current-inl.h"
+#include "well_known_classes-inl.h"
namespace art HIDDEN {
@@ -43,22 +45,12 @@ std::ostream& operator<<(std::ostream& os, const Intrinsics& intrinsic) {
case Intrinsics::k ## Name: \
os << # Name; \
break;
-#include "intrinsics_list.h"
- INTRINSICS_LIST(OPTIMIZING_INTRINSICS)
-#undef STATIC_INTRINSICS_LIST
-#undef VIRTUAL_INTRINSICS_LIST
+ ART_INTRINSICS_LIST(OPTIMIZING_INTRINSICS)
#undef OPTIMIZING_INTRINSICS
}
return os;
}
-static const char kIntegerCacheDescriptor[] = "Ljava/lang/Integer$IntegerCache;";
-static const char kIntegerDescriptor[] = "Ljava/lang/Integer;";
-static const char kIntegerArrayDescriptor[] = "[Ljava/lang/Integer;";
-static const char kLowFieldName[] = "low";
-static const char kHighFieldName[] = "high";
-static const char kValueFieldName[] = "value";
-
static ObjPtr<mirror::ObjectArray<mirror::Object>> GetBootImageLiveObjects()
REQUIRES_SHARED(Locks::mutator_lock_) {
gc::Heap* heap = Runtime::Current()->GetHeap();
@@ -73,79 +65,6 @@ static ObjPtr<mirror::ObjectArray<mirror::Object>> GetBootImageLiveObjects()
return boot_image_live_objects;
}
-static ObjPtr<mirror::Class> LookupInitializedClass(Thread* self,
- ClassLinker* class_linker,
- const char* descriptor)
- REQUIRES_SHARED(Locks::mutator_lock_) {
- ObjPtr<mirror::Class> klass =
- class_linker->LookupClass(self, descriptor, /* class_loader= */ nullptr);
- DCHECK(klass != nullptr);
- DCHECK(klass->IsInitialized());
- return klass;
-}
-
-static ObjPtr<mirror::ObjectArray<mirror::Object>> GetIntegerCacheArray(
- ObjPtr<mirror::Class> cache_class) REQUIRES_SHARED(Locks::mutator_lock_) {
- ArtField* cache_field = cache_class->FindDeclaredStaticField("cache", kIntegerArrayDescriptor);
- DCHECK(cache_field != nullptr);
- return ObjPtr<mirror::ObjectArray<mirror::Object>>::DownCast(cache_field->GetObject(cache_class));
-}
-
-static int32_t GetIntegerCacheField(ObjPtr<mirror::Class> cache_class, const char* field_name)
- REQUIRES_SHARED(Locks::mutator_lock_) {
- ArtField* field = cache_class->FindDeclaredStaticField(field_name, "I");
- DCHECK(field != nullptr);
- return field->GetInt(cache_class);
-}
-
-static bool CheckIntegerCache(Thread* self,
- ClassLinker* class_linker,
- ObjPtr<mirror::ObjectArray<mirror::Object>> boot_image_live_objects,
- ObjPtr<mirror::ObjectArray<mirror::Object>> boot_image_cache)
- REQUIRES_SHARED(Locks::mutator_lock_) {
- DCHECK(boot_image_cache != nullptr);
-
- // Since we have a cache in the boot image, both java.lang.Integer and
- // java.lang.Integer$IntegerCache must be initialized in the boot image.
- ObjPtr<mirror::Class> cache_class =
- LookupInitializedClass(self, class_linker, kIntegerCacheDescriptor);
- ObjPtr<mirror::Class> integer_class =
- LookupInitializedClass(self, class_linker, kIntegerDescriptor);
-
- // Check that the current cache is the same as the `boot_image_cache`.
- ObjPtr<mirror::ObjectArray<mirror::Object>> current_cache = GetIntegerCacheArray(cache_class);
- if (current_cache != boot_image_cache) {
- return false; // Messed up IntegerCache.cache.
- }
-
- // Check that the range matches the boot image cache length.
- int32_t low = GetIntegerCacheField(cache_class, kLowFieldName);
- int32_t high = GetIntegerCacheField(cache_class, kHighFieldName);
- if (boot_image_cache->GetLength() != high - low + 1) {
- return false; // Messed up IntegerCache.low or IntegerCache.high.
- }
-
- // Check that the elements match the boot image intrinsic objects and check their values as well.
- ArtField* value_field = integer_class->FindDeclaredInstanceField(kValueFieldName, "I");
- DCHECK(value_field != nullptr);
- for (int32_t i = 0, len = boot_image_cache->GetLength(); i != len; ++i) {
- ObjPtr<mirror::Object> boot_image_object =
- IntrinsicObjects::GetIntegerValueOfObject(boot_image_live_objects, i);
- DCHECK(Runtime::Current()->GetHeap()->ObjectIsInBootImageSpace(boot_image_object));
- // No need for read barrier for comparison with a boot image object.
- ObjPtr<mirror::Object> current_object =
- boot_image_cache->GetWithoutChecks<kVerifyNone, kWithoutReadBarrier>(i);
- if (boot_image_object != current_object) {
- return false; // Messed up IntegerCache.cache[i]
- }
- if (value_field->GetInt(boot_image_object) != low + i) {
- return false; // Messed up IntegerCache.cache[i].value.
- }
- }
-
- return true;
-}
-
static bool CanReferenceBootImageObjects(HInvoke* invoke, const CompilerOptions& compiler_options) {
// Piggyback on the method load kind to determine whether we can use PC-relative addressing
// for AOT. This should cover both the testing config (non-PIC boot image) and codegens that
@@ -161,95 +80,24 @@ static bool CanReferenceBootImageObjects(HInvoke* invoke, const CompilerOptions&
return true;
}
-void IntrinsicVisitor::ComputeIntegerValueOfLocations(HInvoke* invoke,
- CodeGenerator* codegen,
- Location return_location,
- Location first_argument_location) {
- // The intrinsic will call if it needs to allocate a j.l.Integer.
+void IntrinsicVisitor::ComputeValueOfLocations(HInvoke* invoke,
+ CodeGenerator* codegen,
+ int32_t low,
+ int32_t length,
+ Location return_location,
+ Location first_argument_location) {
+  // The intrinsic will call the runtime if it needs to allocate a boxed object.
LocationSummary::CallKind call_kind = LocationSummary::kCallOnMainOnly;
const CompilerOptions& compiler_options = codegen->GetCompilerOptions();
if (!CanReferenceBootImageObjects(invoke, compiler_options)) {
return;
}
HInstruction* const input = invoke->InputAt(0);
- if (compiler_options.IsBootImage()) {
- if (!compiler_options.IsImageClass(kIntegerCacheDescriptor) ||
- !compiler_options.IsImageClass(kIntegerDescriptor)) {
- return;
- }
- ClassLinker* class_linker = Runtime::Current()->GetClassLinker();
- Thread* self = Thread::Current();
- ScopedObjectAccess soa(self);
- ObjPtr<mirror::Class> cache_class = class_linker->LookupClass(
- self, kIntegerCacheDescriptor, /* class_loader= */ nullptr);
- DCHECK(cache_class != nullptr);
- if (UNLIKELY(!cache_class->IsInitialized())) {
- LOG(WARNING) << "Image class " << cache_class->PrettyDescriptor() << " is uninitialized.";
- return;
- }
- ObjPtr<mirror::Class> integer_class =
- class_linker->LookupClass(self, kIntegerDescriptor, /* class_loader= */ nullptr);
- DCHECK(integer_class != nullptr);
- if (UNLIKELY(!integer_class->IsInitialized())) {
- LOG(WARNING) << "Image class " << integer_class->PrettyDescriptor() << " is uninitialized.";
- return;
- }
- int32_t low = GetIntegerCacheField(cache_class, kLowFieldName);
- int32_t high = GetIntegerCacheField(cache_class, kHighFieldName);
- if (kIsDebugBuild) {
- ObjPtr<mirror::ObjectArray<mirror::Object>> current_cache = GetIntegerCacheArray(cache_class);
- CHECK(current_cache != nullptr);
- CHECK_EQ(current_cache->GetLength(), high - low + 1);
- ArtField* value_field = integer_class->FindDeclaredInstanceField(kValueFieldName, "I");
- CHECK(value_field != nullptr);
- for (int32_t i = 0, len = current_cache->GetLength(); i != len; ++i) {
- ObjPtr<mirror::Object> current_object = current_cache->GetWithoutChecks(i);
- CHECK(current_object != nullptr);
- CHECK_EQ(value_field->GetInt(current_object), low + i);
- }
- }
- if (input->IsIntConstant()) {
- int32_t value = input->AsIntConstant()->GetValue();
- if (static_cast<uint32_t>(value) - static_cast<uint32_t>(low) <
- static_cast<uint32_t>(high - low + 1)) {
- // No call, we shall use direct pointer to the Integer object.
- call_kind = LocationSummary::kNoCall;
- }
- }
- } else {
- Runtime* runtime = Runtime::Current();
- Thread* self = Thread::Current();
- ScopedObjectAccess soa(self);
- ObjPtr<mirror::ObjectArray<mirror::Object>> boot_image_live_objects = GetBootImageLiveObjects();
- ObjPtr<mirror::ObjectArray<mirror::Object>> cache =
- IntrinsicObjects::GetIntegerValueOfCache(boot_image_live_objects);
- if (cache == nullptr) {
- return; // No cache in the boot image.
- }
- if (compiler_options.IsJitCompiler()) {
- if (!CheckIntegerCache(self, runtime->GetClassLinker(), boot_image_live_objects, cache)) {
- return; // The cache was somehow messed up, probably by using reflection.
- }
- } else {
- DCHECK(compiler_options.IsAotCompiler());
- DCHECK(CheckIntegerCache(self, runtime->GetClassLinker(), boot_image_live_objects, cache));
- if (input->IsIntConstant()) {
- int32_t value = input->AsIntConstant()->GetValue();
- // Retrieve the `value` from the lowest cached Integer.
- ObjPtr<mirror::Object> low_integer =
- IntrinsicObjects::GetIntegerValueOfObject(boot_image_live_objects, 0u);
- ObjPtr<mirror::Class> integer_class =
- low_integer->GetClass<kVerifyNone, kWithoutReadBarrier>();
- ArtField* value_field = integer_class->FindDeclaredInstanceField(kValueFieldName, "I");
- DCHECK(value_field != nullptr);
- int32_t low = value_field->GetInt(low_integer);
- if (static_cast<uint32_t>(value) - static_cast<uint32_t>(low) <
- static_cast<uint32_t>(cache->GetLength())) {
- // No call, we shall use direct pointer to the Integer object. Note that we cannot
- // do this for JIT as the "low" can change through reflection before emitting the code.
- call_kind = LocationSummary::kNoCall;
- }
- }
+ if (input->IsIntConstant()) {
+ int32_t value = input->AsIntConstant()->GetValue();
+ if (static_cast<uint32_t>(value) - static_cast<uint32_t>(low) < static_cast<uint32_t>(length)) {
+      // No call, we shall use a direct pointer to the boxed object.
+ call_kind = LocationSummary::kNoCall;
}
}
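The constant-input fast path above folds both bounds checks into a single unsigned comparison. A small standalone illustration of that idiom (not ART code; the values mirror an Integer-style cache):

// `value` is in [low, low + length) exactly when the unsigned difference
// `value - low` is below `length`; values below `low` wrap to a large unsigned
// number, so one comparison covers both bounds.
#include <cassert>
#include <cstdint>

static bool InCacheRange(int32_t value, int32_t low, int32_t length) {
  return static_cast<uint32_t>(value) - static_cast<uint32_t>(low) <
         static_cast<uint32_t>(length);
}

int main() {
  // Integer-style cache: low = -128, length = 256 covers [-128, 127].
  assert(InCacheRange(-128, -128, 256));
  assert(InCacheRange(127, -128, 256));
  assert(!InCacheRange(128, -128, 256));   // Above the cache.
  assert(!InCacheRange(-129, -128, 256));  // Below the cache: difference wraps around.
  return 0;
}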
@@ -265,98 +113,58 @@ void IntrinsicVisitor::ComputeIntegerValueOfLocations(HInvoke* invoke,
}
}
-static int32_t GetIntegerCacheLowFromIntegerCache(Thread* self, ClassLinker* class_linker)
- REQUIRES_SHARED(Locks::mutator_lock_) {
- ObjPtr<mirror::Class> cache_class =
- LookupInitializedClass(self, class_linker, kIntegerCacheDescriptor);
- return GetIntegerCacheField(cache_class, kLowFieldName);
-}
-
-inline IntrinsicVisitor::IntegerValueOfInfo::IntegerValueOfInfo()
+inline IntrinsicVisitor::ValueOfInfo::ValueOfInfo()
: value_offset(0),
low(0),
length(0u),
value_boot_image_reference(kInvalidReference) {}
-IntrinsicVisitor::IntegerValueOfInfo IntrinsicVisitor::ComputeIntegerValueOfInfo(
- HInvoke* invoke, const CompilerOptions& compiler_options) {
- // Note that we could cache all of the data looked up here. but there's no good
- // location for it. We don't want to add it to WellKnownClasses, to avoid creating global
- // jni values. Adding it as state to the compiler singleton seems like wrong
- // separation of concerns.
- // The need for this data should be pretty rare though.
-
- // Note that at this point we can no longer abort the code generation. Therefore,
- // we need to provide data that shall not lead to a crash even if the fields were
- // modified through reflection since ComputeIntegerValueOfLocations() when JITting.
-
- ClassLinker* class_linker = Runtime::Current()->GetClassLinker();
- Thread* self = Thread::Current();
- ScopedObjectAccess soa(self);
-
- IntegerValueOfInfo info;
+IntrinsicVisitor::ValueOfInfo IntrinsicVisitor::ComputeValueOfInfo(
+ HInvoke* invoke,
+ const CompilerOptions& compiler_options,
+ ArtField* value_field,
+ int32_t low,
+ int32_t length,
+ size_t base) {
+ ValueOfInfo info;
+ info.low = low;
+ info.length = length;
+ info.value_offset = value_field->GetOffset().Uint32Value();
if (compiler_options.IsBootImage()) {
- ObjPtr<mirror::Class> integer_class = invoke->GetResolvedMethod()->GetDeclaringClass();
- DCHECK(integer_class->DescriptorEquals(kIntegerDescriptor));
- ArtField* value_field = integer_class->FindDeclaredInstanceField(kValueFieldName, "I");
- DCHECK(value_field != nullptr);
- info.value_offset = value_field->GetOffset().Uint32Value();
- ObjPtr<mirror::Class> cache_class =
- LookupInitializedClass(self, class_linker, kIntegerCacheDescriptor);
- info.low = GetIntegerCacheField(cache_class, kLowFieldName);
- int32_t high = GetIntegerCacheField(cache_class, kHighFieldName);
- info.length = dchecked_integral_cast<uint32_t>(high - info.low + 1);
-
if (invoke->InputAt(0)->IsIntConstant()) {
int32_t input_value = invoke->InputAt(0)->AsIntConstant()->GetValue();
uint32_t index = static_cast<uint32_t>(input_value) - static_cast<uint32_t>(info.low);
if (index < static_cast<uint32_t>(info.length)) {
info.value_boot_image_reference = IntrinsicObjects::EncodePatch(
- IntrinsicObjects::PatchType::kIntegerValueOfObject, index);
+ IntrinsicObjects::PatchType::kValueOfObject, index + base);
} else {
// Not in the cache.
- info.value_boot_image_reference = IntegerValueOfInfo::kInvalidReference;
+ info.value_boot_image_reference = ValueOfInfo::kInvalidReference;
}
} else {
info.array_data_boot_image_reference =
- IntrinsicObjects::EncodePatch(IntrinsicObjects::PatchType::kIntegerValueOfArray);
+ IntrinsicObjects::EncodePatch(IntrinsicObjects::PatchType::kValueOfArray, base);
}
} else {
+ ScopedObjectAccess soa(Thread::Current());
ObjPtr<mirror::ObjectArray<mirror::Object>> boot_image_live_objects = GetBootImageLiveObjects();
- ObjPtr<mirror::Object> low_integer =
- IntrinsicObjects::GetIntegerValueOfObject(boot_image_live_objects, 0u);
- ObjPtr<mirror::Class> integer_class = low_integer->GetClass<kVerifyNone, kWithoutReadBarrier>();
- ArtField* value_field = integer_class->FindDeclaredInstanceField(kValueFieldName, "I");
- DCHECK(value_field != nullptr);
- info.value_offset = value_field->GetOffset().Uint32Value();
- if (compiler_options.IsJitCompiler()) {
- // Use the current `IntegerCache.low` for JIT to avoid truly surprising behavior if the
- // code messes up the `value` field in the lowest cached Integer using reflection.
- info.low = GetIntegerCacheLowFromIntegerCache(self, class_linker);
- } else {
- // For app AOT, the `low_integer->value` should be the same as `IntegerCache.low`.
- info.low = value_field->GetInt(low_integer);
- DCHECK_EQ(info.low, GetIntegerCacheLowFromIntegerCache(self, class_linker));
- }
- // Do not look at `IntegerCache.high`, use the immutable length of the cache array instead.
- info.length = dchecked_integral_cast<uint32_t>(
- IntrinsicObjects::GetIntegerValueOfCache(boot_image_live_objects)->GetLength());
if (invoke->InputAt(0)->IsIntConstant()) {
int32_t input_value = invoke->InputAt(0)->AsIntConstant()->GetValue();
uint32_t index = static_cast<uint32_t>(input_value) - static_cast<uint32_t>(info.low);
if (index < static_cast<uint32_t>(info.length)) {
- ObjPtr<mirror::Object> integer =
- IntrinsicObjects::GetIntegerValueOfObject(boot_image_live_objects, index);
- info.value_boot_image_reference = CodeGenerator::GetBootImageOffset(integer);
+ ObjPtr<mirror::Object> object =
+ IntrinsicObjects::GetValueOfObject(boot_image_live_objects, base, index);
+ info.value_boot_image_reference = CodeGenerator::GetBootImageOffset(object);
} else {
// Not in the cache.
- info.value_boot_image_reference = IntegerValueOfInfo::kInvalidReference;
+ info.value_boot_image_reference = ValueOfInfo::kInvalidReference;
}
} else {
info.array_data_boot_image_reference =
CodeGenerator::GetBootImageOffset(boot_image_live_objects) +
- IntrinsicObjects::GetIntegerValueOfArrayDataOffset(boot_image_live_objects).Uint32Value();
+ IntrinsicObjects::GetValueOfArrayDataOffset(
+ boot_image_live_objects, base).Uint32Value();
}
}
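For a constant input on the boot-image branch above, the encoded patch index is the slot within the type's cache plus that type's `base` (its `k<Name>CacheFirstIndex`). A standalone arithmetic sketch (not part of the patch) for a hypothetical `Integer.valueOf(42)`, using the Integer base of 640 derived from the BOXED_TYPES layout sketched earlier:

#include <cassert>
#include <cstdint>

int main() {
  const int32_t low = -128;
  const uint32_t length = 256;       // high - low + 1 for the Integer cache.
  const uint32_t base = 640;         // kIntegerCacheFirstIndex in the layout above.
  const int32_t input_value = 42;    // Hypothetical constant input.
  uint32_t index = static_cast<uint32_t>(input_value) - static_cast<uint32_t>(low);
  assert(index < length);
  // This sum is what gets passed to
  // IntrinsicObjects::EncodePatch(PatchType::kValueOfObject, index + base).
  uint32_t patch_index = index + base;
  assert(index == 170u && patch_index == 810u);
  return 0;
}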
@@ -392,8 +200,8 @@ void IntrinsicVisitor::CreateReferenceGetReferentLocations(HInvoke* invoke,
locations->SetOut(Location::RequiresRegister());
}
-void IntrinsicVisitor::CreateReferenceRefersToLocations(HInvoke* invoke) {
- if (gUseReadBarrier && !kUseBakerReadBarrier) {
+void IntrinsicVisitor::CreateReferenceRefersToLocations(HInvoke* invoke, CodeGenerator* codegen) {
+ if (codegen->EmitNonBakerReadBarrier()) {
// Unimplemented for non-Baker read barrier.
return;
}
@@ -414,4 +222,54 @@ void IntrinsicVisitor::AssertNonMovableStringClass() {
}
}
+void InsertFpToIntegralIntrinsic(HInvokeStaticOrDirect* invoke, size_t input_index) {
+ DCHECK_EQ(invoke->GetCodePtrLocation(), CodePtrLocation::kCallCriticalNative);
+ DCHECK(!invoke->GetBlock()->GetGraph()->IsDebuggable())
+ << "Unexpected direct @CriticalNative call in a debuggable graph!";
+ DCHECK_LT(input_index, invoke->GetNumberOfArguments());
+ HInstruction* input = invoke->InputAt(input_index);
+ DataType::Type input_type = input->GetType();
+ DCHECK(DataType::IsFloatingPointType(input_type));
+ bool is_double = (input_type == DataType::Type::kFloat64);
+ DataType::Type converted_type = is_double ? DataType::Type::kInt64 : DataType::Type::kInt32;
+ ArtMethod* resolved_method = is_double
+ ? WellKnownClasses::java_lang_Double_doubleToRawLongBits
+ : WellKnownClasses::java_lang_Float_floatToRawIntBits;
+ DCHECK(resolved_method != nullptr);
+ DCHECK(resolved_method->IsIntrinsic());
+ MethodReference target_method(nullptr, 0);
+ {
+ ScopedObjectAccess soa(Thread::Current());
+ target_method =
+ MethodReference(resolved_method->GetDexFile(), resolved_method->GetDexMethodIndex());
+ }
+ // Use arbitrary dispatch info that does not require the method argument.
+ HInvokeStaticOrDirect::DispatchInfo dispatch_info = {
+ MethodLoadKind::kBssEntry,
+ CodePtrLocation::kCallArtMethod,
+ /*method_load_data=*/ 0u
+ };
+ HBasicBlock* block = invoke->GetBlock();
+ ArenaAllocator* allocator = block->GetGraph()->GetAllocator();
+ HInvokeStaticOrDirect* new_input = new (allocator) HInvokeStaticOrDirect(
+ allocator,
+ /*number_of_arguments=*/ 1u,
+ converted_type,
+ invoke->GetDexPc(),
+ /*method_reference=*/ MethodReference(nullptr, dex::kDexNoIndex),
+ resolved_method,
+ dispatch_info,
+ kStatic,
+ target_method,
+ HInvokeStaticOrDirect::ClinitCheckRequirement::kNone,
+ /*enable_intrinsic_opt=*/ true);
+ // The intrinsic has no side effects and does not need the environment.
+ new_input->SetSideEffects(SideEffects::None());
+ IntrinsicOptimizations opt(new_input);
+ opt.SetDoesNotNeedEnvironment();
+ new_input->SetRawInputAt(0u, input);
+ block->InsertInstructionBefore(new_input, invoke);
+ invoke->ReplaceInput(new_input, input_index);
+}
+
} // namespace art
diff --git a/compiler/optimizing/intrinsics.h b/compiler/optimizing/intrinsics.h
index 893cd04411..d74d5d2a40 100644
--- a/compiler/optimizing/intrinsics.h
+++ b/compiler/optimizing/intrinsics.h
@@ -19,6 +19,7 @@
#include "base/macros.h"
#include "code_generator.h"
+#include "intrinsics_list.h"
#include "nodes.h"
#include "optimization.h"
#include "parallel_move_resolver.h"
@@ -48,9 +49,7 @@ class IntrinsicVisitor : public ValueObject {
case Intrinsics::k ## Name: \
Visit ## Name(invoke); \
return;
-#include "intrinsics_list.h"
- INTRINSICS_LIST(OPTIMIZING_INTRINSICS)
-#undef INTRINSICS_LIST
+ ART_INTRINSICS_LIST(OPTIMIZING_INTRINSICS)
#undef OPTIMIZING_INTRINSICS
// Do not put a default case. That way the compiler will complain if we missed a case.
@@ -60,11 +59,8 @@ class IntrinsicVisitor : public ValueObject {
// Define visitor methods.
#define OPTIMIZING_INTRINSICS(Name, ...) \
- virtual void Visit ## Name(HInvoke* invoke ATTRIBUTE_UNUSED) { \
- }
-#include "intrinsics_list.h"
- INTRINSICS_LIST(OPTIMIZING_INTRINSICS)
-#undef INTRINSICS_LIST
+ virtual void Visit##Name([[maybe_unused]] HInvoke* invoke) {}
+ ART_INTRINSICS_LIST(OPTIMIZING_INTRINSICS)
#undef OPTIMIZING_INTRINSICS
static void MoveArguments(HInvoke* invoke,
@@ -99,19 +95,20 @@ class IntrinsicVisitor : public ValueObject {
codegen->GetMoveResolver()->EmitNativeCode(&parallel_move);
}
- static void ComputeIntegerValueOfLocations(HInvoke* invoke,
- CodeGenerator* codegen,
- Location return_location,
- Location first_argument_location);
+ static void ComputeValueOfLocations(HInvoke* invoke,
+ CodeGenerator* codegen,
+ int32_t low,
+ int32_t length,
+ Location return_location,
+ Location first_argument_location);
- // Temporary data structure for holding Integer.valueOf data for generating code.
- // We only use it if the boot image contains the IntegerCache objects.
- struct IntegerValueOfInfo {
+ // Temporary data structure for holding BoxedType.valueOf data for generating code.
+ struct ValueOfInfo {
static constexpr uint32_t kInvalidReference = static_cast<uint32_t>(-1);
- IntegerValueOfInfo();
+ ValueOfInfo();
- // Offset of the Integer.value field for initializing a newly allocated instance.
+ // Offset of the value field of the boxed object for initializing a newly allocated instance.
uint32_t value_offset;
// The low value in the cache.
int32_t low;
@@ -134,13 +131,18 @@ class IntrinsicVisitor : public ValueObject {
};
};
- static IntegerValueOfInfo ComputeIntegerValueOfInfo(
- HInvoke* invoke, const CompilerOptions& compiler_options);
+ static ValueOfInfo ComputeValueOfInfo(
+ HInvoke* invoke,
+ const CompilerOptions& compiler_options,
+ ArtField* value_field,
+ int32_t low,
+ int32_t length,
+ size_t base);
static MemberOffset GetReferenceDisableIntrinsicOffset();
static MemberOffset GetReferenceSlowPathEnabledOffset();
static void CreateReferenceGetReferentLocations(HInvoke* invoke, CodeGenerator* codegen);
- static void CreateReferenceRefersToLocations(HInvoke* invoke);
+ static void CreateReferenceRefersToLocations(HInvoke* invoke, CodeGenerator* codegen);
protected:
IntrinsicVisitor() {}
@@ -220,6 +222,7 @@ class SystemArrayCopyOptimizations : public IntrinsicOptimizations {
INTRINSIC_OPTIMIZATION(DestinationIsPrimitiveArray, 8);
INTRINSIC_OPTIMIZATION(SourceIsNonPrimitiveArray, 9);
INTRINSIC_OPTIMIZATION(SourceIsPrimitiveArray, 10);
+ INTRINSIC_OPTIMIZATION(SourcePositionIsDestinationPosition, 11);
private:
DISALLOW_COPY_AND_ASSIGN(SystemArrayCopyOptimizations);
@@ -254,11 +257,9 @@ class VarHandleOptimizations : public IntrinsicOptimizations {
// intrinsic to exploit e.g. no side-effects or exceptions, but otherwise not handled
// by this architecture-specific intrinsics code generator. Eventually it is implemented
// as a true method call.
-#define UNIMPLEMENTED_INTRINSIC(Arch, Name) \
-void IntrinsicLocationsBuilder ## Arch::Visit ## Name(HInvoke* invoke ATTRIBUTE_UNUSED) { \
-} \
-void IntrinsicCodeGenerator ## Arch::Visit ## Name(HInvoke* invoke ATTRIBUTE_UNUSED) { \
-}
+#define UNIMPLEMENTED_INTRINSIC(Arch, Name) \
+ void IntrinsicLocationsBuilder##Arch::Visit##Name([[maybe_unused]] HInvoke* invoke) {} \
+ void IntrinsicCodeGenerator##Arch::Visit##Name([[maybe_unused]] HInvoke* invoke) {}
// Defines a list of unreached intrinsics: that is, method calls that are recognized as
// an intrinsic, and then always converted into HIR instructions before they reach any
@@ -334,6 +335,11 @@ bool IsCallFreeIntrinsic(HInvoke* invoke, Codegenerator* codegen) {
return false;
}
+// Insert a `Float.floatToRawIntBits()` or `Double.doubleToRawLongBits()` intrinsic for a
+// given input. These fake calls are needed on arm and riscv64 to satisfy type consistency
+// checks while passing certain FP args in core registers for direct @CriticalNative calls.
+void InsertFpToIntegralIntrinsic(HInvokeStaticOrDirect* invoke, size_t input_index);
+
} // namespace art
#endif // ART_COMPILER_OPTIMIZING_INTRINSICS_H_
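The `InsertFpToIntegralIntrinsic()` helper declared above reroutes an FP argument through `Float.floatToRawIntBits()`/`Double.doubleToRawLongBits()` so the value can legally travel in a core register. A standalone sketch (C++20 `std::bit_cast`, not ART code) of the bit-level effect of that conversion:

// The inserted intrinsic moves the IEEE-754 encoding unchanged into an integer of
// the same width; the direct @CriticalNative call then passes that integer in a
// core register.
#include <bit>
#include <cassert>
#include <cstdint>

int main() {
  float f = 1.0f;
  uint32_t raw32 = std::bit_cast<uint32_t>(f);   // Float.floatToRawIntBits(1.0f)
  assert(raw32 == 0x3f800000u);

  double d = 1.0;
  uint64_t raw64 = std::bit_cast<uint64_t>(d);   // Double.doubleToRawLongBits(1.0)
  assert(raw64 == 0x3ff0000000000000ull);
  return 0;
}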
diff --git a/compiler/optimizing/intrinsics_arm64.cc b/compiler/optimizing/intrinsics_arm64.cc
index d2dbaa32e3..3183dac348 100644
--- a/compiler/optimizing/intrinsics_arm64.cc
+++ b/compiler/optimizing/intrinsics_arm64.cc
@@ -25,6 +25,7 @@
#include "data_type-inl.h"
#include "entrypoints/quick/quick_entrypoints.h"
#include "heap_poisoning.h"
+#include "intrinsic_objects.h"
#include "intrinsics.h"
#include "intrinsics_utils.h"
#include "lock_word.h"
@@ -36,6 +37,7 @@
#include "scoped_thread_state_change-inl.h"
#include "thread-current-inl.h"
#include "utils/arm64/assembler_arm64.h"
+#include "well_known_classes.h"
using namespace vixl::aarch64; // NOLINT(build/namespaces)
@@ -91,11 +93,10 @@ class ReadBarrierSystemArrayCopySlowPathARM64 : public SlowPathCodeARM64 {
public:
ReadBarrierSystemArrayCopySlowPathARM64(HInstruction* instruction, Location tmp)
: SlowPathCodeARM64(instruction), tmp_(tmp) {
- DCHECK(gUseReadBarrier);
- DCHECK(kUseBakerReadBarrier);
}
void EmitNativeCode(CodeGenerator* codegen_in) override {
+ DCHECK(codegen_in->EmitBakerReadBarrier());
CodeGeneratorARM64* codegen = down_cast<CodeGeneratorARM64*>(codegen_in);
LocationSummary* locations = instruction_->GetLocations();
DCHECK(locations->CanCall());
@@ -700,7 +701,8 @@ static void GenUnsafeGet(HInvoke* invoke,
bool is_volatile,
CodeGeneratorARM64* codegen) {
LocationSummary* locations = invoke->GetLocations();
- DCHECK((type == DataType::Type::kInt32) ||
+ DCHECK((type == DataType::Type::kInt8) ||
+ (type == DataType::Type::kInt32) ||
(type == DataType::Type::kInt64) ||
(type == DataType::Type::kReference));
Location base_loc = locations->InAt(1);
@@ -710,7 +712,7 @@ static void GenUnsafeGet(HInvoke* invoke,
Location trg_loc = locations->Out();
Register trg = RegisterFrom(trg_loc, type);
- if (type == DataType::Type::kReference && gUseReadBarrier && kUseBakerReadBarrier) {
+ if (type == DataType::Type::kReference && codegen->EmitBakerReadBarrier()) {
// UnsafeGetObject/UnsafeGetObjectVolatile with Baker's read barrier case.
Register temp = WRegisterFrom(locations->GetTemp(0));
MacroAssembler* masm = codegen->GetVIXLAssembler();
@@ -738,22 +740,10 @@ static void GenUnsafeGet(HInvoke* invoke,
}
}
-static bool UnsafeGetIntrinsicOnCallList(Intrinsics intrinsic) {
- switch (intrinsic) {
- case Intrinsics::kUnsafeGetObject:
- case Intrinsics::kUnsafeGetObjectVolatile:
- case Intrinsics::kJdkUnsafeGetObject:
- case Intrinsics::kJdkUnsafeGetObjectVolatile:
- case Intrinsics::kJdkUnsafeGetObjectAcquire:
- return true;
- default:
- break;
- }
- return false;
-}
-
-static void CreateIntIntIntToIntLocations(ArenaAllocator* allocator, HInvoke* invoke) {
- bool can_call = gUseReadBarrier && UnsafeGetIntrinsicOnCallList(invoke->GetIntrinsic());
+static void CreateUnsafeGetLocations(ArenaAllocator* allocator,
+ HInvoke* invoke,
+ CodeGeneratorARM64* codegen) {
+ bool can_call = codegen->EmitReadBarrier() && IsUnsafeGetReference(invoke);
LocationSummary* locations =
new (allocator) LocationSummary(invoke,
can_call
@@ -786,38 +776,44 @@ void IntrinsicLocationsBuilderARM64::VisitUnsafeGetLongVolatile(HInvoke* invoke)
VisitJdkUnsafeGetLongVolatile(invoke);
}
void IntrinsicLocationsBuilderARM64::VisitUnsafeGetObject(HInvoke* invoke) {
- VisitJdkUnsafeGetObject(invoke);
+ VisitJdkUnsafeGetReference(invoke);
}
void IntrinsicLocationsBuilderARM64::VisitUnsafeGetObjectVolatile(HInvoke* invoke) {
- VisitJdkUnsafeGetObjectVolatile(invoke);
+ VisitJdkUnsafeGetReferenceVolatile(invoke);
+}
+void IntrinsicLocationsBuilderARM64::VisitUnsafeGetByte(HInvoke* invoke) {
+ VisitJdkUnsafeGetByte(invoke);
}
void IntrinsicLocationsBuilderARM64::VisitJdkUnsafeGet(HInvoke* invoke) {
- CreateIntIntIntToIntLocations(allocator_, invoke);
+ CreateUnsafeGetLocations(allocator_, invoke, codegen_);
}
void IntrinsicLocationsBuilderARM64::VisitJdkUnsafeGetVolatile(HInvoke* invoke) {
- CreateIntIntIntToIntLocations(allocator_, invoke);
+ CreateUnsafeGetLocations(allocator_, invoke, codegen_);
}
void IntrinsicLocationsBuilderARM64::VisitJdkUnsafeGetAcquire(HInvoke* invoke) {
- CreateIntIntIntToIntLocations(allocator_, invoke);
+ CreateUnsafeGetLocations(allocator_, invoke, codegen_);
}
void IntrinsicLocationsBuilderARM64::VisitJdkUnsafeGetLong(HInvoke* invoke) {
- CreateIntIntIntToIntLocations(allocator_, invoke);
+ CreateUnsafeGetLocations(allocator_, invoke, codegen_);
}
void IntrinsicLocationsBuilderARM64::VisitJdkUnsafeGetLongVolatile(HInvoke* invoke) {
- CreateIntIntIntToIntLocations(allocator_, invoke);
+ CreateUnsafeGetLocations(allocator_, invoke, codegen_);
}
void IntrinsicLocationsBuilderARM64::VisitJdkUnsafeGetLongAcquire(HInvoke* invoke) {
- CreateIntIntIntToIntLocations(allocator_, invoke);
+ CreateUnsafeGetLocations(allocator_, invoke, codegen_);
+}
+void IntrinsicLocationsBuilderARM64::VisitJdkUnsafeGetReference(HInvoke* invoke) {
+ CreateUnsafeGetLocations(allocator_, invoke, codegen_);
}
-void IntrinsicLocationsBuilderARM64::VisitJdkUnsafeGetObject(HInvoke* invoke) {
- CreateIntIntIntToIntLocations(allocator_, invoke);
+void IntrinsicLocationsBuilderARM64::VisitJdkUnsafeGetReferenceVolatile(HInvoke* invoke) {
+ CreateUnsafeGetLocations(allocator_, invoke, codegen_);
}
-void IntrinsicLocationsBuilderARM64::VisitJdkUnsafeGetObjectVolatile(HInvoke* invoke) {
- CreateIntIntIntToIntLocations(allocator_, invoke);
+void IntrinsicLocationsBuilderARM64::VisitJdkUnsafeGetReferenceAcquire(HInvoke* invoke) {
+ CreateUnsafeGetLocations(allocator_, invoke, codegen_);
}
-void IntrinsicLocationsBuilderARM64::VisitJdkUnsafeGetObjectAcquire(HInvoke* invoke) {
- CreateIntIntIntToIntLocations(allocator_, invoke);
+void IntrinsicLocationsBuilderARM64::VisitJdkUnsafeGetByte(HInvoke* invoke) {
+ CreateUnsafeGetLocations(allocator_, invoke, codegen_);
}
void IntrinsicCodeGeneratorARM64::VisitUnsafeGet(HInvoke* invoke) {
@@ -833,10 +829,13 @@ void IntrinsicCodeGeneratorARM64::VisitUnsafeGetLongVolatile(HInvoke* invoke) {
VisitJdkUnsafeGetLongVolatile(invoke);
}
void IntrinsicCodeGeneratorARM64::VisitUnsafeGetObject(HInvoke* invoke) {
- VisitJdkUnsafeGetObject(invoke);
+ VisitJdkUnsafeGetReference(invoke);
}
void IntrinsicCodeGeneratorARM64::VisitUnsafeGetObjectVolatile(HInvoke* invoke) {
- VisitJdkUnsafeGetObjectVolatile(invoke);
+ VisitJdkUnsafeGetReferenceVolatile(invoke);
+}
+void IntrinsicCodeGeneratorARM64::VisitUnsafeGetByte(HInvoke* invoke) {
+ VisitJdkUnsafeGetByte(invoke);
}
void IntrinsicCodeGeneratorARM64::VisitJdkUnsafeGet(HInvoke* invoke) {
@@ -857,17 +856,20 @@ void IntrinsicCodeGeneratorARM64::VisitJdkUnsafeGetLongVolatile(HInvoke* invoke)
void IntrinsicCodeGeneratorARM64::VisitJdkUnsafeGetLongAcquire(HInvoke* invoke) {
GenUnsafeGet(invoke, DataType::Type::kInt64, /*is_volatile=*/ true, codegen_);
}
-void IntrinsicCodeGeneratorARM64::VisitJdkUnsafeGetObject(HInvoke* invoke) {
+void IntrinsicCodeGeneratorARM64::VisitJdkUnsafeGetReference(HInvoke* invoke) {
GenUnsafeGet(invoke, DataType::Type::kReference, /*is_volatile=*/ false, codegen_);
}
-void IntrinsicCodeGeneratorARM64::VisitJdkUnsafeGetObjectVolatile(HInvoke* invoke) {
+void IntrinsicCodeGeneratorARM64::VisitJdkUnsafeGetReferenceVolatile(HInvoke* invoke) {
GenUnsafeGet(invoke, DataType::Type::kReference, /*is_volatile=*/ true, codegen_);
}
-void IntrinsicCodeGeneratorARM64::VisitJdkUnsafeGetObjectAcquire(HInvoke* invoke) {
+void IntrinsicCodeGeneratorARM64::VisitJdkUnsafeGetReferenceAcquire(HInvoke* invoke) {
GenUnsafeGet(invoke, DataType::Type::kReference, /*is_volatile=*/ true, codegen_);
}
+void IntrinsicCodeGeneratorARM64::VisitJdkUnsafeGetByte(HInvoke* invoke) {
+ GenUnsafeGet(invoke, DataType::Type::kInt8, /*is_volatile=*/ false, codegen_);
+}
-static void CreateIntIntIntIntToVoid(ArenaAllocator* allocator, HInvoke* invoke) {
+static void CreateUnsafePutLocations(ArenaAllocator* allocator, HInvoke* invoke) {
LocationSummary* locations =
new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
locations->SetInAt(0, Location::NoLocation()); // Unused receiver.
@@ -886,13 +888,13 @@ void IntrinsicLocationsBuilderARM64::VisitUnsafePutVolatile(HInvoke* invoke) {
VisitJdkUnsafePutVolatile(invoke);
}
void IntrinsicLocationsBuilderARM64::VisitUnsafePutObject(HInvoke* invoke) {
- VisitJdkUnsafePutObject(invoke);
+ VisitJdkUnsafePutReference(invoke);
}
void IntrinsicLocationsBuilderARM64::VisitUnsafePutObjectOrdered(HInvoke* invoke) {
VisitJdkUnsafePutObjectOrdered(invoke);
}
void IntrinsicLocationsBuilderARM64::VisitUnsafePutObjectVolatile(HInvoke* invoke) {
- VisitJdkUnsafePutObjectVolatile(invoke);
+ VisitJdkUnsafePutReferenceVolatile(invoke);
}
void IntrinsicLocationsBuilderARM64::VisitUnsafePutLong(HInvoke* invoke) {
VisitJdkUnsafePutLong(invoke);
@@ -903,42 +905,48 @@ void IntrinsicLocationsBuilderARM64::VisitUnsafePutLongOrdered(HInvoke* invoke)
void IntrinsicLocationsBuilderARM64::VisitUnsafePutLongVolatile(HInvoke* invoke) {
VisitJdkUnsafePutLongVolatile(invoke);
}
+void IntrinsicLocationsBuilderARM64::VisitUnsafePutByte(HInvoke* invoke) {
+ VisitJdkUnsafePutByte(invoke);
+}
void IntrinsicLocationsBuilderARM64::VisitJdkUnsafePut(HInvoke* invoke) {
- CreateIntIntIntIntToVoid(allocator_, invoke);
+ CreateUnsafePutLocations(allocator_, invoke);
}
void IntrinsicLocationsBuilderARM64::VisitJdkUnsafePutOrdered(HInvoke* invoke) {
- CreateIntIntIntIntToVoid(allocator_, invoke);
+ CreateUnsafePutLocations(allocator_, invoke);
}
void IntrinsicLocationsBuilderARM64::VisitJdkUnsafePutVolatile(HInvoke* invoke) {
- CreateIntIntIntIntToVoid(allocator_, invoke);
+ CreateUnsafePutLocations(allocator_, invoke);
}
void IntrinsicLocationsBuilderARM64::VisitJdkUnsafePutRelease(HInvoke* invoke) {
- CreateIntIntIntIntToVoid(allocator_, invoke);
+ CreateUnsafePutLocations(allocator_, invoke);
}
-void IntrinsicLocationsBuilderARM64::VisitJdkUnsafePutObject(HInvoke* invoke) {
- CreateIntIntIntIntToVoid(allocator_, invoke);
+void IntrinsicLocationsBuilderARM64::VisitJdkUnsafePutReference(HInvoke* invoke) {
+ CreateUnsafePutLocations(allocator_, invoke);
}
void IntrinsicLocationsBuilderARM64::VisitJdkUnsafePutObjectOrdered(HInvoke* invoke) {
- CreateIntIntIntIntToVoid(allocator_, invoke);
+ CreateUnsafePutLocations(allocator_, invoke);
}
-void IntrinsicLocationsBuilderARM64::VisitJdkUnsafePutObjectVolatile(HInvoke* invoke) {
- CreateIntIntIntIntToVoid(allocator_, invoke);
+void IntrinsicLocationsBuilderARM64::VisitJdkUnsafePutReferenceVolatile(HInvoke* invoke) {
+ CreateUnsafePutLocations(allocator_, invoke);
}
-void IntrinsicLocationsBuilderARM64::VisitJdkUnsafePutObjectRelease(HInvoke* invoke) {
- CreateIntIntIntIntToVoid(allocator_, invoke);
+void IntrinsicLocationsBuilderARM64::VisitJdkUnsafePutReferenceRelease(HInvoke* invoke) {
+ CreateUnsafePutLocations(allocator_, invoke);
}
void IntrinsicLocationsBuilderARM64::VisitJdkUnsafePutLong(HInvoke* invoke) {
- CreateIntIntIntIntToVoid(allocator_, invoke);
+ CreateUnsafePutLocations(allocator_, invoke);
}
void IntrinsicLocationsBuilderARM64::VisitJdkUnsafePutLongOrdered(HInvoke* invoke) {
- CreateIntIntIntIntToVoid(allocator_, invoke);
+ CreateUnsafePutLocations(allocator_, invoke);
}
void IntrinsicLocationsBuilderARM64::VisitJdkUnsafePutLongVolatile(HInvoke* invoke) {
- CreateIntIntIntIntToVoid(allocator_, invoke);
+ CreateUnsafePutLocations(allocator_, invoke);
}
void IntrinsicLocationsBuilderARM64::VisitJdkUnsafePutLongRelease(HInvoke* invoke) {
- CreateIntIntIntIntToVoid(allocator_, invoke);
+ CreateUnsafePutLocations(allocator_, invoke);
+}
+void IntrinsicLocationsBuilderARM64::VisitJdkUnsafePutByte(HInvoke* invoke) {
+ CreateUnsafePutLocations(allocator_, invoke);
}
static void GenUnsafePut(HInvoke* invoke,
@@ -991,13 +999,13 @@ void IntrinsicCodeGeneratorARM64::VisitUnsafePutVolatile(HInvoke* invoke) {
VisitJdkUnsafePutVolatile(invoke);
}
void IntrinsicCodeGeneratorARM64::VisitUnsafePutObject(HInvoke* invoke) {
- VisitJdkUnsafePutObject(invoke);
+ VisitJdkUnsafePutReference(invoke);
}
void IntrinsicCodeGeneratorARM64::VisitUnsafePutObjectOrdered(HInvoke* invoke) {
VisitJdkUnsafePutObjectOrdered(invoke);
}
void IntrinsicCodeGeneratorARM64::VisitUnsafePutObjectVolatile(HInvoke* invoke) {
- VisitJdkUnsafePutObjectVolatile(invoke);
+ VisitJdkUnsafePutReferenceVolatile(invoke);
}
void IntrinsicCodeGeneratorARM64::VisitUnsafePutLong(HInvoke* invoke) {
VisitJdkUnsafePutLong(invoke);
@@ -1008,6 +1016,9 @@ void IntrinsicCodeGeneratorARM64::VisitUnsafePutLongOrdered(HInvoke* invoke) {
void IntrinsicCodeGeneratorARM64::VisitUnsafePutLongVolatile(HInvoke* invoke) {
VisitJdkUnsafePutLongVolatile(invoke);
}
+void IntrinsicCodeGeneratorARM64::VisitUnsafePutByte(HInvoke* invoke) {
+ VisitJdkUnsafePutByte(invoke);
+}
void IntrinsicCodeGeneratorARM64::VisitJdkUnsafePut(HInvoke* invoke) {
GenUnsafePut(invoke,
@@ -1037,7 +1048,7 @@ void IntrinsicCodeGeneratorARM64::VisitJdkUnsafePutRelease(HInvoke* invoke) {
/*is_ordered=*/ false,
codegen_);
}
-void IntrinsicCodeGeneratorARM64::VisitJdkUnsafePutObject(HInvoke* invoke) {
+void IntrinsicCodeGeneratorARM64::VisitJdkUnsafePutReference(HInvoke* invoke) {
GenUnsafePut(invoke,
DataType::Type::kReference,
/*is_volatile=*/ false,
@@ -1051,14 +1062,14 @@ void IntrinsicCodeGeneratorARM64::VisitJdkUnsafePutObjectOrdered(HInvoke* invoke
/*is_ordered=*/ true,
codegen_);
}
-void IntrinsicCodeGeneratorARM64::VisitJdkUnsafePutObjectVolatile(HInvoke* invoke) {
+void IntrinsicCodeGeneratorARM64::VisitJdkUnsafePutReferenceVolatile(HInvoke* invoke) {
GenUnsafePut(invoke,
DataType::Type::kReference,
/*is_volatile=*/ true,
/*is_ordered=*/ false,
codegen_);
}
-void IntrinsicCodeGeneratorARM64::VisitJdkUnsafePutObjectRelease(HInvoke* invoke) {
+void IntrinsicCodeGeneratorARM64::VisitJdkUnsafePutReferenceRelease(HInvoke* invoke) {
GenUnsafePut(invoke,
DataType::Type::kReference,
/*is_volatile=*/ true,
@@ -1093,9 +1104,18 @@ void IntrinsicCodeGeneratorARM64::VisitJdkUnsafePutLongRelease(HInvoke* invoke)
/*is_ordered=*/ false,
codegen_);
}
+void IntrinsicCodeGeneratorARM64::VisitJdkUnsafePutByte(HInvoke* invoke) {
+ GenUnsafePut(invoke,
+ DataType::Type::kInt8,
+ /*is_volatile=*/ false,
+ /*is_ordered=*/ false,
+ codegen_);
+}
-static void CreateUnsafeCASLocations(ArenaAllocator* allocator, HInvoke* invoke) {
- const bool can_call = gUseReadBarrier && IsUnsafeCASObject(invoke);
+static void CreateUnsafeCASLocations(ArenaAllocator* allocator,
+ HInvoke* invoke,
+ CodeGeneratorARM64* codegen) {
+ const bool can_call = codegen->EmitReadBarrier() && IsUnsafeCASReference(invoke);
LocationSummary* locations =
new (allocator) LocationSummary(invoke,
can_call
@@ -1257,7 +1277,7 @@ static void GenerateCompareAndSet(CodeGeneratorARM64* codegen,
// }
//
// Flag Z indicates whether `old_value == expected || old_value == expected2`.
- // (Is `expected2` is not valid, the `old_value == expected2` part is not emitted.)
+ // (If `expected2` is not valid, the `old_value == expected2` part is not emitted.)
vixl::aarch64::Label loop_head;
if (strong) {
@@ -1340,7 +1360,7 @@ class ReadBarrierCasSlowPathARM64 : public SlowPathCodeARM64 {
// Mark the `old_value_` from the main path and compare with `expected_`.
if (kUseBakerReadBarrier) {
DCHECK(mark_old_value_slow_path_ == nullptr);
- arm64_codegen->GenerateIntrinsicCasMoveWithBakerReadBarrier(old_value_temp_, old_value_);
+ arm64_codegen->GenerateIntrinsicMoveWithBakerReadBarrier(old_value_temp_, old_value_);
} else {
DCHECK(mark_old_value_slow_path_ != nullptr);
__ B(mark_old_value_slow_path_->GetEntryLabel());
@@ -1394,7 +1414,7 @@ class ReadBarrierCasSlowPathARM64 : public SlowPathCodeARM64 {
__ Bind(&mark_old_value);
if (kUseBakerReadBarrier) {
DCHECK(update_old_value_slow_path_ == nullptr);
- arm64_codegen->GenerateIntrinsicCasMoveWithBakerReadBarrier(old_value_, old_value_temp_);
+ arm64_codegen->GenerateIntrinsicMoveWithBakerReadBarrier(old_value_, old_value_temp_);
} else {
// Note: We could redirect the `failure` above directly to the entry label and bind
// the exit label in the main path, but the main path would need to access the
@@ -1447,7 +1467,7 @@ static void GenUnsafeCas(HInvoke* invoke, DataType::Type type, CodeGeneratorARM6
vixl::aarch64::Label* exit_loop = &exit_loop_label;
vixl::aarch64::Label* cmp_failure = &exit_loop_label;
- if (gUseReadBarrier && type == DataType::Type::kReference) {
+ if (type == DataType::Type::kReference && codegen->EmitReadBarrier()) {
// We need to store the `old_value` in a non-scratch register to make sure
// the read barrier in the slow path does not clobber it.
old_value = WRegisterFrom(locations->GetTemp(0)); // The old value from main path.
@@ -1511,23 +1531,23 @@ void IntrinsicLocationsBuilderARM64::VisitJdkUnsafeCASLong(HInvoke* invoke) {
}
void IntrinsicLocationsBuilderARM64::VisitJdkUnsafeCASObject(HInvoke* invoke) {
// `jdk.internal.misc.Unsafe.compareAndSwapObject` has compare-and-set semantics (see javadoc).
- VisitJdkUnsafeCompareAndSetObject(invoke);
+ VisitJdkUnsafeCompareAndSetReference(invoke);
}
void IntrinsicLocationsBuilderARM64::VisitJdkUnsafeCompareAndSetInt(HInvoke* invoke) {
- CreateUnsafeCASLocations(allocator_, invoke);
+ CreateUnsafeCASLocations(allocator_, invoke, codegen_);
}
void IntrinsicLocationsBuilderARM64::VisitJdkUnsafeCompareAndSetLong(HInvoke* invoke) {
- CreateUnsafeCASLocations(allocator_, invoke);
+ CreateUnsafeCASLocations(allocator_, invoke, codegen_);
}
-void IntrinsicLocationsBuilderARM64::VisitJdkUnsafeCompareAndSetObject(HInvoke* invoke) {
+void IntrinsicLocationsBuilderARM64::VisitJdkUnsafeCompareAndSetReference(HInvoke* invoke) {
// The only supported read barrier implementation is the Baker-style read barriers.
- if (gUseReadBarrier && !kUseBakerReadBarrier) {
+ if (codegen_->EmitNonBakerReadBarrier()) {
return;
}
- CreateUnsafeCASLocations(allocator_, invoke);
- if (gUseReadBarrier) {
+ CreateUnsafeCASLocations(allocator_, invoke, codegen_);
+ if (codegen_->EmitReadBarrier()) {
// We need two non-scratch temporary registers for read barrier.
LocationSummary* locations = invoke->GetLocations();
if (kUseBakerReadBarrier) {
@@ -1557,7 +1577,7 @@ void IntrinsicCodeGeneratorARM64::VisitUnsafeCASObject(HInvoke* invoke) {
}
void IntrinsicCodeGeneratorARM64::VisitJdkUnsafeCASInt(HInvoke* invoke) {
- // `jdk.internal.misc.Unsafe.compareAndSwapLong` has compare-and-set semantics (see javadoc).
+ // `jdk.internal.misc.Unsafe.compareAndSwapInt` has compare-and-set semantics (see javadoc).
VisitJdkUnsafeCompareAndSetInt(invoke);
}
void IntrinsicCodeGeneratorARM64::VisitJdkUnsafeCASLong(HInvoke* invoke) {
@@ -1566,7 +1586,7 @@ void IntrinsicCodeGeneratorARM64::VisitJdkUnsafeCASLong(HInvoke* invoke) {
}
void IntrinsicCodeGeneratorARM64::VisitJdkUnsafeCASObject(HInvoke* invoke) {
// `jdk.internal.misc.Unsafe.compareAndSwapObject` has compare-and-set semantics (see javadoc).
- VisitJdkUnsafeCompareAndSetObject(invoke);
+ VisitJdkUnsafeCompareAndSetReference(invoke);
}
void IntrinsicCodeGeneratorARM64::VisitJdkUnsafeCompareAndSetInt(HInvoke* invoke) {
@@ -1575,9 +1595,9 @@ void IntrinsicCodeGeneratorARM64::VisitJdkUnsafeCompareAndSetInt(HInvoke* invoke
void IntrinsicCodeGeneratorARM64::VisitJdkUnsafeCompareAndSetLong(HInvoke* invoke) {
GenUnsafeCas(invoke, DataType::Type::kInt64, codegen_);
}
-void IntrinsicCodeGeneratorARM64::VisitJdkUnsafeCompareAndSetObject(HInvoke* invoke) {
+void IntrinsicCodeGeneratorARM64::VisitJdkUnsafeCompareAndSetReference(HInvoke* invoke) {
// The only supported read barrier implementation is the Baker-style read barriers.
- DCHECK_IMPLIES(gUseReadBarrier, kUseBakerReadBarrier);
+ DCHECK_IMPLIES(codegen_->EmitReadBarrier(), kUseBakerReadBarrier);
GenUnsafeCas(invoke, DataType::Type::kReference, codegen_);
}
@@ -1672,6 +1692,138 @@ static void GenerateGetAndUpdate(CodeGeneratorARM64* codegen,
__ Cbnz(store_result, &loop_label);
}
+static void CreateUnsafeGetAndUpdateLocations(ArenaAllocator* allocator,
+ HInvoke* invoke,
+ CodeGeneratorARM64* codegen) {
+ const bool can_call = codegen->EmitReadBarrier() && IsUnsafeGetAndSetReference(invoke);
+ LocationSummary* locations =
+ new (allocator) LocationSummary(invoke,
+ can_call
+ ? LocationSummary::kCallOnSlowPath
+ : LocationSummary::kNoCall,
+ kIntrinsified);
+ if (can_call && kUseBakerReadBarrier) {
+ locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty()); // No caller-save registers.
+ }
+ locations->SetInAt(0, Location::NoLocation()); // Unused receiver.
+ locations->SetInAt(1, Location::RequiresRegister());
+ locations->SetInAt(2, Location::RequiresRegister());
+ locations->SetInAt(3, Location::RequiresRegister());
+ locations->AddTemp(Location::RequiresRegister());
+
+ locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap);
+}
+
+static void GenUnsafeGetAndUpdate(HInvoke* invoke,
+ DataType::Type type,
+ CodeGeneratorARM64* codegen,
+ GetAndUpdateOp get_and_update_op) {
+ MacroAssembler* masm = codegen->GetVIXLAssembler();
+ LocationSummary* locations = invoke->GetLocations();
+
+ Register out = RegisterFrom(locations->Out(), type); // Result.
+ Register base = WRegisterFrom(locations->InAt(1)); // Object pointer.
+ Register offset = XRegisterFrom(locations->InAt(2)); // Long offset.
+ Register arg = RegisterFrom(locations->InAt(3), type); // New value or addend.
+ Register tmp_ptr = XRegisterFrom(locations->GetTemp(0)); // Pointer to actual memory.
+
+ // This needs to be before the temp registers, as MarkGCCard also uses VIXL temps.
+ if (type == DataType::Type::kReference) {
+ DCHECK(get_and_update_op == GetAndUpdateOp::kSet);
+ // Mark card for object as a new value shall be stored.
+ bool new_value_can_be_null = true; // TODO: Worth finding out this information?
+ codegen->MarkGCCard(base, /*value=*/ arg, new_value_can_be_null);
+ }
+
+ __ Add(tmp_ptr, base.X(), Operand(offset));
+ GenerateGetAndUpdate(codegen,
+ get_and_update_op,
+ type,
+ std::memory_order_seq_cst,
+ tmp_ptr,
+ arg,
+ /*old_value=*/ out);
+
+ if (type == DataType::Type::kReference && codegen->EmitReadBarrier()) {
+ DCHECK(get_and_update_op == GetAndUpdateOp::kSet);
+ if (kUseBakerReadBarrier) {
+ codegen->GenerateIntrinsicMoveWithBakerReadBarrier(out.W(), out.W());
+ } else {
+ codegen->GenerateReadBarrierSlow(
+ invoke,
+ Location::RegisterLocation(out.GetCode()),
+ Location::RegisterLocation(out.GetCode()),
+ Location::RegisterLocation(base.GetCode()),
+ /*offset=*/ 0u,
+ /*index=*/ Location::RegisterLocation(offset.GetCode()));
+ }
+ }
+}
+
+void IntrinsicLocationsBuilderARM64::VisitUnsafeGetAndAddInt(HInvoke* invoke) {
+ VisitJdkUnsafeGetAndAddInt(invoke);
+}
+void IntrinsicLocationsBuilderARM64::VisitUnsafeGetAndAddLong(HInvoke* invoke) {
+ VisitJdkUnsafeGetAndAddLong(invoke);
+}
+void IntrinsicLocationsBuilderARM64::VisitUnsafeGetAndSetInt(HInvoke* invoke) {
+ VisitJdkUnsafeGetAndSetInt(invoke);
+}
+void IntrinsicLocationsBuilderARM64::VisitUnsafeGetAndSetLong(HInvoke* invoke) {
+ VisitJdkUnsafeGetAndSetLong(invoke);
+}
+void IntrinsicLocationsBuilderARM64::VisitUnsafeGetAndSetObject(HInvoke* invoke) {
+ VisitJdkUnsafeGetAndSetReference(invoke);
+}
+
+void IntrinsicLocationsBuilderARM64::VisitJdkUnsafeGetAndAddInt(HInvoke* invoke) {
+ CreateUnsafeGetAndUpdateLocations(allocator_, invoke, codegen_);
+}
+void IntrinsicLocationsBuilderARM64::VisitJdkUnsafeGetAndAddLong(HInvoke* invoke) {
+ CreateUnsafeGetAndUpdateLocations(allocator_, invoke, codegen_);
+}
+void IntrinsicLocationsBuilderARM64::VisitJdkUnsafeGetAndSetInt(HInvoke* invoke) {
+ CreateUnsafeGetAndUpdateLocations(allocator_, invoke, codegen_);
+}
+void IntrinsicLocationsBuilderARM64::VisitJdkUnsafeGetAndSetLong(HInvoke* invoke) {
+ CreateUnsafeGetAndUpdateLocations(allocator_, invoke, codegen_);
+}
+void IntrinsicLocationsBuilderARM64::VisitJdkUnsafeGetAndSetReference(HInvoke* invoke) {
+ CreateUnsafeGetAndUpdateLocations(allocator_, invoke, codegen_);
+}
+
+void IntrinsicCodeGeneratorARM64::VisitUnsafeGetAndAddInt(HInvoke* invoke) {
+ VisitJdkUnsafeGetAndAddInt(invoke);
+}
+void IntrinsicCodeGeneratorARM64::VisitUnsafeGetAndAddLong(HInvoke* invoke) {
+ VisitJdkUnsafeGetAndAddLong(invoke);
+}
+void IntrinsicCodeGeneratorARM64::VisitUnsafeGetAndSetInt(HInvoke* invoke) {
+ VisitJdkUnsafeGetAndSetInt(invoke);
+}
+void IntrinsicCodeGeneratorARM64::VisitUnsafeGetAndSetLong(HInvoke* invoke) {
+ VisitJdkUnsafeGetAndSetLong(invoke);
+}
+void IntrinsicCodeGeneratorARM64::VisitUnsafeGetAndSetObject(HInvoke* invoke) {
+ VisitJdkUnsafeGetAndSetReference(invoke);
+}
+
+void IntrinsicCodeGeneratorARM64::VisitJdkUnsafeGetAndAddInt(HInvoke* invoke) {
+ GenUnsafeGetAndUpdate(invoke, DataType::Type::kInt32, codegen_, GetAndUpdateOp::kAdd);
+}
+void IntrinsicCodeGeneratorARM64::VisitJdkUnsafeGetAndAddLong(HInvoke* invoke) {
+ GenUnsafeGetAndUpdate(invoke, DataType::Type::kInt64, codegen_, GetAndUpdateOp::kAdd);
+}
+void IntrinsicCodeGeneratorARM64::VisitJdkUnsafeGetAndSetInt(HInvoke* invoke) {
+ GenUnsafeGetAndUpdate(invoke, DataType::Type::kInt32, codegen_, GetAndUpdateOp::kSet);
+}
+void IntrinsicCodeGeneratorARM64::VisitJdkUnsafeGetAndSetLong(HInvoke* invoke) {
+ GenUnsafeGetAndUpdate(invoke, DataType::Type::kInt64, codegen_, GetAndUpdateOp::kSet);
+}
+void IntrinsicCodeGeneratorARM64::VisitJdkUnsafeGetAndSetReference(HInvoke* invoke) {
+ GenUnsafeGetAndUpdate(invoke, DataType::Type::kReference, codegen_, GetAndUpdateOp::kSet);
+}
+
void IntrinsicLocationsBuilderARM64::VisitStringCompareTo(HInvoke* invoke) {
LocationSummary* locations =
new (allocator_) LocationSummary(invoke,
@@ -2272,7 +2424,7 @@ static void CreateFPFPToFPCallLocations(ArenaAllocator* allocator, HInvoke* invo
locations->SetOut(calling_convention.GetReturnLocation(invoke->GetType()));
}
-static void CreateFPFPFPToFPCallLocations(ArenaAllocator* allocator, HInvoke* invoke) {
+static void CreateFPFPFPToFPLocations(ArenaAllocator* allocator, HInvoke* invoke) {
DCHECK_EQ(invoke->GetNumberOfArguments(), 3U);
DCHECK(DataType::IsFloatingPointType(invoke->InputAt(0)->GetType()));
DCHECK(DataType::IsFloatingPointType(invoke->InputAt(1)->GetType()));
@@ -2582,7 +2734,7 @@ static constexpr int32_t kSystemArrayCopyCharThreshold = 192;
static void SetSystemArrayCopyLocationRequires(LocationSummary* locations,
uint32_t at,
HInstruction* input) {
- HIntConstant* const_input = input->AsIntConstant();
+ HIntConstant* const_input = input->AsIntConstantOrNull();
if (const_input != nullptr && !vixl::aarch64::Assembler::IsImmAddSub(const_input->GetValue())) {
locations->SetInAt(at, Location::RequiresRegister());
} else {
@@ -2593,8 +2745,8 @@ static void SetSystemArrayCopyLocationRequires(LocationSummary* locations,
void IntrinsicLocationsBuilderARM64::VisitSystemArrayCopyChar(HInvoke* invoke) {
// Check to see if we have known failures that will cause us to have to bail out
// to the runtime, and just generate the runtime call directly.
- HIntConstant* src_pos = invoke->InputAt(1)->AsIntConstant();
- HIntConstant* dst_pos = invoke->InputAt(3)->AsIntConstant();
+ HIntConstant* src_pos = invoke->InputAt(1)->AsIntConstantOrNull();
+ HIntConstant* dst_pos = invoke->InputAt(3)->AsIntConstantOrNull();
// The positions must be non-negative.
if ((src_pos != nullptr && src_pos->GetValue() < 0) ||
@@ -2605,7 +2757,7 @@ void IntrinsicLocationsBuilderARM64::VisitSystemArrayCopyChar(HInvoke* invoke) {
// The length must be >= 0 and not so long that we would (currently) prefer libcore's
// native implementation.
- HIntConstant* length = invoke->InputAt(4)->AsIntConstant();
+ HIntConstant* length = invoke->InputAt(4)->AsIntConstantOrNull();
if (length != nullptr) {
int32_t len = length->GetValue();
if (len < 0 || len > kSystemArrayCopyCharThreshold) {
@@ -2897,14 +3049,14 @@ static constexpr int32_t kSystemArrayCopyThreshold = 128;
void IntrinsicLocationsBuilderARM64::VisitSystemArrayCopy(HInvoke* invoke) {
// The only read barrier implementation supporting the
// SystemArrayCopy intrinsic is the Baker-style read barriers.
- if (gUseReadBarrier && !kUseBakerReadBarrier) {
+ if (codegen_->EmitNonBakerReadBarrier()) {
return;
}
// Check to see if we have known failures that will cause us to have to bail out
// to the runtime, and just generate the runtime call directly.
- HIntConstant* src_pos = invoke->InputAt(1)->AsIntConstant();
- HIntConstant* dest_pos = invoke->InputAt(3)->AsIntConstant();
+ HIntConstant* src_pos = invoke->InputAt(1)->AsIntConstantOrNull();
+ HIntConstant* dest_pos = invoke->InputAt(3)->AsIntConstantOrNull();
// The positions must be non-negative.
if ((src_pos != nullptr && src_pos->GetValue() < 0) ||
@@ -2914,7 +3066,7 @@ void IntrinsicLocationsBuilderARM64::VisitSystemArrayCopy(HInvoke* invoke) {
}
// The length must be >= 0.
- HIntConstant* length = invoke->InputAt(4)->AsIntConstant();
+ HIntConstant* length = invoke->InputAt(4)->AsIntConstantOrNull();
if (length != nullptr) {
int32_t len = length->GetValue();
if (len < 0 || len >= kSystemArrayCopyThreshold) {
@@ -2949,7 +3101,7 @@ void IntrinsicLocationsBuilderARM64::VisitSystemArrayCopy(HInvoke* invoke) {
locations->AddTemp(Location::RequiresRegister());
locations->AddTemp(Location::RequiresRegister());
- if (gUseReadBarrier && kUseBakerReadBarrier) {
+ if (codegen_->EmitBakerReadBarrier()) {
// Temporary register IP0, obtained from the VIXL scratch register
// pool, cannot be used in ReadBarrierSystemArrayCopySlowPathARM64
// (because that register is clobbered by ReadBarrierMarkRegX
@@ -2967,7 +3119,7 @@ void IntrinsicLocationsBuilderARM64::VisitSystemArrayCopy(HInvoke* invoke) {
void IntrinsicCodeGeneratorARM64::VisitSystemArrayCopy(HInvoke* invoke) {
// The only read barrier implementation supporting the
// SystemArrayCopy intrinsic is the Baker-style read barriers.
- DCHECK_IMPLIES(gUseReadBarrier, kUseBakerReadBarrier);
+ DCHECK_IMPLIES(codegen_->EmitReadBarrier(), kUseBakerReadBarrier);
MacroAssembler* masm = GetVIXLAssembler();
LocationSummary* locations = invoke->GetLocations();
@@ -3009,8 +3161,8 @@ void IntrinsicCodeGeneratorARM64::VisitSystemArrayCopy(HInvoke* invoke) {
__ B(intrinsic_slow_path->GetEntryLabel(), eq);
}
// Checked when building locations.
- DCHECK(!optimizations.GetDestinationIsSource()
- || (src_pos_constant >= dest_pos.GetConstant()->AsIntConstant()->GetValue()));
+ DCHECK(!optimizations.GetDestinationIsSource() ||
+ (src_pos_constant >= dest_pos.GetConstant()->AsIntConstant()->GetValue()));
} else {
if (!optimizations.GetDestinationIsSource()) {
__ Cmp(src, dest);
@@ -3074,7 +3226,7 @@ void IntrinsicCodeGeneratorARM64::VisitSystemArrayCopy(HInvoke* invoke) {
UseScratchRegisterScope temps(masm);
Location temp3_loc; // Used only for Baker read barrier.
Register temp3;
- if (gUseReadBarrier && kUseBakerReadBarrier) {
+ if (codegen_->EmitBakerReadBarrier()) {
temp3_loc = locations->GetTemp(2);
temp3 = WRegisterFrom(temp3_loc);
} else {
@@ -3087,7 +3239,7 @@ void IntrinsicCodeGeneratorARM64::VisitSystemArrayCopy(HInvoke* invoke) {
// or the destination is Object[]. If none of these checks succeed, we go to the
// slow path.
- if (gUseReadBarrier && kUseBakerReadBarrier) {
+ if (codegen_->EmitBakerReadBarrier()) {
if (!optimizations.GetSourceIsNonPrimitiveArray()) {
// /* HeapReference<Class> */ temp1 = src->klass_
codegen_->GenerateFieldLoadWithBakerReadBarrier(invoke,
@@ -3108,7 +3260,7 @@ void IntrinsicCodeGeneratorARM64::VisitSystemArrayCopy(HInvoke* invoke) {
/* use_load_acquire= */ false);
__ Cbz(temp1, intrinsic_slow_path->GetEntryLabel());
// If heap poisoning is enabled, `temp1` has been unpoisoned
- // by the the previous call to GenerateFieldLoadWithBakerReadBarrier.
+ // by the previous call to GenerateFieldLoadWithBakerReadBarrier.
// /* uint16_t */ temp1 = static_cast<uint16>(temp1->primitive_type_);
__ Ldrh(temp1, HeapOperand(temp1, primitive_offset));
static_assert(Primitive::kPrimNot == 0, "Expected 0 for kPrimNot");
@@ -3142,7 +3294,7 @@ void IntrinsicCodeGeneratorARM64::VisitSystemArrayCopy(HInvoke* invoke) {
/* use_load_acquire= */ false);
__ Cbz(temp2, intrinsic_slow_path->GetEntryLabel());
// If heap poisoning is enabled, `temp2` has been unpoisoned
- // by the the previous call to GenerateFieldLoadWithBakerReadBarrier.
+ // by the previous call to GenerateFieldLoadWithBakerReadBarrier.
// /* uint16_t */ temp2 = static_cast<uint16>(temp2->primitive_type_);
__ Ldrh(temp2, HeapOperand(temp2, primitive_offset));
static_assert(Primitive::kPrimNot == 0, "Expected 0 for kPrimNot");
@@ -3248,7 +3400,7 @@ void IntrinsicCodeGeneratorARM64::VisitSystemArrayCopy(HInvoke* invoke) {
} else if (!optimizations.GetSourceIsNonPrimitiveArray()) {
DCHECK(optimizations.GetDestinationIsNonPrimitiveArray());
// Bail out if the source is not a non primitive array.
- if (gUseReadBarrier && kUseBakerReadBarrier) {
+ if (codegen_->EmitBakerReadBarrier()) {
// /* HeapReference<Class> */ temp1 = src->klass_
codegen_->GenerateFieldLoadWithBakerReadBarrier(invoke,
temp1_loc,
@@ -3267,7 +3419,7 @@ void IntrinsicCodeGeneratorARM64::VisitSystemArrayCopy(HInvoke* invoke) {
/* use_load_acquire= */ false);
__ Cbz(temp2, intrinsic_slow_path->GetEntryLabel());
// If heap poisoning is enabled, `temp2` has been unpoisoned
- // by the the previous call to GenerateFieldLoadWithBakerReadBarrier.
+ // by the previous call to GenerateFieldLoadWithBakerReadBarrier.
} else {
// /* HeapReference<Class> */ temp1 = src->klass_
__ Ldr(temp1, HeapOperand(src.W(), class_offset));
@@ -3298,7 +3450,7 @@ void IntrinsicCodeGeneratorARM64::VisitSystemArrayCopy(HInvoke* invoke) {
__ Cbz(WRegisterFrom(length), &done);
}
- if (gUseReadBarrier && kUseBakerReadBarrier) {
+ if (codegen_->EmitBakerReadBarrier()) {
// TODO: Also convert this intrinsic to the IsGcMarking strategy?
// SystemArrayCopy implementation for Baker read barriers (see
@@ -3465,18 +3617,34 @@ void IntrinsicCodeGeneratorARM64::VisitDoubleIsInfinite(HInvoke* invoke) {
GenIsInfinite(invoke->GetLocations(), /* is64bit= */ true, GetVIXLAssembler());
}
-void IntrinsicLocationsBuilderARM64::VisitIntegerValueOf(HInvoke* invoke) {
- InvokeRuntimeCallingConvention calling_convention;
- IntrinsicVisitor::ComputeIntegerValueOfLocations(
- invoke,
- codegen_,
- calling_convention.GetReturnLocation(DataType::Type::kReference),
- Location::RegisterLocation(calling_convention.GetRegisterAt(0).GetCode()));
-}
-
-void IntrinsicCodeGeneratorARM64::VisitIntegerValueOf(HInvoke* invoke) {
- IntrinsicVisitor::IntegerValueOfInfo info =
- IntrinsicVisitor::ComputeIntegerValueOfInfo(invoke, codegen_->GetCompilerOptions());
+#define VISIT_INTRINSIC(name, low, high, type, start_index) \
+ void IntrinsicLocationsBuilderARM64::Visit ##name ##ValueOf(HInvoke* invoke) { \
+ InvokeRuntimeCallingConvention calling_convention; \
+ IntrinsicVisitor::ComputeValueOfLocations( \
+ invoke, \
+ codegen_, \
+ low, \
+ high - low + 1, \
+ calling_convention.GetReturnLocation(DataType::Type::kReference), \
+ Location::RegisterLocation(calling_convention.GetRegisterAt(0).GetCode())); \
+ } \
+ void IntrinsicCodeGeneratorARM64::Visit ##name ##ValueOf(HInvoke* invoke) { \
+ IntrinsicVisitor::ValueOfInfo info = \
+ IntrinsicVisitor::ComputeValueOfInfo( \
+ invoke, \
+ codegen_->GetCompilerOptions(), \
+ WellKnownClasses::java_lang_ ##name ##_value, \
+ low, \
+ high - low + 1, \
+ start_index); \
+ HandleValueOf(invoke, info, type); \
+ }
+ BOXED_TYPES(VISIT_INTRINSIC)
+#undef VISIT_INTRINSIC
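
The VISIT_INTRINSIC macro above generates one locations-builder/code-generator pair per boxed type in BOXED_TYPES and funnels all of them into a shared HandleValueOf() helper. A standalone sketch of that X-macro technique follows; the list entries, cache bounds and helper below are illustrative stand-ins, not the real BOXED_TYPES or ART definitions.

#include <cstdio>

// Hypothetical X-macro list: name, low, high. The real BOXED_TYPES also carries
// a DataType and a start index; the values here are illustrative only.
#define DEMO_BOXED_TYPES(V) \
  V(Integer, -128, 127)     \
  V(Character, 0, 127)

// Stand-in for the shared helper the generated visitors delegate to.
void HandleValueOf(const char* name, int low, int length) {
  std::printf("%sValueOf cache: low=%d length=%d\n", name, low, length);
}

// Each expansion emits one Visit<Name>ValueOf() function, mirroring how the
// ARM64 code emits one visitor pair per boxed type.
#define VISIT_INTRINSIC(name, low, high)               \
  void Visit##name##ValueOf() {                        \
    HandleValueOf(#name, (low), (high) - (low) + 1);   \
  }
DEMO_BOXED_TYPES(VISIT_INTRINSIC)
#undef VISIT_INTRINSIC

int main() {
  VisitIntegerValueOf();
  VisitCharacterValueOf();
  return 0;
}
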
+
+void IntrinsicCodeGeneratorARM64::HandleValueOf(HInvoke* invoke,
+ const IntrinsicVisitor::ValueOfInfo& info,
+ DataType::Type type) {
LocationSummary* locations = invoke->GetLocations();
MacroAssembler* masm = GetVIXLAssembler();
@@ -3489,20 +3657,20 @@ void IntrinsicCodeGeneratorARM64::VisitIntegerValueOf(HInvoke* invoke) {
codegen_->InvokeRuntime(kQuickAllocObjectInitialized, invoke, invoke->GetDexPc());
CheckEntrypointTypes<kQuickAllocObjectWithChecks, void*, mirror::Class*>();
};
- if (invoke->InputAt(0)->IsConstant()) {
+ if (invoke->InputAt(0)->IsIntConstant()) {
int32_t value = invoke->InputAt(0)->AsIntConstant()->GetValue();
if (static_cast<uint32_t>(value - info.low) < info.length) {
- // Just embed the j.l.Integer in the code.
- DCHECK_NE(info.value_boot_image_reference, IntegerValueOfInfo::kInvalidReference);
+ // Just embed the object in the code.
+ DCHECK_NE(info.value_boot_image_reference, ValueOfInfo::kInvalidReference);
codegen_->LoadBootImageAddress(out, info.value_boot_image_reference);
} else {
DCHECK(locations->CanCall());
- // Allocate and initialize a new j.l.Integer.
- // TODO: If we JIT, we could allocate the j.l.Integer now, and store it in the
+ // Allocate and initialize a new object.
+ // TODO: If we JIT, we could allocate the object now, and store it in the
// JIT object table.
allocate_instance();
__ Mov(temp.W(), value);
- __ Str(temp.W(), HeapOperand(out.W(), info.value_offset));
+ codegen_->Store(type, temp.W(), HeapOperand(out.W(), info.value_offset));
// Class pointer and `value` final field stores require a barrier before publication.
codegen_->GenerateMemoryBarrier(MemBarrierKind::kStoreStore);
}
@@ -3514,7 +3682,7 @@ void IntrinsicCodeGeneratorARM64::VisitIntegerValueOf(HInvoke* invoke) {
__ Cmp(out.W(), info.length);
vixl::aarch64::Label allocate, done;
__ B(&allocate, hs);
- // If the value is within the bounds, load the j.l.Integer directly from the array.
+ // If the value is within the bounds, load the object directly from the array.
codegen_->LoadBootImageAddress(temp, info.array_data_boot_image_reference);
MemOperand source = HeapOperand(
temp, out.X(), LSL, DataType::SizeShift(DataType::Type::kReference));
@@ -3522,9 +3690,9 @@ void IntrinsicCodeGeneratorARM64::VisitIntegerValueOf(HInvoke* invoke) {
codegen_->GetAssembler()->MaybeUnpoisonHeapReference(out);
__ B(&done);
__ Bind(&allocate);
- // Otherwise allocate and initialize a new j.l.Integer.
+ // Otherwise allocate and initialize a new object.
allocate_instance();
- __ Str(in.W(), HeapOperand(out.W(), info.value_offset));
+ codegen_->Store(type, in.W(), HeapOperand(out.W(), info.value_offset));
// Class pointer and `value` final field stores require a barrier before publication.
codegen_->GenerateMemoryBarrier(MemBarrierKind::kStoreStore);
__ Bind(&done);
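
The constant fast path above decides between embedding a cached boot-image object and allocating a new one with a single unsigned comparison, `static_cast<uint32_t>(value - info.low) < info.length`. A minimal standalone sketch of that range check, with arbitrary illustrative cache bounds:

#include <cassert>
#include <cstdint>

// Returns true when `value` lies in [low, low + length): subtracting `low` and
// comparing as unsigned folds the lower and upper bound checks into one compare,
// since values below `low` wrap around to large unsigned numbers.
bool InCacheRange(int32_t value, int32_t low, uint32_t length) {
  return static_cast<uint32_t>(value - low) < length;
}

int main() {
  const int32_t low = -128;     // illustrative bounds, e.g. a [-128, 127] cache
  const uint32_t length = 256;
  assert(InCacheRange(0, low, length));
  assert(InCacheRange(-128, low, length));
  assert(!InCacheRange(127 + 1, low, length));
  assert(!InCacheRange(-128 - 1, low, length));
  return 0;
}
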
@@ -3534,7 +3702,7 @@ void IntrinsicCodeGeneratorARM64::VisitIntegerValueOf(HInvoke* invoke) {
void IntrinsicLocationsBuilderARM64::VisitReferenceGetReferent(HInvoke* invoke) {
IntrinsicVisitor::CreateReferenceGetReferentLocations(invoke, codegen_);
- if (gUseReadBarrier && kUseBakerReadBarrier && invoke->GetLocations() != nullptr) {
+ if (codegen_->EmitBakerReadBarrier() && invoke->GetLocations() != nullptr) {
invoke->GetLocations()->AddTemp(Location::RequiresRegister());
}
}
@@ -3549,7 +3717,7 @@ void IntrinsicCodeGeneratorARM64::VisitReferenceGetReferent(HInvoke* invoke) {
SlowPathCodeARM64* slow_path = new (GetAllocator()) IntrinsicSlowPathARM64(invoke);
codegen_->AddSlowPath(slow_path);
- if (gUseReadBarrier) {
+ if (codegen_->EmitReadBarrier()) {
// Check self->GetWeakRefAccessEnabled().
UseScratchRegisterScope temps(masm);
Register temp = temps.AcquireW();
@@ -3576,7 +3744,7 @@ void IntrinsicCodeGeneratorARM64::VisitReferenceGetReferent(HInvoke* invoke) {
// Load the value from the field.
uint32_t referent_offset = mirror::Reference::ReferentOffset().Uint32Value();
- if (gUseReadBarrier && kUseBakerReadBarrier) {
+ if (codegen_->EmitBakerReadBarrier()) {
codegen_->GenerateFieldLoadWithBakerReadBarrier(invoke,
out,
WRegisterFrom(obj),
@@ -3594,7 +3762,7 @@ void IntrinsicCodeGeneratorARM64::VisitReferenceGetReferent(HInvoke* invoke) {
}
void IntrinsicLocationsBuilderARM64::VisitReferenceRefersTo(HInvoke* invoke) {
- IntrinsicVisitor::CreateReferenceRefersToLocations(invoke);
+ IntrinsicVisitor::CreateReferenceRefersToLocations(invoke, codegen_);
}
void IntrinsicCodeGeneratorARM64::VisitReferenceRefersTo(HInvoke* invoke) {
@@ -3616,7 +3784,7 @@ void IntrinsicCodeGeneratorARM64::VisitReferenceRefersTo(HInvoke* invoke) {
__ Cmp(tmp, other);
- if (gUseReadBarrier) {
+ if (codegen_->EmitReadBarrier()) {
DCHECK(kUseBakerReadBarrier);
vixl::aarch64::Label calculate_result;
@@ -3676,7 +3844,7 @@ void IntrinsicLocationsBuilderARM64::VisitReachabilityFence(HInvoke* invoke) {
locations->SetInAt(0, Location::Any());
}
-void IntrinsicCodeGeneratorARM64::VisitReachabilityFence(HInvoke* invoke ATTRIBUTE_UNUSED) { }
+void IntrinsicCodeGeneratorARM64::VisitReachabilityFence([[maybe_unused]] HInvoke* invoke) {}
void IntrinsicLocationsBuilderARM64::VisitCRC32Update(HInvoke* invoke) {
if (!codegen_->GetInstructionSetFeatures().HasCRC()) {
@@ -4305,7 +4473,7 @@ static void GenerateMathFma(HInvoke* invoke, CodeGeneratorARM64* codegen) {
}
void IntrinsicLocationsBuilderARM64::VisitMathFmaDouble(HInvoke* invoke) {
- CreateFPFPFPToFPCallLocations(allocator_, invoke);
+ CreateFPFPFPToFPLocations(allocator_, invoke);
}
void IntrinsicCodeGeneratorARM64::VisitMathFmaDouble(HInvoke* invoke) {
@@ -4313,7 +4481,7 @@ void IntrinsicCodeGeneratorARM64::VisitMathFmaDouble(HInvoke* invoke) {
}
void IntrinsicLocationsBuilderARM64::VisitMathFmaFloat(HInvoke* invoke) {
- CreateFPFPFPToFPCallLocations(allocator_, invoke);
+ CreateFPFPFPToFPLocations(allocator_, invoke);
}
void IntrinsicCodeGeneratorARM64::VisitMathFmaFloat(HInvoke* invoke) {
@@ -4695,24 +4863,24 @@ static void GenerateVarHandleTarget(HInvoke* invoke,
__ Mov(target.offset, target_field->GetOffset().Uint32Value());
} else {
// For static fields, we need to fill the `target.object` with the declaring class,
- // so we can use `target.object` as temporary for the `ArtMethod*`. For instance fields,
- // we do not need the declaring class, so we can forget the `ArtMethod*` when
- // we load the `target.offset`, so use the `target.offset` to hold the `ArtMethod*`.
- Register method = (expected_coordinates_count == 0) ? target.object : target.offset;
+ // so we can use `target.object` as temporary for the `ArtField*`. For instance fields,
+ // we do not need the declaring class, so we can forget the `ArtField*` when
+ // we load the `target.offset`, so use the `target.offset` to hold the `ArtField*`.
+ Register field = (expected_coordinates_count == 0) ? target.object : target.offset;
const MemberOffset art_field_offset = mirror::FieldVarHandle::ArtFieldOffset();
const MemberOffset offset_offset = ArtField::OffsetOffset();
- // Load the ArtField, the offset and, if needed, declaring class.
- __ Ldr(method.X(), HeapOperand(varhandle, art_field_offset.Int32Value()));
- __ Ldr(target.offset, MemOperand(method.X(), offset_offset.Int32Value()));
+ // Load the ArtField*, the offset and, if needed, declaring class.
+ __ Ldr(field.X(), HeapOperand(varhandle, art_field_offset.Int32Value()));
+ __ Ldr(target.offset, MemOperand(field.X(), offset_offset.Int32Value()));
if (expected_coordinates_count == 0u) {
codegen->GenerateGcRootFieldLoad(invoke,
LocationFrom(target.object),
- method.X(),
+ field.X(),
ArtField::DeclaringClassOffset().Int32Value(),
- /*fixup_label=*/ nullptr,
- gCompilerReadBarrierOption);
+ /*fixup_label=*/nullptr,
+ codegen->GetCompilerReadBarrierOption());
}
}
} else {
@@ -4732,7 +4900,8 @@ static void GenerateVarHandleTarget(HInvoke* invoke,
}
}
-static LocationSummary* CreateVarHandleCommonLocations(HInvoke* invoke) {
+static LocationSummary* CreateVarHandleCommonLocations(HInvoke* invoke,
+ CodeGeneratorARM64* codegen) {
size_t expected_coordinates_count = GetExpectedVarHandleCoordinatesCount(invoke);
DataType::Type return_type = invoke->GetType();
@@ -4766,7 +4935,7 @@ static LocationSummary* CreateVarHandleCommonLocations(HInvoke* invoke) {
}
// Add a temporary for offset.
- if ((gUseReadBarrier && !kUseBakerReadBarrier) &&
+ if (codegen->EmitNonBakerReadBarrier() &&
GetExpectedVarHandleCoordinatesCount(invoke) == 0u) { // For static fields.
// To preserve the offset value across the non-Baker read barrier slow path
// for loading the declaring class, use a fixed callee-save register.
@@ -4783,13 +4952,13 @@ static LocationSummary* CreateVarHandleCommonLocations(HInvoke* invoke) {
return locations;
}
-static void CreateVarHandleGetLocations(HInvoke* invoke) {
+static void CreateVarHandleGetLocations(HInvoke* invoke, CodeGeneratorARM64* codegen) {
VarHandleOptimizations optimizations(invoke);
if (optimizations.GetDoNotIntrinsify()) {
return;
}
- if ((gUseReadBarrier && !kUseBakerReadBarrier) &&
+ if (codegen->EmitNonBakerReadBarrier() &&
invoke->GetType() == DataType::Type::kReference &&
invoke->GetIntrinsic() != Intrinsics::kVarHandleGet &&
invoke->GetIntrinsic() != Intrinsics::kVarHandleGetOpaque) {
@@ -4799,7 +4968,7 @@ static void CreateVarHandleGetLocations(HInvoke* invoke) {
return;
}
- CreateVarHandleCommonLocations(invoke);
+ CreateVarHandleCommonLocations(invoke, codegen);
}
static void GenerateVarHandleGet(HInvoke* invoke,
@@ -4829,7 +4998,7 @@ static void GenerateVarHandleGet(HInvoke* invoke,
DCHECK(use_load_acquire || order == std::memory_order_relaxed);
// Load the value from the target location.
- if (type == DataType::Type::kReference && gUseReadBarrier && kUseBakerReadBarrier) {
+ if (type == DataType::Type::kReference && codegen->EmitBakerReadBarrier()) {
// Piggy-back on the field load path using introspection for the Baker read barrier.
// The `target.offset` is a temporary, use it for field address.
Register tmp_ptr = target.offset.X();
@@ -4882,7 +5051,7 @@ static void GenerateVarHandleGet(HInvoke* invoke,
}
void IntrinsicLocationsBuilderARM64::VisitVarHandleGet(HInvoke* invoke) {
- CreateVarHandleGetLocations(invoke);
+ CreateVarHandleGetLocations(invoke, codegen_);
}
void IntrinsicCodeGeneratorARM64::VisitVarHandleGet(HInvoke* invoke) {
@@ -4890,7 +5059,7 @@ void IntrinsicCodeGeneratorARM64::VisitVarHandleGet(HInvoke* invoke) {
}
void IntrinsicLocationsBuilderARM64::VisitVarHandleGetOpaque(HInvoke* invoke) {
- CreateVarHandleGetLocations(invoke);
+ CreateVarHandleGetLocations(invoke, codegen_);
}
void IntrinsicCodeGeneratorARM64::VisitVarHandleGetOpaque(HInvoke* invoke) {
@@ -4898,7 +5067,7 @@ void IntrinsicCodeGeneratorARM64::VisitVarHandleGetOpaque(HInvoke* invoke) {
}
void IntrinsicLocationsBuilderARM64::VisitVarHandleGetAcquire(HInvoke* invoke) {
- CreateVarHandleGetLocations(invoke);
+ CreateVarHandleGetLocations(invoke, codegen_);
}
void IntrinsicCodeGeneratorARM64::VisitVarHandleGetAcquire(HInvoke* invoke) {
@@ -4906,20 +5075,20 @@ void IntrinsicCodeGeneratorARM64::VisitVarHandleGetAcquire(HInvoke* invoke) {
}
void IntrinsicLocationsBuilderARM64::VisitVarHandleGetVolatile(HInvoke* invoke) {
- CreateVarHandleGetLocations(invoke);
+ CreateVarHandleGetLocations(invoke, codegen_);
}
void IntrinsicCodeGeneratorARM64::VisitVarHandleGetVolatile(HInvoke* invoke) {
GenerateVarHandleGet(invoke, codegen_, std::memory_order_seq_cst);
}
-static void CreateVarHandleSetLocations(HInvoke* invoke) {
+static void CreateVarHandleSetLocations(HInvoke* invoke, CodeGeneratorARM64* codegen) {
VarHandleOptimizations optimizations(invoke);
if (optimizations.GetDoNotIntrinsify()) {
return;
}
- CreateVarHandleCommonLocations(invoke);
+ CreateVarHandleCommonLocations(invoke, codegen);
}
static void GenerateVarHandleSet(HInvoke* invoke,
@@ -4991,7 +5160,7 @@ static void GenerateVarHandleSet(HInvoke* invoke,
}
void IntrinsicLocationsBuilderARM64::VisitVarHandleSet(HInvoke* invoke) {
- CreateVarHandleSetLocations(invoke);
+ CreateVarHandleSetLocations(invoke, codegen_);
}
void IntrinsicCodeGeneratorARM64::VisitVarHandleSet(HInvoke* invoke) {
@@ -4999,7 +5168,7 @@ void IntrinsicCodeGeneratorARM64::VisitVarHandleSet(HInvoke* invoke) {
}
void IntrinsicLocationsBuilderARM64::VisitVarHandleSetOpaque(HInvoke* invoke) {
- CreateVarHandleSetLocations(invoke);
+ CreateVarHandleSetLocations(invoke, codegen_);
}
void IntrinsicCodeGeneratorARM64::VisitVarHandleSetOpaque(HInvoke* invoke) {
@@ -5007,7 +5176,7 @@ void IntrinsicCodeGeneratorARM64::VisitVarHandleSetOpaque(HInvoke* invoke) {
}
void IntrinsicLocationsBuilderARM64::VisitVarHandleSetRelease(HInvoke* invoke) {
- CreateVarHandleSetLocations(invoke);
+ CreateVarHandleSetLocations(invoke, codegen_);
}
void IntrinsicCodeGeneratorARM64::VisitVarHandleSetRelease(HInvoke* invoke) {
@@ -5015,14 +5184,16 @@ void IntrinsicCodeGeneratorARM64::VisitVarHandleSetRelease(HInvoke* invoke) {
}
void IntrinsicLocationsBuilderARM64::VisitVarHandleSetVolatile(HInvoke* invoke) {
- CreateVarHandleSetLocations(invoke);
+ CreateVarHandleSetLocations(invoke, codegen_);
}
void IntrinsicCodeGeneratorARM64::VisitVarHandleSetVolatile(HInvoke* invoke) {
GenerateVarHandleSet(invoke, codegen_, std::memory_order_seq_cst);
}
-static void CreateVarHandleCompareAndSetOrExchangeLocations(HInvoke* invoke, bool return_success) {
+static void CreateVarHandleCompareAndSetOrExchangeLocations(HInvoke* invoke,
+ CodeGeneratorARM64* codegen,
+ bool return_success) {
VarHandleOptimizations optimizations(invoke);
if (optimizations.GetDoNotIntrinsify()) {
return;
@@ -5030,8 +5201,7 @@ static void CreateVarHandleCompareAndSetOrExchangeLocations(HInvoke* invoke, boo
uint32_t number_of_arguments = invoke->GetNumberOfArguments();
DataType::Type value_type = GetDataTypeFromShorty(invoke, number_of_arguments - 1u);
- if ((gUseReadBarrier && !kUseBakerReadBarrier) &&
- value_type == DataType::Type::kReference) {
+ if (value_type == DataType::Type::kReference && codegen->EmitNonBakerReadBarrier()) {
// Unsupported for non-Baker read barrier because the artReadBarrierSlow() ignores
// the passed reference and reloads it from the field. This breaks the read barriers
// in slow path in different ways. The marked old value may not actually be a to-space
@@ -5042,9 +5212,9 @@ static void CreateVarHandleCompareAndSetOrExchangeLocations(HInvoke* invoke, boo
return;
}
- LocationSummary* locations = CreateVarHandleCommonLocations(invoke);
+ LocationSummary* locations = CreateVarHandleCommonLocations(invoke, codegen);
- if (gUseReadBarrier && !kUseBakerReadBarrier) {
+ if (codegen->EmitNonBakerReadBarrier()) {
// We need callee-save registers for both the class object and offset instead of
// the temporaries reserved in CreateVarHandleCommonLocations().
static_assert(POPCOUNT(kArm64CalleeSaveRefSpills) >= 2u);
@@ -5085,7 +5255,7 @@ static void CreateVarHandleCompareAndSetOrExchangeLocations(HInvoke* invoke, boo
locations->AddTemp(Location::RequiresRegister());
}
}
- if (gUseReadBarrier && value_type == DataType::Type::kReference) {
+ if (value_type == DataType::Type::kReference && codegen->EmitReadBarrier()) {
// Add a temporary for the `old_value_temp` in slow path.
locations->AddTemp(Location::RequiresRegister());
}
@@ -5151,7 +5321,7 @@ static void GenerateVarHandleCompareAndSetOrExchange(HInvoke* invoke,
// except for references that need the offset for the read barrier.
UseScratchRegisterScope temps(masm);
Register tmp_ptr = target.offset.X();
- if (gUseReadBarrier && value_type == DataType::Type::kReference) {
+ if (value_type == DataType::Type::kReference && codegen->EmitReadBarrier()) {
tmp_ptr = temps.AcquireX();
}
__ Add(tmp_ptr, target.object.X(), target.offset.X());
@@ -5234,7 +5404,7 @@ static void GenerateVarHandleCompareAndSetOrExchange(HInvoke* invoke,
vixl::aarch64::Label* exit_loop = &exit_loop_label;
vixl::aarch64::Label* cmp_failure = &exit_loop_label;
- if (gUseReadBarrier && value_type == DataType::Type::kReference) {
+ if (value_type == DataType::Type::kReference && codegen->EmitReadBarrier()) {
// The `old_value_temp` is used first for the marked `old_value` and then for the unmarked
// reloaded old value for subsequent CAS in the slow path. It cannot be a scratch register.
size_t expected_coordinates_count = GetExpectedVarHandleCoordinatesCount(invoke);
@@ -5301,7 +5471,7 @@ static void GenerateVarHandleCompareAndSetOrExchange(HInvoke* invoke,
}
void IntrinsicLocationsBuilderARM64::VisitVarHandleCompareAndExchange(HInvoke* invoke) {
- CreateVarHandleCompareAndSetOrExchangeLocations(invoke, /*return_success=*/ false);
+ CreateVarHandleCompareAndSetOrExchangeLocations(invoke, codegen_, /*return_success=*/ false);
}
void IntrinsicCodeGeneratorARM64::VisitVarHandleCompareAndExchange(HInvoke* invoke) {
@@ -5310,7 +5480,7 @@ void IntrinsicCodeGeneratorARM64::VisitVarHandleCompareAndExchange(HInvoke* invo
}
void IntrinsicLocationsBuilderARM64::VisitVarHandleCompareAndExchangeAcquire(HInvoke* invoke) {
- CreateVarHandleCompareAndSetOrExchangeLocations(invoke, /*return_success=*/ false);
+ CreateVarHandleCompareAndSetOrExchangeLocations(invoke, codegen_, /*return_success=*/ false);
}
void IntrinsicCodeGeneratorARM64::VisitVarHandleCompareAndExchangeAcquire(HInvoke* invoke) {
@@ -5319,7 +5489,7 @@ void IntrinsicCodeGeneratorARM64::VisitVarHandleCompareAndExchangeAcquire(HInvok
}
void IntrinsicLocationsBuilderARM64::VisitVarHandleCompareAndExchangeRelease(HInvoke* invoke) {
- CreateVarHandleCompareAndSetOrExchangeLocations(invoke, /*return_success=*/ false);
+ CreateVarHandleCompareAndSetOrExchangeLocations(invoke, codegen_, /*return_success=*/ false);
}
void IntrinsicCodeGeneratorARM64::VisitVarHandleCompareAndExchangeRelease(HInvoke* invoke) {
@@ -5328,7 +5498,7 @@ void IntrinsicCodeGeneratorARM64::VisitVarHandleCompareAndExchangeRelease(HInvok
}
void IntrinsicLocationsBuilderARM64::VisitVarHandleCompareAndSet(HInvoke* invoke) {
- CreateVarHandleCompareAndSetOrExchangeLocations(invoke, /*return_success=*/ true);
+ CreateVarHandleCompareAndSetOrExchangeLocations(invoke, codegen_, /*return_success=*/ true);
}
void IntrinsicCodeGeneratorARM64::VisitVarHandleCompareAndSet(HInvoke* invoke) {
@@ -5337,7 +5507,7 @@ void IntrinsicCodeGeneratorARM64::VisitVarHandleCompareAndSet(HInvoke* invoke) {
}
void IntrinsicLocationsBuilderARM64::VisitVarHandleWeakCompareAndSet(HInvoke* invoke) {
- CreateVarHandleCompareAndSetOrExchangeLocations(invoke, /*return_success=*/ true);
+ CreateVarHandleCompareAndSetOrExchangeLocations(invoke, codegen_, /*return_success=*/ true);
}
void IntrinsicCodeGeneratorARM64::VisitVarHandleWeakCompareAndSet(HInvoke* invoke) {
@@ -5346,7 +5516,7 @@ void IntrinsicCodeGeneratorARM64::VisitVarHandleWeakCompareAndSet(HInvoke* invok
}
void IntrinsicLocationsBuilderARM64::VisitVarHandleWeakCompareAndSetAcquire(HInvoke* invoke) {
- CreateVarHandleCompareAndSetOrExchangeLocations(invoke, /*return_success=*/ true);
+ CreateVarHandleCompareAndSetOrExchangeLocations(invoke, codegen_, /*return_success=*/ true);
}
void IntrinsicCodeGeneratorARM64::VisitVarHandleWeakCompareAndSetAcquire(HInvoke* invoke) {
@@ -5355,7 +5525,7 @@ void IntrinsicCodeGeneratorARM64::VisitVarHandleWeakCompareAndSetAcquire(HInvoke
}
void IntrinsicLocationsBuilderARM64::VisitVarHandleWeakCompareAndSetPlain(HInvoke* invoke) {
- CreateVarHandleCompareAndSetOrExchangeLocations(invoke, /*return_success=*/ true);
+ CreateVarHandleCompareAndSetOrExchangeLocations(invoke, codegen_, /*return_success=*/ true);
}
void IntrinsicCodeGeneratorARM64::VisitVarHandleWeakCompareAndSetPlain(HInvoke* invoke) {
@@ -5364,7 +5534,7 @@ void IntrinsicCodeGeneratorARM64::VisitVarHandleWeakCompareAndSetPlain(HInvoke*
}
void IntrinsicLocationsBuilderARM64::VisitVarHandleWeakCompareAndSetRelease(HInvoke* invoke) {
- CreateVarHandleCompareAndSetOrExchangeLocations(invoke, /*return_success=*/ true);
+ CreateVarHandleCompareAndSetOrExchangeLocations(invoke, codegen_, /*return_success=*/ true);
}
void IntrinsicCodeGeneratorARM64::VisitVarHandleWeakCompareAndSetRelease(HInvoke* invoke) {
@@ -5373,21 +5543,21 @@ void IntrinsicCodeGeneratorARM64::VisitVarHandleWeakCompareAndSetRelease(HInvoke
}
static void CreateVarHandleGetAndUpdateLocations(HInvoke* invoke,
+ CodeGeneratorARM64* codegen,
GetAndUpdateOp get_and_update_op) {
VarHandleOptimizations optimizations(invoke);
if (optimizations.GetDoNotIntrinsify()) {
return;
}
- if ((gUseReadBarrier && !kUseBakerReadBarrier) &&
- invoke->GetType() == DataType::Type::kReference) {
+ if (invoke->GetType() == DataType::Type::kReference && codegen->EmitNonBakerReadBarrier()) {
// Unsupported for non-Baker read barrier because the artReadBarrierSlow() ignores
// the passed reference and reloads it from the field, thus seeing the new value
// that we have just stored. (And it also gets the memory visibility wrong.) b/173104084
return;
}
- LocationSummary* locations = CreateVarHandleCommonLocations(invoke);
+ LocationSummary* locations = CreateVarHandleCommonLocations(invoke, codegen);
size_t old_temp_count = locations->GetTempCount();
DCHECK_EQ(old_temp_count, (GetExpectedVarHandleCoordinatesCount(invoke) == 0) ? 2u : 1u);
@@ -5455,8 +5625,7 @@ static void GenerateVarHandleGetAndUpdate(HInvoke* invoke,
// except for references that need the offset for the non-Baker read barrier.
UseScratchRegisterScope temps(masm);
Register tmp_ptr = target.offset.X();
- if ((gUseReadBarrier && !kUseBakerReadBarrier) &&
- value_type == DataType::Type::kReference) {
+ if (value_type == DataType::Type::kReference && codegen->EmitNonBakerReadBarrier()) {
tmp_ptr = temps.AcquireX();
}
__ Add(tmp_ptr, target.object.X(), target.offset.X());
@@ -5485,8 +5654,7 @@ static void GenerateVarHandleGetAndUpdate(HInvoke* invoke,
// the new value unless it is zero bit pattern (+0.0f or +0.0) and need another one
// in GenerateGetAndUpdate(). We have allocated a normal temporary to handle that.
old_value = CPURegisterFrom(locations->GetTemp(1u), load_store_type);
- } else if ((gUseReadBarrier && kUseBakerReadBarrier) &&
- value_type == DataType::Type::kReference) {
+ } else if (value_type == DataType::Type::kReference && codegen->EmitBakerReadBarrier()) {
// Load the old value initially to a scratch register.
// We shall move it to `out` later with a read barrier.
old_value = temps.AcquireW();
@@ -5533,9 +5701,9 @@ static void GenerateVarHandleGetAndUpdate(HInvoke* invoke,
__ Sxtb(out.W(), old_value.W());
} else if (value_type == DataType::Type::kInt16) {
__ Sxth(out.W(), old_value.W());
- } else if (gUseReadBarrier && value_type == DataType::Type::kReference) {
+ } else if (value_type == DataType::Type::kReference && codegen->EmitReadBarrier()) {
if (kUseBakerReadBarrier) {
- codegen->GenerateIntrinsicCasMoveWithBakerReadBarrier(out.W(), old_value.W());
+ codegen->GenerateIntrinsicMoveWithBakerReadBarrier(out.W(), old_value.W());
} else {
codegen->GenerateReadBarrierSlow(
invoke,
@@ -5554,7 +5722,7 @@ static void GenerateVarHandleGetAndUpdate(HInvoke* invoke,
}
void IntrinsicLocationsBuilderARM64::VisitVarHandleGetAndSet(HInvoke* invoke) {
- CreateVarHandleGetAndUpdateLocations(invoke, GetAndUpdateOp::kSet);
+ CreateVarHandleGetAndUpdateLocations(invoke, codegen_, GetAndUpdateOp::kSet);
}
void IntrinsicCodeGeneratorARM64::VisitVarHandleGetAndSet(HInvoke* invoke) {
@@ -5562,7 +5730,7 @@ void IntrinsicCodeGeneratorARM64::VisitVarHandleGetAndSet(HInvoke* invoke) {
}
void IntrinsicLocationsBuilderARM64::VisitVarHandleGetAndSetAcquire(HInvoke* invoke) {
- CreateVarHandleGetAndUpdateLocations(invoke, GetAndUpdateOp::kSet);
+ CreateVarHandleGetAndUpdateLocations(invoke, codegen_, GetAndUpdateOp::kSet);
}
void IntrinsicCodeGeneratorARM64::VisitVarHandleGetAndSetAcquire(HInvoke* invoke) {
@@ -5570,7 +5738,7 @@ void IntrinsicCodeGeneratorARM64::VisitVarHandleGetAndSetAcquire(HInvoke* invoke
}
void IntrinsicLocationsBuilderARM64::VisitVarHandleGetAndSetRelease(HInvoke* invoke) {
- CreateVarHandleGetAndUpdateLocations(invoke, GetAndUpdateOp::kSet);
+ CreateVarHandleGetAndUpdateLocations(invoke, codegen_, GetAndUpdateOp::kSet);
}
void IntrinsicCodeGeneratorARM64::VisitVarHandleGetAndSetRelease(HInvoke* invoke) {
@@ -5578,7 +5746,7 @@ void IntrinsicCodeGeneratorARM64::VisitVarHandleGetAndSetRelease(HInvoke* invoke
}
void IntrinsicLocationsBuilderARM64::VisitVarHandleGetAndAdd(HInvoke* invoke) {
- CreateVarHandleGetAndUpdateLocations(invoke, GetAndUpdateOp::kAdd);
+ CreateVarHandleGetAndUpdateLocations(invoke, codegen_, GetAndUpdateOp::kAdd);
}
void IntrinsicCodeGeneratorARM64::VisitVarHandleGetAndAdd(HInvoke* invoke) {
@@ -5586,7 +5754,7 @@ void IntrinsicCodeGeneratorARM64::VisitVarHandleGetAndAdd(HInvoke* invoke) {
}
void IntrinsicLocationsBuilderARM64::VisitVarHandleGetAndAddAcquire(HInvoke* invoke) {
- CreateVarHandleGetAndUpdateLocations(invoke, GetAndUpdateOp::kAdd);
+ CreateVarHandleGetAndUpdateLocations(invoke, codegen_, GetAndUpdateOp::kAdd);
}
void IntrinsicCodeGeneratorARM64::VisitVarHandleGetAndAddAcquire(HInvoke* invoke) {
@@ -5594,7 +5762,7 @@ void IntrinsicCodeGeneratorARM64::VisitVarHandleGetAndAddAcquire(HInvoke* invoke
}
void IntrinsicLocationsBuilderARM64::VisitVarHandleGetAndAddRelease(HInvoke* invoke) {
- CreateVarHandleGetAndUpdateLocations(invoke, GetAndUpdateOp::kAdd);
+ CreateVarHandleGetAndUpdateLocations(invoke, codegen_, GetAndUpdateOp::kAdd);
}
void IntrinsicCodeGeneratorARM64::VisitVarHandleGetAndAddRelease(HInvoke* invoke) {
@@ -5602,7 +5770,7 @@ void IntrinsicCodeGeneratorARM64::VisitVarHandleGetAndAddRelease(HInvoke* invoke
}
void IntrinsicLocationsBuilderARM64::VisitVarHandleGetAndBitwiseAnd(HInvoke* invoke) {
- CreateVarHandleGetAndUpdateLocations(invoke, GetAndUpdateOp::kAnd);
+ CreateVarHandleGetAndUpdateLocations(invoke, codegen_, GetAndUpdateOp::kAnd);
}
void IntrinsicCodeGeneratorARM64::VisitVarHandleGetAndBitwiseAnd(HInvoke* invoke) {
@@ -5610,7 +5778,7 @@ void IntrinsicCodeGeneratorARM64::VisitVarHandleGetAndBitwiseAnd(HInvoke* invoke
}
void IntrinsicLocationsBuilderARM64::VisitVarHandleGetAndBitwiseAndAcquire(HInvoke* invoke) {
- CreateVarHandleGetAndUpdateLocations(invoke, GetAndUpdateOp::kAnd);
+ CreateVarHandleGetAndUpdateLocations(invoke, codegen_, GetAndUpdateOp::kAnd);
}
void IntrinsicCodeGeneratorARM64::VisitVarHandleGetAndBitwiseAndAcquire(HInvoke* invoke) {
@@ -5618,7 +5786,7 @@ void IntrinsicCodeGeneratorARM64::VisitVarHandleGetAndBitwiseAndAcquire(HInvoke*
}
void IntrinsicLocationsBuilderARM64::VisitVarHandleGetAndBitwiseAndRelease(HInvoke* invoke) {
- CreateVarHandleGetAndUpdateLocations(invoke, GetAndUpdateOp::kAnd);
+ CreateVarHandleGetAndUpdateLocations(invoke, codegen_, GetAndUpdateOp::kAnd);
}
void IntrinsicCodeGeneratorARM64::VisitVarHandleGetAndBitwiseAndRelease(HInvoke* invoke) {
@@ -5626,7 +5794,7 @@ void IntrinsicCodeGeneratorARM64::VisitVarHandleGetAndBitwiseAndRelease(HInvoke*
}
void IntrinsicLocationsBuilderARM64::VisitVarHandleGetAndBitwiseOr(HInvoke* invoke) {
- CreateVarHandleGetAndUpdateLocations(invoke, GetAndUpdateOp::kOr);
+ CreateVarHandleGetAndUpdateLocations(invoke, codegen_, GetAndUpdateOp::kOr);
}
void IntrinsicCodeGeneratorARM64::VisitVarHandleGetAndBitwiseOr(HInvoke* invoke) {
@@ -5634,7 +5802,7 @@ void IntrinsicCodeGeneratorARM64::VisitVarHandleGetAndBitwiseOr(HInvoke* invoke)
}
void IntrinsicLocationsBuilderARM64::VisitVarHandleGetAndBitwiseOrAcquire(HInvoke* invoke) {
- CreateVarHandleGetAndUpdateLocations(invoke, GetAndUpdateOp::kOr);
+ CreateVarHandleGetAndUpdateLocations(invoke, codegen_, GetAndUpdateOp::kOr);
}
void IntrinsicCodeGeneratorARM64::VisitVarHandleGetAndBitwiseOrAcquire(HInvoke* invoke) {
@@ -5642,7 +5810,7 @@ void IntrinsicCodeGeneratorARM64::VisitVarHandleGetAndBitwiseOrAcquire(HInvoke*
}
void IntrinsicLocationsBuilderARM64::VisitVarHandleGetAndBitwiseOrRelease(HInvoke* invoke) {
- CreateVarHandleGetAndUpdateLocations(invoke, GetAndUpdateOp::kOr);
+ CreateVarHandleGetAndUpdateLocations(invoke, codegen_, GetAndUpdateOp::kOr);
}
void IntrinsicCodeGeneratorARM64::VisitVarHandleGetAndBitwiseOrRelease(HInvoke* invoke) {
@@ -5650,7 +5818,7 @@ void IntrinsicCodeGeneratorARM64::VisitVarHandleGetAndBitwiseOrRelease(HInvoke*
}
void IntrinsicLocationsBuilderARM64::VisitVarHandleGetAndBitwiseXor(HInvoke* invoke) {
- CreateVarHandleGetAndUpdateLocations(invoke, GetAndUpdateOp::kXor);
+ CreateVarHandleGetAndUpdateLocations(invoke, codegen_, GetAndUpdateOp::kXor);
}
void IntrinsicCodeGeneratorARM64::VisitVarHandleGetAndBitwiseXor(HInvoke* invoke) {
@@ -5658,7 +5826,7 @@ void IntrinsicCodeGeneratorARM64::VisitVarHandleGetAndBitwiseXor(HInvoke* invoke
}
void IntrinsicLocationsBuilderARM64::VisitVarHandleGetAndBitwiseXorAcquire(HInvoke* invoke) {
- CreateVarHandleGetAndUpdateLocations(invoke, GetAndUpdateOp::kXor);
+ CreateVarHandleGetAndUpdateLocations(invoke, codegen_, GetAndUpdateOp::kXor);
}
void IntrinsicCodeGeneratorARM64::VisitVarHandleGetAndBitwiseXorAcquire(HInvoke* invoke) {
@@ -5666,7 +5834,7 @@ void IntrinsicCodeGeneratorARM64::VisitVarHandleGetAndBitwiseXorAcquire(HInvoke*
}
void IntrinsicLocationsBuilderARM64::VisitVarHandleGetAndBitwiseXorRelease(HInvoke* invoke) {
- CreateVarHandleGetAndUpdateLocations(invoke, GetAndUpdateOp::kXor);
+ CreateVarHandleGetAndUpdateLocations(invoke, codegen_, GetAndUpdateOp::kXor);
}
void IntrinsicCodeGeneratorARM64::VisitVarHandleGetAndBitwiseXorRelease(HInvoke* invoke) {
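
Across this file, checks on the global gUseReadBarrier/kUseBakerReadBarrier flags become queries on the code generator. Judging only from the 1:1 substitutions in this diff, the three predicates appear to relate as modeled below; this is a sketch under that assumption, not the actual CodeGenerator interface.

#include <cassert>

// Hypothetical stand-in for the compile-time Baker configuration flag.
constexpr bool kUseBakerReadBarrier = true;

// Model of the predicate split suggested by the substitutions in this diff:
//   gUseReadBarrier                          -> EmitReadBarrier()
//   gUseReadBarrier && kUseBakerReadBarrier  -> EmitBakerReadBarrier()
//   gUseReadBarrier && !kUseBakerReadBarrier -> EmitNonBakerReadBarrier()
class CodeGenModel {
 public:
  explicit CodeGenModel(bool emit_read_barrier) : emit_read_barrier_(emit_read_barrier) {}
  bool EmitReadBarrier() const { return emit_read_barrier_; }
  bool EmitBakerReadBarrier() const { return emit_read_barrier_ && kUseBakerReadBarrier; }
  bool EmitNonBakerReadBarrier() const { return emit_read_barrier_ && !kUseBakerReadBarrier; }

 private:
  const bool emit_read_barrier_;
};

int main() {
  const bool options[] = {false, true};
  for (bool emit : options) {
    CodeGenModel codegen(emit);
    // The Baker and non-Baker predicates are mutually exclusive and both imply EmitReadBarrier().
    assert(codegen.EmitBakerReadBarrier() == (codegen.EmitReadBarrier() && kUseBakerReadBarrier));
    assert(!(codegen.EmitBakerReadBarrier() && codegen.EmitNonBakerReadBarrier()));
  }
  return 0;
}
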
diff --git a/compiler/optimizing/intrinsics_arm64.h b/compiler/optimizing/intrinsics_arm64.h
index a0ccf87f7b..50e2e43f5f 100644
--- a/compiler/optimizing/intrinsics_arm64.h
+++ b/compiler/optimizing/intrinsics_arm64.h
@@ -19,6 +19,7 @@
#include "base/macros.h"
#include "intrinsics.h"
+#include "intrinsics_list.h"
namespace vixl {
namespace aarch64 {
@@ -47,9 +48,7 @@ class IntrinsicLocationsBuilderARM64 final : public IntrinsicVisitor {
#define OPTIMIZING_INTRINSICS(Name, IsStatic, NeedsEnvironmentOrCache, SideEffects, Exceptions, ...) \
void Visit ## Name(HInvoke* invoke) override;
-#include "intrinsics_list.h"
- INTRINSICS_LIST(OPTIMIZING_INTRINSICS)
-#undef INTRINSICS_LIST
+ ART_INTRINSICS_LIST(OPTIMIZING_INTRINSICS)
#undef OPTIMIZING_INTRINSICS
// Check whether an invoke is an intrinsic, and if so, create a location summary. Returns whether
@@ -72,9 +71,7 @@ class IntrinsicCodeGeneratorARM64 final : public IntrinsicVisitor {
#define OPTIMIZING_INTRINSICS(Name, IsStatic, NeedsEnvironmentOrCache, SideEffects, Exceptions, ...) \
void Visit ## Name(HInvoke* invoke) override;
-#include "intrinsics_list.h"
- INTRINSICS_LIST(OPTIMIZING_INTRINSICS)
-#undef INTRINSICS_LIST
+ ART_INTRINSICS_LIST(OPTIMIZING_INTRINSICS)
#undef OPTIMIZING_INTRINSICS
private:
@@ -82,6 +79,10 @@ class IntrinsicCodeGeneratorARM64 final : public IntrinsicVisitor {
ArenaAllocator* GetAllocator();
+ void HandleValueOf(HInvoke* invoke,
+ const IntrinsicVisitor::ValueOfInfo& info,
+ DataType::Type type);
+
CodeGeneratorARM64* const codegen_;
DISALLOW_COPY_AND_ASSIGN(IntrinsicCodeGeneratorARM64);
diff --git a/compiler/optimizing/intrinsics_arm_vixl.cc b/compiler/optimizing/intrinsics_arm_vixl.cc
index 266b5bc799..5f4de8cda2 100644
--- a/compiler/optimizing/intrinsics_arm_vixl.cc
+++ b/compiler/optimizing/intrinsics_arm_vixl.cc
@@ -22,6 +22,7 @@
#include "code_generator_arm_vixl.h"
#include "common_arm.h"
#include "heap_poisoning.h"
+#include "intrinsic_objects.h"
#include "intrinsics.h"
#include "intrinsics_utils.h"
#include "lock_word.h"
@@ -31,6 +32,7 @@
#include "mirror/string-inl.h"
#include "scoped_thread_state_change-inl.h"
#include "thread-current-inl.h"
+#include "well_known_classes.h"
#include "aarch32/constants-aarch32.h"
@@ -120,11 +122,10 @@ class ReadBarrierSystemArrayCopySlowPathARMVIXL : public SlowPathCodeARMVIXL {
public:
explicit ReadBarrierSystemArrayCopySlowPathARMVIXL(HInstruction* instruction)
: SlowPathCodeARMVIXL(instruction) {
- DCHECK(gUseReadBarrier);
- DCHECK(kUseBakerReadBarrier);
}
void EmitNativeCode(CodeGenerator* codegen) override {
+ DCHECK(codegen->EmitBakerReadBarrier());
CodeGeneratorARMVIXL* arm_codegen = down_cast<CodeGeneratorARMVIXL*>(codegen);
ArmVIXLAssembler* assembler = arm_codegen->GetAssembler();
LocationSummary* locations = instruction_->GetLocations();
@@ -1242,7 +1243,7 @@ void IntrinsicCodeGeneratorARMVIXL::VisitStringNewStringFromString(HInvoke* invo
void IntrinsicLocationsBuilderARMVIXL::VisitSystemArrayCopy(HInvoke* invoke) {
// The only read barrier implementation supporting the
// SystemArrayCopy intrinsic is the Baker-style read barriers.
- if (gUseReadBarrier && !kUseBakerReadBarrier) {
+ if (codegen_->EmitNonBakerReadBarrier()) {
return;
}
@@ -1252,9 +1253,9 @@ void IntrinsicLocationsBuilderARMVIXL::VisitSystemArrayCopy(HInvoke* invoke) {
return;
}
- HIntConstant* src_pos = invoke->InputAt(1)->AsIntConstant();
- HIntConstant* dest_pos = invoke->InputAt(3)->AsIntConstant();
- HIntConstant* length = invoke->InputAt(4)->AsIntConstant();
+ HIntConstant* src_pos = invoke->InputAt(1)->AsIntConstantOrNull();
+ HIntConstant* dest_pos = invoke->InputAt(3)->AsIntConstantOrNull();
+ HIntConstant* length = invoke->InputAt(4)->AsIntConstantOrNull();
if (src_pos != nullptr && !assembler_->ShifterOperandCanAlwaysHold(src_pos->GetValue())) {
locations->SetInAt(1, Location::RequiresRegister());
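
The switch from AsIntConstant() to AsIntConstantOrNull() above pairs a nullable accessor with the explicit nullptr checks that follow, presumably so non-constant inputs no longer trip an assertion. A small hypothetical sketch of that accessor pattern; none of these names are the real HInstruction API.

#include <cassert>

// Minimal stand-ins for an instruction hierarchy with checked and nullable downcasts.
struct Node {
  virtual ~Node() {}
  virtual bool IsIntConstant() const { return false; }
};

struct IntConstant : Node {
  explicit IntConstant(int v) : value(v) {}
  bool IsIntConstant() const override { return true; }
  int value;
};

// Nullable variant: callers that may see a non-constant check for nullptr,
// as the SystemArrayCopy locations builder does above.
IntConstant* AsIntConstantOrNull(Node* node) {
  return node->IsIntConstant() ? static_cast<IntConstant*>(node) : nullptr;
}

// Asserting variant: only valid when the caller already knows the node kind.
IntConstant* AsIntConstant(Node* node) {
  assert(node->IsIntConstant());
  return static_cast<IntConstant*>(node);
}

int main() {
  IntConstant five(5);
  Node non_constant;
  assert(AsIntConstantOrNull(&five) != nullptr);
  assert(AsIntConstantOrNull(&non_constant) == nullptr);  // caller handles the non-constant case
  assert(AsIntConstant(&five)->value == 5);
  return 0;
}
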
@@ -1265,7 +1266,7 @@ void IntrinsicLocationsBuilderARMVIXL::VisitSystemArrayCopy(HInvoke* invoke) {
if (length != nullptr && !assembler_->ShifterOperandCanAlwaysHold(length->GetValue())) {
locations->SetInAt(4, Location::RequiresRegister());
}
- if (gUseReadBarrier && kUseBakerReadBarrier) {
+ if (codegen_->EmitBakerReadBarrier()) {
// Temporary register IP cannot be used in
// ReadBarrierSystemArrayCopySlowPathARM (because that register
// is clobbered by ReadBarrierMarkRegX entry points). Get an extra
@@ -1339,7 +1340,7 @@ static void CheckPosition(ArmVIXLAssembler* assembler,
void IntrinsicCodeGeneratorARMVIXL::VisitSystemArrayCopy(HInvoke* invoke) {
// The only read barrier implementation supporting the
// SystemArrayCopy intrinsic is the Baker-style read barriers.
- DCHECK_IMPLIES(gUseReadBarrier, kUseBakerReadBarrier);
+ DCHECK_IMPLIES(codegen_->EmitReadBarrier(), kUseBakerReadBarrier);
ArmVIXLAssembler* assembler = GetAssembler();
LocationSummary* locations = invoke->GetLocations();
@@ -1453,7 +1454,7 @@ void IntrinsicCodeGeneratorARMVIXL::VisitSystemArrayCopy(HInvoke* invoke) {
// or the destination is Object[]. If none of these checks succeed, we go to the
// slow path.
- if (gUseReadBarrier && kUseBakerReadBarrier) {
+ if (codegen_->EmitBakerReadBarrier()) {
if (!optimizations.GetSourceIsNonPrimitiveArray()) {
// /* HeapReference<Class> */ temp1 = src->klass_
codegen_->GenerateFieldLoadWithBakerReadBarrier(
@@ -1464,7 +1465,7 @@ void IntrinsicCodeGeneratorARMVIXL::VisitSystemArrayCopy(HInvoke* invoke) {
invoke, temp1_loc, temp1, component_offset, temp2_loc, /* needs_null_check= */ false);
__ CompareAndBranchIfZero(temp1, intrinsic_slow_path->GetEntryLabel());
// If heap poisoning is enabled, `temp1` has been unpoisoned
- // by the the previous call to GenerateFieldLoadWithBakerReadBarrier.
+ // by the previous call to GenerateFieldLoadWithBakerReadBarrier.
// /* uint16_t */ temp1 = static_cast<uint16>(temp1->primitive_type_);
__ Ldrh(temp1, MemOperand(temp1, primitive_offset));
static_assert(Primitive::kPrimNot == 0, "Expected 0 for kPrimNot");
@@ -1488,7 +1489,7 @@ void IntrinsicCodeGeneratorARMVIXL::VisitSystemArrayCopy(HInvoke* invoke) {
invoke, temp2_loc, temp1, component_offset, temp3_loc, /* needs_null_check= */ false);
__ CompareAndBranchIfZero(temp2, intrinsic_slow_path->GetEntryLabel());
// If heap poisoning is enabled, `temp2` has been unpoisoned
- // by the the previous call to GenerateFieldLoadWithBakerReadBarrier.
+ // by the previous call to GenerateFieldLoadWithBakerReadBarrier.
// /* uint16_t */ temp2 = static_cast<uint16>(temp2->primitive_type_);
__ Ldrh(temp2, MemOperand(temp2, primitive_offset));
static_assert(Primitive::kPrimNot == 0, "Expected 0 for kPrimNot");
@@ -1584,7 +1585,7 @@ void IntrinsicCodeGeneratorARMVIXL::VisitSystemArrayCopy(HInvoke* invoke) {
} else if (!optimizations.GetSourceIsNonPrimitiveArray()) {
DCHECK(optimizations.GetDestinationIsNonPrimitiveArray());
// Bail out if the source is not a non primitive array.
- if (gUseReadBarrier && kUseBakerReadBarrier) {
+ if (codegen_->EmitBakerReadBarrier()) {
// /* HeapReference<Class> */ temp1 = src->klass_
codegen_->GenerateFieldLoadWithBakerReadBarrier(
invoke, temp1_loc, src, class_offset, temp2_loc, /* needs_null_check= */ false);
@@ -1593,7 +1594,7 @@ void IntrinsicCodeGeneratorARMVIXL::VisitSystemArrayCopy(HInvoke* invoke) {
invoke, temp3_loc, temp1, component_offset, temp2_loc, /* needs_null_check= */ false);
__ CompareAndBranchIfZero(temp3, intrinsic_slow_path->GetEntryLabel());
// If heap poisoning is enabled, `temp3` has been unpoisoned
- // by the the previous call to GenerateFieldLoadWithBakerReadBarrier.
+ // by the previous call to GenerateFieldLoadWithBakerReadBarrier.
} else {
// /* HeapReference<Class> */ temp1 = src->klass_
__ Ldr(temp1, MemOperand(src, class_offset));
@@ -1621,7 +1622,7 @@ void IntrinsicCodeGeneratorARMVIXL::VisitSystemArrayCopy(HInvoke* invoke) {
__ CompareAndBranchIfZero(RegisterFrom(length), &done, /* is_far_target= */ false);
}
- if (gUseReadBarrier && kUseBakerReadBarrier) {
+ if (codegen_->EmitBakerReadBarrier()) {
// TODO: Also convert this intrinsic to the IsGcMarking strategy?
// SystemArrayCopy implementation for Baker read barriers (see
@@ -2433,18 +2434,35 @@ void IntrinsicCodeGeneratorARMVIXL::VisitMathFloor(HInvoke* invoke) {
__ Vrintm(F64, OutputDRegister(invoke), InputDRegisterAt(invoke, 0));
}
-void IntrinsicLocationsBuilderARMVIXL::VisitIntegerValueOf(HInvoke* invoke) {
- InvokeRuntimeCallingConventionARMVIXL calling_convention;
- IntrinsicVisitor::ComputeIntegerValueOfLocations(
- invoke,
- codegen_,
- LocationFrom(r0),
- LocationFrom(calling_convention.GetRegisterAt(0)));
-}
-
-void IntrinsicCodeGeneratorARMVIXL::VisitIntegerValueOf(HInvoke* invoke) {
- IntrinsicVisitor::IntegerValueOfInfo info =
- IntrinsicVisitor::ComputeIntegerValueOfInfo(invoke, codegen_->GetCompilerOptions());
+#define VISIT_INTRINSIC(name, low, high, type, start_index) \
+ void IntrinsicLocationsBuilderARMVIXL::Visit ##name ##ValueOf(HInvoke* invoke) { \
+ InvokeRuntimeCallingConventionARMVIXL calling_convention; \
+ IntrinsicVisitor::ComputeValueOfLocations( \
+ invoke, \
+ codegen_, \
+ low, \
+ high - low + 1, \
+ LocationFrom(r0), \
+ LocationFrom(calling_convention.GetRegisterAt(0))); \
+ } \
+ void IntrinsicCodeGeneratorARMVIXL::Visit ##name ##ValueOf(HInvoke* invoke) { \
+ IntrinsicVisitor::ValueOfInfo info = \
+ IntrinsicVisitor::ComputeValueOfInfo( \
+ invoke, \
+ codegen_->GetCompilerOptions(), \
+ WellKnownClasses::java_lang_ ##name ##_value, \
+ low, \
+ high - low + 1, \
+ start_index); \
+ HandleValueOf(invoke, info, type); \
+ }
+ BOXED_TYPES(VISIT_INTRINSIC)
+#undef VISIT_INTRINSIC
+
+
+void IntrinsicCodeGeneratorARMVIXL::HandleValueOf(HInvoke* invoke,
+ const IntrinsicVisitor::ValueOfInfo& info,
+ DataType::Type type) {
LocationSummary* locations = invoke->GetLocations();
ArmVIXLAssembler* const assembler = GetAssembler();
@@ -2457,20 +2475,20 @@ void IntrinsicCodeGeneratorARMVIXL::VisitIntegerValueOf(HInvoke* invoke) {
codegen_->InvokeRuntime(kQuickAllocObjectInitialized, invoke, invoke->GetDexPc());
CheckEntrypointTypes<kQuickAllocObjectWithChecks, void*, mirror::Class*>();
};
- if (invoke->InputAt(0)->IsConstant()) {
+ if (invoke->InputAt(0)->IsIntConstant()) {
int32_t value = invoke->InputAt(0)->AsIntConstant()->GetValue();
if (static_cast<uint32_t>(value - info.low) < info.length) {
- // Just embed the j.l.Integer in the code.
- DCHECK_NE(info.value_boot_image_reference, IntegerValueOfInfo::kInvalidReference);
+ // Just embed the object in the code.
+ DCHECK_NE(info.value_boot_image_reference, ValueOfInfo::kInvalidReference);
codegen_->LoadBootImageAddress(out, info.value_boot_image_reference);
} else {
DCHECK(locations->CanCall());
- // Allocate and initialize a new j.l.Integer.
- // TODO: If we JIT, we could allocate the j.l.Integer now, and store it in the
+ // Allocate and initialize a new object.
+ // TODO: If we JIT, we could allocate the object now, and store it in the
// JIT object table.
allocate_instance();
__ Mov(temp, value);
- assembler->StoreToOffset(kStoreWord, temp, out, info.value_offset);
+ assembler->StoreToOffset(GetStoreOperandType(type), temp, out, info.value_offset);
// Class pointer and `value` final field stores require a barrier before publication.
codegen_->GenerateMemoryBarrier(MemBarrierKind::kStoreStore);
}
@@ -2482,15 +2500,15 @@ void IntrinsicCodeGeneratorARMVIXL::VisitIntegerValueOf(HInvoke* invoke) {
__ Cmp(out, info.length);
vixl32::Label allocate, done;
__ B(hs, &allocate, /* is_far_target= */ false);
- // If the value is within the bounds, load the j.l.Integer directly from the array.
+ // If the value is within the bounds, load the object directly from the array.
codegen_->LoadBootImageAddress(temp, info.array_data_boot_image_reference);
codegen_->LoadFromShiftedRegOffset(DataType::Type::kReference, locations->Out(), temp, out);
assembler->MaybeUnpoisonHeapReference(out);
__ B(&done);
__ Bind(&allocate);
- // Otherwise allocate and initialize a new j.l.Integer.
+ // Otherwise allocate and initialize a new object.
allocate_instance();
- assembler->StoreToOffset(kStoreWord, in, out, info.value_offset);
+ assembler->StoreToOffset(GetStoreOperandType(type), in, out, info.value_offset);
// Class pointer and `value` final field stores require a barrier before publication.
codegen_->GenerateMemoryBarrier(MemBarrierKind::kStoreStore);
__ Bind(&done);
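
The comment above notes that the class pointer and the final `value` field stores must be visible before the new object is published, which the generated code enforces with a kStoreStore barrier. A standalone C++ sketch of that publish-after-initialize ordering, using standard fences rather than the architecture-specific barrier the intrinsic emits:

#include <atomic>
#include <cassert>
#include <thread>

// The payload stands in for the boxed object's fields; the atomic pointer stands
// in for the reference through which the object becomes reachable.
struct Box { int payload; };

std::atomic<Box*> g_published{nullptr};

void Producer(Box* box) {
  box->payload = 42;                                    // initialize the fields
  std::atomic_thread_fence(std::memory_order_release);  // "StoreStore" before publication
  g_published.store(box, std::memory_order_relaxed);    // publish the reference
}

void Consumer() {
  Box* box = g_published.load(std::memory_order_acquire);
  if (box != nullptr) {
    assert(box->payload == 42);  // guaranteed once the publication is observed
  }
}

int main() {
  Box box{0};
  std::thread t1(Producer, &box);
  std::thread t2(Consumer);
  t1.join();
  t2.join();
  return 0;
}
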
@@ -2511,7 +2529,7 @@ void IntrinsicCodeGeneratorARMVIXL::VisitReferenceGetReferent(HInvoke* invoke) {
SlowPathCodeARMVIXL* slow_path = new (GetAllocator()) IntrinsicSlowPathARMVIXL(invoke);
codegen_->AddSlowPath(slow_path);
- if (gUseReadBarrier) {
+ if (codegen_->EmitReadBarrier()) {
// Check self->GetWeakRefAccessEnabled().
UseScratchRegisterScope temps(assembler->GetVIXLAssembler());
vixl32::Register temp = temps.Acquire();
@@ -2539,7 +2557,7 @@ void IntrinsicCodeGeneratorARMVIXL::VisitReferenceGetReferent(HInvoke* invoke) {
// Load the value from the field.
uint32_t referent_offset = mirror::Reference::ReferentOffset().Uint32Value();
- if (gUseReadBarrier && kUseBakerReadBarrier) {
+ if (codegen_->EmitBakerReadBarrier()) {
codegen_->GenerateFieldLoadWithBakerReadBarrier(invoke,
out,
RegisterFrom(obj),
@@ -2560,7 +2578,7 @@ void IntrinsicCodeGeneratorARMVIXL::VisitReferenceGetReferent(HInvoke* invoke) {
}
void IntrinsicLocationsBuilderARMVIXL::VisitReferenceRefersTo(HInvoke* invoke) {
- IntrinsicVisitor::CreateReferenceRefersToLocations(invoke);
+ IntrinsicVisitor::CreateReferenceRefersToLocations(invoke, codegen_);
}
void IntrinsicCodeGeneratorARMVIXL::VisitReferenceRefersTo(HInvoke* invoke) {
@@ -2587,7 +2605,7 @@ void IntrinsicCodeGeneratorARMVIXL::VisitReferenceRefersTo(HInvoke* invoke) {
assembler->MaybeUnpoisonHeapReference(tmp);
codegen_->GenerateMemoryBarrier(MemBarrierKind::kLoadAny); // `referent` is volatile.
- if (gUseReadBarrier) {
+ if (codegen_->EmitReadBarrier()) {
DCHECK(kUseBakerReadBarrier);
vixl32::Label calculate_result;
@@ -2613,7 +2631,7 @@ void IntrinsicCodeGeneratorARMVIXL::VisitReferenceRefersTo(HInvoke* invoke) {
__ Bind(&calculate_result);
} else {
- DCHECK(!gUseReadBarrier);
+ DCHECK(!codegen_->EmitReadBarrier());
__ Sub(out, tmp, other);
}
@@ -2653,7 +2671,7 @@ void IntrinsicLocationsBuilderARMVIXL::VisitReachabilityFence(HInvoke* invoke) {
locations->SetInAt(0, Location::Any());
}
-void IntrinsicCodeGeneratorARMVIXL::VisitReachabilityFence(HInvoke* invoke ATTRIBUTE_UNUSED) { }
+void IntrinsicCodeGeneratorARMVIXL::VisitReachabilityFence([[maybe_unused]] HInvoke* invoke) {}
void IntrinsicLocationsBuilderARMVIXL::VisitIntegerDivideUnsigned(HInvoke* invoke) {
CreateIntIntToIntSlowPathCallLocations(allocator_, invoke);
@@ -2732,7 +2750,7 @@ static void GenerateIntrinsicGet(HInvoke* invoke,
}
break;
case DataType::Type::kReference:
- if (gUseReadBarrier && kUseBakerReadBarrier) {
+ if (codegen->EmitBakerReadBarrier()) {
// Piggy-back on the field load path using introspection for the Baker read barrier.
vixl32::Register temp = RegisterFrom(maybe_temp);
__ Add(temp, base, offset);
@@ -2777,32 +2795,18 @@ static void GenerateIntrinsicGet(HInvoke* invoke,
codegen->GenerateMemoryBarrier(
seq_cst_barrier ? MemBarrierKind::kAnyAny : MemBarrierKind::kLoadAny);
}
- if (type == DataType::Type::kReference && !(gUseReadBarrier && kUseBakerReadBarrier)) {
+ if (type == DataType::Type::kReference && !codegen->EmitBakerReadBarrier()) {
Location base_loc = LocationFrom(base);
Location index_loc = LocationFrom(offset);
codegen->MaybeGenerateReadBarrierSlow(invoke, out, out, base_loc, /* offset=*/ 0u, index_loc);
}
}
-static bool UnsafeGetIntrinsicOnCallList(Intrinsics intrinsic) {
- switch (intrinsic) {
- case Intrinsics::kUnsafeGetObject:
- case Intrinsics::kUnsafeGetObjectVolatile:
- case Intrinsics::kJdkUnsafeGetObject:
- case Intrinsics::kJdkUnsafeGetObjectVolatile:
- case Intrinsics::kJdkUnsafeGetObjectAcquire:
- return true;
- default:
- break;
- }
- return false;
-}
-
static void CreateUnsafeGetLocations(HInvoke* invoke,
CodeGeneratorARMVIXL* codegen,
DataType::Type type,
bool atomic) {
- bool can_call = gUseReadBarrier && UnsafeGetIntrinsicOnCallList(invoke->GetIntrinsic());
+ bool can_call = codegen->EmitReadBarrier() && IsUnsafeGetReference(invoke);
ArenaAllocator* allocator = invoke->GetBlock()->GetGraph()->GetAllocator();
LocationSummary* locations =
new (allocator) LocationSummary(invoke,
@@ -2818,7 +2822,7 @@ static void CreateUnsafeGetLocations(HInvoke* invoke,
locations->SetInAt(2, Location::RequiresRegister());
locations->SetOut(Location::RequiresRegister(),
(can_call ? Location::kOutputOverlap : Location::kNoOutputOverlap));
- if ((gUseReadBarrier && kUseBakerReadBarrier && type == DataType::Type::kReference) ||
+ if ((type == DataType::Type::kReference && codegen->EmitBakerReadBarrier()) ||
(type == DataType::Type::kInt64 && Use64BitExclusiveLoadStore(atomic, codegen))) {
// We need a temporary register for the read barrier marking slow
// path in CodeGeneratorARMVIXL::GenerateReferenceLoadWithBakerReadBarrier,
@@ -2837,7 +2841,7 @@ static void GenUnsafeGet(HInvoke* invoke,
vixl32::Register offset = LowRegisterFrom(locations->InAt(2)); // Long offset, lo part only.
Location out = locations->Out();
Location maybe_temp = Location::NoLocation();
- if ((gUseReadBarrier && kUseBakerReadBarrier && type == DataType::Type::kReference) ||
+ if ((type == DataType::Type::kReference && codegen->EmitBakerReadBarrier()) ||
(type == DataType::Type::kInt64 && Use64BitExclusiveLoadStore(atomic, codegen))) {
maybe_temp = locations->GetTemp(0);
}
@@ -2887,19 +2891,27 @@ void IntrinsicCodeGeneratorARMVIXL::VisitUnsafeGetLongVolatile(HInvoke* invoke)
}
void IntrinsicLocationsBuilderARMVIXL::VisitUnsafeGetObject(HInvoke* invoke) {
- VisitJdkUnsafeGetObject(invoke);
+ VisitJdkUnsafeGetReference(invoke);
}
void IntrinsicCodeGeneratorARMVIXL::VisitUnsafeGetObject(HInvoke* invoke) {
- VisitJdkUnsafeGetObject(invoke);
+ VisitJdkUnsafeGetReference(invoke);
}
void IntrinsicLocationsBuilderARMVIXL::VisitUnsafeGetObjectVolatile(HInvoke* invoke) {
- VisitJdkUnsafeGetObjectVolatile(invoke);
+ VisitJdkUnsafeGetReferenceVolatile(invoke);
}
void IntrinsicCodeGeneratorARMVIXL::VisitUnsafeGetObjectVolatile(HInvoke* invoke) {
- VisitJdkUnsafeGetObjectVolatile(invoke);
+ VisitJdkUnsafeGetReferenceVolatile(invoke);
+}
+
+void IntrinsicLocationsBuilderARMVIXL::VisitUnsafeGetByte(HInvoke* invoke) {
+ VisitJdkUnsafeGetByte(invoke);
+}
+
+void IntrinsicCodeGeneratorARMVIXL::VisitUnsafeGetByte(HInvoke* invoke) {
+ VisitJdkUnsafeGetByte(invoke);
}
void IntrinsicLocationsBuilderARMVIXL::VisitJdkUnsafeGet(HInvoke* invoke) {
@@ -2956,33 +2968,42 @@ void IntrinsicCodeGeneratorARMVIXL::VisitJdkUnsafeGetLongAcquire(HInvoke* invoke
invoke, codegen_, DataType::Type::kInt64, std::memory_order_acquire, /*atomic=*/ true);
}
-void IntrinsicLocationsBuilderARMVIXL::VisitJdkUnsafeGetObject(HInvoke* invoke) {
+void IntrinsicLocationsBuilderARMVIXL::VisitJdkUnsafeGetReference(HInvoke* invoke) {
CreateUnsafeGetLocations(invoke, codegen_, DataType::Type::kReference, /*atomic=*/ false);
}
-void IntrinsicCodeGeneratorARMVIXL::VisitJdkUnsafeGetObject(HInvoke* invoke) {
+void IntrinsicCodeGeneratorARMVIXL::VisitJdkUnsafeGetReference(HInvoke* invoke) {
GenUnsafeGet(
invoke, codegen_, DataType::Type::kReference, std::memory_order_relaxed, /*atomic=*/ false);
}
-void IntrinsicLocationsBuilderARMVIXL::VisitJdkUnsafeGetObjectVolatile(HInvoke* invoke) {
+void IntrinsicLocationsBuilderARMVIXL::VisitJdkUnsafeGetReferenceVolatile(HInvoke* invoke) {
CreateUnsafeGetLocations(invoke, codegen_, DataType::Type::kReference, /*atomic=*/ true);
}
-void IntrinsicCodeGeneratorARMVIXL::VisitJdkUnsafeGetObjectVolatile(HInvoke* invoke) {
+void IntrinsicCodeGeneratorARMVIXL::VisitJdkUnsafeGetReferenceVolatile(HInvoke* invoke) {
GenUnsafeGet(
invoke, codegen_, DataType::Type::kReference, std::memory_order_seq_cst, /*atomic=*/ true);
}
-void IntrinsicLocationsBuilderARMVIXL::VisitJdkUnsafeGetObjectAcquire(HInvoke* invoke) {
+void IntrinsicLocationsBuilderARMVIXL::VisitJdkUnsafeGetReferenceAcquire(HInvoke* invoke) {
CreateUnsafeGetLocations(invoke, codegen_, DataType::Type::kReference, /*atomic=*/ true);
}
-void IntrinsicCodeGeneratorARMVIXL::VisitJdkUnsafeGetObjectAcquire(HInvoke* invoke) {
+void IntrinsicCodeGeneratorARMVIXL::VisitJdkUnsafeGetReferenceAcquire(HInvoke* invoke) {
GenUnsafeGet(
invoke, codegen_, DataType::Type::kReference, std::memory_order_acquire, /*atomic=*/ true);
}
+void IntrinsicLocationsBuilderARMVIXL::VisitJdkUnsafeGetByte(HInvoke* invoke) {
+ CreateUnsafeGetLocations(invoke, codegen_, DataType::Type::kInt8, /*atomic=*/ false);
+}
+
+void IntrinsicCodeGeneratorARMVIXL::VisitJdkUnsafeGetByte(HInvoke* invoke) {
+ GenUnsafeGet(
+ invoke, codegen_, DataType::Type::kInt8, std::memory_order_relaxed, /*atomic=*/ false);
+}
+
static void GenerateIntrinsicSet(CodeGeneratorARMVIXL* codegen,
DataType::Type type,
std::memory_order order,
@@ -3156,11 +3177,11 @@ void IntrinsicCodeGeneratorARMVIXL::VisitUnsafePutVolatile(HInvoke* invoke) {
VisitJdkUnsafePutVolatile(invoke);
}
void IntrinsicLocationsBuilderARMVIXL::VisitUnsafePutObject(HInvoke* invoke) {
- VisitJdkUnsafePutObject(invoke);
+ VisitJdkUnsafePutReference(invoke);
}
void IntrinsicCodeGeneratorARMVIXL::VisitUnsafePutObject(HInvoke* invoke) {
- VisitJdkUnsafePutObject(invoke);
+ VisitJdkUnsafePutReference(invoke);
}
void IntrinsicLocationsBuilderARMVIXL::VisitUnsafePutObjectOrdered(HInvoke* invoke) {
@@ -3172,11 +3193,11 @@ void IntrinsicCodeGeneratorARMVIXL::VisitUnsafePutObjectOrdered(HInvoke* invoke)
}
void IntrinsicLocationsBuilderARMVIXL::VisitUnsafePutObjectVolatile(HInvoke* invoke) {
- VisitJdkUnsafePutObjectVolatile(invoke);
+ VisitJdkUnsafePutReferenceVolatile(invoke);
}
void IntrinsicCodeGeneratorARMVIXL::VisitUnsafePutObjectVolatile(HInvoke* invoke) {
- VisitJdkUnsafePutObjectVolatile(invoke);
+ VisitJdkUnsafePutReferenceVolatile(invoke);
}
void IntrinsicLocationsBuilderARMVIXL::VisitUnsafePutLong(HInvoke* invoke) {
@@ -3203,6 +3224,14 @@ void IntrinsicCodeGeneratorARMVIXL::VisitUnsafePutLongVolatile(HInvoke* invoke)
VisitJdkUnsafePutLongVolatile(invoke);
}
+void IntrinsicLocationsBuilderARMVIXL::VisitUnsafePutByte(HInvoke* invoke) {
+ VisitJdkUnsafePutByte(invoke);
+}
+
+void IntrinsicCodeGeneratorARMVIXL::VisitUnsafePutByte(HInvoke* invoke) {
+ VisitJdkUnsafePutByte(invoke);
+}
+
void IntrinsicLocationsBuilderARMVIXL::VisitJdkUnsafePut(HInvoke* invoke) {
CreateUnsafePutLocations(invoke, codegen_, DataType::Type::kInt32, /*atomic=*/ false);
}
@@ -3215,6 +3244,18 @@ void IntrinsicCodeGeneratorARMVIXL::VisitJdkUnsafePut(HInvoke* invoke) {
codegen_);
}
+void IntrinsicLocationsBuilderARMVIXL::VisitJdkUnsafePutByte(HInvoke* invoke) {
+ CreateUnsafePutLocations(invoke, codegen_, DataType::Type::kInt8, /*atomic=*/ false);
+}
+
+void IntrinsicCodeGeneratorARMVIXL::VisitJdkUnsafePutByte(HInvoke* invoke) {
+ GenUnsafePut(invoke,
+ DataType::Type::kInt8,
+ std::memory_order_relaxed,
+ /*atomic=*/ false,
+ codegen_);
+}
+
void IntrinsicLocationsBuilderARMVIXL::VisitJdkUnsafePutOrdered(HInvoke* invoke) {
CreateUnsafePutLocations(invoke, codegen_, DataType::Type::kInt32, /*atomic=*/ true);
}
@@ -3251,11 +3292,11 @@ void IntrinsicCodeGeneratorARMVIXL::VisitJdkUnsafePutRelease(HInvoke* invoke) {
codegen_);
}
-void IntrinsicLocationsBuilderARMVIXL::VisitJdkUnsafePutObject(HInvoke* invoke) {
+void IntrinsicLocationsBuilderARMVIXL::VisitJdkUnsafePutReference(HInvoke* invoke) {
CreateUnsafePutLocations(invoke, codegen_, DataType::Type::kReference, /*atomic=*/ false);
}
-void IntrinsicCodeGeneratorARMVIXL::VisitJdkUnsafePutObject(HInvoke* invoke) {
+void IntrinsicCodeGeneratorARMVIXL::VisitJdkUnsafePutReference(HInvoke* invoke) {
GenUnsafePut(invoke,
DataType::Type::kReference,
std::memory_order_relaxed,
@@ -3275,11 +3316,11 @@ void IntrinsicCodeGeneratorARMVIXL::VisitJdkUnsafePutObjectOrdered(HInvoke* invo
codegen_);
}
-void IntrinsicLocationsBuilderARMVIXL::VisitJdkUnsafePutObjectVolatile(HInvoke* invoke) {
+void IntrinsicLocationsBuilderARMVIXL::VisitJdkUnsafePutReferenceVolatile(HInvoke* invoke) {
CreateUnsafePutLocations(invoke, codegen_, DataType::Type::kReference, /*atomic=*/ true);
}
-void IntrinsicCodeGeneratorARMVIXL::VisitJdkUnsafePutObjectVolatile(HInvoke* invoke) {
+void IntrinsicCodeGeneratorARMVIXL::VisitJdkUnsafePutReferenceVolatile(HInvoke* invoke) {
GenUnsafePut(invoke,
DataType::Type::kReference,
std::memory_order_seq_cst,
@@ -3287,11 +3328,11 @@ void IntrinsicCodeGeneratorARMVIXL::VisitJdkUnsafePutObjectVolatile(HInvoke* inv
codegen_);
}
-void IntrinsicLocationsBuilderARMVIXL::VisitJdkUnsafePutObjectRelease(HInvoke* invoke) {
+void IntrinsicLocationsBuilderARMVIXL::VisitJdkUnsafePutReferenceRelease(HInvoke* invoke) {
CreateUnsafePutLocations(invoke, codegen_, DataType::Type::kReference, /*atomic=*/ true);
}
-void IntrinsicCodeGeneratorARMVIXL::VisitJdkUnsafePutObjectRelease(HInvoke* invoke) {
+void IntrinsicCodeGeneratorARMVIXL::VisitJdkUnsafePutReferenceRelease(HInvoke* invoke) {
GenUnsafePut(invoke,
DataType::Type::kReference,
std::memory_order_release,
@@ -3470,7 +3511,7 @@ static void GenerateCompareAndSet(CodeGeneratorARMVIXL* codegen,
// branch goes to the read barrier slow path that clobbers `success` anyway.
bool init_failure_for_cmp =
success.IsValid() &&
- !(gUseReadBarrier && type == DataType::Type::kReference && expected.IsRegister());
+ !(type == DataType::Type::kReference && codegen->EmitReadBarrier() && expected.IsRegister());
// Instruction scheduling: Loading a constant between LDREX* and using the loaded value
// is essentially free, so prepare the failure value here if we can.
bool init_failure_for_cmp_early =
@@ -3574,7 +3615,7 @@ class ReadBarrierCasSlowPathARMVIXL : public SlowPathCodeARMVIXL {
// Mark the `old_value_` from the main path and compare with `expected_`.
if (kUseBakerReadBarrier) {
DCHECK(mark_old_value_slow_path_ == nullptr);
- arm_codegen->GenerateIntrinsicCasMoveWithBakerReadBarrier(old_value_temp_, old_value_);
+ arm_codegen->GenerateIntrinsicMoveWithBakerReadBarrier(old_value_temp_, old_value_);
} else {
DCHECK(mark_old_value_slow_path_ != nullptr);
__ B(mark_old_value_slow_path_->GetEntryLabel());
@@ -3627,7 +3668,7 @@ class ReadBarrierCasSlowPathARMVIXL : public SlowPathCodeARMVIXL {
__ Bind(&mark_old_value);
if (kUseBakerReadBarrier) {
DCHECK(update_old_value_slow_path_ == nullptr);
- arm_codegen->GenerateIntrinsicCasMoveWithBakerReadBarrier(old_value_, old_value_temp_);
+ arm_codegen->GenerateIntrinsicMoveWithBakerReadBarrier(old_value_, old_value_temp_);
} else {
// Note: We could redirect the `failure` above directly to the entry label and bind
// the exit label in the main path, but the main path would need to access the
@@ -3654,8 +3695,9 @@ class ReadBarrierCasSlowPathARMVIXL : public SlowPathCodeARMVIXL {
SlowPathCodeARMVIXL* update_old_value_slow_path_;
};
-static void CreateUnsafeCASLocations(ArenaAllocator* allocator, HInvoke* invoke) {
- const bool can_call = gUseReadBarrier && IsUnsafeCASObject(invoke);
+static void CreateUnsafeCASLocations(HInvoke* invoke, CodeGeneratorARMVIXL* codegen) {
+ const bool can_call = codegen->EmitReadBarrier() && IsUnsafeCASReference(invoke);
+ ArenaAllocator* allocator = invoke->GetBlock()->GetGraph()->GetAllocator();
LocationSummary* locations =
new (allocator) LocationSummary(invoke,
can_call
@@ -3706,7 +3748,7 @@ static void GenUnsafeCas(HInvoke* invoke, DataType::Type type, CodeGeneratorARMV
vixl32::Label* exit_loop = &exit_loop_label;
vixl32::Label* cmp_failure = &exit_loop_label;
- if (gUseReadBarrier && type == DataType::Type::kReference) {
+ if (type == DataType::Type::kReference && codegen->EmitReadBarrier()) {
// If marking, check if the stored reference is a from-space reference to the same
// object as the to-space reference `expected`. If so, perform a custom CAS loop.
ReadBarrierCasSlowPathARMVIXL* slow_path =
@@ -3762,19 +3804,19 @@ void IntrinsicLocationsBuilderARMVIXL::VisitJdkUnsafeCASInt(HInvoke* invoke) {
}
void IntrinsicLocationsBuilderARMVIXL::VisitJdkUnsafeCASObject(HInvoke* invoke) {
// `jdk.internal.misc.Unsafe.compareAndSwapObject` has compare-and-set semantics (see javadoc).
- VisitJdkUnsafeCompareAndSetObject(invoke);
+ VisitJdkUnsafeCompareAndSetReference(invoke);
}
void IntrinsicLocationsBuilderARMVIXL::VisitJdkUnsafeCompareAndSetInt(HInvoke* invoke) {
- CreateUnsafeCASLocations(allocator_, invoke);
+ CreateUnsafeCASLocations(invoke, codegen_);
}
-void IntrinsicLocationsBuilderARMVIXL::VisitJdkUnsafeCompareAndSetObject(HInvoke* invoke) {
+void IntrinsicLocationsBuilderARMVIXL::VisitJdkUnsafeCompareAndSetReference(HInvoke* invoke) {
// The only supported read barrier implementation is the Baker-style read barriers (b/173104084).
- if (gUseReadBarrier && !kUseBakerReadBarrier) {
+ if (codegen_->EmitNonBakerReadBarrier()) {
return;
}
- CreateUnsafeCASLocations(allocator_, invoke);
+ CreateUnsafeCASLocations(invoke, codegen_);
}
void IntrinsicCodeGeneratorARMVIXL::VisitUnsafeCASInt(HInvoke* invoke) {
@@ -3790,15 +3832,15 @@ void IntrinsicCodeGeneratorARMVIXL::VisitJdkUnsafeCASInt(HInvoke* invoke) {
}
void IntrinsicCodeGeneratorARMVIXL::VisitJdkUnsafeCASObject(HInvoke* invoke) {
// `jdk.internal.misc.Unsafe.compareAndSwapObject` has compare-and-set semantics (see javadoc).
- VisitJdkUnsafeCompareAndSetObject(invoke);
+ VisitJdkUnsafeCompareAndSetReference(invoke);
}
void IntrinsicCodeGeneratorARMVIXL::VisitJdkUnsafeCompareAndSetInt(HInvoke* invoke) {
GenUnsafeCas(invoke, DataType::Type::kInt32, codegen_);
}
-void IntrinsicCodeGeneratorARMVIXL::VisitJdkUnsafeCompareAndSetObject(HInvoke* invoke) {
+void IntrinsicCodeGeneratorARMVIXL::VisitJdkUnsafeCompareAndSetReference(HInvoke* invoke) {
// The only supported read barrier implementation is the Baker-style read barriers (b/173104084).
- DCHECK_IMPLIES(gUseReadBarrier, kUseBakerReadBarrier);
+ DCHECK_IMPLIES(codegen_->EmitReadBarrier(), kUseBakerReadBarrier);
GenUnsafeCas(invoke, DataType::Type::kReference, codegen_);
}
@@ -3944,6 +3986,172 @@ static void GenerateGetAndUpdate(CodeGeneratorARMVIXL* codegen,
__ B(ne, &loop_label);
}
+static void CreateUnsafeGetAndUpdateLocations(HInvoke* invoke,
+ CodeGeneratorARMVIXL* codegen,
+ DataType::Type type,
+ GetAndUpdateOp get_and_update_op) {
+ const bool can_call = codegen->EmitReadBarrier() && IsUnsafeGetAndSetReference(invoke);
+ ArenaAllocator* allocator = invoke->GetBlock()->GetGraph()->GetAllocator();
+ LocationSummary* locations =
+ new (allocator) LocationSummary(invoke,
+ can_call
+ ? LocationSummary::kCallOnSlowPath
+ : LocationSummary::kNoCall,
+ kIntrinsified);
+ if (can_call && kUseBakerReadBarrier) {
+ locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty()); // No caller-save registers.
+ }
+ locations->SetInAt(0, Location::NoLocation()); // Unused receiver.
+ locations->SetInAt(1, Location::RequiresRegister());
+ locations->SetInAt(2, Location::RequiresRegister());
+ locations->SetInAt(3, Location::RequiresRegister());
+
+ locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap);
+
+ size_t num_temps = 1u; // We always need `tmp_ptr`.
+ if (get_and_update_op == GetAndUpdateOp::kAdd) {
+ // Add `maybe_temp` used for the new value in `GenerateGetAndUpdate()`.
+ num_temps += (type == DataType::Type::kInt64) ? 2u : 1u;
+ if (type == DataType::Type::kInt64) {
+ // There are enough available registers but the register allocator can fail to allocate
+ // them correctly because it can block register pairs by single-register inputs and temps.
+ // To work around this limitation, use a fixed register pair for both the output and
+ // the offset, which is no longer needed after the address calculation.
+ // (Alternatively, we could set up distinct fixed locations for `offset`, `arg` and `out`.)
+ locations->SetInAt(2, LocationFrom(r0, r1));
+ locations->UpdateOut(LocationFrom(r0, r1));
+ }
+ }
+ locations->AddRegisterTemps(num_temps);
+}
+
+static void GenUnsafeGetAndUpdate(HInvoke* invoke,
+ CodeGeneratorARMVIXL* codegen,
+ DataType::Type type,
+ GetAndUpdateOp get_and_update_op) {
+ ArmVIXLAssembler* assembler = codegen->GetAssembler();
+ LocationSummary* locations = invoke->GetLocations();
+
+ Location out = locations->Out(); // Result.
+ vixl32::Register base = InputRegisterAt(invoke, 1); // Object pointer.
+ vixl32::Register offset = LowRegisterFrom(locations->InAt(2)); // Offset (discard high 4B).
+ Location arg = locations->InAt(3); // New value or addend.
+ vixl32::Register tmp_ptr = RegisterFrom(locations->GetTemp(0)); // Pointer to actual memory.
+ Location maybe_temp = Location::NoLocation();
+ if (get_and_update_op == GetAndUpdateOp::kAdd) {
+ maybe_temp = (type == DataType::Type::kInt64)
+ ? LocationFrom(RegisterFrom(locations->GetTemp(1)), RegisterFrom(locations->GetTemp(2)))
+ : locations->GetTemp(1);
+ }
+
+ UseScratchRegisterScope temps(assembler->GetVIXLAssembler());
+ vixl32::Register temp = temps.Acquire();
+
+ if (type == DataType::Type::kReference) {
+ DCHECK(get_and_update_op == GetAndUpdateOp::kSet);
+ // Mark card for object as a new value shall be stored.
+ bool new_value_can_be_null = true; // TODO: Worth finding out this information?
+ vixl32::Register card = tmp_ptr; // Use the `tmp_ptr` also as the `card` temporary.
+ codegen->MarkGCCard(temp, card, base, /*value=*/ RegisterFrom(arg), new_value_can_be_null);
+ }
+
+ // Note: UnsafeGetAndUpdate operations are sequentially consistent, requiring
+ // a barrier before and after the raw load/store-exclusive operation.
+
+ __ Add(tmp_ptr, base, Operand(offset));
+ codegen->GenerateMemoryBarrier(MemBarrierKind::kAnyAny);
+ GenerateGetAndUpdate(codegen,
+ get_and_update_op,
+ type,
+ tmp_ptr,
+ arg,
+ /*old_value=*/ out,
+ /*store_result=*/ temp,
+ maybe_temp,
+ /*maybe_vreg_temp=*/ Location::NoLocation());
+ codegen->GenerateMemoryBarrier(MemBarrierKind::kAnyAny);
+
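+ // For the reference case, the exclusive load above read the old value without a read barrier;
+ // apply the read barrier to it now before returning it as the result.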
+ if (type == DataType::Type::kReference && codegen->EmitReadBarrier()) {
+ DCHECK(get_and_update_op == GetAndUpdateOp::kSet);
+ if (kUseBakerReadBarrier) {
+ codegen->GenerateIntrinsicMoveWithBakerReadBarrier(RegisterFrom(out), RegisterFrom(out));
+ } else {
+ codegen->GenerateReadBarrierSlow(
+ invoke,
+ out,
+ out,
+ Location::RegisterLocation(base.GetCode()),
+ /*offset=*/ 0u,
+ /*index=*/ Location::RegisterLocation(offset.GetCode()));
+ }
+ }
+}
+
+void IntrinsicLocationsBuilderARMVIXL::VisitUnsafeGetAndAddInt(HInvoke* invoke) {
+ VisitJdkUnsafeGetAndAddInt(invoke);
+}
+void IntrinsicLocationsBuilderARMVIXL::VisitUnsafeGetAndAddLong(HInvoke* invoke) {
+ VisitJdkUnsafeGetAndAddLong(invoke);
+}
+void IntrinsicLocationsBuilderARMVIXL::VisitUnsafeGetAndSetInt(HInvoke* invoke) {
+ VisitJdkUnsafeGetAndSetInt(invoke);
+}
+void IntrinsicLocationsBuilderARMVIXL::VisitUnsafeGetAndSetLong(HInvoke* invoke) {
+ VisitJdkUnsafeGetAndSetLong(invoke);
+}
+void IntrinsicLocationsBuilderARMVIXL::VisitUnsafeGetAndSetObject(HInvoke* invoke) {
+ VisitJdkUnsafeGetAndSetReference(invoke);
+}
+
+void IntrinsicLocationsBuilderARMVIXL::VisitJdkUnsafeGetAndAddInt(HInvoke* invoke) {
+ CreateUnsafeGetAndUpdateLocations(invoke, codegen_, DataType::Type::kInt32, GetAndUpdateOp::kAdd);
+}
+void IntrinsicLocationsBuilderARMVIXL::VisitJdkUnsafeGetAndAddLong(HInvoke* invoke) {
+ CreateUnsafeGetAndUpdateLocations(invoke, codegen_, DataType::Type::kInt64, GetAndUpdateOp::kAdd);
+}
+void IntrinsicLocationsBuilderARMVIXL::VisitJdkUnsafeGetAndSetInt(HInvoke* invoke) {
+ CreateUnsafeGetAndUpdateLocations(invoke, codegen_, DataType::Type::kInt32, GetAndUpdateOp::kSet);
+}
+void IntrinsicLocationsBuilderARMVIXL::VisitJdkUnsafeGetAndSetLong(HInvoke* invoke) {
+ CreateUnsafeGetAndUpdateLocations(invoke, codegen_, DataType::Type::kInt64, GetAndUpdateOp::kSet);
+}
+void IntrinsicLocationsBuilderARMVIXL::VisitJdkUnsafeGetAndSetReference(HInvoke* invoke) {
+ CreateUnsafeGetAndUpdateLocations(
+ invoke, codegen_, DataType::Type::kReference, GetAndUpdateOp::kSet);
+}
+
+void IntrinsicCodeGeneratorARMVIXL::VisitUnsafeGetAndAddInt(HInvoke* invoke) {
+ VisitJdkUnsafeGetAndAddInt(invoke);
+}
+void IntrinsicCodeGeneratorARMVIXL::VisitUnsafeGetAndAddLong(HInvoke* invoke) {
+ VisitJdkUnsafeGetAndAddLong(invoke);
+}
+void IntrinsicCodeGeneratorARMVIXL::VisitUnsafeGetAndSetInt(HInvoke* invoke) {
+ VisitJdkUnsafeGetAndSetInt(invoke);
+}
+void IntrinsicCodeGeneratorARMVIXL::VisitUnsafeGetAndSetLong(HInvoke* invoke) {
+ VisitJdkUnsafeGetAndSetLong(invoke);
+}
+void IntrinsicCodeGeneratorARMVIXL::VisitUnsafeGetAndSetObject(HInvoke* invoke) {
+ VisitJdkUnsafeGetAndSetReference(invoke);
+}
+
+void IntrinsicCodeGeneratorARMVIXL::VisitJdkUnsafeGetAndAddInt(HInvoke* invoke) {
+ GenUnsafeGetAndUpdate(invoke, codegen_, DataType::Type::kInt32, GetAndUpdateOp::kAdd);
+}
+void IntrinsicCodeGeneratorARMVIXL::VisitJdkUnsafeGetAndAddLong(HInvoke* invoke) {
+ GenUnsafeGetAndUpdate(invoke, codegen_, DataType::Type::kInt64, GetAndUpdateOp::kAdd);
+}
+void IntrinsicCodeGeneratorARMVIXL::VisitJdkUnsafeGetAndSetInt(HInvoke* invoke) {
+ GenUnsafeGetAndUpdate(invoke, codegen_, DataType::Type::kInt32, GetAndUpdateOp::kSet);
+}
+void IntrinsicCodeGeneratorARMVIXL::VisitJdkUnsafeGetAndSetLong(HInvoke* invoke) {
+ GenUnsafeGetAndUpdate(invoke, codegen_, DataType::Type::kInt64, GetAndUpdateOp::kSet);
+}
+void IntrinsicCodeGeneratorARMVIXL::VisitJdkUnsafeGetAndSetReference(HInvoke* invoke) {
+ GenUnsafeGetAndUpdate(invoke, codegen_, DataType::Type::kReference, GetAndUpdateOp::kSet);
+}
+
class VarHandleSlowPathARMVIXL : public IntrinsicSlowPathARMVIXL {
public:
VarHandleSlowPathARMVIXL(HInvoke* invoke, std::memory_order order)
@@ -4335,23 +4543,23 @@ static void GenerateVarHandleTarget(HInvoke* invoke,
__ Mov(target.offset, target_field->GetOffset().Uint32Value());
} else {
// For static fields, we need to fill the `target.object` with the declaring class,
- // so we can use `target.object` as temporary for the `ArtMethod*`. For instance fields,
- // we do not need the declaring class, so we can forget the `ArtMethod*` when
- // we load the `target.offset`, so use the `target.offset` to hold the `ArtMethod*`.
- vixl32::Register method = (expected_coordinates_count == 0) ? target.object : target.offset;
+ // so we can use `target.object` as temporary for the `ArtField*`. For instance fields,
+ // we do not need the declaring class, so we can forget the `ArtField*` when
+ // we load the `target.offset`, so use the `target.offset` to hold the `ArtField*`.
+ vixl32::Register field = (expected_coordinates_count == 0) ? target.object : target.offset;
const MemberOffset art_field_offset = mirror::FieldVarHandle::ArtFieldOffset();
const MemberOffset offset_offset = ArtField::OffsetOffset();
- // Load the ArtField, the offset and, if needed, declaring class.
- __ Ldr(method, MemOperand(varhandle, art_field_offset.Int32Value()));
- __ Ldr(target.offset, MemOperand(method, offset_offset.Int32Value()));
+ // Load the ArtField*, the offset and, if needed, declaring class.
+ __ Ldr(field, MemOperand(varhandle, art_field_offset.Int32Value()));
+ __ Ldr(target.offset, MemOperand(field, offset_offset.Int32Value()));
if (expected_coordinates_count == 0u) {
codegen->GenerateGcRootFieldLoad(invoke,
LocationFrom(target.object),
- method,
+ field,
ArtField::DeclaringClassOffset().Int32Value(),
- gCompilerReadBarrierOption);
+ codegen->GetCompilerReadBarrierOption());
}
}
} else {
@@ -4371,7 +4579,8 @@ static void GenerateVarHandleTarget(HInvoke* invoke,
}
}
-static LocationSummary* CreateVarHandleCommonLocations(HInvoke* invoke) {
+static LocationSummary* CreateVarHandleCommonLocations(HInvoke* invoke,
+ CodeGeneratorARMVIXL* codegen) {
size_t expected_coordinates_count = GetExpectedVarHandleCoordinatesCount(invoke);
DataType::Type return_type = invoke->GetType();
@@ -4403,7 +4612,7 @@ static LocationSummary* CreateVarHandleCommonLocations(HInvoke* invoke) {
}
// Add a temporary for offset.
- if ((gUseReadBarrier && !kUseBakerReadBarrier) &&
+ if (codegen->EmitNonBakerReadBarrier() &&
GetExpectedVarHandleCoordinatesCount(invoke) == 0u) { // For static fields.
// To preserve the offset value across the non-Baker read barrier slow path
// for loading the declaring class, use a fixed callee-save register.
@@ -4428,7 +4637,7 @@ static void CreateVarHandleGetLocations(HInvoke* invoke,
return;
}
- if ((gUseReadBarrier && !kUseBakerReadBarrier) &&
+ if (codegen->EmitNonBakerReadBarrier() &&
invoke->GetType() == DataType::Type::kReference &&
invoke->GetIntrinsic() != Intrinsics::kVarHandleGet &&
invoke->GetIntrinsic() != Intrinsics::kVarHandleGetOpaque) {
@@ -4438,7 +4647,7 @@ static void CreateVarHandleGetLocations(HInvoke* invoke,
return;
}
- LocationSummary* locations = CreateVarHandleCommonLocations(invoke);
+ LocationSummary* locations = CreateVarHandleCommonLocations(invoke, codegen);
DataType::Type type = invoke->GetType();
if (type == DataType::Type::kFloat64 && Use64BitExclusiveLoadStore(atomic, codegen)) {
@@ -4476,7 +4685,7 @@ static void GenerateVarHandleGet(HInvoke* invoke,
Location maybe_temp = Location::NoLocation();
Location maybe_temp2 = Location::NoLocation();
Location maybe_temp3 = Location::NoLocation();
- if (gUseReadBarrier && kUseBakerReadBarrier && type == DataType::Type::kReference) {
+ if (type == DataType::Type::kReference && codegen->EmitBakerReadBarrier()) {
// Reuse the offset temporary.
maybe_temp = LocationFrom(target.offset);
} else if (DataType::Is64BitType(type) && Use64BitExclusiveLoadStore(atomic, codegen)) {
@@ -4580,7 +4789,7 @@ static void CreateVarHandleSetLocations(HInvoke* invoke,
return;
}
- LocationSummary* locations = CreateVarHandleCommonLocations(invoke);
+ LocationSummary* locations = CreateVarHandleCommonLocations(invoke, codegen);
uint32_t number_of_arguments = invoke->GetNumberOfArguments();
DataType::Type value_type = GetDataTypeFromShorty(invoke, number_of_arguments - 1u);
@@ -4741,7 +4950,9 @@ void IntrinsicCodeGeneratorARMVIXL::VisitVarHandleSetVolatile(HInvoke* invoke) {
GenerateVarHandleSet(invoke, codegen_, std::memory_order_seq_cst, /*atomic=*/ true);
}
-static void CreateVarHandleCompareAndSetOrExchangeLocations(HInvoke* invoke, bool return_success) {
+static void CreateVarHandleCompareAndSetOrExchangeLocations(HInvoke* invoke,
+ CodeGeneratorARMVIXL* codegen,
+ bool return_success) {
VarHandleOptimizations optimizations(invoke);
if (optimizations.GetDoNotIntrinsify()) {
return;
@@ -4749,8 +4960,7 @@ static void CreateVarHandleCompareAndSetOrExchangeLocations(HInvoke* invoke, boo
uint32_t number_of_arguments = invoke->GetNumberOfArguments();
DataType::Type value_type = GetDataTypeFromShorty(invoke, number_of_arguments - 1u);
- if ((gUseReadBarrier && !kUseBakerReadBarrier) &&
- value_type == DataType::Type::kReference) {
+ if (value_type == DataType::Type::kReference && codegen->EmitNonBakerReadBarrier()) {
// Unsupported for non-Baker read barrier because the artReadBarrierSlow() ignores
// the passed reference and reloads it from the field. This breaks the read barriers
// in slow path in different ways. The marked old value may not actually be a to-space
@@ -4761,9 +4971,9 @@ static void CreateVarHandleCompareAndSetOrExchangeLocations(HInvoke* invoke, boo
return;
}
- LocationSummary* locations = CreateVarHandleCommonLocations(invoke);
+ LocationSummary* locations = CreateVarHandleCommonLocations(invoke, codegen);
- if (gUseReadBarrier && !kUseBakerReadBarrier) {
+ if (codegen->EmitNonBakerReadBarrier()) {
// We need callee-save registers for both the class object and offset instead of
// the temporaries reserved in CreateVarHandleCommonLocations().
static_assert(POPCOUNT(kArmCalleeSaveRefSpills) >= 2u);
@@ -4799,7 +5009,7 @@ static void CreateVarHandleCompareAndSetOrExchangeLocations(HInvoke* invoke, boo
locations->AddRegisterTemps(2u);
}
}
- if (gUseReadBarrier && value_type == DataType::Type::kReference) {
+ if (value_type == DataType::Type::kReference && codegen->EmitReadBarrier()) {
// Add a temporary for store result, also used for the `old_value_temp` in slow path.
locations->AddTemp(Location::RequiresRegister());
}
@@ -4930,7 +5140,7 @@ static void GenerateVarHandleCompareAndSetOrExchange(HInvoke* invoke,
vixl32::Label* exit_loop = &exit_loop_label;
vixl32::Label* cmp_failure = &exit_loop_label;
- if (gUseReadBarrier && value_type == DataType::Type::kReference) {
+ if (value_type == DataType::Type::kReference && codegen->EmitReadBarrier()) {
// The `old_value_temp` is used first for the marked `old_value` and then for the unmarked
// reloaded old value for subsequent CAS in the slow path. This must not clobber `old_value`.
vixl32::Register old_value_temp = return_success ? RegisterFrom(out) : store_result;
@@ -5008,7 +5218,7 @@ static void GenerateVarHandleCompareAndSetOrExchange(HInvoke* invoke,
}
void IntrinsicLocationsBuilderARMVIXL::VisitVarHandleCompareAndExchange(HInvoke* invoke) {
- CreateVarHandleCompareAndSetOrExchangeLocations(invoke, /*return_success=*/ false);
+ CreateVarHandleCompareAndSetOrExchangeLocations(invoke, codegen_, /*return_success=*/ false);
}
void IntrinsicCodeGeneratorARMVIXL::VisitVarHandleCompareAndExchange(HInvoke* invoke) {
@@ -5017,7 +5227,7 @@ void IntrinsicCodeGeneratorARMVIXL::VisitVarHandleCompareAndExchange(HInvoke* in
}
void IntrinsicLocationsBuilderARMVIXL::VisitVarHandleCompareAndExchangeAcquire(HInvoke* invoke) {
- CreateVarHandleCompareAndSetOrExchangeLocations(invoke, /*return_success=*/ false);
+ CreateVarHandleCompareAndSetOrExchangeLocations(invoke, codegen_, /*return_success=*/ false);
}
void IntrinsicCodeGeneratorARMVIXL::VisitVarHandleCompareAndExchangeAcquire(HInvoke* invoke) {
@@ -5026,7 +5236,7 @@ void IntrinsicCodeGeneratorARMVIXL::VisitVarHandleCompareAndExchangeAcquire(HInv
}
void IntrinsicLocationsBuilderARMVIXL::VisitVarHandleCompareAndExchangeRelease(HInvoke* invoke) {
- CreateVarHandleCompareAndSetOrExchangeLocations(invoke, /*return_success=*/ false);
+ CreateVarHandleCompareAndSetOrExchangeLocations(invoke, codegen_, /*return_success=*/ false);
}
void IntrinsicCodeGeneratorARMVIXL::VisitVarHandleCompareAndExchangeRelease(HInvoke* invoke) {
@@ -5035,7 +5245,7 @@ void IntrinsicCodeGeneratorARMVIXL::VisitVarHandleCompareAndExchangeRelease(HInv
}
void IntrinsicLocationsBuilderARMVIXL::VisitVarHandleCompareAndSet(HInvoke* invoke) {
- CreateVarHandleCompareAndSetOrExchangeLocations(invoke, /*return_success=*/ true);
+ CreateVarHandleCompareAndSetOrExchangeLocations(invoke, codegen_, /*return_success=*/ true);
}
void IntrinsicCodeGeneratorARMVIXL::VisitVarHandleCompareAndSet(HInvoke* invoke) {
@@ -5044,7 +5254,7 @@ void IntrinsicCodeGeneratorARMVIXL::VisitVarHandleCompareAndSet(HInvoke* invoke)
}
void IntrinsicLocationsBuilderARMVIXL::VisitVarHandleWeakCompareAndSet(HInvoke* invoke) {
- CreateVarHandleCompareAndSetOrExchangeLocations(invoke, /*return_success=*/ true);
+ CreateVarHandleCompareAndSetOrExchangeLocations(invoke, codegen_, /*return_success=*/ true);
}
void IntrinsicCodeGeneratorARMVIXL::VisitVarHandleWeakCompareAndSet(HInvoke* invoke) {
@@ -5053,7 +5263,7 @@ void IntrinsicCodeGeneratorARMVIXL::VisitVarHandleWeakCompareAndSet(HInvoke* inv
}
void IntrinsicLocationsBuilderARMVIXL::VisitVarHandleWeakCompareAndSetAcquire(HInvoke* invoke) {
- CreateVarHandleCompareAndSetOrExchangeLocations(invoke, /*return_success=*/ true);
+ CreateVarHandleCompareAndSetOrExchangeLocations(invoke, codegen_, /*return_success=*/ true);
}
void IntrinsicCodeGeneratorARMVIXL::VisitVarHandleWeakCompareAndSetAcquire(HInvoke* invoke) {
@@ -5062,7 +5272,7 @@ void IntrinsicCodeGeneratorARMVIXL::VisitVarHandleWeakCompareAndSetAcquire(HInvo
}
void IntrinsicLocationsBuilderARMVIXL::VisitVarHandleWeakCompareAndSetPlain(HInvoke* invoke) {
- CreateVarHandleCompareAndSetOrExchangeLocations(invoke, /*return_success=*/ true);
+ CreateVarHandleCompareAndSetOrExchangeLocations(invoke, codegen_, /*return_success=*/ true);
}
void IntrinsicCodeGeneratorARMVIXL::VisitVarHandleWeakCompareAndSetPlain(HInvoke* invoke) {
@@ -5071,7 +5281,7 @@ void IntrinsicCodeGeneratorARMVIXL::VisitVarHandleWeakCompareAndSetPlain(HInvoke
}
void IntrinsicLocationsBuilderARMVIXL::VisitVarHandleWeakCompareAndSetRelease(HInvoke* invoke) {
- CreateVarHandleCompareAndSetOrExchangeLocations(invoke, /*return_success=*/ true);
+ CreateVarHandleCompareAndSetOrExchangeLocations(invoke, codegen_, /*return_success=*/ true);
}
void IntrinsicCodeGeneratorARMVIXL::VisitVarHandleWeakCompareAndSetRelease(HInvoke* invoke) {
@@ -5080,21 +5290,21 @@ void IntrinsicCodeGeneratorARMVIXL::VisitVarHandleWeakCompareAndSetRelease(HInvo
}
static void CreateVarHandleGetAndUpdateLocations(HInvoke* invoke,
+ CodeGeneratorARMVIXL* codegen,
GetAndUpdateOp get_and_update_op) {
VarHandleOptimizations optimizations(invoke);
if (optimizations.GetDoNotIntrinsify()) {
return;
}
- if ((gUseReadBarrier && !kUseBakerReadBarrier) &&
- invoke->GetType() == DataType::Type::kReference) {
+ if (invoke->GetType() == DataType::Type::kReference && codegen->EmitNonBakerReadBarrier()) {
// Unsupported for non-Baker read barrier because the artReadBarrierSlow() ignores
// the passed reference and reloads it from the field, thus seeing the new value
// that we have just stored. (And it also gets the memory visibility wrong.) b/173104084
return;
}
- LocationSummary* locations = CreateVarHandleCommonLocations(invoke);
+ LocationSummary* locations = CreateVarHandleCommonLocations(invoke, codegen);
// We can reuse the declaring class (if present) and offset temporary, except for
// non-Baker read barriers that need them for the slow path.
@@ -5107,8 +5317,7 @@ static void CreateVarHandleGetAndUpdateLocations(HInvoke* invoke,
// Add temps needed to do the GenerateGetAndUpdate() with core registers.
size_t temps_needed = (value_type == DataType::Type::kFloat64) ? 5u : 3u;
locations->AddRegisterTemps(temps_needed - locations->GetTempCount());
- } else if ((gUseReadBarrier && !kUseBakerReadBarrier) &&
- value_type == DataType::Type::kReference) {
+ } else if (value_type == DataType::Type::kReference && codegen->EmitNonBakerReadBarrier()) {
// We need to preserve the declaring class (if present) and offset for read barrier
// slow paths, so we must use a separate temporary for the exclusive store result.
locations->AddTemp(Location::RequiresRegister());
@@ -5213,7 +5422,7 @@ static void GenerateVarHandleGetAndUpdate(HInvoke* invoke,
if (byte_swap) {
GenerateReverseBytes(assembler, DataType::Type::kInt32, arg, arg);
}
- } else if (gUseReadBarrier && value_type == DataType::Type::kReference) {
+ } else if (value_type == DataType::Type::kReference && codegen->EmitReadBarrier()) {
if (kUseBakerReadBarrier) {
// Load the old value initially to a temporary register.
// We shall move it to `out` later with a read barrier.
@@ -5296,10 +5505,10 @@ static void GenerateVarHandleGetAndUpdate(HInvoke* invoke,
} else {
__ Vmov(SRegisterFrom(out), RegisterFrom(old_value));
}
- } else if (gUseReadBarrier && value_type == DataType::Type::kReference) {
+ } else if (value_type == DataType::Type::kReference && codegen->EmitReadBarrier()) {
if (kUseBakerReadBarrier) {
- codegen->GenerateIntrinsicCasMoveWithBakerReadBarrier(RegisterFrom(out),
- RegisterFrom(old_value));
+ codegen->GenerateIntrinsicMoveWithBakerReadBarrier(RegisterFrom(out),
+ RegisterFrom(old_value));
} else {
codegen->GenerateReadBarrierSlow(
invoke,
@@ -5327,7 +5536,7 @@ static void GenerateVarHandleGetAndUpdate(HInvoke* invoke,
}
void IntrinsicLocationsBuilderARMVIXL::VisitVarHandleGetAndSet(HInvoke* invoke) {
- CreateVarHandleGetAndUpdateLocations(invoke, GetAndUpdateOp::kSet);
+ CreateVarHandleGetAndUpdateLocations(invoke, codegen_, GetAndUpdateOp::kSet);
}
void IntrinsicCodeGeneratorARMVIXL::VisitVarHandleGetAndSet(HInvoke* invoke) {
@@ -5335,7 +5544,7 @@ void IntrinsicCodeGeneratorARMVIXL::VisitVarHandleGetAndSet(HInvoke* invoke) {
}
void IntrinsicLocationsBuilderARMVIXL::VisitVarHandleGetAndSetAcquire(HInvoke* invoke) {
- CreateVarHandleGetAndUpdateLocations(invoke, GetAndUpdateOp::kSet);
+ CreateVarHandleGetAndUpdateLocations(invoke, codegen_, GetAndUpdateOp::kSet);
}
void IntrinsicCodeGeneratorARMVIXL::VisitVarHandleGetAndSetAcquire(HInvoke* invoke) {
@@ -5343,7 +5552,7 @@ void IntrinsicCodeGeneratorARMVIXL::VisitVarHandleGetAndSetAcquire(HInvoke* invo
}
void IntrinsicLocationsBuilderARMVIXL::VisitVarHandleGetAndSetRelease(HInvoke* invoke) {
- CreateVarHandleGetAndUpdateLocations(invoke, GetAndUpdateOp::kSet);
+ CreateVarHandleGetAndUpdateLocations(invoke, codegen_, GetAndUpdateOp::kSet);
}
void IntrinsicCodeGeneratorARMVIXL::VisitVarHandleGetAndSetRelease(HInvoke* invoke) {
@@ -5351,7 +5560,7 @@ void IntrinsicCodeGeneratorARMVIXL::VisitVarHandleGetAndSetRelease(HInvoke* invo
}
void IntrinsicLocationsBuilderARMVIXL::VisitVarHandleGetAndAdd(HInvoke* invoke) {
- CreateVarHandleGetAndUpdateLocations(invoke, GetAndUpdateOp::kAdd);
+ CreateVarHandleGetAndUpdateLocations(invoke, codegen_, GetAndUpdateOp::kAdd);
}
void IntrinsicCodeGeneratorARMVIXL::VisitVarHandleGetAndAdd(HInvoke* invoke) {
@@ -5359,7 +5568,7 @@ void IntrinsicCodeGeneratorARMVIXL::VisitVarHandleGetAndAdd(HInvoke* invoke) {
}
void IntrinsicLocationsBuilderARMVIXL::VisitVarHandleGetAndAddAcquire(HInvoke* invoke) {
- CreateVarHandleGetAndUpdateLocations(invoke, GetAndUpdateOp::kAdd);
+ CreateVarHandleGetAndUpdateLocations(invoke, codegen_, GetAndUpdateOp::kAdd);
}
void IntrinsicCodeGeneratorARMVIXL::VisitVarHandleGetAndAddAcquire(HInvoke* invoke) {
@@ -5367,7 +5576,7 @@ void IntrinsicCodeGeneratorARMVIXL::VisitVarHandleGetAndAddAcquire(HInvoke* invo
}
void IntrinsicLocationsBuilderARMVIXL::VisitVarHandleGetAndAddRelease(HInvoke* invoke) {
- CreateVarHandleGetAndUpdateLocations(invoke, GetAndUpdateOp::kAdd);
+ CreateVarHandleGetAndUpdateLocations(invoke, codegen_, GetAndUpdateOp::kAdd);
}
void IntrinsicCodeGeneratorARMVIXL::VisitVarHandleGetAndAddRelease(HInvoke* invoke) {
@@ -5375,7 +5584,7 @@ void IntrinsicCodeGeneratorARMVIXL::VisitVarHandleGetAndAddRelease(HInvoke* invo
}
void IntrinsicLocationsBuilderARMVIXL::VisitVarHandleGetAndBitwiseAnd(HInvoke* invoke) {
- CreateVarHandleGetAndUpdateLocations(invoke, GetAndUpdateOp::kAnd);
+ CreateVarHandleGetAndUpdateLocations(invoke, codegen_, GetAndUpdateOp::kAnd);
}
void IntrinsicCodeGeneratorARMVIXL::VisitVarHandleGetAndBitwiseAnd(HInvoke* invoke) {
@@ -5383,7 +5592,7 @@ void IntrinsicCodeGeneratorARMVIXL::VisitVarHandleGetAndBitwiseAnd(HInvoke* invo
}
void IntrinsicLocationsBuilderARMVIXL::VisitVarHandleGetAndBitwiseAndAcquire(HInvoke* invoke) {
- CreateVarHandleGetAndUpdateLocations(invoke, GetAndUpdateOp::kAnd);
+ CreateVarHandleGetAndUpdateLocations(invoke, codegen_, GetAndUpdateOp::kAnd);
}
void IntrinsicCodeGeneratorARMVIXL::VisitVarHandleGetAndBitwiseAndAcquire(HInvoke* invoke) {
@@ -5391,7 +5600,7 @@ void IntrinsicCodeGeneratorARMVIXL::VisitVarHandleGetAndBitwiseAndAcquire(HInvok
}
void IntrinsicLocationsBuilderARMVIXL::VisitVarHandleGetAndBitwiseAndRelease(HInvoke* invoke) {
- CreateVarHandleGetAndUpdateLocations(invoke, GetAndUpdateOp::kAnd);
+ CreateVarHandleGetAndUpdateLocations(invoke, codegen_, GetAndUpdateOp::kAnd);
}
void IntrinsicCodeGeneratorARMVIXL::VisitVarHandleGetAndBitwiseAndRelease(HInvoke* invoke) {
@@ -5399,7 +5608,7 @@ void IntrinsicCodeGeneratorARMVIXL::VisitVarHandleGetAndBitwiseAndRelease(HInvok
}
void IntrinsicLocationsBuilderARMVIXL::VisitVarHandleGetAndBitwiseOr(HInvoke* invoke) {
- CreateVarHandleGetAndUpdateLocations(invoke, GetAndUpdateOp::kOr);
+ CreateVarHandleGetAndUpdateLocations(invoke, codegen_, GetAndUpdateOp::kOr);
}
void IntrinsicCodeGeneratorARMVIXL::VisitVarHandleGetAndBitwiseOr(HInvoke* invoke) {
@@ -5407,7 +5616,7 @@ void IntrinsicCodeGeneratorARMVIXL::VisitVarHandleGetAndBitwiseOr(HInvoke* invok
}
void IntrinsicLocationsBuilderARMVIXL::VisitVarHandleGetAndBitwiseOrAcquire(HInvoke* invoke) {
- CreateVarHandleGetAndUpdateLocations(invoke, GetAndUpdateOp::kOr);
+ CreateVarHandleGetAndUpdateLocations(invoke, codegen_, GetAndUpdateOp::kOr);
}
void IntrinsicCodeGeneratorARMVIXL::VisitVarHandleGetAndBitwiseOrAcquire(HInvoke* invoke) {
@@ -5415,7 +5624,7 @@ void IntrinsicCodeGeneratorARMVIXL::VisitVarHandleGetAndBitwiseOrAcquire(HInvoke
}
void IntrinsicLocationsBuilderARMVIXL::VisitVarHandleGetAndBitwiseOrRelease(HInvoke* invoke) {
- CreateVarHandleGetAndUpdateLocations(invoke, GetAndUpdateOp::kOr);
+ CreateVarHandleGetAndUpdateLocations(invoke, codegen_, GetAndUpdateOp::kOr);
}
void IntrinsicCodeGeneratorARMVIXL::VisitVarHandleGetAndBitwiseOrRelease(HInvoke* invoke) {
@@ -5423,7 +5632,7 @@ void IntrinsicCodeGeneratorARMVIXL::VisitVarHandleGetAndBitwiseOrRelease(HInvoke
}
void IntrinsicLocationsBuilderARMVIXL::VisitVarHandleGetAndBitwiseXor(HInvoke* invoke) {
- CreateVarHandleGetAndUpdateLocations(invoke, GetAndUpdateOp::kXor);
+ CreateVarHandleGetAndUpdateLocations(invoke, codegen_, GetAndUpdateOp::kXor);
}
void IntrinsicCodeGeneratorARMVIXL::VisitVarHandleGetAndBitwiseXor(HInvoke* invoke) {
@@ -5431,7 +5640,7 @@ void IntrinsicCodeGeneratorARMVIXL::VisitVarHandleGetAndBitwiseXor(HInvoke* invo
}
void IntrinsicLocationsBuilderARMVIXL::VisitVarHandleGetAndBitwiseXorAcquire(HInvoke* invoke) {
- CreateVarHandleGetAndUpdateLocations(invoke, GetAndUpdateOp::kXor);
+ CreateVarHandleGetAndUpdateLocations(invoke, codegen_, GetAndUpdateOp::kXor);
}
void IntrinsicCodeGeneratorARMVIXL::VisitVarHandleGetAndBitwiseXorAcquire(HInvoke* invoke) {
@@ -5439,7 +5648,7 @@ void IntrinsicCodeGeneratorARMVIXL::VisitVarHandleGetAndBitwiseXorAcquire(HInvok
}
void IntrinsicLocationsBuilderARMVIXL::VisitVarHandleGetAndBitwiseXorRelease(HInvoke* invoke) {
- CreateVarHandleGetAndUpdateLocations(invoke, GetAndUpdateOp::kXor);
+ CreateVarHandleGetAndUpdateLocations(invoke, codegen_, GetAndUpdateOp::kXor);
}
void IntrinsicCodeGeneratorARMVIXL::VisitVarHandleGetAndBitwiseXorRelease(HInvoke* invoke) {
diff --git a/compiler/optimizing/intrinsics_arm_vixl.h b/compiler/optimizing/intrinsics_arm_vixl.h
index 54475bcc7e..fd86866d4e 100644
--- a/compiler/optimizing/intrinsics_arm_vixl.h
+++ b/compiler/optimizing/intrinsics_arm_vixl.h
@@ -19,6 +19,7 @@
#include "base/macros.h"
#include "intrinsics.h"
+#include "intrinsics_list.h"
#include "utils/arm/assembler_arm_vixl.h"
namespace art HIDDEN {
@@ -36,9 +37,7 @@ class IntrinsicLocationsBuilderARMVIXL final : public IntrinsicVisitor {
#define OPTIMIZING_INTRINSICS(Name, IsStatic, NeedsEnvironmentOrCache, SideEffects, Exceptions, ...) \
void Visit ## Name(HInvoke* invoke) override;
-#include "intrinsics_list.h"
- INTRINSICS_LIST(OPTIMIZING_INTRINSICS)
-#undef INTRINSICS_LIST
+ ART_INTRINSICS_LIST(OPTIMIZING_INTRINSICS)
#undef OPTIMIZING_INTRINSICS
// Check whether an invoke is an intrinsic, and if so, create a location summary. Returns whether
@@ -63,15 +62,17 @@ class IntrinsicCodeGeneratorARMVIXL final : public IntrinsicVisitor {
#define OPTIMIZING_INTRINSICS(Name, IsStatic, NeedsEnvironmentOrCache, SideEffects, Exceptions, ...) \
void Visit ## Name(HInvoke* invoke) override;
-#include "intrinsics_list.h"
- INTRINSICS_LIST(OPTIMIZING_INTRINSICS)
-#undef INTRINSICS_LIST
+ ART_INTRINSICS_LIST(OPTIMIZING_INTRINSICS)
#undef OPTIMIZING_INTRINSICS
private:
ArenaAllocator* GetAllocator();
ArmVIXLAssembler* GetAssembler();
+ void HandleValueOf(HInvoke* invoke,
+ const IntrinsicVisitor::ValueOfInfo& info,
+ DataType::Type type);
+
CodeGeneratorARMVIXL* const codegen_;
DISALLOW_COPY_AND_ASSIGN(IntrinsicCodeGeneratorARMVIXL);
diff --git a/compiler/optimizing/intrinsics_riscv64.cc b/compiler/optimizing/intrinsics_riscv64.cc
new file mode 100644
index 0000000000..7f99f91374
--- /dev/null
+++ b/compiler/optimizing/intrinsics_riscv64.cc
@@ -0,0 +1,4584 @@
+/*
+ * Copyright (C) 2023 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "intrinsics_riscv64.h"
+
+#include "code_generator_riscv64.h"
+#include "intrinsic_objects.h"
+#include "intrinsics_utils.h"
+#include "well_known_classes.h"
+
+namespace art HIDDEN {
+namespace riscv64 {
+
+using IntrinsicSlowPathRISCV64 = IntrinsicSlowPath<InvokeDexCallingConventionVisitorRISCV64,
+ SlowPathCodeRISCV64,
+ Riscv64Assembler>;
+
+#define __ assembler->
+
+// Slow path implementing the SystemArrayCopy intrinsic copy loop with read barriers.
+class ReadBarrierSystemArrayCopySlowPathRISCV64 : public SlowPathCodeRISCV64 {
+ public:
+ ReadBarrierSystemArrayCopySlowPathRISCV64(HInstruction* instruction, Location tmp)
+ : SlowPathCodeRISCV64(instruction), tmp_(tmp) {}
+
+ void EmitNativeCode(CodeGenerator* codegen_in) override {
+ DCHECK(codegen_in->EmitBakerReadBarrier());
+ CodeGeneratorRISCV64* codegen = down_cast<CodeGeneratorRISCV64*>(codegen_in);
+ Riscv64Assembler* assembler = codegen->GetAssembler();
+ LocationSummary* locations = instruction_->GetLocations();
+ DCHECK(locations->CanCall());
+ DCHECK(instruction_->IsInvokeStaticOrDirect())
+ << "Unexpected instruction in read barrier arraycopy slow path: "
+ << instruction_->DebugName();
+ DCHECK(instruction_->GetLocations()->Intrinsified());
+ DCHECK_EQ(instruction_->AsInvoke()->GetIntrinsic(), Intrinsics::kSystemArrayCopy);
+
+ const int32_t element_size = DataType::Size(DataType::Type::kReference);
+
+ XRegister src_curr_addr = locations->GetTemp(0).AsRegister<XRegister>();
+ XRegister dst_curr_addr = locations->GetTemp(1).AsRegister<XRegister>();
+ XRegister src_stop_addr = locations->GetTemp(2).AsRegister<XRegister>();
+ XRegister tmp_reg = tmp_.AsRegister<XRegister>();
+
+ __ Bind(GetEntryLabel());
+ Riscv64Label slow_copy_loop;
+ __ Bind(&slow_copy_loop);
+ __ Loadwu(tmp_reg, src_curr_addr, 0);
+ codegen->MaybeUnpoisonHeapReference(tmp_reg);
+ // TODO: Inline the mark bit check before calling the runtime?
+ // tmp_reg = ReadBarrier::Mark(tmp_reg);
+ // No need to save live registers; it's taken care of by the
+ // entrypoint. Also, there is no need to update the stack mask,
+ // as this runtime call will not trigger a garbage collection.
+ // (See ReadBarrierMarkSlowPathRISCV64::EmitNativeCode for more
+ // explanations.)
+ int32_t entry_point_offset = ReadBarrierMarkEntrypointOffset(tmp_);
+ // This runtime call does not require a stack map.
+ codegen->InvokeRuntimeWithoutRecordingPcInfo(entry_point_offset, instruction_, this);
+ codegen->MaybePoisonHeapReference(tmp_reg);
+ __ Storew(tmp_reg, dst_curr_addr, 0);
+ __ Addi(src_curr_addr, src_curr_addr, element_size);
+ __ Addi(dst_curr_addr, dst_curr_addr, element_size);
+ __ Bne(src_curr_addr, src_stop_addr, &slow_copy_loop);
+ __ J(GetExitLabel());
+ }
+
+ const char* GetDescription() const override {
+ return "ReadBarrierSystemArrayCopySlowPathRISCV64";
+ }
+
+ private:
+ Location tmp_;
+
+ DISALLOW_COPY_AND_ASSIGN(ReadBarrierSystemArrayCopySlowPathRISCV64);
+};
+
+bool IntrinsicLocationsBuilderRISCV64::TryDispatch(HInvoke* invoke) {
+ Dispatch(invoke);
+ LocationSummary* res = invoke->GetLocations();
+ if (res == nullptr) {
+ return false;
+ }
+ return res->Intrinsified();
+}
+
+Riscv64Assembler* IntrinsicCodeGeneratorRISCV64::GetAssembler() {
+ return codegen_->GetAssembler();
+}
+
+static void CreateFPToIntLocations(ArenaAllocator* allocator, HInvoke* invoke) {
+ LocationSummary* locations =
+ new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
+ locations->SetInAt(0, Location::RequiresFpuRegister());
+ locations->SetOut(Location::RequiresRegister());
+}
+
+static void CreateIntToFPLocations(ArenaAllocator* allocator, HInvoke* invoke) {
+ LocationSummary* locations =
+ new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
+ locations->SetInAt(0, Location::RequiresRegister());
+ locations->SetOut(Location::RequiresFpuRegister());
+}
+
+static void CreateFPToFPCallLocations(ArenaAllocator* allocator, HInvoke* invoke) {
+ DCHECK_EQ(invoke->GetNumberOfArguments(), 1U);
+ DCHECK(DataType::IsFloatingPointType(invoke->InputAt(0)->GetType()));
+ DCHECK(DataType::IsFloatingPointType(invoke->GetType()));
+
+ LocationSummary* const locations =
+ new (allocator) LocationSummary(invoke, LocationSummary::kCallOnMainOnly, kIntrinsified);
+ InvokeRuntimeCallingConvention calling_convention;
+
+ locations->SetInAt(0, Location::FpuRegisterLocation(calling_convention.GetFpuRegisterAt(0)));
+ locations->SetOut(calling_convention.GetReturnLocation(invoke->GetType()));
+}
+
+static void CreateFPFPToFPCallLocations(ArenaAllocator* allocator, HInvoke* invoke) {
+ DCHECK_EQ(invoke->GetNumberOfArguments(), 2U);
+ DCHECK(DataType::IsFloatingPointType(invoke->InputAt(0)->GetType()));
+ DCHECK(DataType::IsFloatingPointType(invoke->InputAt(1)->GetType()));
+ DCHECK(DataType::IsFloatingPointType(invoke->GetType()));
+
+ LocationSummary* const locations =
+ new (allocator) LocationSummary(invoke, LocationSummary::kCallOnMainOnly, kIntrinsified);
+ InvokeRuntimeCallingConvention calling_convention;
+
+ locations->SetInAt(0, Location::FpuRegisterLocation(calling_convention.GetFpuRegisterAt(0)));
+ locations->SetInAt(1, Location::FpuRegisterLocation(calling_convention.GetFpuRegisterAt(1)));
+ locations->SetOut(calling_convention.GetReturnLocation(invoke->GetType()));
+}
+
+static void CreateFpFpFpToFpNoOverlapLocations(ArenaAllocator* allocator, HInvoke* invoke) {
+ DCHECK_EQ(invoke->GetNumberOfArguments(), 3U);
+ DCHECK(DataType::IsFloatingPointType(invoke->InputAt(0)->GetType()));
+ DCHECK(DataType::IsFloatingPointType(invoke->InputAt(1)->GetType()));
+ DCHECK(DataType::IsFloatingPointType(invoke->InputAt(2)->GetType()));
+ DCHECK(DataType::IsFloatingPointType(invoke->GetType()));
+
+ LocationSummary* const locations =
+ new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
+
+ locations->SetInAt(0, Location::RequiresFpuRegister());
+ locations->SetInAt(1, Location::RequiresFpuRegister());
+ locations->SetInAt(2, Location::RequiresFpuRegister());
+ locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
+}
+
+static void CreateFPToFPLocations(ArenaAllocator* allocator, HInvoke* invoke) {
+ LocationSummary* locations =
+ new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
+ locations->SetInAt(0, Location::RequiresFpuRegister());
+ locations->SetOut(Location::RequiresFpuRegister());
+}
+
+void IntrinsicLocationsBuilderRISCV64::VisitDoubleDoubleToRawLongBits(HInvoke* invoke) {
+ CreateFPToIntLocations(allocator_, invoke);
+}
+
+void IntrinsicCodeGeneratorRISCV64::VisitDoubleDoubleToRawLongBits(HInvoke* invoke) {
+ LocationSummary* locations = invoke->GetLocations();
+ Riscv64Assembler* assembler = GetAssembler();
+ __ FMvXD(locations->Out().AsRegister<XRegister>(), locations->InAt(0).AsFpuRegister<FRegister>());
+}
+
+void IntrinsicLocationsBuilderRISCV64::VisitDoubleLongBitsToDouble(HInvoke* invoke) {
+ CreateIntToFPLocations(allocator_, invoke);
+}
+
+void IntrinsicCodeGeneratorRISCV64::VisitDoubleLongBitsToDouble(HInvoke* invoke) {
+ LocationSummary* locations = invoke->GetLocations();
+ Riscv64Assembler* assembler = GetAssembler();
+ __ FMvDX(locations->Out().AsFpuRegister<FRegister>(), locations->InAt(0).AsRegister<XRegister>());
+}
+
+void IntrinsicLocationsBuilderRISCV64::VisitFloatFloatToRawIntBits(HInvoke* invoke) {
+ CreateFPToIntLocations(allocator_, invoke);
+}
+
+void IntrinsicCodeGeneratorRISCV64::VisitFloatFloatToRawIntBits(HInvoke* invoke) {
+ LocationSummary* locations = invoke->GetLocations();
+ Riscv64Assembler* assembler = GetAssembler();
+ __ FMvXW(locations->Out().AsRegister<XRegister>(), locations->InAt(0).AsFpuRegister<FRegister>());
+}
+
+void IntrinsicLocationsBuilderRISCV64::VisitFloatIntBitsToFloat(HInvoke* invoke) {
+ CreateIntToFPLocations(allocator_, invoke);
+}
+
+void IntrinsicCodeGeneratorRISCV64::VisitFloatIntBitsToFloat(HInvoke* invoke) {
+ LocationSummary* locations = invoke->GetLocations();
+ Riscv64Assembler* assembler = GetAssembler();
+ __ FMvWX(locations->Out().AsFpuRegister<FRegister>(), locations->InAt(0).AsRegister<XRegister>());
+}
+
+void IntrinsicLocationsBuilderRISCV64::VisitDoubleIsInfinite(HInvoke* invoke) {
+ CreateFPToIntLocations(allocator_, invoke);
+}
+
+void IntrinsicCodeGeneratorRISCV64::VisitDoubleIsInfinite(HInvoke* invoke) {
+ LocationSummary* locations = invoke->GetLocations();
+ Riscv64Assembler* assembler = GetAssembler();
+ XRegister out = locations->Out().AsRegister<XRegister>();
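+ // FClassD sets exactly one classification bit; keep only the two infinity bits and
+ // normalize the result to 0/1.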
+ __ FClassD(out, locations->InAt(0).AsFpuRegister<FRegister>());
+ __ Andi(out, out, kPositiveInfinity | kNegativeInfinity);
+ __ Snez(out, out);
+}
+
+void IntrinsicLocationsBuilderRISCV64::VisitFloatIsInfinite(HInvoke* invoke) {
+ CreateFPToIntLocations(allocator_, invoke);
+}
+
+void IntrinsicCodeGeneratorRISCV64::VisitFloatIsInfinite(HInvoke* invoke) {
+ LocationSummary* locations = invoke->GetLocations();
+ Riscv64Assembler* assembler = GetAssembler();
+ XRegister out = locations->Out().AsRegister<XRegister>();
+ __ FClassS(out, locations->InAt(0).AsFpuRegister<FRegister>());
+ __ Andi(out, out, kPositiveInfinity | kNegativeInfinity);
+ __ Snez(out, out);
+}
+
+static void CreateIntToIntNoOverlapLocations(ArenaAllocator* allocator, HInvoke* invoke) {
+ LocationSummary* locations =
+ new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
+ locations->SetInAt(0, Location::RequiresRegister());
+ locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
+}
+
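+ // Shared helper for the MemoryPeek* intrinsics: passes the intrinsic's output register and
+ // address register to `emit_op`, which emits the actual load.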
+template <typename EmitOp>
+void EmitMemoryPeek(HInvoke* invoke, EmitOp&& emit_op) {
+ LocationSummary* locations = invoke->GetLocations();
+ emit_op(locations->Out().AsRegister<XRegister>(), locations->InAt(0).AsRegister<XRegister>());
+}
+
+void IntrinsicLocationsBuilderRISCV64::VisitMemoryPeekByte(HInvoke* invoke) {
+ CreateIntToIntNoOverlapLocations(allocator_, invoke);
+}
+
+void IntrinsicCodeGeneratorRISCV64::VisitMemoryPeekByte(HInvoke* invoke) {
+ Riscv64Assembler* assembler = GetAssembler();
+ EmitMemoryPeek(invoke, [&](XRegister rd, XRegister rs1) { __ Lb(rd, rs1, 0); });
+}
+
+void IntrinsicLocationsBuilderRISCV64::VisitMemoryPeekIntNative(HInvoke* invoke) {
+ CreateIntToIntNoOverlapLocations(allocator_, invoke);
+}
+
+void IntrinsicCodeGeneratorRISCV64::VisitMemoryPeekIntNative(HInvoke* invoke) {
+ Riscv64Assembler* assembler = GetAssembler();
+ EmitMemoryPeek(invoke, [&](XRegister rd, XRegister rs1) { __ Lw(rd, rs1, 0); });
+}
+
+void IntrinsicLocationsBuilderRISCV64::VisitMemoryPeekLongNative(HInvoke* invoke) {
+ CreateIntToIntNoOverlapLocations(allocator_, invoke);
+}
+
+void IntrinsicCodeGeneratorRISCV64::VisitMemoryPeekLongNative(HInvoke* invoke) {
+ Riscv64Assembler* assembler = GetAssembler();
+ EmitMemoryPeek(invoke, [&](XRegister rd, XRegister rs1) { __ Ld(rd, rs1, 0); });
+}
+
+void IntrinsicLocationsBuilderRISCV64::VisitMemoryPeekShortNative(HInvoke* invoke) {
+ CreateIntToIntNoOverlapLocations(allocator_, invoke);
+}
+
+void IntrinsicCodeGeneratorRISCV64::VisitMemoryPeekShortNative(HInvoke* invoke) {
+ Riscv64Assembler* assembler = GetAssembler();
+ EmitMemoryPeek(invoke, [&](XRegister rd, XRegister rs1) { __ Lh(rd, rs1, 0); });
+}
+
+static void CreateIntIntToVoidLocations(ArenaAllocator* allocator, HInvoke* invoke) {
+ LocationSummary* locations =
+ new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
+ locations->SetInAt(0, Location::RequiresRegister());
+ locations->SetInAt(1, Location::RequiresRegister());
+}
+
+static void CreateIntIntToIntSlowPathCallLocations(ArenaAllocator* allocator, HInvoke* invoke) {
+ LocationSummary* locations =
+ new (allocator) LocationSummary(invoke, LocationSummary::kCallOnSlowPath, kIntrinsified);
+ locations->SetInAt(0, Location::RequiresRegister());
+ locations->SetInAt(1, Location::RequiresRegister());
+ // Force kOutputOverlap; see comments in IntrinsicSlowPath::EmitNativeCode.
+ locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap);
+}
+
+template <typename EmitOp>
+void EmitMemoryPoke(HInvoke* invoke, EmitOp&& emit_op) {
+ LocationSummary* locations = invoke->GetLocations();
+ emit_op(locations->InAt(1).AsRegister<XRegister>(), locations->InAt(0).AsRegister<XRegister>());
+}
+
+void IntrinsicLocationsBuilderRISCV64::VisitMemoryPokeByte(HInvoke* invoke) {
+ CreateIntIntToVoidLocations(allocator_, invoke);
+}
+
+void IntrinsicCodeGeneratorRISCV64::VisitMemoryPokeByte(HInvoke* invoke) {
+ Riscv64Assembler* assembler = GetAssembler();
+ EmitMemoryPoke(invoke, [&](XRegister rs2, XRegister rs1) { __ Sb(rs2, rs1, 0); });
+}
+
+void IntrinsicLocationsBuilderRISCV64::VisitMemoryPokeIntNative(HInvoke* invoke) {
+ CreateIntIntToVoidLocations(allocator_, invoke);
+}
+
+void IntrinsicCodeGeneratorRISCV64::VisitMemoryPokeIntNative(HInvoke* invoke) {
+ Riscv64Assembler* assembler = GetAssembler();
+ EmitMemoryPoke(invoke, [&](XRegister rs2, XRegister rs1) { __ Sw(rs2, rs1, 0); });
+}
+
+void IntrinsicLocationsBuilderRISCV64::VisitMemoryPokeLongNative(HInvoke* invoke) {
+ CreateIntIntToVoidLocations(allocator_, invoke);
+}
+
+void IntrinsicCodeGeneratorRISCV64::VisitMemoryPokeLongNative(HInvoke* invoke) {
+ Riscv64Assembler* assembler = GetAssembler();
+ EmitMemoryPoke(invoke, [&](XRegister rs2, XRegister rs1) { __ Sd(rs2, rs1, 0); });
+}
+
+void IntrinsicLocationsBuilderRISCV64::VisitMemoryPokeShortNative(HInvoke* invoke) {
+ CreateIntIntToVoidLocations(allocator_, invoke);
+}
+
+void IntrinsicCodeGeneratorRISCV64::VisitMemoryPokeShortNative(HInvoke* invoke) {
+ Riscv64Assembler* assembler = GetAssembler();
+ EmitMemoryPoke(invoke, [&](XRegister rs2, XRegister rs1) { __ Sh(rs2, rs1, 0); });
+}
+
+static void GenerateReverseBytes(CodeGeneratorRISCV64* codegen,
+ Location rd,
+ XRegister rs1,
+ DataType::Type type) {
+ Riscv64Assembler* assembler = codegen->GetAssembler();
+ switch (type) {
+ case DataType::Type::kUint16:
+ // There is no 16-bit reverse bytes instruction.
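+ // Rev8 reverses all eight bytes of the register, leaving the swapped halfword in bits [63:48];
+ // shift it back down, zero-extending here and sign-extending for kInt16 below.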
+ __ Rev8(rd.AsRegister<XRegister>(), rs1);
+ __ Srli(rd.AsRegister<XRegister>(), rd.AsRegister<XRegister>(), 48);
+ break;
+ case DataType::Type::kInt16:
+ // There is no 16-bit reverse bytes instruction.
+ __ Rev8(rd.AsRegister<XRegister>(), rs1);
+ __ Srai(rd.AsRegister<XRegister>(), rd.AsRegister<XRegister>(), 48);
+ break;
+ case DataType::Type::kInt32:
+ // There is no 32-bit reverse bytes instruction.
+ __ Rev8(rd.AsRegister<XRegister>(), rs1);
+ __ Srai(rd.AsRegister<XRegister>(), rd.AsRegister<XRegister>(), 32);
+ break;
+ case DataType::Type::kInt64:
+ __ Rev8(rd.AsRegister<XRegister>(), rs1);
+ break;
+ case DataType::Type::kFloat32:
+ // There is no 32-bit reverse bytes instruction.
+ __ Rev8(rs1, rs1); // Note: Clobbers `rs1`.
+ __ Srai(rs1, rs1, 32);
+ __ FMvWX(rd.AsFpuRegister<FRegister>(), rs1);
+ break;
+ case DataType::Type::kFloat64:
+ __ Rev8(rs1, rs1); // Note: Clobbers `rs1`.
+ __ FMvDX(rd.AsFpuRegister<FRegister>(), rs1);
+ break;
+ default:
+ LOG(FATAL) << "Unexpected type: " << type;
+ UNREACHABLE();
+ }
+}
+
+static void GenerateReverseBytes(CodeGeneratorRISCV64* codegen,
+ HInvoke* invoke,
+ DataType::Type type) {
+ DCHECK_EQ(type, invoke->GetType());
+ LocationSummary* locations = invoke->GetLocations();
+ GenerateReverseBytes(codegen, locations->Out(), locations->InAt(0).AsRegister<XRegister>(), type);
+}
+
+static void GenerateReverse(CodeGeneratorRISCV64* codegen, HInvoke* invoke, DataType::Type type) {
+ DCHECK_EQ(type, invoke->GetType());
+ Riscv64Assembler* assembler = codegen->GetAssembler();
+ LocationSummary* locations = invoke->GetLocations();
+ XRegister in = locations->InAt(0).AsRegister<XRegister>();
+ XRegister out = locations->Out().AsRegister<XRegister>();
+ ScratchRegisterScope srs(assembler);
+ XRegister temp1 = srs.AllocateXRegister();
+ XRegister temp2 = srs.AllocateXRegister();
+
+ auto maybe_extend_mask = [type, assembler](XRegister mask, XRegister temp) {
+ if (type == DataType::Type::kInt64) {
+ __ Slli(temp, mask, 32);
+ __ Add(mask, mask, temp);
+ }
+ };
+
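+ // Classic divide-and-conquer bit reversal: swap adjacent bits, then 2-bit pairs, then nibbles,
+ // and finally reverse the byte order; Sh1Add/Sh2Add fold the left shift of the masked half
+ // into the add.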
+ // Swap bits in bit pairs.
+ __ Li(temp1, 0x55555555);
+ maybe_extend_mask(temp1, temp2);
+ __ Srli(temp2, in, 1);
+ __ And(out, in, temp1);
+ __ And(temp2, temp2, temp1);
+ __ Sh1Add(out, out, temp2);
+
+ // Swap bit pairs in 4-bit groups.
+ __ Li(temp1, 0x33333333);
+ maybe_extend_mask(temp1, temp2);
+ __ Srli(temp2, out, 2);
+ __ And(out, out, temp1);
+ __ And(temp2, temp2, temp1);
+ __ Sh2Add(out, out, temp2);
+
+ // Swap 4-bit groups in 8-bit groups.
+ __ Li(temp1, 0x0f0f0f0f);
+ maybe_extend_mask(temp1, temp2);
+ __ Srli(temp2, out, 4);
+ __ And(out, out, temp1);
+ __ And(temp2, temp2, temp1);
+ __ Slli(out, out, 4);
+ __ Add(out, out, temp2);
+
+ GenerateReverseBytes(codegen, Location::RegisterLocation(out), out, type);
+}
+
+void IntrinsicLocationsBuilderRISCV64::VisitIntegerReverse(HInvoke* invoke) {
+ CreateIntToIntNoOverlapLocations(allocator_, invoke);
+}
+
+void IntrinsicCodeGeneratorRISCV64::VisitIntegerReverse(HInvoke* invoke) {
+ GenerateReverse(codegen_, invoke, DataType::Type::kInt32);
+}
+
+void IntrinsicLocationsBuilderRISCV64::VisitLongReverse(HInvoke* invoke) {
+ CreateIntToIntNoOverlapLocations(allocator_, invoke);
+}
+
+void IntrinsicCodeGeneratorRISCV64::VisitLongReverse(HInvoke* invoke) {
+ GenerateReverse(codegen_, invoke, DataType::Type::kInt64);
+}
+
+void IntrinsicLocationsBuilderRISCV64::VisitIntegerReverseBytes(HInvoke* invoke) {
+ CreateIntToIntNoOverlapLocations(allocator_, invoke);
+}
+
+void IntrinsicCodeGeneratorRISCV64::VisitIntegerReverseBytes(HInvoke* invoke) {
+ GenerateReverseBytes(codegen_, invoke, DataType::Type::kInt32);
+}
+
+void IntrinsicLocationsBuilderRISCV64::VisitLongReverseBytes(HInvoke* invoke) {
+ CreateIntToIntNoOverlapLocations(allocator_, invoke);
+}
+
+void IntrinsicCodeGeneratorRISCV64::VisitLongReverseBytes(HInvoke* invoke) {
+ GenerateReverseBytes(codegen_, invoke, DataType::Type::kInt64);
+}
+
+void IntrinsicLocationsBuilderRISCV64::VisitShortReverseBytes(HInvoke* invoke) {
+ CreateIntToIntNoOverlapLocations(allocator_, invoke);
+}
+
+void IntrinsicCodeGeneratorRISCV64::VisitShortReverseBytes(HInvoke* invoke) {
+ GenerateReverseBytes(codegen_, invoke, DataType::Type::kInt16);
+}
+
+template <typename EmitOp>
+void EmitIntegralUnOp(HInvoke* invoke, EmitOp&& emit_op) {
+ LocationSummary* locations = invoke->GetLocations();
+ emit_op(locations->Out().AsRegister<XRegister>(), locations->InAt(0).AsRegister<XRegister>());
+}
+
+void IntrinsicLocationsBuilderRISCV64::VisitIntegerBitCount(HInvoke* invoke) {
+ CreateIntToIntNoOverlapLocations(allocator_, invoke);
+}
+
+void IntrinsicCodeGeneratorRISCV64::VisitIntegerBitCount(HInvoke* invoke) {
+ Riscv64Assembler* assembler = GetAssembler();
+ EmitIntegralUnOp(invoke, [&](XRegister rd, XRegister rs1) { __ Cpopw(rd, rs1); });
+}
+
+void IntrinsicLocationsBuilderRISCV64::VisitLongBitCount(HInvoke* invoke) {
+ CreateIntToIntNoOverlapLocations(allocator_, invoke);
+}
+
+void IntrinsicCodeGeneratorRISCV64::VisitLongBitCount(HInvoke* invoke) {
+ Riscv64Assembler* assembler = GetAssembler();
+ EmitIntegralUnOp(invoke, [&](XRegister rd, XRegister rs1) { __ Cpop(rd, rs1); });
+}
+
+void IntrinsicLocationsBuilderRISCV64::VisitIntegerHighestOneBit(HInvoke* invoke) {
+ CreateIntToIntNoOverlapLocations(allocator_, invoke);
+}
+
+void IntrinsicCodeGeneratorRISCV64::VisitIntegerHighestOneBit(HInvoke* invoke) {
+ Riscv64Assembler* assembler = GetAssembler();
+ EmitIntegralUnOp(invoke, [&](XRegister rd, XRegister rs1) {
+ ScratchRegisterScope srs(assembler);
+ XRegister tmp = srs.AllocateXRegister();
+ XRegister tmp2 = srs.AllocateXRegister();
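+ // Shift 0x80000000 right by the number of leading zeros to isolate the highest set bit.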
+ __ Clzw(tmp, rs1);
+ __ Li(tmp2, INT64_C(-0x80000000));
+ __ Srlw(tmp2, tmp2, tmp);
+ __ And(rd, rs1, tmp2); // Make sure the result is zero if the input is zero.
+ });
+}
+
+void IntrinsicLocationsBuilderRISCV64::VisitLongHighestOneBit(HInvoke* invoke) {
+ CreateIntToIntNoOverlapLocations(allocator_, invoke);
+}
+
+void IntrinsicCodeGeneratorRISCV64::VisitLongHighestOneBit(HInvoke* invoke) {
+ Riscv64Assembler* assembler = GetAssembler();
+ EmitIntegralUnOp(invoke, [&](XRegister rd, XRegister rs1) {
+ ScratchRegisterScope srs(assembler);
+ XRegister tmp = srs.AllocateXRegister();
+ XRegister tmp2 = srs.AllocateXRegister();
+ __ Clz(tmp, rs1);
+ __ Li(tmp2, INT64_C(-0x8000000000000000));
+ __ Srl(tmp2, tmp2, tmp);
+ __ And(rd, rs1, tmp2); // Make sure the result is zero if the input is zero.
+ });
+}
+
+void IntrinsicLocationsBuilderRISCV64::VisitIntegerLowestOneBit(HInvoke* invoke) {
+ CreateIntToIntNoOverlapLocations(allocator_, invoke);
+}
+
+void IntrinsicCodeGeneratorRISCV64::VisitIntegerLowestOneBit(HInvoke* invoke) {
+ Riscv64Assembler* assembler = GetAssembler();
+ EmitIntegralUnOp(invoke, [&](XRegister rd, XRegister rs1) {
+ ScratchRegisterScope srs(assembler);
+ XRegister tmp = srs.AllocateXRegister();
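+ // Isolate the lowest set bit with the two's complement identity x & -x.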
+ __ NegW(tmp, rs1);
+ __ And(rd, rs1, tmp);
+ });
+}
+
+void IntrinsicLocationsBuilderRISCV64::VisitLongLowestOneBit(HInvoke* invoke) {
+ CreateIntToIntNoOverlapLocations(allocator_, invoke);
+}
+
+void IntrinsicCodeGeneratorRISCV64::VisitLongLowestOneBit(HInvoke* invoke) {
+ Riscv64Assembler* assembler = GetAssembler();
+ EmitIntegralUnOp(invoke, [&](XRegister rd, XRegister rs1) {
+ ScratchRegisterScope srs(assembler);
+ XRegister tmp = srs.AllocateXRegister();
+ __ Neg(tmp, rs1);
+ __ And(rd, rs1, tmp);
+ });
+}
+
+void IntrinsicLocationsBuilderRISCV64::VisitIntegerNumberOfLeadingZeros(HInvoke* invoke) {
+ CreateIntToIntNoOverlapLocations(allocator_, invoke);
+}
+
+void IntrinsicCodeGeneratorRISCV64::VisitIntegerNumberOfLeadingZeros(HInvoke* invoke) {
+ Riscv64Assembler* assembler = GetAssembler();
+ EmitIntegralUnOp(invoke, [&](XRegister rd, XRegister rs1) { __ Clzw(rd, rs1); });
+}
+
+void IntrinsicLocationsBuilderRISCV64::VisitLongNumberOfLeadingZeros(HInvoke* invoke) {
+ CreateIntToIntNoOverlapLocations(allocator_, invoke);
+}
+
+void IntrinsicCodeGeneratorRISCV64::VisitLongNumberOfLeadingZeros(HInvoke* invoke) {
+ Riscv64Assembler* assembler = GetAssembler();
+ EmitIntegralUnOp(invoke, [&](XRegister rd, XRegister rs1) { __ Clz(rd, rs1); });
+}
+
+void IntrinsicLocationsBuilderRISCV64::VisitIntegerNumberOfTrailingZeros(HInvoke* invoke) {
+ CreateIntToIntNoOverlapLocations(allocator_, invoke);
+}
+
+void IntrinsicCodeGeneratorRISCV64::VisitIntegerNumberOfTrailingZeros(HInvoke* invoke) {
+ Riscv64Assembler* assembler = GetAssembler();
+ EmitIntegralUnOp(invoke, [&](XRegister rd, XRegister rs1) { __ Ctzw(rd, rs1); });
+}
+
+void IntrinsicLocationsBuilderRISCV64::VisitLongNumberOfTrailingZeros(HInvoke* invoke) {
+ CreateIntToIntNoOverlapLocations(allocator_, invoke);
+}
+
+void IntrinsicCodeGeneratorRISCV64::VisitLongNumberOfTrailingZeros(HInvoke* invoke) {
+ Riscv64Assembler* assembler = GetAssembler();
+ EmitIntegralUnOp(invoke, [&](XRegister rd, XRegister rs1) { __ Ctz(rd, rs1); });
+}
+
+static void GenerateDivideUnsigned(HInvoke* invoke, CodeGeneratorRISCV64* codegen) {
+ LocationSummary* locations = invoke->GetLocations();
+ Riscv64Assembler* assembler = codegen->GetAssembler();
+ DataType::Type type = invoke->GetType();
+ DCHECK(type == DataType::Type::kInt32 || type == DataType::Type::kInt64);
+
+ XRegister dividend = locations->InAt(0).AsRegister<XRegister>();
+ XRegister divisor = locations->InAt(1).AsRegister<XRegister>();
+ XRegister out = locations->Out().AsRegister<XRegister>();
+
+ // Check if divisor is zero, bail to managed implementation to handle.
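+ // (RISC-V `DIVU`/`DIVUW` do not trap on a zero divisor; they return all ones, so we must
+ // branch to the managed code to throw the ArithmeticException.)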
+ SlowPathCodeRISCV64* slow_path =
+ new (codegen->GetScopedAllocator()) IntrinsicSlowPathRISCV64(invoke);
+ codegen->AddSlowPath(slow_path);
+ __ Beqz(divisor, slow_path->GetEntryLabel());
+
+ if (type == DataType::Type::kInt32) {
+ __ Divuw(out, dividend, divisor);
+ } else {
+ __ Divu(out, dividend, divisor);
+ }
+
+ __ Bind(slow_path->GetExitLabel());
+}
+
+void IntrinsicLocationsBuilderRISCV64::VisitIntegerDivideUnsigned(HInvoke* invoke) {
+ CreateIntIntToIntSlowPathCallLocations(allocator_, invoke);
+}
+
+void IntrinsicCodeGeneratorRISCV64::VisitIntegerDivideUnsigned(HInvoke* invoke) {
+ GenerateDivideUnsigned(invoke, codegen_);
+}
+
+void IntrinsicLocationsBuilderRISCV64::VisitLongDivideUnsigned(HInvoke* invoke) {
+ CreateIntIntToIntSlowPathCallLocations(allocator_, invoke);
+}
+
+void IntrinsicCodeGeneratorRISCV64::VisitLongDivideUnsigned(HInvoke* invoke) {
+ GenerateDivideUnsigned(invoke, codegen_);
+}
+
+#define VISIT_INTRINSIC(name, low, high, type, start_index) \
+ void IntrinsicLocationsBuilderRISCV64::Visit ##name ##ValueOf(HInvoke* invoke) { \
+ InvokeRuntimeCallingConvention calling_convention; \
+ IntrinsicVisitor::ComputeValueOfLocations( \
+ invoke, \
+ codegen_, \
+ low, \
+ high - low + 1, \
+ calling_convention.GetReturnLocation(DataType::Type::kReference), \
+ Location::RegisterLocation(calling_convention.GetRegisterAt(0))); \
+ } \
+ void IntrinsicCodeGeneratorRISCV64::Visit ##name ##ValueOf(HInvoke* invoke) { \
+ IntrinsicVisitor::ValueOfInfo info = \
+ IntrinsicVisitor::ComputeValueOfInfo( \
+ invoke, \
+ codegen_->GetCompilerOptions(), \
+ WellKnownClasses::java_lang_ ##name ##_value, \
+ low, \
+ high - low + 1, \
+ start_index); \
+ HandleValueOf(invoke, info, type); \
+ }
+ BOXED_TYPES(VISIT_INTRINSIC)
+#undef VISIT_INTRINSIC
+
+void IntrinsicCodeGeneratorRISCV64::HandleValueOf(HInvoke* invoke,
+ const IntrinsicVisitor::ValueOfInfo& info,
+ DataType::Type type) {
+ Riscv64Assembler* assembler = codegen_->GetAssembler();
+ LocationSummary* locations = invoke->GetLocations();
+ XRegister out = locations->Out().AsRegister<XRegister>();
+ ScratchRegisterScope srs(assembler);
+ XRegister temp = srs.AllocateXRegister();
+ auto allocate_instance = [&]() {
+ DCHECK_EQ(out, InvokeRuntimeCallingConvention().GetRegisterAt(0));
+ codegen_->LoadIntrinsicDeclaringClass(out, invoke);
+ codegen_->InvokeRuntime(kQuickAllocObjectInitialized, invoke, invoke->GetDexPc());
+ CheckEntrypointTypes<kQuickAllocObjectWithChecks, void*, mirror::Class*>();
+ };
+ if (invoke->InputAt(0)->IsIntConstant()) {
+ int32_t value = invoke->InputAt(0)->AsIntConstant()->GetValue();
+ if (static_cast<uint32_t>(value - info.low) < info.length) {
+ // Just embed the object in the code.
+ DCHECK_NE(info.value_boot_image_reference, ValueOfInfo::kInvalidReference);
+ codegen_->LoadBootImageAddress(out, info.value_boot_image_reference);
+ } else {
+ DCHECK(locations->CanCall());
+ // Allocate and initialize a new object.
+ // TODO: If we JIT, we could allocate the object now, and store it in the
+ // JIT object table.
+ allocate_instance();
+ __ Li(temp, value);
+ codegen_->GetInstructionVisitor()->Store(
+ Location::RegisterLocation(temp), out, info.value_offset, type);
+ // Class pointer and `value` final field stores require a barrier before publication.
+ codegen_->GenerateMemoryBarrier(MemBarrierKind::kStoreStore);
+ }
+ } else {
+ DCHECK(locations->CanCall());
+ XRegister in = locations->InAt(0).AsRegister<XRegister>();
+ Riscv64Label allocate, done;
+ // Check bounds of our cache.
+ __ AddConst32(out, in, -info.low);
+ __ Li(temp, info.length);
+ __ Bgeu(out, temp, &allocate);
+ // If the value is within the bounds, load the object directly from the array.
+ codegen_->LoadBootImageAddress(temp, info.array_data_boot_image_reference);
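+ // Each cache entry is a 32-bit compressed heap reference, so the index is scaled by 4.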
+ __ Sh2Add(temp, out, temp);
+ __ Loadwu(out, temp, 0);
+ codegen_->MaybeUnpoisonHeapReference(out);
+ __ J(&done);
+ __ Bind(&allocate);
+ // Otherwise allocate and initialize a new object.
+ allocate_instance();
+ codegen_->GetInstructionVisitor()->Store(
+ Location::RegisterLocation(in), out, info.value_offset, type);
+ // Class pointer and `value` final field stores require a barrier before publication.
+ codegen_->GenerateMemoryBarrier(MemBarrierKind::kStoreStore);
+ __ Bind(&done);
+ }
+}
+
+void IntrinsicLocationsBuilderRISCV64::VisitReferenceGetReferent(HInvoke* invoke) {
+ IntrinsicVisitor::CreateReferenceGetReferentLocations(invoke, codegen_);
+
+ if (codegen_->EmitBakerReadBarrier() && invoke->GetLocations() != nullptr) {
+ invoke->GetLocations()->AddTemp(Location::RequiresRegister());
+ }
+}
+
+void IntrinsicCodeGeneratorRISCV64::VisitReferenceGetReferent(HInvoke* invoke) {
+ Riscv64Assembler* assembler = GetAssembler();
+ LocationSummary* locations = invoke->GetLocations();
+ Location obj = locations->InAt(0);
+ Location out = locations->Out();
+
+ SlowPathCodeRISCV64* slow_path =
+ new (codegen_->GetScopedAllocator()) IntrinsicSlowPathRISCV64(invoke);
+ codegen_->AddSlowPath(slow_path);
+
+ if (codegen_->EmitReadBarrier()) {
+ // Check self->GetWeakRefAccessEnabled().
+ ScratchRegisterScope srs(assembler);
+ XRegister temp = srs.AllocateXRegister();
+ __ Loadwu(temp, TR, Thread::WeakRefAccessEnabledOffset<kRiscv64PointerSize>().Int32Value());
+ static_assert(enum_cast<int32_t>(WeakRefAccessState::kVisiblyEnabled) == 0);
+ __ Bnez(temp, slow_path->GetEntryLabel());
+ }
+
+ {
+ // Load the java.lang.ref.Reference class.
+ ScratchRegisterScope srs(assembler);
+ XRegister temp = srs.AllocateXRegister();
+ codegen_->LoadIntrinsicDeclaringClass(temp, invoke);
+
+ // Check static fields java.lang.ref.Reference.{disableIntrinsic,slowPathEnabled} together.
+ MemberOffset disable_intrinsic_offset = IntrinsicVisitor::GetReferenceDisableIntrinsicOffset();
+ DCHECK_ALIGNED(disable_intrinsic_offset.Uint32Value(), 2u);
+ DCHECK_EQ(disable_intrinsic_offset.Uint32Value() + 1u,
+ IntrinsicVisitor::GetReferenceSlowPathEnabledOffset().Uint32Value());
+ __ Loadhu(temp, temp, disable_intrinsic_offset.Int32Value());
+ __ Bnez(temp, slow_path->GetEntryLabel());
+ }
+
+ // Load the value from the field.
+ uint32_t referent_offset = mirror::Reference::ReferentOffset().Uint32Value();
+ if (codegen_->EmitBakerReadBarrier()) {
+ codegen_->GenerateFieldLoadWithBakerReadBarrier(invoke,
+ out,
+ obj.AsRegister<XRegister>(),
+ referent_offset,
+ /*maybe_temp=*/ locations->GetTemp(0),
+ /*needs_null_check=*/ false);
+ } else {
+ codegen_->GetInstructionVisitor()->Load(
+ out, obj.AsRegister<XRegister>(), referent_offset, DataType::Type::kReference);
+ codegen_->MaybeGenerateReadBarrierSlow(invoke, out, out, obj, referent_offset);
+ }
+ // Emit memory barrier for load-acquire.
+ codegen_->GenerateMemoryBarrier(MemBarrierKind::kLoadAny);
+ __ Bind(slow_path->GetExitLabel());
+}
+
+void IntrinsicLocationsBuilderRISCV64::VisitReferenceRefersTo(HInvoke* invoke) {
+ IntrinsicVisitor::CreateReferenceRefersToLocations(invoke, codegen_);
+}
+
+void IntrinsicCodeGeneratorRISCV64::VisitReferenceRefersTo(HInvoke* invoke) {
+ Riscv64Assembler* assembler = GetAssembler();
+ LocationSummary* locations = invoke->GetLocations();
+ XRegister obj = locations->InAt(0).AsRegister<XRegister>();
+ XRegister other = locations->InAt(1).AsRegister<XRegister>();
+ XRegister out = locations->Out().AsRegister<XRegister>();
+
+ uint32_t referent_offset = mirror::Reference::ReferentOffset().Uint32Value();
+ uint32_t monitor_offset = mirror::Object::MonitorOffset().Int32Value();
+
+ codegen_->GetInstructionVisitor()->Load(
+ Location::RegisterLocation(out), obj, referent_offset, DataType::Type::kReference);
+ codegen_->MaybeRecordImplicitNullCheck(invoke);
+ codegen_->MaybeUnpoisonHeapReference(out);
+
+ // Emit memory barrier for load-acquire.
+ codegen_->GenerateMemoryBarrier(MemBarrierKind::kLoadAny);
+
+ if (codegen_->EmitReadBarrier()) {
+ DCHECK(kUseBakerReadBarrier);
+
+ Riscv64Label calculate_result;
+
+ // If equal to `other`, the loaded reference is final (it cannot be a from-space reference).
+ __ Beq(out, other, &calculate_result);
+
+ // If the GC is not marking, the loaded reference is final.
+ ScratchRegisterScope srs(assembler);
+ XRegister tmp = srs.AllocateXRegister();
+ __ Loadwu(tmp, TR, Thread::IsGcMarkingOffset<kRiscv64PointerSize>().Int32Value());
+ __ Beqz(tmp, &calculate_result);
+
+ // Check if the loaded reference is null.
+ __ Beqz(out, &calculate_result);
+
+ // For correct memory visibility, we need a barrier before loading the lock word to
+ // synchronize with the publishing of `other` by the CC GC. However, as long as the
+ // load-acquire above is implemented as a plain load followed by a barrier (rather
+ // than an atomic load-acquire instruction which synchronizes only with other
+ // instructions on the same memory location), that barrier is sufficient.
+
+ // Load the lockword and check if it is a forwarding address.
+ static_assert(LockWord::kStateShift == 30u);
+ static_assert(LockWord::kStateForwardingAddress == 3u);
+ // Load the lock word sign-extended. Comparing it to the sign-extended forwarding
+ // address bits as unsigned is the same as comparing both zero-extended.
+ __ Loadw(tmp, out, monitor_offset);
+ // Materialize sign-extended forwarding address bits. This is a single LUI instruction.
+ XRegister tmp2 = srs.AllocateXRegister();
+ __ Li(tmp2, INT64_C(-1) & ~static_cast<int64_t>((1 << LockWord::kStateShift) - 1));
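+ // Here `tmp2` is 0xFFFFFFFFC0000000; any sign-extended lock word with both state bits set
+ // (the forwarding address state) compares unsigned-greater-or-equal to it.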
+ // If we do not have a forwarding address, the loaded reference cannot be the same as `other`,
+ // so we proceed to calculate the result with `out != other`.
+ __ Bltu(tmp, tmp2, &calculate_result);
+
+ // Extract the forwarding address for comparison with `other`.
+ // Note that the high 32 bits shall not be used for the result calculation.
+ __ Slliw(out, tmp, LockWord::kForwardingAddressShift);
+
+ __ Bind(&calculate_result);
+ }
+
+ // Calculate the result `out == other`.
+ __ Subw(out, out, other);
+ __ Seqz(out, out);
+}
+
+static void GenerateVisitStringIndexOf(HInvoke* invoke,
+ Riscv64Assembler* assembler,
+ CodeGeneratorRISCV64* codegen,
+ bool start_at_zero) {
+ LocationSummary* locations = invoke->GetLocations();
+
+ // Note that the null check must have been done earlier.
+ DCHECK(!invoke->CanDoImplicitNullCheckOn(invoke->InputAt(0)));
+
+ // Check for code points > 0xFFFF. Either a slow-path check when we don't know statically,
+ // or directly dispatch for a large constant, or omit slow-path for a small constant or a char.
+ SlowPathCodeRISCV64* slow_path = nullptr;
+ HInstruction* code_point = invoke->InputAt(1);
+ if (code_point->IsIntConstant()) {
+ if (static_cast<uint32_t>(code_point->AsIntConstant()->GetValue()) > 0xFFFFU) {
+ // Always needs the slow-path. We could directly dispatch to it, but this case should be
+ // rare, so for simplicity just put the full slow-path down and branch unconditionally.
+ slow_path = new (codegen->GetScopedAllocator()) IntrinsicSlowPathRISCV64(invoke);
+ codegen->AddSlowPath(slow_path);
+ __ J(slow_path->GetEntryLabel());
+ __ Bind(slow_path->GetExitLabel());
+ return;
+ }
+ } else if (code_point->GetType() != DataType::Type::kUint16) {
+ slow_path = new (codegen->GetScopedAllocator()) IntrinsicSlowPathRISCV64(invoke);
+ codegen->AddSlowPath(slow_path);
+ ScratchRegisterScope srs(assembler);
+ XRegister tmp = srs.AllocateXRegister();
+ __ Srliw(tmp, locations->InAt(1).AsRegister<XRegister>(), 16);
+ __ Bnez(tmp, slow_path->GetEntryLabel());
+ }
+
+ if (start_at_zero) {
+ // Start-index = 0.
+ XRegister tmp_reg = locations->GetTemp(0).AsRegister<XRegister>();
+ __ Li(tmp_reg, 0);
+ }
+
+ codegen->InvokeRuntime(kQuickIndexOf, invoke, invoke->GetDexPc(), slow_path);
+ CheckEntrypointTypes<kQuickIndexOf, int32_t, void*, uint32_t, uint32_t>();
+
+ if (slow_path != nullptr) {
+ __ Bind(slow_path->GetExitLabel());
+ }
+}
+
+void IntrinsicLocationsBuilderRISCV64::VisitStringIndexOf(HInvoke* invoke) {
+ LocationSummary* locations = new (allocator_) LocationSummary(
+ invoke, LocationSummary::kCallOnMainAndSlowPath, kIntrinsified);
+ // We have a hand-crafted assembly stub that follows the runtime calling convention. So it's
+ // best to align the inputs accordingly.
+ InvokeRuntimeCallingConvention calling_convention;
+ locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
+ locations->SetInAt(1, Location::RegisterLocation(calling_convention.GetRegisterAt(1)));
+ locations->SetOut(calling_convention.GetReturnLocation(DataType::Type::kInt32));
+
+ // Need to send start_index=0.
+ locations->AddTemp(Location::RegisterLocation(calling_convention.GetRegisterAt(2)));
+}
+
+void IntrinsicCodeGeneratorRISCV64::VisitStringIndexOf(HInvoke* invoke) {
+ GenerateVisitStringIndexOf(invoke, GetAssembler(), codegen_, /* start_at_zero= */ true);
+}
+
+void IntrinsicLocationsBuilderRISCV64::VisitStringIndexOfAfter(HInvoke* invoke) {
+ LocationSummary* locations = new (allocator_) LocationSummary(
+ invoke, LocationSummary::kCallOnMainAndSlowPath, kIntrinsified);
+ // We have a hand-crafted assembly stub that follows the runtime calling convention. So it's
+ // best to align the inputs accordingly.
+ InvokeRuntimeCallingConvention calling_convention;
+ locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
+ locations->SetInAt(1, Location::RegisterLocation(calling_convention.GetRegisterAt(1)));
+ locations->SetInAt(2, Location::RegisterLocation(calling_convention.GetRegisterAt(2)));
+ locations->SetOut(calling_convention.GetReturnLocation(DataType::Type::kInt32));
+}
+
+void IntrinsicCodeGeneratorRISCV64::VisitStringIndexOfAfter(HInvoke* invoke) {
+ GenerateVisitStringIndexOf(invoke, GetAssembler(), codegen_, /* start_at_zero= */ false);
+}
+
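+// Store `value` at `rs1 + offset` honoring `order`: a seq_cst store goes through the dedicated
+// `StoreSeqCst()` sequence, a release store is preceded by an any-store barrier and a relaxed
+// store is a plain store.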
+static void GenerateSet(CodeGeneratorRISCV64* codegen,
+ std::memory_order order,
+ Location value,
+ XRegister rs1,
+ int32_t offset,
+ DataType::Type type) {
+ if (order == std::memory_order_seq_cst) {
+ codegen->GetInstructionVisitor()->StoreSeqCst(value, rs1, offset, type);
+ } else {
+ if (order == std::memory_order_release) {
+ codegen->GenerateMemoryBarrier(MemBarrierKind::kAnyStore);
+ } else {
+ DCHECK(order == std::memory_order_relaxed);
+ }
+ codegen->GetInstructionVisitor()->Store(value, rs1, offset, type);
+ }
+}
+
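+// Map a `std::memory_order` to the aq/rl bits to use on the LR and SC instructions of an
+// LR/SC sequence.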
+std::pair<AqRl, AqRl> GetLrScAqRl(std::memory_order order) {
+ AqRl load_aqrl = AqRl::kNone;
+ AqRl store_aqrl = AqRl::kNone;
+ if (order == std::memory_order_acquire) {
+ load_aqrl = AqRl::kAcquire;
+ } else if (order == std::memory_order_release) {
+ store_aqrl = AqRl::kRelease;
+ } else if (order == std::memory_order_seq_cst) {
+ load_aqrl = AqRl::kAqRl;
+ store_aqrl = AqRl::kRelease;
+ } else {
+ DCHECK(order == std::memory_order_relaxed);
+ }
+ return {load_aqrl, store_aqrl};
+}
+
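+// Map a `std::memory_order` to the aq/rl bits to use on a single AMO instruction.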
+AqRl GetAmoAqRl(std::memory_order order) {
+ AqRl amo_aqrl = AqRl::kNone;
+ if (order == std::memory_order_acquire) {
+ amo_aqrl = AqRl::kAcquire;
+ } else if (order == std::memory_order_release) {
+ amo_aqrl = AqRl::kRelease;
+ } else {
+ DCHECK(order == std::memory_order_seq_cst);
+ amo_aqrl = AqRl::kAqRl;
+ }
+ return amo_aqrl;
+}
+
+static void EmitLoadReserved(Riscv64Assembler* assembler,
+ DataType::Type type,
+ XRegister ptr,
+ XRegister old_value,
+ AqRl aqrl) {
+ switch (type) {
+ case DataType::Type::kInt32:
+ __ LrW(old_value, ptr, aqrl);
+ break;
+ case DataType::Type::kReference:
+ __ LrW(old_value, ptr, aqrl);
+ // TODO(riscv64): The `ZextW()` macro currently emits `SLLI+SRLI` which are from the
+ // base "I" instruction set. When the assembler is updated to use a single-instruction
+ // `ZextW()` macro, either the ADD.UW, or the C.ZEXT.W (16-bit encoding), we need to
+ // rewrite this to avoid these non-"I" instructions. We could, for example, sign-extend
+ // the reference and do the CAS as `Int32`.
+ __ ZextW(old_value, old_value);
+ break;
+ case DataType::Type::kInt64:
+ __ LrD(old_value, ptr, aqrl);
+ break;
+ default:
+ LOG(FATAL) << "Unexpected type: " << type;
+ UNREACHABLE();
+ }
+}
+
+static void EmitStoreConditional(Riscv64Assembler* assembler,
+ DataType::Type type,
+ XRegister ptr,
+ XRegister store_result,
+ XRegister to_store,
+ AqRl aqrl) {
+ switch (type) {
+ case DataType::Type::kInt32:
+ case DataType::Type::kReference:
+ __ ScW(store_result, to_store, ptr, aqrl);
+ break;
+ case DataType::Type::kInt64:
+ __ ScD(store_result, to_store, ptr, aqrl);
+ break;
+ default:
+ LOG(FATAL) << "Unexpected type: " << type;
+ UNREACHABLE();
+ }
+}
+
+static void GenerateCompareAndSet(Riscv64Assembler* assembler,
+ DataType::Type type,
+ std::memory_order order,
+ bool strong,
+ Riscv64Label* cmp_failure,
+ XRegister ptr,
+ XRegister new_value,
+ XRegister old_value,
+ XRegister mask,
+ XRegister masked,
+ XRegister store_result,
+ XRegister expected,
+ XRegister expected2 = kNoXRegister) {
+ DCHECK(!DataType::IsFloatingPointType(type));
+ DCHECK_GE(DataType::Size(type), 4u);
+
+ // The `expected2` register is valid only for the reference slow path; it holds the unmarked
+ // old value from the main path's CAS attempt where the marked old value matched `expected`.
+ DCHECK_IMPLIES(expected2 != kNoXRegister, type == DataType::Type::kReference);
+
+ auto [load_aqrl, store_aqrl] = GetLrScAqRl(order);
+
+ // repeat: {
+ // old_value = [ptr]; // Load exclusive.
+ // cmp_value = old_value & mask; // Extract relevant bits if applicable.
+ // if (cmp_value != expected && cmp_value != expected2) goto cmp_failure;
+ // store_result = failed([ptr] <- new_value); // Store exclusive.
+ // }
+ // if (strong) {
+ // if (store_result) goto repeat; // Repeat until compare fails or store exclusive succeeds.
+ // } else {
+ // store_result = store_result ^ 1; // Report success as 1, failure as 0.
+ // }
+ //
+ // (If `mask` is not valid, `expected` is compared with `old_value` instead of `cmp_value`.)
+ // (If `expected2` is not valid, the `cmp_value == expected2` part is not emitted.)
+
+ // Note: We're using "bare" local branches to enforce that they shall not be expanded
+ // and the scratch register `TMP` shall not be clobbered if taken. Taking the branch to
+ // `cmp_failure` can theoretically clobber `TMP` (if outside the 1 MiB range).
+ Riscv64Label loop;
+ if (strong) {
+ __ Bind(&loop);
+ }
+ EmitLoadReserved(assembler, type, ptr, old_value, load_aqrl);
+ XRegister to_store = new_value;
+ if (mask != kNoXRegister) {
+ DCHECK_EQ(expected2, kNoXRegister);
+ DCHECK_NE(masked, kNoXRegister);
+ __ And(masked, old_value, mask);
+ __ Bne(masked, expected, cmp_failure);
+ // The `old_value` does not need to be preserved as the caller shall use `masked`
+ // to return the old value if needed.
+ to_store = old_value;
+ // TODO(riscv64): We could XOR the old and new value before the loop and use a single XOR here
+ // instead of the XOR+OR. (The `new_value` is either Zero or a temporary we can clobber.)
+ __ Xor(to_store, old_value, masked);
+ __ Or(to_store, to_store, new_value);
+ } else if (expected2 != kNoXRegister) {
+ Riscv64Label match2;
+ __ Beq(old_value, expected2, &match2, /*is_bare=*/ true);
+ __ Bne(old_value, expected, cmp_failure);
+ __ Bind(&match2);
+ } else {
+ __ Bne(old_value, expected, cmp_failure);
+ }
+ EmitStoreConditional(assembler, type, ptr, store_result, to_store, store_aqrl);
+ if (strong) {
+ __ Bnez(store_result, &loop, /*is_bare=*/ true);
+ } else {
+ // Flip the `store_result` register to indicate success by 1 and failure by 0.
+ __ Xori(store_result, store_result, 1);
+ }
+}
+
+class ReadBarrierCasSlowPathRISCV64 : public SlowPathCodeRISCV64 {
+ public:
+ ReadBarrierCasSlowPathRISCV64(HInvoke* invoke,
+ std::memory_order order,
+ bool strong,
+ XRegister base,
+ XRegister offset,
+ XRegister expected,
+ XRegister new_value,
+ XRegister old_value,
+ XRegister old_value_temp,
+ XRegister store_result,
+ bool update_old_value,
+ CodeGeneratorRISCV64* riscv64_codegen)
+ : SlowPathCodeRISCV64(invoke),
+ order_(order),
+ strong_(strong),
+ base_(base),
+ offset_(offset),
+ expected_(expected),
+ new_value_(new_value),
+ old_value_(old_value),
+ old_value_temp_(old_value_temp),
+ store_result_(store_result),
+ update_old_value_(update_old_value),
+ mark_old_value_slow_path_(nullptr),
+ update_old_value_slow_path_(nullptr) {
+ // We need to add slow paths now, it is too late when emitting slow path code.
+ Location old_value_loc = Location::RegisterLocation(old_value);
+ Location old_value_temp_loc = Location::RegisterLocation(old_value_temp);
+ if (kUseBakerReadBarrier) {
+ mark_old_value_slow_path_ = riscv64_codegen->AddGcRootBakerBarrierBarrierSlowPath(
+ invoke, old_value_temp_loc, kBakerReadBarrierTemp);
+ if (update_old_value_) {
+ update_old_value_slow_path_ = riscv64_codegen->AddGcRootBakerBarrierBarrierSlowPath(
+ invoke, old_value_loc, kBakerReadBarrierTemp);
+ }
+ } else {
+ Location base_loc = Location::RegisterLocation(base);
+ Location index = Location::RegisterLocation(offset);
+ mark_old_value_slow_path_ = riscv64_codegen->AddReadBarrierSlowPath(
+ invoke, old_value_temp_loc, old_value_loc, base_loc, /*offset=*/ 0u, index);
+ if (update_old_value_) {
+ update_old_value_slow_path_ = riscv64_codegen->AddReadBarrierSlowPath(
+ invoke, old_value_loc, old_value_temp_loc, base_loc, /*offset=*/ 0u, index);
+ }
+ }
+ }
+
+ const char* GetDescription() const override { return "ReadBarrierCasSlowPathRISCV64"; }
+
+ // We return to a different label on success for a strong CAS that does not return the old value.
+ Riscv64Label* GetSuccessExitLabel() {
+ return &success_exit_label_;
+ }
+
+ void EmitNativeCode(CodeGenerator* codegen) override {
+ CodeGeneratorRISCV64* riscv64_codegen = down_cast<CodeGeneratorRISCV64*>(codegen);
+ Riscv64Assembler* assembler = riscv64_codegen->GetAssembler();
+ __ Bind(GetEntryLabel());
+
+ // Mark the `old_value_` from the main path and compare with `expected_`.
+ DCHECK(mark_old_value_slow_path_ != nullptr);
+ if (kUseBakerReadBarrier) {
+ __ Mv(old_value_temp_, old_value_);
+ riscv64_codegen->EmitBakerReadBarierMarkingCheck(mark_old_value_slow_path_,
+ Location::RegisterLocation(old_value_temp_),
+ kBakerReadBarrierTemp);
+ } else {
+ __ J(mark_old_value_slow_path_->GetEntryLabel());
+ __ Bind(mark_old_value_slow_path_->GetExitLabel());
+ }
+ Riscv64Label move_marked_old_value;
+ __ Bne(old_value_temp_, expected_, update_old_value_ ? &move_marked_old_value : GetExitLabel());
+
+ // The `old_value` we have read did not match `expected` (which is always a to-space
+ // reference) but after the read barrier the marked to-space value matched, so the
+ // `old_value` must be a from-space reference to the same object. Do the same CAS loop
+ // as the main path but check for both `expected` and the unmarked old value
+ // representing the to-space and from-space references for the same object.
+
+ ScratchRegisterScope srs(assembler);
+ XRegister tmp_ptr = srs.AllocateXRegister();
+ XRegister store_result =
+ store_result_ != kNoXRegister ? store_result_ : srs.AllocateXRegister();
+
+ // Recalculate the `tmp_ptr` from main path potentially clobbered by the read barrier above
+ // or by an expanded conditional branch (clobbers `TMP` if beyond 1MiB).
+ __ Add(tmp_ptr, base_, offset_);
+
+ Riscv64Label mark_old_value;
+ GenerateCompareAndSet(riscv64_codegen->GetAssembler(),
+ DataType::Type::kReference,
+ order_,
+ strong_,
+ /*cmp_failure=*/ update_old_value_ ? &mark_old_value : GetExitLabel(),
+ tmp_ptr,
+ new_value_,
+ /*old_value=*/ old_value_temp_,
+ /*mask=*/ kNoXRegister,
+ /*masked=*/ kNoXRegister,
+ store_result,
+ expected_,
+ /*expected2=*/ old_value_);
+ if (update_old_value_) {
+ // To reach this point, the `old_value_temp_` must be either a from-space or a to-space
+ // reference of the `expected_` object. Update the `old_value_` to the to-space reference.
+ __ Mv(old_value_, expected_);
+ }
+ if (!update_old_value_ && strong_) {
+ // Load success value to the result register.
+ // We must jump to the instruction that loads the success value in the main path.
+ // Note that a SC failure in the CAS loop sets the `store_result` to 1, so the main
+ // path must not use the `store_result` as an indication of success.
+ __ J(GetSuccessExitLabel());
+ } else {
+ __ J(GetExitLabel());
+ }
+
+ if (update_old_value_) {
+ // TODO(riscv64): If we initially saw a from-space reference and then saw
+ // a different reference, can the latter be also a from-space reference?
+ // (Shouldn't every reference write store a to-space reference?)
+ DCHECK(update_old_value_slow_path_ != nullptr);
+ __ Bind(&mark_old_value);
+ if (kUseBakerReadBarrier) {
+ DCHECK(update_old_value_slow_path_ != nullptr);
+ __ Mv(old_value_, old_value_temp_);
+ riscv64_codegen->EmitBakerReadBarierMarkingCheck(update_old_value_slow_path_,
+ Location::RegisterLocation(old_value_),
+ kBakerReadBarrierTemp);
+ } else {
+ // Note: We could redirect the `failure` above directly to the entry label and bind
+ // the exit label in the main path, but the main path would need to access the
+ // `update_old_value_slow_path_`. To keep the code simple, keep the extra jumps.
+ __ J(update_old_value_slow_path_->GetEntryLabel());
+ __ Bind(update_old_value_slow_path_->GetExitLabel());
+ }
+ __ J(GetExitLabel());
+
+ __ Bind(&move_marked_old_value);
+ __ Mv(old_value_, old_value_temp_);
+ __ J(GetExitLabel());
+ }
+ }
+
+ private:
+ // Use RA as temp. It is clobbered in the slow path anyway.
+ static constexpr Location kBakerReadBarrierTemp = Location::RegisterLocation(RA);
+
+ std::memory_order order_;
+ bool strong_;
+ XRegister base_;
+ XRegister offset_;
+ XRegister expected_;
+ XRegister new_value_;
+ XRegister old_value_;
+ XRegister old_value_temp_;
+ XRegister store_result_;
+ bool update_old_value_;
+ SlowPathCodeRISCV64* mark_old_value_slow_path_;
+ SlowPathCodeRISCV64* update_old_value_slow_path_;
+ Riscv64Label success_exit_label_;
+};
+
+static void EmitBlt32(Riscv64Assembler* assembler,
+ XRegister rs1,
+ Location rs2,
+ Riscv64Label* label,
+ XRegister temp) {
+ if (rs2.IsConstant()) {
+ __ Li(temp, rs2.GetConstant()->AsIntConstant()->GetValue());
+ __ Blt(rs1, temp, label);
+ } else {
+ __ Blt(rs1, rs2.AsRegister<XRegister>(), label);
+ }
+}
+
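+// Check that `length(array) - pos >= length`, jumping to `slow_path` otherwise. When the copy
+// length is known to equal the array length, only `pos == 0` can pass this check.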
+static void CheckSystemArrayCopyPosition(Riscv64Assembler* assembler,
+ XRegister array,
+ Location pos,
+ Location length,
+ SlowPathCodeRISCV64* slow_path,
+ XRegister temp1,
+ XRegister temp2,
+ bool length_is_array_length,
+ bool position_sign_checked) {
+ const int32_t length_offset = mirror::Array::LengthOffset().Int32Value();
+ if (pos.IsConstant()) {
+ int32_t pos_const = pos.GetConstant()->AsIntConstant()->GetValue();
+ DCHECK_GE(pos_const, 0); // Checked in location builder.
+ if (pos_const == 0) {
+ if (!length_is_array_length) {
+ // Check that length(array) >= length.
+ __ Loadw(temp1, array, length_offset);
+ EmitBlt32(assembler, temp1, length, slow_path->GetEntryLabel(), temp2);
+ }
+ } else {
+ // Calculate length(array) - pos.
+ // Both operands are known to be non-negative `int32_t`, so the difference cannot underflow
+ // as `int32_t`. If the result is negative, the BLT below shall go to the slow path.
+ __ Loadw(temp1, array, length_offset);
+ __ AddConst32(temp1, temp1, -pos_const);
+
+ // Check that (length(array) - pos) >= length.
+ EmitBlt32(assembler, temp1, length, slow_path->GetEntryLabel(), temp2);
+ }
+ } else if (length_is_array_length) {
+ // The only way the copy can succeed is if pos is zero.
+ __ Bnez(pos.AsRegister<XRegister>(), slow_path->GetEntryLabel());
+ } else {
+ // Check that pos >= 0.
+ XRegister pos_reg = pos.AsRegister<XRegister>();
+ if (!position_sign_checked) {
+ __ Bltz(pos_reg, slow_path->GetEntryLabel());
+ }
+
+ // Calculate length(array) - pos.
+ // Both operands are known to be non-negative `int32_t`, so the difference cannot underflow
+ // as `int32_t`. If the result is negative, the BLT below shall go to the slow path.
+ __ Loadw(temp1, array, length_offset);
+ __ Sub(temp1, temp1, pos_reg);
+
+ // Check that (length(array) - pos) >= length.
+ EmitBlt32(assembler, temp1, length, slow_path->GetEntryLabel(), temp2);
+ }
+}
+
+static void GenArrayAddress(CodeGeneratorRISCV64* codegen,
+ XRegister dest,
+ XRegister base,
+ Location pos,
+ DataType::Type type,
+ int32_t data_offset) {
+ Riscv64Assembler* assembler = codegen->GetAssembler();
+ if (pos.IsConstant()) {
+ int32_t constant = pos.GetConstant()->AsIntConstant()->GetValue();
+ __ AddConst64(dest, base, DataType::Size(type) * constant + data_offset);
+ } else {
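+ // dest = base + pos * element_size; the data offset is added below if needed.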
+ codegen->GetInstructionVisitor()->ShNAdd(dest, pos.AsRegister<XRegister>(), base, type);
+ if (data_offset != 0) {
+ __ AddConst64(dest, dest, data_offset);
+ }
+ }
+}
+
+// Compute base source address, base destination address, and end
+// source address for System.arraycopy* intrinsics in `src_base`,
+// `dst_base` and `src_end` respectively.
+static void GenSystemArrayCopyAddresses(CodeGeneratorRISCV64* codegen,
+ DataType::Type type,
+ XRegister src,
+ Location src_pos,
+ XRegister dst,
+ Location dst_pos,
+ Location copy_length,
+ XRegister src_base,
+ XRegister dst_base,
+ XRegister src_end) {
+ // This routine is used by the SystemArrayCopy and the SystemArrayCopyChar intrinsics.
+ DCHECK(type == DataType::Type::kReference || type == DataType::Type::kUint16)
+ << "Unexpected element type: " << type;
+ const int32_t element_size = DataType::Size(type);
+ const uint32_t data_offset = mirror::Array::DataOffset(element_size).Uint32Value();
+
+ GenArrayAddress(codegen, src_base, src, src_pos, type, data_offset);
+ GenArrayAddress(codegen, dst_base, dst, dst_pos, type, data_offset);
+ GenArrayAddress(codegen, src_end, src_base, copy_length, type, /*data_offset=*/ 0);
+}
+
+static Location LocationForSystemArrayCopyInput(HInstruction* input) {
+ HIntConstant* const_input = input->AsIntConstantOrNull();
+ if (const_input != nullptr && IsInt<12>(const_input->GetValue())) {
+ return Location::ConstantLocation(const_input);
+ } else {
+ return Location::RequiresRegister();
+ }
+}
+
+// We can choose to use the native implementation for longer copy lengths.
+static constexpr int32_t kSystemArrayCopyThreshold = 128;
+
+void IntrinsicLocationsBuilderRISCV64::VisitSystemArrayCopy(HInvoke* invoke) {
+ // The only read barrier implementation supporting the
+ // SystemArrayCopy intrinsic is the Baker-style read barriers.
+ if (codegen_->EmitNonBakerReadBarrier()) {
+ return;
+ }
+
+ size_t num_temps = codegen_->EmitBakerReadBarrier() ? 4u : 2u;
+ LocationSummary* locations = CodeGenerator::CreateSystemArrayCopyLocationSummary(
+ invoke, kSystemArrayCopyThreshold, num_temps);
+ if (locations != nullptr) {
+ // We request position and length as constants only for small integral values.
+ locations->SetInAt(1, LocationForSystemArrayCopyInput(invoke->InputAt(1)));
+ locations->SetInAt(3, LocationForSystemArrayCopyInput(invoke->InputAt(3)));
+ locations->SetInAt(4, LocationForSystemArrayCopyInput(invoke->InputAt(4)));
+ }
+}
+
+void IntrinsicCodeGeneratorRISCV64::VisitSystemArrayCopy(HInvoke* invoke) {
+ // The only read barrier implementation supporting the
+ // SystemArrayCopy intrinsic is the Baker-style read barriers.
+ DCHECK_IMPLIES(codegen_->EmitReadBarrier(), kUseBakerReadBarrier);
+
+ Riscv64Assembler* assembler = GetAssembler();
+ LocationSummary* locations = invoke->GetLocations();
+
+ uint32_t class_offset = mirror::Object::ClassOffset().Int32Value();
+ uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value();
+ uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value();
+ uint32_t primitive_offset = mirror::Class::PrimitiveTypeOffset().Int32Value();
+ uint32_t monitor_offset = mirror::Object::MonitorOffset().Int32Value();
+
+ XRegister src = locations->InAt(0).AsRegister<XRegister>();
+ Location src_pos = locations->InAt(1);
+ XRegister dest = locations->InAt(2).AsRegister<XRegister>();
+ Location dest_pos = locations->InAt(3);
+ Location length = locations->InAt(4);
+ XRegister temp1 = locations->GetTemp(0).AsRegister<XRegister>();
+ XRegister temp2 = locations->GetTemp(1).AsRegister<XRegister>();
+
+ SlowPathCodeRISCV64* intrinsic_slow_path =
+ new (codegen_->GetScopedAllocator()) IntrinsicSlowPathRISCV64(invoke);
+ codegen_->AddSlowPath(intrinsic_slow_path);
+
+ Riscv64Label conditions_on_positions_validated;
+ SystemArrayCopyOptimizations optimizations(invoke);
+
+ // If source and destination are the same, we go to slow path if we need to do forward copying.
+ // We do not need to do this check if the source and destination positions are the same.
+ if (!optimizations.GetSourcePositionIsDestinationPosition()) {
+ if (src_pos.IsConstant()) {
+ int32_t src_pos_constant = src_pos.GetConstant()->AsIntConstant()->GetValue();
+ if (dest_pos.IsConstant()) {
+ int32_t dest_pos_constant = dest_pos.GetConstant()->AsIntConstant()->GetValue();
+ if (optimizations.GetDestinationIsSource()) {
+ // Checked when building locations.
+ DCHECK_GE(src_pos_constant, dest_pos_constant);
+ } else if (src_pos_constant < dest_pos_constant) {
+ __ Beq(src, dest, intrinsic_slow_path->GetEntryLabel());
+ }
+ } else {
+ if (!optimizations.GetDestinationIsSource()) {
+ __ Bne(src, dest, &conditions_on_positions_validated);
+ }
+ __ Li(temp1, src_pos_constant);
+ __ Bgt(dest_pos.AsRegister<XRegister>(), temp1, intrinsic_slow_path->GetEntryLabel());
+ }
+ } else {
+ if (!optimizations.GetDestinationIsSource()) {
+ __ Bne(src, dest, &conditions_on_positions_validated);
+ }
+ XRegister src_pos_reg = src_pos.AsRegister<XRegister>();
+ EmitBlt32(assembler, src_pos_reg, dest_pos, intrinsic_slow_path->GetEntryLabel(), temp2);
+ }
+ }
+
+ __ Bind(&conditions_on_positions_validated);
+
+ if (!optimizations.GetSourceIsNotNull()) {
+ // Bail out if the source is null.
+ __ Beqz(src, intrinsic_slow_path->GetEntryLabel());
+ }
+
+ if (!optimizations.GetDestinationIsNotNull() && !optimizations.GetDestinationIsSource()) {
+ // Bail out if the destination is null.
+ __ Beqz(dest, intrinsic_slow_path->GetEntryLabel());
+ }
+
+ // We have already checked in the LocationsBuilder for the constant case.
+ if (!length.IsConstant()) {
+ // Merge the following two comparisons into one:
+ // If the length is negative, bail out (delegate to libcore's native implementation).
+ // If the length >= 128 then (currently) prefer native implementation.
+ __ Li(temp1, kSystemArrayCopyThreshold);
+ __ Bgeu(length.AsRegister<XRegister>(), temp1, intrinsic_slow_path->GetEntryLabel());
+ }
+ // Validity checks: source.
+ CheckSystemArrayCopyPosition(assembler,
+ src,
+ src_pos,
+ length,
+ intrinsic_slow_path,
+ temp1,
+ temp2,
+ optimizations.GetCountIsSourceLength(),
+ /*position_sign_checked=*/ false);
+
+ // Validity checks: dest.
+ bool dest_position_sign_checked = optimizations.GetSourcePositionIsDestinationPosition();
+ CheckSystemArrayCopyPosition(assembler,
+ dest,
+ dest_pos,
+ length,
+ intrinsic_slow_path,
+ temp1,
+ temp2,
+ optimizations.GetCountIsDestinationLength(),
+ dest_position_sign_checked);
+ {
+ // We use a block to end the scratch scope before the write barrier, thus
+ // freeing the temporary registers so they can be used in `MarkGCCard`.
+ ScratchRegisterScope srs(assembler);
+ bool emit_rb = codegen_->EmitBakerReadBarrier();
+ XRegister temp3 =
+ emit_rb ? locations->GetTemp(2).AsRegister<XRegister>() : srs.AllocateXRegister();
+
+ auto check_non_primitive_array_class = [&](XRegister klass, XRegister temp) {
+ // No read barrier is needed for reading a chain of constant references for comparing
+ // with null, or for reading a constant primitive value, see `ReadBarrierOption`.
+ // /* HeapReference<Class> */ temp = klass->component_type_
+ __ Loadwu(temp, klass, component_offset);
+ codegen_->MaybeUnpoisonHeapReference(temp);
+ __ Beqz(temp, intrinsic_slow_path->GetEntryLabel());
+ // /* uint16_t */ temp = static_cast<uint16_t>(klass->primitive_type_);
+ __ Loadhu(temp, temp, primitive_offset);
+ static_assert(Primitive::kPrimNot == 0, "Expected 0 for kPrimNot");
+ __ Bnez(temp, intrinsic_slow_path->GetEntryLabel());
+ };
+
+ if (!optimizations.GetDoesNotNeedTypeCheck()) {
+ // Check whether all elements of the source array are assignable to the component
+ // type of the destination array. We do two checks: the classes are the same,
+ // or the destination is Object[]. If none of these checks succeed, we go to the
+ // slow path.
+
+ if (emit_rb) {
+ // /* HeapReference<Class> */ temp1 = dest->klass_
+ codegen_->GenerateFieldLoadWithBakerReadBarrier(invoke,
+ Location::RegisterLocation(temp1),
+ dest,
+ class_offset,
+ Location::RegisterLocation(temp3),
+ /* needs_null_check= */ false);
+ // /* HeapReference<Class> */ temp2 = src->klass_
+ codegen_->GenerateFieldLoadWithBakerReadBarrier(invoke,
+ Location::RegisterLocation(temp2),
+ src,
+ class_offset,
+ Location::RegisterLocation(temp3),
+ /* needs_null_check= */ false);
+ } else {
+ // /* HeapReference<Class> */ temp1 = dest->klass_
+ __ Loadwu(temp1, dest, class_offset);
+ codegen_->MaybeUnpoisonHeapReference(temp1);
+ // /* HeapReference<Class> */ temp2 = src->klass_
+ __ Loadwu(temp2, src, class_offset);
+ codegen_->MaybeUnpoisonHeapReference(temp2);
+ }
+
+ if (optimizations.GetDestinationIsTypedObjectArray()) {
+ DCHECK(optimizations.GetDestinationIsNonPrimitiveArray());
+ Riscv64Label do_copy;
+ // For class match, we can skip the source type check regardless of the optimization flag.
+ __ Beq(temp1, temp2, &do_copy);
+ // /* HeapReference<Class> */ temp1 = temp1->component_type_
+ // No read barrier is needed for reading a chain of constant references
+ // for comparing with null, see `ReadBarrierOption`.
+ __ Loadwu(temp1, temp1, component_offset);
+ codegen_->MaybeUnpoisonHeapReference(temp1);
+ // /* HeapReference<Class> */ temp1 = temp1->super_class_
+ __ Loadwu(temp1, temp1, super_offset);
+ // No need to unpoison the result, we're comparing against null.
+ __ Bnez(temp1, intrinsic_slow_path->GetEntryLabel());
+ // Bail out if the source is not a non primitive array.
+ if (!optimizations.GetSourceIsNonPrimitiveArray()) {
+ check_non_primitive_array_class(temp2, temp3);
+ }
+ __ Bind(&do_copy);
+ } else {
+ DCHECK(!optimizations.GetDestinationIsTypedObjectArray());
+ // For class match, we can skip the array type check completely if at least one of source
+ // and destination is known to be a non primitive array, otherwise one check is enough.
+ __ Bne(temp1, temp2, intrinsic_slow_path->GetEntryLabel());
+ if (!optimizations.GetDestinationIsNonPrimitiveArray() &&
+ !optimizations.GetSourceIsNonPrimitiveArray()) {
+ check_non_primitive_array_class(temp2, temp3);
+ }
+ }
+ } else if (!optimizations.GetSourceIsNonPrimitiveArray()) {
+ DCHECK(optimizations.GetDestinationIsNonPrimitiveArray());
+ // Bail out if the source is not a non primitive array.
+ // No read barrier is needed for reading a chain of constant references for comparing
+ // with null, or for reading a constant primitive value, see `ReadBarrierOption`.
+ // /* HeapReference<Class> */ temp1 = src->klass_
+ __ Loadwu(temp2, src, class_offset);
+ codegen_->MaybeUnpoisonHeapReference(temp2);
+ check_non_primitive_array_class(temp2, temp3);
+ }
+
+ if (length.IsConstant() && length.GetConstant()->AsIntConstant()->GetValue() == 0) {
+ // Zero constant length: no need to emit the loop code at all.
+ } else {
+ XRegister src_curr_addr = temp1;
+ XRegister dst_curr_addr = temp2;
+ XRegister src_stop_addr = temp3;
+ Riscv64Label done;
+ const DataType::Type type = DataType::Type::kReference;
+ const int32_t element_size = DataType::Size(type);
+
+ if (length.IsRegister()) {
+ // Don't enter the copy loop if the length is zero.
+ __ Beqz(length.AsRegister<XRegister>(), &done);
+ }
+
+ XRegister tmp = kNoXRegister;
+ SlowPathCodeRISCV64* read_barrier_slow_path = nullptr;
+ if (emit_rb) {
+ // TODO: Also convert this intrinsic to the IsGcMarking strategy?
+
+ // SystemArrayCopy implementation for Baker read barriers (see
+ // also CodeGeneratorRISCV64::GenerateReferenceLoadWithBakerReadBarrier):
+ //
+ // uint32_t rb_state = Lockword(src->monitor_).ReadBarrierState();
+ // lfence; // Load fence or artificial data dependency to prevent load-load reordering
+ // bool is_gray = (rb_state == ReadBarrier::GrayState());
+ // if (is_gray) {
+ // // Slow-path copy.
+ // do {
+ // *dest_ptr++ = MaybePoison(ReadBarrier::Mark(MaybeUnpoison(*src_ptr++)));
+ // } while (src_ptr != end_ptr)
+ // } else {
+ // // Fast-path copy.
+ // do {
+ // *dest_ptr++ = *src_ptr++;
+ // } while (src_ptr != end_ptr)
+ // }
+
+ // /* uint32_t */ monitor = src->monitor_
+ tmp = locations->GetTemp(3).AsRegister<XRegister>();
+ __ Loadwu(tmp, src, monitor_offset);
+ // /* LockWord */ lock_word = LockWord(monitor)
+ static_assert(sizeof(LockWord) == sizeof(int32_t),
+ "art::LockWord and int32_t have different sizes.");
+
+ // Shift the RB state bit to the sign bit while also clearing the low 32 bits
+ // for the fake dependency below.
+ static_assert(LockWord::kReadBarrierStateShift < 31);
+ __ Slli(tmp, tmp, 63 - LockWord::kReadBarrierStateShift);
+
+ // Introduce a dependency on the lock_word including rb_state, to prevent load-load
+ // reordering, and without using a memory barrier (which would be more expensive).
+ // `src` is unchanged by this operation (since Adduw adds low 32 bits
+ // which are zero after left shift), but its value now depends on `tmp`.
+ __ AddUw(src, tmp, src);
+
+ // Slow path used to copy array when `src` is gray.
+ read_barrier_slow_path = new (codegen_->GetScopedAllocator())
+ ReadBarrierSystemArrayCopySlowPathRISCV64(invoke, Location::RegisterLocation(tmp));
+ codegen_->AddSlowPath(read_barrier_slow_path);
+ }
+
+ // Compute base source address, base destination address, and end source address for
+ // System.arraycopy* intrinsics in `src_base`, `dst_base` and `src_end` respectively.
+ // Note that `src_curr_addr` is computed from `src` (and `src_pos`) here, and
+ // thus honors the artificial dependency of `src` on `tmp` for read barriers.
+ GenSystemArrayCopyAddresses(codegen_,
+ type,
+ src,
+ src_pos,
+ dest,
+ dest_pos,
+ length,
+ src_curr_addr,
+ dst_curr_addr,
+ src_stop_addr);
+
+ if (emit_rb) {
+ // Given the numeric representation, it's enough to check the low bit of the RB state.
+ static_assert(ReadBarrier::NonGrayState() == 0, "Expecting non-gray to have value 0");
+ static_assert(ReadBarrier::GrayState() == 1, "Expecting gray to have value 1");
+ DCHECK_NE(tmp, kNoXRegister);
+ __ Bltz(tmp, read_barrier_slow_path->GetEntryLabel());
+ } else {
+ // After allocating the last scratch register, we cannot use macro load/store instructions
+ // such as `Loadwu()` and need to use raw instructions. However, all offsets below are 0.
+ DCHECK_EQ(tmp, kNoXRegister);
+ tmp = srs.AllocateXRegister();
+ }
+
+ // Iterate over the arrays and do a raw copy of the objects. We don't need to
+ // poison/unpoison.
+ Riscv64Label loop;
+ __ Bind(&loop);
+ __ Lwu(tmp, src_curr_addr, 0);
+ __ Sw(tmp, dst_curr_addr, 0);
+ __ Addi(src_curr_addr, src_curr_addr, element_size);
+ __ Addi(dst_curr_addr, dst_curr_addr, element_size);
+ // Bare: `TMP` shall not be clobbered.
+ __ Bne(src_curr_addr, src_stop_addr, &loop, /*is_bare=*/ true);
+ __ Bind(&done);
+
+ if (emit_rb) {
+ DCHECK(read_barrier_slow_path != nullptr);
+ __ Bind(read_barrier_slow_path->GetExitLabel());
+ }
+ }
+ }
+
+ // We only need one card marking on the destination array.
+ codegen_->MarkGCCard(dest, XRegister(kNoXRegister), /* emit_null_check= */ false);
+
+ __ Bind(intrinsic_slow_path->GetExitLabel());
+}
+
+enum class GetAndUpdateOp {
+ kSet,
+ kAdd,
+ kAnd,
+ kOr,
+ kXor
+};
+
+// Generate a GetAndUpdate operation.
+//
+// Only 32-bit and 64-bit atomics are currently supported, so smaller types need
+// special handling. The caller emits code to prepare aligned `ptr` and adjusted `arg`
+// and extract the needed bits from `old_value`. For bitwise operations, no extra
+// handling is needed here. For `GetAndUpdateOp::kSet` and `GetAndUpdateOp::kAdd` we
+// also use a special LR/SC sequence that uses a `mask` to update only the desired bits.
+// Note: The `mask` must contain the bits to keep for `GetAndUpdateOp::kSet` and
+// the bits to replace for `GetAndUpdateOp::kAdd`.
+static void GenerateGetAndUpdate(CodeGeneratorRISCV64* codegen,
+ GetAndUpdateOp get_and_update_op,
+ DataType::Type type,
+ std::memory_order order,
+ XRegister ptr,
+ XRegister arg,
+ XRegister old_value,
+ XRegister mask,
+ XRegister temp) {
+ DCHECK_EQ(mask != kNoXRegister, temp != kNoXRegister);
+ DCHECK_IMPLIES(mask != kNoXRegister, type == DataType::Type::kInt32);
+ DCHECK_IMPLIES(
+ mask != kNoXRegister,
+ (get_and_update_op == GetAndUpdateOp::kSet) || (get_and_update_op == GetAndUpdateOp::kAdd));
+ Riscv64Assembler* assembler = codegen->GetAssembler();
+ AqRl amo_aqrl = GetAmoAqRl(order);
+ switch (get_and_update_op) {
+ case GetAndUpdateOp::kSet:
+ if (type == DataType::Type::kInt64) {
+ __ AmoSwapD(old_value, arg, ptr, amo_aqrl);
+ } else if (mask == kNoXRegister) {
+ DCHECK_EQ(type, DataType::Type::kInt32);
+ __ AmoSwapW(old_value, arg, ptr, amo_aqrl);
+ } else {
+ DCHECK_EQ(type, DataType::Type::kInt32);
+ DCHECK_NE(temp, kNoXRegister);
+ auto [load_aqrl, store_aqrl] = GetLrScAqRl(order);
+ Riscv64Label retry;
+ __ Bind(&retry);
+ __ LrW(old_value, ptr, load_aqrl);
+ __ And(temp, old_value, mask);
+ __ Or(temp, temp, arg);
+ __ ScW(temp, temp, ptr, store_aqrl);
+ __ Bnez(temp, &retry, /*is_bare=*/ true); // Bare: `TMP` shall not be clobbered.
+ }
+ break;
+ case GetAndUpdateOp::kAdd:
+ if (type == DataType::Type::kInt64) {
+ __ AmoAddD(old_value, arg, ptr, amo_aqrl);
+ } else if (mask == kNoXRegister) {
+ DCHECK_EQ(type, DataType::Type::kInt32);
+ __ AmoAddW(old_value, arg, ptr, amo_aqrl);
+ } else {
+ DCHECK_EQ(type, DataType::Type::kInt32);
+ DCHECK_NE(temp, kNoXRegister);
+ auto [load_aqrl, store_aqrl] = GetLrScAqRl(order);
+ Riscv64Label retry;
+ __ Bind(&retry);
+ __ LrW(old_value, ptr, load_aqrl);
+ __ Add(temp, old_value, arg);
+ // We use `(A ^ B) ^ A == B` and with the masking `((A ^ B) & mask) ^ A`, the result
+ // contains bits from `B` for bits specified in `mask` and bits from `A` elsewhere.
+ // Note: These instructions directly depend on each other, so it's not necessarily the
+ // fastest approach, but for `(A & ~mask) | (B & mask)` we would need an extra register for
+ // `~mask` because ANDN is not in the "I" instruction set as required for a LR/SC sequence.
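+ // Example: for A = 0xABCD, B = 0x1234 and mask = 0x00FF, ((A ^ B) & mask) ^ A =
+ // (0xB9F9 & 0x00FF) ^ 0xABCD = 0xAB34, i.e. B's bits inside the mask and A's bits outside it.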
+ __ Xor(temp, temp, old_value);
+ __ And(temp, temp, mask);
+ __ Xor(temp, temp, old_value);
+ __ ScW(temp, temp, ptr, store_aqrl);
+ __ Bnez(temp, &retry, /*is_bare=*/ true); // Bare: `TMP` shall not be clobbered.
+ }
+ break;
+ case GetAndUpdateOp::kAnd:
+ if (type == DataType::Type::kInt64) {
+ __ AmoAndD(old_value, arg, ptr, amo_aqrl);
+ } else {
+ DCHECK_EQ(type, DataType::Type::kInt32);
+ __ AmoAndW(old_value, arg, ptr, amo_aqrl);
+ }
+ break;
+ case GetAndUpdateOp::kOr:
+ if (type == DataType::Type::kInt64) {
+ __ AmoOrD(old_value, arg, ptr, amo_aqrl);
+ } else {
+ DCHECK_EQ(type, DataType::Type::kInt32);
+ __ AmoOrW(old_value, arg, ptr, amo_aqrl);
+ }
+ break;
+ case GetAndUpdateOp::kXor:
+ if (type == DataType::Type::kInt64) {
+ __ AmoXorD(old_value, arg, ptr, amo_aqrl);
+ } else {
+ DCHECK_EQ(type, DataType::Type::kInt32);
+ __ AmoXorW(old_value, arg, ptr, amo_aqrl);
+ }
+ break;
+ }
+}
+
+static void CreateUnsafeGetLocations(ArenaAllocator* allocator,
+ HInvoke* invoke,
+ CodeGeneratorRISCV64* codegen) {
+ bool can_call = codegen->EmitReadBarrier() && IsUnsafeGetReference(invoke);
+ LocationSummary* locations = new (allocator) LocationSummary(
+ invoke,
+ can_call ? LocationSummary::kCallOnSlowPath : LocationSummary::kNoCall,
+ kIntrinsified);
+ if (can_call && kUseBakerReadBarrier) {
+ locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty()); // No caller-save registers.
+ }
+ locations->SetInAt(0, Location::NoLocation()); // Unused receiver.
+ locations->SetInAt(1, Location::RequiresRegister());
+ locations->SetInAt(2, Location::RequiresRegister());
+ locations->SetOut(Location::RequiresRegister(),
+ (can_call ? Location::kOutputOverlap : Location::kNoOutputOverlap));
+}
+
+static void GenUnsafeGet(HInvoke* invoke,
+ CodeGeneratorRISCV64* codegen,
+ std::memory_order order,
+ DataType::Type type) {
+ DCHECK((type == DataType::Type::kInt8) ||
+ (type == DataType::Type::kInt32) ||
+ (type == DataType::Type::kInt64) ||
+ (type == DataType::Type::kReference));
+ LocationSummary* locations = invoke->GetLocations();
+ Location object_loc = locations->InAt(1);
+ XRegister object = object_loc.AsRegister<XRegister>(); // Object pointer.
+ Location offset_loc = locations->InAt(2);
+ XRegister offset = offset_loc.AsRegister<XRegister>(); // Long offset.
+ Location out_loc = locations->Out();
+ XRegister out = out_loc.AsRegister<XRegister>();
+
+ bool seq_cst_barrier = (order == std::memory_order_seq_cst);
+ bool acquire_barrier = seq_cst_barrier || (order == std::memory_order_acquire);
+ DCHECK(acquire_barrier || order == std::memory_order_relaxed);
+
+ if (seq_cst_barrier) {
+ codegen->GenerateMemoryBarrier(MemBarrierKind::kAnyAny);
+ }
+
+ if (type == DataType::Type::kReference && codegen->EmitBakerReadBarrier()) {
+ // JdkUnsafeGetReference/JdkUnsafeGetReferenceVolatile with Baker's read barrier case.
+ // TODO(riscv64): Revisit when we add checking if the holder is black.
+ Location temp = Location::NoLocation();
+ codegen->GenerateReferenceLoadWithBakerReadBarrier(invoke,
+ out_loc,
+ object,
+ /*offset=*/ 0,
+ /*index=*/ offset_loc,
+ temp,
+ /*needs_null_check=*/ false);
+ } else {
+ // Other cases.
+ Riscv64Assembler* assembler = codegen->GetAssembler();
+ __ Add(out, object, offset);
+ codegen->GetInstructionVisitor()->Load(out_loc, out, /*offset=*/ 0, type);
+
+ if (type == DataType::Type::kReference) {
+ codegen->MaybeGenerateReadBarrierSlow(
+ invoke, out_loc, out_loc, object_loc, /*offset=*/ 0u, /*index=*/ offset_loc);
+ }
+ }
+
+ if (acquire_barrier) {
+ codegen->GenerateMemoryBarrier(MemBarrierKind::kLoadAny);
+ }
+}
+
+void IntrinsicLocationsBuilderRISCV64::VisitUnsafeGet(HInvoke* invoke) {
+ VisitJdkUnsafeGet(invoke);
+}
+
+void IntrinsicCodeGeneratorRISCV64::VisitUnsafeGet(HInvoke* invoke) {
+ VisitJdkUnsafeGet(invoke);
+}
+
+void IntrinsicLocationsBuilderRISCV64::VisitUnsafeGetVolatile(HInvoke* invoke) {
+ VisitJdkUnsafeGetVolatile(invoke);
+}
+
+void IntrinsicCodeGeneratorRISCV64::VisitUnsafeGetVolatile(HInvoke* invoke) {
+ VisitJdkUnsafeGetVolatile(invoke);
+}
+
+void IntrinsicLocationsBuilderRISCV64::VisitUnsafeGetObject(HInvoke* invoke) {
+ VisitJdkUnsafeGetReference(invoke);
+}
+
+void IntrinsicCodeGeneratorRISCV64::VisitUnsafeGetObject(HInvoke* invoke) {
+ VisitJdkUnsafeGetReference(invoke);
+}
+
+void IntrinsicLocationsBuilderRISCV64::VisitUnsafeGetObjectVolatile(HInvoke* invoke) {
+ VisitJdkUnsafeGetReferenceVolatile(invoke);
+}
+
+void IntrinsicCodeGeneratorRISCV64::VisitUnsafeGetObjectVolatile(HInvoke* invoke) {
+ VisitJdkUnsafeGetReferenceVolatile(invoke);
+}
+
+void IntrinsicLocationsBuilderRISCV64::VisitUnsafeGetLong(HInvoke* invoke) {
+ VisitJdkUnsafeGetLong(invoke);
+}
+
+void IntrinsicCodeGeneratorRISCV64::VisitUnsafeGetLong(HInvoke* invoke) {
+ VisitJdkUnsafeGetLong(invoke);
+}
+
+void IntrinsicLocationsBuilderRISCV64::VisitUnsafeGetLongVolatile(HInvoke* invoke) {
+ VisitJdkUnsafeGetLongVolatile(invoke);
+}
+
+void IntrinsicCodeGeneratorRISCV64::VisitUnsafeGetLongVolatile(HInvoke* invoke) {
+ VisitJdkUnsafeGetLongVolatile(invoke);
+}
+
+void IntrinsicLocationsBuilderRISCV64::VisitUnsafeGetByte(HInvoke* invoke) {
+ VisitJdkUnsafeGetByte(invoke);
+}
+
+void IntrinsicCodeGeneratorRISCV64::VisitUnsafeGetByte(HInvoke* invoke) {
+ VisitJdkUnsafeGetByte(invoke);
+}
+
+void IntrinsicLocationsBuilderRISCV64::VisitJdkUnsafeGet(HInvoke* invoke) {
+ CreateUnsafeGetLocations(allocator_, invoke, codegen_);
+}
+
+void IntrinsicCodeGeneratorRISCV64::VisitJdkUnsafeGet(HInvoke* invoke) {
+ GenUnsafeGet(invoke, codegen_, std::memory_order_relaxed, DataType::Type::kInt32);
+}
+
+void IntrinsicLocationsBuilderRISCV64::VisitJdkUnsafeGetAcquire(HInvoke* invoke) {
+ CreateUnsafeGetLocations(allocator_, invoke, codegen_);
+}
+
+void IntrinsicCodeGeneratorRISCV64::VisitJdkUnsafeGetAcquire(HInvoke* invoke) {
+ GenUnsafeGet(invoke, codegen_, std::memory_order_acquire, DataType::Type::kInt32);
+}
+
+void IntrinsicLocationsBuilderRISCV64::VisitJdkUnsafeGetVolatile(HInvoke* invoke) {
+ CreateUnsafeGetLocations(allocator_, invoke, codegen_);
+}
+
+void IntrinsicCodeGeneratorRISCV64::VisitJdkUnsafeGetVolatile(HInvoke* invoke) {
+ GenUnsafeGet(invoke, codegen_, std::memory_order_seq_cst, DataType::Type::kInt32);
+}
+
+void IntrinsicLocationsBuilderRISCV64::VisitJdkUnsafeGetReference(HInvoke* invoke) {
+ CreateUnsafeGetLocations(allocator_, invoke, codegen_);
+}
+
+void IntrinsicCodeGeneratorRISCV64::VisitJdkUnsafeGetReference(HInvoke* invoke) {
+ GenUnsafeGet(invoke, codegen_, std::memory_order_relaxed, DataType::Type::kReference);
+}
+
+void IntrinsicLocationsBuilderRISCV64::VisitJdkUnsafeGetReferenceAcquire(HInvoke* invoke) {
+ CreateUnsafeGetLocations(allocator_, invoke, codegen_);
+}
+
+void IntrinsicCodeGeneratorRISCV64::VisitJdkUnsafeGetReferenceAcquire(HInvoke* invoke) {
+ GenUnsafeGet(invoke, codegen_, std::memory_order_acquire, DataType::Type::kReference);
+}
+
+void IntrinsicLocationsBuilderRISCV64::VisitJdkUnsafeGetReferenceVolatile(HInvoke* invoke) {
+ CreateUnsafeGetLocations(allocator_, invoke, codegen_);
+}
+
+void IntrinsicCodeGeneratorRISCV64::VisitJdkUnsafeGetReferenceVolatile(HInvoke* invoke) {
+ GenUnsafeGet(invoke, codegen_, std::memory_order_seq_cst, DataType::Type::kReference);
+}
+
+void IntrinsicLocationsBuilderRISCV64::VisitJdkUnsafeGetLong(HInvoke* invoke) {
+ CreateUnsafeGetLocations(allocator_, invoke, codegen_);
+}
+
+void IntrinsicCodeGeneratorRISCV64::VisitJdkUnsafeGetLong(HInvoke* invoke) {
+ GenUnsafeGet(invoke, codegen_, std::memory_order_relaxed, DataType::Type::kInt64);
+}
+
+void IntrinsicLocationsBuilderRISCV64::VisitJdkUnsafeGetLongAcquire(HInvoke* invoke) {
+ CreateUnsafeGetLocations(allocator_, invoke, codegen_);
+}
+
+void IntrinsicCodeGeneratorRISCV64::VisitJdkUnsafeGetLongAcquire(HInvoke* invoke) {
+ GenUnsafeGet(invoke, codegen_, std::memory_order_acquire, DataType::Type::kInt64);
+}
+
+void IntrinsicLocationsBuilderRISCV64::VisitJdkUnsafeGetLongVolatile(HInvoke* invoke) {
+ CreateUnsafeGetLocations(allocator_, invoke, codegen_);
+}
+
+void IntrinsicCodeGeneratorRISCV64::VisitJdkUnsafeGetLongVolatile(HInvoke* invoke) {
+ GenUnsafeGet(invoke, codegen_, std::memory_order_seq_cst, DataType::Type::kInt64);
+}
+
+void IntrinsicLocationsBuilderRISCV64::VisitJdkUnsafeGetByte(HInvoke* invoke) {
+ CreateUnsafeGetLocations(allocator_, invoke, codegen_);
+}
+
+void IntrinsicCodeGeneratorRISCV64::VisitJdkUnsafeGetByte(HInvoke* invoke) {
+ GenUnsafeGet(invoke, codegen_, std::memory_order_relaxed, DataType::Type::kInt8);
+}
+
+static void CreateUnsafePutLocations(ArenaAllocator* allocator, HInvoke* invoke) {
+ LocationSummary* locations =
+ new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
+ locations->SetInAt(0, Location::NoLocation()); // Unused receiver.
+ locations->SetInAt(1, Location::RequiresRegister());
+ locations->SetInAt(2, Location::RequiresRegister());
+ locations->SetInAt(3, Location::RequiresRegister());
+}
+
+static void GenUnsafePut(HInvoke* invoke,
+ CodeGeneratorRISCV64* codegen,
+ std::memory_order order,
+ DataType::Type type) {
+ Riscv64Assembler* assembler = codegen->GetAssembler();
+ LocationSummary* locations = invoke->GetLocations();
+ XRegister base = locations->InAt(1).AsRegister<XRegister>(); // Object pointer.
+ XRegister offset = locations->InAt(2).AsRegister<XRegister>(); // Long offset.
+ Location value = locations->InAt(3);
+
+ {
+ // We use a block to end the scratch scope before the write barrier, thus
+ // freeing the temporary registers so they can be used in `MarkGCCard()`.
+ ScratchRegisterScope srs(assembler);
+ XRegister address = srs.AllocateXRegister();
+ __ Add(address, base, offset);
+ GenerateSet(codegen, order, value, address, /*offset=*/ 0, type);
+ }
+
+ if (type == DataType::Type::kReference) {
+ bool value_can_be_null = true; // TODO: Worth finding out this information?
+ codegen->MarkGCCard(base, value.AsRegister<XRegister>(), value_can_be_null);
+ }
+}
+
+void IntrinsicLocationsBuilderRISCV64::VisitUnsafePut(HInvoke* invoke) {
+ VisitJdkUnsafePut(invoke);
+}
+
+void IntrinsicCodeGeneratorRISCV64::VisitUnsafePut(HInvoke* invoke) {
+ VisitJdkUnsafePut(invoke);
+}
+
+void IntrinsicLocationsBuilderRISCV64::VisitUnsafePutOrdered(HInvoke* invoke) {
+ VisitJdkUnsafePutOrdered(invoke);
+}
+
+void IntrinsicCodeGeneratorRISCV64::VisitUnsafePutOrdered(HInvoke* invoke) {
+ VisitJdkUnsafePutOrdered(invoke);
+}
+
+void IntrinsicLocationsBuilderRISCV64::VisitUnsafePutVolatile(HInvoke* invoke) {
+ VisitJdkUnsafePutVolatile(invoke);
+}
+
+void IntrinsicCodeGeneratorRISCV64::VisitUnsafePutVolatile(HInvoke* invoke) {
+ VisitJdkUnsafePutVolatile(invoke);
+}
+
+void IntrinsicLocationsBuilderRISCV64::VisitUnsafePutObject(HInvoke* invoke) {
+ VisitJdkUnsafePutReference(invoke);
+}
+
+void IntrinsicCodeGeneratorRISCV64::VisitUnsafePutObject(HInvoke* invoke) {
+ VisitJdkUnsafePutReference(invoke);
+}
+
+void IntrinsicLocationsBuilderRISCV64::VisitUnsafePutObjectOrdered(HInvoke* invoke) {
+ VisitJdkUnsafePutObjectOrdered(invoke);
+}
+
+void IntrinsicCodeGeneratorRISCV64::VisitUnsafePutObjectOrdered(HInvoke* invoke) {
+ VisitJdkUnsafePutObjectOrdered(invoke);
+}
+
+void IntrinsicLocationsBuilderRISCV64::VisitUnsafePutObjectVolatile(HInvoke* invoke) {
+ VisitJdkUnsafePutReferenceVolatile(invoke);
+}
+
+void IntrinsicCodeGeneratorRISCV64::VisitUnsafePutObjectVolatile(HInvoke* invoke) {
+ VisitJdkUnsafePutReferenceVolatile(invoke);
+}
+
+void IntrinsicLocationsBuilderRISCV64::VisitUnsafePutLong(HInvoke* invoke) {
+ VisitJdkUnsafePutLong(invoke);
+}
+
+void IntrinsicCodeGeneratorRISCV64::VisitUnsafePutLong(HInvoke* invoke) {
+ VisitJdkUnsafePutLong(invoke);
+}
+
+void IntrinsicLocationsBuilderRISCV64::VisitUnsafePutLongOrdered(HInvoke* invoke) {
+ VisitJdkUnsafePutLongOrdered(invoke);
+}
+
+void IntrinsicCodeGeneratorRISCV64::VisitUnsafePutLongOrdered(HInvoke* invoke) {
+ VisitJdkUnsafePutLongOrdered(invoke);
+}
+
+void IntrinsicLocationsBuilderRISCV64::VisitUnsafePutLongVolatile(HInvoke* invoke) {
+ VisitJdkUnsafePutLongVolatile(invoke);
+}
+
+void IntrinsicCodeGeneratorRISCV64::VisitUnsafePutLongVolatile(HInvoke* invoke) {
+ VisitJdkUnsafePutLongVolatile(invoke);
+}
+
+void IntrinsicLocationsBuilderRISCV64::VisitUnsafePutByte(HInvoke* invoke) {
+ VisitJdkUnsafePutByte(invoke);
+}
+
+void IntrinsicCodeGeneratorRISCV64::VisitUnsafePutByte(HInvoke* invoke) {
+ VisitJdkUnsafePutByte(invoke);
+}
+
+void IntrinsicLocationsBuilderRISCV64::VisitJdkUnsafePut(HInvoke* invoke) {
+ CreateUnsafePutLocations(allocator_, invoke);
+}
+
+void IntrinsicCodeGeneratorRISCV64::VisitJdkUnsafePut(HInvoke* invoke) {
+ GenUnsafePut(invoke, codegen_, std::memory_order_relaxed, DataType::Type::kInt32);
+}
+
+void IntrinsicLocationsBuilderRISCV64::VisitJdkUnsafePutOrdered(HInvoke* invoke) {
+ CreateUnsafePutLocations(allocator_, invoke);
+}
+
+void IntrinsicCodeGeneratorRISCV64::VisitJdkUnsafePutOrdered(HInvoke* invoke) {
+ GenUnsafePut(invoke, codegen_, std::memory_order_release, DataType::Type::kInt32);
+}
+
+void IntrinsicLocationsBuilderRISCV64::VisitJdkUnsafePutRelease(HInvoke* invoke) {
+ CreateUnsafePutLocations(allocator_, invoke);
+}
+
+void IntrinsicCodeGeneratorRISCV64::VisitJdkUnsafePutRelease(HInvoke* invoke) {
+ GenUnsafePut(invoke, codegen_, std::memory_order_release, DataType::Type::kInt32);
+}
+
+void IntrinsicLocationsBuilderRISCV64::VisitJdkUnsafePutVolatile(HInvoke* invoke) {
+ CreateUnsafePutLocations(allocator_, invoke);
+}
+
+void IntrinsicCodeGeneratorRISCV64::VisitJdkUnsafePutVolatile(HInvoke* invoke) {
+ GenUnsafePut(invoke, codegen_, std::memory_order_seq_cst, DataType::Type::kInt32);
+}
+
+void IntrinsicLocationsBuilderRISCV64::VisitJdkUnsafePutReference(HInvoke* invoke) {
+ CreateUnsafePutLocations(allocator_, invoke);
+}
+
+void IntrinsicCodeGeneratorRISCV64::VisitJdkUnsafePutReference(HInvoke* invoke) {
+ GenUnsafePut(invoke, codegen_, std::memory_order_relaxed, DataType::Type::kReference);
+}
+
+void IntrinsicLocationsBuilderRISCV64::VisitJdkUnsafePutObjectOrdered(HInvoke* invoke) {
+ CreateUnsafePutLocations(allocator_, invoke);
+}
+
+void IntrinsicCodeGeneratorRISCV64::VisitJdkUnsafePutObjectOrdered(HInvoke* invoke) {
+ GenUnsafePut(invoke, codegen_, std::memory_order_release, DataType::Type::kReference);
+}
+
+void IntrinsicLocationsBuilderRISCV64::VisitJdkUnsafePutReferenceRelease(HInvoke* invoke) {
+ CreateUnsafePutLocations(allocator_, invoke);
+}
+
+void IntrinsicCodeGeneratorRISCV64::VisitJdkUnsafePutReferenceRelease(HInvoke* invoke) {
+ GenUnsafePut(invoke, codegen_, std::memory_order_release, DataType::Type::kReference);
+}
+
+void IntrinsicLocationsBuilderRISCV64::VisitJdkUnsafePutReferenceVolatile(HInvoke* invoke) {
+ CreateUnsafePutLocations(allocator_, invoke);
+}
+
+void IntrinsicCodeGeneratorRISCV64::VisitJdkUnsafePutReferenceVolatile(HInvoke* invoke) {
+ GenUnsafePut(invoke, codegen_, std::memory_order_seq_cst, DataType::Type::kReference);
+}
+
+void IntrinsicLocationsBuilderRISCV64::VisitJdkUnsafePutLong(HInvoke* invoke) {
+ CreateUnsafePutLocations(allocator_, invoke);
+}
+
+void IntrinsicCodeGeneratorRISCV64::VisitJdkUnsafePutLong(HInvoke* invoke) {
+ GenUnsafePut(invoke, codegen_, std::memory_order_relaxed, DataType::Type::kInt64);
+}
+
+void IntrinsicLocationsBuilderRISCV64::VisitJdkUnsafePutLongOrdered(HInvoke* invoke) {
+ CreateUnsafePutLocations(allocator_, invoke);
+}
+
+void IntrinsicCodeGeneratorRISCV64::VisitJdkUnsafePutLongOrdered(HInvoke* invoke) {
+ GenUnsafePut(invoke, codegen_, std::memory_order_release, DataType::Type::kInt64);
+}
+
+void IntrinsicLocationsBuilderRISCV64::VisitJdkUnsafePutLongRelease(HInvoke* invoke) {
+ CreateUnsafePutLocations(allocator_, invoke);
+}
+
+void IntrinsicCodeGeneratorRISCV64::VisitJdkUnsafePutLongRelease(HInvoke* invoke) {
+ GenUnsafePut(invoke, codegen_, std::memory_order_release, DataType::Type::kInt64);
+}
+
+void IntrinsicLocationsBuilderRISCV64::VisitJdkUnsafePutLongVolatile(HInvoke* invoke) {
+ CreateUnsafePutLocations(allocator_, invoke);
+}
+
+void IntrinsicCodeGeneratorRISCV64::VisitJdkUnsafePutLongVolatile(HInvoke* invoke) {
+ GenUnsafePut(invoke, codegen_, std::memory_order_seq_cst, DataType::Type::kInt64);
+}
+
+void IntrinsicLocationsBuilderRISCV64::VisitJdkUnsafePutByte(HInvoke* invoke) {
+ CreateUnsafePutLocations(allocator_, invoke);
+}
+
+void IntrinsicCodeGeneratorRISCV64::VisitJdkUnsafePutByte(HInvoke* invoke) {
+ GenUnsafePut(invoke, codegen_, std::memory_order_relaxed, DataType::Type::kInt8);
+}
+
+static void CreateUnsafeCASLocations(ArenaAllocator* allocator,
+ HInvoke* invoke,
+ CodeGeneratorRISCV64* codegen) {
+ const bool can_call = codegen->EmitReadBarrier() && IsUnsafeCASReference(invoke);
+ LocationSummary* locations = new (allocator) LocationSummary(
+ invoke,
+ can_call ? LocationSummary::kCallOnSlowPath : LocationSummary::kNoCall,
+ kIntrinsified);
+ if (can_call && kUseBakerReadBarrier) {
+ locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty()); // No caller-save registers.
+ }
+ locations->SetInAt(0, Location::NoLocation()); // Unused receiver.
+ locations->SetInAt(1, Location::RequiresRegister());
+ locations->SetInAt(2, Location::RequiresRegister());
+ locations->SetInAt(3, Location::RequiresRegister());
+ locations->SetInAt(4, Location::RequiresRegister());
+
+ locations->SetOut(Location::RequiresRegister());
+}
+
+static void GenUnsafeCas(HInvoke* invoke, CodeGeneratorRISCV64* codegen, DataType::Type type) {
+ Riscv64Assembler* assembler = codegen->GetAssembler();
+ LocationSummary* locations = invoke->GetLocations();
+ XRegister out = locations->Out().AsRegister<XRegister>(); // Boolean result.
+ XRegister object = locations->InAt(1).AsRegister<XRegister>(); // Object pointer.
+ XRegister offset = locations->InAt(2).AsRegister<XRegister>(); // Long offset.
+ XRegister expected = locations->InAt(3).AsRegister<XRegister>(); // Expected.
+ XRegister new_value = locations->InAt(4).AsRegister<XRegister>(); // New value.
+
+ // This must come before allocating the scratch registers below, as MarkGCCard() also uses scratch registers.
+ if (type == DataType::Type::kReference) {
+ // Mark card for object assuming new value is stored.
+ bool new_value_can_be_null = true; // TODO: Worth finding out this information?
+ codegen->MarkGCCard(object, new_value, new_value_can_be_null);
+ }
+
+ ScratchRegisterScope srs(assembler);
+ XRegister tmp_ptr = srs.AllocateXRegister(); // Pointer to actual memory.
+ XRegister old_value; // Value in memory.
+
+ Riscv64Label exit_loop_label;
+ Riscv64Label* exit_loop = &exit_loop_label;
+ Riscv64Label* cmp_failure = &exit_loop_label;
+
+ ReadBarrierCasSlowPathRISCV64* slow_path = nullptr;
+ if (type == DataType::Type::kReference && codegen->EmitReadBarrier()) {
+ // We need to store the `old_value` in a non-scratch register to make sure
+ // the read barrier in the slow path does not clobber it.
+ old_value = locations->GetTemp(0).AsRegister<XRegister>(); // The old value from main path.
+ // The `old_value_temp` is used first for marking the `old_value` and then for the unmarked
+ // reloaded old value for subsequent CAS in the slow path. We make this a scratch register
+ // as we do have marking entrypoints on riscv64 even for scratch registers.
+ XRegister old_value_temp = srs.AllocateXRegister();
+ slow_path = new (codegen->GetScopedAllocator()) ReadBarrierCasSlowPathRISCV64(
+ invoke,
+ std::memory_order_seq_cst,
+ /*strong=*/ true,
+ object,
+ offset,
+ expected,
+ new_value,
+ old_value,
+ old_value_temp,
+ /*store_result=*/ old_value_temp, // Let the SC result clobber the reloaded old_value.
+ /*update_old_value=*/ false,
+ codegen);
+ codegen->AddSlowPath(slow_path);
+ exit_loop = slow_path->GetExitLabel();
+ cmp_failure = slow_path->GetEntryLabel();
+ } else {
+ old_value = srs.AllocateXRegister();
+ }
+
+ __ Add(tmp_ptr, object, offset);
+
+ // Pre-populate the result register with failure.
+ __ Li(out, 0);
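+ // A failed comparison in the compare-and-set sequence below branches to `cmp_failure`, i.e.
+ // either the read barrier slow path entry or directly to `exit_loop`, leaving 0 in `out`.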
+
+ GenerateCompareAndSet(assembler,
+ type,
+ std::memory_order_seq_cst,
+ /*strong=*/ true,
+ cmp_failure,
+ tmp_ptr,
+ new_value,
+ old_value,
+ /*mask=*/ kNoXRegister,
+ /*masked=*/ kNoXRegister,
+ /*store_result=*/ old_value, // Let the SC result clobber the `old_value`.
+ expected);
+
+ DCHECK_EQ(slow_path != nullptr, type == DataType::Type::kReference && codegen->EmitReadBarrier());
+ if (slow_path != nullptr) {
+ __ Bind(slow_path->GetSuccessExitLabel());
+ }
+
+ // Indicate success; comparison failures branch past this point, so `out` keeps the 0 above.
+ __ Li(out, 1);
+
+ __ Bind(exit_loop);
+}
+
+void IntrinsicLocationsBuilderRISCV64::VisitUnsafeCASInt(HInvoke* invoke) {
+ VisitJdkUnsafeCASInt(invoke);
+}
+
+void IntrinsicCodeGeneratorRISCV64::VisitUnsafeCASInt(HInvoke* invoke) {
+ VisitJdkUnsafeCASInt(invoke);
+}
+
+void IntrinsicLocationsBuilderRISCV64::VisitUnsafeCASLong(HInvoke* invoke) {
+ VisitJdkUnsafeCASLong(invoke);
+}
+
+void IntrinsicCodeGeneratorRISCV64::VisitUnsafeCASLong(HInvoke* invoke) {
+ VisitJdkUnsafeCASLong(invoke);
+}
+
+void IntrinsicLocationsBuilderRISCV64::VisitUnsafeCASObject(HInvoke* invoke) {
+ VisitJdkUnsafeCASObject(invoke);
+}
+
+void IntrinsicCodeGeneratorRISCV64::VisitUnsafeCASObject(HInvoke* invoke) {
+ VisitJdkUnsafeCASObject(invoke);
+}
+
+void IntrinsicLocationsBuilderRISCV64::VisitJdkUnsafeCASInt(HInvoke* invoke) {
+ // `jdk.internal.misc.Unsafe.compareAndSwapInt` has compare-and-set semantics (see javadoc).
+ VisitJdkUnsafeCompareAndSetInt(invoke);
+}
+
+void IntrinsicCodeGeneratorRISCV64::VisitJdkUnsafeCASInt(HInvoke* invoke) {
+ // `jdk.internal.misc.Unsafe.compareAndSwapInt` has compare-and-set semantics (see javadoc).
+ VisitJdkUnsafeCompareAndSetInt(invoke);
+}
+
+void IntrinsicLocationsBuilderRISCV64::VisitJdkUnsafeCASLong(HInvoke* invoke) {
+ // `jdk.internal.misc.Unsafe.compareAndSwapLong` has compare-and-set semantics (see javadoc).
+ VisitJdkUnsafeCompareAndSetLong(invoke);
+}
+
+void IntrinsicCodeGeneratorRISCV64::VisitJdkUnsafeCASLong(HInvoke* invoke) {
+ // `jdk.internal.misc.Unsafe.compareAndSwapLong` has compare-and-set semantics (see javadoc).
+ VisitJdkUnsafeCompareAndSetLong(invoke);
+}
+
+void IntrinsicLocationsBuilderRISCV64::VisitJdkUnsafeCASObject(HInvoke* invoke) {
+ // `jdk.internal.misc.Unsafe.compareAndSwapObject` has compare-and-set semantics (see javadoc).
+ VisitJdkUnsafeCompareAndSetReference(invoke);
+}
+
+void IntrinsicCodeGeneratorRISCV64::VisitJdkUnsafeCASObject(HInvoke* invoke) {
+ // `jdk.internal.misc.Unsafe.compareAndSwapObject` has compare-and-set semantics (see javadoc).
+ VisitJdkUnsafeCompareAndSetReference(invoke);
+}
+
+void IntrinsicLocationsBuilderRISCV64::VisitJdkUnsafeCompareAndSetInt(HInvoke* invoke) {
+ CreateUnsafeCASLocations(allocator_, invoke, codegen_);
+}
+
+void IntrinsicCodeGeneratorRISCV64::VisitJdkUnsafeCompareAndSetInt(HInvoke* invoke) {
+ GenUnsafeCas(invoke, codegen_, DataType::Type::kInt32);
+}
+
+void IntrinsicLocationsBuilderRISCV64::VisitJdkUnsafeCompareAndSetLong(HInvoke* invoke) {
+ CreateUnsafeCASLocations(allocator_, invoke, codegen_);
+}
+
+void IntrinsicCodeGeneratorRISCV64::VisitJdkUnsafeCompareAndSetLong(HInvoke* invoke) {
+ GenUnsafeCas(invoke, codegen_, DataType::Type::kInt64);
+}
+
+void IntrinsicLocationsBuilderRISCV64::VisitJdkUnsafeCompareAndSetReference(HInvoke* invoke) {
+ // The only supported read barrier implementation is the Baker-style read barriers.
+ if (codegen_->EmitNonBakerReadBarrier()) {
+ return;
+ }
+
+ CreateUnsafeCASLocations(allocator_, invoke, codegen_);
+ if (codegen_->EmitReadBarrier()) {
+ DCHECK(kUseBakerReadBarrier);
+ // We need one non-scratch temporary register for read barrier.
+ LocationSummary* locations = invoke->GetLocations();
+ locations->AddTemp(Location::RequiresRegister());
+ }
+}
+
+void IntrinsicCodeGeneratorRISCV64::VisitJdkUnsafeCompareAndSetReference(HInvoke* invoke) {
+ GenUnsafeCas(invoke, codegen_, DataType::Type::kReference);
+}
+
+static void CreateUnsafeGetAndUpdateLocations(ArenaAllocator* allocator,
+ HInvoke* invoke,
+ CodeGeneratorRISCV64* codegen) {
+ const bool can_call = codegen->EmitReadBarrier() && IsUnsafeGetAndSetReference(invoke);
+ LocationSummary* locations = new (allocator) LocationSummary(
+ invoke,
+ can_call ? LocationSummary::kCallOnSlowPath : LocationSummary::kNoCall,
+ kIntrinsified);
+ if (can_call && kUseBakerReadBarrier) {
+ locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty()); // No caller-save registers.
+ }
+ locations->SetInAt(0, Location::NoLocation()); // Unused receiver.
+ locations->SetInAt(1, Location::RequiresRegister());
+ locations->SetInAt(2, Location::RequiresRegister());
+ locations->SetInAt(3, Location::RequiresRegister());
+
+ locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap);
+}
+
+static void GenUnsafeGetAndUpdate(HInvoke* invoke,
+ DataType::Type type,
+ CodeGeneratorRISCV64* codegen,
+ GetAndUpdateOp get_and_update_op) {
+ Riscv64Assembler* assembler = codegen->GetAssembler();
+ LocationSummary* locations = invoke->GetLocations();
+ Location out_loc = locations->Out();
+ XRegister out = out_loc.AsRegister<XRegister>(); // Result.
+ XRegister base = locations->InAt(1).AsRegister<XRegister>(); // Object pointer.
+ XRegister offset = locations->InAt(2).AsRegister<XRegister>(); // Long offset.
+ XRegister arg = locations->InAt(3).AsRegister<XRegister>(); // New value or addend.
+
+ // This must come before allocating the scratch registers below, as MarkGCCard() also uses scratch registers.
+ if (type == DataType::Type::kReference) {
+ DCHECK(get_and_update_op == GetAndUpdateOp::kSet);
+ // Mark card for object as a new value shall be stored.
+ bool new_value_can_be_null = true; // TODO: Worth finding out this information?
+ codegen->MarkGCCard(base, /*value=*/ arg, new_value_can_be_null);
+ }
+
+ ScratchRegisterScope srs(assembler);
+ XRegister tmp_ptr = srs.AllocateXRegister(); // Pointer to actual memory.
+ __ Add(tmp_ptr, base, offset);
+ GenerateGetAndUpdate(codegen,
+ get_and_update_op,
+ (type == DataType::Type::kReference) ? DataType::Type::kInt32 : type,
+ std::memory_order_seq_cst,
+ tmp_ptr,
+ arg,
+ /*old_value=*/ out,
+ /*mask=*/ kNoXRegister,
+ /*temp=*/ kNoXRegister);
+
+ if (type == DataType::Type::kReference) {
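+ // Heap references are 32-bit; the 32-bit atomic operation sign-extends its result, so
+ // zero-extend `out` back to a valid reference before any read barrier processing.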
+ __ ZextW(out, out);
+ if (codegen->EmitReadBarrier()) {
+ DCHECK(get_and_update_op == GetAndUpdateOp::kSet);
+ if (kUseBakerReadBarrier) {
+ // Use RA as temp. It is clobbered in the slow path anyway.
+ static constexpr Location kBakerReadBarrierTemp = Location::RegisterLocation(RA);
+ SlowPathCodeRISCV64* rb_slow_path =
+ codegen->AddGcRootBakerBarrierBarrierSlowPath(invoke, out_loc, kBakerReadBarrierTemp);
+ codegen->EmitBakerReadBarierMarkingCheck(rb_slow_path, out_loc, kBakerReadBarrierTemp);
+ } else {
+ codegen->GenerateReadBarrierSlow(
+ invoke,
+ out_loc,
+ out_loc,
+ Location::RegisterLocation(base),
+ /*offset=*/ 0u,
+ /*index=*/ Location::RegisterLocation(offset));
+ }
+ }
+ }
+}
+
+void IntrinsicLocationsBuilderRISCV64::VisitUnsafeGetAndAddInt(HInvoke* invoke) {
+ VisitJdkUnsafeGetAndAddInt(invoke);
+}
+
+void IntrinsicCodeGeneratorRISCV64::VisitUnsafeGetAndAddInt(HInvoke* invoke) {
+ VisitJdkUnsafeGetAndAddInt(invoke);
+}
+
+void IntrinsicLocationsBuilderRISCV64::VisitUnsafeGetAndAddLong(HInvoke* invoke) {
+ VisitJdkUnsafeGetAndAddLong(invoke);
+}
+
+void IntrinsicCodeGeneratorRISCV64::VisitUnsafeGetAndAddLong(HInvoke* invoke) {
+ VisitJdkUnsafeGetAndAddLong(invoke);
+}
+
+void IntrinsicLocationsBuilderRISCV64::VisitUnsafeGetAndSetInt(HInvoke* invoke) {
+ VisitJdkUnsafeGetAndSetInt(invoke);
+}
+
+void IntrinsicCodeGeneratorRISCV64::VisitUnsafeGetAndSetInt(HInvoke* invoke) {
+ VisitJdkUnsafeGetAndSetInt(invoke);
+}
+
+void IntrinsicLocationsBuilderRISCV64::VisitUnsafeGetAndSetLong(HInvoke* invoke) {
+ VisitJdkUnsafeGetAndSetLong(invoke);
+}
+
+void IntrinsicCodeGeneratorRISCV64::VisitUnsafeGetAndSetLong(HInvoke* invoke) {
+ VisitJdkUnsafeGetAndSetLong(invoke);
+}
+
+void IntrinsicLocationsBuilderRISCV64::VisitUnsafeGetAndSetObject(HInvoke* invoke) {
+ VisitJdkUnsafeGetAndSetReference(invoke);
+}
+
+void IntrinsicCodeGeneratorRISCV64::VisitUnsafeGetAndSetObject(HInvoke* invoke) {
+ VisitJdkUnsafeGetAndSetReference(invoke);
+}
+
+void IntrinsicLocationsBuilderRISCV64::VisitJdkUnsafeGetAndAddInt(HInvoke* invoke) {
+ CreateUnsafeGetAndUpdateLocations(allocator_, invoke, codegen_);
+}
+
+void IntrinsicCodeGeneratorRISCV64::VisitJdkUnsafeGetAndAddInt(HInvoke* invoke) {
+ GenUnsafeGetAndUpdate(invoke, DataType::Type::kInt32, codegen_, GetAndUpdateOp::kAdd);
+}
+
+void IntrinsicLocationsBuilderRISCV64::VisitJdkUnsafeGetAndAddLong(HInvoke* invoke) {
+ CreateUnsafeGetAndUpdateLocations(allocator_, invoke, codegen_);
+}
+
+void IntrinsicCodeGeneratorRISCV64::VisitJdkUnsafeGetAndAddLong(HInvoke* invoke) {
+ GenUnsafeGetAndUpdate(invoke, DataType::Type::kInt64, codegen_, GetAndUpdateOp::kAdd);
+}
+
+void IntrinsicLocationsBuilderRISCV64::VisitJdkUnsafeGetAndSetInt(HInvoke* invoke) {
+ CreateUnsafeGetAndUpdateLocations(allocator_, invoke, codegen_);
+}
+
+void IntrinsicCodeGeneratorRISCV64::VisitJdkUnsafeGetAndSetInt(HInvoke* invoke) {
+ GenUnsafeGetAndUpdate(invoke, DataType::Type::kInt32, codegen_, GetAndUpdateOp::kSet);
+}
+
+void IntrinsicLocationsBuilderRISCV64::VisitJdkUnsafeGetAndSetLong(HInvoke* invoke) {
+ CreateUnsafeGetAndUpdateLocations(allocator_, invoke, codegen_);
+}
+
+void IntrinsicCodeGeneratorRISCV64::VisitJdkUnsafeGetAndSetLong(HInvoke* invoke) {
+ GenUnsafeGetAndUpdate(invoke, DataType::Type::kInt64, codegen_, GetAndUpdateOp::kSet);
+}
+
+void IntrinsicLocationsBuilderRISCV64::VisitJdkUnsafeGetAndSetReference(HInvoke* invoke) {
+ CreateUnsafeGetAndUpdateLocations(allocator_, invoke, codegen_);
+}
+
+void IntrinsicCodeGeneratorRISCV64::VisitJdkUnsafeGetAndSetReference(HInvoke* invoke) {
+ GenUnsafeGetAndUpdate(invoke, DataType::Type::kReference, codegen_, GetAndUpdateOp::kSet);
+}
+
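+// Slow path shared by the VarHandle intrinsics. Besides the generic intrinsic slow path call, it
+// records the memory order and operation-specific arguments needed to emit the byte array view
+// code for accesses dispatched to `GetByteArrayViewCheckLabel()`.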
+class VarHandleSlowPathRISCV64 : public IntrinsicSlowPathRISCV64 {
+ public:
+ VarHandleSlowPathRISCV64(HInvoke* invoke, std::memory_order order)
+ : IntrinsicSlowPathRISCV64(invoke),
+ order_(order),
+ return_success_(false),
+ strong_(false),
+ get_and_update_op_(GetAndUpdateOp::kAdd) {
+ }
+
+ Riscv64Label* GetByteArrayViewCheckLabel() {
+ return &byte_array_view_check_label_;
+ }
+
+ Riscv64Label* GetNativeByteOrderLabel() {
+ return &native_byte_order_label_;
+ }
+
+ void SetCompareAndSetOrExchangeArgs(bool return_success, bool strong) {
+ if (return_success) {
+ DCHECK(GetAccessModeTemplate() == mirror::VarHandle::AccessModeTemplate::kCompareAndSet);
+ } else {
+ DCHECK(GetAccessModeTemplate() ==
+ mirror::VarHandle::AccessModeTemplate::kCompareAndExchange);
+ }
+ return_success_ = return_success;
+ strong_ = strong;
+ }
+
+ void SetGetAndUpdateOp(GetAndUpdateOp get_and_update_op) {
+ DCHECK(GetAccessModeTemplate() == mirror::VarHandle::AccessModeTemplate::kGetAndUpdate);
+ get_and_update_op_ = get_and_update_op;
+ }
+
+ void EmitNativeCode(CodeGenerator* codegen_in) override {
+ if (GetByteArrayViewCheckLabel()->IsLinked()) {
+ EmitByteArrayViewCode(codegen_in);
+ }
+ IntrinsicSlowPathRISCV64::EmitNativeCode(codegen_in);
+ }
+
+ private:
+ HInvoke* GetInvoke() const {
+ return GetInstruction()->AsInvoke();
+ }
+
+ mirror::VarHandle::AccessModeTemplate GetAccessModeTemplate() const {
+ return mirror::VarHandle::GetAccessModeTemplateByIntrinsic(GetInvoke()->GetIntrinsic());
+ }
+
+ void EmitByteArrayViewCode(CodeGenerator* codegen_in);
+
+ Riscv64Label byte_array_view_check_label_;
+ Riscv64Label native_byte_order_label_;
+ // Shared parameter for all VarHandle intrinsics.
+ std::memory_order order_;
+ // Extra arguments for GenerateVarHandleCompareAndSetOrExchange().
+ bool return_success_;
+ bool strong_;
+ // Extra argument for GenerateVarHandleGetAndUpdate().
+ GetAndUpdateOp get_and_update_op_;
+};
+
+// Generate subtype check without read barriers.
+static void GenerateSubTypeObjectCheckNoReadBarrier(CodeGeneratorRISCV64* codegen,
+ SlowPathCodeRISCV64* slow_path,
+ XRegister object,
+ XRegister type,
+ bool object_can_be_null = true) {
+ Riscv64Assembler* assembler = codegen->GetAssembler();
+
+ const MemberOffset class_offset = mirror::Object::ClassOffset();
+ const MemberOffset super_class_offset = mirror::Class::SuperClassOffset();
+
+ Riscv64Label success;
+ if (object_can_be_null) {
+ __ Beqz(object, &success);
+ }
+
+ ScratchRegisterScope srs(assembler);
+ XRegister temp = srs.AllocateXRegister();
+
+ // Note: The `type` can be `TMP`. We're using "bare" local branches to enforce that they shall
+ // not be expanded and the scratch register `TMP` shall not be clobbered if taken. Taking the
+ // branch to the slow path can theoretically clobber `TMP` (if outside the 1 MiB range).
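+ // Walk up the superclass chain of `object`'s class: a match with `type` succeeds, while
+ // reaching a null super class branches to the slow path.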
+ __ Loadwu(temp, object, class_offset.Int32Value());
+ codegen->MaybeUnpoisonHeapReference(temp);
+ Riscv64Label loop;
+ __ Bind(&loop);
+ __ Beq(type, temp, &success, /*is_bare=*/ true);
+ // We may not have another scratch register for `Loadwu()`. Use `Lwu()` directly.
+ DCHECK(IsInt<12>(super_class_offset.Int32Value()));
+ __ Lwu(temp, temp, super_class_offset.Int32Value());
+ codegen->MaybeUnpoisonHeapReference(temp);
+ __ Beqz(temp, slow_path->GetEntryLabel());
+ __ J(&loop, /*is_bare=*/ true);
+ __ Bind(&success);
+}
+
+// Check access mode and the primitive type from VarHandle.varType.
+// Check reference arguments against the VarHandle.varType; for references this is a subclass
+// check without read barrier, so it can have false negatives which we handle in the slow path.
+static void GenerateVarHandleAccessModeAndVarTypeChecks(HInvoke* invoke,
+ CodeGeneratorRISCV64* codegen,
+ SlowPathCodeRISCV64* slow_path,
+ DataType::Type type) {
+ mirror::VarHandle::AccessMode access_mode =
+ mirror::VarHandle::GetAccessModeByIntrinsic(invoke->GetIntrinsic());
+ Primitive::Type primitive_type = DataTypeToPrimitive(type);
+
+ Riscv64Assembler* assembler = codegen->GetAssembler();
+ LocationSummary* locations = invoke->GetLocations();
+ XRegister varhandle = locations->InAt(0).AsRegister<XRegister>();
+
+ const MemberOffset var_type_offset = mirror::VarHandle::VarTypeOffset();
+ const MemberOffset access_mode_bit_mask_offset = mirror::VarHandle::AccessModesBitMaskOffset();
+ const MemberOffset primitive_type_offset = mirror::Class::PrimitiveTypeOffset();
+
+ ScratchRegisterScope srs(assembler);
+ XRegister temp = srs.AllocateXRegister();
+ XRegister temp2 = srs.AllocateXRegister();
+
+ // Check that the operation is permitted.
+ __ Loadw(temp, varhandle, access_mode_bit_mask_offset.Int32Value());
+ DCHECK_LT(enum_cast<uint32_t>(access_mode), 31u); // We cannot avoid the shift below.
+ __ Slliw(temp, temp, 31 - enum_cast<uint32_t>(access_mode)); // Shift tested bit to sign bit.
+ __ Bgez(temp, slow_path->GetEntryLabel()); // If not permitted, go to slow path.
+
+ // For primitive types, we do not need a read barrier when loading a reference used only to
+ // load a constant field through that reference. For reference types, we deliberately avoid the
+ // read barrier, letting the slow path handle the false negatives.
+ __ Loadwu(temp, varhandle, var_type_offset.Int32Value());
+ codegen->MaybeUnpoisonHeapReference(temp);
+
+ // Check the varType.primitiveType field against the type we're trying to use.
+ __ Loadhu(temp2, temp, primitive_type_offset.Int32Value());
+ if (primitive_type == Primitive::kPrimNot) {
+ static_assert(Primitive::kPrimNot == 0);
+ __ Bnez(temp2, slow_path->GetEntryLabel());
+ } else {
+ __ Li(temp, enum_cast<int32_t>(primitive_type)); // `temp` can be clobbered.
+ __ Bne(temp2, temp, slow_path->GetEntryLabel());
+ }
+
+ srs.FreeXRegister(temp2);
+
+ if (type == DataType::Type::kReference) {
+ // Check reference arguments against the varType.
+ // False negatives due to varType being an interface or array type
+ // or due to the missing read barrier are handled by the slow path.
+ size_t expected_coordinates_count = GetExpectedVarHandleCoordinatesCount(invoke);
+ uint32_t arguments_start = /* VarHandle object */ 1u + expected_coordinates_count;
+ uint32_t number_of_arguments = invoke->GetNumberOfArguments();
+ for (size_t arg_index = arguments_start; arg_index != number_of_arguments; ++arg_index) {
+ HInstruction* arg = invoke->InputAt(arg_index);
+ DCHECK_EQ(arg->GetType(), DataType::Type::kReference);
+ if (!arg->IsNullConstant()) {
+ XRegister arg_reg = locations->InAt(arg_index).AsRegister<XRegister>();
+ GenerateSubTypeObjectCheckNoReadBarrier(codegen, slow_path, arg_reg, temp);
+ }
+ }
+ }
+}
+
+static void GenerateVarHandleStaticFieldCheck(HInvoke* invoke,
+ CodeGeneratorRISCV64* codegen,
+ SlowPathCodeRISCV64* slow_path) {
+ Riscv64Assembler* assembler = codegen->GetAssembler();
+ XRegister varhandle = invoke->GetLocations()->InAt(0).AsRegister<XRegister>();
+
+ const MemberOffset coordinate_type0_offset = mirror::VarHandle::CoordinateType0Offset();
+
+ ScratchRegisterScope srs(assembler);
+ XRegister temp = srs.AllocateXRegister();
+
+ // Check that the VarHandle references a static field by checking that coordinateType0 == null.
+ // Do not emit read barrier (or unpoison the reference) for comparing to null.
+ __ Loadwu(temp, varhandle, coordinate_type0_offset.Int32Value());
+ __ Bnez(temp, slow_path->GetEntryLabel());
+}
+
+static void GenerateVarHandleInstanceFieldChecks(HInvoke* invoke,
+ CodeGeneratorRISCV64* codegen,
+ SlowPathCodeRISCV64* slow_path) {
+ VarHandleOptimizations optimizations(invoke);
+ Riscv64Assembler* assembler = codegen->GetAssembler();
+ LocationSummary* locations = invoke->GetLocations();
+ XRegister varhandle = locations->InAt(0).AsRegister<XRegister>();
+ XRegister object = locations->InAt(1).AsRegister<XRegister>();
+
+ const MemberOffset coordinate_type0_offset = mirror::VarHandle::CoordinateType0Offset();
+ const MemberOffset coordinate_type1_offset = mirror::VarHandle::CoordinateType1Offset();
+
+ // Null-check the object.
+ if (!optimizations.GetSkipObjectNullCheck()) {
+ __ Beqz(object, slow_path->GetEntryLabel());
+ }
+
+ if (!optimizations.GetUseKnownBootImageVarHandle()) {
+ ScratchRegisterScope srs(assembler);
+ XRegister temp = srs.AllocateXRegister();
+
+ // Check that the VarHandle references an instance field by checking that
+ // coordinateType1 == null. coordinateType0 should not be null, but this is handled by the
+ // type compatibility check with the source object's type, which will fail for null.
+ __ Loadwu(temp, varhandle, coordinate_type1_offset.Int32Value());
+ // No need for read barrier or unpoisoning of coordinateType1 for comparison with null.
+ __ Bnez(temp, slow_path->GetEntryLabel());
+
+ // Check that the object has the correct type.
+ // We deliberately avoid the read barrier, letting the slow path handle the false negatives.
+ __ Loadwu(temp, varhandle, coordinate_type0_offset.Int32Value());
+ codegen->MaybeUnpoisonHeapReference(temp);
+ GenerateSubTypeObjectCheckNoReadBarrier(
+ codegen, slow_path, object, temp, /*object_can_be_null=*/ false);
+ }
+}
+
+static void GenerateVarHandleArrayChecks(HInvoke* invoke,
+ CodeGeneratorRISCV64* codegen,
+ VarHandleSlowPathRISCV64* slow_path) {
+ VarHandleOptimizations optimizations(invoke);
+ Riscv64Assembler* assembler = codegen->GetAssembler();
+ LocationSummary* locations = invoke->GetLocations();
+ XRegister varhandle = locations->InAt(0).AsRegister<XRegister>();
+ XRegister object = locations->InAt(1).AsRegister<XRegister>();
+ XRegister index = locations->InAt(2).AsRegister<XRegister>();
+ DataType::Type value_type =
+ GetVarHandleExpectedValueType(invoke, /*expected_coordinates_count=*/ 2u);
+ Primitive::Type primitive_type = DataTypeToPrimitive(value_type);
+
+ const MemberOffset coordinate_type0_offset = mirror::VarHandle::CoordinateType0Offset();
+ const MemberOffset coordinate_type1_offset = mirror::VarHandle::CoordinateType1Offset();
+ const MemberOffset component_type_offset = mirror::Class::ComponentTypeOffset();
+ const MemberOffset primitive_type_offset = mirror::Class::PrimitiveTypeOffset();
+ const MemberOffset class_offset = mirror::Object::ClassOffset();
+ const MemberOffset array_length_offset = mirror::Array::LengthOffset();
+
+ // Null-check the object.
+ if (!optimizations.GetSkipObjectNullCheck()) {
+ __ Beqz(object, slow_path->GetEntryLabel());
+ }
+
+ ScratchRegisterScope srs(assembler);
+ XRegister temp = srs.AllocateXRegister();
+ XRegister temp2 = srs.AllocateXRegister();
+
+ // Check that the VarHandle references an array, byte array view or ByteBuffer by checking
+ // that coordinateType1 != null. If that's true, coordinateType1 shall be int.class and
+ // coordinateType0 shall not be null but we do not explicitly verify that.
+ __ Loadwu(temp, varhandle, coordinate_type1_offset.Int32Value());
+ // No need for read barrier or unpoisoning of coordinateType1 for comparison with null.
+ __ Beqz(temp, slow_path->GetEntryLabel());
+
+ // Check the object's class against coordinateType0 (the expected array class).
+ //
+ // This is an exact check and we defer other cases to the runtime. This includes
+ // conversion to array of superclass references, which is valid but subsequently
+ // requires all update operations to check that the value can indeed be stored.
+ // We do not want to perform such extra checks in the intrinsified code.
+ //
+ // We do this check without read barrier, so there can be false negatives which we
+ // defer to the slow path. There shall be no false negatives for array classes in the
+ // boot image (including Object[] and primitive arrays) because they are non-movable.
+ __ Loadwu(temp, varhandle, coordinate_type0_offset.Int32Value());
+ __ Loadwu(temp2, object, class_offset.Int32Value());
+ __ Bne(temp, temp2, slow_path->GetEntryLabel());
+
+ // Check that the coordinateType0 is an array type. We do not need a read barrier
+ // for loading constant reference fields (or chains of them) for comparison with null,
+ // nor for finally loading a constant primitive field (primitive type) below.
+ codegen->MaybeUnpoisonHeapReference(temp);
+ __ Loadwu(temp2, temp, component_type_offset.Int32Value());
+ codegen->MaybeUnpoisonHeapReference(temp2);
+ __ Beqz(temp2, slow_path->GetEntryLabel());
+
+ // Check that the array component type matches the primitive type.
+ __ Loadhu(temp, temp2, primitive_type_offset.Int32Value());
+ if (primitive_type == Primitive::kPrimNot) {
+ static_assert(Primitive::kPrimNot == 0);
+ __ Bnez(temp, slow_path->GetEntryLabel());
+ } else {
+ // With the exception of `kPrimNot` (handled above), `kPrimByte` and `kPrimBoolean`,
+ // we shall check for a byte array view in the slow path.
+ // The check requires the ByteArrayViewVarHandle.class to be in the boot image,
+ // so we cannot emit that if we're JITting without boot image.
+ bool boot_image_available =
+ codegen->GetCompilerOptions().IsBootImage() ||
+ !Runtime::Current()->GetHeap()->GetBootImageSpaces().empty();
+ bool can_be_view = (DataType::Size(value_type) != 1u) && boot_image_available;
+ Riscv64Label* slow_path_label =
+ can_be_view ? slow_path->GetByteArrayViewCheckLabel() : slow_path->GetEntryLabel();
+ __ Li(temp2, enum_cast<int32_t>(primitive_type));
+ __ Bne(temp, temp2, slow_path_label);
+ }
+
+ // Check for array index out of bounds.
+ __ Loadw(temp, object, array_length_offset.Int32Value());
+ __ Bgeu(index, temp, slow_path->GetEntryLabel());
+}
+
+static void GenerateVarHandleCoordinateChecks(HInvoke* invoke,
+ CodeGeneratorRISCV64* codegen,
+ VarHandleSlowPathRISCV64* slow_path) {
+ size_t expected_coordinates_count = GetExpectedVarHandleCoordinatesCount(invoke);
+ if (expected_coordinates_count == 0u) {
+ GenerateVarHandleStaticFieldCheck(invoke, codegen, slow_path);
+ } else if (expected_coordinates_count == 1u) {
+ GenerateVarHandleInstanceFieldChecks(invoke, codegen, slow_path);
+ } else {
+ DCHECK_EQ(expected_coordinates_count, 2u);
+ GenerateVarHandleArrayChecks(invoke, codegen, slow_path);
+ }
+}
+
+static VarHandleSlowPathRISCV64* GenerateVarHandleChecks(HInvoke* invoke,
+ CodeGeneratorRISCV64* codegen,
+ std::memory_order order,
+ DataType::Type type) {
+ size_t expected_coordinates_count = GetExpectedVarHandleCoordinatesCount(invoke);
+ VarHandleOptimizations optimizations(invoke);
+ if (optimizations.GetUseKnownBootImageVarHandle()) {
+ DCHECK_NE(expected_coordinates_count, 2u);
+ if (expected_coordinates_count == 0u || optimizations.GetSkipObjectNullCheck()) {
+ return nullptr;
+ }
+ }
+
+ VarHandleSlowPathRISCV64* slow_path =
+ new (codegen->GetScopedAllocator()) VarHandleSlowPathRISCV64(invoke, order);
+ codegen->AddSlowPath(slow_path);
+
+ if (!optimizations.GetUseKnownBootImageVarHandle()) {
+ GenerateVarHandleAccessModeAndVarTypeChecks(invoke, codegen, slow_path, type);
+ }
+ GenerateVarHandleCoordinateChecks(invoke, codegen, slow_path);
+
+ return slow_path;
+}
+
+struct VarHandleTarget {
+ XRegister object; // The object holding the value to operate on.
+ XRegister offset; // The offset of the value to operate on.
+};
+
+static VarHandleTarget GetVarHandleTarget(HInvoke* invoke) {
+ size_t expected_coordinates_count = GetExpectedVarHandleCoordinatesCount(invoke);
+ LocationSummary* locations = invoke->GetLocations();
+
+ VarHandleTarget target;
+ // The temporary allocated for loading the offset.
+ target.offset = locations->GetTemp(0u).AsRegister<XRegister>();
+ // The reference to the object that holds the value to operate on.
+ target.object = (expected_coordinates_count == 0u)
+ ? locations->GetTemp(1u).AsRegister<XRegister>()
+ : locations->InAt(1).AsRegister<XRegister>();
+ return target;
+}
+
+static void GenerateVarHandleTarget(HInvoke* invoke,
+ const VarHandleTarget& target,
+ CodeGeneratorRISCV64* codegen) {
+ Riscv64Assembler* assembler = codegen->GetAssembler();
+ LocationSummary* locations = invoke->GetLocations();
+ XRegister varhandle = locations->InAt(0).AsRegister<XRegister>();
+ size_t expected_coordinates_count = GetExpectedVarHandleCoordinatesCount(invoke);
+
+ if (expected_coordinates_count <= 1u) {
+ if (VarHandleOptimizations(invoke).GetUseKnownBootImageVarHandle()) {
+ ScopedObjectAccess soa(Thread::Current());
+ ArtField* target_field = GetBootImageVarHandleField(invoke);
+ if (expected_coordinates_count == 0u) {
+ ObjPtr<mirror::Class> declaring_class = target_field->GetDeclaringClass();
+ if (Runtime::Current()->GetHeap()->ObjectIsInBootImageSpace(declaring_class)) {
+ uint32_t boot_image_offset = CodeGenerator::GetBootImageOffset(declaring_class);
+ codegen->LoadBootImageRelRoEntry(target.object, boot_image_offset);
+ } else {
+ codegen->LoadTypeForBootImageIntrinsic(
+ target.object,
+ TypeReference(&declaring_class->GetDexFile(), declaring_class->GetDexTypeIndex()));
+ }
+ }
+ __ Li(target.offset, target_field->GetOffset().Uint32Value());
+ } else {
+ // For static fields, we need to fill `target.object` with the declaring class, so we
+ // can use `target.object` as a temporary for the `ArtField*`. For instance fields, we
+ // do not need the declaring class and the `ArtField*` is no longer needed once the
+ // `target.offset` is loaded, so `target.offset` itself can hold the `ArtField*`.
+ XRegister field = (expected_coordinates_count == 0) ? target.object : target.offset;
+
+ const MemberOffset art_field_offset = mirror::FieldVarHandle::ArtFieldOffset();
+ const MemberOffset offset_offset = ArtField::OffsetOffset();
+
+ // Load the ArtField*, the offset and, if needed, declaring class.
+ __ Loadd(field, varhandle, art_field_offset.Int32Value());
+ __ Loadwu(target.offset, field, offset_offset.Int32Value());
+ if (expected_coordinates_count == 0u) {
+ codegen->GenerateGcRootFieldLoad(
+ invoke,
+ Location::RegisterLocation(target.object),
+ field,
+ ArtField::DeclaringClassOffset().Int32Value(),
+ codegen->GetCompilerReadBarrierOption());
+ }
+ }
+ } else {
+ DCHECK_EQ(expected_coordinates_count, 2u);
+ DataType::Type value_type =
+ GetVarHandleExpectedValueType(invoke, /*expected_coordinates_count=*/ 2u);
+ MemberOffset data_offset = mirror::Array::DataOffset(DataType::Size(value_type));
+
+ XRegister index = locations->InAt(2).AsRegister<XRegister>();
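+ // Compute target.offset = data_offset + (index << log2(element size)); `ShNAdd()` emits the
+ // shift-add appropriate for the element size.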
+ __ Li(target.offset, data_offset.Int32Value());
+ codegen->GetInstructionVisitor()->ShNAdd(target.offset, index, target.offset, value_type);
+ }
+}
+
+static LocationSummary* CreateVarHandleCommonLocations(HInvoke* invoke,
+ CodeGeneratorRISCV64* codegen) {
+ size_t expected_coordinates_count = GetExpectedVarHandleCoordinatesCount(invoke);
+ DataType::Type return_type = invoke->GetType();
+
+ ArenaAllocator* allocator = invoke->GetBlock()->GetGraph()->GetAllocator();
+ LocationSummary* locations =
+ new (allocator) LocationSummary(invoke, LocationSummary::kCallOnSlowPath, kIntrinsified);
+ locations->SetInAt(0, Location::RequiresRegister());
+ // Require coordinates in registers. These are the object holding the value
+ // to operate on (except for static fields) and index (for arrays and views).
+ for (size_t i = 0; i != expected_coordinates_count; ++i) {
+ locations->SetInAt(/* VarHandle object */ 1u + i, Location::RequiresRegister());
+ }
+ if (return_type != DataType::Type::kVoid) {
+ if (DataType::IsFloatingPointType(return_type)) {
+ locations->SetOut(Location::RequiresFpuRegister());
+ } else {
+ locations->SetOut(Location::RequiresRegister());
+ }
+ }
+ uint32_t arguments_start = /* VarHandle object */ 1u + expected_coordinates_count;
+ uint32_t number_of_arguments = invoke->GetNumberOfArguments();
+ for (size_t arg_index = arguments_start; arg_index != number_of_arguments; ++arg_index) {
+ HInstruction* arg = invoke->InputAt(arg_index);
+ if (IsZeroBitPattern(arg)) {
+ locations->SetInAt(arg_index, Location::ConstantLocation(arg));
+ } else if (DataType::IsFloatingPointType(arg->GetType())) {
+ locations->SetInAt(arg_index, Location::RequiresFpuRegister());
+ } else {
+ locations->SetInAt(arg_index, Location::RequiresRegister());
+ }
+ }
+
+ // Add a temporary for offset.
+ if (codegen->EmitNonBakerReadBarrier() &&
+ expected_coordinates_count == 0u) { // For static fields.
+ // To preserve the offset value across the non-Baker read barrier slow path
+ // for loading the declaring class, use a fixed callee-save register.
+ constexpr int first_callee_save = CTZ(kRiscv64CalleeSaveRefSpills);
+ locations->AddTemp(Location::RegisterLocation(first_callee_save));
+ } else {
+ locations->AddTemp(Location::RequiresRegister());
+ }
+ if (expected_coordinates_count == 0u) {
+ // Add a temporary to hold the declaring class.
+ locations->AddTemp(Location::RequiresRegister());
+ }
+
+ return locations;
+}
+
+static void CreateVarHandleGetLocations(HInvoke* invoke, CodeGeneratorRISCV64* codegen) {
+ VarHandleOptimizations optimizations(invoke);
+ if (optimizations.GetDoNotIntrinsify()) {
+ return;
+ }
+
+ if (codegen->EmitNonBakerReadBarrier() &&
+ invoke->GetType() == DataType::Type::kReference &&
+ invoke->GetIntrinsic() != Intrinsics::kVarHandleGet &&
+ invoke->GetIntrinsic() != Intrinsics::kVarHandleGetOpaque) {
+ // Unsupported for non-Baker read barrier because the artReadBarrierSlow() ignores
+ // the passed reference and reloads it from the field. This gets the memory visibility
+ // wrong for Acquire/Volatile operations. b/173104084
+ return;
+ }
+
+ CreateVarHandleCommonLocations(invoke, codegen);
+}
+
+static DataType::Type IntTypeForFloatingPointType(DataType::Type fp_type) {
+ DCHECK(DataType::IsFloatingPointType(fp_type));
+ return (fp_type == DataType::Type::kFloat32) ? DataType::Type::kInt32 : DataType::Type::kInt64;
+}
+
+static void GenerateVarHandleGet(HInvoke* invoke,
+ CodeGeneratorRISCV64* codegen,
+ std::memory_order order,
+ bool byte_swap = false) {
+ DataType::Type type = invoke->GetType();
+ DCHECK_NE(type, DataType::Type::kVoid);
+
+ LocationSummary* locations = invoke->GetLocations();
+ Riscv64Assembler* assembler = codegen->GetAssembler();
+ Location out = locations->Out();
+
+ VarHandleTarget target = GetVarHandleTarget(invoke);
+ VarHandleSlowPathRISCV64* slow_path = nullptr;
+ if (!byte_swap) {
+ slow_path = GenerateVarHandleChecks(invoke, codegen, order, type);
+ GenerateVarHandleTarget(invoke, target, codegen);
+ if (slow_path != nullptr) {
+ __ Bind(slow_path->GetNativeByteOrderLabel());
+ }
+ }
+
+ bool seq_cst_barrier = (order == std::memory_order_seq_cst);
+ bool acquire_barrier = seq_cst_barrier || (order == std::memory_order_acquire);
+ DCHECK(acquire_barrier || order == std::memory_order_relaxed);
+
+ if (seq_cst_barrier) {
+ codegen->GenerateMemoryBarrier(MemBarrierKind::kAnyAny);
+ }
+
+ // Load the value from the target location.
+ if (type == DataType::Type::kReference && codegen->EmitBakerReadBarrier()) {
+ Location index = Location::RegisterLocation(target.offset);
+ // TODO(riscv64): Revisit when we add checking if the holder is black.
+ Location temp = Location::NoLocation();
+ codegen->GenerateReferenceLoadWithBakerReadBarrier(invoke,
+ out,
+ target.object,
+ /*offset=*/ 0,
+ index,
+ temp,
+ /*needs_null_check=*/ false);
+ DCHECK(!byte_swap);
+ } else {
+ ScratchRegisterScope srs(assembler);
+ XRegister address = srs.AllocateXRegister();
+ __ Add(address, target.object, target.offset);
+ Location load_loc = out;
+ DataType::Type load_type = type;
+ if (byte_swap && DataType::IsFloatingPointType(type)) {
+ load_loc = Location::RegisterLocation(target.offset); // Load to the offset temporary.
+ load_type = IntTypeForFloatingPointType(type);
+ }
+ codegen->GetInstructionVisitor()->Load(load_loc, address, /*offset=*/ 0, load_type);
+ if (type == DataType::Type::kReference) {
+ DCHECK(!byte_swap);
+ Location object_loc = Location::RegisterLocation(target.object);
+ Location offset_loc = Location::RegisterLocation(target.offset);
+ codegen->MaybeGenerateReadBarrierSlow(
+ invoke, out, out, object_loc, /*offset=*/ 0u, /*index=*/ offset_loc);
+ } else if (byte_swap) {
+ GenerateReverseBytes(codegen, out, load_loc.AsRegister<XRegister>(), type);
+ }
+ }
+
+ if (acquire_barrier) {
+ codegen->GenerateMemoryBarrier(MemBarrierKind::kLoadAny);
+ }
+
+ if (slow_path != nullptr) {
+ DCHECK(!byte_swap);
+ __ Bind(slow_path->GetExitLabel());
+ }
+}
+
+void IntrinsicLocationsBuilderRISCV64::VisitVarHandleGet(HInvoke* invoke) {
+ CreateVarHandleGetLocations(invoke, codegen_);
+}
+
+void IntrinsicCodeGeneratorRISCV64::VisitVarHandleGet(HInvoke* invoke) {
+ GenerateVarHandleGet(invoke, codegen_, std::memory_order_relaxed);
+}
+
+void IntrinsicLocationsBuilderRISCV64::VisitVarHandleGetOpaque(HInvoke* invoke) {
+ CreateVarHandleGetLocations(invoke, codegen_);
+}
+
+void IntrinsicCodeGeneratorRISCV64::VisitVarHandleGetOpaque(HInvoke* invoke) {
+ GenerateVarHandleGet(invoke, codegen_, std::memory_order_relaxed);
+}
+
+void IntrinsicLocationsBuilderRISCV64::VisitVarHandleGetAcquire(HInvoke* invoke) {
+ CreateVarHandleGetLocations(invoke, codegen_);
+}
+
+void IntrinsicCodeGeneratorRISCV64::VisitVarHandleGetAcquire(HInvoke* invoke) {
+ GenerateVarHandleGet(invoke, codegen_, std::memory_order_acquire);
+}
+
+void IntrinsicLocationsBuilderRISCV64::VisitVarHandleGetVolatile(HInvoke* invoke) {
+ CreateVarHandleGetLocations(invoke, codegen_);
+}
+
+void IntrinsicCodeGeneratorRISCV64::VisitVarHandleGetVolatile(HInvoke* invoke) {
+ GenerateVarHandleGet(invoke, codegen_, std::memory_order_seq_cst);
+}
+
+static void CreateVarHandleSetLocations(HInvoke* invoke, CodeGeneratorRISCV64* codegen) {
+ VarHandleOptimizations optimizations(invoke);
+ if (optimizations.GetDoNotIntrinsify()) {
+ return;
+ }
+
+ CreateVarHandleCommonLocations(invoke, codegen);
+}
+
+static void GenerateVarHandleSet(HInvoke* invoke,
+ CodeGeneratorRISCV64* codegen,
+ std::memory_order order,
+ bool byte_swap = false) {
+ uint32_t value_index = invoke->GetNumberOfArguments() - 1;
+ DataType::Type value_type = GetDataTypeFromShorty(invoke, value_index);
+
+ Riscv64Assembler* assembler = codegen->GetAssembler();
+ Location value = invoke->GetLocations()->InAt(value_index);
+
+ VarHandleTarget target = GetVarHandleTarget(invoke);
+ VarHandleSlowPathRISCV64* slow_path = nullptr;
+ if (!byte_swap) {
+ slow_path = GenerateVarHandleChecks(invoke, codegen, order, value_type);
+ GenerateVarHandleTarget(invoke, target, codegen);
+ if (slow_path != nullptr) {
+ __ Bind(slow_path->GetNativeByteOrderLabel());
+ }
+ }
+
+ {
+ ScratchRegisterScope srs(assembler);
+ XRegister address = srs.AllocateXRegister();
+ __ Add(address, target.object, target.offset);
+
+ if (byte_swap) {
+ DCHECK(!value.IsConstant()); // Zero uses the main path as it does not need a byte swap.
+ // The offset is no longer needed, so reuse the offset temporary for the byte-swapped value.
+ Location new_value = Location::RegisterLocation(target.offset);
+ if (DataType::IsFloatingPointType(value_type)) {
+ value_type = IntTypeForFloatingPointType(value_type);
+ codegen->MoveLocation(new_value, value, value_type);
+ value = new_value;
+ }
+ GenerateReverseBytes(codegen, new_value, value.AsRegister<XRegister>(), value_type);
+ value = new_value;
+ }
+
+ GenerateSet(codegen, order, value, address, /*offset=*/ 0, value_type);
+ }
+
+ if (CodeGenerator::StoreNeedsWriteBarrier(value_type, invoke->InputAt(value_index))) {
+ codegen->MarkGCCard(target.object, value.AsRegister<XRegister>(), /* emit_null_check= */ true);
+ }
+
+ if (slow_path != nullptr) {
+ DCHECK(!byte_swap);
+ __ Bind(slow_path->GetExitLabel());
+ }
+}
+
+void IntrinsicLocationsBuilderRISCV64::VisitVarHandleSet(HInvoke* invoke) {
+ CreateVarHandleSetLocations(invoke, codegen_);
+}
+
+void IntrinsicCodeGeneratorRISCV64::VisitVarHandleSet(HInvoke* invoke) {
+ GenerateVarHandleSet(invoke, codegen_, std::memory_order_relaxed);
+}
+
+void IntrinsicLocationsBuilderRISCV64::VisitVarHandleSetOpaque(HInvoke* invoke) {
+ CreateVarHandleSetLocations(invoke, codegen_);
+}
+
+void IntrinsicCodeGeneratorRISCV64::VisitVarHandleSetOpaque(HInvoke* invoke) {
+ GenerateVarHandleSet(invoke, codegen_, std::memory_order_relaxed);
+}
+
+void IntrinsicLocationsBuilderRISCV64::VisitVarHandleSetRelease(HInvoke* invoke) {
+ CreateVarHandleSetLocations(invoke, codegen_);
+}
+
+void IntrinsicCodeGeneratorRISCV64::VisitVarHandleSetRelease(HInvoke* invoke) {
+ GenerateVarHandleSet(invoke, codegen_, std::memory_order_release);
+}
+
+void IntrinsicLocationsBuilderRISCV64::VisitVarHandleSetVolatile(HInvoke* invoke) {
+ CreateVarHandleSetLocations(invoke, codegen_);
+}
+
+void IntrinsicCodeGeneratorRISCV64::VisitVarHandleSetVolatile(HInvoke* invoke) {
+ GenerateVarHandleSet(invoke, codegen_, std::memory_order_seq_cst);
+}
+
+static bool ScratchXRegisterNeeded(Location loc, DataType::Type type, bool byte_swap) {
+ if (loc.IsConstant()) {
+ DCHECK(loc.GetConstant()->IsZeroBitPattern());
+ return false;
+ }
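+ // A general-purpose copy is needed when the value is in an FP register, is a sub-word value
+ // that must be shifted and masked into place, or needs a byte swap before the memory operation.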
+ return DataType::IsFloatingPointType(type) || DataType::Size(type) < 4u || byte_swap;
+}
+
+static void CreateVarHandleCompareAndSetOrExchangeLocations(HInvoke* invoke,
+ CodeGeneratorRISCV64* codegen,
+ bool return_success) {
+ VarHandleOptimizations optimizations(invoke);
+ if (optimizations.GetDoNotIntrinsify()) {
+ return;
+ }
+
+ uint32_t expected_index = invoke->GetNumberOfArguments() - 2;
+ uint32_t new_value_index = invoke->GetNumberOfArguments() - 1;
+ DataType::Type value_type = GetDataTypeFromShorty(invoke, new_value_index);
+ DCHECK_EQ(value_type, GetDataTypeFromShorty(invoke, expected_index));
+
+ bool is_reference = (value_type == DataType::Type::kReference);
+ if (is_reference && codegen->EmitNonBakerReadBarrier()) {
+ // Unsupported for non-Baker read barrier because the artReadBarrierSlow() ignores
+ // the passed reference and reloads it from the field. This breaks the read barriers
+ // in slow path in different ways. The marked old value may not actually be a to-space
+ // reference to the same object as `old_value`, breaking slow path assumptions. And
+ // for CompareAndExchange, marking the old value after comparison failure may actually
+ // return the reference to `expected`, erroneously indicating success even though we
+ // did not set the new value. (And it also gets the memory visibility wrong.) b/173104084
+ return;
+ }
+
+ LocationSummary* locations = CreateVarHandleCommonLocations(invoke, codegen);
+ DCHECK_EQ(expected_index, 1u + GetExpectedVarHandleCoordinatesCount(invoke));
+
+ if (codegen->EmitNonBakerReadBarrier()) {
+ // We need callee-save registers for both the class object and offset instead of
+ // the temporaries reserved in CreateVarHandleCommonLocations().
+ static_assert(POPCOUNT(kRiscv64CalleeSaveRefSpills) >= 2u);
+ uint32_t first_callee_save = CTZ(kRiscv64CalleeSaveRefSpills);
+ uint32_t second_callee_save = CTZ(kRiscv64CalleeSaveRefSpills ^ (1u << first_callee_save));
+ if (expected_index == 1u) { // For static fields.
+ DCHECK_EQ(locations->GetTempCount(), 2u);
+ DCHECK(locations->GetTemp(0u).Equals(Location::RequiresRegister()));
+ DCHECK(locations->GetTemp(1u).Equals(Location::RegisterLocation(first_callee_save)));
+ locations->SetTempAt(0u, Location::RegisterLocation(second_callee_save));
+ } else {
+ DCHECK_EQ(locations->GetTempCount(), 1u);
+ DCHECK(locations->GetTemp(0u).Equals(Location::RequiresRegister()));
+ locations->SetTempAt(0u, Location::RegisterLocation(first_callee_save));
+ }
+ }
+
+ size_t old_temp_count = locations->GetTempCount();
+ DCHECK_EQ(old_temp_count, (expected_index == 1u) ? 2u : 1u);
+ Location expected = locations->InAt(expected_index);
+ Location new_value = locations->InAt(new_value_index);
+ size_t data_size = DataType::Size(value_type);
+ bool is_small = (data_size < 4u);
+ bool can_byte_swap =
+ (expected_index == 3u) && (value_type != DataType::Type::kReference && data_size != 1u);
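+ // A byte swap can only be required for accesses with two coordinates (byte array views, hence
+ // `expected_index == 3u`) and only for multi-byte, non-reference values.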
+ bool is_fp = DataType::IsFloatingPointType(value_type);
+ size_t temps_needed =
+ // The offset temp is used for the `tmp_ptr`, except for the read barrier case. For read
+ // barrier we must preserve the offset and class pointer (if any) for the slow path and
+ // use a separate temp for `tmp_ptr` and we also need another temp for `old_value_temp`.
+ ((is_reference && codegen->EmitReadBarrier()) ? old_temp_count + 2u : 1u) +
+ // For small values, we need a temp for the `mask`, `masked` and maybe also for the `shift`.
+ (is_small ? (return_success ? 2u : 3u) : 0u) +
+ // Some cases need modified copies of `new_value` and `expected`.
+ (ScratchXRegisterNeeded(expected, value_type, can_byte_swap) ? 1u : 0u) +
+ (ScratchXRegisterNeeded(new_value, value_type, can_byte_swap) ? 1u : 0u) +
+ // We need a scratch register either for the old value or for the result of SC.
+ // If we need to return a floating point old value, we need a temp for each.
+ ((!return_success && is_fp) ? 2u : 1u);
+ size_t scratch_registers_available = 2u;
+ DCHECK_EQ(scratch_registers_available,
+ ScratchRegisterScope(codegen->GetAssembler()).AvailableXRegisters());
+ if (temps_needed > old_temp_count + scratch_registers_available) {
+ locations->AddRegisterTemps(temps_needed - (old_temp_count + scratch_registers_available));
+ }
+}
+
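+// Return an X register holding the value of `loc` prepared for the raw memory operation: moved
+// out of an FP register, byte-swapped and/or shifted and masked into position for a sub-word
+// CAS as needed. Constant inputs (always zero bit patterns here) simply use the Zero register.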
+static XRegister PrepareXRegister(CodeGeneratorRISCV64* codegen,
+ Location loc,
+ DataType::Type type,
+ XRegister shift,
+ XRegister mask,
+ bool byte_swap,
+ ScratchRegisterScope* srs) {
+ DCHECK_IMPLIES(mask != kNoXRegister, shift != kNoXRegister);
+ DCHECK_EQ(shift == kNoXRegister, DataType::Size(type) >= 4u);
+ if (loc.IsConstant()) {
+ // The `shift`/`mask` and `byte_swap` are irrelevant for zero input.
+ DCHECK(loc.GetConstant()->IsZeroBitPattern());
+ return Zero;
+ }
+
+ Location result = loc;
+ if (DataType::IsFloatingPointType(type)) {
+ type = IntTypeForFloatingPointType(type);
+ result = Location::RegisterLocation(srs->AllocateXRegister());
+ codegen->MoveLocation(result, loc, type);
+ loc = result;
+ } else if (byte_swap || shift != kNoXRegister) {
+ result = Location::RegisterLocation(srs->AllocateXRegister());
+ }
+ if (byte_swap) {
+ if (type == DataType::Type::kInt16) {
+ type = DataType::Type::kUint16; // Do the masking as part of the byte swap.
+ }
+ GenerateReverseBytes(codegen, result, loc.AsRegister<XRegister>(), type);
+ loc = result;
+ }
+ if (shift != kNoXRegister) {
+ Riscv64Assembler* assembler = codegen->GetAssembler();
+ __ Sllw(result.AsRegister<XRegister>(), loc.AsRegister<XRegister>(), shift);
+ DCHECK_NE(type, DataType::Type::kUint8);
+ if (mask != kNoXRegister && type != DataType::Type::kUint16 && type != DataType::Type::kBool) {
+ __ And(result.AsRegister<XRegister>(), result.AsRegister<XRegister>(), mask);
+ }
+ }
+ return result.AsRegister<XRegister>();
+}
+
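+// Extract the old value from `rs1`: for small types, shift it down within its 32-bit word first,
+// then byte-swap; `GenerateReverseBytes()` also moves the result to an FP register if needed.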
+static void GenerateByteSwapAndExtract(CodeGeneratorRISCV64* codegen,
+ Location rd,
+ XRegister rs1,
+ XRegister shift,
+ DataType::Type type) {
+ // Apply shift before `GenerateReverseBytes()` for small types.
+ DCHECK_EQ(shift != kNoXRegister, DataType::Size(type) < 4u);
+ if (shift != kNoXRegister) {
+ Riscv64Assembler* assembler = codegen->GetAssembler();
+ __ Srlw(rd.AsRegister<XRegister>(), rs1, shift);
+ rs1 = rd.AsRegister<XRegister>();
+ }
+ // Also handles moving to FP registers.
+ GenerateReverseBytes(codegen, rd, rs1, type);
+}
+
+static void GenerateVarHandleCompareAndSetOrExchange(HInvoke* invoke,
+ CodeGeneratorRISCV64* codegen,
+ std::memory_order order,
+ bool return_success,
+ bool strong,
+ bool byte_swap = false) {
+ DCHECK(return_success || strong);
+
+ uint32_t expected_index = invoke->GetNumberOfArguments() - 2;
+ uint32_t new_value_index = invoke->GetNumberOfArguments() - 1;
+ DataType::Type value_type = GetDataTypeFromShorty(invoke, new_value_index);
+ DCHECK_EQ(value_type, GetDataTypeFromShorty(invoke, expected_index));
+
+ Riscv64Assembler* assembler = codegen->GetAssembler();
+ LocationSummary* locations = invoke->GetLocations();
+ Location expected = locations->InAt(expected_index);
+ Location new_value = locations->InAt(new_value_index);
+ Location out = locations->Out();
+
+ VarHandleTarget target = GetVarHandleTarget(invoke);
+ VarHandleSlowPathRISCV64* slow_path = nullptr;
+ if (!byte_swap) {
+ slow_path = GenerateVarHandleChecks(invoke, codegen, order, value_type);
+ GenerateVarHandleTarget(invoke, target, codegen);
+ if (slow_path != nullptr) {
+ slow_path->SetCompareAndSetOrExchangeArgs(return_success, strong);
+ __ Bind(slow_path->GetNativeByteOrderLabel());
+ }
+ }
+
+ // This needs to be before we allocate the scratch registers, as MarkGCCard also uses them.
+ if (CodeGenerator::StoreNeedsWriteBarrier(value_type, invoke->InputAt(new_value_index))) {
+ // Mark card for object assuming new value is stored.
+ bool new_value_can_be_null = true; // TODO: Worth finding out this information?
+ codegen->MarkGCCard(target.object, new_value.AsRegister<XRegister>(), new_value_can_be_null);
+ }
+
+ // Scratch registers may be needed for `new_value` and `expected`.
+ ScratchRegisterScope srs(assembler);
+ DCHECK_EQ(srs.AvailableXRegisters(), 2u);
+ size_t available_scratch_registers =
+ (ScratchXRegisterNeeded(expected, value_type, byte_swap) ? 0u : 1u) +
+ (ScratchXRegisterNeeded(new_value, value_type, byte_swap) ? 0u : 1u);
+
+ // Reuse the `offset` temporary for the pointer to the target location,
+ // except for references that need the offset for the read barrier.
+ DCHECK_EQ(target.offset, locations->GetTemp(0u).AsRegister<XRegister>());
+ size_t next_temp = 1u;
+ XRegister tmp_ptr = target.offset;
+ bool is_reference = (value_type == DataType::Type::kReference);
+ if (is_reference && codegen->EmitReadBarrier()) {
+ // Reserve scratch registers for `tmp_ptr` and `old_value_temp`.
+ DCHECK_EQ(available_scratch_registers, 2u);
+ available_scratch_registers = 0u;
+ DCHECK_EQ(expected_index, 1u + GetExpectedVarHandleCoordinatesCount(invoke));
+ next_temp = expected_index == 1u ? 2u : 1u; // Preserve the class register for static field.
+ tmp_ptr = srs.AllocateXRegister();
+ }
+ __ Add(tmp_ptr, target.object, target.offset);
+
+ auto get_temp = [&]() {
+ if (available_scratch_registers != 0u) {
+ available_scratch_registers -= 1u;
+ return srs.AllocateXRegister();
+ } else {
+ XRegister temp = locations->GetTemp(next_temp).AsRegister<XRegister>();
+ next_temp += 1u;
+ return temp;
+ }
+ };
+
+ XRegister shift = kNoXRegister;
+ XRegister mask = kNoXRegister;
+ XRegister masked = kNoXRegister;
+ size_t data_size = DataType::Size(value_type);
+ bool is_small = (data_size < 4u);
+ if (is_small) {
+ // When returning "success" and not the old value, we shall not need the `shift` after
+ // the raw CAS operation, so use the output register as a temporary here.
+ shift = return_success ? locations->Out().AsRegister<XRegister>() : get_temp();
+ mask = get_temp();
+ masked = get_temp();
+ // Upper bits of the shift are not used, so we do not need to clear them.
+ __ Slli(shift, tmp_ptr, WhichPowerOf2(kBitsPerByte));
+ __ Andi(tmp_ptr, tmp_ptr, -4);
+ __ Li(mask, (1 << (data_size * kBitsPerByte)) - 1);
+ __ Sllw(mask, mask, shift);
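+ // For example, for a 2-byte value at offset 2 within its aligned 4-byte word, the
+ // effective shift is 16 (the W-form shifts use only the low 5 bits of `shift`, so the
+ // garbage in its upper bits is harmless) and `mask` becomes 0xffff0000.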
+ }
+
+ // Move floating point values to scratch registers and apply shift, mask and byte swap if needed.
+ // Note that float/double CAS uses a bitwise comparison, rather than `operator==` semantics.
+ XRegister expected_reg =
+ PrepareXRegister(codegen, expected, value_type, shift, mask, byte_swap, &srs);
+ XRegister new_value_reg =
+ PrepareXRegister(codegen, new_value, value_type, shift, mask, byte_swap, &srs);
+ bool is_fp = DataType::IsFloatingPointType(value_type);
+ DataType::Type cas_type = is_fp
+ ? IntTypeForFloatingPointType(value_type)
+ : (is_small ? DataType::Type::kInt32 : value_type);
+
+ // Prepare registers for old value and the result of the store conditional.
+ XRegister old_value;
+ XRegister store_result;
+ if (return_success) {
+ // Use a temp for the old value.
+ old_value = get_temp();
+ // For strong CAS, use the `old_value` temp also for the SC result.
+ // For weak CAS, put the SC result directly to `out`.
+ store_result = strong ? old_value : out.AsRegister<XRegister>();
+ } else if (is_fp) {
+ // We need two temporary registers.
+ old_value = get_temp();
+ store_result = get_temp();
+ } else {
+ // Use the output register for the old value and a temp for the store conditional result.
+ old_value = out.AsRegister<XRegister>();
+ store_result = get_temp();
+ }
+
+ Riscv64Label exit_loop_label;
+ Riscv64Label* exit_loop = &exit_loop_label;
+ Riscv64Label* cmp_failure = &exit_loop_label;
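+ // Without a read barrier, a comparison failure simply falls through to the common exit
+ // label; with a read barrier on references, the failure path first enters the marking
+ // slow path set up below.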
+
+ ReadBarrierCasSlowPathRISCV64* rb_slow_path = nullptr;
+ if (is_reference && codegen->EmitReadBarrier()) {
+ // The `old_value_temp` is used first for marking the `old_value` and then for the unmarked
+ // reloaded old value for subsequent CAS in the slow path. We make this a scratch register
+ // as we do have marking entrypoints on riscv64 even for scratch registers.
+ XRegister old_value_temp = srs.AllocateXRegister();
+ // For strong CAS, use the `old_value_temp` also for the SC result as the reloaded old value
+ // is no longer needed after the comparison. For weak CAS, store the SC result in the same
+ // result register as the main path.
+ // Note that for a strong CAS, a SC failure in the slow path can set the register to 1, so
+ // we cannot use that register to indicate success without resetting it to 0 at the start of
+ // the retry loop. Instead, we return to the success indicating instruction in the main path.
+ XRegister slow_path_store_result = strong ? old_value_temp : store_result;
+ rb_slow_path = new (codegen->GetScopedAllocator()) ReadBarrierCasSlowPathRISCV64(
+ invoke,
+ order,
+ strong,
+ target.object,
+ target.offset,
+ expected_reg,
+ new_value_reg,
+ old_value,
+ old_value_temp,
+ slow_path_store_result,
+ /*update_old_value=*/ !return_success,
+ codegen);
+ codegen->AddSlowPath(rb_slow_path);
+ exit_loop = rb_slow_path->GetExitLabel();
+ cmp_failure = rb_slow_path->GetEntryLabel();
+ }
+
+ if (return_success) {
+ // Pre-populate the output register with failure for the case when the old value
+ // differs and we do not execute the store conditional.
+ __ Li(out.AsRegister<XRegister>(), 0);
+ }
+ GenerateCompareAndSet(codegen->GetAssembler(),
+ cas_type,
+ order,
+ strong,
+ cmp_failure,
+ tmp_ptr,
+ new_value_reg,
+ old_value,
+ mask,
+ masked,
+ store_result,
+ expected_reg);
+ if (return_success && strong) {
+ if (rb_slow_path != nullptr) {
+ // Slow path returns here on success.
+ __ Bind(rb_slow_path->GetSuccessExitLabel());
+ }
+ // Load success value to the output register.
+ // `GenerateCompareAndSet()` does not emit code to indicate success for a strong CAS.
+ __ Li(out.AsRegister<XRegister>(), 1);
+ } else if (rb_slow_path != nullptr) {
+ DCHECK(!rb_slow_path->GetSuccessExitLabel()->IsLinked());
+ }
+ __ Bind(exit_loop);
+
+ if (return_success) {
+ // Nothing to do, the result register already contains 1 on success and 0 on failure.
+ } else if (byte_swap) {
+ DCHECK_IMPLIES(is_small, out.AsRegister<XRegister>() == old_value)
+ << " " << value_type << " " << out.AsRegister<XRegister>() << "!=" << old_value;
+ GenerateByteSwapAndExtract(codegen, out, old_value, shift, value_type);
+ } else if (is_fp) {
+ codegen->MoveLocation(out, Location::RegisterLocation(old_value), value_type);
+ } else if (is_small) {
+ __ Srlw(old_value, masked, shift);
+ if (value_type == DataType::Type::kInt8) {
+ __ SextB(old_value, old_value);
+ } else if (value_type == DataType::Type::kInt16) {
+ __ SextH(old_value, old_value);
+ }
+ }
+
+ if (slow_path != nullptr) {
+ DCHECK(!byte_swap);
+ __ Bind(slow_path->GetExitLabel());
+ }
+
+ // Check that we have allocated the right number of temps. We may need more registers
+ // for byte swapped CAS in the slow path, so skip this check for the main path in that case.
+ bool has_byte_swap = (expected_index == 3u) && (!is_reference && data_size != 1u);
+ if ((!has_byte_swap || byte_swap) && next_temp != locations->GetTempCount()) {
+ // We allocate a temporary register for the class object for a static field `VarHandle` but
+ // we do not update the `next_temp` if it's otherwise unused after the address calculation.
+ CHECK_EQ(expected_index, 1u);
+ CHECK_EQ(next_temp, 1u);
+ CHECK_EQ(locations->GetTempCount(), 2u);
+ }
+}
+
+void IntrinsicLocationsBuilderRISCV64::VisitVarHandleCompareAndExchange(HInvoke* invoke) {
+ CreateVarHandleCompareAndSetOrExchangeLocations(invoke, codegen_, /*return_success=*/ false);
+}
+
+void IntrinsicCodeGeneratorRISCV64::VisitVarHandleCompareAndExchange(HInvoke* invoke) {
+ GenerateVarHandleCompareAndSetOrExchange(
+ invoke, codegen_, std::memory_order_seq_cst, /*return_success=*/ false, /*strong=*/ true);
+}
+
+void IntrinsicLocationsBuilderRISCV64::VisitVarHandleCompareAndExchangeAcquire(HInvoke* invoke) {
+ CreateVarHandleCompareAndSetOrExchangeLocations(invoke, codegen_, /*return_success=*/ false);
+}
+
+void IntrinsicCodeGeneratorRISCV64::VisitVarHandleCompareAndExchangeAcquire(HInvoke* invoke) {
+ GenerateVarHandleCompareAndSetOrExchange(
+ invoke, codegen_, std::memory_order_acquire, /*return_success=*/ false, /*strong=*/ true);
+}
+
+void IntrinsicLocationsBuilderRISCV64::VisitVarHandleCompareAndExchangeRelease(HInvoke* invoke) {
+ CreateVarHandleCompareAndSetOrExchangeLocations(invoke, codegen_, /*return_success=*/ false);
+}
+
+void IntrinsicCodeGeneratorRISCV64::VisitVarHandleCompareAndExchangeRelease(HInvoke* invoke) {
+ GenerateVarHandleCompareAndSetOrExchange(
+ invoke, codegen_, std::memory_order_release, /*return_success=*/ false, /*strong=*/ true);
+}
+
+void IntrinsicLocationsBuilderRISCV64::VisitVarHandleCompareAndSet(HInvoke* invoke) {
+ CreateVarHandleCompareAndSetOrExchangeLocations(invoke, codegen_, /*return_success=*/ true);
+}
+
+void IntrinsicCodeGeneratorRISCV64::VisitVarHandleCompareAndSet(HInvoke* invoke) {
+ GenerateVarHandleCompareAndSetOrExchange(
+ invoke, codegen_, std::memory_order_seq_cst, /*return_success=*/ true, /*strong=*/ true);
+}
+
+void IntrinsicLocationsBuilderRISCV64::VisitVarHandleWeakCompareAndSet(HInvoke* invoke) {
+ CreateVarHandleCompareAndSetOrExchangeLocations(invoke, codegen_, /*return_success=*/ true);
+}
+
+void IntrinsicCodeGeneratorRISCV64::VisitVarHandleWeakCompareAndSet(HInvoke* invoke) {
+ GenerateVarHandleCompareAndSetOrExchange(
+ invoke, codegen_, std::memory_order_seq_cst, /*return_success=*/ true, /*strong=*/ false);
+}
+
+void IntrinsicLocationsBuilderRISCV64::VisitVarHandleWeakCompareAndSetAcquire(HInvoke* invoke) {
+ CreateVarHandleCompareAndSetOrExchangeLocations(invoke, codegen_, /*return_success=*/ true);
+}
+
+void IntrinsicCodeGeneratorRISCV64::VisitVarHandleWeakCompareAndSetAcquire(HInvoke* invoke) {
+ GenerateVarHandleCompareAndSetOrExchange(
+ invoke, codegen_, std::memory_order_acquire, /*return_success=*/ true, /*strong=*/ false);
+}
+
+void IntrinsicLocationsBuilderRISCV64::VisitVarHandleWeakCompareAndSetPlain(HInvoke* invoke) {
+ CreateVarHandleCompareAndSetOrExchangeLocations(invoke, codegen_, /*return_success=*/ true);
+}
+
+void IntrinsicCodeGeneratorRISCV64::VisitVarHandleWeakCompareAndSetPlain(HInvoke* invoke) {
+ GenerateVarHandleCompareAndSetOrExchange(
+ invoke, codegen_, std::memory_order_relaxed, /*return_success=*/ true, /*strong=*/ false);
+}
+
+void IntrinsicLocationsBuilderRISCV64::VisitVarHandleWeakCompareAndSetRelease(HInvoke* invoke) {
+ CreateVarHandleCompareAndSetOrExchangeLocations(invoke, codegen_, /*return_success=*/ true);
+}
+
+void IntrinsicCodeGeneratorRISCV64::VisitVarHandleWeakCompareAndSetRelease(HInvoke* invoke) {
+ GenerateVarHandleCompareAndSetOrExchange(
+ invoke, codegen_, std::memory_order_release, /*return_success=*/ true, /*strong=*/ false);
+}
+
+static void CreateVarHandleGetAndUpdateLocations(HInvoke* invoke,
+ CodeGeneratorRISCV64* codegen,
+ GetAndUpdateOp get_and_update_op) {
+ VarHandleOptimizations optimizations(invoke);
+ if (optimizations.GetDoNotIntrinsify()) {
+ return;
+ }
+
+ if (invoke->GetType() == DataType::Type::kReference && codegen->EmitNonBakerReadBarrier()) {
+ // Unsupported for non-Baker read barrier because the artReadBarrierSlow() ignores
+ // the passed reference and reloads it from the field, thus seeing the new value
+ // that we have just stored. (And it also gets the memory visibility wrong.) b/173104084
+ return;
+ }
+
+ LocationSummary* locations = CreateVarHandleCommonLocations(invoke, codegen);
+ uint32_t arg_index = invoke->GetNumberOfArguments() - 1;
+ DCHECK_EQ(arg_index, 1u + GetExpectedVarHandleCoordinatesCount(invoke));
+ DataType::Type value_type = invoke->GetType();
+ DCHECK_EQ(value_type, GetDataTypeFromShorty(invoke, arg_index));
+ Location arg = locations->InAt(arg_index);
+
+ bool is_fp = DataType::IsFloatingPointType(value_type);
+ if (is_fp) {
+ if (get_and_update_op == GetAndUpdateOp::kAdd) {
+ // For ADD, do not use ZR for zero bit pattern (+0.0f or +0.0).
+ locations->SetInAt(invoke->GetNumberOfArguments() - 1u, Location::RequiresFpuRegister());
+ } else {
+ DCHECK(get_and_update_op == GetAndUpdateOp::kSet);
+ }
+ }
+
+ size_t data_size = DataType::Size(value_type);
+ bool can_byte_swap =
+ (arg_index == 3u) && (value_type != DataType::Type::kReference && data_size != 1u);
+ bool can_use_cas = (get_and_update_op == GetAndUpdateOp::kAdd) && (can_byte_swap || is_fp);
+ bool is_small = (data_size < 4u);
+ bool is_small_and = is_small && (get_and_update_op == GetAndUpdateOp::kAnd);
+ bool is_bitwise =
+ (get_and_update_op != GetAndUpdateOp::kSet && get_and_update_op != GetAndUpdateOp::kAdd);
+
+ size_t temps_needed =
+ // The offset temp is used for the `tmp_ptr`.
+ 1u +
+ // For small values, we need temps for `shift` and maybe also `mask` and `temp`.
+ (is_small ? (is_bitwise ? 1u : 3u) : 0u) +
+ // Some cases need modified copies of `arg`.
+ (is_small_and || ScratchXRegisterNeeded(arg, value_type, can_byte_swap) ? 1u : 0u) +
+ // For FP types, we need a temp for `old_value` which cannot be loaded directly to `out`.
+ (is_fp ? 1u : 0u);
+ if (can_use_cas) {
+ size_t cas_temps_needed =
+ // The offset temp is used for the `tmp_ptr`.
+ 1u +
+ // For small values, we need a temp for `shift`.
+ (is_small ? 1u : 0u) +
+ // And we always need temps for `old_value`, `new_value` and `reloaded_old_value`.
+ 3u;
+ DCHECK_GE(cas_temps_needed, temps_needed);
+ temps_needed = cas_temps_needed;
+ }
+
+ size_t scratch_registers_available = 2u;
+ DCHECK_EQ(scratch_registers_available,
+ ScratchRegisterScope(codegen->GetAssembler()).AvailableXRegisters());
+ size_t old_temp_count = locations->GetTempCount();
+ DCHECK_EQ(old_temp_count, (arg_index == 1u) ? 2u : 1u);
+ if (temps_needed > old_temp_count + scratch_registers_available) {
+ locations->AddRegisterTemps(temps_needed - (old_temp_count + scratch_registers_available));
+ }
+}
+
+static void GenerateVarHandleGetAndUpdate(HInvoke* invoke,
+ CodeGeneratorRISCV64* codegen,
+ GetAndUpdateOp get_and_update_op,
+ std::memory_order order,
+ bool byte_swap = false) {
+ uint32_t arg_index = invoke->GetNumberOfArguments() - 1;
+ DCHECK_EQ(arg_index, 1u + GetExpectedVarHandleCoordinatesCount(invoke));
+ DataType::Type value_type = invoke->GetType();
+ DCHECK_EQ(value_type, GetDataTypeFromShorty(invoke, arg_index));
+
+ Riscv64Assembler* assembler = codegen->GetAssembler();
+ LocationSummary* locations = invoke->GetLocations();
+ Location arg = locations->InAt(arg_index);
+ DCHECK_IMPLIES(arg.IsConstant(), arg.GetConstant()->IsZeroBitPattern());
+ Location out = locations->Out();
+
+ VarHandleTarget target = GetVarHandleTarget(invoke);
+ VarHandleSlowPathRISCV64* slow_path = nullptr;
+ if (!byte_swap) {
+ slow_path = GenerateVarHandleChecks(invoke, codegen, order, value_type);
+ GenerateVarHandleTarget(invoke, target, codegen);
+ if (slow_path != nullptr) {
+ slow_path->SetGetAndUpdateOp(get_and_update_op);
+ __ Bind(slow_path->GetNativeByteOrderLabel());
+ }
+ }
+
+ // This needs to be done before we allocate the scratch registers, as MarkGCCard also uses them.
+ if (CodeGenerator::StoreNeedsWriteBarrier(value_type, invoke->InputAt(arg_index))) {
+ DCHECK(get_and_update_op == GetAndUpdateOp::kSet);
+ // Mark card for object; the new value will be stored.
+ bool new_value_can_be_null = true; // TODO: Worth finding out this information?
+ codegen->MarkGCCard(target.object, arg.AsRegister<XRegister>(), new_value_can_be_null);
+ }
+
+ size_t data_size = DataType::Size(value_type);
+ bool is_fp = DataType::IsFloatingPointType(value_type);
+ bool use_cas = (get_and_update_op == GetAndUpdateOp::kAdd) && (byte_swap || is_fp);
+ bool is_small = (data_size < 4u);
+ bool is_small_and = is_small && (get_and_update_op == GetAndUpdateOp::kAnd);
+ bool is_reference = (value_type == DataType::Type::kReference);
+ DataType::Type op_type = is_fp
+ ? IntTypeForFloatingPointType(value_type)
+ : (is_small || is_reference ? DataType::Type::kInt32 : value_type);
+
+ ScratchRegisterScope srs(assembler);
+ DCHECK_EQ(srs.AvailableXRegisters(), 2u);
+ size_t available_scratch_registers = use_cas
+ // We use scratch registers differently for the CAS path.
+ ? 0u
+ // Reserve one scratch register for `PrepareXRegister()` or similar `arg_reg` allocation.
+ : (is_small_and || ScratchXRegisterNeeded(arg, value_type, byte_swap) ? 1u : 2u);
+
+ // Reuse the `target.offset` temporary for the pointer to the target location,
+ // except for references that need the offset for the non-Baker read barrier.
+ DCHECK_EQ(target.offset, locations->GetTemp(0u).AsRegister<XRegister>());
+ size_t next_temp = 1u;
+ XRegister tmp_ptr = target.offset;
+ if (is_reference && codegen->EmitNonBakerReadBarrier()) {
+ DCHECK_EQ(available_scratch_registers, 2u);
+ available_scratch_registers -= 1u;
+ tmp_ptr = srs.AllocateXRegister();
+ }
+ __ Add(tmp_ptr, target.object, target.offset);
+
+ auto get_temp = [&]() {
+ if (available_scratch_registers != 0u) {
+ available_scratch_registers -= 1u;
+ return srs.AllocateXRegister();
+ } else {
+ XRegister temp = locations->GetTemp(next_temp).AsRegister<XRegister>();
+ next_temp += 1u;
+ return temp;
+ }
+ };
+
+ XRegister shift = kNoXRegister;
+ XRegister mask = kNoXRegister;
+ XRegister prepare_mask = kNoXRegister;
+ XRegister temp = kNoXRegister;
+ XRegister arg_reg = kNoXRegister;
+ if (is_small) {
+ shift = get_temp();
+ // Upper bits of the shift are not used, so we do not need to clear them.
+ __ Slli(shift, tmp_ptr, WhichPowerOf2(kBitsPerByte));
+ __ Andi(tmp_ptr, tmp_ptr, -4);
+ switch (get_and_update_op) {
+ case GetAndUpdateOp::kAdd:
+ if (byte_swap) {
+ // The mask is not needed in the CAS path.
+ DCHECK(use_cas);
+ break;
+ }
+ FALLTHROUGH_INTENDED;
+ case GetAndUpdateOp::kSet:
+ mask = get_temp();
+ temp = get_temp();
+ __ Li(mask, (1 << (data_size * kBitsPerByte)) - 1);
+ __ Sllw(mask, mask, shift);
+ // The argument does not need to be masked for `GetAndUpdateOp::kAdd`,
+ // as the mask is applied after the ADD instruction.
+ prepare_mask = (get_and_update_op == GetAndUpdateOp::kSet) ? mask : kNoXRegister;
+ break;
+ case GetAndUpdateOp::kAnd:
+ // We need to set all other bits, so we always need a temp.
+ arg_reg = srs.AllocateXRegister();
+ if (data_size == 1u) {
+ __ Ori(arg_reg, InputXRegisterOrZero(arg), ~0xff);
+ DCHECK(!byte_swap);
+ } else {
+ DCHECK_EQ(data_size, 2u);
+ __ Li(arg_reg, ~0xffff);
+ __ Or(arg_reg, InputXRegisterOrZero(arg), arg_reg);
+ if (byte_swap) {
+ __ Rev8(arg_reg, arg_reg);
+ __ Rori(arg_reg, arg_reg, 48);
+ }
+ }
+ __ Rolw(arg_reg, arg_reg, shift);
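+ // For example, for a byte stored in byte 1 of its aligned word (effective shift 8), the
+ // rotated value is 0xffffXXff with the argument byte XX in bits 8..15, so the atomic AND
+ // leaves the other three bytes of the 32-bit word intact.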
+ break;
+ case GetAndUpdateOp::kOr:
+ case GetAndUpdateOp::kXor:
+ // Signed values need to be truncated but we're keeping `prepare_mask == kNoXRegister`.
+ if (value_type == DataType::Type::kInt8 && !arg.IsConstant()) {
+ DCHECK(!byte_swap);
+ arg_reg = srs.AllocateXRegister();
+ __ ZextB(arg_reg, arg.AsRegister<XRegister>());
+ __ Sllw(arg_reg, arg_reg, shift);
+ } else if (value_type == DataType::Type::kInt16 && !arg.IsConstant() && !byte_swap) {
+ arg_reg = srs.AllocateXRegister();
+ __ ZextH(arg_reg, arg.AsRegister<XRegister>());
+ __ Sllw(arg_reg, arg_reg, shift);
+ } // else handled by `PrepareXRegister()` below.
+ break;
+ }
+ }
+ if (arg_reg == kNoXRegister && !use_cas) {
+ arg_reg = PrepareXRegister(codegen, arg, value_type, shift, prepare_mask, byte_swap, &srs);
+ }
+ if (mask != kNoXRegister && get_and_update_op == GetAndUpdateOp::kSet) {
+ __ Not(mask, mask); // We need to flip the mask for `kSet`, see `GenerateGetAndUpdate()`.
+ }
+
+ if (use_cas) {
+ // Allocate scratch registers for temps that can theoretically be clobbered on retry.
+ // (Even though the `retry` label shall never be far enough for `TMP` to be clobbered.)
+ DCHECK_EQ(available_scratch_registers, 0u); // Reserved for the two uses below.
+ XRegister old_value = srs.AllocateXRegister();
+ XRegister new_value = srs.AllocateXRegister();
+ // Allocate other needed temporaries.
+ XRegister reloaded_old_value = get_temp();
+ XRegister store_result = reloaded_old_value; // Clobber reloaded old value by store result.
+ FRegister ftmp = is_fp ? srs.AllocateFRegister() : kNoFRegister;
+
+ Riscv64Label retry;
+ __ Bind(&retry);
+ codegen->GetInstructionVisitor()->Load(
+ Location::RegisterLocation(old_value), tmp_ptr, /*offset=*/ 0, op_type);
+ if (byte_swap) {
+ GenerateByteSwapAndExtract(codegen, out, old_value, shift, value_type);
+ } else {
+ DCHECK(is_fp);
+ codegen->MoveLocation(out, Location::RegisterLocation(old_value), value_type);
+ }
+ if (is_fp) {
+ codegen->GetInstructionVisitor()->FAdd(
+ ftmp, out.AsFpuRegister<FRegister>(), arg.AsFpuRegister<FRegister>(), value_type);
+ codegen->MoveLocation(
+ Location::RegisterLocation(new_value), Location::FpuRegisterLocation(ftmp), op_type);
+ } else if (value_type == DataType::Type::kInt64) {
+ __ Add(new_value, out.AsRegister<XRegister>(), arg.AsRegister<XRegister>());
+ } else {
+ DCHECK_EQ(op_type, DataType::Type::kInt32);
+ __ Addw(new_value, out.AsRegister<XRegister>(), arg.AsRegister<XRegister>());
+ }
+ if (byte_swap) {
+ DataType::Type swap_type = op_type;
+ if (is_small) {
+ DCHECK_EQ(data_size, 2u);
+ // We want to update only 16 bits of the 32-bit location. The 16 bits we want to replace
+ // are present in both `old_value` and `out` but in different bits and byte order.
+ // To update the 16 bits, we can XOR the new value with the `out`, byte swap as Uint16
+ // (extracting only the bits we want to update), shift and XOR with the old value.
+ swap_type = DataType::Type::kUint16;
+ __ Xor(new_value, new_value, out.AsRegister<XRegister>());
+ }
+ GenerateReverseBytes(codegen, Location::RegisterLocation(new_value), new_value, swap_type);
+ if (is_small) {
+ __ Sllw(new_value, new_value, shift);
+ __ Xor(new_value, new_value, old_value);
+ }
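+ // To recap: the low 16 bits of `new_value ^ out` are old^new in value order, the Uint16
+ // byte swap converts exactly those bits to memory byte order, the shift moves them into
+ // the halfword's lane, and the final XOR with the loaded `old_value` replaces just that
+ // halfword.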
+ }
+ GenerateCompareAndSet(assembler,
+ op_type,
+ order,
+ /*strong=*/ true,
+ /*cmp_failure=*/ &retry,
+ tmp_ptr,
+ new_value,
+ /*old_value=*/ reloaded_old_value,
+ /*mask=*/ kNoXRegister,
+ /*masked=*/ kNoXRegister,
+ store_result,
+ /*expected=*/ old_value);
+ } else {
+ XRegister old_value = is_fp ? get_temp() : out.AsRegister<XRegister>();
+ GenerateGetAndUpdate(
+ codegen, get_and_update_op, op_type, order, tmp_ptr, arg_reg, old_value, mask, temp);
+ if (byte_swap) {
+ DCHECK_IMPLIES(is_small, out.AsRegister<XRegister>() == old_value)
+ << " " << value_type << " " << out.AsRegister<XRegister>() << "!=" << old_value;
+ GenerateByteSwapAndExtract(codegen, out, old_value, shift, value_type);
+ } else if (is_fp) {
+ codegen->MoveLocation(out, Location::RegisterLocation(old_value), value_type);
+ } else if (is_small) {
+ __ Srlw(old_value, old_value, shift);
+ DCHECK_NE(value_type, DataType::Type::kUint8);
+ if (value_type == DataType::Type::kInt8) {
+ __ SextB(old_value, old_value);
+ } else if (value_type == DataType::Type::kBool) {
+ __ ZextB(old_value, old_value);
+ } else if (value_type == DataType::Type::kInt16) {
+ __ SextH(old_value, old_value);
+ } else {
+ DCHECK_EQ(value_type, DataType::Type::kUint16);
+ __ ZextH(old_value, old_value);
+ }
+ } else if (is_reference) {
+ __ ZextW(old_value, old_value);
+ if (codegen->EmitBakerReadBarrier()) {
+ // Use RA as temp. It is clobbered in the slow path anyway.
+ static constexpr Location kBakerReadBarrierTemp = Location::RegisterLocation(RA);
+ SlowPathCodeRISCV64* rb_slow_path =
+ codegen->AddGcRootBakerBarrierBarrierSlowPath(invoke, out, kBakerReadBarrierTemp);
+ codegen->EmitBakerReadBarierMarkingCheck(rb_slow_path, out, kBakerReadBarrierTemp);
+ } else if (codegen->EmitNonBakerReadBarrier()) {
+ Location base_loc = Location::RegisterLocation(target.object);
+ Location index = Location::RegisterLocation(target.offset);
+ SlowPathCodeRISCV64* rb_slow_path = codegen->AddReadBarrierSlowPath(
+ invoke, out, out, base_loc, /*offset=*/ 0u, index);
+ __ J(rb_slow_path->GetEntryLabel());
+ __ Bind(rb_slow_path->GetExitLabel());
+ }
+ }
+ }
+
+ if (slow_path != nullptr) {
+ DCHECK(!byte_swap);
+ __ Bind(slow_path->GetExitLabel());
+ }
+
+ // Check that we have allocated the right number of temps. We may need more registers
+ // for byte swapped CAS in the slow path, so skip this check for the main path in that case.
+ bool has_byte_swap = (arg_index == 3u) && (!is_reference && data_size != 1u);
+ if ((!has_byte_swap || byte_swap) && next_temp != locations->GetTempCount()) {
+ // We allocate a temporary register for the class object for a static field `VarHandle` but
+ // we do not update the `next_temp` if it's otherwise unused after the address calculation.
+ CHECK_EQ(arg_index, 1u);
+ CHECK_EQ(next_temp, 1u);
+ CHECK_EQ(locations->GetTempCount(), 2u);
+ }
+}
+
+void IntrinsicLocationsBuilderRISCV64::VisitVarHandleGetAndSet(HInvoke* invoke) {
+ CreateVarHandleGetAndUpdateLocations(invoke, codegen_, GetAndUpdateOp::kSet);
+}
+
+void IntrinsicCodeGeneratorRISCV64::VisitVarHandleGetAndSet(HInvoke* invoke) {
+ GenerateVarHandleGetAndUpdate(invoke, codegen_, GetAndUpdateOp::kSet, std::memory_order_seq_cst);
+}
+
+void IntrinsicLocationsBuilderRISCV64::VisitVarHandleGetAndSetAcquire(HInvoke* invoke) {
+ CreateVarHandleGetAndUpdateLocations(invoke, codegen_, GetAndUpdateOp::kSet);
+}
+
+void IntrinsicCodeGeneratorRISCV64::VisitVarHandleGetAndSetAcquire(HInvoke* invoke) {
+ GenerateVarHandleGetAndUpdate(invoke, codegen_, GetAndUpdateOp::kSet, std::memory_order_acquire);
+}
+
+void IntrinsicLocationsBuilderRISCV64::VisitVarHandleGetAndSetRelease(HInvoke* invoke) {
+ CreateVarHandleGetAndUpdateLocations(invoke, codegen_, GetAndUpdateOp::kSet);
+}
+
+void IntrinsicCodeGeneratorRISCV64::VisitVarHandleGetAndSetRelease(HInvoke* invoke) {
+ GenerateVarHandleGetAndUpdate(invoke, codegen_, GetAndUpdateOp::kSet, std::memory_order_release);
+}
+
+void IntrinsicLocationsBuilderRISCV64::VisitVarHandleGetAndAdd(HInvoke* invoke) {
+ CreateVarHandleGetAndUpdateLocations(invoke, codegen_, GetAndUpdateOp::kAdd);
+}
+
+void IntrinsicCodeGeneratorRISCV64::VisitVarHandleGetAndAdd(HInvoke* invoke) {
+ GenerateVarHandleGetAndUpdate(invoke, codegen_, GetAndUpdateOp::kAdd, std::memory_order_seq_cst);
+}
+
+void IntrinsicLocationsBuilderRISCV64::VisitVarHandleGetAndAddAcquire(HInvoke* invoke) {
+ CreateVarHandleGetAndUpdateLocations(invoke, codegen_, GetAndUpdateOp::kAdd);
+}
+
+void IntrinsicCodeGeneratorRISCV64::VisitVarHandleGetAndAddAcquire(HInvoke* invoke) {
+ GenerateVarHandleGetAndUpdate(invoke, codegen_, GetAndUpdateOp::kAdd, std::memory_order_acquire);
+}
+
+void IntrinsicLocationsBuilderRISCV64::VisitVarHandleGetAndAddRelease(HInvoke* invoke) {
+ CreateVarHandleGetAndUpdateLocations(invoke, codegen_, GetAndUpdateOp::kAdd);
+}
+
+void IntrinsicCodeGeneratorRISCV64::VisitVarHandleGetAndAddRelease(HInvoke* invoke) {
+ GenerateVarHandleGetAndUpdate(invoke, codegen_, GetAndUpdateOp::kAdd, std::memory_order_release);
+}
+
+void IntrinsicLocationsBuilderRISCV64::VisitVarHandleGetAndBitwiseAnd(HInvoke* invoke) {
+ CreateVarHandleGetAndUpdateLocations(invoke, codegen_, GetAndUpdateOp::kAnd);
+}
+
+void IntrinsicCodeGeneratorRISCV64::VisitVarHandleGetAndBitwiseAnd(HInvoke* invoke) {
+ GenerateVarHandleGetAndUpdate(invoke, codegen_, GetAndUpdateOp::kAnd, std::memory_order_seq_cst);
+}
+
+void IntrinsicLocationsBuilderRISCV64::VisitVarHandleGetAndBitwiseAndAcquire(HInvoke* invoke) {
+ CreateVarHandleGetAndUpdateLocations(invoke, codegen_, GetAndUpdateOp::kAnd);
+}
+
+void IntrinsicCodeGeneratorRISCV64::VisitVarHandleGetAndBitwiseAndAcquire(HInvoke* invoke) {
+ GenerateVarHandleGetAndUpdate(invoke, codegen_, GetAndUpdateOp::kAnd, std::memory_order_acquire);
+}
+
+void IntrinsicLocationsBuilderRISCV64::VisitVarHandleGetAndBitwiseAndRelease(HInvoke* invoke) {
+ CreateVarHandleGetAndUpdateLocations(invoke, codegen_, GetAndUpdateOp::kAnd);
+}
+
+void IntrinsicCodeGeneratorRISCV64::VisitVarHandleGetAndBitwiseAndRelease(HInvoke* invoke) {
+ GenerateVarHandleGetAndUpdate(invoke, codegen_, GetAndUpdateOp::kAnd, std::memory_order_release);
+}
+
+void IntrinsicLocationsBuilderRISCV64::VisitVarHandleGetAndBitwiseOr(HInvoke* invoke) {
+ CreateVarHandleGetAndUpdateLocations(invoke, codegen_, GetAndUpdateOp::kOr);
+}
+
+void IntrinsicCodeGeneratorRISCV64::VisitVarHandleGetAndBitwiseOr(HInvoke* invoke) {
+ GenerateVarHandleGetAndUpdate(invoke, codegen_, GetAndUpdateOp::kOr, std::memory_order_seq_cst);
+}
+
+void IntrinsicLocationsBuilderRISCV64::VisitVarHandleGetAndBitwiseOrAcquire(HInvoke* invoke) {
+ CreateVarHandleGetAndUpdateLocations(invoke, codegen_, GetAndUpdateOp::kOr);
+}
+
+void IntrinsicCodeGeneratorRISCV64::VisitVarHandleGetAndBitwiseOrAcquire(HInvoke* invoke) {
+ GenerateVarHandleGetAndUpdate(invoke, codegen_, GetAndUpdateOp::kOr, std::memory_order_acquire);
+}
+
+void IntrinsicLocationsBuilderRISCV64::VisitVarHandleGetAndBitwiseOrRelease(HInvoke* invoke) {
+ CreateVarHandleGetAndUpdateLocations(invoke, codegen_, GetAndUpdateOp::kOr);
+}
+
+void IntrinsicCodeGeneratorRISCV64::VisitVarHandleGetAndBitwiseOrRelease(HInvoke* invoke) {
+ GenerateVarHandleGetAndUpdate(invoke, codegen_, GetAndUpdateOp::kOr, std::memory_order_release);
+}
+
+void IntrinsicLocationsBuilderRISCV64::VisitVarHandleGetAndBitwiseXor(HInvoke* invoke) {
+ CreateVarHandleGetAndUpdateLocations(invoke, codegen_, GetAndUpdateOp::kXor);
+}
+
+void IntrinsicCodeGeneratorRISCV64::VisitVarHandleGetAndBitwiseXor(HInvoke* invoke) {
+ GenerateVarHandleGetAndUpdate(invoke, codegen_, GetAndUpdateOp::kXor, std::memory_order_seq_cst);
+}
+
+void IntrinsicLocationsBuilderRISCV64::VisitVarHandleGetAndBitwiseXorAcquire(HInvoke* invoke) {
+ CreateVarHandleGetAndUpdateLocations(invoke, codegen_, GetAndUpdateOp::kXor);
+}
+
+void IntrinsicCodeGeneratorRISCV64::VisitVarHandleGetAndBitwiseXorAcquire(HInvoke* invoke) {
+ GenerateVarHandleGetAndUpdate(invoke, codegen_, GetAndUpdateOp::kXor, std::memory_order_acquire);
+}
+
+void IntrinsicLocationsBuilderRISCV64::VisitVarHandleGetAndBitwiseXorRelease(HInvoke* invoke) {
+ CreateVarHandleGetAndUpdateLocations(invoke, codegen_, GetAndUpdateOp::kXor);
+}
+
+void IntrinsicCodeGeneratorRISCV64::VisitVarHandleGetAndBitwiseXorRelease(HInvoke* invoke) {
+ GenerateVarHandleGetAndUpdate(invoke, codegen_, GetAndUpdateOp::kXor, std::memory_order_release);
+}
+
+void VarHandleSlowPathRISCV64::EmitByteArrayViewCode(CodeGenerator* codegen_in) {
+ DCHECK(GetByteArrayViewCheckLabel()->IsLinked());
+ CodeGeneratorRISCV64* codegen = down_cast<CodeGeneratorRISCV64*>(codegen_in);
+ Riscv64Assembler* assembler = codegen->GetAssembler();
+ HInvoke* invoke = GetInvoke();
+ mirror::VarHandle::AccessModeTemplate access_mode_template = GetAccessModeTemplate();
+ DataType::Type value_type =
+ GetVarHandleExpectedValueType(invoke, /*expected_coordinates_count=*/ 2u);
+ DCHECK_NE(value_type, DataType::Type::kReference);
+ size_t size = DataType::Size(value_type);
+ DCHECK_GT(size, 1u);
+ LocationSummary* locations = invoke->GetLocations();
+ XRegister varhandle = locations->InAt(0).AsRegister<XRegister>();
+ XRegister object = locations->InAt(1).AsRegister<XRegister>();
+ XRegister index = locations->InAt(2).AsRegister<XRegister>();
+
+ MemberOffset class_offset = mirror::Object::ClassOffset();
+ MemberOffset array_length_offset = mirror::Array::LengthOffset();
+ MemberOffset data_offset = mirror::Array::DataOffset(Primitive::kPrimByte);
+ MemberOffset native_byte_order_offset = mirror::ByteArrayViewVarHandle::NativeByteOrderOffset();
+
+ __ Bind(GetByteArrayViewCheckLabel());
+
+ VarHandleTarget target = GetVarHandleTarget(invoke);
+ {
+ ScratchRegisterScope srs(assembler);
+ XRegister temp = srs.AllocateXRegister();
+ XRegister temp2 = srs.AllocateXRegister();
+
+ // The main path checked that the coordinateType0 is an array class that matches
+ // the class of the actual coordinate argument, but its component type does not
+ // match the value type.
+ // Check if the `varhandle` references a ByteArrayViewVarHandle instance.
+ __ Loadwu(temp, varhandle, class_offset.Int32Value());
+ codegen->MaybeUnpoisonHeapReference(temp);
+ codegen->LoadClassRootForIntrinsic(temp2, ClassRoot::kJavaLangInvokeByteArrayViewVarHandle);
+ __ Bne(temp, temp2, GetEntryLabel());
+
+ // Check for array index out of bounds.
+ __ Loadw(temp, object, array_length_offset.Int32Value());
+ __ Bgeu(index, temp, GetEntryLabel());
+ __ Addi(temp2, index, size - 1u);
+ __ Bgeu(temp2, temp, GetEntryLabel());
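+ // Together, these unsigned comparisons reject negative indexes and enforce
+ // `index + size <= length`, so the whole element lies within the byte array.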
+
+ // Construct the target.
+ __ Addi(target.offset, index, data_offset.Int32Value());
+
+ // Alignment check. For unaligned access, go to the runtime.
+ DCHECK(IsPowerOfTwo(size));
+ __ Andi(temp, target.offset, size - 1u);
+ __ Bnez(temp, GetEntryLabel());
+
+ // Byte order check. For native byte order return to the main path.
+ if (access_mode_template == mirror::VarHandle::AccessModeTemplate::kSet &&
+ IsZeroBitPattern(invoke->InputAt(invoke->GetNumberOfArguments() - 1u))) {
+ // There is no reason to differentiate between native byte order and byte-swap
+ // for setting a zero bit pattern. Just return to the main path.
+ __ J(GetNativeByteOrderLabel());
+ return;
+ }
+ __ Loadbu(temp, varhandle, native_byte_order_offset.Int32Value());
+ __ Bnez(temp, GetNativeByteOrderLabel());
+ }
+
+ switch (access_mode_template) {
+ case mirror::VarHandle::AccessModeTemplate::kGet:
+ GenerateVarHandleGet(invoke, codegen, order_, /*byte_swap=*/ true);
+ break;
+ case mirror::VarHandle::AccessModeTemplate::kSet:
+ GenerateVarHandleSet(invoke, codegen, order_, /*byte_swap=*/ true);
+ break;
+ case mirror::VarHandle::AccessModeTemplate::kCompareAndSet:
+ case mirror::VarHandle::AccessModeTemplate::kCompareAndExchange:
+ GenerateVarHandleCompareAndSetOrExchange(
+ invoke, codegen, order_, return_success_, strong_, /*byte_swap=*/ true);
+ break;
+ case mirror::VarHandle::AccessModeTemplate::kGetAndUpdate:
+ GenerateVarHandleGetAndUpdate(
+ invoke, codegen, get_and_update_op_, order_, /*byte_swap=*/ true);
+ break;
+ }
+ __ J(GetExitLabel());
+}
+
+void IntrinsicLocationsBuilderRISCV64::VisitThreadCurrentThread(HInvoke* invoke) {
+ LocationSummary* locations =
+ new (allocator_) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
+ locations->SetOut(Location::RequiresRegister());
+}
+
+void IntrinsicCodeGeneratorRISCV64::VisitThreadCurrentThread(HInvoke* invoke) {
+ Riscv64Assembler* assembler = GetAssembler();
+ XRegister out = invoke->GetLocations()->Out().AsRegister<XRegister>();
+ __ Loadwu(out, TR, Thread::PeerOffset<kRiscv64PointerSize>().Int32Value());
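+ // The peer is a managed heap reference, which fits in 32 bits (ART keeps heap addresses
+ // in the low 4GiB), so an unsigned 32-bit load of the low word is sufficient.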
+}
+
+void IntrinsicLocationsBuilderRISCV64::VisitReachabilityFence(HInvoke* invoke) {
+ LocationSummary* locations =
+ new (allocator_) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
+ locations->SetInAt(0, Location::Any());
+}
+
+void IntrinsicCodeGeneratorRISCV64::VisitReachabilityFence([[maybe_unused]] HInvoke* invoke) {}
+
+void IntrinsicLocationsBuilderRISCV64::VisitMathFmaDouble(HInvoke* invoke) {
+ CreateFpFpFpToFpNoOverlapLocations(allocator_, invoke);
+}
+
+void IntrinsicCodeGeneratorRISCV64::VisitMathFmaDouble(HInvoke* invoke) {
+ LocationSummary* locations = invoke->GetLocations();
+ Riscv64Assembler* assembler = GetAssembler();
+ FRegister n = locations->InAt(0).AsFpuRegister<FRegister>();
+ FRegister m = locations->InAt(1).AsFpuRegister<FRegister>();
+ FRegister a = locations->InAt(2).AsFpuRegister<FRegister>();
+ FRegister out = locations->Out().AsFpuRegister<FRegister>();
+
+ __ FMAddD(out, n, m, a);
+}
+
+void IntrinsicLocationsBuilderRISCV64::VisitMathFmaFloat(HInvoke* invoke) {
+ CreateFpFpFpToFpNoOverlapLocations(allocator_, invoke);
+}
+
+void IntrinsicCodeGeneratorRISCV64::VisitMathFmaFloat(HInvoke* invoke) {
+ LocationSummary* locations = invoke->GetLocations();
+ Riscv64Assembler* assembler = GetAssembler();
+ FRegister n = locations->InAt(0).AsFpuRegister<FRegister>();
+ FRegister m = locations->InAt(1).AsFpuRegister<FRegister>();
+ FRegister a = locations->InAt(2).AsFpuRegister<FRegister>();
+ FRegister out = locations->Out().AsFpuRegister<FRegister>();
+
+ __ FMAddS(out, n, m, a);
+}
+
+void IntrinsicLocationsBuilderRISCV64::VisitMathCos(HInvoke* invoke) {
+ CreateFPToFPCallLocations(allocator_, invoke);
+}
+
+void IntrinsicCodeGeneratorRISCV64::VisitMathCos(HInvoke* invoke) {
+ codegen_->InvokeRuntime(kQuickCos, invoke, invoke->GetDexPc());
+}
+
+void IntrinsicLocationsBuilderRISCV64::VisitMathSin(HInvoke* invoke) {
+ CreateFPToFPCallLocations(allocator_, invoke);
+}
+
+void IntrinsicCodeGeneratorRISCV64::VisitMathSin(HInvoke* invoke) {
+ codegen_->InvokeRuntime(kQuickSin, invoke, invoke->GetDexPc());
+}
+
+void IntrinsicLocationsBuilderRISCV64::VisitMathAcos(HInvoke* invoke) {
+ CreateFPToFPCallLocations(allocator_, invoke);
+}
+
+void IntrinsicCodeGeneratorRISCV64::VisitMathAcos(HInvoke* invoke) {
+ codegen_->InvokeRuntime(kQuickAcos, invoke, invoke->GetDexPc());
+}
+
+void IntrinsicLocationsBuilderRISCV64::VisitMathAsin(HInvoke* invoke) {
+ CreateFPToFPCallLocations(allocator_, invoke);
+}
+
+void IntrinsicCodeGeneratorRISCV64::VisitMathAsin(HInvoke* invoke) {
+ codegen_->InvokeRuntime(kQuickAsin, invoke, invoke->GetDexPc());
+}
+
+void IntrinsicLocationsBuilderRISCV64::VisitMathAtan(HInvoke* invoke) {
+ CreateFPToFPCallLocations(allocator_, invoke);
+}
+
+void IntrinsicCodeGeneratorRISCV64::VisitMathAtan(HInvoke* invoke) {
+ codegen_->InvokeRuntime(kQuickAtan, invoke, invoke->GetDexPc());
+}
+
+void IntrinsicLocationsBuilderRISCV64::VisitMathAtan2(HInvoke* invoke) {
+ CreateFPFPToFPCallLocations(allocator_, invoke);
+}
+
+void IntrinsicCodeGeneratorRISCV64::VisitMathAtan2(HInvoke* invoke) {
+ codegen_->InvokeRuntime(kQuickAtan2, invoke, invoke->GetDexPc());
+}
+
+void IntrinsicLocationsBuilderRISCV64::VisitMathPow(HInvoke* invoke) {
+ CreateFPFPToFPCallLocations(allocator_, invoke);
+}
+
+void IntrinsicCodeGeneratorRISCV64::VisitMathPow(HInvoke* invoke) {
+ codegen_->InvokeRuntime(kQuickPow, invoke, invoke->GetDexPc());
+}
+
+void IntrinsicLocationsBuilderRISCV64::VisitMathCbrt(HInvoke* invoke) {
+ CreateFPToFPCallLocations(allocator_, invoke);
+}
+
+void IntrinsicCodeGeneratorRISCV64::VisitMathCbrt(HInvoke* invoke) {
+ codegen_->InvokeRuntime(kQuickCbrt, invoke, invoke->GetDexPc());
+}
+
+void IntrinsicLocationsBuilderRISCV64::VisitMathCosh(HInvoke* invoke) {
+ CreateFPToFPCallLocations(allocator_, invoke);
+}
+
+void IntrinsicCodeGeneratorRISCV64::VisitMathCosh(HInvoke* invoke) {
+ codegen_->InvokeRuntime(kQuickCosh, invoke, invoke->GetDexPc());
+}
+
+void IntrinsicLocationsBuilderRISCV64::VisitMathExp(HInvoke* invoke) {
+ CreateFPToFPCallLocations(allocator_, invoke);
+}
+
+void IntrinsicCodeGeneratorRISCV64::VisitMathExp(HInvoke* invoke) {
+ codegen_->InvokeRuntime(kQuickExp, invoke, invoke->GetDexPc());
+}
+
+void IntrinsicLocationsBuilderRISCV64::VisitMathExpm1(HInvoke* invoke) {
+ CreateFPToFPCallLocations(allocator_, invoke);
+}
+
+void IntrinsicCodeGeneratorRISCV64::VisitMathExpm1(HInvoke* invoke) {
+ codegen_->InvokeRuntime(kQuickExpm1, invoke, invoke->GetDexPc());
+}
+
+void IntrinsicLocationsBuilderRISCV64::VisitMathHypot(HInvoke* invoke) {
+ CreateFPFPToFPCallLocations(allocator_, invoke);
+}
+
+void IntrinsicCodeGeneratorRISCV64::VisitMathHypot(HInvoke* invoke) {
+ codegen_->InvokeRuntime(kQuickHypot, invoke, invoke->GetDexPc());
+}
+
+void IntrinsicLocationsBuilderRISCV64::VisitMathLog(HInvoke* invoke) {
+ CreateFPToFPCallLocations(allocator_, invoke);
+}
+
+void IntrinsicCodeGeneratorRISCV64::VisitMathLog(HInvoke* invoke) {
+ codegen_->InvokeRuntime(kQuickLog, invoke, invoke->GetDexPc());
+}
+
+void IntrinsicLocationsBuilderRISCV64::VisitMathLog10(HInvoke* invoke) {
+ CreateFPToFPCallLocations(allocator_, invoke);
+}
+
+void IntrinsicCodeGeneratorRISCV64::VisitMathLog10(HInvoke* invoke) {
+ codegen_->InvokeRuntime(kQuickLog10, invoke, invoke->GetDexPc());
+}
+
+void IntrinsicLocationsBuilderRISCV64::VisitMathNextAfter(HInvoke* invoke) {
+ CreateFPFPToFPCallLocations(allocator_, invoke);
+}
+
+void IntrinsicCodeGeneratorRISCV64::VisitMathNextAfter(HInvoke* invoke) {
+ codegen_->InvokeRuntime(kQuickNextAfter, invoke, invoke->GetDexPc());
+}
+
+void IntrinsicLocationsBuilderRISCV64::VisitMathSinh(HInvoke* invoke) {
+ CreateFPToFPCallLocations(allocator_, invoke);
+}
+
+void IntrinsicCodeGeneratorRISCV64::VisitMathSinh(HInvoke* invoke) {
+ codegen_->InvokeRuntime(kQuickSinh, invoke, invoke->GetDexPc());
+}
+
+void IntrinsicLocationsBuilderRISCV64::VisitMathTan(HInvoke* invoke) {
+ CreateFPToFPCallLocations(allocator_, invoke);
+}
+
+void IntrinsicCodeGeneratorRISCV64::VisitMathTan(HInvoke* invoke) {
+ codegen_->InvokeRuntime(kQuickTan, invoke, invoke->GetDexPc());
+}
+
+void IntrinsicLocationsBuilderRISCV64::VisitMathTanh(HInvoke* invoke) {
+ CreateFPToFPCallLocations(allocator_, invoke);
+}
+
+void IntrinsicCodeGeneratorRISCV64::VisitMathTanh(HInvoke* invoke) {
+ codegen_->InvokeRuntime(kQuickTanh, invoke, invoke->GetDexPc());
+}
+
+void IntrinsicLocationsBuilderRISCV64::VisitMathSqrt(HInvoke* invoke) {
+ CreateFPToFPLocations(allocator_, invoke);
+}
+
+void IntrinsicCodeGeneratorRISCV64::VisitMathSqrt(HInvoke* invoke) {
+ DCHECK_EQ(invoke->InputAt(0)->GetType(), DataType::Type::kFloat64);
+ DCHECK_EQ(invoke->GetType(), DataType::Type::kFloat64);
+
+ LocationSummary* locations = invoke->GetLocations();
+ Riscv64Assembler* assembler = GetAssembler();
+ FRegister in = locations->InAt(0).AsFpuRegister<FRegister>();
+ FRegister out = locations->Out().AsFpuRegister<FRegister>();
+
+ __ FSqrtD(out, in);
+}
+
+static void GenDoubleRound(Riscv64Assembler* assembler, HInvoke* invoke, FPRoundingMode mode) {
+ LocationSummary* locations = invoke->GetLocations();
+ FRegister in = locations->InAt(0).AsFpuRegister<FRegister>();
+ FRegister out = locations->Out().AsFpuRegister<FRegister>();
+ ScratchRegisterScope srs(assembler);
+ XRegister tmp = srs.AllocateXRegister();
+ FRegister ftmp = srs.AllocateFRegister();
+ Riscv64Label done;
+
+ // Load 2^52
+ __ LoadConst64(tmp, 0x4330000000000000L);
+ __ FMvDX(ftmp, tmp);
+ __ FAbsD(out, in);
+ __ FLtD(tmp, out, ftmp);
+
+ // Keep the input as the result if |input| >= 2^52 (no fractional bits) or if it is NaN.
+ __ FMvD(out, in);
+ __ Beqz(tmp, &done);
+
+ // Convert with rounding mode
+ __ FCvtLD(tmp, in, mode);
+ __ FCvtDL(ftmp, tmp, mode);
+
+ // Copy the sign bit from the input (this also preserves negative zero).
+ __ FSgnjD(out, ftmp, in);
+ __ Bind(&done);
+}
+
+void IntrinsicLocationsBuilderRISCV64::VisitMathFloor(HInvoke* invoke) {
+ CreateFPToFPLocations(allocator_, invoke);
+}
+
+void IntrinsicCodeGeneratorRISCV64::VisitMathFloor(HInvoke* invoke) {
+ GenDoubleRound(GetAssembler(), invoke, FPRoundingMode::kRDN);
+}
+
+void IntrinsicLocationsBuilderRISCV64::VisitMathCeil(HInvoke* invoke) {
+ CreateFPToFPLocations(allocator_, invoke);
+}
+
+void IntrinsicCodeGeneratorRISCV64::VisitMathCeil(HInvoke* invoke) {
+ GenDoubleRound(GetAssembler(), invoke, FPRoundingMode::kRUP);
+}
+
+void IntrinsicLocationsBuilderRISCV64::VisitMathRint(HInvoke* invoke) {
+ CreateFPToFPLocations(allocator_, invoke);
+}
+
+void IntrinsicCodeGeneratorRISCV64::VisitMathRint(HInvoke* invoke) {
+ GenDoubleRound(GetAssembler(), invoke, FPRoundingMode::kRNE);
+}
+
+void GenMathRound(CodeGeneratorRISCV64* codegen, HInvoke* invoke, DataType::Type type) {
+ Riscv64Assembler* assembler = codegen->GetAssembler();
+ LocationSummary* locations = invoke->GetLocations();
+ FRegister in = locations->InAt(0).AsFpuRegister<FRegister>();
+ XRegister out = locations->Out().AsRegister<XRegister>();
+ ScratchRegisterScope srs(assembler);
+ FRegister ftmp = srs.AllocateFRegister();
+ Riscv64Label done;
+
+ // Check NaN
+ codegen->GetInstructionVisitor()->FClass(out, in, type);
+ __ Slti(out, out, kFClassNaNMinValue);
+ __ Beqz(out, &done);
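+ // For a NaN input, `out` is zero at this point and we branch straight to `done`,
+ // which is exactly the result required by Math.round().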
+
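+ // Performing both the `+ 0.5` addition and the conversion with round-down (RDN) avoids
+ // the case where a round-to-nearest addition would carry across an integer boundary
+ // (e.g. 0.49999999999999994 + 0.5 would yield 1.0 and thus round to 1 instead of 0).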
+ if (type == DataType::Type::kFloat64) {
+ // Add 0.5 (0x3fe0000000000000), rounding down (towards negative infinity).
+ __ LoadConst64(out, 0x3fe0000000000000L);
+ __ FMvDX(ftmp, out);
+ __ FAddD(ftmp, ftmp, in, FPRoundingMode::kRDN);
+
+ // Convert to managed `long`, rounding down (towards negative infinity).
+ __ FCvtLD(out, ftmp, FPRoundingMode::kRDN);
+ } else {
+ // Add 0.5 (0x3f000000), rounding down (towards negative infinity).
+ __ LoadConst32(out, 0x3f000000);
+ __ FMvWX(ftmp, out);
+ __ FAddS(ftmp, ftmp, in, FPRoundingMode::kRDN);
+
+ // Convert to managed `int`, rounding down (towards negative infinity).
+ __ FCvtWS(out, ftmp, FPRoundingMode::kRDN);
+ }
+
+ __ Bind(&done);
+}
+
+void IntrinsicLocationsBuilderRISCV64::VisitMathRoundDouble(HInvoke* invoke) {
+ CreateFPToIntLocations(allocator_, invoke);
+}
+
+void IntrinsicCodeGeneratorRISCV64::VisitMathRoundDouble(HInvoke* invoke) {
+ GenMathRound(codegen_, invoke, DataType::Type::kFloat64);
+}
+
+void IntrinsicLocationsBuilderRISCV64::VisitMathRoundFloat(HInvoke* invoke) {
+ CreateFPToIntLocations(allocator_, invoke);
+}
+
+void IntrinsicCodeGeneratorRISCV64::VisitMathRoundFloat(HInvoke* invoke) {
+ GenMathRound(codegen_, invoke, DataType::Type::kFloat32);
+}
+
+void IntrinsicLocationsBuilderRISCV64::VisitMathMultiplyHigh(HInvoke* invoke) {
+ LocationSummary* locations =
+ new (allocator_) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
+ locations->SetInAt(0, Location::RequiresRegister());
+ locations->SetInAt(1, Location::RequiresRegister());
+ locations->SetOut(Location::RequiresRegister());
+}
+
+void IntrinsicCodeGeneratorRISCV64::VisitMathMultiplyHigh(HInvoke* invoke) {
+ LocationSummary* locations = invoke->GetLocations();
+ Riscv64Assembler* assembler = GetAssembler();
+ DCHECK(invoke->GetType() == DataType::Type::kInt64);
+
+ XRegister x = locations->InAt(0).AsRegister<XRegister>();
+ XRegister y = locations->InAt(1).AsRegister<XRegister>();
+ XRegister out = locations->Out().AsRegister<XRegister>();
+
+ // Get the high 64 bits of the 128-bit signed product.
+ __ Mulh(out, x, y);
+}
+
+#define MARK_UNIMPLEMENTED(Name) UNIMPLEMENTED_INTRINSIC(RISCV64, Name)
+UNIMPLEMENTED_INTRINSIC_LIST_RISCV64(MARK_UNIMPLEMENTED);
+#undef MARK_UNIMPLEMENTED
+
+UNREACHABLE_INTRINSICS(RISCV64)
+
+} // namespace riscv64
+} // namespace art
diff --git a/compiler/optimizing/intrinsics_riscv64.h b/compiler/optimizing/intrinsics_riscv64.h
new file mode 100644
index 0000000000..8160c054ee
--- /dev/null
+++ b/compiler/optimizing/intrinsics_riscv64.h
@@ -0,0 +1,87 @@
+/*
+ * Copyright (C) 2023 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_COMPILER_OPTIMIZING_INTRINSICS_RISCV64_H_
+#define ART_COMPILER_OPTIMIZING_INTRINSICS_RISCV64_H_
+
+#include "base/macros.h"
+#include "intrinsics.h"
+#include "intrinsics_list.h"
+
+namespace art HIDDEN {
+
+class ArenaAllocator;
+class HInvokeStaticOrDirect;
+class HInvokeVirtual;
+
+namespace riscv64 {
+
+class CodeGeneratorRISCV64;
+class Riscv64Assembler;
+
+class IntrinsicLocationsBuilderRISCV64 final : public IntrinsicVisitor {
+ public:
+ explicit IntrinsicLocationsBuilderRISCV64(ArenaAllocator* allocator,
+ CodeGeneratorRISCV64* codegen)
+ : allocator_(allocator), codegen_(codegen) {}
+
+ // Define visitor methods.
+
+#define OPTIMIZING_INTRINSICS(Name, ...) \
+ void Visit##Name(HInvoke* invoke) override;
+ ART_INTRINSICS_LIST(OPTIMIZING_INTRINSICS)
+#undef OPTIMIZING_INTRINSICS
+
+ // Check whether an invoke is an intrinsic, and if so, create a location summary. Returns whether
+ // a corresponding LocationSummary with the intrinsified_ flag set was generated and attached to
+ // the invoke.
+ bool TryDispatch(HInvoke* invoke);
+
+ private:
+ ArenaAllocator* const allocator_;
+ CodeGeneratorRISCV64* const codegen_;
+
+ DISALLOW_COPY_AND_ASSIGN(IntrinsicLocationsBuilderRISCV64);
+};
+
+class IntrinsicCodeGeneratorRISCV64 final : public IntrinsicVisitor {
+ public:
+ explicit IntrinsicCodeGeneratorRISCV64(CodeGeneratorRISCV64* codegen) : codegen_(codegen) {}
+
+ // Define visitor methods.
+
+#define OPTIMIZING_INTRINSICS(Name, ...) \
+ void Visit##Name(HInvoke* invoke);
+ ART_INTRINSICS_LIST(OPTIMIZING_INTRINSICS)
+#undef OPTIMIZING_INTRINSICS
+
+ private:
+ Riscv64Assembler* GetAssembler();
+ ArenaAllocator* GetAllocator();
+
+ void HandleValueOf(HInvoke* invoke,
+ const IntrinsicVisitor::ValueOfInfo& info,
+ DataType::Type type);
+
+ CodeGeneratorRISCV64* const codegen_;
+
+ DISALLOW_COPY_AND_ASSIGN(IntrinsicCodeGeneratorRISCV64);
+};
+
+} // namespace riscv64
+} // namespace art
+
+#endif // ART_COMPILER_OPTIMIZING_INTRINSICS_RISCV64_H_
diff --git a/compiler/optimizing/intrinsics_utils.h b/compiler/optimizing/intrinsics_utils.h
index 13cabdafed..590bc34ee9 100644
--- a/compiler/optimizing/intrinsics_utils.h
+++ b/compiler/optimizing/intrinsics_utils.h
@@ -153,24 +153,34 @@ static inline bool IsVarHandleGet(HInvoke* invoke) {
return access_mode == mirror::VarHandle::AccessModeTemplate::kGet;
}
-static inline bool IsUnsafeGetObject(HInvoke* invoke) {
+static inline bool IsUnsafeGetReference(HInvoke* invoke) {
switch (invoke->GetIntrinsic()) {
case Intrinsics::kUnsafeGetObject:
case Intrinsics::kUnsafeGetObjectVolatile:
- case Intrinsics::kJdkUnsafeGetObject:
- case Intrinsics::kJdkUnsafeGetObjectVolatile:
- case Intrinsics::kJdkUnsafeGetObjectAcquire:
+ case Intrinsics::kJdkUnsafeGetReference:
+ case Intrinsics::kJdkUnsafeGetReferenceVolatile:
+ case Intrinsics::kJdkUnsafeGetReferenceAcquire:
return true;
default:
return false;
}
}
-static inline bool IsUnsafeCASObject(HInvoke* invoke) {
+static inline bool IsUnsafeCASReference(HInvoke* invoke) {
switch (invoke->GetIntrinsic()) {
case Intrinsics::kUnsafeCASObject:
case Intrinsics::kJdkUnsafeCASObject:
- case Intrinsics::kJdkUnsafeCompareAndSetObject:
+ case Intrinsics::kJdkUnsafeCompareAndSetReference:
+ return true;
+ default:
+ return false;
+ }
+}
+
+static inline bool IsUnsafeGetAndSetReference(HInvoke* invoke) {
+ switch (invoke->GetIntrinsic()) {
+ case Intrinsics::kUnsafeGetAndSetObject:
+ case Intrinsics::kJdkUnsafeGetAndSetReference:
return true;
default:
return false;
diff --git a/compiler/optimizing/intrinsics_x86.cc b/compiler/optimizing/intrinsics_x86.cc
index d2072201f8..1823bd4b4c 100644
--- a/compiler/optimizing/intrinsics_x86.cc
+++ b/compiler/optimizing/intrinsics_x86.cc
@@ -25,6 +25,7 @@
#include "data_type-inl.h"
#include "entrypoints/quick/quick_entrypoints.h"
#include "heap_poisoning.h"
+#include "intrinsic_objects.h"
#include "intrinsics.h"
#include "intrinsics_utils.h"
#include "lock_word.h"
@@ -37,6 +38,7 @@
#include "thread-current-inl.h"
#include "utils/x86/assembler_x86.h"
#include "utils/x86/constants_x86.h"
+#include "well_known_classes.h"
namespace art HIDDEN {
@@ -75,11 +77,10 @@ class ReadBarrierSystemArrayCopySlowPathX86 : public SlowPathCode {
public:
explicit ReadBarrierSystemArrayCopySlowPathX86(HInstruction* instruction)
: SlowPathCode(instruction) {
- DCHECK(gUseReadBarrier);
- DCHECK(kUseBakerReadBarrier);
}
void EmitNativeCode(CodeGenerator* codegen) override {
+ DCHECK(codegen->EmitBakerReadBarrier());
CodeGeneratorX86* x86_codegen = down_cast<CodeGeneratorX86*>(codegen);
LocationSummary* locations = instruction_->GetLocations();
DCHECK(locations->CanCall());
@@ -394,7 +395,6 @@ void IntrinsicLocationsBuilderX86::VisitMathRoundFloat(HInvoke* invoke) {
}
HInvokeStaticOrDirect* static_or_direct = invoke->AsInvokeStaticOrDirect();
- DCHECK(static_or_direct != nullptr);
LocationSummary* locations =
new (allocator_) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
locations->SetInAt(0, Location::RequiresFpuRegister());
@@ -774,9 +774,9 @@ void IntrinsicCodeGeneratorX86::VisitMathNextAfter(HInvoke* invoke) {
static void CreateSystemArrayCopyLocations(HInvoke* invoke) {
// We need at least two of the positions or length to be an integer constant,
// or else we won't have enough free registers.
- HIntConstant* src_pos = invoke->InputAt(1)->AsIntConstant();
- HIntConstant* dest_pos = invoke->InputAt(3)->AsIntConstant();
- HIntConstant* length = invoke->InputAt(4)->AsIntConstant();
+ HIntConstant* src_pos = invoke->InputAt(1)->AsIntConstantOrNull();
+ HIntConstant* dest_pos = invoke->InputAt(3)->AsIntConstantOrNull();
+ HIntConstant* length = invoke->InputAt(4)->AsIntConstantOrNull();
int num_constants =
((src_pos != nullptr) ? 1 : 0)
@@ -1205,7 +1205,7 @@ static void GenerateStringIndexOf(HInvoke* invoke,
HInstruction* code_point = invoke->InputAt(1);
if (code_point->IsIntConstant()) {
if (static_cast<uint32_t>(code_point->AsIntConstant()->GetValue()) >
- std::numeric_limits<uint16_t>::max()) {
+ std::numeric_limits<uint16_t>::max()) {
// Always needs the slow-path. We could directly dispatch to it, but this case should be
// rare, so for simplicity just put the full slow-path down and branch unconditionally.
slow_path = new (codegen->GetScopedAllocator()) IntrinsicSlowPathX86(invoke);
@@ -1445,7 +1445,7 @@ void IntrinsicCodeGeneratorX86::VisitStringGetCharsNoCheck(HInvoke* invoke) {
Register obj = locations->InAt(0).AsRegister<Register>();
Location srcBegin = locations->InAt(1);
int srcBegin_value =
- srcBegin.IsConstant() ? srcBegin.GetConstant()->AsIntConstant()->GetValue() : 0;
+ srcBegin.IsConstant() ? srcBegin.GetConstant()->AsIntConstant()->GetValue() : 0;
Register srcEnd = locations->InAt(2).AsRegister<Register>();
Register dst = locations->InAt(3).AsRegister<Register>();
Register dstBegin = locations->InAt(4).AsRegister<Register>();
@@ -1691,6 +1691,12 @@ static void GenUnsafeGet(HInvoke* invoke,
Location output_loc = locations->Out();
switch (type) {
+ case DataType::Type::kInt8: {
+ Register output = output_loc.AsRegister<Register>();
+ __ movsxb(output, Address(base, offset, ScaleFactor::TIMES_1, 0));
+ break;
+ }
+
case DataType::Type::kInt32: {
Register output = output_loc.AsRegister<Register>();
__ movl(output, Address(base, offset, ScaleFactor::TIMES_1, 0));
@@ -1699,7 +1705,7 @@ static void GenUnsafeGet(HInvoke* invoke,
case DataType::Type::kReference: {
Register output = output_loc.AsRegister<Register>();
- if (gUseReadBarrier) {
+ if (codegen->EmitReadBarrier()) {
if (kUseBakerReadBarrier) {
Address src(base, offset, ScaleFactor::TIMES_1, 0);
codegen->GenerateReferenceLoadWithBakerReadBarrier(
@@ -1739,25 +1745,12 @@ static void GenUnsafeGet(HInvoke* invoke,
}
}
-static bool UnsafeGetIntrinsicOnCallList(Intrinsics intrinsic) {
- switch (intrinsic) {
- case Intrinsics::kUnsafeGetObject:
- case Intrinsics::kUnsafeGetObjectVolatile:
- case Intrinsics::kJdkUnsafeGetObject:
- case Intrinsics::kJdkUnsafeGetObjectVolatile:
- case Intrinsics::kJdkUnsafeGetObjectAcquire:
- return true;
- default:
- break;
- }
- return false;
-}
-
static void CreateIntIntIntToIntLocations(ArenaAllocator* allocator,
HInvoke* invoke,
+ CodeGeneratorX86* codegen,
DataType::Type type,
bool is_volatile) {
- bool can_call = gUseReadBarrier && UnsafeGetIntrinsicOnCallList(invoke->GetIntrinsic());
+ bool can_call = codegen->EmitReadBarrier() && IsUnsafeGetReference(invoke);
LocationSummary* locations =
new (allocator) LocationSummary(invoke,
can_call
@@ -1797,12 +1790,14 @@ void IntrinsicLocationsBuilderX86::VisitUnsafeGetLongVolatile(HInvoke* invoke) {
VisitJdkUnsafeGetLongVolatile(invoke);
}
void IntrinsicLocationsBuilderX86::VisitUnsafeGetObject(HInvoke* invoke) {
- VisitJdkUnsafeGetObject(invoke);
+ VisitJdkUnsafeGetReference(invoke);
}
void IntrinsicLocationsBuilderX86::VisitUnsafeGetObjectVolatile(HInvoke* invoke) {
- VisitJdkUnsafeGetObjectVolatile(invoke);
+ VisitJdkUnsafeGetReferenceVolatile(invoke);
+}
+void IntrinsicLocationsBuilderX86::VisitUnsafeGetByte(HInvoke* invoke) {
+ VisitJdkUnsafeGetByte(invoke);
}
-
void IntrinsicCodeGeneratorX86::VisitUnsafeGet(HInvoke* invoke) {
VisitJdkUnsafeGet(invoke);
@@ -1817,44 +1812,54 @@ void IntrinsicCodeGeneratorX86::VisitUnsafeGetLongVolatile(HInvoke* invoke) {
VisitJdkUnsafeGetLongVolatile(invoke);
}
void IntrinsicCodeGeneratorX86::VisitUnsafeGetObject(HInvoke* invoke) {
- VisitJdkUnsafeGetObject(invoke);
+ VisitJdkUnsafeGetReference(invoke);
}
void IntrinsicCodeGeneratorX86::VisitUnsafeGetObjectVolatile(HInvoke* invoke) {
- VisitJdkUnsafeGetObjectVolatile(invoke);
+ VisitJdkUnsafeGetReferenceVolatile(invoke);
+}
+void IntrinsicCodeGeneratorX86::VisitUnsafeGetByte(HInvoke* invoke) {
+ VisitJdkUnsafeGetByte(invoke);
}
-
void IntrinsicLocationsBuilderX86::VisitJdkUnsafeGet(HInvoke* invoke) {
CreateIntIntIntToIntLocations(
- allocator_, invoke, DataType::Type::kInt32, /*is_volatile=*/ false);
+ allocator_, invoke, codegen_, DataType::Type::kInt32, /*is_volatile=*/ false);
}
void IntrinsicLocationsBuilderX86::VisitJdkUnsafeGetVolatile(HInvoke* invoke) {
- CreateIntIntIntToIntLocations(allocator_, invoke, DataType::Type::kInt32, /*is_volatile=*/ true);
+ CreateIntIntIntToIntLocations(
+ allocator_, invoke, codegen_, DataType::Type::kInt32, /*is_volatile=*/ true);
}
void IntrinsicLocationsBuilderX86::VisitJdkUnsafeGetAcquire(HInvoke* invoke) {
- CreateIntIntIntToIntLocations(allocator_, invoke, DataType::Type::kInt32, /*is_volatile=*/ true);
+ CreateIntIntIntToIntLocations(
+ allocator_, invoke, codegen_, DataType::Type::kInt32, /*is_volatile=*/ true);
}
void IntrinsicLocationsBuilderX86::VisitJdkUnsafeGetLong(HInvoke* invoke) {
CreateIntIntIntToIntLocations(
- allocator_, invoke, DataType::Type::kInt64, /*is_volatile=*/ false);
+ allocator_, invoke, codegen_, DataType::Type::kInt64, /*is_volatile=*/ false);
}
void IntrinsicLocationsBuilderX86::VisitJdkUnsafeGetLongVolatile(HInvoke* invoke) {
- CreateIntIntIntToIntLocations(allocator_, invoke, DataType::Type::kInt64, /*is_volatile=*/ true);
+ CreateIntIntIntToIntLocations(
+ allocator_, invoke, codegen_, DataType::Type::kInt64, /*is_volatile=*/ true);
}
void IntrinsicLocationsBuilderX86::VisitJdkUnsafeGetLongAcquire(HInvoke* invoke) {
- CreateIntIntIntToIntLocations(allocator_, invoke, DataType::Type::kInt64, /*is_volatile=*/ true);
+ CreateIntIntIntToIntLocations(
+ allocator_, invoke, codegen_, DataType::Type::kInt64, /*is_volatile=*/ true);
+}
+void IntrinsicLocationsBuilderX86::VisitJdkUnsafeGetReference(HInvoke* invoke) {
+ CreateIntIntIntToIntLocations(
+ allocator_, invoke, codegen_, DataType::Type::kReference, /*is_volatile=*/ false);
}
-void IntrinsicLocationsBuilderX86::VisitJdkUnsafeGetObject(HInvoke* invoke) {
+void IntrinsicLocationsBuilderX86::VisitJdkUnsafeGetReferenceVolatile(HInvoke* invoke) {
CreateIntIntIntToIntLocations(
- allocator_, invoke, DataType::Type::kReference, /*is_volatile=*/ false);
+ allocator_, invoke, codegen_, DataType::Type::kReference, /*is_volatile=*/ true);
}
-void IntrinsicLocationsBuilderX86::VisitJdkUnsafeGetObjectVolatile(HInvoke* invoke) {
+void IntrinsicLocationsBuilderX86::VisitJdkUnsafeGetReferenceAcquire(HInvoke* invoke) {
CreateIntIntIntToIntLocations(
- allocator_, invoke, DataType::Type::kReference, /*is_volatile=*/ true);
+ allocator_, invoke, codegen_, DataType::Type::kReference, /*is_volatile=*/ true);
}
-void IntrinsicLocationsBuilderX86::VisitJdkUnsafeGetObjectAcquire(HInvoke* invoke) {
+void IntrinsicLocationsBuilderX86::VisitJdkUnsafeGetByte(HInvoke* invoke) {
CreateIntIntIntToIntLocations(
- allocator_, invoke, DataType::Type::kReference, /*is_volatile=*/ true);
+ allocator_, invoke, codegen_, DataType::Type::kInt8, /*is_volatile=*/ false);
}
void IntrinsicCodeGeneratorX86::VisitJdkUnsafeGet(HInvoke* invoke) {
@@ -1875,15 +1880,18 @@ void IntrinsicCodeGeneratorX86::VisitJdkUnsafeGetLongVolatile(HInvoke* invoke) {
void IntrinsicCodeGeneratorX86::VisitJdkUnsafeGetLongAcquire(HInvoke* invoke) {
GenUnsafeGet(invoke, DataType::Type::kInt64, /*is_volatile=*/ true, codegen_);
}
-void IntrinsicCodeGeneratorX86::VisitJdkUnsafeGetObject(HInvoke* invoke) {
+void IntrinsicCodeGeneratorX86::VisitJdkUnsafeGetReference(HInvoke* invoke) {
GenUnsafeGet(invoke, DataType::Type::kReference, /*is_volatile=*/ false, codegen_);
}
-void IntrinsicCodeGeneratorX86::VisitJdkUnsafeGetObjectVolatile(HInvoke* invoke) {
+void IntrinsicCodeGeneratorX86::VisitJdkUnsafeGetReferenceVolatile(HInvoke* invoke) {
GenUnsafeGet(invoke, DataType::Type::kReference, /*is_volatile=*/ true, codegen_);
}
-void IntrinsicCodeGeneratorX86::VisitJdkUnsafeGetObjectAcquire(HInvoke* invoke) {
+void IntrinsicCodeGeneratorX86::VisitJdkUnsafeGetReferenceAcquire(HInvoke* invoke) {
GenUnsafeGet(invoke, DataType::Type::kReference, /*is_volatile=*/ true, codegen_);
}
+void IntrinsicCodeGeneratorX86::VisitJdkUnsafeGetByte(HInvoke* invoke) {
+ GenUnsafeGet(invoke, DataType::Type::kInt8, /*is_volatile=*/ false, codegen_);
+}
static void CreateIntIntIntIntToVoidPlusTempsLocations(ArenaAllocator* allocator,
DataType::Type type,
@@ -1916,13 +1924,13 @@ void IntrinsicLocationsBuilderX86::VisitUnsafePutVolatile(HInvoke* invoke) {
VisitJdkUnsafePutVolatile(invoke);
}
void IntrinsicLocationsBuilderX86::VisitUnsafePutObject(HInvoke* invoke) {
- VisitJdkUnsafePutObject(invoke);
+ VisitJdkUnsafePutReference(invoke);
}
void IntrinsicLocationsBuilderX86::VisitUnsafePutObjectOrdered(HInvoke* invoke) {
VisitJdkUnsafePutObjectOrdered(invoke);
}
void IntrinsicLocationsBuilderX86::VisitUnsafePutObjectVolatile(HInvoke* invoke) {
- VisitJdkUnsafePutObjectVolatile(invoke);
+ VisitJdkUnsafePutReferenceVolatile(invoke);
}
void IntrinsicLocationsBuilderX86::VisitUnsafePutLong(HInvoke* invoke) {
VisitJdkUnsafePutLong(invoke);
@@ -1933,6 +1941,9 @@ void IntrinsicLocationsBuilderX86::VisitUnsafePutLongOrdered(HInvoke* invoke) {
void IntrinsicLocationsBuilderX86::VisitUnsafePutLongVolatile(HInvoke* invoke) {
VisitJdkUnsafePutLongVolatile(invoke);
}
+void IntrinsicLocationsBuilderX86::VisitUnsafePutByte(HInvoke* invoke) {
+ VisitJdkUnsafePutByte(invoke);
+}
void IntrinsicLocationsBuilderX86::VisitJdkUnsafePut(HInvoke* invoke) {
CreateIntIntIntIntToVoidPlusTempsLocations(
@@ -1950,7 +1961,7 @@ void IntrinsicLocationsBuilderX86::VisitJdkUnsafePutRelease(HInvoke* invoke) {
CreateIntIntIntIntToVoidPlusTempsLocations(
allocator_, DataType::Type::kInt32, invoke, /*is_volatile=*/ true);
}
-void IntrinsicLocationsBuilderX86::VisitJdkUnsafePutObject(HInvoke* invoke) {
+void IntrinsicLocationsBuilderX86::VisitJdkUnsafePutReference(HInvoke* invoke) {
CreateIntIntIntIntToVoidPlusTempsLocations(
allocator_, DataType::Type::kReference, invoke, /*is_volatile=*/ false);
}
@@ -1958,11 +1969,11 @@ void IntrinsicLocationsBuilderX86::VisitJdkUnsafePutObjectOrdered(HInvoke* invok
CreateIntIntIntIntToVoidPlusTempsLocations(
allocator_, DataType::Type::kReference, invoke, /*is_volatile=*/ false);
}
-void IntrinsicLocationsBuilderX86::VisitJdkUnsafePutObjectVolatile(HInvoke* invoke) {
+void IntrinsicLocationsBuilderX86::VisitJdkUnsafePutReferenceVolatile(HInvoke* invoke) {
CreateIntIntIntIntToVoidPlusTempsLocations(
allocator_, DataType::Type::kReference, invoke, /*is_volatile=*/ true);
}
-void IntrinsicLocationsBuilderX86::VisitJdkUnsafePutObjectRelease(HInvoke* invoke) {
+void IntrinsicLocationsBuilderX86::VisitJdkUnsafePutReferenceRelease(HInvoke* invoke) {
CreateIntIntIntIntToVoidPlusTempsLocations(
allocator_, DataType::Type::kReference, invoke, /*is_volatile=*/ true);
}
@@ -1982,6 +1993,10 @@ void IntrinsicLocationsBuilderX86::VisitJdkUnsafePutLongRelease(HInvoke* invoke)
CreateIntIntIntIntToVoidPlusTempsLocations(
allocator_, DataType::Type::kInt64, invoke, /*is_volatile=*/ true);
}
+void IntrinsicLocationsBuilderX86::VisitJdkUnsafePutByte(HInvoke* invoke) {
+ CreateIntIntIntIntToVoidPlusTempsLocations(
+ allocator_, DataType::Type::kInt8, invoke, /*is_volatile=*/ false);
+}
// We don't care for ordered: it requires an AnyStore barrier, which is already given by the x86
// memory model.
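A note on the comment above: under the x86 memory model (TSO), a plain store already has release semantics, so an "ordered" put needs no extra instruction; only a volatile (sequentially consistent) put needs a StoreLoad fence after the store. A rough sketch, assuming the MemoryFence() helper this code generator uses elsewhere and with illustrative register names (sketch only, not part of the patch):

// Plain/ordered put: a single MOV is already a release store under TSO.
__ movl(Address(base, offset, ScaleFactor::TIMES_1, 0), value);
// Volatile put: add a trailing StoreLoad fence (e.g. `lock addl $0, (%esp)` or MFENCE).
if (is_volatile) {
  codegen->MemoryFence();
}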
@@ -2041,13 +2056,13 @@ void IntrinsicCodeGeneratorX86::VisitUnsafePutVolatile(HInvoke* invoke) {
VisitJdkUnsafePutVolatile(invoke);
}
void IntrinsicCodeGeneratorX86::VisitUnsafePutObject(HInvoke* invoke) {
- VisitJdkUnsafePutObject(invoke);
+ VisitJdkUnsafePutReference(invoke);
}
void IntrinsicCodeGeneratorX86::VisitUnsafePutObjectOrdered(HInvoke* invoke) {
VisitJdkUnsafePutObjectOrdered(invoke);
}
void IntrinsicCodeGeneratorX86::VisitUnsafePutObjectVolatile(HInvoke* invoke) {
- VisitJdkUnsafePutObjectVolatile(invoke);
+ VisitJdkUnsafePutReferenceVolatile(invoke);
}
void IntrinsicCodeGeneratorX86::VisitUnsafePutLong(HInvoke* invoke) {
VisitJdkUnsafePutLong(invoke);
@@ -2058,6 +2073,9 @@ void IntrinsicCodeGeneratorX86::VisitUnsafePutLongOrdered(HInvoke* invoke) {
void IntrinsicCodeGeneratorX86::VisitUnsafePutLongVolatile(HInvoke* invoke) {
VisitJdkUnsafePutLongVolatile(invoke);
}
+void IntrinsicCodeGeneratorX86::VisitUnsafePutByte(HInvoke* invoke) {
+ VisitJdkUnsafePutByte(invoke);
+}
void IntrinsicCodeGeneratorX86::VisitJdkUnsafePut(HInvoke* invoke) {
GenUnsafePut(invoke->GetLocations(), DataType::Type::kInt32, /*is_volatile=*/ false, codegen_);
@@ -2071,7 +2089,7 @@ void IntrinsicCodeGeneratorX86::VisitJdkUnsafePutVolatile(HInvoke* invoke) {
void IntrinsicCodeGeneratorX86::VisitJdkUnsafePutRelease(HInvoke* invoke) {
GenUnsafePut(invoke->GetLocations(), DataType::Type::kInt32, /*is_volatile=*/ true, codegen_);
}
-void IntrinsicCodeGeneratorX86::VisitJdkUnsafePutObject(HInvoke* invoke) {
+void IntrinsicCodeGeneratorX86::VisitJdkUnsafePutReference(HInvoke* invoke) {
GenUnsafePut(
invoke->GetLocations(), DataType::Type::kReference, /*is_volatile=*/ false, codegen_);
}
@@ -2079,11 +2097,11 @@ void IntrinsicCodeGeneratorX86::VisitJdkUnsafePutObjectOrdered(HInvoke* invoke)
GenUnsafePut(
invoke->GetLocations(), DataType::Type::kReference, /*is_volatile=*/ false, codegen_);
}
-void IntrinsicCodeGeneratorX86::VisitJdkUnsafePutObjectVolatile(HInvoke* invoke) {
+void IntrinsicCodeGeneratorX86::VisitJdkUnsafePutReferenceVolatile(HInvoke* invoke) {
GenUnsafePut(
invoke->GetLocations(), DataType::Type::kReference, /*is_volatile=*/ true, codegen_);
}
-void IntrinsicCodeGeneratorX86::VisitJdkUnsafePutObjectRelease(HInvoke* invoke) {
+void IntrinsicCodeGeneratorX86::VisitJdkUnsafePutReferenceRelease(HInvoke* invoke) {
GenUnsafePut(
invoke->GetLocations(), DataType::Type::kReference, /*is_volatile=*/ true, codegen_);
}
@@ -2099,13 +2117,15 @@ void IntrinsicCodeGeneratorX86::VisitJdkUnsafePutLongVolatile(HInvoke* invoke) {
void IntrinsicCodeGeneratorX86::VisitJdkUnsafePutLongRelease(HInvoke* invoke) {
GenUnsafePut(invoke->GetLocations(), DataType::Type::kInt64, /*is_volatile=*/ true, codegen_);
}
+void IntrinsicCodeGeneratorX86::VisitJdkUnsafePutByte(HInvoke* invoke) {
+ GenUnsafePut(invoke->GetLocations(), DataType::Type::kInt8, /*is_volatile=*/ false, codegen_);
+}
static void CreateIntIntIntIntIntToInt(ArenaAllocator* allocator,
+ CodeGeneratorX86* codegen,
DataType::Type type,
HInvoke* invoke) {
- const bool can_call = gUseReadBarrier &&
- kUseBakerReadBarrier &&
- IsUnsafeCASObject(invoke);
+ const bool can_call = codegen->EmitBakerReadBarrier() && IsUnsafeCASReference(invoke);
LocationSummary* locations =
new (allocator) LocationSummary(invoke,
can_call
@@ -2162,24 +2182,24 @@ void IntrinsicLocationsBuilderX86::VisitJdkUnsafeCASLong(HInvoke* invoke) {
void IntrinsicLocationsBuilderX86::VisitJdkUnsafeCASObject(HInvoke* invoke) {
// `jdk.internal.misc.Unsafe.compareAndSwapObject` has compare-and-set semantics (see javadoc).
- VisitJdkUnsafeCompareAndSetObject(invoke);
+ VisitJdkUnsafeCompareAndSetReference(invoke);
}
void IntrinsicLocationsBuilderX86::VisitJdkUnsafeCompareAndSetInt(HInvoke* invoke) {
- CreateIntIntIntIntIntToInt(allocator_, DataType::Type::kInt32, invoke);
+ CreateIntIntIntIntIntToInt(allocator_, codegen_, DataType::Type::kInt32, invoke);
}
void IntrinsicLocationsBuilderX86::VisitJdkUnsafeCompareAndSetLong(HInvoke* invoke) {
- CreateIntIntIntIntIntToInt(allocator_, DataType::Type::kInt64, invoke);
+ CreateIntIntIntIntIntToInt(allocator_, codegen_, DataType::Type::kInt64, invoke);
}
-void IntrinsicLocationsBuilderX86::VisitJdkUnsafeCompareAndSetObject(HInvoke* invoke) {
+void IntrinsicLocationsBuilderX86::VisitJdkUnsafeCompareAndSetReference(HInvoke* invoke) {
// The only supported read barrier implementation is the Baker-style read barriers.
- if (gUseReadBarrier && !kUseBakerReadBarrier) {
+ if (codegen_->EmitNonBakerReadBarrier()) {
return;
}
- CreateIntIntIntIntIntToInt(allocator_, DataType::Type::kReference, invoke);
+ CreateIntIntIntIntIntToInt(allocator_, codegen_, DataType::Type::kReference, invoke);
}
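The recurring substitution in this patch replaces checks of the global gUseReadBarrier with per-compilation queries on the code generator. A sketch of the assumed relationship between the three predicates, which are declared on CodeGenerator and not shown in this hunk:

// Assumed helper semantics, for reading the conversions above:
//   cg->EmitReadBarrier()         - this compilation emits read barriers at all
//   cg->EmitBakerReadBarrier()    == cg->EmitReadBarrier() &&  kUseBakerReadBarrier
//   cg->EmitNonBakerReadBarrier() == cg->EmitReadBarrier() && !kUseBakerReadBarrier
// so `gUseReadBarrier && kUseBakerReadBarrier` and `gUseReadBarrier && !kUseBakerReadBarrier`
// each collapse into a single call.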
static void GenPrimitiveLockedCmpxchg(DataType::Type type,
@@ -2304,7 +2324,7 @@ static void GenReferenceCAS(HInvoke* invoke,
DCHECK_EQ(expected, EAX);
DCHECK_NE(temp, temp2);
- if (gUseReadBarrier && kUseBakerReadBarrier) {
+ if (codegen->EmitBakerReadBarrier()) {
// Need to make sure the reference stored in the field is a to-space
// one before attempting the CAS or the CAS could fail incorrectly.
codegen->GenerateReferenceLoadWithBakerReadBarrier(
@@ -2391,7 +2411,7 @@ static void GenCAS(DataType::Type type, HInvoke* invoke, CodeGeneratorX86* codeg
if (type == DataType::Type::kReference) {
// The only read barrier implementation supporting the
// UnsafeCASObject intrinsic is the Baker-style read barriers.
- DCHECK_IMPLIES(gUseReadBarrier, kUseBakerReadBarrier);
+ DCHECK_IMPLIES(codegen->EmitReadBarrier(), kUseBakerReadBarrier);
Register temp = locations->GetTemp(0).AsRegister<Register>();
Register temp2 = locations->GetTemp(1).AsRegister<Register>();
@@ -2413,7 +2433,7 @@ void IntrinsicCodeGeneratorX86::VisitUnsafeCASLong(HInvoke* invoke) {
void IntrinsicCodeGeneratorX86::VisitUnsafeCASObject(HInvoke* invoke) {
// The only read barrier implementation supporting the
// UnsafeCASObject intrinsic is the Baker-style read barriers.
- DCHECK_IMPLIES(gUseReadBarrier, kUseBakerReadBarrier);
+ DCHECK_IMPLIES(codegen_->EmitReadBarrier(), kUseBakerReadBarrier);
GenCAS(DataType::Type::kReference, invoke, codegen_);
}
@@ -2430,7 +2450,7 @@ void IntrinsicCodeGeneratorX86::VisitJdkUnsafeCASLong(HInvoke* invoke) {
void IntrinsicCodeGeneratorX86::VisitJdkUnsafeCASObject(HInvoke* invoke) {
// `jdk.internal.misc.Unsafe.compareAndSwapObject` has compare-and-set semantics (see javadoc).
- VisitJdkUnsafeCompareAndSetObject(invoke);
+ VisitJdkUnsafeCompareAndSetReference(invoke);
}
void IntrinsicCodeGeneratorX86::VisitJdkUnsafeCompareAndSetInt(HInvoke* invoke) {
@@ -2441,13 +2461,245 @@ void IntrinsicCodeGeneratorX86::VisitJdkUnsafeCompareAndSetLong(HInvoke* invoke)
GenCAS(DataType::Type::kInt64, invoke, codegen_);
}
-void IntrinsicCodeGeneratorX86::VisitJdkUnsafeCompareAndSetObject(HInvoke* invoke) {
+void IntrinsicCodeGeneratorX86::VisitJdkUnsafeCompareAndSetReference(HInvoke* invoke) {
// The only supported read barrier implementation is the Baker-style read barriers.
- DCHECK_IMPLIES(gUseReadBarrier, kUseBakerReadBarrier);
+ DCHECK_IMPLIES(codegen_->EmitReadBarrier(), kUseBakerReadBarrier);
GenCAS(DataType::Type::kReference, invoke, codegen_);
}
+// Note: Unlike other architectures that use corresponding enums for the `VarHandle`
+// implementation, x86 is currently using it only for `Unsafe`.
+enum class GetAndUpdateOp {
+ kSet,
+ kAdd,
+};
+
+void CreateUnsafeGetAndUpdateLocations(ArenaAllocator* allocator,
+ HInvoke* invoke,
+ CodeGeneratorX86* codegen,
+ DataType::Type type,
+ GetAndUpdateOp get_and_unsafe_op) {
+ const bool can_call = codegen->EmitReadBarrier() && IsUnsafeGetAndSetReference(invoke);
+ LocationSummary* locations =
+ new (allocator) LocationSummary(invoke,
+ can_call
+ ? LocationSummary::kCallOnSlowPath
+ : LocationSummary::kNoCall,
+ kIntrinsified);
+ if (can_call && kUseBakerReadBarrier) {
+ locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty()); // No caller-save registers.
+ }
+ locations->SetInAt(0, Location::NoLocation()); // Unused receiver.
+ if (type == DataType::Type::kInt64) {
+ // Explicitly allocate all registers.
+ locations->SetInAt(1, Location::RegisterLocation(EBP));
+ if (get_and_unsafe_op == GetAndUpdateOp::kAdd) {
+ locations->AddTemp(Location::RegisterLocation(EBP)); // We shall clobber EBP.
+ locations->SetInAt(2, Location::Any()); // Offset shall be on the stack.
+ locations->SetInAt(3, Location::RegisterPairLocation(ESI, EDI));
+ locations->AddTemp(Location::RegisterLocation(EBX));
+ locations->AddTemp(Location::RegisterLocation(ECX));
+ } else {
+ locations->SetInAt(2, Location::RegisterPairLocation(ESI, EDI));
+ locations->SetInAt(3, Location::RegisterPairLocation(EBX, ECX));
+ }
+ locations->SetOut(Location::RegisterPairLocation(EAX, EDX), Location::kOutputOverlap);
+ } else {
+ locations->SetInAt(1, Location::RequiresRegister());
+ locations->SetInAt(2, Location::RequiresRegister());
+ // Use the same register for both the output and the new value or addend
+ // to take advantage of XCHG or XADD. Arbitrarily pick EAX.
+ locations->SetInAt(3, Location::RegisterLocation(EAX));
+ locations->SetOut(Location::RegisterLocation(EAX));
+ }
+}
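The fixed register choices above follow from the x86 instructions used by the code generation path: 32-bit operations keep the operand and result in EAX for XCHG / LOCK XADD, while the 64-bit path needs EDX:EAX (old value) and ECX:EBX (new value) for LOCK CMPXCHG8B, plus ESI:EDI for the addend in the kAdd case. A compact summary (sketch, not part of the patch):

//  op    type    instruction     operands / result
//  kSet  kInt32  xchgl           new value in EAX, old value returned in EAX
//  kAdd  kInt32  lock xaddl      addend in EAX, old value returned in EAX
//  kSet  kInt64  lock cmpxchg8b  new value preloaded in ECX:EBX, old value out in EDX:EAX
//  kAdd  kInt64  lock cmpxchg8b  addend in EDI:ESI, new value rebuilt in ECX:EBX per iteration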
+
+void IntrinsicLocationsBuilderX86::VisitUnsafeGetAndAddInt(HInvoke* invoke) {
+ VisitJdkUnsafeGetAndAddInt(invoke);
+}
+
+void IntrinsicLocationsBuilderX86::VisitUnsafeGetAndAddLong(HInvoke* invoke) {
+ VisitJdkUnsafeGetAndAddLong(invoke);
+}
+
+void IntrinsicLocationsBuilderX86::VisitUnsafeGetAndSetInt(HInvoke* invoke) {
+ VisitJdkUnsafeGetAndSetInt(invoke);
+}
+
+void IntrinsicLocationsBuilderX86::VisitUnsafeGetAndSetLong(HInvoke* invoke) {
+ VisitJdkUnsafeGetAndSetLong(invoke);
+}
+
+void IntrinsicLocationsBuilderX86::VisitUnsafeGetAndSetObject(HInvoke* invoke) {
+ VisitJdkUnsafeGetAndSetReference(invoke);
+}
+
+void IntrinsicLocationsBuilderX86::VisitJdkUnsafeGetAndAddInt(HInvoke* invoke) {
+ CreateUnsafeGetAndUpdateLocations(
+ allocator_, invoke, codegen_, DataType::Type::kInt32, GetAndUpdateOp::kAdd);
+}
+
+void IntrinsicLocationsBuilderX86::VisitJdkUnsafeGetAndAddLong(HInvoke* invoke) {
+ CreateUnsafeGetAndUpdateLocations(
+ allocator_, invoke, codegen_, DataType::Type::kInt64, GetAndUpdateOp::kAdd);
+}
+
+void IntrinsicLocationsBuilderX86::VisitJdkUnsafeGetAndSetInt(HInvoke* invoke) {
+ CreateUnsafeGetAndUpdateLocations(
+ allocator_, invoke, codegen_, DataType::Type::kInt32, GetAndUpdateOp::kSet);
+}
+
+void IntrinsicLocationsBuilderX86::VisitJdkUnsafeGetAndSetLong(HInvoke* invoke) {
+ CreateUnsafeGetAndUpdateLocations(
+ allocator_, invoke, codegen_, DataType::Type::kInt64, GetAndUpdateOp::kSet);
+}
+
+void IntrinsicLocationsBuilderX86::VisitJdkUnsafeGetAndSetReference(HInvoke* invoke) {
+ // The only supported read barrier implementation is the Baker-style read barriers.
+ if (codegen_->EmitNonBakerReadBarrier()) {
+ return;
+ }
+
+ CreateUnsafeGetAndUpdateLocations(
+ allocator_, invoke, codegen_, DataType::Type::kReference, GetAndUpdateOp::kSet);
+ LocationSummary* locations = invoke->GetLocations();
+ locations->AddTemp(Location::RequiresRegister());
+ locations->AddTemp(Location::RegisterLocation(ECX)); // Byte register for `MarkGCCard()`.
+}
+
+static void GenUnsafeGetAndUpdate(HInvoke* invoke,
+ DataType::Type type,
+ CodeGeneratorX86* codegen,
+ GetAndUpdateOp get_and_update_op) {
+ X86Assembler* assembler = down_cast<X86Assembler*>(codegen->GetAssembler());
+ LocationSummary* locations = invoke->GetLocations();
+
+ Location out = locations->Out(); // Result.
+ Register base = locations->InAt(1).AsRegister<Register>(); // Object pointer.
+ Location offset = locations->InAt(2); // Long offset.
+ Location arg = locations->InAt(3); // New value or addend.
+
+ if (type == DataType::Type::kInt32) {
+ DCHECK(out.Equals(arg));
+ Register out_reg = out.AsRegister<Register>();
+ Address field_address(base, offset.AsRegisterPairLow<Register>(), TIMES_1, 0);
+ if (get_and_update_op == GetAndUpdateOp::kAdd) {
+ __ LockXaddl(field_address, out_reg);
+ } else {
+ DCHECK(get_and_update_op == GetAndUpdateOp::kSet);
+ __ xchgl(out_reg, field_address);
+ }
+ } else if (type == DataType::Type::kInt64) {
+ // Prepare the field address. Ignore the high 32 bits of the `offset`.
+ Address field_address_low(kNoRegister, 0), field_address_high(kNoRegister, 0);
+ if (get_and_update_op == GetAndUpdateOp::kAdd) {
+ DCHECK(offset.IsDoubleStackSlot());
+ __ addl(base, Address(ESP, offset.GetStackIndex())); // Clobbers `base`.
+ DCHECK(Location::RegisterLocation(base).Equals(locations->GetTemp(0)));
+ field_address_low = Address(base, 0);
+ field_address_high = Address(base, 4);
+ } else {
+ field_address_low = Address(base, offset.AsRegisterPairLow<Register>(), TIMES_1, 0);
+ field_address_high = Address(base, offset.AsRegisterPairLow<Register>(), TIMES_1, 4);
+ }
+ // Load the old value to EDX:EAX and use LOCK CMPXCHG8B to set the new value.
+ NearLabel loop;
+ __ Bind(&loop);
+ __ movl(EAX, field_address_low);
+ __ movl(EDX, field_address_high);
+ if (get_and_update_op == GetAndUpdateOp::kAdd) {
+ DCHECK(Location::RegisterPairLocation(ESI, EDI).Equals(arg));
+ __ movl(EBX, EAX);
+ __ movl(ECX, EDX);
+ __ addl(EBX, ESI);
+ __ adcl(ECX, EDI);
+ } else {
+ DCHECK(get_and_update_op == GetAndUpdateOp::kSet);
+ DCHECK(Location::RegisterPairLocation(EBX, ECX).Equals(arg));
+ }
+ __ LockCmpxchg8b(field_address_low);
+ __ j(kNotEqual, &loop); // Repeat on failure.
+ } else {
+ DCHECK_EQ(type, DataType::Type::kReference);
+ DCHECK(get_and_update_op == GetAndUpdateOp::kSet);
+ Register out_reg = out.AsRegister<Register>();
+ Address field_address(base, offset.AsRegisterPairLow<Register>(), TIMES_1, 0);
+ Register temp1 = locations->GetTemp(0).AsRegister<Register>();
+ Register temp2 = locations->GetTemp(1).AsRegister<Register>();
+
+ if (codegen->EmitReadBarrier()) {
+ DCHECK(kUseBakerReadBarrier);
+ // Ensure that the field contains a to-space reference.
+ codegen->GenerateReferenceLoadWithBakerReadBarrier(
+ invoke,
+ Location::RegisterLocation(temp2),
+ base,
+ field_address,
+ /*needs_null_check=*/ false,
+ /*always_update_field=*/ true,
+ &temp1);
+ }
+
+ // Mark card for object as a new value shall be stored.
+ bool new_value_can_be_null = true; // TODO: Worth finding out this information?
+ DCHECK_EQ(temp2, ECX); // Byte register for `MarkGCCard()`.
+ codegen->MarkGCCard(temp1, temp2, base, /*value=*/ out_reg, new_value_can_be_null);
+
+ if (kPoisonHeapReferences) {
+ // Use a temp to avoid poisoning base of the field address, which might happen if `out`
+ // is the same as `base` (for code like `unsafe.getAndSet(obj, offset, obj)`).
+ __ movl(temp1, out_reg);
+ __ PoisonHeapReference(temp1);
+ __ xchgl(temp1, field_address);
+ __ UnpoisonHeapReference(temp1);
+ __ movl(out_reg, temp1);
+ } else {
+ __ xchgl(out_reg, field_address);
+ }
+ }
+}
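For the 64-bit path above, the emitted loop is equivalent to the following scalar code, with LOCK CMPXCHG8B supplying the atomic compare-and-exchange (sketch only; std::atomic stands in for the raw field access):

#include <atomic>
#include <cstdint>
// Scalar equivalent of the 64-bit get-and-update loop emitted above.
static int64_t GetAndUpdate64(std::atomic<int64_t>* field, int64_t arg, bool is_add) {
  int64_t old_value = field->load();                    // movl field -> EAX/EDX
  int64_t desired;
  do {
    desired = is_add ? old_value + arg : arg;           // addl/adcl into EBX:ECX, or arg preloaded there
  } while (!field->compare_exchange_weak(old_value, desired));  // lock cmpxchg8b; retry on failure
  return old_value;                                     // old value ends up in EDX:EAX
}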
+
+void IntrinsicCodeGeneratorX86::VisitUnsafeGetAndAddInt(HInvoke* invoke) {
+ VisitJdkUnsafeGetAndAddInt(invoke);
+}
+
+void IntrinsicCodeGeneratorX86::VisitUnsafeGetAndAddLong(HInvoke* invoke) {
+ VisitJdkUnsafeGetAndAddLong(invoke);
+}
+
+void IntrinsicCodeGeneratorX86::VisitUnsafeGetAndSetInt(HInvoke* invoke) {
+ VisitJdkUnsafeGetAndSetInt(invoke);
+}
+
+void IntrinsicCodeGeneratorX86::VisitUnsafeGetAndSetLong(HInvoke* invoke) {
+ VisitJdkUnsafeGetAndSetLong(invoke);
+}
+
+void IntrinsicCodeGeneratorX86::VisitUnsafeGetAndSetObject(HInvoke* invoke) {
+ VisitJdkUnsafeGetAndSetReference(invoke);
+}
+
+void IntrinsicCodeGeneratorX86::VisitJdkUnsafeGetAndAddInt(HInvoke* invoke) {
+ GenUnsafeGetAndUpdate(invoke, DataType::Type::kInt32, codegen_, GetAndUpdateOp::kAdd);
+}
+
+void IntrinsicCodeGeneratorX86::VisitJdkUnsafeGetAndAddLong(HInvoke* invoke) {
+ GenUnsafeGetAndUpdate(invoke, DataType::Type::kInt64, codegen_, GetAndUpdateOp::kAdd);
+}
+
+void IntrinsicCodeGeneratorX86::VisitJdkUnsafeGetAndSetInt(HInvoke* invoke) {
+ GenUnsafeGetAndUpdate(invoke, DataType::Type::kInt32, codegen_, GetAndUpdateOp::kSet);
+}
+
+void IntrinsicCodeGeneratorX86::VisitJdkUnsafeGetAndSetLong(HInvoke* invoke) {
+ GenUnsafeGetAndUpdate(invoke, DataType::Type::kInt64, codegen_, GetAndUpdateOp::kSet);
+}
+
+void IntrinsicCodeGeneratorX86::VisitJdkUnsafeGetAndSetReference(HInvoke* invoke) {
+ GenUnsafeGetAndUpdate(invoke, DataType::Type::kReference, codegen_, GetAndUpdateOp::kSet);
+}
+
void IntrinsicLocationsBuilderX86::VisitIntegerReverse(HInvoke* invoke) {
LocationSummary* locations =
new (allocator_) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
@@ -2843,7 +3095,7 @@ static void GenSystemArrayCopyEndAddress(X86Assembler* assembler,
void IntrinsicLocationsBuilderX86::VisitSystemArrayCopy(HInvoke* invoke) {
// The only read barrier implementation supporting the
// SystemArrayCopy intrinsic is the Baker-style read barriers.
- if (gUseReadBarrier && !kUseBakerReadBarrier) {
+ if (codegen_->EmitNonBakerReadBarrier()) {
return;
}
@@ -2875,7 +3127,7 @@ void IntrinsicLocationsBuilderX86::VisitSystemArrayCopy(HInvoke* invoke) {
void IntrinsicCodeGeneratorX86::VisitSystemArrayCopy(HInvoke* invoke) {
// The only read barrier implementation supporting the
// SystemArrayCopy intrinsic is the Baker-style read barriers.
- DCHECK_IMPLIES(gUseReadBarrier, kUseBakerReadBarrier);
+ DCHECK_IMPLIES(codegen_->EmitReadBarrier(), kUseBakerReadBarrier);
X86Assembler* assembler = GetAssembler();
LocationSummary* locations = invoke->GetLocations();
@@ -2995,7 +3247,7 @@ void IntrinsicCodeGeneratorX86::VisitSystemArrayCopy(HInvoke* invoke) {
// slow path.
if (!optimizations.GetSourceIsNonPrimitiveArray()) {
- if (gUseReadBarrier && kUseBakerReadBarrier) {
+ if (codegen_->EmitBakerReadBarrier()) {
// /* HeapReference<Class> */ temp1 = src->klass_
codegen_->GenerateFieldLoadWithBakerReadBarrier(
invoke, temp1_loc, src, class_offset, /* needs_null_check= */ false);
@@ -3006,7 +3258,7 @@ void IntrinsicCodeGeneratorX86::VisitSystemArrayCopy(HInvoke* invoke) {
__ testl(temp1, temp1);
__ j(kEqual, intrinsic_slow_path->GetEntryLabel());
// If heap poisoning is enabled, `temp1` has been unpoisoned
- // by the the previous call to GenerateFieldLoadWithBakerReadBarrier.
+ // by the previous call to GenerateFieldLoadWithBakerReadBarrier.
} else {
// /* HeapReference<Class> */ temp1 = src->klass_
__ movl(temp1, Address(src, class_offset));
@@ -3022,7 +3274,7 @@ void IntrinsicCodeGeneratorX86::VisitSystemArrayCopy(HInvoke* invoke) {
__ j(kNotEqual, intrinsic_slow_path->GetEntryLabel());
}
- if (gUseReadBarrier && kUseBakerReadBarrier) {
+ if (codegen_->EmitBakerReadBarrier()) {
if (length.Equals(Location::RegisterLocation(temp3))) {
// When Baker read barriers are enabled, register `temp3`,
// which in the present case contains the `length` parameter,
@@ -3051,7 +3303,7 @@ void IntrinsicCodeGeneratorX86::VisitSystemArrayCopy(HInvoke* invoke) {
__ testl(temp2, temp2);
__ j(kEqual, intrinsic_slow_path->GetEntryLabel());
// If heap poisoning is enabled, `temp2` has been unpoisoned
- // by the the previous call to GenerateFieldLoadWithBakerReadBarrier.
+ // by the previous call to GenerateFieldLoadWithBakerReadBarrier.
__ cmpw(Address(temp2, primitive_offset), Immediate(Primitive::kPrimNot));
__ j(kNotEqual, intrinsic_slow_path->GetEntryLabel());
}
@@ -3120,7 +3372,7 @@ void IntrinsicCodeGeneratorX86::VisitSystemArrayCopy(HInvoke* invoke) {
} else if (!optimizations.GetSourceIsNonPrimitiveArray()) {
DCHECK(optimizations.GetDestinationIsNonPrimitiveArray());
// Bail out if the source is not a non primitive array.
- if (gUseReadBarrier && kUseBakerReadBarrier) {
+ if (codegen_->EmitBakerReadBarrier()) {
// /* HeapReference<Class> */ temp1 = src->klass_
codegen_->GenerateFieldLoadWithBakerReadBarrier(
invoke, temp1_loc, src, class_offset, /* needs_null_check= */ false);
@@ -3130,7 +3382,7 @@ void IntrinsicCodeGeneratorX86::VisitSystemArrayCopy(HInvoke* invoke) {
__ testl(temp1, temp1);
__ j(kEqual, intrinsic_slow_path->GetEntryLabel());
// If heap poisoning is enabled, `temp1` has been unpoisoned
- // by the the previous call to GenerateFieldLoadWithBakerReadBarrier.
+ // by the previous call to GenerateFieldLoadWithBakerReadBarrier.
} else {
// /* HeapReference<Class> */ temp1 = src->klass_
__ movl(temp1, Address(src, class_offset));
@@ -3151,7 +3403,7 @@ void IntrinsicCodeGeneratorX86::VisitSystemArrayCopy(HInvoke* invoke) {
// Compute the base source address in `temp1`.
GenSystemArrayCopyBaseAddress(GetAssembler(), type, src, src_pos, temp1);
- if (gUseReadBarrier && kUseBakerReadBarrier) {
+ if (codegen_->EmitBakerReadBarrier()) {
// If it is needed (in the case of the fast-path loop), the base
// destination address is computed later, as `temp2` is used for
// intermediate computations.
@@ -3279,21 +3531,36 @@ static void RequestBaseMethodAddressInRegister(HInvoke* invoke) {
}
}
-void IntrinsicLocationsBuilderX86::VisitIntegerValueOf(HInvoke* invoke) {
+#define VISIT_INTRINSIC(name, low, high, type, start_index) \
+ void IntrinsicLocationsBuilderX86::Visit ##name ##ValueOf(HInvoke* invoke) { \
+ InvokeRuntimeCallingConvention calling_convention; \
+ IntrinsicVisitor::ComputeValueOfLocations( \
+ invoke, \
+ codegen_, \
+ low, \
+ high - low + 1, \
+ Location::RegisterLocation(EAX), \
+ Location::RegisterLocation(calling_convention.GetRegisterAt(0))); \
+ RequestBaseMethodAddressInRegister(invoke); \
+ } \
+ void IntrinsicCodeGeneratorX86::Visit ##name ##ValueOf(HInvoke* invoke) { \
+ IntrinsicVisitor::ValueOfInfo info = \
+ IntrinsicVisitor::ComputeValueOfInfo( \
+ invoke, \
+ codegen_->GetCompilerOptions(), \
+ WellKnownClasses::java_lang_ ##name ##_value, \
+ low, \
+ high - low + 1, \
+ start_index); \
+ HandleValueOf(invoke, info, type); \
+ }
+ BOXED_TYPES(VISIT_INTRINSIC)
+#undef VISIT_INTRINSIC
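For one boxed type, the macro above expands roughly as follows; `low`, `high`, `start_index` and the data type come from the BOXED_TYPES list, which is not shown in this hunk, so the concrete values here are placeholders:

void IntrinsicLocationsBuilderX86::VisitIntegerValueOf(HInvoke* invoke) {
  InvokeRuntimeCallingConvention calling_convention;
  IntrinsicVisitor::ComputeValueOfLocations(
      invoke, codegen_, low, high - low + 1,
      Location::RegisterLocation(EAX),
      Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
  RequestBaseMethodAddressInRegister(invoke);
}
void IntrinsicCodeGeneratorX86::VisitIntegerValueOf(HInvoke* invoke) {
  IntrinsicVisitor::ValueOfInfo info = IntrinsicVisitor::ComputeValueOfInfo(
      invoke, codegen_->GetCompilerOptions(),
      WellKnownClasses::java_lang_Integer_value,
      low, high - low + 1, start_index);
  HandleValueOf(invoke, info, type);
}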
+
+void IntrinsicCodeGeneratorX86::HandleValueOf(HInvoke* invoke,
+ const IntrinsicVisitor::ValueOfInfo& info,
+ DataType::Type type) {
DCHECK(invoke->IsInvokeStaticOrDirect());
- InvokeRuntimeCallingConvention calling_convention;
- IntrinsicVisitor::ComputeIntegerValueOfLocations(
- invoke,
- codegen_,
- Location::RegisterLocation(EAX),
- Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
- RequestBaseMethodAddressInRegister(invoke);
-}
-
-void IntrinsicCodeGeneratorX86::VisitIntegerValueOf(HInvoke* invoke) {
- DCHECK(invoke->IsInvokeStaticOrDirect());
- IntrinsicVisitor::IntegerValueOfInfo info =
- IntrinsicVisitor::ComputeIntegerValueOfInfo(invoke, codegen_->GetCompilerOptions());
LocationSummary* locations = invoke->GetLocations();
X86Assembler* assembler = GetAssembler();
@@ -3304,20 +3571,25 @@ void IntrinsicCodeGeneratorX86::VisitIntegerValueOf(HInvoke* invoke) {
codegen_->InvokeRuntime(kQuickAllocObjectInitialized, invoke, invoke->GetDexPc());
CheckEntrypointTypes<kQuickAllocObjectWithChecks, void*, mirror::Class*>();
};
- if (invoke->InputAt(0)->IsConstant()) {
+ if (invoke->InputAt(0)->IsIntConstant()) {
int32_t value = invoke->InputAt(0)->AsIntConstant()->GetValue();
if (static_cast<uint32_t>(value - info.low) < info.length) {
- // Just embed the j.l.Integer in the code.
- DCHECK_NE(info.value_boot_image_reference, IntegerValueOfInfo::kInvalidReference);
+ // Just embed the object in the code.
+ DCHECK_NE(info.value_boot_image_reference, ValueOfInfo::kInvalidReference);
codegen_->LoadBootImageAddress(
out, info.value_boot_image_reference, invoke->AsInvokeStaticOrDirect());
} else {
DCHECK(locations->CanCall());
// Allocate and initialize a new j.l.Integer.
- // TODO: If we JIT, we could allocate the j.l.Integer now, and store it in the
+ // TODO: If we JIT, we could allocate the object now, and store it in the
// JIT object table.
allocate_instance();
- __ movl(Address(out, info.value_offset), Immediate(value));
+ codegen_->MoveToMemory(type,
+ Location::ConstantLocation(invoke->InputAt(0)->AsIntConstant()),
+ out,
+ /* dst_index= */ Register::kNoRegister,
+ /* dst_scale= */ TIMES_1,
+ /* dst_disp= */ info.value_offset);
}
} else {
DCHECK(locations->CanCall());
@@ -3327,7 +3599,7 @@ void IntrinsicCodeGeneratorX86::VisitIntegerValueOf(HInvoke* invoke) {
__ cmpl(out, Immediate(info.length));
NearLabel allocate, done;
__ j(kAboveEqual, &allocate);
- // If the value is within the bounds, load the j.l.Integer directly from the array.
+ // If the value is within the bounds, load the object directly from the array.
constexpr size_t kElementSize = sizeof(mirror::HeapReference<mirror::Object>);
static_assert((1u << TIMES_4) == sizeof(mirror::HeapReference<mirror::Object>),
"Check heap reference size.");
@@ -3355,9 +3627,14 @@ void IntrinsicCodeGeneratorX86::VisitIntegerValueOf(HInvoke* invoke) {
__ MaybeUnpoisonHeapReference(out);
__ jmp(&done);
__ Bind(&allocate);
- // Otherwise allocate and initialize a new j.l.Integer.
+ // Otherwise allocate and initialize a new object.
allocate_instance();
- __ movl(Address(out, info.value_offset), in);
+ codegen_->MoveToMemory(type,
+ Location::RegisterLocation(in),
+ out,
+ /* dst_index= */ Register::kNoRegister,
+ /* dst_scale= */ TIMES_1,
+ /* dst_disp= */ info.value_offset);
__ Bind(&done);
}
}
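Both paths of HandleValueOf use the standard unsigned-compare trick to test whether the value falls inside the boxed-value cache: `low <= value && value <= high` becomes a single unsigned comparison. A minimal sketch (not part of the patch):

#include <cstdint>
// True iff low <= value && value < low + length, using one unsigned compare;
// this is what the constant path and the subl/cmpl/jae sequence above implement.
static bool InValueOfCache(int32_t value, int32_t low, uint32_t length) {
  return static_cast<uint32_t>(value - low) < length;
}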
@@ -3377,7 +3654,7 @@ void IntrinsicCodeGeneratorX86::VisitReferenceGetReferent(HInvoke* invoke) {
SlowPathCode* slow_path = new (GetAllocator()) IntrinsicSlowPathX86(invoke);
codegen_->AddSlowPath(slow_path);
- if (gUseReadBarrier) {
+ if (codegen_->EmitReadBarrier()) {
// Check self->GetWeakRefAccessEnabled().
ThreadOffset32 offset = Thread::WeakRefAccessEnabledOffset<kX86PointerSize>();
__ fs()->cmpl(Address::Absolute(offset),
@@ -3400,7 +3677,7 @@ void IntrinsicCodeGeneratorX86::VisitReferenceGetReferent(HInvoke* invoke) {
// Load the value from the field.
uint32_t referent_offset = mirror::Reference::ReferentOffset().Uint32Value();
- if (gUseReadBarrier && kUseBakerReadBarrier) {
+ if (codegen_->EmitBakerReadBarrier()) {
codegen_->GenerateFieldLoadWithBakerReadBarrier(invoke,
out,
obj.AsRegister<Register>(),
@@ -3419,7 +3696,7 @@ void IntrinsicCodeGeneratorX86::VisitReferenceGetReferent(HInvoke* invoke) {
}
void IntrinsicLocationsBuilderX86::VisitReferenceRefersTo(HInvoke* invoke) {
- IntrinsicVisitor::CreateReferenceRefersToLocations(invoke);
+ IntrinsicVisitor::CreateReferenceRefersToLocations(invoke, codegen_);
}
void IntrinsicCodeGeneratorX86::VisitReferenceRefersTo(HInvoke* invoke) {
@@ -3442,7 +3719,7 @@ void IntrinsicCodeGeneratorX86::VisitReferenceRefersTo(HInvoke* invoke) {
NearLabel end, return_true, return_false;
__ cmpl(out, other);
- if (gUseReadBarrier) {
+ if (codegen_->EmitReadBarrier()) {
DCHECK(kUseBakerReadBarrier);
__ j(kEqual, &return_true);
@@ -3504,7 +3781,7 @@ void IntrinsicLocationsBuilderX86::VisitReachabilityFence(HInvoke* invoke) {
locations->SetInAt(0, Location::Any());
}
-void IntrinsicCodeGeneratorX86::VisitReachabilityFence(HInvoke* invoke ATTRIBUTE_UNUSED) { }
+void IntrinsicCodeGeneratorX86::VisitReachabilityFence([[maybe_unused]] HInvoke* invoke) {}
void IntrinsicLocationsBuilderX86::VisitIntegerDivideUnsigned(HInvoke* invoke) {
LocationSummary* locations = new (allocator_) LocationSummary(invoke,
@@ -3769,7 +4046,7 @@ static Register GenerateVarHandleFieldReference(HInvoke* invoke,
const uint32_t declaring_class_offset = ArtField::DeclaringClassOffset().Uint32Value();
Register varhandle_object = locations->InAt(0).AsRegister<Register>();
- // Load the ArtField and the offset
+ // Load the ArtField* and the offset.
__ movl(temp, Address(varhandle_object, artfield_offset));
__ movl(offset, Address(temp, offset_offset));
size_t expected_coordinates_count = GetExpectedVarHandleCoordinatesCount(invoke);
@@ -3781,7 +4058,7 @@ static Register GenerateVarHandleFieldReference(HInvoke* invoke,
Location::RegisterLocation(temp),
Address(temp, declaring_class_offset),
/* fixup_label= */ nullptr,
- gCompilerReadBarrierOption);
+ codegen->GetCompilerReadBarrierOption());
return temp;
}
@@ -3791,10 +4068,10 @@ static Register GenerateVarHandleFieldReference(HInvoke* invoke,
return locations->InAt(1).AsRegister<Register>();
}
-static void CreateVarHandleGetLocations(HInvoke* invoke) {
+static void CreateVarHandleGetLocations(HInvoke* invoke, CodeGeneratorX86* codegen) {
// The only read barrier implementation supporting the
// VarHandleGet intrinsic is the Baker-style read barriers.
- if (gUseReadBarrier && !kUseBakerReadBarrier) {
+ if (codegen->EmitNonBakerReadBarrier()) {
return;
}
@@ -3836,7 +4113,7 @@ static void CreateVarHandleGetLocations(HInvoke* invoke) {
static void GenerateVarHandleGet(HInvoke* invoke, CodeGeneratorX86* codegen) {
// The only read barrier implementation supporting the
// VarHandleGet intrinsic is the Baker-style read barriers.
- DCHECK_IMPLIES(gUseReadBarrier, kUseBakerReadBarrier);
+ DCHECK_IMPLIES(codegen->EmitReadBarrier(), kUseBakerReadBarrier);
X86Assembler* assembler = codegen->GetAssembler();
LocationSummary* locations = invoke->GetLocations();
@@ -3860,7 +4137,7 @@ static void GenerateVarHandleGet(HInvoke* invoke, CodeGeneratorX86* codegen) {
Address field_addr(ref, offset, TIMES_1, 0);
// Load the value from the field
- if (type == DataType::Type::kReference && gCompilerReadBarrierOption == kWithReadBarrier) {
+ if (type == DataType::Type::kReference && codegen->EmitReadBarrier()) {
codegen->GenerateReferenceLoadWithBakerReadBarrier(
invoke, out, ref, field_addr, /* needs_null_check= */ false);
} else if (type == DataType::Type::kInt64 &&
@@ -3883,7 +4160,7 @@ static void GenerateVarHandleGet(HInvoke* invoke, CodeGeneratorX86* codegen) {
}
void IntrinsicLocationsBuilderX86::VisitVarHandleGet(HInvoke* invoke) {
- CreateVarHandleGetLocations(invoke);
+ CreateVarHandleGetLocations(invoke, codegen_);
}
void IntrinsicCodeGeneratorX86::VisitVarHandleGet(HInvoke* invoke) {
@@ -3891,7 +4168,7 @@ void IntrinsicCodeGeneratorX86::VisitVarHandleGet(HInvoke* invoke) {
}
void IntrinsicLocationsBuilderX86::VisitVarHandleGetVolatile(HInvoke* invoke) {
- CreateVarHandleGetLocations(invoke);
+ CreateVarHandleGetLocations(invoke, codegen_);
}
void IntrinsicCodeGeneratorX86::VisitVarHandleGetVolatile(HInvoke* invoke) {
@@ -3899,7 +4176,7 @@ void IntrinsicCodeGeneratorX86::VisitVarHandleGetVolatile(HInvoke* invoke) {
}
void IntrinsicLocationsBuilderX86::VisitVarHandleGetAcquire(HInvoke* invoke) {
- CreateVarHandleGetLocations(invoke);
+ CreateVarHandleGetLocations(invoke, codegen_);
}
void IntrinsicCodeGeneratorX86::VisitVarHandleGetAcquire(HInvoke* invoke) {
@@ -3907,17 +4184,17 @@ void IntrinsicCodeGeneratorX86::VisitVarHandleGetAcquire(HInvoke* invoke) {
}
void IntrinsicLocationsBuilderX86::VisitVarHandleGetOpaque(HInvoke* invoke) {
- CreateVarHandleGetLocations(invoke);
+ CreateVarHandleGetLocations(invoke, codegen_);
}
void IntrinsicCodeGeneratorX86::VisitVarHandleGetOpaque(HInvoke* invoke) {
GenerateVarHandleGet(invoke, codegen_);
}
-static void CreateVarHandleSetLocations(HInvoke* invoke) {
+static void CreateVarHandleSetLocations(HInvoke* invoke, CodeGeneratorX86* codegen) {
// The only read barrier implementation supporting the
// VarHandleGet intrinsic is the Baker-style read barriers.
- if (gUseReadBarrier && !kUseBakerReadBarrier) {
+ if (codegen->EmitNonBakerReadBarrier()) {
return;
}
@@ -3990,7 +4267,7 @@ static void CreateVarHandleSetLocations(HInvoke* invoke) {
static void GenerateVarHandleSet(HInvoke* invoke, CodeGeneratorX86* codegen) {
// The only read barrier implementation supporting the
// VarHandleGet intrinsic is the Baker-style read barriers.
- DCHECK_IMPLIES(gUseReadBarrier, kUseBakerReadBarrier);
+ DCHECK_IMPLIES(codegen->EmitReadBarrier(), kUseBakerReadBarrier);
X86Assembler* assembler = codegen->GetAssembler();
LocationSummary* locations = invoke->GetLocations();
@@ -4056,7 +4333,7 @@ static void GenerateVarHandleSet(HInvoke* invoke, CodeGeneratorX86* codegen) {
}
void IntrinsicLocationsBuilderX86::VisitVarHandleSet(HInvoke* invoke) {
- CreateVarHandleSetLocations(invoke);
+ CreateVarHandleSetLocations(invoke, codegen_);
}
void IntrinsicCodeGeneratorX86::VisitVarHandleSet(HInvoke* invoke) {
@@ -4064,7 +4341,7 @@ void IntrinsicCodeGeneratorX86::VisitVarHandleSet(HInvoke* invoke) {
}
void IntrinsicLocationsBuilderX86::VisitVarHandleSetVolatile(HInvoke* invoke) {
- CreateVarHandleSetLocations(invoke);
+ CreateVarHandleSetLocations(invoke, codegen_);
}
void IntrinsicCodeGeneratorX86::VisitVarHandleSetVolatile(HInvoke* invoke) {
@@ -4072,7 +4349,7 @@ void IntrinsicCodeGeneratorX86::VisitVarHandleSetVolatile(HInvoke* invoke) {
}
void IntrinsicLocationsBuilderX86::VisitVarHandleSetRelease(HInvoke* invoke) {
- CreateVarHandleSetLocations(invoke);
+ CreateVarHandleSetLocations(invoke, codegen_);
}
void IntrinsicCodeGeneratorX86::VisitVarHandleSetRelease(HInvoke* invoke) {
@@ -4080,17 +4357,17 @@ void IntrinsicCodeGeneratorX86::VisitVarHandleSetRelease(HInvoke* invoke) {
}
void IntrinsicLocationsBuilderX86::VisitVarHandleSetOpaque(HInvoke* invoke) {
- CreateVarHandleSetLocations(invoke);
+ CreateVarHandleSetLocations(invoke, codegen_);
}
void IntrinsicCodeGeneratorX86::VisitVarHandleSetOpaque(HInvoke* invoke) {
GenerateVarHandleSet(invoke, codegen_);
}
-static void CreateVarHandleGetAndSetLocations(HInvoke* invoke) {
+static void CreateVarHandleGetAndSetLocations(HInvoke* invoke, CodeGeneratorX86* codegen) {
// The only read barrier implementation supporting the
// VarHandleGet intrinsic is the Baker-style read barriers.
- if (gUseReadBarrier && !kUseBakerReadBarrier) {
+ if (codegen->EmitNonBakerReadBarrier()) {
return;
}
@@ -4138,7 +4415,7 @@ static void CreateVarHandleGetAndSetLocations(HInvoke* invoke) {
static void GenerateVarHandleGetAndSet(HInvoke* invoke, CodeGeneratorX86* codegen) {
// The only read barrier implementation supporting the
// VarHandleGet intrinsic is the Baker-style read barriers.
- DCHECK_IMPLIES(gUseReadBarrier, kUseBakerReadBarrier);
+ DCHECK_IMPLIES(codegen->EmitReadBarrier(), kUseBakerReadBarrier);
X86Assembler* assembler = codegen->GetAssembler();
LocationSummary* locations = invoke->GetLocations();
@@ -4197,7 +4474,7 @@ static void GenerateVarHandleGetAndSet(HInvoke* invoke, CodeGeneratorX86* codege
__ movd(locations->Out().AsFpuRegister<XmmRegister>(), EAX);
break;
case DataType::Type::kReference: {
- if (gUseReadBarrier && kUseBakerReadBarrier) {
+ if (codegen->EmitBakerReadBarrier()) {
// Need to make sure the reference stored in the field is a to-space
// one before attempting the CAS or the CAS could fail incorrectly.
codegen->GenerateReferenceLoadWithBakerReadBarrier(
@@ -4235,7 +4512,7 @@ static void GenerateVarHandleGetAndSet(HInvoke* invoke, CodeGeneratorX86* codege
}
void IntrinsicLocationsBuilderX86::VisitVarHandleGetAndSet(HInvoke* invoke) {
- CreateVarHandleGetAndSetLocations(invoke);
+ CreateVarHandleGetAndSetLocations(invoke, codegen_);
}
void IntrinsicCodeGeneratorX86::VisitVarHandleGetAndSet(HInvoke* invoke) {
@@ -4243,7 +4520,7 @@ void IntrinsicCodeGeneratorX86::VisitVarHandleGetAndSet(HInvoke* invoke) {
}
void IntrinsicLocationsBuilderX86::VisitVarHandleGetAndSetAcquire(HInvoke* invoke) {
- CreateVarHandleGetAndSetLocations(invoke);
+ CreateVarHandleGetAndSetLocations(invoke, codegen_);
}
void IntrinsicCodeGeneratorX86::VisitVarHandleGetAndSetAcquire(HInvoke* invoke) {
@@ -4251,17 +4528,18 @@ void IntrinsicCodeGeneratorX86::VisitVarHandleGetAndSetAcquire(HInvoke* invoke)
}
void IntrinsicLocationsBuilderX86::VisitVarHandleGetAndSetRelease(HInvoke* invoke) {
- CreateVarHandleGetAndSetLocations(invoke);
+ CreateVarHandleGetAndSetLocations(invoke, codegen_);
}
void IntrinsicCodeGeneratorX86::VisitVarHandleGetAndSetRelease(HInvoke* invoke) {
GenerateVarHandleGetAndSet(invoke, codegen_);
}
-static void CreateVarHandleCompareAndSetOrExchangeLocations(HInvoke* invoke) {
+static void CreateVarHandleCompareAndSetOrExchangeLocations(HInvoke* invoke,
+ CodeGeneratorX86* codegen) {
// The only read barrier implementation supporting the
// VarHandleGet intrinsic is the Baker-style read barriers.
- if (gUseReadBarrier && !kUseBakerReadBarrier) {
+ if (codegen->EmitNonBakerReadBarrier()) {
return;
}
@@ -4325,7 +4603,7 @@ static void CreateVarHandleCompareAndSetOrExchangeLocations(HInvoke* invoke) {
static void GenerateVarHandleCompareAndSetOrExchange(HInvoke* invoke, CodeGeneratorX86* codegen) {
// The only read barrier implementation supporting the
// VarHandleGet intrinsic is the Baker-style read barriers.
- DCHECK_IMPLIES(gUseReadBarrier, kUseBakerReadBarrier);
+ DCHECK_IMPLIES(codegen->EmitReadBarrier(), kUseBakerReadBarrier);
X86Assembler* assembler = codegen->GetAssembler();
LocationSummary* locations = invoke->GetLocations();
@@ -4378,7 +4656,7 @@ static void GenerateVarHandleCompareAndSetOrExchange(HInvoke* invoke, CodeGenera
}
void IntrinsicLocationsBuilderX86::VisitVarHandleCompareAndSet(HInvoke* invoke) {
- CreateVarHandleCompareAndSetOrExchangeLocations(invoke);
+ CreateVarHandleCompareAndSetOrExchangeLocations(invoke, codegen_);
}
void IntrinsicCodeGeneratorX86::VisitVarHandleCompareAndSet(HInvoke* invoke) {
@@ -4386,7 +4664,7 @@ void IntrinsicCodeGeneratorX86::VisitVarHandleCompareAndSet(HInvoke* invoke) {
}
void IntrinsicLocationsBuilderX86::VisitVarHandleWeakCompareAndSet(HInvoke* invoke) {
- CreateVarHandleCompareAndSetOrExchangeLocations(invoke);
+ CreateVarHandleCompareAndSetOrExchangeLocations(invoke, codegen_);
}
void IntrinsicCodeGeneratorX86::VisitVarHandleWeakCompareAndSet(HInvoke* invoke) {
@@ -4394,7 +4672,7 @@ void IntrinsicCodeGeneratorX86::VisitVarHandleWeakCompareAndSet(HInvoke* invoke)
}
void IntrinsicLocationsBuilderX86::VisitVarHandleWeakCompareAndSetPlain(HInvoke* invoke) {
- CreateVarHandleCompareAndSetOrExchangeLocations(invoke);
+ CreateVarHandleCompareAndSetOrExchangeLocations(invoke, codegen_);
}
void IntrinsicCodeGeneratorX86::VisitVarHandleWeakCompareAndSetPlain(HInvoke* invoke) {
@@ -4402,7 +4680,7 @@ void IntrinsicCodeGeneratorX86::VisitVarHandleWeakCompareAndSetPlain(HInvoke* in
}
void IntrinsicLocationsBuilderX86::VisitVarHandleWeakCompareAndSetAcquire(HInvoke* invoke) {
- CreateVarHandleCompareAndSetOrExchangeLocations(invoke);
+ CreateVarHandleCompareAndSetOrExchangeLocations(invoke, codegen_);
}
void IntrinsicCodeGeneratorX86::VisitVarHandleWeakCompareAndSetAcquire(HInvoke* invoke) {
@@ -4410,7 +4688,7 @@ void IntrinsicCodeGeneratorX86::VisitVarHandleWeakCompareAndSetAcquire(HInvoke*
}
void IntrinsicLocationsBuilderX86::VisitVarHandleWeakCompareAndSetRelease(HInvoke* invoke) {
- CreateVarHandleCompareAndSetOrExchangeLocations(invoke);
+ CreateVarHandleCompareAndSetOrExchangeLocations(invoke, codegen_);
}
void IntrinsicCodeGeneratorX86::VisitVarHandleWeakCompareAndSetRelease(HInvoke* invoke) {
@@ -4418,7 +4696,7 @@ void IntrinsicCodeGeneratorX86::VisitVarHandleWeakCompareAndSetRelease(HInvoke*
}
void IntrinsicLocationsBuilderX86::VisitVarHandleCompareAndExchange(HInvoke* invoke) {
- CreateVarHandleCompareAndSetOrExchangeLocations(invoke);
+ CreateVarHandleCompareAndSetOrExchangeLocations(invoke, codegen_);
}
void IntrinsicCodeGeneratorX86::VisitVarHandleCompareAndExchange(HInvoke* invoke) {
@@ -4426,7 +4704,7 @@ void IntrinsicCodeGeneratorX86::VisitVarHandleCompareAndExchange(HInvoke* invoke
}
void IntrinsicLocationsBuilderX86::VisitVarHandleCompareAndExchangeAcquire(HInvoke* invoke) {
- CreateVarHandleCompareAndSetOrExchangeLocations(invoke);
+ CreateVarHandleCompareAndSetOrExchangeLocations(invoke, codegen_);
}
void IntrinsicCodeGeneratorX86::VisitVarHandleCompareAndExchangeAcquire(HInvoke* invoke) {
@@ -4434,17 +4712,17 @@ void IntrinsicCodeGeneratorX86::VisitVarHandleCompareAndExchangeAcquire(HInvoke*
}
void IntrinsicLocationsBuilderX86::VisitVarHandleCompareAndExchangeRelease(HInvoke* invoke) {
- CreateVarHandleCompareAndSetOrExchangeLocations(invoke);
+ CreateVarHandleCompareAndSetOrExchangeLocations(invoke, codegen_);
}
void IntrinsicCodeGeneratorX86::VisitVarHandleCompareAndExchangeRelease(HInvoke* invoke) {
GenerateVarHandleCompareAndSetOrExchange(invoke, codegen_);
}
-static void CreateVarHandleGetAndAddLocations(HInvoke* invoke) {
+static void CreateVarHandleGetAndAddLocations(HInvoke* invoke, CodeGeneratorX86* codegen) {
// The only read barrier implementation supporting the
// VarHandleGet intrinsic is the Baker-style read barriers.
- if (gUseReadBarrier && !kUseBakerReadBarrier) {
+ if (codegen->EmitNonBakerReadBarrier()) {
return;
}
@@ -4493,7 +4771,7 @@ static void CreateVarHandleGetAndAddLocations(HInvoke* invoke) {
static void GenerateVarHandleGetAndAdd(HInvoke* invoke, CodeGeneratorX86* codegen) {
// The only read barrier implementation supporting the
// VarHandleGet intrinsic is the Baker-style read barriers.
- DCHECK_IMPLIES(gUseReadBarrier, kUseBakerReadBarrier);
+ DCHECK_IMPLIES(codegen->EmitReadBarrier(), kUseBakerReadBarrier);
X86Assembler* assembler = codegen->GetAssembler();
LocationSummary* locations = invoke->GetLocations();
@@ -4568,7 +4846,7 @@ static void GenerateVarHandleGetAndAdd(HInvoke* invoke, CodeGeneratorX86* codege
}
void IntrinsicLocationsBuilderX86::VisitVarHandleGetAndAdd(HInvoke* invoke) {
- CreateVarHandleGetAndAddLocations(invoke);
+ CreateVarHandleGetAndAddLocations(invoke, codegen_);
}
void IntrinsicCodeGeneratorX86::VisitVarHandleGetAndAdd(HInvoke* invoke) {
@@ -4576,7 +4854,7 @@ void IntrinsicCodeGeneratorX86::VisitVarHandleGetAndAdd(HInvoke* invoke) {
}
void IntrinsicLocationsBuilderX86::VisitVarHandleGetAndAddAcquire(HInvoke* invoke) {
- CreateVarHandleGetAndAddLocations(invoke);
+ CreateVarHandleGetAndAddLocations(invoke, codegen_);
}
void IntrinsicCodeGeneratorX86::VisitVarHandleGetAndAddAcquire(HInvoke* invoke) {
@@ -4584,17 +4862,17 @@ void IntrinsicCodeGeneratorX86::VisitVarHandleGetAndAddAcquire(HInvoke* invoke)
}
void IntrinsicLocationsBuilderX86::VisitVarHandleGetAndAddRelease(HInvoke* invoke) {
- CreateVarHandleGetAndAddLocations(invoke);
+ CreateVarHandleGetAndAddLocations(invoke, codegen_);
}
void IntrinsicCodeGeneratorX86::VisitVarHandleGetAndAddRelease(HInvoke* invoke) {
GenerateVarHandleGetAndAdd(invoke, codegen_);
}
-static void CreateVarHandleGetAndBitwiseOpLocations(HInvoke* invoke) {
+static void CreateVarHandleGetAndBitwiseOpLocations(HInvoke* invoke, CodeGeneratorX86* codegen) {
// The only read barrier implementation supporting the
// VarHandleGet intrinsic is the Baker-style read barriers.
- if (gUseReadBarrier && !kUseBakerReadBarrier) {
+ if (codegen->EmitNonBakerReadBarrier()) {
return;
}
@@ -4662,7 +4940,7 @@ static void GenerateBitwiseOp(HInvoke* invoke,
static void GenerateVarHandleGetAndBitwiseOp(HInvoke* invoke, CodeGeneratorX86* codegen) {
// The only read barrier implementation supporting the
// VarHandleGet intrinsic is the Baker-style read barriers.
- DCHECK_IMPLIES(gUseReadBarrier, kUseBakerReadBarrier);
+ DCHECK_IMPLIES(codegen->EmitReadBarrier(), kUseBakerReadBarrier);
X86Assembler* assembler = codegen->GetAssembler();
LocationSummary* locations = invoke->GetLocations();
@@ -4723,7 +5001,7 @@ static void GenerateVarHandleGetAndBitwiseOp(HInvoke* invoke, CodeGeneratorX86*
}
void IntrinsicLocationsBuilderX86::VisitVarHandleGetAndBitwiseOr(HInvoke* invoke) {
- CreateVarHandleGetAndBitwiseOpLocations(invoke);
+ CreateVarHandleGetAndBitwiseOpLocations(invoke, codegen_);
}
void IntrinsicCodeGeneratorX86::VisitVarHandleGetAndBitwiseOr(HInvoke* invoke) {
@@ -4731,7 +5009,7 @@ void IntrinsicCodeGeneratorX86::VisitVarHandleGetAndBitwiseOr(HInvoke* invoke) {
}
void IntrinsicLocationsBuilderX86::VisitVarHandleGetAndBitwiseOrAcquire(HInvoke* invoke) {
- CreateVarHandleGetAndBitwiseOpLocations(invoke);
+ CreateVarHandleGetAndBitwiseOpLocations(invoke, codegen_);
}
void IntrinsicCodeGeneratorX86::VisitVarHandleGetAndBitwiseOrAcquire(HInvoke* invoke) {
@@ -4739,7 +5017,7 @@ void IntrinsicCodeGeneratorX86::VisitVarHandleGetAndBitwiseOrAcquire(HInvoke* in
}
void IntrinsicLocationsBuilderX86::VisitVarHandleGetAndBitwiseOrRelease(HInvoke* invoke) {
- CreateVarHandleGetAndBitwiseOpLocations(invoke);
+ CreateVarHandleGetAndBitwiseOpLocations(invoke, codegen_);
}
void IntrinsicCodeGeneratorX86::VisitVarHandleGetAndBitwiseOrRelease(HInvoke* invoke) {
@@ -4747,7 +5025,7 @@ void IntrinsicCodeGeneratorX86::VisitVarHandleGetAndBitwiseOrRelease(HInvoke* in
}
void IntrinsicLocationsBuilderX86::VisitVarHandleGetAndBitwiseXor(HInvoke* invoke) {
- CreateVarHandleGetAndBitwiseOpLocations(invoke);
+ CreateVarHandleGetAndBitwiseOpLocations(invoke, codegen_);
}
void IntrinsicCodeGeneratorX86::VisitVarHandleGetAndBitwiseXor(HInvoke* invoke) {
@@ -4755,7 +5033,7 @@ void IntrinsicCodeGeneratorX86::VisitVarHandleGetAndBitwiseXor(HInvoke* invoke)
}
void IntrinsicLocationsBuilderX86::VisitVarHandleGetAndBitwiseXorAcquire(HInvoke* invoke) {
- CreateVarHandleGetAndBitwiseOpLocations(invoke);
+ CreateVarHandleGetAndBitwiseOpLocations(invoke, codegen_);
}
void IntrinsicCodeGeneratorX86::VisitVarHandleGetAndBitwiseXorAcquire(HInvoke* invoke) {
@@ -4763,7 +5041,7 @@ void IntrinsicCodeGeneratorX86::VisitVarHandleGetAndBitwiseXorAcquire(HInvoke* i
}
void IntrinsicLocationsBuilderX86::VisitVarHandleGetAndBitwiseXorRelease(HInvoke* invoke) {
- CreateVarHandleGetAndBitwiseOpLocations(invoke);
+ CreateVarHandleGetAndBitwiseOpLocations(invoke, codegen_);
}
void IntrinsicCodeGeneratorX86::VisitVarHandleGetAndBitwiseXorRelease(HInvoke* invoke) {
@@ -4771,7 +5049,7 @@ void IntrinsicCodeGeneratorX86::VisitVarHandleGetAndBitwiseXorRelease(HInvoke* i
}
void IntrinsicLocationsBuilderX86::VisitVarHandleGetAndBitwiseAnd(HInvoke* invoke) {
- CreateVarHandleGetAndBitwiseOpLocations(invoke);
+ CreateVarHandleGetAndBitwiseOpLocations(invoke, codegen_);
}
void IntrinsicCodeGeneratorX86::VisitVarHandleGetAndBitwiseAnd(HInvoke* invoke) {
@@ -4779,7 +5057,7 @@ void IntrinsicCodeGeneratorX86::VisitVarHandleGetAndBitwiseAnd(HInvoke* invoke)
}
void IntrinsicLocationsBuilderX86::VisitVarHandleGetAndBitwiseAndAcquire(HInvoke* invoke) {
- CreateVarHandleGetAndBitwiseOpLocations(invoke);
+ CreateVarHandleGetAndBitwiseOpLocations(invoke, codegen_);
}
void IntrinsicCodeGeneratorX86::VisitVarHandleGetAndBitwiseAndAcquire(HInvoke* invoke) {
@@ -4787,7 +5065,7 @@ void IntrinsicCodeGeneratorX86::VisitVarHandleGetAndBitwiseAndAcquire(HInvoke* i
}
void IntrinsicLocationsBuilderX86::VisitVarHandleGetAndBitwiseAndRelease(HInvoke* invoke) {
- CreateVarHandleGetAndBitwiseOpLocations(invoke);
+ CreateVarHandleGetAndBitwiseOpLocations(invoke, codegen_);
}
void IntrinsicCodeGeneratorX86::VisitVarHandleGetAndBitwiseAndRelease(HInvoke* invoke) {
diff --git a/compiler/optimizing/intrinsics_x86.h b/compiler/optimizing/intrinsics_x86.h
index 77c236d244..289a3c342c 100644
--- a/compiler/optimizing/intrinsics_x86.h
+++ b/compiler/optimizing/intrinsics_x86.h
@@ -19,6 +19,7 @@
#include "base/macros.h"
#include "intrinsics.h"
+#include "intrinsics_list.h"
namespace art HIDDEN {
@@ -39,9 +40,7 @@ class IntrinsicLocationsBuilderX86 final : public IntrinsicVisitor {
#define OPTIMIZING_INTRINSICS(Name, IsStatic, NeedsEnvironmentOrCache, SideEffects, Exceptions, ...) \
void Visit ## Name(HInvoke* invoke) override;
-#include "intrinsics_list.h"
- INTRINSICS_LIST(OPTIMIZING_INTRINSICS)
-#undef INTRINSICS_LIST
+ ART_INTRINSICS_LIST(OPTIMIZING_INTRINSICS)
#undef OPTIMIZING_INTRINSICS
// Check whether an invoke is an intrinsic, and if so, create a location summary. Returns whether
@@ -64,9 +63,7 @@ class IntrinsicCodeGeneratorX86 final : public IntrinsicVisitor {
#define OPTIMIZING_INTRINSICS(Name, IsStatic, NeedsEnvironmentOrCache, SideEffects, Exceptions, ...) \
void Visit ## Name(HInvoke* invoke) override;
-#include "intrinsics_list.h"
- INTRINSICS_LIST(OPTIMIZING_INTRINSICS)
-#undef INTRINSICS_LIST
+ ART_INTRINSICS_LIST(OPTIMIZING_INTRINSICS)
#undef OPTIMIZING_INTRINSICS
private:
@@ -74,6 +71,10 @@ class IntrinsicCodeGeneratorX86 final : public IntrinsicVisitor {
ArenaAllocator* GetAllocator();
+ void HandleValueOf(HInvoke* invoke,
+ const IntrinsicVisitor::ValueOfInfo& info,
+ DataType::Type type);
+
CodeGeneratorX86* const codegen_;
DISALLOW_COPY_AND_ASSIGN(IntrinsicCodeGeneratorX86);
diff --git a/compiler/optimizing/intrinsics_x86_64.cc b/compiler/optimizing/intrinsics_x86_64.cc
index 9d0d5f155e..493cd67c27 100644
--- a/compiler/optimizing/intrinsics_x86_64.cc
+++ b/compiler/optimizing/intrinsics_x86_64.cc
@@ -25,6 +25,7 @@
#include "entrypoints/quick/quick_entrypoints.h"
#include "heap_poisoning.h"
#include "intrinsics.h"
+#include "intrinsic_objects.h"
#include "intrinsics_utils.h"
#include "lock_word.h"
#include "mirror/array-inl.h"
@@ -35,6 +36,7 @@
#include "thread-current-inl.h"
#include "utils/x86_64/assembler_x86_64.h"
#include "utils/x86_64/constants_x86_64.h"
+#include "well_known_classes.h"
namespace art HIDDEN {
@@ -71,11 +73,10 @@ class ReadBarrierSystemArrayCopySlowPathX86_64 : public SlowPathCode {
public:
explicit ReadBarrierSystemArrayCopySlowPathX86_64(HInstruction* instruction)
: SlowPathCode(instruction) {
- DCHECK(gUseReadBarrier);
- DCHECK(kUseBakerReadBarrier);
}
void EmitNativeCode(CodeGenerator* codegen) override {
+ DCHECK(codegen->EmitBakerReadBarrier());
CodeGeneratorX86_64* x86_64_codegen = down_cast<CodeGeneratorX86_64*>(codegen);
LocationSummary* locations = instruction_->GetLocations();
DCHECK(locations->CanCall());
@@ -222,34 +223,34 @@ static void GenIsInfinite(LocationSummary* locations,
double kPositiveInfinity = std::numeric_limits<double>::infinity();
double kNegativeInfinity = -1 * kPositiveInfinity;
- __ xorq(output, output);
- __ comisd(input, codegen->LiteralDoubleAddress(kPositiveInfinity));
- __ j(kNotEqual, &done1);
- __ j(kParityEven, &done2);
- __ movq(output, Immediate(1));
- __ jmp(&done2);
- __ Bind(&done1);
- __ comisd(input, codegen->LiteralDoubleAddress(kNegativeInfinity));
- __ j(kNotEqual, &done2);
- __ j(kParityEven, &done2);
- __ movq(output, Immediate(1));
- __ Bind(&done2);
+ __ xorq(output, output);
+ __ comisd(input, codegen->LiteralDoubleAddress(kPositiveInfinity));
+ __ j(kNotEqual, &done1);
+ __ j(kParityEven, &done2);
+ __ movq(output, Immediate(1));
+ __ jmp(&done2);
+ __ Bind(&done1);
+ __ comisd(input, codegen->LiteralDoubleAddress(kNegativeInfinity));
+ __ j(kNotEqual, &done2);
+ __ j(kParityEven, &done2);
+ __ movq(output, Immediate(1));
+ __ Bind(&done2);
} else {
float kPositiveInfinity = std::numeric_limits<float>::infinity();
float kNegativeInfinity = -1 * kPositiveInfinity;
- __ xorl(output, output);
- __ comiss(input, codegen->LiteralFloatAddress(kPositiveInfinity));
- __ j(kNotEqual, &done1);
- __ j(kParityEven, &done2);
- __ movl(output, Immediate(1));
- __ jmp(&done2);
- __ Bind(&done1);
- __ comiss(input, codegen->LiteralFloatAddress(kNegativeInfinity));
- __ j(kNotEqual, &done2);
- __ j(kParityEven, &done2);
- __ movl(output, Immediate(1));
- __ Bind(&done2);
+ __ xorl(output, output);
+ __ comiss(input, codegen->LiteralFloatAddress(kPositiveInfinity));
+ __ j(kNotEqual, &done1);
+ __ j(kParityEven, &done2);
+ __ movl(output, Immediate(1));
+ __ jmp(&done2);
+ __ Bind(&done1);
+ __ comiss(input, codegen->LiteralFloatAddress(kNegativeInfinity));
+ __ j(kNotEqual, &done2);
+ __ j(kParityEven, &done2);
+ __ movl(output, Immediate(1));
+ __ Bind(&done2);
}
}
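The reordered block above emits the same comparison sequence as before (only the indentation changes): `comisd`/`comiss` against positive and negative infinity, with the `kParityEven` branches filtering out NaN inputs, for which the comparison sets the parity flag. As a reference point, the value the sequence leaves in `output` is equivalent to the following sketch.

```cpp
// Sketch of the result GenIsInfinite materializes into `output`:
// 1 when the input equals +inf or -inf, 0 otherwise (including NaN).
#include <cassert>
#include <cmath>
#include <cstdint>
#include <limits>

template <typename FP>
int32_t IsInfiniteReference(FP input) {
  const FP pos_inf = std::numeric_limits<FP>::infinity();
  const FP neg_inf = -pos_inf;
  // comisd/comiss report unordered (NaN) operands via the parity flag,
  // which the kParityEven branches above use to skip the `mov output, 1`.
  if (std::isnan(input)) return 0;
  return (input == pos_inf || input == neg_inf) ? 1 : 0;
}

int main() {
  assert(IsInfiniteReference(std::numeric_limits<double>::infinity()) == 1);
  assert(IsInfiniteReference(-std::numeric_limits<float>::infinity()) == 1);
  assert(IsInfiniteReference(0.0) == 0);
  assert(IsInfiniteReference(std::nan("")) == 0);
  return 0;
}
```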
@@ -617,8 +618,8 @@ void IntrinsicCodeGeneratorX86_64::VisitMathNextAfter(HInvoke* invoke) {
static void CreateSystemArrayCopyLocations(HInvoke* invoke) {
// Check to see if we have known failures that will cause us to have to bail out
// to the runtime, and just generate the runtime call directly.
- HIntConstant* src_pos = invoke->InputAt(1)->AsIntConstant();
- HIntConstant* dest_pos = invoke->InputAt(3)->AsIntConstant();
+ HIntConstant* src_pos = invoke->InputAt(1)->AsIntConstantOrNull();
+ HIntConstant* dest_pos = invoke->InputAt(3)->AsIntConstantOrNull();
// The positions must be non-negative.
if ((src_pos != nullptr && src_pos->GetValue() < 0) ||
@@ -628,7 +629,7 @@ static void CreateSystemArrayCopyLocations(HInvoke* invoke) {
}
// The length must be > 0.
- HIntConstant* length = invoke->InputAt(4)->AsIntConstant();
+ HIntConstant* length = invoke->InputAt(4)->AsIntConstantOrNull();
if (length != nullptr) {
int32_t len = length->GetValue();
if (len < 0) {
@@ -836,7 +837,7 @@ void IntrinsicLocationsBuilderX86_64::VisitSystemArrayCopyInt(HInvoke* invoke) {
void IntrinsicLocationsBuilderX86_64::VisitSystemArrayCopy(HInvoke* invoke) {
// The only read barrier implementation supporting the
// SystemArrayCopy intrinsic is the Baker-style read barriers.
- if (gUseReadBarrier && !kUseBakerReadBarrier) {
+ if (codegen_->EmitNonBakerReadBarrier()) {
return;
}
@@ -887,7 +888,7 @@ static void GenSystemArrayCopyAddresses(X86_64Assembler* assembler,
void IntrinsicCodeGeneratorX86_64::VisitSystemArrayCopy(HInvoke* invoke) {
// The only read barrier implementation supporting the
// SystemArrayCopy intrinsic is the Baker-style read barriers.
- DCHECK_IMPLIES(gUseReadBarrier, kUseBakerReadBarrier);
+ DCHECK_IMPLIES(codegen_->EmitReadBarrier(), kUseBakerReadBarrier);
X86_64Assembler* assembler = GetAssembler();
LocationSummary* locations = invoke->GetLocations();
@@ -1002,7 +1003,7 @@ void IntrinsicCodeGeneratorX86_64::VisitSystemArrayCopy(HInvoke* invoke) {
// slow path.
bool did_unpoison = false;
- if (gUseReadBarrier && kUseBakerReadBarrier) {
+ if (codegen_->EmitBakerReadBarrier()) {
// /* HeapReference<Class> */ temp1 = dest->klass_
codegen_->GenerateFieldLoadWithBakerReadBarrier(
invoke, temp1_loc, dest, class_offset, /* needs_null_check= */ false);
@@ -1014,9 +1015,8 @@ void IntrinsicCodeGeneratorX86_64::VisitSystemArrayCopy(HInvoke* invoke) {
// /* HeapReference<Class> */ temp2 = src->klass_
codegen_->GenerateFieldLoadWithBakerReadBarrier(
invoke, temp2_loc, src, class_offset, /* needs_null_check= */ false);
- // If heap poisoning is enabled, `temp1` and `temp2` have been
- // unpoisoned by the the previous calls to
- // GenerateFieldLoadWithBakerReadBarrier.
+ // If heap poisoning is enabled, `temp1` and `temp2` have been unpoisoned
+ // by the previous calls to GenerateFieldLoadWithBakerReadBarrier.
} else {
// /* HeapReference<Class> */ temp1 = dest->klass_
__ movl(temp1, Address(dest, class_offset));
@@ -1034,14 +1034,14 @@ void IntrinsicCodeGeneratorX86_64::VisitSystemArrayCopy(HInvoke* invoke) {
if (!optimizations.GetDestinationIsNonPrimitiveArray()) {
// Bail out if the destination is not a non primitive array.
- if (gUseReadBarrier && kUseBakerReadBarrier) {
+ if (codegen_->EmitBakerReadBarrier()) {
// /* HeapReference<Class> */ TMP = temp1->component_type_
codegen_->GenerateFieldLoadWithBakerReadBarrier(
invoke, TMP_loc, temp1, component_offset, /* needs_null_check= */ false);
__ testl(CpuRegister(TMP), CpuRegister(TMP));
__ j(kEqual, intrinsic_slow_path->GetEntryLabel());
// If heap poisoning is enabled, `TMP` has been unpoisoned by
- // the the previous call to GenerateFieldLoadWithBakerReadBarrier.
+ // the previous call to GenerateFieldLoadWithBakerReadBarrier.
} else {
// /* HeapReference<Class> */ TMP = temp1->component_type_
__ movl(CpuRegister(TMP), Address(temp1, component_offset));
@@ -1055,7 +1055,7 @@ void IntrinsicCodeGeneratorX86_64::VisitSystemArrayCopy(HInvoke* invoke) {
if (!optimizations.GetSourceIsNonPrimitiveArray()) {
// Bail out if the source is not a non primitive array.
- if (gUseReadBarrier && kUseBakerReadBarrier) {
+ if (codegen_->EmitBakerReadBarrier()) {
// For the same reason given earlier, `temp1` is not trashed by the
// read barrier emitted by GenerateFieldLoadWithBakerReadBarrier below.
// /* HeapReference<Class> */ TMP = temp2->component_type_
@@ -1064,7 +1064,7 @@ void IntrinsicCodeGeneratorX86_64::VisitSystemArrayCopy(HInvoke* invoke) {
__ testl(CpuRegister(TMP), CpuRegister(TMP));
__ j(kEqual, intrinsic_slow_path->GetEntryLabel());
// If heap poisoning is enabled, `TMP` has been unpoisoned by
- // the the previous call to GenerateFieldLoadWithBakerReadBarrier.
+ // the previous call to GenerateFieldLoadWithBakerReadBarrier.
} else {
// /* HeapReference<Class> */ TMP = temp2->component_type_
__ movl(CpuRegister(TMP), Address(temp2, component_offset));
@@ -1081,7 +1081,7 @@ void IntrinsicCodeGeneratorX86_64::VisitSystemArrayCopy(HInvoke* invoke) {
if (optimizations.GetDestinationIsTypedObjectArray()) {
NearLabel do_copy;
__ j(kEqual, &do_copy);
- if (gUseReadBarrier && kUseBakerReadBarrier) {
+ if (codegen_->EmitBakerReadBarrier()) {
// /* HeapReference<Class> */ temp1 = temp1->component_type_
codegen_->GenerateFieldLoadWithBakerReadBarrier(
invoke, temp1_loc, temp1, component_offset, /* needs_null_check= */ false);
@@ -1109,7 +1109,7 @@ void IntrinsicCodeGeneratorX86_64::VisitSystemArrayCopy(HInvoke* invoke) {
} else if (!optimizations.GetSourceIsNonPrimitiveArray()) {
DCHECK(optimizations.GetDestinationIsNonPrimitiveArray());
// Bail out if the source is not a non primitive array.
- if (gUseReadBarrier && kUseBakerReadBarrier) {
+ if (codegen_->EmitBakerReadBarrier()) {
// /* HeapReference<Class> */ temp1 = src->klass_
codegen_->GenerateFieldLoadWithBakerReadBarrier(
invoke, temp1_loc, src, class_offset, /* needs_null_check= */ false);
@@ -1141,7 +1141,7 @@ void IntrinsicCodeGeneratorX86_64::VisitSystemArrayCopy(HInvoke* invoke) {
GenSystemArrayCopyAddresses(
GetAssembler(), type, src, src_pos, dest, dest_pos, length, temp1, temp2, temp3);
- if (gUseReadBarrier && kUseBakerReadBarrier) {
+ if (codegen_->EmitBakerReadBarrier()) {
// SystemArrayCopy implementation for Baker read barriers (see
// also CodeGeneratorX86_64::GenerateReferenceLoadWithBakerReadBarrier):
//
@@ -1424,7 +1424,7 @@ static void GenerateStringIndexOf(HInvoke* invoke,
HInstruction* code_point = invoke->InputAt(1);
if (code_point->IsIntConstant()) {
if (static_cast<uint32_t>(code_point->AsIntConstant()->GetValue()) >
- std::numeric_limits<uint16_t>::max()) {
+ std::numeric_limits<uint16_t>::max()) {
// Always needs the slow-path. We could directly dispatch to it, but this case should be
// rare, so for simplicity just put the full slow-path down and branch unconditionally.
slow_path = new (codegen->GetScopedAllocator()) IntrinsicSlowPathX86_64(invoke);
@@ -1655,7 +1655,7 @@ void IntrinsicCodeGeneratorX86_64::VisitStringGetCharsNoCheck(HInvoke* invoke) {
CpuRegister obj = locations->InAt(0).AsRegister<CpuRegister>();
Location srcBegin = locations->InAt(1);
int srcBegin_value =
- srcBegin.IsConstant() ? srcBegin.GetConstant()->AsIntConstant()->GetValue() : 0;
+ srcBegin.IsConstant() ? srcBegin.GetConstant()->AsIntConstant()->GetValue() : 0;
CpuRegister srcEnd = locations->InAt(2).AsRegister<CpuRegister>();
CpuRegister dst = locations->InAt(3).AsRegister<CpuRegister>();
CpuRegister dstBegin = locations->InAt(4).AsRegister<CpuRegister>();
@@ -1871,7 +1871,7 @@ void IntrinsicCodeGeneratorX86_64::VisitThreadCurrentThread(HInvoke* invoke) {
static void GenUnsafeGet(HInvoke* invoke,
DataType::Type type,
- bool is_volatile ATTRIBUTE_UNUSED,
+ [[maybe_unused]] bool is_volatile,
CodeGeneratorX86_64* codegen) {
X86_64Assembler* assembler = down_cast<X86_64Assembler*>(codegen->GetAssembler());
LocationSummary* locations = invoke->GetLocations();
@@ -1883,12 +1883,16 @@ static void GenUnsafeGet(HInvoke* invoke,
CpuRegister output = output_loc.AsRegister<CpuRegister>();
switch (type) {
+ case DataType::Type::kInt8:
+ __ movsxb(output, Address(base, offset, ScaleFactor::TIMES_1, 0));
+ break;
+
case DataType::Type::kInt32:
__ movl(output, Address(base, offset, ScaleFactor::TIMES_1, 0));
break;
case DataType::Type::kReference: {
- if (gUseReadBarrier) {
+ if (codegen->EmitReadBarrier()) {
if (kUseBakerReadBarrier) {
Address src(base, offset, ScaleFactor::TIMES_1, 0);
codegen->GenerateReferenceLoadWithBakerReadBarrier(
@@ -1915,22 +1919,10 @@ static void GenUnsafeGet(HInvoke* invoke,
}
}
-static bool UnsafeGetIntrinsicOnCallList(Intrinsics intrinsic) {
- switch (intrinsic) {
- case Intrinsics::kUnsafeGetObject:
- case Intrinsics::kUnsafeGetObjectVolatile:
- case Intrinsics::kJdkUnsafeGetObject:
- case Intrinsics::kJdkUnsafeGetObjectVolatile:
- case Intrinsics::kJdkUnsafeGetObjectAcquire:
- return true;
- default:
- break;
- }
- return false;
-}
-
-static void CreateIntIntIntToIntLocations(ArenaAllocator* allocator, HInvoke* invoke) {
- bool can_call = gUseReadBarrier && UnsafeGetIntrinsicOnCallList(invoke->GetIntrinsic());
+static void CreateIntIntIntToIntLocations(ArenaAllocator* allocator,
+ HInvoke* invoke,
+ CodeGeneratorX86_64* codegen) {
+ bool can_call = codegen->EmitReadBarrier() && IsUnsafeGetReference(invoke);
LocationSummary* locations =
new (allocator) LocationSummary(invoke,
can_call
@@ -1960,40 +1952,45 @@ void IntrinsicLocationsBuilderX86_64::VisitUnsafeGetLongVolatile(HInvoke* invoke
VisitJdkUnsafeGetLongVolatile(invoke);
}
void IntrinsicLocationsBuilderX86_64::VisitUnsafeGetObject(HInvoke* invoke) {
- VisitJdkUnsafeGetObject(invoke);
+ VisitJdkUnsafeGetReference(invoke);
}
void IntrinsicLocationsBuilderX86_64::VisitUnsafeGetObjectVolatile(HInvoke* invoke) {
- VisitJdkUnsafeGetObjectVolatile(invoke);
+ VisitJdkUnsafeGetReferenceVolatile(invoke);
+}
+void IntrinsicLocationsBuilderX86_64::VisitUnsafeGetByte(HInvoke* invoke) {
+ VisitJdkUnsafeGetByte(invoke);
}
void IntrinsicLocationsBuilderX86_64::VisitJdkUnsafeGet(HInvoke* invoke) {
- CreateIntIntIntToIntLocations(allocator_, invoke);
+ CreateIntIntIntToIntLocations(allocator_, invoke, codegen_);
}
void IntrinsicLocationsBuilderX86_64::VisitJdkUnsafeGetVolatile(HInvoke* invoke) {
- CreateIntIntIntToIntLocations(allocator_, invoke);
+ CreateIntIntIntToIntLocations(allocator_, invoke, codegen_);
}
void IntrinsicLocationsBuilderX86_64::VisitJdkUnsafeGetAcquire(HInvoke* invoke) {
- CreateIntIntIntToIntLocations(allocator_, invoke);
+ CreateIntIntIntToIntLocations(allocator_, invoke, codegen_);
}
void IntrinsicLocationsBuilderX86_64::VisitJdkUnsafeGetLong(HInvoke* invoke) {
- CreateIntIntIntToIntLocations(allocator_, invoke);
+ CreateIntIntIntToIntLocations(allocator_, invoke, codegen_);
}
void IntrinsicLocationsBuilderX86_64::VisitJdkUnsafeGetLongVolatile(HInvoke* invoke) {
- CreateIntIntIntToIntLocations(allocator_, invoke);
+ CreateIntIntIntToIntLocations(allocator_, invoke, codegen_);
}
void IntrinsicLocationsBuilderX86_64::VisitJdkUnsafeGetLongAcquire(HInvoke* invoke) {
- CreateIntIntIntToIntLocations(allocator_, invoke);
+ CreateIntIntIntToIntLocations(allocator_, invoke, codegen_);
}
-void IntrinsicLocationsBuilderX86_64::VisitJdkUnsafeGetObject(HInvoke* invoke) {
- CreateIntIntIntToIntLocations(allocator_, invoke);
+void IntrinsicLocationsBuilderX86_64::VisitJdkUnsafeGetReference(HInvoke* invoke) {
+ CreateIntIntIntToIntLocations(allocator_, invoke, codegen_);
}
-void IntrinsicLocationsBuilderX86_64::VisitJdkUnsafeGetObjectVolatile(HInvoke* invoke) {
- CreateIntIntIntToIntLocations(allocator_, invoke);
+void IntrinsicLocationsBuilderX86_64::VisitJdkUnsafeGetReferenceVolatile(HInvoke* invoke) {
+ CreateIntIntIntToIntLocations(allocator_, invoke, codegen_);
}
-void IntrinsicLocationsBuilderX86_64::VisitJdkUnsafeGetObjectAcquire(HInvoke* invoke) {
- CreateIntIntIntToIntLocations(allocator_, invoke);
+void IntrinsicLocationsBuilderX86_64::VisitJdkUnsafeGetReferenceAcquire(HInvoke* invoke) {
+ CreateIntIntIntToIntLocations(allocator_, invoke, codegen_);
+}
+void IntrinsicLocationsBuilderX86_64::VisitJdkUnsafeGetByte(HInvoke* invoke) {
+ CreateIntIntIntToIntLocations(allocator_, invoke, codegen_);
}
-
void IntrinsicCodeGeneratorX86_64::VisitUnsafeGet(HInvoke* invoke) {
VisitJdkUnsafeGet(invoke);
@@ -2008,10 +2005,13 @@ void IntrinsicCodeGeneratorX86_64::VisitUnsafeGetLongVolatile(HInvoke* invoke) {
VisitJdkUnsafeGetLongVolatile(invoke);
}
void IntrinsicCodeGeneratorX86_64::VisitUnsafeGetObject(HInvoke* invoke) {
- VisitJdkUnsafeGetObject(invoke);
+ VisitJdkUnsafeGetReference(invoke);
}
void IntrinsicCodeGeneratorX86_64::VisitUnsafeGetObjectVolatile(HInvoke* invoke) {
- VisitJdkUnsafeGetObjectVolatile(invoke);
+ VisitJdkUnsafeGetReferenceVolatile(invoke);
+}
+void IntrinsicCodeGeneratorX86_64::VisitUnsafeGetByte(HInvoke* invoke) {
+ VisitJdkUnsafeGetByte(invoke);
}
void IntrinsicCodeGeneratorX86_64::VisitJdkUnsafeGet(HInvoke* invoke) {
@@ -2032,16 +2032,18 @@ void IntrinsicCodeGeneratorX86_64::VisitJdkUnsafeGetLongVolatile(HInvoke* invoke
void IntrinsicCodeGeneratorX86_64::VisitJdkUnsafeGetLongAcquire(HInvoke* invoke) {
GenUnsafeGet(invoke, DataType::Type::kInt64, /*is_volatile=*/ true, codegen_);
}
-void IntrinsicCodeGeneratorX86_64::VisitJdkUnsafeGetObject(HInvoke* invoke) {
+void IntrinsicCodeGeneratorX86_64::VisitJdkUnsafeGetReference(HInvoke* invoke) {
GenUnsafeGet(invoke, DataType::Type::kReference, /*is_volatile=*/ false, codegen_);
}
-void IntrinsicCodeGeneratorX86_64::VisitJdkUnsafeGetObjectVolatile(HInvoke* invoke) {
+void IntrinsicCodeGeneratorX86_64::VisitJdkUnsafeGetReferenceVolatile(HInvoke* invoke) {
GenUnsafeGet(invoke, DataType::Type::kReference, /*is_volatile=*/ true, codegen_);
}
-void IntrinsicCodeGeneratorX86_64::VisitJdkUnsafeGetObjectAcquire(HInvoke* invoke) {
+void IntrinsicCodeGeneratorX86_64::VisitJdkUnsafeGetReferenceAcquire(HInvoke* invoke) {
GenUnsafeGet(invoke, DataType::Type::kReference, /*is_volatile=*/ true, codegen_);
}
-
+void IntrinsicCodeGeneratorX86_64::VisitJdkUnsafeGetByte(HInvoke* invoke) {
+ GenUnsafeGet(invoke, DataType::Type::kInt8, /*is_volatile=*/false, codegen_);
+}
static void CreateIntIntIntIntToVoidPlusTempsLocations(ArenaAllocator* allocator,
DataType::Type type,
@@ -2069,13 +2071,13 @@ void IntrinsicLocationsBuilderX86_64::VisitUnsafePutVolatile(HInvoke* invoke) {
VisitJdkUnsafePutVolatile(invoke);
}
void IntrinsicLocationsBuilderX86_64::VisitUnsafePutObject(HInvoke* invoke) {
- VisitJdkUnsafePutObject(invoke);
+ VisitJdkUnsafePutReference(invoke);
}
void IntrinsicLocationsBuilderX86_64::VisitUnsafePutObjectOrdered(HInvoke* invoke) {
VisitJdkUnsafePutObjectOrdered(invoke);
}
void IntrinsicLocationsBuilderX86_64::VisitUnsafePutObjectVolatile(HInvoke* invoke) {
- VisitJdkUnsafePutObjectVolatile(invoke);
+ VisitJdkUnsafePutReferenceVolatile(invoke);
}
void IntrinsicLocationsBuilderX86_64::VisitUnsafePutLong(HInvoke* invoke) {
VisitJdkUnsafePutLong(invoke);
@@ -2086,6 +2088,9 @@ void IntrinsicLocationsBuilderX86_64::VisitUnsafePutLongOrdered(HInvoke* invoke)
void IntrinsicLocationsBuilderX86_64::VisitUnsafePutLongVolatile(HInvoke* invoke) {
VisitJdkUnsafePutLongVolatile(invoke);
}
+void IntrinsicLocationsBuilderX86_64::VisitUnsafePutByte(HInvoke* invoke) {
+ VisitJdkUnsafePut(invoke);
+}
void IntrinsicLocationsBuilderX86_64::VisitJdkUnsafePut(HInvoke* invoke) {
CreateIntIntIntIntToVoidPlusTempsLocations(allocator_, DataType::Type::kInt32, invoke);
@@ -2099,16 +2104,16 @@ void IntrinsicLocationsBuilderX86_64::VisitJdkUnsafePutVolatile(HInvoke* invoke)
void IntrinsicLocationsBuilderX86_64::VisitJdkUnsafePutRelease(HInvoke* invoke) {
CreateIntIntIntIntToVoidPlusTempsLocations(allocator_, DataType::Type::kInt32, invoke);
}
-void IntrinsicLocationsBuilderX86_64::VisitJdkUnsafePutObject(HInvoke* invoke) {
+void IntrinsicLocationsBuilderX86_64::VisitJdkUnsafePutReference(HInvoke* invoke) {
CreateIntIntIntIntToVoidPlusTempsLocations(allocator_, DataType::Type::kReference, invoke);
}
void IntrinsicLocationsBuilderX86_64::VisitJdkUnsafePutObjectOrdered(HInvoke* invoke) {
CreateIntIntIntIntToVoidPlusTempsLocations(allocator_, DataType::Type::kReference, invoke);
}
-void IntrinsicLocationsBuilderX86_64::VisitJdkUnsafePutObjectVolatile(HInvoke* invoke) {
+void IntrinsicLocationsBuilderX86_64::VisitJdkUnsafePutReferenceVolatile(HInvoke* invoke) {
CreateIntIntIntIntToVoidPlusTempsLocations(allocator_, DataType::Type::kReference, invoke);
}
-void IntrinsicLocationsBuilderX86_64::VisitJdkUnsafePutObjectRelease(HInvoke* invoke) {
+void IntrinsicLocationsBuilderX86_64::VisitJdkUnsafePutReferenceRelease(HInvoke* invoke) {
CreateIntIntIntIntToVoidPlusTempsLocations(allocator_, DataType::Type::kReference, invoke);
}
void IntrinsicLocationsBuilderX86_64::VisitJdkUnsafePutLong(HInvoke* invoke) {
@@ -2123,6 +2128,9 @@ void IntrinsicLocationsBuilderX86_64::VisitJdkUnsafePutLongVolatile(HInvoke* inv
void IntrinsicLocationsBuilderX86_64::VisitJdkUnsafePutLongRelease(HInvoke* invoke) {
CreateIntIntIntIntToVoidPlusTempsLocations(allocator_, DataType::Type::kInt64, invoke);
}
+void IntrinsicLocationsBuilderX86_64::VisitJdkUnsafePutByte(HInvoke* invoke) {
+ CreateIntIntIntIntToVoidPlusTempsLocations(allocator_, DataType::Type::kUint8, invoke);
+}
// We don't care for ordered: it requires an AnyStore barrier, which is already given by the x86
// memory model.
@@ -2168,13 +2176,13 @@ void IntrinsicCodeGeneratorX86_64::VisitUnsafePutVolatile(HInvoke* invoke) {
VisitJdkUnsafePutVolatile(invoke);
}
void IntrinsicCodeGeneratorX86_64::VisitUnsafePutObject(HInvoke* invoke) {
- VisitJdkUnsafePutObject(invoke);
+ VisitJdkUnsafePutReference(invoke);
}
void IntrinsicCodeGeneratorX86_64::VisitUnsafePutObjectOrdered(HInvoke* invoke) {
VisitJdkUnsafePutObjectOrdered(invoke);
}
void IntrinsicCodeGeneratorX86_64::VisitUnsafePutObjectVolatile(HInvoke* invoke) {
- VisitJdkUnsafePutObjectVolatile(invoke);
+ VisitJdkUnsafePutReferenceVolatile(invoke);
}
void IntrinsicCodeGeneratorX86_64::VisitUnsafePutLong(HInvoke* invoke) {
VisitJdkUnsafePutLong(invoke);
@@ -2185,6 +2193,9 @@ void IntrinsicCodeGeneratorX86_64::VisitUnsafePutLongOrdered(HInvoke* invoke) {
void IntrinsicCodeGeneratorX86_64::VisitUnsafePutLongVolatile(HInvoke* invoke) {
VisitJdkUnsafePutLongVolatile(invoke);
}
+void IntrinsicCodeGeneratorX86_64::VisitUnsafePutByte(HInvoke* invoke) {
+ VisitJdkUnsafePutByte(invoke);
+}
void IntrinsicCodeGeneratorX86_64::VisitJdkUnsafePut(HInvoke* invoke) {
GenUnsafePut(invoke->GetLocations(), DataType::Type::kInt32, /*is_volatile=*/ false, codegen_);
@@ -2198,7 +2209,7 @@ void IntrinsicCodeGeneratorX86_64::VisitJdkUnsafePutVolatile(HInvoke* invoke) {
void IntrinsicCodeGeneratorX86_64::VisitJdkUnsafePutRelease(HInvoke* invoke) {
GenUnsafePut(invoke->GetLocations(), DataType::Type::kInt32, /* is_volatile= */ true, codegen_);
}
-void IntrinsicCodeGeneratorX86_64::VisitJdkUnsafePutObject(HInvoke* invoke) {
+void IntrinsicCodeGeneratorX86_64::VisitJdkUnsafePutReference(HInvoke* invoke) {
GenUnsafePut(
invoke->GetLocations(), DataType::Type::kReference, /*is_volatile=*/ false, codegen_);
}
@@ -2206,11 +2217,11 @@ void IntrinsicCodeGeneratorX86_64::VisitJdkUnsafePutObjectOrdered(HInvoke* invok
GenUnsafePut(
invoke->GetLocations(), DataType::Type::kReference, /*is_volatile=*/ false, codegen_);
}
-void IntrinsicCodeGeneratorX86_64::VisitJdkUnsafePutObjectVolatile(HInvoke* invoke) {
+void IntrinsicCodeGeneratorX86_64::VisitJdkUnsafePutReferenceVolatile(HInvoke* invoke) {
GenUnsafePut(
invoke->GetLocations(), DataType::Type::kReference, /*is_volatile=*/ true, codegen_);
}
-void IntrinsicCodeGeneratorX86_64::VisitJdkUnsafePutObjectRelease(HInvoke* invoke) {
+void IntrinsicCodeGeneratorX86_64::VisitJdkUnsafePutReferenceRelease(HInvoke* invoke) {
GenUnsafePut(
invoke->GetLocations(), DataType::Type::kReference, /*is_volatile=*/ true, codegen_);
}
@@ -2226,13 +2237,15 @@ void IntrinsicCodeGeneratorX86_64::VisitJdkUnsafePutLongVolatile(HInvoke* invoke
void IntrinsicCodeGeneratorX86_64::VisitJdkUnsafePutLongRelease(HInvoke* invoke) {
GenUnsafePut(invoke->GetLocations(), DataType::Type::kInt64, /*is_volatile=*/ true, codegen_);
}
+void IntrinsicCodeGeneratorX86_64::VisitJdkUnsafePutByte(HInvoke* invoke) {
+ GenUnsafePut(invoke->GetLocations(), DataType::Type::kInt8, /*is_volatile=*/false, codegen_);
+}
static void CreateUnsafeCASLocations(ArenaAllocator* allocator,
- DataType::Type type,
- HInvoke* invoke) {
- const bool can_call = gUseReadBarrier &&
- kUseBakerReadBarrier &&
- IsUnsafeCASObject(invoke);
+ HInvoke* invoke,
+ CodeGeneratorX86_64* codegen,
+ DataType::Type type) {
+ const bool can_call = codegen->EmitBakerReadBarrier() && IsUnsafeCASReference(invoke);
LocationSummary* locations =
new (allocator) LocationSummary(invoke,
can_call
@@ -2253,7 +2266,7 @@ static void CreateUnsafeCASLocations(ArenaAllocator* allocator,
// Need two temporaries for MarkGCCard.
locations->AddTemp(Location::RequiresRegister()); // Possibly used for reference poisoning too.
locations->AddTemp(Location::RequiresRegister());
- if (gUseReadBarrier) {
+ if (codegen->EmitReadBarrier()) {
// Need three temporaries for GenerateReferenceLoadWithBakerReadBarrier.
DCHECK(kUseBakerReadBarrier);
locations->AddTemp(Location::RequiresRegister());
@@ -2285,24 +2298,24 @@ void IntrinsicLocationsBuilderX86_64::VisitJdkUnsafeCASLong(HInvoke* invoke) {
void IntrinsicLocationsBuilderX86_64::VisitJdkUnsafeCASObject(HInvoke* invoke) {
// `jdk.internal.misc.Unsafe.compareAndSwapObject` has compare-and-set semantics (see javadoc).
- VisitJdkUnsafeCompareAndSetObject(invoke);
+ VisitJdkUnsafeCompareAndSetReference(invoke);
}
void IntrinsicLocationsBuilderX86_64::VisitJdkUnsafeCompareAndSetInt(HInvoke* invoke) {
- CreateUnsafeCASLocations(allocator_, DataType::Type::kInt32, invoke);
+ CreateUnsafeCASLocations(allocator_, invoke, codegen_, DataType::Type::kInt32);
}
void IntrinsicLocationsBuilderX86_64::VisitJdkUnsafeCompareAndSetLong(HInvoke* invoke) {
- CreateUnsafeCASLocations(allocator_, DataType::Type::kInt64, invoke);
+ CreateUnsafeCASLocations(allocator_, invoke, codegen_, DataType::Type::kInt64);
}
-void IntrinsicLocationsBuilderX86_64::VisitJdkUnsafeCompareAndSetObject(HInvoke* invoke) {
+void IntrinsicLocationsBuilderX86_64::VisitJdkUnsafeCompareAndSetReference(HInvoke* invoke) {
// The only supported read barrier implementation is the Baker-style read barriers.
- if (gUseReadBarrier && !kUseBakerReadBarrier) {
+ if (codegen_->EmitNonBakerReadBarrier()) {
return;
}
- CreateUnsafeCASLocations(allocator_, DataType::Type::kReference, invoke);
+ CreateUnsafeCASLocations(allocator_, invoke, codegen_, DataType::Type::kReference);
}
// Convert ZF into the Boolean result.
@@ -2438,7 +2451,7 @@ static void GenCompareAndSetOrExchangeRef(CodeGeneratorX86_64* codegen,
CpuRegister temp3,
bool is_cmpxchg) {
// The only supported read barrier implementation is the Baker-style read barriers.
- DCHECK_IMPLIES(gUseReadBarrier, kUseBakerReadBarrier);
+ DCHECK_IMPLIES(codegen->EmitReadBarrier(), kUseBakerReadBarrier);
X86_64Assembler* assembler = down_cast<X86_64Assembler*>(codegen->GetAssembler());
@@ -2447,7 +2460,7 @@ static void GenCompareAndSetOrExchangeRef(CodeGeneratorX86_64* codegen,
codegen->MarkGCCard(temp1, temp2, base, value, value_can_be_null);
Address field_addr(base, offset, TIMES_1, 0);
- if (gUseReadBarrier && kUseBakerReadBarrier) {
+ if (codegen->EmitBakerReadBarrier()) {
// Need to make sure the reference stored in the field is a to-space
// one before attempting the CAS or the CAS could fail incorrectly.
codegen->GenerateReferenceLoadWithBakerReadBarrier(
@@ -2556,7 +2569,7 @@ static void GenCompareAndSetOrExchange(CodeGeneratorX86_64* codegen,
CpuRegister new_value_reg = new_value.AsRegister<CpuRegister>();
CpuRegister temp1 = locations->GetTemp(temp1_index).AsRegister<CpuRegister>();
CpuRegister temp2 = locations->GetTemp(temp2_index).AsRegister<CpuRegister>();
- CpuRegister temp3 = gUseReadBarrier
+ CpuRegister temp3 = codegen->EmitReadBarrier()
? locations->GetTemp(temp3_index).AsRegister<CpuRegister>()
: CpuRegister(kNoRegister);
DCHECK(RegsAreAllDifferent({base, offset, temp1, temp2, temp3}));
@@ -2611,7 +2624,7 @@ void IntrinsicCodeGeneratorX86_64::VisitJdkUnsafeCASLong(HInvoke* invoke) {
void IntrinsicCodeGeneratorX86_64::VisitJdkUnsafeCASObject(HInvoke* invoke) {
// `jdk.internal.misc.Unsafe.compareAndSwapObject` has compare-and-set semantics (see javadoc).
- VisitJdkUnsafeCompareAndSetObject(invoke);
+ VisitJdkUnsafeCompareAndSetReference(invoke);
}
void IntrinsicCodeGeneratorX86_64::VisitJdkUnsafeCompareAndSetInt(HInvoke* invoke) {
@@ -2622,13 +2635,195 @@ void IntrinsicCodeGeneratorX86_64::VisitJdkUnsafeCompareAndSetLong(HInvoke* invo
GenCAS(DataType::Type::kInt64, invoke, codegen_);
}
-void IntrinsicCodeGeneratorX86_64::VisitJdkUnsafeCompareAndSetObject(HInvoke* invoke) {
+void IntrinsicCodeGeneratorX86_64::VisitJdkUnsafeCompareAndSetReference(HInvoke* invoke) {
// The only supported read barrier implementation is the Baker-style read barriers.
- DCHECK_IMPLIES(gUseReadBarrier, kUseBakerReadBarrier);
+ DCHECK_IMPLIES(codegen_->EmitReadBarrier(), kUseBakerReadBarrier);
GenCAS(DataType::Type::kReference, invoke, codegen_);
}
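For context on the compare-and-set family being renamed here (`...CompareAndSetObject` to `...CompareAndSetReference`): on x86-64 these intrinsics reduce to a `LOCK CMPXCHG` followed by converting ZF into the boolean result, as the comment a few hunks above notes. The observable semantics match an ordinary strong compare-exchange; a hedged stand-alone sketch, not the generated assembly:

```cpp
// Sketch of compare-and-set semantics: atomically replace *addr with `desired`
// iff it currently equals `expected`, and report success. LOCK CMPXCHG leaves
// the success bit in ZF, which the intrinsic then turns into a 0/1 value.
#include <atomic>
#include <cstdint>
#include <iostream>

bool CompareAndSet(std::atomic<int32_t>* addr, int32_t expected, int32_t desired) {
  // compare_exchange_strong writes the observed value back into `expected` on
  // failure; the intrinsic discards that value, so this sketch does too.
  return addr->compare_exchange_strong(expected, desired, std::memory_order_seq_cst);
}

int main() {
  std::atomic<int32_t> field{41};
  std::cout << CompareAndSet(&field, 41, 42) << ' ' << field.load() << '\n';  // 1 42
  std::cout << CompareAndSet(&field, 41, 43) << ' ' << field.load() << '\n';  // 0 42
}
```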
+static void CreateUnsafeGetAndUpdateLocations(ArenaAllocator* allocator,
+ HInvoke* invoke,
+ CodeGeneratorX86_64* codegen) {
+ const bool can_call = codegen->EmitReadBarrier() && IsUnsafeGetAndSetReference(invoke);
+ LocationSummary* locations =
+ new (allocator) LocationSummary(invoke,
+ can_call
+ ? LocationSummary::kCallOnSlowPath
+ : LocationSummary::kNoCall,
+ kIntrinsified);
+ if (can_call && kUseBakerReadBarrier) {
+ locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty()); // No caller-save registers.
+ }
+ locations->SetInAt(0, Location::NoLocation()); // Unused receiver.
+ locations->SetInAt(1, Location::RequiresRegister());
+ locations->SetInAt(2, Location::RequiresRegister());
+ // Use the same register for both the output and the new value or addend
+ // to take advantage of XCHG or XADD. Arbitrarily pick RAX.
+ locations->SetInAt(3, Location::RegisterLocation(RAX));
+ locations->SetOut(Location::RegisterLocation(RAX));
+}
+
+void IntrinsicLocationsBuilderX86_64::VisitUnsafeGetAndAddInt(HInvoke* invoke) {
+ VisitJdkUnsafeGetAndAddInt(invoke);
+}
+
+void IntrinsicLocationsBuilderX86_64::VisitUnsafeGetAndAddLong(HInvoke* invoke) {
+ VisitJdkUnsafeGetAndAddLong(invoke);
+}
+
+void IntrinsicLocationsBuilderX86_64::VisitUnsafeGetAndSetInt(HInvoke* invoke) {
+ VisitJdkUnsafeGetAndSetInt(invoke);
+}
+
+void IntrinsicLocationsBuilderX86_64::VisitUnsafeGetAndSetLong(HInvoke* invoke) {
+ VisitJdkUnsafeGetAndSetLong(invoke);
+}
+
+void IntrinsicLocationsBuilderX86_64::VisitUnsafeGetAndSetObject(HInvoke* invoke) {
+ VisitJdkUnsafeGetAndSetReference(invoke);
+}
+
+void IntrinsicLocationsBuilderX86_64::VisitJdkUnsafeGetAndAddInt(HInvoke* invoke) {
+ CreateUnsafeGetAndUpdateLocations(allocator_, invoke, codegen_);
+}
+
+void IntrinsicLocationsBuilderX86_64::VisitJdkUnsafeGetAndAddLong(HInvoke* invoke) {
+ CreateUnsafeGetAndUpdateLocations(allocator_, invoke, codegen_);
+}
+
+void IntrinsicLocationsBuilderX86_64::VisitJdkUnsafeGetAndSetInt(HInvoke* invoke) {
+ CreateUnsafeGetAndUpdateLocations(allocator_, invoke, codegen_);
+}
+
+void IntrinsicLocationsBuilderX86_64::VisitJdkUnsafeGetAndSetLong(HInvoke* invoke) {
+ CreateUnsafeGetAndUpdateLocations(allocator_, invoke, codegen_);
+}
+
+void IntrinsicLocationsBuilderX86_64::VisitJdkUnsafeGetAndSetReference(HInvoke* invoke) {
+ // The only supported read barrier implementation is the Baker-style read barriers.
+ if (codegen_->EmitNonBakerReadBarrier()) {
+ return;
+ }
+
+ CreateUnsafeGetAndUpdateLocations(allocator_, invoke, codegen_);
+ invoke->GetLocations()->AddRegisterTemps(3);
+}
+
+enum class GetAndUpdateOp {
+ kSet,
+ kAdd,
+ kBitwiseAnd,
+ kBitwiseOr,
+ kBitwiseXor
+};
+
+static void GenUnsafeGetAndUpdate(HInvoke* invoke,
+ DataType::Type type,
+ CodeGeneratorX86_64* codegen,
+ GetAndUpdateOp get_and_update_op) {
+ X86_64Assembler* assembler = down_cast<X86_64Assembler*>(codegen->GetAssembler());
+ LocationSummary* locations = invoke->GetLocations();
+
+ CpuRegister out = locations->Out().AsRegister<CpuRegister>(); // Result.
+ CpuRegister base = locations->InAt(1).AsRegister<CpuRegister>(); // Object pointer.
+ CpuRegister offset = locations->InAt(2).AsRegister<CpuRegister>(); // Long offset.
+ DCHECK_EQ(out, locations->InAt(3).AsRegister<CpuRegister>()); // New value or addend.
+ Address field_address(base, offset, TIMES_1, 0);
+
+ if (type == DataType::Type::kInt32) {
+ if (get_and_update_op == GetAndUpdateOp::kAdd) {
+ __ LockXaddl(field_address, out);
+ } else {
+ DCHECK(get_and_update_op == GetAndUpdateOp::kSet);
+ __ xchgl(out, field_address);
+ }
+ } else if (type == DataType::Type::kInt64) {
+ if (get_and_update_op == GetAndUpdateOp::kAdd) {
+ __ LockXaddq(field_address, out);
+ } else {
+ DCHECK(get_and_update_op == GetAndUpdateOp::kSet);
+ __ xchgq(out, field_address);
+ }
+ } else {
+ DCHECK_EQ(type, DataType::Type::kReference);
+ DCHECK(get_and_update_op == GetAndUpdateOp::kSet);
+ CpuRegister temp1 = locations->GetTemp(0).AsRegister<CpuRegister>();
+ CpuRegister temp2 = locations->GetTemp(1).AsRegister<CpuRegister>();
+ CpuRegister temp3 = locations->GetTemp(2).AsRegister<CpuRegister>();
+
+ if (codegen->EmitReadBarrier()) {
+ DCHECK(kUseBakerReadBarrier);
+ // Ensure that the field contains a to-space reference.
+ codegen->GenerateReferenceLoadWithBakerReadBarrier(
+ invoke,
+ Location::RegisterLocation(temp3.AsRegister()),
+ base,
+ field_address,
+ /*needs_null_check=*/ false,
+ /*always_update_field=*/ true,
+ &temp1,
+ &temp2);
+ }
+
+ // Mark card for object as a new value shall be stored.
+ bool new_value_can_be_null = true; // TODO: Worth finding out this information?
+ codegen->MarkGCCard(temp1, temp2, base, /*value=*/ out, new_value_can_be_null);
+
+ if (kPoisonHeapReferences) {
+ // Use a temp to avoid poisoning base of the field address, which might happen if `out`
+ // is the same as `base` (for code like `unsafe.getAndSet(obj, offset, obj)`).
+ __ movl(temp1, out);
+ __ PoisonHeapReference(temp1);
+ __ xchgl(temp1, field_address);
+ __ UnpoisonHeapReference(temp1);
+ __ movl(out, temp1);
+ } else {
+ __ xchgl(out, field_address);
+ }
+ }
+}
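`GenUnsafeGetAndUpdate` above relies on `LOCK XADD` and `XCHG` returning the previous memory value in the register operand, which is why the locations builder pins both the new value/addend and the output to the same register (RAX). In portable terms the behavior is the following sketch:

```cpp
// Sketch of the getAndAdd / getAndSet semantics the intrinsic emits.
// std::atomic fetch_add/exchange mirror LOCK XADD / XCHG: both return the
// value that was in memory before the update.
#include <atomic>
#include <cstdint>
#include <iostream>

int64_t GetAndAdd(std::atomic<int64_t>* field, int64_t addend) {
  return field->fetch_add(addend, std::memory_order_seq_cst);  // LOCK XADD
}

int64_t GetAndSet(std::atomic<int64_t>* field, int64_t new_value) {
  return field->exchange(new_value, std::memory_order_seq_cst);  // XCHG (implicitly locked)
}

int main() {
  std::atomic<int64_t> field{10};
  std::cout << GetAndAdd(&field, 5) << ' ' << field.load() << '\n';   // 10 15
  std::cout << GetAndSet(&field, 99) << ' ' << field.load() << '\n';  // 15 99
}
```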
+
+void IntrinsicCodeGeneratorX86_64::VisitUnsafeGetAndAddInt(HInvoke* invoke) {
+ VisitJdkUnsafeGetAndAddInt(invoke);
+}
+
+void IntrinsicCodeGeneratorX86_64::VisitUnsafeGetAndAddLong(HInvoke* invoke) {
+ VisitJdkUnsafeGetAndAddLong(invoke);
+}
+
+void IntrinsicCodeGeneratorX86_64::VisitUnsafeGetAndSetInt(HInvoke* invoke) {
+ VisitJdkUnsafeGetAndSetInt(invoke);
+}
+
+void IntrinsicCodeGeneratorX86_64::VisitUnsafeGetAndSetLong(HInvoke* invoke) {
+ VisitJdkUnsafeGetAndSetLong(invoke);
+}
+
+void IntrinsicCodeGeneratorX86_64::VisitUnsafeGetAndSetObject(HInvoke* invoke) {
+ VisitJdkUnsafeGetAndSetReference(invoke);
+}
+
+void IntrinsicCodeGeneratorX86_64::VisitJdkUnsafeGetAndAddInt(HInvoke* invoke) {
+ GenUnsafeGetAndUpdate(invoke, DataType::Type::kInt32, codegen_, GetAndUpdateOp::kAdd);
+}
+
+void IntrinsicCodeGeneratorX86_64::VisitJdkUnsafeGetAndAddLong(HInvoke* invoke) {
+ GenUnsafeGetAndUpdate(invoke, DataType::Type::kInt64, codegen_, GetAndUpdateOp::kAdd);
+}
+
+void IntrinsicCodeGeneratorX86_64::VisitJdkUnsafeGetAndSetInt(HInvoke* invoke) {
+ GenUnsafeGetAndUpdate(invoke, DataType::Type::kInt32, codegen_, GetAndUpdateOp::kSet);
+}
+
+void IntrinsicCodeGeneratorX86_64::VisitJdkUnsafeGetAndSetLong(HInvoke* invoke) {
+ GenUnsafeGetAndUpdate(invoke, DataType::Type::kInt64, codegen_, GetAndUpdateOp::kSet);
+}
+
+void IntrinsicCodeGeneratorX86_64::VisitJdkUnsafeGetAndSetReference(HInvoke* invoke) {
+ GenUnsafeGetAndUpdate(invoke, DataType::Type::kReference, codegen_, GetAndUpdateOp::kSet);
+}
+
void IntrinsicLocationsBuilderX86_64::VisitIntegerReverse(HInvoke* invoke) {
LocationSummary* locations =
new (allocator_) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
@@ -3053,18 +3248,60 @@ void IntrinsicCodeGeneratorX86_64::VisitLongNumberOfTrailingZeros(HInvoke* invok
GenTrailingZeros(GetAssembler(), codegen_, invoke, /* is_long= */ true);
}
-void IntrinsicLocationsBuilderX86_64::VisitIntegerValueOf(HInvoke* invoke) {
- InvokeRuntimeCallingConvention calling_convention;
- IntrinsicVisitor::ComputeIntegerValueOfLocations(
- invoke,
- codegen_,
- Location::RegisterLocation(RAX),
- Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
+#define VISIT_INTRINSIC(name, low, high, type, start_index) \
+ void IntrinsicLocationsBuilderX86_64::Visit ##name ##ValueOf(HInvoke* invoke) { \
+ InvokeRuntimeCallingConvention calling_convention; \
+ IntrinsicVisitor::ComputeValueOfLocations( \
+ invoke, \
+ codegen_, \
+ low, \
+ high - low + 1, \
+ Location::RegisterLocation(RAX), \
+ Location::RegisterLocation(calling_convention.GetRegisterAt(0))); \
+ } \
+ void IntrinsicCodeGeneratorX86_64::Visit ##name ##ValueOf(HInvoke* invoke) { \
+ IntrinsicVisitor::ValueOfInfo info = \
+ IntrinsicVisitor::ComputeValueOfInfo( \
+ invoke, \
+ codegen_->GetCompilerOptions(), \
+ WellKnownClasses::java_lang_ ##name ##_value, \
+ low, \
+ high - low + 1, \
+ start_index); \
+ HandleValueOf(invoke, info, type); \
+ }
+ BOXED_TYPES(VISIT_INTRINSIC)
+#undef VISIT_INTRINSIC
+
+template <typename T>
+static void Store(X86_64Assembler* assembler,
+ DataType::Type primitive_type,
+ const Address& address,
+ const T& operand) {
+ switch (primitive_type) {
+ case DataType::Type::kInt8:
+ case DataType::Type::kUint8: {
+ __ movb(address, operand);
+ break;
+ }
+ case DataType::Type::kInt16:
+ case DataType::Type::kUint16: {
+ __ movw(address, operand);
+ break;
+ }
+ case DataType::Type::kInt32: {
+ __ movl(address, operand);
+ break;
+ }
+ default: {
+ LOG(FATAL) << "Unrecognized ValueOf type " << primitive_type;
+ }
+ }
}
-void IntrinsicCodeGeneratorX86_64::VisitIntegerValueOf(HInvoke* invoke) {
- IntrinsicVisitor::IntegerValueOfInfo info =
- IntrinsicVisitor::ComputeIntegerValueOfInfo(invoke, codegen_->GetCompilerOptions());
+void IntrinsicCodeGeneratorX86_64::HandleValueOf(HInvoke* invoke,
+ const IntrinsicVisitor::ValueOfInfo& info,
+ DataType::Type type) {
LocationSummary* locations = invoke->GetLocations();
X86_64Assembler* assembler = GetAssembler();
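`BOXED_TYPES(VISIT_INTRINSIC)` in the hunk above is an X-macro: the list macro is handed a per-entry macro and stamps out one `Visit<Name>ValueOf` pair per boxed type, passing the cache bounds, the primitive type and the start index as arguments. The actual entries of `BOXED_TYPES` are not visible in this diff; the shape of the pattern, with made-up entries and values, is roughly:

```cpp
// Hedged sketch of the X-macro shape used above; the entry list and its
// argument values are illustrative, not the real BOXED_TYPES contents.
#include <iostream>

#define SKETCH_BOXED_TYPES(V) \
  V(Integer, -128, 127)       \
  V(Character, 0, 127)

#define DEFINE_VISITOR(name, low, high)                                      \
  void Visit##name##ValueOf() {                                              \
    std::cout << #name << " cache length: " << ((high) - (low) + 1) << '\n'; \
  }
SKETCH_BOXED_TYPES(DEFINE_VISITOR)
#undef DEFINE_VISITOR

int main() {
  VisitIntegerValueOf();    // Integer cache length: 256
  VisitCharacterValueOf();  // Character cache length: 128
}
```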
@@ -3079,16 +3316,16 @@ void IntrinsicCodeGeneratorX86_64::VisitIntegerValueOf(HInvoke* invoke) {
if (invoke->InputAt(0)->IsIntConstant()) {
int32_t value = invoke->InputAt(0)->AsIntConstant()->GetValue();
if (static_cast<uint32_t>(value - info.low) < info.length) {
- // Just embed the j.l.Integer in the code.
- DCHECK_NE(info.value_boot_image_reference, IntegerValueOfInfo::kInvalidReference);
+ // Just embed the object in the code.
+ DCHECK_NE(info.value_boot_image_reference, ValueOfInfo::kInvalidReference);
codegen_->LoadBootImageAddress(out, info.value_boot_image_reference);
} else {
DCHECK(locations->CanCall());
- // Allocate and initialize a new j.l.Integer.
- // TODO: If we JIT, we could allocate the j.l.Integer now, and store it in the
+ // Allocate and initialize a new object.
+ // TODO: If we JIT, we could allocate the boxed value now, and store it in the
// JIT object table.
allocate_instance();
- __ movl(Address(out, info.value_offset), Immediate(value));
+ Store(assembler, type, Address(out, info.value_offset), Immediate(value));
}
} else {
DCHECK(locations->CanCall());
@@ -3098,7 +3335,7 @@ void IntrinsicCodeGeneratorX86_64::VisitIntegerValueOf(HInvoke* invoke) {
__ cmpl(out, Immediate(info.length));
NearLabel allocate, done;
__ j(kAboveEqual, &allocate);
- // If the value is within the bounds, load the j.l.Integer directly from the array.
+ // If the value is within the bounds, load the boxed value directly from the array.
DCHECK_NE(out.AsRegister(), argument.AsRegister());
codegen_->LoadBootImageAddress(argument, info.array_data_boot_image_reference);
static_assert((1u << TIMES_4) == sizeof(mirror::HeapReference<mirror::Object>),
@@ -3107,9 +3344,9 @@ void IntrinsicCodeGeneratorX86_64::VisitIntegerValueOf(HInvoke* invoke) {
__ MaybeUnpoisonHeapReference(out);
__ jmp(&done);
__ Bind(&allocate);
- // Otherwise allocate and initialize a new j.l.Integer.
+ // Otherwise allocate and initialize a new object.
allocate_instance();
- __ movl(Address(out, info.value_offset), in);
+ Store(assembler, type, Address(out, info.value_offset), in);
__ Bind(&done);
}
}
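`HandleValueOf` above implements the usual boxing-cache contract: values inside `[info.low, info.low + info.length)` are loaded from a boot-image array of pre-allocated boxes, and anything outside that range allocates a fresh object and stores the primitive into its value field. A sketch of that fast path in plain terms, with placeholder types rather than ART ones:

```cpp
// Sketch of the valueOf fast path the intrinsic compiles; Box and ValueOfCache
// are placeholders standing in for the boxed class and the boot-image data.
#include <cstdint>
#include <iostream>
#include <vector>

struct Box { int32_t value; };

struct ValueOfCache {
  int32_t low;              // first cached value (info.low)
  std::vector<Box*> boxes;  // boot-image array (info.length entries)

  Box* ValueOf(int32_t v) const {
    // Same unsigned-compare trick as `static_cast<uint32_t>(value - info.low) < info.length`.
    uint32_t index = static_cast<uint32_t>(v - low);
    if (index < boxes.size()) {
      return boxes[index];  // load the pre-allocated box from the array
    }
    return new Box{v};      // otherwise allocate and initialize a new object
  }
};

int main() {
  ValueOfCache cache{-128, std::vector<Box*>(256)};
  for (int i = 0; i < 256; ++i) cache.boxes[i] = new Box{cache.low + i};
  std::cout << (cache.ValueOf(5) == cache.ValueOf(5)) << '\n';        // 1: cached, same box
  std::cout << (cache.ValueOf(1000) == cache.ValueOf(1000)) << '\n';  // 0: fresh allocations
}
```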
@@ -3128,7 +3365,7 @@ void IntrinsicCodeGeneratorX86_64::VisitReferenceGetReferent(HInvoke* invoke) {
SlowPathCode* slow_path = new (GetAllocator()) IntrinsicSlowPathX86_64(invoke);
codegen_->AddSlowPath(slow_path);
- if (gUseReadBarrier) {
+ if (codegen_->EmitReadBarrier()) {
// Check self->GetWeakRefAccessEnabled().
ThreadOffset64 offset = Thread::WeakRefAccessEnabledOffset<kX86_64PointerSize>();
__ gs()->cmpl(Address::Absolute(offset, /* no_rip= */ true),
@@ -3150,7 +3387,7 @@ void IntrinsicCodeGeneratorX86_64::VisitReferenceGetReferent(HInvoke* invoke) {
// Load the value from the field.
uint32_t referent_offset = mirror::Reference::ReferentOffset().Uint32Value();
- if (gUseReadBarrier && kUseBakerReadBarrier) {
+ if (codegen_->EmitBakerReadBarrier()) {
codegen_->GenerateFieldLoadWithBakerReadBarrier(invoke,
out,
obj.AsRegister<CpuRegister>(),
@@ -3169,7 +3406,7 @@ void IntrinsicCodeGeneratorX86_64::VisitReferenceGetReferent(HInvoke* invoke) {
}
void IntrinsicLocationsBuilderX86_64::VisitReferenceRefersTo(HInvoke* invoke) {
- IntrinsicVisitor::CreateReferenceRefersToLocations(invoke);
+ IntrinsicVisitor::CreateReferenceRefersToLocations(invoke, codegen_);
}
void IntrinsicCodeGeneratorX86_64::VisitReferenceRefersTo(HInvoke* invoke) {
@@ -3191,7 +3428,7 @@ void IntrinsicCodeGeneratorX86_64::VisitReferenceRefersTo(HInvoke* invoke) {
__ cmpl(out, other);
- if (gUseReadBarrier) {
+ if (codegen_->EmitReadBarrier()) {
DCHECK(kUseBakerReadBarrier);
NearLabel calculate_result;
@@ -3249,7 +3486,7 @@ void IntrinsicLocationsBuilderX86_64::VisitReachabilityFence(HInvoke* invoke) {
locations->SetInAt(0, Location::Any());
}
-void IntrinsicCodeGeneratorX86_64::VisitReachabilityFence(HInvoke* invoke ATTRIBUTE_UNUSED) { }
+void IntrinsicCodeGeneratorX86_64::VisitReachabilityFence([[maybe_unused]] HInvoke* invoke) {}
static void CreateDivideUnsignedLocations(HInvoke* invoke, ArenaAllocator* allocator) {
LocationSummary* locations =
@@ -3332,14 +3569,6 @@ void IntrinsicCodeGeneratorX86_64::VisitMathMultiplyHigh(HInvoke* invoke) {
__ imulq(y);
}
-enum class GetAndUpdateOp {
- kSet,
- kAdd,
- kBitwiseAnd,
- kBitwiseOr,
- kBitwiseXor
-};
-
class VarHandleSlowPathX86_64 : public IntrinsicSlowPathX86_64 {
public:
explicit VarHandleSlowPathX86_64(HInvoke* invoke)
@@ -3510,7 +3739,7 @@ static void GenerateVarHandleAccessModeAndVarTypeChecks(HInvoke* invoke,
__ movl(temp, Address(varhandle, var_type_offset));
__ MaybeUnpoisonHeapReference(temp);
- // Check check the varType.primitiveType field against the type we're trying to retrieve.
+ // Check the varType.primitiveType field against the type we're trying to use.
__ cmpw(Address(temp, primitive_type_offset), Immediate(static_cast<uint16_t>(primitive_type)));
__ j(kNotEqual, slow_path->GetEntryLabel());
@@ -3754,24 +3983,24 @@ static void GenerateVarHandleTarget(HInvoke* invoke,
__ movl(CpuRegister(target.offset), Immediate(target_field->GetOffset().Uint32Value()));
} else {
// For static fields, we need to fill the `target.object` with the declaring class,
- // so we can use `target.object` as temporary for the `ArtMethod*`. For instance fields,
- // we do not need the declaring class, so we can forget the `ArtMethod*` when
- // we load the `target.offset`, so use the `target.offset` to hold the `ArtMethod*`.
- CpuRegister method((expected_coordinates_count == 0) ? target.object : target.offset);
+ // so we can use `target.object` as temporary for the `ArtField*`. For instance fields,
+ // we do not need the declaring class, so we can forget the `ArtField*` when
+ // we load the `target.offset`, so use the `target.offset` to hold the `ArtField*`.
+ CpuRegister field((expected_coordinates_count == 0) ? target.object : target.offset);
const MemberOffset art_field_offset = mirror::FieldVarHandle::ArtFieldOffset();
const MemberOffset offset_offset = ArtField::OffsetOffset();
- // Load the ArtField, the offset and, if needed, declaring class.
- __ movq(method, Address(varhandle, art_field_offset));
- __ movl(CpuRegister(target.offset), Address(method, offset_offset));
+ // Load the ArtField*, the offset and, if needed, declaring class.
+ __ movq(field, Address(varhandle, art_field_offset));
+ __ movl(CpuRegister(target.offset), Address(field, offset_offset));
if (expected_coordinates_count == 0u) {
InstructionCodeGeneratorX86_64* instr_codegen = codegen->GetInstructionCodegen();
instr_codegen->GenerateGcRootFieldLoad(invoke,
Location::RegisterLocation(target.object),
- Address(method, ArtField::DeclaringClassOffset()),
- /*fixup_label=*/ nullptr,
- gCompilerReadBarrierOption);
+ Address(field, ArtField::DeclaringClassOffset()),
+ /*fixup_label=*/nullptr,
+ codegen->GetCompilerReadBarrierOption());
}
}
} else {
@@ -3788,9 +4017,9 @@ static void GenerateVarHandleTarget(HInvoke* invoke,
}
}
-static bool HasVarHandleIntrinsicImplementation(HInvoke* invoke) {
+static bool HasVarHandleIntrinsicImplementation(HInvoke* invoke, CodeGeneratorX86_64* codegen) {
// The only supported read barrier implementation is the Baker-style read barriers.
- if (gUseReadBarrier && !kUseBakerReadBarrier) {
+ if (codegen->EmitNonBakerReadBarrier()) {
return false;
}
@@ -3839,8 +4068,8 @@ static LocationSummary* CreateVarHandleCommonLocations(HInvoke* invoke) {
return locations;
}
-static void CreateVarHandleGetLocations(HInvoke* invoke) {
- if (!HasVarHandleIntrinsicImplementation(invoke)) {
+static void CreateVarHandleGetLocations(HInvoke* invoke, CodeGeneratorX86_64* codegen) {
+ if (!HasVarHandleIntrinsicImplementation(invoke, codegen)) {
return;
}
@@ -3876,7 +4105,7 @@ static void GenerateVarHandleGet(HInvoke* invoke,
Location out = locations->Out();
if (type == DataType::Type::kReference) {
- if (gUseReadBarrier) {
+ if (codegen->EmitReadBarrier()) {
DCHECK(kUseBakerReadBarrier);
codegen->GenerateReferenceLoadWithBakerReadBarrier(
invoke, out, CpuRegister(target.object), src, /* needs_null_check= */ false);
@@ -3900,7 +4129,7 @@ static void GenerateVarHandleGet(HInvoke* invoke,
}
void IntrinsicLocationsBuilderX86_64::VisitVarHandleGet(HInvoke* invoke) {
- CreateVarHandleGetLocations(invoke);
+ CreateVarHandleGetLocations(invoke, codegen_);
}
void IntrinsicCodeGeneratorX86_64::VisitVarHandleGet(HInvoke* invoke) {
@@ -3908,7 +4137,7 @@ void IntrinsicCodeGeneratorX86_64::VisitVarHandleGet(HInvoke* invoke) {
}
void IntrinsicLocationsBuilderX86_64::VisitVarHandleGetAcquire(HInvoke* invoke) {
- CreateVarHandleGetLocations(invoke);
+ CreateVarHandleGetLocations(invoke, codegen_);
}
void IntrinsicCodeGeneratorX86_64::VisitVarHandleGetAcquire(HInvoke* invoke) {
@@ -3917,7 +4146,7 @@ void IntrinsicCodeGeneratorX86_64::VisitVarHandleGetAcquire(HInvoke* invoke) {
}
void IntrinsicLocationsBuilderX86_64::VisitVarHandleGetOpaque(HInvoke* invoke) {
- CreateVarHandleGetLocations(invoke);
+ CreateVarHandleGetLocations(invoke, codegen_);
}
void IntrinsicCodeGeneratorX86_64::VisitVarHandleGetOpaque(HInvoke* invoke) {
@@ -3926,7 +4155,7 @@ void IntrinsicCodeGeneratorX86_64::VisitVarHandleGetOpaque(HInvoke* invoke) {
}
void IntrinsicLocationsBuilderX86_64::VisitVarHandleGetVolatile(HInvoke* invoke) {
- CreateVarHandleGetLocations(invoke);
+ CreateVarHandleGetLocations(invoke, codegen_);
}
void IntrinsicCodeGeneratorX86_64::VisitVarHandleGetVolatile(HInvoke* invoke) {
@@ -3934,8 +4163,8 @@ void IntrinsicCodeGeneratorX86_64::VisitVarHandleGetVolatile(HInvoke* invoke) {
GenerateVarHandleGet(invoke, codegen_);
}
-static void CreateVarHandleSetLocations(HInvoke* invoke) {
- if (!HasVarHandleIntrinsicImplementation(invoke)) {
+static void CreateVarHandleSetLocations(HInvoke* invoke, CodeGeneratorX86_64* codegen) {
+ if (!HasVarHandleIntrinsicImplementation(invoke, codegen)) {
return;
}
@@ -4008,7 +4237,7 @@ static void GenerateVarHandleSet(HInvoke* invoke,
}
void IntrinsicLocationsBuilderX86_64::VisitVarHandleSet(HInvoke* invoke) {
- CreateVarHandleSetLocations(invoke);
+ CreateVarHandleSetLocations(invoke, codegen_);
}
void IntrinsicCodeGeneratorX86_64::VisitVarHandleSet(HInvoke* invoke) {
@@ -4016,7 +4245,7 @@ void IntrinsicCodeGeneratorX86_64::VisitVarHandleSet(HInvoke* invoke) {
}
void IntrinsicLocationsBuilderX86_64::VisitVarHandleSetOpaque(HInvoke* invoke) {
- CreateVarHandleSetLocations(invoke);
+ CreateVarHandleSetLocations(invoke, codegen_);
}
void IntrinsicCodeGeneratorX86_64::VisitVarHandleSetOpaque(HInvoke* invoke) {
@@ -4024,7 +4253,7 @@ void IntrinsicCodeGeneratorX86_64::VisitVarHandleSetOpaque(HInvoke* invoke) {
}
void IntrinsicLocationsBuilderX86_64::VisitVarHandleSetRelease(HInvoke* invoke) {
- CreateVarHandleSetLocations(invoke);
+ CreateVarHandleSetLocations(invoke, codegen_);
}
void IntrinsicCodeGeneratorX86_64::VisitVarHandleSetRelease(HInvoke* invoke) {
@@ -4032,15 +4261,16 @@ void IntrinsicCodeGeneratorX86_64::VisitVarHandleSetRelease(HInvoke* invoke) {
}
void IntrinsicLocationsBuilderX86_64::VisitVarHandleSetVolatile(HInvoke* invoke) {
- CreateVarHandleSetLocations(invoke);
+ CreateVarHandleSetLocations(invoke, codegen_);
}
void IntrinsicCodeGeneratorX86_64::VisitVarHandleSetVolatile(HInvoke* invoke) {
GenerateVarHandleSet(invoke, codegen_, /*is_volatile=*/ true, /*is_atomic=*/ true);
}
-static void CreateVarHandleCompareAndSetOrExchangeLocations(HInvoke* invoke) {
- if (!HasVarHandleIntrinsicImplementation(invoke)) {
+static void CreateVarHandleCompareAndSetOrExchangeLocations(HInvoke* invoke,
+ CodeGeneratorX86_64* codegen) {
+ if (!HasVarHandleIntrinsicImplementation(invoke, codegen)) {
return;
}
@@ -4073,7 +4303,7 @@ static void CreateVarHandleCompareAndSetOrExchangeLocations(HInvoke* invoke) {
// Need two temporaries for MarkGCCard.
locations->AddTemp(Location::RequiresRegister());
locations->AddTemp(Location::RequiresRegister());
- if (gUseReadBarrier) {
+ if (codegen->EmitReadBarrier()) {
// Need three temporaries for GenerateReferenceLoadWithBakerReadBarrier.
DCHECK(kUseBakerReadBarrier);
locations->AddTemp(Location::RequiresRegister());
@@ -4088,7 +4318,7 @@ static void GenerateVarHandleCompareAndSetOrExchange(HInvoke* invoke,
CodeGeneratorX86_64* codegen,
bool is_cmpxchg,
bool byte_swap = false) {
- DCHECK_IMPLIES(gUseReadBarrier, kUseBakerReadBarrier);
+ DCHECK_IMPLIES(codegen->EmitReadBarrier(), kUseBakerReadBarrier);
X86_64Assembler* assembler = codegen->GetAssembler();
LocationSummary* locations = invoke->GetLocations();
@@ -4133,7 +4363,7 @@ static void GenerateVarHandleCompareAndSetOrExchange(HInvoke* invoke,
}
void IntrinsicLocationsBuilderX86_64::VisitVarHandleCompareAndSet(HInvoke* invoke) {
- CreateVarHandleCompareAndSetOrExchangeLocations(invoke);
+ CreateVarHandleCompareAndSetOrExchangeLocations(invoke, codegen_);
}
void IntrinsicCodeGeneratorX86_64::VisitVarHandleCompareAndSet(HInvoke* invoke) {
@@ -4141,7 +4371,7 @@ void IntrinsicCodeGeneratorX86_64::VisitVarHandleCompareAndSet(HInvoke* invoke)
}
void IntrinsicLocationsBuilderX86_64::VisitVarHandleWeakCompareAndSet(HInvoke* invoke) {
- CreateVarHandleCompareAndSetOrExchangeLocations(invoke);
+ CreateVarHandleCompareAndSetOrExchangeLocations(invoke, codegen_);
}
void IntrinsicCodeGeneratorX86_64::VisitVarHandleWeakCompareAndSet(HInvoke* invoke) {
@@ -4149,7 +4379,7 @@ void IntrinsicCodeGeneratorX86_64::VisitVarHandleWeakCompareAndSet(HInvoke* invo
}
void IntrinsicLocationsBuilderX86_64::VisitVarHandleWeakCompareAndSetPlain(HInvoke* invoke) {
- CreateVarHandleCompareAndSetOrExchangeLocations(invoke);
+ CreateVarHandleCompareAndSetOrExchangeLocations(invoke, codegen_);
}
void IntrinsicCodeGeneratorX86_64::VisitVarHandleWeakCompareAndSetPlain(HInvoke* invoke) {
@@ -4157,7 +4387,7 @@ void IntrinsicCodeGeneratorX86_64::VisitVarHandleWeakCompareAndSetPlain(HInvoke*
}
void IntrinsicLocationsBuilderX86_64::VisitVarHandleWeakCompareAndSetAcquire(HInvoke* invoke) {
- CreateVarHandleCompareAndSetOrExchangeLocations(invoke);
+ CreateVarHandleCompareAndSetOrExchangeLocations(invoke, codegen_);
}
void IntrinsicCodeGeneratorX86_64::VisitVarHandleWeakCompareAndSetAcquire(HInvoke* invoke) {
@@ -4165,7 +4395,7 @@ void IntrinsicCodeGeneratorX86_64::VisitVarHandleWeakCompareAndSetAcquire(HInvok
}
void IntrinsicLocationsBuilderX86_64::VisitVarHandleWeakCompareAndSetRelease(HInvoke* invoke) {
- CreateVarHandleCompareAndSetOrExchangeLocations(invoke);
+ CreateVarHandleCompareAndSetOrExchangeLocations(invoke, codegen_);
}
void IntrinsicCodeGeneratorX86_64::VisitVarHandleWeakCompareAndSetRelease(HInvoke* invoke) {
@@ -4173,7 +4403,7 @@ void IntrinsicCodeGeneratorX86_64::VisitVarHandleWeakCompareAndSetRelease(HInvok
}
void IntrinsicLocationsBuilderX86_64::VisitVarHandleCompareAndExchange(HInvoke* invoke) {
- CreateVarHandleCompareAndSetOrExchangeLocations(invoke);
+ CreateVarHandleCompareAndSetOrExchangeLocations(invoke, codegen_);
}
void IntrinsicCodeGeneratorX86_64::VisitVarHandleCompareAndExchange(HInvoke* invoke) {
@@ -4181,7 +4411,7 @@ void IntrinsicCodeGeneratorX86_64::VisitVarHandleCompareAndExchange(HInvoke* inv
}
void IntrinsicLocationsBuilderX86_64::VisitVarHandleCompareAndExchangeAcquire(HInvoke* invoke) {
- CreateVarHandleCompareAndSetOrExchangeLocations(invoke);
+ CreateVarHandleCompareAndSetOrExchangeLocations(invoke, codegen_);
}
void IntrinsicCodeGeneratorX86_64::VisitVarHandleCompareAndExchangeAcquire(HInvoke* invoke) {
@@ -4189,15 +4419,15 @@ void IntrinsicCodeGeneratorX86_64::VisitVarHandleCompareAndExchangeAcquire(HInvo
}
void IntrinsicLocationsBuilderX86_64::VisitVarHandleCompareAndExchangeRelease(HInvoke* invoke) {
- CreateVarHandleCompareAndSetOrExchangeLocations(invoke);
+ CreateVarHandleCompareAndSetOrExchangeLocations(invoke, codegen_);
}
void IntrinsicCodeGeneratorX86_64::VisitVarHandleCompareAndExchangeRelease(HInvoke* invoke) {
GenerateVarHandleCompareAndSetOrExchange(invoke, codegen_, /*is_cmpxchg=*/ true);
}
-static void CreateVarHandleGetAndSetLocations(HInvoke* invoke) {
- if (!HasVarHandleIntrinsicImplementation(invoke)) {
+static void CreateVarHandleGetAndSetLocations(HInvoke* invoke, CodeGeneratorX86_64* codegen) {
+ if (!HasVarHandleIntrinsicImplementation(invoke, codegen)) {
return;
}
@@ -4221,7 +4451,7 @@ static void CreateVarHandleGetAndSetLocations(HInvoke* invoke) {
// Need two temporaries for MarkGCCard.
locations->AddTemp(Location::RequiresRegister());
locations->AddTemp(Location::RequiresRegister());
- if (gUseReadBarrier) {
+ if (codegen->EmitReadBarrier()) {
// Need a third temporary for GenerateReferenceLoadWithBakerReadBarrier.
DCHECK(kUseBakerReadBarrier);
locations->AddTemp(Location::RequiresRegister());
@@ -4270,7 +4500,7 @@ static void GenerateVarHandleGetAndSet(HInvoke* invoke,
CpuRegister temp2 = locations->GetTemp(temp_count - 2).AsRegister<CpuRegister>();
CpuRegister valreg = value.AsRegister<CpuRegister>();
- if (gUseReadBarrier && kUseBakerReadBarrier) {
+ if (codegen->EmitBakerReadBarrier()) {
codegen->GenerateReferenceLoadWithBakerReadBarrier(
invoke,
locations->GetTemp(temp_count - 3),
@@ -4339,8 +4569,8 @@ static void GenerateVarHandleGetAndSet(HInvoke* invoke,
}
}
-static void CreateVarHandleGetAndBitwiseOpLocations(HInvoke* invoke) {
- if (!HasVarHandleIntrinsicImplementation(invoke)) {
+static void CreateVarHandleGetAndBitwiseOpLocations(HInvoke* invoke, CodeGeneratorX86_64* codegen) {
+ if (!HasVarHandleIntrinsicImplementation(invoke, codegen)) {
return;
}
@@ -4478,8 +4708,8 @@ static void GenerateVarHandleGetAndOp(HInvoke* invoke,
}
}
-static void CreateVarHandleGetAndAddLocations(HInvoke* invoke) {
- if (!HasVarHandleIntrinsicImplementation(invoke)) {
+static void CreateVarHandleGetAndAddLocations(HInvoke* invoke, CodeGeneratorX86_64* codegen) {
+ if (!HasVarHandleIntrinsicImplementation(invoke, codegen)) {
return;
}
@@ -4650,7 +4880,7 @@ static void GenerateVarHandleGetAndUpdate(HInvoke* invoke,
bool need_any_store_barrier,
bool need_any_any_barrier,
bool byte_swap = false) {
- DCHECK_IMPLIES(gUseReadBarrier, kUseBakerReadBarrier);
+ DCHECK_IMPLIES(codegen->EmitReadBarrier(), kUseBakerReadBarrier);
X86_64Assembler* assembler = codegen->GetAssembler();
LocationSummary* locations = invoke->GetLocations();
@@ -4705,7 +4935,7 @@ static void GenerateVarHandleGetAndUpdate(HInvoke* invoke,
}
void IntrinsicLocationsBuilderX86_64::VisitVarHandleGetAndSet(HInvoke* invoke) {
- CreateVarHandleGetAndSetLocations(invoke);
+ CreateVarHandleGetAndSetLocations(invoke, codegen_);
}
void IntrinsicCodeGeneratorX86_64::VisitVarHandleGetAndSet(HInvoke* invoke) {
@@ -4718,7 +4948,7 @@ void IntrinsicCodeGeneratorX86_64::VisitVarHandleGetAndSet(HInvoke* invoke) {
}
void IntrinsicLocationsBuilderX86_64::VisitVarHandleGetAndSetAcquire(HInvoke* invoke) {
- CreateVarHandleGetAndSetLocations(invoke);
+ CreateVarHandleGetAndSetLocations(invoke, codegen_);
}
void IntrinsicCodeGeneratorX86_64::VisitVarHandleGetAndSetAcquire(HInvoke* invoke) {
@@ -4731,7 +4961,7 @@ void IntrinsicCodeGeneratorX86_64::VisitVarHandleGetAndSetAcquire(HInvoke* invok
}
void IntrinsicLocationsBuilderX86_64::VisitVarHandleGetAndSetRelease(HInvoke* invoke) {
- CreateVarHandleGetAndSetLocations(invoke);
+ CreateVarHandleGetAndSetLocations(invoke, codegen_);
}
void IntrinsicCodeGeneratorX86_64::VisitVarHandleGetAndSetRelease(HInvoke* invoke) {
@@ -4744,7 +4974,7 @@ void IntrinsicCodeGeneratorX86_64::VisitVarHandleGetAndSetRelease(HInvoke* invok
}
void IntrinsicLocationsBuilderX86_64::VisitVarHandleGetAndAdd(HInvoke* invoke) {
- CreateVarHandleGetAndAddLocations(invoke);
+ CreateVarHandleGetAndAddLocations(invoke, codegen_);
}
void IntrinsicCodeGeneratorX86_64::VisitVarHandleGetAndAdd(HInvoke* invoke) {
@@ -4757,7 +4987,7 @@ void IntrinsicCodeGeneratorX86_64::VisitVarHandleGetAndAdd(HInvoke* invoke) {
}
void IntrinsicLocationsBuilderX86_64::VisitVarHandleGetAndAddAcquire(HInvoke* invoke) {
- CreateVarHandleGetAndAddLocations(invoke);
+ CreateVarHandleGetAndAddLocations(invoke, codegen_);
}
void IntrinsicCodeGeneratorX86_64::VisitVarHandleGetAndAddAcquire(HInvoke* invoke) {
@@ -4770,7 +5000,7 @@ void IntrinsicCodeGeneratorX86_64::VisitVarHandleGetAndAddAcquire(HInvoke* invok
}
void IntrinsicLocationsBuilderX86_64::VisitVarHandleGetAndAddRelease(HInvoke* invoke) {
- CreateVarHandleGetAndAddLocations(invoke);
+ CreateVarHandleGetAndAddLocations(invoke, codegen_);
}
void IntrinsicCodeGeneratorX86_64::VisitVarHandleGetAndAddRelease(HInvoke* invoke) {
@@ -4783,7 +5013,7 @@ void IntrinsicCodeGeneratorX86_64::VisitVarHandleGetAndAddRelease(HInvoke* invok
}
void IntrinsicLocationsBuilderX86_64::VisitVarHandleGetAndBitwiseAnd(HInvoke* invoke) {
- CreateVarHandleGetAndBitwiseOpLocations(invoke);
+ CreateVarHandleGetAndBitwiseOpLocations(invoke, codegen_);
}
void IntrinsicCodeGeneratorX86_64::VisitVarHandleGetAndBitwiseAnd(HInvoke* invoke) {
@@ -4796,7 +5026,7 @@ void IntrinsicCodeGeneratorX86_64::VisitVarHandleGetAndBitwiseAnd(HInvoke* invok
}
void IntrinsicLocationsBuilderX86_64::VisitVarHandleGetAndBitwiseAndAcquire(HInvoke* invoke) {
- CreateVarHandleGetAndBitwiseOpLocations(invoke);
+ CreateVarHandleGetAndBitwiseOpLocations(invoke, codegen_);
}
void IntrinsicCodeGeneratorX86_64::VisitVarHandleGetAndBitwiseAndAcquire(HInvoke* invoke) {
@@ -4809,7 +5039,7 @@ void IntrinsicCodeGeneratorX86_64::VisitVarHandleGetAndBitwiseAndAcquire(HInvoke
}
void IntrinsicLocationsBuilderX86_64::VisitVarHandleGetAndBitwiseAndRelease(HInvoke* invoke) {
- CreateVarHandleGetAndBitwiseOpLocations(invoke);
+ CreateVarHandleGetAndBitwiseOpLocations(invoke, codegen_);
}
void IntrinsicCodeGeneratorX86_64::VisitVarHandleGetAndBitwiseAndRelease(HInvoke* invoke) {
@@ -4822,7 +5052,7 @@ void IntrinsicCodeGeneratorX86_64::VisitVarHandleGetAndBitwiseAndRelease(HInvoke
}
void IntrinsicLocationsBuilderX86_64::VisitVarHandleGetAndBitwiseOr(HInvoke* invoke) {
- CreateVarHandleGetAndBitwiseOpLocations(invoke);
+ CreateVarHandleGetAndBitwiseOpLocations(invoke, codegen_);
}
void IntrinsicCodeGeneratorX86_64::VisitVarHandleGetAndBitwiseOr(HInvoke* invoke) {
@@ -4835,7 +5065,7 @@ void IntrinsicCodeGeneratorX86_64::VisitVarHandleGetAndBitwiseOr(HInvoke* invoke
}
void IntrinsicLocationsBuilderX86_64::VisitVarHandleGetAndBitwiseOrAcquire(HInvoke* invoke) {
- CreateVarHandleGetAndBitwiseOpLocations(invoke);
+ CreateVarHandleGetAndBitwiseOpLocations(invoke, codegen_);
}
void IntrinsicCodeGeneratorX86_64::VisitVarHandleGetAndBitwiseOrAcquire(HInvoke* invoke) {
@@ -4848,7 +5078,7 @@ void IntrinsicCodeGeneratorX86_64::VisitVarHandleGetAndBitwiseOrAcquire(HInvoke*
}
void IntrinsicLocationsBuilderX86_64::VisitVarHandleGetAndBitwiseOrRelease(HInvoke* invoke) {
- CreateVarHandleGetAndBitwiseOpLocations(invoke);
+ CreateVarHandleGetAndBitwiseOpLocations(invoke, codegen_);
}
void IntrinsicCodeGeneratorX86_64::VisitVarHandleGetAndBitwiseOrRelease(HInvoke* invoke) {
@@ -4861,7 +5091,7 @@ void IntrinsicCodeGeneratorX86_64::VisitVarHandleGetAndBitwiseOrRelease(HInvoke*
}
void IntrinsicLocationsBuilderX86_64::VisitVarHandleGetAndBitwiseXor(HInvoke* invoke) {
- CreateVarHandleGetAndBitwiseOpLocations(invoke);
+ CreateVarHandleGetAndBitwiseOpLocations(invoke, codegen_);
}
void IntrinsicCodeGeneratorX86_64::VisitVarHandleGetAndBitwiseXor(HInvoke* invoke) {
@@ -4874,7 +5104,7 @@ void IntrinsicCodeGeneratorX86_64::VisitVarHandleGetAndBitwiseXor(HInvoke* invok
}
void IntrinsicLocationsBuilderX86_64::VisitVarHandleGetAndBitwiseXorAcquire(HInvoke* invoke) {
- CreateVarHandleGetAndBitwiseOpLocations(invoke);
+ CreateVarHandleGetAndBitwiseOpLocations(invoke, codegen_);
}
void IntrinsicCodeGeneratorX86_64::VisitVarHandleGetAndBitwiseXorAcquire(HInvoke* invoke) {
@@ -4887,7 +5117,7 @@ void IntrinsicCodeGeneratorX86_64::VisitVarHandleGetAndBitwiseXorAcquire(HInvoke
}
void IntrinsicLocationsBuilderX86_64::VisitVarHandleGetAndBitwiseXorRelease(HInvoke* invoke) {
- CreateVarHandleGetAndBitwiseOpLocations(invoke);
+ CreateVarHandleGetAndBitwiseOpLocations(invoke, codegen_);
}
void IntrinsicCodeGeneratorX86_64::VisitVarHandleGetAndBitwiseXorRelease(HInvoke* invoke) {
diff --git a/compiler/optimizing/intrinsics_x86_64.h b/compiler/optimizing/intrinsics_x86_64.h
index 59fe815a94..4a76c5c8ec 100644
--- a/compiler/optimizing/intrinsics_x86_64.h
+++ b/compiler/optimizing/intrinsics_x86_64.h
@@ -19,6 +19,7 @@
#include "base/macros.h"
#include "intrinsics.h"
+#include "intrinsics_list.h"
namespace art HIDDEN {
@@ -39,9 +40,7 @@ class IntrinsicLocationsBuilderX86_64 final : public IntrinsicVisitor {
#define OPTIMIZING_INTRINSICS(Name, IsStatic, NeedsEnvironmentOrCache, SideEffects, Exceptions, ...) \
void Visit ## Name(HInvoke* invoke) override;
-#include "intrinsics_list.h"
- INTRINSICS_LIST(OPTIMIZING_INTRINSICS)
-#undef INTRINSICS_LIST
+ ART_INTRINSICS_LIST(OPTIMIZING_INTRINSICS)
#undef OPTIMIZING_INTRINSICS
// Check whether an invoke is an intrinsic, and if so, create a location summary. Returns whether
@@ -64,9 +63,7 @@ class IntrinsicCodeGeneratorX86_64 final : public IntrinsicVisitor {
#define OPTIMIZING_INTRINSICS(Name, IsStatic, NeedsEnvironmentOrCache, SideEffects, Exceptions, ...) \
void Visit ## Name(HInvoke* invoke) override;
-#include "intrinsics_list.h"
- INTRINSICS_LIST(OPTIMIZING_INTRINSICS)
-#undef INTRINSICS_LIST
+ ART_INTRINSICS_LIST(OPTIMIZING_INTRINSICS)
#undef OPTIMIZING_INTRINSICS
private:
@@ -74,6 +71,10 @@ class IntrinsicCodeGeneratorX86_64 final : public IntrinsicVisitor {
ArenaAllocator* GetAllocator();
+ void HandleValueOf(HInvoke* invoke,
+ const IntrinsicVisitor::ValueOfInfo& info,
+ DataType::Type type);
+
CodeGeneratorX86_64* const codegen_;
DISALLOW_COPY_AND_ASSIGN(IntrinsicCodeGeneratorX86_64);
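Note on the header hunks above: both visitor classes now expand a single ART_INTRINSICS_LIST(OPTIMIZING_INTRINSICS) macro instead of re-including intrinsics_list.h around each declaration block. A minimal, self-contained sketch of this X-macro pattern, with hypothetical intrinsic names and the per-entry macro reduced to one parameter, is:

    // Hypothetical list macro; the real ART_INTRINSICS_LIST has many entries
    // and passes extra parameters (IsStatic, SideEffects, ...).
    #define EXAMPLE_INTRINSICS_LIST(V) \
      V(MathAbsInt)                    \
      V(StringLength)                  \
      V(IntegerValueOf)

    class ExampleIntrinsicVisitor {
     public:
    #define DECLARE_VISIT(Name) void Visit ## Name();
      EXAMPLE_INTRINSICS_LIST(DECLARE_VISIT)
    #undef DECLARE_VISIT
    };
    // Expands to: void VisitMathAbsInt(); void VisitStringLength(); void VisitIntegerValueOf();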
diff --git a/compiler/optimizing/jit_patches_arm64.cc b/compiler/optimizing/jit_patches_arm64.cc
new file mode 100644
index 0000000000..76ba182acb
--- /dev/null
+++ b/compiler/optimizing/jit_patches_arm64.cc
@@ -0,0 +1,104 @@
+/*
+ * Copyright (C) 2023 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "code_generation_data.h"
+#include "gc_root.h"
+#include "jit_patches_arm64.h"
+
+namespace art HIDDEN {
+
+namespace arm64 {
+
+vixl::aarch64::Literal<uint32_t>* JitPatchesARM64::DeduplicateUint32Literal(
+ uint32_t value) {
+ return uint32_literals_.GetOrCreate(
+ value,
+ [this, value]() {
+ return GetVIXLAssembler()->CreateLiteralDestroyedWithPool<uint32_t>(value);
+ });
+}
+
+vixl::aarch64::Literal<uint64_t>* JitPatchesARM64::DeduplicateUint64Literal(
+ uint64_t value) {
+ return uint64_literals_.GetOrCreate(
+ value,
+ [this, value]() {
+ return GetVIXLAssembler()->CreateLiteralDestroyedWithPool<uint64_t>(value);
+ });
+}
+
+static void PatchJitRootUse(uint8_t* code,
+ const uint8_t* roots_data,
+ vixl::aarch64::Literal<uint32_t>* literal,
+ uint64_t index_in_table) {
+ uint32_t literal_offset = literal->GetOffset();
+ uintptr_t address =
+ reinterpret_cast<uintptr_t>(roots_data) + index_in_table * sizeof(GcRoot<mirror::Object>);
+ uint8_t* data = code + literal_offset;
+ reinterpret_cast<uint32_t*>(data)[0] = dchecked_integral_cast<uint32_t>(address);
+}
+
+void JitPatchesARM64::EmitJitRootPatches(
+ uint8_t* code,
+ const uint8_t* roots_data,
+ const CodeGenerationData& code_generation_data) const {
+ for (const auto& entry : jit_string_patches_) {
+ const StringReference& string_reference = entry.first;
+ vixl::aarch64::Literal<uint32_t>* table_entry_literal = entry.second;
+ uint64_t index_in_table = code_generation_data.GetJitStringRootIndex(string_reference);
+ PatchJitRootUse(code, roots_data, table_entry_literal, index_in_table);
+ }
+ for (const auto& entry : jit_class_patches_) {
+ const TypeReference& type_reference = entry.first;
+ vixl::aarch64::Literal<uint32_t>* table_entry_literal = entry.second;
+ uint64_t index_in_table = code_generation_data.GetJitClassRootIndex(type_reference);
+ PatchJitRootUse(code, roots_data, table_entry_literal, index_in_table);
+ }
+}
+
+vixl::aarch64::Literal<uint32_t>* JitPatchesARM64::DeduplicateBootImageAddressLiteral(
+ uint64_t address) {
+ return DeduplicateUint32Literal(dchecked_integral_cast<uint32_t>(address));
+}
+
+vixl::aarch64::Literal<uint32_t>* JitPatchesARM64::DeduplicateJitStringLiteral(
+ const DexFile& dex_file,
+ dex::StringIndex string_index,
+ Handle<mirror::String> handle,
+ CodeGenerationData* code_generation_data) {
+ code_generation_data->ReserveJitStringRoot(StringReference(&dex_file, string_index), handle);
+ return jit_string_patches_.GetOrCreate(
+ StringReference(&dex_file, string_index),
+ [this]() {
+ return GetVIXLAssembler()->CreateLiteralDestroyedWithPool<uint32_t>(/* value= */ 0u);
+ });
+}
+
+vixl::aarch64::Literal<uint32_t>* JitPatchesARM64::DeduplicateJitClassLiteral(
+ const DexFile& dex_file,
+ dex::TypeIndex type_index,
+ Handle<mirror::Class> handle,
+ CodeGenerationData* code_generation_data) {
+ code_generation_data->ReserveJitClassRoot(TypeReference(&dex_file, type_index), handle);
+ return jit_class_patches_.GetOrCreate(
+ TypeReference(&dex_file, type_index),
+ [this]() {
+ return GetVIXLAssembler()->CreateLiteralDestroyedWithPool<uint32_t>(/* value= */ 0u);
+ });
+}
+
+} // namespace arm64
+} // namespace art
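Note on PatchJitRootUse() above: it writes, into the literal slot of the generated code, the 32-bit address of the matching entry in the JIT roots table. A standalone sketch of that arithmetic, with kRootSlotSize standing in for sizeof(GcRoot<mirror::Object>) and memcpy used instead of the raw pointer cast, is:

    #include <cstddef>
    #include <cstdint>
    #include <cstring>

    constexpr std::size_t kRootSlotSize = sizeof(uint32_t);  // Stand-in for sizeof(GcRoot<mirror::Object>).

    void PatchRootSlot(uint8_t* code,
                       const uint8_t* roots_data,
                       uint32_t literal_offset,
                       uint64_t index_in_table) {
      // Address of the roots-table entry the generated code should load from.
      uintptr_t address =
          reinterpret_cast<uintptr_t>(roots_data) + index_in_table * kRootSlotSize;
      uint32_t value = static_cast<uint32_t>(address);
      std::memcpy(code + literal_offset, &value, sizeof(value));  // Patch the literal slot in place.
    }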
diff --git a/compiler/optimizing/jit_patches_arm64.h b/compiler/optimizing/jit_patches_arm64.h
new file mode 100644
index 0000000000..f928723f58
--- /dev/null
+++ b/compiler/optimizing/jit_patches_arm64.h
@@ -0,0 +1,107 @@
+/*
+ * Copyright (C) 2023 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_COMPILER_OPTIMIZING_JIT_PATCHES_ARM64_H_
+#define ART_COMPILER_OPTIMIZING_JIT_PATCHES_ARM64_H_
+
+#include "base/arena_allocator.h"
+#include "base/arena_containers.h"
+#include "dex/dex_file.h"
+#include "dex/string_reference.h"
+#include "dex/type_reference.h"
+#include "handle.h"
+#include "mirror/class.h"
+#include "mirror/string.h"
+#include "utils/arm64/assembler_arm64.h"
+
+// TODO(VIXL): Make VIXL compile with -Wshadow.
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wshadow"
+#include "aarch64/disasm-aarch64.h"
+#include "aarch64/macro-assembler-aarch64.h"
+#pragma GCC diagnostic pop
+
+namespace art HIDDEN {
+
+class CodeGenerationData;
+
+namespace arm64 {
+
+/**
+ * Helper for emitting string or class literals into JIT-generated code;
+ * it can be shared between different compilers.
+ */
+class JitPatchesARM64 {
+ public:
+ JitPatchesARM64(Arm64Assembler* assembler, ArenaAllocator* allocator) :
+ assembler_(assembler),
+ uint32_literals_(std::less<uint32_t>(),
+ allocator->Adapter(kArenaAllocCodeGenerator)),
+ uint64_literals_(std::less<uint64_t>(),
+ allocator->Adapter(kArenaAllocCodeGenerator)),
+ jit_string_patches_(StringReferenceValueComparator(),
+ allocator->Adapter(kArenaAllocCodeGenerator)),
+ jit_class_patches_(TypeReferenceValueComparator(),
+ allocator->Adapter(kArenaAllocCodeGenerator)) {
+ }
+
+ using Uint64ToLiteralMap = ArenaSafeMap<uint64_t, vixl::aarch64::Literal<uint64_t>*>;
+ using Uint32ToLiteralMap = ArenaSafeMap<uint32_t, vixl::aarch64::Literal<uint32_t>*>;
+ using StringToLiteralMap = ArenaSafeMap<StringReference,
+ vixl::aarch64::Literal<uint32_t>*,
+ StringReferenceValueComparator>;
+ using TypeToLiteralMap = ArenaSafeMap<TypeReference,
+ vixl::aarch64::Literal<uint32_t>*,
+ TypeReferenceValueComparator>;
+
+ vixl::aarch64::Literal<uint32_t>* DeduplicateUint32Literal(uint32_t value);
+ vixl::aarch64::Literal<uint64_t>* DeduplicateUint64Literal(uint64_t value);
+ vixl::aarch64::Literal<uint32_t>* DeduplicateBootImageAddressLiteral(uint64_t address);
+ vixl::aarch64::Literal<uint32_t>* DeduplicateJitStringLiteral(
+ const DexFile& dex_file,
+ dex::StringIndex string_index,
+ Handle<mirror::String> handle,
+ CodeGenerationData* code_generation_data);
+ vixl::aarch64::Literal<uint32_t>* DeduplicateJitClassLiteral(
+ const DexFile& dex_file,
+ dex::TypeIndex type_index,
+ Handle<mirror::Class> handle,
+ CodeGenerationData* code_generation_data);
+
+ void EmitJitRootPatches(uint8_t* code,
+ const uint8_t* roots_data,
+ const CodeGenerationData& code_generation_data) const;
+
+ Arm64Assembler* GetAssembler() const { return assembler_; }
+ vixl::aarch64::MacroAssembler* GetVIXLAssembler() { return GetAssembler()->GetVIXLAssembler(); }
+
+ private:
+ Arm64Assembler* assembler_;
+ // Deduplication map for 32-bit literals, used by the JIT for boot image addresses.
+ Uint32ToLiteralMap uint32_literals_;
+ // Deduplication map for 64-bit literals, used by the JIT for method addresses or method code.
+ Uint64ToLiteralMap uint64_literals_;
+ // Patches for string literals in JIT compiled code.
+ StringToLiteralMap jit_string_patches_;
+ // Patches for class literals in JIT compiled code.
+ TypeToLiteralMap jit_class_patches_;
+};
+
+} // namespace arm64
+
+} // namespace art
+
+#endif // ART_COMPILER_OPTIMIZING_JIT_PATCHES_ARM64_H_
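Note on the Deduplicate*() entry points declared above: they all funnel through a get-or-create lookup so that each distinct constant, string, or class ends up with exactly one pool literal. A rough standalone restatement of that pattern, using std::map in place of ArenaSafeMap and plain values in place of VIXL literals, is:

    #include <cstdint>
    #include <map>

    template <typename Key, typename Value, typename Factory>
    Value* GetOrCreate(std::map<Key, Value*>& literals, const Key& key, Factory create) {
      auto it = literals.find(key);
      if (it != literals.end()) {
        return it->second;  // A literal was already emitted for this key: reuse it.
      }
      Value* value = create();  // First request: create a single pool entry.
      literals.emplace(key, value);
      return value;
    }

    // Usage sketch: both calls below return the same pointer.
    // std::map<uint32_t, uint32_t*> pool;
    // uint32_t* a = GetOrCreate(pool, 42u, [] { return new uint32_t(42u); });
    // uint32_t* b = GetOrCreate(pool, 42u, [] { return new uint32_t(42u); });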
diff --git a/compiler/optimizing/linearize_test.cc b/compiler/optimizing/linearize_test.cc
index 01daa23511..6f4f2b6cf6 100644
--- a/compiler/optimizing/linearize_test.cc
+++ b/compiler/optimizing/linearize_test.cc
@@ -55,6 +55,7 @@ void LinearizeTest::TestCode(const std::vector<uint16_t>& data,
}
TEST_F(LinearizeTest, CFG1) {
+ TEST_DISABLED_FOR_RISCV64();
// Structure of this graph (+ are back edges)
// Block0
// |
@@ -80,6 +81,7 @@ TEST_F(LinearizeTest, CFG1) {
}
TEST_F(LinearizeTest, CFG2) {
+ TEST_DISABLED_FOR_RISCV64();
// Structure of this graph (+ are back edges)
// Block0
// |
@@ -105,6 +107,7 @@ TEST_F(LinearizeTest, CFG2) {
}
TEST_F(LinearizeTest, CFG3) {
+ TEST_DISABLED_FOR_RISCV64();
// Structure of this graph (+ are back edges)
// Block0
// |
@@ -132,6 +135,7 @@ TEST_F(LinearizeTest, CFG3) {
}
TEST_F(LinearizeTest, CFG4) {
+ TEST_DISABLED_FOR_RISCV64();
/* Structure of this graph (+ are back edges)
// Block0
// |
@@ -162,6 +166,7 @@ TEST_F(LinearizeTest, CFG4) {
}
TEST_F(LinearizeTest, CFG5) {
+ TEST_DISABLED_FOR_RISCV64();
/* Structure of this graph (+ are back edges)
// Block0
// |
@@ -192,6 +197,7 @@ TEST_F(LinearizeTest, CFG5) {
}
TEST_F(LinearizeTest, CFG6) {
+ TEST_DISABLED_FOR_RISCV64();
// Block0
// |
// Block1
@@ -218,6 +224,7 @@ TEST_F(LinearizeTest, CFG6) {
}
TEST_F(LinearizeTest, CFG7) {
+ TEST_DISABLED_FOR_RISCV64();
// Structure of this graph (+ are back edges)
// Block0
// |
diff --git a/compiler/optimizing/live_ranges_test.cc b/compiler/optimizing/live_ranges_test.cc
index fb1a23eef4..7e488ba41d 100644
--- a/compiler/optimizing/live_ranges_test.cc
+++ b/compiler/optimizing/live_ranges_test.cc
@@ -47,6 +47,7 @@ HGraph* LiveRangesTest::BuildGraph(const std::vector<uint16_t>& data) {
}
TEST_F(LiveRangesTest, CFG1) {
+ TEST_DISABLED_FOR_RISCV64();
/*
* Test the following snippet:
* return 0;
@@ -81,6 +82,7 @@ TEST_F(LiveRangesTest, CFG1) {
}
TEST_F(LiveRangesTest, CFG2) {
+ TEST_DISABLED_FOR_RISCV64();
/*
* Test the following snippet:
* var a = 0;
@@ -125,6 +127,7 @@ TEST_F(LiveRangesTest, CFG2) {
}
TEST_F(LiveRangesTest, CFG3) {
+ TEST_DISABLED_FOR_RISCV64();
/*
* Test the following snippet:
* var a = 0;
@@ -194,6 +197,7 @@ TEST_F(LiveRangesTest, CFG3) {
}
TEST_F(LiveRangesTest, Loop1) {
+ TEST_DISABLED_FOR_RISCV64();
/*
* Test the following snippet:
* var a = 0;
@@ -270,6 +274,7 @@ TEST_F(LiveRangesTest, Loop1) {
}
TEST_F(LiveRangesTest, Loop2) {
+ TEST_DISABLED_FOR_RISCV64();
/*
* Test the following snippet:
* var a = 0;
@@ -341,6 +346,7 @@ TEST_F(LiveRangesTest, Loop2) {
}
TEST_F(LiveRangesTest, CFG4) {
+ TEST_DISABLED_FOR_RISCV64();
/*
* Test the following snippet:
* var a = 0;
diff --git a/compiler/optimizing/liveness_test.cc b/compiler/optimizing/liveness_test.cc
index 0b421cf9e6..6af07aea4e 100644
--- a/compiler/optimizing/liveness_test.cc
+++ b/compiler/optimizing/liveness_test.cc
@@ -70,6 +70,7 @@ void LivenessTest::TestCode(const std::vector<uint16_t>& data, const char* expec
}
TEST_F(LivenessTest, CFG1) {
+ TEST_DISABLED_FOR_RISCV64();
const char* expected =
"Block 0\n"
" live in: (0)\n"
@@ -93,6 +94,7 @@ TEST_F(LivenessTest, CFG1) {
}
TEST_F(LivenessTest, CFG2) {
+ TEST_DISABLED_FOR_RISCV64();
const char* expected =
"Block 0\n"
" live in: (0)\n"
@@ -115,6 +117,7 @@ TEST_F(LivenessTest, CFG2) {
}
TEST_F(LivenessTest, CFG3) {
+ TEST_DISABLED_FOR_RISCV64();
const char* expected =
"Block 0\n" // entry block
" live in: (000)\n"
@@ -144,6 +147,7 @@ TEST_F(LivenessTest, CFG3) {
}
TEST_F(LivenessTest, CFG4) {
+ TEST_DISABLED_FOR_RISCV64();
// var a;
// if (0 == 0) {
// a = 5;
@@ -192,6 +196,7 @@ TEST_F(LivenessTest, CFG4) {
}
TEST_F(LivenessTest, CFG5) {
+ TEST_DISABLED_FOR_RISCV64();
// var a = 0;
// if (0 == 0) {
// } else {
@@ -237,6 +242,7 @@ TEST_F(LivenessTest, CFG5) {
}
TEST_F(LivenessTest, Loop1) {
+ TEST_DISABLED_FOR_RISCV64();
// Simple loop with one preheader and one back edge.
// var a = 0;
// while (a == a) {
@@ -283,6 +289,7 @@ TEST_F(LivenessTest, Loop1) {
}
TEST_F(LivenessTest, Loop3) {
+ TEST_DISABLED_FOR_RISCV64();
// Test that the returned value stays live in a preceding loop.
// var a = 0;
// while (a == a) {
@@ -330,6 +337,7 @@ TEST_F(LivenessTest, Loop3) {
TEST_F(LivenessTest, Loop4) {
+ TEST_DISABLED_FOR_RISCV64();
// Make sure we support a preheader of a loop not being the first predecessor
// in the predecessor list of the header.
// var a = 0;
@@ -382,6 +390,7 @@ TEST_F(LivenessTest, Loop4) {
}
TEST_F(LivenessTest, Loop5) {
+ TEST_DISABLED_FOR_RISCV64();
// Make sure we create a preheader of a loop when a header originally has two
// incoming blocks and one back edge.
// Bitsets are made of:
@@ -438,6 +447,7 @@ TEST_F(LivenessTest, Loop5) {
}
TEST_F(LivenessTest, Loop6) {
+ TEST_DISABLED_FOR_RISCV64();
// Bitsets are made of:
// (constant0, constant4, constant5, phi in block 2)
const char* expected =
@@ -489,6 +499,7 @@ TEST_F(LivenessTest, Loop6) {
TEST_F(LivenessTest, Loop7) {
+ TEST_DISABLED_FOR_RISCV64();
// Bitsets are made of:
// (constant0, constant4, constant5, phi in block 2, phi in block 6)
const char* expected =
@@ -543,6 +554,7 @@ TEST_F(LivenessTest, Loop7) {
}
TEST_F(LivenessTest, Loop8) {
+ TEST_DISABLED_FOR_RISCV64();
// var a = 0;
// while (a == a) {
// a = a + a;
diff --git a/compiler/optimizing/load_store_analysis.cc b/compiler/optimizing/load_store_analysis.cc
index f1c50ac03c..474c3bd92f 100644
--- a/compiler/optimizing/load_store_analysis.cc
+++ b/compiler/optimizing/load_store_analysis.cc
@@ -41,7 +41,7 @@ static bool CanBinaryOpAndIndexAlias(const HBinaryOperation* idx1,
// We currently only support Add and Sub operations.
return true;
}
- if (idx1->AsBinaryOperation()->GetLeastConstantLeft() != idx2) {
+ if (idx1->GetLeastConstantLeft() != idx2) {
// Cannot analyze [i+CONST1] and [j].
return true;
}
@@ -51,9 +51,9 @@ static bool CanBinaryOpAndIndexAlias(const HBinaryOperation* idx1,
// Since 'i' are the same in [i+CONST] and [i],
// further compare [CONST] and [0].
- int64_t l1 = idx1->IsAdd() ?
- idx1->GetConstantRight()->AsIntConstant()->GetValue() :
- -idx1->GetConstantRight()->AsIntConstant()->GetValue();
+ int64_t l1 = idx1->IsAdd()
+ ? idx1->GetConstantRight()->AsIntConstant()->GetValue()
+ : -idx1->GetConstantRight()->AsIntConstant()->GetValue();
int64_t l2 = 0;
int64_t h1 = l1 + (vector_length1 - 1);
int64_t h2 = l2 + (vector_length2 - 1);
@@ -68,8 +68,7 @@ static bool CanBinaryOpsAlias(const HBinaryOperation* idx1,
// We currently only support Add and Sub operations.
return true;
}
- if (idx1->AsBinaryOperation()->GetLeastConstantLeft() !=
- idx2->AsBinaryOperation()->GetLeastConstantLeft()) {
+ if (idx1->GetLeastConstantLeft() != idx2->GetLeastConstantLeft()) {
// Cannot analyze [i+CONST1] and [j+CONST2].
return true;
}
@@ -80,54 +79,17 @@ static bool CanBinaryOpsAlias(const HBinaryOperation* idx1,
// Since 'i' are the same in [i+CONST1] and [i+CONST2],
// further compare [CONST1] and [CONST2].
- int64_t l1 = idx1->IsAdd() ?
- idx1->GetConstantRight()->AsIntConstant()->GetValue() :
- -idx1->GetConstantRight()->AsIntConstant()->GetValue();
- int64_t l2 = idx2->IsAdd() ?
- idx2->GetConstantRight()->AsIntConstant()->GetValue() :
- -idx2->GetConstantRight()->AsIntConstant()->GetValue();
+ int64_t l1 = idx1->IsAdd()
+ ? idx1->GetConstantRight()->AsIntConstant()->GetValue()
+ : -idx1->GetConstantRight()->AsIntConstant()->GetValue();
+ int64_t l2 = idx2->IsAdd()
+ ? idx2->GetConstantRight()->AsIntConstant()->GetValue()
+ : -idx2->GetConstantRight()->AsIntConstant()->GetValue();
int64_t h1 = l1 + (vector_length1 - 1);
int64_t h2 = l2 + (vector_length2 - 1);
return CanIntegerRangesOverlap(l1, h1, l2, h2);
}
-// Make sure we mark any writes/potential writes to heap-locations within partially
-// escaped values as escaping.
-void ReferenceInfo::PrunePartialEscapeWrites() {
- DCHECK(subgraph_ != nullptr);
- if (!subgraph_->IsValid()) {
- // All paths escape.
- return;
- }
- HGraph* graph = reference_->GetBlock()->GetGraph();
- ArenaBitVector additional_exclusions(
- allocator_, graph->GetBlocks().size(), false, kArenaAllocLSA);
- for (const HUseListNode<HInstruction*>& use : reference_->GetUses()) {
- const HInstruction* user = use.GetUser();
- if (!additional_exclusions.IsBitSet(user->GetBlock()->GetBlockId()) &&
- subgraph_->ContainsBlock(user->GetBlock()) &&
- (user->IsUnresolvedInstanceFieldSet() || user->IsUnresolvedStaticFieldSet() ||
- user->IsInstanceFieldSet() || user->IsStaticFieldSet() || user->IsArraySet()) &&
- (reference_ == user->InputAt(0)) &&
- std::any_of(subgraph_->UnreachableBlocks().begin(),
- subgraph_->UnreachableBlocks().end(),
- [&](const HBasicBlock* excluded) -> bool {
- return reference_->GetBlock()->GetGraph()->PathBetween(excluded,
- user->GetBlock());
- })) {
- // This object had memory written to it somewhere, if it escaped along
- // some paths prior to the current block this write also counts as an
- // escape.
- additional_exclusions.SetBit(user->GetBlock()->GetBlockId());
- }
- }
- if (UNLIKELY(additional_exclusions.IsAnyBitSet())) {
- for (uint32_t exc : additional_exclusions.Indexes()) {
- subgraph_->RemoveBlock(graph->GetBlocks()[exc]);
- }
- }
-}
-
bool HeapLocationCollector::InstructionEligibleForLSERemoval(HInstruction* inst) const {
if (inst->IsNewInstance()) {
return !inst->AsNewInstance()->NeedsChecks();
@@ -149,37 +111,6 @@ bool HeapLocationCollector::InstructionEligibleForLSERemoval(HInstruction* inst)
}
}
-void ReferenceInfo::CollectPartialEscapes(HGraph* graph) {
- ScopedArenaAllocator saa(graph->GetArenaStack());
- ArenaBitVector seen_instructions(&saa, graph->GetCurrentInstructionId(), false, kArenaAllocLSA);
- // Get regular escapes.
- ScopedArenaVector<HInstruction*> additional_escape_vectors(saa.Adapter(kArenaAllocLSA));
- LambdaEscapeVisitor scan_instructions([&](HInstruction* escape) -> bool {
- HandleEscape(escape);
- // LSE can't track heap-locations through Phi and Select instructions so we
- // need to assume all escapes from these are escapes for the base reference.
- if ((escape->IsPhi() || escape->IsSelect()) && !seen_instructions.IsBitSet(escape->GetId())) {
- seen_instructions.SetBit(escape->GetId());
- additional_escape_vectors.push_back(escape);
- }
- return true;
- });
- additional_escape_vectors.push_back(reference_);
- while (!additional_escape_vectors.empty()) {
- HInstruction* ref = additional_escape_vectors.back();
- additional_escape_vectors.pop_back();
- DCHECK(ref == reference_ || ref->IsPhi() || ref->IsSelect()) << *ref;
- VisitEscapes(ref, scan_instructions);
- }
-
- // Mark irreducible loop headers as escaping since they cannot be tracked through.
- for (HBasicBlock* blk : graph->GetActiveBlocks()) {
- if (blk->IsLoopHeader() && blk->GetLoopInformation()->IsIrreducible()) {
- HandleEscape(blk);
- }
- }
-}
-
void HeapLocationCollector::DumpReferenceStats(OptimizingCompilerStats* stats) {
if (stats == nullptr) {
return;
@@ -197,14 +128,6 @@ void HeapLocationCollector::DumpReferenceStats(OptimizingCompilerStats* stats) {
MaybeRecordStat(stats, MethodCompilationStat::kFullLSEPossible);
}
}
- // TODO This is an estimate of the number of allocations we will be able
- // to (partially) remove. As additional work is done this can be refined.
- if (ri->IsPartialSingleton() && instruction->IsNewInstance() &&
- ri->GetNoEscapeSubgraph()->ContainsBlock(instruction->GetBlock()) &&
- !ri->GetNoEscapeSubgraph()->GetExcludedCohorts().empty() &&
- InstructionEligibleForLSERemoval(instruction)) {
- MaybeRecordStat(stats, MethodCompilationStat::kPartialLSEPossible);
- }
}
}
@@ -269,6 +192,13 @@ bool HeapLocationCollector::CanArrayElementsAlias(const HInstruction* idx1,
}
bool LoadStoreAnalysis::Run() {
+ // Currently load/store analysis can't handle predicated loads/stores; specifically, pairs of
+ // memory operations with different predicates.
+ // TODO: support predicated SIMD.
+ if (graph_->HasPredicatedSIMD()) {
+ return false;
+ }
+
for (HBasicBlock* block : graph_->GetReversePostOrder()) {
heap_location_collector_.VisitBasicBlock(block);
}
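Note on the reformatted range computations in CanBinaryOpAndIndexAlias() and CanBinaryOpsAlias() above: they boil down to an interval-intersection test over the touched indices. A worked standalone sketch, assuming CanIntegerRangesOverlap() performs the usual closed-interval overlap check, is:

    #include <algorithm>
    #include <cstdint>

    // Accesses [i + c1] and [i + c2] with vector lengths n1 and n2 touch the
    // index ranges [c1, c1 + n1 - 1] and [c2, c2 + n2 - 1]; they can alias
    // only if those closed ranges intersect.
    bool RangesOverlap(int64_t l1, int64_t h1, int64_t l2, int64_t h2) {
      return std::max(l1, l2) <= std::min(h1, h2);
    }

    bool MayAlias(int64_t c1, int64_t n1, int64_t c2, int64_t n2) {
      return RangesOverlap(c1, c1 + n1 - 1, c2, c2 + n2 - 1);
    }

    // Example: [i + 0] with length 4 covers i..i+3 and [i + 4] with length 4
    // covers i+4..i+7, so MayAlias(0, 4, 4, 4) is false (no aliasing).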
diff --git a/compiler/optimizing/load_store_analysis.h b/compiler/optimizing/load_store_analysis.h
index c46a5b9cc1..4a630ddf8f 100644
--- a/compiler/optimizing/load_store_analysis.h
+++ b/compiler/optimizing/load_store_analysis.h
@@ -25,65 +25,26 @@
#include "base/scoped_arena_containers.h"
#include "base/stl_util.h"
#include "escape.h"
-#include "execution_subgraph.h"
#include "nodes.h"
#include "optimizing/optimizing_compiler_stats.h"
namespace art HIDDEN {
-enum class LoadStoreAnalysisType {
- kBasic,
- kNoPredicatedInstructions,
- kFull,
-};
-
// A ReferenceInfo contains additional info about a reference such as
// whether it's a singleton, returned, etc.
class ReferenceInfo : public DeletableArenaObject<kArenaAllocLSA> {
public:
- ReferenceInfo(HInstruction* reference,
- ScopedArenaAllocator* allocator,
- size_t pos,
- LoadStoreAnalysisType elimination_type)
+ ReferenceInfo(HInstruction* reference, size_t pos)
: reference_(reference),
position_(pos),
is_singleton_(true),
is_singleton_and_not_returned_(true),
- is_singleton_and_not_deopt_visible_(true),
- allocator_(allocator),
- subgraph_(nullptr) {
- // TODO We can do this in one pass.
- // TODO NewArray is possible but will need to get a handle on how to deal with the dynamic loads
- // for now just ignore it.
- bool can_be_partial = elimination_type != LoadStoreAnalysisType::kBasic &&
- (/* reference_->IsNewArray() || */ reference_->IsNewInstance());
- if (can_be_partial) {
- subgraph_.reset(
- new (allocator) ExecutionSubgraph(reference->GetBlock()->GetGraph(), allocator));
- CollectPartialEscapes(reference_->GetBlock()->GetGraph());
- }
+ is_singleton_and_not_deopt_visible_(true) {
CalculateEscape(reference_,
nullptr,
&is_singleton_,
&is_singleton_and_not_returned_,
&is_singleton_and_not_deopt_visible_);
- if (can_be_partial) {
- if (elimination_type == LoadStoreAnalysisType::kNoPredicatedInstructions) {
- // This is to mark writes to partially escaped values as also part of the escaped subset.
- // TODO We can avoid this if we have a 'ConditionalWrite' instruction. Will require testing
- // to see if the additional branches are worth it.
- PrunePartialEscapeWrites();
- }
- DCHECK(subgraph_ != nullptr);
- subgraph_->Finalize();
- } else {
- DCHECK(subgraph_ == nullptr);
- }
- }
-
- const ExecutionSubgraph* GetNoEscapeSubgraph() const {
- DCHECK(IsPartialSingleton());
- return subgraph_.get();
}
HInstruction* GetReference() const {
@@ -101,16 +62,6 @@ class ReferenceInfo : public DeletableArenaObject<kArenaAllocLSA> {
return is_singleton_;
}
- // This is a singleton and there are paths that don't escape the method
- bool IsPartialSingleton() const {
- auto ref = GetReference();
- // TODO NewArray is possible but will need to get a handle on how to deal with the dynamic loads
- // for now just ignore it.
- return (/* ref->IsNewArray() || */ ref->IsNewInstance()) &&
- subgraph_ != nullptr &&
- subgraph_->IsValid();
- }
-
// Returns true if reference_ is a singleton and not returned to the caller or
// used as an environment local of an HDeoptimize instruction.
// The allocation and stores into reference_ may be eliminated for such cases.
@@ -126,19 +77,6 @@ class ReferenceInfo : public DeletableArenaObject<kArenaAllocLSA> {
}
private:
- void CollectPartialEscapes(HGraph* graph);
- void HandleEscape(HBasicBlock* escape) {
- DCHECK(subgraph_ != nullptr);
- subgraph_->RemoveBlock(escape);
- }
- void HandleEscape(HInstruction* escape) {
- HandleEscape(escape->GetBlock());
- }
-
- // Make sure we mark any writes/potential writes to heap-locations within partially
- // escaped values as escaping.
- void PrunePartialEscapeWrites();
-
HInstruction* const reference_;
const size_t position_; // position in HeapLocationCollector's ref_info_array_.
@@ -149,10 +87,6 @@ class ReferenceInfo : public DeletableArenaObject<kArenaAllocLSA> {
// Is singleton and not used as an environment local of HDeoptimize.
bool is_singleton_and_not_deopt_visible_;
- ScopedArenaAllocator* allocator_;
-
- std::unique_ptr<ExecutionSubgraph> subgraph_;
-
DISALLOW_COPY_AND_ASSIGN(ReferenceInfo);
};
@@ -249,16 +183,13 @@ class HeapLocationCollector : public HGraphVisitor {
// aliasing matrix of 8 heap locations.
static constexpr uint32_t kInitialAliasingMatrixBitVectorSize = 32;
- HeapLocationCollector(HGraph* graph,
- ScopedArenaAllocator* allocator,
- LoadStoreAnalysisType lse_type)
+ HeapLocationCollector(HGraph* graph, ScopedArenaAllocator* allocator)
: HGraphVisitor(graph),
allocator_(allocator),
ref_info_array_(allocator->Adapter(kArenaAllocLSA)),
heap_locations_(allocator->Adapter(kArenaAllocLSA)),
aliasing_matrix_(allocator, kInitialAliasingMatrixBitVectorSize, true, kArenaAllocLSA),
- has_heap_stores_(false),
- lse_type_(lse_type) {
+ has_heap_stores_(false) {
aliasing_matrix_.ClearAllBits();
}
@@ -272,12 +203,6 @@ class HeapLocationCollector : public HGraphVisitor {
ref_info_array_.clear();
}
- size_t CountPartialSingletons() const {
- return std::count_if(ref_info_array_.begin(),
- ref_info_array_.end(),
- [](ReferenceInfo* ri) { return ri->IsPartialSingleton(); });
- }
-
size_t GetNumberOfHeapLocations() const {
return heap_locations_.size();
}
@@ -507,7 +432,7 @@ class HeapLocationCollector : public HGraphVisitor {
ReferenceInfo* ref_info = FindReferenceInfoOf(instruction);
if (ref_info == nullptr) {
size_t pos = ref_info_array_.size();
- ref_info = new (allocator_) ReferenceInfo(instruction, allocator_, pos, lse_type_);
+ ref_info = new (allocator_) ReferenceInfo(instruction, pos);
ref_info_array_.push_back(ref_info);
}
return ref_info;
@@ -566,10 +491,6 @@ class HeapLocationCollector : public HGraphVisitor {
is_vec_op);
}
- void VisitPredicatedInstanceFieldGet(HPredicatedInstanceFieldGet* instruction) override {
- VisitFieldAccess(instruction->GetTarget(), instruction->GetFieldInfo());
- CreateReferenceInfoForReferenceType(instruction);
- }
void VisitInstanceFieldGet(HInstanceFieldGet* instruction) override {
VisitFieldAccess(instruction->InputAt(0), instruction->GetFieldInfo());
CreateReferenceInfoForReferenceType(instruction);
@@ -610,6 +531,7 @@ class HeapLocationCollector : public HGraphVisitor {
}
void VisitVecLoad(HVecLoad* instruction) override {
+ DCHECK(!instruction->IsPredicated());
HInstruction* array = instruction->InputAt(0);
HInstruction* index = instruction->InputAt(1);
DataType::Type type = instruction->GetPackedType();
@@ -618,6 +540,7 @@ class HeapLocationCollector : public HGraphVisitor {
}
void VisitVecStore(HVecStore* instruction) override {
+ DCHECK(!instruction->IsPredicated());
HInstruction* array = instruction->InputAt(0);
HInstruction* index = instruction->InputAt(1);
DataType::Type type = instruction->GetPackedType();
@@ -643,25 +566,16 @@ class HeapLocationCollector : public HGraphVisitor {
ArenaBitVector aliasing_matrix_; // aliasing info between each pair of locations.
bool has_heap_stores_; // If there is no heap stores, LSE acts as GVN with better
// alias analysis and won't be as effective.
- LoadStoreAnalysisType lse_type_;
DISALLOW_COPY_AND_ASSIGN(HeapLocationCollector);
};
class LoadStoreAnalysis {
public:
- // for_elimination controls whether we should keep track of escapes at a per-block level for
- // partial LSE.
explicit LoadStoreAnalysis(HGraph* graph,
OptimizingCompilerStats* stats,
- ScopedArenaAllocator* local_allocator,
- LoadStoreAnalysisType lse_type)
- : graph_(graph),
- stats_(stats),
- heap_location_collector_(
- graph,
- local_allocator,
- ExecutionSubgraph::CanAnalyse(graph_) ? lse_type : LoadStoreAnalysisType::kBasic) {}
+ ScopedArenaAllocator* local_allocator)
+ : graph_(graph), stats_(stats), heap_location_collector_(graph, local_allocator) {}
const HeapLocationCollector& GetHeapLocationCollector() const {
return heap_location_collector_;
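Note on the simplified constructor above: with the LoadStoreAnalysisType parameter gone, callers pass only the graph, the stats object, and a local allocator. A usage fragment mirroring the updated tests below (graph and allocator are assumed to come from the surrounding pass; not compilable on its own) is:

    ScopedArenaAllocator allocator(graph->GetArenaStack());
    LoadStoreAnalysis lsa(graph, /*stats=*/ nullptr, &allocator);
    if (lsa.Run()) {  // Run() now bails out early for graphs with predicated SIMD.
      const HeapLocationCollector& hlc = lsa.GetHeapLocationCollector();
      // Query the collected heap locations, e.g. hlc.GetNumberOfHeapLocations().
    }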
diff --git a/compiler/optimizing/load_store_analysis_test.cc b/compiler/optimizing/load_store_analysis_test.cc
index 865febbd31..947bf04923 100644
--- a/compiler/optimizing/load_store_analysis_test.cc
+++ b/compiler/optimizing/load_store_analysis_test.cc
@@ -27,8 +27,6 @@
#include "dex/dex_file_types.h"
#include "dex/method_reference.h"
#include "entrypoints/quick/quick_entrypoints_enum.h"
-#include "execution_subgraph.h"
-#include "execution_subgraph_test.h"
#include "gtest/gtest.h"
#include "handle.h"
#include "handle_scope.h"
@@ -52,13 +50,6 @@ class LoadStoreAnalysisTest : public CommonCompilerTest, public OptimizingUnitTe
return AdjacencyListGraph(graph_, GetAllocator(), entry_name, exit_name, adj);
}
- bool IsValidSubgraph(const ExecutionSubgraph* esg) {
- return ExecutionSubgraphTestHelper::CalculateValidity(graph_, esg);
- }
-
- bool IsValidSubgraph(const ExecutionSubgraph& esg) {
- return ExecutionSubgraphTestHelper::CalculateValidity(graph_, &esg);
- }
void CheckReachability(const AdjacencyListGraph& adj,
const std::vector<AdjacencyListGraph::Edge>& reach);
};
@@ -102,7 +93,7 @@ TEST_F(LoadStoreAnalysisTest, ArrayHeapLocations) {
// Test HeapLocationCollector initialization.
// Should be no heap locations, no operations on the heap.
ScopedArenaAllocator allocator(graph_->GetArenaStack());
- HeapLocationCollector heap_location_collector(graph_, &allocator, LoadStoreAnalysisType::kFull);
+ HeapLocationCollector heap_location_collector(graph_, &allocator);
ASSERT_EQ(heap_location_collector.GetNumberOfHeapLocations(), 0U);
ASSERT_FALSE(heap_location_collector.HasHeapStores());
@@ -201,7 +192,7 @@ TEST_F(LoadStoreAnalysisTest, FieldHeapLocations) {
// Test HeapLocationCollector initialization.
// Should be no heap locations, no operations on the heap.
ScopedArenaAllocator allocator(graph_->GetArenaStack());
- HeapLocationCollector heap_location_collector(graph_, &allocator, LoadStoreAnalysisType::kFull);
+ HeapLocationCollector heap_location_collector(graph_, &allocator);
ASSERT_EQ(heap_location_collector.GetNumberOfHeapLocations(), 0U);
ASSERT_FALSE(heap_location_collector.HasHeapStores());
@@ -283,7 +274,7 @@ TEST_F(LoadStoreAnalysisTest, ArrayIndexAliasingTest) {
body->AddInstruction(new (GetAllocator()) HReturnVoid());
ScopedArenaAllocator allocator(graph_->GetArenaStack());
- LoadStoreAnalysis lsa(graph_, nullptr, &allocator, LoadStoreAnalysisType::kBasic);
+ LoadStoreAnalysis lsa(graph_, nullptr, &allocator);
lsa.Run();
const HeapLocationCollector& heap_location_collector = lsa.GetHeapLocationCollector();
@@ -451,7 +442,7 @@ TEST_F(LoadStoreAnalysisTest, ArrayAliasingTest) {
entry->AddInstruction(vstore_i_add6_vlen2);
ScopedArenaAllocator allocator(graph_->GetArenaStack());
- LoadStoreAnalysis lsa(graph_, nullptr, &allocator, LoadStoreAnalysisType::kBasic);
+ LoadStoreAnalysis lsa(graph_, nullptr, &allocator);
lsa.Run();
const HeapLocationCollector& heap_location_collector = lsa.GetHeapLocationCollector();
@@ -611,7 +602,7 @@ TEST_F(LoadStoreAnalysisTest, ArrayIndexCalculationOverflowTest) {
entry->AddInstruction(arr_set_8);
ScopedArenaAllocator allocator(graph_->GetArenaStack());
- LoadStoreAnalysis lsa(graph_, nullptr, &allocator, LoadStoreAnalysisType::kBasic);
+ LoadStoreAnalysis lsa(graph_, nullptr, &allocator);
lsa.Run();
const HeapLocationCollector& heap_location_collector = lsa.GetHeapLocationCollector();
@@ -702,7 +693,7 @@ TEST_F(LoadStoreAnalysisTest, TestHuntOriginalRef) {
entry->AddInstruction(array_get4);
ScopedArenaAllocator allocator(graph_->GetArenaStack());
- HeapLocationCollector heap_location_collector(graph_, &allocator, LoadStoreAnalysisType::kFull);
+ HeapLocationCollector heap_location_collector(graph_, &allocator);
heap_location_collector.VisitBasicBlock(entry);
// Test that the HeapLocationCollector should be able to tell
@@ -817,756 +808,6 @@ TEST_F(LoadStoreAnalysisTest, ReachabilityTest3) {
});
}
-static bool AreExclusionsIndependent(HGraph* graph, const ExecutionSubgraph* esg) {
- auto excluded = esg->GetExcludedCohorts();
- if (excluded.size() < 2) {
- return true;
- }
- for (auto first = excluded.begin(); first != excluded.end(); ++first) {
- for (auto second = excluded.begin(); second != excluded.end(); ++second) {
- if (first == second) {
- continue;
- }
- for (const HBasicBlock* entry : first->EntryBlocks()) {
- for (const HBasicBlock* exit : second->ExitBlocks()) {
- if (graph->PathBetween(exit, entry)) {
- return false;
- }
- }
- }
- }
- }
- return true;
-}
-
-// // ENTRY
-// obj = new Obj();
-// if (parameter_value) {
-// // LEFT
-// call_func(obj);
-// } else {
-// // RIGHT
-// obj.field = 1;
-// }
-// // EXIT
-// obj.field;
-TEST_F(LoadStoreAnalysisTest, PartialEscape) {
- CreateGraph();
- AdjacencyListGraph blks(SetupFromAdjacencyList(
- "entry",
- "exit",
- { { "entry", "left" }, { "entry", "right" }, { "left", "exit" }, { "right", "exit" } }));
- HBasicBlock* entry = blks.Get("entry");
- HBasicBlock* left = blks.Get("left");
- HBasicBlock* right = blks.Get("right");
- HBasicBlock* exit = blks.Get("exit");
-
- HInstruction* bool_value = new (GetAllocator())
- HParameterValue(graph_->GetDexFile(), dex::TypeIndex(1), 1, DataType::Type::kBool);
- HInstruction* c0 = graph_->GetIntConstant(0);
- HInstruction* cls = new (GetAllocator()) HLoadClass(graph_->GetCurrentMethod(),
- dex::TypeIndex(10),
- graph_->GetDexFile(),
- ScopedNullHandle<mirror::Class>(),
- false,
- 0,
- false);
- HInstruction* new_inst =
- new (GetAllocator()) HNewInstance(cls,
- 0,
- dex::TypeIndex(10),
- graph_->GetDexFile(),
- false,
- QuickEntrypointEnum::kQuickAllocObjectInitialized);
- HInstruction* if_inst = new (GetAllocator()) HIf(bool_value);
- entry->AddInstruction(bool_value);
- entry->AddInstruction(cls);
- entry->AddInstruction(new_inst);
- entry->AddInstruction(if_inst);
-
- HInstruction* call_left = new (GetAllocator())
- HInvokeStaticOrDirect(GetAllocator(),
- 1,
- DataType::Type::kVoid,
- 0,
- { nullptr, 0 },
- nullptr,
- {},
- InvokeType::kStatic,
- { nullptr, 0 },
- HInvokeStaticOrDirect::ClinitCheckRequirement::kNone,
- !graph_->IsDebuggable());
- HInstruction* goto_left = new (GetAllocator()) HGoto();
- call_left->AsInvoke()->SetRawInputAt(0, new_inst);
- left->AddInstruction(call_left);
- left->AddInstruction(goto_left);
-
- HInstruction* write_right = new (GetAllocator()) HInstanceFieldSet(new_inst,
- c0,
- nullptr,
- DataType::Type::kInt32,
- MemberOffset(32),
- false,
- 0,
- 0,
- graph_->GetDexFile(),
- 0);
- HInstruction* goto_right = new (GetAllocator()) HGoto();
- right->AddInstruction(write_right);
- right->AddInstruction(goto_right);
-
- HInstruction* read_final = new (GetAllocator()) HInstanceFieldGet(new_inst,
- nullptr,
- DataType::Type::kInt32,
- MemberOffset(32),
- false,
- 0,
- 0,
- graph_->GetDexFile(),
- 0);
- exit->AddInstruction(read_final);
-
- ScopedArenaAllocator allocator(graph_->GetArenaStack());
- LoadStoreAnalysis lsa(graph_, nullptr, &allocator, LoadStoreAnalysisType::kFull);
- lsa.Run();
-
- const HeapLocationCollector& heap_location_collector = lsa.GetHeapLocationCollector();
- ReferenceInfo* info = heap_location_collector.FindReferenceInfoOf(new_inst);
- ASSERT_TRUE(info->IsPartialSingleton());
- const ExecutionSubgraph* esg = info->GetNoEscapeSubgraph();
-
- ASSERT_TRUE(esg->IsValid());
- ASSERT_TRUE(IsValidSubgraph(esg));
- ASSERT_TRUE(AreExclusionsIndependent(graph_, esg));
- std::unordered_set<const HBasicBlock*> contents(esg->ReachableBlocks().begin(),
- esg->ReachableBlocks().end());
-
- ASSERT_EQ(contents.size(), 3u);
- ASSERT_TRUE(contents.find(blks.Get("left")) == contents.end());
-
- ASSERT_TRUE(contents.find(blks.Get("right")) != contents.end());
- ASSERT_TRUE(contents.find(blks.Get("entry")) != contents.end());
- ASSERT_TRUE(contents.find(blks.Get("exit")) != contents.end());
-}
-
-// // ENTRY
-// obj = new Obj();
-// if (parameter_value) {
-// // LEFT
-// call_func(obj);
-// } else {
-// // RIGHT
-// obj.field = 1;
-// }
-// // EXIT
-// obj.field2;
-TEST_F(LoadStoreAnalysisTest, PartialEscape2) {
- CreateGraph();
- AdjacencyListGraph blks(SetupFromAdjacencyList(
- "entry",
- "exit",
- { { "entry", "left" }, { "entry", "right" }, { "left", "exit" }, { "right", "exit" } }));
- HBasicBlock* entry = blks.Get("entry");
- HBasicBlock* left = blks.Get("left");
- HBasicBlock* right = blks.Get("right");
- HBasicBlock* exit = blks.Get("exit");
-
- HInstruction* bool_value = new (GetAllocator())
- HParameterValue(graph_->GetDexFile(), dex::TypeIndex(1), 1, DataType::Type::kBool);
- HInstruction* c0 = graph_->GetIntConstant(0);
- HInstruction* cls = new (GetAllocator()) HLoadClass(graph_->GetCurrentMethod(),
- dex::TypeIndex(10),
- graph_->GetDexFile(),
- ScopedNullHandle<mirror::Class>(),
- false,
- 0,
- false);
- HInstruction* new_inst =
- new (GetAllocator()) HNewInstance(cls,
- 0,
- dex::TypeIndex(10),
- graph_->GetDexFile(),
- false,
- QuickEntrypointEnum::kQuickAllocObjectInitialized);
- HInstruction* if_inst = new (GetAllocator()) HIf(bool_value);
- entry->AddInstruction(bool_value);
- entry->AddInstruction(cls);
- entry->AddInstruction(new_inst);
- entry->AddInstruction(if_inst);
-
- HInstruction* call_left = new (GetAllocator())
- HInvokeStaticOrDirect(GetAllocator(),
- 1,
- DataType::Type::kVoid,
- 0,
- { nullptr, 0 },
- nullptr,
- {},
- InvokeType::kStatic,
- { nullptr, 0 },
- HInvokeStaticOrDirect::ClinitCheckRequirement::kNone,
- !graph_->IsDebuggable());
- HInstruction* goto_left = new (GetAllocator()) HGoto();
- call_left->AsInvoke()->SetRawInputAt(0, new_inst);
- left->AddInstruction(call_left);
- left->AddInstruction(goto_left);
-
- HInstruction* write_right = new (GetAllocator()) HInstanceFieldSet(new_inst,
- c0,
- nullptr,
- DataType::Type::kInt32,
- MemberOffset(32),
- false,
- 0,
- 0,
- graph_->GetDexFile(),
- 0);
- HInstruction* goto_right = new (GetAllocator()) HGoto();
- right->AddInstruction(write_right);
- right->AddInstruction(goto_right);
-
- HInstruction* read_final = new (GetAllocator()) HInstanceFieldGet(new_inst,
- nullptr,
- DataType::Type::kInt32,
- MemberOffset(16),
- false,
- 0,
- 0,
- graph_->GetDexFile(),
- 0);
- exit->AddInstruction(read_final);
-
- ScopedArenaAllocator allocator(graph_->GetArenaStack());
- LoadStoreAnalysis lsa(graph_, nullptr, &allocator, LoadStoreAnalysisType::kFull);
- lsa.Run();
-
- const HeapLocationCollector& heap_location_collector = lsa.GetHeapLocationCollector();
- ReferenceInfo* info = heap_location_collector.FindReferenceInfoOf(new_inst);
- ASSERT_TRUE(info->IsPartialSingleton());
- const ExecutionSubgraph* esg = info->GetNoEscapeSubgraph();
-
- ASSERT_TRUE(esg->IsValid());
- ASSERT_TRUE(IsValidSubgraph(esg));
- ASSERT_TRUE(AreExclusionsIndependent(graph_, esg));
- std::unordered_set<const HBasicBlock*> contents(esg->ReachableBlocks().begin(),
- esg->ReachableBlocks().end());
-
- ASSERT_EQ(contents.size(), 3u);
- ASSERT_TRUE(contents.find(blks.Get("left")) == contents.end());
-
- ASSERT_TRUE(contents.find(blks.Get("right")) != contents.end());
- ASSERT_TRUE(contents.find(blks.Get("entry")) != contents.end());
- ASSERT_TRUE(contents.find(blks.Get("exit")) != contents.end());
-}
-
-// // ENTRY
-// obj = new Obj();
-// obj.field = 10;
-// if (parameter_value) {
-// // LEFT
-// call_func(obj);
-// } else {
-// // RIGHT
-// obj.field = 20;
-// }
-// // EXIT
-// obj.field;
-TEST_F(LoadStoreAnalysisTest, PartialEscape3) {
- CreateGraph();
- AdjacencyListGraph blks(SetupFromAdjacencyList(
- "entry",
- "exit",
- { { "entry", "left" }, { "entry", "right" }, { "left", "exit" }, { "right", "exit" } }));
- HBasicBlock* entry = blks.Get("entry");
- HBasicBlock* left = blks.Get("left");
- HBasicBlock* right = blks.Get("right");
- HBasicBlock* exit = blks.Get("exit");
-
- HInstruction* bool_value = new (GetAllocator())
- HParameterValue(graph_->GetDexFile(), dex::TypeIndex(1), 1, DataType::Type::kBool);
- HInstruction* c10 = graph_->GetIntConstant(10);
- HInstruction* c20 = graph_->GetIntConstant(20);
- HInstruction* cls = new (GetAllocator()) HLoadClass(graph_->GetCurrentMethod(),
- dex::TypeIndex(10),
- graph_->GetDexFile(),
- ScopedNullHandle<mirror::Class>(),
- false,
- 0,
- false);
- HInstruction* new_inst =
- new (GetAllocator()) HNewInstance(cls,
- 0,
- dex::TypeIndex(10),
- graph_->GetDexFile(),
- false,
- QuickEntrypointEnum::kQuickAllocObjectInitialized);
-
- HInstruction* write_entry = new (GetAllocator()) HInstanceFieldSet(new_inst,
- c10,
- nullptr,
- DataType::Type::kInt32,
- MemberOffset(32),
- false,
- 0,
- 0,
- graph_->GetDexFile(),
- 0);
- HInstruction* if_inst = new (GetAllocator()) HIf(bool_value);
- entry->AddInstruction(bool_value);
- entry->AddInstruction(cls);
- entry->AddInstruction(new_inst);
- entry->AddInstruction(write_entry);
- entry->AddInstruction(if_inst);
-
- HInstruction* call_left = new (GetAllocator())
- HInvokeStaticOrDirect(GetAllocator(),
- 1,
- DataType::Type::kVoid,
- 0,
- { nullptr, 0 },
- nullptr,
- {},
- InvokeType::kStatic,
- { nullptr, 0 },
- HInvokeStaticOrDirect::ClinitCheckRequirement::kNone,
- !graph_->IsDebuggable());
- HInstruction* goto_left = new (GetAllocator()) HGoto();
- call_left->AsInvoke()->SetRawInputAt(0, new_inst);
- left->AddInstruction(call_left);
- left->AddInstruction(goto_left);
-
- HInstruction* write_right = new (GetAllocator()) HInstanceFieldSet(new_inst,
- c20,
- nullptr,
- DataType::Type::kInt32,
- MemberOffset(32),
- false,
- 0,
- 0,
- graph_->GetDexFile(),
- 0);
- HInstruction* goto_right = new (GetAllocator()) HGoto();
- right->AddInstruction(write_right);
- right->AddInstruction(goto_right);
-
- HInstruction* read_final = new (GetAllocator()) HInstanceFieldGet(new_inst,
- nullptr,
- DataType::Type::kInt32,
- MemberOffset(32),
- false,
- 0,
- 0,
- graph_->GetDexFile(),
- 0);
- exit->AddInstruction(read_final);
-
- ScopedArenaAllocator allocator(graph_->GetArenaStack());
- LoadStoreAnalysis lsa(graph_, nullptr, &allocator, LoadStoreAnalysisType::kFull);
- lsa.Run();
-
- const HeapLocationCollector& heap_location_collector = lsa.GetHeapLocationCollector();
- ReferenceInfo* info = heap_location_collector.FindReferenceInfoOf(new_inst);
- ASSERT_TRUE(info->IsPartialSingleton());
- const ExecutionSubgraph* esg = info->GetNoEscapeSubgraph();
-
- ASSERT_TRUE(esg->IsValid());
- ASSERT_TRUE(IsValidSubgraph(esg));
- ASSERT_TRUE(AreExclusionsIndependent(graph_, esg));
- std::unordered_set<const HBasicBlock*> contents(esg->ReachableBlocks().begin(),
- esg->ReachableBlocks().end());
-
- ASSERT_EQ(contents.size(), 3u);
- ASSERT_TRUE(contents.find(blks.Get("left")) == contents.end());
-
- ASSERT_TRUE(contents.find(blks.Get("right")) != contents.end());
- ASSERT_TRUE(contents.find(blks.Get("entry")) != contents.end());
- ASSERT_TRUE(contents.find(blks.Get("exit")) != contents.end());
-}
-
-// For simplicity Partial LSE considers check-casts to escape. It means we don't
-// need to worry about inserting throws.
-// // ENTRY
-// obj = new Obj();
-// obj.field = 10;
-// if (parameter_value) {
-// // LEFT
-// (Foo)obj;
-// } else {
-// // RIGHT
-// obj.field = 20;
-// }
-// // EXIT
-// obj.field;
-TEST_F(LoadStoreAnalysisTest, PartialEscape4) {
- CreateGraph();
- AdjacencyListGraph blks(SetupFromAdjacencyList(
- "entry",
- "exit",
- { { "entry", "left" }, { "entry", "right" }, { "left", "exit" }, { "right", "exit" } }));
- HBasicBlock* entry = blks.Get("entry");
- HBasicBlock* left = blks.Get("left");
- HBasicBlock* right = blks.Get("right");
- HBasicBlock* exit = blks.Get("exit");
-
- HInstruction* bool_value = new (GetAllocator())
- HParameterValue(graph_->GetDexFile(), dex::TypeIndex(1), 1, DataType::Type::kBool);
- HInstruction* c10 = graph_->GetIntConstant(10);
- HInstruction* c20 = graph_->GetIntConstant(20);
- HInstruction* cls = MakeClassLoad();
- HInstruction* new_inst = MakeNewInstance(cls);
-
- HInstruction* write_entry = MakeIFieldSet(new_inst, c10, MemberOffset(32));
- HInstruction* if_inst = new (GetAllocator()) HIf(bool_value);
- entry->AddInstruction(bool_value);
- entry->AddInstruction(cls);
- entry->AddInstruction(new_inst);
- entry->AddInstruction(write_entry);
- entry->AddInstruction(if_inst);
-
- ScopedNullHandle<mirror::Class> null_klass_;
- HInstruction* cls2 = MakeClassLoad();
- HInstruction* check_cast = new (GetAllocator()) HCheckCast(
- new_inst, cls2, TypeCheckKind::kExactCheck, null_klass_, 0, GetAllocator(), nullptr, nullptr);
- HInstruction* goto_left = new (GetAllocator()) HGoto();
- left->AddInstruction(cls2);
- left->AddInstruction(check_cast);
- left->AddInstruction(goto_left);
-
- HInstruction* write_right = MakeIFieldSet(new_inst, c20, MemberOffset(32));
- HInstruction* goto_right = new (GetAllocator()) HGoto();
- right->AddInstruction(write_right);
- right->AddInstruction(goto_right);
-
- HInstruction* read_final = MakeIFieldGet(new_inst, DataType::Type::kInt32, MemberOffset(32));
- exit->AddInstruction(read_final);
-
- ScopedArenaAllocator allocator(graph_->GetArenaStack());
- LoadStoreAnalysis lsa(graph_, nullptr, &allocator, LoadStoreAnalysisType::kFull);
- lsa.Run();
-
- const HeapLocationCollector& heap_location_collector = lsa.GetHeapLocationCollector();
- ReferenceInfo* info = heap_location_collector.FindReferenceInfoOf(new_inst);
- ASSERT_TRUE(info->IsPartialSingleton());
- const ExecutionSubgraph* esg = info->GetNoEscapeSubgraph();
-
- ASSERT_TRUE(esg->IsValid());
- ASSERT_TRUE(IsValidSubgraph(esg));
- ASSERT_TRUE(AreExclusionsIndependent(graph_, esg));
- std::unordered_set<const HBasicBlock*> contents(esg->ReachableBlocks().begin(),
- esg->ReachableBlocks().end());
-
- ASSERT_EQ(contents.size(), 3u);
- ASSERT_TRUE(contents.find(blks.Get("left")) == contents.end());
-
- ASSERT_TRUE(contents.find(blks.Get("right")) != contents.end());
- ASSERT_TRUE(contents.find(blks.Get("entry")) != contents.end());
- ASSERT_TRUE(contents.find(blks.Get("exit")) != contents.end());
-}
-
-// For simplicity Partial LSE considers instance-ofs with bitvectors to escape.
-// // ENTRY
-// obj = new Obj();
-// obj.field = 10;
-// if (parameter_value) {
-// // LEFT
-// obj instanceof /*bitvector*/ Foo;
-// } else {
-// // RIGHT
-// obj.field = 20;
-// }
-// // EXIT
-// obj.field;
-TEST_F(LoadStoreAnalysisTest, PartialEscape5) {
- ScopedObjectAccess soa(Thread::Current());
- VariableSizedHandleScope vshs(soa.Self());
- CreateGraph(&vshs);
- AdjacencyListGraph blks(SetupFromAdjacencyList(
- "entry",
- "exit",
- { { "entry", "left" }, { "entry", "right" }, { "left", "exit" }, { "right", "exit" } }));
- HBasicBlock* entry = blks.Get("entry");
- HBasicBlock* left = blks.Get("left");
- HBasicBlock* right = blks.Get("right");
- HBasicBlock* exit = blks.Get("exit");
-
- HInstruction* bool_value = new (GetAllocator())
- HParameterValue(graph_->GetDexFile(), dex::TypeIndex(1), 1, DataType::Type::kBool);
- HInstruction* c10 = graph_->GetIntConstant(10);
- HInstruction* c20 = graph_->GetIntConstant(20);
- HIntConstant* bs1 = graph_->GetIntConstant(0xffff);
- HIntConstant* bs2 = graph_->GetIntConstant(0x00ff);
- HInstruction* cls = MakeClassLoad();
- HInstruction* null_const = graph_->GetNullConstant();
- HInstruction* new_inst = MakeNewInstance(cls);
-
- HInstruction* write_entry = MakeIFieldSet(new_inst, c10, MemberOffset(32));
- HInstruction* if_inst = new (GetAllocator()) HIf(bool_value);
- entry->AddInstruction(bool_value);
- entry->AddInstruction(cls);
- entry->AddInstruction(new_inst);
- entry->AddInstruction(write_entry);
- entry->AddInstruction(if_inst);
-
- ScopedNullHandle<mirror::Class> null_klass_;
- HInstruction* instanceof = new (GetAllocator()) HInstanceOf(new_inst,
- null_const,
- TypeCheckKind::kBitstringCheck,
- null_klass_,
- 0,
- GetAllocator(),
- bs1,
- bs2);
- HInstruction* goto_left = new (GetAllocator()) HGoto();
- left->AddInstruction(instanceof);
- left->AddInstruction(goto_left);
-
- HInstruction* write_right = MakeIFieldSet(new_inst, c20, MemberOffset(32));
- HInstruction* goto_right = new (GetAllocator()) HGoto();
- right->AddInstruction(write_right);
- right->AddInstruction(goto_right);
-
- HInstruction* read_final = MakeIFieldGet(new_inst, DataType::Type::kInt32, MemberOffset(32));
- exit->AddInstruction(read_final);
-
- ScopedArenaAllocator allocator(graph_->GetArenaStack());
- LoadStoreAnalysis lsa(graph_, nullptr, &allocator, LoadStoreAnalysisType::kFull);
- lsa.Run();
-
- const HeapLocationCollector& heap_location_collector = lsa.GetHeapLocationCollector();
- ReferenceInfo* info = heap_location_collector.FindReferenceInfoOf(new_inst);
- ASSERT_TRUE(info->IsPartialSingleton());
- const ExecutionSubgraph* esg = info->GetNoEscapeSubgraph();
-
- ASSERT_TRUE(esg->IsValid());
- ASSERT_TRUE(IsValidSubgraph(esg));
- ASSERT_TRUE(AreExclusionsIndependent(graph_, esg));
- std::unordered_set<const HBasicBlock*> contents(esg->ReachableBlocks().begin(),
- esg->ReachableBlocks().end());
-
- ASSERT_EQ(contents.size(), 3u);
- ASSERT_TRUE(contents.find(blks.Get("left")) == contents.end());
-
- ASSERT_TRUE(contents.find(blks.Get("right")) != contents.end());
- ASSERT_TRUE(contents.find(blks.Get("entry")) != contents.end());
- ASSERT_TRUE(contents.find(blks.Get("exit")) != contents.end());
-}
-
-// before we had predicated-set we needed to be able to remove the store as
-// well. This test makes sure that still works.
-// // ENTRY
-// obj = new Obj();
-// if (parameter_value) {
-// // LEFT
-// call_func(obj);
-// } else {
-// // RIGHT
-// obj.f1 = 0;
-// }
-// // EXIT
-// // call_func prevents the elimination of this store.
-// obj.f2 = 0;
-TEST_F(LoadStoreAnalysisTest, TotalEscapeAdjacentNoPredicated) {
- CreateGraph();
- AdjacencyListGraph blks(SetupFromAdjacencyList(
- "entry",
- "exit",
- {{"entry", "left"}, {"entry", "right"}, {"left", "exit"}, {"right", "exit"}}));
- HBasicBlock* entry = blks.Get("entry");
- HBasicBlock* left = blks.Get("left");
- HBasicBlock* right = blks.Get("right");
- HBasicBlock* exit = blks.Get("exit");
-
- HInstruction* bool_value = new (GetAllocator())
- HParameterValue(graph_->GetDexFile(), dex::TypeIndex(1), 1, DataType::Type::kBool);
- HInstruction* c0 = graph_->GetIntConstant(0);
- HInstruction* cls = new (GetAllocator()) HLoadClass(graph_->GetCurrentMethod(),
- dex::TypeIndex(10),
- graph_->GetDexFile(),
- ScopedNullHandle<mirror::Class>(),
- false,
- 0,
- false);
- HInstruction* new_inst =
- new (GetAllocator()) HNewInstance(cls,
- 0,
- dex::TypeIndex(10),
- graph_->GetDexFile(),
- false,
- QuickEntrypointEnum::kQuickAllocObjectInitialized);
- HInstruction* if_inst = new (GetAllocator()) HIf(bool_value);
- entry->AddInstruction(bool_value);
- entry->AddInstruction(cls);
- entry->AddInstruction(new_inst);
- entry->AddInstruction(if_inst);
-
- HInstruction* call_left = new (GetAllocator())
- HInvokeStaticOrDirect(GetAllocator(),
- 1,
- DataType::Type::kVoid,
- 0,
- {nullptr, 0},
- nullptr,
- {},
- InvokeType::kStatic,
- {nullptr, 0},
- HInvokeStaticOrDirect::ClinitCheckRequirement::kNone,
- !graph_->IsDebuggable());
- HInstruction* goto_left = new (GetAllocator()) HGoto();
- call_left->AsInvoke()->SetRawInputAt(0, new_inst);
- left->AddInstruction(call_left);
- left->AddInstruction(goto_left);
-
- HInstruction* write_right = new (GetAllocator()) HInstanceFieldSet(new_inst,
- c0,
- nullptr,
- DataType::Type::kInt32,
- MemberOffset(32),
- false,
- 0,
- 0,
- graph_->GetDexFile(),
- 0);
- HInstruction* goto_right = new (GetAllocator()) HGoto();
- right->AddInstruction(write_right);
- right->AddInstruction(goto_right);
-
- HInstruction* write_final = new (GetAllocator()) HInstanceFieldSet(new_inst,
- c0,
- nullptr,
- DataType::Type::kInt32,
- MemberOffset(16),
- false,
- 0,
- 0,
- graph_->GetDexFile(),
- 0);
- exit->AddInstruction(write_final);
-
- ScopedArenaAllocator allocator(graph_->GetArenaStack());
- graph_->ClearDominanceInformation();
- graph_->BuildDominatorTree();
- LoadStoreAnalysis lsa(
- graph_, nullptr, &allocator, LoadStoreAnalysisType::kNoPredicatedInstructions);
- lsa.Run();
-
- const HeapLocationCollector& heap_location_collector = lsa.GetHeapLocationCollector();
- ReferenceInfo* info = heap_location_collector.FindReferenceInfoOf(new_inst);
- ASSERT_FALSE(info->IsPartialSingleton());
-}
-
-// With predicated-set we can (partially) remove the store as well.
-// // ENTRY
-// obj = new Obj();
-// if (parameter_value) {
-// // LEFT
-// call_func(obj);
-// } else {
-// // RIGHT
-// obj.f1 = 0;
-// }
-// // EXIT
-// // call_func prevents the elimination of this store.
-// obj.f2 = 0;
-TEST_F(LoadStoreAnalysisTest, TotalEscapeAdjacent) {
- CreateGraph();
- AdjacencyListGraph blks(SetupFromAdjacencyList(
- "entry",
- "exit",
- { { "entry", "left" }, { "entry", "right" }, { "left", "exit" }, { "right", "exit" } }));
- HBasicBlock* entry = blks.Get("entry");
- HBasicBlock* left = blks.Get("left");
- HBasicBlock* right = blks.Get("right");
- HBasicBlock* exit = blks.Get("exit");
-
- HInstruction* bool_value = new (GetAllocator())
- HParameterValue(graph_->GetDexFile(), dex::TypeIndex(1), 1, DataType::Type::kBool);
- HInstruction* c0 = graph_->GetIntConstant(0);
- HInstruction* cls = new (GetAllocator()) HLoadClass(graph_->GetCurrentMethod(),
- dex::TypeIndex(10),
- graph_->GetDexFile(),
- ScopedNullHandle<mirror::Class>(),
- false,
- 0,
- false);
- HInstruction* new_inst =
- new (GetAllocator()) HNewInstance(cls,
- 0,
- dex::TypeIndex(10),
- graph_->GetDexFile(),
- false,
- QuickEntrypointEnum::kQuickAllocObjectInitialized);
- HInstruction* if_inst = new (GetAllocator()) HIf(bool_value);
- entry->AddInstruction(bool_value);
- entry->AddInstruction(cls);
- entry->AddInstruction(new_inst);
- entry->AddInstruction(if_inst);
-
- HInstruction* call_left = new (GetAllocator())
- HInvokeStaticOrDirect(GetAllocator(),
- 1,
- DataType::Type::kVoid,
- 0,
- { nullptr, 0 },
- nullptr,
- {},
- InvokeType::kStatic,
- { nullptr, 0 },
- HInvokeStaticOrDirect::ClinitCheckRequirement::kNone,
- !graph_->IsDebuggable());
- HInstruction* goto_left = new (GetAllocator()) HGoto();
- call_left->AsInvoke()->SetRawInputAt(0, new_inst);
- left->AddInstruction(call_left);
- left->AddInstruction(goto_left);
-
- HInstruction* write_right = new (GetAllocator()) HInstanceFieldSet(new_inst,
- c0,
- nullptr,
- DataType::Type::kInt32,
- MemberOffset(32),
- false,
- 0,
- 0,
- graph_->GetDexFile(),
- 0);
- HInstruction* goto_right = new (GetAllocator()) HGoto();
- right->AddInstruction(write_right);
- right->AddInstruction(goto_right);
-
- HInstruction* write_final = new (GetAllocator()) HInstanceFieldSet(new_inst,
- c0,
- nullptr,
- DataType::Type::kInt32,
- MemberOffset(16),
- false,
- 0,
- 0,
- graph_->GetDexFile(),
- 0);
- exit->AddInstruction(write_final);
-
- ScopedArenaAllocator allocator(graph_->GetArenaStack());
- graph_->ClearDominanceInformation();
- graph_->BuildDominatorTree();
- LoadStoreAnalysis lsa(graph_, nullptr, &allocator, LoadStoreAnalysisType::kFull);
- lsa.Run();
-
- const HeapLocationCollector& heap_location_collector = lsa.GetHeapLocationCollector();
- ReferenceInfo* info = heap_location_collector.FindReferenceInfoOf(new_inst);
- ASSERT_TRUE(info->IsPartialSingleton());
- const ExecutionSubgraph* esg = info->GetNoEscapeSubgraph();
-
- EXPECT_TRUE(esg->IsValid()) << esg->GetExcludedCohorts();
- EXPECT_TRUE(IsValidSubgraph(esg));
- std::unordered_set<const HBasicBlock*> contents(esg->ReachableBlocks().begin(),
- esg->ReachableBlocks().end());
-
- EXPECT_EQ(contents.size(), 3u);
- EXPECT_TRUE(contents.find(blks.Get("left")) == contents.end());
- EXPECT_FALSE(contents.find(blks.Get("right")) == contents.end());
- EXPECT_FALSE(contents.find(blks.Get("entry")) == contents.end());
- EXPECT_FALSE(contents.find(blks.Get("exit")) == contents.end());
-}
-
// // ENTRY
// obj = new Obj();
// if (parameter_value) {
@@ -1626,7 +867,7 @@ TEST_F(LoadStoreAnalysisTest, TotalEscape) {
HInvokeStaticOrDirect::ClinitCheckRequirement::kNone,
!graph_->IsDebuggable());
HInstruction* goto_left = new (GetAllocator()) HGoto();
- call_left->AsInvoke()->SetRawInputAt(0, new_inst);
+ call_left->SetRawInputAt(0, new_inst);
left->AddInstruction(call_left);
left->AddInstruction(goto_left);
@@ -1653,7 +894,7 @@ TEST_F(LoadStoreAnalysisTest, TotalEscape) {
graph_->GetDexFile(),
0);
HInstruction* goto_right = new (GetAllocator()) HGoto();
- call_right->AsInvoke()->SetRawInputAt(0, new_inst);
+ call_right->SetRawInputAt(0, new_inst);
right->AddInstruction(write_right);
right->AddInstruction(call_right);
right->AddInstruction(goto_right);
@@ -1670,12 +911,12 @@ TEST_F(LoadStoreAnalysisTest, TotalEscape) {
exit->AddInstruction(read_final);
ScopedArenaAllocator allocator(graph_->GetArenaStack());
- LoadStoreAnalysis lsa(graph_, nullptr, &allocator, LoadStoreAnalysisType::kFull);
+ LoadStoreAnalysis lsa(graph_, nullptr, &allocator);
lsa.Run();
const HeapLocationCollector& heap_location_collector = lsa.GetHeapLocationCollector();
ReferenceInfo* info = heap_location_collector.FindReferenceInfoOf(new_inst);
- ASSERT_FALSE(info->IsPartialSingleton());
+ ASSERT_FALSE(info->IsSingleton());
}
// // ENTRY
@@ -1725,12 +966,12 @@ TEST_F(LoadStoreAnalysisTest, TotalEscape2) {
exit->AddInstruction(return_final);
ScopedArenaAllocator allocator(graph_->GetArenaStack());
- LoadStoreAnalysis lsa(graph_, nullptr, &allocator, LoadStoreAnalysisType::kFull);
+ LoadStoreAnalysis lsa(graph_, nullptr, &allocator);
lsa.Run();
const HeapLocationCollector& heap_location_collector = lsa.GetHeapLocationCollector();
ReferenceInfo* info = heap_location_collector.FindReferenceInfoOf(new_inst);
- ASSERT_FALSE(info->IsPartialSingleton());
+ ASSERT_TRUE(info->IsSingletonAndNonRemovable());
}
// // ENTRY
@@ -1813,7 +1054,7 @@ TEST_F(LoadStoreAnalysisTest, DoubleDiamondEscape) {
HInvokeStaticOrDirect::ClinitCheckRequirement::kNone,
!graph_->IsDebuggable());
HInstruction* goto_left = new (GetAllocator()) HGoto();
- call_left->AsInvoke()->SetRawInputAt(0, new_inst);
+ call_left->SetRawInputAt(0, new_inst);
high_left->AddInstruction(call_left);
high_left->AddInstruction(goto_left);
@@ -1870,7 +1111,7 @@ TEST_F(LoadStoreAnalysisTest, DoubleDiamondEscape) {
HInvokeStaticOrDirect::ClinitCheckRequirement::kNone,
!graph_->IsDebuggable());
HInstruction* goto_low_left = new (GetAllocator()) HGoto();
- call_low_left->AsInvoke()->SetRawInputAt(0, new_inst);
+ call_low_left->SetRawInputAt(0, new_inst);
low_left->AddInstruction(call_low_left);
low_left->AddInstruction(goto_low_left);
@@ -1900,12 +1141,12 @@ TEST_F(LoadStoreAnalysisTest, DoubleDiamondEscape) {
exit->AddInstruction(read_final);
ScopedArenaAllocator allocator(graph_->GetArenaStack());
- LoadStoreAnalysis lsa(graph_, nullptr, &allocator, LoadStoreAnalysisType::kFull);
+ LoadStoreAnalysis lsa(graph_, nullptr, &allocator);
lsa.Run();
const HeapLocationCollector& heap_location_collector = lsa.GetHeapLocationCollector();
ReferenceInfo* info = heap_location_collector.FindReferenceInfoOf(new_inst);
- ASSERT_FALSE(info->IsPartialSingleton());
+ ASSERT_FALSE(info->IsSingleton());
}
// // ENTRY
@@ -2030,7 +1271,7 @@ TEST_F(LoadStoreAnalysisTest, PartialPhiPropagation1) {
HInstruction* goto_left_merge = new (GetAllocator()) HGoto();
left_phi->SetRawInputAt(0, obj_param);
left_phi->SetRawInputAt(1, new_inst);
- call_left->AsInvoke()->SetRawInputAt(0, left_phi);
+ call_left->SetRawInputAt(0, left_phi);
left_merge->AddPhi(left_phi);
left_merge->AddInstruction(call_left);
left_merge->AddInstruction(goto_left_merge);
@@ -2065,11 +1306,11 @@ TEST_F(LoadStoreAnalysisTest, PartialPhiPropagation1) {
graph_->BuildDominatorTree();
ScopedArenaAllocator allocator(graph_->GetArenaStack());
- LoadStoreAnalysis lsa(graph_, nullptr, &allocator, LoadStoreAnalysisType::kFull);
+ LoadStoreAnalysis lsa(graph_, nullptr, &allocator);
lsa.Run();
const HeapLocationCollector& heap_location_collector = lsa.GetHeapLocationCollector();
ReferenceInfo* info = heap_location_collector.FindReferenceInfoOf(new_inst);
- ASSERT_FALSE(info->IsPartialSingleton());
+ ASSERT_FALSE(info->IsSingleton());
}
} // namespace art
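For orientation, this is the updated test-side pattern after the change, assembled purely from the '+' lines in the hunks above; it assumes the surrounding LoadStoreAnalysisTest fixture (graph_ and new_inst) exactly as in the removed tests and adds no API beyond what the diff itself shows:

  ScopedArenaAllocator allocator(graph_->GetArenaStack());
  LoadStoreAnalysis lsa(graph_, nullptr, &allocator);  // No LoadStoreAnalysisType argument anymore.
  lsa.Run();
  const HeapLocationCollector& heap_location_collector = lsa.GetHeapLocationCollector();
  ReferenceInfo* info = heap_location_collector.FindReferenceInfoOf(new_inst);
  ASSERT_FALSE(info->IsSingleton());  // Partial-singleton queries (IsPartialSingleton) are gone.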
diff --git a/compiler/optimizing/load_store_elimination.cc b/compiler/optimizing/load_store_elimination.cc
index 9cabb12a9f..2e5ee84d76 100644
--- a/compiler/optimizing/load_store_elimination.cc
+++ b/compiler/optimizing/load_store_elimination.cc
@@ -33,13 +33,11 @@
#include "base/scoped_arena_containers.h"
#include "base/transform_iterator.h"
#include "escape.h"
-#include "execution_subgraph.h"
#include "handle.h"
#include "load_store_analysis.h"
#include "mirror/class_loader.h"
#include "mirror/dex_cache.h"
#include "nodes.h"
-#include "optimizing/execution_subgraph.h"
#include "optimizing_compiler_stats.h"
#include "reference_type_propagation.h"
#include "side_effects_analysis.h"
@@ -94,9 +92,7 @@
* to maintain the validity of all heap locations during the optimization
* phase, we only record substitutes at this phase and the real elimination
* is delayed till the end of LSE. Loads that require a loop Phi placeholder
- * replacement are recorded for processing later. We also keep track of the
- * heap-value at the start load so that later partial-LSE can predicate the
- * load.
+ * replacement are recorded for processing later.
* - If the instruction is a store, it updates the heap value for the heap
* location with the stored value and records the store itself so that we can
* mark it for keeping if the value becomes observable. Heap values are
@@ -240,79 +236,6 @@
* The time complexity of this phase is
 * O(instructions + instruction_uses).
*
- * 5. Partial LSE
- *
- * Move allocations closer to their escapes and remove/predicate loads and
- * stores as required.
- *
- * Partial singletons are objects which only escape from the function or have
- * multiple names along certain execution paths. In cases where we recognize
- * these partial singletons we can move the allocation and initialization
- * closer to the actual escape(s). We can then perform a simplified version of
- * LSE step 2 to determine the unescaped value of any reads performed after the
- * object may have escaped. These are used to replace these reads with
- * 'predicated-read' instructions where the value is only read if the object
- * has actually escaped. We use the existence of the object itself as the
- * marker of whether escape has occurred.
- *
- * There are several steps in this sub-pass
- *
- * 5.1 Group references
- *
- * Since all heap-locations for a single reference escape at the same time, we
- * need to group the heap-locations by reference and process them at the same
- * time.
- *
- * O(heap_locations).
- *
- * FIXME: The time complexity above assumes we can bucket the heap-locations in
- * O(1) which is not true since we just perform a linear-scan of the heap-ref
- * list. Since there are generally only a small number of heap-references which
- * are partial-singletons, this is fine and has lower real overhead than a hash map.
- *
- * 5.2 Generate materializations
- *
- * Once we have the references we add new 'materialization blocks' on the edges
- * where escape becomes inevitable. This information is calculated by the
- * execution-subgraphs created during load-store-analysis. We create new
- * 'materialization's in these blocks and initialize them with the value of
- * each heap-location ignoring side effects (since the object hasn't escaped
- * yet). Worst case this is the same time-complexity as step 3 since we may
- * need to materialize phis.
- *
- * O(heap_locations^2 * materialization_edges)
- *
- * 5.3 Propagate materializations
- *
- * Since we use the materialization as the marker for escape we need to
- * propagate it throughout the graph. Since the subgraph analysis considers any
- * lifetime that escapes a loop (and hence would require a loop-phi) to be
- * escaping at the loop-header we do not need to create any loop-phis to do
- * this.
- *
- * O(edges)
- *
- * NB: Currently the subgraph analysis considers all objects to have their
- * lifetimes start at the entry block. This simplifies that analysis enormously
- * but means that we cannot distinguish between an escape in a loop where the
- * lifetime does not escape the loop (in which case this pass could optimize)
- * and one where it does escape the loop (in which case the whole loop is
- * escaping). This is a shortcoming that would be good to fix at some point.
- *
- * 5.4 Propagate partial values
- *
- * We need to replace loads and stores to the partial reference with predicated
- * ones that have default non-escaping values. Again this is the same as step 3.
- *
- * O(heap_locations^2 * edges)
- *
- * 5.5 Final fixup
- *
- * Now all we need to do is replace and remove uses of the old reference with the
- * appropriate materialization.
- *
- * O(instructions + uses)
- *
 * FIXME: The time complexities described above assume that the
* HeapLocationCollector finds a heap location for an instruction in O(1)
* time but it is currently O(heap_locations); this can be fixed by adding
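The documentation removed above centers on one trick: the materialized object doubles as the escape marker, so a load placed after a potential escape reads the real field only when the marker is non-null and otherwise falls back to the value LSE tracked for the non-escaping paths. A minimal, self-contained C++ sketch of that idea (illustrative only, not ART code; Obj, PredicatedRead and the field names are hypothetical):

#include <cstdint>
#include <iostream>

struct Obj { int32_t f1; };

// 'materialized' stays null until the object actually escapes; 'tracked_value' is
// the value the analysis knows the field would hold on the non-escaping paths.
int32_t PredicatedRead(const Obj* materialized, int32_t tracked_value) {
  return materialized != nullptr ? materialized->f1 : tracked_value;
}

int main() {
  Obj escaped{42};
  std::cout << PredicatedRead(&escaped, 7) << "\n";  // 42: object escaped, read the field.
  std::cout << PredicatedRead(nullptr, 7) << "\n";   // 7: never escaped, use the tracked value.
  return 0;
}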
@@ -324,7 +247,6 @@ namespace art HIDDEN {
#define LSE_VLOG \
if (::art::LoadStoreElimination::kVerboseLoggingMode && VLOG_IS_ON(compiler)) LOG(INFO)
-class PartialLoadStoreEliminationHelper;
class HeapRefHolder;
// Use HGraphDelegateVisitor for which all VisitInvokeXXX() delegate to VisitInvoke().
@@ -332,7 +254,6 @@ class LSEVisitor final : private HGraphDelegateVisitor {
public:
LSEVisitor(HGraph* graph,
const HeapLocationCollector& heap_location_collector,
- bool perform_partial_lse,
OptimizingCompilerStats* stats);
void Run();
@@ -615,27 +536,7 @@ class LSEVisitor final : private HGraphDelegateVisitor {
return PhiPlaceholderIndex(phi_placeholder.GetPhiPlaceholder());
}
- bool IsEscapingObject(ReferenceInfo* info, HBasicBlock* block, size_t index) {
- return !info->IsSingletonAndRemovable() &&
- !(info->IsPartialSingleton() && IsPartialNoEscape(block, index));
- }
-
- bool IsPartialNoEscape(HBasicBlock* blk, size_t idx) {
- auto* ri = heap_location_collector_.GetHeapLocation(idx)->GetReferenceInfo();
- if (!ri->IsPartialSingleton()) {
- return false;
- }
- ArrayRef<const ExecutionSubgraph::ExcludedCohort> cohorts =
- ri->GetNoEscapeSubgraph()->GetExcludedCohorts();
- return std::none_of(cohorts.cbegin(),
- cohorts.cend(),
- [&](const ExecutionSubgraph::ExcludedCohort& ex) -> bool {
- // Make sure we haven't yet and never will escape.
- return ex.PrecedesBlock(blk) ||
- ex.ContainsBlock(blk) ||
- ex.SucceedsBlock(blk);
- });
- }
+ bool IsEscapingObject(ReferenceInfo* info) { return !info->IsSingletonAndRemovable(); }
PhiPlaceholder GetPhiPlaceholderAt(size_t off) const {
DCHECK_LT(off, num_phi_placeholders_);
@@ -652,9 +553,7 @@ class LSEVisitor final : private HGraphDelegateVisitor {
}
Value Replacement(Value value) const {
- DCHECK(value.NeedsPhi() ||
- (current_phase_ == Phase::kPartialElimination && value.IsMergedUnknown()))
- << value << " phase: " << current_phase_;
+ DCHECK(value.NeedsPhi()) << value << " phase: " << current_phase_;
Value replacement = phi_placeholder_replacements_[PhiPlaceholderIndex(value)];
DCHECK(replacement.IsUnknown() || replacement.IsInstruction());
DCHECK(replacement.IsUnknown() ||
@@ -663,35 +562,6 @@ class LSEVisitor final : private HGraphDelegateVisitor {
}
Value ReplacementOrValue(Value value) const {
- if (current_phase_ == Phase::kPartialElimination) {
- // In this phase we are materializing the default values which are used
- // only if the partial singleton did not escape, so we can replace
- // a partial unknown with the prior value.
- if (value.IsPartialUnknown()) {
- value = value.GetPriorValue().ToValue();
- }
- if ((value.IsMergedUnknown() || value.NeedsPhi()) &&
- phi_placeholder_replacements_[PhiPlaceholderIndex(value)].IsValid()) {
- value = phi_placeholder_replacements_[PhiPlaceholderIndex(value)];
- DCHECK(!value.IsMergedUnknown());
- DCHECK(!value.NeedsPhi());
- } else if (value.IsMergedUnknown()) {
- return Value::ForLoopPhiPlaceholder(value.GetPhiPlaceholder());
- }
- if (value.IsInstruction() && value.GetInstruction()->IsInstanceFieldGet()) {
- DCHECK_LT(static_cast<size_t>(value.GetInstruction()->GetId()),
- substitute_instructions_for_loads_.size());
- HInstruction* substitute =
- substitute_instructions_for_loads_[value.GetInstruction()->GetId()];
- if (substitute != nullptr) {
- DCHECK(substitute->IsPredicatedInstanceFieldGet());
- return Value::ForInstruction(substitute);
- }
- }
- DCHECK_IMPLIES(value.IsInstruction(),
- FindSubstitute(value.GetInstruction()) == value.GetInstruction());
- return value;
- }
if (value.NeedsPhi() && phi_placeholder_replacements_[PhiPlaceholderIndex(value)].IsValid()) {
return Replacement(value);
} else {
@@ -752,8 +622,8 @@ class LSEVisitor final : private HGraphDelegateVisitor {
HInstruction* FindSubstitute(HInstruction* instruction) const {
size_t id = static_cast<size_t>(instruction->GetId());
if (id >= substitute_instructions_for_loads_.size()) {
- // New Phi (may not be in the graph yet), default value or PredicatedInstanceFieldGet.
- DCHECK_IMPLIES(IsLoad(instruction), instruction->IsPredicatedInstanceFieldGet());
+ // New Phi (may not be in the graph yet), or default value.
+ DCHECK(!IsLoad(instruction));
return instruction;
}
HInstruction* substitute = substitute_instructions_for_loads_[id];
@@ -789,7 +659,6 @@ class LSEVisitor final : private HGraphDelegateVisitor {
static bool IsLoad(HInstruction* instruction) {
// Unresolved load is not treated as a load.
return instruction->IsInstanceFieldGet() ||
- instruction->IsPredicatedInstanceFieldGet() ||
instruction->IsStaticFieldGet() ||
instruction->IsVecLoad() ||
instruction->IsArrayGet();
@@ -818,12 +687,7 @@ class LSEVisitor final : private HGraphDelegateVisitor {
if (value.IsPureUnknown() || value.IsPartialUnknown()) {
return;
}
- if (value.IsMergedUnknown()) {
- kept_merged_unknowns_.SetBit(PhiPlaceholderIndex(value));
- phi_placeholders_to_search_for_kept_stores_.SetBit(PhiPlaceholderIndex(value));
- return;
- }
- if (value.NeedsPhi()) {
+ if (value.IsMergedUnknown() || value.NeedsPhi()) {
phi_placeholders_to_search_for_kept_stores_.SetBit(PhiPlaceholderIndex(value));
} else {
HInstruction* instruction = value.GetInstruction();
@@ -843,9 +707,7 @@ class LSEVisitor final : private HGraphDelegateVisitor {
// We use this function when reading a location with unknown value and
// therefore we cannot know what exact store wrote that unknown value.
// But we can have a phi placeholder here marking multiple stores to keep.
- DCHECK(
- !heap_values[i].stored_by.IsInstruction() ||
- heap_location_collector_.GetHeapLocation(i)->GetReferenceInfo()->IsPartialSingleton());
+ DCHECK(!heap_values[i].stored_by.IsInstruction());
KeepStores(heap_values[i].stored_by);
heap_values[i].stored_by = Value::PureUnknown();
} else if (heap_location_collector_.MayAlias(i, loc_index)) {
@@ -925,7 +787,6 @@ class LSEVisitor final : private HGraphDelegateVisitor {
enum class Phase {
kLoadElimination,
kStoreElimination,
- kPartialElimination,
};
bool MayAliasOnBackEdge(HBasicBlock* loop_header, size_t idx1, size_t idx2) const;
@@ -958,21 +819,6 @@ class LSEVisitor final : private HGraphDelegateVisitor {
void FindOldValueForPhiPlaceholder(PhiPlaceholder phi_placeholder, DataType::Type type);
void FindStoresWritingOldValues();
void FinishFullLSE();
- void PrepareForPartialPhiComputation();
- // Create materialization block and materialization object for the given predecessor of entry.
- HInstruction* SetupPartialMaterialization(PartialLoadStoreEliminationHelper& helper,
- HeapRefHolder&& holder,
- size_t pred_idx,
- HBasicBlock* blk);
- // Returns the value that would be read by the 'read' instruction on
- // 'orig_new_inst' if 'orig_new_inst' has not escaped.
- HInstruction* GetPartialValueAt(HNewInstance* orig_new_inst, HInstruction* read);
- void MovePartialEscapes();
-
- void VisitPredicatedInstanceFieldGet(HPredicatedInstanceFieldGet* instruction) override {
- LOG(FATAL) << "Visited instruction " << instruction->DumpWithoutArgs()
- << " but LSE should be the only source of predicated-ifield-gets!";
- }
void HandleAcquireLoad(HInstruction* instruction) {
DCHECK((instruction->IsInstanceFieldGet() && instruction->AsInstanceFieldGet()->IsVolatile()) ||
@@ -1080,10 +926,12 @@ class LSEVisitor final : private HGraphDelegateVisitor {
}
void VisitVecLoad(HVecLoad* instruction) override {
+ DCHECK(!instruction->IsPredicated());
VisitGetLocation(instruction, heap_location_collector_.GetArrayHeapLocation(instruction));
}
void VisitVecStore(HVecStore* instruction) override {
+ DCHECK(!instruction->IsPredicated());
size_t idx = heap_location_collector_.GetArrayHeapLocation(instruction);
VisitSetLocation(instruction, idx, instruction->GetValue());
}
@@ -1107,7 +955,7 @@ class LSEVisitor final : private HGraphDelegateVisitor {
// Finalizable objects always escape.
const bool finalizable_object =
reference->IsNewInstance() && reference->AsNewInstance()->IsFinalizable();
- if (!finalizable_object && !IsEscapingObject(info, block, i)) {
+ if (!finalizable_object && !IsEscapingObject(info)) {
// Check whether the reference for a store is used by an environment local of
// the HDeoptimize. If not, the singleton is not observed after deoptimization.
const HUseList<HEnvironment*>& env_uses = reference->GetEnvUses();
@@ -1131,7 +979,7 @@ class LSEVisitor final : private HGraphDelegateVisitor {
ScopedArenaVector<ValueRecord>& heap_values = heap_values_for_[block->GetBlockId()];
for (size_t i = 0u, size = heap_values.size(); i != size; ++i) {
ReferenceInfo* ref_info = heap_location_collector_.GetHeapLocation(i)->GetReferenceInfo();
- if (must_keep_stores || IsEscapingObject(ref_info, block, i)) {
+ if (must_keep_stores || IsEscapingObject(ref_info)) {
KeepStores(heap_values[i].stored_by);
heap_values[i].stored_by = Value::PureUnknown();
}
@@ -1214,30 +1062,9 @@ class LSEVisitor final : private HGraphDelegateVisitor {
heap_values_for_[instruction->GetBlock()->GetBlockId()];
for (size_t i = 0u, size = heap_values.size(); i != size; ++i) {
ReferenceInfo* ref_info = heap_location_collector_.GetHeapLocation(i)->GetReferenceInfo();
- HBasicBlock* blk = instruction->GetBlock();
// We don't need to do anything if the reference has not escaped at this point.
- // This is true if either we (1) never escape or (2) sometimes escape but
- // there is no possible execution where we have done so at this time. NB
- // We count being in the excluded cohort as escaping. Technically, this is
- // a bit over-conservative (since we can have multiple non-escaping calls
- // before a single escaping one) but this simplifies everything greatly.
- auto partial_singleton_did_not_escape = [](ReferenceInfo* ref_info, HBasicBlock* blk) {
- DCHECK(ref_info->IsPartialSingleton());
- if (!ref_info->GetNoEscapeSubgraph()->ContainsBlock(blk)) {
- return false;
- }
- ArrayRef<const ExecutionSubgraph::ExcludedCohort> cohorts =
- ref_info->GetNoEscapeSubgraph()->GetExcludedCohorts();
- return std::none_of(cohorts.begin(),
- cohorts.end(),
- [&](const ExecutionSubgraph::ExcludedCohort& cohort) {
- return cohort.PrecedesBlock(blk);
- });
- };
- if (!can_throw_inside_a_try &&
- (ref_info->IsSingleton() ||
- // partial and we aren't currently escaping and we haven't escaped yet.
- (ref_info->IsPartialSingleton() && partial_singleton_did_not_escape(ref_info, blk)))) {
+ // This is true if we never escape.
+ if (!can_throw_inside_a_try && ref_info->IsSingleton()) {
// Singleton references cannot be seen by the callee.
} else {
if (can_throw || side_effects.DoesAnyRead() || side_effects.DoesAnyWrite()) {
@@ -1313,7 +1140,7 @@ class LSEVisitor final : private HGraphDelegateVisitor {
heap_values[i].value = Value::ForInstruction(new_instance->GetLoadClass());
heap_values[i].stored_by = Value::PureUnknown();
}
- } else if (inside_a_try || IsEscapingObject(info, block, i)) {
+ } else if (inside_a_try || IsEscapingObject(info)) {
// Since NewInstance can throw, we presume all previous stores could be visible.
KeepStores(heap_values[i].stored_by);
heap_values[i].stored_by = Value::PureUnknown();
@@ -1348,7 +1175,7 @@ class LSEVisitor final : private HGraphDelegateVisitor {
// Array elements are set to default heap values.
heap_values[i].value = Value::Default();
heap_values[i].stored_by = Value::PureUnknown();
- } else if (inside_a_try || IsEscapingObject(info, block, i)) {
+ } else if (inside_a_try || IsEscapingObject(info)) {
// Since NewArray can throw, we presume all previous stores could be visible.
KeepStores(heap_values[i].stored_by);
heap_values[i].stored_by = Value::PureUnknown();
@@ -1361,12 +1188,6 @@ class LSEVisitor final : private HGraphDelegateVisitor {
DCHECK(!instruction->CanThrow());
}
- bool ShouldPerformPartialLSE() const {
- return perform_partial_lse_ && !GetGraph()->IsCompilingOsr();
- }
-
- bool perform_partial_lse_;
-
const HeapLocationCollector& heap_location_collector_;
// Use local allocator for allocating memory.
@@ -1423,10 +1244,6 @@ class LSEVisitor final : private HGraphDelegateVisitor {
// The invalid heap value is used to mark Phi placeholders that cannot be replaced.
ScopedArenaVector<Value> phi_placeholder_replacements_;
- // Merged-unknowns that must have their predecessor values kept to ensure
- // partially escaped values are written
- ArenaBitVector kept_merged_unknowns_;
-
ScopedArenaVector<HInstruction*> singleton_new_instances_;
// The field infos for each heap location (if relevant).
@@ -1434,7 +1251,6 @@ class LSEVisitor final : private HGraphDelegateVisitor {
Phase current_phase_;
- friend class PartialLoadStoreEliminationHelper;
friend struct ScopedRestoreHeapValues;
friend std::ostream& operator<<(std::ostream& os, const Value& v);
@@ -1455,8 +1271,6 @@ std::ostream& operator<<(std::ostream& oss, const LSEVisitor::Phase& phase) {
return oss << "kLoadElimination";
case LSEVisitor::Phase::kStoreElimination:
return oss << "kStoreElimination";
- case LSEVisitor::Phase::kPartialElimination:
- return oss << "kPartialElimination";
}
}
@@ -1580,10 +1394,8 @@ std::ostream& operator<<(std::ostream& os, const LSEVisitor::Value& v) {
LSEVisitor::LSEVisitor(HGraph* graph,
const HeapLocationCollector& heap_location_collector,
- bool perform_partial_lse,
OptimizingCompilerStats* stats)
: HGraphDelegateVisitor(graph, stats),
- perform_partial_lse_(perform_partial_lse),
heap_location_collector_(heap_location_collector),
allocator_(graph->GetArenaStack()),
num_phi_placeholders_(GetGraph()->GetBlocks().size() *
@@ -1613,10 +1425,6 @@ LSEVisitor::LSEVisitor(HGraph* graph,
phi_placeholder_replacements_(num_phi_placeholders_,
Value::Invalid(),
allocator_.Adapter(kArenaAllocLSE)),
- kept_merged_unknowns_(&allocator_,
- /*start_bits=*/num_phi_placeholders_,
- /*expandable=*/false,
- kArenaAllocLSE),
singleton_new_instances_(allocator_.Adapter(kArenaAllocLSE)),
field_infos_(heap_location_collector_.GetNumberOfHeapLocations(),
allocator_.Adapter(kArenaAllocLSE)),
@@ -1856,8 +1664,7 @@ void LSEVisitor::MaterializeNonLoopPhis(PhiPlaceholder phi_placeholder, DataType
Value pred_value = ReplacementOrValue(heap_values_for_[predecessor->GetBlockId()][idx].value);
DCHECK(!pred_value.IsPureUnknown()) << pred_value << " block " << current_block->GetBlockId()
<< " pred: " << predecessor->GetBlockId();
- if (pred_value.NeedsNonLoopPhi() ||
- (current_phase_ == Phase::kPartialElimination && pred_value.IsMergedUnknown())) {
+ if (pred_value.NeedsNonLoopPhi()) {
// We need to process the Phi placeholder first.
work_queue.push_back(pred_value.GetPhiPlaceholder());
} else if (pred_value.IsDefault()) {
@@ -1888,12 +1695,6 @@ void LSEVisitor::VisitGetLocation(HInstruction* instruction, size_t idx) {
RecordFieldInfo(&instruction->GetFieldInfo(), idx);
}
DCHECK(record.value.IsUnknown() || record.value.Equals(ReplacementOrValue(record.value)));
- // If we are unknown, we either come from somewhere untracked or we can reconstruct the partial
- // value.
- DCHECK(!record.value.IsPureUnknown() ||
- heap_location_collector_.GetHeapLocation(idx)->GetReferenceInfo() == nullptr ||
- !heap_location_collector_.GetHeapLocation(idx)->GetReferenceInfo()->IsPartialSingleton())
- << "In " << GetGraph()->PrettyMethod() << ": " << record.value << " for " << *instruction;
intermediate_values_.insert({instruction, record.value});
loads_and_stores_.push_back({ instruction, idx });
if ((record.value.IsDefault() || record.value.NeedsNonLoopPhi()) &&
@@ -2302,9 +2103,7 @@ bool LSEVisitor::MaterializeLoopPhis(ArrayRef<const size_t> phi_placeholder_inde
for (HBasicBlock* predecessor : block->GetPredecessors()) {
Value value = ReplacementOrValue(heap_values_for_[predecessor->GetBlockId()][idx].value);
if (value.NeedsNonLoopPhi()) {
- DCHECK(current_phase_ == Phase::kLoadElimination ||
- current_phase_ == Phase::kPartialElimination)
- << current_phase_;
+ DCHECK(current_phase_ == Phase::kLoadElimination) << current_phase_;
MaterializeNonLoopPhis(value.GetPhiPlaceholder(), type);
value = Replacement(value);
}
@@ -2765,22 +2564,9 @@ void LSEVisitor::SearchPhiPlaceholdersForKeptStores() {
work_queue.push_back(index);
}
const ArenaVector<HBasicBlock*>& blocks = GetGraph()->GetBlocks();
- std::optional<ArenaBitVector> not_kept_stores;
- if (stats_) {
- not_kept_stores.emplace(GetGraph()->GetAllocator(),
- kept_stores_.GetBitSizeOf(),
- false,
- ArenaAllocKind::kArenaAllocLSE);
- }
while (!work_queue.empty()) {
uint32_t cur_phi_idx = work_queue.back();
PhiPlaceholder phi_placeholder = GetPhiPlaceholderAt(cur_phi_idx);
- // Only writes to partial-escapes need to be specifically kept.
- bool is_partial_kept_merged_unknown =
- kept_merged_unknowns_.IsBitSet(cur_phi_idx) &&
- heap_location_collector_.GetHeapLocation(phi_placeholder.GetHeapLocation())
- ->GetReferenceInfo()
- ->IsPartialSingleton();
work_queue.pop_back();
size_t idx = phi_placeholder.GetHeapLocation();
HBasicBlock* block = blocks[phi_placeholder.GetBlockId()];
@@ -2800,11 +2586,6 @@ void LSEVisitor::SearchPhiPlaceholdersForKeptStores() {
if (!stored_by.IsUnknown() && (i == idx || MayAliasOnBackEdge(block, idx, i))) {
if (stored_by.NeedsPhi()) {
size_t phi_placeholder_index = PhiPlaceholderIndex(stored_by);
- if (is_partial_kept_merged_unknown) {
- // Propagate merged-unknown keep since otherwise this might look
- // like a partial escape we can remove.
- kept_merged_unknowns_.SetBit(phi_placeholder_index);
- }
if (!phi_placeholders_to_search_for_kept_stores_.IsBitSet(phi_placeholder_index)) {
phi_placeholders_to_search_for_kept_stores_.SetBit(phi_placeholder_index);
work_queue.push_back(phi_placeholder_index);
@@ -2815,24 +2596,12 @@ void LSEVisitor::SearchPhiPlaceholdersForKeptStores() {
DCHECK(ri != nullptr) << "No heap value for " << stored_by.GetInstruction()->DebugName()
<< " id: " << stored_by.GetInstruction()->GetId() << " block: "
<< stored_by.GetInstruction()->GetBlock()->GetBlockId();
- if (!is_partial_kept_merged_unknown && IsPartialNoEscape(predecessor, idx)) {
- if (not_kept_stores) {
- not_kept_stores->SetBit(stored_by.GetInstruction()->GetId());
- }
- } else {
- kept_stores_.SetBit(stored_by.GetInstruction()->GetId());
- }
+ kept_stores_.SetBit(stored_by.GetInstruction()->GetId());
}
}
}
}
}
- if (not_kept_stores) {
- // a - b := (a & ~b)
- not_kept_stores->Subtract(&kept_stores_);
- auto num_removed = not_kept_stores->NumSetBits();
- MaybeRecordStat(stats_, MethodCompilationStat::kPartialStoreRemoved, num_removed);
- }
}
void LSEVisitor::UpdateValueRecordForStoreElimination(/*inout*/ValueRecord* value_record) {
@@ -3022,934 +2791,8 @@ void LSEVisitor::Run() {
// 4. Replace loads and remove unnecessary stores and singleton allocations.
FinishFullLSE();
-
- // 5. Move partial escapes down and fixup with PHIs.
- current_phase_ = Phase::kPartialElimination;
- MovePartialEscapes();
}
-// Clear unknown loop-phi results. Here we'll be able to use partial-unknowns so we need to
-// retry all of them with more information about where they come from.
-void LSEVisitor::PrepareForPartialPhiComputation() {
- std::replace_if(
- phi_placeholder_replacements_.begin(),
- phi_placeholder_replacements_.end(),
- [](const Value& val) { return !val.IsDefault() && !val.IsInstruction(); },
- Value::Invalid());
-}
-
-class PartialLoadStoreEliminationHelper {
- public:
- PartialLoadStoreEliminationHelper(LSEVisitor* lse, ScopedArenaAllocator* alloc)
- : lse_(lse),
- alloc_(alloc),
- new_ref_phis_(alloc_->Adapter(kArenaAllocLSE)),
- heap_refs_(alloc_->Adapter(kArenaAllocLSE)),
- max_preds_per_block_((*std::max_element(GetGraph()->GetActiveBlocks().begin(),
- GetGraph()->GetActiveBlocks().end(),
- [](HBasicBlock* a, HBasicBlock* b) {
- return a->GetNumberOfPredecessors() <
- b->GetNumberOfPredecessors();
- }))
- ->GetNumberOfPredecessors()),
- materialization_blocks_(GetGraph()->GetBlocks().size() * max_preds_per_block_,
- nullptr,
- alloc_->Adapter(kArenaAllocLSE)),
- first_materialization_block_id_(GetGraph()->GetBlocks().size()) {
- size_t num_partial_singletons = lse_->heap_location_collector_.CountPartialSingletons();
- heap_refs_.reserve(num_partial_singletons);
- new_ref_phis_.reserve(num_partial_singletons * GetGraph()->GetBlocks().size());
- CollectInterestingHeapRefs();
- }
-
- ~PartialLoadStoreEliminationHelper() {
- if (heap_refs_.empty()) {
- return;
- }
- ReferenceTypePropagation rtp_fixup(GetGraph(),
- Handle<mirror::DexCache>(),
- /* is_first_run= */ false);
- rtp_fixup.Visit(ArrayRef<HInstruction* const>(new_ref_phis_));
- GetGraph()->ClearLoopInformation();
- GetGraph()->ClearDominanceInformation();
- GetGraph()->ClearReachabilityInformation();
- GetGraph()->BuildDominatorTree();
- GetGraph()->ComputeReachabilityInformation();
- }
-
- class IdxToHeapLoc {
- public:
- explicit IdxToHeapLoc(const HeapLocationCollector* hlc) : collector_(hlc) {}
- HeapLocation* operator()(size_t idx) const {
- return collector_->GetHeapLocation(idx);
- }
-
- private:
- const HeapLocationCollector* collector_;
- };
-
-
- class HeapReferenceData {
- public:
- using LocIterator = IterationRange<TransformIterator<BitVector::IndexIterator, IdxToHeapLoc>>;
- HeapReferenceData(PartialLoadStoreEliminationHelper* helper,
- HNewInstance* new_inst,
- const ExecutionSubgraph* subgraph,
- ScopedArenaAllocator* alloc)
- : new_instance_(new_inst),
- helper_(helper),
- heap_locs_(alloc,
- helper->lse_->heap_location_collector_.GetNumberOfHeapLocations(),
- /* expandable= */ false,
- kArenaAllocLSE),
- materializations_(
- // We generally won't need to create too many materialization blocks and we can expand
- // this as needed so just start off with 2x.
- 2 * helper->lse_->GetGraph()->GetBlocks().size(),
- nullptr,
- alloc->Adapter(kArenaAllocLSE)),
- collector_(helper->lse_->heap_location_collector_),
- subgraph_(subgraph) {}
-
- LocIterator IterateLocations() {
- auto idxs = heap_locs_.Indexes();
- return MakeTransformRange(idxs, IdxToHeapLoc(&collector_));
- }
-
- void AddHeapLocation(size_t idx) {
- heap_locs_.SetBit(idx);
- }
-
- const ExecutionSubgraph* GetNoEscapeSubgraph() const {
- return subgraph_;
- }
-
- bool IsPostEscape(HBasicBlock* blk) {
- return std::any_of(
- subgraph_->GetExcludedCohorts().cbegin(),
- subgraph_->GetExcludedCohorts().cend(),
- [&](const ExecutionSubgraph::ExcludedCohort& ec) { return ec.PrecedesBlock(blk); });
- }
-
- bool InEscapeCohort(HBasicBlock* blk) {
- return std::any_of(
- subgraph_->GetExcludedCohorts().cbegin(),
- subgraph_->GetExcludedCohorts().cend(),
- [&](const ExecutionSubgraph::ExcludedCohort& ec) { return ec.ContainsBlock(blk); });
- }
-
- bool BeforeAllEscapes(HBasicBlock* b) {
- return std::none_of(subgraph_->GetExcludedCohorts().cbegin(),
- subgraph_->GetExcludedCohorts().cend(),
- [&](const ExecutionSubgraph::ExcludedCohort& ec) {
- return ec.PrecedesBlock(b) || ec.ContainsBlock(b);
- });
- }
-
- HNewInstance* OriginalNewInstance() const {
- return new_instance_;
- }
-
- // Collect and replace all uses. We need to perform this twice since we will
- // generate PHIs and additional uses as we create the default-values for
- // pred-gets. These values might be other references that are also being
- // partially eliminated. By running just the replacement part again we are
- // able to avoid having to keep another whole in-progress partial map
- // around. Since we will have already handled all the other uses in the
- // first pass the second one will be quite fast.
- void FixupUses(bool first_pass) {
- ScopedArenaAllocator saa(GetGraph()->GetArenaStack());
- // Replace uses with materialized values.
- ScopedArenaVector<InstructionUse<HInstruction>> to_replace(saa.Adapter(kArenaAllocLSE));
- ScopedArenaVector<HInstruction*> to_remove(saa.Adapter(kArenaAllocLSE));
- // Do we need to add a constructor-fence.
- ScopedArenaVector<InstructionUse<HConstructorFence>> constructor_fences(
- saa.Adapter(kArenaAllocLSE));
- ScopedArenaVector<InstructionUse<HInstruction>> to_predicate(saa.Adapter(kArenaAllocLSE));
-
- CollectReplacements(to_replace, to_remove, constructor_fences, to_predicate);
-
- if (!first_pass) {
- // If another partial creates new references they can only be in Phis or pred-get defaults
- // so they must be in the to_replace group.
- DCHECK(to_predicate.empty());
- DCHECK(constructor_fences.empty());
- DCHECK(to_remove.empty());
- }
-
- ReplaceInput(to_replace);
- RemoveAndReplaceInputs(to_remove);
- CreateConstructorFences(constructor_fences);
- PredicateInstructions(to_predicate);
-
- CHECK(OriginalNewInstance()->GetUses().empty())
- << OriginalNewInstance()->GetUses() << ", " << OriginalNewInstance()->GetEnvUses();
- }
-
- void AddMaterialization(HBasicBlock* blk, HInstruction* ins) {
- if (blk->GetBlockId() >= materializations_.size()) {
- // Make sure the materialization array is large enough, try to avoid
- // re-sizing too many times by giving extra space.
- materializations_.resize(blk->GetBlockId() * 2, nullptr);
- }
- DCHECK(materializations_[blk->GetBlockId()] == nullptr)
- << "Already have a materialization in block " << blk->GetBlockId() << ": "
- << *materializations_[blk->GetBlockId()] << " when trying to set materialization to "
- << *ins;
- materializations_[blk->GetBlockId()] = ins;
- LSE_VLOG << "In block " << blk->GetBlockId() << " materialization is " << *ins;
- helper_->NotifyNewMaterialization(ins);
- }
-
- bool HasMaterialization(HBasicBlock* blk) const {
- return blk->GetBlockId() < materializations_.size() &&
- materializations_[blk->GetBlockId()] != nullptr;
- }
-
- HInstruction* GetMaterialization(HBasicBlock* blk) const {
- if (materializations_.size() <= blk->GetBlockId() ||
- materializations_[blk->GetBlockId()] == nullptr) {
- // This must be a materialization block added after the partial LSE of
- // the current reference finished. Since every edge can only have at
- // most one materialization block added to it we can just check the
- // blocks predecessor.
- DCHECK(helper_->IsMaterializationBlock(blk));
- blk = helper_->FindDominatingNonMaterializationBlock(blk);
- DCHECK(!helper_->IsMaterializationBlock(blk));
- }
- DCHECK_GT(materializations_.size(), blk->GetBlockId());
- DCHECK(materializations_[blk->GetBlockId()] != nullptr);
- return materializations_[blk->GetBlockId()];
- }
-
- void GenerateMaterializationValueFromPredecessors(HBasicBlock* blk) {
- DCHECK(std::none_of(GetNoEscapeSubgraph()->GetExcludedCohorts().begin(),
- GetNoEscapeSubgraph()->GetExcludedCohorts().end(),
- [&](const ExecutionSubgraph::ExcludedCohort& cohort) {
- return cohort.IsEntryBlock(blk);
- }));
- DCHECK(!HasMaterialization(blk));
- if (blk->IsExitBlock()) {
- return;
- } else if (blk->IsLoopHeader()) {
- // See comment in execution_subgraph.h. Currently we act as though every
- // allocation for partial elimination takes place in the entry block.
- // This simplifies the analysis by making it so any escape cohort
- // expands to contain any loops it is a part of. This is something that
- // we should rectify at some point. In either case however we can still
- // special case the loop-header since (1) currently the loop can't have
- // any merges between different cohort entries since the pre-header will
- // be the earliest place entry can happen and (2) even if the analysis
- // is improved to consider lifetime of the object WRT loops any values
- // which would require loop-phis would have to make the whole loop
- // escape anyway.
- // This all means we can always use value from the pre-header when the
- // block is the loop-header and we didn't already create a
- // materialization block. (NB when we do improve the analysis we will
- // need to modify the materialization creation code to deal with this
- // correctly.)
- HInstruction* pre_header_val =
- GetMaterialization(blk->GetLoopInformation()->GetPreHeader());
- AddMaterialization(blk, pre_header_val);
- return;
- }
- ScopedArenaAllocator saa(GetGraph()->GetArenaStack());
- ScopedArenaVector<HInstruction*> pred_vals(saa.Adapter(kArenaAllocLSE));
- pred_vals.reserve(blk->GetNumberOfPredecessors());
- for (HBasicBlock* pred : blk->GetPredecessors()) {
- DCHECK(HasMaterialization(pred));
- pred_vals.push_back(GetMaterialization(pred));
- }
- GenerateMaterializationValueFromPredecessorsDirect(blk, pred_vals);
- }
-
- void GenerateMaterializationValueFromPredecessorsForEntry(
- HBasicBlock* entry, const ScopedArenaVector<HInstruction*>& pred_vals) {
- DCHECK(std::any_of(GetNoEscapeSubgraph()->GetExcludedCohorts().begin(),
- GetNoEscapeSubgraph()->GetExcludedCohorts().end(),
- [&](const ExecutionSubgraph::ExcludedCohort& cohort) {
- return cohort.IsEntryBlock(entry);
- }));
- GenerateMaterializationValueFromPredecessorsDirect(entry, pred_vals);
- }
-
- private:
- template <typename InstructionType>
- struct InstructionUse {
- InstructionType* instruction_;
- size_t index_;
- };
-
- void ReplaceInput(const ScopedArenaVector<InstructionUse<HInstruction>>& to_replace) {
- for (auto& [ins, idx] : to_replace) {
- HInstruction* merged_inst = GetMaterialization(ins->GetBlock());
- if (ins->IsPhi() && merged_inst->IsPhi() && ins->GetBlock() == merged_inst->GetBlock()) {
- // Phis we just pass through the appropriate inputs.
- ins->ReplaceInput(merged_inst->InputAt(idx), idx);
- } else {
- ins->ReplaceInput(merged_inst, idx);
- }
- }
- }
-
- void RemoveAndReplaceInputs(const ScopedArenaVector<HInstruction*>& to_remove) {
- for (HInstruction* ins : to_remove) {
- if (ins->GetBlock() == nullptr) {
- // Already dealt with.
- continue;
- }
- DCHECK(BeforeAllEscapes(ins->GetBlock())) << *ins;
- if (ins->IsInstanceFieldGet() || ins->IsInstanceFieldSet()) {
- bool instruction_has_users =
- ins->IsInstanceFieldGet() && (!ins->GetUses().empty() || !ins->GetEnvUses().empty());
- if (instruction_has_users) {
- // Make sure any remaining users of the read are replaced.
- HInstruction* replacement =
- helper_->lse_->GetPartialValueAt(OriginalNewInstance(), ins);
- // NB ReplaceInput will remove a use from the list so this is
- // guaranteed to finish eventually.
- while (!ins->GetUses().empty()) {
- const HUseListNode<HInstruction*>& use = ins->GetUses().front();
- use.GetUser()->ReplaceInput(replacement, use.GetIndex());
- }
- while (!ins->GetEnvUses().empty()) {
- const HUseListNode<HEnvironment*>& use = ins->GetEnvUses().front();
- use.GetUser()->ReplaceInput(replacement, use.GetIndex());
- }
- } else {
- DCHECK(ins->GetUses().empty())
- << "Instruction has users!\n"
- << ins->DumpWithArgs() << "\nUsers are " << ins->GetUses();
- DCHECK(ins->GetEnvUses().empty())
- << "Instruction has users!\n"
- << ins->DumpWithArgs() << "\nUsers are " << ins->GetEnvUses();
- }
- ins->GetBlock()->RemoveInstruction(ins);
- } else {
- // Can only be obj == other, obj != other, obj == obj (!?), or obj != obj (!?).
- // Since PHIs are escapes as far as LSE is concerned and we are before
- // any escapes these are the only 4 options.
- DCHECK(ins->IsEqual() || ins->IsNotEqual()) << *ins;
- HInstruction* replacement;
- if (UNLIKELY(ins->InputAt(0) == ins->InputAt(1))) {
- replacement = ins->IsEqual() ? GetGraph()->GetIntConstant(1)
- : GetGraph()->GetIntConstant(0);
- } else {
- replacement = ins->IsEqual() ? GetGraph()->GetIntConstant(0)
- : GetGraph()->GetIntConstant(1);
- }
- ins->ReplaceWith(replacement);
- ins->GetBlock()->RemoveInstruction(ins);
- }
- }
- }
-
- void CreateConstructorFences(
- const ScopedArenaVector<InstructionUse<HConstructorFence>>& constructor_fences) {
- if (!constructor_fences.empty()) {
- uint32_t pc = constructor_fences.front().instruction_->GetDexPc();
- for (auto& [cf, idx] : constructor_fences) {
- if (cf->GetInputs().size() == 1) {
- cf->GetBlock()->RemoveInstruction(cf);
- } else {
- cf->RemoveInputAt(idx);
- }
- }
- for (const ExecutionSubgraph::ExcludedCohort& ec :
- GetNoEscapeSubgraph()->GetExcludedCohorts()) {
- for (HBasicBlock* blk : ec.EntryBlocks()) {
- for (HBasicBlock* materializer :
- Filter(MakeIterationRange(blk->GetPredecessors()),
- [&](HBasicBlock* blk) { return helper_->IsMaterializationBlock(blk); })) {
- HInstruction* new_cf = new (GetGraph()->GetAllocator()) HConstructorFence(
- GetMaterialization(materializer), pc, GetGraph()->GetAllocator());
- materializer->InsertInstructionBefore(new_cf, materializer->GetLastInstruction());
- }
- }
- }
- }
- }
-
- void PredicateInstructions(
- const ScopedArenaVector<InstructionUse<HInstruction>>& to_predicate) {
- for (auto& [ins, idx] : to_predicate) {
- if (UNLIKELY(ins->GetBlock() == nullptr)) {
- // Already handled due to obj == obj;
- continue;
- } else if (ins->IsInstanceFieldGet()) {
- // IFieldGet[obj] => PredicatedIFieldGet[PartialValue, obj]
- HInstruction* new_fget = new (GetGraph()->GetAllocator()) HPredicatedInstanceFieldGet(
- ins->AsInstanceFieldGet(),
- GetMaterialization(ins->GetBlock()),
- helper_->lse_->GetPartialValueAt(OriginalNewInstance(), ins));
- MaybeRecordStat(helper_->lse_->stats_, MethodCompilationStat::kPredicatedLoadAdded);
- ins->GetBlock()->InsertInstructionBefore(new_fget, ins);
- if (ins->GetType() == DataType::Type::kReference) {
- // Reference info is the same
- new_fget->SetReferenceTypeInfoIfValid(ins->GetReferenceTypeInfo());
- }
- // In this phase, substitute instructions are used only for the predicated get
- // default values which are used only if the partial singleton did not escape,
- // so the out value of the `new_fget` for the relevant cases is the same as
- // the default value.
- // TODO: Use the default value for materializing default values used by
- // other predicated loads to avoid some unnecessary Phis. (This shall
- // complicate the search for replacement in `ReplacementOrValue()`.)
- DCHECK(helper_->lse_->substitute_instructions_for_loads_[ins->GetId()] == nullptr);
- helper_->lse_->substitute_instructions_for_loads_[ins->GetId()] = new_fget;
- ins->ReplaceWith(new_fget);
- ins->ReplaceEnvUsesDominatedBy(ins, new_fget);
- CHECK(ins->GetEnvUses().empty() && ins->GetUses().empty())
- << "Instruction: " << *ins << " uses: " << ins->GetUses()
- << ", env: " << ins->GetEnvUses();
- ins->GetBlock()->RemoveInstruction(ins);
- } else if (ins->IsInstanceFieldSet()) {
- // Any predicated sets shouldn't require movement.
- ins->AsInstanceFieldSet()->SetIsPredicatedSet();
- MaybeRecordStat(helper_->lse_->stats_, MethodCompilationStat::kPredicatedStoreAdded);
- HInstruction* merged_inst = GetMaterialization(ins->GetBlock());
- ins->ReplaceInput(merged_inst, idx);
- } else {
- // comparisons need to be split into 2.
- DCHECK(ins->IsEqual() || ins->IsNotEqual()) << "bad instruction " << *ins;
- bool this_is_first = idx == 0;
- if (ins->InputAt(0) == ins->InputAt(1)) {
- // This is an obj == obj or obj != obj comparison.
- // No idea why anyone would do this but whatever.
- ins->ReplaceWith(GetGraph()->GetIntConstant(ins->IsEqual() ? 1 : 0));
- ins->GetBlock()->RemoveInstruction(ins);
- continue;
- } else {
- HInstruction* is_escaped = new (GetGraph()->GetAllocator())
- HNotEqual(GetMaterialization(ins->GetBlock()), GetGraph()->GetNullConstant());
- HInstruction* combine_inst =
- ins->IsEqual() ? static_cast<HInstruction*>(new (GetGraph()->GetAllocator()) HAnd(
- DataType::Type::kBool, is_escaped, ins))
- : static_cast<HInstruction*>(new (GetGraph()->GetAllocator()) HOr(
- DataType::Type::kBool, is_escaped, ins));
- ins->ReplaceInput(GetMaterialization(ins->GetBlock()), this_is_first ? 0 : 1);
- ins->GetBlock()->InsertInstructionBefore(is_escaped, ins);
- ins->GetBlock()->InsertInstructionAfter(combine_inst, ins);
- ins->ReplaceWith(combine_inst);
- combine_inst->ReplaceInput(ins, 1);
- }
- }
- }
- }
-
- // Figure out all the instructions we need to
- // fixup/replace/remove/duplicate. Since this requires an iteration of an
- // intrusive linked list we want to do it only once and collect all the data
- // here.
- void CollectReplacements(
- ScopedArenaVector<InstructionUse<HInstruction>>& to_replace,
- ScopedArenaVector<HInstruction*>& to_remove,
- ScopedArenaVector<InstructionUse<HConstructorFence>>& constructor_fences,
- ScopedArenaVector<InstructionUse<HInstruction>>& to_predicate) {
- size_t size = new_instance_->GetUses().SizeSlow();
- to_replace.reserve(size);
- to_remove.reserve(size);
- constructor_fences.reserve(size);
- to_predicate.reserve(size);
- for (auto& use : new_instance_->GetUses()) {
- HBasicBlock* blk =
- helper_->FindDominatingNonMaterializationBlock(use.GetUser()->GetBlock());
- if (InEscapeCohort(blk)) {
- LSE_VLOG << "Replacing " << *new_instance_ << " use in " << *use.GetUser() << " with "
- << *GetMaterialization(blk);
- to_replace.push_back({use.GetUser(), use.GetIndex()});
- } else if (IsPostEscape(blk)) {
- LSE_VLOG << "User " << *use.GetUser() << " after escapes!";
- // The fields + cmp are normal uses. Phi can only be here if it was
- // generated by full LSE so whatever store+load that created the phi
- // is the escape.
- if (use.GetUser()->IsPhi()) {
- to_replace.push_back({use.GetUser(), use.GetIndex()});
- } else {
- DCHECK(use.GetUser()->IsFieldAccess() ||
- use.GetUser()->IsEqual() ||
- use.GetUser()->IsNotEqual())
- << *use.GetUser() << "@" << use.GetIndex();
- to_predicate.push_back({use.GetUser(), use.GetIndex()});
- }
- } else if (use.GetUser()->IsConstructorFence()) {
- LSE_VLOG << "User " << *use.GetUser() << " being moved to materialization!";
- constructor_fences.push_back({use.GetUser()->AsConstructorFence(), use.GetIndex()});
- } else {
- LSE_VLOG << "User " << *use.GetUser() << " not contained in cohort!";
- to_remove.push_back(use.GetUser());
- }
- }
- DCHECK_EQ(
- to_replace.size() + to_remove.size() + constructor_fences.size() + to_predicate.size(),
- size);
- }
-
- void GenerateMaterializationValueFromPredecessorsDirect(
- HBasicBlock* blk, const ScopedArenaVector<HInstruction*>& pred_vals) {
- DCHECK(!pred_vals.empty());
- bool all_equal = std::all_of(pred_vals.begin() + 1, pred_vals.end(), [&](HInstruction* val) {
- return val == pred_vals.front();
- });
- if (LIKELY(all_equal)) {
- AddMaterialization(blk, pred_vals.front());
- } else {
- // Make a PHI for the predecessors.
- HPhi* phi = new (GetGraph()->GetAllocator()) HPhi(
- GetGraph()->GetAllocator(), kNoRegNumber, pred_vals.size(), DataType::Type::kReference);
- for (const auto& [ins, off] : ZipCount(MakeIterationRange(pred_vals))) {
- phi->SetRawInputAt(off, ins);
- }
- blk->AddPhi(phi);
- AddMaterialization(blk, phi);
- }
- }
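The removed helper above applies a simple merge rule when a block's materialization is derived from its predecessors: if every predecessor supplies the same instruction, reuse it; only when they disagree is a reference-typed Phi built. A standalone sketch of that rule under the assumption of a non-empty predecessor list (mirroring the DCHECK above); MergePredecessorValues and Merge are illustrative names, not ART API:

#include <algorithm>
#include <string>
#include <vector>

struct Merge {
  bool needs_phi;
  std::string value;  // Common value when needs_phi is false.
};

// pred_vals is assumed non-empty, matching the DCHECK in the removed code.
Merge MergePredecessorValues(const std::vector<std::string>& pred_vals) {
  bool all_equal = std::all_of(pred_vals.begin() + 1, pred_vals.end(),
                               [&](const std::string& v) { return v == pred_vals.front(); });
  return all_equal ? Merge{false, pred_vals.front()} : Merge{true, {}};
}

int main() {
  Merge m = MergePredecessorValues({"obj1", "obj1"});
  return m.needs_phi ? 1 : 0;  // Same value from both predecessors: no Phi needed.
}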
-
- HGraph* GetGraph() const {
- return helper_->GetGraph();
- }
-
- HNewInstance* new_instance_;
- PartialLoadStoreEliminationHelper* helper_;
- ArenaBitVector heap_locs_;
- ScopedArenaVector<HInstruction*> materializations_;
- const HeapLocationCollector& collector_;
- const ExecutionSubgraph* subgraph_;
- };
-
- ArrayRef<HeapReferenceData> GetHeapRefs() {
- return ArrayRef<HeapReferenceData>(heap_refs_);
- }
-
- bool IsMaterializationBlock(HBasicBlock* blk) const {
- return blk->GetBlockId() >= first_materialization_block_id_;
- }
-
- HBasicBlock* GetOrCreateMaterializationBlock(HBasicBlock* entry, size_t pred_num) {
- size_t idx = GetMaterializationBlockIndex(entry, pred_num);
- HBasicBlock* blk = materialization_blocks_[idx];
- if (blk == nullptr) {
- blk = new (GetGraph()->GetAllocator()) HBasicBlock(GetGraph());
- GetGraph()->AddBlock(blk);
- LSE_VLOG << "creating materialization block " << blk->GetBlockId() << " on edge "
- << entry->GetPredecessors()[pred_num]->GetBlockId() << "->" << entry->GetBlockId();
- blk->AddInstruction(new (GetGraph()->GetAllocator()) HGoto());
- materialization_blocks_[idx] = blk;
- }
- return blk;
- }
-
- HBasicBlock* GetMaterializationBlock(HBasicBlock* entry, size_t pred_num) {
- HBasicBlock* out = materialization_blocks_[GetMaterializationBlockIndex(entry, pred_num)];
- DCHECK(out != nullptr) << "No materialization block for edge " << entry->GetBlockId() << "->"
- << entry->GetPredecessors()[pred_num]->GetBlockId();
- return out;
- }
-
- IterationRange<ArenaVector<HBasicBlock*>::const_iterator> IterateMaterializationBlocks() {
- return MakeIterationRange(GetGraph()->GetBlocks().begin() + first_materialization_block_id_,
- GetGraph()->GetBlocks().end());
- }
-
- void FixupPartialObjectUsers() {
- for (PartialLoadStoreEliminationHelper::HeapReferenceData& ref_data : GetHeapRefs()) {
- // Use the materialized instances to replace original instance
- ref_data.FixupUses(/*first_pass=*/true);
- CHECK(ref_data.OriginalNewInstance()->GetUses().empty())
- << ref_data.OriginalNewInstance()->GetUses() << ", "
- << ref_data.OriginalNewInstance()->GetEnvUses();
- }
- // This can cause new uses to be created due to the creation of phis/pred-get defaults
- for (PartialLoadStoreEliminationHelper::HeapReferenceData& ref_data : GetHeapRefs()) {
- // Only need to handle new phis/pred-get defaults. DCHECK that's all we find.
- ref_data.FixupUses(/*first_pass=*/false);
- CHECK(ref_data.OriginalNewInstance()->GetUses().empty())
- << ref_data.OriginalNewInstance()->GetUses() << ", "
- << ref_data.OriginalNewInstance()->GetEnvUses();
- }
- }
-
- // Finds the given block itself if it is not a materialization block, or otherwise
- // the closest dominating block that is not a materialization block.
- HBasicBlock* FindDominatingNonMaterializationBlock(HBasicBlock* blk) {
- if (LIKELY(!IsMaterializationBlock(blk))) {
- // Not a materialization block so itself.
- return blk;
- } else if (blk->GetNumberOfPredecessors() != 0) {
- // We're far enough along that the materialization blocks have been
- // inserted into the graph so no need to go searching.
- return blk->GetSinglePredecessor();
- }
- // Search through the materialization blocks to find where it will be
- // inserted.
- for (auto [mat, idx] : ZipCount(MakeIterationRange(materialization_blocks_))) {
- if (mat == blk) {
- size_t cur_pred_idx = idx % max_preds_per_block_;
- HBasicBlock* entry = GetGraph()->GetBlocks()[idx / max_preds_per_block_];
- return entry->GetPredecessors()[cur_pred_idx];
- }
- }
- LOG(FATAL) << "Unable to find materialization block position for " << blk->GetBlockId() << "!";
- return nullptr;
- }
-
- void InsertMaterializationBlocks() {
- for (auto [mat, idx] : ZipCount(MakeIterationRange(materialization_blocks_))) {
- if (mat == nullptr) {
- continue;
- }
- size_t cur_pred_idx = idx % max_preds_per_block_;
- HBasicBlock* entry = GetGraph()->GetBlocks()[idx / max_preds_per_block_];
- HBasicBlock* pred = entry->GetPredecessors()[cur_pred_idx];
- mat->InsertBetween(pred, entry);
- LSE_VLOG << "Adding materialization block " << mat->GetBlockId() << " on edge "
- << pred->GetBlockId() << "->" << entry->GetBlockId();
- }
- }
-
- // Replace any env-uses remaining of the partial singletons with the
- // appropriate phis and remove the instructions.
- void RemoveReplacedInstructions() {
- for (HeapReferenceData& ref_data : GetHeapRefs()) {
- CHECK(ref_data.OriginalNewInstance()->GetUses().empty())
- << ref_data.OriginalNewInstance()->GetUses() << ", "
- << ref_data.OriginalNewInstance()->GetEnvUses()
- << " inst is: " << ref_data.OriginalNewInstance();
- const auto& env_uses = ref_data.OriginalNewInstance()->GetEnvUses();
- while (!env_uses.empty()) {
- const HUseListNode<HEnvironment*>& use = env_uses.front();
- HInstruction* merged_inst =
- ref_data.GetMaterialization(use.GetUser()->GetHolder()->GetBlock());
- LSE_VLOG << "Replacing env use of " << *use.GetUser()->GetHolder() << "@" << use.GetIndex()
- << " with " << *merged_inst;
- use.GetUser()->ReplaceInput(merged_inst, use.GetIndex());
- }
- ref_data.OriginalNewInstance()->GetBlock()->RemoveInstruction(ref_data.OriginalNewInstance());
- }
- }
-
- // We need to make sure any allocations dominate their environment uses.
- // Technically we could probably remove the env-uses and be fine but this is easy.
- void ReorderMaterializationsForEnvDominance() {
- for (HBasicBlock* blk : IterateMaterializationBlocks()) {
- ScopedArenaAllocator alloc(alloc_->GetArenaStack());
- ArenaBitVector still_unsorted(
- &alloc, GetGraph()->GetCurrentInstructionId(), false, kArenaAllocLSE);
- // This is guaranteed to be very short (since we will abandon LSE if there
- // are >= kMaxNumberOfHeapLocations (32) heap locations so that is the
- // absolute maximum size this list can be) so doing a selection sort is
- // fine. This avoids the need to do a complicated recursive check to
- // ensure transitivity for std::sort.
- ScopedArenaVector<HNewInstance*> materializations(alloc.Adapter(kArenaAllocLSE));
- materializations.reserve(GetHeapRefs().size());
- for (HInstruction* ins :
- MakeSTLInstructionIteratorRange(HInstructionIterator(blk->GetInstructions()))) {
- if (ins->IsNewInstance()) {
- materializations.push_back(ins->AsNewInstance());
- still_unsorted.SetBit(ins->GetId());
- }
- }
- using Iter = ScopedArenaVector<HNewInstance*>::iterator;
- Iter unsorted_start = materializations.begin();
- Iter unsorted_end = materializations.end();
- // Selection sort. Required since the only check we can easily perform is an
- // is-before-all-unsorted check.
- while (unsorted_start != unsorted_end) {
- bool found_instruction = false;
- for (Iter candidate = unsorted_start; candidate != unsorted_end; ++candidate) {
- HNewInstance* ni = *candidate;
- if (std::none_of(ni->GetAllEnvironments().cbegin(),
- ni->GetAllEnvironments().cend(),
- [&](const HEnvironment* env) {
- return std::any_of(
- env->GetEnvInputs().cbegin(),
- env->GetEnvInputs().cend(),
- [&](const HInstruction* env_element) {
- return env_element != nullptr &&
- still_unsorted.IsBitSet(env_element->GetId());
- });
- })) {
- still_unsorted.ClearBit(ni->GetId());
- std::swap(*unsorted_start, *candidate);
- ++unsorted_start;
- found_instruction = true;
- break;
- }
- }
- CHECK(found_instruction) << "Unable to select next materialization instruction."
- << " Environments have a dependency loop!";
- }
-      // Reverse so that, as we prepend them, we end up with the correct order.
- auto reverse_iter = MakeIterationRange(materializations.rbegin(), materializations.rend());
- for (HNewInstance* ins : reverse_iter) {
- if (blk->GetFirstInstruction() != ins) {
-          // Don't do checks, since they would verify the move is safe w.r.t.
-          // ins->CanBeMoved(), which is false for HNewInstance.
- ins->MoveBefore(blk->GetFirstInstruction(), /*do_checks=*/false);
- }
- }
- }
- }
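// A self-contained sketch of the same selection strategy, assuming nothing
// beyond the standard library: repeatedly pick an element none of whose
// dependencies is still unplaced. Quadratic, which is fine for the tiny lists
// involved; OrderByDependencies and the integer data are made up for
// illustration and are not part of the compiler.
#include <algorithm>
#include <cassert>
#include <unordered_map>
#include <unordered_set>
#include <vector>

std::vector<int> OrderByDependencies(
    std::vector<int> items,
    const std::unordered_map<int, std::vector<int>>& deps) {
  std::unordered_set<int> unplaced(items.begin(), items.end());
  auto unsorted_start = items.begin();
  while (unsorted_start != items.end()) {
    // The only cheap check is "all of this item's dependencies are already
    // placed", mirroring the is-before-all-unsorted check above.
    auto candidate = std::find_if(unsorted_start, items.end(), [&](int item) {
      auto it = deps.find(item);
      if (it == deps.end()) return true;
      return std::none_of(it->second.begin(), it->second.end(),
                          [&](int dep) { return unplaced.count(dep) != 0; });
    });
    assert(candidate != items.end() && "dependency cycle");
    unplaced.erase(*candidate);
    std::iter_swap(unsorted_start, candidate);
    ++unsorted_start;
  }
  return items;
}

int main() {
  // 2 depends on 1 and 3 depends on 2, so any input order sorts to 1, 2, 3.
  std::vector<int> ordered = OrderByDependencies({3, 1, 2}, {{2, {1}}, {3, {2}}});
  assert((ordered == std::vector<int>{1, 2, 3}));
  return 0;
}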
-
- private:
- void CollectInterestingHeapRefs() {
- // Get all the partials we need to move around.
- for (size_t i = 0; i < lse_->heap_location_collector_.GetNumberOfHeapLocations(); ++i) {
- ReferenceInfo* ri = lse_->heap_location_collector_.GetHeapLocation(i)->GetReferenceInfo();
- if (ri->IsPartialSingleton() &&
- ri->GetReference()->GetBlock() != nullptr &&
- ri->GetNoEscapeSubgraph()->ContainsBlock(ri->GetReference()->GetBlock())) {
- RecordHeapRefField(ri->GetReference()->AsNewInstance(), i);
- }
- }
- }
-
- void RecordHeapRefField(HNewInstance* ni, size_t loc) {
- DCHECK(ni != nullptr);
- // This is likely to be very short so just do a linear search.
- auto it = std::find_if(heap_refs_.begin(), heap_refs_.end(), [&](HeapReferenceData& data) {
- return data.OriginalNewInstance() == ni;
- });
- HeapReferenceData& cur_ref =
- (it == heap_refs_.end())
- ? heap_refs_.emplace_back(this,
- ni,
- lse_->heap_location_collector_.GetHeapLocation(loc)
- ->GetReferenceInfo()
- ->GetNoEscapeSubgraph(),
- alloc_)
- : *it;
- cur_ref.AddHeapLocation(loc);
- }
-
-
- void NotifyNewMaterialization(HInstruction* ins) {
- if (ins->IsPhi()) {
- new_ref_phis_.push_back(ins->AsPhi());
- }
- }
-
- size_t GetMaterializationBlockIndex(HBasicBlock* blk, size_t pred_num) const {
- DCHECK_LT(blk->GetBlockId(), first_materialization_block_id_)
- << "block is a materialization block!";
- DCHECK_LT(pred_num, max_preds_per_block_);
- return blk->GetBlockId() * max_preds_per_block_ + pred_num;
- }
-
- HGraph* GetGraph() const {
- return lse_->GetGraph();
- }
-
- LSEVisitor* lse_;
- ScopedArenaAllocator* alloc_;
- ScopedArenaVector<HInstruction*> new_ref_phis_;
- ScopedArenaVector<HeapReferenceData> heap_refs_;
- size_t max_preds_per_block_;
-  // An array of (# of non-materialization blocks) * max_preds_per_block_
-  // entries, arranged in block-id major order. Since we can have at most one
-  // materialization block on each edge, this is the maximum possible number
-  // of materialization blocks.
- ScopedArenaVector<HBasicBlock*> materialization_blocks_;
- size_t first_materialization_block_id_;
-
- friend void LSEVisitor::MovePartialEscapes();
-};
-
-// Work around c++ type checking annoyances with not being able to forward-declare inner types.
-class HeapRefHolder
- : public std::reference_wrapper<PartialLoadStoreEliminationHelper::HeapReferenceData> {};
-
-HInstruction* LSEVisitor::SetupPartialMaterialization(PartialLoadStoreEliminationHelper& helper,
- HeapRefHolder&& holder,
- size_t pred_idx,
- HBasicBlock* entry) {
- PartialLoadStoreEliminationHelper::HeapReferenceData& ref_data = holder.get();
- HBasicBlock* old_pred = entry->GetPredecessors()[pred_idx];
- HInstruction* new_inst = ref_data.OriginalNewInstance();
- if (UNLIKELY(!new_inst->GetBlock()->Dominates(entry))) {
- LSE_VLOG << "Initial materialization in non-dominating block " << entry->GetBlockId()
- << " is null!";
- return GetGraph()->GetNullConstant();
- }
- HBasicBlock* bb = helper.GetOrCreateMaterializationBlock(entry, pred_idx);
- CHECK(bb != nullptr) << "entry " << entry->GetBlockId() << " -> " << old_pred->GetBlockId();
- HNewInstance* repl_create = new_inst->Clone(GetGraph()->GetAllocator())->AsNewInstance();
- repl_create->SetPartialMaterialization();
- bb->InsertInstructionBefore(repl_create, bb->GetLastInstruction());
- repl_create->CopyEnvironmentFrom(new_inst->GetEnvironment());
- MaybeRecordStat(stats_, MethodCompilationStat::kPartialAllocationMoved);
- LSE_VLOG << "In blk " << bb->GetBlockId() << " initial materialization is " << *repl_create;
- ref_data.AddMaterialization(bb, repl_create);
- const FieldInfo* info = nullptr;
- for (const HeapLocation* loc : ref_data.IterateLocations()) {
- size_t loc_off = heap_location_collector_.GetHeapLocationIndex(loc);
- info = field_infos_[loc_off];
- DCHECK(loc->GetIndex() == nullptr);
- Value value = ReplacementOrValue(heap_values_for_[old_pred->GetBlockId()][loc_off].value);
- if (value.NeedsLoopPhi() || value.IsMergedUnknown()) {
- Value repl = phi_placeholder_replacements_[PhiPlaceholderIndex(value.GetPhiPlaceholder())];
- DCHECK(repl.IsDefault() || repl.IsInvalid() || repl.IsInstruction())
- << repl << " from " << value << " pred is " << old_pred->GetBlockId();
- if (!repl.IsInvalid()) {
- value = repl;
- } else {
- FullyMaterializePhi(value.GetPhiPlaceholder(), info->GetFieldType());
- value = phi_placeholder_replacements_[PhiPlaceholderIndex(value.GetPhiPlaceholder())];
- }
- } else if (value.NeedsNonLoopPhi()) {
- Value repl = phi_placeholder_replacements_[PhiPlaceholderIndex(value.GetPhiPlaceholder())];
- DCHECK(repl.IsDefault() || repl.IsInvalid() || repl.IsInstruction())
- << repl << " from " << value << " pred is " << old_pred->GetBlockId();
- if (!repl.IsInvalid()) {
- value = repl;
- } else {
- MaterializeNonLoopPhis(value.GetPhiPlaceholder(), info->GetFieldType());
- value = phi_placeholder_replacements_[PhiPlaceholderIndex(value.GetPhiPlaceholder())];
- }
- }
- DCHECK(value.IsDefault() || value.IsInstruction())
- << GetGraph()->PrettyMethod() << ": " << value;
-
- if (!value.IsDefault() &&
- // shadow$_klass_ doesn't need to be manually initialized.
- MemberOffset(loc->GetOffset()) != mirror::Object::ClassOffset()) {
- CHECK(info != nullptr);
- HInstruction* set_value =
- new (GetGraph()->GetAllocator()) HInstanceFieldSet(repl_create,
- value.GetInstruction(),
- field_infos_[loc_off]->GetField(),
- loc->GetType(),
- MemberOffset(loc->GetOffset()),
- false,
- field_infos_[loc_off]->GetFieldIndex(),
- loc->GetDeclaringClassDefIndex(),
- field_infos_[loc_off]->GetDexFile(),
- 0u);
- bb->InsertInstructionAfter(set_value, repl_create);
- LSE_VLOG << "Adding " << *set_value << " for materialization setup!";
- }
- }
- return repl_create;
-}
-
-HInstruction* LSEVisitor::GetPartialValueAt(HNewInstance* orig_new_inst, HInstruction* read) {
- size_t loc = heap_location_collector_.GetFieldHeapLocation(orig_new_inst, &read->GetFieldInfo());
- Value pred = ReplacementOrValue(intermediate_values_.find(read)->second);
- LSE_VLOG << "using " << pred << " as default value for " << *read;
- if (pred.IsInstruction()) {
- return pred.GetInstruction();
- } else if (pred.IsMergedUnknown() || pred.NeedsPhi()) {
- FullyMaterializePhi(pred.GetPhiPlaceholder(),
- heap_location_collector_.GetHeapLocation(loc)->GetType());
- HInstruction* res = Replacement(pred).GetInstruction();
- LSE_VLOG << pred << " materialized to " << res->DumpWithArgs();
- return res;
- } else if (pred.IsDefault()) {
- HInstruction* res = GetDefaultValue(read->GetType());
- LSE_VLOG << pred << " materialized to " << res->DumpWithArgs();
- return res;
- }
- LOG(FATAL) << "Unable to find unescaped value at " << read->DumpWithArgs()
- << "! This should be impossible! Value is " << pred;
- UNREACHABLE();
-}
-
-void LSEVisitor::MovePartialEscapes() {
- if (!ShouldPerformPartialLSE()) {
- return;
- }
-
- ScopedArenaAllocator saa(allocator_.GetArenaStack());
- PartialLoadStoreEliminationHelper helper(this, &saa);
-
-  // Since we will now have more information for PHIs (we know the object
-  // hasn't escaped), we need to clear the old phi-replacements where we
-  // weren't able to find the value.
- PrepareForPartialPhiComputation();
-
- for (PartialLoadStoreEliminationHelper::HeapReferenceData& ref_data : helper.GetHeapRefs()) {
- LSE_VLOG << "Creating materializations for " << *ref_data.OriginalNewInstance();
- // Setup entry and exit blocks.
- for (const auto& excluded_cohort : ref_data.GetNoEscapeSubgraph()->GetExcludedCohorts()) {
- // Setup materialization blocks.
- for (HBasicBlock* entry : excluded_cohort.EntryBlocksReversePostOrder()) {
- // Setup entries.
-        // TODO: Assuming we correctly break critical edges, every entry block
-        // must have only a single predecessor, so we could just put all this
-        // stuff in there. OTOH the simplifier can do it for us, and this is
-        // simpler to implement - giving a clean separation between the
-        // original graph and materialization blocks - so for now we might as
-        // well have these new blocks.
- ScopedArenaAllocator pred_alloc(saa.GetArenaStack());
- ScopedArenaVector<HInstruction*> pred_vals(pred_alloc.Adapter(kArenaAllocLSE));
- pred_vals.reserve(entry->GetNumberOfPredecessors());
- for (const auto& [pred, pred_idx] :
- ZipCount(MakeIterationRange(entry->GetPredecessors()))) {
- DCHECK(!helper.IsMaterializationBlock(pred));
- if (excluded_cohort.IsEntryBlock(pred)) {
- pred_vals.push_back(ref_data.GetMaterialization(pred));
- continue;
- } else {
- pred_vals.push_back(SetupPartialMaterialization(helper, {ref_data}, pred_idx, entry));
- }
- }
- ref_data.GenerateMaterializationValueFromPredecessorsForEntry(entry, pred_vals);
- }
-
- // Setup exit block heap-values for later phi-generation.
- for (HBasicBlock* exit : excluded_cohort.ExitBlocks()) {
-        // Mark every exit of the cohorts as having a value so we can easily
-        // materialize the PHIs.
-        // TODO: By setting this we can easily use the normal
-        // MaterializeLoopPhis (via FullyMaterializePhis) to generate the
-        // default values for predicated-gets. This has the unfortunate side
-        // effect of creating somewhat more phis than are really needed (in
-        // some cases). We should eventually try to lower these PHIs to just
-        // the non-escaping value where possible. Currently this is done to
-        // some extent in instruction_simplifier, but we have more information
-        // here to do the right thing.
- for (const HeapLocation* loc : ref_data.IterateLocations()) {
- size_t loc_off = heap_location_collector_.GetHeapLocationIndex(loc);
- // This Value::Default() is only used to fill in PHIs used as the
- // default value for PredicatedInstanceFieldGets. The actual value
- // stored there is meaningless since the Predicated-iget will use the
- // actual field value instead on these paths.
- heap_values_for_[exit->GetBlockId()][loc_off].value = Value::Default();
- }
- }
- }
-
-    // String the materialization through the graph.
-    // Visit RPO to PHI the materialized object through the cohort.
- for (HBasicBlock* blk : GetGraph()->GetReversePostOrder()) {
- // NB This doesn't include materialization blocks.
- DCHECK(!helper.IsMaterializationBlock(blk))
- << "Materialization blocks should not be in RPO yet.";
- if (ref_data.HasMaterialization(blk)) {
- continue;
- } else if (ref_data.BeforeAllEscapes(blk)) {
- ref_data.AddMaterialization(blk, GetGraph()->GetNullConstant());
- continue;
- } else {
- ref_data.GenerateMaterializationValueFromPredecessors(blk);
- }
- }
- }
-
- // Once we've generated all the materializations we can update the users.
- helper.FixupPartialObjectUsers();
-
- // Actually put materialization blocks into the graph
- helper.InsertMaterializationBlocks();
-
- // Get rid of the original instructions.
- helper.RemoveReplacedInstructions();
-
- // Ensure everything is ordered correctly in the materialization blocks. This
- // involves moving every NewInstance to the top and ordering them so that any
- // required env-uses are correctly ordered.
- helper.ReorderMaterializationsForEnvDominance();
-}
void LSEVisitor::FinishFullLSE() {
// Remove recorded load instructions that should be eliminated.
@@ -4004,9 +2847,8 @@ class LSEVisitorWrapper : public DeletableArenaObject<kArenaAllocLSE> {
public:
LSEVisitorWrapper(HGraph* graph,
const HeapLocationCollector& heap_location_collector,
- bool perform_partial_lse,
OptimizingCompilerStats* stats)
- : lse_visitor_(graph, heap_location_collector, perform_partial_lse, stats) {}
+ : lse_visitor_(graph, heap_location_collector, stats) {}
void Run() {
lse_visitor_.Run();
@@ -4016,7 +2858,7 @@ class LSEVisitorWrapper : public DeletableArenaObject<kArenaAllocLSE> {
LSEVisitor lse_visitor_;
};
-bool LoadStoreElimination::Run(bool enable_partial_lse) {
+bool LoadStoreElimination::Run() {
if (graph_->IsDebuggable()) {
// Debugger may set heap values or trigger deoptimization of callers.
// Skip this optimization.
@@ -4029,11 +2871,7 @@ bool LoadStoreElimination::Run(bool enable_partial_lse) {
// O(1) though.
graph_->ComputeReachabilityInformation();
ScopedArenaAllocator allocator(graph_->GetArenaStack());
- LoadStoreAnalysis lsa(graph_,
- stats_,
- &allocator,
- enable_partial_lse ? LoadStoreAnalysisType::kFull
- : LoadStoreAnalysisType::kBasic);
+ LoadStoreAnalysis lsa(graph_, stats_, &allocator);
lsa.Run();
const HeapLocationCollector& heap_location_collector = lsa.GetHeapLocationCollector();
if (heap_location_collector.GetNumberOfHeapLocations() == 0) {
@@ -4041,8 +2879,15 @@ bool LoadStoreElimination::Run(bool enable_partial_lse) {
return false;
}
- std::unique_ptr<LSEVisitorWrapper> lse_visitor(new (&allocator) LSEVisitorWrapper(
- graph_, heap_location_collector, enable_partial_lse, stats_));
+ // Currently load_store analysis can't handle predicated load/stores; specifically pairs of
+ // memory operations with different predicates.
+ // TODO: support predicated SIMD.
+ if (graph_->HasPredicatedSIMD()) {
+ return false;
+ }
+
+ std::unique_ptr<LSEVisitorWrapper> lse_visitor(
+ new (&allocator) LSEVisitorWrapper(graph_, heap_location_collector, stats_));
lse_visitor->Run();
return true;
}
diff --git a/compiler/optimizing/load_store_elimination.h b/compiler/optimizing/load_store_elimination.h
index 42de803ebd..e77168547d 100644
--- a/compiler/optimizing/load_store_elimination.h
+++ b/compiler/optimizing/load_store_elimination.h
@@ -26,10 +26,6 @@ class SideEffectsAnalysis;
class LoadStoreElimination : public HOptimization {
public:
- // Whether or not we should attempt partial Load-store-elimination which
- // requires additional blocks and predicated instructions.
- static constexpr bool kEnablePartialLSE = false;
-
// Controls whether to enable VLOG(compiler) logs explaining the transforms taking place.
static constexpr bool kVerboseLoggingMode = false;
@@ -38,12 +34,7 @@ class LoadStoreElimination : public HOptimization {
const char* name = kLoadStoreEliminationPassName)
: HOptimization(graph, name, stats) {}
- bool Run() override {
- return Run(kEnablePartialLSE);
- }
-
- // Exposed for testing.
- bool Run(bool enable_partial_lse);
+ bool Run();
static constexpr const char* kLoadStoreEliminationPassName = "load_store_elimination";
diff --git a/compiler/optimizing/load_store_elimination_test.cc b/compiler/optimizing/load_store_elimination_test.cc
index 1ee109980f..0775051eb4 100644
--- a/compiler/optimizing/load_store_elimination_test.cc
+++ b/compiler/optimizing/load_store_elimination_test.cc
@@ -68,47 +68,27 @@ class LoadStoreEliminationTestBase : public SuperTest, public OptimizingUnitTest
}
}
- void PerformLSE(bool with_partial = true) {
+ void PerformLSE() {
graph_->BuildDominatorTree();
LoadStoreElimination lse(graph_, /*stats=*/nullptr);
- lse.Run(with_partial);
+ lse.Run();
std::ostringstream oss;
EXPECT_TRUE(CheckGraph(oss)) << oss.str();
}
- void PerformLSEWithPartial(const AdjacencyListGraph& blks) {
- // PerformLSE expects this to be empty.
+ void PerformLSE(const AdjacencyListGraph& blks) {
+ // PerformLSE expects this to be empty, and the creation of
+ // an `AdjacencyListGraph` computes it.
graph_->ClearDominanceInformation();
if (kDebugLseTests) {
LOG(INFO) << "Pre LSE " << blks;
}
- PerformLSE(/*with_partial=*/ true);
+ PerformLSE();
if (kDebugLseTests) {
LOG(INFO) << "Post LSE " << blks;
}
}
- void PerformLSENoPartial(const AdjacencyListGraph& blks) {
- // PerformLSE expects this to be empty.
- graph_->ClearDominanceInformation();
- if (kDebugLseTests) {
- LOG(INFO) << "Pre LSE " << blks;
- }
- PerformLSE(/*with_partial=*/ false);
- if (kDebugLseTests) {
- LOG(INFO) << "Post LSE " << blks;
- }
- }
-
- void PerformSimplifications(const AdjacencyListGraph& blks) {
- InstructionSimplifier simp(graph_, /*codegen=*/nullptr);
- simp.Run();
-
- if (kDebugLseTests) {
- LOG(INFO) << "Post simplification " << blks;
- }
- }
-
// Create instructions shared among tests.
void CreateEntryBlockInstructions() {
HInstruction* c1 = graph_->GetIntConstant(1);
@@ -327,190 +307,6 @@ std::ostream& operator<<(std::ostream& os, const TestOrder& ord) {
}
}
-class OrderDependentTestGroup
- : public LoadStoreEliminationTestBase<CommonCompilerTestWithParam<TestOrder>> {};
-
-// Various configs we can use for testing. Currently used in PartialComparison tests.
-struct PartialComparisonKind {
- public:
- enum class Type : uint8_t { kEquals, kNotEquals };
- enum class Target : uint8_t { kNull, kValue, kSelf };
- enum class Position : uint8_t { kLeft, kRight };
-
- const Type type_;
- const Target target_;
- const Position position_;
-
- bool IsDefinitelyFalse() const {
- return !IsPossiblyTrue();
- }
- bool IsPossiblyFalse() const {
- return !IsDefinitelyTrue();
- }
- bool IsDefinitelyTrue() const {
- if (target_ == Target::kSelf) {
- return type_ == Type::kEquals;
- } else if (target_ == Target::kNull) {
- return type_ == Type::kNotEquals;
- } else {
- return false;
- }
- }
- bool IsPossiblyTrue() const {
- if (target_ == Target::kSelf) {
- return type_ == Type::kEquals;
- } else if (target_ == Target::kNull) {
- return type_ == Type::kNotEquals;
- } else {
- return true;
- }
- }
- std::ostream& Dump(std::ostream& os) const {
- os << "PartialComparisonKind{" << (type_ == Type::kEquals ? "kEquals" : "kNotEquals") << ", "
- << (target_ == Target::kNull ? "kNull" : (target_ == Target::kSelf ? "kSelf" : "kValue"))
- << ", " << (position_ == Position::kLeft ? "kLeft" : "kRight") << "}";
- return os;
- }
-};
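// A small sketch re-deriving the table those predicates encode, assuming what
// these tests assume: the compared reference is a singleton from new Obj(), so
// it is known non-null, while a comparison against an arbitrary other value
// depends on whether the object may have escaped and therefore stays unknown.
// The Kind struct is a made-up stand-in for PartialComparisonKind, not test code.
#include <cassert>

enum class Type { kEquals, kNotEquals };
enum class Target { kNull, kValue, kSelf };

struct Kind {
  Type type;
  Target target;
  bool IsDefinitelyTrue() const {
    return (target == Target::kSelf && type == Type::kEquals) ||
           (target == Target::kNull && type == Type::kNotEquals);
  }
  bool IsDefinitelyFalse() const {
    return (target == Target::kSelf && type == Type::kNotEquals) ||
           (target == Target::kNull && type == Type::kEquals);
  }
};

int main() {
  assert((Kind{Type::kEquals, Target::kSelf}.IsDefinitelyTrue()));      // obj == obj
  assert((Kind{Type::kNotEquals, Target::kNull}.IsDefinitelyTrue()));   // obj != null
  assert((Kind{Type::kEquals, Target::kNull}.IsDefinitelyFalse()));     // obj == null
  assert((Kind{Type::kNotEquals, Target::kSelf}.IsDefinitelyFalse()));  // obj != obj
  Kind vs_value{Type::kEquals, Target::kValue};  // obj == some unrelated value
  assert(!vs_value.IsDefinitelyTrue() && !vs_value.IsDefinitelyFalse());
  return 0;
}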
-
-std::ostream& operator<<(std::ostream& os, const PartialComparisonKind& comp) {
- return comp.Dump(os);
-}
-
-class PartialComparisonTestGroup
- : public LoadStoreEliminationTestBase<CommonCompilerTestWithParam<PartialComparisonKind>> {
- public:
- enum class ComparisonPlacement {
- kBeforeEscape,
- kInEscape,
- kAfterEscape,
- };
- void CheckFinalInstruction(HInstruction* ins, ComparisonPlacement placement) {
- using Target = PartialComparisonKind::Target;
- using Type = PartialComparisonKind::Type;
- using Position = PartialComparisonKind::Position;
- PartialComparisonKind kind = GetParam();
- if (ins->IsIntConstant()) {
- if (kind.IsDefinitelyTrue()) {
- EXPECT_TRUE(ins->AsIntConstant()->IsTrue()) << kind << " " << *ins;
- } else if (kind.IsDefinitelyFalse()) {
- EXPECT_TRUE(ins->AsIntConstant()->IsFalse()) << kind << " " << *ins;
- } else {
- EXPECT_EQ(placement, ComparisonPlacement::kBeforeEscape);
- EXPECT_EQ(kind.target_, Target::kValue);
- // We are before escape so value is not the object
- if (kind.type_ == Type::kEquals) {
- EXPECT_TRUE(ins->AsIntConstant()->IsFalse()) << kind << " " << *ins;
- } else {
- EXPECT_TRUE(ins->AsIntConstant()->IsTrue()) << kind << " " << *ins;
- }
- }
- return;
- }
- EXPECT_NE(placement, ComparisonPlacement::kBeforeEscape)
- << "For comparisons before escape we should always be able to transform into a constant."
- << " Instead we got:" << std::endl << ins->DumpWithArgs();
- if (placement == ComparisonPlacement::kInEscape) {
- // Should be the same type.
- ASSERT_TRUE(ins->IsEqual() || ins->IsNotEqual()) << *ins;
- HInstruction* other = kind.position_ == Position::kLeft ? ins->AsBinaryOperation()->GetRight()
- : ins->AsBinaryOperation()->GetLeft();
- if (kind.target_ == Target::kSelf) {
- EXPECT_INS_EQ(ins->AsBinaryOperation()->GetLeft(), ins->AsBinaryOperation()->GetRight())
- << " ins is: " << *ins;
- } else if (kind.target_ == Target::kNull) {
- EXPECT_INS_EQ(other, graph_->GetNullConstant()) << " ins is: " << *ins;
- } else {
- EXPECT_TRUE(other->IsStaticFieldGet()) << " ins is: " << *ins;
- }
- if (kind.type_ == Type::kEquals) {
- EXPECT_TRUE(ins->IsEqual()) << *ins;
- } else {
- EXPECT_TRUE(ins->IsNotEqual()) << *ins;
- }
- } else {
- ASSERT_EQ(placement, ComparisonPlacement::kAfterEscape);
- if (kind.type_ == Type::kEquals) {
- // obj == <anything> can only be true if (1) it's obj == obj or (2) obj has escaped.
- ASSERT_TRUE(ins->IsAnd()) << ins->DumpWithArgs();
- EXPECT_TRUE(ins->InputAt(1)->IsEqual()) << ins->DumpWithArgs();
- } else {
- // obj != <anything> is true if (2) obj has escaped.
- ASSERT_TRUE(ins->IsOr()) << ins->DumpWithArgs();
- EXPECT_TRUE(ins->InputAt(1)->IsNotEqual()) << ins->DumpWithArgs();
- }
-      // Check that the first operand is the obj-has-escaped check.
- ASSERT_TRUE(ins->InputAt(0)->IsNotEqual()) << ins->DumpWithArgs();
- EXPECT_TRUE(ins->InputAt(0)->InputAt(0)->IsPhi()) << ins->DumpWithArgs();
- EXPECT_TRUE(ins->InputAt(0)->InputAt(1)->IsNullConstant()) << ins->DumpWithArgs();
-      // Check that the second operand is the comparison against the other value.
- EXPECT_INS_EQ(ins->InputAt(1)->InputAt(kind.position_ == Position::kLeft ? 0 : 1),
- ins->InputAt(0)->InputAt(0))
- << ins->DumpWithArgs();
- }
- }
-
- struct ComparisonInstructions {
- void AddSetup(HBasicBlock* blk) const {
- for (HInstruction* i : setup_instructions_) {
- blk->AddInstruction(i);
- }
- }
-
- void AddEnvironment(HEnvironment* env) const {
- for (HInstruction* i : setup_instructions_) {
- if (i->NeedsEnvironment()) {
- i->CopyEnvironmentFrom(env);
- }
- }
- }
-
- const std::vector<HInstruction*> setup_instructions_;
- HInstruction* const cmp_;
- };
-
- ComparisonInstructions GetComparisonInstructions(HInstruction* partial) {
- PartialComparisonKind kind = GetParam();
- std::vector<HInstruction*> setup;
- HInstruction* target_other;
- switch (kind.target_) {
- case PartialComparisonKind::Target::kSelf:
- target_other = partial;
- break;
- case PartialComparisonKind::Target::kNull:
- target_other = graph_->GetNullConstant();
- break;
- case PartialComparisonKind::Target::kValue: {
- HInstruction* cls = MakeClassLoad();
- HInstruction* static_read =
- new (GetAllocator()) HStaticFieldGet(cls,
- /* field= */ nullptr,
- DataType::Type::kReference,
- /* field_offset= */ MemberOffset(40),
- /* is_volatile= */ false,
- /* field_idx= */ 0,
- /* declaring_class_def_index= */ 0,
- graph_->GetDexFile(),
- /* dex_pc= */ 0);
- setup.push_back(cls);
- setup.push_back(static_read);
- target_other = static_read;
- break;
- }
- }
- HInstruction* target_left;
- HInstruction* target_right;
- std::tie(target_left, target_right) = kind.position_ == PartialComparisonKind::Position::kLeft
- ? std::pair{partial, target_other}
- : std::pair{target_other, partial};
- HInstruction* cmp =
- kind.type_ == PartialComparisonKind::Type::kEquals
- ? static_cast<HInstruction*>(new (GetAllocator()) HEqual(target_left, target_right))
- : static_cast<HInstruction*>(new (GetAllocator()) HNotEqual(target_left, target_right));
- return {setup, cmp};
- }
-};
-
TEST_F(LoadStoreEliminationTest, ArrayGetSetElimination) {
CreateTestControlFlowGraph();
@@ -573,7 +369,8 @@ TEST_F(LoadStoreEliminationTest, SameHeapValue2) {
AddVecStore(entry_block_, array_, j_);
HInstruction* vstore = AddVecStore(entry_block_, array_, i_);
- graph_->SetHasSIMD(true);
+ // TODO: enable LSE for graphs with predicated SIMD.
+ graph_->SetHasTraditionalSIMD(true);
PerformLSE();
ASSERT_FALSE(IsRemoved(vstore));
@@ -589,7 +386,8 @@ TEST_F(LoadStoreEliminationTest, SameHeapValue3) {
AddVecStore(entry_block_, array_, i_add1_);
HInstruction* vstore = AddVecStore(entry_block_, array_, i_);
- graph_->SetHasSIMD(true);
+ // TODO: enable LSE for graphs with predicated SIMD.
+ graph_->SetHasTraditionalSIMD(true);
PerformLSE();
ASSERT_FALSE(IsRemoved(vstore));
@@ -634,7 +432,8 @@ TEST_F(LoadStoreEliminationTest, OverlappingLoadStore) {
AddArraySet(entry_block_, array_, i_, c1);
HInstruction* vload5 = AddVecLoad(entry_block_, array_, i_);
- graph_->SetHasSIMD(true);
+ // TODO: enable LSE for graphs with predicated SIMD.
+ graph_->SetHasTraditionalSIMD(true);
PerformLSE();
ASSERT_TRUE(IsRemoved(load1));
@@ -668,7 +467,8 @@ TEST_F(LoadStoreEliminationTest, StoreAfterLoopWithoutSideEffects) {
// a[j] = 1;
HInstruction* array_set = AddArraySet(return_block_, array_, j_, c1);
- graph_->SetHasSIMD(true);
+ // TODO: enable LSE for graphs with predicated SIMD.
+ graph_->SetHasTraditionalSIMD(true);
PerformLSE();
ASSERT_TRUE(IsRemoved(array_set));
@@ -701,12 +501,13 @@ TEST_F(LoadStoreEliminationTest, StoreAfterSIMDLoopWithSideEffects) {
// b[phi,phi+1,phi+2,phi+3] = a[phi,phi+1,phi+2,phi+3];
AddVecStore(loop_, array_, phi_);
HInstruction* vload = AddVecLoad(loop_, array_, phi_);
- AddVecStore(loop_, array_b, phi_, vload->AsVecLoad());
+ AddVecStore(loop_, array_b, phi_, vload);
// a[j] = 0;
HInstruction* a_set = AddArraySet(return_block_, array_, j_, c0);
- graph_->SetHasSIMD(true);
+ // TODO: enable LSE for graphs with predicated SIMD.
+ graph_->SetHasTraditionalSIMD(true);
PerformLSE();
ASSERT_TRUE(IsRemoved(vload));
@@ -740,12 +541,13 @@ TEST_F(LoadStoreEliminationTest, LoadAfterSIMDLoopWithSideEffects) {
// b[phi,phi+1,phi+2,phi+3] = a[phi,phi+1,phi+2,phi+3];
AddVecStore(loop_, array_, phi_);
HInstruction* vload = AddVecLoad(loop_, array_, phi_);
- AddVecStore(loop_, array_b, phi_, vload->AsVecLoad());
+ AddVecStore(loop_, array_b, phi_, vload);
// x = a[j];
HInstruction* load = AddArrayGet(return_block_, array_, j_);
- graph_->SetHasSIMD(true);
+ // TODO: enable LSE for graphs with predicated SIMD.
+ graph_->SetHasTraditionalSIMD(true);
PerformLSE();
ASSERT_TRUE(IsRemoved(vload));
@@ -786,7 +588,8 @@ TEST_F(LoadStoreEliminationTest, MergePredecessorVecStores) {
// down: a[i,... i + 3] = [1,...1]
HInstruction* vstore4 = AddVecStore(down, array_, i_, vdata);
- graph_->SetHasSIMD(true);
+ // TODO: enable LSE for graphs with predicated SIMD.
+ graph_->SetHasTraditionalSIMD(true);
PerformLSE();
ASSERT_TRUE(IsRemoved(vstore2));
@@ -874,10 +677,11 @@ TEST_F(LoadStoreEliminationTest, RedundantVStoreVLoadInLoop) {
// a[i,... i + 3] = [1,...1]
HInstruction* vstore1 = AddVecStore(loop_, array_a, phi_);
HInstruction* vload = AddVecLoad(loop_, array_a, phi_);
- HInstruction* vstore2 = AddVecStore(loop_, array_b, phi_, vload->AsVecLoad());
+ HInstruction* vstore2 = AddVecStore(loop_, array_b, phi_, vload);
HInstruction* vstore3 = AddVecStore(loop_, array_a, phi_, vstore1->InputAt(2));
- graph_->SetHasSIMD(true);
+ // TODO: enable LSE for graphs with predicated SIMD.
+ graph_->SetHasTraditionalSIMD(true);
PerformLSE();
ASSERT_FALSE(IsRemoved(vstore1));
@@ -963,9 +767,10 @@ TEST_F(LoadStoreEliminationTest, VLoadDefaultValueInLoopWithoutWriteSideEffects)
// v = a[i,... i + 3]
// array[0,... 3] = v
HInstruction* vload = AddVecLoad(loop_, array_a, phi_);
- HInstruction* vstore = AddVecStore(return_block_, array_, c0, vload->AsVecLoad());
+ HInstruction* vstore = AddVecStore(return_block_, array_, c0, vload);
- graph_->SetHasSIMD(true);
+ // TODO: enable LSE for graphs with predicated SIMD.
+ graph_->SetHasTraditionalSIMD(true);
PerformLSE();
ASSERT_FALSE(IsRemoved(vload));
@@ -987,9 +792,10 @@ TEST_F(LoadStoreEliminationTest, VLoadDefaultValue) {
// v = a[0,... 3]
// array[0,... 3] = v
HInstruction* vload = AddVecLoad(pre_header_, array_a, c0);
- HInstruction* vstore = AddVecStore(return_block_, array_, c0, vload->AsVecLoad());
+ HInstruction* vstore = AddVecStore(return_block_, array_, c0, vload);
- graph_->SetHasSIMD(true);
+ // TODO: enable LSE for graphs with predicated SIMD.
+ graph_->SetHasTraditionalSIMD(true);
PerformLSE();
ASSERT_FALSE(IsRemoved(vload));
@@ -1063,10 +869,11 @@ TEST_F(LoadStoreEliminationTest, VLoadAndLoadDefaultValueInLoopWithoutWriteSideE
// array[0] = v1
HInstruction* vload = AddVecLoad(loop_, array_a, phi_);
HInstruction* load = AddArrayGet(loop_, array_a, phi_);
- HInstruction* vstore = AddVecStore(return_block_, array_, c0, vload->AsVecLoad());
+ HInstruction* vstore = AddVecStore(return_block_, array_, c0, vload);
HInstruction* store = AddArraySet(return_block_, array_, c0, load);
- graph_->SetHasSIMD(true);
+ // TODO: enable LSE for graphs with predicated SIMD.
+ graph_->SetHasTraditionalSIMD(true);
PerformLSE();
ASSERT_FALSE(IsRemoved(vload));
@@ -1094,10 +901,11 @@ TEST_F(LoadStoreEliminationTest, VLoadAndLoadDefaultValue) {
// array[0] = v1
HInstruction* vload = AddVecLoad(pre_header_, array_a, c0);
HInstruction* load = AddArrayGet(pre_header_, array_a, c0);
- HInstruction* vstore = AddVecStore(return_block_, array_, c0, vload->AsVecLoad());
+ HInstruction* vstore = AddVecStore(return_block_, array_, c0, vload);
HInstruction* store = AddArraySet(return_block_, array_, c0, load);
- graph_->SetHasSIMD(true);
+ // TODO: enable LSE for graphs with predicated SIMD.
+ graph_->SetHasTraditionalSIMD(true);
PerformLSE();
ASSERT_FALSE(IsRemoved(vload));
@@ -1126,10 +934,11 @@ TEST_F(LoadStoreEliminationTest, VLoadDefaultValueAndVLoadInLoopWithoutWriteSide
// array[128,... 131] = v1
HInstruction* vload1 = AddVecLoad(loop_, array_a, phi_);
HInstruction* vload2 = AddVecLoad(loop_, array_a, phi_);
- HInstruction* vstore1 = AddVecStore(return_block_, array_, c0, vload1->AsVecLoad());
- HInstruction* vstore2 = AddVecStore(return_block_, array_, c128, vload2->AsVecLoad());
+ HInstruction* vstore1 = AddVecStore(return_block_, array_, c0, vload1);
+ HInstruction* vstore2 = AddVecStore(return_block_, array_, c128, vload2);
- graph_->SetHasSIMD(true);
+ // TODO: enable LSE for graphs with predicated SIMD.
+ graph_->SetHasTraditionalSIMD(true);
PerformLSE();
ASSERT_FALSE(IsRemoved(vload1));
@@ -1157,10 +966,11 @@ TEST_F(LoadStoreEliminationTest, VLoadDefaultValueAndVLoad) {
// array[128,... 131] = v1
HInstruction* vload1 = AddVecLoad(pre_header_, array_a, c0);
HInstruction* vload2 = AddVecLoad(pre_header_, array_a, c0);
- HInstruction* vstore1 = AddVecStore(return_block_, array_, c0, vload1->AsVecLoad());
- HInstruction* vstore2 = AddVecStore(return_block_, array_, c128, vload2->AsVecLoad());
+ HInstruction* vstore1 = AddVecStore(return_block_, array_, c0, vload1);
+ HInstruction* vstore2 = AddVecStore(return_block_, array_, c128, vload2);
- graph_->SetHasSIMD(true);
+ // TODO: enable LSE for graphs with predicated SIMD.
+ graph_->SetHasTraditionalSIMD(true);
PerformLSE();
ASSERT_FALSE(IsRemoved(vload1));
@@ -2069,7 +1879,7 @@ TEST_F(LoadStoreEliminationTest, PartialUnknownMerge) {
SetupExit(exit);
- PerformLSENoPartial(blks);
+ PerformLSE(blks);
EXPECT_INS_RETAINED(read_bottom);
EXPECT_INS_RETAINED(write_c1);
@@ -2084,84 +1894,6 @@ TEST_F(LoadStoreEliminationTest, PartialUnknownMerge) {
// // LEFT
// obj.field = 1;
// call_func(obj);
-// foo_r = obj.field
-// } else {
-// // TO BE ELIMINATED
-// obj.field = 2;
-// // RIGHT
-// // TO BE ELIMINATED
-// foo_l = obj.field;
-// }
-// EXIT
-// return PHI(foo_l, foo_r)
-TEST_F(LoadStoreEliminationTest, PartialLoadElimination) {
- ScopedObjectAccess soa(Thread::Current());
- VariableSizedHandleScope vshs(soa.Self());
- CreateGraph(&vshs);
- AdjacencyListGraph blks(SetupFromAdjacencyList("entry",
- "exit_REAL",
- { { "entry", "left" },
- { "entry", "right" },
- { "left", "exit" },
- { "right", "exit" },
- { "exit", "exit_REAL" } }));
- HBasicBlock* entry = blks.Get("entry");
- HBasicBlock* left = blks.Get("left");
- HBasicBlock* right = blks.Get("right");
- HBasicBlock* exit = blks.Get("exit");
- HInstruction* bool_value = MakeParam(DataType::Type::kBool);
- HInstruction* c1 = graph_->GetIntConstant(1);
- HInstruction* c2 = graph_->GetIntConstant(2);
-
- HInstruction* cls = MakeClassLoad();
- HInstruction* new_inst = MakeNewInstance(cls);
- HInstruction* if_inst = new (GetAllocator()) HIf(bool_value);
- entry->AddInstruction(cls);
- entry->AddInstruction(new_inst);
- entry->AddInstruction(if_inst);
- ManuallyBuildEnvFor(cls, {});
- new_inst->CopyEnvironmentFrom(cls->GetEnvironment());
-
- HInstruction* write_left = MakeIFieldSet(new_inst, c1, MemberOffset(32));
- HInstruction* call_left = MakeInvoke(DataType::Type::kVoid, { new_inst });
- HInstruction* read_left = MakeIFieldGet(new_inst, DataType::Type::kInt32, MemberOffset(16));
- HInstruction* goto_left = new (GetAllocator()) HGoto();
- left->AddInstruction(write_left);
- left->AddInstruction(call_left);
- left->AddInstruction(read_left);
- left->AddInstruction(goto_left);
- call_left->CopyEnvironmentFrom(cls->GetEnvironment());
-
- HInstruction* write_right = MakeIFieldSet(new_inst, c2, MemberOffset(16));
- HInstruction* read_right = MakeIFieldGet(new_inst, DataType::Type::kInt32, MemberOffset(16));
- HInstruction* goto_right = new (GetAllocator()) HGoto();
- right->AddInstruction(write_right);
- right->AddInstruction(read_right);
- right->AddInstruction(goto_right);
-
- HInstruction* phi_final = MakePhi({read_left, read_right});
- HInstruction* return_exit = new (GetAllocator()) HReturn(phi_final);
- exit->AddPhi(phi_final->AsPhi());
- exit->AddInstruction(return_exit);
-
- // PerformLSE expects this to be empty.
- graph_->ClearDominanceInformation();
- PerformLSE();
-
- ASSERT_TRUE(IsRemoved(read_right));
- ASSERT_FALSE(IsRemoved(read_left));
- ASSERT_FALSE(IsRemoved(phi_final));
- ASSERT_TRUE(phi_final->GetInputs()[1] == c2);
- ASSERT_TRUE(phi_final->GetInputs()[0] == read_left);
- ASSERT_TRUE(IsRemoved(write_right));
-}
-
-// // ENTRY
-// obj = new Obj();
-// if (parameter_value) {
-// // LEFT
-// obj.field = 1;
-// call_func(obj);
// // We don't know what obj.field is now, so we aren't able to eliminate the read below!
// } else {
// // DO NOT ELIMINATE
@@ -2217,7 +1949,7 @@ TEST_F(LoadStoreEliminationTest, PartialLoadPreserved) {
exit->AddInstruction(read_bottom);
exit->AddInstruction(return_exit);
- PerformLSENoPartial(blks);
+ PerformLSE(blks);
EXPECT_INS_RETAINED(read_bottom) << *read_bottom;
EXPECT_INS_RETAINED(write_right) << *write_right;
@@ -2308,7 +2040,7 @@ TEST_F(LoadStoreEliminationTest, PartialLoadPreserved2) {
exit->AddInstruction(read_bottom);
exit->AddInstruction(return_exit);
- PerformLSENoPartial(blks);
+ PerformLSE(blks);
EXPECT_INS_RETAINED(read_bottom);
EXPECT_INS_RETAINED(write_right_first);
@@ -2320,2090 +2052,6 @@ TEST_F(LoadStoreEliminationTest, PartialLoadPreserved2) {
// if (parameter_value) {
// // LEFT
// // DO NOT ELIMINATE
-// escape(obj);
-// obj.field = 1;
-// } else {
-// // RIGHT
-// // ELIMINATE
-// obj.field = 2;
-// }
-// EXIT
-// ELIMINATE
-// return obj.field
-TEST_F(LoadStoreEliminationTest, PartialLoadElimination2) {
- ScopedObjectAccess soa(Thread::Current());
- VariableSizedHandleScope vshs(soa.Self());
- CreateGraph(&vshs);
- AdjacencyListGraph blks(SetupFromAdjacencyList("entry",
- "exit",
- {{"entry", "left"},
- {"entry", "right"},
- {"left", "breturn"},
- {"right", "breturn"},
- {"breturn", "exit"}}));
-#define GET_BLOCK(name) HBasicBlock* name = blks.Get(#name)
- GET_BLOCK(entry);
- GET_BLOCK(exit);
- GET_BLOCK(breturn);
- GET_BLOCK(left);
- GET_BLOCK(right);
-#undef GET_BLOCK
- HInstruction* bool_value = MakeParam(DataType::Type::kBool);
- HInstruction* c1 = graph_->GetIntConstant(1);
- HInstruction* c2 = graph_->GetIntConstant(2);
-
- HInstruction* cls = MakeClassLoad();
- HInstruction* new_inst = MakeNewInstance(cls);
- HInstruction* if_inst = new (GetAllocator()) HIf(bool_value);
- entry->AddInstruction(cls);
- entry->AddInstruction(new_inst);
- entry->AddInstruction(if_inst);
- ManuallyBuildEnvFor(cls, {});
- new_inst->CopyEnvironmentFrom(cls->GetEnvironment());
-
- HInstruction* call_left = MakeInvoke(DataType::Type::kVoid, { new_inst });
- HInstruction* write_left = MakeIFieldSet(new_inst, c1, MemberOffset(32));
- HInstruction* goto_left = new (GetAllocator()) HGoto();
- left->AddInstruction(call_left);
- left->AddInstruction(write_left);
- left->AddInstruction(goto_left);
- call_left->CopyEnvironmentFrom(cls->GetEnvironment());
-
- HInstruction* write_right = MakeIFieldSet(new_inst, c2, MemberOffset(32));
- HInstruction* goto_right = new (GetAllocator()) HGoto();
- right->AddInstruction(write_right);
- right->AddInstruction(goto_right);
-
- HInstruction* read_bottom = MakeIFieldGet(new_inst, DataType::Type::kInt32, MemberOffset(32));
- HInstruction* return_exit = new (GetAllocator()) HReturn(read_bottom);
- breturn->AddInstruction(read_bottom);
- breturn->AddInstruction(return_exit);
-
- SetupExit(exit);
-
- // PerformLSE expects this to be empty.
- graph_->ClearDominanceInformation();
- PerformLSE();
-
- EXPECT_INS_REMOVED(read_bottom);
- EXPECT_INS_REMOVED(write_right);
- EXPECT_INS_RETAINED(write_left);
- EXPECT_INS_RETAINED(call_left);
-}
-
-template<typename Iter, typename Func>
-typename Iter::value_type FindOrNull(Iter begin, Iter end, Func func) {
- static_assert(std::is_pointer_v<typename Iter::value_type>);
- auto it = std::find_if(begin, end, func);
- if (it == end) {
- return nullptr;
- } else {
- return *it;
- }
-}
-
-// // ENTRY
-// Obj new_inst = new Obj();
-// new_inst.foo = 12;
-// Obj obj;
-// Obj out;
-// int first;
-// if (param0) {
-// // ESCAPE_ROUTE
-// if (param1) {
-// // LEFT_START
-// if (param2) {
-// // LEFT_LEFT
-// obj = new_inst;
-// } else {
-// // LEFT_RIGHT
-// obj = obj_param;
-// }
-// // LEFT_MERGE
-// // technically the phi is enough to cause an escape but might as well be
-// // thorough.
-// // obj = phi[new_inst, param]
-// escape(obj);
-// out = obj;
-// } else {
-// // RIGHT
-// out = obj_param;
-// }
-// // EXIT
-// // Can't do anything with this since we don't have good tracking for the heap-locations
-// // out = phi[param, phi[new_inst, param]]
-// first = out.foo
-// } else {
-// new_inst.foo = 15;
-// first = 13;
-// }
-// // first = phi[out.foo, 13]
-// return first + new_inst.foo;
-TEST_F(LoadStoreEliminationTest, PartialPhiPropagation) {
- ScopedObjectAccess soa(Thread::Current());
- VariableSizedHandleScope vshs(soa.Self());
- CreateGraph(&vshs);
- AdjacencyListGraph blks(SetupFromAdjacencyList("entry",
- "exit",
- {{"entry", "escape_route"},
- {"entry", "noescape_route"},
- {"escape_route", "left"},
- {"escape_route", "right"},
- {"left", "left_left"},
- {"left", "left_right"},
- {"left_left", "left_merge"},
- {"left_right", "left_merge"},
- {"left_merge", "escape_end"},
- {"right", "escape_end"},
- {"escape_end", "breturn"},
- {"noescape_route", "breturn"},
- {"breturn", "exit"}}));
-#define GET_BLOCK(name) HBasicBlock* name = blks.Get(#name)
- GET_BLOCK(entry);
- GET_BLOCK(exit);
- GET_BLOCK(breturn);
- GET_BLOCK(left);
- GET_BLOCK(right);
- GET_BLOCK(left_left);
- GET_BLOCK(left_right);
- GET_BLOCK(left_merge);
- GET_BLOCK(escape_end);
- GET_BLOCK(escape_route);
- GET_BLOCK(noescape_route);
-#undef GET_BLOCK
- EnsurePredecessorOrder(escape_end, {left_merge, right});
- EnsurePredecessorOrder(left_merge, {left_left, left_right});
- EnsurePredecessorOrder(breturn, {escape_end, noescape_route});
- HInstruction* param0 = MakeParam(DataType::Type::kBool);
- HInstruction* param1 = MakeParam(DataType::Type::kBool);
- HInstruction* param2 = MakeParam(DataType::Type::kBool);
- HInstruction* obj_param = MakeParam(DataType::Type::kReference);
- HInstruction* c12 = graph_->GetIntConstant(12);
- HInstruction* c13 = graph_->GetIntConstant(13);
- HInstruction* c15 = graph_->GetIntConstant(15);
-
- HInstruction* cls = MakeClassLoad();
- HInstruction* new_inst = MakeNewInstance(cls);
- HInstruction* store = MakeIFieldSet(new_inst, c12, MemberOffset(32));
- HInstruction* if_param0 = new (GetAllocator()) HIf(param0);
- entry->AddInstruction(cls);
- entry->AddInstruction(new_inst);
- entry->AddInstruction(store);
- entry->AddInstruction(if_param0);
- ManuallyBuildEnvFor(cls, {});
- new_inst->CopyEnvironmentFrom(cls->GetEnvironment());
-
- HInstruction* store_noescape = MakeIFieldSet(new_inst, c15, MemberOffset(32));
- noescape_route->AddInstruction(store_noescape);
- noescape_route->AddInstruction(new (GetAllocator()) HGoto());
-
- escape_route->AddInstruction(new (GetAllocator()) HIf(param1));
-
- HInstruction* if_left = new (GetAllocator()) HIf(param2);
- left->AddInstruction(if_left);
-
- HInstruction* goto_left_left = new (GetAllocator()) HGoto();
- left_left->AddInstruction(goto_left_left);
-
- HInstruction* goto_left_right = new (GetAllocator()) HGoto();
- left_right->AddInstruction(goto_left_right);
-
- HPhi* left_phi = MakePhi({obj_param, new_inst});
- HInstruction* call_left = MakeInvoke(DataType::Type::kVoid, { left_phi });
- HInstruction* goto_left_merge = new (GetAllocator()) HGoto();
- left_merge->AddPhi(left_phi);
- left_merge->AddInstruction(call_left);
- left_merge->AddInstruction(goto_left_merge);
- left_phi->SetCanBeNull(true);
- call_left->CopyEnvironmentFrom(cls->GetEnvironment());
-
- HInstruction* goto_right = new (GetAllocator()) HGoto();
- right->AddInstruction(goto_right);
-
- HPhi* escape_end_phi = MakePhi({left_phi, obj_param});
- HInstruction* read_escape_end =
- MakeIFieldGet(escape_end_phi, DataType::Type::kInt32, MemberOffset(32));
- HInstruction* goto_escape_end = new (GetAllocator()) HGoto();
- escape_end->AddPhi(escape_end_phi);
- escape_end->AddInstruction(read_escape_end);
- escape_end->AddInstruction(goto_escape_end);
-
- HPhi* return_phi = MakePhi({read_escape_end, c13});
- HInstruction* read_exit = MakeIFieldGet(new_inst, DataType::Type::kInt32, MemberOffset(32));
- HInstruction* add_exit = new (GetAllocator()) HAdd(DataType::Type::kInt32, return_phi, read_exit);
- HInstruction* return_exit = new (GetAllocator()) HReturn(add_exit);
- breturn->AddPhi(return_phi);
- breturn->AddInstruction(read_exit);
- breturn->AddInstruction(add_exit);
- breturn->AddInstruction(return_exit);
-
- SetupExit(exit);
-
- PerformLSEWithPartial(blks);
-
- HPredicatedInstanceFieldGet* pred_get =
- FindSingleInstruction<HPredicatedInstanceFieldGet>(graph_);
- std::vector<HPhi*> all_return_phis;
- std::tie(all_return_phis) = FindAllInstructions<HPhi>(graph_, breturn);
- EXPECT_EQ(all_return_phis.size(), 3u);
- EXPECT_INS_RETAINED(return_phi);
- EXPECT_TRUE(std::find(all_return_phis.begin(), all_return_phis.end(), return_phi) !=
- all_return_phis.end());
- HPhi* instance_phi =
- FindOrNull(all_return_phis.begin(), all_return_phis.end(), [&](HPhi* phi) {
- return phi != return_phi && phi->GetType() == DataType::Type::kReference;
- });
- ASSERT_NE(instance_phi, nullptr);
- HPhi* value_phi = FindOrNull(all_return_phis.begin(), all_return_phis.end(), [&](HPhi* phi) {
- return phi != return_phi && phi->GetType() == DataType::Type::kInt32;
- });
- ASSERT_NE(value_phi, nullptr);
- EXPECT_INS_EQ(
- instance_phi->InputAt(0),
- FindSingleInstruction<HNewInstance>(graph_, escape_route->GetSinglePredecessor()));
- // Check materialize block
- EXPECT_INS_EQ(FindSingleInstruction<HInstanceFieldSet>(
- graph_, escape_route->GetSinglePredecessor())
- ->InputAt(1),
- c12);
-
- EXPECT_INS_EQ(instance_phi->InputAt(1), graph_->GetNullConstant());
- EXPECT_INS_EQ(value_phi->InputAt(0), graph_->GetIntConstant(0));
- EXPECT_INS_EQ(value_phi->InputAt(1), c15);
- EXPECT_INS_REMOVED(store_noescape);
- EXPECT_INS_EQ(pred_get->GetTarget(), instance_phi);
- EXPECT_INS_EQ(pred_get->GetDefaultValue(), value_phi);
-}
-
-// // ENTRY
-// // To be moved
-// // NB Order important. By having the alloc and store of obj1 before obj2 we
-// // ensure we'll build the materialization for obj1 first (just due to how
-// // we iterate).
-// obj1 = new Obj();
-// obj2 = new Obj(); // has env[obj1]
-// // Swap the order of these
-// obj1.foo = param_obj1;
-// obj2.foo = param_obj2;
-// if (param1) {
-// // LEFT
-// obj2.foo = obj1;
-// if (param2) {
-// // LEFT_LEFT
-// escape(obj2);
-// } else {}
-// } else {}
-// return select(param3, obj1.foo, obj2.foo);
-// EXIT
-TEST_P(OrderDependentTestGroup, PredicatedUse) {
- ScopedObjectAccess soa(Thread::Current());
- VariableSizedHandleScope vshs(soa.Self());
- CreateGraph(&vshs);
- AdjacencyListGraph blks(SetupFromAdjacencyList("entry",
- "exit",
- {{"entry", "left"},
- {"entry", "right"},
- {"left", "left_left"},
- {"left", "left_right"},
- {"left_left", "left_end"},
- {"left_right", "left_end"},
- {"left_end", "breturn"},
- {"right", "breturn"},
- {"breturn", "exit"}}));
-#define GET_BLOCK(name) HBasicBlock* name = blks.Get(#name)
- GET_BLOCK(entry);
- GET_BLOCK(exit);
- GET_BLOCK(breturn);
- GET_BLOCK(right);
- GET_BLOCK(left);
- GET_BLOCK(left_left);
- GET_BLOCK(left_right);
- GET_BLOCK(left_end);
-#undef GET_BLOCK
- TestOrder order = GetParam();
- EnsurePredecessorOrder(breturn, {left_end, right});
- EnsurePredecessorOrder(left_end, {left_left, left_right});
- HInstruction* param1 = MakeParam(DataType::Type::kBool);
- HInstruction* param2 = MakeParam(DataType::Type::kBool);
- HInstruction* param3 = MakeParam(DataType::Type::kBool);
- HInstruction* param_obj1 = MakeParam(DataType::Type::kReference);
- HInstruction* param_obj2 = MakeParam(DataType::Type::kReference);
-
- HInstruction* cls1 = MakeClassLoad();
- HInstruction* cls2 = MakeClassLoad();
- HInstruction* new_inst1 = MakeNewInstance(cls1);
- HInstruction* new_inst2 = MakeNewInstance(cls2);
- HInstruction* store1 = MakeIFieldSet(new_inst1, param_obj1, MemberOffset(32));
- HInstruction* store2 = MakeIFieldSet(new_inst2, param_obj2, MemberOffset(32));
- HInstruction* null_const = graph_->GetNullConstant();
- HInstruction* if_inst = new (GetAllocator()) HIf(param1);
- entry->AddInstruction(cls1);
- entry->AddInstruction(cls2);
- entry->AddInstruction(new_inst1);
- entry->AddInstruction(new_inst2);
- if (order == TestOrder::kSameAsAlloc) {
- entry->AddInstruction(store1);
- entry->AddInstruction(store2);
- } else {
- entry->AddInstruction(store2);
- entry->AddInstruction(store1);
- }
- entry->AddInstruction(if_inst);
- ManuallyBuildEnvFor(cls1, {});
- cls2->CopyEnvironmentFrom(cls1->GetEnvironment());
- new_inst1->CopyEnvironmentFrom(cls1->GetEnvironment());
- new_inst2->CopyEnvironmentFrom(cls1->GetEnvironment());
-
- // This is the escape of new_inst1
- HInstruction* store_left = MakeIFieldSet(new_inst2, new_inst1, MemberOffset(32));
- HInstruction* if_left = new (GetAllocator()) HIf(param2);
- left->AddInstruction(store_left);
- left->AddInstruction(if_left);
-
- HInstruction* call_left_left = MakeInvoke(DataType::Type::kVoid, { new_inst2 });
- HInstruction* goto_left_left = new (GetAllocator()) HGoto();
- left_left->AddInstruction(call_left_left);
- left_left->AddInstruction(goto_left_left);
- call_left_left->CopyEnvironmentFrom(new_inst2->GetEnvironment());
-
- left_right->AddInstruction(new (GetAllocator()) HGoto());
- left_end->AddInstruction(new (GetAllocator()) HGoto());
-
- right->AddInstruction(new (GetAllocator()) HGoto());
-
- // Used to distinguish the pred-gets without having to dig through the
- // multiple phi layers.
- constexpr uint32_t kRead1DexPc = 10;
- constexpr uint32_t kRead2DexPc = 20;
- HInstruction* read1 =
- MakeIFieldGet(new_inst1, DataType::Type::kReference, MemberOffset(32), kRead1DexPc);
- read1->SetReferenceTypeInfo(
- ReferenceTypeInfo::CreateUnchecked(graph_->GetHandleCache()->GetObjectClassHandle(), false));
- HInstruction* read2 =
- MakeIFieldGet(new_inst2, DataType::Type::kReference, MemberOffset(32), kRead2DexPc);
- read2->SetReferenceTypeInfo(
- ReferenceTypeInfo::CreateUnchecked(graph_->GetHandleCache()->GetObjectClassHandle(), false));
- HInstruction* sel_return = new (GetAllocator()) HSelect(param3, read1, read2, 0);
- HInstruction* return_exit = new (GetAllocator()) HReturn(sel_return);
- breturn->AddInstruction(read1);
- breturn->AddInstruction(read2);
- breturn->AddInstruction(sel_return);
- breturn->AddInstruction(return_exit);
-
- SetupExit(exit);
-
- PerformLSEWithPartial(blks);
-
- EXPECT_INS_RETAINED(call_left_left);
- EXPECT_INS_REMOVED(read1);
- EXPECT_INS_REMOVED(read2);
- EXPECT_INS_REMOVED(new_inst1);
- EXPECT_INS_REMOVED(new_inst2);
- EXPECT_TRUE(new_inst1->GetUses().empty()) << *new_inst1 << " " << new_inst1->GetUses();
- EXPECT_TRUE(new_inst2->GetUses().empty()) << *new_inst2 << " " << new_inst2->GetUses();
- EXPECT_INS_RETAINED(sel_return);
- // Make sure the selector is the same
- EXPECT_INS_EQ(sel_return->InputAt(2), param3);
- std::vector<HPredicatedInstanceFieldGet*> pred_gets;
- std::tie(pred_gets) = FindAllInstructions<HPredicatedInstanceFieldGet>(graph_, breturn);
- HPredicatedInstanceFieldGet* pred1 = FindOrNull(pred_gets.begin(), pred_gets.end(), [&](auto i) {
- return i->GetDexPc() == kRead1DexPc;
- });
- HPredicatedInstanceFieldGet* pred2 = FindOrNull(pred_gets.begin(), pred_gets.end(), [&](auto i) {
- return i->GetDexPc() == kRead2DexPc;
- });
- ASSERT_NE(pred1, nullptr);
- ASSERT_NE(pred2, nullptr);
- EXPECT_INS_EQ(sel_return->InputAt(0), pred2);
- EXPECT_INS_EQ(sel_return->InputAt(1), pred1);
- // Check targets
- EXPECT_TRUE(pred1->GetTarget()->IsPhi()) << pred1->DumpWithArgs();
- EXPECT_TRUE(pred2->GetTarget()->IsPhi()) << pred2->DumpWithArgs();
- HInstruction* mat1 = FindSingleInstruction<HNewInstance>(graph_, left->GetSinglePredecessor());
- HInstruction* mat2 =
- FindSingleInstruction<HNewInstance>(graph_, left_left->GetSinglePredecessor());
- EXPECT_INS_EQ(pred1->GetTarget()->InputAt(0), mat1);
- EXPECT_INS_EQ(pred1->GetTarget()->InputAt(1), null_const);
- EXPECT_TRUE(pred2->GetTarget()->InputAt(0)->IsPhi()) << pred2->DumpWithArgs();
- EXPECT_INS_EQ(pred2->GetTarget()->InputAt(0)->InputAt(0), mat2);
- EXPECT_INS_EQ(pred2->GetTarget()->InputAt(0)->InputAt(1), null_const);
- EXPECT_INS_EQ(pred2->GetTarget()->InputAt(1), null_const);
- // Check default values.
- EXPECT_TRUE(pred1->GetDefaultValue()->IsPhi()) << pred1->DumpWithArgs();
- EXPECT_TRUE(pred2->GetDefaultValue()->IsPhi()) << pred2->DumpWithArgs();
- EXPECT_INS_EQ(pred1->GetDefaultValue()->InputAt(0), null_const);
- EXPECT_INS_EQ(pred1->GetDefaultValue()->InputAt(1), param_obj1);
- EXPECT_TRUE(pred2->GetDefaultValue()->InputAt(0)->IsPhi()) << pred2->DumpWithArgs();
- EXPECT_INS_EQ(pred2->GetDefaultValue()->InputAt(0)->InputAt(0), null_const);
- EXPECT_INS_EQ(pred2->GetDefaultValue()->InputAt(0)->InputAt(1), mat1);
- EXPECT_INS_EQ(pred2->GetDefaultValue()->InputAt(1), param_obj2);
-}
-
-// // ENTRY
-// // To be moved
-// // NB Order important. By having the alloc and store of obj1 before obj2 we
-// // ensure we'll build the materialization for obj1 first (just due to how
-// // we iterate).
-// obj1 = new Obj();
-// obj.foo = 12;
-// obj2 = new Obj(); // has env[obj1]
-// obj2.foo = 15;
-// if (param1) {
-// // LEFT
-// // Need to update env to nullptr
-// escape(obj1/2);
-// if (param2) {
-// // LEFT_LEFT
-// escape(obj2/1);
-// } else {}
-// } else {}
-// return obj1.foo + obj2.foo;
-// EXIT
-TEST_P(OrderDependentTestGroup, PredicatedEnvUse) {
- ScopedObjectAccess soa(Thread::Current());
- VariableSizedHandleScope vshs(soa.Self());
- CreateGraph(&vshs);
- AdjacencyListGraph blks(SetupFromAdjacencyList("entry",
- "exit",
- {{"entry", "left"},
- {"entry", "right"},
- {"left", "left_left"},
- {"left", "left_right"},
- {"left_left", "left_end"},
- {"left_right", "left_end"},
- {"left_end", "breturn"},
- {"right", "breturn"},
- {"breturn", "exit"}}));
-#define GET_BLOCK(name) HBasicBlock* name = blks.Get(#name)
- GET_BLOCK(entry);
- GET_BLOCK(exit);
- GET_BLOCK(breturn);
- GET_BLOCK(right);
- GET_BLOCK(left);
- GET_BLOCK(left_left);
- GET_BLOCK(left_right);
- GET_BLOCK(left_end);
-#undef GET_BLOCK
- TestOrder order = GetParam();
- EnsurePredecessorOrder(breturn, {left_end, right});
- EnsurePredecessorOrder(left_end, {left_left, left_right});
- HInstruction* param1 = MakeParam(DataType::Type::kBool);
- HInstruction* param2 = MakeParam(DataType::Type::kBool);
- HInstruction* c12 = graph_->GetIntConstant(12);
- HInstruction* c15 = graph_->GetIntConstant(15);
-
- HInstruction* cls1 = MakeClassLoad();
- HInstruction* cls2 = MakeClassLoad();
- HInstruction* new_inst1 = MakeNewInstance(cls1);
- HInstruction* store1 = MakeIFieldSet(new_inst1, c12, MemberOffset(32));
- HInstruction* new_inst2 = MakeNewInstance(cls2);
- HInstruction* store2 = MakeIFieldSet(new_inst2, c15, MemberOffset(32));
- HInstruction* if_inst = new (GetAllocator()) HIf(param1);
- entry->AddInstruction(cls1);
- entry->AddInstruction(cls2);
- entry->AddInstruction(new_inst1);
- entry->AddInstruction(store1);
- entry->AddInstruction(new_inst2);
- entry->AddInstruction(store2);
- entry->AddInstruction(if_inst);
- ManuallyBuildEnvFor(cls1, {});
- cls2->CopyEnvironmentFrom(cls1->GetEnvironment());
- new_inst1->CopyEnvironmentFrom(cls1->GetEnvironment());
- ManuallyBuildEnvFor(new_inst2, {new_inst1});
-
- HInstruction* first_inst = new_inst1;
- HInstruction* second_inst = new_inst2;
-
- if (order == TestOrder::kReverseOfAlloc) {
- std::swap(first_inst, second_inst);
- }
-
- HInstruction* call_left = MakeInvoke(DataType::Type::kVoid, { first_inst });
- HInstruction* if_left = new (GetAllocator()) HIf(param2);
- left->AddInstruction(call_left);
- left->AddInstruction(if_left);
- call_left->CopyEnvironmentFrom(new_inst2->GetEnvironment());
-
- HInstruction* call_left_left = MakeInvoke(DataType::Type::kVoid, { second_inst });
- HInstruction* goto_left_left = new (GetAllocator()) HGoto();
- left_left->AddInstruction(call_left_left);
- left_left->AddInstruction(goto_left_left);
- call_left_left->CopyEnvironmentFrom(new_inst2->GetEnvironment());
-
- left_right->AddInstruction(new (GetAllocator()) HGoto());
- left_end->AddInstruction(new (GetAllocator()) HGoto());
-
- right->AddInstruction(new (GetAllocator()) HGoto());
-
- HInstruction* read1 = MakeIFieldGet(new_inst1, DataType::Type::kInt32, MemberOffset(32));
- HInstruction* read2 = MakeIFieldGet(new_inst2, DataType::Type::kInt32, MemberOffset(32));
- HInstruction* add_return = new (GetAllocator()) HAdd(DataType::Type::kInt32, read1, read2);
- HInstruction* return_exit = new (GetAllocator()) HReturn(add_return);
- breturn->AddInstruction(read1);
- breturn->AddInstruction(read2);
- breturn->AddInstruction(add_return);
- breturn->AddInstruction(return_exit);
-
- SetupExit(exit);
-
- PerformLSEWithPartial(blks);
-
- HNewInstance* moved_new_inst1;
- HInstanceFieldSet* moved_set1;
- HNewInstance* moved_new_inst2;
- HInstanceFieldSet* moved_set2;
- HBasicBlock* first_mat_block = left->GetSinglePredecessor();
- HBasicBlock* second_mat_block = left_left->GetSinglePredecessor();
- if (order == TestOrder::kReverseOfAlloc) {
- std::swap(first_mat_block, second_mat_block);
- }
- std::tie(moved_new_inst1, moved_set1) =
- FindSingleInstructions<HNewInstance, HInstanceFieldSet>(graph_, first_mat_block);
- std::tie(moved_new_inst2, moved_set2) =
- FindSingleInstructions<HNewInstance, HInstanceFieldSet>(graph_, second_mat_block);
- std::vector<HPredicatedInstanceFieldGet*> pred_gets;
- std::vector<HPhi*> phis;
- std::tie(pred_gets, phis) = FindAllInstructions<HPredicatedInstanceFieldGet, HPhi>(graph_);
- EXPECT_NE(moved_new_inst1, nullptr);
- EXPECT_NE(moved_new_inst2, nullptr);
- EXPECT_NE(moved_set1, nullptr);
- EXPECT_NE(moved_set2, nullptr);
- EXPECT_INS_EQ(moved_set1->InputAt(1), c12);
- EXPECT_INS_EQ(moved_set2->InputAt(1), c15);
- EXPECT_INS_RETAINED(call_left);
- EXPECT_INS_RETAINED(call_left_left);
- EXPECT_INS_REMOVED(store1);
- EXPECT_INS_REMOVED(store2);
- EXPECT_INS_REMOVED(read1);
- EXPECT_INS_REMOVED(read2);
- EXPECT_INS_EQ(moved_new_inst2->GetEnvironment()->GetInstructionAt(0),
- order == TestOrder::kSameAsAlloc
- ? moved_new_inst1
- : static_cast<HInstruction*>(graph_->GetNullConstant()));
-}
-
-// // ENTRY
-// obj1 = new Obj1();
-// obj2 = new Obj2();
-// val1 = 3;
-// val2 = 13;
-// // The exact order the stores are written in affects the order in which we
-// // perform partial LSE on the values.
-// obj1/2.field = val1/2;
-// obj2/1.field = val2/1;
-// if (parameter_value) {
-// // LEFT
-// escape(obj1);
-// escape(obj2);
-// } else {
-// // RIGHT
-// // ELIMINATE
-// obj1.field = 2;
-// obj2.field = 12;
-// }
-// EXIT
-// predicated-ELIMINATE
-// return obj1.field + obj2.field
-TEST_P(OrderDependentTestGroup, FieldSetOrderEnv) {
- ScopedObjectAccess soa(Thread::Current());
- VariableSizedHandleScope vshs(soa.Self());
- CreateGraph(/*handles=*/&vshs);
- AdjacencyListGraph blks(SetupFromAdjacencyList("entry",
- "exit",
- {{"entry", "left"},
- {"entry", "right"},
- {"left", "breturn"},
- {"right", "breturn"},
- {"breturn", "exit"}}));
-#define GET_BLOCK(name) HBasicBlock* name = blks.Get(#name)
- GET_BLOCK(entry);
- GET_BLOCK(exit);
- GET_BLOCK(breturn);
- GET_BLOCK(left);
- GET_BLOCK(right);
-#undef GET_BLOCK
- TestOrder order = GetParam();
- EnsurePredecessorOrder(breturn, {left, right});
- HInstruction* bool_value = MakeParam(DataType::Type::kBool);
- HInstruction* c2 = graph_->GetIntConstant(2);
- HInstruction* c3 = graph_->GetIntConstant(3);
- HInstruction* c12 = graph_->GetIntConstant(12);
- HInstruction* c13 = graph_->GetIntConstant(13);
-
- HInstruction* cls1 = MakeClassLoad();
- HInstruction* cls2 = MakeClassLoad();
- HInstruction* new_inst1 = MakeNewInstance(cls1);
- HInstruction* new_inst2 = MakeNewInstance(cls2);
- HInstruction* write_entry1 = MakeIFieldSet(new_inst1, c3, MemberOffset(32));
- HInstruction* write_entry2 = MakeIFieldSet(new_inst2, c13, MemberOffset(32));
- HInstruction* if_inst = new (GetAllocator()) HIf(bool_value);
- entry->AddInstruction(cls1);
- entry->AddInstruction(cls2);
- entry->AddInstruction(new_inst1);
- entry->AddInstruction(new_inst2);
- if (order == TestOrder::kSameAsAlloc) {
- entry->AddInstruction(write_entry1);
- entry->AddInstruction(write_entry2);
- } else {
- entry->AddInstruction(write_entry2);
- entry->AddInstruction(write_entry1);
- }
- entry->AddInstruction(if_inst);
- ManuallyBuildEnvFor(cls1, {});
- cls2->CopyEnvironmentFrom(cls1->GetEnvironment());
- new_inst1->CopyEnvironmentFrom(cls1->GetEnvironment());
- ManuallyBuildEnvFor(new_inst2, {new_inst1});
-
- HInstruction* call_left1 = MakeInvoke(DataType::Type::kVoid, { new_inst1 });
- HInstruction* call_left2 = MakeInvoke(DataType::Type::kVoid, { new_inst2 });
- HInstruction* goto_left = new (GetAllocator()) HGoto();
- left->AddInstruction(call_left1);
- left->AddInstruction(call_left2);
- left->AddInstruction(goto_left);
- call_left1->CopyEnvironmentFrom(cls1->GetEnvironment());
- call_left2->CopyEnvironmentFrom(cls1->GetEnvironment());
-
- HInstruction* write_right1 = MakeIFieldSet(new_inst1, c2, MemberOffset(32));
- HInstruction* write_right2 = MakeIFieldSet(new_inst2, c12, MemberOffset(32));
- HInstruction* goto_right = new (GetAllocator()) HGoto();
- right->AddInstruction(write_right1);
- right->AddInstruction(write_right2);
- right->AddInstruction(goto_right);
-
- HInstruction* read_bottom1 = MakeIFieldGet(new_inst1, DataType::Type::kInt32, MemberOffset(32));
- HInstruction* read_bottom2 = MakeIFieldGet(new_inst2, DataType::Type::kInt32, MemberOffset(32));
- HInstruction* combine =
- new (GetAllocator()) HAdd(DataType::Type::kInt32, read_bottom1, read_bottom2);
- HInstruction* return_exit = new (GetAllocator()) HReturn(combine);
- breturn->AddInstruction(read_bottom1);
- breturn->AddInstruction(read_bottom2);
- breturn->AddInstruction(combine);
- breturn->AddInstruction(return_exit);
-
- SetupExit(exit);
-
- PerformLSEWithPartial(blks);
-
- EXPECT_INS_REMOVED(write_entry1);
- EXPECT_INS_REMOVED(write_entry2);
- EXPECT_INS_REMOVED(read_bottom1);
- EXPECT_INS_REMOVED(read_bottom2);
- EXPECT_INS_REMOVED(write_right1);
- EXPECT_INS_REMOVED(write_right2);
- EXPECT_INS_RETAINED(call_left1);
- EXPECT_INS_RETAINED(call_left2);
- std::vector<HPhi*> merges;
- std::vector<HPredicatedInstanceFieldGet*> pred_gets;
- std::vector<HNewInstance*> materializations;
- std::tie(merges, pred_gets) =
- FindAllInstructions<HPhi, HPredicatedInstanceFieldGet>(graph_, breturn);
- std::tie(materializations) = FindAllInstructions<HNewInstance>(graph_);
- ASSERT_EQ(merges.size(), 4u);
- ASSERT_EQ(pred_gets.size(), 2u);
- ASSERT_EQ(materializations.size(), 2u);
- HPhi* merge_value_return1 = FindOrNull(merges.begin(), merges.end(), [&](HPhi* p) {
- return p->GetType() == DataType::Type::kInt32 && p->InputAt(1) == c2;
- });
- HPhi* merge_value_return2 = FindOrNull(merges.begin(), merges.end(), [&](HPhi* p) {
- return p->GetType() == DataType::Type::kInt32 && p->InputAt(1) == c12;
- });
- HNewInstance* mat_alloc1 = FindOrNull(materializations.begin(),
- materializations.end(),
- [&](HNewInstance* n) { return n->InputAt(0) == cls1; });
- HNewInstance* mat_alloc2 = FindOrNull(materializations.begin(),
- materializations.end(),
- [&](HNewInstance* n) { return n->InputAt(0) == cls2; });
- ASSERT_NE(mat_alloc1, nullptr);
- ASSERT_NE(mat_alloc2, nullptr);
- HPhi* merge_alloc1 = FindOrNull(merges.begin(), merges.end(), [&](HPhi* p) {
- return p->GetType() == DataType::Type::kReference && p->InputAt(0) == mat_alloc1;
- });
- HPhi* merge_alloc2 = FindOrNull(merges.begin(), merges.end(), [&](HPhi* p) {
- return p->GetType() == DataType::Type::kReference && p->InputAt(0) == mat_alloc2;
- });
- ASSERT_NE(merge_alloc1, nullptr);
- HPredicatedInstanceFieldGet* pred_get1 =
- FindOrNull(pred_gets.begin(), pred_gets.end(), [&](HPredicatedInstanceFieldGet* pg) {
- return pg->GetTarget() == merge_alloc1;
- });
- ASSERT_NE(merge_alloc2, nullptr);
- HPredicatedInstanceFieldGet* pred_get2 =
- FindOrNull(pred_gets.begin(), pred_gets.end(), [&](HPredicatedInstanceFieldGet* pg) {
- return pg->GetTarget() == merge_alloc2;
- });
- ASSERT_NE(merge_value_return1, nullptr);
- ASSERT_NE(merge_value_return2, nullptr);
- EXPECT_INS_EQ(merge_alloc1->InputAt(1), graph_->GetNullConstant());
- EXPECT_INS_EQ(merge_alloc2->InputAt(1), graph_->GetNullConstant());
- ASSERT_NE(pred_get1, nullptr);
- EXPECT_INS_EQ(pred_get1->GetTarget(), merge_alloc1);
- EXPECT_INS_EQ(pred_get1->GetDefaultValue(), merge_value_return1)
- << " pred-get is: " << *pred_get1;
- EXPECT_INS_EQ(merge_value_return1->InputAt(0), graph_->GetIntConstant(0))
- << " merge val is: " << *merge_value_return1;
- EXPECT_INS_EQ(merge_value_return1->InputAt(1), c2) << " merge val is: " << *merge_value_return1;
- ASSERT_NE(pred_get2, nullptr);
- EXPECT_INS_EQ(pred_get2->GetTarget(), merge_alloc2);
- EXPECT_INS_EQ(pred_get2->GetDefaultValue(), merge_value_return2)
- << " pred-get is: " << *pred_get2;
- EXPECT_INS_EQ(merge_value_return2->InputAt(0), graph_->GetIntConstant(0))
- << " merge val is: " << *merge_value_return1;
- EXPECT_INS_EQ(merge_value_return2->InputAt(1), c12) << " merge val is: " << *merge_value_return1;
- EXPECT_INS_EQ(mat_alloc2->GetEnvironment()->GetInstructionAt(0), mat_alloc1);
-}
-
-// // TODO We can compile this better if we are better able to understand lifetimes.
-// // ENTRY
-// obj1 = new Obj1();
-// obj2 = new Obj2();
-// // The exact order in which the stores are written affects the order in which
-// // we perform partial LSE on the values
-// obj{1,2}.var = param_obj;
-// obj{2,1}.var = param_obj;
-// if (param_1) {
-// // EARLY_RETURN
-// return;
-// }
-// // escape of obj1
-// obj2.var = obj1;
-// if (param_2) {
-// // escape of obj2 with a materialization that uses obj1
-// escape(obj2);
-// }
-// // EXIT
-// return;
-TEST_P(OrderDependentTestGroup, MaterializationMovedUse) {
- ScopedObjectAccess soa(Thread::Current());
- VariableSizedHandleScope vshs(soa.Self());
- CreateGraph(/*handles=*/&vshs);
- AdjacencyListGraph blks(SetupFromAdjacencyList("entry",
- "exit",
- {{"entry", "early_return"},
- {"early_return", "exit"},
- {"entry", "escape_1"},
- {"escape_1", "escape_2"},
- {"escape_1", "escape_1_crit_break"},
- {"escape_1_crit_break", "exit"},
- {"escape_2", "exit"}}));
-#define GET_BLOCK(name) HBasicBlock* name = blks.Get(#name)
- GET_BLOCK(entry);
- GET_BLOCK(exit);
- GET_BLOCK(early_return);
- GET_BLOCK(escape_1);
- GET_BLOCK(escape_1_crit_break);
- GET_BLOCK(escape_2);
-#undef GET_BLOCK
- TestOrder order = GetParam();
- HInstruction* param_1 = MakeParam(DataType::Type::kBool);
- HInstruction* param_2 = MakeParam(DataType::Type::kBool);
- HInstruction* param_obj = MakeParam(DataType::Type::kReference);
-
- HInstruction* cls1 = MakeClassLoad();
- HInstruction* cls2 = MakeClassLoad();
- HInstruction* new_inst1 = MakeNewInstance(cls1);
- HInstruction* new_inst2 = MakeNewInstance(cls2);
- HInstruction* write_entry1 = MakeIFieldSet(new_inst1, param_obj, MemberOffset(32));
- HInstruction* write_entry2 = MakeIFieldSet(new_inst2, param_obj, MemberOffset(32));
- HInstruction* if_inst = new (GetAllocator()) HIf(param_1);
- entry->AddInstruction(cls1);
- entry->AddInstruction(cls2);
- entry->AddInstruction(new_inst1);
- entry->AddInstruction(new_inst2);
- if (order == TestOrder::kSameAsAlloc) {
- entry->AddInstruction(write_entry1);
- entry->AddInstruction(write_entry2);
- } else {
- entry->AddInstruction(write_entry2);
- entry->AddInstruction(write_entry1);
- }
- entry->AddInstruction(if_inst);
- ManuallyBuildEnvFor(cls1, {});
- cls2->CopyEnvironmentFrom(cls1->GetEnvironment());
- new_inst1->CopyEnvironmentFrom(cls1->GetEnvironment());
- new_inst2->CopyEnvironmentFrom(cls1->GetEnvironment());
-
- early_return->AddInstruction(new (GetAllocator()) HReturnVoid());
-
- HInstruction* escape_1_set = MakeIFieldSet(new_inst2, new_inst1, MemberOffset(32));
- HInstruction* escape_1_if = new (GetAllocator()) HIf(param_2);
- escape_1->AddInstruction(escape_1_set);
- escape_1->AddInstruction(escape_1_if);
-
- escape_1_crit_break->AddInstruction(new (GetAllocator()) HReturnVoid());
-
- HInstruction* escape_2_call = MakeInvoke(DataType::Type::kVoid, {new_inst2});
- HInstruction* escape_2_return = new (GetAllocator()) HReturnVoid();
- escape_2->AddInstruction(escape_2_call);
- escape_2->AddInstruction(escape_2_return);
- escape_2_call->CopyEnvironmentFrom(cls1->GetEnvironment());
-
- SetupExit(exit);
-
- PerformLSEWithPartial(blks);
-
- EXPECT_INS_REMOVED(new_inst1);
- EXPECT_INS_REMOVED(new_inst2);
- EXPECT_INS_REMOVED(write_entry1);
- EXPECT_INS_REMOVED(write_entry2);
- EXPECT_INS_REMOVED(escape_1_set);
- EXPECT_INS_RETAINED(escape_2_call);
-
- HInstruction* obj1_mat =
- FindSingleInstruction<HNewInstance>(graph_, escape_1->GetSinglePredecessor());
- HInstruction* obj1_set =
- FindSingleInstruction<HInstanceFieldSet>(graph_, escape_1->GetSinglePredecessor());
- HInstruction* obj2_mat =
- FindSingleInstruction<HNewInstance>(graph_, escape_2->GetSinglePredecessor());
- HInstruction* obj2_set =
- FindSingleInstruction<HInstanceFieldSet>(graph_, escape_2->GetSinglePredecessor());
- ASSERT_TRUE(obj1_mat != nullptr);
- ASSERT_TRUE(obj2_mat != nullptr);
- ASSERT_TRUE(obj1_set != nullptr);
- ASSERT_TRUE(obj2_set != nullptr);
- EXPECT_INS_EQ(obj1_set->InputAt(0), obj1_mat);
- EXPECT_INS_EQ(obj1_set->InputAt(1), param_obj);
- EXPECT_INS_EQ(obj2_set->InputAt(0), obj2_mat);
- EXPECT_INS_EQ(obj2_set->InputAt(1), obj1_mat);
-}
-
-INSTANTIATE_TEST_SUITE_P(LoadStoreEliminationTest,
- OrderDependentTestGroup,
- testing::Values(TestOrder::kSameAsAlloc, TestOrder::kReverseOfAlloc));
-
-// // ENTRY
-// // To be moved
-// obj = new Obj();
-// obj.foo = 12;
-// if (parameter_value) {
-// // LEFT
-// escape(obj);
-// } else {}
-// EXIT
-TEST_F(LoadStoreEliminationTest, MovePredicatedAlloc) {
- ScopedObjectAccess soa(Thread::Current());
- VariableSizedHandleScope vshs(soa.Self());
- CreateGraph(&vshs);
- AdjacencyListGraph blks(SetupFromAdjacencyList("entry",
- "exit",
- {{"entry", "left"},
- {"entry", "right"},
- {"right", "breturn"},
- {"left", "breturn"},
- {"breturn", "exit"}}));
-#define GET_BLOCK(name) HBasicBlock* name = blks.Get(#name)
- GET_BLOCK(entry);
- GET_BLOCK(exit);
- GET_BLOCK(breturn);
- GET_BLOCK(left);
- GET_BLOCK(right);
-#undef GET_BLOCK
- EnsurePredecessorOrder(breturn, {left, right});
- HInstruction* bool_value = MakeParam(DataType::Type::kBool);
- HInstruction* c12 = graph_->GetIntConstant(12);
-
- HInstruction* cls = MakeClassLoad();
- HInstruction* new_inst = MakeNewInstance(cls);
- HInstruction* store = MakeIFieldSet(new_inst, c12, MemberOffset(32));
- HInstruction* if_inst = new (GetAllocator()) HIf(bool_value);
- entry->AddInstruction(cls);
- entry->AddInstruction(new_inst);
- entry->AddInstruction(store);
- entry->AddInstruction(if_inst);
- ManuallyBuildEnvFor(cls, {});
- new_inst->CopyEnvironmentFrom(cls->GetEnvironment());
-
- HInstruction* call_left = MakeInvoke(DataType::Type::kVoid, { new_inst });
- HInstruction* goto_left = new (GetAllocator()) HGoto();
- left->AddInstruction(call_left);
- left->AddInstruction(goto_left);
- call_left->CopyEnvironmentFrom(cls->GetEnvironment());
-
- right->AddInstruction(new (GetAllocator()) HGoto());
-
- HInstruction* return_exit = new (GetAllocator()) HReturnVoid();
- breturn->AddInstruction(return_exit);
-
- SetupExit(exit);
-
- PerformLSEWithPartial(blks);
-
- HNewInstance* moved_new_inst = nullptr;
- HInstanceFieldSet* moved_set = nullptr;
- std::tie(moved_new_inst, moved_set) =
- FindSingleInstructions<HNewInstance, HInstanceFieldSet>(graph_);
- EXPECT_NE(moved_new_inst, nullptr);
- EXPECT_NE(moved_set, nullptr);
- EXPECT_INS_RETAINED(call_left);
- // store removed or moved.
- EXPECT_NE(store->GetBlock(), entry);
- // New-inst removed or moved.
- EXPECT_NE(new_inst->GetBlock(), entry);
- EXPECT_INS_EQ(moved_set->InputAt(0), moved_new_inst);
- EXPECT_INS_EQ(moved_set->InputAt(1), c12);
-}
-
-// // ENTRY
-// // To be moved
-// obj = new Obj();
-// obj.foo = 12;
-// if (parameter_value) {
-// // LEFT
-// escape(obj);
-// }
-// EXIT
-// int a = obj.foo;
-// obj.foo = 13;
-// noescape();
-// int b = obj.foo;
-// obj.foo = 14;
-// noescape();
-// int c = obj.foo;
-// obj.foo = 15;
-// noescape();
-// return a + b + c
-TEST_F(LoadStoreEliminationTest, MutiPartialLoadStore) {
- ScopedObjectAccess soa(Thread::Current());
- VariableSizedHandleScope vshs(soa.Self());
- CreateGraph(&vshs);
- AdjacencyListGraph blks(SetupFromAdjacencyList("entry",
- "exit",
- {{"entry", "left"},
- {"entry", "right"},
- {"right", "breturn"},
- {"left", "breturn"},
- {"breturn", "exit"}}));
-#define GET_BLOCK(name) HBasicBlock* name = blks.Get(#name)
- GET_BLOCK(entry);
- GET_BLOCK(exit);
- GET_BLOCK(breturn);
- GET_BLOCK(left);
- GET_BLOCK(right);
-#undef GET_BLOCK
- EnsurePredecessorOrder(breturn, {left, right});
- HInstruction* bool_value = MakeParam(DataType::Type::kBool);
- HInstruction* c12 = graph_->GetIntConstant(12);
- HInstruction* c13 = graph_->GetIntConstant(13);
- HInstruction* c14 = graph_->GetIntConstant(14);
- HInstruction* c15 = graph_->GetIntConstant(15);
-
- HInstruction* cls = MakeClassLoad();
- HInstruction* new_inst = MakeNewInstance(cls);
- HInstruction* store = MakeIFieldSet(new_inst, c12, MemberOffset(32));
- HInstruction* if_inst = new (GetAllocator()) HIf(bool_value);
- entry->AddInstruction(cls);
- entry->AddInstruction(new_inst);
- entry->AddInstruction(store);
- entry->AddInstruction(if_inst);
- ManuallyBuildEnvFor(cls, {});
- new_inst->CopyEnvironmentFrom(cls->GetEnvironment());
-
- HInstruction* call_left = MakeInvoke(DataType::Type::kVoid, { new_inst });
- HInstruction* goto_left = new (GetAllocator()) HGoto();
- left->AddInstruction(call_left);
- left->AddInstruction(goto_left);
- call_left->CopyEnvironmentFrom(cls->GetEnvironment());
-
- HInstruction* goto_right = new (GetAllocator()) HGoto();
- right->AddInstruction(goto_right);
-
- HInstruction* a_val = MakeIFieldGet(new_inst, DataType::Type::kInt32, MemberOffset(32));
- HInstruction* a_reset = MakeIFieldSet(new_inst, c13, MemberOffset(32));
- HInstruction* a_noescape = MakeInvoke(DataType::Type::kVoid, {});
- HInstruction* b_val = MakeIFieldGet(new_inst, DataType::Type::kInt32, MemberOffset(32));
- HInstruction* b_reset = MakeIFieldSet(new_inst, c14, MemberOffset(32));
- HInstruction* b_noescape = MakeInvoke(DataType::Type::kVoid, {});
- HInstruction* c_val = MakeIFieldGet(new_inst, DataType::Type::kInt32, MemberOffset(32));
- HInstruction* c_reset = MakeIFieldSet(new_inst, c15, MemberOffset(32));
- HInstruction* c_noescape = MakeInvoke(DataType::Type::kVoid, {});
- HInstruction* add_1_exit = new (GetAllocator()) HAdd(DataType::Type::kInt32, a_val, b_val);
- HInstruction* add_2_exit = new (GetAllocator()) HAdd(DataType::Type::kInt32, c_val, add_1_exit);
- HInstruction* return_exit = new (GetAllocator()) HReturn(add_2_exit);
- breturn->AddInstruction(a_val);
- breturn->AddInstruction(a_reset);
- breturn->AddInstruction(a_noescape);
- breturn->AddInstruction(b_val);
- breturn->AddInstruction(b_reset);
- breturn->AddInstruction(b_noescape);
- breturn->AddInstruction(c_val);
- breturn->AddInstruction(c_reset);
- breturn->AddInstruction(c_noescape);
- breturn->AddInstruction(add_1_exit);
- breturn->AddInstruction(add_2_exit);
- breturn->AddInstruction(return_exit);
- ManuallyBuildEnvFor(a_noescape, {new_inst, a_val});
- ManuallyBuildEnvFor(b_noescape, {new_inst, a_val, b_val});
- ManuallyBuildEnvFor(c_noescape, {new_inst, a_val, b_val, c_val});
-
- SetupExit(exit);
-
- PerformLSEWithPartial(blks);
-
- HNewInstance* moved_new_inst = nullptr;
- HInstanceFieldSet* moved_set = nullptr;
- std::tie(moved_new_inst, moved_set) =
- FindSingleInstructions<HNewInstance, HInstanceFieldSet>(graph_, left->GetSinglePredecessor());
- std::vector<HPredicatedInstanceFieldGet*> pred_gets;
- std::vector<HInstanceFieldSet*> pred_sets;
- std::vector<HPhi*> return_phis;
- std::tie(return_phis, pred_gets, pred_sets) =
- FindAllInstructions<HPhi, HPredicatedInstanceFieldGet, HInstanceFieldSet>(graph_, breturn);
- ASSERT_EQ(return_phis.size(), 2u);
- HPhi* inst_phi = return_phis[0];
- HPhi* val_phi = return_phis[1];
- if (inst_phi->GetType() != DataType::Type::kReference) {
- std::swap(inst_phi, val_phi);
- }
- ASSERT_NE(moved_new_inst, nullptr);
- EXPECT_INS_EQ(inst_phi->InputAt(0), moved_new_inst);
- EXPECT_INS_EQ(inst_phi->InputAt(1), graph_->GetNullConstant());
- EXPECT_INS_EQ(val_phi->InputAt(0), graph_->GetIntConstant(0));
- EXPECT_EQ(val_phi->InputAt(1), c12);
- ASSERT_EQ(pred_gets.size(), 3u);
- ASSERT_EQ(pred_gets.size(), pred_sets.size());
- std::vector<HInstruction*> set_values{c13, c14, c15};
- std::vector<HInstruction*> get_values{val_phi, c13, c14};
- ASSERT_NE(moved_set, nullptr);
- EXPECT_INS_EQ(moved_set->InputAt(0), moved_new_inst);
- EXPECT_INS_EQ(moved_set->InputAt(1), c12);
- EXPECT_INS_RETAINED(call_left);
- // store removed or moved.
- EXPECT_NE(store->GetBlock(), entry);
- // New-inst removed or moved.
- EXPECT_NE(new_inst->GetBlock(), entry);
- for (auto [get, val] : ZipLeft(MakeIterationRange(pred_gets), MakeIterationRange(get_values))) {
- EXPECT_INS_EQ(get->GetDefaultValue(), val);
- }
- for (auto [set, val] : ZipLeft(MakeIterationRange(pred_sets), MakeIterationRange(set_values))) {
- EXPECT_INS_EQ(set->InputAt(1), val);
- EXPECT_TRUE(set->GetIsPredicatedSet()) << *set;
- }
- EXPECT_INS_RETAINED(a_noescape);
- EXPECT_INS_RETAINED(b_noescape);
- EXPECT_INS_RETAINED(c_noescape);
- EXPECT_INS_EQ(add_1_exit->InputAt(0), pred_gets[0]);
- EXPECT_INS_EQ(add_1_exit->InputAt(1), pred_gets[1]);
- EXPECT_INS_EQ(add_2_exit->InputAt(0), pred_gets[2]);
-
- EXPECT_EQ(a_noescape->GetEnvironment()->Size(), 2u);
- EXPECT_INS_EQ(a_noescape->GetEnvironment()->GetInstructionAt(0), inst_phi);
- EXPECT_INS_EQ(a_noescape->GetEnvironment()->GetInstructionAt(1), pred_gets[0]);
- EXPECT_EQ(b_noescape->GetEnvironment()->Size(), 3u);
- EXPECT_INS_EQ(b_noescape->GetEnvironment()->GetInstructionAt(0), inst_phi);
- EXPECT_INS_EQ(b_noescape->GetEnvironment()->GetInstructionAt(1), pred_gets[0]);
- EXPECT_INS_EQ(b_noescape->GetEnvironment()->GetInstructionAt(2), pred_gets[1]);
- EXPECT_EQ(c_noescape->GetEnvironment()->Size(), 4u);
- EXPECT_INS_EQ(c_noescape->GetEnvironment()->GetInstructionAt(0), inst_phi);
- EXPECT_INS_EQ(c_noescape->GetEnvironment()->GetInstructionAt(1), pred_gets[0]);
- EXPECT_INS_EQ(c_noescape->GetEnvironment()->GetInstructionAt(2), pred_gets[1]);
- EXPECT_INS_EQ(c_noescape->GetEnvironment()->GetInstructionAt(3), pred_gets[2]);
-}
-
-// // ENTRY
-// // To be moved
-// obj = new Obj();
-// obj.foo = 12;
-// int a = obj.foo;
-// obj.foo = 13;
-// noescape();
-// int b = obj.foo;
-// obj.foo = 14;
-// noescape();
-// int c = obj.foo;
-// obj.foo = 15;
-// noescape();
-// if (parameter_value) {
-// // LEFT
-// escape(obj);
-// }
-// EXIT
-// return a + b + c + obj.foo
-TEST_F(LoadStoreEliminationTest, MutiPartialLoadStore2) {
- ScopedObjectAccess soa(Thread::Current());
- VariableSizedHandleScope vshs(soa.Self());
- CreateGraph(&vshs);
-  // We need an actual entry block since we check the env layout and the way we
-  // add constants would otherwise break it.
- AdjacencyListGraph blks(SetupFromAdjacencyList("start",
- "exit",
- {{"start", "entry"},
- {"entry", "left"},
- {"entry", "right"},
- {"right", "breturn"},
- {"left", "breturn"},
- {"breturn", "exit"}}));
-#define GET_BLOCK(name) HBasicBlock* name = blks.Get(#name)
- GET_BLOCK(start);
- GET_BLOCK(entry);
- GET_BLOCK(exit);
- GET_BLOCK(breturn);
- GET_BLOCK(left);
- GET_BLOCK(right);
-#undef GET_BLOCK
- EnsurePredecessorOrder(breturn, {left, right});
- HInstruction* bool_value = MakeParam(DataType::Type::kBool);
- HInstruction* c12 = graph_->GetIntConstant(12);
- HInstruction* c13 = graph_->GetIntConstant(13);
- HInstruction* c14 = graph_->GetIntConstant(14);
- HInstruction* c15 = graph_->GetIntConstant(15);
-
- HInstruction* start_suspend = new (GetAllocator()) HSuspendCheck();
- HInstruction* start_goto = new (GetAllocator()) HGoto();
-
- start->AddInstruction(start_suspend);
- start->AddInstruction(start_goto);
- ManuallyBuildEnvFor(start_suspend, {});
-
- HInstruction* cls = MakeClassLoad();
- HInstruction* new_inst = MakeNewInstance(cls);
- HInstruction* store = MakeIFieldSet(new_inst, c12, MemberOffset(32));
-
- HInstruction* a_val = MakeIFieldGet(new_inst, DataType::Type::kInt32, MemberOffset(32));
- HInstruction* a_reset = MakeIFieldSet(new_inst, c13, MemberOffset(32));
- HInstruction* a_noescape = MakeInvoke(DataType::Type::kVoid, {});
- HInstruction* b_val = MakeIFieldGet(new_inst, DataType::Type::kInt32, MemberOffset(32));
- HInstruction* b_reset = MakeIFieldSet(new_inst, c14, MemberOffset(32));
- HInstruction* b_noescape = MakeInvoke(DataType::Type::kVoid, {});
- HInstruction* c_val = MakeIFieldGet(new_inst, DataType::Type::kInt32, MemberOffset(32));
- HInstruction* c_reset = MakeIFieldSet(new_inst, c15, MemberOffset(32));
- HInstruction* c_noescape = MakeInvoke(DataType::Type::kVoid, {});
- HInstruction* if_inst = new (GetAllocator()) HIf(bool_value);
- entry->AddInstruction(cls);
- entry->AddInstruction(new_inst);
- entry->AddInstruction(store);
- entry->AddInstruction(a_val);
- entry->AddInstruction(a_reset);
- entry->AddInstruction(a_noescape);
- entry->AddInstruction(b_val);
- entry->AddInstruction(b_reset);
- entry->AddInstruction(b_noescape);
- entry->AddInstruction(c_val);
- entry->AddInstruction(c_reset);
- entry->AddInstruction(c_noescape);
- entry->AddInstruction(if_inst);
- ManuallyBuildEnvFor(cls, {});
- new_inst->CopyEnvironmentFrom(cls->GetEnvironment());
- ManuallyBuildEnvFor(a_noescape, {new_inst, a_val});
- ManuallyBuildEnvFor(b_noescape, {new_inst, a_val, b_val});
- ManuallyBuildEnvFor(c_noescape, {new_inst, a_val, b_val, c_val});
-
- HInstruction* call_left = MakeInvoke(DataType::Type::kVoid, { new_inst });
- HInstruction* goto_left = new (GetAllocator()) HGoto();
- left->AddInstruction(call_left);
- left->AddInstruction(goto_left);
- call_left->CopyEnvironmentFrom(c_noescape->GetEnvironment());
-
- HInstruction* goto_right = new (GetAllocator()) HGoto();
- right->AddInstruction(goto_right);
-
- HInstruction* val_exit = MakeIFieldGet(new_inst, DataType::Type::kInt32, MemberOffset(32));
- HInstruction* add_1_exit = new (GetAllocator()) HAdd(DataType::Type::kInt32, a_val, b_val);
- HInstruction* add_2_exit = new (GetAllocator()) HAdd(DataType::Type::kInt32, c_val, add_1_exit);
- HInstruction* add_3_exit =
- new (GetAllocator()) HAdd(DataType::Type::kInt32, val_exit, add_2_exit);
- HInstruction* return_exit = new (GetAllocator()) HReturn(add_3_exit);
- breturn->AddInstruction(val_exit);
- breturn->AddInstruction(add_1_exit);
- breturn->AddInstruction(add_2_exit);
- breturn->AddInstruction(add_3_exit);
- breturn->AddInstruction(return_exit);
-
- SetupExit(exit);
-
- PerformLSEWithPartial(blks);
-
- HNewInstance* moved_new_inst = nullptr;
- HInstanceFieldSet* moved_set = nullptr;
- std::tie(moved_new_inst, moved_set) =
- FindSingleInstructions<HNewInstance, HInstanceFieldSet>(graph_, left->GetSinglePredecessor());
- std::vector<HPredicatedInstanceFieldGet*> pred_gets;
- std::vector<HInstanceFieldSet*> pred_sets;
- std::vector<HPhi*> return_phis;
- std::tie(return_phis, pred_gets, pred_sets) =
- FindAllInstructions<HPhi, HPredicatedInstanceFieldGet, HInstanceFieldSet>(graph_, breturn);
- ASSERT_EQ(return_phis.size(), 2u);
- HPhi* inst_phi = return_phis[0];
- HPhi* val_phi = return_phis[1];
- if (inst_phi->GetType() != DataType::Type::kReference) {
- std::swap(inst_phi, val_phi);
- }
- ASSERT_NE(moved_new_inst, nullptr);
- EXPECT_INS_EQ(inst_phi->InputAt(0), moved_new_inst);
- EXPECT_INS_EQ(inst_phi->InputAt(1), graph_->GetNullConstant());
- EXPECT_INS_EQ(val_phi->InputAt(0), graph_->GetIntConstant(0));
- EXPECT_INS_EQ(val_phi->InputAt(1), c15);
- ASSERT_EQ(pred_gets.size(), 1u);
- ASSERT_EQ(pred_sets.size(), 0u);
- ASSERT_NE(moved_set, nullptr);
- EXPECT_INS_EQ(moved_set->InputAt(0), moved_new_inst);
- EXPECT_INS_EQ(moved_set->InputAt(1), c15);
- EXPECT_INS_RETAINED(call_left);
- // store removed or moved.
- EXPECT_NE(store->GetBlock(), entry);
- // New-inst removed or moved.
- EXPECT_NE(new_inst->GetBlock(), entry);
- EXPECT_INS_REMOVED(a_val);
- EXPECT_INS_REMOVED(b_val);
- EXPECT_INS_REMOVED(c_val);
- EXPECT_INS_RETAINED(a_noescape);
- EXPECT_INS_RETAINED(b_noescape);
- EXPECT_INS_RETAINED(c_noescape);
- EXPECT_INS_EQ(add_1_exit->InputAt(0), c12);
- EXPECT_INS_EQ(add_1_exit->InputAt(1), c13);
- EXPECT_INS_EQ(add_2_exit->InputAt(0), c14);
- EXPECT_INS_EQ(add_2_exit->InputAt(1), add_1_exit);
- EXPECT_INS_EQ(add_3_exit->InputAt(0), pred_gets[0]);
- EXPECT_INS_EQ(pred_gets[0]->GetDefaultValue(), val_phi);
- EXPECT_INS_EQ(add_3_exit->InputAt(1), add_2_exit);
- EXPECT_EQ(a_noescape->GetEnvironment()->Size(), 2u);
- EXPECT_INS_EQ(a_noescape->GetEnvironment()->GetInstructionAt(0), graph_->GetNullConstant());
- EXPECT_INS_EQ(a_noescape->GetEnvironment()->GetInstructionAt(1), c12);
- EXPECT_EQ(b_noescape->GetEnvironment()->Size(), 3u);
- EXPECT_INS_EQ(b_noescape->GetEnvironment()->GetInstructionAt(0), graph_->GetNullConstant());
- EXPECT_INS_EQ(b_noescape->GetEnvironment()->GetInstructionAt(1), c12);
- EXPECT_INS_EQ(b_noescape->GetEnvironment()->GetInstructionAt(2), c13);
- EXPECT_EQ(c_noescape->GetEnvironment()->Size(), 4u);
- EXPECT_INS_EQ(c_noescape->GetEnvironment()->GetInstructionAt(0), graph_->GetNullConstant());
- EXPECT_INS_EQ(c_noescape->GetEnvironment()->GetInstructionAt(1), c12);
- EXPECT_INS_EQ(c_noescape->GetEnvironment()->GetInstructionAt(2), c13);
- EXPECT_INS_EQ(c_noescape->GetEnvironment()->GetInstructionAt(3), c14);
-}
-
-// // ENTRY
-// // To be moved
-// obj = new Obj();
-// // Transforms required for creation are non-trivial and unimportant
-// if (parameter_value) {
-// obj.foo = 10
-// } else {
-// obj.foo = 12;
-// }
-// if (parameter_value_2) {
-// escape(obj);
-// }
-// EXIT
-TEST_F(LoadStoreEliminationTest, MovePredicatedAlloc2) {
- ScopedObjectAccess soa(Thread::Current());
- VariableSizedHandleScope vshs(soa.Self());
- CreateGraph(&vshs);
- AdjacencyListGraph blks(SetupFromAdjacencyList("entry",
- "exit",
- {{"entry", "left_set"},
- {"entry", "right_set"},
- {"left_set", "merge_crit_break"},
- {"right_set", "merge_crit_break"},
- {"merge_crit_break", "merge"},
- {"merge", "escape"},
- {"escape", "breturn"},
- {"merge", "breturn"},
- {"breturn", "exit"}}));
-#define GET_BLOCK(name) HBasicBlock* name = blks.Get(#name)
- GET_BLOCK(entry);
- GET_BLOCK(exit);
- GET_BLOCK(breturn);
- GET_BLOCK(left_set);
- GET_BLOCK(right_set);
- GET_BLOCK(merge);
- GET_BLOCK(merge_crit_break);
- GET_BLOCK(escape);
-#undef GET_BLOCK
- EnsurePredecessorOrder(breturn, {merge, escape});
- EnsurePredecessorOrder(merge_crit_break, {left_set, right_set});
- HInstruction* bool_value = MakeParam(DataType::Type::kBool);
- HInstruction* bool_value_2 = MakeParam(DataType::Type::kBool);
- HInstruction* c10 = graph_->GetIntConstant(10);
- HInstruction* c12 = graph_->GetIntConstant(12);
-
- HInstruction* cls = MakeClassLoad();
- HInstruction* new_inst = MakeNewInstance(cls);
- HInstruction* if_inst = new (GetAllocator()) HIf(bool_value);
- entry->AddInstruction(cls);
- entry->AddInstruction(new_inst);
- entry->AddInstruction(if_inst);
- ManuallyBuildEnvFor(cls, {});
- new_inst->CopyEnvironmentFrom(cls->GetEnvironment());
-
- HInstruction* store_left = MakeIFieldSet(new_inst, c10, MemberOffset(32));
- HInstruction* goto_left = new (GetAllocator()) HGoto();
- left_set->AddInstruction(store_left);
- left_set->AddInstruction(goto_left);
-
- HInstruction* store_right = MakeIFieldSet(new_inst, c12, MemberOffset(32));
- HInstruction* goto_right = new (GetAllocator()) HGoto();
- right_set->AddInstruction(store_right);
- right_set->AddInstruction(goto_right);
-
- merge_crit_break->AddInstruction(new (GetAllocator()) HGoto());
- HInstruction* if_merge = new (GetAllocator()) HIf(bool_value_2);
- merge->AddInstruction(if_merge);
-
- HInstruction* escape_instruction = MakeInvoke(DataType::Type::kVoid, { new_inst });
- HInstruction* escape_goto = new (GetAllocator()) HGoto();
- escape->AddInstruction(escape_instruction);
- escape->AddInstruction(escape_goto);
- escape_instruction->CopyEnvironmentFrom(cls->GetEnvironment());
-
- HInstruction* return_exit = new (GetAllocator()) HReturnVoid();
- breturn->AddInstruction(return_exit);
-
- SetupExit(exit);
-
- PerformLSEWithPartial(blks);
-
- HNewInstance* moved_new_inst;
- HInstanceFieldSet* moved_set;
- std::tie(moved_new_inst, moved_set) =
- FindSingleInstructions<HNewInstance, HInstanceFieldSet>(graph_);
- HPhi* merge_phi = FindSingleInstruction<HPhi>(graph_, merge_crit_break);
- HPhi* alloc_phi = FindSingleInstruction<HPhi>(graph_, breturn);
- EXPECT_INS_EQ(moved_new_inst, moved_set->InputAt(0));
- ASSERT_NE(alloc_phi, nullptr);
- EXPECT_EQ(alloc_phi->InputAt(0), graph_->GetNullConstant())
- << alloc_phi->GetBlock()->GetPredecessors()[0]->GetBlockId() << " " << *alloc_phi;
- EXPECT_TRUE(alloc_phi->InputAt(1)->IsNewInstance()) << *alloc_phi;
- ASSERT_NE(merge_phi, nullptr);
- EXPECT_EQ(merge_phi->InputCount(), 2u);
- EXPECT_INS_EQ(merge_phi->InputAt(0), c10);
- EXPECT_INS_EQ(merge_phi->InputAt(1), c12);
- EXPECT_TRUE(merge_phi->GetUses().HasExactlyOneElement());
- EXPECT_INS_EQ(merge_phi->GetUses().front().GetUser(), moved_set);
- EXPECT_INS_RETAINED(escape_instruction);
- EXPECT_INS_EQ(escape_instruction->InputAt(0), moved_new_inst);
- // store removed or moved.
- EXPECT_NE(store_left->GetBlock(), left_set);
- EXPECT_NE(store_right->GetBlock(), left_set);
- // New-inst removed or moved.
- EXPECT_NE(new_inst->GetBlock(), entry);
-}
-
-// // ENTRY
-// // To be moved
-// obj = new Obj();
-// switch(args) {
-// default:
-// return obj.a;
-// case b:
-// obj.a = 5; break;
-// case c:
-// obj.b = 4; break;
-// }
-// escape(obj);
-// return obj.a;
-// EXIT
-TEST_F(LoadStoreEliminationTest, MovePredicatedAlloc3) {
- ScopedObjectAccess soa(Thread::Current());
- VariableSizedHandleScope vshs(soa.Self());
- CreateGraph(&vshs);
- AdjacencyListGraph blks(SetupFromAdjacencyList("entry",
- "exit",
- {{"entry", "early_return"},
- {"entry", "set_one"},
- {"entry", "set_two"},
- {"early_return", "exit"},
- {"set_one", "escape"},
- {"set_two", "escape"},
- {"escape", "exit"}}));
-#define GET_BLOCK(name) HBasicBlock* name = blks.Get(#name)
- GET_BLOCK(entry);
- GET_BLOCK(exit);
- GET_BLOCK(escape);
- GET_BLOCK(early_return);
- GET_BLOCK(set_one);
- GET_BLOCK(set_two);
-#undef GET_BLOCK
- EnsurePredecessorOrder(escape, {set_one, set_two});
- HInstruction* int_val = MakeParam(DataType::Type::kInt32);
- HInstruction* c0 = graph_->GetIntConstant(0);
- HInstruction* c4 = graph_->GetIntConstant(4);
- HInstruction* c5 = graph_->GetIntConstant(5);
-
- HInstruction* cls = MakeClassLoad();
- HInstruction* new_inst = MakeNewInstance(cls);
- HInstruction* switch_inst = new (GetAllocator()) HPackedSwitch(0, 2, int_val);
- entry->AddInstruction(cls);
- entry->AddInstruction(new_inst);
- entry->AddInstruction(switch_inst);
- ManuallyBuildEnvFor(cls, {});
- new_inst->CopyEnvironmentFrom(cls->GetEnvironment());
-
- HInstruction* store_one = MakeIFieldSet(new_inst, c4, MemberOffset(32));
- HInstruction* goto_one = new (GetAllocator()) HGoto();
- set_one->AddInstruction(store_one);
- set_one->AddInstruction(goto_one);
-
- HInstruction* store_two = MakeIFieldSet(new_inst, c5, MemberOffset(32));
- HInstruction* goto_two = new (GetAllocator()) HGoto();
- set_two->AddInstruction(store_two);
- set_two->AddInstruction(goto_two);
-
- HInstruction* read_early = MakeIFieldGet(new_inst, DataType::Type::kInt32, MemberOffset(32));
- HInstruction* return_early = new (GetAllocator()) HReturn(read_early);
- early_return->AddInstruction(read_early);
- early_return->AddInstruction(return_early);
-
- HInstruction* escape_instruction = MakeInvoke(DataType::Type::kVoid, { new_inst });
- HInstruction* read_escape = MakeIFieldGet(new_inst, DataType::Type::kInt32, MemberOffset(32));
- HInstruction* return_escape = new (GetAllocator()) HReturn(read_escape);
- escape->AddInstruction(escape_instruction);
- escape->AddInstruction(read_escape);
- escape->AddInstruction(return_escape);
- escape_instruction->CopyEnvironmentFrom(cls->GetEnvironment());
-
- SetupExit(exit);
-
- PerformLSEWithPartial(blks);
-
- // Each escaping switch path gets its own materialization block.
- // Blocks:
- // early_return(5) -> [exit(4)]
- // entry(3) -> [early_return(5), <Unnamed>(9), <Unnamed>(10)]
- // escape(8) -> [exit(4)]
- // exit(4) -> []
- // set_one(6) -> [escape(8)]
- // set_two(7) -> [escape(8)]
- // <Unnamed>(10) -> [set_two(7)]
- // <Unnamed>(9) -> [set_one(6)]
- HBasicBlock* materialize_one = set_one->GetSinglePredecessor();
- HBasicBlock* materialize_two = set_two->GetSinglePredecessor();
- HNewInstance* materialization_ins_one =
- FindSingleInstruction<HNewInstance>(graph_, materialize_one);
- HNewInstance* materialization_ins_two =
- FindSingleInstruction<HNewInstance>(graph_, materialize_two);
- HPhi* new_phi = FindSingleInstruction<HPhi>(graph_, escape);
- EXPECT_NE(materialization_ins_one, nullptr);
- EXPECT_NE(materialization_ins_two, nullptr);
- EXPECT_EQ(materialization_ins_one, new_phi->InputAt(0))
- << *materialization_ins_one << " vs " << *new_phi;
- EXPECT_EQ(materialization_ins_two, new_phi->InputAt(1))
- << *materialization_ins_two << " vs " << *new_phi;
-
- EXPECT_INS_RETAINED(escape_instruction);
- EXPECT_INS_RETAINED(read_escape);
- EXPECT_EQ(read_escape->InputAt(0), new_phi) << *new_phi << " vs " << *read_escape->InputAt(0);
- EXPECT_EQ(store_one->InputAt(0), materialization_ins_one);
- EXPECT_EQ(store_two->InputAt(0), materialization_ins_two);
- EXPECT_EQ(escape_instruction->InputAt(0), new_phi);
- EXPECT_INS_REMOVED(read_early);
- EXPECT_EQ(return_early->InputAt(0), c0);
-}
-
-// // ENTRY
-// // To be moved
-// obj = new Obj();
-// switch(args) {
-// case a:
-// // set_one_and_escape
-// obj.a = 5;
-// escape(obj);
-// // FALLTHROUGH
-// case c:
-// // set_two
-// obj.a = 4; break;
-// default:
-// return obj.a;
-// }
-// escape(obj);
-// return obj.a;
-// EXIT
-TEST_F(LoadStoreEliminationTest, MovePredicatedAlloc4) {
- ScopedObjectAccess soa(Thread::Current());
- VariableSizedHandleScope vshs(soa.Self());
- CreateGraph(&vshs);
- // Break the critical edge between entry and set_two with the
- // set_two_critical_break node. Graph simplification would do this for us if
-  // we didn't do it manually. This way we have a nice name for debugging and
- // testing.
- AdjacencyListGraph blks(SetupFromAdjacencyList("entry",
- "exit",
- {{"entry", "early_return"},
- {"entry", "set_one_and_escape"},
- {"entry", "set_two_critical_break"},
- {"set_two_critical_break", "set_two"},
- {"early_return", "exit"},
- {"set_one_and_escape", "set_two"},
- {"set_two", "escape"},
- {"escape", "exit"}}));
-#define GET_BLOCK(name) HBasicBlock* name = blks.Get(#name)
- GET_BLOCK(entry);
- GET_BLOCK(exit);
- GET_BLOCK(escape);
- GET_BLOCK(early_return);
- GET_BLOCK(set_one_and_escape);
- GET_BLOCK(set_two);
- GET_BLOCK(set_two_critical_break);
-#undef GET_BLOCK
- EnsurePredecessorOrder(set_two, {set_one_and_escape, set_two_critical_break});
- HInstruction* int_val = MakeParam(DataType::Type::kInt32);
- HInstruction* c0 = graph_->GetIntConstant(0);
- HInstruction* c4 = graph_->GetIntConstant(4);
- HInstruction* c5 = graph_->GetIntConstant(5);
-
- HInstruction* cls = MakeClassLoad();
- HInstruction* new_inst = MakeNewInstance(cls);
- HInstruction* switch_inst = new (GetAllocator()) HPackedSwitch(0, 2, int_val);
- entry->AddInstruction(cls);
- entry->AddInstruction(new_inst);
- entry->AddInstruction(switch_inst);
- ManuallyBuildEnvFor(cls, {});
- new_inst->CopyEnvironmentFrom(cls->GetEnvironment());
-
- HInstruction* store_one = MakeIFieldSet(new_inst, c4, MemberOffset(32));
- HInstruction* escape_one = MakeInvoke(DataType::Type::kVoid, { new_inst });
- HInstruction* goto_one = new (GetAllocator()) HGoto();
- set_one_and_escape->AddInstruction(store_one);
- set_one_and_escape->AddInstruction(escape_one);
- set_one_and_escape->AddInstruction(goto_one);
- escape_one->CopyEnvironmentFrom(cls->GetEnvironment());
-
- HInstruction* goto_crit_break = new (GetAllocator()) HGoto();
- set_two_critical_break->AddInstruction(goto_crit_break);
-
- HInstruction* store_two = MakeIFieldSet(new_inst, c5, MemberOffset(32));
- HInstruction* goto_two = new (GetAllocator()) HGoto();
- set_two->AddInstruction(store_two);
- set_two->AddInstruction(goto_two);
-
- HInstruction* read_early = MakeIFieldGet(new_inst, DataType::Type::kInt32, MemberOffset(32));
- HInstruction* return_early = new (GetAllocator()) HReturn(read_early);
- early_return->AddInstruction(read_early);
- early_return->AddInstruction(return_early);
-
- HInstruction* escape_instruction = MakeInvoke(DataType::Type::kVoid, { new_inst });
- HInstruction* read_escape = MakeIFieldGet(new_inst, DataType::Type::kInt32, MemberOffset(32));
- HInstruction* return_escape = new (GetAllocator()) HReturn(read_escape);
- escape->AddInstruction(escape_instruction);
- escape->AddInstruction(read_escape);
- escape->AddInstruction(return_escape);
- escape_instruction->CopyEnvironmentFrom(cls->GetEnvironment());
-
- SetupExit(exit);
-
- PerformLSEWithPartial(blks);
-
- EXPECT_INS_REMOVED(read_early);
- EXPECT_EQ(return_early->InputAt(0), c0);
- // Each escaping switch path gets its own materialization block.
- // Blocks:
- // early_return(5) -> [exit(4)]
- // entry(3) -> [early_return(5), <Unnamed>(10), <Unnamed>(11)]
- // escape(9) -> [exit(4)]
- // exit(4) -> []
- // set_one_and_escape(6) -> [set_two(8)]
- // set_two(8) -> [escape(9)]
- // set_two_critical_break(7) -> [set_two(8)]
- // <Unnamed>(11) -> [set_two_critical_break(7)]
- // <Unnamed>(10) -> [set_one_and_escape(6)]
- HBasicBlock* materialize_one = set_one_and_escape->GetSinglePredecessor();
- HBasicBlock* materialize_two = set_two_critical_break->GetSinglePredecessor();
- HNewInstance* materialization_ins_one =
- FindSingleInstruction<HNewInstance>(graph_, materialize_one);
- HNewInstance* materialization_ins_two =
- FindSingleInstruction<HNewInstance>(graph_, materialize_two);
- HPhi* new_phi = FindSingleInstruction<HPhi>(graph_, set_two);
- ASSERT_NE(new_phi, nullptr);
- ASSERT_NE(materialization_ins_one, nullptr);
- ASSERT_NE(materialization_ins_two, nullptr);
- EXPECT_INS_EQ(materialization_ins_one, new_phi->InputAt(0));
- EXPECT_INS_EQ(materialization_ins_two, new_phi->InputAt(1));
-
- EXPECT_INS_EQ(store_one->InputAt(0), materialization_ins_one);
- EXPECT_INS_EQ(store_two->InputAt(0), new_phi) << *store_two << " vs " << *new_phi;
- EXPECT_INS_EQ(escape_instruction->InputAt(0), new_phi);
- EXPECT_INS_RETAINED(escape_one);
- EXPECT_INS_EQ(escape_one->InputAt(0), materialization_ins_one);
- EXPECT_INS_RETAINED(escape_instruction);
- EXPECT_INS_RETAINED(read_escape);
- EXPECT_EQ(read_escape->InputAt(0), new_phi) << *new_phi << " vs " << *read_escape->InputAt(0);
-}
-
-// // ENTRY
-// // To be moved
-// obj = new Obj();
-// switch(args) {
-// case a:
-// // set_one
-// obj.a = 5;
-//     // NB: fallthrough
-// case c:
-// // set_two_and_escape
-// obj.a += 4;
-// escape(obj);
-// break;
-// default:
-// obj.a = 10;
-// }
-// return obj.a;
-// EXIT
-TEST_F(LoadStoreEliminationTest, MovePredicatedAlloc5) {
- ScopedObjectAccess soa(Thread::Current());
- VariableSizedHandleScope vshs(soa.Self());
- CreateGraph(&vshs);
-  // Break the critical edge between entry and set_two_and_escape with the
-  // set_two_critical_break node. Graph simplification would do this for us if
-  // we didn't do it manually. This way we have a nice name for debugging and
-  // testing.
- AdjacencyListGraph blks(SetupFromAdjacencyList("entry",
- "exit",
- {{"entry", "set_noescape"},
- {"entry", "set_one"},
- {"entry", "set_two_critical_break"},
- {"set_two_critical_break", "set_two_and_escape"},
- {"set_noescape", "breturn"},
- {"set_one", "set_two_and_escape"},
- {"set_two_and_escape", "breturn"},
- {"breturn", "exit"}}));
-#define GET_BLOCK(name) HBasicBlock* name = blks.Get(#name)
- GET_BLOCK(entry);
- GET_BLOCK(exit);
- GET_BLOCK(breturn);
- GET_BLOCK(set_noescape);
- GET_BLOCK(set_one);
- GET_BLOCK(set_two_and_escape);
- GET_BLOCK(set_two_critical_break);
-#undef GET_BLOCK
- EnsurePredecessorOrder(set_two_and_escape, {set_one, set_two_critical_break});
- EnsurePredecessorOrder(breturn, {set_two_and_escape, set_noescape});
- HInstruction* int_val = MakeParam(DataType::Type::kInt32);
- HInstruction* c0 = graph_->GetIntConstant(0);
- HInstruction* c4 = graph_->GetIntConstant(4);
- HInstruction* c5 = graph_->GetIntConstant(5);
- HInstruction* c10 = graph_->GetIntConstant(10);
-
- HInstruction* cls = MakeClassLoad();
- HInstruction* new_inst = MakeNewInstance(cls);
- HInstruction* switch_inst = new (GetAllocator()) HPackedSwitch(0, 2, int_val);
- entry->AddInstruction(cls);
- entry->AddInstruction(new_inst);
- entry->AddInstruction(switch_inst);
- ManuallyBuildEnvFor(cls, {});
- new_inst->CopyEnvironmentFrom(cls->GetEnvironment());
-
- HInstruction* store_one = MakeIFieldSet(new_inst, c5, MemberOffset(32));
- HInstruction* goto_one = new (GetAllocator()) HGoto();
- set_one->AddInstruction(store_one);
- set_one->AddInstruction(goto_one);
-
- HInstruction* goto_crit_break = new (GetAllocator()) HGoto();
- set_two_critical_break->AddInstruction(goto_crit_break);
-
- HInstruction* get_two = MakeIFieldGet(new_inst, DataType::Type::kInt32, MemberOffset(32));
- HInstruction* add_two = new (GetAllocator()) HAdd(DataType::Type::kInt32, get_two, c4);
- HInstruction* store_two = MakeIFieldSet(new_inst, add_two, MemberOffset(32));
- HInstruction* escape_two = MakeInvoke(DataType::Type::kVoid, {new_inst});
- HInstruction* goto_two = new (GetAllocator()) HGoto();
- set_two_and_escape->AddInstruction(get_two);
- set_two_and_escape->AddInstruction(add_two);
- set_two_and_escape->AddInstruction(store_two);
- set_two_and_escape->AddInstruction(escape_two);
- set_two_and_escape->AddInstruction(goto_two);
- escape_two->CopyEnvironmentFrom(cls->GetEnvironment());
-
- HInstruction* store_noescape = MakeIFieldSet(new_inst, c10, MemberOffset(32));
- HInstruction* goto_noescape = new (GetAllocator()) HGoto();
- set_noescape->AddInstruction(store_noescape);
- set_noescape->AddInstruction(goto_noescape);
-
- HInstruction* read_breturn = MakeIFieldGet(new_inst, DataType::Type::kInt32, MemberOffset(32));
- HInstruction* return_breturn = new (GetAllocator()) HReturn(read_breturn);
- breturn->AddInstruction(read_breturn);
- breturn->AddInstruction(return_breturn);
-
- SetupExit(exit);
-
- PerformLSEWithPartial(blks);
-
- // Normal LSE can get rid of these two.
- EXPECT_INS_REMOVED(store_one);
- EXPECT_INS_REMOVED(get_two);
- EXPECT_INS_RETAINED(add_two);
- EXPECT_TRUE(add_two->InputAt(0)->IsPhi());
- EXPECT_INS_EQ(add_two->InputAt(0)->InputAt(0), c5);
- EXPECT_INS_EQ(add_two->InputAt(0)->InputAt(1), c0);
- EXPECT_INS_EQ(add_two->InputAt(1), c4);
-
- HBasicBlock* materialize_one = set_one->GetSinglePredecessor();
- HBasicBlock* materialize_two = set_two_critical_break->GetSinglePredecessor();
- HNewInstance* materialization_ins_one =
- FindSingleInstruction<HNewInstance>(graph_, materialize_one);
- HNewInstance* materialization_ins_two =
- FindSingleInstruction<HNewInstance>(graph_, materialize_two);
- std::vector<HPhi*> phis;
- std::tie(phis) = FindAllInstructions<HPhi>(graph_, set_two_and_escape);
- HPhi* new_phi = FindOrNull(
- phis.begin(), phis.end(), [&](auto p) { return p->GetType() == DataType::Type::kReference; });
- ASSERT_NE(new_phi, nullptr);
- ASSERT_NE(materialization_ins_one, nullptr);
- ASSERT_NE(materialization_ins_two, nullptr);
- EXPECT_INS_EQ(materialization_ins_one, new_phi->InputAt(0));
- EXPECT_INS_EQ(materialization_ins_two, new_phi->InputAt(1));
-
- HPredicatedInstanceFieldGet* pred_get =
- FindSingleInstruction<HPredicatedInstanceFieldGet>(graph_, breturn);
- EXPECT_TRUE(pred_get->GetTarget()->IsPhi());
- EXPECT_INS_EQ(pred_get->GetTarget()->InputAt(0), new_phi);
- EXPECT_INS_EQ(pred_get->GetTarget()->InputAt(1), graph_->GetNullConstant());
-
- EXPECT_INS_EQ(pred_get->GetDefaultValue()->InputAt(0), c0);
- EXPECT_INS_EQ(pred_get->GetDefaultValue()->InputAt(1), c10);
-}
-
-// // ENTRY
-// obj = new Obj();
-// if (parameter_value) {
-// // LEFT
-// // DO NOT ELIMINATE
-// obj.field = 1;
-// escape(obj);
-// return obj.field;
-// } else {
-// // RIGHT
-// // ELIMINATE
-// obj.field = 2;
-// return obj.field;
-// }
-// EXIT
-TEST_F(LoadStoreEliminationTest, PartialLoadElimination3) {
- ScopedObjectAccess soa(Thread::Current());
- VariableSizedHandleScope vshs(soa.Self());
- CreateGraph(&vshs);
- AdjacencyListGraph blks(SetupFromAdjacencyList(
- "entry",
- "exit",
- {{"entry", "left"}, {"entry", "right"}, {"left", "exit"}, {"right", "exit"}}));
-#define GET_BLOCK(name) HBasicBlock* name = blks.Get(#name)
- GET_BLOCK(entry);
- GET_BLOCK(exit);
- GET_BLOCK(left);
- GET_BLOCK(right);
-#undef GET_BLOCK
- HInstruction* bool_value = MakeParam(DataType::Type::kBool);
- HInstruction* c1 = graph_->GetIntConstant(1);
- HInstruction* c2 = graph_->GetIntConstant(2);
-
- HInstruction* cls = MakeClassLoad();
- HInstruction* new_inst = MakeNewInstance(cls);
- HInstruction* if_inst = new (GetAllocator()) HIf(bool_value);
- entry->AddInstruction(cls);
- entry->AddInstruction(new_inst);
- entry->AddInstruction(if_inst);
- ManuallyBuildEnvFor(cls, {});
- new_inst->CopyEnvironmentFrom(cls->GetEnvironment());
-
- HInstruction* write_left = MakeIFieldSet(new_inst, c1, MemberOffset(32));
- HInstruction* call_left = MakeInvoke(DataType::Type::kVoid, { new_inst });
- HInstruction* read_left = MakeIFieldGet(new_inst, DataType::Type::kInt32, MemberOffset(32));
- HInstruction* return_left = new (GetAllocator()) HReturn(read_left);
- left->AddInstruction(write_left);
- left->AddInstruction(call_left);
- left->AddInstruction(read_left);
- left->AddInstruction(return_left);
- call_left->CopyEnvironmentFrom(cls->GetEnvironment());
-
- HInstruction* write_right = MakeIFieldSet(new_inst, c2, MemberOffset(32));
- HInstruction* read_right = MakeIFieldGet(new_inst, DataType::Type::kInt32, MemberOffset(32));
- HInstruction* return_right = new (GetAllocator()) HReturn(read_right);
- right->AddInstruction(write_right);
- right->AddInstruction(read_right);
- right->AddInstruction(return_right);
-
- SetupExit(exit);
-
- // PerformLSE expects this to be empty.
- graph_->ClearDominanceInformation();
- PerformLSE();
-
- EXPECT_INS_REMOVED(read_right);
- EXPECT_INS_REMOVED(write_right);
- EXPECT_INS_RETAINED(write_left);
- EXPECT_INS_RETAINED(call_left);
- EXPECT_INS_RETAINED(read_left);
-}
-
-// // ENTRY
-// obj = new Obj();
-// if (parameter_value) {
-// // LEFT
-// // DO NOT ELIMINATE
-// obj.field = 1;
-// while (true) {
-// bool esc = escape(obj);
-// // DO NOT ELIMINATE
-// obj.field = 3;
-// if (esc) break;
-// }
-// // ELIMINATE.
-// return obj.field;
-// } else {
-// // RIGHT
-// // ELIMINATE
-// obj.field = 2;
-// return obj.field;
-// }
-// EXIT
-TEST_F(LoadStoreEliminationTest, PartialLoadElimination4) {
- ScopedObjectAccess soa(Thread::Current());
- VariableSizedHandleScope vshs(soa.Self());
- CreateGraph(&vshs);
- AdjacencyListGraph blks(SetupFromAdjacencyList("entry",
- "exit",
- {{"entry", "entry_post"},
- {"entry_post", "right"},
- {"right", "exit"},
- {"entry_post", "left_pre"},
- {"left_pre", "left_loop"},
- {"left_loop", "left_loop"},
- {"left_loop", "left_finish"},
- {"left_finish", "exit"}}));
-#define GET_BLOCK(name) HBasicBlock* name = blks.Get(#name)
- GET_BLOCK(entry);
- GET_BLOCK(entry_post);
- GET_BLOCK(exit);
- GET_BLOCK(left_pre);
- GET_BLOCK(left_loop);
- GET_BLOCK(left_finish);
- GET_BLOCK(right);
-#undef GET_BLOCK
-  // Left-loop's first successor is the break.
- if (left_loop->GetSuccessors()[0] != left_finish) {
- left_loop->SwapSuccessors();
- }
- HInstruction* bool_value = MakeParam(DataType::Type::kBool);
- HInstruction* c1 = graph_->GetIntConstant(1);
- HInstruction* c2 = graph_->GetIntConstant(2);
- HInstruction* c3 = graph_->GetIntConstant(3);
-
- HInstruction* cls = MakeClassLoad();
- HInstruction* new_inst = MakeNewInstance(cls);
- HInstruction* goto_entry = new (GetAllocator()) HGoto();
- entry->AddInstruction(cls);
- entry->AddInstruction(new_inst);
- entry->AddInstruction(goto_entry);
- ManuallyBuildEnvFor(cls, {});
- new_inst->CopyEnvironmentFrom(cls->GetEnvironment());
-
- HInstruction* if_inst = new (GetAllocator()) HIf(bool_value);
- entry_post->AddInstruction(if_inst);
-
- HInstruction* write_left_pre = MakeIFieldSet(new_inst, c1, MemberOffset(32));
- HInstruction* goto_left_pre = new (GetAllocator()) HGoto();
- left_pre->AddInstruction(write_left_pre);
- left_pre->AddInstruction(goto_left_pre);
-
- HInstruction* suspend_left_loop = new (GetAllocator()) HSuspendCheck();
- HInstruction* call_left_loop = MakeInvoke(DataType::Type::kBool, { new_inst });
- HInstruction* write_left_loop = MakeIFieldSet(new_inst, c3, MemberOffset(32));
- HInstruction* if_left_loop = new (GetAllocator()) HIf(call_left_loop);
- left_loop->AddInstruction(suspend_left_loop);
- left_loop->AddInstruction(call_left_loop);
- left_loop->AddInstruction(write_left_loop);
- left_loop->AddInstruction(if_left_loop);
- suspend_left_loop->CopyEnvironmentFrom(cls->GetEnvironment());
- call_left_loop->CopyEnvironmentFrom(cls->GetEnvironment());
-
- HInstruction* read_left_end = MakeIFieldGet(new_inst, DataType::Type::kInt32, MemberOffset(32));
- HInstruction* return_left_end = new (GetAllocator()) HReturn(read_left_end);
- left_finish->AddInstruction(read_left_end);
- left_finish->AddInstruction(return_left_end);
-
- HInstruction* write_right = MakeIFieldSet(new_inst, c2, MemberOffset(32));
- HInstruction* read_right = MakeIFieldGet(new_inst, DataType::Type::kInt32, MemberOffset(32));
- HInstruction* return_right = new (GetAllocator()) HReturn(read_right);
- right->AddInstruction(write_right);
- right->AddInstruction(read_right);
- right->AddInstruction(return_right);
-
- SetupExit(exit);
-
- // PerformLSE expects this to be empty.
- graph_->ClearDominanceInformation();
- PerformLSE();
-
- EXPECT_INS_RETAINED(write_left_pre);
- EXPECT_INS_REMOVED(read_right);
- EXPECT_INS_REMOVED(write_right);
- EXPECT_INS_RETAINED(write_left_loop);
- EXPECT_INS_RETAINED(call_left_loop);
- EXPECT_INS_REMOVED(read_left_end);
-}
-
-// // ENTRY
-// obj = new Obj();
-// if (parameter_value) {
-// // LEFT
-// // DO NOT ELIMINATE
-// escape(obj);
-// obj.field = 1;
-// } else {
-// // RIGHT
-// // obj hasn't escaped so it's invisible.
-// // ELIMINATE
-// obj.field = 2;
-// noescape();
-// }
-// EXIT
-// ELIMINATE
-// return obj.field
-TEST_F(LoadStoreEliminationTest, PartialLoadElimination5) {
- ScopedObjectAccess soa(Thread::Current());
- VariableSizedHandleScope vshs(soa.Self());
- CreateGraph(&vshs);
- AdjacencyListGraph blks(SetupFromAdjacencyList("entry",
- "exit",
- {{"entry", "left"},
- {"entry", "right"},
- {"left", "breturn"},
- {"right", "breturn"},
- {"breturn", "exit"}}));
-#define GET_BLOCK(name) HBasicBlock* name = blks.Get(#name)
- GET_BLOCK(entry);
- GET_BLOCK(exit);
- GET_BLOCK(breturn);
- GET_BLOCK(left);
- GET_BLOCK(right);
-#undef GET_BLOCK
- HInstruction* bool_value = MakeParam(DataType::Type::kBool);
- HInstruction* c1 = graph_->GetIntConstant(1);
- HInstruction* c2 = graph_->GetIntConstant(2);
-
- HInstruction* cls = MakeClassLoad();
- HInstruction* new_inst = MakeNewInstance(cls);
- HInstruction* if_inst = new (GetAllocator()) HIf(bool_value);
- entry->AddInstruction(cls);
- entry->AddInstruction(new_inst);
- entry->AddInstruction(if_inst);
- ManuallyBuildEnvFor(cls, {});
- new_inst->CopyEnvironmentFrom(cls->GetEnvironment());
-
- HInstruction* call_left = MakeInvoke(DataType::Type::kVoid, { new_inst });
- HInstruction* write_left = MakeIFieldSet(new_inst, c1, MemberOffset(32));
- HInstruction* goto_left = new (GetAllocator()) HGoto();
- left->AddInstruction(call_left);
- left->AddInstruction(write_left);
- left->AddInstruction(goto_left);
- call_left->CopyEnvironmentFrom(cls->GetEnvironment());
-
- HInstruction* write_right = MakeIFieldSet(new_inst, c2, MemberOffset(32));
- HInstruction* call_right = MakeInvoke(DataType::Type::kVoid, {});
- HInstruction* goto_right = new (GetAllocator()) HGoto();
- right->AddInstruction(write_right);
- right->AddInstruction(call_right);
- right->AddInstruction(goto_right);
- call_right->CopyEnvironmentFrom(cls->GetEnvironment());
-
- HInstruction* read_bottom = MakeIFieldGet(new_inst, DataType::Type::kInt32, MemberOffset(32));
- HInstruction* return_exit = new (GetAllocator()) HReturn(read_bottom);
- breturn->AddInstruction(read_bottom);
- breturn->AddInstruction(return_exit);
-
- SetupExit(exit);
-
- // PerformLSE expects this to be empty.
- graph_->ClearDominanceInformation();
- PerformLSE();
-
- EXPECT_INS_REMOVED(read_bottom);
- EXPECT_INS_REMOVED(write_right);
- EXPECT_INS_RETAINED(write_left);
- EXPECT_INS_RETAINED(call_left);
- EXPECT_INS_RETAINED(call_right);
-}
-
-// // ENTRY
-// obj = new Obj();
-// // Eliminate this one. Object hasn't escaped yet so it's safe.
-// obj.field = 3;
-// noescape();
-// if (parameter_value) {
-// // LEFT
-// // DO NOT ELIMINATE
-// obj.field = 5;
-// escape(obj);
-// obj.field = 1;
-// } else {
-// // RIGHT
-// // ELIMINATE
-// obj.field = 2;
-// }
-// EXIT
-// ELIMINATE
-// return obj.field
-TEST_F(LoadStoreEliminationTest, PartialLoadElimination6) {
- ScopedObjectAccess soa(Thread::Current());
- VariableSizedHandleScope vshs(soa.Self());
- CreateGraph(&vshs);
- AdjacencyListGraph blks(SetupFromAdjacencyList("entry",
- "exit",
- {{"entry", "left"},
- {"entry", "right"},
- {"left", "breturn"},
- {"right", "breturn"},
- {"breturn", "exit"}}));
-#define GET_BLOCK(name) HBasicBlock* name = blks.Get(#name)
- GET_BLOCK(entry);
- GET_BLOCK(exit);
- GET_BLOCK(breturn);
- GET_BLOCK(left);
- GET_BLOCK(right);
-#undef GET_BLOCK
- HInstruction* bool_value = MakeParam(DataType::Type::kBool);
- HInstruction* c1 = graph_->GetIntConstant(1);
- HInstruction* c2 = graph_->GetIntConstant(2);
- HInstruction* c3 = graph_->GetIntConstant(3);
- HInstruction* c5 = graph_->GetIntConstant(5);
-
- HInstruction* cls = MakeClassLoad();
- HInstruction* new_inst = MakeNewInstance(cls);
- HInstruction* write_entry = MakeIFieldSet(new_inst, c3, MemberOffset(32));
- HInstruction* call_entry = MakeInvoke(DataType::Type::kVoid, {});
- HInstruction* if_inst = new (GetAllocator()) HIf(bool_value);
- entry->AddInstruction(cls);
- entry->AddInstruction(new_inst);
- entry->AddInstruction(write_entry);
- entry->AddInstruction(call_entry);
- entry->AddInstruction(if_inst);
- ManuallyBuildEnvFor(cls, {});
- new_inst->CopyEnvironmentFrom(cls->GetEnvironment());
- call_entry->CopyEnvironmentFrom(cls->GetEnvironment());
-
- HInstruction* write_left_start = MakeIFieldSet(new_inst, c5, MemberOffset(32));
- HInstruction* call_left = MakeInvoke(DataType::Type::kVoid, { new_inst });
- HInstruction* write_left = MakeIFieldSet(new_inst, c1, MemberOffset(32));
- HInstruction* goto_left = new (GetAllocator()) HGoto();
- left->AddInstruction(write_left_start);
- left->AddInstruction(call_left);
- left->AddInstruction(write_left);
- left->AddInstruction(goto_left);
- call_left->CopyEnvironmentFrom(cls->GetEnvironment());
-
- HInstruction* write_right = MakeIFieldSet(new_inst, c2, MemberOffset(32));
- HInstruction* goto_right = new (GetAllocator()) HGoto();
- right->AddInstruction(write_right);
- right->AddInstruction(goto_right);
-
- HInstruction* read_bottom = MakeIFieldGet(new_inst, DataType::Type::kInt32, MemberOffset(32));
- HInstruction* return_exit = new (GetAllocator()) HReturn(read_bottom);
- breturn->AddInstruction(read_bottom);
- breturn->AddInstruction(return_exit);
-
- SetupExit(exit);
-
- // PerformLSE expects this to be empty.
- graph_->ClearDominanceInformation();
- PerformLSE();
-
- EXPECT_INS_REMOVED(read_bottom);
- EXPECT_INS_REMOVED(write_right);
- EXPECT_INS_REMOVED(write_entry);
- EXPECT_INS_RETAINED(write_left_start);
- EXPECT_INS_RETAINED(write_left);
- EXPECT_INS_RETAINED(call_left);
- EXPECT_INS_RETAINED(call_entry);
-}
-
-// // ENTRY
-// obj = new Obj();
-// if (parameter_value) {
-// // LEFT
-// // DO NOT ELIMINATE
// obj.field = 1;
// while (true) {
// bool esc = escape(obj);
@@ -4471,7 +2119,7 @@ TEST_F(LoadStoreEliminationTest, PartialLoadPreserved3) {
left_pre->AddInstruction(goto_left_pre);
HInstruction* suspend_left_loop = new (GetAllocator()) HSuspendCheck();
- HInstruction* call_left_loop = MakeInvoke(DataType::Type::kBool, { new_inst });
+ HInstruction* call_left_loop = MakeInvoke(DataType::Type::kBool, {new_inst});
HInstruction* if_left_loop = new (GetAllocator()) HIf(call_left_loop);
left_loop->AddInstruction(suspend_left_loop);
left_loop->AddInstruction(call_left_loop);
@@ -4496,7 +2144,7 @@ TEST_F(LoadStoreEliminationTest, PartialLoadPreserved3) {
SetupExit(exit);
- PerformLSENoPartial(blks);
+ PerformLSE(blks);
EXPECT_INS_RETAINED(write_left_pre) << *write_left_pre;
EXPECT_INS_RETAINED(read_return) << *read_return;
@@ -4588,7 +2236,7 @@ TEST_F(LoadStoreEliminationTest, DISABLED_PartialLoadPreserved4) {
call_left_loop->CopyEnvironmentFrom(cls->GetEnvironment());
HInstruction* write_right = MakeIFieldSet(new_inst, c2, MemberOffset(32));
- HInstruction* call_right = MakeInvoke(DataType::Type::kBool, { new_inst });
+ HInstruction* call_right = MakeInvoke(DataType::Type::kBool, {new_inst});
HInstruction* goto_right = new (GetAllocator()) HGoto();
right->AddInstruction(write_right);
right->AddInstruction(call_right);
@@ -4602,7 +2250,7 @@ TEST_F(LoadStoreEliminationTest, DISABLED_PartialLoadPreserved4) {
SetupExit(exit);
- PerformLSENoPartial(blks);
+ PerformLSE(blks);
EXPECT_INS_RETAINED(read_return);
EXPECT_INS_RETAINED(write_right);
@@ -4688,7 +2336,7 @@ TEST_F(LoadStoreEliminationTest, PartialLoadPreserved5) {
SetupExit(exit);
- PerformLSENoPartial(blks);
+ PerformLSE(blks);
EXPECT_INS_RETAINED(read_bottom);
EXPECT_INS_RETAINED(write_right);
@@ -4771,7 +2419,7 @@ TEST_F(LoadStoreEliminationTest, DISABLED_PartialLoadPreserved6) {
SetupExit(exit);
- PerformLSENoPartial(blks);
+ PerformLSE(blks);
EXPECT_INS_REMOVED(read_bottom);
EXPECT_INS_REMOVED(write_right);
@@ -4780,3894 +2428,4 @@ TEST_F(LoadStoreEliminationTest, DISABLED_PartialLoadPreserved6) {
EXPECT_INS_RETAINED(call_left);
EXPECT_INS_RETAINED(call_entry);
}
-
-// // ENTRY
-// // MOVED TO MATERIALIZATION BLOCK
-// obj = new Obj();
-// ELIMINATE, moved to materialization block. Kept by escape.
-// obj.field = 3;
-// // Make sure this graph isn't broken
-// if (obj ==/!= (STATIC.VALUE|obj|null)) {
-// // partial_BLOCK
-// // REMOVE (either from unreachable or normal PHI creation)
-// obj.field = 4;
-// }
-// if (parameter_value) {
-// // LEFT
-// // DO NOT ELIMINATE
-// escape(obj);
-// } else {
-// // RIGHT
-// // ELIMINATE
-// obj.field = 2;
-// }
-// EXIT
-// PREDICATED GET
-// return obj.field
-TEST_P(PartialComparisonTestGroup, PartialComparisonBeforeCohort) {
- ScopedObjectAccess soa(Thread::Current());
- VariableSizedHandleScope vshs(soa.Self());
- CreateGraph(/*handles=*/&vshs);
- AdjacencyListGraph blks(SetupFromAdjacencyList("entry",
- "exit",
- {{"entry", "first_block"},
- {"first_block", "critical_break"},
- {"first_block", "partial"},
- {"partial", "merge"},
- {"critical_break", "merge"},
- {"merge", "left"},
- {"merge", "right"},
- {"left", "breturn"},
- {"right", "breturn"},
- {"breturn", "exit"}}));
-#define GET_BLOCK(name) HBasicBlock* name = blks.Get(#name)
- GET_BLOCK(first_block);
- GET_BLOCK(merge);
- GET_BLOCK(partial);
- GET_BLOCK(critical_break);
- GET_BLOCK(exit);
- GET_BLOCK(breturn);
- GET_BLOCK(left);
- GET_BLOCK(right);
-#undef GET_BLOCK
- HInstruction* bool_value = MakeParam(DataType::Type::kBool);
- HInstruction* c2 = graph_->GetIntConstant(2);
- HInstruction* c3 = graph_->GetIntConstant(3);
- HInstruction* c4 = graph_->GetIntConstant(4);
-
- HInstruction* cls = MakeClassLoad();
- HInstruction* new_inst = MakeNewInstance(cls);
- HInstruction* write_entry = MakeIFieldSet(new_inst, c3, MemberOffset(32));
- ComparisonInstructions cmp_instructions = GetComparisonInstructions(new_inst);
- HInstruction* if_inst = new (GetAllocator()) HIf(cmp_instructions.cmp_);
- first_block->AddInstruction(cls);
- first_block->AddInstruction(new_inst);
- first_block->AddInstruction(write_entry);
- cmp_instructions.AddSetup(first_block);
- first_block->AddInstruction(cmp_instructions.cmp_);
- first_block->AddInstruction(if_inst);
- ManuallyBuildEnvFor(cls, {});
- cmp_instructions.AddEnvironment(cls->GetEnvironment());
- new_inst->CopyEnvironmentFrom(cls->GetEnvironment());
-
- HInstruction* write_partial = MakeIFieldSet(new_inst, c4, MemberOffset(32));
- HInstruction* goto_partial = new (GetAllocator()) HGoto();
- partial->AddInstruction(write_partial);
- partial->AddInstruction(goto_partial);
-
- HInstruction* goto_crit_break = new (GetAllocator()) HGoto();
- critical_break->AddInstruction(goto_crit_break);
-
- HInstruction* if_merge = new (GetAllocator()) HIf(bool_value);
- merge->AddInstruction(if_merge);
-
- HInstruction* call_left = MakeInvoke(DataType::Type::kVoid, { new_inst });
- HInstruction* goto_left = new (GetAllocator()) HGoto();
- left->AddInstruction(call_left);
- left->AddInstruction(goto_left);
- call_left->CopyEnvironmentFrom(cls->GetEnvironment());
-
- HInstruction* write_right = MakeIFieldSet(new_inst, c2, MemberOffset(32));
- HInstruction* goto_right = new (GetAllocator()) HGoto();
- right->AddInstruction(write_right);
- right->AddInstruction(goto_right);
-
- HInstruction* read_bottom = MakeIFieldGet(new_inst, DataType::Type::kInt32, MemberOffset(32));
- HInstruction* return_exit = new (GetAllocator()) HReturn(read_bottom);
- breturn->AddInstruction(read_bottom);
- breturn->AddInstruction(return_exit);
-
- SetupExit(exit);
-
- PerformLSEWithPartial(blks);
-
- std::vector<HPhi*> merges;
- HPredicatedInstanceFieldGet* pred_get;
- HInstanceFieldSet* init_set;
- std::tie(pred_get, init_set) =
- FindSingleInstructions<HPredicatedInstanceFieldGet, HInstanceFieldSet>(graph_);
- std::tie(merges) = FindAllInstructions<HPhi>(graph_);
- ASSERT_EQ(merges.size(), 3u);
- HPhi* merge_value_return = FindOrNull(merges.begin(), merges.end(), [&](HPhi* p) {
- return p->GetType() == DataType::Type::kInt32 && p->GetBlock() == breturn;
- });
- HPhi* merge_value_top = FindOrNull(merges.begin(), merges.end(), [&](HPhi* p) {
- return p->GetType() == DataType::Type::kInt32 && p->GetBlock() != breturn;
- });
- HPhi* merge_alloc = FindOrNull(merges.begin(), merges.end(), [](HPhi* p) {
- return p->GetType() == DataType::Type::kReference;
- });
- EXPECT_INS_REMOVED(read_bottom);
- EXPECT_INS_REMOVED(write_entry);
- EXPECT_INS_REMOVED(write_partial);
- EXPECT_INS_RETAINED(call_left);
- CheckFinalInstruction(if_inst->InputAt(0), ComparisonPlacement::kBeforeEscape);
- EXPECT_INS_EQ(init_set->InputAt(1), merge_value_top);
- EXPECT_INS_EQ(pred_get->GetTarget(), merge_alloc);
- EXPECT_INS_EQ(pred_get->GetDefaultValue(), merge_value_return);
-}
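A hedged sketch of the idea behind the "comparison before the cohort" cases, in plain C++ rather than ART IR. Obj and escape() are hypothetical stand-ins; only the result shape is shown, and only for the flavours where the comparison folds to false (a fresh, still-virtual allocation cannot equal null or a pre-existing reference). For the self-comparison flavour the comparison folds to true and the known field value entering the cohort is 4 instead of 3.

struct Obj { int field; };
Obj* g_sink = nullptr;
void escape(Obj* o) { g_sink = o; }  // opaque escape

int After(bool flag) {
  // The comparison was resolved while the object was still virtual, so the
  // field value entering the escape cohort is already known.
  int value = 3;
  Obj* maybe_obj = nullptr;    // phi: materialized object or null
  if (flag) {
    maybe_obj = new Obj();
    maybe_obj->field = value;  // init store moved to the materialization block
    escape(maybe_obj);
  } else {
    value = 2;
  }
  // Predicated get: read through the object if it exists, else use `value`.
  return maybe_obj != nullptr ? maybe_obj->field : value;
}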
-
-// // ENTRY
-// // MOVED TO MATERIALIZATION BLOCK
-// obj = new Obj();
-// ELIMINATE, moved to materialization block. Kept by escape.
-// obj.field = 3;
-// // Make sure this graph isn't broken
-// if (parameter_value) {
-// if (obj ==/!= (STATIC.VALUE|obj|null)) {
-// // partial_BLOCK
-// obj.field = 4;
-// }
-// // LEFT
-// // DO NOT ELIMINATE
-// escape(obj);
-// } else {
-// // RIGHT
-// // ELIMINATE
-// obj.field = 2;
-// }
-// EXIT
-// PREDICATED GET
-// return obj.field
-TEST_P(PartialComparisonTestGroup, PartialComparisonInCohortBeforeEscape) {
- ScopedObjectAccess soa(Thread::Current());
- VariableSizedHandleScope vshs(soa.Self());
- CreateGraph(/*handles=*/&vshs);
- AdjacencyListGraph blks(SetupFromAdjacencyList("entry",
- "exit",
- {{"entry", "left_begin"},
- {"left_begin", "partial"},
- {"left_begin", "left_crit_break"},
- {"left_crit_break", "left"},
- {"partial", "left"},
- {"entry", "right"},
- {"left", "breturn"},
- {"right", "breturn"},
- {"breturn", "exit"}}));
-#define GET_BLOCK(name) HBasicBlock* name = blks.Get(#name)
- GET_BLOCK(entry);
- GET_BLOCK(partial);
- GET_BLOCK(left_begin);
- GET_BLOCK(exit);
- GET_BLOCK(breturn);
- GET_BLOCK(left);
- GET_BLOCK(left_crit_break);
- GET_BLOCK(right);
-#undef GET_BLOCK
- EnsurePredecessorOrder(left, {left_crit_break, partial});
- HInstruction* bool_value = MakeParam(DataType::Type::kBool);
- HInstruction* c2 = graph_->GetIntConstant(2);
- HInstruction* c3 = graph_->GetIntConstant(3);
- HInstruction* c4 = graph_->GetIntConstant(4);
-
- HInstruction* cls = MakeClassLoad();
- HInstruction* new_inst = MakeNewInstance(cls);
- HInstruction* write_entry = MakeIFieldSet(new_inst, c3, MemberOffset(32));
- HInstruction* if_inst = new (GetAllocator()) HIf(bool_value);
- entry->AddInstruction(cls);
- entry->AddInstruction(new_inst);
- entry->AddInstruction(write_entry);
- entry->AddInstruction(if_inst);
- ManuallyBuildEnvFor(cls, {});
- new_inst->CopyEnvironmentFrom(cls->GetEnvironment());
-
- ComparisonInstructions cmp_instructions = GetComparisonInstructions(new_inst);
- HInstruction* if_left_begin = new (GetAllocator()) HIf(cmp_instructions.cmp_);
- cmp_instructions.AddSetup(left_begin);
- left_begin->AddInstruction(cmp_instructions.cmp_);
- left_begin->AddInstruction(if_left_begin);
- cmp_instructions.AddEnvironment(cls->GetEnvironment());
-
- left_crit_break->AddInstruction(new (GetAllocator()) HGoto());
-
- HInstruction* write_partial = MakeIFieldSet(new_inst, c4, MemberOffset(32));
- HInstruction* goto_partial = new (GetAllocator()) HGoto();
- partial->AddInstruction(write_partial);
- partial->AddInstruction(goto_partial);
-
- HInstruction* call_left = MakeInvoke(DataType::Type::kVoid, { new_inst });
- HInstruction* goto_left = new (GetAllocator()) HGoto();
- left->AddInstruction(call_left);
- left->AddInstruction(goto_left);
- call_left->CopyEnvironmentFrom(cls->GetEnvironment());
-
- HInstruction* write_right = MakeIFieldSet(new_inst, c2, MemberOffset(32));
- HInstruction* goto_right = new (GetAllocator()) HGoto();
- right->AddInstruction(write_right);
- right->AddInstruction(goto_right);
-
- HInstruction* read_bottom = MakeIFieldGet(new_inst, DataType::Type::kInt32, MemberOffset(32));
- HInstruction* return_exit = new (GetAllocator()) HReturn(read_bottom);
- breturn->AddInstruction(read_bottom);
- breturn->AddInstruction(return_exit);
-
- SetupExit(exit);
-
- PerformLSEWithPartial(blks);
-
- std::vector<HPhi*> merges;
- HInstanceFieldSet* init_set =
- FindSingleInstruction<HInstanceFieldSet>(graph_, left_begin->GetSinglePredecessor());
- HInstanceFieldSet* partial_set = FindSingleInstruction<HInstanceFieldSet>(graph_, partial);
- HPredicatedInstanceFieldGet* pred_get =
- FindSingleInstruction<HPredicatedInstanceFieldGet>(graph_);
- std::tie(merges) = FindAllInstructions<HPhi>(graph_);
- ASSERT_EQ(merges.size(), 2u);
- HPhi* merge_value_return = FindOrNull(merges.begin(), merges.end(), [&](HPhi* p) {
- return p->GetType() == DataType::Type::kInt32;
- });
- HPhi* merge_alloc = FindOrNull(merges.begin(), merges.end(), [](HPhi* p) {
- return p->GetType() == DataType::Type::kReference;
- });
- EXPECT_EQ(merge_value_return->GetBlock(), breturn)
- << blks.GetName(merge_value_return->GetBlock());
- EXPECT_INS_REMOVED(read_bottom);
- EXPECT_INS_REMOVED(write_entry);
- EXPECT_INS_RETAINED(write_partial);
- EXPECT_INS_RETAINED(call_left);
- CheckFinalInstruction(if_left_begin->InputAt(0), ComparisonPlacement::kInEscape);
- EXPECT_INS_EQ(init_set->InputAt(1), c3);
- EXPECT_INS_EQ(partial_set->InputAt(0), init_set->InputAt(0));
- EXPECT_INS_EQ(partial_set->InputAt(1), c4);
- EXPECT_INS_EQ(pred_get->GetTarget(), merge_alloc);
- EXPECT_INS_EQ(pred_get->GetDefaultValue(), merge_value_return);
-}
-
-// // ENTRY
-// // MOVED TO MATERIALIZATION BLOCK
-// obj = new Obj();
-// ELIMINATE, moved to materialization block. Kept by escape.
-// obj.field = 3;
-// // Make sure this graph isn't broken
-// if (parameter_value) {
-// // LEFT
-// // DO NOT ELIMINATE
-// escape(obj);
-// } else {
-// // RIGHT
-// // ELIMINATE
-// obj.field = 2;
-// }
-// if (obj ==/!= (STATIC.VALUE|obj|null)) {
-// // partial_BLOCK
-// obj.field = 4;
-// }
-// EXIT
-// PREDICATED GET
-// return obj.field
-TEST_P(PartialComparisonTestGroup, PartialComparisonAfterCohort) {
- ScopedObjectAccess soa(Thread::Current());
- VariableSizedHandleScope vshs(soa.Self());
- CreateGraph(/*handles=*/&vshs);
- AdjacencyListGraph blks(SetupFromAdjacencyList("entry",
- "exit",
- {{"entry", "left"},
- {"entry", "right"},
- {"left", "merge"},
- {"right", "merge"},
- {"merge", "critical_break"},
- {"critical_break", "breturn"},
- {"merge", "partial"},
- {"partial", "breturn"},
- {"breturn", "exit"}}));
-#define GET_BLOCK(name) HBasicBlock* name = blks.Get(#name)
- GET_BLOCK(entry);
- GET_BLOCK(partial);
- GET_BLOCK(critical_break);
- GET_BLOCK(merge);
- GET_BLOCK(exit);
- GET_BLOCK(breturn);
- GET_BLOCK(left);
- GET_BLOCK(right);
-#undef GET_BLOCK
- EnsurePredecessorOrder(breturn, {critical_break, partial});
- HInstruction* bool_value = MakeParam(DataType::Type::kBool);
- HInstruction* c2 = graph_->GetIntConstant(2);
- HInstruction* c3 = graph_->GetIntConstant(3);
- HInstruction* c4 = graph_->GetIntConstant(4);
-
- HInstruction* cls = MakeClassLoad();
- HInstruction* new_inst = MakeNewInstance(cls);
- HInstruction* write_entry = MakeIFieldSet(new_inst, c3, MemberOffset(32));
- HInstruction* if_inst = new (GetAllocator()) HIf(bool_value);
- entry->AddInstruction(cls);
- entry->AddInstruction(new_inst);
- entry->AddInstruction(write_entry);
- entry->AddInstruction(if_inst);
- ManuallyBuildEnvFor(cls, {});
- new_inst->CopyEnvironmentFrom(cls->GetEnvironment());
-
- HInstruction* call_left = MakeInvoke(DataType::Type::kVoid, { new_inst });
- HInstruction* goto_left = new (GetAllocator()) HGoto();
- left->AddInstruction(call_left);
- left->AddInstruction(goto_left);
- call_left->CopyEnvironmentFrom(cls->GetEnvironment());
-
- HInstruction* write_right = MakeIFieldSet(new_inst, c2, MemberOffset(32));
- HInstruction* goto_right = new (GetAllocator()) HGoto();
- right->AddInstruction(write_right);
- right->AddInstruction(goto_right);
-
- ComparisonInstructions cmp_instructions = GetComparisonInstructions(new_inst);
- HInstruction* if_merge = new (GetAllocator()) HIf(cmp_instructions.cmp_);
- cmp_instructions.AddSetup(merge);
- merge->AddInstruction(cmp_instructions.cmp_);
- merge->AddInstruction(if_merge);
- cmp_instructions.AddEnvironment(cls->GetEnvironment());
-
- HInstanceFieldSet* write_partial = MakeIFieldSet(new_inst, c4, MemberOffset(32));
- HInstruction* goto_partial = new (GetAllocator()) HGoto();
- partial->AddInstruction(write_partial);
- partial->AddInstruction(goto_partial);
-
- HInstruction* goto_crit_break = new (GetAllocator()) HGoto();
- critical_break->AddInstruction(goto_crit_break);
-
- HInstruction* read_bottom = MakeIFieldGet(new_inst, DataType::Type::kInt32, MemberOffset(32));
- HInstruction* return_exit = new (GetAllocator()) HReturn(read_bottom);
- breturn->AddInstruction(read_bottom);
- breturn->AddInstruction(return_exit);
-
- SetupExit(exit);
-
- PerformLSEWithPartial(blks);
-
- std::vector<HPhi*> merges;
- HInstanceFieldSet* init_set =
- FindSingleInstruction<HInstanceFieldSet>(graph_, left->GetSinglePredecessor());
- HPredicatedInstanceFieldGet* pred_get =
- FindSingleInstruction<HPredicatedInstanceFieldGet>(graph_);
- std::tie(merges) = FindAllInstructions<HPhi>(graph_);
- ASSERT_EQ(merges.size(), 3u);
- HPhi* merge_value_return = FindOrNull(merges.begin(), merges.end(), [&](HPhi* p) {
- return p->GetType() == DataType::Type::kInt32 && p->GetBlock() == breturn;
- });
- HPhi* merge_alloc = FindOrNull(merges.begin(), merges.end(), [](HPhi* p) {
- return p->GetType() == DataType::Type::kReference;
- });
- EXPECT_INS_REMOVED(read_bottom);
- EXPECT_INS_REMOVED(write_entry);
- EXPECT_INS_RETAINED(write_partial);
- EXPECT_TRUE(write_partial->GetIsPredicatedSet());
- EXPECT_INS_RETAINED(call_left);
- CheckFinalInstruction(if_merge->InputAt(0), ComparisonPlacement::kAfterEscape);
- EXPECT_INS_EQ(init_set->InputAt(1), c3);
- ASSERT_TRUE(write_partial->InputAt(0)->IsPhi());
- EXPECT_INS_EQ(write_partial->InputAt(0)->AsPhi()->InputAt(0), init_set->InputAt(0));
- EXPECT_INS_EQ(write_partial->InputAt(1), c4);
- EXPECT_INS_EQ(pred_get->GetTarget(), merge_alloc);
- EXPECT_INS_EQ(pred_get->GetDefaultValue(), merge_value_return);
-}
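A sketch (not ART code, using the same hypothetical Obj/escape() stand-ins) of what a predicated set means once the comparison happens after the escape cohort: on the path where the allocation was removed there is nothing to write into, so the retained store is guarded by the materialized reference. Only the obj == other flavour is shown; the null and self flavours fold the comparison instead.

struct Obj { int field; };
Obj* g_sink = nullptr;
void escape(Obj* o) { g_sink = o; }  // opaque escape

int After(bool flag, Obj* other) {
  Obj* maybe_obj = nullptr;  // phi: materialized object on the escaped path, else null
  int value = 3;             // tracked field value of the unmaterialized object
  if (flag) {
    maybe_obj = new Obj();
    maybe_obj->field = 3;    // init store moved to the materialization point
    escape(maybe_obj);
  } else {
    value = 2;
  }
  // The comparison can only hold for a materialized object, so the retained
  // store is predicated on the reference actually existing.
  if (maybe_obj != nullptr && maybe_obj == other) {
    maybe_obj->field = 4;    // the predicated set the test checks for
  }
  // Predicated get: read through the object if it exists, else use `value`.
  return maybe_obj != nullptr ? maybe_obj->field : value;
}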
-
-// // ENTRY
-// // MOVED TO MATERIALIZATION BLOCK
-// obj = new Obj();
-// ELIMINATE, moved to materialization block. Kept by escape.
-// obj.field = 3;
-// // Make sure this graph isn't broken
-// if (parameter_value) {
-// // LEFT
-// // DO NOT ELIMINATE
-// escape(obj);
-// if (obj ==/!= (STATIC.VALUE|obj|null)) {
-// // partial_BLOCK
-// obj.field = 4;
-// }
-// } else {
-// // RIGHT
-// // ELIMINATE
-// obj.field = 2;
-// }
-// EXIT
-// PREDICATED GET
-// return obj.field
-TEST_P(PartialComparisonTestGroup, PartialComparisonInCohortAfterEscape) {
- PartialComparisonKind kind = GetParam();
- ScopedObjectAccess soa(Thread::Current());
- VariableSizedHandleScope vshs(soa.Self());
- CreateGraph(/*handles=*/&vshs);
- AdjacencyListGraph blks(SetupFromAdjacencyList("entry",
- "exit",
- {{"entry", "left"},
- {"left", "partial"},
- {"partial", "left_end"},
- {"left", "left_crit_break"},
- {"left_crit_break", "left_end"},
- {"left_end", "breturn"},
- {"entry", "right"},
- {"right", "breturn"},
- {"breturn", "exit"}}));
-#define GET_BLOCK(name) HBasicBlock* name = blks.Get(#name)
- GET_BLOCK(entry);
- GET_BLOCK(partial);
- GET_BLOCK(left_end);
- GET_BLOCK(exit);
- GET_BLOCK(breturn);
- GET_BLOCK(left);
- GET_BLOCK(left_crit_break);
- GET_BLOCK(right);
-#undef GET_BLOCK
- HInstruction* bool_value = MakeParam(DataType::Type::kBool);
- HInstruction* c2 = graph_->GetIntConstant(2);
- HInstruction* c3 = graph_->GetIntConstant(3);
- HInstruction* c4 = graph_->GetIntConstant(4);
-
- HInstruction* cls = MakeClassLoad();
- HInstruction* new_inst = MakeNewInstance(cls);
- HInstruction* write_entry = MakeIFieldSet(new_inst, c3, MemberOffset(32));
- HInstruction* if_inst = new (GetAllocator()) HIf(bool_value);
- entry->AddInstruction(cls);
- entry->AddInstruction(new_inst);
- entry->AddInstruction(write_entry);
- entry->AddInstruction(if_inst);
- ManuallyBuildEnvFor(cls, {});
- new_inst->CopyEnvironmentFrom(cls->GetEnvironment());
-
- HInstruction* call_left = MakeInvoke(DataType::Type::kVoid, { new_inst });
- ComparisonInstructions cmp_instructions = GetComparisonInstructions(new_inst);
- HInstruction* if_left = new (GetAllocator()) HIf(cmp_instructions.cmp_);
- left->AddInstruction(call_left);
- cmp_instructions.AddSetup(left);
- left->AddInstruction(cmp_instructions.cmp_);
- left->AddInstruction(if_left);
- call_left->CopyEnvironmentFrom(cls->GetEnvironment());
- cmp_instructions.AddEnvironment(cls->GetEnvironment());
- if (if_left->AsIf()->IfTrueSuccessor() != partial) {
- left->SwapSuccessors();
- }
-
- HInstruction* write_partial = MakeIFieldSet(new_inst, c4, MemberOffset(32));
- HInstruction* goto_partial = new (GetAllocator()) HGoto();
- partial->AddInstruction(write_partial);
- partial->AddInstruction(goto_partial);
-
- HInstruction* goto_left_crit_break = new (GetAllocator()) HGoto();
- left_crit_break->AddInstruction(goto_left_crit_break);
-
- HInstruction* goto_left_end = new (GetAllocator()) HGoto();
- left_end->AddInstruction(goto_left_end);
-
- HInstruction* write_right = MakeIFieldSet(new_inst, c2, MemberOffset(32));
- HInstruction* goto_right = new (GetAllocator()) HGoto();
- right->AddInstruction(write_right);
- right->AddInstruction(goto_right);
-
- HInstruction* read_bottom = MakeIFieldGet(new_inst, DataType::Type::kInt32, MemberOffset(32));
- HInstruction* return_exit = new (GetAllocator()) HReturn(read_bottom);
- breturn->AddInstruction(read_bottom);
- breturn->AddInstruction(return_exit);
-
- SetupExit(exit);
-
- PerformLSEWithPartial(blks);
-
- std::vector<HPhi*> merges;
- std::vector<HInstanceFieldSet*> sets;
- HPredicatedInstanceFieldGet* pred_get =
- FindSingleInstruction<HPredicatedInstanceFieldGet>(graph_);
- std::tie(merges, sets) = FindAllInstructions<HPhi, HInstanceFieldSet>(graph_);
- ASSERT_EQ(merges.size(), 2u);
- ASSERT_EQ(sets.size(), 2u);
- HInstanceFieldSet* init_set = FindOrNull(sets.begin(), sets.end(), [&](HInstanceFieldSet* s) {
- return s->GetBlock()->GetSingleSuccessor() == left;
- });
- EXPECT_INS_EQ(init_set->InputAt(1), c3);
- HPhi* merge_value_return = FindOrNull(merges.begin(), merges.end(), [&](HPhi* p) {
- return p->GetType() == DataType::Type::kInt32 && p->GetBlock() == breturn;
- });
- HPhi* merge_alloc = FindOrNull(merges.begin(), merges.end(), [](HPhi* p) {
- return p->GetType() == DataType::Type::kReference;
- });
- EXPECT_INS_REMOVED(read_bottom);
- EXPECT_INS_REMOVED(write_entry);
- if (kind.IsPossiblyTrue()) {
- EXPECT_INS_RETAINED(write_partial);
- EXPECT_TRUE(std::find(sets.begin(), sets.end(), write_partial) != sets.end());
- }
- EXPECT_INS_RETAINED(call_left);
- CheckFinalInstruction(if_left->InputAt(0), ComparisonPlacement::kInEscape);
- EXPECT_INS_EQ(pred_get->GetTarget(), merge_alloc);
- EXPECT_INS_EQ(pred_get->GetDefaultValue(), merge_value_return);
-}
-
-INSTANTIATE_TEST_SUITE_P(
- LoadStoreEliminationTest,
- PartialComparisonTestGroup,
- testing::Values(PartialComparisonKind{PartialComparisonKind::Type::kEquals,
- PartialComparisonKind::Target::kNull,
- PartialComparisonKind::Position::kLeft},
- PartialComparisonKind{PartialComparisonKind::Type::kEquals,
- PartialComparisonKind::Target::kNull,
- PartialComparisonKind::Position::kRight},
- PartialComparisonKind{PartialComparisonKind::Type::kEquals,
- PartialComparisonKind::Target::kValue,
- PartialComparisonKind::Position::kLeft},
- PartialComparisonKind{PartialComparisonKind::Type::kEquals,
- PartialComparisonKind::Target::kValue,
- PartialComparisonKind::Position::kRight},
- PartialComparisonKind{PartialComparisonKind::Type::kEquals,
- PartialComparisonKind::Target::kSelf,
- PartialComparisonKind::Position::kLeft},
- PartialComparisonKind{PartialComparisonKind::Type::kNotEquals,
- PartialComparisonKind::Target::kNull,
- PartialComparisonKind::Position::kLeft},
- PartialComparisonKind{PartialComparisonKind::Type::kNotEquals,
- PartialComparisonKind::Target::kNull,
- PartialComparisonKind::Position::kRight},
- PartialComparisonKind{PartialComparisonKind::Type::kNotEquals,
- PartialComparisonKind::Target::kSelf,
- PartialComparisonKind::Position::kLeft},
- PartialComparisonKind{PartialComparisonKind::Type::kNotEquals,
- PartialComparisonKind::Target::kValue,
- PartialComparisonKind::Position::kLeft},
- PartialComparisonKind{PartialComparisonKind::Type::kNotEquals,
- PartialComparisonKind::Target::kValue,
- PartialComparisonKind::Position::kRight}));
-
-// // ENTRY
-// obj = new Obj();
-// if (parameter_value) {
-// // LEFT
-// escape(obj);
-// } else {
-// // RIGHT
-// // ELIMINATE
-// obj.field = 2;
-// }
-// EXIT
-// predicated-ELIMINATE
-// obj.field = 3;
-TEST_F(LoadStoreEliminationTest, PredicatedStore1) {
- ScopedObjectAccess soa(Thread::Current());
- VariableSizedHandleScope vshs(soa.Self());
- InitGraph(/*handles=*/&vshs);
- AdjacencyListGraph blks(SetupFromAdjacencyList("entry",
- "exit",
- {{"entry", "left"},
- {"entry", "right"},
- {"left", "breturn"},
- {"right", "breturn"},
- {"breturn", "exit"}}));
-#define GET_BLOCK(name) HBasicBlock* name = blks.Get(#name)
- GET_BLOCK(entry);
- GET_BLOCK(exit);
- GET_BLOCK(breturn);
- GET_BLOCK(left);
- GET_BLOCK(right);
-#undef GET_BLOCK
- EnsurePredecessorOrder(breturn, {left, right});
- HInstruction* bool_value = MakeParam(DataType::Type::kBool);
- HInstruction* null_const = graph_->GetNullConstant();
- HInstruction* c2 = graph_->GetIntConstant(2);
- HInstruction* c3 = graph_->GetIntConstant(3);
-
- HInstruction* cls = MakeClassLoad();
- HInstruction* new_inst = MakeNewInstance(cls);
- HInstruction* if_inst = new (GetAllocator()) HIf(bool_value);
- entry->AddInstruction(cls);
- entry->AddInstruction(new_inst);
- entry->AddInstruction(if_inst);
- ManuallyBuildEnvFor(cls, {});
- new_inst->CopyEnvironmentFrom(cls->GetEnvironment());
-
- HInstruction* call_left = MakeInvoke(DataType::Type::kVoid, { new_inst });
- HInstruction* goto_left = new (GetAllocator()) HGoto();
- left->AddInstruction(call_left);
- left->AddInstruction(goto_left);
- call_left->CopyEnvironmentFrom(cls->GetEnvironment());
-
- HInstruction* write_right = MakeIFieldSet(new_inst, c2, MemberOffset(32));
- HInstruction* goto_right = new (GetAllocator()) HGoto();
- right->AddInstruction(write_right);
- right->AddInstruction(goto_right);
-
- HInstruction* write_bottom = MakeIFieldSet(new_inst, c3, MemberOffset(32));
- HInstruction* return_exit = new (GetAllocator()) HReturnVoid();
- breturn->AddInstruction(write_bottom);
- breturn->AddInstruction(return_exit);
-
- SetupExit(exit);
-
- PerformLSEWithPartial(blks);
-
- EXPECT_INS_RETAINED(write_bottom);
- EXPECT_TRUE(write_bottom->AsInstanceFieldSet()->GetIsPredicatedSet());
- EXPECT_INS_REMOVED(write_right);
- EXPECT_INS_RETAINED(call_left);
- HPhi* merge_alloc = FindSingleInstruction<HPhi>(graph_, breturn);
- ASSERT_NE(merge_alloc, nullptr);
- EXPECT_TRUE(merge_alloc->InputAt(0)->IsNewInstance()) << *merge_alloc;
- EXPECT_EQ(merge_alloc->InputAt(0)->InputAt(0), cls) << *merge_alloc << " cls? " << *cls;
- EXPECT_EQ(merge_alloc->InputAt(1), null_const);
-}
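The predicated store checked above, as a minimal stand-alone C++ sketch (Obj and escape() are hypothetical stand-ins, not ART code): the final obj.field = 3 must still be visible through an escaped reference, but there is no object to write to on the path where the allocation was eliminated.

struct Obj { int field; };
Obj* g_sink = nullptr;
void escape(Obj* o) { g_sink = o; }  // opaque escape

void After(bool flag) {
  Obj* maybe_obj = nullptr;    // phi: new instance on the escaping path, else null
  if (flag) {
    maybe_obj = new Obj();     // materialized only here
    escape(maybe_obj);
  }
  // obj.field = 2 on the other path was eliminated outright.
  if (maybe_obj != nullptr) {  // predicated store: only a materialized object is written
    maybe_obj->field = 3;
  }
}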
-
-// // ENTRY
-// obj = new Obj();
-// obj.field = 3;
-// if (parameter_value) {
-// // LEFT
-// escape(obj);
-// } else {
-// // RIGHT
-// // ELIMINATE
-// obj.field = 2;
-// }
-// // MERGE
-// if (second_param) {
-// // NON_ESCAPE
-// obj.field = 1;
-// noescape();
-// }
-// EXIT
-// predicated-ELIMINATE
-// obj.field = 4;
-TEST_F(LoadStoreEliminationTest, PredicatedStore2) {
- ScopedObjectAccess soa(Thread::Current());
- VariableSizedHandleScope vshs(soa.Self());
- CreateGraph(/*handles=*/&vshs);
- AdjacencyListGraph blks(SetupFromAdjacencyList("entry",
- "exit",
- {{"entry", "left"},
- {"entry", "right"},
- {"left", "merge"},
- {"right", "merge"},
- {"merge", "non_escape"},
- {"non_escape", "breturn"},
- {"merge", "merge_crit_break"},
- {"merge_crit_break", "breturn"},
- {"breturn", "exit"}}));
-#define GET_BLOCK(name) HBasicBlock* name = blks.Get(#name)
- GET_BLOCK(entry);
- GET_BLOCK(exit);
- GET_BLOCK(breturn);
- GET_BLOCK(left);
- GET_BLOCK(right);
- GET_BLOCK(merge);
- GET_BLOCK(merge_crit_break);
- GET_BLOCK(non_escape);
-#undef GET_BLOCK
- EnsurePredecessorOrder(merge, {left, right});
- EnsurePredecessorOrder(breturn, {merge_crit_break, non_escape});
- HInstruction* bool_value = MakeParam(DataType::Type::kBool);
- HInstruction* bool_value2 = MakeParam(DataType::Type::kBool);
- HInstruction* null_const = graph_->GetNullConstant();
- HInstruction* c1 = graph_->GetIntConstant(1);
- HInstruction* c2 = graph_->GetIntConstant(2);
- HInstruction* c3 = graph_->GetIntConstant(3);
- HInstruction* c4 = graph_->GetIntConstant(4);
-
- HInstruction* cls = MakeClassLoad();
- HInstruction* new_inst = MakeNewInstance(cls);
- HInstruction* write_entry = MakeIFieldSet(new_inst, c3, MemberOffset(32));
- HInstruction* if_inst = new (GetAllocator()) HIf(bool_value);
- entry->AddInstruction(cls);
- entry->AddInstruction(new_inst);
- entry->AddInstruction(write_entry);
- entry->AddInstruction(if_inst);
- ManuallyBuildEnvFor(cls, {});
- new_inst->CopyEnvironmentFrom(cls->GetEnvironment());
-
- HInstruction* call_left = MakeInvoke(DataType::Type::kVoid, { new_inst });
- HInstruction* goto_left = new (GetAllocator()) HGoto();
- left->AddInstruction(call_left);
- left->AddInstruction(goto_left);
- call_left->CopyEnvironmentFrom(cls->GetEnvironment());
-
- HInstruction* write_right = MakeIFieldSet(new_inst, c2, MemberOffset(32));
- HInstruction* goto_right = new (GetAllocator()) HGoto();
- right->AddInstruction(write_right);
- right->AddInstruction(goto_right);
-
- HInstruction* merge_if = new (GetAllocator()) HIf(bool_value2);
- merge->AddInstruction(merge_if);
-
- merge_crit_break->AddInstruction(new (GetAllocator()) HGoto());
-
- HInstruction* write_non_escape = MakeIFieldSet(new_inst, c1, MemberOffset(32));
- HInstruction* non_escape_call = MakeInvoke(DataType::Type::kVoid, {});
- HInstruction* non_escape_goto = new (GetAllocator()) HGoto();
- non_escape->AddInstruction(write_non_escape);
- non_escape->AddInstruction(non_escape_call);
- non_escape->AddInstruction(non_escape_goto);
- non_escape_call->CopyEnvironmentFrom(cls->GetEnvironment());
-
- HInstruction* write_bottom = MakeIFieldSet(new_inst, c4, MemberOffset(32));
- HInstruction* return_exit = new (GetAllocator()) HReturnVoid();
- breturn->AddInstruction(write_bottom);
- breturn->AddInstruction(return_exit);
-
- SetupExit(exit);
-
- PerformLSEWithPartial(blks);
-
- EXPECT_INS_RETAINED(write_bottom);
- EXPECT_TRUE(write_bottom->AsInstanceFieldSet()->GetIsPredicatedSet()) << *write_bottom;
- EXPECT_INS_REMOVED(write_right);
- EXPECT_INS_RETAINED(call_left);
- HInstanceFieldSet* pred_set = FindSingleInstruction<HInstanceFieldSet>(graph_, breturn);
- HPhi* merge_alloc = FindSingleInstruction<HPhi>(graph_);
- ASSERT_NE(merge_alloc, nullptr);
- EXPECT_TRUE(merge_alloc->InputAt(0)->IsNewInstance()) << *merge_alloc;
- EXPECT_INS_EQ(merge_alloc->InputAt(0)->InputAt(0), cls) << " phi is: " << *merge_alloc;
- EXPECT_INS_EQ(merge_alloc->InputAt(1), null_const);
- ASSERT_NE(pred_set, nullptr);
- EXPECT_TRUE(pred_set->GetIsPredicatedSet()) << *pred_set;
- EXPECT_INS_EQ(pred_set->InputAt(0), merge_alloc);
-}
-
-// // ENTRY
-// obj = new Obj();
-// obj.field = 3;
-// if (parameter_value) {
-// // LEFT
-// escape(obj);
-// } else {
-// // RIGHT
-// // ELIMINATE
-// obj.field = 2;
-// }
-// EXIT
-// predicated-ELIMINATE
-// return obj.field
-TEST_F(LoadStoreEliminationTest, PredicatedLoad1) {
- ScopedObjectAccess soa(Thread::Current());
- VariableSizedHandleScope vshs(soa.Self());
- CreateGraph(/*handles=*/&vshs);
- AdjacencyListGraph blks(SetupFromAdjacencyList("entry",
- "exit",
- {{"entry", "left"},
- {"entry", "right"},
- {"left", "breturn"},
- {"right", "breturn"},
- {"breturn", "exit"}}));
-#define GET_BLOCK(name) HBasicBlock* name = blks.Get(#name)
- GET_BLOCK(entry);
- GET_BLOCK(exit);
- GET_BLOCK(breturn);
- GET_BLOCK(left);
- GET_BLOCK(right);
-#undef GET_BLOCK
- EnsurePredecessorOrder(breturn, {left, right});
- HInstruction* bool_value = MakeParam(DataType::Type::kBool);
- HInstruction* null_const = graph_->GetNullConstant();
- HInstruction* c2 = graph_->GetIntConstant(2);
- HInstruction* c3 = graph_->GetIntConstant(3);
-
- HInstruction* cls = MakeClassLoad();
- HInstruction* new_inst = MakeNewInstance(cls);
- HInstruction* write_entry = MakeIFieldSet(new_inst, c3, MemberOffset(32));
- HInstruction* if_inst = new (GetAllocator()) HIf(bool_value);
- entry->AddInstruction(cls);
- entry->AddInstruction(new_inst);
- entry->AddInstruction(write_entry);
- entry->AddInstruction(if_inst);
- ManuallyBuildEnvFor(cls, {});
- new_inst->CopyEnvironmentFrom(cls->GetEnvironment());
-
- HInstruction* call_left = MakeInvoke(DataType::Type::kVoid, { new_inst });
- HInstruction* goto_left = new (GetAllocator()) HGoto();
- left->AddInstruction(call_left);
- left->AddInstruction(goto_left);
- call_left->CopyEnvironmentFrom(cls->GetEnvironment());
-
- HInstruction* write_right = MakeIFieldSet(new_inst, c2, MemberOffset(32));
- HInstruction* goto_right = new (GetAllocator()) HGoto();
- right->AddInstruction(write_right);
- right->AddInstruction(goto_right);
-
- HInstruction* read_bottom = MakeIFieldGet(new_inst, DataType::Type::kInt32, MemberOffset(32));
- HInstruction* return_exit = new (GetAllocator()) HReturn(read_bottom);
- breturn->AddInstruction(read_bottom);
- breturn->AddInstruction(return_exit);
-
- SetupExit(exit);
-
- PerformLSEWithPartial(blks);
-
- EXPECT_INS_REMOVED(read_bottom);
- EXPECT_INS_REMOVED(write_right);
- EXPECT_INS_RETAINED(call_left);
- std::vector<HPhi*> merges;
- HPredicatedInstanceFieldGet* pred_get =
- FindSingleInstruction<HPredicatedInstanceFieldGet>(graph_, breturn);
- std::tie(merges) = FindAllInstructions<HPhi>(graph_, breturn);
- ASSERT_EQ(merges.size(), 2u);
- HPhi* merge_value_return = FindOrNull(
- merges.begin(), merges.end(), [](HPhi* p) { return p->GetType() == DataType::Type::kInt32; });
- HPhi* merge_alloc = FindOrNull(merges.begin(), merges.end(), [](HPhi* p) {
- return p->GetType() == DataType::Type::kReference;
- });
- ASSERT_NE(merge_alloc, nullptr);
- EXPECT_TRUE(merge_alloc->InputAt(0)->IsNewInstance()) << *merge_alloc;
- EXPECT_EQ(merge_alloc->InputAt(0)->InputAt(0), cls) << *merge_alloc << " cls? " << *cls;
- EXPECT_EQ(merge_alloc->InputAt(1), null_const);
- ASSERT_NE(pred_get, nullptr);
- EXPECT_INS_EQ(pred_get->GetTarget(), merge_alloc);
- EXPECT_INS_EQ(pred_get->GetDefaultValue(), merge_value_return) << " pred-get is: " << *pred_get;
- EXPECT_INS_EQ(merge_value_return->InputAt(0), graph_->GetIntConstant(0))
- << " merge val is: " << *merge_value_return;
- EXPECT_INS_EQ(merge_value_return->InputAt(1), c2) << " merge val is: " << *merge_value_return;
-}
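The predicated load checked above, sketched as plain C++ (again with hypothetical Obj/escape() stand-ins rather than ART IR). The two phis the test looks for correspond to maybe_obj and default_value below; the constant 0 it checks is the placeholder input on the escaped side, where the value is read through the object instead.

struct Obj { int field; };
Obj* g_sink = nullptr;
void escape(Obj* o) { g_sink = o; }  // opaque escape

int After(bool flag) {
  Obj* maybe_obj = nullptr;  // phi: materialized object or null
  int default_value = 0;     // phi: unused placeholder on the escaped path, 2 otherwise
  if (flag) {
    maybe_obj = new Obj();
    maybe_obj->field = 3;    // initial store kept only where the object exists
    escape(maybe_obj);
  } else {
    default_value = 2;       // obj.field = 2 eliminated; its value is tracked instead
  }
  // Predicated get: the target reference decides which side is meaningful.
  return maybe_obj != nullptr ? maybe_obj->field : default_value;
}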
-
-// // ENTRY
-// obj1 = new Obj1();
-// obj2 = new Obj2();
-// obj1.field = 3;
-// obj2.field = 13;
-// if (parameter_value) {
-// // LEFT
-// escape(obj1);
-// escape(obj2);
-// } else {
-// // RIGHT
-// // ELIMINATE
-// obj1.field = 2;
-// obj2.field = 12;
-// }
-// EXIT
-// predicated-ELIMINATE
-// return obj1.field + obj2.field
-TEST_F(LoadStoreEliminationTest, MultiPredicatedLoad1) {
- ScopedObjectAccess soa(Thread::Current());
- VariableSizedHandleScope vshs(soa.Self());
- CreateGraph(/*handles=*/&vshs);
- AdjacencyListGraph blks(SetupFromAdjacencyList("entry",
- "exit",
- {{"entry", "left"},
- {"entry", "right"},
- {"left", "breturn"},
- {"right", "breturn"},
- {"breturn", "exit"}}));
-#define GET_BLOCK(name) HBasicBlock* name = blks.Get(#name)
- GET_BLOCK(entry);
- GET_BLOCK(exit);
- GET_BLOCK(breturn);
- GET_BLOCK(left);
- GET_BLOCK(right);
-#undef GET_BLOCK
- EnsurePredecessorOrder(breturn, {left, right});
- HInstruction* bool_value = MakeParam(DataType::Type::kBool);
- HInstruction* c2 = graph_->GetIntConstant(2);
- HInstruction* c3 = graph_->GetIntConstant(3);
- HInstruction* c12 = graph_->GetIntConstant(12);
- HInstruction* c13 = graph_->GetIntConstant(13);
-
- HInstruction* cls1 = MakeClassLoad();
- HInstruction* cls2 = MakeClassLoad();
- HInstruction* new_inst1 = MakeNewInstance(cls1);
- HInstruction* new_inst2 = MakeNewInstance(cls2);
- HInstruction* write_entry1 = MakeIFieldSet(new_inst1, c3, MemberOffset(32));
- HInstruction* write_entry2 = MakeIFieldSet(new_inst2, c13, MemberOffset(32));
- HInstruction* if_inst = new (GetAllocator()) HIf(bool_value);
- entry->AddInstruction(cls1);
- entry->AddInstruction(cls2);
- entry->AddInstruction(new_inst1);
- entry->AddInstruction(new_inst2);
- entry->AddInstruction(write_entry1);
- entry->AddInstruction(write_entry2);
- entry->AddInstruction(if_inst);
- ManuallyBuildEnvFor(cls1, {});
- cls2->CopyEnvironmentFrom(cls1->GetEnvironment());
- new_inst1->CopyEnvironmentFrom(cls1->GetEnvironment());
- new_inst2->CopyEnvironmentFrom(cls1->GetEnvironment());
-
- HInstruction* call_left1 = MakeInvoke(DataType::Type::kVoid, { new_inst1 });
- HInstruction* call_left2 = MakeInvoke(DataType::Type::kVoid, { new_inst2 });
- HInstruction* goto_left = new (GetAllocator()) HGoto();
- left->AddInstruction(call_left1);
- left->AddInstruction(call_left2);
- left->AddInstruction(goto_left);
- call_left1->CopyEnvironmentFrom(cls1->GetEnvironment());
- call_left2->CopyEnvironmentFrom(cls1->GetEnvironment());
-
- HInstruction* write_right1 = MakeIFieldSet(new_inst1, c2, MemberOffset(32));
- HInstruction* write_right2 = MakeIFieldSet(new_inst2, c12, MemberOffset(32));
- HInstruction* goto_right = new (GetAllocator()) HGoto();
- right->AddInstruction(write_right1);
- right->AddInstruction(write_right2);
- right->AddInstruction(goto_right);
-
- HInstruction* read_bottom1 = MakeIFieldGet(new_inst1, DataType::Type::kInt32, MemberOffset(32));
- HInstruction* read_bottom2 = MakeIFieldGet(new_inst2, DataType::Type::kInt32, MemberOffset(32));
- HInstruction* combine =
- new (GetAllocator()) HAdd(DataType::Type::kInt32, read_bottom1, read_bottom2);
- HInstruction* return_exit = new (GetAllocator()) HReturn(combine);
- breturn->AddInstruction(read_bottom1);
- breturn->AddInstruction(read_bottom2);
- breturn->AddInstruction(combine);
- breturn->AddInstruction(return_exit);
-
- SetupExit(exit);
-
- PerformLSEWithPartial(blks);
-
- EXPECT_INS_REMOVED(read_bottom1);
- EXPECT_INS_REMOVED(read_bottom2);
- EXPECT_INS_REMOVED(write_right1);
- EXPECT_INS_REMOVED(write_right2);
- EXPECT_INS_RETAINED(call_left1);
- EXPECT_INS_RETAINED(call_left2);
- std::vector<HPhi*> merges;
- std::vector<HPredicatedInstanceFieldGet*> pred_gets;
- std::tie(merges, pred_gets) =
- FindAllInstructions<HPhi, HPredicatedInstanceFieldGet>(graph_, breturn);
- ASSERT_EQ(merges.size(), 4u);
- ASSERT_EQ(pred_gets.size(), 2u);
- HPhi* merge_value_return1 = FindOrNull(merges.begin(), merges.end(), [&](HPhi* p) {
- return p->GetType() == DataType::Type::kInt32 && p->InputAt(1) == c2;
- });
- HPhi* merge_value_return2 = FindOrNull(merges.begin(), merges.end(), [&](HPhi* p) {
- return p->GetType() == DataType::Type::kInt32 && p->InputAt(1) == c12;
- });
- HPhi* merge_alloc1 = FindOrNull(merges.begin(), merges.end(), [&](HPhi* p) {
- return p->GetType() == DataType::Type::kReference &&
- p->InputAt(0)->IsNewInstance() &&
- p->InputAt(0)->InputAt(0) == cls1;
- });
- HPhi* merge_alloc2 = FindOrNull(merges.begin(), merges.end(), [&](HPhi* p) {
- return p->GetType() == DataType::Type::kReference &&
- p->InputAt(0)->IsNewInstance() &&
- p->InputAt(0)->InputAt(0) == cls2;
- });
- ASSERT_NE(merge_alloc1, nullptr);
- ASSERT_NE(merge_alloc2, nullptr);
- EXPECT_EQ(merge_alloc1->InputAt(1), graph_->GetNullConstant());
- EXPECT_EQ(merge_alloc2->InputAt(1), graph_->GetNullConstant());
- HPredicatedInstanceFieldGet* pred_get1 =
- FindOrNull(pred_gets.begin(), pred_gets.end(), [&](HPredicatedInstanceFieldGet* pg) {
- return pg->GetTarget() == merge_alloc1;
- });
- HPredicatedInstanceFieldGet* pred_get2 =
- FindOrNull(pred_gets.begin(), pred_gets.end(), [&](HPredicatedInstanceFieldGet* pg) {
- return pg->GetTarget() == merge_alloc2;
- });
- ASSERT_NE(pred_get1, nullptr);
- EXPECT_INS_EQ(pred_get1->GetTarget(), merge_alloc1);
- EXPECT_INS_EQ(pred_get1->GetDefaultValue(), merge_value_return1)
- << " pred-get is: " << *pred_get1;
- EXPECT_INS_EQ(merge_value_return1->InputAt(0), graph_->GetIntConstant(0))
- << " merge val is: " << *merge_value_return1;
- EXPECT_INS_EQ(merge_value_return1->InputAt(1), c2) << " merge val is: " << *merge_value_return1;
- ASSERT_NE(pred_get2, nullptr);
- EXPECT_INS_EQ(pred_get2->GetTarget(), merge_alloc2);
- EXPECT_INS_EQ(pred_get2->GetDefaultValue(), merge_value_return2)
- << " pred-get is: " << *pred_get2;
- EXPECT_INS_EQ(merge_value_return2->InputAt(0), graph_->GetIntConstant(0))
- << " merge val is: " << *merge_value_return2;
- EXPECT_INS_EQ(merge_value_return2->InputAt(1), c12) << " merge val is: " << *merge_value_return2;
-}
-
-// // ENTRY
-// obj1 = new Obj1();
-// obj2 = new Obj2();
-// obj1.field = 3;
-// obj2.field = 13;
-// if (parameter_value) {
-// // LEFT
-// escape(obj1);
-// // ELIMINATE
-// obj2.field = 12;
-// } else {
-// // RIGHT
-// // ELIMINATE
-// obj1.field = 2;
-// escape(obj2);
-// }
-// EXIT
-// predicated-ELIMINATE
-// return obj1.field + obj2.field
-TEST_F(LoadStoreEliminationTest, MultiPredicatedLoad2) {
- ScopedObjectAccess soa(Thread::Current());
- VariableSizedHandleScope vshs(soa.Self());
- CreateGraph(/*handles=*/&vshs);
- AdjacencyListGraph blks(SetupFromAdjacencyList("entry",
- "exit",
- {{"entry", "left"},
- {"entry", "right"},
- {"left", "breturn"},
- {"right", "breturn"},
- {"breturn", "exit"}}));
-#define GET_BLOCK(name) HBasicBlock* name = blks.Get(#name)
- GET_BLOCK(entry);
- GET_BLOCK(exit);
- GET_BLOCK(breturn);
- GET_BLOCK(left);
- GET_BLOCK(right);
-#undef GET_BLOCK
- EnsurePredecessorOrder(breturn, {left, right});
- HInstruction* bool_value = MakeParam(DataType::Type::kBool);
- HInstruction* c2 = graph_->GetIntConstant(2);
- HInstruction* c3 = graph_->GetIntConstant(3);
- HInstruction* c12 = graph_->GetIntConstant(12);
- HInstruction* c13 = graph_->GetIntConstant(13);
-
- HInstruction* cls1 = MakeClassLoad();
- HInstruction* cls2 = MakeClassLoad();
- HInstruction* new_inst1 = MakeNewInstance(cls1);
- HInstruction* new_inst2 = MakeNewInstance(cls2);
- HInstruction* write_entry1 = MakeIFieldSet(new_inst1, c3, MemberOffset(32));
- HInstruction* write_entry2 = MakeIFieldSet(new_inst2, c13, MemberOffset(32));
- HInstruction* if_inst = new (GetAllocator()) HIf(bool_value);
- entry->AddInstruction(cls1);
- entry->AddInstruction(cls2);
- entry->AddInstruction(new_inst1);
- entry->AddInstruction(new_inst2);
- entry->AddInstruction(write_entry1);
- entry->AddInstruction(write_entry2);
- entry->AddInstruction(if_inst);
- ManuallyBuildEnvFor(cls1, {});
- cls2->CopyEnvironmentFrom(cls1->GetEnvironment());
- new_inst1->CopyEnvironmentFrom(cls1->GetEnvironment());
- new_inst2->CopyEnvironmentFrom(cls1->GetEnvironment());
-
- HInstruction* call_left1 = MakeInvoke(DataType::Type::kVoid, { new_inst1 });
- HInstruction* write_left2 = MakeIFieldSet(new_inst2, c12, MemberOffset(32));
- HInstruction* goto_left = new (GetAllocator()) HGoto();
- left->AddInstruction(call_left1);
- left->AddInstruction(write_left2);
- left->AddInstruction(goto_left);
- call_left1->CopyEnvironmentFrom(cls1->GetEnvironment());
-
- HInstruction* write_right1 = MakeIFieldSet(new_inst1, c2, MemberOffset(32));
- HInstruction* call_right2 = MakeInvoke(DataType::Type::kVoid, { new_inst2 });
- HInstruction* goto_right = new (GetAllocator()) HGoto();
- right->AddInstruction(write_right1);
- right->AddInstruction(call_right2);
- right->AddInstruction(goto_right);
- call_right2->CopyEnvironmentFrom(cls1->GetEnvironment());
-
- HInstruction* read_bottom1 = MakeIFieldGet(new_inst1, DataType::Type::kInt32, MemberOffset(32));
- HInstruction* read_bottom2 = MakeIFieldGet(new_inst2, DataType::Type::kInt32, MemberOffset(32));
- HInstruction* combine =
- new (GetAllocator()) HAdd(DataType::Type::kInt32, read_bottom1, read_bottom2);
- HInstruction* return_exit = new (GetAllocator()) HReturn(combine);
- breturn->AddInstruction(read_bottom1);
- breturn->AddInstruction(read_bottom2);
- breturn->AddInstruction(combine);
- breturn->AddInstruction(return_exit);
-
- SetupExit(exit);
-
- PerformLSEWithPartial(blks);
-
- EXPECT_INS_REMOVED(read_bottom1);
- EXPECT_INS_REMOVED(read_bottom2);
- EXPECT_INS_REMOVED(write_right1);
- EXPECT_INS_REMOVED(write_left2);
- EXPECT_INS_RETAINED(call_left1);
- EXPECT_INS_RETAINED(call_right2);
- std::vector<HPhi*> merges;
- std::vector<HPredicatedInstanceFieldGet*> pred_gets;
- std::tie(merges, pred_gets) =
- FindAllInstructions<HPhi, HPredicatedInstanceFieldGet>(graph_, breturn);
- ASSERT_EQ(merges.size(), 4u);
- ASSERT_EQ(pred_gets.size(), 2u);
- HPhi* merge_value_return1 = FindOrNull(merges.begin(), merges.end(), [&](HPhi* p) {
- return p->GetType() == DataType::Type::kInt32 && p->InputAt(1) == c2;
- });
- HPhi* merge_value_return2 = FindOrNull(merges.begin(), merges.end(), [&](HPhi* p) {
- return p->GetType() == DataType::Type::kInt32 && p->InputAt(0) == c12;
- });
- HPhi* merge_alloc1 = FindOrNull(merges.begin(), merges.end(), [&](HPhi* p) {
- return p->GetType() == DataType::Type::kReference && p->InputAt(1)->IsNullConstant();
- });
- HPhi* merge_alloc2 = FindOrNull(merges.begin(), merges.end(), [&](HPhi* p) {
- return p->GetType() == DataType::Type::kReference && p->InputAt(0)->IsNullConstant();
- });
- ASSERT_NE(merge_alloc1, nullptr);
- ASSERT_NE(merge_alloc2, nullptr);
- EXPECT_TRUE(merge_alloc1->InputAt(0)->IsNewInstance()) << *merge_alloc1;
- EXPECT_INS_EQ(merge_alloc1->InputAt(0)->InputAt(0), cls1) << *merge_alloc1;
- EXPECT_INS_EQ(merge_alloc1->InputAt(1), graph_->GetNullConstant());
- EXPECT_TRUE(merge_alloc2->InputAt(1)->IsNewInstance()) << *merge_alloc2;
- EXPECT_INS_EQ(merge_alloc2->InputAt(1)->InputAt(0), cls2) << *merge_alloc2;
- EXPECT_INS_EQ(merge_alloc2->InputAt(0), graph_->GetNullConstant());
- HPredicatedInstanceFieldGet* pred_get1 =
- FindOrNull(pred_gets.begin(), pred_gets.end(), [&](HPredicatedInstanceFieldGet* pg) {
- return pg->GetTarget() == merge_alloc1;
- });
- HPredicatedInstanceFieldGet* pred_get2 =
- FindOrNull(pred_gets.begin(), pred_gets.end(), [&](HPredicatedInstanceFieldGet* pg) {
- return pg->GetTarget() == merge_alloc2;
- });
- ASSERT_NE(pred_get1, nullptr);
- EXPECT_INS_EQ(pred_get1->GetTarget(), merge_alloc1);
- EXPECT_INS_EQ(pred_get1->GetDefaultValue(), merge_value_return1)
- << " pred-get is: " << *pred_get1;
- EXPECT_INS_EQ(merge_value_return1->InputAt(0), graph_->GetIntConstant(0))
- << " merge val is: " << *merge_value_return1;
- EXPECT_INS_EQ(merge_value_return1->InputAt(1), c2) << " merge val is: " << *merge_value_return1;
- ASSERT_NE(pred_get2, nullptr);
- EXPECT_INS_EQ(pred_get2->GetTarget(), merge_alloc2);
- EXPECT_INS_EQ(pred_get2->GetDefaultValue(), merge_value_return2)
- << " pred-get is: " << *pred_get2;
- EXPECT_INS_EQ(merge_value_return2->InputAt(1), graph_->GetIntConstant(0))
- << " merge val is: " << *merge_value_return2;
- EXPECT_INS_EQ(merge_value_return2->InputAt(0), c12) << " merge val is: " << *merge_value_return2;
-}
-
-// Based on structure seen in `java.util.List
-// java.util.Collections.checkedList(java.util.List, java.lang.Class)`
-// Incorrect accounting would cause attempts to materialize both obj1 and obj2
-// in each of the materialization blocks.
-// // ENTRY
-// Obj obj;
-// if (param1) {
-// // needs to be moved after param2 check
-// obj1 = new Obj1();
-// obj1.foo = 33;
-// if (param2) {
-// return obj1.foo;
-// }
-// obj = obj1;
-// } else {
-// obj2 = new Obj2();
-// obj2.foo = 44;
-// if (param2) {
-// return obj2.foo;
-// }
-// obj = obj2;
-// }
-// EXIT
-// // obj = PHI[obj1, obj2]
-// // NB The phi acts as an escape for both obj1 and obj2, meaning that as far as
-// // the LSA is concerned the escape frontier is left_end->breturn and
-// // right_end->breturn for both, even though only one of the objects is
-// // actually live at each edge.
-// // TODO In the future we really should track liveness through PHIs which would
-// // allow us to entirely remove the allocation in this test.
-// return obj.foo;
-TEST_F(LoadStoreEliminationTest, MultiPredicatedLoad3) {
- ScopedObjectAccess soa(Thread::Current());
- VariableSizedHandleScope vshs(soa.Self());
- CreateGraph(/*handles=*/&vshs);
- AdjacencyListGraph blks(SetupFromAdjacencyList("entry",
- "exit",
- {{"entry", "left"},
- {"left", "left_end"},
- {"left_end", "breturn"},
- {"left", "left_exit_early"},
- {"left_exit_early", "exit"},
- {"entry", "right"},
- {"right", "right_end"},
- {"right_end", "breturn"},
- {"right", "right_exit_early"},
- {"right_exit_early", "exit"},
- {"breturn", "exit"}}));
-#define GET_BLOCK(name) HBasicBlock* name = blks.Get(#name)
- GET_BLOCK(entry);
- GET_BLOCK(exit);
- GET_BLOCK(breturn);
- GET_BLOCK(left);
- GET_BLOCK(left_end);
- GET_BLOCK(left_exit_early);
- GET_BLOCK(right);
- GET_BLOCK(right_end);
- GET_BLOCK(right_exit_early);
-#undef GET_BLOCK
- EnsurePredecessorOrder(breturn, {left_end, right_end});
- HInstruction* param1 = MakeParam(DataType::Type::kBool);
- HInstruction* param2 = MakeParam(DataType::Type::kBool);
- HInstruction* c33 = graph_->GetIntConstant(33);
- HInstruction* c44 = graph_->GetIntConstant(44);
-
- HInstruction* if_inst = new (GetAllocator()) HIf(param1);
- entry->AddInstruction(if_inst);
-
- HInstruction* cls1 = MakeClassLoad();
- HInstruction* new_inst1 = MakeNewInstance(cls1);
- HInstruction* write1 = MakeIFieldSet(new_inst1, c33, MemberOffset(32));
- HInstruction* if_left = new (GetAllocator()) HIf(param2);
- left->AddInstruction(cls1);
- left->AddInstruction(new_inst1);
- left->AddInstruction(write1);
- left->AddInstruction(if_left);
- ManuallyBuildEnvFor(cls1, {});
- new_inst1->CopyEnvironmentFrom(cls1->GetEnvironment());
-
- left_end->AddInstruction(new (GetAllocator()) HGoto());
-
- HInstruction* early_exit_left_read =
- MakeIFieldGet(new_inst1, DataType::Type::kInt32, MemberOffset(32));
- HInstruction* early_exit_left_return = new (GetAllocator()) HReturn(early_exit_left_read);
- left_exit_early->AddInstruction(early_exit_left_read);
- left_exit_early->AddInstruction(early_exit_left_return);
-
- HInstruction* cls2 = MakeClassLoad();
- HInstruction* new_inst2 = MakeNewInstance(cls2);
- HInstruction* write2 = MakeIFieldSet(new_inst2, c44, MemberOffset(32));
- HInstruction* if_right = new (GetAllocator()) HIf(param2);
- right->AddInstruction(cls2);
- right->AddInstruction(new_inst2);
- right->AddInstruction(write2);
- right->AddInstruction(if_right);
- cls2->CopyEnvironmentFrom(cls1->GetEnvironment());
- new_inst2->CopyEnvironmentFrom(cls2->GetEnvironment());
-
- right_end->AddInstruction(new (GetAllocator()) HGoto());
-
- HInstruction* early_exit_right_read =
- MakeIFieldGet(new_inst2, DataType::Type::kInt32, MemberOffset(32));
- HInstruction* early_exit_right_return = new (GetAllocator()) HReturn(early_exit_right_read);
- right_exit_early->AddInstruction(early_exit_right_read);
- right_exit_early->AddInstruction(early_exit_right_return);
-
- HPhi* bottom_phi = MakePhi({new_inst1, new_inst2});
- HInstruction* read_bottom = MakeIFieldGet(bottom_phi, DataType::Type::kInt32, MemberOffset(32));
- HInstruction* return_exit = new (GetAllocator()) HReturn(read_bottom);
- breturn->AddPhi(bottom_phi);
- breturn->AddInstruction(read_bottom);
- breturn->AddInstruction(return_exit);
-
- SetupExit(exit);
-
- PerformLSEWithPartial(blks);
-
- EXPECT_INS_REMOVED(early_exit_left_read);
- EXPECT_INS_REMOVED(early_exit_right_read);
- EXPECT_INS_RETAINED(bottom_phi);
- EXPECT_INS_RETAINED(read_bottom);
- EXPECT_INS_EQ(early_exit_left_return->InputAt(0), c33);
- EXPECT_INS_EQ(early_exit_right_return->InputAt(0), c44);
- // These assert there is only 1 HNewInstance in the given blocks.
- HNewInstance* moved_ni1 =
- FindSingleInstruction<HNewInstance>(graph_, left_end->GetSinglePredecessor());
- HNewInstance* moved_ni2 =
- FindSingleInstruction<HNewInstance>(graph_, right_end->GetSinglePredecessor());
- ASSERT_NE(moved_ni1, nullptr);
- ASSERT_NE(moved_ni2, nullptr);
- EXPECT_INS_EQ(bottom_phi->InputAt(0), moved_ni1);
- EXPECT_INS_EQ(bottom_phi->InputAt(1), moved_ni2);
-}
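A stand-alone C++ sketch of the checkedList-like shape described above (Obj is a hypothetical stand-in, not ART code): each allocation can be sunk below its early-return check, but the merging phi still counts as an escape for both, so the allocations themselves survive.

struct Obj { int foo; };

int After(bool param1, bool param2) {
  Obj* obj;                 // becomes a phi of the two materialized instances
  if (param1) {
    if (param2) return 33;  // the early return no longer needs the allocation
    obj = new Obj();        // sunk below the early-exit check
    obj->foo = 33;
  } else {
    if (param2) return 44;
    obj = new Obj();
    obj->foo = 44;
  }
  return obj->foo;          // the phi keeps both allocations alive (see the TODO above)
}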
-
-// // ENTRY
-// obj = new Obj();
-// if (param1) {
-// obj.field = 3;
-// noescape();
-// } else {
-// obj.field = 2;
-// noescape();
-// }
-// int abc;
-// if (parameter_value) {
-// // LEFT
-// abc = 4;
-// escape(obj);
-// } else {
-// // RIGHT
-// // ELIMINATE
-// noescape();
-// abc = obj.field + 4;
-// }
-// abc = phi
-// EXIT
-// predicated-ELIMINATE
-// return obj.field + abc
-TEST_F(LoadStoreEliminationTest, PredicatedLoad4) {
- ScopedObjectAccess soa(Thread::Current());
- VariableSizedHandleScope vshs(soa.Self());
- CreateGraph(/*handles=*/&vshs);
- AdjacencyListGraph blks(SetupFromAdjacencyList("entry",
- "exit",
- {{"entry", "start_left"},
- {"entry", "start_right"},
- {"start_left", "mid"},
- {"start_right", "mid"},
- {"mid", "left"},
- {"mid", "right"},
- {"left", "breturn"},
- {"right", "breturn"},
- {"breturn", "exit"}}));
-#define GET_BLOCK(name) HBasicBlock* name = blks.Get(#name)
- GET_BLOCK(entry);
- GET_BLOCK(exit);
- GET_BLOCK(breturn);
- GET_BLOCK(left);
- GET_BLOCK(right);
- GET_BLOCK(mid);
- GET_BLOCK(start_left);
- GET_BLOCK(start_right);
-#undef GET_BLOCK
- EnsurePredecessorOrder(breturn, {left, right});
- EnsurePredecessorOrder(mid, {start_left, start_right});
- HInstruction* bool_value = MakeParam(DataType::Type::kBool);
- HInstruction* bool_value2 = MakeParam(DataType::Type::kBool);
- HInstruction* null_const = graph_->GetNullConstant();
- HInstruction* c2 = graph_->GetIntConstant(2);
- HInstruction* c3 = graph_->GetIntConstant(3);
- HInstruction* c4 = graph_->GetIntConstant(4);
-
- HInstruction* cls = MakeClassLoad();
- HInstruction* new_inst = MakeNewInstance(cls);
- HInstruction* if_inst = new (GetAllocator()) HIf(bool_value);
- entry->AddInstruction(cls);
- entry->AddInstruction(new_inst);
- entry->AddInstruction(if_inst);
- ManuallyBuildEnvFor(cls, {});
- new_inst->CopyEnvironmentFrom(cls->GetEnvironment());
-
- HInstruction* write_start_left = MakeIFieldSet(new_inst, c3, MemberOffset(32));
- HInstruction* call_start_left = MakeInvoke(DataType::Type::kVoid, { });
- start_left->AddInstruction(write_start_left);
- start_left->AddInstruction(call_start_left);
- start_left->AddInstruction(new (GetAllocator()) HGoto());
- call_start_left->CopyEnvironmentFrom(cls->GetEnvironment());
-
- HInstruction* write_start_right = MakeIFieldSet(new_inst, c2, MemberOffset(32));
- HInstruction* call_start_right = MakeInvoke(DataType::Type::kVoid, { });
- start_right->AddInstruction(write_start_right);
- start_right->AddInstruction(call_start_right);
- start_right->AddInstruction(new (GetAllocator()) HGoto());
- call_start_right->CopyEnvironmentFrom(cls->GetEnvironment());
-
- mid->AddInstruction(new (GetAllocator()) HIf(bool_value2));
-
- HInstruction* call_left = MakeInvoke(DataType::Type::kVoid, { new_inst });
- HInstruction* goto_left = new (GetAllocator()) HGoto();
- left->AddInstruction(call_left);
- left->AddInstruction(goto_left);
- call_left->CopyEnvironmentFrom(cls->GetEnvironment());
-
- HInstruction* call_right = MakeInvoke(DataType::Type::kVoid, { });
- HInstruction* read_right = MakeIFieldGet(new_inst, DataType::Type::kInt32, MemberOffset(32));
- HInstruction* add_right = new (GetAllocator()) HAdd(DataType::Type::kInt32, read_right, c4);
- HInstruction* goto_right = new (GetAllocator()) HGoto();
- right->AddInstruction(call_right);
- right->AddInstruction(read_right);
- right->AddInstruction(add_right);
- right->AddInstruction(goto_right);
- call_right->CopyEnvironmentFrom(cls->GetEnvironment());
-
- HPhi* phi_bottom = MakePhi({c4, add_right});
- HInstruction* read_bottom = MakeIFieldGet(new_inst, DataType::Type::kInt32, MemberOffset(32));
- HInstruction* add_bottom =
- new (GetAllocator()) HAdd(DataType::Type::kInt32, read_bottom, phi_bottom);
- HInstruction* return_exit = new (GetAllocator()) HReturn(add_bottom);
- breturn->AddPhi(phi_bottom);
- breturn->AddInstruction(read_bottom);
- breturn->AddInstruction(add_bottom);
- breturn->AddInstruction(return_exit);
-
- SetupExit(exit);
-
- PerformLSEWithPartial(blks);
-
- EXPECT_INS_REMOVED(read_bottom);
- EXPECT_INS_REMOVED(read_right);
- EXPECT_INS_RETAINED(call_left);
- EXPECT_INS_RETAINED(call_right);
- EXPECT_INS_RETAINED(call_start_left);
- EXPECT_INS_RETAINED(call_start_right);
- std::vector<HPhi*> merges;
- HPredicatedInstanceFieldGet* pred_get =
- FindSingleInstruction<HPredicatedInstanceFieldGet>(graph_, breturn);
- std::tie(merges) = FindAllInstructions<HPhi>(graph_, breturn);
- ASSERT_EQ(merges.size(), 3u);
- HPhi* merge_value_return = FindOrNull(merges.begin(), merges.end(), [&](HPhi* p) {
- return p != phi_bottom && p->GetType() == DataType::Type::kInt32;
- });
- HPhi* merge_alloc = FindOrNull(merges.begin(), merges.end(), [](HPhi* p) {
- return p->GetType() == DataType::Type::kReference;
- });
- ASSERT_NE(merge_alloc, nullptr);
- EXPECT_TRUE(merge_alloc->InputAt(0)->IsNewInstance()) << *merge_alloc;
- EXPECT_EQ(merge_alloc->InputAt(0)->InputAt(0), cls) << *merge_alloc << " cls? " << *cls;
- EXPECT_EQ(merge_alloc->InputAt(1), null_const);
- ASSERT_NE(pred_get, nullptr);
- EXPECT_INS_EQ(pred_get->GetTarget(), merge_alloc);
- EXPECT_INS_EQ(pred_get->GetDefaultValue(), merge_value_return) << " pred-get is: " << *pred_get;
- EXPECT_INS_EQ(merge_value_return->InputAt(0), graph_->GetIntConstant(0))
- << " merge val is: " << *merge_value_return;
- EXPECT_INS_EQ(merge_value_return->InputAt(1), FindSingleInstruction<HPhi>(graph_, mid))
- << " merge val is: " << *merge_value_return;
-}
-
-// Based on structure seen in `java.util.Set java.util.Collections$UnmodifiableMap.entrySet()`
-// We end up having to update a PHI generated by normal LSE.
-// // ENTRY
-// Obj obj_init = param_obj.BAR;
-// if (param1) {
-// Obj other = new Obj();
-// other.foo = 42;
-// if (param2) {
-// return other.foo;
-// } else {
-// param_obj.BAR = other;
-// }
-// } else { }
-// EXIT
-// LSE turns this into PHI[obj_init, other]
-// read_bottom = param_obj.BAR;
-// // won't be changed. The escape happens with .BAR set, so this is in the escaping cohort.
-// return read_bottom.foo;
-TEST_F(LoadStoreEliminationTest, MultiPredicatedLoad4) {
- ScopedObjectAccess soa(Thread::Current());
- VariableSizedHandleScope vshs(soa.Self());
- CreateGraph(/*handles=*/&vshs);
- AdjacencyListGraph blks(SetupFromAdjacencyList("entry",
- "exit",
- {{"entry", "left"},
- {"left", "left_early_return"},
- {"left_early_return", "exit"},
- {"left", "left_write_escape"},
- {"left_write_escape", "breturn"},
- {"entry", "right"},
- {"right", "breturn"},
- {"breturn", "exit"}}));
-#define GET_BLOCK(name) HBasicBlock* name = blks.Get(#name)
- GET_BLOCK(entry);
- GET_BLOCK(exit);
- GET_BLOCK(breturn);
- GET_BLOCK(left);
- GET_BLOCK(left_early_return);
- GET_BLOCK(left_write_escape);
- GET_BLOCK(right);
-#undef GET_BLOCK
- MemberOffset foo_offset = MemberOffset(32);
- MemberOffset bar_offset = MemberOffset(20);
- EnsurePredecessorOrder(breturn, {left_write_escape, right});
- HInstruction* c42 = graph_->GetIntConstant(42);
- HInstruction* param1 = MakeParam(DataType::Type::kBool);
- HInstruction* param2 = MakeParam(DataType::Type::kBool);
- HInstruction* param_obj = MakeParam(DataType::Type::kReference);
-
- HInstruction* get_initial = MakeIFieldGet(param_obj, DataType::Type::kReference, bar_offset);
- HInstruction* if_inst = new (GetAllocator()) HIf(param1);
- entry->AddInstruction(get_initial);
- entry->AddInstruction(if_inst);
-
- HInstruction* cls1 = MakeClassLoad();
- HInstruction* new_inst1 = MakeNewInstance(cls1);
- HInstruction* write1 = MakeIFieldSet(new_inst1, c42, foo_offset);
- HInstruction* if_left = new (GetAllocator()) HIf(param2);
- left->AddInstruction(cls1);
- left->AddInstruction(new_inst1);
- left->AddInstruction(write1);
- left->AddInstruction(if_left);
- ManuallyBuildEnvFor(cls1, {});
- new_inst1->CopyEnvironmentFrom(cls1->GetEnvironment());
-
- HInstruction* read_early_return = MakeIFieldGet(new_inst1, DataType::Type::kInt32, foo_offset);
- HInstruction* return_early = new (GetAllocator()) HReturn(read_early_return);
- left_early_return->AddInstruction(read_early_return);
- left_early_return->AddInstruction(return_early);
-
- HInstruction* write_escape = MakeIFieldSet(param_obj, new_inst1, bar_offset);
- HInstruction* write_goto = new (GetAllocator()) HGoto();
- left_write_escape->AddInstruction(write_escape);
- left_write_escape->AddInstruction(write_goto);
-
- right->AddInstruction(new (GetAllocator()) HGoto());
-
- HInstruction* read_bottom = MakeIFieldGet(param_obj, DataType::Type::kReference, bar_offset);
- HInstruction* final_read = MakeIFieldGet(read_bottom, DataType::Type::kInt32, foo_offset);
- HInstruction* return_exit = new (GetAllocator()) HReturn(final_read);
- breturn->AddInstruction(read_bottom);
- breturn->AddInstruction(final_read);
- breturn->AddInstruction(return_exit);
-
- SetupExit(exit);
-
- PerformLSEWithPartial(blks);
-
- EXPECT_INS_REMOVED(read_bottom);
- EXPECT_INS_REMOVED(read_early_return);
- EXPECT_INS_EQ(return_early->InputAt(0), c42);
- EXPECT_INS_RETAINED(final_read);
- HNewInstance* moved_ni =
- FindSingleInstruction<HNewInstance>(graph_, left_write_escape->GetSinglePredecessor());
- EXPECT_TRUE(final_read->InputAt(0)->IsPhi());
- EXPECT_INS_EQ(final_read->InputAt(0)->InputAt(0), moved_ni);
- EXPECT_INS_EQ(final_read->InputAt(0)->InputAt(1), get_initial);
-}
-
-// // ENTRY
-// obj = new Obj();
-// obj.field = 3;
-// if (parameter_value) {
-// // LEFT
-// escape(obj);
-// } else {
-// // RIGHT
-// // ELIMINATE
-// obj.field = 2;
-// }
-// // MERGE
-// if (second_param) {
-// // NON_ESCAPE
-// obj.field = 1;
-// noescape();
-// }
-// EXIT
-// predicated-ELIMINATE
-// return obj.field
-TEST_F(LoadStoreEliminationTest, PredicatedLoad2) {
- ScopedObjectAccess soa(Thread::Current());
- VariableSizedHandleScope vshs(soa.Self());
- CreateGraph(/*handles=*/&vshs);
- AdjacencyListGraph blks(SetupFromAdjacencyList("entry",
- "exit",
- {{"entry", "left"},
- {"entry", "right"},
- {"left", "merge"},
- {"right", "merge"},
- {"merge", "non_escape"},
- {"non_escape", "breturn"},
- {"merge", "crit_break"},
- {"crit_break", "breturn"},
- {"breturn", "exit"}}));
-#define GET_BLOCK(name) HBasicBlock* name = blks.Get(#name)
- GET_BLOCK(entry);
- GET_BLOCK(exit);
- GET_BLOCK(breturn);
- GET_BLOCK(left);
- GET_BLOCK(right);
- GET_BLOCK(merge);
- GET_BLOCK(non_escape);
- GET_BLOCK(crit_break);
-#undef GET_BLOCK
- EnsurePredecessorOrder(merge, {left, right});
- EnsurePredecessorOrder(breturn, {crit_break, non_escape});
- HInstruction* bool_value = MakeParam(DataType::Type::kBool);
- HInstruction* bool_value2 = MakeParam(DataType::Type::kBool);
- HInstruction* null_const = graph_->GetNullConstant();
- HInstruction* c1 = graph_->GetIntConstant(1);
- HInstruction* c2 = graph_->GetIntConstant(2);
- HInstruction* c3 = graph_->GetIntConstant(3);
-
- HInstruction* cls = MakeClassLoad();
- HInstruction* new_inst = MakeNewInstance(cls);
- HInstruction* write_entry = MakeIFieldSet(new_inst, c3, MemberOffset(32));
- HInstruction* if_inst = new (GetAllocator()) HIf(bool_value);
- entry->AddInstruction(cls);
- entry->AddInstruction(new_inst);
- entry->AddInstruction(write_entry);
- entry->AddInstruction(if_inst);
- ManuallyBuildEnvFor(cls, {});
- new_inst->CopyEnvironmentFrom(cls->GetEnvironment());
-
- HInstruction* call_left = MakeInvoke(DataType::Type::kVoid, { new_inst });
- HInstruction* goto_left = new (GetAllocator()) HGoto();
- left->AddInstruction(call_left);
- left->AddInstruction(goto_left);
- call_left->CopyEnvironmentFrom(cls->GetEnvironment());
-
- HInstruction* write_right = MakeIFieldSet(new_inst, c2, MemberOffset(32));
- HInstruction* goto_right = new (GetAllocator()) HGoto();
- right->AddInstruction(write_right);
- right->AddInstruction(goto_right);
-
- HInstruction* merge_if = new (GetAllocator()) HIf(bool_value2);
- merge->AddInstruction(merge_if);
-
- crit_break->AddInstruction(new (GetAllocator()) HGoto());
-
- HInstruction* write_non_escape = MakeIFieldSet(new_inst, c1, MemberOffset(32));
- HInstruction* non_escape_call = MakeInvoke(DataType::Type::kVoid, {});
- HInstruction* non_escape_goto = new (GetAllocator()) HGoto();
- non_escape->AddInstruction(write_non_escape);
- non_escape->AddInstruction(non_escape_call);
- non_escape->AddInstruction(non_escape_goto);
- non_escape_call->CopyEnvironmentFrom(cls->GetEnvironment());
-
- HInstruction* read_bottom = MakeIFieldGet(new_inst, DataType::Type::kInt32, MemberOffset(32));
- HInstruction* return_exit = new (GetAllocator()) HReturn(read_bottom);
- breturn->AddInstruction(read_bottom);
- breturn->AddInstruction(return_exit);
-
- SetupExit(exit);
-
- PerformLSEWithPartial(blks);
-
- EXPECT_INS_REMOVED(read_bottom);
- EXPECT_INS_REMOVED(write_right);
- EXPECT_INS_RETAINED(call_left);
- std::vector<HPhi*> merges;
- HPredicatedInstanceFieldGet* pred_get =
- FindSingleInstruction<HPredicatedInstanceFieldGet>(graph_, breturn);
- std::tie(merges) = FindAllInstructions<HPhi>(graph_);
- ASSERT_EQ(merges.size(), 3u);
- HPhi* merge_value_return = FindOrNull(merges.begin(), merges.end(), [&](HPhi* p) {
- return p->GetType() == DataType::Type::kInt32 && p->GetBlock() == breturn;
- });
- HPhi* merge_value_merge = FindOrNull(merges.begin(), merges.end(), [&](HPhi* p) {
- return p->GetType() == DataType::Type::kInt32 && p->GetBlock() != breturn;
- });
- HPhi* merge_alloc = FindOrNull(merges.begin(), merges.end(), [](HPhi* p) {
- return p->GetType() == DataType::Type::kReference;
- });
- ASSERT_NE(merge_alloc, nullptr);
- EXPECT_TRUE(merge_alloc->InputAt(0)->IsNewInstance()) << *merge_alloc;
- EXPECT_INS_EQ(merge_alloc->InputAt(0)->InputAt(0), cls)
- << " phi is: " << merge_alloc->DumpWithArgs();
- EXPECT_INS_EQ(merge_alloc->InputAt(1), null_const);
- ASSERT_NE(pred_get, nullptr);
- EXPECT_INS_EQ(pred_get->GetTarget(), merge_alloc);
- EXPECT_INS_EQ(pred_get->GetDefaultValue(), merge_value_return)
- << "get is " << pred_get->DumpWithArgs();
- EXPECT_INS_EQ(merge_value_return->InputAt(0), merge_value_merge)
- << " phi is: " << *merge_value_return;
- EXPECT_INS_EQ(merge_value_return->InputAt(1), c1)
- << " phi is: " << merge_value_return->DumpWithArgs();
- EXPECT_INS_EQ(merge_value_merge->InputAt(0), graph_->GetIntConstant(0))
- << " phi is: " << *merge_value_merge;
- EXPECT_INS_EQ(merge_value_merge->InputAt(1), c2)
- << " phi is: " << merge_value_merge->DumpWithArgs();
-}
-
-// // ENTRY
-// obj = new Obj();
-// obj.field = 3;
-// if (parameter_value) {
-// // LEFT
-// escape(obj);
-// } else {
-// // RIGHT
-// // ELIMINATE
-// obj.field = 2;
-// }
-// // MERGE
-// if (second_param) {
-// // NON_ESCAPE
-// obj.field = 1;
-// }
-// noescape();
-// EXIT
-// predicated-ELIMINATE
-// return obj.field
-TEST_F(LoadStoreEliminationTest, PredicatedLoad3) {
- ScopedObjectAccess soa(Thread::Current());
- VariableSizedHandleScope vshs(soa.Self());
- CreateGraph(/*handles=*/&vshs);
- AdjacencyListGraph blks(SetupFromAdjacencyList("entry",
- "exit",
- {{"entry", "left"},
- {"entry", "right"},
- {"left", "merge"},
- {"right", "merge"},
- {"merge", "non_escape"},
- {"non_escape", "breturn"},
- {"merge", "crit_break"},
- {"crit_break", "breturn"},
- {"breturn", "exit"}}));
-#define GET_BLOCK(name) HBasicBlock* name = blks.Get(#name)
- GET_BLOCK(entry);
- GET_BLOCK(exit);
- GET_BLOCK(breturn);
- GET_BLOCK(left);
- GET_BLOCK(right);
- GET_BLOCK(merge);
- GET_BLOCK(crit_break);
- GET_BLOCK(non_escape);
-#undef GET_BLOCK
- EnsurePredecessorOrder(merge, {left, right});
- EnsurePredecessorOrder(breturn, {crit_break, non_escape});
- HInstruction* bool_value = MakeParam(DataType::Type::kBool);
- HInstruction* bool_value2 = MakeParam(DataType::Type::kBool);
- HInstruction* null_const = graph_->GetNullConstant();
- HInstruction* c1 = graph_->GetIntConstant(1);
- HInstruction* c2 = graph_->GetIntConstant(2);
- HInstruction* c3 = graph_->GetIntConstant(3);
-
- HInstruction* cls = MakeClassLoad();
- HInstruction* new_inst = MakeNewInstance(cls);
- HInstruction* write_entry = MakeIFieldSet(new_inst, c3, MemberOffset(32));
- HInstruction* if_inst = new (GetAllocator()) HIf(bool_value);
- entry->AddInstruction(cls);
- entry->AddInstruction(new_inst);
- entry->AddInstruction(write_entry);
- entry->AddInstruction(if_inst);
- ManuallyBuildEnvFor(cls, {});
- new_inst->CopyEnvironmentFrom(cls->GetEnvironment());
-
- HInstruction* call_left = MakeInvoke(DataType::Type::kVoid, { new_inst });
- HInstruction* goto_left = new (GetAllocator()) HGoto();
- left->AddInstruction(call_left);
- left->AddInstruction(goto_left);
- call_left->CopyEnvironmentFrom(cls->GetEnvironment());
-
- HInstruction* write_right = MakeIFieldSet(new_inst, c2, MemberOffset(32));
- HInstruction* goto_right = new (GetAllocator()) HGoto();
- right->AddInstruction(write_right);
- right->AddInstruction(goto_right);
-
- HInstruction* merge_if = new (GetAllocator()) HIf(bool_value2);
- merge->AddInstruction(merge_if);
-
- HInstruction* write_non_escape = MakeIFieldSet(new_inst, c1, MemberOffset(32));
- HInstruction* non_escape_goto = new (GetAllocator()) HGoto();
- non_escape->AddInstruction(write_non_escape);
- non_escape->AddInstruction(non_escape_goto);
-
- crit_break->AddInstruction(new (GetAllocator()) HGoto());
-
- HInstruction* bottom_call = MakeInvoke(DataType::Type::kVoid, {});
- HInstruction* read_bottom = MakeIFieldGet(new_inst, DataType::Type::kInt32, MemberOffset(32));
- HInstruction* return_exit = new (GetAllocator()) HReturn(read_bottom);
- breturn->AddInstruction(bottom_call);
- breturn->AddInstruction(read_bottom);
- breturn->AddInstruction(return_exit);
- bottom_call->CopyEnvironmentFrom(cls->GetEnvironment());
-
- SetupExit(exit);
-
- PerformLSEWithPartial(blks);
-
- EXPECT_INS_REMOVED(read_bottom);
- EXPECT_INS_REMOVED(write_right);
- EXPECT_INS_RETAINED(call_left);
- std::vector<HPhi*> merges;
- HPredicatedInstanceFieldGet* pred_get =
- FindSingleInstruction<HPredicatedInstanceFieldGet>(graph_, breturn);
- std::tie(merges) = FindAllInstructions<HPhi>(graph_);
- ASSERT_EQ(merges.size(), 3u);
- HPhi* merge_value_return = FindOrNull(merges.begin(), merges.end(), [&](HPhi* p) {
- return p->GetType() == DataType::Type::kInt32 && p->GetBlock() == breturn;
- });
- HPhi* merge_value_merge = FindOrNull(merges.begin(), merges.end(), [&](HPhi* p) {
- return p->GetType() == DataType::Type::kInt32 && p->GetBlock() != breturn;
- });
- HPhi* merge_alloc = FindOrNull(merges.begin(), merges.end(), [](HPhi* p) {
- return p->GetType() == DataType::Type::kReference;
- });
- ASSERT_NE(merge_alloc, nullptr);
- EXPECT_TRUE(merge_alloc->InputAt(0)->IsNewInstance()) << merge_alloc->DumpWithArgs();
- EXPECT_INS_EQ(merge_alloc->InputAt(0)->InputAt(0), cls)
- << " phi is: " << merge_alloc->DumpWithArgs();
- EXPECT_INS_EQ(merge_alloc->InputAt(1), null_const);
- ASSERT_NE(pred_get, nullptr);
- EXPECT_INS_EQ(pred_get->GetTarget(), merge_alloc);
- EXPECT_INS_EQ(pred_get->GetDefaultValue(), merge_value_return)
- << "get is " << pred_get->DumpWithArgs();
- EXPECT_INS_EQ(merge_value_return->InputAt(0), merge_value_merge)
- << " phi is: " << *merge_value_return;
- EXPECT_INS_EQ(merge_value_return->InputAt(1), c1) << " phi is: " << *merge_value_return;
- EXPECT_INS_EQ(merge_value_merge->InputAt(0), graph_->GetIntConstant(0))
- << " phi is: " << *merge_value_merge;
- EXPECT_INS_EQ(merge_value_merge->InputAt(1), c2) << " phi is: " << *merge_value_merge;
-}
-
-// // ENTRY
-// obj = new Obj();
-// if (parameter_value) {
-// // LEFT
-// obj.field = 3;
-// escape(obj);
-// } else {
-//   // RIGHT - Leave it as the default value
-// }
-// EXIT
-// predicated-ELIMINATE
-// return obj.field
-TEST_F(LoadStoreEliminationTest, PredicatedLoadDefaultValue) {
- ScopedObjectAccess soa(Thread::Current());
- VariableSizedHandleScope vshs(soa.Self());
- CreateGraph(/*handles=*/&vshs);
- AdjacencyListGraph blks(SetupFromAdjacencyList("entry",
- "exit",
- {{"entry", "left"},
- {"entry", "right"},
- {"left", "breturn"},
- {"right", "breturn"},
- {"breturn", "exit"}}));
-#define GET_BLOCK(name) HBasicBlock* name = blks.Get(#name)
- GET_BLOCK(entry);
- GET_BLOCK(exit);
- GET_BLOCK(breturn);
- GET_BLOCK(left);
- GET_BLOCK(right);
-#undef GET_BLOCK
- EnsurePredecessorOrder(breturn, {left, right});
- HInstruction* bool_value = MakeParam(DataType::Type::kBool);
- HInstruction* null_const = graph_->GetNullConstant();
- HInstruction* c0 = graph_->GetIntConstant(0);
- HInstruction* c3 = graph_->GetIntConstant(3);
-
- HInstruction* cls = MakeClassLoad();
- HInstruction* new_inst = MakeNewInstance(cls);
- HInstruction* if_inst = new (GetAllocator()) HIf(bool_value);
- entry->AddInstruction(cls);
- entry->AddInstruction(new_inst);
- entry->AddInstruction(if_inst);
- ManuallyBuildEnvFor(cls, {});
- new_inst->CopyEnvironmentFrom(cls->GetEnvironment());
-
- HInstruction* write_left = MakeIFieldSet(new_inst, c3, MemberOffset(32));
- HInstruction* call_left = MakeInvoke(DataType::Type::kVoid, { new_inst });
- HInstruction* goto_left = new (GetAllocator()) HGoto();
- left->AddInstruction(write_left);
- left->AddInstruction(call_left);
- left->AddInstruction(goto_left);
- call_left->CopyEnvironmentFrom(cls->GetEnvironment());
-
- HInstruction* goto_right = new (GetAllocator()) HGoto();
- right->AddInstruction(goto_right);
-
- HInstruction* read_bottom = MakeIFieldGet(new_inst, DataType::Type::kInt32, MemberOffset(32));
- HInstruction* return_exit = new (GetAllocator()) HReturn(read_bottom);
- breturn->AddInstruction(read_bottom);
- breturn->AddInstruction(return_exit);
-
- SetupExit(exit);
-
- PerformLSEWithPartial(blks);
-
- EXPECT_INS_REMOVED(read_bottom);
- EXPECT_INS_RETAINED(write_left);
- EXPECT_INS_RETAINED(call_left);
- HPredicatedInstanceFieldGet* pred_get =
- FindSingleInstruction<HPredicatedInstanceFieldGet>(graph_, breturn);
- HPhi* merge_alloc = FindSingleInstruction<HPhi>(graph_, breturn);
- ASSERT_NE(merge_alloc, nullptr);
- EXPECT_TRUE(merge_alloc->InputAt(0)->IsNewInstance()) << *merge_alloc;
- EXPECT_EQ(merge_alloc->InputAt(0)->InputAt(0), cls) << *merge_alloc << " cls? " << *cls;
- EXPECT_EQ(merge_alloc->InputAt(1), null_const);
- ASSERT_NE(pred_get, nullptr);
- EXPECT_INS_EQ(pred_get->GetTarget(), merge_alloc);
- EXPECT_INS_EQ(pred_get->GetDefaultValue(), c0) << " pred-get is: " << *pred_get;
-}
-
-// // ENTRY
-// obj = new Obj();
-// // ALL should be kept
-// switch (parameter_value) {
-// case 1:
-// // Case1
-// obj.field = 1;
-// call_func(obj);
-// break;
-// case 2:
-// // Case2
-// obj.field = 2;
-// call_func(obj);
-// break;
-// default:
-// // Case3
-// obj.field = 3;
-// do {
-// if (test2()) { } else { obj.field = 5; }
-// } while (test());
-// break;
-// }
-// EXIT
-// // predicated-ELIMINATE
-// return obj.field
-TEST_F(LoadStoreEliminationTest, PartialLoopPhis1) {
- ScopedObjectAccess soa(Thread::Current());
- VariableSizedHandleScope vshs(soa.Self());
- CreateGraph(/*handles=*/&vshs);
- AdjacencyListGraph blks(SetupFromAdjacencyList("entry",
- "exit",
- {{"entry", "bswitch"},
- {"bswitch", "case1"},
- {"bswitch", "case2"},
- {"bswitch", "case3"},
- {"case1", "breturn"},
- {"case2", "breturn"},
- {"case3", "loop_pre_header"},
- {"loop_pre_header", "loop_header"},
- {"loop_header", "loop_body"},
- {"loop_body", "loop_if_left"},
- {"loop_body", "loop_if_right"},
- {"loop_if_left", "loop_merge"},
- {"loop_if_right", "loop_merge"},
- {"loop_merge", "loop_end"},
- {"loop_end", "loop_header"},
- {"loop_end", "critical_break"},
- {"critical_break", "breturn"},
- {"breturn", "exit"}}));
-#define GET_BLOCK(name) HBasicBlock* name = blks.Get(#name)
- GET_BLOCK(entry);
- GET_BLOCK(bswitch);
- GET_BLOCK(exit);
- GET_BLOCK(breturn);
- GET_BLOCK(case1);
- GET_BLOCK(case2);
- GET_BLOCK(case3);
-
- GET_BLOCK(loop_pre_header);
- GET_BLOCK(loop_header);
- GET_BLOCK(loop_body);
- GET_BLOCK(loop_if_left);
- GET_BLOCK(loop_if_right);
- GET_BLOCK(loop_merge);
- GET_BLOCK(loop_end);
- GET_BLOCK(critical_break);
-#undef GET_BLOCK
- EnsurePredecessorOrder(breturn, {case1, case2, critical_break});
- EnsurePredecessorOrder(loop_header, {loop_pre_header, loop_end});
- EnsurePredecessorOrder(loop_merge, {loop_if_left, loop_if_right});
- CHECK_SUBROUTINE_FAILURE();
- HInstruction* switch_val = MakeParam(DataType::Type::kInt32);
- HInstruction* c1 = graph_->GetIntConstant(1);
- HInstruction* c2 = graph_->GetIntConstant(2);
- HInstruction* c3 = graph_->GetIntConstant(3);
- HInstruction* c5 = graph_->GetIntConstant(5);
-
- HInstruction* cls = MakeClassLoad();
- HInstruction* new_inst = MakeNewInstance(cls);
- HInstruction* entry_goto = new (GetAllocator()) HGoto();
- entry->AddInstruction(cls);
- entry->AddInstruction(new_inst);
- entry->AddInstruction(entry_goto);
- ManuallyBuildEnvFor(cls, {});
- new_inst->CopyEnvironmentFrom(cls->GetEnvironment());
-
- HInstruction* switch_inst = new (GetAllocator()) HPackedSwitch(0, 2, switch_val);
- bswitch->AddInstruction(switch_inst);
-
- HInstruction* write_c1 = MakeIFieldSet(new_inst, c1, MemberOffset(32));
- HInstruction* call_c1 = MakeInvoke(DataType::Type::kVoid, { new_inst });
- HInstruction* goto_c1 = new (GetAllocator()) HGoto();
- case1->AddInstruction(write_c1);
- case1->AddInstruction(call_c1);
- case1->AddInstruction(goto_c1);
- call_c1->CopyEnvironmentFrom(cls->GetEnvironment());
-
- HInstruction* write_c2 = MakeIFieldSet(new_inst, c2, MemberOffset(32));
- HInstruction* call_c2 = MakeInvoke(DataType::Type::kVoid, { new_inst });
- HInstruction* goto_c2 = new (GetAllocator()) HGoto();
- case2->AddInstruction(write_c2);
- case2->AddInstruction(call_c2);
- case2->AddInstruction(goto_c2);
- call_c2->CopyEnvironmentFrom(cls->GetEnvironment());
-
- HInstruction* write_c3 = MakeIFieldSet(new_inst, c3, MemberOffset(32));
- HInstruction* goto_c3 = new (GetAllocator()) HGoto();
- case3->AddInstruction(write_c3);
- case3->AddInstruction(goto_c3);
-
- HInstruction* goto_preheader = new (GetAllocator()) HGoto();
- loop_pre_header->AddInstruction(goto_preheader);
-
- HInstruction* suspend_check_header = new (GetAllocator()) HSuspendCheck();
- HInstruction* goto_header = new (GetAllocator()) HGoto();
- loop_header->AddInstruction(suspend_check_header);
- loop_header->AddInstruction(goto_header);
- suspend_check_header->CopyEnvironmentFrom(cls->GetEnvironment());
-
- HInstruction* call_loop_body = MakeInvoke(DataType::Type::kBool, {});
- HInstruction* if_loop_body = new (GetAllocator()) HIf(call_loop_body);
- loop_body->AddInstruction(call_loop_body);
- loop_body->AddInstruction(if_loop_body);
- call_loop_body->CopyEnvironmentFrom(cls->GetEnvironment());
-
- HInstruction* goto_loop_left = new (GetAllocator()) HGoto();
- loop_if_left->AddInstruction(goto_loop_left);
-
- HInstruction* write_loop_right = MakeIFieldSet(new_inst, c5, MemberOffset(32));
- HInstruction* goto_loop_right = new (GetAllocator()) HGoto();
- loop_if_right->AddInstruction(write_loop_right);
- loop_if_right->AddInstruction(goto_loop_right);
-
- HInstruction* goto_loop_merge = new (GetAllocator()) HGoto();
- loop_merge->AddInstruction(goto_loop_merge);
-
- HInstruction* call_end = MakeInvoke(DataType::Type::kBool, {});
- HInstruction* if_end = new (GetAllocator()) HIf(call_end);
- loop_end->AddInstruction(call_end);
- loop_end->AddInstruction(if_end);
- call_end->CopyEnvironmentFrom(cls->GetEnvironment());
-
- HInstruction* goto_critical_break = new (GetAllocator()) HGoto();
- critical_break->AddInstruction(goto_critical_break);
-
- HInstruction* read_bottom = MakeIFieldGet(new_inst, DataType::Type::kInt32, MemberOffset(32));
- HInstruction* return_exit = new (GetAllocator()) HReturn(read_bottom);
- breturn->AddInstruction(read_bottom);
- breturn->AddInstruction(return_exit);
-
- SetupExit(exit);
-
- PerformLSEWithPartial(blks);
-
- HPredicatedInstanceFieldGet* pred_get =
- FindSingleInstruction<HPredicatedInstanceFieldGet>(graph_, breturn);
- EXPECT_INS_REMOVED(read_bottom) << *read_bottom;
- ASSERT_TRUE(pred_get != nullptr);
- HPhi* inst_return_phi = pred_get->GetTarget()->AsPhi();
- ASSERT_TRUE(inst_return_phi != nullptr) << pred_get->GetTarget()->DumpWithArgs();
- EXPECT_INS_EQ(inst_return_phi->InputAt(0),
- FindSingleInstruction<HNewInstance>(graph_, case1->GetSinglePredecessor()));
- EXPECT_INS_EQ(inst_return_phi->InputAt(1),
- FindSingleInstruction<HNewInstance>(graph_, case2->GetSinglePredecessor()));
- EXPECT_INS_EQ(inst_return_phi->InputAt(2), graph_->GetNullConstant());
- HPhi* inst_value_phi = pred_get->GetDefaultValue()->AsPhi();
- ASSERT_TRUE(inst_value_phi != nullptr) << pred_get->GetDefaultValue()->DumpWithArgs();
- EXPECT_INS_EQ(inst_value_phi->InputAt(0), graph_->GetIntConstant(0));
- EXPECT_INS_EQ(inst_value_phi->InputAt(1), graph_->GetIntConstant(0));
- HPhi* loop_merge_phi = FindSingleInstruction<HPhi>(graph_, loop_merge);
- ASSERT_TRUE(loop_merge_phi != nullptr);
- HPhi* loop_header_phi = FindSingleInstruction<HPhi>(graph_, loop_header);
- ASSERT_TRUE(loop_header_phi != nullptr);
- EXPECT_INS_EQ(loop_header_phi->InputAt(0), c3);
- EXPECT_INS_EQ(loop_header_phi->InputAt(1), loop_merge_phi);
- EXPECT_INS_EQ(loop_merge_phi->InputAt(0), loop_header_phi);
- EXPECT_INS_EQ(loop_merge_phi->InputAt(1), c5);
- EXPECT_INS_EQ(inst_value_phi->InputAt(2), loop_merge_phi);
- EXPECT_INS_RETAINED(write_c1) << *write_c1;
- EXPECT_INS_RETAINED(write_c2) << *write_c2;
- EXPECT_INS_REMOVED(write_c3) << *write_c3;
- EXPECT_INS_REMOVED(write_loop_right) << *write_loop_right;
-}
-
-// // ENTRY
-// obj = new Obj();
-// switch (parameter_value) {
-// case 1:
-// // Case1
-// obj.field = 1;
-// call_func(obj);
-// break;
-// case 2:
-// // Case2
-// obj.field = 2;
-// call_func(obj);
-// break;
-// default:
-// // Case3
-// obj.field = 3;
-// while (!test()) {
-// if (test2()) { } else { obj.field = 5; }
-// }
-// break;
-// }
-// EXIT
-// // predicated-ELIMINATE
-// return obj.field
-TEST_F(LoadStoreEliminationTest, PartialLoopPhis2) {
- ScopedObjectAccess soa(Thread::Current());
- VariableSizedHandleScope vshs(soa.Self());
- CreateGraph(/*handles=*/&vshs);
- AdjacencyListGraph blks(SetupFromAdjacencyList("entry",
- "exit",
- {{"entry", "bswitch"},
- {"bswitch", "case1"},
- {"bswitch", "case2"},
- {"bswitch", "case3"},
- {"case1", "breturn"},
- {"case2", "breturn"},
- {"case3", "loop_pre_header"},
-
- {"loop_pre_header", "loop_header"},
- {"loop_header", "critical_break"},
- {"loop_header", "loop_body"},
- {"loop_body", "loop_if_left"},
- {"loop_body", "loop_if_right"},
- {"loop_if_left", "loop_merge"},
- {"loop_if_right", "loop_merge"},
- {"loop_merge", "loop_header"},
-
- {"critical_break", "breturn"},
- {"breturn", "exit"}}));
-#define GET_BLOCK(name) HBasicBlock* name = blks.Get(#name)
- GET_BLOCK(entry);
- GET_BLOCK(bswitch);
- GET_BLOCK(exit);
- GET_BLOCK(breturn);
- GET_BLOCK(case1);
- GET_BLOCK(case2);
- GET_BLOCK(case3);
-
- GET_BLOCK(loop_pre_header);
- GET_BLOCK(loop_header);
- GET_BLOCK(loop_body);
- GET_BLOCK(loop_if_left);
- GET_BLOCK(loop_if_right);
- GET_BLOCK(loop_merge);
- GET_BLOCK(critical_break);
-#undef GET_BLOCK
- EnsurePredecessorOrder(breturn, {case1, case2, critical_break});
- EnsurePredecessorOrder(loop_header, {loop_pre_header, loop_merge});
- EnsurePredecessorOrder(loop_merge, {loop_if_left, loop_if_right});
- CHECK_SUBROUTINE_FAILURE();
- HInstruction* switch_val = MakeParam(DataType::Type::kInt32);
- HInstruction* c1 = graph_->GetIntConstant(1);
- HInstruction* c2 = graph_->GetIntConstant(2);
- HInstruction* c3 = graph_->GetIntConstant(3);
- HInstruction* c5 = graph_->GetIntConstant(5);
-
- HInstruction* cls = MakeClassLoad();
- HInstruction* new_inst = MakeNewInstance(cls);
- HInstruction* entry_goto = new (GetAllocator()) HGoto();
- entry->AddInstruction(cls);
- entry->AddInstruction(new_inst);
- entry->AddInstruction(entry_goto);
- ManuallyBuildEnvFor(cls, {});
- new_inst->CopyEnvironmentFrom(cls->GetEnvironment());
-
- HInstruction* switch_inst = new (GetAllocator()) HPackedSwitch(0, 2, switch_val);
- bswitch->AddInstruction(switch_inst);
-
- HInstruction* write_c1 = MakeIFieldSet(new_inst, c1, MemberOffset(32));
- HInstruction* call_c1 = MakeInvoke(DataType::Type::kVoid, { new_inst });
- HInstruction* goto_c1 = new (GetAllocator()) HGoto();
- case1->AddInstruction(write_c1);
- case1->AddInstruction(call_c1);
- case1->AddInstruction(goto_c1);
- call_c1->CopyEnvironmentFrom(cls->GetEnvironment());
-
- HInstruction* write_c2 = MakeIFieldSet(new_inst, c2, MemberOffset(32));
- HInstruction* call_c2 = MakeInvoke(DataType::Type::kVoid, { new_inst });
- HInstruction* goto_c2 = new (GetAllocator()) HGoto();
- case2->AddInstruction(write_c2);
- case2->AddInstruction(call_c2);
- case2->AddInstruction(goto_c2);
- call_c2->CopyEnvironmentFrom(cls->GetEnvironment());
-
- HInstruction* write_c3 = MakeIFieldSet(new_inst, c3, MemberOffset(32));
- HInstruction* goto_c3 = new (GetAllocator()) HGoto();
- case3->AddInstruction(write_c3);
- case3->AddInstruction(goto_c3);
-
- HInstruction* goto_preheader = new (GetAllocator()) HGoto();
- loop_pre_header->AddInstruction(goto_preheader);
-
- HInstruction* suspend_check_header = new (GetAllocator()) HSuspendCheck();
- HInstruction* call_header = MakeInvoke(DataType::Type::kBool, {});
- HInstruction* if_header = new (GetAllocator()) HIf(call_header);
- loop_header->AddInstruction(suspend_check_header);
- loop_header->AddInstruction(call_header);
- loop_header->AddInstruction(if_header);
- call_header->CopyEnvironmentFrom(cls->GetEnvironment());
- suspend_check_header->CopyEnvironmentFrom(cls->GetEnvironment());
-
- HInstruction* call_loop_body = MakeInvoke(DataType::Type::kBool, {});
- HInstruction* if_loop_body = new (GetAllocator()) HIf(call_loop_body);
- loop_body->AddInstruction(call_loop_body);
- loop_body->AddInstruction(if_loop_body);
- call_loop_body->CopyEnvironmentFrom(cls->GetEnvironment());
-
- HInstruction* goto_loop_left = new (GetAllocator()) HGoto();
- loop_if_left->AddInstruction(goto_loop_left);
-
- HInstruction* write_loop_right = MakeIFieldSet(new_inst, c5, MemberOffset(32));
- HInstruction* goto_loop_right = new (GetAllocator()) HGoto();
- loop_if_right->AddInstruction(write_loop_right);
- loop_if_right->AddInstruction(goto_loop_right);
-
- HInstruction* goto_loop_merge = new (GetAllocator()) HGoto();
- loop_merge->AddInstruction(goto_loop_merge);
-
- HInstruction* goto_critical_break = new (GetAllocator()) HGoto();
- critical_break->AddInstruction(goto_critical_break);
-
- HInstruction* read_bottom = MakeIFieldGet(new_inst, DataType::Type::kInt32, MemberOffset(32));
- HInstruction* return_exit = new (GetAllocator()) HReturn(read_bottom);
- breturn->AddInstruction(read_bottom);
- breturn->AddInstruction(return_exit);
-
- SetupExit(exit);
-
- PerformLSEWithPartial(blks);
-
- HPredicatedInstanceFieldGet* pred_get =
- FindSingleInstruction<HPredicatedInstanceFieldGet>(graph_, breturn);
- EXPECT_INS_REMOVED(read_bottom) << *read_bottom;
- ASSERT_TRUE(pred_get != nullptr);
- HPhi* inst_return_phi = pred_get->GetTarget()->AsPhi();
- ASSERT_TRUE(inst_return_phi != nullptr) << pred_get->GetTarget()->DumpWithArgs();
- EXPECT_INS_EQ(inst_return_phi->InputAt(0),
- FindSingleInstruction<HNewInstance>(graph_, case1->GetSinglePredecessor()));
- EXPECT_INS_EQ(inst_return_phi->InputAt(1),
- FindSingleInstruction<HNewInstance>(graph_, case2->GetSinglePredecessor()));
- EXPECT_INS_EQ(inst_return_phi->InputAt(2), graph_->GetNullConstant());
- HPhi* inst_value_phi = pred_get->GetDefaultValue()->AsPhi();
- ASSERT_TRUE(inst_value_phi != nullptr) << pred_get->GetDefaultValue()->DumpWithArgs();
- EXPECT_INS_EQ(inst_value_phi->InputAt(0), graph_->GetIntConstant(0));
- EXPECT_INS_EQ(inst_value_phi->InputAt(1), graph_->GetIntConstant(0));
- HPhi* loop_merge_phi = FindSingleInstruction<HPhi>(graph_, loop_merge);
- ASSERT_TRUE(loop_merge_phi != nullptr);
- HPhi* loop_header_phi = FindSingleInstruction<HPhi>(graph_, loop_header);
- ASSERT_TRUE(loop_header_phi != nullptr);
- EXPECT_INS_EQ(loop_header_phi->InputAt(0), c3);
- EXPECT_INS_EQ(loop_header_phi->InputAt(1), loop_merge_phi);
- EXPECT_INS_EQ(loop_merge_phi->InputAt(0), loop_header_phi);
- EXPECT_INS_EQ(loop_merge_phi->InputAt(1), c5);
- EXPECT_INS_EQ(inst_value_phi->InputAt(2), loop_header_phi);
- EXPECT_INS_RETAINED(write_c1) << *write_c1;
- EXPECT_INS_RETAINED(write_c2) << *write_c2;
- EXPECT_INS_REMOVED(write_c3) << *write_c3;
- EXPECT_INS_REMOVED(write_loop_right) << *write_loop_right;
-}
-
-// // ENTRY
-// obj = new Obj();
-// obj.field = 3;
-// while (!test()) {
-// if (test2()) { } else { obj.field = 5; }
-// }
-// if (parameter_value) {
-// escape(obj);
-// }
-// EXIT
-// return obj.field
-TEST_F(LoadStoreEliminationTest, PartialLoopPhis3) {
- ScopedObjectAccess soa(Thread::Current());
- VariableSizedHandleScope vshs(soa.Self());
- CreateGraph(/*handles=*/&vshs);
- AdjacencyListGraph blks(SetupFromAdjacencyList("entry",
- "exit",
- {{"entry", "loop_pre_header"},
-
- {"loop_pre_header", "loop_header"},
- {"loop_header", "escape_check"},
- {"loop_header", "loop_body"},
- {"loop_body", "loop_if_left"},
- {"loop_body", "loop_if_right"},
- {"loop_if_left", "loop_merge"},
- {"loop_if_right", "loop_merge"},
- {"loop_merge", "loop_header"},
-
- {"escape_check", "escape"},
- {"escape_check", "no_escape"},
- {"no_escape", "breturn"},
- {"escape", "breturn"},
- {"breturn", "exit"}}));
-#define GET_BLOCK(name) HBasicBlock* name = blks.Get(#name)
- GET_BLOCK(entry);
- GET_BLOCK(exit);
- GET_BLOCK(breturn);
- GET_BLOCK(no_escape);
- GET_BLOCK(escape);
- GET_BLOCK(escape_check);
-
- GET_BLOCK(loop_pre_header);
- GET_BLOCK(loop_header);
- GET_BLOCK(loop_body);
- GET_BLOCK(loop_if_left);
- GET_BLOCK(loop_if_right);
- GET_BLOCK(loop_merge);
-#undef GET_BLOCK
- EnsurePredecessorOrder(breturn, {no_escape, escape});
- EnsurePredecessorOrder(loop_header, {loop_pre_header, loop_merge});
- EnsurePredecessorOrder(loop_merge, {loop_if_left, loop_if_right});
- CHECK_SUBROUTINE_FAILURE();
- HInstruction* bool_val = MakeParam(DataType::Type::kBool);
- HInstruction* c3 = graph_->GetIntConstant(3);
- HInstruction* c5 = graph_->GetIntConstant(5);
-
- HInstruction* cls = MakeClassLoad();
- HInstruction* new_inst = MakeNewInstance(cls);
- HInstruction* entry_goto = new (GetAllocator()) HGoto();
- entry->AddInstruction(cls);
- entry->AddInstruction(new_inst);
- entry->AddInstruction(entry_goto);
- ManuallyBuildEnvFor(cls, {});
- new_inst->CopyEnvironmentFrom(cls->GetEnvironment());
-
- HInstruction* write_pre_header = MakeIFieldSet(new_inst, c3, MemberOffset(32));
- HInstruction* goto_preheader = new (GetAllocator()) HGoto();
- loop_pre_header->AddInstruction(write_pre_header);
- loop_pre_header->AddInstruction(goto_preheader);
-
- HInstruction* suspend_check_header = new (GetAllocator()) HSuspendCheck();
- HInstruction* call_header = MakeInvoke(DataType::Type::kBool, {});
- HInstruction* if_header = new (GetAllocator()) HIf(call_header);
- loop_header->AddInstruction(suspend_check_header);
- loop_header->AddInstruction(call_header);
- loop_header->AddInstruction(if_header);
- call_header->CopyEnvironmentFrom(cls->GetEnvironment());
- suspend_check_header->CopyEnvironmentFrom(cls->GetEnvironment());
-
- HInstruction* call_loop_body = MakeInvoke(DataType::Type::kBool, {});
- HInstruction* if_loop_body = new (GetAllocator()) HIf(call_loop_body);
- loop_body->AddInstruction(call_loop_body);
- loop_body->AddInstruction(if_loop_body);
- call_loop_body->CopyEnvironmentFrom(cls->GetEnvironment());
-
- HInstruction* goto_loop_left = new (GetAllocator()) HGoto();
- loop_if_left->AddInstruction(goto_loop_left);
-
- HInstruction* write_loop_right = MakeIFieldSet(new_inst, c5, MemberOffset(32));
- HInstruction* goto_loop_right = new (GetAllocator()) HGoto();
- loop_if_right->AddInstruction(write_loop_right);
- loop_if_right->AddInstruction(goto_loop_right);
-
- HInstruction* goto_loop_merge = new (GetAllocator()) HGoto();
- loop_merge->AddInstruction(goto_loop_merge);
-
- HInstruction* if_esc_check = new (GetAllocator()) HIf(bool_val);
- escape_check->AddInstruction(if_esc_check);
-
- HInstruction* call_escape = MakeInvoke(DataType::Type::kVoid, { new_inst });
- HInstruction* goto_escape = new (GetAllocator()) HGoto();
- escape->AddInstruction(call_escape);
- escape->AddInstruction(goto_escape);
- call_escape->CopyEnvironmentFrom(cls->GetEnvironment());
-
- HInstruction* goto_no_escape = new (GetAllocator()) HGoto();
- no_escape->AddInstruction(goto_no_escape);
-
- HInstruction* read_bottom = MakeIFieldGet(new_inst, DataType::Type::kInt32, MemberOffset(32));
- HInstruction* return_exit = new (GetAllocator()) HReturn(read_bottom);
- breturn->AddInstruction(read_bottom);
- breturn->AddInstruction(return_exit);
-
- SetupExit(exit);
-
- PerformLSEWithPartial(blks);
-
- HPredicatedInstanceFieldGet* pred_get =
- FindSingleInstruction<HPredicatedInstanceFieldGet>(graph_, breturn);
- EXPECT_INS_REMOVED(read_bottom) << *read_bottom;
- ASSERT_TRUE(pred_get != nullptr);
- HPhi* inst_return_phi = pred_get->GetTarget()->AsPhi();
- ASSERT_TRUE(inst_return_phi != nullptr) << pred_get->GetTarget()->DumpWithArgs();
- EXPECT_INS_EQ(inst_return_phi->InputAt(0), graph_->GetNullConstant());
- EXPECT_INS_EQ(inst_return_phi->InputAt(1),
- FindSingleInstruction<HNewInstance>(graph_, escape->GetSinglePredecessor()));
- HPhi* inst_value_phi = pred_get->GetDefaultValue()->AsPhi();
- ASSERT_TRUE(inst_value_phi != nullptr) << pred_get->GetDefaultValue()->DumpWithArgs();
- HPhi* loop_header_phi = FindSingleInstruction<HPhi>(graph_, loop_header);
- HPhi* loop_merge_phi = FindSingleInstruction<HPhi>(graph_, loop_merge);
- EXPECT_INS_EQ(inst_value_phi->InputAt(0), loop_header_phi);
- EXPECT_INS_EQ(inst_value_phi->InputAt(1), graph_->GetIntConstant(0));
- EXPECT_INS_EQ(loop_header_phi->InputAt(0), c3);
- EXPECT_INS_EQ(loop_header_phi->InputAt(1), loop_merge_phi);
- EXPECT_INS_EQ(loop_merge_phi->InputAt(0), loop_header_phi);
- EXPECT_INS_EQ(loop_merge_phi->InputAt(1), c5);
- HInstanceFieldSet* mat_set =
- FindSingleInstruction<HInstanceFieldSet>(graph_, escape->GetSinglePredecessor());
- ASSERT_NE(mat_set, nullptr);
- EXPECT_INS_EQ(mat_set->InputAt(1), loop_header_phi);
- EXPECT_INS_REMOVED(write_loop_right) << *write_loop_right;
- EXPECT_INS_REMOVED(write_pre_header) << *write_pre_header;
-}
-
-// // ENTRY
-// obj = new Obj();
-// if (parameter_value) {
-// escape(obj);
-// }
-// obj.field = 3;
-// while (!test()) {
-// if (test2()) { } else { obj.field = 5; }
-// }
-// EXIT
-// return obj.field
-TEST_F(LoadStoreEliminationTest, PartialLoopPhis4) {
- ScopedObjectAccess soa(Thread::Current());
- VariableSizedHandleScope vshs(soa.Self());
- CreateGraph(/*handles=*/&vshs);
- AdjacencyListGraph blks(SetupFromAdjacencyList("entry",
- "exit",
- {{"entry", "escape_check"},
- {"escape_check", "escape"},
- {"escape_check", "no_escape"},
- {"no_escape", "loop_pre_header"},
- {"escape", "loop_pre_header"},
-
- {"loop_pre_header", "loop_header"},
- {"loop_header", "breturn"},
- {"loop_header", "loop_body"},
- {"loop_body", "loop_if_left"},
- {"loop_body", "loop_if_right"},
- {"loop_if_left", "loop_merge"},
- {"loop_if_right", "loop_merge"},
- {"loop_merge", "loop_header"},
-
- {"breturn", "exit"}}));
-#define GET_BLOCK(name) HBasicBlock* name = blks.Get(#name)
- GET_BLOCK(entry);
- GET_BLOCK(exit);
- GET_BLOCK(breturn);
- GET_BLOCK(no_escape);
- GET_BLOCK(escape);
- GET_BLOCK(escape_check);
-
- GET_BLOCK(loop_pre_header);
- GET_BLOCK(loop_header);
- GET_BLOCK(loop_body);
- GET_BLOCK(loop_if_left);
- GET_BLOCK(loop_if_right);
- GET_BLOCK(loop_merge);
-#undef GET_BLOCK
- EnsurePredecessorOrder(loop_pre_header, {no_escape, escape});
- EnsurePredecessorOrder(loop_header, {loop_pre_header, loop_merge});
- EnsurePredecessorOrder(loop_merge, {loop_if_left, loop_if_right});
- CHECK_SUBROUTINE_FAILURE();
- HInstruction* bool_val = MakeParam(DataType::Type::kBool);
- HInstruction* c3 = graph_->GetIntConstant(3);
- HInstruction* c5 = graph_->GetIntConstant(5);
-
- HInstruction* cls = MakeClassLoad();
- HInstruction* new_inst = MakeNewInstance(cls);
- HInstruction* entry_goto = new (GetAllocator()) HGoto();
- entry->AddInstruction(cls);
- entry->AddInstruction(new_inst);
- entry->AddInstruction(entry_goto);
- ManuallyBuildEnvFor(cls, {});
- new_inst->CopyEnvironmentFrom(cls->GetEnvironment());
-
- HInstruction* if_esc_check = new (GetAllocator()) HIf(bool_val);
- escape_check->AddInstruction(if_esc_check);
-
- HInstruction* call_escape = MakeInvoke(DataType::Type::kVoid, { new_inst });
- HInstruction* goto_escape = new (GetAllocator()) HGoto();
- escape->AddInstruction(call_escape);
- escape->AddInstruction(goto_escape);
- call_escape->CopyEnvironmentFrom(cls->GetEnvironment());
-
- HInstruction* goto_no_escape = new (GetAllocator()) HGoto();
- no_escape->AddInstruction(goto_no_escape);
-
- HInstruction* write_pre_header = MakeIFieldSet(new_inst, c3, MemberOffset(32));
- HInstruction* goto_preheader = new (GetAllocator()) HGoto();
- loop_pre_header->AddInstruction(write_pre_header);
- loop_pre_header->AddInstruction(goto_preheader);
-
- HInstruction* suspend_check_header = new (GetAllocator()) HSuspendCheck();
- HInstruction* call_header = MakeInvoke(DataType::Type::kBool, {});
- HInstruction* if_header = new (GetAllocator()) HIf(call_header);
- loop_header->AddInstruction(suspend_check_header);
- loop_header->AddInstruction(call_header);
- loop_header->AddInstruction(if_header);
- call_header->CopyEnvironmentFrom(cls->GetEnvironment());
- suspend_check_header->CopyEnvironmentFrom(cls->GetEnvironment());
-
- HInstruction* call_loop_body = MakeInvoke(DataType::Type::kBool, {});
- HInstruction* if_loop_body = new (GetAllocator()) HIf(call_loop_body);
- loop_body->AddInstruction(call_loop_body);
- loop_body->AddInstruction(if_loop_body);
- call_loop_body->CopyEnvironmentFrom(cls->GetEnvironment());
-
- HInstruction* goto_loop_left = new (GetAllocator()) HGoto();
- loop_if_left->AddInstruction(goto_loop_left);
-
- HInstruction* write_loop_right = MakeIFieldSet(new_inst, c5, MemberOffset(32));
- HInstruction* goto_loop_right = new (GetAllocator()) HGoto();
- loop_if_right->AddInstruction(write_loop_right);
- loop_if_right->AddInstruction(goto_loop_right);
-
- HInstruction* goto_loop_merge = new (GetAllocator()) HGoto();
- loop_merge->AddInstruction(goto_loop_merge);
-
- HInstruction* read_bottom = MakeIFieldGet(new_inst, DataType::Type::kInt32, MemberOffset(32));
- HInstruction* return_exit = new (GetAllocator()) HReturn(read_bottom);
- breturn->AddInstruction(read_bottom);
- breturn->AddInstruction(return_exit);
-
- SetupExit(exit);
-
- PerformLSEWithPartial(blks);
-
- HPredicatedInstanceFieldGet* pred_get =
- FindSingleInstruction<HPredicatedInstanceFieldGet>(graph_, breturn);
- EXPECT_INS_REMOVED(read_bottom) << *read_bottom;
- ASSERT_TRUE(pred_get != nullptr);
- HPhi* inst_return_phi = pred_get->GetTarget()->AsPhi();
- ASSERT_TRUE(inst_return_phi != nullptr) << pred_get->GetTarget()->DumpWithArgs();
- EXPECT_INS_EQ(inst_return_phi->InputAt(0), graph_->GetNullConstant());
- EXPECT_INS_EQ(inst_return_phi->InputAt(1),
- FindSingleInstruction<HNewInstance>(graph_, escape->GetSinglePredecessor()));
- HPhi* inst_value_phi = pred_get->GetDefaultValue()->AsPhi();
- ASSERT_TRUE(inst_value_phi != nullptr) << pred_get->GetDefaultValue()->DumpWithArgs();
- HPhi* loop_header_phi = FindSingleInstruction<HPhi>(graph_, loop_header);
- HPhi* loop_merge_phi = FindSingleInstruction<HPhi>(graph_, loop_merge);
- EXPECT_INS_EQ(inst_value_phi, loop_header_phi);
- EXPECT_INS_EQ(loop_header_phi->InputAt(0), c3);
- EXPECT_INS_EQ(loop_header_phi->InputAt(1), loop_merge_phi);
- EXPECT_INS_EQ(loop_merge_phi->InputAt(0), loop_header_phi);
- EXPECT_INS_EQ(loop_merge_phi->InputAt(1), c5);
- EXPECT_INS_RETAINED(write_loop_right) << *write_loop_right;
- EXPECT_TRUE(write_loop_right->AsInstanceFieldSet()->GetIsPredicatedSet()) << *write_loop_right;
- EXPECT_INS_RETAINED(write_pre_header) << *write_pre_header;
- EXPECT_TRUE(write_pre_header->AsInstanceFieldSet()->GetIsPredicatedSet()) << *write_pre_header;
-}
-
-// // ENTRY
-// obj = new Obj();
-// obj.field = 3;
-// while (!test()) {
-// if (test2()) { } else { obj.field += 5; }
-// }
-// if (parameter_value) {
-// escape(obj);
-// }
-// EXIT
-// return obj.field
-TEST_F(LoadStoreEliminationTest, PartialLoopPhis5) {
- ScopedObjectAccess soa(Thread::Current());
- VariableSizedHandleScope vshs(soa.Self());
- CreateGraph(/*handles=*/&vshs);
- AdjacencyListGraph blks(SetupFromAdjacencyList("entry",
- "exit",
- {{"entry", "loop_pre_header"},
- {"loop_pre_header", "loop_header"},
- {"loop_header", "escape_check"},
- {"loop_header", "loop_body"},
- {"loop_body", "loop_if_left"},
- {"loop_body", "loop_if_right"},
- {"loop_if_left", "loop_merge"},
- {"loop_if_right", "loop_merge"},
- {"loop_merge", "loop_header"},
- {"escape_check", "escape"},
- {"escape_check", "no_escape"},
- {"no_escape", "breturn"},
- {"escape", "breturn"},
- {"breturn", "exit"}}));
-#define GET_BLOCK(name) HBasicBlock* name = blks.Get(#name)
- GET_BLOCK(entry);
- GET_BLOCK(exit);
- GET_BLOCK(breturn);
- GET_BLOCK(no_escape);
- GET_BLOCK(escape);
- GET_BLOCK(escape_check);
-
- GET_BLOCK(loop_pre_header);
- GET_BLOCK(loop_header);
- GET_BLOCK(loop_body);
- GET_BLOCK(loop_if_left);
- GET_BLOCK(loop_if_right);
- GET_BLOCK(loop_merge);
-#undef GET_BLOCK
- EnsurePredecessorOrder(breturn, {no_escape, escape});
- EnsurePredecessorOrder(loop_header, {loop_pre_header, loop_merge});
- EnsurePredecessorOrder(loop_merge, {loop_if_left, loop_if_right});
- CHECK_SUBROUTINE_FAILURE();
- HInstruction* bool_val = MakeParam(DataType::Type::kBool);
- HInstruction* c3 = graph_->GetIntConstant(3);
- HInstruction* c5 = graph_->GetIntConstant(5);
-
- HInstruction* cls = MakeClassLoad();
- HInstruction* new_inst = MakeNewInstance(cls);
- HInstruction* entry_goto = new (GetAllocator()) HGoto();
- entry->AddInstruction(cls);
- entry->AddInstruction(new_inst);
- entry->AddInstruction(entry_goto);
- ManuallyBuildEnvFor(cls, {});
- new_inst->CopyEnvironmentFrom(cls->GetEnvironment());
-
- HInstruction* write_pre_header = MakeIFieldSet(new_inst, c3, MemberOffset(32));
- HInstruction* goto_preheader = new (GetAllocator()) HGoto();
- loop_pre_header->AddInstruction(write_pre_header);
- loop_pre_header->AddInstruction(goto_preheader);
-
- HInstruction* suspend_check_header = new (GetAllocator()) HSuspendCheck();
- HInstruction* call_header = MakeInvoke(DataType::Type::kBool, {});
- HInstruction* if_header = new (GetAllocator()) HIf(call_header);
- loop_header->AddInstruction(suspend_check_header);
- loop_header->AddInstruction(call_header);
- loop_header->AddInstruction(if_header);
- call_header->CopyEnvironmentFrom(cls->GetEnvironment());
- suspend_check_header->CopyEnvironmentFrom(cls->GetEnvironment());
-
- HInstruction* call_loop_body = MakeInvoke(DataType::Type::kBool, {});
- HInstruction* if_loop_body = new (GetAllocator()) HIf(call_loop_body);
- loop_body->AddInstruction(call_loop_body);
- loop_body->AddInstruction(if_loop_body);
- call_loop_body->CopyEnvironmentFrom(cls->GetEnvironment());
-
- HInstruction* goto_loop_left = new (GetAllocator()) HGoto();
- loop_if_left->AddInstruction(goto_loop_left);
-
- HInstruction* read_loop_right = MakeIFieldGet(new_inst, DataType::Type::kInt32, MemberOffset(32));
- HInstruction* add_loop_right =
- new (GetAllocator()) HAdd(DataType::Type::kInt32, read_loop_right, c5);
- HInstruction* write_loop_right = MakeIFieldSet(new_inst, add_loop_right, MemberOffset(32));
- HInstruction* goto_loop_right = new (GetAllocator()) HGoto();
- loop_if_right->AddInstruction(read_loop_right);
- loop_if_right->AddInstruction(add_loop_right);
- loop_if_right->AddInstruction(write_loop_right);
- loop_if_right->AddInstruction(goto_loop_right);
-
- HInstruction* goto_loop_merge = new (GetAllocator()) HGoto();
- loop_merge->AddInstruction(goto_loop_merge);
-
- HInstruction* if_esc_check = new (GetAllocator()) HIf(bool_val);
- escape_check->AddInstruction(if_esc_check);
-
- HInstruction* call_escape = MakeInvoke(DataType::Type::kVoid, { new_inst });
- HInstruction* goto_escape = new (GetAllocator()) HGoto();
- escape->AddInstruction(call_escape);
- escape->AddInstruction(goto_escape);
- call_escape->CopyEnvironmentFrom(cls->GetEnvironment());
-
- HInstruction* goto_no_escape = new (GetAllocator()) HGoto();
- no_escape->AddInstruction(goto_no_escape);
-
- HInstruction* read_bottom = MakeIFieldGet(new_inst, DataType::Type::kInt32, MemberOffset(32));
- HInstruction* return_exit = new (GetAllocator()) HReturn(read_bottom);
- breturn->AddInstruction(read_bottom);
- breturn->AddInstruction(return_exit);
-
- SetupExit(exit);
-
- PerformLSEWithPartial(blks);
-
- HPredicatedInstanceFieldGet* pred_get =
- FindSingleInstruction<HPredicatedInstanceFieldGet>(graph_, breturn);
- EXPECT_INS_REMOVED(read_bottom) << *read_bottom;
- ASSERT_TRUE(pred_get != nullptr);
- HPhi* inst_return_phi = pred_get->GetTarget()->AsPhi();
- ASSERT_TRUE(inst_return_phi != nullptr) << pred_get->GetTarget()->DumpWithArgs();
- EXPECT_INS_EQ(inst_return_phi->InputAt(0), graph_->GetNullConstant());
- EXPECT_INS_EQ(inst_return_phi->InputAt(1),
- FindSingleInstruction<HNewInstance>(graph_, escape->GetSinglePredecessor()));
- HPhi* inst_value_phi = pred_get->GetDefaultValue()->AsPhi();
- ASSERT_TRUE(inst_value_phi != nullptr) << pred_get->GetDefaultValue()->DumpWithArgs();
- HPhi* loop_header_phi = FindSingleInstruction<HPhi>(graph_, loop_header);
- HPhi* loop_merge_phi = FindSingleInstruction<HPhi>(graph_, loop_merge);
- EXPECT_INS_EQ(inst_value_phi->InputAt(0), loop_header_phi);
- EXPECT_INS_EQ(inst_value_phi->InputAt(1), graph_->GetIntConstant(0));
- EXPECT_INS_EQ(loop_header_phi->InputAt(0), c3);
- EXPECT_INS_EQ(loop_header_phi->InputAt(1), loop_merge_phi);
- EXPECT_INS_EQ(loop_merge_phi->InputAt(0), loop_header_phi);
- EXPECT_INS_EQ(loop_merge_phi->InputAt(1), add_loop_right);
- EXPECT_INS_EQ(add_loop_right->InputAt(0), loop_header_phi);
- EXPECT_INS_EQ(add_loop_right->InputAt(1), c5);
- HInstanceFieldSet* mat_set =
- FindSingleInstruction<HInstanceFieldSet>(graph_, escape->GetSinglePredecessor());
- ASSERT_NE(mat_set, nullptr);
- EXPECT_INS_EQ(mat_set->InputAt(1), loop_header_phi);
- EXPECT_INS_REMOVED(write_loop_right) << *write_loop_right;
- EXPECT_INS_REMOVED(write_pre_header) << *write_pre_header;
-}
-
-// // ENTRY
-// obj = new Obj();
-// obj.field = 3;
-// if (param) {
-// while (!test()) {
-// if (test2()) {
-// noescape();
-// } else {
-// abc = obj.field;
-// obj.field = abc + 5;
-// noescape();
-// }
-// }
-// escape(obj);
-// } else {
-// }
-// return obj.field
-TEST_F(LoadStoreEliminationTest, PartialLoopPhis6) {
- ScopedObjectAccess soa(Thread::Current());
- VariableSizedHandleScope vshs(soa.Self());
- CreateGraph(/*handles=*/&vshs);
- AdjacencyListGraph blks(SetupFromAdjacencyList("entry",
- "exit",
- {{"entry", "start"},
- {"start", "left"},
- {"start", "right"},
- {"left", "loop_pre_header"},
-
- {"loop_pre_header", "loop_header"},
- {"loop_header", "escape"},
- {"loop_header", "loop_body"},
- {"loop_body", "loop_if_left"},
- {"loop_body", "loop_if_right"},
- {"loop_if_left", "loop_header"},
- {"loop_if_right", "loop_header"},
-
- {"escape", "breturn"},
- {"right", "breturn"},
- {"breturn", "exit"}}));
-#define GET_BLOCK(name) HBasicBlock* name = blks.Get(#name)
- GET_BLOCK(entry);
- GET_BLOCK(exit);
- GET_BLOCK(breturn);
- GET_BLOCK(left);
- GET_BLOCK(right);
- GET_BLOCK(start);
- GET_BLOCK(escape);
-
- GET_BLOCK(loop_pre_header);
- GET_BLOCK(loop_header);
- GET_BLOCK(loop_body);
- GET_BLOCK(loop_if_left);
- GET_BLOCK(loop_if_right);
-#undef GET_BLOCK
- EnsurePredecessorOrder(breturn, {escape, right});
- EnsurePredecessorOrder(loop_header, {loop_pre_header, loop_if_left, loop_if_right});
- CHECK_SUBROUTINE_FAILURE();
- HInstruction* bool_val = MakeParam(DataType::Type::kBool);
- HInstruction* c3 = graph_->GetIntConstant(3);
- HInstruction* c5 = graph_->GetIntConstant(5);
-
- HInstruction* cls = MakeClassLoad();
- HInstruction* new_inst = MakeNewInstance(cls);
- HInstruction* write_entry = MakeIFieldSet(new_inst, c3, MemberOffset(32));
- HInstruction* entry_goto = new (GetAllocator()) HGoto();
- entry->AddInstruction(cls);
- entry->AddInstruction(new_inst);
- entry->AddInstruction(write_entry);
- entry->AddInstruction(entry_goto);
- ManuallyBuildEnvFor(cls, {});
- new_inst->CopyEnvironmentFrom(cls->GetEnvironment());
-
- start->AddInstruction(new (GetAllocator()) HIf(bool_val));
-
- HInstruction* left_goto = new (GetAllocator()) HGoto();
- left->AddInstruction(left_goto);
-
- HInstruction* goto_preheader = new (GetAllocator()) HGoto();
- loop_pre_header->AddInstruction(goto_preheader);
-
- HInstruction* suspend_check_header = new (GetAllocator()) HSuspendCheck();
- HInstruction* call_header = MakeInvoke(DataType::Type::kBool, {});
- HInstruction* if_header = new (GetAllocator()) HIf(call_header);
- loop_header->AddInstruction(suspend_check_header);
- loop_header->AddInstruction(call_header);
- loop_header->AddInstruction(if_header);
- call_header->CopyEnvironmentFrom(cls->GetEnvironment());
- suspend_check_header->CopyEnvironmentFrom(cls->GetEnvironment());
-
- HInstruction* call_loop_body = MakeInvoke(DataType::Type::kBool, {});
- HInstruction* if_loop_body = new (GetAllocator()) HIf(call_loop_body);
- loop_body->AddInstruction(call_loop_body);
- loop_body->AddInstruction(if_loop_body);
- call_loop_body->CopyEnvironmentFrom(cls->GetEnvironment());
-
- HInstruction* call_loop_left = MakeInvoke(DataType::Type::kVoid, {});
- HInstruction* goto_loop_left = new (GetAllocator()) HGoto();
- loop_if_left->AddInstruction(call_loop_left);
- loop_if_left->AddInstruction(goto_loop_left);
- call_loop_left->CopyEnvironmentFrom(cls->GetEnvironment());
-
- HInstruction* read_loop_right = MakeIFieldGet(new_inst, DataType::Type::kInt32, MemberOffset(32));
- HInstruction* add_loop_right =
- new (GetAllocator()) HAdd(DataType::Type::kInt32, c5, read_loop_right);
- HInstruction* write_loop_right = MakeIFieldSet(new_inst, add_loop_right, MemberOffset(32));
- HInstruction* call_loop_right = MakeInvoke(DataType::Type::kVoid, {});
- HInstruction* goto_loop_right = new (GetAllocator()) HGoto();
- loop_if_right->AddInstruction(read_loop_right);
- loop_if_right->AddInstruction(add_loop_right);
- loop_if_right->AddInstruction(write_loop_right);
- loop_if_right->AddInstruction(call_loop_right);
- loop_if_right->AddInstruction(goto_loop_right);
- call_loop_right->CopyEnvironmentFrom(cls->GetEnvironment());
-
- HInstruction* call_escape = MakeInvoke(DataType::Type::kVoid, { new_inst });
- HInstruction* goto_escape = new (GetAllocator()) HGoto();
- escape->AddInstruction(call_escape);
- escape->AddInstruction(goto_escape);
- call_escape->CopyEnvironmentFrom(cls->GetEnvironment());
-
- HInstruction* goto_right = new (GetAllocator()) HGoto();
- right->AddInstruction(goto_right);
-
- HInstruction* read_bottom = MakeIFieldGet(new_inst, DataType::Type::kInt32, MemberOffset(32));
- HInstruction* return_exit = new (GetAllocator()) HReturn(read_bottom);
- breturn->AddInstruction(read_bottom);
- breturn->AddInstruction(return_exit);
-
- SetupExit(exit);
-
- PerformLSEWithPartial(blks);
-
- HPredicatedInstanceFieldGet* pred_get =
- FindSingleInstruction<HPredicatedInstanceFieldGet>(graph_, breturn);
- EXPECT_INS_REMOVED(read_bottom) << *read_bottom;
- ASSERT_TRUE(pred_get != nullptr);
- HPhi* inst_return_phi = pred_get->GetTarget()->AsPhi();
- ASSERT_TRUE(inst_return_phi != nullptr) << pred_get->GetTarget()->DumpWithArgs();
- EXPECT_INS_EQ(inst_return_phi->InputAt(0),
- FindSingleInstruction<HNewInstance>(graph_, escape->GetSinglePredecessor()));
- EXPECT_INS_EQ(inst_return_phi->InputAt(1), graph_->GetNullConstant());
- EXPECT_INS_EQ(pred_get->GetDefaultValue()->InputAt(0), graph_->GetIntConstant(0));
- EXPECT_INS_EQ(pred_get->GetDefaultValue()->InputAt(1), c3);
- HPhi* loop_header_phi = FindSingleInstruction<HPhi>(graph_, loop_header);
- ASSERT_NE(loop_header_phi, nullptr);
- EXPECT_INS_EQ(loop_header_phi->InputAt(0), c3);
- EXPECT_INS_EQ(loop_header_phi->InputAt(1), loop_header_phi);
- EXPECT_INS_EQ(loop_header_phi->InputAt(2), add_loop_right);
- EXPECT_INS_EQ(add_loop_right->InputAt(0), c5);
- EXPECT_INS_EQ(add_loop_right->InputAt(1), loop_header_phi);
- HInstanceFieldSet* mat_set =
- FindSingleInstruction<HInstanceFieldSet>(graph_, escape->GetSinglePredecessor());
- ASSERT_NE(mat_set, nullptr);
- EXPECT_INS_EQ(mat_set->InputAt(1), loop_header_phi);
- EXPECT_INS_REMOVED(write_loop_right);
- EXPECT_INS_REMOVED(write_entry);
- EXPECT_INS_RETAINED(call_header);
- EXPECT_INS_RETAINED(call_loop_left);
- EXPECT_INS_RETAINED(call_loop_right);
-}
-
-// TODO: This should really be in an instruction simplifier gtest but (1) that
-// doesn't exist and (2) we should move this simplification directly into the
-// LSE pass, since more information is available at that point.
-// // ENTRY
-// obj = new Obj();
-// obj.field = 3;
-// if (param) {
-// escape(obj);
-// } else {
-// obj.field = 10;
-// }
-// return obj.field;
-TEST_F(LoadStoreEliminationTest, SimplifyTest) {
- ScopedObjectAccess soa(Thread::Current());
- VariableSizedHandleScope vshs(soa.Self());
- CreateGraph(&vshs);
- AdjacencyListGraph blks(SetupFromAdjacencyList("entry",
- "exit",
- {{"entry", "left"},
- {"entry", "right"},
- {"left", "breturn"},
- {"right", "breturn"},
- {"breturn", "exit"}}));
-#define GET_BLOCK(name) HBasicBlock* name = blks.Get(#name)
- GET_BLOCK(entry);
- GET_BLOCK(exit);
- GET_BLOCK(breturn);
- GET_BLOCK(left);
- GET_BLOCK(right);
-#undef GET_BLOCK
- EnsurePredecessorOrder(breturn, {left, right});
-
- HInstruction* bool_value = MakeParam(DataType::Type::kBool);
- HInstruction* c3 = graph_->GetIntConstant(3);
- HInstruction* c10 = graph_->GetIntConstant(10);
-
- HInstruction* cls = MakeClassLoad();
- HInstruction* new_inst = MakeNewInstance(cls);
- HInstruction* write_start = MakeIFieldSet(new_inst, c3, MemberOffset(32));
- HInstruction* if_inst = new (GetAllocator()) HIf(bool_value);
- entry->AddInstruction(cls);
- entry->AddInstruction(new_inst);
- entry->AddInstruction(write_start);
- entry->AddInstruction(if_inst);
- ManuallyBuildEnvFor(cls, {});
- new_inst->CopyEnvironmentFrom(cls->GetEnvironment());
-
- HInstruction* call_left = MakeInvoke(DataType::Type::kVoid, { new_inst });
- HInstruction* goto_left = new (GetAllocator()) HGoto();
- left->AddInstruction(call_left);
- left->AddInstruction(goto_left);
- call_left->CopyEnvironmentFrom(cls->GetEnvironment());
-
- HInstruction* write_right = MakeIFieldSet(new_inst, c10, MemberOffset(32));
- HInstruction* goto_right = new (GetAllocator()) HGoto();
- right->AddInstruction(write_right);
- right->AddInstruction(goto_right);
-
- HInstruction* read_end = MakeIFieldGet(new_inst, DataType::Type::kInt32, MemberOffset(32));
- HInstruction* return_exit = new (GetAllocator()) HReturn(read_end);
- breturn->AddInstruction(read_end);
- breturn->AddInstruction(return_exit);
-
- SetupExit(exit);
-
- PerformLSEWithPartial(blks);
-
- // Run the code-simplifier too
- PerformSimplifications(blks);
-
- EXPECT_INS_REMOVED(write_right);
- EXPECT_INS_REMOVED(write_start);
- EXPECT_INS_REMOVED(read_end);
- EXPECT_INS_RETAINED(call_left);
-
- HPredicatedInstanceFieldGet* pred_get =
- FindSingleInstruction<HPredicatedInstanceFieldGet>(graph_, breturn);
- ASSERT_NE(pred_get, nullptr);
- EXPECT_INS_EQ(pred_get->GetDefaultValue(), c10);
-}
-
-// TODO: This should really be in an instruction simplifier gtest but (1) that
-// doesn't exist and (2) we should move this simplification directly into the
-// LSE pass, since more information is available at that point.
-//
-// This checks that we don't replace phis when the replacement isn't valid at
-// that point (i.e. it doesn't dominate the phi).
-// // ENTRY
-// obj = new Obj();
-// obj.field = 3;
-// if (param) {
-// escape(obj);
-// } else {
-// obj.field = noescape();
-// }
-// return obj.field;
-TEST_F(LoadStoreEliminationTest, SimplifyTest2) {
- ScopedObjectAccess soa(Thread::Current());
- VariableSizedHandleScope vshs(soa.Self());
- CreateGraph(&vshs);
- AdjacencyListGraph blks(SetupFromAdjacencyList("entry",
- "exit",
- {{"entry", "left"},
- {"entry", "right"},
- {"left", "breturn"},
- {"right", "breturn"},
- {"breturn", "exit"}}));
-#define GET_BLOCK(name) HBasicBlock* name = blks.Get(#name)
- GET_BLOCK(entry);
- GET_BLOCK(exit);
- GET_BLOCK(breturn);
- GET_BLOCK(left);
- GET_BLOCK(right);
-#undef GET_BLOCK
- EnsurePredecessorOrder(breturn, {left, right});
-
- HInstruction* bool_value = MakeParam(DataType::Type::kBool);
- HInstruction* c3 = graph_->GetIntConstant(3);
-
- HInstruction* cls = MakeClassLoad();
- HInstruction* new_inst = MakeNewInstance(cls);
- HInstruction* write_start = MakeIFieldSet(new_inst, c3, MemberOffset(32));
- HInstruction* if_inst = new (GetAllocator()) HIf(bool_value);
- entry->AddInstruction(cls);
- entry->AddInstruction(new_inst);
- entry->AddInstruction(write_start);
- entry->AddInstruction(if_inst);
- ManuallyBuildEnvFor(cls, {});
- new_inst->CopyEnvironmentFrom(cls->GetEnvironment());
-
- HInstruction* call_left = MakeInvoke(DataType::Type::kVoid, {new_inst});
- HInstruction* goto_left = new (GetAllocator()) HGoto();
- left->AddInstruction(call_left);
- left->AddInstruction(goto_left);
- call_left->CopyEnvironmentFrom(cls->GetEnvironment());
-
- HInstruction* call_right = MakeInvoke(DataType::Type::kInt32, {});
- HInstruction* write_right = MakeIFieldSet(new_inst, call_right, MemberOffset(32));
- HInstruction* goto_right = new (GetAllocator()) HGoto();
- right->AddInstruction(call_right);
- right->AddInstruction(write_right);
- right->AddInstruction(goto_right);
- call_right->CopyEnvironmentFrom(cls->GetEnvironment());
-
- HInstruction* read_end = MakeIFieldGet(new_inst, DataType::Type::kInt32, MemberOffset(32));
- HInstruction* return_exit = new (GetAllocator()) HReturn(read_end);
- breturn->AddInstruction(read_end);
- breturn->AddInstruction(return_exit);
-
- SetupExit(exit);
-
- PerformLSEWithPartial(blks);
-
- // Run the code-simplifier too
- PerformSimplifications(blks);
-
- EXPECT_INS_REMOVED(write_right);
- EXPECT_INS_REMOVED(write_start);
- EXPECT_INS_REMOVED(read_end);
- EXPECT_INS_RETAINED(call_left);
- EXPECT_INS_RETAINED(call_right);
- EXPECT_EQ(call_right->GetBlock(), right);
-
- HPredicatedInstanceFieldGet* pred_get =
- FindSingleInstruction<HPredicatedInstanceFieldGet>(graph_, breturn);
- ASSERT_NE(pred_get, nullptr);
- EXPECT_TRUE(pred_get->GetDefaultValue()->IsPhi()) << pred_get->DumpWithArgs();
- EXPECT_INS_EQ(pred_get->GetDefaultValue()->InputAt(0), graph_->GetIntConstant(0))
- << pred_get->DumpWithArgs();
- EXPECT_INS_EQ(pred_get->GetDefaultValue()->InputAt(1), call_right) << pred_get->DumpWithArgs();
-}
-
-// TODO This should really be in an instruction simplifier gtest but (1) that
-// doesn't exist and (2) we should move this simplification directly into the
-// LSE pass, where more information is available.
-//
-// This checks that we replace phis even when there are multiple replacements as
-// long as they are equal
-// // ENTRY
-// obj = new Obj();
-// obj.field = 3;
-// switch (param) {
-// case 1:
-// escape(obj);
-// break;
-// case 2:
-// obj.field = 10;
-// break;
-// case 3:
-// obj.field = 10;
-// break;
-// }
-// return obj.field;
-TEST_F(LoadStoreEliminationTest, SimplifyTest3) {
- ScopedObjectAccess soa(Thread::Current());
- VariableSizedHandleScope vshs(soa.Self());
- CreateGraph(&vshs);
- AdjacencyListGraph blks(SetupFromAdjacencyList("entry",
- "exit",
- {{"entry", "case1"},
- {"entry", "case2"},
- {"entry", "case3"},
- {"case1", "breturn"},
- {"case2", "breturn"},
- {"case3", "breturn"},
- {"breturn", "exit"}}));
-#define GET_BLOCK(name) HBasicBlock* name = blks.Get(#name)
- GET_BLOCK(entry);
- GET_BLOCK(exit);
- GET_BLOCK(breturn);
- GET_BLOCK(case1);
- GET_BLOCK(case2);
- GET_BLOCK(case3);
-#undef GET_BLOCK
- EnsurePredecessorOrder(breturn, {case1, case2, case3});
-
- HInstruction* int_val = MakeParam(DataType::Type::kInt32);
- HInstruction* c3 = graph_->GetIntConstant(3);
- HInstruction* c10 = graph_->GetIntConstant(10);
-
- HInstruction* cls = MakeClassLoad();
- HInstruction* new_inst = MakeNewInstance(cls);
- HInstruction* write_start = MakeIFieldSet(new_inst, c3, MemberOffset(32));
- HInstruction* switch_inst = new (GetAllocator()) HPackedSwitch(0, 2, int_val);
- entry->AddInstruction(cls);
- entry->AddInstruction(new_inst);
- entry->AddInstruction(write_start);
- entry->AddInstruction(switch_inst);
- ManuallyBuildEnvFor(cls, {});
- new_inst->CopyEnvironmentFrom(cls->GetEnvironment());
-
- HInstruction* call_case1 = MakeInvoke(DataType::Type::kVoid, {new_inst});
- HInstruction* goto_case1 = new (GetAllocator()) HGoto();
- case1->AddInstruction(call_case1);
- case1->AddInstruction(goto_case1);
- call_case1->CopyEnvironmentFrom(cls->GetEnvironment());
-
- HInstruction* write_case2 = MakeIFieldSet(new_inst, c10, MemberOffset(32));
- HInstruction* goto_case2 = new (GetAllocator()) HGoto();
- case2->AddInstruction(write_case2);
- case2->AddInstruction(goto_case2);
-
- HInstruction* write_case3 = MakeIFieldSet(new_inst, c10, MemberOffset(32));
- HInstruction* goto_case3 = new (GetAllocator()) HGoto();
- case3->AddInstruction(write_case3);
- case3->AddInstruction(goto_case3);
-
- HInstruction* read_end = MakeIFieldGet(new_inst, DataType::Type::kInt32, MemberOffset(32));
- HInstruction* return_exit = new (GetAllocator()) HReturn(read_end);
- breturn->AddInstruction(read_end);
- breturn->AddInstruction(return_exit);
-
- SetupExit(exit);
-
- PerformLSEWithPartial(blks);
-
- // Run the code-simplifier too
- PerformSimplifications(blks);
-
- EXPECT_INS_REMOVED(write_case2);
- EXPECT_INS_REMOVED(write_case3);
- EXPECT_INS_REMOVED(write_start);
- EXPECT_INS_REMOVED(read_end);
- EXPECT_INS_RETAINED(call_case1);
-
- HPredicatedInstanceFieldGet* pred_get =
- FindSingleInstruction<HPredicatedInstanceFieldGet>(graph_, breturn);
- ASSERT_NE(pred_get, nullptr);
- EXPECT_INS_EQ(pred_get->GetDefaultValue(), c10)
- << pred_get->DumpWithArgs();
-}
-
-// TODO This should really be in an instruction simplifier gtest but (1) that
-// doesn't exist and (2) we should move this simplification directly into the
-// LSE pass, where more information is available.
-//
-// This checks that we don't replace phis even when there are multiple
-// replacements if they are not equal
-// // ENTRY
-// obj = new Obj();
-// obj.field = 3;
-// switch (param) {
-// case 1:
-// escape(obj);
-// break;
-// case 2:
-// obj.field = 10;
-// break;
-// case 3:
-// obj.field = 20;
-// break;
-// }
-// return obj.field;
-TEST_F(LoadStoreEliminationTest, SimplifyTest4) {
- ScopedObjectAccess soa(Thread::Current());
- VariableSizedHandleScope vshs(soa.Self());
- CreateGraph(&vshs);
- AdjacencyListGraph blks(SetupFromAdjacencyList("entry",
- "exit",
- {{"entry", "case1"},
- {"entry", "case2"},
- {"entry", "case3"},
- {"case1", "breturn"},
- {"case2", "breturn"},
- {"case3", "breturn"},
- {"breturn", "exit"}}));
-#define GET_BLOCK(name) HBasicBlock* name = blks.Get(#name)
- GET_BLOCK(entry);
- GET_BLOCK(exit);
- GET_BLOCK(breturn);
- GET_BLOCK(case1);
- GET_BLOCK(case2);
- GET_BLOCK(case3);
-#undef GET_BLOCK
- EnsurePredecessorOrder(breturn, {case1, case2, case3});
-
- HInstruction* int_val = MakeParam(DataType::Type::kInt32);
- HInstruction* c3 = graph_->GetIntConstant(3);
- HInstruction* c10 = graph_->GetIntConstant(10);
- HInstruction* c20 = graph_->GetIntConstant(20);
-
- HInstruction* cls = MakeClassLoad();
- HInstruction* new_inst = MakeNewInstance(cls);
- HInstruction* write_start = MakeIFieldSet(new_inst, c3, MemberOffset(32));
- HInstruction* switch_inst = new (GetAllocator()) HPackedSwitch(0, 2, int_val);
- entry->AddInstruction(cls);
- entry->AddInstruction(new_inst);
- entry->AddInstruction(write_start);
- entry->AddInstruction(switch_inst);
- ManuallyBuildEnvFor(cls, {});
- new_inst->CopyEnvironmentFrom(cls->GetEnvironment());
-
- HInstruction* call_case1 = MakeInvoke(DataType::Type::kVoid, {new_inst});
- HInstruction* goto_case1 = new (GetAllocator()) HGoto();
- case1->AddInstruction(call_case1);
- case1->AddInstruction(goto_case1);
- call_case1->CopyEnvironmentFrom(cls->GetEnvironment());
-
- HInstruction* write_case2 = MakeIFieldSet(new_inst, c10, MemberOffset(32));
- HInstruction* goto_case2 = new (GetAllocator()) HGoto();
- case2->AddInstruction(write_case2);
- case2->AddInstruction(goto_case2);
-
- HInstruction* write_case3 = MakeIFieldSet(new_inst, c20, MemberOffset(32));
- HInstruction* goto_case3 = new (GetAllocator()) HGoto();
- case3->AddInstruction(write_case3);
- case3->AddInstruction(goto_case3);
-
- HInstruction* read_end = MakeIFieldGet(new_inst, DataType::Type::kInt32, MemberOffset(32));
- HInstruction* return_exit = new (GetAllocator()) HReturn(read_end);
- breturn->AddInstruction(read_end);
- breturn->AddInstruction(return_exit);
-
- SetupExit(exit);
-
- PerformLSEWithPartial(blks);
-
- // Run the code-simplifier too
- PerformSimplifications(blks);
-
- EXPECT_INS_REMOVED(write_case2);
- EXPECT_INS_REMOVED(write_case3);
- EXPECT_INS_REMOVED(write_start);
- EXPECT_INS_REMOVED(read_end);
- EXPECT_INS_RETAINED(call_case1);
-
- HPredicatedInstanceFieldGet* pred_get =
- FindSingleInstruction<HPredicatedInstanceFieldGet>(graph_, breturn);
- ASSERT_NE(pred_get, nullptr);
- EXPECT_TRUE(pred_get->GetDefaultValue()->IsPhi())
- << pred_get->DumpWithArgs();
- EXPECT_INS_EQ(pred_get->GetDefaultValue()->InputAt(0), graph_->GetIntConstant(0));
- EXPECT_INS_EQ(pred_get->GetDefaultValue()->InputAt(1), c10);
- EXPECT_INS_EQ(pred_get->GetDefaultValue()->InputAt(2), c20);
-}
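// Editor's sketch (standalone, illustrative names; not ART code): the Simplify
// tests above exercise folding of the default-value Phi of a predicated get.
// Reduced to plain integers (the dominance requirement checked by SimplifyTest2
// is not modeled here), the rule is: the Phi folds to a constant only if every
// non-escaping predecessor supplies the same value.
#include <optional>
#include <utility>
#include <vector>

std::optional<int> FoldDefaultValue(const std::vector<std::pair<bool, int>>& preds) {
  // Each entry is (escapes, value written on that path).
  std::optional<int> folded;
  for (const auto& [escapes, value] : preds) {
    if (escapes) {
      continue;  // Escaped paths read through the materialized object instead.
    }
    if (folded.has_value() && *folded != value) {
      return std::nullopt;  // Differing values (SimplifyTest4): keep the Phi.
    }
    folded = value;  // Equal values (SimplifyTest, SimplifyTest3) fold to the constant.
  }
  return folded;
}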
-
-// Make sure that irreducible loops don't screw up Partial LSE. We can't pull
-// phis through them so we need to treat them as escapes.
-// TODO We should be able to do better than this; it needs some research.
-// // ENTRY
-// obj = new Obj();
-// obj.foo = 11;
-// if (param1) {
-// } else {
-// // irreducible loop here. NB the obj doesn't actually escape
-// obj.foo = 33;
-// if (param2) {
-// goto inner;
-// } else {
-// while (test()) {
-// if (test()) {
-// obj.foo = 66;
-// } else {
-// }
-// inner:
-// }
-// }
-// }
-// return obj.foo;
-TEST_F(LoadStoreEliminationTest, PartialIrreducibleLoop) {
- ScopedObjectAccess soa(Thread::Current());
- VariableSizedHandleScope vshs(soa.Self());
- CreateGraph(&vshs);
- AdjacencyListGraph blks(SetupFromAdjacencyList("start",
- "exit",
- {{"start", "entry"},
- {"entry", "left"},
- {"entry", "right"},
- {"left", "breturn"},
-
- {"right", "right_crit_break_loop"},
- {"right_crit_break_loop", "loop_header"},
- {"right", "right_crit_break_end"},
- {"right_crit_break_end", "loop_end"},
-
- {"loop_header", "loop_body"},
- {"loop_body", "loop_left"},
- {"loop_body", "loop_right"},
- {"loop_left", "loop_end"},
- {"loop_right", "loop_end"},
- {"loop_end", "loop_header"},
- {"loop_header", "loop_header_crit_break"},
- {"loop_header_crit_break", "breturn"},
-
- {"breturn", "exit"}}));
-#define GET_BLOCK(name) HBasicBlock* name = blks.Get(#name)
- GET_BLOCK(start);
- GET_BLOCK(entry);
- GET_BLOCK(exit);
- GET_BLOCK(breturn);
- GET_BLOCK(left);
- GET_BLOCK(right);
- GET_BLOCK(right_crit_break_end);
- GET_BLOCK(right_crit_break_loop);
- GET_BLOCK(loop_header);
- GET_BLOCK(loop_header_crit_break);
- GET_BLOCK(loop_body);
- GET_BLOCK(loop_left);
- GET_BLOCK(loop_right);
- GET_BLOCK(loop_end);
-#undef GET_BLOCK
- EnsurePredecessorOrder(breturn, {left, loop_header_crit_break});
- HInstruction* c11 = graph_->GetIntConstant(11);
- HInstruction* c33 = graph_->GetIntConstant(33);
- HInstruction* c66 = graph_->GetIntConstant(66);
- HInstruction* param1 = MakeParam(DataType::Type::kBool);
- HInstruction* param2 = MakeParam(DataType::Type::kBool);
-
- HInstruction* suspend = new (GetAllocator()) HSuspendCheck();
- HInstruction* start_goto = new (GetAllocator()) HGoto();
- start->AddInstruction(suspend);
- start->AddInstruction(start_goto);
- ManuallyBuildEnvFor(suspend, {});
-
- HInstruction* cls = MakeClassLoad();
- HInstruction* new_inst = MakeNewInstance(cls);
- HInstruction* write_start = MakeIFieldSet(new_inst, c11, MemberOffset(32));
- HInstruction* if_inst = new (GetAllocator()) HIf(param1);
- entry->AddInstruction(cls);
- entry->AddInstruction(new_inst);
- entry->AddInstruction(write_start);
- entry->AddInstruction(if_inst);
- ManuallyBuildEnvFor(cls, {});
- new_inst->CopyEnvironmentFrom(cls->GetEnvironment());
-
- left->AddInstruction(new (GetAllocator()) HGoto());
-
- right->AddInstruction(MakeIFieldSet(new_inst, c33, MemberOffset(32)));
- right->AddInstruction(new (GetAllocator()) HIf(param2));
-
- right_crit_break_end->AddInstruction(new (GetAllocator()) HGoto());
- right_crit_break_loop->AddInstruction(new (GetAllocator()) HGoto());
-
- HInstruction* header_suspend = new (GetAllocator()) HSuspendCheck();
- HInstruction* header_invoke = MakeInvoke(DataType::Type::kBool, {});
- HInstruction* header_if = new (GetAllocator()) HIf(header_invoke);
- loop_header->AddInstruction(header_suspend);
- loop_header->AddInstruction(header_invoke);
- loop_header->AddInstruction(header_if);
- header_suspend->CopyEnvironmentFrom(cls->GetEnvironment());
- header_invoke->CopyEnvironmentFrom(cls->GetEnvironment());
-
- HInstruction* body_invoke = MakeInvoke(DataType::Type::kBool, {});
- HInstruction* body_if = new (GetAllocator()) HIf(body_invoke);
- loop_body->AddInstruction(body_invoke);
- loop_body->AddInstruction(body_if);
- body_invoke->CopyEnvironmentFrom(cls->GetEnvironment());
-
- HInstruction* left_set = MakeIFieldSet(new_inst, c66, MemberOffset(32));
- HInstruction* left_goto = new (GetAllocator()) HGoto();
- loop_left->AddInstruction(left_set);
- loop_left->AddInstruction(left_goto);
-
- loop_right->AddInstruction(new (GetAllocator()) HGoto());
-
- loop_end->AddInstruction(new (GetAllocator()) HGoto());
-
- HInstruction* read_end = MakeIFieldGet(new_inst, DataType::Type::kInt32, MemberOffset(32));
- HInstruction* return_exit = new (GetAllocator()) HReturn(read_end);
- breturn->AddInstruction(read_end);
- breturn->AddInstruction(return_exit);
-
- SetupExit(exit);
-
- PerformLSEWithPartial(blks);
-
- EXPECT_TRUE(loop_header->IsLoopHeader());
- EXPECT_TRUE(loop_header->GetLoopInformation()->IsIrreducible());
-
- EXPECT_INS_RETAINED(left_set);
- EXPECT_INS_REMOVED(write_start);
- EXPECT_INS_REMOVED(read_end);
-
- HPredicatedInstanceFieldGet* pred_get =
- FindSingleInstruction<HPredicatedInstanceFieldGet>(graph_, breturn);
- ASSERT_NE(pred_get, nullptr);
- ASSERT_TRUE(pred_get->GetDefaultValue()->IsPhi()) << pred_get->DumpWithArgs();
- EXPECT_INS_EQ(pred_get->GetDefaultValue()->InputAt(0), c11);
- EXPECT_INS_EQ(pred_get->GetDefaultValue()->InputAt(1), graph_->GetIntConstant(0));
- ASSERT_TRUE(pred_get->GetTarget()->IsPhi()) << pred_get->DumpWithArgs();
- EXPECT_INS_EQ(pred_get->GetTarget()->InputAt(0), graph_->GetNullConstant());
- HNewInstance* mat = FindSingleInstruction<HNewInstance>(graph_, right->GetSinglePredecessor());
- ASSERT_NE(mat, nullptr);
- EXPECT_INS_EQ(pred_get->GetTarget()->InputAt(1), mat);
-}
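// Editor's note (illustrative, not ART code): an "irreducible" loop has more
// than one entry, so its header does not dominate all of the loop's blocks.
// The shape built by PartialIrreducibleLoop above corresponds roughly to:
int IrreducibleShape(bool param2, int n) {
  int foo = 33;
  if (param2) goto inner;  // second entry into the loop, bypassing the header
  while (n-- > 0) {
    if (n % 2 == 0) {
      foo = 66;
    }
  inner:
    ;
  }
  return foo;
}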
-
-enum class UsesOrder { kDefaultOrder, kReverseOrder };
-std::ostream& operator<<(std::ostream& os, const UsesOrder& ord) {
- switch (ord) {
- case UsesOrder::kDefaultOrder:
- return os << "DefaultOrder";
- case UsesOrder::kReverseOrder:
- return os << "ReverseOrder";
- }
-}
-
-class UsesOrderDependentTestGroup
- : public LoadStoreEliminationTestBase<CommonCompilerTestWithParam<UsesOrder>> {};
-
-// Make sure that we record replacements by predicated loads and use them
-// instead of constructing Phis with inputs removed from the graph. Bug: 183897743
-// Note that the bug was hit only for a certain ordering of the NewInstance
-// uses, so we test both orderings.
-// // ENTRY
-// obj = new Obj();
-// obj.foo = 11;
-// if (param1) {
-// // LEFT1
-// escape(obj);
-// } else {
-// // RIGHT1
-// }
-// // MIDDLE
-// a = obj.foo;
-// if (param2) {
-// // LEFT2
-// obj.foo = 33;
-// } else {
-// // RIGHT2
-// }
-// // BRETURN
-// no_escape() // If `obj` escaped, the field value can change. (Avoid non-partial LSE.)
-// b = obj.foo;
-// return a + b;
-TEST_P(UsesOrderDependentTestGroup, RecordPredicatedReplacements1) {
- ScopedObjectAccess soa(Thread::Current());
- VariableSizedHandleScope vshs(soa.Self());
- CreateGraph(&vshs);
- AdjacencyListGraph blks(SetupFromAdjacencyList("entry",
- "exit",
- {{"entry", "left1"},
- {"entry", "right1"},
- {"left1", "middle"},
- {"right1", "middle"},
- {"middle", "left2"},
- {"middle", "right2"},
- {"left2", "breturn"},
- {"right2", "breturn"},
- {"breturn", "exit"}}));
-#define GET_BLOCK(name) HBasicBlock* name = blks.Get(#name)
- GET_BLOCK(entry);
- GET_BLOCK(left1);
- GET_BLOCK(right1);
- GET_BLOCK(middle);
- GET_BLOCK(left2);
- GET_BLOCK(right2);
- GET_BLOCK(breturn);
- GET_BLOCK(exit);
-#undef GET_BLOCK
- EnsurePredecessorOrder(middle, {left1, right1});
- EnsurePredecessorOrder(breturn, {left2, right2});
- HInstruction* c0 = graph_->GetIntConstant(0);
- HInstruction* cnull = graph_->GetNullConstant();
- HInstruction* c11 = graph_->GetIntConstant(11);
- HInstruction* c33 = graph_->GetIntConstant(33);
- HInstruction* param1 = MakeParam(DataType::Type::kBool);
- HInstruction* param2 = MakeParam(DataType::Type::kBool);
-
- HInstruction* suspend = new (GetAllocator()) HSuspendCheck();
- HInstruction* cls = MakeClassLoad();
- HInstruction* new_inst = MakeNewInstance(cls);
- HInstruction* entry_write = MakeIFieldSet(new_inst, c11, MemberOffset(32));
- HInstruction* entry_if = new (GetAllocator()) HIf(param1);
- entry->AddInstruction(suspend);
- entry->AddInstruction(cls);
- entry->AddInstruction(new_inst);
- entry->AddInstruction(entry_write);
- entry->AddInstruction(entry_if);
- ManuallyBuildEnvFor(suspend, {});
- ManuallyBuildEnvFor(cls, {});
- ManuallyBuildEnvFor(new_inst, {});
-
- HInstruction* left1_call = MakeInvoke(DataType::Type::kVoid, { new_inst });
- HInstruction* left1_goto = new (GetAllocator()) HGoto();
- left1->AddInstruction(left1_call);
- left1->AddInstruction(left1_goto);
- ManuallyBuildEnvFor(left1_call, {});
-
- HInstruction* right1_goto = new (GetAllocator()) HGoto();
- right1->AddInstruction(right1_goto);
-
- HInstruction* middle_read = MakeIFieldGet(new_inst, DataType::Type::kInt32, MemberOffset(32));
- HInstruction* middle_if = new (GetAllocator()) HIf(param2);
- if (GetParam() == UsesOrder::kDefaultOrder) {
- middle->AddInstruction(middle_read);
- }
- middle->AddInstruction(middle_if);
-
- HInstanceFieldSet* left2_write = MakeIFieldSet(new_inst, c33, MemberOffset(32));
- HInstruction* left2_goto = new (GetAllocator()) HGoto();
- left2->AddInstruction(left2_write);
- left2->AddInstruction(left2_goto);
-
- HInstruction* right2_goto = new (GetAllocator()) HGoto();
- right2->AddInstruction(right2_goto);
-
- HInstruction* breturn_call = MakeInvoke(DataType::Type::kVoid, {});
- HInstruction* breturn_read = MakeIFieldGet(new_inst, DataType::Type::kInt32, MemberOffset(32));
- HInstruction* breturn_add =
- new (GetAllocator()) HAdd(DataType::Type::kInt32, middle_read, breturn_read);
- HInstruction* breturn_return = new (GetAllocator()) HReturn(breturn_add);
- breturn->AddInstruction(breturn_call);
- breturn->AddInstruction(breturn_read);
- breturn->AddInstruction(breturn_add);
- breturn->AddInstruction(breturn_return);
- ManuallyBuildEnvFor(breturn_call, {});
-
- if (GetParam() == UsesOrder::kReverseOrder) {
- // Insert `middle_read` in the same position as for the `kDefaultOrder` case.
- // The only difference is the order of entries in `new_inst->GetUses()` which
- // is used by `HeapReferenceData::CollectReplacements()` and defines the order
- // of instructions to process for `HeapReferenceData::PredicateInstructions()`.
- middle->InsertInstructionBefore(middle_read, middle_if);
- }
-
- SetupExit(exit);
-
- PerformLSEWithPartial(blks);
-
- EXPECT_INS_RETAINED(cls);
- EXPECT_INS_REMOVED(new_inst);
- HNewInstance* replacement_new_inst = FindSingleInstruction<HNewInstance>(graph_);
- ASSERT_NE(replacement_new_inst, nullptr);
- EXPECT_INS_REMOVED(entry_write);
- std::vector<HInstanceFieldSet*> all_writes;
- std::tie(all_writes) = FindAllInstructions<HInstanceFieldSet>(graph_);
- ASSERT_EQ(2u, all_writes.size());
- ASSERT_NE(all_writes[0] == left2_write, all_writes[1] == left2_write);
- HInstanceFieldSet* replacement_write = all_writes[(all_writes[0] == left2_write) ? 1u : 0u];
- ASSERT_FALSE(replacement_write->GetIsPredicatedSet());
- ASSERT_INS_EQ(replacement_write->InputAt(0), replacement_new_inst);
- ASSERT_INS_EQ(replacement_write->InputAt(1), c11);
-
- EXPECT_INS_RETAINED(left1_call);
-
- EXPECT_INS_REMOVED(middle_read);
- HPredicatedInstanceFieldGet* replacement_middle_read =
- FindSingleInstruction<HPredicatedInstanceFieldGet>(graph_, middle);
- ASSERT_NE(replacement_middle_read, nullptr);
- ASSERT_TRUE(replacement_middle_read->GetTarget()->IsPhi());
- ASSERT_EQ(2u, replacement_middle_read->GetTarget()->AsPhi()->InputCount());
- ASSERT_INS_EQ(replacement_middle_read->GetTarget()->AsPhi()->InputAt(0), replacement_new_inst);
- ASSERT_INS_EQ(replacement_middle_read->GetTarget()->AsPhi()->InputAt(1), cnull);
- ASSERT_TRUE(replacement_middle_read->GetDefaultValue()->IsPhi());
- ASSERT_EQ(2u, replacement_middle_read->GetDefaultValue()->AsPhi()->InputCount());
- ASSERT_INS_EQ(replacement_middle_read->GetDefaultValue()->AsPhi()->InputAt(0), c0);
- ASSERT_INS_EQ(replacement_middle_read->GetDefaultValue()->AsPhi()->InputAt(1), c11);
-
- EXPECT_INS_RETAINED(left2_write);
- ASSERT_TRUE(left2_write->GetIsPredicatedSet());
-
- EXPECT_INS_REMOVED(breturn_read);
- HPredicatedInstanceFieldGet* replacement_breturn_read =
- FindSingleInstruction<HPredicatedInstanceFieldGet>(graph_, breturn);
- ASSERT_NE(replacement_breturn_read, nullptr);
- ASSERT_INS_EQ(replacement_breturn_read->GetTarget(), replacement_middle_read->GetTarget());
- ASSERT_TRUE(replacement_breturn_read->GetDefaultValue()->IsPhi());
- ASSERT_EQ(2u, replacement_breturn_read->GetDefaultValue()->AsPhi()->InputCount());
- ASSERT_INS_EQ(replacement_breturn_read->GetDefaultValue()->AsPhi()->InputAt(0), c33);
- HInstruction* other_input = replacement_breturn_read->GetDefaultValue()->AsPhi()->InputAt(1);
- ASSERT_NE(other_input->GetBlock(), nullptr) << GetParam();
- ASSERT_INS_EQ(other_input, replacement_middle_read);
-}
-
-// Regression test for a bad DCHECK() found while trying to write a test for b/188188275.
-// // ENTRY
-// obj = new Obj();
-// obj.foo = 11;
-// if (param1) {
-// // LEFT1
-// escape(obj);
-// } else {
-// // RIGHT1
-// }
-// // MIDDLE
-// a = obj.foo;
-// if (param2) {
-// // LEFT2
-// no_escape();
-// } else {
-// // RIGHT2
-// }
-// // BRETURN
-// b = obj.foo;
-// return a + b;
-TEST_P(UsesOrderDependentTestGroup, RecordPredicatedReplacements2) {
- ScopedObjectAccess soa(Thread::Current());
- VariableSizedHandleScope vshs(soa.Self());
- CreateGraph(&vshs);
- AdjacencyListGraph blks(SetupFromAdjacencyList("entry",
- "exit",
- {{"entry", "left1"},
- {"entry", "right1"},
- {"left1", "middle"},
- {"right1", "middle"},
- {"middle", "left2"},
- {"middle", "right2"},
- {"left2", "breturn"},
- {"right2", "breturn"},
- {"breturn", "exit"}}));
-#define GET_BLOCK(name) HBasicBlock* name = blks.Get(#name)
- GET_BLOCK(entry);
- GET_BLOCK(left1);
- GET_BLOCK(right1);
- GET_BLOCK(middle);
- GET_BLOCK(left2);
- GET_BLOCK(right2);
- GET_BLOCK(breturn);
- GET_BLOCK(exit);
-#undef GET_BLOCK
- EnsurePredecessorOrder(middle, {left1, right1});
- EnsurePredecessorOrder(breturn, {left2, right2});
- HInstruction* c0 = graph_->GetIntConstant(0);
- HInstruction* cnull = graph_->GetNullConstant();
- HInstruction* c11 = graph_->GetIntConstant(11);
- HInstruction* param1 = MakeParam(DataType::Type::kBool);
- HInstruction* param2 = MakeParam(DataType::Type::kBool);
-
- HInstruction* suspend = new (GetAllocator()) HSuspendCheck();
- HInstruction* cls = MakeClassLoad();
- HInstruction* new_inst = MakeNewInstance(cls);
- HInstruction* entry_write = MakeIFieldSet(new_inst, c11, MemberOffset(32));
- HInstruction* entry_if = new (GetAllocator()) HIf(param1);
- entry->AddInstruction(suspend);
- entry->AddInstruction(cls);
- entry->AddInstruction(new_inst);
- entry->AddInstruction(entry_write);
- entry->AddInstruction(entry_if);
- ManuallyBuildEnvFor(suspend, {});
- ManuallyBuildEnvFor(cls, {});
- ManuallyBuildEnvFor(new_inst, {});
-
- HInstruction* left1_call = MakeInvoke(DataType::Type::kVoid, { new_inst });
- HInstruction* left1_goto = new (GetAllocator()) HGoto();
- left1->AddInstruction(left1_call);
- left1->AddInstruction(left1_goto);
- ManuallyBuildEnvFor(left1_call, {});
-
- HInstruction* right1_goto = new (GetAllocator()) HGoto();
- right1->AddInstruction(right1_goto);
-
- HInstruction* middle_read = MakeIFieldGet(new_inst, DataType::Type::kInt32, MemberOffset(32));
- HInstruction* middle_if = new (GetAllocator()) HIf(param2);
- if (GetParam() == UsesOrder::kDefaultOrder) {
- middle->AddInstruction(middle_read);
- }
- middle->AddInstruction(middle_if);
-
- HInstruction* left2_call = MakeInvoke(DataType::Type::kVoid, {});
- HInstruction* left2_goto = new (GetAllocator()) HGoto();
- left2->AddInstruction(left2_call);
- left2->AddInstruction(left2_goto);
- ManuallyBuildEnvFor(left2_call, {});
-
- HInstruction* right2_goto = new (GetAllocator()) HGoto();
- right2->AddInstruction(right2_goto);
-
- HInstruction* breturn_read = MakeIFieldGet(new_inst, DataType::Type::kInt32, MemberOffset(32));
- HInstruction* breturn_add =
- new (GetAllocator()) HAdd(DataType::Type::kInt32, middle_read, breturn_read);
- HInstruction* breturn_return = new (GetAllocator()) HReturn(breturn_add);
- breturn->AddInstruction(breturn_read);
- breturn->AddInstruction(breturn_add);
- breturn->AddInstruction(breturn_return);
-
- if (GetParam() == UsesOrder::kReverseOrder) {
- // Insert `middle_read` in the same position as for the `kDefaultOrder` case.
- // The only difference is the order of entries in `new_inst->GetUses()` which
- // is used by `HeapReferenceData::CollectReplacements()` and defines the order
- // of instructions to process for `HeapReferenceData::PredicateInstructions()`.
- middle->InsertInstructionBefore(middle_read, middle_if);
- }
-
- SetupExit(exit);
-
- PerformLSEWithPartial(blks);
-
- EXPECT_INS_RETAINED(cls);
- EXPECT_INS_REMOVED(new_inst);
- HNewInstance* replacement_new_inst = FindSingleInstruction<HNewInstance>(graph_);
- ASSERT_NE(replacement_new_inst, nullptr);
- EXPECT_INS_REMOVED(entry_write);
- HInstanceFieldSet* replacement_write = FindSingleInstruction<HInstanceFieldSet>(graph_);
- ASSERT_NE(replacement_write, nullptr);
- ASSERT_FALSE(replacement_write->GetIsPredicatedSet());
- ASSERT_INS_EQ(replacement_write->InputAt(0), replacement_new_inst);
- ASSERT_INS_EQ(replacement_write->InputAt(1), c11);
-
- EXPECT_INS_RETAINED(left1_call);
-
- EXPECT_INS_REMOVED(middle_read);
- HPredicatedInstanceFieldGet* replacement_middle_read =
- FindSingleInstruction<HPredicatedInstanceFieldGet>(graph_, middle);
- ASSERT_NE(replacement_middle_read, nullptr);
- ASSERT_TRUE(replacement_middle_read->GetTarget()->IsPhi());
- ASSERT_EQ(2u, replacement_middle_read->GetTarget()->AsPhi()->InputCount());
- ASSERT_INS_EQ(replacement_middle_read->GetTarget()->AsPhi()->InputAt(0), replacement_new_inst);
- ASSERT_INS_EQ(replacement_middle_read->GetTarget()->AsPhi()->InputAt(1), cnull);
- ASSERT_TRUE(replacement_middle_read->GetDefaultValue()->IsPhi());
- ASSERT_EQ(2u, replacement_middle_read->GetDefaultValue()->AsPhi()->InputCount());
- ASSERT_INS_EQ(replacement_middle_read->GetDefaultValue()->AsPhi()->InputAt(0), c0);
- ASSERT_INS_EQ(replacement_middle_read->GetDefaultValue()->AsPhi()->InputAt(1), c11);
-
- EXPECT_INS_RETAINED(left2_call);
-
- EXPECT_INS_REMOVED(breturn_read);
- HPredicatedInstanceFieldGet* replacement_breturn_read =
- FindSingleInstruction<HPredicatedInstanceFieldGet>(graph_, breturn);
- ASSERT_NE(replacement_breturn_read, nullptr);
- ASSERT_INS_EQ(replacement_breturn_read->GetTarget(), replacement_middle_read->GetTarget());
- ASSERT_INS_EQ(replacement_breturn_read->GetDefaultValue(), replacement_middle_read);
-}
-
-INSTANTIATE_TEST_SUITE_P(LoadStoreEliminationTest,
- UsesOrderDependentTestGroup,
- testing::Values(UsesOrder::kDefaultOrder, UsesOrder::kReverseOrder));
-
-// The parameter is the number of times we call `std::next_permutation` (from 0 to 5)
-// so that we test all 6 permutation of three items.
-class UsesOrderDependentTestGroupForThreeItems
- : public LoadStoreEliminationTestBase<CommonCompilerTestWithParam<size_t>> {};
-
-// Make sure that after we record replacements by predicated loads, we correctly
-// use that predicated load for Phi placeholders that were previously marked as
-// replaced by the now removed unpredicated load. (The fix for bug 183897743 was
-// not good enough.) Bug: 188188275
-// // ENTRY
-// obj = new Obj();
-// obj.foo = 11;
-// if (param1) {
-// // LEFT1
-// escape(obj);
-// } else {
-// // RIGHT1
-// }
-// // MIDDLE1
-// a = obj.foo;
-// if (param2) {
-// // LEFT2
-// no_escape1();
-// } else {
-// // RIGHT2
-// }
-// // MIDDLE2
-// if (param3) {
-// // LEFT3
-// x = obj.foo;
-// no_escape2();
-// } else {
-// // RIGHT3
-// x = 0;
-// }
-// // BRETURN
-// b = obj.foo;
-// return a + b + x;
-TEST_P(UsesOrderDependentTestGroupForThreeItems, RecordPredicatedReplacements3) {
- ScopedObjectAccess soa(Thread::Current());
- VariableSizedHandleScope vshs(soa.Self());
- CreateGraph(&vshs);
- AdjacencyListGraph blks(SetupFromAdjacencyList("entry",
- "exit",
- {{"entry", "left1"},
- {"entry", "right1"},
- {"left1", "middle1"},
- {"right1", "middle1"},
- {"middle1", "left2"},
- {"middle1", "right2"},
- {"left2", "middle2"},
- {"right2", "middle2"},
- {"middle2", "left3"},
- {"middle2", "right3"},
- {"left3", "breturn"},
- {"right3", "breturn"},
- {"breturn", "exit"}}));
-#define GET_BLOCK(name) HBasicBlock* name = blks.Get(#name)
- GET_BLOCK(entry);
- GET_BLOCK(left1);
- GET_BLOCK(right1);
- GET_BLOCK(middle1);
- GET_BLOCK(left2);
- GET_BLOCK(right2);
- GET_BLOCK(middle2);
- GET_BLOCK(left3);
- GET_BLOCK(right3);
- GET_BLOCK(breturn);
- GET_BLOCK(exit);
-#undef GET_BLOCK
- EnsurePredecessorOrder(middle1, {left1, right1});
- EnsurePredecessorOrder(middle2, {left2, right2});
- EnsurePredecessorOrder(breturn, {left3, right3});
- HInstruction* c0 = graph_->GetIntConstant(0);
- HInstruction* cnull = graph_->GetNullConstant();
- HInstruction* c11 = graph_->GetIntConstant(11);
- HInstruction* param1 = MakeParam(DataType::Type::kBool);
- HInstruction* param2 = MakeParam(DataType::Type::kBool);
- HInstruction* param3 = MakeParam(DataType::Type::kBool);
-
- HInstruction* suspend = new (GetAllocator()) HSuspendCheck();
- HInstruction* cls = MakeClassLoad();
- HInstruction* new_inst = MakeNewInstance(cls);
- HInstruction* entry_write = MakeIFieldSet(new_inst, c11, MemberOffset(32));
- HInstruction* entry_if = new (GetAllocator()) HIf(param1);
- entry->AddInstruction(suspend);
- entry->AddInstruction(cls);
- entry->AddInstruction(new_inst);
- entry->AddInstruction(entry_write);
- entry->AddInstruction(entry_if);
- ManuallyBuildEnvFor(suspend, {});
- ManuallyBuildEnvFor(cls, {});
- ManuallyBuildEnvFor(new_inst, {});
-
- HInstruction* left1_call = MakeInvoke(DataType::Type::kVoid, { new_inst });
- HInstruction* left1_goto = new (GetAllocator()) HGoto();
- left1->AddInstruction(left1_call);
- left1->AddInstruction(left1_goto);
- ManuallyBuildEnvFor(left1_call, {});
-
- HInstruction* right1_goto = new (GetAllocator()) HGoto();
- right1->AddInstruction(right1_goto);
-
- HInstruction* middle1_read = MakeIFieldGet(new_inst, DataType::Type::kInt32, MemberOffset(32));
- HInstruction* middle1_if = new (GetAllocator()) HIf(param2);
- // Delay inserting `middle1_read`, do that later with ordering based on `GetParam()`.
- middle1->AddInstruction(middle1_if);
-
- HInstruction* left2_call = MakeInvoke(DataType::Type::kVoid, {});
- HInstruction* left2_goto = new (GetAllocator()) HGoto();
- left2->AddInstruction(left2_call);
- left2->AddInstruction(left2_goto);
- ManuallyBuildEnvFor(left2_call, {});
-
- HInstruction* right2_goto = new (GetAllocator()) HGoto();
- right2->AddInstruction(right2_goto);
-
- HInstruction* middle2_if = new (GetAllocator()) HIf(param3);
- middle2->AddInstruction(middle2_if);
-
- HInstruction* left3_read = MakeIFieldGet(new_inst, DataType::Type::kInt32, MemberOffset(32));
- HInstruction* left3_call = MakeInvoke(DataType::Type::kVoid, {});
- HInstruction* left3_goto = new (GetAllocator()) HGoto();
- // Delay inserting `left3_read`, do that later with ordering based on `GetParam()`.
- left3->AddInstruction(left3_call);
- left3->AddInstruction(left3_goto);
- ManuallyBuildEnvFor(left3_call, {});
-
- HInstruction* right3_goto = new (GetAllocator()) HGoto();
- right3->AddInstruction(right3_goto);
-
- HPhi* breturn_phi = MakePhi({left3_read, c0});
- HInstruction* breturn_read = MakeIFieldGet(new_inst, DataType::Type::kInt32, MemberOffset(32));
- HInstruction* breturn_add1 =
- new (GetAllocator()) HAdd(DataType::Type::kInt32, middle1_read, breturn_read);
- HInstruction* breturn_add2 =
- new (GetAllocator()) HAdd(DataType::Type::kInt32, breturn_add1, breturn_phi);
- HInstruction* breturn_return = new (GetAllocator()) HReturn(breturn_add2);
- breturn->AddPhi(breturn_phi);
- // Delay inserting `breturn_read`, do that later with ordering based on `GetParam()`.
- breturn->AddInstruction(breturn_add1);
- breturn->AddInstruction(breturn_add2);
- breturn->AddInstruction(breturn_return);
-
- // Insert reads in the same positions but in different insertion orders.
- // The only difference is the order of entries in `new_inst->GetUses()` which
- // is used by `HeapReferenceData::CollectReplacements()` and defines the order
- // of instructions to process for `HeapReferenceData::PredicateInstructions()`.
- std::tuple<size_t, HInstruction*, HInstruction*> read_insertions[] = {
- { 0u, middle1_read, middle1_if },
- { 1u, left3_read, left3_call },
- { 2u, breturn_read, breturn_add1 },
- };
- for (size_t i = 0, num = GetParam(); i != num; ++i) {
- std::next_permutation(read_insertions, read_insertions + std::size(read_insertions));
- }
- for (auto [order, read, cursor] : read_insertions) {
- cursor->GetBlock()->InsertInstructionBefore(read, cursor);
- }
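// Editor's sketch (standalone, not ART code): applying std::next_permutation
// GetParam() times to an initially sorted three-element sequence selects one of
// the 3! = 6 orderings, which is how the suite below covers every insertion order.
#include <algorithm>
#include <array>
#include <cstdio>

int main() {
  for (int k = 0; k != 6; ++k) {
    std::array<int, 3> order = {0, 1, 2};
    for (int i = 0; i != k; ++i) {
      std::next_permutation(order.begin(), order.end());
    }
    std::printf("param %d inserts reads in order %d %d %d\n", k, order[0], order[1], order[2]);
  }
  return 0;
}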
-
- SetupExit(exit);
-
- PerformLSEWithPartial(blks);
-
- EXPECT_INS_RETAINED(cls);
- EXPECT_INS_REMOVED(new_inst);
- HNewInstance* replacement_new_inst = FindSingleInstruction<HNewInstance>(graph_);
- ASSERT_NE(replacement_new_inst, nullptr);
- EXPECT_INS_REMOVED(entry_write);
- HInstanceFieldSet* replacement_write = FindSingleInstruction<HInstanceFieldSet>(graph_);
- ASSERT_NE(replacement_write, nullptr);
- ASSERT_FALSE(replacement_write->GetIsPredicatedSet());
- ASSERT_INS_EQ(replacement_write->InputAt(0), replacement_new_inst);
- ASSERT_INS_EQ(replacement_write->InputAt(1), c11);
-
- EXPECT_INS_RETAINED(left1_call);
-
- EXPECT_INS_REMOVED(middle1_read);
- HPredicatedInstanceFieldGet* replacement_middle1_read =
- FindSingleInstruction<HPredicatedInstanceFieldGet>(graph_, middle1);
- ASSERT_NE(replacement_middle1_read, nullptr);
- ASSERT_TRUE(replacement_middle1_read->GetTarget()->IsPhi());
- ASSERT_EQ(2u, replacement_middle1_read->GetTarget()->AsPhi()->InputCount());
- ASSERT_INS_EQ(replacement_middle1_read->GetTarget()->AsPhi()->InputAt(0), replacement_new_inst);
- ASSERT_INS_EQ(replacement_middle1_read->GetTarget()->AsPhi()->InputAt(1), cnull);
- ASSERT_TRUE(replacement_middle1_read->GetDefaultValue()->IsPhi());
- ASSERT_EQ(2u, replacement_middle1_read->GetDefaultValue()->AsPhi()->InputCount());
- ASSERT_INS_EQ(replacement_middle1_read->GetDefaultValue()->AsPhi()->InputAt(0), c0);
- ASSERT_INS_EQ(replacement_middle1_read->GetDefaultValue()->AsPhi()->InputAt(1), c11);
-
- EXPECT_INS_RETAINED(left2_call);
-
- EXPECT_INS_REMOVED(left3_read);
- HPredicatedInstanceFieldGet* replacement_left3_read =
- FindSingleInstruction<HPredicatedInstanceFieldGet>(graph_, left3);
- ASSERT_NE(replacement_left3_read, nullptr);
- ASSERT_TRUE(replacement_left3_read->GetTarget()->IsPhi());
- ASSERT_INS_EQ(replacement_left3_read->GetTarget(), replacement_middle1_read->GetTarget());
- ASSERT_INS_EQ(replacement_left3_read->GetDefaultValue(), replacement_middle1_read);
- EXPECT_INS_RETAINED(left3_call);
-
- EXPECT_INS_RETAINED(breturn_phi);
- EXPECT_INS_REMOVED(breturn_read);
- HPredicatedInstanceFieldGet* replacement_breturn_read =
- FindSingleInstruction<HPredicatedInstanceFieldGet>(graph_, breturn);
- ASSERT_NE(replacement_breturn_read, nullptr);
- ASSERT_INS_EQ(replacement_breturn_read->GetTarget(), replacement_middle1_read->GetTarget());
- ASSERT_EQ(2u, replacement_breturn_read->GetDefaultValue()->AsPhi()->InputCount());
- ASSERT_INS_EQ(replacement_breturn_read->GetDefaultValue()->AsPhi()->InputAt(0),
- replacement_left3_read);
- ASSERT_INS_EQ(replacement_breturn_read->GetDefaultValue()->AsPhi()->InputAt(1),
- replacement_middle1_read);
- EXPECT_INS_RETAINED(breturn_add1);
- ASSERT_INS_EQ(breturn_add1->InputAt(0), replacement_middle1_read);
- ASSERT_INS_EQ(breturn_add1->InputAt(1), replacement_breturn_read);
- EXPECT_INS_RETAINED(breturn_add2);
- ASSERT_INS_EQ(breturn_add2->InputAt(0), breturn_add1);
- ASSERT_INS_EQ(breturn_add2->InputAt(1), breturn_phi);
- EXPECT_INS_RETAINED(breturn_return);
-}
-
-INSTANTIATE_TEST_SUITE_P(LoadStoreEliminationTest,
- UsesOrderDependentTestGroupForThreeItems,
- testing::Values(0u, 1u, 2u, 3u, 4u, 5u));
-
} // namespace art
diff --git a/compiler/optimizing/locations.cc b/compiler/optimizing/locations.cc
index f40b7f4f0c..4189bc4053 100644
--- a/compiler/optimizing/locations.cc
+++ b/compiler/optimizing/locations.cc
@@ -62,7 +62,7 @@ Location Location::RegisterOrConstant(HInstruction* instruction) {
}
Location Location::RegisterOrInt32Constant(HInstruction* instruction) {
- HConstant* constant = instruction->AsConstant();
+ HConstant* constant = instruction->AsConstantOrNull();
if (constant != nullptr) {
int64_t value = CodeGenerator::GetInt64ValueOf(constant);
if (IsInt<32>(value)) {
@@ -73,7 +73,7 @@ Location Location::RegisterOrInt32Constant(HInstruction* instruction) {
}
Location Location::FpuRegisterOrInt32Constant(HInstruction* instruction) {
- HConstant* constant = instruction->AsConstant();
+ HConstant* constant = instruction->AsConstantOrNull();
if (constant != nullptr) {
int64_t value = CodeGenerator::GetInt64ValueOf(constant);
if (IsInt<32>(value)) {
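// Editor's sketch of the accessor split assumed by this rename (not the real
// art::HInstruction API): the *OrNull form is the one to call when the result
// is null-checked, as in RegisterOrInt32Constant() above, while the plain form
// asserts that the cast is valid.
#include <cassert>

struct Node {
  virtual ~Node() = default;
  virtual bool IsConstant() const { return false; }
};
struct Constant : Node {
  bool IsConstant() const override { return true; }
};
Constant* AsConstantOrNull(Node* node) {
  return node->IsConstant() ? static_cast<Constant*>(node) : nullptr;
}
Constant* AsConstant(Node* node) {
  assert(node->IsConstant());  // Never returns null; misuse trips the assert.
  return static_cast<Constant*>(node);
}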
diff --git a/compiler/optimizing/locations.h b/compiler/optimizing/locations.h
index 7ee076f442..20099ebbc2 100644
--- a/compiler/optimizing/locations.h
+++ b/compiler/optimizing/locations.h
@@ -79,7 +79,7 @@ class Location : public ValueObject {
kUnallocated = 11,
};
- Location() : ValueObject(), value_(kInvalid) {
+ constexpr Location() : ValueObject(), value_(kInvalid) {
// Verify that non-constant location kinds do not interfere with kConstant.
static_assert((kInvalid & kLocationConstantMask) != kConstant, "TagError");
static_assert((kUnallocated & kLocationConstantMask) != kConstant, "TagError");
@@ -95,7 +95,7 @@ class Location : public ValueObject {
DCHECK(!IsValid());
}
- Location(const Location& other) = default;
+ constexpr Location(const Location& other) = default;
Location& operator=(const Location& other) = default;
@@ -126,24 +126,24 @@ class Location : public ValueObject {
}
  // Empty location. Used if the location should be ignored.
- static Location NoLocation() {
+ static constexpr Location NoLocation() {
return Location();
}
// Register locations.
- static Location RegisterLocation(int reg) {
+ static constexpr Location RegisterLocation(int reg) {
return Location(kRegister, reg);
}
- static Location FpuRegisterLocation(int reg) {
+ static constexpr Location FpuRegisterLocation(int reg) {
return Location(kFpuRegister, reg);
}
- static Location RegisterPairLocation(int low, int high) {
+ static constexpr Location RegisterPairLocation(int low, int high) {
return Location(kRegisterPair, low << 16 | high);
}
- static Location FpuRegisterPairLocation(int low, int high) {
+ static constexpr Location FpuRegisterPairLocation(int low, int high) {
return Location(kFpuRegisterPair, low << 16 | high);
}
@@ -423,7 +423,7 @@ class Location : public ValueObject {
explicit Location(uintptr_t value) : value_(value) {}
- Location(Kind kind, uintptr_t payload)
+ constexpr Location(Kind kind, uintptr_t payload)
: value_(KindField::Encode(kind) | PayloadField::Encode(payload)) {}
uintptr_t GetPayload() const {
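// Editor's sketch (generic, not the real art::Location class): with constexpr
// constructors, tagged-payload values like these can be built at compile time,
// e.g. for static tables of calling-convention locations.
#include <cstdint>

class Tagged {
 public:
  static constexpr Tagged Register(int reg) { return Tagged(kRegister, reg); }
  constexpr int Payload() const { return static_cast<int>(value_ >> 4); }
 private:
  enum Kind : uintptr_t { kRegister = 1 };
  constexpr Tagged(Kind kind, int payload)
      : value_((static_cast<uintptr_t>(payload) << 4) | kind) {}
  uintptr_t value_;
};

static constexpr Tagged kFirstArgument = Tagged::Register(1);
static_assert(kFirstArgument.Payload() == 1, "constructed at compile time");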
diff --git a/compiler/optimizing/loop_analysis.cc b/compiler/optimizing/loop_analysis.cc
index 95e81533da..b3f9e835de 100644
--- a/compiler/optimizing/loop_analysis.cc
+++ b/compiler/optimizing/loop_analysis.cc
@@ -42,7 +42,7 @@ void LoopAnalysis::CalculateLoopBasicProperties(HLoopInformation* loop_info,
// not cause loop peeling to happen as they either cannot be inside a loop, or by
// definition cannot be loop exits (unconditional instructions), or are not beneficial for
// the optimization.
- HIf* hif = block->GetLastInstruction()->AsIf();
+ HIf* hif = block->GetLastInstruction()->AsIfOrNull();
if (hif != nullptr && !loop_info->Contains(*hif->InputAt(0)->GetBlock())) {
analysis_results->invariant_exits_num_++;
}
@@ -221,9 +221,6 @@ class X86_64LoopHelper : public ArchDefaultLoopHelper {
return 3;
case HInstruction::InstructionKind::kIf:
return 2;
- case HInstruction::InstructionKind::kPredicatedInstanceFieldGet:
- // test + cond-jump + IFieldGet
- return 4;
case HInstruction::InstructionKind::kInstanceFieldGet:
return 2;
case HInstruction::InstructionKind::kInstanceFieldSet:
@@ -259,7 +256,7 @@ class X86_64LoopHelper : public ArchDefaultLoopHelper {
case HInstruction::InstructionKind::kVecReplicateScalar:
return 2;
case HInstruction::InstructionKind::kVecExtractScalar:
- return 1;
+ return 1;
case HInstruction::InstructionKind::kVecReduce:
return 4;
case HInstruction::InstructionKind::kVecNeg:
diff --git a/compiler/optimizing/loop_analysis.h b/compiler/optimizing/loop_analysis.h
index cec00fecf4..cd8f00588d 100644
--- a/compiler/optimizing/loop_analysis.h
+++ b/compiler/optimizing/loop_analysis.h
@@ -148,13 +148,15 @@ class ArchNoOptsLoopHelper : public ArenaObject<kArenaAllocOptimization> {
//
// Returns 'true' by default, should be overridden by particular target loop helper.
virtual bool IsLoopNonBeneficialForScalarOpts(
- LoopAnalysisInfo* loop_analysis_info ATTRIBUTE_UNUSED) const { return true; }
+ [[maybe_unused]] LoopAnalysisInfo* loop_analysis_info) const {
+ return true;
+ }
// Returns optimal scalar unrolling factor for the loop.
//
// Returns kNoUnrollingFactor by default, should be overridden by particular target loop helper.
virtual uint32_t GetScalarUnrollingFactor(
- const LoopAnalysisInfo* analysis_info ATTRIBUTE_UNUSED) const {
+ [[maybe_unused]] const LoopAnalysisInfo* analysis_info) const {
return LoopAnalysisInfo::kNoUnrollingFactor;
}
@@ -166,17 +168,17 @@ class ArchNoOptsLoopHelper : public ArenaObject<kArenaAllocOptimization> {
// Returns whether it is beneficial to fully unroll the loop.
//
// Returns 'false' by default, should be overridden by particular target loop helper.
- virtual bool IsFullUnrollingBeneficial(LoopAnalysisInfo* analysis_info ATTRIBUTE_UNUSED) const {
+ virtual bool IsFullUnrollingBeneficial([[maybe_unused]] LoopAnalysisInfo* analysis_info) const {
return false;
}
// Returns optimal SIMD unrolling factor for the loop.
//
// Returns kNoUnrollingFactor by default, should be overridden by particular target loop helper.
- virtual uint32_t GetSIMDUnrollingFactor(HBasicBlock* block ATTRIBUTE_UNUSED,
- int64_t trip_count ATTRIBUTE_UNUSED,
- uint32_t max_peel ATTRIBUTE_UNUSED,
- uint32_t vector_length ATTRIBUTE_UNUSED) const {
+ virtual uint32_t GetSIMDUnrollingFactor([[maybe_unused]] HBasicBlock* block,
+ [[maybe_unused]] int64_t trip_count,
+ [[maybe_unused]] uint32_t max_peel,
+ [[maybe_unused]] uint32_t vector_length) const {
return LoopAnalysisInfo::kNoUnrollingFactor;
}
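// Editor's note: [[maybe_unused]] is the standard C++17 attribute replacing the
// project's ATTRIBUTE_UNUSED macro (assumed here to wrap __attribute__((unused))).
// Minimal illustration outside ART, both forms attaching to a parameter:
#define ATTRIBUTE_UNUSED __attribute__((unused))  // assumed old definition
void OldStyle(int arg ATTRIBUTE_UNUSED) {}        // compiler-extension spelling
void NewStyle([[maybe_unused]] int arg) {}        // portable standard spelling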
diff --git a/compiler/optimizing/loop_optimization.cc b/compiler/optimizing/loop_optimization.cc
index 7a52502562..f6d69ca789 100644
--- a/compiler/optimizing/loop_optimization.cc
+++ b/compiler/optimizing/loop_optimization.cc
@@ -366,8 +366,8 @@ static bool HasVectorRestrictions(uint64_t restrictions, uint64_t tested) {
return (restrictions & tested) != 0;
}
-// Insert an instruction.
-static HInstruction* Insert(HBasicBlock* block, HInstruction* instruction) {
+// Insert an instruction at the end of the block (before its terminator), with DCHECKs.
+inline HInstruction* Insert(HBasicBlock* block, HInstruction* instruction) {
DCHECK(block != nullptr);
DCHECK(instruction != nullptr);
block->InsertInstructionBefore(instruction, block->GetLastInstruction());
@@ -418,7 +418,7 @@ static void TryToEvaluateIfCondition(HIf* instruction, HGraph* graph) {
++it;
if (true_succ->Dominates(user_block)) {
user->ReplaceInput(graph->GetIntConstant(1), index);
- } else if (false_succ->Dominates(user_block)) {
+ } else if (false_succ->Dominates(user_block)) {
user->ReplaceInput(graph->GetIntConstant(0), index);
}
}
@@ -453,6 +453,54 @@ static DataType::Type GetNarrowerType(HInstruction* a, HInstruction* b) {
return type;
}
+// Returns whether the loop is of a diamond structure:
+//
+// header <----------------+
+// | |
+// diamond_hif |
+// / \ |
+// diamond_true diamond_false |
+// \ / |
+// back_edge |
+// | |
+// +---------------------+
+static bool HasLoopDiamondStructure(HLoopInformation* loop_info) {
+ HBasicBlock* header = loop_info->GetHeader();
+ if (loop_info->NumberOfBackEdges() != 1 || header->GetSuccessors().size() != 2) {
+ return false;
+ }
+ HBasicBlock* header_succ_0 = header->GetSuccessors()[0];
+ HBasicBlock* header_succ_1 = header->GetSuccessors()[1];
+ HBasicBlock* diamond_top = loop_info->Contains(*header_succ_0) ?
+ header_succ_0 :
+ header_succ_1;
+ if (!diamond_top->GetLastInstruction()->IsIf()) {
+ return false;
+ }
+
+ HIf* diamond_hif = diamond_top->GetLastInstruction()->AsIf();
+ HBasicBlock* diamond_true = diamond_hif->IfTrueSuccessor();
+ HBasicBlock* diamond_false = diamond_hif->IfFalseSuccessor();
+
+ if (diamond_true->GetSuccessors().size() != 1 || diamond_false->GetSuccessors().size() != 1) {
+ return false;
+ }
+
+ HBasicBlock* back_edge = diamond_true->GetSingleSuccessor();
+ if (back_edge != diamond_false->GetSingleSuccessor() ||
+ back_edge != loop_info->GetBackEdges()[0]) {
+ return false;
+ }
+
+ DCHECK_EQ(loop_info->GetBlocks().NumSetBits(), 5u);
+ return true;
+}
+
+static bool IsPredicatedLoopControlFlowSupported(HLoopInformation* loop_info) {
+ size_t num_of_blocks = loop_info->GetBlocks().NumSetBits();
+ return num_of_blocks == 2 || HasLoopDiamondStructure(loop_info);
+}
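// Editor's sketch (illustrative source shape, not ART code): a counted loop
// whose body is a single if/else typically lowers to the diamond CFG accepted
// above: the header, the block ending in the HIf, the two arms, and a shared
// back edge.
int SumAbs(const int* values, int count) {
  int sum = 0;
  for (int i = 0; i < count; ++i) {
    if (values[i] >= 0) {
      sum += values[i];
    } else {
      sum -= values[i];
    }
  }
  return sum;
}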
+
//
// Public methods.
//
@@ -482,6 +530,8 @@ HLoopOptimization::HLoopOptimization(HGraph* graph,
vector_runtime_test_b_(nullptr),
vector_map_(nullptr),
vector_permanent_map_(nullptr),
+ vector_external_set_(nullptr),
+ predicate_info_map_(nullptr),
vector_mode_(kSequential),
vector_preheader_(nullptr),
vector_header_(nullptr),
@@ -542,12 +592,17 @@ bool HLoopOptimization::LocalRun() {
std::less<HInstruction*>(), loop_allocator_->Adapter(kArenaAllocLoopOptimization));
ScopedArenaSafeMap<HInstruction*, HInstruction*> perm(
std::less<HInstruction*>(), loop_allocator_->Adapter(kArenaAllocLoopOptimization));
+ ScopedArenaSet<HInstruction*> ext_set(loop_allocator_->Adapter(kArenaAllocLoopOptimization));
+ ScopedArenaSafeMap<HBasicBlock*, BlockPredicateInfo*> pred(
+ std::less<HBasicBlock*>(), loop_allocator_->Adapter(kArenaAllocLoopOptimization));
// Attach.
iset_ = &iset;
reductions_ = &reds;
vector_refs_ = &refs;
vector_map_ = &map;
vector_permanent_map_ = &perm;
+ vector_external_set_ = &ext_set;
+ predicate_info_map_ = &pred;
// Traverse.
const bool did_loop_opt = TraverseLoopsInnerToOuter(top_loop_);
// Detach.
@@ -556,6 +611,9 @@ bool HLoopOptimization::LocalRun() {
vector_refs_ = nullptr;
vector_map_ = nullptr;
vector_permanent_map_ = nullptr;
+ vector_external_set_ = nullptr;
+ predicate_info_map_ = nullptr;
+
return did_loop_opt;
}
@@ -787,6 +845,37 @@ void HLoopOptimization::SimplifyBlocks(LoopNode* node) {
}
}
+// Checks whether the loop has an exit structure suitable for the InnerLoopFinite optimization:
+// - it has a single loop exit.
+// - the exit block has only a single predecessor - a block inside the loop.
+//
+// In that case returns the single exit basic block (outside the loop); otherwise nullptr.
+static HBasicBlock* GetInnerLoopFiniteSingleExit(HLoopInformation* loop_info) {
+ HBasicBlock* exit = nullptr;
+ for (HBlocksInLoopIterator block_it(*loop_info);
+ !block_it.Done();
+ block_it.Advance()) {
+ HBasicBlock* block = block_it.Current();
+
+ // Check whether one of the successors is a loop exit.
+ for (HBasicBlock* successor : block->GetSuccessors()) {
+ if (!loop_info->Contains(*successor)) {
+ if (exit != nullptr) {
+ // The loop has more than one exit.
+ return nullptr;
+ }
+ exit = successor;
+
+ // Ensure exit can only be reached by exiting loop.
+ if (successor->GetPredecessors().size() != 1) {
+ return nullptr;
+ }
+ }
+ }
+ }
+ return exit;
+}
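// Editor's note (illustrative source shape, not ART code): a loop with an extra
// exiting branch, e.g. an early return or break, has two exit edges, so the
// helper above would reject it and return nullptr:
int FirstNegative(const int* values, int count) {
  for (int i = 0; i < count; ++i) {
    if (values[i] < 0) {
      return i;  // second way out of the loop besides the i < count check
    }
  }
  return -1;
}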
+
bool HLoopOptimization::TryOptimizeInnerLoopFinite(LoopNode* node) {
HBasicBlock* header = node->loop_info->GetHeader();
HBasicBlock* preheader = node->loop_info->GetPreHeader();
@@ -795,33 +884,22 @@ bool HLoopOptimization::TryOptimizeInnerLoopFinite(LoopNode* node) {
if (!induction_range_.IsFinite(node->loop_info, &trip_count)) {
return false;
}
- // Ensure there is only a single loop-body (besides the header).
- HBasicBlock* body = nullptr;
- for (HBlocksInLoopIterator it(*node->loop_info); !it.Done(); it.Advance()) {
- if (it.Current() != header) {
- if (body != nullptr) {
- return false;
- }
- body = it.Current();
- }
- }
- CHECK(body != nullptr);
- // Ensure there is only a single exit point.
- if (header->GetSuccessors().size() != 2) {
- return false;
- }
- HBasicBlock* exit = (header->GetSuccessors()[0] == body)
- ? header->GetSuccessors()[1]
- : header->GetSuccessors()[0];
- // Ensure exit can only be reached by exiting loop.
- if (exit->GetPredecessors().size() != 1) {
+ // Check loop exits.
+ HBasicBlock* exit = GetInnerLoopFiniteSingleExit(node->loop_info);
+ if (exit == nullptr) {
return false;
}
+
+ HBasicBlock* body = (header->GetSuccessors()[0] == exit)
+ ? header->GetSuccessors()[1]
+ : header->GetSuccessors()[0];
// Detect either an empty loop (no side effects other than plain iteration) or
// a trivial loop (just iterating once). Replace subsequent index uses, if any,
// with the last value and remove the loop, possibly after unrolling its body.
HPhi* main_phi = nullptr;
- if (TrySetSimpleLoopHeader(header, &main_phi)) {
+ size_t num_of_blocks = header->GetLoopInformation()->GetBlocks().NumSetBits();
+
+ if (num_of_blocks == 2 && TrySetSimpleLoopHeader(header, &main_phi)) {
bool is_empty = IsEmptyBody(body);
if (reductions_->empty() && // TODO: possible with some effort
(is_empty || trip_count == 1) &&
@@ -845,21 +923,61 @@ bool HLoopOptimization::TryOptimizeInnerLoopFinite(LoopNode* node) {
}
}
// Vectorize loop, if possible and valid.
- if (kEnableVectorization &&
+ if (!kEnableVectorization ||
// Disable vectorization for debuggable graphs: this is a workaround for the bug
// in 'GenerateNewLoop' which caused the SuspendCheck environment to be invalid.
// TODO: b/138601207, investigate other possible cases with wrong environment values and
// possibly switch back vectorization on for debuggable graphs.
- !graph_->IsDebuggable() &&
- TrySetSimpleLoopHeader(header, &main_phi) &&
- ShouldVectorize(node, body, trip_count) &&
- TryAssignLastValue(node->loop_info, main_phi, preheader, /*collect_loop_uses*/ true)) {
- Vectorize(node, body, exit, trip_count);
- graph_->SetHasSIMD(true); // flag SIMD usage
- MaybeRecordStat(stats_, MethodCompilationStat::kLoopVectorized);
- return true;
+ graph_->IsDebuggable()) {
+ return false;
+ }
+
+ if (IsInPredicatedVectorizationMode()) {
+ return TryVectorizePredicated(node, body, exit, main_phi, trip_count);
+ } else {
+ return TryVectorizedTraditional(node, body, exit, main_phi, trip_count);
}
- return false;
+}
+
+bool HLoopOptimization::TryVectorizePredicated(LoopNode* node,
+ HBasicBlock* body,
+ HBasicBlock* exit,
+ HPhi* main_phi,
+ int64_t trip_count) {
+ if (!IsPredicatedLoopControlFlowSupported(node->loop_info) ||
+ !ShouldVectorizeCommon(node, main_phi, trip_count)) {
+ return false;
+ }
+
+ // Currently we can only generate cleanup loops for loops with 2 basic blocks.
+ //
+ // TODO: Support array disambiguation tests for CF loops.
+ if (NeedsArrayRefsDisambiguationTest() &&
+ node->loop_info->GetBlocks().NumSetBits() != 2) {
+ return false;
+ }
+
+ VectorizePredicated(node, body, exit);
+ MaybeRecordStat(stats_, MethodCompilationStat::kLoopVectorized);
+ graph_->SetHasPredicatedSIMD(true); // flag SIMD usage
+ return true;
+}
+
+bool HLoopOptimization::TryVectorizedTraditional(LoopNode* node,
+ HBasicBlock* body,
+ HBasicBlock* exit,
+ HPhi* main_phi,
+ int64_t trip_count) {
+ HBasicBlock* header = node->loop_info->GetHeader();
+ size_t num_of_blocks = header->GetLoopInformation()->GetBlocks().NumSetBits();
+
+ if (num_of_blocks != 2 || !ShouldVectorizeCommon(node, main_phi, trip_count)) {
+ return false;
+ }
+ VectorizeTraditional(node, body, exit, trip_count);
+ MaybeRecordStat(stats_, MethodCompilationStat::kLoopVectorized);
+ graph_->SetHasTraditionalSIMD(true); // flag SIMD usage
+ return true;
}
bool HLoopOptimization::OptimizeInnerLoop(LoopNode* node) {
@@ -1006,7 +1124,10 @@ bool HLoopOptimization::TryLoopScalarOpts(LoopNode* node) {
// Intel Press, June, 2004 (http://www.aartbik.com/).
//
-bool HLoopOptimization::ShouldVectorize(LoopNode* node, HBasicBlock* block, int64_t trip_count) {
+
+bool HLoopOptimization::CanVectorizeDataFlow(LoopNode* node,
+ HBasicBlock* header,
+ bool collect_alignment_info) {
// Reset vector bookkeeping.
vector_length_ = 0;
vector_refs_->clear();
@@ -1015,16 +1136,30 @@ bool HLoopOptimization::ShouldVectorize(LoopNode* node, HBasicBlock* block, int6
vector_runtime_test_a_ =
vector_runtime_test_b_ = nullptr;
- // Phis in the loop-body prevent vectorization.
- if (!block->GetPhis().IsEmpty()) {
- return false;
- }
+ // Traverse the data flow of the loop, in the original program order.
+ for (HBlocksInLoopReversePostOrderIterator block_it(*header->GetLoopInformation());
+ !block_it.Done();
+ block_it.Advance()) {
+ HBasicBlock* block = block_it.Current();
- // Scan the loop-body, starting a right-hand-side tree traversal at each left-hand-side
- // occurrence, which allows passing down attributes down the use tree.
- for (HInstructionIterator it(block->GetInstructions()); !it.Done(); it.Advance()) {
- if (!VectorizeDef(node, it.Current(), /*generate_code*/ false)) {
- return false; // failure to vectorize a left-hand-side
+ if (block == header) {
+ // The header is of a certain structure (TrySetSimpleLoopHeader) and doesn't need to be
+ // processed here.
+ continue;
+ }
+
+ // Phis in the loop-body prevent vectorization.
+ // TODO: Enable vectorization of CF loops with Phis.
+ if (!block->GetPhis().IsEmpty()) {
+ return false;
+ }
+
+ // Scan the loop-body instructions, starting a right-hand-side tree traversal at each
+ // left-hand-side occurrence, which allows passing down attributes down the use tree.
+ for (HInstructionIterator it(block->GetInstructions()); !it.Done(); it.Advance()) {
+ if (!VectorizeDef(node, it.Current(), /*generate_code*/ false)) {
+ return false; // failure to vectorize a left-hand-side
+ }
}
}
@@ -1111,24 +1246,123 @@ bool HLoopOptimization::ShouldVectorize(LoopNode* node, HBasicBlock* block, int6
}
} // for i
- if (!IsInPredicatedVectorizationMode()) {
- // Find a suitable alignment strategy.
+ if (collect_alignment_info) {
+ // Update the info on alignment strategy.
SetAlignmentStrategy(peeling_votes, peeling_candidate);
}
- // Does vectorization seem profitable?
- if (!IsVectorizationProfitable(trip_count)) {
+ // Success!
+ return true;
+}
+
+bool HLoopOptimization::ShouldVectorizeCommon(LoopNode* node,
+ HPhi* main_phi,
+ int64_t trip_count) {
+ HBasicBlock* header = node->loop_info->GetHeader();
+ HBasicBlock* preheader = node->loop_info->GetPreHeader();
+
+ bool enable_alignment_strategies = !IsInPredicatedVectorizationMode();
+ if (!TrySetSimpleLoopHeader(header, &main_phi) ||
+ !CanVectorizeDataFlow(node, header, enable_alignment_strategies) ||
+ !IsVectorizationProfitable(trip_count) ||
+ !TryAssignLastValue(node->loop_info, main_phi, preheader, /*collect_loop_uses*/ true)) {
return false;
}
- // Success!
return true;
}
-void HLoopOptimization::Vectorize(LoopNode* node,
- HBasicBlock* block,
- HBasicBlock* exit,
- int64_t trip_count) {
+void HLoopOptimization::VectorizePredicated(LoopNode* node,
+ HBasicBlock* block,
+ HBasicBlock* exit) {
+ DCHECK(IsInPredicatedVectorizationMode());
+
+ HBasicBlock* header = node->loop_info->GetHeader();
+ HBasicBlock* preheader = node->loop_info->GetPreHeader();
+
+ // Adjust vector bookkeeping.
+ HPhi* main_phi = nullptr;
+ bool is_simple_loop_header = TrySetSimpleLoopHeader(header, &main_phi); // refills sets
+ DCHECK(is_simple_loop_header);
+ vector_header_ = header;
+ vector_body_ = block;
+
+ // Loop induction type.
+ DataType::Type induc_type = main_phi->GetType();
+ DCHECK(induc_type == DataType::Type::kInt32 || induc_type == DataType::Type::kInt64)
+ << induc_type;
+
+ // Generate loop control:
+ // stc = <trip-count>;
+ // vtc = <vector trip-count>
+ HInstruction* stc = induction_range_.GenerateTripCount(node->loop_info, graph_, preheader);
+ HInstruction* vtc = stc;
+ vector_index_ = graph_->GetConstant(induc_type, 0);
+ bool needs_disambiguation_test = false;
+ // Generate runtime disambiguation test:
+ // vtc = a != b ? vtc : 0;
+ if (NeedsArrayRefsDisambiguationTest()) {
+ HInstruction* rt = Insert(
+ preheader,
+ new (global_allocator_) HNotEqual(vector_runtime_test_a_, vector_runtime_test_b_));
+ vtc = Insert(preheader,
+ new (global_allocator_)
+ HSelect(rt, vtc, graph_->GetConstant(induc_type, 0), kNoDexPc));
+ needs_disambiguation_test = true;
+ }
+
+ // Generate vector loop:
+ // for ( ; i < vtc; i += vector_length)
+ // <vectorized-loop-body>
+ HBasicBlock* preheader_for_vector_loop =
+ graph_->TransformLoopForVectorization(vector_header_, vector_body_, exit);
+ vector_mode_ = kVector;
+ GenerateNewLoopPredicated(node,
+ preheader_for_vector_loop,
+ vector_index_,
+ vtc,
+ graph_->GetConstant(induc_type, vector_length_));
+
+ // Generate scalar loop, if needed:
+ // for ( ; i < stc; i += 1)
+ // <loop-body>
+ if (needs_disambiguation_test) {
+ vector_mode_ = kSequential;
+ HBasicBlock* preheader_for_cleanup_loop =
+ graph_->TransformLoopForVectorization(vector_header_, vector_body_, exit);
+ // Use "Traditional" version for the sequential loop.
+ GenerateNewLoopScalarOrTraditional(node,
+ preheader_for_cleanup_loop,
+ vector_index_,
+ stc,
+ graph_->GetConstant(induc_type, 1),
+ LoopAnalysisInfo::kNoUnrollingFactor);
+ }
+
+ FinalizeVectorization(node);
+
+ // Assign governing predicates for the predicated instructions inserted during vectorization
+ // outside the loop.
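+  // These operations (e.g. scalar replication for loop invariants, reductions in the exit
+  // block) are executed unconditionally, so an all-true HVecPredSetAll predicate is used.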
+ for (auto it : *vector_external_set_) {
+ DCHECK(it->IsVecOperation());
+ HVecOperation* vec_op = it->AsVecOperation();
+
+ HVecPredSetAll* set_pred = new (global_allocator_) HVecPredSetAll(global_allocator_,
+ graph_->GetIntConstant(1),
+ vec_op->GetPackedType(),
+ vec_op->GetVectorLength(),
+ 0u);
+ vec_op->GetBlock()->InsertInstructionBefore(set_pred, vec_op);
+ vec_op->SetMergingGoverningPredicate(set_pred);
+ }
+}
+
+void HLoopOptimization::VectorizeTraditional(LoopNode* node,
+ HBasicBlock* block,
+ HBasicBlock* exit,
+ int64_t trip_count) {
+ DCHECK(!IsInPredicatedVectorizationMode());
+
HBasicBlock* header = node->loop_info->GetHeader();
HBasicBlock* preheader = node->loop_info->GetPreHeader();
@@ -1141,7 +1375,7 @@ void HLoopOptimization::Vectorize(LoopNode* node,
// A cleanup loop is needed, at least, for any unknown trip count or
// for a known trip count with remainder iterations after vectorization.
- bool needs_cleanup = !IsInPredicatedVectorizationMode() &&
+ bool needs_cleanup =
(trip_count == 0 || ((trip_count - vector_static_peeling_factor_) % chunk) != 0);
// Adjust vector bookkeeping.
@@ -1160,13 +1394,11 @@ void HLoopOptimization::Vectorize(LoopNode* node,
// ptc = <peeling factor>;
HInstruction* ptc = nullptr;
if (vector_static_peeling_factor_ != 0) {
- DCHECK(!IsInPredicatedVectorizationMode());
// Static loop peeling for SIMD alignment (using the most suitable
// fixed peeling factor found during prior alignment analysis).
DCHECK(vector_dynamic_peeling_candidate_ == nullptr);
ptc = graph_->GetConstant(induc_type, vector_static_peeling_factor_);
} else if (vector_dynamic_peeling_candidate_ != nullptr) {
- DCHECK(!IsInPredicatedVectorizationMode());
// Dynamic loop peeling for SIMD alignment (using the most suitable
// candidate found during prior alignment analysis):
// rem = offset % ALIGN; // adjusted as #elements
@@ -1197,7 +1429,6 @@ void HLoopOptimization::Vectorize(LoopNode* node,
HInstruction* stc = induction_range_.GenerateTripCount(node->loop_info, graph_, preheader);
HInstruction* vtc = stc;
if (needs_cleanup) {
- DCHECK(!IsInPredicatedVectorizationMode());
DCHECK(IsPowerOfTwo(chunk));
HInstruction* diff = stc;
if (ptc != nullptr) {
@@ -1217,7 +1448,7 @@ void HLoopOptimization::Vectorize(LoopNode* node,
// Generate runtime disambiguation test:
// vtc = a != b ? vtc : 0;
- if (vector_runtime_test_a_ != nullptr) {
+ if (NeedsArrayRefsDisambiguationTest()) {
HInstruction* rt = Insert(
preheader,
new (global_allocator_) HNotEqual(vector_runtime_test_a_, vector_runtime_test_b_));
@@ -1235,45 +1466,52 @@ void HLoopOptimization::Vectorize(LoopNode* node,
// moved around during suspend checks, since all analysis was based on
// nothing more than the Android runtime alignment conventions.
if (ptc != nullptr) {
- DCHECK(!IsInPredicatedVectorizationMode());
vector_mode_ = kSequential;
- GenerateNewLoop(node,
- block,
- graph_->TransformLoopForVectorization(vector_header_, vector_body_, exit),
- vector_index_,
- ptc,
- graph_->GetConstant(induc_type, 1),
- LoopAnalysisInfo::kNoUnrollingFactor);
+ HBasicBlock* preheader_for_peeling_loop =
+ graph_->TransformLoopForVectorization(vector_header_, vector_body_, exit);
+ GenerateNewLoopScalarOrTraditional(node,
+ preheader_for_peeling_loop,
+ vector_index_,
+ ptc,
+ graph_->GetConstant(induc_type, 1),
+ LoopAnalysisInfo::kNoUnrollingFactor);
}
// Generate vector loop, possibly further unrolled:
// for ( ; i < vtc; i += chunk)
// <vectorized-loop-body>
vector_mode_ = kVector;
- GenerateNewLoop(node,
- block,
- graph_->TransformLoopForVectorization(vector_header_, vector_body_, exit),
- vector_index_,
- vtc,
- graph_->GetConstant(induc_type, vector_length_), // increment per unroll
- unroll);
- HLoopInformation* vloop = vector_header_->GetLoopInformation();
+ HBasicBlock* preheader_for_vector_loop =
+ graph_->TransformLoopForVectorization(vector_header_, vector_body_, exit);
+ GenerateNewLoopScalarOrTraditional(node,
+ preheader_for_vector_loop,
+ vector_index_,
+ vtc,
+ graph_->GetConstant(induc_type, vector_length_), // per unroll
+ unroll);
// Generate cleanup loop, if needed:
// for ( ; i < stc; i += 1)
// <loop-body>
if (needs_cleanup) {
- DCHECK_IMPLIES(IsInPredicatedVectorizationMode(), vector_runtime_test_a_ != nullptr);
vector_mode_ = kSequential;
- GenerateNewLoop(node,
- block,
- graph_->TransformLoopForVectorization(vector_header_, vector_body_, exit),
- vector_index_,
- stc,
- graph_->GetConstant(induc_type, 1),
- LoopAnalysisInfo::kNoUnrollingFactor);
+ HBasicBlock* preheader_for_cleanup_loop =
+ graph_->TransformLoopForVectorization(vector_header_, vector_body_, exit);
+ GenerateNewLoopScalarOrTraditional(node,
+ preheader_for_cleanup_loop,
+ vector_index_,
+ stc,
+ graph_->GetConstant(induc_type, 1),
+ LoopAnalysisInfo::kNoUnrollingFactor);
}
+ FinalizeVectorization(node);
+}
+
+void HLoopOptimization::FinalizeVectorization(LoopNode* node) {
+ HBasicBlock* header = node->loop_info->GetHeader();
+ HBasicBlock* preheader = node->loop_info->GetPreHeader();
+ HLoopInformation* vloop = vector_header_->GetLoopInformation();
// Link reductions to their final uses.
for (auto i = reductions_->begin(); i != reductions_->end(); ++i) {
if (i->first->IsPhi()) {
@@ -1287,9 +1525,17 @@ void HLoopOptimization::Vectorize(LoopNode* node,
}
}
- // Remove the original loop by disconnecting the body block
- // and removing all instructions from the header.
- block->DisconnectAndDelete();
+ // Remove the original loop.
+ for (HBlocksInLoopPostOrderIterator it_loop(*node->loop_info);
+ !it_loop.Done();
+ it_loop.Advance()) {
+ HBasicBlock* cur_block = it_loop.Current();
+ if (cur_block == node->loop_info->GetHeader()) {
+ continue;
+ }
+ cur_block->DisconnectAndDelete();
+ }
+
while (!header->GetFirstInstruction()->IsGoto()) {
header->RemoveInstruction(header->GetFirstInstruction());
}
@@ -1301,14 +1547,7 @@ void HLoopOptimization::Vectorize(LoopNode* node,
node->loop_info = vloop;
}
-void HLoopOptimization::GenerateNewLoop(LoopNode* node,
- HBasicBlock* block,
- HBasicBlock* new_preheader,
- HInstruction* lo,
- HInstruction* hi,
- HInstruction* step,
- uint32_t unroll) {
- DCHECK(unroll == 1 || vector_mode_ == kVector);
+HPhi* HLoopOptimization::InitializeForNewLoop(HBasicBlock* new_preheader, HInstruction* lo) {
DataType::Type induc_type = lo->GetType();
// Prepare new loop.
vector_preheader_ = new_preheader,
@@ -1318,68 +1557,160 @@ void HLoopOptimization::GenerateNewLoop(LoopNode* node,
kNoRegNumber,
0,
HPhi::ToPhiType(induc_type));
- // Generate header and prepare body.
- // for (i = lo; i < hi; i += step)
- // <loop-body>
- HInstruction* cond = nullptr;
- HInstruction* set_pred = nullptr;
- if (IsInPredicatedVectorizationMode()) {
- HVecPredWhile* pred_while =
- new (global_allocator_) HVecPredWhile(global_allocator_,
- phi,
- hi,
- HVecPredWhile::CondKind::kLO,
- DataType::Type::kInt32,
- vector_length_,
- 0u);
-
- cond = new (global_allocator_) HVecPredCondition(global_allocator_,
- pred_while,
- HVecPredCondition::PCondKind::kNFirst,
- DataType::Type::kInt32,
- vector_length_,
- 0u);
-
- vector_header_->AddPhi(phi);
- vector_header_->AddInstruction(pred_while);
- vector_header_->AddInstruction(cond);
- set_pred = pred_while;
- } else {
- cond = new (global_allocator_) HAboveOrEqual(phi, hi);
- vector_header_->AddPhi(phi);
- vector_header_->AddInstruction(cond);
- }
+ vector_header_->AddPhi(phi);
+ vector_index_ = phi;
+ vector_permanent_map_->clear();
+ vector_external_set_->clear();
+ predicate_info_map_->clear();
+
+ return phi;
+}
+void HLoopOptimization::GenerateNewLoopScalarOrTraditional(LoopNode* node,
+ HBasicBlock* new_preheader,
+ HInstruction* lo,
+ HInstruction* hi,
+ HInstruction* step,
+ uint32_t unroll) {
+ DCHECK(unroll == 1 || vector_mode_ == kVector);
+ DataType::Type induc_type = lo->GetType();
+ HPhi* phi = InitializeForNewLoop(new_preheader, lo);
+
+ // Generate loop exit check.
+ HInstruction* cond = new (global_allocator_) HAboveOrEqual(phi, hi);
+ vector_header_->AddInstruction(cond);
vector_header_->AddInstruction(new (global_allocator_) HIf(cond));
- vector_index_ = phi;
- vector_permanent_map_->clear(); // preserved over unrolling
+
for (uint32_t u = 0; u < unroll; u++) {
- // Generate instruction map.
- vector_map_->clear();
- for (HInstructionIterator it(block->GetInstructions()); !it.Done(); it.Advance()) {
+ GenerateNewLoopBodyOnce(node, induc_type, step);
+ }
+
+ FinalizePhisForNewLoop(phi, lo);
+}
+
+void HLoopOptimization::GenerateNewLoopPredicated(LoopNode* node,
+ HBasicBlock* new_preheader,
+ HInstruction* lo,
+ HInstruction* hi,
+ HInstruction* step) {
+ DCHECK(IsInPredicatedVectorizationMode());
+ DCHECK_EQ(vector_mode_, kVector);
+ DataType::Type induc_type = lo->GetType();
+ HPhi* phi = InitializeForNewLoop(new_preheader, lo);
+
+ // Generate loop exit check.
+ HVecPredWhile* pred_while =
+ new (global_allocator_) HVecPredWhile(global_allocator_,
+ phi,
+ hi,
+ HVecPredWhile::CondKind::kLO,
+ DataType::Type::kInt32,
+ vector_length_,
+ 0u);
+
+ HInstruction* cond =
+ new (global_allocator_) HVecPredToBoolean(global_allocator_,
+ pred_while,
+ HVecPredToBoolean::PCondKind::kNFirst,
+ DataType::Type::kInt32,
+ vector_length_,
+ 0u);
+
+ vector_header_->AddInstruction(pred_while);
+ vector_header_->AddInstruction(cond);
+ vector_header_->AddInstruction(new (global_allocator_) HIf(cond));
+
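+  // Synthesis happens in three steps: allocate empty per-block predicate info, generate the
+  // vector instructions (VectorizeIfCondition records the true/false predicates for each HIf
+  // along the way), and then assign the control predicates derived from the loop structure.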
+ PreparePredicateInfoMap(node);
+ GenerateNewLoopBodyOnce(node, induc_type, step);
+ InitPredicateInfoMap(node, pred_while);
+
+ // Assign governing predicates for instructions in the loop; the traversal order doesn't matter.
+ for (HBlocksInLoopIterator block_it(*node->loop_info);
+ !block_it.Done();
+ block_it.Advance()) {
+ HBasicBlock* cur_block = block_it.Current();
+
+ for (HInstructionIterator it(cur_block->GetInstructions()); !it.Done(); it.Advance()) {
+ auto i = vector_map_->find(it.Current());
+ if (i != vector_map_->end()) {
+ HInstruction* instr = i->second;
+
+ if (!instr->IsVecOperation()) {
+ continue;
+ }
+ // There are cases when a vector instruction, which corresponds to some instruction in the
+ // original scalar loop, is located not in the newly created vector loop but
+ // in the vector loop preheader (and hence recorded in vector_external_set_).
+ //
+ // Governing predicates will be set for such instructions separately.
+ bool in_vector_loop = vector_header_->GetLoopInformation()->Contains(*instr->GetBlock());
+ DCHECK_IMPLIES(!in_vector_loop,
+ vector_external_set_->find(instr) != vector_external_set_->end());
+
+ if (in_vector_loop &&
+ !instr->AsVecOperation()->IsPredicated()) {
+ HVecOperation* op = instr->AsVecOperation();
+ HVecPredSetOperation* pred = predicate_info_map_->Get(cur_block)->GetControlPredicate();
+ op->SetMergingGoverningPredicate(pred);
+ }
+ }
+ }
+ }
+
+ FinalizePhisForNewLoop(phi, lo);
+}
+
+void HLoopOptimization::GenerateNewLoopBodyOnce(LoopNode* node,
+ DataType::Type induc_type,
+ HInstruction* step) {
+ // Generate instruction map.
+ vector_map_->clear();
+ HLoopInformation* loop_info = node->loop_info;
+
+ // Traverse the data flow of the loop, in the original program order.
+ for (HBlocksInLoopReversePostOrderIterator block_it(*loop_info);
+ !block_it.Done();
+ block_it.Advance()) {
+ HBasicBlock* cur_block = block_it.Current();
+
+ if (cur_block == loop_info->GetHeader()) {
+ continue;
+ }
+
+ for (HInstructionIterator it(cur_block->GetInstructions()); !it.Done(); it.Advance()) {
bool vectorized_def = VectorizeDef(node, it.Current(), /*generate_code*/ true);
DCHECK(vectorized_def);
}
- // Generate body from the instruction map, but in original program order.
- HEnvironment* env = vector_header_->GetFirstInstruction()->GetEnvironment();
- for (HInstructionIterator it(block->GetInstructions()); !it.Done(); it.Advance()) {
+ }
+
+ // Generate body from the instruction map, in the original program order.
+ HEnvironment* env = vector_header_->GetFirstInstruction()->GetEnvironment();
+ for (HBlocksInLoopReversePostOrderIterator block_it(*loop_info);
+ !block_it.Done();
+ block_it.Advance()) {
+ HBasicBlock* cur_block = block_it.Current();
+
+ if (cur_block == loop_info->GetHeader()) {
+ continue;
+ }
+
+ for (HInstructionIterator it(cur_block->GetInstructions()); !it.Done(); it.Advance()) {
auto i = vector_map_->find(it.Current());
if (i != vector_map_->end() && !i->second->IsInBlock()) {
Insert(vector_body_, i->second);
- if (IsInPredicatedVectorizationMode() && i->second->IsVecOperation()) {
- HVecOperation* op = i->second->AsVecOperation();
- op->SetMergingGoverningPredicate(set_pred);
- }
// Deal with instructions that need an environment, such as the scalar intrinsics.
if (i->second->NeedsEnvironment()) {
i->second->CopyEnvironmentFromWithLoopPhiAdjustment(env, vector_header_);
}
}
}
- // Generate the induction.
- vector_index_ = new (global_allocator_) HAdd(induc_type, vector_index_, step);
- Insert(vector_body_, vector_index_);
}
+ // Generate the induction.
+ vector_index_ = new (global_allocator_) HAdd(induc_type, vector_index_, step);
+ Insert(vector_body_, vector_index_);
+}
+
+void HLoopOptimization::FinalizePhisForNewLoop(HPhi* phi, HInstruction* lo) {
// Finalize phi inputs for the reductions (if any).
for (auto i = reductions_->begin(); i != reductions_->end(); ++i) {
if (!i->first->IsPhi()) {
@@ -1442,10 +1773,13 @@ bool HLoopOptimization::VectorizeDef(LoopNode* node,
VectorizeDotProdIdiom(node, instruction, generate_code, type, restrictions) ||
(TrySetVectorType(type, &restrictions) &&
VectorizeUse(node, instruction, generate_code, type, restrictions))) {
+ DCHECK(!instruction->IsPhi());
if (generate_code) {
- HInstruction* new_red = vector_map_->Get(instruction);
- vector_permanent_map_->Put(new_red, vector_map_->Get(redit->second));
- vector_permanent_map_->Overwrite(redit->second, new_red);
+ HInstruction* new_red_vec_op = vector_map_->Get(instruction);
+ HInstruction* original_phi = redit->second;
+ DCHECK(original_phi->IsPhi());
+ vector_permanent_map_->Put(new_red_vec_op, vector_map_->Get(original_phi));
+ vector_permanent_map_->Overwrite(original_phi, new_red_vec_op);
}
return true;
}
@@ -1455,6 +1789,10 @@ bool HLoopOptimization::VectorizeDef(LoopNode* node,
if (instruction->IsGoto()) {
return true;
}
+
+ if (instruction->IsIf()) {
+ return VectorizeIfCondition(node, instruction, generate_code, restrictions);
+ }
// Otherwise accept only expressions with no effects outside the immediate loop-body.
// Note that actual uses are inspected during right-hand-side tree traversal.
return !IsUsedOutsideLoop(node->loop_info, instruction)
@@ -1485,9 +1823,7 @@ bool HLoopOptimization::VectorizeUse(LoopNode* node,
// Deal with vector restrictions.
bool is_string_char_at = instruction->AsArrayGet()->IsStringCharAt();
- if (is_string_char_at && (HasVectorRestrictions(restrictions, kNoStringCharAt) ||
- IsInPredicatedVectorizationMode())) {
- // TODO: Support CharAt for predicated mode.
+ if (is_string_char_at && (HasVectorRestrictions(restrictions, kNoStringCharAt))) {
return false;
}
// Accept a right-hand-side array base[index] for
@@ -1676,6 +2012,7 @@ bool HLoopOptimization::TrySetVectorType(DataType::Type type, uint64_t* restrict
case InstructionSet::kThumb2:
// Allow vectorization for all ARM devices, because Android assumes that
// ARM 32-bit always supports advanced SIMD (64-bit SIMD).
+ *restrictions |= kNoIfCond;
switch (type) {
case DataType::Type::kBool:
case DataType::Type::kUint8:
@@ -1701,6 +2038,13 @@ bool HLoopOptimization::TrySetVectorType(DataType::Type type, uint64_t* restrict
DCHECK_EQ(simd_register_size_ % DataType::Size(type), 0u);
switch (type) {
case DataType::Type::kBool:
+ *restrictions |= kNoDiv |
+ kNoSignedHAdd |
+ kNoUnsignedHAdd |
+ kNoUnroundedHAdd |
+ kNoSAD |
+ kNoIfCond;
+ return TrySetVectorLength(type, vector_length);
case DataType::Type::kUint8:
case DataType::Type::kInt8:
*restrictions |= kNoDiv |
@@ -1712,6 +2056,7 @@ bool HLoopOptimization::TrySetVectorType(DataType::Type type, uint64_t* restrict
case DataType::Type::kUint16:
case DataType::Type::kInt16:
*restrictions |= kNoDiv |
+ kNoStringCharAt | // TODO: support in predicated mode.
kNoSignedHAdd |
kNoUnsignedHAdd |
kNoUnroundedHAdd |
@@ -1722,13 +2067,13 @@ bool HLoopOptimization::TrySetVectorType(DataType::Type type, uint64_t* restrict
*restrictions |= kNoDiv | kNoSAD;
return TrySetVectorLength(type, vector_length);
case DataType::Type::kInt64:
- *restrictions |= kNoDiv | kNoSAD;
+ *restrictions |= kNoDiv | kNoSAD | kNoIfCond;
return TrySetVectorLength(type, vector_length);
case DataType::Type::kFloat32:
- *restrictions |= kNoReduction;
+ *restrictions |= kNoReduction | kNoIfCond;
return TrySetVectorLength(type, vector_length);
case DataType::Type::kFloat64:
- *restrictions |= kNoReduction;
+ *restrictions |= kNoReduction | kNoIfCond;
return TrySetVectorLength(type, vector_length);
default:
break;
@@ -1737,6 +2082,7 @@ bool HLoopOptimization::TrySetVectorType(DataType::Type type, uint64_t* restrict
} else {
// Allow vectorization for all ARM devices, because Android assumes that
// ARMv8 AArch64 always supports advanced SIMD (128-bit SIMD).
+ *restrictions |= kNoIfCond;
switch (type) {
case DataType::Type::kBool:
case DataType::Type::kUint8:
@@ -1767,6 +2113,7 @@ bool HLoopOptimization::TrySetVectorType(DataType::Type type, uint64_t* restrict
case InstructionSet::kX86:
case InstructionSet::kX86_64:
// Allow vectorization for SSE4.1-enabled X86 devices only (128-bit SIMD).
+ *restrictions |= kNoIfCond;
if (features->AsX86InstructionSetFeatures()->HasSSE4_1()) {
switch (type) {
case DataType::Type::kBool:
@@ -1855,15 +2202,7 @@ void HLoopOptimization::GenerateVecInv(HInstruction* org, DataType::Type type) {
vector = new (global_allocator_)
HVecReplicateScalar(global_allocator_, input, type, vector_length_, kNoDexPc);
vector_permanent_map_->Put(org, Insert(vector_preheader_, vector));
- if (IsInPredicatedVectorizationMode()) {
- HVecPredSetAll* set_pred = new (global_allocator_) HVecPredSetAll(global_allocator_,
- graph_->GetIntConstant(1),
- type,
- vector_length_,
- 0u);
- vector_preheader_->InsertInstructionBefore(set_pred, vector);
- vector->AsVecOperation()->SetMergingGoverningPredicate(set_pred);
- }
+ vector_external_set_->insert(vector);
}
vector_map_->Put(org, vector);
}
@@ -1936,18 +2275,18 @@ void HLoopOptimization::GenerateVecMem(HInstruction* org,
vector_map_->Put(org, vector);
}
-void HLoopOptimization::GenerateVecReductionPhi(HPhi* phi) {
- DCHECK(reductions_->find(phi) != reductions_->end());
- DCHECK(reductions_->Get(phi->InputAt(1)) == phi);
+void HLoopOptimization::GenerateVecReductionPhi(HPhi* orig_phi) {
+ DCHECK(reductions_->find(orig_phi) != reductions_->end());
+ DCHECK(reductions_->Get(orig_phi->InputAt(1)) == orig_phi);
HInstruction* vector = nullptr;
if (vector_mode_ == kSequential) {
HPhi* new_phi = new (global_allocator_) HPhi(
- global_allocator_, kNoRegNumber, 0, phi->GetType());
+ global_allocator_, kNoRegNumber, 0, orig_phi->GetType());
vector_header_->AddPhi(new_phi);
vector = new_phi;
} else {
// Link vector reduction back to prior unrolled update, or a first phi.
- auto it = vector_permanent_map_->find(phi);
+ auto it = vector_permanent_map_->find(orig_phi);
if (it != vector_permanent_map_->end()) {
vector = it->second;
} else {
@@ -1957,7 +2296,7 @@ void HLoopOptimization::GenerateVecReductionPhi(HPhi* phi) {
vector = new_phi;
}
}
- vector_map_->Put(phi, vector);
+ vector_map_->Put(orig_phi, vector);
}
void HLoopOptimization::GenerateVecReductionPhiInputs(HPhi* phi, HInstruction* reduction) {
@@ -1992,15 +2331,7 @@ void HLoopOptimization::GenerateVecReductionPhiInputs(HPhi* phi, HInstruction* r
vector_length,
kNoDexPc));
}
- if (IsInPredicatedVectorizationMode()) {
- HVecPredSetAll* set_pred = new (global_allocator_) HVecPredSetAll(global_allocator_,
- graph_->GetIntConstant(1),
- type,
- vector_length,
- 0u);
- vector_preheader_->InsertInstructionBefore(set_pred, new_init);
- new_init->AsVecOperation()->SetMergingGoverningPredicate(set_pred);
- }
+ vector_external_set_->insert(new_init);
} else {
new_init = ReduceAndExtractIfNeeded(new_init);
}
@@ -2026,23 +2357,15 @@ HInstruction* HLoopOptimization::ReduceAndExtractIfNeeded(HInstruction* instruct
// x = REDUCE( [x_1, .., x_n] )
// y = x_1
// along the exit of the defining loop.
- HInstruction* reduce = new (global_allocator_) HVecReduce(
+ HVecReduce* reduce = new (global_allocator_) HVecReduce(
global_allocator_, instruction, type, vector_length, kind, kNoDexPc);
exit->InsertInstructionBefore(reduce, exit->GetFirstInstruction());
+ vector_external_set_->insert(reduce);
instruction = new (global_allocator_) HVecExtractScalar(
global_allocator_, reduce, type, vector_length, 0, kNoDexPc);
exit->InsertInstructionAfter(instruction, reduce);
- if (IsInPredicatedVectorizationMode()) {
- HVecPredSetAll* set_pred = new (global_allocator_) HVecPredSetAll(global_allocator_,
- graph_->GetIntConstant(1),
- type,
- vector_length,
- 0u);
- exit->InsertInstructionBefore(set_pred, reduce);
- reduce->AsVecOperation()->SetMergingGoverningPredicate(set_pred);
- instruction->AsVecOperation()->SetMergingGoverningPredicate(set_pred);
- }
+ vector_external_set_->insert(instruction);
}
}
return instruction;
@@ -2057,10 +2380,10 @@ HInstruction* HLoopOptimization::ReduceAndExtractIfNeeded(HInstruction* instruct
} \
break;
-void HLoopOptimization::GenerateVecOp(HInstruction* org,
- HInstruction* opa,
- HInstruction* opb,
- DataType::Type type) {
+HInstruction* HLoopOptimization::GenerateVecOp(HInstruction* org,
+ HInstruction* opa,
+ HInstruction* opb,
+ DataType::Type type) {
uint32_t dex_pc = org->GetDexPc();
HInstruction* vector = nullptr;
DataType::Type org_type = org->GetType();
@@ -2130,11 +2453,23 @@ void HLoopOptimization::GenerateVecOp(HInstruction* org,
GENERATE_VEC(
new (global_allocator_) HVecAbs(global_allocator_, opa, type, vector_length_, dex_pc),
new (global_allocator_) HAbs(org_type, opa, dex_pc));
+ case HInstruction::kEqual: {
+ // Special case.
+ if (vector_mode_ == kVector) {
+ vector = new (global_allocator_) HVecCondition(
+ global_allocator_, opa, opb, type, vector_length_, dex_pc);
+ } else {
+ DCHECK(vector_mode_ == kSequential);
+ UNREACHABLE();
+ }
+ }
+ break;
default:
break;
} // switch
CHECK(vector != nullptr) << "Unsupported SIMD operator";
vector_map_->Put(org, vector);
+ return vector;
}
#undef GENERATE_VEC
@@ -2374,6 +2709,89 @@ bool HLoopOptimization::VectorizeDotProdIdiom(LoopNode* node,
return false;
}
+bool HLoopOptimization::VectorizeIfCondition(LoopNode* node,
+ HInstruction* hif,
+ bool generate_code,
+ uint64_t restrictions) {
+ DCHECK(hif->IsIf());
+ HInstruction* if_input = hif->InputAt(0);
+
+ if (!if_input->HasOnlyOneNonEnvironmentUse()) {
+    // Avoid the complications of a condition that is also used as a materialized boolean.
+ return false;
+ }
+
+ if (!if_input->IsEqual()) {
+ // TODO: Support other condition types.
+ return false;
+ }
+
+ HCondition* cond = if_input->AsCondition();
+ HInstruction* opa = cond->InputAt(0);
+ HInstruction* opb = cond->InputAt(1);
+ DataType::Type type = GetNarrowerType(opa, opb);
+
+ if (!DataType::IsIntegralType(type)) {
+ return false;
+ }
+
+ bool is_unsigned = false;
+ HInstruction* opa_promoted = opa;
+ HInstruction* opb_promoted = opb;
+ bool is_int_case = DataType::Type::kInt32 == opa->GetType() &&
+ DataType::Type::kInt32 == opb->GetType();
+
+ // Condition arguments should be either both int32 or consistently extended signed/unsigned
+ // narrower operands.
+ if (!is_int_case &&
+ !IsNarrowerOperands(opa, opb, type, &opa_promoted, &opb_promoted, &is_unsigned)) {
+ return false;
+ }
+ type = HVecOperation::ToProperType(type, is_unsigned);
+
+ // For narrow types, explicit type conversion may have been
+  // optimized away, so set the no hi bits restriction here.
+ if (DataType::Size(type) <= 2) {
+ restrictions |= kNoHiBits;
+ }
+
+ if (!TrySetVectorType(type, &restrictions) ||
+ HasVectorRestrictions(restrictions, kNoIfCond)) {
+ return false;
+ }
+
+ if (generate_code && vector_mode_ != kVector) { // de-idiom
+ opa_promoted = opa;
+ opb_promoted = opb;
+ }
+
+ if (VectorizeUse(node, opa_promoted, generate_code, type, restrictions) &&
+ VectorizeUse(node, opb_promoted, generate_code, type, restrictions)) {
+ if (generate_code) {
+ HInstruction* vec_cond = GenerateVecOp(cond,
+ vector_map_->Get(opa_promoted),
+ vector_map_->Get(opb_promoted),
+ type);
+
+ if (vector_mode_ == kVector) {
+ HInstruction* vec_pred_not = new (global_allocator_) HVecPredNot(
+ global_allocator_, vec_cond, type, vector_length_, hif->GetDexPc());
+
+ vector_map_->Put(hif, vec_pred_not);
+ BlockPredicateInfo* pred_info = predicate_info_map_->Get(hif->GetBlock());
+ pred_info->SetControlFlowInfo(vec_cond->AsVecPredSetOperation(),
+ vec_pred_not->AsVecPredSetOperation());
+ } else {
+ DCHECK(vector_mode_ == kSequential);
+ UNREACHABLE();
+ }
+ }
+ return true;
+ }
+
+ return false;
+}
+
//
// Vectorization heuristics.
//
@@ -2423,6 +2841,8 @@ bool HLoopOptimization::IsVectorizationProfitable(int64_t trip_count) {
// TODO: trip count is really unsigned entity, provided the guarding test
// is satisfied; deal with this more carefully later
uint32_t max_peel = MaxNumberPeeled();
+ // Peeling is not supported in predicated mode.
+ DCHECK_IMPLIES(IsInPredicatedVectorizationMode(), max_peel == 0u);
if (vector_length_ == 0) {
return false; // nothing found
} else if (trip_count < 0) {
@@ -2686,4 +3106,67 @@ bool HLoopOptimization::CanRemoveCycle() {
return true;
}
+void HLoopOptimization::PreparePredicateInfoMap(LoopNode* node) {
+ HLoopInformation* loop_info = node->loop_info;
+
+ DCHECK(IsPredicatedLoopControlFlowSupported(loop_info));
+
+ for (HBlocksInLoopIterator block_it(*loop_info);
+ !block_it.Done();
+ block_it.Advance()) {
+ HBasicBlock* cur_block = block_it.Current();
+ BlockPredicateInfo* pred_info = new (loop_allocator_) BlockPredicateInfo();
+
+ predicate_info_map_->Put(cur_block, pred_info);
+ }
+}
+
+void HLoopOptimization::InitPredicateInfoMap(LoopNode* node,
+ HVecPredSetOperation* loop_main_pred) {
+ HLoopInformation* loop_info = node->loop_info;
+ HBasicBlock* header = loop_info->GetHeader();
+ BlockPredicateInfo* header_info = predicate_info_map_->Get(header);
+  // The loop header is a special case: it doesn't have a false predicate, because taking
+  // the false branch simply exits the loop.
+ header_info->SetControlFlowInfo(loop_main_pred, loop_main_pred);
+
+ size_t blocks_in_loop = header->GetLoopInformation()->GetBlocks().NumSetBits();
+ if (blocks_in_loop == 2) {
+ for (HBasicBlock* successor : header->GetSuccessors()) {
+ if (loop_info->Contains(*successor)) {
+        // This is the loop's second block: the body.
+ BlockPredicateInfo* body_info = predicate_info_map_->Get(successor);
+ body_info->SetControlPredicate(loop_main_pred);
+ return;
+ }
+ }
+ UNREACHABLE();
+ }
+
+  // TODO: support predicated vectorization of CF loops with a more complex structure.
+ DCHECK(HasLoopDiamondStructure(loop_info));
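+  // The diamond loop structure expected here (block names match the local variables below):
+  //
+  //            header
+  //              |
+  //         diamond_top
+  //         /         \
+  //  diamond_true   diamond_false
+  //         \         /
+  //          back_edge   (branches back to the header)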
+ HBasicBlock* header_succ_0 = header->GetSuccessors()[0];
+ HBasicBlock* header_succ_1 = header->GetSuccessors()[1];
+ HBasicBlock* diamond_top = loop_info->Contains(*header_succ_0) ?
+ header_succ_0 :
+ header_succ_1;
+
+ HIf* diamond_hif = diamond_top->GetLastInstruction()->AsIf();
+ HBasicBlock* diamond_true = diamond_hif->IfTrueSuccessor();
+ HBasicBlock* diamond_false = diamond_hif->IfFalseSuccessor();
+ HBasicBlock* back_edge = diamond_true->GetSingleSuccessor();
+
+ BlockPredicateInfo* diamond_top_info = predicate_info_map_->Get(diamond_top);
+ BlockPredicateInfo* diamond_true_info = predicate_info_map_->Get(diamond_true);
+ BlockPredicateInfo* diamond_false_info = predicate_info_map_->Get(diamond_false);
+ BlockPredicateInfo* back_edge_info = predicate_info_map_->Get(back_edge);
+
+ diamond_top_info->SetControlPredicate(header_info->GetTruePredicate());
+
+ diamond_true_info->SetControlPredicate(diamond_top_info->GetTruePredicate());
+ diamond_false_info->SetControlPredicate(diamond_top_info->GetFalsePredicate());
+
+ back_edge_info->SetControlPredicate(header_info->GetTruePredicate());
+}
+
} // namespace art
diff --git a/compiler/optimizing/loop_optimization.h b/compiler/optimizing/loop_optimization.h
index 6dd778ba74..86a9f0fcb8 100644
--- a/compiler/optimizing/loop_optimization.h
+++ b/compiler/optimizing/loop_optimization.h
@@ -101,6 +101,7 @@ class HLoopOptimization : public HOptimization {
kNoSAD = 1 << 11, // no sum of absolute differences (SAD)
kNoWideSAD = 1 << 12, // no sum of absolute differences (SAD) with operand widening
kNoDotProd = 1 << 13, // no dot product
+ kNoIfCond = 1 << 14, // no if condition conversion
};
/*
@@ -136,6 +137,95 @@ class HLoopOptimization : public HOptimization {
bool is_string_char_at; // compressed string read
};
+ // This structure describes the control flow (CF) -> data flow (DF) conversion of the loop
+ // with control flow (see below) for the purpose of predicated autovectorization.
+ //
+  // Let's define "loops without control-flow" (or non-CF loops) as loops consisting of two
+  // consecutive blocks with no branching structure other than the loop exit, and
+  // "loops with control-flow" (or CF-loops) as all other loops.
+ //
+ // In the execution of the original CF-loop on each iteration some basic block Y will be
+ // either executed or not executed, depending on the control flow of the loop. More
+ // specifically, a block will be executed if all the conditional branches of the nodes in
+ // the control dependency graph for that block Y are taken according to the path from the loop
+ // header to that basic block.
+ //
+ // This is the key idea of CF->DF conversion: a boolean value
+ // 'ctrl_pred == cond1 && cond2 && ...' will determine whether the basic block Y will be
+ // executed, where cond_K is whether the branch of the node K in the control dependency
+ // graph upward traversal was taken in the 'right' direction.
+ //
+ // Def.: BB Y is control dependent on BB X iff
+ // (1) there exists a directed path P from X to Y with any basic block Z in P (excluding X
+ // and Y) post-dominated by Y and
+ // (2) X is not post-dominated by Y.
+ // ...
+ // X
+ // false / \ true
+ // / \
+ // ...
+ // |
+ // Y
+ // ...
+ //
+ // When doing predicated autovectorization of a CF loop, we use the CF->DF conversion approach:
+ // 1) do the data analysis and vector operation creation as if it was a non-CF loop.
+ // 2) for each HIf block create two vector predicate setting instructions - for True and False
+ // edges/paths.
+ // 3) assign a governing vector predicate (see comments near HVecPredSetOperation)
+ // to each vector operation Alpha in the loop (including to those vector predicate setting
+ // instructions created in #2); do this by:
+ // - finding the immediate control dependent block of the instruction Alpha's block.
+ // - choosing the True or False predicate setting instruction (created in #2) depending
+ // on the path to the instruction.
+ //
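+  // As an illustrative (hypothetical) example, consider a loop body of the form:
+  //
+  //   for (int i = 0; i < n; i++) {
+  //     if (a[i] == 0) {     // HIf block X
+  //       b[i] = 1;          // block Y, control dependent on X
+  //     }
+  //     c[i] = 2;            // executed on every iteration
+  //   }
+  //
+  // After the CF->DF conversion, the store to b[i] becomes a predicated vector store whose
+  // governing predicate is the True predicate created for X (only the lanes where a[i] == 0
+  // are active), while the store to c[i] is governed by the loop control predicate only.
+  //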
+  // For more information, see the papers:
+ //
+ // - Allen, John R and Kennedy, Ken and Porterfield, Carrie and Warren, Joe,
+ // “Conversion of Control Dependence to Data Dependence,” in Proceedings of the 10th ACM
+ // SIGACT-SIGPLAN Symposium on Principles of Programming Languages, 1983, pp. 177–189.
+ // - JEANNE FERRANTE, KARL J. OTTENSTEIN, JOE D. WARREN,
+ // "The Program Dependence Graph and Its Use in Optimization"
+ //
+ class BlockPredicateInfo : public ArenaObject<kArenaAllocLoopOptimization> {
+ public:
+ BlockPredicateInfo() :
+ control_predicate_(nullptr),
+ true_predicate_(nullptr),
+ false_predicate_(nullptr) {}
+
+ void SetControlFlowInfo(HVecPredSetOperation* true_predicate,
+ HVecPredSetOperation* false_predicate) {
+ DCHECK(!HasControlFlowOps());
+ true_predicate_ = true_predicate;
+ false_predicate_ = false_predicate;
+ }
+
+ bool HasControlFlowOps() const {
+ // Note: a block must have both T/F predicates set or none of them.
+ DCHECK_EQ(true_predicate_ == nullptr, false_predicate_ == nullptr);
+ return true_predicate_ != nullptr;
+ }
+
+ HVecPredSetOperation* GetControlPredicate() const { return control_predicate_; }
+ void SetControlPredicate(HVecPredSetOperation* control_predicate) {
+ control_predicate_ = control_predicate;
+ }
+
+ HVecPredSetOperation* GetTruePredicate() const { return true_predicate_; }
+ HVecPredSetOperation* GetFalsePredicate() const { return false_predicate_; }
+
+ private:
+    // Vector control predicate operation associated with the block; it will determine
+    // the active lanes for all vector operations originating from this block.
+ HVecPredSetOperation* control_predicate_;
+
+    // Vector predicate instruction, associated with the true successor of the block.
+ HVecPredSetOperation* true_predicate_;
+    // Vector predicate instruction, associated with the false successor of the block.
+ HVecPredSetOperation* false_predicate_;
+ };
+
//
// Loop setup and traversal.
//
@@ -203,15 +293,95 @@ class HLoopOptimization : public HOptimization {
// Vectorization analysis and synthesis.
//
- bool ShouldVectorize(LoopNode* node, HBasicBlock* block, int64_t trip_count);
- void Vectorize(LoopNode* node, HBasicBlock* block, HBasicBlock* exit, int64_t trip_count);
- void GenerateNewLoop(LoopNode* node,
- HBasicBlock* block,
- HBasicBlock* new_preheader,
- HInstruction* lo,
- HInstruction* hi,
- HInstruction* step,
- uint32_t unroll);
+ // Returns whether the data flow requirements are met for vectorization.
+ //
+ // - checks whether instructions are vectorizable for the target.
+ // - conducts data dependence analysis for array references.
+  //  - additionally, collects info on peeling and alignment strategy.
+ bool CanVectorizeDataFlow(LoopNode* node, HBasicBlock* header, bool collect_alignment_info);
+
+  // Performs the checks (common to the predicated and traditional modes) for the loop.
+ bool ShouldVectorizeCommon(LoopNode* node, HPhi* main_phi, int64_t trip_count);
+
+  // Tries to vectorize the loop; returns whether it was successful.
+ //
+ // There are two versions/algorithms:
+ // - Predicated: all the vector operations have governing predicates which control
+ // which individual vector lanes will be active (see HVecPredSetOperation for more details).
+ // Example: vectorization using AArch64 SVE.
+  // - Traditional: a regular mode in which all lanes of the vector operations are
+  //   unconditionally active.
+  //   Example: vectorization using AArch64 NEON.
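+  //
+  // As a rough sketch (peeling, unrolling and the runtime alias disambiguation test omitted):
+  // - Predicated: a single vector loop whose last, partial iteration is handled by the
+  //   governing predicates, so no scalar cleanup loop is needed.
+  // - Traditional: a vector loop over the vector trip count, followed by a scalar cleanup
+  //   loop for any remaining iterations.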
+ bool TryVectorizePredicated(LoopNode* node,
+ HBasicBlock* body,
+ HBasicBlock* exit,
+ HPhi* main_phi,
+ int64_t trip_count);
+
+ bool TryVectorizedTraditional(LoopNode* node,
+ HBasicBlock* body,
+ HBasicBlock* exit,
+ HPhi* main_phi,
+ int64_t trip_count);
+
+  // Vectorizes the loop for which all checks have already been done.
+ void VectorizePredicated(LoopNode* node,
+ HBasicBlock* block,
+ HBasicBlock* exit);
+ void VectorizeTraditional(LoopNode* node,
+ HBasicBlock* block,
+ HBasicBlock* exit,
+ int64_t trip_count);
+
+  // Performs the final steps of the whole vectorization process: links reductions, removes the
+  // original scalar loop, and updates the loop info.
+ void FinalizeVectorization(LoopNode* node);
+
+ // Helpers that do the vector instruction synthesis for the previously created loop; create
+ // and fill the loop body with instructions.
+ //
+ // A version to generate a vector loop in predicated mode.
+ void GenerateNewLoopPredicated(LoopNode* node,
+ HBasicBlock* new_preheader,
+ HInstruction* lo,
+ HInstruction* hi,
+ HInstruction* step);
+
+ // A version to generate a vector loop in traditional mode or to generate
+ // a scalar loop for both modes.
+ void GenerateNewLoopScalarOrTraditional(LoopNode* node,
+ HBasicBlock* new_preheader,
+ HInstruction* lo,
+ HInstruction* hi,
+ HInstruction* step,
+ uint32_t unroll);
+
+ //
+ // Helpers for GenerateNewLoop*.
+ //
+
+  // Updates vectorization bookkeeping data for the new loop, creates and returns
+ // its main induction Phi.
+ HPhi* InitializeForNewLoop(HBasicBlock* new_preheader, HInstruction* lo);
+
+ // Finalizes reduction and induction phis' inputs for the newly created loop.
+ void FinalizePhisForNewLoop(HPhi* phi, HInstruction* lo);
+
+  // Creates an empty predicate info object for each basic block and puts it into the map.
+ void PreparePredicateInfoMap(LoopNode* node);
+
+  // Sets up block true/false predicates using info collected through data flow and control
+  // dependency analysis.
+ void InitPredicateInfoMap(LoopNode* node, HVecPredSetOperation* loop_main_pred);
+
+ // Performs instruction synthesis for the loop body.
+ void GenerateNewLoopBodyOnce(LoopNode* node,
+ DataType::Type induc_type,
+ HInstruction* step);
+
+  // Returns whether the vector loop needs a runtime disambiguation test for array refs.
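+  // For example, when the loop copies between two array references that may alias, the
+  // synthesized test 'a != b' steers execution to the scalar loop when they do alias
+  // (see the 'vtc = a != b ? vtc : 0' pattern in the synthesis code).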
+ bool NeedsArrayRefsDisambiguationTest() const { return vector_runtime_test_a_ != nullptr; }
+
bool VectorizeDef(LoopNode* node, HInstruction* instruction, bool generate_code);
bool VectorizeUse(LoopNode* node,
HInstruction* instruction,
@@ -239,10 +409,10 @@ class HLoopOptimization : public HOptimization {
void GenerateVecReductionPhi(HPhi* phi);
void GenerateVecReductionPhiInputs(HPhi* phi, HInstruction* reduction);
HInstruction* ReduceAndExtractIfNeeded(HInstruction* instruction);
- void GenerateVecOp(HInstruction* org,
- HInstruction* opa,
- HInstruction* opb,
- DataType::Type type);
+ HInstruction* GenerateVecOp(HInstruction* org,
+ HInstruction* opa,
+ HInstruction* opb,
+ DataType::Type type);
// Vectorization idioms.
bool VectorizeSaturationIdiom(LoopNode* node,
@@ -265,6 +435,10 @@ class HLoopOptimization : public HOptimization {
bool generate_code,
DataType::Type type,
uint64_t restrictions);
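+  // Handles an HIf inside the loop for CF->DF conversion: vectorizes its condition and
+  // records the resulting true/false predicate-setting operations for the HIf's block.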
+ bool VectorizeIfCondition(LoopNode* node,
+ HInstruction* instruction,
+ bool generate_code,
+ uint64_t restrictions);
// Vectorization heuristics.
Alignment ComputeAlignment(HInstruction* offset,
@@ -369,6 +543,16 @@ class HLoopOptimization : public HOptimization {
// Contents reside in phase-local heap memory.
ScopedArenaSafeMap<HInstruction*, HInstruction*>* vector_permanent_map_;
+ // Tracks vector operations that are inserted outside of the loop (preheader, exit)
+ // as part of vectorization (e.g. replicate scalar for loop invariants and reduce ops
+ // for loop reductions).
+ ScopedArenaSet<HInstruction*>* vector_external_set_;
+
+  // A mapping between a basic block of the original loop and its associated BlockPredicateInfo.
+ //
+ // Only used in predicated loop vectorization mode.
+ ScopedArenaSafeMap<HBasicBlock*, BlockPredicateInfo*>* predicate_info_map_;
+
// Temporary vectorization bookkeeping.
VectorMode vector_mode_; // synthesis mode
HBasicBlock* vector_preheader_; // preheader of the new loop
diff --git a/compiler/optimizing/loop_optimization_test.cc b/compiler/optimizing/loop_optimization_test.cc
index 7f694fb655..49e3c0418f 100644
--- a/compiler/optimizing/loop_optimization_test.cc
+++ b/compiler/optimizing/loop_optimization_test.cc
@@ -30,6 +30,7 @@ namespace art HIDDEN {
class LoopOptimizationTest : public OptimizingUnitTest {
protected:
void SetUp() override {
+ TEST_SETUP_DISABLED_FOR_RISCV64();
OptimizingUnitTest::SetUp();
graph_ = CreateGraph();
@@ -44,6 +45,7 @@ class LoopOptimizationTest : public OptimizingUnitTest {
}
void TearDown() override {
+ TEST_TEARDOWN_DISABLED_FOR_RISCV64();
codegen_.reset();
compiler_options_.reset();
graph_ = nullptr;
@@ -134,17 +136,20 @@ class LoopOptimizationTest : public OptimizingUnitTest {
//
TEST_F(LoopOptimizationTest, NoLoops) {
+ TEST_DISABLED_FOR_RISCV64();
PerformAnalysis();
EXPECT_EQ("", LoopStructure());
}
TEST_F(LoopOptimizationTest, SingleLoop) {
+ TEST_DISABLED_FOR_RISCV64();
AddLoop(entry_block_, return_block_);
PerformAnalysis();
EXPECT_EQ("[]", LoopStructure());
}
TEST_F(LoopOptimizationTest, LoopNest10) {
+ TEST_DISABLED_FOR_RISCV64();
HBasicBlock* b = entry_block_;
HBasicBlock* s = return_block_;
for (int i = 0; i < 10; i++) {
@@ -156,6 +161,7 @@ TEST_F(LoopOptimizationTest, LoopNest10) {
}
TEST_F(LoopOptimizationTest, LoopSequence10) {
+ TEST_DISABLED_FOR_RISCV64();
HBasicBlock* b = entry_block_;
HBasicBlock* s = return_block_;
for (int i = 0; i < 10; i++) {
@@ -167,6 +173,7 @@ TEST_F(LoopOptimizationTest, LoopSequence10) {
}
TEST_F(LoopOptimizationTest, LoopSequenceOfNests) {
+ TEST_DISABLED_FOR_RISCV64();
HBasicBlock* b = entry_block_;
HBasicBlock* s = return_block_;
for (int i = 0; i < 10; i++) {
@@ -194,6 +201,7 @@ TEST_F(LoopOptimizationTest, LoopSequenceOfNests) {
}
TEST_F(LoopOptimizationTest, LoopNestWithSequence) {
+ TEST_DISABLED_FOR_RISCV64();
HBasicBlock* b = entry_block_;
HBasicBlock* s = return_block_;
for (int i = 0; i < 10; i++) {
@@ -215,6 +223,7 @@ TEST_F(LoopOptimizationTest, LoopNestWithSequence) {
//
// This is a test for nodes.cc functionality - HGraph::SimplifyLoop.
TEST_F(LoopOptimizationTest, SimplifyLoopReoderPredecessors) {
+ TEST_DISABLED_FOR_RISCV64();
// Can't use AddLoop as we want special order for blocks predecessors.
HBasicBlock* header = new (GetAllocator()) HBasicBlock(graph_);
HBasicBlock* body = new (GetAllocator()) HBasicBlock(graph_);
@@ -260,6 +269,7 @@ TEST_F(LoopOptimizationTest, SimplifyLoopReoderPredecessors) {
//
// This is a test for nodes.cc functionality - HGraph::SimplifyLoop.
TEST_F(LoopOptimizationTest, SimplifyLoopSinglePreheader) {
+ TEST_DISABLED_FOR_RISCV64();
HBasicBlock* header = AddLoop(entry_block_, return_block_);
header->InsertInstructionBefore(
diff --git a/compiler/optimizing/nodes.cc b/compiler/optimizing/nodes.cc
index 3790058879..2cfe5b3ae2 100644
--- a/compiler/optimizing/nodes.cc
+++ b/compiler/optimizing/nodes.cc
@@ -35,7 +35,9 @@
#include "class_root-inl.h"
#include "code_generator.h"
#include "common_dominator.h"
+#include "intrinsic_objects.h"
#include "intrinsics.h"
+#include "intrinsics_list.h"
#include "mirror/class-inl.h"
#include "scoped_thread_state_change-inl.h"
#include "ssa_builder.h"
@@ -254,6 +256,14 @@ GraphAnalysisResult HGraph::BuildDominatorTree() {
return kAnalysisSuccess;
}
+GraphAnalysisResult HGraph::RecomputeDominatorTree() {
+ DCHECK(!HasIrreducibleLoops()) << "Recomputing loop information in graphs with irreducible loops "
+ << "is unsupported, as it could lead to loop header changes";
+ ClearLoopInformation();
+ ClearDominanceInformation();
+ return BuildDominatorTree();
+}
+
void HGraph::ClearDominanceInformation() {
for (HBasicBlock* block : GetActiveBlocks()) {
block->ClearDominanceInformation();
@@ -1488,12 +1498,12 @@ bool HInstructionList::FoundBefore(const HInstruction* instruction1,
const HInstruction* instruction2) const {
DCHECK_EQ(instruction1->GetBlock(), instruction2->GetBlock());
for (HInstructionIterator it(*this); !it.Done(); it.Advance()) {
- if (it.Current() == instruction1) {
- return true;
- }
if (it.Current() == instruction2) {
return false;
}
+ if (it.Current() == instruction1) {
+ return true;
+ }
}
LOG(FATAL) << "Did not find an order between two instructions of the same block.";
UNREACHABLE();
@@ -1815,10 +1825,12 @@ void HGraphVisitor::VisitBasicBlock(HBasicBlock* block) {
}
}
-HConstant* HTypeConversion::TryStaticEvaluation() const {
- HGraph* graph = GetBlock()->GetGraph();
- if (GetInput()->IsIntConstant()) {
- int32_t value = GetInput()->AsIntConstant()->GetValue();
+HConstant* HTypeConversion::TryStaticEvaluation() const { return TryStaticEvaluation(GetInput()); }
+
+HConstant* HTypeConversion::TryStaticEvaluation(HInstruction* input) const {
+ HGraph* graph = input->GetBlock()->GetGraph();
+ if (input->IsIntConstant()) {
+ int32_t value = input->AsIntConstant()->GetValue();
switch (GetResultType()) {
case DataType::Type::kInt8:
return graph->GetIntConstant(static_cast<int8_t>(value), GetDexPc());
@@ -1837,8 +1849,8 @@ HConstant* HTypeConversion::TryStaticEvaluation() const {
default:
return nullptr;
}
- } else if (GetInput()->IsLongConstant()) {
- int64_t value = GetInput()->AsLongConstant()->GetValue();
+ } else if (input->IsLongConstant()) {
+ int64_t value = input->AsLongConstant()->GetValue();
switch (GetResultType()) {
case DataType::Type::kInt8:
return graph->GetIntConstant(static_cast<int8_t>(value), GetDexPc());
@@ -1857,8 +1869,8 @@ HConstant* HTypeConversion::TryStaticEvaluation() const {
default:
return nullptr;
}
- } else if (GetInput()->IsFloatConstant()) {
- float value = GetInput()->AsFloatConstant()->GetValue();
+ } else if (input->IsFloatConstant()) {
+ float value = input->AsFloatConstant()->GetValue();
switch (GetResultType()) {
case DataType::Type::kInt32:
if (std::isnan(value))
@@ -1881,8 +1893,8 @@ HConstant* HTypeConversion::TryStaticEvaluation() const {
default:
return nullptr;
}
- } else if (GetInput()->IsDoubleConstant()) {
- double value = GetInput()->AsDoubleConstant()->GetValue();
+ } else if (input->IsDoubleConstant()) {
+ double value = input->AsDoubleConstant()->GetValue();
switch (GetResultType()) {
case DataType::Type::kInt32:
if (std::isnan(value))
@@ -1909,41 +1921,47 @@ HConstant* HTypeConversion::TryStaticEvaluation() const {
return nullptr;
}
-HConstant* HUnaryOperation::TryStaticEvaluation() const {
- if (GetInput()->IsIntConstant()) {
- return Evaluate(GetInput()->AsIntConstant());
- } else if (GetInput()->IsLongConstant()) {
- return Evaluate(GetInput()->AsLongConstant());
+HConstant* HUnaryOperation::TryStaticEvaluation() const { return TryStaticEvaluation(GetInput()); }
+
+HConstant* HUnaryOperation::TryStaticEvaluation(HInstruction* input) const {
+ if (input->IsIntConstant()) {
+ return Evaluate(input->AsIntConstant());
+ } else if (input->IsLongConstant()) {
+ return Evaluate(input->AsLongConstant());
} else if (kEnableFloatingPointStaticEvaluation) {
- if (GetInput()->IsFloatConstant()) {
- return Evaluate(GetInput()->AsFloatConstant());
- } else if (GetInput()->IsDoubleConstant()) {
- return Evaluate(GetInput()->AsDoubleConstant());
+ if (input->IsFloatConstant()) {
+ return Evaluate(input->AsFloatConstant());
+ } else if (input->IsDoubleConstant()) {
+ return Evaluate(input->AsDoubleConstant());
}
}
return nullptr;
}
HConstant* HBinaryOperation::TryStaticEvaluation() const {
- if (GetLeft()->IsIntConstant() && GetRight()->IsIntConstant()) {
- return Evaluate(GetLeft()->AsIntConstant(), GetRight()->AsIntConstant());
- } else if (GetLeft()->IsLongConstant()) {
- if (GetRight()->IsIntConstant()) {
+ return TryStaticEvaluation(GetLeft(), GetRight());
+}
+
+HConstant* HBinaryOperation::TryStaticEvaluation(HInstruction* left, HInstruction* right) const {
+ if (left->IsIntConstant() && right->IsIntConstant()) {
+ return Evaluate(left->AsIntConstant(), right->AsIntConstant());
+ } else if (left->IsLongConstant()) {
+ if (right->IsIntConstant()) {
// The binop(long, int) case is only valid for shifts and rotations.
DCHECK(IsShl() || IsShr() || IsUShr() || IsRor()) << DebugName();
- return Evaluate(GetLeft()->AsLongConstant(), GetRight()->AsIntConstant());
- } else if (GetRight()->IsLongConstant()) {
- return Evaluate(GetLeft()->AsLongConstant(), GetRight()->AsLongConstant());
+ return Evaluate(left->AsLongConstant(), right->AsIntConstant());
+ } else if (right->IsLongConstant()) {
+ return Evaluate(left->AsLongConstant(), right->AsLongConstant());
}
- } else if (GetLeft()->IsNullConstant() && GetRight()->IsNullConstant()) {
+ } else if (left->IsNullConstant() && right->IsNullConstant()) {
// The binop(null, null) case is only valid for equal and not-equal conditions.
DCHECK(IsEqual() || IsNotEqual()) << DebugName();
- return Evaluate(GetLeft()->AsNullConstant(), GetRight()->AsNullConstant());
+ return Evaluate(left->AsNullConstant(), right->AsNullConstant());
} else if (kEnableFloatingPointStaticEvaluation) {
- if (GetLeft()->IsFloatConstant() && GetRight()->IsFloatConstant()) {
- return Evaluate(GetLeft()->AsFloatConstant(), GetRight()->AsFloatConstant());
- } else if (GetLeft()->IsDoubleConstant() && GetRight()->IsDoubleConstant()) {
- return Evaluate(GetLeft()->AsDoubleConstant(), GetRight()->AsDoubleConstant());
+ if (left->IsFloatConstant() && right->IsFloatConstant()) {
+ return Evaluate(left->AsFloatConstant(), right->AsFloatConstant());
+ } else if (left->IsDoubleConstant() && right->IsDoubleConstant()) {
+ return Evaluate(left->AsDoubleConstant(), right->AsDoubleConstant());
}
}
return nullptr;
@@ -2797,8 +2815,11 @@ HInstruction* HGraph::InlineInto(HGraph* outer_graph, HInvoke* invoke) {
if (HasMonitorOperations()) {
outer_graph->SetHasMonitorOperations(true);
}
- if (HasSIMD()) {
- outer_graph->SetHasSIMD(true);
+ if (HasTraditionalSIMD()) {
+ outer_graph->SetHasTraditionalSIMD(true);
+ }
+ if (HasPredicatedSIMD()) {
+ outer_graph->SetHasPredicatedSIMD(true);
}
if (HasAlwaysThrowingInvokes()) {
outer_graph->SetHasAlwaysThrowingInvokes(true);
@@ -2989,12 +3010,7 @@ HInstruction* HGraph::InlineInto(HGraph* outer_graph, HInvoke* invoke) {
}
}
if (rerun_loop_analysis) {
- DCHECK(!outer_graph->HasIrreducibleLoops())
- << "Recomputing loop information in graphs with irreducible loops "
- << "is unsupported, as it could lead to loop header changes";
- outer_graph->ClearLoopInformation();
- outer_graph->ClearDominanceInformation();
- outer_graph->BuildDominatorTree();
+ outer_graph->RecomputeDominatorTree();
} else if (rerun_dominance) {
outer_graph->ClearDominanceInformation();
outer_graph->ComputeDominanceInformation();
@@ -3026,9 +3042,9 @@ HInstruction* HGraph::InlineInto(HGraph* outer_graph, HInvoke* invoke) {
replacement = outer_graph->GetDoubleConstant(
current->AsDoubleConstant()->GetValue(), current->GetDexPc());
} else if (current->IsParameterValue()) {
- if (kIsDebugBuild
- && invoke->IsInvokeStaticOrDirect()
- && invoke->AsInvokeStaticOrDirect()->IsStaticWithExplicitClinitCheck()) {
+ if (kIsDebugBuild &&
+ invoke->IsInvokeStaticOrDirect() &&
+ invoke->AsInvokeStaticOrDirect()->IsStaticWithExplicitClinitCheck()) {
// Ensure we do not use the last input of `invoke`, as it
// contains a clinit check which is not an actual argument.
size_t last_input_index = invoke->InputCount() - 1;
@@ -3125,6 +3141,8 @@ void HGraph::TransformLoopHeaderForBCE(HBasicBlock* header) {
new_pre_header, old_pre_header, /* replace_if_back_edge= */ false);
}
+// Creates a new two-basic-block loop and inserts it between the original loop header and the
+// original loop exit; also adjusts the dominators and the post order, and creates new
+// LoopInformation for the new loop.
HBasicBlock* HGraph::TransformLoopForVectorization(HBasicBlock* header,
HBasicBlock* body,
HBasicBlock* exit) {
@@ -3346,6 +3364,21 @@ std::ostream& operator<<(std::ostream& os, HInvokeStaticOrDirect::ClinitCheckReq
}
}
+bool HInvokeStaticOrDirect::CanBeNull() const {
+ if (GetType() != DataType::Type::kReference || IsStringInit()) {
+ return false;
+ }
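+  // The boxing valueOf() intrinsics (e.g. Integer.valueOf()) are known to never return null.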
+ switch (GetIntrinsic()) {
+#define DEFINE_BOXED_CASE(name, unused1, unused2, unused3, unused4) \
+ case Intrinsics::k##name##ValueOf: \
+ return false;
+ BOXED_TYPES(DEFINE_BOXED_CASE)
+#undef DEFINE_BOXED_CASE
+ default:
+ return true;
+ }
+}
+
bool HInvokeVirtual::CanDoImplicitNullCheckOn(HInstruction* obj) const {
if (obj != InputAt(0)) {
return false;
@@ -3518,9 +3551,7 @@ std::ostream& operator<<(std::ostream& os, TypeCheckKind rhs) {
static_assert( \
static_cast<uint32_t>(Intrinsics::k ## Name) <= (kAccIntrinsicBits >> CTZ(kAccIntrinsicBits)), \
"Instrinsics enumeration space overflow.");
-#include "intrinsics_list.h"
- INTRINSICS_LIST(CHECK_INTRINSICS_ENUM_VALUES)
-#undef INTRINSICS_LIST
+ ART_INTRINSICS_LIST(CHECK_INTRINSICS_ENUM_VALUES)
#undef CHECK_INTRINSICS_ENUM_VALUES
// Function that returns whether an intrinsic needs an environment or not.
@@ -3531,9 +3562,7 @@ static inline IntrinsicNeedsEnvironment NeedsEnvironmentIntrinsic(Intrinsics i)
#define OPTIMIZING_INTRINSICS(Name, InvokeType, NeedsEnv, SideEffects, Exceptions, ...) \
case Intrinsics::k ## Name: \
return NeedsEnv;
-#include "intrinsics_list.h"
- INTRINSICS_LIST(OPTIMIZING_INTRINSICS)
-#undef INTRINSICS_LIST
+ ART_INTRINSICS_LIST(OPTIMIZING_INTRINSICS)
#undef OPTIMIZING_INTRINSICS
}
return kNeedsEnvironment;
@@ -3547,9 +3576,7 @@ static inline IntrinsicSideEffects GetSideEffectsIntrinsic(Intrinsics i) {
#define OPTIMIZING_INTRINSICS(Name, InvokeType, NeedsEnv, SideEffects, Exceptions, ...) \
case Intrinsics::k ## Name: \
return SideEffects;
-#include "intrinsics_list.h"
- INTRINSICS_LIST(OPTIMIZING_INTRINSICS)
-#undef INTRINSICS_LIST
+ ART_INTRINSICS_LIST(OPTIMIZING_INTRINSICS)
#undef OPTIMIZING_INTRINSICS
}
return kAllSideEffects;
@@ -3563,9 +3590,7 @@ static inline IntrinsicExceptions GetExceptionsIntrinsic(Intrinsics i) {
#define OPTIMIZING_INTRINSICS(Name, InvokeType, NeedsEnv, SideEffects, Exceptions, ...) \
case Intrinsics::k ## Name: \
return Exceptions;
-#include "intrinsics_list.h"
- INTRINSICS_LIST(OPTIMIZING_INTRINSICS)
-#undef INTRINSICS_LIST
+ ART_INTRINSICS_LIST(OPTIMIZING_INTRINSICS)
#undef OPTIMIZING_INTRINSICS
}
return kCanThrow;
diff --git a/compiler/optimizing/nodes.h b/compiler/optimizing/nodes.h
index 28112d176a..0efe8f4335 100644
--- a/compiler/optimizing/nodes.h
+++ b/compiler/optimizing/nodes.h
@@ -253,7 +253,7 @@ class ReferenceTypeInfo : ValueObject {
bool IsNonPrimitiveArrayClass() const REQUIRES_SHARED(Locks::mutator_lock_) {
DCHECK(IsValid());
- return GetTypeHandle()->IsArrayClass() && !GetTypeHandle()->IsPrimitiveArray();
+ return IsArrayClass() && !GetTypeHandle()->IsPrimitiveArray();
}
bool CanArrayHold(ReferenceTypeInfo rti) const REQUIRES_SHARED(Locks::mutator_lock_) {
@@ -403,7 +403,8 @@ class HGraph : public ArenaObject<kArenaAllocGraph> {
has_bounds_checks_(false),
has_try_catch_(false),
has_monitor_operations_(false),
- has_simd_(false),
+ has_traditional_simd_(false),
+ has_predicated_simd_(false),
has_loops_(false),
has_irreducible_loops_(false),
has_direct_critical_native_call_(false),
@@ -466,6 +467,7 @@ class HGraph : public ArenaObject<kArenaAllocGraph> {
void ClearLoopInformation();
void FindBackEdges(ArenaBitVector* visited);
GraphAnalysisResult BuildDominatorTree();
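+  // Clears the loop and dominance information and rebuilds the dominator tree.
+  // Not supported for graphs with irreducible loops.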
+ GraphAnalysisResult RecomputeDominatorTree();
void SimplifyCFG();
void SimplifyCatchBlocks();
@@ -708,8 +710,13 @@ class HGraph : public ArenaObject<kArenaAllocGraph> {
bool HasMonitorOperations() const { return has_monitor_operations_; }
void SetHasMonitorOperations(bool value) { has_monitor_operations_ = value; }
- bool HasSIMD() const { return has_simd_; }
- void SetHasSIMD(bool value) { has_simd_ = value; }
+ bool HasTraditionalSIMD() { return has_traditional_simd_; }
+ void SetHasTraditionalSIMD(bool value) { has_traditional_simd_ = value; }
+
+ bool HasPredicatedSIMD() { return has_predicated_simd_; }
+ void SetHasPredicatedSIMD(bool value) { has_predicated_simd_ = value; }
+
+ bool HasSIMD() const { return has_traditional_simd_ || has_predicated_simd_; }
bool HasLoops() const { return has_loops_; }
void SetHasLoops(bool value) { has_loops_ = value; }
@@ -822,10 +829,11 @@ class HGraph : public ArenaObject<kArenaAllocGraph> {
// DexRegisterMap to be present to allow deadlock analysis for non-debuggable code.
bool has_monitor_operations_;
- // Flag whether SIMD instructions appear in the graph. If true, the
- // code generators may have to be more careful spilling the wider
+ // Flags whether SIMD (traditional or predicated) instructions appear in the graph.
+ // If either is true, the code generators may have to be more careful spilling the wider
// contents of SIMD registers.
- bool has_simd_;
+ bool has_traditional_simd_;
+ bool has_predicated_simd_;
// Flag whether there are any loops in the graph. We can skip loop
// optimization if it's false.
@@ -1544,7 +1552,6 @@ class HLoopInformationOutwardIterator : public ValueObject {
M(If, Instruction) \
M(InstanceFieldGet, Instruction) \
M(InstanceFieldSet, Instruction) \
- M(PredicatedInstanceFieldGet, Instruction) \
M(InstanceOf, Instruction) \
M(IntConstant, Constant) \
M(IntermediateAddress, Instruction) \
@@ -1636,7 +1643,9 @@ class HLoopInformationOutwardIterator : public ValueObject {
M(VecStore, VecMemoryOperation) \
M(VecPredSetAll, VecPredSetOperation) \
M(VecPredWhile, VecPredSetOperation) \
- M(VecPredCondition, VecOperation) \
+ M(VecPredToBoolean, VecOperation) \
+ M(VecCondition, VecPredSetOperation) \
+ M(VecPredNot, VecPredSetOperation) \
#define FOR_EACH_CONCRETE_INSTRUCTION_COMMON(M) \
FOR_EACH_CONCRETE_INSTRUCTION_SCALAR_COMMON(M) \
@@ -1659,6 +1668,8 @@ class HLoopInformationOutwardIterator : public ValueObject {
#define FOR_EACH_CONCRETE_INSTRUCTION_ARM64(M)
+#define FOR_EACH_CONCRETE_INSTRUCTION_RISCV64(M)
+
#ifndef ART_ENABLE_CODEGEN_x86
#define FOR_EACH_CONCRETE_INSTRUCTION_X86(M)
#else
@@ -1715,7 +1726,7 @@ FOR_EACH_INSTRUCTION(FORWARD_DECLARATION)
const char* DebugName() const override { return #type; } \
HInstruction* Clone(ArenaAllocator* arena) const override { \
DCHECK(IsClonable()); \
- return new (arena) H##type(*this->As##type()); \
+ return new (arena) H##type(*this); \
} \
void Accept(HGraphVisitor* visitor) override
@@ -2062,12 +2073,12 @@ class HEnvironment : public ArenaObject<kArenaAllocEnvironment> {
ArtMethod* method,
uint32_t dex_pc,
HInstruction* holder)
- : vregs_(number_of_vregs, allocator->Adapter(kArenaAllocEnvironmentVRegs)),
- locations_(allocator->Adapter(kArenaAllocEnvironmentLocations)),
- parent_(nullptr),
- method_(method),
- dex_pc_(dex_pc),
- holder_(holder) {
+ : vregs_(number_of_vregs, allocator->Adapter(kArenaAllocEnvironmentVRegs)),
+ locations_(allocator->Adapter(kArenaAllocEnvironmentLocations)),
+ parent_(nullptr),
+ method_(method),
+ dex_pc_(dex_pc),
+ holder_(holder) {
}
ALWAYS_INLINE HEnvironment(ArenaAllocator* allocator,
@@ -2183,9 +2194,14 @@ class HEnvironment : public ArenaObject<kArenaAllocEnvironment> {
std::ostream& operator<<(std::ostream& os, const HInstruction& rhs);
// Iterates over the Environments
-class HEnvironmentIterator : public ValueObject,
- public std::iterator<std::forward_iterator_tag, HEnvironment*> {
+class HEnvironmentIterator : public ValueObject {
public:
+ using iterator_category = std::forward_iterator_tag;
+ using value_type = HEnvironment*;
+ using difference_type = ptrdiff_t;
+ using pointer = void;
+ using reference = void;
+
explicit HEnvironmentIterator(HEnvironment* cur) : cur_(cur) {}
HEnvironment* operator*() const {
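
Editor's note: deriving from std::iterator was deprecated in C++17, so the hunk above replaces the base class with the five member typedefs that std::iterator_traits expects. A standalone sketch of the same pattern on a toy forward iterator over a parent chain; Node and NodeIterator are invented names.

#include <cstddef>
#include <iostream>
#include <iterator>

// Toy singly linked node, standing in for the HEnvironment parent chain.
struct Node {
  int value;
  Node* parent;
};

class NodeIterator {
 public:
  // Explicit typedefs replace inheriting from the deprecated std::iterator.
  using iterator_category = std::forward_iterator_tag;
  using value_type = Node*;
  using difference_type = ptrdiff_t;
  using pointer = void;
  using reference = void;

  explicit NodeIterator(Node* cur) : cur_(cur) {}
  Node* operator*() const { return cur_; }
  NodeIterator& operator++() { cur_ = cur_->parent; return *this; }
  bool operator==(const NodeIterator& other) const { return cur_ == other.cur_; }
  bool operator!=(const NodeIterator& other) const { return !(*this == other); }

 private:
  Node* cur_;
};

int main() {
  Node a{1, nullptr}, b{2, &a}, c{3, &b};
  // std::distance only works because the iterator traits above are declared.
  std::cout << std::distance(NodeIterator(&c), NodeIterator(nullptr)) << '\n';  // prints 3
}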
@@ -2355,9 +2371,7 @@ class HInstruction : public ArenaObject<kArenaAllocInstruction> {
return true;
}
- virtual bool CanDoImplicitNullCheckOn(HInstruction* obj ATTRIBUTE_UNUSED) const {
- return false;
- }
+ virtual bool CanDoImplicitNullCheckOn([[maybe_unused]] HInstruction* obj) const { return false; }
// If this instruction will do an implicit null check, return the `HNullCheck` associated
// with it. Otherwise return null.
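
Editor's note: throughout this patch the ATTRIBUTE_UNUSED macro on parameters is replaced by the standard C++17 [[maybe_unused]] attribute, which is written before the parameter rather than after it. A two-line illustration with an invented function, not ART code:

// Before (compiler-specific macro):  void Visit(int* insn ATTRIBUTE_UNUSED) {}
// After (standard C++17 attribute):
void Visit([[maybe_unused]] int* insn) {}  // no -Wunused-parameter warning

int main() { Visit(nullptr); }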
@@ -2553,7 +2567,9 @@ class HInstruction : public ArenaObject<kArenaAllocInstruction> {
#define INSTRUCTION_TYPE_CAST(type, super) \
const H##type* As##type() const; \
- H##type* As##type();
+ H##type* As##type(); \
+ const H##type* As##type##OrNull() const; \
+ H##type* As##type##OrNull();
FOR_EACH_INSTRUCTION(INSTRUCTION_TYPE_CAST)
#undef INSTRUCTION_TYPE_CAST
@@ -2568,7 +2584,7 @@ class HInstruction : public ArenaObject<kArenaAllocInstruction> {
//
// Note: HEnvironment and some other fields are not copied and are set to default values, see
// 'explicit HInstruction(const HInstruction& other)' for details.
- virtual HInstruction* Clone(ArenaAllocator* arena ATTRIBUTE_UNUSED) const {
+ virtual HInstruction* Clone([[maybe_unused]] ArenaAllocator* arena) const {
LOG(FATAL) << "Cloning is not implemented for the instruction " <<
DebugName() << " " << GetId();
UNREACHABLE();
@@ -2596,7 +2612,7 @@ class HInstruction : public ArenaObject<kArenaAllocInstruction> {
// Returns whether any data encoded in the two instructions is equal.
// This method does not look at the inputs. Both instructions must be
// of the same type, otherwise the method has undefined behavior.
- virtual bool InstructionDataEquals(const HInstruction* other ATTRIBUTE_UNUSED) const {
+ virtual bool InstructionDataEquals([[maybe_unused]] const HInstruction* other) const {
return false;
}
@@ -2729,7 +2745,7 @@ class HInstruction : public ArenaObject<kArenaAllocInstruction> {
private:
using InstructionKindField =
- BitField<InstructionKind, kFieldInstructionKind, kFieldInstructionKindSize>;
+ BitField<InstructionKind, kFieldInstructionKind, kFieldInstructionKindSize>;
void FixUpUserRecordsAfterUseInsertion(HUseList<HInstruction*>::iterator fixup_end) {
auto before_use_node = uses_.before_begin();
@@ -2904,9 +2920,14 @@ class HBackwardInstructionIterator : public ValueObject {
};
template <typename InnerIter>
-struct HSTLInstructionIterator : public ValueObject,
- public std::iterator<std::forward_iterator_tag, HInstruction*> {
+struct HSTLInstructionIterator : public ValueObject {
public:
+ using iterator_category = std::forward_iterator_tag;
+ using value_type = HInstruction*;
+ using difference_type = ptrdiff_t;
+ using pointer = void;
+ using reference = void;
+
static_assert(std::is_same_v<InnerIter, HBackwardInstructionIterator> ||
std::is_same_v<InnerIter, HInstructionIterator> ||
std::is_same_v<InnerIter, HInstructionIteratorHandleChanges>,
@@ -3164,7 +3185,7 @@ class HPhi final : public HVariableInputSizeInstruction {
bool IsVRegEquivalentOf(const HInstruction* other) const {
return other != nullptr
&& other->IsPhi()
- && other->AsPhi()->GetBlock() == GetBlock()
+ && other->GetBlock() == GetBlock()
&& other->AsPhi()->GetRegNumber() == GetRegNumber();
}
@@ -3270,7 +3291,7 @@ class HConstant : public HExpression<0> {
class HNullConstant final : public HConstant {
public:
- bool InstructionDataEquals(const HInstruction* other ATTRIBUTE_UNUSED) const override {
+ bool InstructionDataEquals([[maybe_unused]] const HInstruction* other) const override {
return true;
}
@@ -3497,7 +3518,9 @@ class HDoubleConstant final : public HConstant {
class HIf final : public HExpression<1> {
public:
explicit HIf(HInstruction* input, uint32_t dex_pc = kNoDexPc)
- : HExpression(kIf, SideEffects::None(), dex_pc) {
+ : HExpression(kIf, SideEffects::None(), dex_pc),
+ true_count_(std::numeric_limits<uint16_t>::max()),
+ false_count_(std::numeric_limits<uint16_t>::max()) {
SetRawInputAt(0, input);
}
@@ -3512,10 +3535,20 @@ class HIf final : public HExpression<1> {
return GetBlock()->GetSuccessors()[1];
}
+ void SetTrueCount(uint16_t count) { true_count_ = count; }
+ uint16_t GetTrueCount() const { return true_count_; }
+
+ void SetFalseCount(uint16_t count) { false_count_ = count; }
+ uint16_t GetFalseCount() const { return false_count_; }
+
DECLARE_INSTRUCTION(If);
protected:
DEFAULT_COPY_CONSTRUCTOR(If);
+
+ private:
+ uint16_t true_count_;
+ uint16_t false_count_;
};
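
Editor's note: HIf now carries 16-bit taken/not-taken counts, both initialized to the uint16_t maximum as a "no profile attached" sentinel. A sketch of how a consumer might turn the counts into a branch bias; DemoIf and the 0.8 threshold are invented for illustration, the real ART heuristics are not shown in this diff.

#include <cstdint>
#include <iostream>
#include <limits>

// Invented stand-in mirroring the counters added to HIf above.
class DemoIf {
 public:
  static constexpr uint16_t kUnset = std::numeric_limits<uint16_t>::max();

  void SetTrueCount(uint16_t count) { true_count_ = count; }
  void SetFalseCount(uint16_t count) { false_count_ = count; }

  bool HasProfile() const { return true_count_ != kUnset || false_count_ != kUnset; }

  // Fraction of executions that took the true edge.
  double TrueBias() const {
    uint32_t total = static_cast<uint32_t>(true_count_) + false_count_;
    return total == 0 ? 0.5 : static_cast<double>(true_count_) / total;
  }

 private:
  uint16_t true_count_ = kUnset;
  uint16_t false_count_ = kUnset;
};

int main() {
  DemoIf branch;
  branch.SetTrueCount(90);
  branch.SetFalseCount(10);
  if (branch.HasProfile() && branch.TrueBias() > 0.8) {
    std::cout << "strongly biased towards the true successor\n";
  }
}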
@@ -3639,7 +3672,8 @@ class HDeoptimize final : public HVariableInputSizeInstruction {
bool CanBeMoved() const override { return GetPackedFlag<kFieldCanBeMoved>(); }
bool InstructionDataEquals(const HInstruction* other) const override {
- return (other->CanBeMoved() == CanBeMoved()) && (other->AsDeoptimize()->GetKind() == GetKind());
+ return (other->CanBeMoved() == CanBeMoved()) &&
+ (other->AsDeoptimize()->GetDeoptimizationKind() == GetDeoptimizationKind());
}
bool NeedsEnvironment() const override { return true; }
@@ -3827,7 +3861,7 @@ class HUnaryOperation : public HExpression<1> {
DataType::Type GetResultType() const { return GetType(); }
bool CanBeMoved() const override { return true; }
- bool InstructionDataEquals(const HInstruction* other ATTRIBUTE_UNUSED) const override {
+ bool InstructionDataEquals([[maybe_unused]] const HInstruction* other) const override {
return true;
}
@@ -3836,6 +3870,9 @@ class HUnaryOperation : public HExpression<1> {
// be evaluated as a constant, return null.
HConstant* TryStaticEvaluation() const;
+ // Same but for `input` instead of GetInput().
+ HConstant* TryStaticEvaluation(HInstruction* input) const;
+
// Apply this operation to `x`.
virtual HConstant* Evaluate(HIntConstant* x) const = 0;
virtual HConstant* Evaluate(HLongConstant* x) const = 0;
@@ -3903,7 +3940,7 @@ class HBinaryOperation : public HExpression<2> {
}
bool CanBeMoved() const override { return true; }
- bool InstructionDataEquals(const HInstruction* other ATTRIBUTE_UNUSED) const override {
+ bool InstructionDataEquals([[maybe_unused]] const HInstruction* other) const override {
return true;
}
@@ -3912,16 +3949,19 @@ class HBinaryOperation : public HExpression<2> {
// be evaluated as a constant, return null.
HConstant* TryStaticEvaluation() const;
+ // Same but for `left` and `right` instead of GetLeft() and GetRight().
+ HConstant* TryStaticEvaluation(HInstruction* left, HInstruction* right) const;
+
// Apply this operation to `x` and `y`.
- virtual HConstant* Evaluate(HNullConstant* x ATTRIBUTE_UNUSED,
- HNullConstant* y ATTRIBUTE_UNUSED) const {
+ virtual HConstant* Evaluate([[maybe_unused]] HNullConstant* x,
+ [[maybe_unused]] HNullConstant* y) const {
LOG(FATAL) << DebugName() << " is not defined for the (null, null) case.";
UNREACHABLE();
}
virtual HConstant* Evaluate(HIntConstant* x, HIntConstant* y) const = 0;
virtual HConstant* Evaluate(HLongConstant* x, HLongConstant* y) const = 0;
- virtual HConstant* Evaluate(HLongConstant* x ATTRIBUTE_UNUSED,
- HIntConstant* y ATTRIBUTE_UNUSED) const {
+ virtual HConstant* Evaluate([[maybe_unused]] HLongConstant* x,
+ [[maybe_unused]] HIntConstant* y) const {
LOG(FATAL) << DebugName() << " is not defined for the (long, int) case.";
UNREACHABLE();
}
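
Editor's note: the new TryStaticEvaluation(left, right) and TryStaticEvaluation(input) overloads let a pass ask "what constant would this operation produce for these particular inputs?" without first rewiring the instruction. A simplified standalone sketch of that idea; DemoAdd and the optional<int> constants are invented, the real overloads dispatch over ART's HConstant classes.

#include <iostream>
#include <optional>

// Toy stand-in: a "constant" is just an optional int.
using DemoConstant = std::optional<int>;

class DemoAdd {
 public:
  DemoAdd(DemoConstant left, DemoConstant right) : left_(left), right_(right) {}

  // Classic form: evaluate the instruction's own inputs.
  DemoConstant TryStaticEvaluation() const { return TryStaticEvaluation(left_, right_); }

  // New form: evaluate hypothetical inputs without mutating the instruction,
  // which is handy when a simplifier wants to test a candidate replacement.
  DemoConstant TryStaticEvaluation(DemoConstant left, DemoConstant right) const {
    if (left.has_value() && right.has_value()) {
      return *left + *right;
    }
    return std::nullopt;  // Not a compile-time constant.
  }

 private:
  DemoConstant left_;
  DemoConstant right_;
};

int main() {
  DemoAdd add(std::nullopt, 3);                           // left input unknown
  std::cout << add.TryStaticEvaluation().has_value() << '\n';  // prints 0
  std::cout << *add.TryStaticEvaluation(4, 3) << '\n';         // prints 7, a speculative query
}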
@@ -4049,8 +4089,8 @@ class HEqual final : public HCondition {
bool IsCommutative() const override { return true; }
- HConstant* Evaluate(HNullConstant* x ATTRIBUTE_UNUSED,
- HNullConstant* y ATTRIBUTE_UNUSED) const override {
+ HConstant* Evaluate([[maybe_unused]] HNullConstant* x,
+ [[maybe_unused]] HNullConstant* y) const override {
return MakeConstantCondition(true, GetDexPc());
}
HConstant* Evaluate(HIntConstant* x, HIntConstant* y) const override {
@@ -4096,8 +4136,8 @@ class HNotEqual final : public HCondition {
bool IsCommutative() const override { return true; }
- HConstant* Evaluate(HNullConstant* x ATTRIBUTE_UNUSED,
- HNullConstant* y ATTRIBUTE_UNUSED) const override {
+ HConstant* Evaluate([[maybe_unused]] HNullConstant* x,
+ [[maybe_unused]] HNullConstant* y) const override {
return MakeConstantCondition(false, GetDexPc());
}
HConstant* Evaluate(HIntConstant* x, HIntConstant* y) const override {
@@ -4303,13 +4343,13 @@ class HBelow final : public HCondition {
HConstant* Evaluate(HLongConstant* x, HLongConstant* y) const override {
return MakeConstantCondition(Compute(x->GetValue(), y->GetValue()), GetDexPc());
}
- HConstant* Evaluate(HFloatConstant* x ATTRIBUTE_UNUSED,
- HFloatConstant* y ATTRIBUTE_UNUSED) const override {
+ HConstant* Evaluate([[maybe_unused]] HFloatConstant* x,
+ [[maybe_unused]] HFloatConstant* y) const override {
LOG(FATAL) << DebugName() << " is not defined for float values";
UNREACHABLE();
}
- HConstant* Evaluate(HDoubleConstant* x ATTRIBUTE_UNUSED,
- HDoubleConstant* y ATTRIBUTE_UNUSED) const override {
+ HConstant* Evaluate([[maybe_unused]] HDoubleConstant* x,
+ [[maybe_unused]] HDoubleConstant* y) const override {
LOG(FATAL) << DebugName() << " is not defined for double values";
UNREACHABLE();
}
@@ -4345,13 +4385,13 @@ class HBelowOrEqual final : public HCondition {
HConstant* Evaluate(HLongConstant* x, HLongConstant* y) const override {
return MakeConstantCondition(Compute(x->GetValue(), y->GetValue()), GetDexPc());
}
- HConstant* Evaluate(HFloatConstant* x ATTRIBUTE_UNUSED,
- HFloatConstant* y ATTRIBUTE_UNUSED) const override {
+ HConstant* Evaluate([[maybe_unused]] HFloatConstant* x,
+ [[maybe_unused]] HFloatConstant* y) const override {
LOG(FATAL) << DebugName() << " is not defined for float values";
UNREACHABLE();
}
- HConstant* Evaluate(HDoubleConstant* x ATTRIBUTE_UNUSED,
- HDoubleConstant* y ATTRIBUTE_UNUSED) const override {
+ HConstant* Evaluate([[maybe_unused]] HDoubleConstant* x,
+ [[maybe_unused]] HDoubleConstant* y) const override {
LOG(FATAL) << DebugName() << " is not defined for double values";
UNREACHABLE();
}
@@ -4387,13 +4427,13 @@ class HAbove final : public HCondition {
HConstant* Evaluate(HLongConstant* x, HLongConstant* y) const override {
return MakeConstantCondition(Compute(x->GetValue(), y->GetValue()), GetDexPc());
}
- HConstant* Evaluate(HFloatConstant* x ATTRIBUTE_UNUSED,
- HFloatConstant* y ATTRIBUTE_UNUSED) const override {
+ HConstant* Evaluate([[maybe_unused]] HFloatConstant* x,
+ [[maybe_unused]] HFloatConstant* y) const override {
LOG(FATAL) << DebugName() << " is not defined for float values";
UNREACHABLE();
}
- HConstant* Evaluate(HDoubleConstant* x ATTRIBUTE_UNUSED,
- HDoubleConstant* y ATTRIBUTE_UNUSED) const override {
+ HConstant* Evaluate([[maybe_unused]] HDoubleConstant* x,
+ [[maybe_unused]] HDoubleConstant* y) const override {
LOG(FATAL) << DebugName() << " is not defined for double values";
UNREACHABLE();
}
@@ -4429,13 +4469,13 @@ class HAboveOrEqual final : public HCondition {
HConstant* Evaluate(HLongConstant* x, HLongConstant* y) const override {
return MakeConstantCondition(Compute(x->GetValue(), y->GetValue()), GetDexPc());
}
- HConstant* Evaluate(HFloatConstant* x ATTRIBUTE_UNUSED,
- HFloatConstant* y ATTRIBUTE_UNUSED) const override {
+ HConstant* Evaluate([[maybe_unused]] HFloatConstant* x,
+ [[maybe_unused]] HFloatConstant* y) const override {
LOG(FATAL) << DebugName() << " is not defined for float values";
UNREACHABLE();
}
- HConstant* Evaluate(HDoubleConstant* x ATTRIBUTE_UNUSED,
- HDoubleConstant* y ATTRIBUTE_UNUSED) const override {
+ HConstant* Evaluate([[maybe_unused]] HDoubleConstant* x,
+ [[maybe_unused]] HDoubleConstant* y) const override {
LOG(FATAL) << DebugName() << " is not defined for double values";
UNREACHABLE();
}
@@ -4522,7 +4562,7 @@ class HCompare final : public HBinaryOperation {
return GetBias() == ComparisonBias::kGtBias;
}
- static SideEffects SideEffectsForArchRuntimeCalls(DataType::Type type ATTRIBUTE_UNUSED) {
+ static SideEffects SideEffectsForArchRuntimeCalls([[maybe_unused]] DataType::Type type) {
// Comparisons do not require a runtime call in any back end.
return SideEffects::None();
}
@@ -4859,8 +4899,7 @@ class HInvokePolymorphic final : public HInvoke {
// to pass intrinsic information to the HInvokePolymorphic node.
ArtMethod* resolved_method,
MethodReference resolved_method_reference,
- dex::ProtoIndex proto_idx,
- bool enable_intrinsic_opt)
+ dex::ProtoIndex proto_idx)
: HInvoke(kInvokePolymorphic,
allocator,
number_of_arguments,
@@ -4871,9 +4910,8 @@ class HInvokePolymorphic final : public HInvoke {
resolved_method,
resolved_method_reference,
kPolymorphic,
- enable_intrinsic_opt),
- proto_idx_(proto_idx) {
- }
+ /* enable_intrinsic_opt= */ true),
+ proto_idx_(proto_idx) {}
bool IsClonable() const override { return true; }
@@ -5015,15 +5053,13 @@ class HInvokeStaticOrDirect final : public HInvoke {
return input_records;
}
- bool CanDoImplicitNullCheckOn(HInstruction* obj ATTRIBUTE_UNUSED) const override {
+ bool CanDoImplicitNullCheckOn([[maybe_unused]] HInstruction* obj) const override {
// We do not access the method via object reference, so we cannot do an implicit null check.
// TODO: for intrinsics we can generate implicit null checks.
return false;
}
- bool CanBeNull() const override {
- return GetType() == DataType::Type::kReference && !IsStringInit();
- }
+ bool CanBeNull() const override;
MethodLoadKind GetMethodLoadKind() const { return dispatch_info_.method_load_kind; }
CodePtrLocation GetCodePtrLocation() const {
@@ -5599,10 +5635,14 @@ class HMin final : public HBinaryOperation {
ComputeIntegral(x->GetValue(), y->GetValue()), GetDexPc());
}
// TODO: Evaluation for floating-point values.
- HConstant* Evaluate(HFloatConstant* x ATTRIBUTE_UNUSED,
- HFloatConstant* y ATTRIBUTE_UNUSED) const override { return nullptr; }
- HConstant* Evaluate(HDoubleConstant* x ATTRIBUTE_UNUSED,
- HDoubleConstant* y ATTRIBUTE_UNUSED) const override { return nullptr; }
+ HConstant* Evaluate([[maybe_unused]] HFloatConstant* x,
+ [[maybe_unused]] HFloatConstant* y) const override {
+ return nullptr;
+ }
+ HConstant* Evaluate([[maybe_unused]] HDoubleConstant* x,
+ [[maybe_unused]] HDoubleConstant* y) const override {
+ return nullptr;
+ }
DECLARE_INSTRUCTION(Min);
@@ -5634,10 +5674,14 @@ class HMax final : public HBinaryOperation {
ComputeIntegral(x->GetValue(), y->GetValue()), GetDexPc());
}
// TODO: Evaluation for floating-point values.
- HConstant* Evaluate(HFloatConstant* x ATTRIBUTE_UNUSED,
- HFloatConstant* y ATTRIBUTE_UNUSED) const override { return nullptr; }
- HConstant* Evaluate(HDoubleConstant* x ATTRIBUTE_UNUSED,
- HDoubleConstant* y ATTRIBUTE_UNUSED) const override { return nullptr; }
+ HConstant* Evaluate([[maybe_unused]] HFloatConstant* x,
+ [[maybe_unused]] HFloatConstant* y) const override {
+ return nullptr;
+ }
+ HConstant* Evaluate([[maybe_unused]] HDoubleConstant* x,
+ [[maybe_unused]] HDoubleConstant* y) const override {
+ return nullptr;
+ }
DECLARE_INSTRUCTION(Max);
@@ -5699,7 +5743,7 @@ class HDivZeroCheck final : public HExpression<1> {
bool IsClonable() const override { return true; }
bool CanBeMoved() const override { return true; }
- bool InstructionDataEquals(const HInstruction* other ATTRIBUTE_UNUSED) const override {
+ bool InstructionDataEquals([[maybe_unused]] const HInstruction* other) const override {
return true;
}
@@ -5736,18 +5780,18 @@ class HShl final : public HBinaryOperation {
return GetBlock()->GetGraph()->GetLongConstant(
Compute(value->GetValue(), distance->GetValue(), kMaxLongShiftDistance), GetDexPc());
}
- HConstant* Evaluate(HLongConstant* value ATTRIBUTE_UNUSED,
- HLongConstant* distance ATTRIBUTE_UNUSED) const override {
+ HConstant* Evaluate([[maybe_unused]] HLongConstant* value,
+ [[maybe_unused]] HLongConstant* distance) const override {
LOG(FATAL) << DebugName() << " is not defined for the (long, long) case.";
UNREACHABLE();
}
- HConstant* Evaluate(HFloatConstant* value ATTRIBUTE_UNUSED,
- HFloatConstant* distance ATTRIBUTE_UNUSED) const override {
+ HConstant* Evaluate([[maybe_unused]] HFloatConstant* value,
+ [[maybe_unused]] HFloatConstant* distance) const override {
LOG(FATAL) << DebugName() << " is not defined for float values";
UNREACHABLE();
}
- HConstant* Evaluate(HDoubleConstant* value ATTRIBUTE_UNUSED,
- HDoubleConstant* distance ATTRIBUTE_UNUSED) const override {
+ HConstant* Evaluate([[maybe_unused]] HDoubleConstant* value,
+ [[maybe_unused]] HDoubleConstant* distance) const override {
LOG(FATAL) << DebugName() << " is not defined for double values";
UNREACHABLE();
}
@@ -5782,18 +5826,18 @@ class HShr final : public HBinaryOperation {
return GetBlock()->GetGraph()->GetLongConstant(
Compute(value->GetValue(), distance->GetValue(), kMaxLongShiftDistance), GetDexPc());
}
- HConstant* Evaluate(HLongConstant* value ATTRIBUTE_UNUSED,
- HLongConstant* distance ATTRIBUTE_UNUSED) const override {
+ HConstant* Evaluate([[maybe_unused]] HLongConstant* value,
+ [[maybe_unused]] HLongConstant* distance) const override {
LOG(FATAL) << DebugName() << " is not defined for the (long, long) case.";
UNREACHABLE();
}
- HConstant* Evaluate(HFloatConstant* value ATTRIBUTE_UNUSED,
- HFloatConstant* distance ATTRIBUTE_UNUSED) const override {
+ HConstant* Evaluate([[maybe_unused]] HFloatConstant* value,
+ [[maybe_unused]] HFloatConstant* distance) const override {
LOG(FATAL) << DebugName() << " is not defined for float values";
UNREACHABLE();
}
- HConstant* Evaluate(HDoubleConstant* value ATTRIBUTE_UNUSED,
- HDoubleConstant* distance ATTRIBUTE_UNUSED) const override {
+ HConstant* Evaluate([[maybe_unused]] HDoubleConstant* value,
+ [[maybe_unused]] HDoubleConstant* distance) const override {
LOG(FATAL) << DebugName() << " is not defined for double values";
UNREACHABLE();
}
@@ -5830,18 +5874,18 @@ class HUShr final : public HBinaryOperation {
return GetBlock()->GetGraph()->GetLongConstant(
Compute(value->GetValue(), distance->GetValue(), kMaxLongShiftDistance), GetDexPc());
}
- HConstant* Evaluate(HLongConstant* value ATTRIBUTE_UNUSED,
- HLongConstant* distance ATTRIBUTE_UNUSED) const override {
+ HConstant* Evaluate([[maybe_unused]] HLongConstant* value,
+ [[maybe_unused]] HLongConstant* distance) const override {
LOG(FATAL) << DebugName() << " is not defined for the (long, long) case.";
UNREACHABLE();
}
- HConstant* Evaluate(HFloatConstant* value ATTRIBUTE_UNUSED,
- HFloatConstant* distance ATTRIBUTE_UNUSED) const override {
+ HConstant* Evaluate([[maybe_unused]] HFloatConstant* value,
+ [[maybe_unused]] HFloatConstant* distance) const override {
LOG(FATAL) << DebugName() << " is not defined for float values";
UNREACHABLE();
}
- HConstant* Evaluate(HDoubleConstant* value ATTRIBUTE_UNUSED,
- HDoubleConstant* distance ATTRIBUTE_UNUSED) const override {
+ HConstant* Evaluate([[maybe_unused]] HDoubleConstant* value,
+ [[maybe_unused]] HDoubleConstant* distance) const override {
LOG(FATAL) << DebugName() << " is not defined for double values";
UNREACHABLE();
}
@@ -5873,13 +5917,13 @@ class HAnd final : public HBinaryOperation {
return GetBlock()->GetGraph()->GetLongConstant(
Compute(x->GetValue(), y->GetValue()), GetDexPc());
}
- HConstant* Evaluate(HFloatConstant* x ATTRIBUTE_UNUSED,
- HFloatConstant* y ATTRIBUTE_UNUSED) const override {
+ HConstant* Evaluate([[maybe_unused]] HFloatConstant* x,
+ [[maybe_unused]] HFloatConstant* y) const override {
LOG(FATAL) << DebugName() << " is not defined for float values";
UNREACHABLE();
}
- HConstant* Evaluate(HDoubleConstant* x ATTRIBUTE_UNUSED,
- HDoubleConstant* y ATTRIBUTE_UNUSED) const override {
+ HConstant* Evaluate([[maybe_unused]] HDoubleConstant* x,
+ [[maybe_unused]] HDoubleConstant* y) const override {
LOG(FATAL) << DebugName() << " is not defined for double values";
UNREACHABLE();
}
@@ -5911,13 +5955,13 @@ class HOr final : public HBinaryOperation {
return GetBlock()->GetGraph()->GetLongConstant(
Compute(x->GetValue(), y->GetValue()), GetDexPc());
}
- HConstant* Evaluate(HFloatConstant* x ATTRIBUTE_UNUSED,
- HFloatConstant* y ATTRIBUTE_UNUSED) const override {
+ HConstant* Evaluate([[maybe_unused]] HFloatConstant* x,
+ [[maybe_unused]] HFloatConstant* y) const override {
LOG(FATAL) << DebugName() << " is not defined for float values";
UNREACHABLE();
}
- HConstant* Evaluate(HDoubleConstant* x ATTRIBUTE_UNUSED,
- HDoubleConstant* y ATTRIBUTE_UNUSED) const override {
+ HConstant* Evaluate([[maybe_unused]] HDoubleConstant* x,
+ [[maybe_unused]] HDoubleConstant* y) const override {
LOG(FATAL) << DebugName() << " is not defined for double values";
UNREACHABLE();
}
@@ -5949,13 +5993,13 @@ class HXor final : public HBinaryOperation {
return GetBlock()->GetGraph()->GetLongConstant(
Compute(x->GetValue(), y->GetValue()), GetDexPc());
}
- HConstant* Evaluate(HFloatConstant* x ATTRIBUTE_UNUSED,
- HFloatConstant* y ATTRIBUTE_UNUSED) const override {
+ HConstant* Evaluate([[maybe_unused]] HFloatConstant* x,
+ [[maybe_unused]] HFloatConstant* y) const override {
LOG(FATAL) << DebugName() << " is not defined for float values";
UNREACHABLE();
}
- HConstant* Evaluate(HDoubleConstant* x ATTRIBUTE_UNUSED,
- HDoubleConstant* y ATTRIBUTE_UNUSED) const override {
+ HConstant* Evaluate([[maybe_unused]] HDoubleConstant* x,
+ [[maybe_unused]] HDoubleConstant* y) const override {
LOG(FATAL) << DebugName() << " is not defined for double values";
UNREACHABLE();
}
@@ -5993,18 +6037,18 @@ class HRor final : public HBinaryOperation {
return GetBlock()->GetGraph()->GetLongConstant(
Compute(value->GetValue(), distance->GetValue(), kMaxLongShiftDistance), GetDexPc());
}
- HConstant* Evaluate(HLongConstant* value ATTRIBUTE_UNUSED,
- HLongConstant* distance ATTRIBUTE_UNUSED) const override {
+ HConstant* Evaluate([[maybe_unused]] HLongConstant* value,
+ [[maybe_unused]] HLongConstant* distance) const override {
LOG(FATAL) << DebugName() << " is not defined for the (long, long) case.";
UNREACHABLE();
}
- HConstant* Evaluate(HFloatConstant* value ATTRIBUTE_UNUSED,
- HFloatConstant* distance ATTRIBUTE_UNUSED) const override {
+ HConstant* Evaluate([[maybe_unused]] HFloatConstant* value,
+ [[maybe_unused]] HFloatConstant* distance) const override {
LOG(FATAL) << DebugName() << " is not defined for float values";
UNREACHABLE();
}
- HConstant* Evaluate(HDoubleConstant* value ATTRIBUTE_UNUSED,
- HDoubleConstant* distance ATTRIBUTE_UNUSED) const override {
+ HConstant* Evaluate([[maybe_unused]] HDoubleConstant* value,
+ [[maybe_unused]] HDoubleConstant* distance) const override {
LOG(FATAL) << DebugName() << " is not defined for double values";
UNREACHABLE();
}
@@ -6067,7 +6111,7 @@ class HNot final : public HUnaryOperation {
}
bool CanBeMoved() const override { return true; }
- bool InstructionDataEquals(const HInstruction* other ATTRIBUTE_UNUSED) const override {
+ bool InstructionDataEquals([[maybe_unused]] const HInstruction* other) const override {
return true;
}
@@ -6079,11 +6123,11 @@ class HNot final : public HUnaryOperation {
HConstant* Evaluate(HLongConstant* x) const override {
return GetBlock()->GetGraph()->GetLongConstant(Compute(x->GetValue()), GetDexPc());
}
- HConstant* Evaluate(HFloatConstant* x ATTRIBUTE_UNUSED) const override {
+ HConstant* Evaluate([[maybe_unused]] HFloatConstant* x) const override {
LOG(FATAL) << DebugName() << " is not defined for float values";
UNREACHABLE();
}
- HConstant* Evaluate(HDoubleConstant* x ATTRIBUTE_UNUSED) const override {
+ HConstant* Evaluate([[maybe_unused]] HDoubleConstant* x) const override {
LOG(FATAL) << DebugName() << " is not defined for double values";
UNREACHABLE();
}
@@ -6101,7 +6145,7 @@ class HBooleanNot final : public HUnaryOperation {
}
bool CanBeMoved() const override { return true; }
- bool InstructionDataEquals(const HInstruction* other ATTRIBUTE_UNUSED) const override {
+ bool InstructionDataEquals([[maybe_unused]] const HInstruction* other) const override {
return true;
}
@@ -6113,15 +6157,15 @@ class HBooleanNot final : public HUnaryOperation {
HConstant* Evaluate(HIntConstant* x) const override {
return GetBlock()->GetGraph()->GetIntConstant(Compute(x->GetValue()), GetDexPc());
}
- HConstant* Evaluate(HLongConstant* x ATTRIBUTE_UNUSED) const override {
+ HConstant* Evaluate([[maybe_unused]] HLongConstant* x) const override {
LOG(FATAL) << DebugName() << " is not defined for long values";
UNREACHABLE();
}
- HConstant* Evaluate(HFloatConstant* x ATTRIBUTE_UNUSED) const override {
+ HConstant* Evaluate([[maybe_unused]] HFloatConstant* x) const override {
LOG(FATAL) << DebugName() << " is not defined for float values";
UNREACHABLE();
}
- HConstant* Evaluate(HDoubleConstant* x ATTRIBUTE_UNUSED) const override {
+ HConstant* Evaluate([[maybe_unused]] HDoubleConstant* x) const override {
LOG(FATAL) << DebugName() << " is not defined for double values";
UNREACHABLE();
}
@@ -6148,7 +6192,7 @@ class HTypeConversion final : public HExpression<1> {
bool IsClonable() const override { return true; }
bool CanBeMoved() const override { return true; }
- bool InstructionDataEquals(const HInstruction* other ATTRIBUTE_UNUSED) const override {
+ bool InstructionDataEquals([[maybe_unused]] const HInstruction* other) const override {
return true;
}
// Return whether the conversion is implicit. This includes conversion to the same type.
@@ -6160,6 +6204,9 @@ class HTypeConversion final : public HExpression<1> {
// containing the result. If the input cannot be converted, return nullptr.
HConstant* TryStaticEvaluation() const;
+ // Same but for `input` instead of GetInput().
+ HConstant* TryStaticEvaluation(HInstruction* input) const;
+
DECLARE_INSTRUCTION(TypeConversion);
protected:
@@ -6180,7 +6227,7 @@ class HNullCheck final : public HExpression<1> {
bool IsClonable() const override { return true; }
bool CanBeMoved() const override { return true; }
- bool InstructionDataEquals(const HInstruction* other ATTRIBUTE_UNUSED) const override {
+ bool InstructionDataEquals([[maybe_unused]] const HInstruction* other) const override {
return true;
}
@@ -6321,96 +6368,6 @@ class HInstanceFieldGet final : public HExpression<1> {
const FieldInfo field_info_;
};
-class HPredicatedInstanceFieldGet final : public HExpression<2> {
- public:
- HPredicatedInstanceFieldGet(HInstanceFieldGet* orig,
- HInstruction* target,
- HInstruction* default_val)
- : HExpression(kPredicatedInstanceFieldGet,
- orig->GetFieldType(),
- orig->GetSideEffects(),
- orig->GetDexPc()),
- field_info_(orig->GetFieldInfo()) {
- // NB Default-val is at 0 so we can avoid doing a move.
- SetRawInputAt(1, target);
- SetRawInputAt(0, default_val);
- }
-
- HPredicatedInstanceFieldGet(HInstruction* value,
- ArtField* field,
- HInstruction* default_value,
- DataType::Type field_type,
- MemberOffset field_offset,
- bool is_volatile,
- uint32_t field_idx,
- uint16_t declaring_class_def_index,
- const DexFile& dex_file,
- uint32_t dex_pc)
- : HExpression(kPredicatedInstanceFieldGet,
- field_type,
- SideEffects::FieldReadOfType(field_type, is_volatile),
- dex_pc),
- field_info_(field,
- field_offset,
- field_type,
- is_volatile,
- field_idx,
- declaring_class_def_index,
- dex_file) {
- SetRawInputAt(1, value);
- SetRawInputAt(0, default_value);
- }
-
- bool IsClonable() const override {
- return true;
- }
- bool CanBeMoved() const override {
- return !IsVolatile();
- }
-
- HInstruction* GetDefaultValue() const {
- return InputAt(0);
- }
- HInstruction* GetTarget() const {
- return InputAt(1);
- }
-
- bool InstructionDataEquals(const HInstruction* other) const override {
- const HPredicatedInstanceFieldGet* other_get = other->AsPredicatedInstanceFieldGet();
- return GetFieldOffset().SizeValue() == other_get->GetFieldOffset().SizeValue() &&
- GetDefaultValue() == other_get->GetDefaultValue();
- }
-
- bool CanDoImplicitNullCheckOn(HInstruction* obj) const override {
- return (obj == InputAt(0)) && art::CanDoImplicitNullCheckOn(GetFieldOffset().Uint32Value());
- }
-
- size_t ComputeHashCode() const override {
- return (HInstruction::ComputeHashCode() << 7) | GetFieldOffset().SizeValue();
- }
-
- bool IsFieldAccess() const override { return true; }
- const FieldInfo& GetFieldInfo() const override { return field_info_; }
- MemberOffset GetFieldOffset() const { return field_info_.GetFieldOffset(); }
- DataType::Type GetFieldType() const { return field_info_.GetFieldType(); }
- bool IsVolatile() const { return field_info_.IsVolatile(); }
-
- void SetType(DataType::Type new_type) {
- DCHECK(DataType::IsIntegralType(GetType()));
- DCHECK(DataType::IsIntegralType(new_type));
- DCHECK_EQ(DataType::Size(GetType()), DataType::Size(new_type));
- SetPackedField<TypeField>(new_type);
- }
-
- DECLARE_INSTRUCTION(PredicatedInstanceFieldGet);
-
- protected:
- DEFAULT_COPY_CONSTRUCTOR(PredicatedInstanceFieldGet);
-
- private:
- const FieldInfo field_info_;
-};
-
enum class WriteBarrierKind {
// Emit the write barrier, with a runtime optimization which checks if the value that it is being
// set is null.
@@ -6455,7 +6412,6 @@ class HInstanceFieldSet final : public HExpression<2> {
declaring_class_def_index,
dex_file) {
SetPackedFlag<kFlagValueCanBeNull>(true);
- SetPackedFlag<kFlagIsPredicatedSet>(false);
SetPackedField<WriteBarrierKindField>(WriteBarrierKind::kEmitWithNullCheck);
SetRawInputAt(0, object);
SetRawInputAt(1, value);
@@ -6475,8 +6431,6 @@ class HInstanceFieldSet final : public HExpression<2> {
HInstruction* GetValue() const { return InputAt(1); }
bool GetValueCanBeNull() const { return GetPackedFlag<kFlagValueCanBeNull>(); }
void ClearValueCanBeNull() { SetPackedFlag<kFlagValueCanBeNull>(false); }
- bool GetIsPredicatedSet() const { return GetPackedFlag<kFlagIsPredicatedSet>(); }
- void SetIsPredicatedSet(bool value = true) { SetPackedFlag<kFlagIsPredicatedSet>(value); }
WriteBarrierKind GetWriteBarrierKind() { return GetPackedField<WriteBarrierKindField>(); }
void SetWriteBarrierKind(WriteBarrierKind kind) {
DCHECK(kind != WriteBarrierKind::kEmitWithNullCheck)
@@ -6491,8 +6445,7 @@ class HInstanceFieldSet final : public HExpression<2> {
private:
static constexpr size_t kFlagValueCanBeNull = kNumberOfGenericPackedBits;
- static constexpr size_t kFlagIsPredicatedSet = kFlagValueCanBeNull + 1;
- static constexpr size_t kWriteBarrierKind = kFlagIsPredicatedSet + 1;
+ static constexpr size_t kWriteBarrierKind = kFlagValueCanBeNull + 1;
static constexpr size_t kWriteBarrierKindSize =
MinimumBitsToStore(static_cast<size_t>(WriteBarrierKind::kLast));
static constexpr size_t kNumberOfInstanceFieldSetPackedBits =
@@ -6511,12 +6464,12 @@ class HArrayGet final : public HExpression<2> {
HInstruction* index,
DataType::Type type,
uint32_t dex_pc)
- : HArrayGet(array,
- index,
- type,
- SideEffects::ArrayReadOfType(type),
- dex_pc,
- /* is_string_char_at= */ false) {
+ : HArrayGet(array,
+ index,
+ type,
+ SideEffects::ArrayReadOfType(type),
+ dex_pc,
+ /* is_string_char_at= */ false) {
}
HArrayGet(HInstruction* array,
@@ -6533,10 +6486,10 @@ class HArrayGet final : public HExpression<2> {
bool IsClonable() const override { return true; }
bool CanBeMoved() const override { return true; }
- bool InstructionDataEquals(const HInstruction* other ATTRIBUTE_UNUSED) const override {
+ bool InstructionDataEquals([[maybe_unused]] const HInstruction* other) const override {
return true;
}
- bool CanDoImplicitNullCheckOn(HInstruction* obj ATTRIBUTE_UNUSED) const override {
+ bool CanDoImplicitNullCheckOn([[maybe_unused]] HInstruction* obj) const override {
// TODO: We can be smarter here.
// Currently, unless the array is the result of NewArray, the array access is always
// preceded by some form of null NullCheck necessary for the bounds check, usually
@@ -6640,7 +6593,7 @@ class HArraySet final : public HExpression<3> {
// Can throw ArrayStoreException.
bool CanThrow() const override { return NeedsTypeCheck(); }
- bool CanDoImplicitNullCheckOn(HInstruction* obj ATTRIBUTE_UNUSED) const override {
+ bool CanDoImplicitNullCheckOn([[maybe_unused]] HInstruction* obj) const override {
// TODO: Same as for ArrayGet.
return false;
}
@@ -6746,7 +6699,7 @@ class HArrayLength final : public HExpression<1> {
bool IsClonable() const override { return true; }
bool CanBeMoved() const override { return true; }
- bool InstructionDataEquals(const HInstruction* other ATTRIBUTE_UNUSED) const override {
+ bool InstructionDataEquals([[maybe_unused]] const HInstruction* other) const override {
return true;
}
bool CanDoImplicitNullCheckOn(HInstruction* obj) const override {
@@ -6790,7 +6743,7 @@ class HBoundsCheck final : public HExpression<2> {
bool IsClonable() const override { return true; }
bool CanBeMoved() const override { return true; }
- bool InstructionDataEquals(const HInstruction* other ATTRIBUTE_UNUSED) const override {
+ bool InstructionDataEquals([[maybe_unused]] const HInstruction* other) const override {
return true;
}
@@ -7000,17 +6953,15 @@ class HLoadClass final : public HInstruction {
bool CanCallRuntime() const {
return NeedsAccessCheck() ||
MustGenerateClinitCheck() ||
- GetLoadKind() == LoadKind::kRuntimeCall ||
- GetLoadKind() == LoadKind::kBssEntry;
+ NeedsBss() ||
+ GetLoadKind() == LoadKind::kRuntimeCall;
}
bool CanThrow() const override {
return NeedsAccessCheck() ||
MustGenerateClinitCheck() ||
// If the class is in the boot image, the lookup in the runtime call cannot throw.
- ((GetLoadKind() == LoadKind::kRuntimeCall ||
- GetLoadKind() == LoadKind::kBssEntry) &&
- !IsInBootImage());
+ ((GetLoadKind() == LoadKind::kRuntimeCall || NeedsBss()) && !IsInBootImage());
}
ReferenceTypeInfo GetLoadedClassRTI() {
@@ -7362,6 +7313,16 @@ class HLoadMethodHandle final : public HInstruction {
class HLoadMethodType final : public HInstruction {
public:
+ // Determines how to load the MethodType.
+ enum class LoadKind {
+ // Load from an entry in the .bss section using a PC-relative load.
+ kBssEntry,
+ // Load using a single runtime call.
+ kRuntimeCall,
+
+ kLast = kRuntimeCall,
+ };
+
HLoadMethodType(HCurrentMethod* current_method,
dex::ProtoIndex proto_index,
const DexFile& dex_file,
@@ -7373,6 +7334,7 @@ class HLoadMethodType final : public HInstruction {
special_input_(HUserRecord<HInstruction*>(current_method)),
proto_index_(proto_index),
dex_file_(dex_file) {
+ SetPackedField<LoadKindField>(LoadKind::kRuntimeCall);
}
using HInstruction::GetInputRecords; // Keep the const version visible.
@@ -7383,6 +7345,12 @@ class HLoadMethodType final : public HInstruction {
bool IsClonable() const override { return true; }
+ void SetLoadKind(LoadKind load_kind);
+
+ LoadKind GetLoadKind() const {
+ return GetPackedField<LoadKindField>();
+ }
+
dex::ProtoIndex GetProtoIndex() const { return proto_index_; }
const DexFile& GetDexFile() const { return dex_file_; }
@@ -7401,6 +7369,14 @@ class HLoadMethodType final : public HInstruction {
DEFAULT_COPY_CONSTRUCTOR(LoadMethodType);
private:
+ static constexpr size_t kFieldLoadKind = kNumberOfGenericPackedBits;
+ static constexpr size_t kFieldLoadKindSize =
+ MinimumBitsToStore(static_cast<size_t>(LoadKind::kLast));
+ static constexpr size_t kNumberOfLoadMethodTypePackedBits = kFieldLoadKind + kFieldLoadKindSize;
+ static_assert(kNumberOfLoadMethodTypePackedBits <= kMaxNumberOfPackedBits,
+ "Too many packed fields.");
+ using LoadKindField = BitField<LoadKind, kFieldLoadKind, kFieldLoadKindSize>;
+
// The special input is the HCurrentMethod for kRuntimeCall.
HUserRecord<HInstruction*> special_input_;
@@ -7408,6 +7384,17 @@ class HLoadMethodType final : public HInstruction {
const DexFile& dex_file_;
};
+std::ostream& operator<<(std::ostream& os, HLoadMethodType::LoadKind rhs);
+
+// Note: defined outside class to see operator<<(., HLoadMethodType::LoadKind).
+inline void HLoadMethodType::SetLoadKind(LoadKind load_kind) {
+ // The load kind should be determined before inserting the instruction to the graph.
+ DCHECK(GetBlock() == nullptr);
+ DCHECK(GetEnvironment() == nullptr);
+ DCHECK_EQ(GetLoadKind(), LoadKind::kRuntimeCall);
+ SetPackedField<LoadKindField>(load_kind);
+}
+
/**
* Performs an initialization check on its Class object input.
*/
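
Editor's note: the new HLoadMethodType::LoadKind is stored in the instruction's packed bits through a BitField, with a static_assert guarding the bit budget. A standalone sketch of that packing technique; MiniBitField, DemoLoadKind and the chosen bit positions are invented stand-ins, ART's real BitField template is richer.

#include <cassert>
#include <cstddef>
#include <cstdint>

// Minimal stand-in for a BitField<T, position, size> packing helper.
template <typename T, size_t kPosition, size_t kSize>
struct MiniBitField {
  static_assert(kPosition + kSize <= 32, "Too many packed fields.");
  static uint32_t Encode(T value) {
    return (static_cast<uint32_t>(value) & ((1u << kSize) - 1u)) << kPosition;
  }
  static T Decode(uint32_t packed) {
    return static_cast<T>((packed >> kPosition) & ((1u << kSize) - 1u));
  }
};

enum class DemoLoadKind { kBssEntry, kRuntimeCall, kLast = kRuntimeCall };

// Suppose the first 3 packed bits are already taken by generic flags.
constexpr size_t kFieldLoadKind = 3;
constexpr size_t kFieldLoadKindSize = 1;  // enough to store kLast == 1
using LoadKindField = MiniBitField<DemoLoadKind, kFieldLoadKind, kFieldLoadKindSize>;

int main() {
  uint32_t packed = 0b101;  // pre-existing generic flags
  // OR in the encoded value (fine here because the field starts out zero).
  packed |= LoadKindField::Encode(DemoLoadKind::kRuntimeCall);
  assert(LoadKindField::Decode(packed) == DemoLoadKind::kRuntimeCall);
  assert((packed & 0b111) == 0b101);  // neighbouring flags untouched
}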
@@ -7423,7 +7410,7 @@ class HClinitCheck final : public HExpression<1> {
}
// TODO: Make ClinitCheck clonable.
bool CanBeMoved() const override { return true; }
- bool InstructionDataEquals(const HInstruction* other ATTRIBUTE_UNUSED) const override {
+ bool InstructionDataEquals([[maybe_unused]] const HInstruction* other) const override {
return true;
}
@@ -8343,7 +8330,7 @@ class HSelect final : public HExpression<3> {
HInstruction* GetCondition() const { return InputAt(2); }
bool CanBeMoved() const override { return true; }
- bool InstructionDataEquals(const HInstruction* other ATTRIBUTE_UNUSED) const override {
+ bool InstructionDataEquals([[maybe_unused]] const HInstruction* other) const override {
return true;
}
@@ -8351,6 +8338,12 @@ class HSelect final : public HExpression<3> {
return GetTrueValue()->CanBeNull() || GetFalseValue()->CanBeNull();
}
+ void UpdateType() {
+ DCHECK_EQ(HPhi::ToPhiType(GetTrueValue()->GetType()),
+ HPhi::ToPhiType(GetFalseValue()->GetType()));
+ SetPackedField<TypeField>(HPhi::ToPhiType(GetTrueValue()->GetType()));
+ }
+
DECLARE_INSTRUCTION(Select);
protected:
@@ -8513,7 +8506,7 @@ class HIntermediateAddress final : public HExpression<2> {
bool IsClonable() const override { return true; }
bool CanBeMoved() const override { return true; }
- bool InstructionDataEquals(const HInstruction* other ATTRIBUTE_UNUSED) const override {
+ bool InstructionDataEquals([[maybe_unused]] const HInstruction* other) const override {
return true;
}
bool IsActualObject() const override { return false; }
@@ -8550,7 +8543,7 @@ class HGraphVisitor : public ValueObject {
graph_(graph) {}
virtual ~HGraphVisitor() {}
- virtual void VisitInstruction(HInstruction* instruction ATTRIBUTE_UNUSED) {}
+ virtual void VisitInstruction([[maybe_unused]] HInstruction* instruction) {}
virtual void VisitBasicBlock(HBasicBlock* block);
// Visit the graph following basic block insertion order.
@@ -8623,7 +8616,7 @@ class CloneAndReplaceInstructionVisitor final : public HGraphDelegateVisitor {
DISALLOW_COPY_AND_ASSIGN(CloneAndReplaceInstructionVisitor);
};
-// Iterator over the blocks that art part of the loop. Includes blocks part
+// Iterator over the blocks that are part of the loop; includes blocks which are part
// of an inner loop. The order in which the blocks are iterated is on their
// block id.
class HBlocksInLoopIterator : public ValueObject {
@@ -8656,7 +8649,7 @@ class HBlocksInLoopIterator : public ValueObject {
DISALLOW_COPY_AND_ASSIGN(HBlocksInLoopIterator);
};
-// Iterator over the blocks that art part of the loop. Includes blocks part
+// Iterator over the blocks that are part of the loop; includes blocks which are part
// of an inner loop. The order in which the blocks are iterated is reverse
// post order.
class HBlocksInLoopReversePostOrderIterator : public ValueObject {
@@ -8689,6 +8682,39 @@ class HBlocksInLoopReversePostOrderIterator : public ValueObject {
DISALLOW_COPY_AND_ASSIGN(HBlocksInLoopReversePostOrderIterator);
};
+// Iterator over the blocks that are part of the loop; includes blocks which are part
+// of an inner loop. The order in which the blocks are iterated is post order.
+class HBlocksInLoopPostOrderIterator : public ValueObject {
+ public:
+ explicit HBlocksInLoopPostOrderIterator(const HLoopInformation& info)
+ : blocks_in_loop_(info.GetBlocks()),
+ blocks_(info.GetHeader()->GetGraph()->GetReversePostOrder()),
+ index_(blocks_.size() - 1) {
+ if (!blocks_in_loop_.IsBitSet(blocks_[index_]->GetBlockId())) {
+ Advance();
+ }
+ }
+
+ bool Done() const { return index_ < 0; }
+ HBasicBlock* Current() const { return blocks_[index_]; }
+ void Advance() {
+ --index_;
+ for (; index_ >= 0; --index_) {
+ if (blocks_in_loop_.IsBitSet(blocks_[index_]->GetBlockId())) {
+ break;
+ }
+ }
+ }
+
+ private:
+ const BitVector& blocks_in_loop_;
+ const ArenaVector<HBasicBlock*>& blocks_;
+
+ int32_t index_;
+
+ DISALLOW_COPY_AND_ASSIGN(HBlocksInLoopPostOrderIterator);
+};
+
// Returns int64_t value of a properly typed constant.
inline int64_t Int64FromConstant(HConstant* constant) {
if (constant->IsIntConstant()) {
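
Editor's note: the new HBlocksInLoopPostOrderIterator walks a loop's blocks in post order by scanning the graph's reverse-post-order list backwards and skipping blocks outside the loop. The toy below models the same trick with a vector and a membership set instead of the graph's block list and BitVector; the block names are invented.

#include <iostream>
#include <string>
#include <unordered_set>
#include <vector>

int main() {
  // Reverse post order of a small CFG; the loop is {header, body, latch}.
  std::vector<std::string> reverse_post_order = {"entry", "header", "body", "latch", "exit"};
  std::unordered_set<std::string> loop_blocks = {"header", "body", "latch"};

  // Walking RPO backwards while skipping non-members yields the loop in post order.
  for (auto it = reverse_post_order.rbegin(); it != reverse_post_order.rend(); ++it) {
    if (loop_blocks.count(*it) == 0) continue;  // skip blocks outside the loop
    std::cout << *it << ' ';                    // prints: latch body header
  }
  std::cout << '\n';
}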
@@ -8752,10 +8778,18 @@ inline bool IsZeroBitPattern(HInstruction* instruction) {
#define INSTRUCTION_TYPE_CAST(type, super) \
inline const H##type* HInstruction::As##type() const { \
- return Is##type() ? down_cast<const H##type*>(this) : nullptr; \
+ DCHECK(Is##type()); \
+ return down_cast<const H##type*>(this); \
} \
inline H##type* HInstruction::As##type() { \
- return Is##type() ? static_cast<H##type*>(this) : nullptr; \
+ DCHECK(Is##type()); \
+ return down_cast<H##type*>(this); \
+ } \
+ inline const H##type* HInstruction::As##type##OrNull() const { \
+ return Is##type() ? down_cast<const H##type*>(this) : nullptr; \
+ } \
+ inline H##type* HInstruction::As##type##OrNull() { \
+ return Is##type() ? down_cast<H##type*>(this) : nullptr; \
}
FOR_EACH_INSTRUCTION(INSTRUCTION_TYPE_CAST)
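
Editor's note: with this hunk, As##type() asserts the dynamic type and always returns a usable pointer, while the new As##type##OrNull() keeps the old "query or null" behaviour. A standalone sketch of the two flavours using invented Shape/Circle classes in place of the HInstruction hierarchy.

#include <cassert>
#include <iostream>

class Shape {
 public:
  virtual ~Shape() = default;
  virtual bool IsCircle() const { return false; }

  // Checked cast: caller has already established the type (mirrors AsFoo()).
  const class Circle* AsCircle() const;
  // Query cast: may legitimately fail (mirrors the new AsFooOrNull()).
  const class Circle* AsCircleOrNull() const;
};

class Circle : public Shape {
 public:
  bool IsCircle() const override { return true; }
  double radius = 1.0;
};

const Circle* Shape::AsCircle() const {
  assert(IsCircle());  // stands in for the DCHECK above
  return static_cast<const Circle*>(this);
}

const Circle* Shape::AsCircleOrNull() const {
  return IsCircle() ? static_cast<const Circle*>(this) : nullptr;
}

int main() {
  Circle c;
  const Shape* shape = &c;
  if (const Circle* circle = shape->AsCircleOrNull()) {  // speculative query
    std::cout << circle->radius << '\n';
  }
  if (shape->IsCircle()) {
    std::cout << shape->AsCircle()->radius << '\n';      // type already known
  }
}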
diff --git a/compiler/optimizing/nodes_shared.h b/compiler/optimizing/nodes_shared.h
index 27e610328f..4b0187d536 100644
--- a/compiler/optimizing/nodes_shared.h
+++ b/compiler/optimizing/nodes_shared.h
@@ -105,13 +105,13 @@ class HBitwiseNegatedRight final : public HBinaryOperation {
return GetBlock()->GetGraph()->GetLongConstant(
Compute(x->GetValue(), y->GetValue()), GetDexPc());
}
- HConstant* Evaluate(HFloatConstant* x ATTRIBUTE_UNUSED,
- HFloatConstant* y ATTRIBUTE_UNUSED) const override {
+ HConstant* Evaluate([[maybe_unused]] HFloatConstant* x,
+ [[maybe_unused]] HFloatConstant* y) const override {
LOG(FATAL) << DebugName() << " is not defined for float values";
UNREACHABLE();
}
- HConstant* Evaluate(HDoubleConstant* x ATTRIBUTE_UNUSED,
- HDoubleConstant* y ATTRIBUTE_UNUSED) const override {
+ HConstant* Evaluate([[maybe_unused]] HDoubleConstant* x,
+ [[maybe_unused]] HDoubleConstant* y) const override {
LOG(FATAL) << DebugName() << " is not defined for double values";
UNREACHABLE();
}
@@ -160,7 +160,7 @@ class HIntermediateAddressIndex final : public HExpression<3> {
bool IsClonable() const override { return true; }
bool CanBeMoved() const override { return true; }
- bool InstructionDataEquals(const HInstruction* other ATTRIBUTE_UNUSED) const override {
+ bool InstructionDataEquals([[maybe_unused]] const HInstruction* other) const override {
return true;
}
bool IsActualObject() const override { return false; }
diff --git a/compiler/optimizing/nodes_vector.h b/compiler/optimizing/nodes_vector.h
index 73f6c40a0d..6a60d6be01 100644
--- a/compiler/optimizing/nodes_vector.h
+++ b/compiler/optimizing/nodes_vector.h
@@ -1384,8 +1384,8 @@ class HVecPredWhile final : public HVecPredSetOperation {
static constexpr size_t kCondKind = HVecOperation::kNumberOfVectorOpPackedBits;
static constexpr size_t kCondKindSize =
MinimumBitsToStore(static_cast<size_t>(CondKind::kLast));
- static constexpr size_t kNumberOfVecPredConditionPackedBits = kCondKind + kCondKindSize;
- static_assert(kNumberOfVecPredConditionPackedBits <= kMaxNumberOfPackedBits,
+ static constexpr size_t kNumberOfVecPredWhilePackedBits = kCondKind + kCondKindSize;
+ static_assert(kNumberOfVecPredWhilePackedBits <= kMaxNumberOfPackedBits,
"Too many packed fields.");
using CondKindField = BitField<CondKind, kCondKind, kCondKindSize>;
@@ -1395,13 +1395,13 @@ class HVecPredWhile final : public HVecPredSetOperation {
// Evaluates the predicate condition (PCondKind) for a vector predicate; outputs
// a scalar boolean value result.
//
-// Note: as VecPredCondition can be also predicated, only active elements (determined by the
+// Note: as VecPredToBoolean can be also predicated, only active elements (determined by the
// instruction's governing predicate) of the input vector predicate are used for condition
// evaluation.
//
// Note: this instruction is currently used as a workaround for the fact that IR instructions
// can't have more than one output.
-class HVecPredCondition final : public HVecOperation {
+class HVecPredToBoolean final : public HVecOperation {
public:
// To get more info on the condition kinds please see "2.2 Process state, PSTATE" section of
// "ARM Architecture Reference Manual Supplement. The Scalable Vector Extension (SVE),
@@ -1418,13 +1418,13 @@ class HVecPredCondition final : public HVecOperation {
kEnumLast = kPLast
};
- HVecPredCondition(ArenaAllocator* allocator,
+ HVecPredToBoolean(ArenaAllocator* allocator,
HInstruction* input,
PCondKind pred_cond,
DataType::Type packed_type,
size_t vector_length,
uint32_t dex_pc)
- : HVecOperation(kVecPredCondition,
+ : HVecOperation(kVecPredToBoolean,
allocator,
packed_type,
SideEffects::None(),
@@ -1447,19 +1447,86 @@ class HVecPredCondition final : public HVecOperation {
return GetPackedField<CondKindField>();
}
- DECLARE_INSTRUCTION(VecPredCondition);
+ DECLARE_INSTRUCTION(VecPredToBoolean);
protected:
// Additional packed bits.
static constexpr size_t kCondKind = HVecOperation::kNumberOfVectorOpPackedBits;
static constexpr size_t kCondKindSize =
MinimumBitsToStore(static_cast<size_t>(PCondKind::kEnumLast));
- static constexpr size_t kNumberOfVecPredConditionPackedBits = kCondKind + kCondKindSize;
- static_assert(kNumberOfVecPredConditionPackedBits <= kMaxNumberOfPackedBits,
+ static constexpr size_t kNumberOfVecPredToBooleanPackedBits = kCondKind + kCondKindSize;
+ static_assert(kNumberOfVecPredToBooleanPackedBits <= kMaxNumberOfPackedBits,
"Too many packed fields.");
using CondKindField = BitField<PCondKind, kCondKind, kCondKindSize>;
- DEFAULT_COPY_CONSTRUCTOR(VecPredCondition);
+ DEFAULT_COPY_CONSTRUCTOR(VecPredToBoolean);
+};
+
+// Evaluates condition for pairwise elements in two input vectors and sets the result
+// as an output predicate vector.
+//
+// viz. [ p1, .. , pn ] = [ x1 OP y1 , x2 OP y2, .. , xn OP yn] where OP is CondKind
+// condition.
+//
+// Currently only kEqual is supported by this vector instruction - we don't even define
+// the kCondType here.
+// TODO: support other condition ops.
+class HVecCondition final : public HVecPredSetOperation {
+ public:
+ HVecCondition(ArenaAllocator* allocator,
+ HInstruction* left,
+ HInstruction* right,
+ DataType::Type packed_type,
+ size_t vector_length,
+ uint32_t dex_pc) :
+ HVecPredSetOperation(kVecCondition,
+ allocator,
+ packed_type,
+ SideEffects::None(),
+ /* number_of_inputs= */ 2,
+ vector_length,
+ dex_pc) {
+ DCHECK(left->IsVecOperation());
+ DCHECK(!left->IsVecPredSetOperation());
+ DCHECK(right->IsVecOperation());
+ DCHECK(!right->IsVecPredSetOperation());
+ SetRawInputAt(0, left);
+ SetRawInputAt(1, right);
+ }
+
+ DECLARE_INSTRUCTION(VecCondition);
+
+ protected:
+ DEFAULT_COPY_CONSTRUCTOR(VecCondition);
+};
+
+// Inverts every component in the predicate vector.
+//
+// viz. [ p1, .. , pn ] = [ !px1 , !px2 , .. , !pxn ].
+class HVecPredNot final : public HVecPredSetOperation {
+ public:
+ HVecPredNot(ArenaAllocator* allocator,
+ HInstruction* input,
+ DataType::Type packed_type,
+ size_t vector_length,
+ uint32_t dex_pc) :
+ HVecPredSetOperation(kVecPredNot,
+ allocator,
+ packed_type,
+ SideEffects::None(),
+ /* number_of_inputs= */ 1,
+ vector_length,
+ dex_pc) {
+ DCHECK(input->IsVecOperation());
+ DCHECK(input->IsVecPredSetOperation());
+
+ SetRawInputAt(0, input);
+ }
+
+ DECLARE_INSTRUCTION(VecPredNot);
+
+ protected:
+ DEFAULT_COPY_CONSTRUCTOR(VecPredNot);
};
} // namespace art
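
Editor's note: the comments above give the element-wise semantics of the new predicate instructions: HVecCondition compares lanes pairwise into a predicate vector (only kEqual so far), and HVecPredNot inverts each predicate lane. A scalar model of those semantics on plain arrays, purely illustrative, with no SVE or ART types involved.

#include <array>
#include <cstddef>
#include <iostream>

constexpr size_t kLanes = 4;
using Lanes = std::array<int, kLanes>;
using Pred = std::array<bool, kLanes>;

// p[i] = (x[i] == y[i]), the only condition kind supported so far (kEqual).
Pred VecConditionEqual(const Lanes& x, const Lanes& y) {
  Pred p{};
  for (size_t i = 0; i < kLanes; ++i) p[i] = (x[i] == y[i]);
  return p;
}

// q[i] = !p[i]
Pred VecPredNot(const Pred& p) {
  Pred q{};
  for (size_t i = 0; i < kLanes; ++i) q[i] = !p[i];
  return q;
}

int main() {
  Lanes x{1, 2, 3, 4};
  Lanes y{1, 0, 3, 0};
  Pred eq = VecConditionEqual(x, y);  // {1, 0, 1, 0}
  Pred ne = VecPredNot(eq);           // {0, 1, 0, 1}
  for (bool b : ne) std::cout << b;   // prints 0101
  std::cout << '\n';
}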
diff --git a/compiler/optimizing/nodes_x86.h b/compiler/optimizing/nodes_x86.h
index e246390aa5..14d9823355 100644
--- a/compiler/optimizing/nodes_x86.h
+++ b/compiler/optimizing/nodes_x86.h
@@ -149,13 +149,13 @@ class HX86AndNot final : public HBinaryOperation {
return GetBlock()->GetGraph()->GetLongConstant(
Compute(x->GetValue(), y->GetValue()), GetDexPc());
}
- HConstant* Evaluate(HFloatConstant* x ATTRIBUTE_UNUSED,
- HFloatConstant* y ATTRIBUTE_UNUSED) const override {
+ HConstant* Evaluate([[maybe_unused]] HFloatConstant* x,
+ [[maybe_unused]] HFloatConstant* y) const override {
LOG(FATAL) << DebugName() << " is not defined for float values";
UNREACHABLE();
}
- HConstant* Evaluate(HDoubleConstant* x ATTRIBUTE_UNUSED,
- HDoubleConstant* y ATTRIBUTE_UNUSED) const override {
+ HConstant* Evaluate([[maybe_unused]] HDoubleConstant* x,
+ [[maybe_unused]] HDoubleConstant* y) const override {
LOG(FATAL) << DebugName() << " is not defined for double values";
UNREACHABLE();
}
@@ -196,11 +196,11 @@ class HX86MaskOrResetLeastSetBit final : public HUnaryOperation {
HConstant* Evaluate(HLongConstant* x) const override {
return GetBlock()->GetGraph()->GetLongConstant(Compute(x->GetValue()), GetDexPc());
}
- HConstant* Evaluate(HFloatConstant* x ATTRIBUTE_UNUSED) const override {
+ HConstant* Evaluate([[maybe_unused]] HFloatConstant* x) const override {
LOG(FATAL) << DebugName() << "is not defined for float values";
UNREACHABLE();
}
- HConstant* Evaluate(HDoubleConstant* x ATTRIBUTE_UNUSED) const override {
+ HConstant* Evaluate([[maybe_unused]] HDoubleConstant* x) const override {
LOG(FATAL) << DebugName() << "is not defined for double values";
UNREACHABLE();
}
diff --git a/compiler/optimizing/optimization.cc b/compiler/optimizing/optimization.cc
index 12e9a1046d..16045d447c 100644
--- a/compiler/optimizing/optimization.cc
+++ b/compiler/optimizing/optimization.cc
@@ -23,6 +23,9 @@
#ifdef ART_ENABLE_CODEGEN_arm64
#include "instruction_simplifier_arm64.h"
#endif
+#ifdef ART_ENABLE_CODEGEN_riscv64
+#include "critical_native_abi_fixup_riscv64.h"
+#endif
#ifdef ART_ENABLE_CODEGEN_x86
#include "pc_relative_fixups_x86.h"
#include "instruction_simplifier_x86.h"
@@ -109,6 +112,10 @@ const char* OptimizationPassName(OptimizationPass pass) {
case OptimizationPass::kInstructionSimplifierArm64:
return arm64::InstructionSimplifierArm64::kInstructionSimplifierArm64PassName;
#endif
+#ifdef ART_ENABLE_CODEGEN_riscv64
+ case OptimizationPass::kCriticalNativeAbiFixupRiscv64:
+ return riscv64::CriticalNativeAbiFixupRiscv64::kCriticalNativeAbiFixupRiscv64PassName;
+#endif
#ifdef ART_ENABLE_CODEGEN_x86
case OptimizationPass::kPcRelativeFixupsX86:
return x86::PcRelativeFixups::kPcRelativeFixupsX86PassName;
@@ -155,6 +162,9 @@ OptimizationPass OptimizationPassByName(const std::string& pass_name) {
#ifdef ART_ENABLE_CODEGEN_arm64
X(OptimizationPass::kInstructionSimplifierArm64);
#endif
+#ifdef ART_ENABLE_CODEGEN_riscv64
+ X(OptimizationPass::kCriticalNativeAbiFixupRiscv64);
+#endif
#ifdef ART_ENABLE_CODEGEN_x86
X(OptimizationPass::kPcRelativeFixupsX86);
X(OptimizationPass::kX86MemoryOperandGeneration);
@@ -290,7 +300,7 @@ ArenaVector<HOptimization*> ConstructOptimizations(
#ifdef ART_ENABLE_CODEGEN_arm
case OptimizationPass::kInstructionSimplifierArm:
DCHECK(alt_name == nullptr) << "arch-specific pass does not support alternative name";
- opt = new (allocator) arm::InstructionSimplifierArm(graph, stats);
+ opt = new (allocator) arm::InstructionSimplifierArm(graph, codegen, stats);
break;
case OptimizationPass::kCriticalNativeAbiFixupArm:
DCHECK(alt_name == nullptr) << "arch-specific pass does not support alternative name";
@@ -300,7 +310,13 @@ ArenaVector<HOptimization*> ConstructOptimizations(
#ifdef ART_ENABLE_CODEGEN_arm64
case OptimizationPass::kInstructionSimplifierArm64:
DCHECK(alt_name == nullptr) << "arch-specific pass does not support alternative name";
- opt = new (allocator) arm64::InstructionSimplifierArm64(graph, stats);
+ opt = new (allocator) arm64::InstructionSimplifierArm64(graph, codegen, stats);
+ break;
+#endif
+#ifdef ART_ENABLE_CODEGEN_riscv64
+ case OptimizationPass::kCriticalNativeAbiFixupRiscv64:
+ DCHECK(alt_name == nullptr) << "arch-specific pass does not support alternative name";
+ opt = new (allocator) riscv64::CriticalNativeAbiFixupRiscv64(graph, stats);
break;
#endif
#ifdef ART_ENABLE_CODEGEN_x86
@@ -313,8 +329,8 @@ ArenaVector<HOptimization*> ConstructOptimizations(
opt = new (allocator) x86::X86MemoryOperandGeneration(graph, codegen, stats);
break;
case OptimizationPass::kInstructionSimplifierX86:
- opt = new (allocator) x86::InstructionSimplifierX86(graph, codegen, stats);
- break;
+ opt = new (allocator) x86::InstructionSimplifierX86(graph, codegen, stats);
+ break;
#endif
#ifdef ART_ENABLE_CODEGEN_x86_64
case OptimizationPass::kInstructionSimplifierX86_64:
diff --git a/compiler/optimizing/optimization.h b/compiler/optimizing/optimization.h
index 134e3cdc7a..57c5f4639c 100644
--- a/compiler/optimizing/optimization.h
+++ b/compiler/optimizing/optimization.h
@@ -93,6 +93,9 @@ enum class OptimizationPass {
#ifdef ART_ENABLE_CODEGEN_arm64
kInstructionSimplifierArm64,
#endif
+#ifdef ART_ENABLE_CODEGEN_riscv64
+ kCriticalNativeAbiFixupRiscv64,
+#endif
#ifdef ART_ENABLE_CODEGEN_x86
kPcRelativeFixupsX86,
kInstructionSimplifierX86,
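
Editor's note: wiring the new riscv64 pass touches three places, as the hunks above show: the OptimizationPass enum, the two name-lookup functions, and the construction switch, each guarded by ART_ENABLE_CODEGEN_riscv64. A condensed standalone sketch of that registration shape; DemoPass and the name strings here are invented for illustration and are not the real pass-name constants.

#include <iostream>
#include <string>

enum class DemoPass { kInstructionSimplifier, kCriticalNativeAbiFixupRiscv64 };

// Name table, analogous in shape to OptimizationPassName().
const char* PassName(DemoPass pass) {
  switch (pass) {
    case DemoPass::kInstructionSimplifier:         return "demo_instruction_simplifier";
    case DemoPass::kCriticalNativeAbiFixupRiscv64: return "demo_critical_native_abi_fixup_riscv64";
  }
  return "unknown";
}

// Reverse lookup, analogous in shape to OptimizationPassByName().
DemoPass PassByName(const std::string& name) {
  if (name == PassName(DemoPass::kCriticalNativeAbiFixupRiscv64)) {
    return DemoPass::kCriticalNativeAbiFixupRiscv64;
  }
  return DemoPass::kInstructionSimplifier;
}

int main() {
  DemoPass pass = PassByName("demo_critical_native_abi_fixup_riscv64");
  std::cout << PassName(pass) << '\n';
}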
diff --git a/compiler/optimizing/optimizing_cfi_test.cc b/compiler/optimizing/optimizing_cfi_test.cc
index f12e748941..9df4932f3c 100644
--- a/compiler/optimizing/optimizing_cfi_test.cc
+++ b/compiler/optimizing/optimizing_cfi_test.cc
@@ -35,9 +35,6 @@ namespace vixl32 = vixl::aarch32;
namespace art HIDDEN {
-// Run the tests only on host.
-#ifndef ART_TARGET_ANDROID
-
class OptimizingCFITest : public CFITest, public OptimizingUnitTestHelper {
public:
// Enable this flag to generate the expected outputs.
@@ -89,7 +86,7 @@ class OptimizingCFITest : public CFITest, public OptimizingUnitTestHelper {
void Finish() {
code_gen_->GenerateFrameExit();
- code_gen_->Finalize(&code_allocator_);
+ code_gen_->Finalize();
}
void Check(InstructionSet isa,
@@ -97,7 +94,7 @@ class OptimizingCFITest : public CFITest, public OptimizingUnitTestHelper {
const std::vector<uint8_t>& expected_asm,
const std::vector<uint8_t>& expected_cfi) {
// Get the outputs.
- ArrayRef<const uint8_t> actual_asm = code_allocator_.GetMemory();
+ ArrayRef<const uint8_t> actual_asm = code_gen_->GetCode();
Assembler* opt_asm = code_gen_->GetAssembler();
ArrayRef<const uint8_t> actual_cfi(*(opt_asm->cfi().data()));
@@ -123,27 +120,9 @@ class OptimizingCFITest : public CFITest, public OptimizingUnitTestHelper {
}
private:
- class InternalCodeAllocator : public CodeAllocator {
- public:
- InternalCodeAllocator() {}
-
- uint8_t* Allocate(size_t size) override {
- memory_.resize(size);
- return memory_.data();
- }
-
- ArrayRef<const uint8_t> GetMemory() const override { return ArrayRef<const uint8_t>(memory_); }
-
- private:
- std::vector<uint8_t> memory_;
-
- DISALLOW_COPY_AND_ASSIGN(InternalCodeAllocator);
- };
-
HGraph* graph_;
std::unique_ptr<CodeGenerator> code_gen_;
ArenaVector<HBasicBlock*> blocks_;
- InternalCodeAllocator code_allocator_;
};
#define TEST_ISA(isa) \
@@ -162,26 +141,15 @@ TEST_ISA(kThumb2)
#endif
#ifdef ART_ENABLE_CODEGEN_arm64
-// Run the tests for ARM64 only with Baker read barriers, as the
+// Run the tests for ARM64 only if the Marking Register is reserved, as the
// expected generated code saves and restores X21 and X22 (instead of
// X20 and X21), as X20 is used as Marking Register in the Baker read
// barrier configuration, and as such is removed from the set of
// callee-save registers in the ARM64 code generator of the Optimizing
// compiler.
-//
-// We can't use compile-time macros for read-barrier as the introduction
-// of userfaultfd-GC has made it a runtime choice.
-TEST_F(OptimizingCFITest, kArm64) {
- if (kUseBakerReadBarrier && gUseReadBarrier) {
- std::vector<uint8_t> expected_asm(
- expected_asm_kArm64,
- expected_asm_kArm64 + arraysize(expected_asm_kArm64));
- std::vector<uint8_t> expected_cfi(
- expected_cfi_kArm64,
- expected_cfi_kArm64 + arraysize(expected_cfi_kArm64));
- TestImpl(InstructionSet::kArm64, "kArm64", expected_asm, expected_cfi);
- }
-}
+#if defined(RESERVE_MARKING_REGISTER)
+TEST_ISA(kArm64)
+#endif
#endif
#ifdef ART_ENABLE_CODEGEN_x86
@@ -217,6 +185,4 @@ TEST_F(OptimizingCFITest, kThumb2Adjust) {
}
#endif
-#endif // ART_TARGET_ANDROID
-
} // namespace art
diff --git a/compiler/optimizing/optimizing_compiler.cc b/compiler/optimizing/optimizing_compiler.cc
index 00eb6e5c42..d458462226 100644
--- a/compiler/optimizing/optimizing_compiler.cc
+++ b/compiler/optimizing/optimizing_compiler.cc
@@ -53,6 +53,7 @@
#include "oat_quick_method_header.h"
#include "optimizing/write_barrier_elimination.h"
#include "prepare_for_register_allocation.h"
+#include "profiling_info_builder.h"
#include "reference_type_propagation.h"
#include "register_allocator_linear_scan.h"
#include "select_generator.h"
@@ -69,28 +70,6 @@ static constexpr size_t kArenaAllocatorMemoryReportThreshold = 8 * MB;
static constexpr const char* kPassNameSeparator = "$";
/**
- * Used by the code generator, to allocate the code in a vector.
- */
-class CodeVectorAllocator final : public CodeAllocator {
- public:
- explicit CodeVectorAllocator(ArenaAllocator* allocator)
- : memory_(allocator->Adapter(kArenaAllocCodeBuffer)) {}
-
- uint8_t* Allocate(size_t size) override {
- memory_.resize(size);
- return &memory_[0];
- }
-
- ArrayRef<const uint8_t> GetMemory() const override { return ArrayRef<const uint8_t>(memory_); }
- uint8_t* GetData() { return memory_.data(); }
-
- private:
- ArenaVector<uint8_t> memory_;
-
- DISALLOW_COPY_AND_ASSIGN(CodeVectorAllocator);
-};
-
-/**
* Filter to apply to the visualizer. Methods whose name contain that filter will
* be dumped.
*/
@@ -361,7 +340,6 @@ class OptimizingCompiler final : public Compiler {
// Create a 'CompiledMethod' for an optimized graph.
CompiledMethod* Emit(ArenaAllocator* allocator,
- CodeVectorAllocator* code_allocator,
CodeGenerator* codegen,
bool is_intrinsic,
const dex::CodeItem* item) const;
@@ -372,10 +350,8 @@ class OptimizingCompiler final : public Compiler {
// 1) Builds the graph. Returns null if it failed to build it.
// 2) Transforms the graph to SSA. Returns null if it failed.
// 3) Runs optimizations on the graph, including register allocator.
- // 4) Generates code with the `code_allocator` provided.
CodeGenerator* TryCompile(ArenaAllocator* allocator,
ArenaStack* arena_stack,
- CodeVectorAllocator* code_allocator,
const DexCompilationUnit& dex_compilation_unit,
ArtMethod* method,
CompilationKind compilation_kind,
@@ -383,7 +359,6 @@ class OptimizingCompiler final : public Compiler {
CodeGenerator* TryCompileIntrinsic(ArenaAllocator* allocator,
ArenaStack* arena_stack,
- CodeVectorAllocator* code_allocator,
const DexCompilationUnit& dex_compilation_unit,
ArtMethod* method,
VariableSizedHandleScope* handles) const;
@@ -440,24 +415,33 @@ void OptimizingCompiler::DumpInstructionSetFeaturesToCfg() const {
std::string isa_string =
std::string("isa:") + GetInstructionSetString(features->GetInstructionSet());
std::string features_string = "isa_features:" + features->GetFeatureString();
+ std::string read_barrier_type = "none";
+ if (compiler_options.EmitReadBarrier()) {
+ if (art::kUseBakerReadBarrier)
+ read_barrier_type = "baker";
+ else if (art::kUseTableLookupReadBarrier)
+ read_barrier_type = "tablelookup";
+ }
+ std::string read_barrier_string = ART_FORMAT("read_barrier_type:{}", read_barrier_type);
// It is assumed that visualizer_output_ is empty when calling this function, hence the fake
// compilation block containing the ISA features will be printed at the beginning of the .cfg
// file.
- *visualizer_output_
- << HGraphVisualizer::InsertMetaDataAsCompilationBlock(isa_string + ' ' + features_string);
+ *visualizer_output_ << HGraphVisualizer::InsertMetaDataAsCompilationBlock(
+ isa_string + ' ' + features_string + ' ' + read_barrier_string);
}
-bool OptimizingCompiler::CanCompileMethod(uint32_t method_idx ATTRIBUTE_UNUSED,
- const DexFile& dex_file ATTRIBUTE_UNUSED) const {
+bool OptimizingCompiler::CanCompileMethod([[maybe_unused]] uint32_t method_idx,
+ [[maybe_unused]] const DexFile& dex_file) const {
return true;
}
static bool IsInstructionSetSupported(InstructionSet instruction_set) {
- return instruction_set == InstructionSet::kArm
- || instruction_set == InstructionSet::kArm64
- || instruction_set == InstructionSet::kThumb2
- || instruction_set == InstructionSet::kX86
- || instruction_set == InstructionSet::kX86_64;
+ return instruction_set == InstructionSet::kArm ||
+ instruction_set == InstructionSet::kArm64 ||
+ instruction_set == InstructionSet::kThumb2 ||
+ instruction_set == InstructionSet::kRiscv64 ||
+ instruction_set == InstructionSet::kX86 ||
+ instruction_set == InstructionSet::kX86_64;
}
bool OptimizingCompiler::RunBaselineOptimizations(HGraph* graph,
@@ -469,7 +453,7 @@ bool OptimizingCompiler::RunBaselineOptimizations(HGraph* graph,
case InstructionSet::kThumb2:
case InstructionSet::kArm: {
OptimizationDef arm_optimizations[] = {
- OptDef(OptimizationPass::kCriticalNativeAbiFixupArm),
+ OptDef(OptimizationPass::kCriticalNativeAbiFixupArm),
};
return RunOptimizations(graph,
codegen,
@@ -478,10 +462,22 @@ bool OptimizingCompiler::RunBaselineOptimizations(HGraph* graph,
arm_optimizations);
}
#endif
+#if defined(ART_ENABLE_CODEGEN_riscv64)
+ case InstructionSet::kRiscv64: {
+ OptimizationDef riscv64_optimizations[] = {
+ OptDef(OptimizationPass::kCriticalNativeAbiFixupRiscv64),
+ };
+ return RunOptimizations(graph,
+ codegen,
+ dex_compilation_unit,
+ pass_observer,
+ riscv64_optimizations);
+ }
+#endif
#ifdef ART_ENABLE_CODEGEN_x86
case InstructionSet::kX86: {
OptimizationDef x86_optimizations[] = {
- OptDef(OptimizationPass::kPcRelativeFixupsX86),
+ OptDef(OptimizationPass::kPcRelativeFixupsX86),
};
return RunOptimizations(graph,
codegen,
@@ -508,11 +504,11 @@ bool OptimizingCompiler::RunArchOptimizations(HGraph* graph,
case InstructionSet::kThumb2:
case InstructionSet::kArm: {
OptimizationDef arm_optimizations[] = {
- OptDef(OptimizationPass::kInstructionSimplifierArm),
- OptDef(OptimizationPass::kSideEffectsAnalysis),
- OptDef(OptimizationPass::kGlobalValueNumbering, "GVN$after_arch"),
- OptDef(OptimizationPass::kCriticalNativeAbiFixupArm),
- OptDef(OptimizationPass::kScheduling)
+ OptDef(OptimizationPass::kInstructionSimplifierArm),
+ OptDef(OptimizationPass::kSideEffectsAnalysis),
+ OptDef(OptimizationPass::kGlobalValueNumbering, "GVN$after_arch"),
+ OptDef(OptimizationPass::kCriticalNativeAbiFixupArm),
+ OptDef(OptimizationPass::kScheduling)
};
return RunOptimizations(graph,
codegen,
@@ -524,10 +520,10 @@ bool OptimizingCompiler::RunArchOptimizations(HGraph* graph,
#ifdef ART_ENABLE_CODEGEN_arm64
case InstructionSet::kArm64: {
OptimizationDef arm64_optimizations[] = {
- OptDef(OptimizationPass::kInstructionSimplifierArm64),
- OptDef(OptimizationPass::kSideEffectsAnalysis),
- OptDef(OptimizationPass::kGlobalValueNumbering, "GVN$after_arch"),
- OptDef(OptimizationPass::kScheduling)
+ OptDef(OptimizationPass::kInstructionSimplifierArm64),
+ OptDef(OptimizationPass::kSideEffectsAnalysis),
+ OptDef(OptimizationPass::kGlobalValueNumbering, "GVN$after_arch"),
+ OptDef(OptimizationPass::kScheduling)
};
return RunOptimizations(graph,
codegen,
@@ -536,14 +532,28 @@ bool OptimizingCompiler::RunArchOptimizations(HGraph* graph,
arm64_optimizations);
}
#endif
+#if defined(ART_ENABLE_CODEGEN_riscv64)
+ case InstructionSet::kRiscv64: {
+ OptimizationDef riscv64_optimizations[] = {
+ OptDef(OptimizationPass::kSideEffectsAnalysis),
+ OptDef(OptimizationPass::kGlobalValueNumbering, "GVN$after_arch"),
+ OptDef(OptimizationPass::kCriticalNativeAbiFixupRiscv64)
+ };
+ return RunOptimizations(graph,
+ codegen,
+ dex_compilation_unit,
+ pass_observer,
+ riscv64_optimizations);
+ }
+#endif
#ifdef ART_ENABLE_CODEGEN_x86
case InstructionSet::kX86: {
OptimizationDef x86_optimizations[] = {
- OptDef(OptimizationPass::kInstructionSimplifierX86),
- OptDef(OptimizationPass::kSideEffectsAnalysis),
- OptDef(OptimizationPass::kGlobalValueNumbering, "GVN$after_arch"),
- OptDef(OptimizationPass::kPcRelativeFixupsX86),
- OptDef(OptimizationPass::kX86MemoryOperandGeneration)
+ OptDef(OptimizationPass::kInstructionSimplifierX86),
+ OptDef(OptimizationPass::kSideEffectsAnalysis),
+ OptDef(OptimizationPass::kGlobalValueNumbering, "GVN$after_arch"),
+ OptDef(OptimizationPass::kPcRelativeFixupsX86),
+ OptDef(OptimizationPass::kX86MemoryOperandGeneration)
};
return RunOptimizations(graph,
codegen,
@@ -555,10 +565,10 @@ bool OptimizingCompiler::RunArchOptimizations(HGraph* graph,
#ifdef ART_ENABLE_CODEGEN_x86_64
case InstructionSet::kX86_64: {
OptimizationDef x86_64_optimizations[] = {
- OptDef(OptimizationPass::kInstructionSimplifierX86_64),
- OptDef(OptimizationPass::kSideEffectsAnalysis),
- OptDef(OptimizationPass::kGlobalValueNumbering, "GVN$after_arch"),
- OptDef(OptimizationPass::kX86MemoryOperandGeneration)
+ OptDef(OptimizationPass::kInstructionSimplifierX86_64),
+ OptDef(OptimizationPass::kSideEffectsAnalysis),
+ OptDef(OptimizationPass::kGlobalValueNumbering, "GVN$after_arch"),
+ OptDef(OptimizationPass::kX86MemoryOperandGeneration)
};
return RunOptimizations(graph,
codegen,
@@ -633,68 +643,68 @@ void OptimizingCompiler::RunOptimizations(HGraph* graph,
}
OptimizationDef optimizations[] = {
- // Initial optimizations.
- OptDef(OptimizationPass::kConstantFolding),
- OptDef(OptimizationPass::kInstructionSimplifier),
- OptDef(OptimizationPass::kDeadCodeElimination,
- "dead_code_elimination$initial"),
- // Inlining.
- OptDef(OptimizationPass::kInliner),
- // Simplification (if inlining occurred, or if we analyzed the invoke as "always throwing").
- OptDef(OptimizationPass::kConstantFolding,
- "constant_folding$after_inlining",
- OptimizationPass::kInliner),
- OptDef(OptimizationPass::kInstructionSimplifier,
- "instruction_simplifier$after_inlining",
- OptimizationPass::kInliner),
- OptDef(OptimizationPass::kDeadCodeElimination,
- "dead_code_elimination$after_inlining",
- OptimizationPass::kInliner),
- // GVN.
- OptDef(OptimizationPass::kSideEffectsAnalysis,
- "side_effects$before_gvn"),
- OptDef(OptimizationPass::kGlobalValueNumbering),
- // Simplification (TODO: only if GVN occurred).
- OptDef(OptimizationPass::kSelectGenerator),
- OptDef(OptimizationPass::kAggressiveConstantFolding,
- "constant_folding$after_gvn"),
- OptDef(OptimizationPass::kInstructionSimplifier,
- "instruction_simplifier$after_gvn"),
- OptDef(OptimizationPass::kDeadCodeElimination,
- "dead_code_elimination$after_gvn"),
- // High-level optimizations.
- OptDef(OptimizationPass::kSideEffectsAnalysis,
- "side_effects$before_licm"),
- OptDef(OptimizationPass::kInvariantCodeMotion),
- OptDef(OptimizationPass::kInductionVarAnalysis),
- OptDef(OptimizationPass::kBoundsCheckElimination),
- OptDef(OptimizationPass::kLoopOptimization),
- // Simplification.
- OptDef(OptimizationPass::kConstantFolding,
- "constant_folding$after_loop_opt"),
- OptDef(OptimizationPass::kAggressiveInstructionSimplifier,
- "instruction_simplifier$after_loop_opt"),
- OptDef(OptimizationPass::kDeadCodeElimination,
- "dead_code_elimination$after_loop_opt"),
- // Other high-level optimizations.
- OptDef(OptimizationPass::kLoadStoreElimination),
- OptDef(OptimizationPass::kCHAGuardOptimization),
- OptDef(OptimizationPass::kCodeSinking),
- // Simplification.
- OptDef(OptimizationPass::kConstantFolding,
- "constant_folding$before_codegen"),
- // The codegen has a few assumptions that only the instruction simplifier
- // can satisfy. For example, the code generator does not expect to see a
- // HTypeConversion from a type to the same type.
- OptDef(OptimizationPass::kAggressiveInstructionSimplifier,
- "instruction_simplifier$before_codegen"),
- // Simplification may result in dead code that should be removed prior to
- // code generation.
- OptDef(OptimizationPass::kDeadCodeElimination,
- "dead_code_elimination$before_codegen"),
- // Eliminate constructor fences after code sinking to avoid
- // complicated sinking logic to split a fence with many inputs.
- OptDef(OptimizationPass::kConstructorFenceRedundancyElimination)
+ // Initial optimizations.
+ OptDef(OptimizationPass::kConstantFolding),
+ OptDef(OptimizationPass::kInstructionSimplifier),
+ OptDef(OptimizationPass::kDeadCodeElimination,
+ "dead_code_elimination$initial"),
+ // Inlining.
+ OptDef(OptimizationPass::kInliner),
+ // Simplification (if inlining occurred, or if we analyzed the invoke as "always throwing").
+ OptDef(OptimizationPass::kConstantFolding,
+ "constant_folding$after_inlining",
+ OptimizationPass::kInliner),
+ OptDef(OptimizationPass::kInstructionSimplifier,
+ "instruction_simplifier$after_inlining",
+ OptimizationPass::kInliner),
+ OptDef(OptimizationPass::kDeadCodeElimination,
+ "dead_code_elimination$after_inlining",
+ OptimizationPass::kInliner),
+ // GVN.
+ OptDef(OptimizationPass::kSideEffectsAnalysis,
+ "side_effects$before_gvn"),
+ OptDef(OptimizationPass::kGlobalValueNumbering),
+ // Simplification (TODO: only if GVN occurred).
+ OptDef(OptimizationPass::kSelectGenerator),
+ OptDef(OptimizationPass::kAggressiveConstantFolding,
+ "constant_folding$after_gvn"),
+ OptDef(OptimizationPass::kInstructionSimplifier,
+ "instruction_simplifier$after_gvn"),
+ OptDef(OptimizationPass::kDeadCodeElimination,
+ "dead_code_elimination$after_gvn"),
+ // High-level optimizations.
+ OptDef(OptimizationPass::kSideEffectsAnalysis,
+ "side_effects$before_licm"),
+ OptDef(OptimizationPass::kInvariantCodeMotion),
+ OptDef(OptimizationPass::kInductionVarAnalysis),
+ OptDef(OptimizationPass::kBoundsCheckElimination),
+ OptDef(OptimizationPass::kLoopOptimization),
+ // Simplification.
+ OptDef(OptimizationPass::kConstantFolding,
+ "constant_folding$after_loop_opt"),
+ OptDef(OptimizationPass::kAggressiveInstructionSimplifier,
+ "instruction_simplifier$after_loop_opt"),
+ OptDef(OptimizationPass::kDeadCodeElimination,
+ "dead_code_elimination$after_loop_opt"),
+ // Other high-level optimizations.
+ OptDef(OptimizationPass::kLoadStoreElimination),
+ OptDef(OptimizationPass::kCHAGuardOptimization),
+ OptDef(OptimizationPass::kCodeSinking),
+ // Simplification.
+ OptDef(OptimizationPass::kConstantFolding,
+ "constant_folding$before_codegen"),
+ // The codegen has a few assumptions that only the instruction simplifier
+ // can satisfy. For example, the code generator does not expect to see a
+ // HTypeConversion from a type to the same type.
+ OptDef(OptimizationPass::kAggressiveInstructionSimplifier,
+ "instruction_simplifier$before_codegen"),
+ // Simplification may result in dead code that should be removed prior to
+ // code generation.
+ OptDef(OptimizationPass::kDeadCodeElimination,
+ "dead_code_elimination$before_codegen"),
+ // Eliminate constructor fences after code sinking to avoid
+ // complicated sinking logic to split a fence with many inputs.
+ OptDef(OptimizationPass::kConstructorFenceRedundancyElimination)
};
RunOptimizations(graph,
codegen,
@@ -719,7 +729,6 @@ static ArenaVector<linker::LinkerPatch> EmitAndSortLinkerPatches(CodeGenerator*
}
CompiledMethod* OptimizingCompiler::Emit(ArenaAllocator* allocator,
- CodeVectorAllocator* code_allocator,
CodeGenerator* codegen,
bool is_intrinsic,
const dex::CodeItem* code_item_for_osr_check) const {
@@ -729,7 +738,7 @@ CompiledMethod* OptimizingCompiler::Emit(ArenaAllocator* allocator,
CompiledCodeStorage* storage = GetCompiledCodeStorage();
CompiledMethod* compiled_method = storage->CreateCompiledMethod(
codegen->GetInstructionSet(),
- code_allocator->GetMemory(),
+ codegen->GetCode(),
ArrayRef<const uint8_t>(stack_map),
ArrayRef<const uint8_t>(*codegen->GetAssembler()->cfi().data()),
ArrayRef<const linker::LinkerPatch>(linker_patches),
@@ -749,7 +758,6 @@ CompiledMethod* OptimizingCompiler::Emit(ArenaAllocator* allocator,
CodeGenerator* OptimizingCompiler::TryCompile(ArenaAllocator* allocator,
ArenaStack* arena_stack,
- CodeVectorAllocator* code_allocator,
const DexCompilationUnit& dex_compilation_unit,
ArtMethod* method,
CompilationKind compilation_kind,
@@ -828,8 +836,6 @@ CodeGenerator* OptimizingCompiler::TryCompile(ArenaAllocator* allocator,
jit::Jit* jit = Runtime::Current()->GetJit();
if (jit != nullptr) {
ProfilingInfo* info = jit->GetCodeCache()->GetProfilingInfo(method, Thread::Current());
- DCHECK_IMPLIES(compilation_kind == CompilationKind::kBaseline, info != nullptr)
- << "Compiling a method baseline should always have a ProfilingInfo";
graph->SetProfilingInfo(info);
}
@@ -913,8 +919,23 @@ CodeGenerator* OptimizingCompiler::TryCompile(ArenaAllocator* allocator,
&pass_observer,
regalloc_strategy,
compilation_stats_.get());
+ // If we are compiling baseline and we haven't created a profiling info for
+ // this method already, do it now.
+ if (jit != nullptr &&
+ compilation_kind == CompilationKind::kBaseline &&
+ graph->GetProfilingInfo() == nullptr) {
+ ProfilingInfoBuilder(
+ graph, codegen->GetCompilerOptions(), codegen.get(), compilation_stats_.get()).Run();
+ // We expect a profiling info to be created and attached to the graph.
+ // However, we may have run out of memory trying to create it, so in this
+ // case just abort the compilation.
+ if (graph->GetProfilingInfo() == nullptr) {
+ MaybeRecordStat(compilation_stats_.get(), MethodCompilationStat::kJitOutOfMemoryForCommit);
+ return nullptr;
+ }
+ }
- codegen->Compile(code_allocator);
+ codegen->Compile();
pass_observer.DumpDisassembly();
MaybeRecordStat(compilation_stats_.get(), MethodCompilationStat::kCompiledBytecode);
@@ -924,7 +945,6 @@ CodeGenerator* OptimizingCompiler::TryCompile(ArenaAllocator* allocator,
CodeGenerator* OptimizingCompiler::TryCompileIntrinsic(
ArenaAllocator* allocator,
ArenaStack* arena_stack,
- CodeVectorAllocator* code_allocator,
const DexCompilationUnit& dex_compilation_unit,
ArtMethod* method,
VariableSizedHandleScope* handles) const {
@@ -986,9 +1006,9 @@ CodeGenerator* OptimizingCompiler::TryCompileIntrinsic(
}
OptimizationDef optimizations[] = {
- // The codegen has a few assumptions that only the instruction simplifier
- // can satisfy.
- OptDef(OptimizationPass::kInstructionSimplifier),
+ // The codegen has a few assumptions that only the instruction simplifier
+ // can satisfy.
+ OptDef(OptimizationPass::kInstructionSimplifier),
};
RunOptimizations(graph,
codegen.get(),
@@ -1013,7 +1033,7 @@ CodeGenerator* OptimizingCompiler::TryCompileIntrinsic(
return nullptr;
}
- codegen->Compile(code_allocator);
+ codegen->Compile();
pass_observer.DumpDisassembly();
VLOG(compiler) << "Compiled intrinsic: " << method->GetIntrinsic()
@@ -1037,7 +1057,6 @@ CompiledMethod* OptimizingCompiler::Compile(const dex::CodeItem* code_item,
DCHECK(runtime->IsAotCompiler());
ArenaAllocator allocator(runtime->GetArenaPool());
ArenaStack arena_stack(runtime->GetArenaPool());
- CodeVectorAllocator code_allocator(&allocator);
std::unique_ptr<CodeGenerator> codegen;
bool compiled_intrinsic = false;
{
@@ -1071,7 +1090,6 @@ CompiledMethod* OptimizingCompiler::Compile(const dex::CodeItem* code_item,
codegen.reset(
TryCompileIntrinsic(&allocator,
&arena_stack,
- &code_allocator,
dex_compilation_unit,
method,
&handles));
@@ -1083,7 +1101,6 @@ CompiledMethod* OptimizingCompiler::Compile(const dex::CodeItem* code_item,
codegen.reset(
TryCompile(&allocator,
&arena_stack,
- &code_allocator,
dex_compilation_unit,
method,
compiler_options.IsBaseline()
@@ -1094,7 +1111,6 @@ CompiledMethod* OptimizingCompiler::Compile(const dex::CodeItem* code_item,
}
if (codegen.get() != nullptr) {
compiled_method = Emit(&allocator,
- &code_allocator,
codegen.get(),
compiled_intrinsic,
compiled_intrinsic ? nullptr : code_item);
@@ -1177,19 +1193,16 @@ CompiledMethod* OptimizingCompiler::JniCompile(uint32_t access_flags,
/*verified_method=*/ nullptr,
dex_cache,
compiling_class);
- CodeVectorAllocator code_allocator(&allocator);
// Go to native so that we don't block GC during compilation.
ScopedThreadSuspension sts(soa.Self(), ThreadState::kNative);
std::unique_ptr<CodeGenerator> codegen(
TryCompileIntrinsic(&allocator,
&arena_stack,
- &code_allocator,
dex_compilation_unit,
method,
&handles));
if (codegen != nullptr) {
return Emit(&allocator,
- &code_allocator,
codegen.get(),
/*is_intrinsic=*/ true,
/*item=*/ nullptr);
@@ -1221,7 +1234,7 @@ Compiler* CreateOptimizingCompiler(const CompilerOptions& compiler_options,
return new OptimizingCompiler(compiler_options, storage);
}
-bool EncodeArtMethodInInlineInfo(ArtMethod* method ATTRIBUTE_UNUSED) {
+bool EncodeArtMethodInInlineInfo([[maybe_unused]] ArtMethod* method) {
// Note: the runtime is null only for unit testing.
return Runtime::Current() == nullptr || !Runtime::Current()->IsAotCompiler();
}
@@ -1328,7 +1341,6 @@ bool OptimizingCompiler::JitCompile(Thread* self,
debug_info,
/* is_full_debug_info= */ compiler_options.GetGenerateDebugInfo(),
compilation_kind,
- /* has_should_deoptimize_flag= */ false,
cha_single_implementation_list)) {
code_cache->Free(self, region, reserved_code.data(), reserved_data.data());
return false;
@@ -1342,7 +1354,6 @@ bool OptimizingCompiler::JitCompile(Thread* self,
}
ArenaStack arena_stack(runtime->GetJitArenaPool());
- CodeVectorAllocator code_allocator(&allocator);
VariableSizedHandleScope handles(self);
std::unique_ptr<CodeGenerator> codegen;
@@ -1365,7 +1376,6 @@ bool OptimizingCompiler::JitCompile(Thread* self,
codegen.reset(
TryCompile(&allocator,
&arena_stack,
- &code_allocator,
dex_compilation_unit,
method,
compilation_kind,
@@ -1381,7 +1391,7 @@ bool OptimizingCompiler::JitCompile(Thread* self,
ArrayRef<const uint8_t> reserved_data;
if (!code_cache->Reserve(self,
region,
- code_allocator.GetMemory().size(),
+ codegen->GetAssembler()->CodeSize(),
stack_map.size(),
/*number_of_roots=*/codegen->GetNumberOfJitRoots(),
method,
@@ -1394,7 +1404,9 @@ bool OptimizingCompiler::JitCompile(Thread* self,
const uint8_t* roots_data = reserved_data.data();
std::vector<Handle<mirror::Object>> roots;
- codegen->EmitJitRoots(code_allocator.GetData(), roots_data, &roots);
+ codegen->EmitJitRoots(const_cast<uint8_t*>(codegen->GetAssembler()->CodeBufferBaseAddress()),
+ roots_data,
+ &roots);
// The root Handle<>s filled by the codegen reference entries in the VariableSizedHandleScope.
DCHECK(std::all_of(roots.begin(),
roots.end(),
@@ -1418,7 +1430,7 @@ bool OptimizingCompiler::JitCompile(Thread* self,
info.is_optimized = true;
info.is_code_address_text_relative = false;
info.code_address = reinterpret_cast<uintptr_t>(code);
- info.code_size = code_allocator.GetMemory().size();
+ info.code_size = codegen->GetAssembler()->CodeSize();
info.frame_size_in_bytes = codegen->GetFrameSize();
info.code_info = stack_map.size() == 0 ? nullptr : stack_map.data();
info.cfi = ArrayRef<const uint8_t>(*codegen->GetAssembler()->cfi().data());
@@ -1429,22 +1441,23 @@ bool OptimizingCompiler::JitCompile(Thread* self,
region,
method,
reserved_code,
- code_allocator.GetMemory(),
+ codegen->GetCode(),
reserved_data,
roots,
ArrayRef<const uint8_t>(stack_map),
debug_info,
/* is_full_debug_info= */ compiler_options.GetGenerateDebugInfo(),
compilation_kind,
- codegen->GetGraph()->HasShouldDeoptimizeFlag(),
codegen->GetGraph()->GetCHASingleImplementationList())) {
+ CHECK_EQ(CodeInfo::HasShouldDeoptimizeFlag(stack_map.data()),
+ codegen->GetGraph()->HasShouldDeoptimizeFlag());
code_cache->Free(self, region, reserved_code.data(), reserved_data.data());
return false;
}
Runtime::Current()->GetJit()->AddMemoryUsage(method, allocator.BytesUsed());
if (jit_logger != nullptr) {
- jit_logger->WriteLog(code, code_allocator.GetMemory().size(), method);
+ jit_logger->WriteLog(code, codegen->GetAssembler()->CodeSize(), method);
}
if (kArenaAllocatorCountAllocations) {
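
The optimizing_compiler.cc and optimizing_cfi_test.cc hunks above drop the CodeAllocator/CodeVectorAllocator indirection: the code generator now owns its output buffer, and callers read the result back through codegen->GetCode() and the assembler's CodeSize(). Below is a minimal standalone C++ sketch of that ownership shape; the class and member names are illustrative stand-ins, not ART's real API.

// Minimal standalone sketch (hypothetical types, not ART's real classes) of the
// ownership change: the code buffer lives inside the generator, so callers ask
// the generator for the finished code instead of supplying an allocator.
#include <cstddef>
#include <cstdint>
#include <cstdio>
#include <vector>

// "Before": the caller owns the buffer through an allocator interface.
class CodeAllocatorSketch {
 public:
  uint8_t* Allocate(std::size_t size) {
    memory_.resize(size);
    return memory_.data();
  }
  const std::vector<uint8_t>& GetMemory() const { return memory_; }

 private:
  std::vector<uint8_t> memory_;
};

// "After": the generator owns the buffer and exposes it once finalized.
class CodeGeneratorSketch {
 public:
  void Compile() {
    // Pretend we emitted three bytes of machine code.
    code_ = {0x90, 0x90, 0xC3};
  }
  const std::vector<uint8_t>& GetCode() const { return code_; }
  std::size_t CodeSize() const { return code_.size(); }

 private:
  std::vector<uint8_t> code_;
};

int main() {
  CodeGeneratorSketch codegen;
  codegen.Compile();
  // Call sites that previously read code_allocator.GetMemory() now read
  // codegen.GetCode() / codegen.CodeSize().
  std::printf("generated %zu bytes\n", codegen.CodeSize());
  return 0;
}
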
diff --git a/compiler/optimizing/optimizing_compiler_stats.h b/compiler/optimizing/optimizing_compiler_stats.h
index a1d0a5a845..4549af3cbf 100644
--- a/compiler/optimizing/optimizing_compiler_stats.h
+++ b/compiler/optimizing/optimizing_compiler_stats.h
@@ -47,6 +47,7 @@ enum class MethodCompilationStat {
kUnresolvedFieldNotAFastAccess,
kRemovedCheckedCast,
kRemovedDeadInstruction,
+ kRemovedDeadPhi,
kRemovedTry,
kRemovedNullCheck,
kNotCompiledSkipped,
@@ -130,8 +131,6 @@ enum class MethodCompilationStat {
kPartialLSEPossible,
kPartialStoreRemoved,
kPartialAllocationMoved,
- kPredicatedLoadAdded,
- kPredicatedStoreAdded,
kDevirtualized,
kLastStat
};
diff --git a/compiler/optimizing/optimizing_unit_test.h b/compiler/optimizing/optimizing_unit_test.h
index 2e05c41f01..77e6420df8 100644
--- a/compiler/optimizing/optimizing_unit_test.h
+++ b/compiler/optimizing/optimizing_unit_test.h
@@ -244,7 +244,6 @@ class OptimizingUnitTestHelper {
auto container =
std::make_shared<MemoryDexFileContainer>(dex_data, sizeof(StandardDexFile::Header));
dex_files_.emplace_back(new StandardDexFile(dex_data,
- sizeof(StandardDexFile::Header),
"no_location",
/*location_checksum*/ 0,
/*oat_dex_file*/ nullptr,
diff --git a/compiler/optimizing/parallel_move_test.cc b/compiler/optimizing/parallel_move_test.cc
index a1c05e9cad..d2b993280d 100644
--- a/compiler/optimizing/parallel_move_test.cc
+++ b/compiler/optimizing/parallel_move_test.cc
@@ -81,8 +81,8 @@ class TestParallelMoveResolverWithSwap : public ParallelMoveResolverWithSwap {
message_ << ")";
}
- void SpillScratch(int reg ATTRIBUTE_UNUSED) override {}
- void RestoreScratch(int reg ATTRIBUTE_UNUSED) override {}
+ void SpillScratch([[maybe_unused]] int reg) override {}
+ void RestoreScratch([[maybe_unused]] int reg) override {}
std::string GetMessage() const {
return message_.str();
@@ -126,7 +126,7 @@ class TestParallelMoveResolverNoSwap : public ParallelMoveResolverNoSwap {
return scratch;
}
- void FreeScratchLocation(Location loc ATTRIBUTE_UNUSED) override {}
+ void FreeScratchLocation([[maybe_unused]] Location loc) override {}
void EmitMove(size_t index) override {
MoveOperands* move = moves_[index];
diff --git a/compiler/optimizing/pc_relative_fixups_x86.cc b/compiler/optimizing/pc_relative_fixups_x86.cc
index d3da3d3ce1..c2d5ec7b60 100644
--- a/compiler/optimizing/pc_relative_fixups_x86.cc
+++ b/compiler/optimizing/pc_relative_fixups_x86.cc
@@ -62,7 +62,7 @@ class PCRelativeHandlerVisitor final : public HGraphVisitor {
}
void VisitReturn(HReturn* ret) override {
- HConstant* value = ret->InputAt(0)->AsConstant();
+ HConstant* value = ret->InputAt(0)->AsConstantOrNull();
if ((value != nullptr && DataType::IsFloatingPointType(value->GetType()))) {
ReplaceInput(ret, value, 0, true);
}
@@ -95,7 +95,7 @@ class PCRelativeHandlerVisitor final : public HGraphVisitor {
}
void BinaryFP(HBinaryOperation* bin) {
- HConstant* rhs = bin->InputAt(1)->AsConstant();
+ HConstant* rhs = bin->InputAt(1)->AsConstantOrNull();
if (rhs != nullptr && DataType::IsFloatingPointType(rhs->GetType())) {
ReplaceInput(bin, rhs, 1, false);
}
@@ -193,7 +193,7 @@ class PCRelativeHandlerVisitor final : public HGraphVisitor {
}
void HandleInvoke(HInvoke* invoke) {
- HInvokeStaticOrDirect* invoke_static_or_direct = invoke->AsInvokeStaticOrDirect();
+ HInvokeStaticOrDirect* invoke_static_or_direct = invoke->AsInvokeStaticOrDirectOrNull();
// If this is an invoke-static/-direct with PC-relative addressing (within boot image
// or using .bss or .data.bimg.rel.ro), we need the PC-relative address base.
@@ -207,7 +207,7 @@ class PCRelativeHandlerVisitor final : public HGraphVisitor {
base_added = true;
}
- HInvokeInterface* invoke_interface = invoke->AsInvokeInterface();
+ HInvokeInterface* invoke_interface = invoke->AsInvokeInterfaceOrNull();
if (invoke_interface != nullptr &&
IsPcRelativeMethodLoadKind(invoke_interface->GetHiddenArgumentLoadKind())) {
HX86ComputeBaseMethodAddress* method_address = GetPCRelativeBasePointer(invoke);
@@ -219,7 +219,7 @@ class PCRelativeHandlerVisitor final : public HGraphVisitor {
// Ensure that we can load FP arguments from the constant area.
HInputsRef inputs = invoke->GetInputs();
for (size_t i = 0; i < inputs.size(); i++) {
- HConstant* input = inputs[i]->AsConstant();
+ HConstant* input = inputs[i]->AsConstantOrNull();
if (input != nullptr && DataType::IsFloatingPointType(input->GetType())) {
ReplaceInput(invoke, input, i, true);
}
@@ -235,6 +235,9 @@ class PCRelativeHandlerVisitor final : public HGraphVisitor {
LOG(FATAL) << "Unreachable min/max/abs: intrinsics should have been lowered "
"to IR nodes by instruction simplifier";
UNREACHABLE();
+ case Intrinsics::kByteValueOf:
+ case Intrinsics::kShortValueOf:
+ case Intrinsics::kCharacterValueOf:
case Intrinsics::kIntegerValueOf:
// This intrinsic can be call free if it loads the address of the boot image object.
// If we're compiling PIC, we need the address base for loading from .data.bimg.rel.ro.
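
Several hunks in this change (AsConstantOrNull, AsInvokeStaticOrDirectOrNull, AsIfOrNull, AsParallelMoveOrNull, ...) move null-checked call sites from As<Type>() to As<Type>OrNull(), which implies the checked downcast and the null-returning downcast are now separate entry points. A small standalone sketch of that idiom, using hypothetical classes rather than ART's HInstruction hierarchy:

// Sketch of the downcast split: AsConstant() asserts the dynamic type, while
// AsConstantOrNull() returns nullptr when the type does not match. Hypothetical
// classes, not ART's node hierarchy.
#include <cassert>
#include <cstdio>

class Node {
 public:
  virtual ~Node() = default;
  virtual bool IsConstant() const { return false; }

  class Constant* AsConstant();        // Caller guarantees IsConstant().
  class Constant* AsConstantOrNull();  // Caller checks the result for null.
};

class Constant : public Node {
 public:
  bool IsConstant() const override { return true; }
  int value = 42;
};

Constant* Node::AsConstant() {
  assert(IsConstant());  // Mirrors a DCHECK: misuse is a bug, not a branch.
  return static_cast<Constant*>(this);
}

Constant* Node::AsConstantOrNull() {
  return IsConstant() ? static_cast<Constant*>(this) : nullptr;
}

int main() {
  Node plain;
  Constant c;
  // The null-returning form is the right tool when the type is merely possible.
  if (Constant* k = plain.AsConstantOrNull()) {
    std::printf("constant %d\n", k->value);
  } else {
    std::printf("not a constant\n");
  }
  std::printf("known constant %d\n", c.AsConstant()->value);
  return 0;
}
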
diff --git a/compiler/optimizing/prepare_for_register_allocation.cc b/compiler/optimizing/prepare_for_register_allocation.cc
index 398b10abf3..1e99732d03 100644
--- a/compiler/optimizing/prepare_for_register_allocation.cc
+++ b/compiler/optimizing/prepare_for_register_allocation.cc
@@ -180,6 +180,11 @@ bool PrepareForRegisterAllocation::CanEmitConditionAt(HCondition* condition,
return false;
}
+ if (GetGraph()->IsCompilingBaseline() && compiler_options_.ProfileBranches()) {
+ // To do branch profiling, we cannot emit conditions at use site.
+ return false;
+ }
+
if (user->IsIf() || user->IsDeoptimize()) {
return true;
}
diff --git a/compiler/optimizing/profiling_info_builder.cc b/compiler/optimizing/profiling_info_builder.cc
new file mode 100644
index 0000000000..7888753830
--- /dev/null
+++ b/compiler/optimizing/profiling_info_builder.cc
@@ -0,0 +1,90 @@
+/*
+ * Copyright (C) 2023 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "profiling_info_builder.h"
+
+#include "art_method-inl.h"
+#include "code_generator.h"
+#include "driver/compiler_options.h"
+#include "dex/code_item_accessors-inl.h"
+#include "jit/profiling_info.h"
+#include "optimizing_compiler_stats.h"
+#include "scoped_thread_state_change-inl.h"
+
+namespace art HIDDEN {
+
+void ProfilingInfoBuilder::Run() {
+ DCHECK_EQ(GetGraph()->GetProfilingInfo(), nullptr);
+ // Order does not matter.
+ for (HBasicBlock* block : GetGraph()->GetReversePostOrder()) {
+ // No need to visit the phis.
+ for (HInstructionIteratorHandleChanges inst_it(block->GetInstructions()); !inst_it.Done();
+ inst_it.Advance()) {
+ inst_it.Current()->Accept(this);
+ }
+ }
+
+ ScopedObjectAccess soa(Thread::Current());
+ GetGraph()->SetProfilingInfo(
+ ProfilingInfo::Create(soa.Self(), GetGraph()->GetArtMethod(), inline_caches_));
+}
+
+void ProfilingInfoBuilder::HandleInvoke(HInvoke* invoke) {
+ DCHECK(!invoke->GetEnvironment()->IsFromInlinedInvoke());
+ if (IsInlineCacheUseful(invoke, codegen_)) {
+ inline_caches_.push_back(invoke->GetDexPc());
+ }
+}
+
+void ProfilingInfoBuilder::VisitInvokeInterface(HInvokeInterface* invoke) {
+ HandleInvoke(invoke);
+}
+
+void ProfilingInfoBuilder::VisitInvokeVirtual(HInvokeVirtual* invoke) {
+ HandleInvoke(invoke);
+}
+
+bool ProfilingInfoBuilder::IsInlineCacheUseful(HInvoke* invoke, CodeGenerator* codegen) {
+ DCHECK(invoke->IsInvokeVirtual() || invoke->IsInvokeInterface());
+ if (codegen->IsImplementedIntrinsic(invoke)) {
+ return false;
+ }
+ if (!invoke->GetBlock()->GetGraph()->IsCompilingBaseline()) {
+ return false;
+ }
+ if (Runtime::Current()->IsAotCompiler()) {
+ return false;
+ }
+ if (invoke->InputAt(0)->GetReferenceTypeInfo().IsExact()) {
+ return false;
+ }
+ if (invoke->GetResolvedMethod() != nullptr) {
+ ScopedObjectAccess soa(Thread::Current());
+ if (invoke->GetResolvedMethod()->IsFinal() ||
+ invoke->GetResolvedMethod()->GetDeclaringClass()->IsFinal()) {
+ return false;
+ }
+ }
+ return true;
+}
+
+InlineCache* ProfilingInfoBuilder::GetInlineCache(ProfilingInfo* info, HInvoke* instruction) {
+ DCHECK(!instruction->GetEnvironment()->IsFromInlinedInvoke());
+ ScopedObjectAccess soa(Thread::Current());
+ return info->GetInlineCache(instruction->GetDexPc());
+}
+
+} // namespace art
diff --git a/compiler/optimizing/profiling_info_builder.h b/compiler/optimizing/profiling_info_builder.h
new file mode 100644
index 0000000000..2185b0eed3
--- /dev/null
+++ b/compiler/optimizing/profiling_info_builder.h
@@ -0,0 +1,64 @@
+/*
+ * Copyright (C) 2023 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_COMPILER_OPTIMIZING_PROFILING_INFO_BUILDER_H_
+#define ART_COMPILER_OPTIMIZING_PROFILING_INFO_BUILDER_H_
+
+#include "base/macros.h"
+#include "nodes.h"
+
+namespace art HIDDEN {
+
+class CodeGenerator;
+class CompilerOptions;
+class InlineCache;
+class ProfilingInfo;
+
+class ProfilingInfoBuilder : public HGraphDelegateVisitor {
+ public:
+ ProfilingInfoBuilder(HGraph* graph,
+ const CompilerOptions& compiler_options,
+ CodeGenerator* codegen,
+ OptimizingCompilerStats* stats = nullptr)
+ : HGraphDelegateVisitor(graph, stats),
+ codegen_(codegen),
+ compiler_options_(compiler_options) {}
+
+ void Run();
+
+ static constexpr const char* kProfilingInfoBuilderPassName =
+ "profiling_info_builder";
+
+ static InlineCache* GetInlineCache(ProfilingInfo* info, HInvoke* invoke);
+ static bool IsInlineCacheUseful(HInvoke* invoke, CodeGenerator* codegen);
+
+ private:
+ void VisitInvokeVirtual(HInvokeVirtual* invoke) override;
+ void VisitInvokeInterface(HInvokeInterface* invoke) override;
+
+ void HandleInvoke(HInvoke* invoke);
+
+ CodeGenerator* codegen_;
+ [[maybe_unused]] const CompilerOptions& compiler_options_;
+ std::vector<uint32_t> inline_caches_;
+
+ DISALLOW_COPY_AND_ASSIGN(ProfilingInfoBuilder);
+};
+
+} // namespace art
+
+
+#endif // ART_COMPILER_OPTIMIZING_PROFILING_INFO_BUILDER_H_
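
Tying the new files to the TryCompile hunk earlier: for baseline JIT compiles without an existing ProfilingInfo, ProfilingInfoBuilder walks the graph, keeps the dex pcs of virtual/interface invokes for which IsInlineCacheUseful holds, and creates the ProfilingInfo from that list. The standalone sketch below mirrors that selection loop with mock types; it condenses the real checks (baseline-only, not AOT, not an implemented intrinsic) down to the per-invoke ones and is not ART code.

// Standalone sketch (mock types, not ART's) of the inline-cache selection:
// walk the invokes, keep the dex pcs of those worth an inline cache, and hand
// that list to the profiling-info factory.
#include <cstdint>
#include <cstdio>
#include <vector>

struct InvokeSketch {
  uint32_t dex_pc;
  bool is_virtual_or_interface;
  bool receiver_type_is_exact;    // Exact receiver type => no cache needed.
  bool resolved_method_is_final;  // Final method or class => no cache needed.
};

static bool IsInlineCacheUsefulSketch(const InvokeSketch& invoke) {
  return invoke.is_virtual_or_interface &&
         !invoke.receiver_type_is_exact &&
         !invoke.resolved_method_is_final;
}

int main() {
  std::vector<InvokeSketch> invokes = {
      {/*dex_pc=*/4, true, false, false},  // useful
      {/*dex_pc=*/10, true, true, false},  // exact receiver: skip
      {/*dex_pc=*/22, true, false, true},  // final target: skip
  };
  std::vector<uint32_t> inline_cache_dex_pcs;
  for (const InvokeSketch& invoke : invokes) {
    if (IsInlineCacheUsefulSketch(invoke)) {
      inline_cache_dex_pcs.push_back(invoke.dex_pc);
    }
  }
  // In the real pass, ProfilingInfo::Create receives this list.
  for (uint32_t dex_pc : inline_cache_dex_pcs) {
    std::printf("inline cache slot at dex pc %u\n", dex_pc);
  }
  return 0;
}
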
diff --git a/compiler/optimizing/reference_type_propagation.cc b/compiler/optimizing/reference_type_propagation.cc
index 91bae5f49b..6f44d45ed4 100644
--- a/compiler/optimizing/reference_type_propagation.cc
+++ b/compiler/optimizing/reference_type_propagation.cc
@@ -63,7 +63,6 @@ class ReferenceTypePropagation::RTPVisitor final : public HGraphDelegateVisitor
void VisitLoadException(HLoadException* instr) override;
void VisitNewArray(HNewArray* instr) override;
void VisitParameterValue(HParameterValue* instr) override;
- void VisitPredicatedInstanceFieldGet(HPredicatedInstanceFieldGet* instr) override;
void VisitInstanceFieldGet(HInstanceFieldGet* instr) override;
void VisitStaticFieldGet(HStaticFieldGet* instr) override;
void VisitUnresolvedInstanceFieldGet(HUnresolvedInstanceFieldGet* instr) override;
@@ -254,7 +253,7 @@ static void BoundTypeForClassCheck(HInstruction* check) {
HInstruction* input_two = compare->InputAt(1);
HLoadClass* load_class = input_one->IsLoadClass()
? input_one->AsLoadClass()
- : input_two->AsLoadClass();
+ : input_two->AsLoadClassOrNull();
if (load_class == nullptr) {
return;
}
@@ -266,7 +265,7 @@ static void BoundTypeForClassCheck(HInstruction* check) {
}
HInstruction* field_get = (load_class == input_one) ? input_two : input_one;
- if (!field_get->IsInstanceFieldGet() && !field_get->IsPredicatedInstanceFieldGet()) {
+ if (!field_get->IsInstanceFieldGet()) {
return;
}
HInstruction* receiver = field_get->InputAt(0);
@@ -335,7 +334,7 @@ void ReferenceTypePropagation::RTPVisitor::VisitBasicBlock(HBasicBlock* block) {
}
void ReferenceTypePropagation::RTPVisitor::BoundTypeForIfNotNull(HBasicBlock* block) {
- HIf* ifInstruction = block->GetLastInstruction()->AsIf();
+ HIf* ifInstruction = block->GetLastInstruction()->AsIfOrNull();
if (ifInstruction == nullptr) {
return;
}
@@ -453,7 +452,7 @@ static bool MatchIfInstanceOf(HIf* ifInstruction,
// If that's the case insert an HBoundType instruction to bound the type of `x`
// to `ClassX` in the scope of the dominated blocks.
void ReferenceTypePropagation::RTPVisitor::BoundTypeForIfInstanceOf(HBasicBlock* block) {
- HIf* ifInstruction = block->GetLastInstruction()->AsIf();
+ HIf* ifInstruction = block->GetLastInstruction()->AsIfOrNull();
if (ifInstruction == nullptr) {
return;
}
@@ -539,9 +538,14 @@ void ReferenceTypePropagation::RTPVisitor::UpdateReferenceTypeInfo(HInstruction*
DCHECK_EQ(instr->GetType(), DataType::Type::kReference);
ScopedObjectAccess soa(Thread::Current());
- ObjPtr<mirror::DexCache> dex_cache = FindDexCacheWithHint(soa.Self(), dex_file, hint_dex_cache_);
- ObjPtr<mirror::Class> klass = Runtime::Current()->GetClassLinker()->LookupResolvedType(
- type_idx, dex_cache, dex_cache->GetClassLoader());
+ StackHandleScope<2> hs(soa.Self());
+ Handle<mirror::DexCache> dex_cache =
+ hs.NewHandle(FindDexCacheWithHint(soa.Self(), dex_file, hint_dex_cache_));
+ Handle<mirror::ClassLoader> loader = hs.NewHandle(dex_cache->GetClassLoader());
+ ObjPtr<mirror::Class> klass = Runtime::Current()->GetClassLinker()->ResolveType(
+ type_idx, dex_cache, loader);
+ DCHECK_EQ(klass == nullptr, soa.Self()->IsExceptionPending());
+ soa.Self()->ClearException(); // Clean up the exception left by type resolution if any.
SetClassAsTypeInfo(instr, klass, is_exact);
}
@@ -582,11 +586,6 @@ void ReferenceTypePropagation::RTPVisitor::UpdateFieldAccessTypeInfo(HInstructio
SetClassAsTypeInfo(instr, klass, /* is_exact= */ false);
}
-void ReferenceTypePropagation::RTPVisitor::VisitPredicatedInstanceFieldGet(
- HPredicatedInstanceFieldGet* instr) {
- UpdateFieldAccessTypeInfo(instr, instr->GetFieldInfo());
-}
-
void ReferenceTypePropagation::RTPVisitor::VisitInstanceFieldGet(HInstanceFieldGet* instr) {
UpdateFieldAccessTypeInfo(instr, instr->GetFieldInfo());
}
@@ -704,7 +703,7 @@ void ReferenceTypePropagation::RTPVisitor::VisitBoundType(HBoundType* instr) {
}
void ReferenceTypePropagation::RTPVisitor::VisitCheckCast(HCheckCast* check_cast) {
- HBoundType* bound_type = check_cast->GetNext()->AsBoundType();
+ HBoundType* bound_type = check_cast->GetNext()->AsBoundTypeOrNull();
if (bound_type == nullptr || bound_type->GetUpperBound().IsValid()) {
// The next instruction is not an uninitialized BoundType. This must be
// an RTP pass after SsaBuilder and we do not need to do anything.
diff --git a/compiler/optimizing/reference_type_propagation_test.cc b/compiler/optimizing/reference_type_propagation_test.cc
index 2b012fcd67..ffd94e56b5 100644
--- a/compiler/optimizing/reference_type_propagation_test.cc
+++ b/compiler/optimizing/reference_type_propagation_test.cc
@@ -468,7 +468,7 @@ TEST_P(LoopReferenceTypePropagationTestGroup, RunVisitTest) {
LoopOptions lo(GetParam());
std::default_random_engine g(
lo.initial_null_state_ != InitialNullState::kTrueRandom ? 42 : std::rand());
- std::uniform_int_distribution<bool> uid(false, true);
+ std::uniform_int_distribution<int> uid(0, 1);
RunVisitListTest([&](std::vector<HInstruction*>& lst, HInstruction* null_input) {
auto pred_null = false;
auto next_null = [&]() {
@@ -482,7 +482,7 @@ TEST_P(LoopReferenceTypePropagationTestGroup, RunVisitTest) {
return pred_null;
case InitialNullState::kRandomSetSeed:
case InitialNullState::kTrueRandom:
- return uid(g);
+ return uid(g) > 0;
}
};
HPhi* nulled_phi = lo.null_insertion_ >= 0 ? lst[lo.null_insertion_]->AsPhi() : nullptr;
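
The reference_type_propagation_test.cc fix above replaces std::uniform_int_distribution<bool>, whose IntType parameter is restricted to the standard integer types (bool is not allowed, so the old code relied on undefined behavior), with an int distribution over {0, 1} compared against zero. A short standalone sketch of that fix next to the other common option, std::bernoulli_distribution:

// Two portable ways to draw a random bool; uniform_int_distribution<bool> is
// not one of them, since bool is not an allowed IntType for that template.
#include <cstdio>
#include <random>

int main() {
  std::default_random_engine g(42);

  // What the test now does: an int distribution over {0, 1}, compared to 0.
  std::uniform_int_distribution<int> uid(0, 1);
  bool a = uid(g) > 0;

  // Equivalent standard alternative: a fair Bernoulli trial.
  std::bernoulli_distribution coin(0.5);
  bool b = coin(g);

  std::printf("%d %d\n", static_cast<int>(a), static_cast<int>(b));
  return 0;
}
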
diff --git a/compiler/optimizing/register_allocation_resolver.cc b/compiler/optimizing/register_allocation_resolver.cc
index 53e11f2c3d..a4b1698b8d 100644
--- a/compiler/optimizing/register_allocation_resolver.cc
+++ b/compiler/optimizing/register_allocation_resolver.cc
@@ -531,9 +531,9 @@ void RegisterAllocationResolver::AddInputMoveFor(HInstruction* input,
HInstruction* previous = user->GetPrevious();
HParallelMove* move = nullptr;
- if (previous == nullptr
- || !previous->IsParallelMove()
- || previous->GetLifetimePosition() < user->GetLifetimePosition()) {
+ if (previous == nullptr ||
+ !previous->IsParallelMove() ||
+ previous->GetLifetimePosition() < user->GetLifetimePosition()) {
move = new (allocator_) HParallelMove(allocator_);
move->SetLifetimePosition(user->GetLifetimePosition());
user->GetBlock()->InsertInstructionBefore(move, user);
@@ -593,7 +593,7 @@ void RegisterAllocationResolver::InsertParallelMoveAt(size_t position,
} else if (IsInstructionEnd(position)) {
// Move must happen after the instruction.
DCHECK(!at->IsControlFlow());
- move = at->GetNext()->AsParallelMove();
+ move = at->GetNext()->AsParallelMoveOrNull();
// This is a parallel move for connecting siblings in a same block. We need to
// differentiate it with moves for connecting blocks, and input moves.
if (move == nullptr || move->GetLifetimePosition() > position) {
@@ -604,15 +604,15 @@ void RegisterAllocationResolver::InsertParallelMoveAt(size_t position,
} else {
// Move must happen before the instruction.
HInstruction* previous = at->GetPrevious();
- if (previous == nullptr
- || !previous->IsParallelMove()
- || previous->GetLifetimePosition() != position) {
+ if (previous == nullptr ||
+ !previous->IsParallelMove() ||
+ previous->GetLifetimePosition() != position) {
// If the previous is a parallel move, then its position must be lower
// than the given `position`: it was added just after the non-parallel
// move instruction that precedes `instruction`.
- DCHECK(previous == nullptr
- || !previous->IsParallelMove()
- || previous->GetLifetimePosition() < position);
+ DCHECK(previous == nullptr ||
+ !previous->IsParallelMove() ||
+ previous->GetLifetimePosition() < position);
move = new (allocator_) HParallelMove(allocator_);
move->SetLifetimePosition(position);
at->GetBlock()->InsertInstructionBefore(move, at);
@@ -643,8 +643,9 @@ void RegisterAllocationResolver::InsertParallelMoveAtExitOf(HBasicBlock* block,
// This is a parallel move for connecting blocks. We need to differentiate
// it with moves for connecting siblings in a same block, and output moves.
size_t position = last->GetLifetimePosition();
- if (previous == nullptr || !previous->IsParallelMove()
- || previous->AsParallelMove()->GetLifetimePosition() != position) {
+ if (previous == nullptr ||
+ !previous->IsParallelMove() ||
+ previous->AsParallelMove()->GetLifetimePosition() != position) {
move = new (allocator_) HParallelMove(allocator_);
move->SetLifetimePosition(position);
block->InsertInstructionBefore(move, last);
@@ -662,7 +663,7 @@ void RegisterAllocationResolver::InsertParallelMoveAtEntryOf(HBasicBlock* block,
if (source.Equals(destination)) return;
HInstruction* first = block->GetFirstInstruction();
- HParallelMove* move = first->AsParallelMove();
+ HParallelMove* move = first->AsParallelMoveOrNull();
size_t position = block->GetLifetimeStart();
// This is a parallel move for connecting blocks. We need to differentiate
// it with moves for connecting siblings in a same block, and input moves.
@@ -686,7 +687,7 @@ void RegisterAllocationResolver::InsertMoveAfter(HInstruction* instruction,
}
size_t position = instruction->GetLifetimePosition() + 1;
- HParallelMove* move = instruction->GetNext()->AsParallelMove();
+ HParallelMove* move = instruction->GetNext()->AsParallelMoveOrNull();
// This is a parallel move for moving the output of an instruction. We need
// to differentiate with input moves, moves for connecting siblings in a
// same block, and moves for connecting blocks.
diff --git a/compiler/optimizing/register_allocator.cc b/compiler/optimizing/register_allocator.cc
index e4c2d74908..f8b057d4a8 100644
--- a/compiler/optimizing/register_allocator.cc
+++ b/compiler/optimizing/register_allocator.cc
@@ -23,7 +23,6 @@
#include "base/scoped_arena_containers.h"
#include "base/bit_vector-inl.h"
#include "code_generator.h"
-#include "register_allocator_graph_color.h"
#include "register_allocator_linear_scan.h"
#include "ssa_liveness_analysis.h"
@@ -45,8 +44,8 @@ std::unique_ptr<RegisterAllocator> RegisterAllocator::Create(ScopedArenaAllocato
return std::unique_ptr<RegisterAllocator>(
new (allocator) RegisterAllocatorLinearScan(allocator, codegen, analysis));
case kRegisterAllocatorGraphColor:
- return std::unique_ptr<RegisterAllocator>(
- new (allocator) RegisterAllocatorGraphColor(allocator, codegen, analysis));
+ LOG(FATAL) << "Graph coloring register allocator has been removed.";
+ UNREACHABLE();
default:
LOG(FATAL) << "Invalid register allocation strategy: " << strategy;
UNREACHABLE();
diff --git a/compiler/optimizing/register_allocator_graph_color.cc b/compiler/optimizing/register_allocator_graph_color.cc
deleted file mode 100644
index a7c891d4e7..0000000000
--- a/compiler/optimizing/register_allocator_graph_color.cc
+++ /dev/null
@@ -1,2086 +0,0 @@
-/*
- * Copyright (C) 2016 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "register_allocator_graph_color.h"
-
-#include "code_generator.h"
-#include "linear_order.h"
-#include "register_allocation_resolver.h"
-#include "ssa_liveness_analysis.h"
-#include "thread-current-inl.h"
-
-namespace art HIDDEN {
-
-// Highest number of registers that we support for any platform. This can be used for std::bitset,
-// for example, which needs to know its size at compile time.
-static constexpr size_t kMaxNumRegs = 32;
-
-// The maximum number of graph coloring attempts before triggering a DCHECK.
-// This is meant to catch changes to the graph coloring algorithm that undermine its forward
-// progress guarantees. Forward progress for the algorithm means splitting live intervals on
-// every graph coloring attempt so that eventually the interference graph will be sparse enough
-// to color. The main threat to forward progress is trying to split short intervals which cannot be
-// split further; this could cause infinite looping because the interference graph would never
-// change. This is avoided by prioritizing short intervals before long ones, so that long
-// intervals are split when coloring fails.
-static constexpr size_t kMaxGraphColoringAttemptsDebug = 100;
-
-// We always want to avoid spilling inside loops.
-static constexpr size_t kLoopSpillWeightMultiplier = 10;
-
-// If we avoid moves in single jump blocks, we can avoid jumps to jumps.
-static constexpr size_t kSingleJumpBlockWeightMultiplier = 2;
-
-// We avoid moves in blocks that dominate the exit block, since these blocks will
-// be executed on every path through the method.
-static constexpr size_t kDominatesExitBlockWeightMultiplier = 2;
-
-enum class CoalesceKind {
- kAdjacentSibling, // Prevents moves at interval split points.
- kFixedOutputSibling, // Prevents moves from a fixed output location.
- kFixedInput, // Prevents moves into a fixed input location.
- kNonlinearControlFlow, // Prevents moves between blocks.
- kPhi, // Prevents phi resolution moves.
- kFirstInput, // Prevents a single input move.
- kAnyInput, // May lead to better instruction selection / smaller encodings.
-};
-
-std::ostream& operator<<(std::ostream& os, const CoalesceKind& kind) {
- return os << static_cast<typename std::underlying_type<CoalesceKind>::type>(kind);
-}
-
-static size_t LoopDepthAt(HBasicBlock* block) {
- HLoopInformation* loop_info = block->GetLoopInformation();
- size_t depth = 0;
- while (loop_info != nullptr) {
- ++depth;
- loop_info = loop_info->GetPreHeader()->GetLoopInformation();
- }
- return depth;
-}
-
-// Return the runtime cost of inserting a move instruction at the specified location.
-static size_t CostForMoveAt(size_t position, const SsaLivenessAnalysis& liveness) {
- HBasicBlock* block = liveness.GetBlockFromPosition(position / 2);
- DCHECK(block != nullptr);
- size_t cost = 1;
- if (block->IsSingleJump()) {
- cost *= kSingleJumpBlockWeightMultiplier;
- }
- if (block->Dominates(block->GetGraph()->GetExitBlock())) {
- cost *= kDominatesExitBlockWeightMultiplier;
- }
- for (size_t loop_depth = LoopDepthAt(block); loop_depth > 0; --loop_depth) {
- cost *= kLoopSpillWeightMultiplier;
- }
- return cost;
-}
-
-// In general, we estimate coalesce priority by whether it will definitely avoid a move,
-// and by how likely it is to create an interference graph that's harder to color.
-static size_t ComputeCoalescePriority(CoalesceKind kind,
- size_t position,
- const SsaLivenessAnalysis& liveness) {
- if (kind == CoalesceKind::kAnyInput) {
- // This type of coalescing can affect instruction selection, but not moves, so we
- // give it the lowest priority.
- return 0;
- } else {
- return CostForMoveAt(position, liveness);
- }
-}
-
-enum class CoalesceStage {
- kWorklist, // Currently in the iterative coalescing worklist.
- kActive, // Not in a worklist, but could be considered again during iterative coalescing.
- kInactive, // No longer considered until last-chance coalescing.
- kDefunct, // Either the two nodes interfere, or have already been coalesced.
-};
-
-std::ostream& operator<<(std::ostream& os, const CoalesceStage& stage) {
- return os << static_cast<typename std::underlying_type<CoalesceStage>::type>(stage);
-}
-
-// Represents a coalesce opportunity between two nodes.
-struct CoalesceOpportunity : public ArenaObject<kArenaAllocRegisterAllocator> {
- CoalesceOpportunity(InterferenceNode* a,
- InterferenceNode* b,
- CoalesceKind kind,
- size_t position,
- const SsaLivenessAnalysis& liveness)
- : node_a(a),
- node_b(b),
- stage(CoalesceStage::kWorklist),
- priority(ComputeCoalescePriority(kind, position, liveness)) {}
-
- // Compare two coalesce opportunities based on their priority.
- // Return true if lhs has a lower priority than that of rhs.
- static bool CmpPriority(const CoalesceOpportunity* lhs,
- const CoalesceOpportunity* rhs) {
- return lhs->priority < rhs->priority;
- }
-
- InterferenceNode* const node_a;
- InterferenceNode* const node_b;
-
- // The current stage of this coalesce opportunity, indicating whether it is in a worklist,
- // and whether it should still be considered.
- CoalesceStage stage;
-
- // The priority of this coalesce opportunity, based on heuristics.
- const size_t priority;
-};
-
-enum class NodeStage {
- kInitial, // Uninitialized.
- kPrecolored, // Marks fixed nodes.
- kSafepoint, // Marks safepoint nodes.
- kPrunable, // Marks uncolored nodes in the interference graph.
- kSimplifyWorklist, // Marks non-move-related nodes with degree less than the number of registers.
- kFreezeWorklist, // Marks move-related nodes with degree less than the number of registers.
- kSpillWorklist, // Marks nodes with degree greater or equal to the number of registers.
- kPruned // Marks nodes already pruned from the interference graph.
-};
-
-std::ostream& operator<<(std::ostream& os, const NodeStage& stage) {
- return os << static_cast<typename std::underlying_type<NodeStage>::type>(stage);
-}
-
-// Returns the estimated cost of spilling a particular live interval.
-static float ComputeSpillWeight(LiveInterval* interval, const SsaLivenessAnalysis& liveness) {
- if (interval->HasRegister()) {
- // Intervals with a fixed register cannot be spilled.
- return std::numeric_limits<float>::min();
- }
-
- size_t length = interval->GetLength();
- if (length == 1) {
- // Tiny intervals should have maximum priority, since they cannot be split any further.
- return std::numeric_limits<float>::max();
- }
-
- size_t use_weight = 0;
- if (interval->GetDefinedBy() != nullptr && interval->DefinitionRequiresRegister()) {
- // Cost for spilling at a register definition point.
- use_weight += CostForMoveAt(interval->GetStart() + 1, liveness);
- }
-
- // Process uses in the range (interval->GetStart(), interval->GetEnd()], i.e.
- // [interval->GetStart() + 1, interval->GetEnd() + 1)
- auto matching_use_range = FindMatchingUseRange(interval->GetUses().begin(),
- interval->GetUses().end(),
- interval->GetStart() + 1u,
- interval->GetEnd() + 1u);
- for (const UsePosition& use : matching_use_range) {
- if (use.GetUser() != nullptr && use.RequiresRegister()) {
- // Cost for spilling at a register use point.
- use_weight += CostForMoveAt(use.GetUser()->GetLifetimePosition() - 1, liveness);
- }
- }
-
- // We divide by the length of the interval because we want to prioritize
- // short intervals; we do not benefit much if we split them further.
- return static_cast<float>(use_weight) / static_cast<float>(length);
-}
-
-// Interference nodes make up the interference graph, which is the primary data structure in
-// graph coloring register allocation. Each node represents a single live interval, and contains
-// a set of adjacent nodes corresponding to intervals overlapping with its own. To save memory,
-// pre-colored nodes never contain outgoing edges (only incoming ones).
-//
-// As nodes are pruned from the interference graph, incoming edges of the pruned node are removed,
-// but outgoing edges remain in order to later color the node based on the colors of its neighbors.
-//
-// Note that a pair interval is represented by a single node in the interference graph, which
-// essentially requires two colors. One consequence of this is that the degree of a node is not
-// necessarily equal to the number of adjacent nodes--instead, the degree reflects the maximum
-// number of colors with which a node could interfere. We model this by giving edges different
-// weights (1 or 2) to control how much it increases the degree of adjacent nodes.
-// For example, the edge between two single nodes will have weight 1. On the other hand,
-// the edge between a single node and a pair node will have weight 2. This is because the pair
-// node could block up to two colors for the single node, and because the single node could
-// block an entire two-register aligned slot for the pair node.
-// The degree is defined this way because we use it to decide whether a node is guaranteed a color,
-// and thus whether it is safe to prune it from the interference graph early on.
-class InterferenceNode : public ArenaObject<kArenaAllocRegisterAllocator> {
- public:
- InterferenceNode(LiveInterval* interval,
- const SsaLivenessAnalysis& liveness)
- : stage(NodeStage::kInitial),
- interval_(interval),
- adjacent_nodes_(nullptr),
- coalesce_opportunities_(nullptr),
- out_degree_(interval->HasRegister() ? std::numeric_limits<size_t>::max() : 0),
- alias_(this),
- spill_weight_(ComputeSpillWeight(interval, liveness)),
- requires_color_(interval->RequiresRegister()),
- needs_spill_slot_(false) {
- DCHECK(!interval->IsHighInterval()) << "Pair nodes should be represented by the low interval";
- }
-
- void AddInterference(InterferenceNode* other,
- bool guaranteed_not_interfering_yet,
- ScopedArenaDeque<ScopedArenaVector<InterferenceNode*>>* storage) {
- DCHECK(!IsPrecolored()) << "To save memory, fixed nodes should not have outgoing interferences";
- DCHECK_NE(this, other) << "Should not create self loops in the interference graph";
- DCHECK_EQ(this, alias_) << "Should not add interferences to a node that aliases another";
- DCHECK_NE(stage, NodeStage::kPruned);
- DCHECK_NE(other->stage, NodeStage::kPruned);
- if (adjacent_nodes_ == nullptr) {
- ScopedArenaVector<InterferenceNode*>::allocator_type adapter(storage->get_allocator());
- storage->emplace_back(adapter);
- adjacent_nodes_ = &storage->back();
- }
- if (guaranteed_not_interfering_yet) {
- DCHECK(!ContainsElement(GetAdjacentNodes(), other));
- adjacent_nodes_->push_back(other);
- out_degree_ += EdgeWeightWith(other);
- } else {
- if (!ContainsElement(GetAdjacentNodes(), other)) {
- adjacent_nodes_->push_back(other);
- out_degree_ += EdgeWeightWith(other);
- }
- }
- }
-
- void RemoveInterference(InterferenceNode* other) {
- DCHECK_EQ(this, alias_) << "Should not remove interferences from a coalesced node";
- DCHECK_EQ(other->stage, NodeStage::kPruned) << "Should only remove interferences when pruning";
- if (adjacent_nodes_ != nullptr) {
- auto it = std::find(adjacent_nodes_->begin(), adjacent_nodes_->end(), other);
- if (it != adjacent_nodes_->end()) {
- adjacent_nodes_->erase(it);
- out_degree_ -= EdgeWeightWith(other);
- }
- }
- }
-
- bool ContainsInterference(InterferenceNode* other) const {
- DCHECK(!IsPrecolored()) << "Should not query fixed nodes for interferences";
- DCHECK_EQ(this, alias_) << "Should not query a coalesced node for interferences";
- return ContainsElement(GetAdjacentNodes(), other);
- }
-
- LiveInterval* GetInterval() const {
- return interval_;
- }
-
- ArrayRef<InterferenceNode*> GetAdjacentNodes() const {
- return adjacent_nodes_ != nullptr
- ? ArrayRef<InterferenceNode*>(*adjacent_nodes_)
- : ArrayRef<InterferenceNode*>();
- }
-
- size_t GetOutDegree() const {
- // Pre-colored nodes have infinite degree.
- DCHECK_IMPLIES(IsPrecolored(), out_degree_ == std::numeric_limits<size_t>::max());
- return out_degree_;
- }
-
- void AddCoalesceOpportunity(CoalesceOpportunity* opportunity,
- ScopedArenaDeque<ScopedArenaVector<CoalesceOpportunity*>>* storage) {
- if (coalesce_opportunities_ == nullptr) {
- ScopedArenaVector<CoalesceOpportunity*>::allocator_type adapter(storage->get_allocator());
- storage->emplace_back(adapter);
- coalesce_opportunities_ = &storage->back();
- }
- coalesce_opportunities_->push_back(opportunity);
- }
-
- void ClearCoalesceOpportunities() {
- coalesce_opportunities_ = nullptr;
- }
-
- bool IsMoveRelated() const {
- for (CoalesceOpportunity* opportunity : GetCoalesceOpportunities()) {
- if (opportunity->stage == CoalesceStage::kWorklist ||
- opportunity->stage == CoalesceStage::kActive) {
- return true;
- }
- }
- return false;
- }
-
- // Return whether this node already has a color.
- // Used to find fixed nodes in the interference graph before coloring.
- bool IsPrecolored() const {
- return interval_->HasRegister();
- }
-
- bool IsPair() const {
- return interval_->HasHighInterval();
- }
-
- void SetAlias(InterferenceNode* rep) {
- DCHECK_NE(rep->stage, NodeStage::kPruned);
- DCHECK_EQ(this, alias_) << "Should only set a node's alias once";
- alias_ = rep;
- }
-
- InterferenceNode* GetAlias() {
- if (alias_ != this) {
- // Recurse in order to flatten tree of alias pointers.
- alias_ = alias_->GetAlias();
- }
- return alias_;
- }
-
- ArrayRef<CoalesceOpportunity*> GetCoalesceOpportunities() const {
- return coalesce_opportunities_ != nullptr
- ? ArrayRef<CoalesceOpportunity*>(*coalesce_opportunities_)
- : ArrayRef<CoalesceOpportunity*>();
- }
-
- float GetSpillWeight() const {
- return spill_weight_;
- }
-
- bool RequiresColor() const {
- return requires_color_;
- }
-
- // We give extra weight to edges adjacent to pair nodes. See the general comment on the
- // interference graph above.
- size_t EdgeWeightWith(const InterferenceNode* other) const {
- return (IsPair() || other->IsPair()) ? 2 : 1;
- }
-
- bool NeedsSpillSlot() const {
- return needs_spill_slot_;
- }
-
- void SetNeedsSpillSlot() {
- needs_spill_slot_ = true;
- }
-
- // The current stage of this node, indicating which worklist it belongs to.
- NodeStage stage;
-
- private:
- // The live interval that this node represents.
- LiveInterval* const interval_;
-
- // All nodes interfering with this one.
- // We use an unsorted vector as a set, since a tree or hash set is too heavy for the
- // set sizes that we encounter. Using a vector leads to much better performance.
- ScopedArenaVector<InterferenceNode*>* adjacent_nodes_; // Owned by ColoringIteration.
-
- // Interference nodes that this node should be coalesced with to reduce moves.
- ScopedArenaVector<CoalesceOpportunity*>* coalesce_opportunities_; // Owned by ColoringIteration.
-
- // The maximum number of colors with which this node could interfere. This could be more than
- // the number of adjacent nodes if this is a pair node, or if some adjacent nodes are pair nodes.
- // We use "out" degree because incoming edges come from nodes already pruned from the graph,
- // and do not affect the coloring of this node.
- // Pre-colored nodes are treated as having infinite degree.
- size_t out_degree_;
-
- // The node representing this node in the interference graph.
- // Initially set to `this`, and only changed if this node is coalesced into another.
- InterferenceNode* alias_;
-
- // The cost of splitting and spilling this interval to the stack.
- // Nodes with a higher spill weight should be prioritized when assigning registers.
- // This is essentially based on use density and location; short intervals with many uses inside
- // deeply nested loops have a high spill weight.
- const float spill_weight_;
-
- const bool requires_color_;
-
- bool needs_spill_slot_;
-
- DISALLOW_COPY_AND_ASSIGN(InterferenceNode);
-};
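
To make the weighted-degree convention above concrete, here is a minimal standalone sketch (with an invented ToyNode type rather than InterferenceNode): a node adjacent to one single node and one pair node has two neighbors but weighted degree three.

#include <cstddef>
#include <iostream>
#include <vector>

// ToyNode is invented for this sketch; it only models the is-pair flag and the
// weighted-degree rule, not the rest of InterferenceNode.
struct ToyNode {
  bool is_pair;
  std::vector<const ToyNode*> adjacent;

  size_t EdgeWeightWith(const ToyNode* other) const {
    return (is_pair || other->is_pair) ? 2u : 1u;
  }

  size_t Degree() const {
    size_t degree = 0;
    for (const ToyNode* adj : adjacent) {
      degree += EdgeWeightWith(adj);  // A pair neighbor can block two colors.
    }
    return degree;
  }
};

int main() {
  ToyNode single{/*is_pair=*/false, {}};
  ToyNode pair_node{/*is_pair=*/true, {}};
  ToyNode node{/*is_pair=*/false, {&single, &pair_node}};
  std::cout << node.Degree() << "\n";  // Prints 3: two neighbors, weighted degree 3.
  return 0;
}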
-
-// The order in which we color nodes is important. To guarantee forward progress,
-// we prioritize intervals that require registers, and after that we prioritize
-// short intervals. That way, if we fail to color a node, it either won't require a
-// register, or it will be a long interval that can be split in order to make the
-// interference graph sparser.
-// To improve code quality, we prioritize intervals used frequently in deeply nested loops.
-// (This metric is secondary to the forward progress requirements above.)
-// TODO: May also want to consider:
-// - Constants (since they can be rematerialized)
-// - Allocated spill slots
-static bool HasGreaterNodePriority(const InterferenceNode* lhs,
- const InterferenceNode* rhs) {
- // (1) Prioritize the node that requires a color.
- if (lhs->RequiresColor() != rhs->RequiresColor()) {
- return lhs->RequiresColor();
- }
-
- // (2) Prioritize the interval that has a higher spill weight.
- return lhs->GetSpillWeight() > rhs->GetSpillWeight();
-}
-
-// A ColoringIteration holds the many data structures needed for a single graph coloring attempt,
-// and provides methods for each phase of the attempt.
-class ColoringIteration {
- public:
- ColoringIteration(RegisterAllocatorGraphColor* register_allocator,
- ScopedArenaAllocator* allocator,
- bool processing_core_regs,
- size_t num_regs)
- : register_allocator_(register_allocator),
- allocator_(allocator),
- processing_core_regs_(processing_core_regs),
- num_regs_(num_regs),
- interval_node_map_(allocator->Adapter(kArenaAllocRegisterAllocator)),
- prunable_nodes_(allocator->Adapter(kArenaAllocRegisterAllocator)),
- pruned_nodes_(allocator->Adapter(kArenaAllocRegisterAllocator)),
- simplify_worklist_(allocator->Adapter(kArenaAllocRegisterAllocator)),
- freeze_worklist_(allocator->Adapter(kArenaAllocRegisterAllocator)),
- spill_worklist_(HasGreaterNodePriority, allocator->Adapter(kArenaAllocRegisterAllocator)),
- coalesce_worklist_(CoalesceOpportunity::CmpPriority,
- allocator->Adapter(kArenaAllocRegisterAllocator)),
- adjacent_nodes_links_(allocator->Adapter(kArenaAllocRegisterAllocator)),
- coalesce_opportunities_links_(allocator->Adapter(kArenaAllocRegisterAllocator)) {}
-
- // Use the intervals collected from instructions to construct an
- // interference graph mapping intervals to adjacency lists.
- // Also, collect synthesized safepoint nodes, used to keep
- // track of live intervals across safepoints.
- // TODO: Should build safepoints elsewhere.
- void BuildInterferenceGraph(const ScopedArenaVector<LiveInterval*>& intervals,
- const ScopedArenaVector<InterferenceNode*>& physical_nodes);
-
- // Add coalesce opportunities to interference nodes.
- void FindCoalesceOpportunities();
-
- // Prune nodes from the interference graph to be colored later. Build
- // a stack (pruned_nodes) containing these intervals in an order determined
- // by various heuristics.
- void PruneInterferenceGraph();
-
- // Process pruned_nodes_ to color the interference graph, spilling when
- // necessary. Returns true if successful. Else, some intervals have been
- // split, and the interference graph should be rebuilt for another attempt.
- bool ColorInterferenceGraph();
-
- // Return prunable nodes.
- // The register allocator will need to access prunable nodes after coloring
- // in order to tell the code generator which registers have been assigned.
- ArrayRef<InterferenceNode* const> GetPrunableNodes() const {
- return ArrayRef<InterferenceNode* const>(prunable_nodes_);
- }
-
- private:
- // Create a coalesce opportunity between two nodes.
- void CreateCoalesceOpportunity(InterferenceNode* a,
- InterferenceNode* b,
- CoalesceKind kind,
- size_t position);
-
- // Add an edge in the interference graph, if valid.
- // Note that `guaranteed_not_interfering_yet` is used to optimize adjacency set insertion
- // when possible.
- void AddPotentialInterference(InterferenceNode* from,
- InterferenceNode* to,
- bool guaranteed_not_interfering_yet,
- bool both_directions = true);
-
- // Invalidate all coalesce opportunities this node has, so that it (and possibly its neighbors)
- // may be pruned from the interference graph.
- void FreezeMoves(InterferenceNode* node);
-
- // Prune a node from the interference graph, updating worklists if necessary.
- void PruneNode(InterferenceNode* node);
-
- // Add coalesce opportunities associated with this node to the coalesce worklist.
- void EnableCoalesceOpportunities(InterferenceNode* node);
-
- // If needed, move `node` from the freeze worklist to the simplify worklist.
- void CheckTransitionFromFreezeWorklist(InterferenceNode* node);
-
- // Return true if `into` is colored, and `from` can be coalesced with `into` conservatively.
- bool PrecoloredHeuristic(InterferenceNode* from, InterferenceNode* into);
-
- // Return true if `from` and `into` are uncolored, and can be coalesced conservatively.
- bool UncoloredHeuristic(InterferenceNode* from, InterferenceNode* into);
-
- void Coalesce(CoalesceOpportunity* opportunity);
-
- // Merge `from` into `into` in the interference graph.
- void Combine(InterferenceNode* from, InterferenceNode* into);
-
- // A reference to the register allocator instance,
- // needed to split intervals and assign spill slots.
- RegisterAllocatorGraphColor* register_allocator_;
-
- // A scoped arena allocator used for a single graph coloring attempt.
- ScopedArenaAllocator* allocator_;
-
- const bool processing_core_regs_;
-
- const size_t num_regs_;
-
- // A map from live intervals to interference nodes.
- ScopedArenaHashMap<LiveInterval*, InterferenceNode*> interval_node_map_;
-
- // Uncolored nodes that should be pruned from the interference graph.
- ScopedArenaVector<InterferenceNode*> prunable_nodes_;
-
- // A stack of nodes pruned from the interference graph, waiting to be colored.
- ScopedArenaStdStack<InterferenceNode*> pruned_nodes_;
-
- // A queue containing low degree, non-move-related nodes that can be pruned immediately.
- ScopedArenaDeque<InterferenceNode*> simplify_worklist_;
-
- // A queue containing low degree, move-related nodes.
- ScopedArenaDeque<InterferenceNode*> freeze_worklist_;
-
- // A queue containing high degree nodes.
- // If we have to prune from the spill worklist, we cannot guarantee
- // the pruned node a color, so we order the worklist by priority.
- ScopedArenaPriorityQueue<InterferenceNode*, decltype(&HasGreaterNodePriority)> spill_worklist_;
-
- // A queue containing coalesce opportunities.
- // We order the coalesce worklist by priority, since some coalesce opportunities (e.g., those
- // inside of loops) are more important than others.
- ScopedArenaPriorityQueue<CoalesceOpportunity*,
- decltype(&CoalesceOpportunity::CmpPriority)> coalesce_worklist_;
-
- // Storage for links to adjacent nodes for interference nodes.
- // Using std::deque so that elements do not move when adding new ones.
- ScopedArenaDeque<ScopedArenaVector<InterferenceNode*>> adjacent_nodes_links_;
-
- // Storage for links to coalesce opportunities for interference nodes.
- // Using std::deque so that elements do not move when adding new ones.
- ScopedArenaDeque<ScopedArenaVector<CoalesceOpportunity*>> coalesce_opportunities_links_;
-
- DISALLOW_COPY_AND_ASSIGN(ColoringIteration);
-};
-
-static bool IsCoreInterval(LiveInterval* interval) {
- return !DataType::IsFloatingPointType(interval->GetType());
-}
-
-static size_t ComputeReservedArtMethodSlots(const CodeGenerator& codegen) {
- return static_cast<size_t>(InstructionSetPointerSize(codegen.GetInstructionSet())) / kVRegSize;
-}
-
-RegisterAllocatorGraphColor::RegisterAllocatorGraphColor(ScopedArenaAllocator* allocator,
- CodeGenerator* codegen,
- const SsaLivenessAnalysis& liveness,
- bool iterative_move_coalescing)
- : RegisterAllocator(allocator, codegen, liveness),
- iterative_move_coalescing_(iterative_move_coalescing),
- core_intervals_(allocator->Adapter(kArenaAllocRegisterAllocator)),
- fp_intervals_(allocator->Adapter(kArenaAllocRegisterAllocator)),
- temp_intervals_(allocator->Adapter(kArenaAllocRegisterAllocator)),
- safepoints_(allocator->Adapter(kArenaAllocRegisterAllocator)),
- physical_core_nodes_(allocator->Adapter(kArenaAllocRegisterAllocator)),
- physical_fp_nodes_(allocator->Adapter(kArenaAllocRegisterAllocator)),
- num_int_spill_slots_(0),
- num_double_spill_slots_(0),
- num_float_spill_slots_(0),
- num_long_spill_slots_(0),
- catch_phi_spill_slot_counter_(0),
- reserved_art_method_slots_(ComputeReservedArtMethodSlots(*codegen)),
- reserved_out_slots_(codegen->GetGraph()->GetMaximumNumberOfOutVRegs()) {
- // Before we ask for blocked registers, set them up in the code generator.
- codegen->SetupBlockedRegisters();
-
- // Initialize physical core register live intervals and blocked registers.
- // This includes globally blocked registers, such as the stack pointer.
- physical_core_nodes_.resize(codegen_->GetNumberOfCoreRegisters(), nullptr);
- for (size_t i = 0; i < codegen_->GetNumberOfCoreRegisters(); ++i) {
- LiveInterval* interval = LiveInterval::MakeFixedInterval(allocator_, i, DataType::Type::kInt32);
- physical_core_nodes_[i] = new (allocator_) InterferenceNode(interval, liveness);
- physical_core_nodes_[i]->stage = NodeStage::kPrecolored;
- core_intervals_.push_back(interval);
- if (codegen_->IsBlockedCoreRegister(i)) {
- interval->AddRange(0, liveness.GetMaxLifetimePosition());
- }
- }
- // Initialize physical floating point register live intervals and blocked registers.
- physical_fp_nodes_.resize(codegen_->GetNumberOfFloatingPointRegisters(), nullptr);
- for (size_t i = 0; i < codegen_->GetNumberOfFloatingPointRegisters(); ++i) {
- LiveInterval* interval =
- LiveInterval::MakeFixedInterval(allocator_, i, DataType::Type::kFloat32);
- physical_fp_nodes_[i] = new (allocator_) InterferenceNode(interval, liveness);
- physical_fp_nodes_[i]->stage = NodeStage::kPrecolored;
- fp_intervals_.push_back(interval);
- if (codegen_->IsBlockedFloatingPointRegister(i)) {
- interval->AddRange(0, liveness.GetMaxLifetimePosition());
- }
- }
-}
-
-RegisterAllocatorGraphColor::~RegisterAllocatorGraphColor() {}
-
-void RegisterAllocatorGraphColor::AllocateRegisters() {
- // (1) Collect and prepare live intervals.
- ProcessInstructions();
-
- for (bool processing_core_regs : {true, false}) {
- ScopedArenaVector<LiveInterval*>& intervals = processing_core_regs
- ? core_intervals_
- : fp_intervals_;
- size_t num_registers = processing_core_regs
- ? codegen_->GetNumberOfCoreRegisters()
- : codegen_->GetNumberOfFloatingPointRegisters();
-
- size_t attempt = 0;
- while (true) {
- ++attempt;
- DCHECK(attempt <= kMaxGraphColoringAttemptsDebug)
- << "Exceeded debug max graph coloring register allocation attempts. "
- << "This could indicate that the register allocator is not making forward progress, "
- << "which could be caused by prioritizing the wrong live intervals. (Short intervals "
- << "should be prioritized over long ones, because they cannot be split further.)";
-
- // Many data structures are cleared between graph coloring attempts, so we reduce
- // total memory usage by using a new scoped arena allocator for each attempt.
- ScopedArenaAllocator coloring_attempt_allocator(allocator_->GetArenaStack());
- ColoringIteration iteration(this,
- &coloring_attempt_allocator,
- processing_core_regs,
- num_registers);
-
- // (2) Build the interference graph.
- ScopedArenaVector<InterferenceNode*>& physical_nodes = processing_core_regs
- ? physical_core_nodes_
- : physical_fp_nodes_;
- iteration.BuildInterferenceGraph(intervals, physical_nodes);
-
- // (3) Add coalesce opportunities.
- // If we have tried coloring the graph a suspiciously high number of times, give
- // up on move coalescing, just in case the coalescing heuristics are not conservative.
- // (This situation will be caught if DCHECKs are turned on.)
- if (iterative_move_coalescing_ && attempt <= kMaxGraphColoringAttemptsDebug) {
- iteration.FindCoalesceOpportunities();
- }
-
- // (4) Prune all uncolored nodes from interference graph.
- iteration.PruneInterferenceGraph();
-
- // (5) Color pruned nodes based on interferences.
- bool successful = iteration.ColorInterferenceGraph();
-
- // We manually clear coalesce opportunities for physical nodes,
- // since they persist across coloring attempts.
- for (InterferenceNode* node : physical_core_nodes_) {
- node->ClearCoalesceOpportunities();
- }
- for (InterferenceNode* node : physical_fp_nodes_) {
- node->ClearCoalesceOpportunities();
- }
-
- if (successful) {
- // Assign spill slots.
- AllocateSpillSlots(iteration.GetPrunableNodes());
-
- // Tell the code generator which registers were allocated.
- // We only look at prunable_nodes because we already told the code generator about
- // fixed intervals while processing instructions. We also ignore the fixed intervals
- // placed at the top of catch blocks.
- for (InterferenceNode* node : iteration.GetPrunableNodes()) {
- LiveInterval* interval = node->GetInterval();
- if (interval->HasRegister()) {
- Location low_reg = processing_core_regs
- ? Location::RegisterLocation(interval->GetRegister())
- : Location::FpuRegisterLocation(interval->GetRegister());
- codegen_->AddAllocatedRegister(low_reg);
- if (interval->HasHighInterval()) {
- LiveInterval* high = interval->GetHighInterval();
- DCHECK(high->HasRegister());
- Location high_reg = processing_core_regs
- ? Location::RegisterLocation(high->GetRegister())
- : Location::FpuRegisterLocation(high->GetRegister());
- codegen_->AddAllocatedRegister(high_reg);
- }
- } else {
- DCHECK_IMPLIES(interval->HasHighInterval(),
- !interval->GetHighInterval()->HasRegister());
- }
- }
-
- break;
- }
- } // while unsuccessful
- } // for processing_core_regs
-
- // (6) Resolve locations and deconstruct SSA form.
- RegisterAllocationResolver(codegen_, liveness_)
- .Resolve(ArrayRef<HInstruction* const>(safepoints_),
- reserved_art_method_slots_ + reserved_out_slots_,
- num_int_spill_slots_,
- num_long_spill_slots_,
- num_float_spill_slots_,
- num_double_spill_slots_,
- catch_phi_spill_slot_counter_,
- ArrayRef<LiveInterval* const>(temp_intervals_));
-
- if (kIsDebugBuild) {
- Validate(/*log_fatal_on_failure*/ true);
- }
-}
-
-bool RegisterAllocatorGraphColor::Validate(bool log_fatal_on_failure) {
- for (bool processing_core_regs : {true, false}) {
- ScopedArenaAllocator allocator(allocator_->GetArenaStack());
- ScopedArenaVector<LiveInterval*> intervals(
- allocator.Adapter(kArenaAllocRegisterAllocatorValidate));
- for (size_t i = 0; i < liveness_.GetNumberOfSsaValues(); ++i) {
- HInstruction* instruction = liveness_.GetInstructionFromSsaIndex(i);
- LiveInterval* interval = instruction->GetLiveInterval();
- if (interval != nullptr && IsCoreInterval(interval) == processing_core_regs) {
- intervals.push_back(instruction->GetLiveInterval());
- }
- }
-
- ScopedArenaVector<InterferenceNode*>& physical_nodes = processing_core_regs
- ? physical_core_nodes_
- : physical_fp_nodes_;
- for (InterferenceNode* fixed : physical_nodes) {
- LiveInterval* interval = fixed->GetInterval();
- if (interval->GetFirstRange() != nullptr) {
- // Ideally we would check fixed ranges as well, but currently there are times when
- // two fixed intervals for the same register will overlap. For example, a fixed input
- // and a fixed output may sometimes share the same register, in which case there will be
- // two fixed intervals for the same place.
- }
- }
-
- for (LiveInterval* temp : temp_intervals_) {
- if (IsCoreInterval(temp) == processing_core_regs) {
- intervals.push_back(temp);
- }
- }
-
- size_t spill_slots = num_int_spill_slots_
- + num_long_spill_slots_
- + num_float_spill_slots_
- + num_double_spill_slots_
- + catch_phi_spill_slot_counter_;
- bool ok = ValidateIntervals(ArrayRef<LiveInterval* const>(intervals),
- spill_slots,
- reserved_art_method_slots_ + reserved_out_slots_,
- *codegen_,
- processing_core_regs,
- log_fatal_on_failure);
- if (!ok) {
- return false;
- }
- } // for processing_core_regs
-
- return true;
-}
-
-void RegisterAllocatorGraphColor::ProcessInstructions() {
- for (HBasicBlock* block : codegen_->GetGraph()->GetLinearPostOrder()) {
- // Note that we currently depend on this ordering, since some helper
- // code is designed for linear scan register allocation.
- for (HBackwardInstructionIterator instr_it(block->GetInstructions());
- !instr_it.Done();
- instr_it.Advance()) {
- ProcessInstruction(instr_it.Current());
- }
-
- for (HInstructionIterator phi_it(block->GetPhis()); !phi_it.Done(); phi_it.Advance()) {
- ProcessInstruction(phi_it.Current());
- }
-
- if (block->IsCatchBlock()
- || (block->IsLoopHeader() && block->GetLoopInformation()->IsIrreducible())) {
- // By blocking all registers at the top of each catch block or irreducible loop, we force
- // intervals belonging to the live-in set of the catch/header block to be spilled.
- // TODO(ngeoffray): Phis in this block could be allocated in registers.
- size_t position = block->GetLifetimeStart();
- BlockRegisters(position, position + 1);
- }
- }
-}
-
-bool RegisterAllocatorGraphColor::TryRemoveSuspendCheckEntry(HInstruction* instruction) {
- LocationSummary* locations = instruction->GetLocations();
- if (instruction->IsSuspendCheckEntry() && !codegen_->NeedsSuspendCheckEntry()) {
- // TODO: We do this here because we do not want the suspend check to artificially
- // create live registers. We should find another place, but this is currently the
- // simplest.
- DCHECK_EQ(locations->GetTempCount(), 0u);
- instruction->GetBlock()->RemoveInstruction(instruction);
- return true;
- }
- return false;
-}
-
-void RegisterAllocatorGraphColor::ProcessInstruction(HInstruction* instruction) {
- LocationSummary* locations = instruction->GetLocations();
- if (locations == nullptr) {
- return;
- }
- if (TryRemoveSuspendCheckEntry(instruction)) {
- return;
- }
-
- CheckForTempLiveIntervals(instruction);
- CheckForSafepoint(instruction);
- if (locations->WillCall()) {
- // If a call will happen, create fixed intervals for caller-save registers.
- // TODO: Note that it may be beneficial to later split intervals at this point,
- // so that we allow last-minute moves from a caller-save register
- // to a callee-save register.
- BlockRegisters(instruction->GetLifetimePosition(),
- instruction->GetLifetimePosition() + 1,
- /*caller_save_only*/ true);
- }
- CheckForFixedInputs(instruction);
-
- LiveInterval* interval = instruction->GetLiveInterval();
- if (interval == nullptr) {
- // Instructions lacking a valid output location do not have a live interval.
- DCHECK(!locations->Out().IsValid());
- return;
- }
-
- // Low intervals act as representatives for their corresponding high interval.
- DCHECK(!interval->IsHighInterval());
- if (codegen_->NeedsTwoRegisters(interval->GetType())) {
- interval->AddHighInterval();
- }
- AddSafepointsFor(instruction);
- CheckForFixedOutput(instruction);
- AllocateSpillSlotForCatchPhi(instruction);
-
- ScopedArenaVector<LiveInterval*>& intervals = IsCoreInterval(interval)
- ? core_intervals_
- : fp_intervals_;
- if (interval->HasSpillSlot() || instruction->IsConstant()) {
- // Note that if an interval already has a spill slot, then its value currently resides
- // in the stack (e.g., parameters). Thus we do not have to allocate a register until its first
- // register use. This is also true for constants, which can be materialized at any point.
- size_t first_register_use = interval->FirstRegisterUse();
- if (first_register_use != kNoLifetime) {
- LiveInterval* split = SplitBetween(interval, interval->GetStart(), first_register_use - 1);
- intervals.push_back(split);
- } else {
- // We won't allocate a register for this value.
- }
- } else {
- intervals.push_back(interval);
- }
-}
-
-void RegisterAllocatorGraphColor::CheckForFixedInputs(HInstruction* instruction) {
- // We simply block physical registers where necessary.
- // TODO: Ideally we would coalesce the physical register with the register
- // allocated to the input value, but this can be tricky if, e.g., there
- // could be multiple physical register uses of the same value at the
- // same instruction. Furthermore, there's currently no distinction between
- // fixed inputs to a call (which will be clobbered) and other fixed inputs (which
- // may not be clobbered).
- LocationSummary* locations = instruction->GetLocations();
- size_t position = instruction->GetLifetimePosition();
- for (size_t i = 0; i < locations->GetInputCount(); ++i) {
- Location input = locations->InAt(i);
- if (input.IsRegister() || input.IsFpuRegister()) {
- BlockRegister(input, position, position + 1);
- codegen_->AddAllocatedRegister(input);
- } else if (input.IsPair()) {
- BlockRegister(input.ToLow(), position, position + 1);
- BlockRegister(input.ToHigh(), position, position + 1);
- codegen_->AddAllocatedRegister(input.ToLow());
- codegen_->AddAllocatedRegister(input.ToHigh());
- }
- }
-}
-
-void RegisterAllocatorGraphColor::CheckForFixedOutput(HInstruction* instruction) {
- // If an instruction has a fixed output location, we give the live interval a register and then
- // proactively split it just after the definition point to avoid creating too many interferences
- // with a fixed node.
- LiveInterval* interval = instruction->GetLiveInterval();
- Location out = interval->GetDefinedBy()->GetLocations()->Out();
- size_t position = instruction->GetLifetimePosition();
- DCHECK_GE(interval->GetEnd() - position, 2u);
-
- if (out.IsUnallocated() && out.GetPolicy() == Location::kSameAsFirstInput) {
- out = instruction->GetLocations()->InAt(0);
- }
-
- if (out.IsRegister() || out.IsFpuRegister()) {
- interval->SetRegister(out.reg());
- codegen_->AddAllocatedRegister(out);
- Split(interval, position + 1);
- } else if (out.IsPair()) {
- interval->SetRegister(out.low());
- interval->GetHighInterval()->SetRegister(out.high());
- codegen_->AddAllocatedRegister(out.ToLow());
- codegen_->AddAllocatedRegister(out.ToHigh());
- Split(interval, position + 1);
- } else if (out.IsStackSlot() || out.IsDoubleStackSlot()) {
- interval->SetSpillSlot(out.GetStackIndex());
- } else {
- DCHECK(out.IsUnallocated() || out.IsConstant());
- }
-}
-
-void RegisterAllocatorGraphColor::AddSafepointsFor(HInstruction* instruction) {
- LiveInterval* interval = instruction->GetLiveInterval();
- for (size_t safepoint_index = safepoints_.size(); safepoint_index > 0; --safepoint_index) {
- HInstruction* safepoint = safepoints_[safepoint_index - 1u];
- size_t safepoint_position = safepoint->GetLifetimePosition();
-
- // Test that safepoints_ are ordered in the optimal way.
- DCHECK(safepoint_index == safepoints_.size() ||
- safepoints_[safepoint_index]->GetLifetimePosition() < safepoint_position);
-
- if (safepoint_position == interval->GetStart()) {
- // The safepoint is for this instruction, so the location of the instruction
- // does not need to be saved.
- DCHECK_EQ(safepoint_index, safepoints_.size());
- DCHECK_EQ(safepoint, instruction);
- continue;
- } else if (interval->IsDeadAt(safepoint_position)) {
- break;
- } else if (!interval->Covers(safepoint_position)) {
- // Hole in the interval.
- continue;
- }
- interval->AddSafepoint(safepoint);
- }
-}
-
-void RegisterAllocatorGraphColor::CheckForTempLiveIntervals(HInstruction* instruction) {
- LocationSummary* locations = instruction->GetLocations();
- size_t position = instruction->GetLifetimePosition();
- for (size_t i = 0; i < locations->GetTempCount(); ++i) {
- Location temp = locations->GetTemp(i);
- if (temp.IsRegister() || temp.IsFpuRegister()) {
- BlockRegister(temp, position, position + 1);
- codegen_->AddAllocatedRegister(temp);
- } else {
- DCHECK(temp.IsUnallocated());
- switch (temp.GetPolicy()) {
- case Location::kRequiresRegister: {
- LiveInterval* interval =
- LiveInterval::MakeTempInterval(allocator_, DataType::Type::kInt32);
- interval->AddTempUse(instruction, i);
- core_intervals_.push_back(interval);
- temp_intervals_.push_back(interval);
- break;
- }
-
- case Location::kRequiresFpuRegister: {
- LiveInterval* interval =
- LiveInterval::MakeTempInterval(allocator_, DataType::Type::kFloat64);
- interval->AddTempUse(instruction, i);
- fp_intervals_.push_back(interval);
- temp_intervals_.push_back(interval);
- if (codegen_->NeedsTwoRegisters(DataType::Type::kFloat64)) {
- interval->AddHighInterval(/*is_temp*/ true);
- temp_intervals_.push_back(interval->GetHighInterval());
- }
- break;
- }
-
- default:
- LOG(FATAL) << "Unexpected policy for temporary location "
- << temp.GetPolicy();
- }
- }
- }
-}
-
-void RegisterAllocatorGraphColor::CheckForSafepoint(HInstruction* instruction) {
- LocationSummary* locations = instruction->GetLocations();
-
- if (locations->NeedsSafepoint()) {
- safepoints_.push_back(instruction);
- }
-}
-
-LiveInterval* RegisterAllocatorGraphColor::TrySplit(LiveInterval* interval, size_t position) {
- if (interval->GetStart() < position && position < interval->GetEnd()) {
- return Split(interval, position);
- } else {
- return interval;
- }
-}
-
-void RegisterAllocatorGraphColor::SplitAtRegisterUses(LiveInterval* interval) {
- DCHECK(!interval->IsHighInterval());
-
- // Split just after a register definition.
- if (interval->IsParent() && interval->DefinitionRequiresRegister()) {
- interval = TrySplit(interval, interval->GetStart() + 1);
- }
-
- // Process uses in the range [interval->GetStart(), interval->GetEnd()], i.e.
- // [interval->GetStart(), interval->GetEnd() + 1)
- auto matching_use_range = FindMatchingUseRange(interval->GetUses().begin(),
- interval->GetUses().end(),
- interval->GetStart(),
- interval->GetEnd() + 1u);
- // Split around register uses.
- for (const UsePosition& use : matching_use_range) {
- if (use.RequiresRegister()) {
- size_t position = use.GetPosition();
- interval = TrySplit(interval, position - 1);
- if (liveness_.GetInstructionFromPosition(position / 2)->IsControlFlow()) {
- // If we are at the very end of a basic block, we cannot split right
- // at the use. Split just after instead.
- interval = TrySplit(interval, position + 1);
- } else {
- interval = TrySplit(interval, position);
- }
- }
- }
-}
-
-void RegisterAllocatorGraphColor::AllocateSpillSlotForCatchPhi(HInstruction* instruction) {
- if (instruction->IsPhi() && instruction->AsPhi()->IsCatchPhi()) {
- HPhi* phi = instruction->AsPhi();
- LiveInterval* interval = phi->GetLiveInterval();
-
- HInstruction* previous_phi = phi->GetPrevious();
- DCHECK(previous_phi == nullptr ||
- previous_phi->AsPhi()->GetRegNumber() <= phi->GetRegNumber())
- << "Phis expected to be sorted by vreg number, "
- << "so that equivalent phis are adjacent.";
-
- if (phi->IsVRegEquivalentOf(previous_phi)) {
- // Assign the same spill slot.
- DCHECK(previous_phi->GetLiveInterval()->HasSpillSlot());
- interval->SetSpillSlot(previous_phi->GetLiveInterval()->GetSpillSlot());
- } else {
- interval->SetSpillSlot(catch_phi_spill_slot_counter_);
- catch_phi_spill_slot_counter_ += interval->NumberOfSpillSlotsNeeded();
- }
- }
-}
-
-void RegisterAllocatorGraphColor::BlockRegister(Location location,
- size_t start,
- size_t end) {
- DCHECK(location.IsRegister() || location.IsFpuRegister());
- int reg = location.reg();
- LiveInterval* interval = location.IsRegister()
- ? physical_core_nodes_[reg]->GetInterval()
- : physical_fp_nodes_[reg]->GetInterval();
- DCHECK(interval->GetRegister() == reg);
- bool blocked_by_codegen = location.IsRegister()
- ? codegen_->IsBlockedCoreRegister(reg)
- : codegen_->IsBlockedFloatingPointRegister(reg);
- if (blocked_by_codegen) {
- // We've already blocked this register for the entire method. (And adding a
- // range inside another range violates the preconditions of AddRange).
- } else {
- interval->AddRange(start, end);
- }
-}
-
-void RegisterAllocatorGraphColor::BlockRegisters(size_t start, size_t end, bool caller_save_only) {
- for (size_t i = 0; i < codegen_->GetNumberOfCoreRegisters(); ++i) {
- if (!caller_save_only || !codegen_->IsCoreCalleeSaveRegister(i)) {
- BlockRegister(Location::RegisterLocation(i), start, end);
- }
- }
- for (size_t i = 0; i < codegen_->GetNumberOfFloatingPointRegisters(); ++i) {
- if (!caller_save_only || !codegen_->IsFloatingPointCalleeSaveRegister(i)) {
- BlockRegister(Location::FpuRegisterLocation(i), start, end);
- }
- }
-}
-
-void ColoringIteration::AddPotentialInterference(InterferenceNode* from,
- InterferenceNode* to,
- bool guaranteed_not_interfering_yet,
- bool both_directions) {
- if (from->IsPrecolored()) {
- // We save space by ignoring outgoing edges from fixed nodes.
- } else if (to->IsPrecolored()) {
- // It is important that only a single node represents a given fixed register in the
- // interference graph. We retrieve that node here.
- const ScopedArenaVector<InterferenceNode*>& physical_nodes =
- to->GetInterval()->IsFloatingPoint() ? register_allocator_->physical_fp_nodes_
- : register_allocator_->physical_core_nodes_;
- InterferenceNode* physical_node = physical_nodes[to->GetInterval()->GetRegister()];
- from->AddInterference(
- physical_node, /*guaranteed_not_interfering_yet*/ false, &adjacent_nodes_links_);
- DCHECK_EQ(to->GetInterval()->GetRegister(), physical_node->GetInterval()->GetRegister());
- DCHECK_EQ(to->GetAlias(), physical_node) << "Fixed nodes should alias the canonical fixed node";
-
- // If a node interferes with a fixed pair node, the weight of the edge may
- // be inaccurate after using the alias of the pair node, because the alias of the pair node
- // is a single node.
- // We could make special pair fixed nodes, but that ends up being too conservative because
- // a node could then interfere with both {r1} and {r1,r2}, leading to a degree of
- // three rather than two.
- // Instead, we explicitly add an interference with the high node of the fixed pair node.
- // TODO: This is too conservative at times for pair nodes, but the fact that fixed pair intervals
- // can be unaligned on x86 complicates things.
- if (to->IsPair()) {
- InterferenceNode* high_node =
- physical_nodes[to->GetInterval()->GetHighInterval()->GetRegister()];
- DCHECK_EQ(to->GetInterval()->GetHighInterval()->GetRegister(),
- high_node->GetInterval()->GetRegister());
- from->AddInterference(
- high_node, /*guaranteed_not_interfering_yet*/ false, &adjacent_nodes_links_);
- }
- } else {
- // Standard interference between two uncolored nodes.
- from->AddInterference(to, guaranteed_not_interfering_yet, &adjacent_nodes_links_);
- }
-
- if (both_directions) {
- AddPotentialInterference(to, from, guaranteed_not_interfering_yet, /*both_directions*/ false);
- }
-}
-
-// Returns true if `in_node` represents an input interval of `out_node`, and the output interval
-// is allowed to have the same register as the input interval.
-// TODO: Ideally we should just produce correct intervals in liveness analysis.
-// We would need to refactor the current live interval layout to do so, which is
-// no small task.
-static bool CheckInputOutputCanOverlap(InterferenceNode* in_node, InterferenceNode* out_node) {
- LiveInterval* output_interval = out_node->GetInterval();
- HInstruction* defined_by = output_interval->GetDefinedBy();
- if (defined_by == nullptr) {
- // This must not be a definition point.
- return false;
- }
-
- LocationSummary* locations = defined_by->GetLocations();
- if (locations->OutputCanOverlapWithInputs()) {
- // This instruction does not allow the output to reuse a register from an input.
- return false;
- }
-
- LiveInterval* input_interval = in_node->GetInterval();
- LiveInterval* next_sibling = input_interval->GetNextSibling();
- size_t def_position = defined_by->GetLifetimePosition();
- size_t use_position = def_position + 1;
- if (next_sibling != nullptr && next_sibling->GetStart() == use_position) {
- // The next sibling starts at the use position, so reusing the input register in the output
- // would clobber the input before it's moved into the sibling interval location.
- return false;
- }
-
- if (!input_interval->IsDeadAt(use_position) && input_interval->CoversSlow(use_position)) {
- // The input interval is live after the use position.
- return false;
- }
-
- HInputsRef inputs = defined_by->GetInputs();
- for (size_t i = 0; i < inputs.size(); ++i) {
- if (inputs[i]->GetLiveInterval()->GetSiblingAt(def_position) == input_interval) {
- DCHECK(input_interval->SameRegisterKind(*output_interval));
- return true;
- }
- }
-
- // The input interval was not an input for this instruction.
- return false;
-}
-
-void ColoringIteration::BuildInterferenceGraph(
- const ScopedArenaVector<LiveInterval*>& intervals,
- const ScopedArenaVector<InterferenceNode*>& physical_nodes) {
- DCHECK(interval_node_map_.empty() && prunable_nodes_.empty());
- // Build the interference graph efficiently by ordering range endpoints
- // by position and doing a linear sweep to find interferences. (That is, we
- // jump from endpoint to endpoint, maintaining a set of intervals live at each
- // point. If two nodes are ever in the live set at the same time, then they
- // interfere with each other.)
- //
- // We order by both position and (secondarily) by whether the endpoint
- // begins or ends a range; we want to process range endings before range
- // beginnings at the same position because they should not conflict.
- //
- // For simplicity, we create a tuple for each endpoint, and then sort the tuples.
- // Tuple contents: (position, is_range_beginning, node).
- ScopedArenaVector<std::tuple<size_t, bool, InterferenceNode*>> range_endpoints(
- allocator_->Adapter(kArenaAllocRegisterAllocator));
-
- // We reserve plenty of space to avoid excessive copying.
- range_endpoints.reserve(4 * intervals.size());
-
- for (LiveInterval* parent : intervals) {
- for (LiveInterval* sibling = parent; sibling != nullptr; sibling = sibling->GetNextSibling()) {
- LiveRange* range = sibling->GetFirstRange();
- if (range != nullptr) {
- InterferenceNode* node =
- new (allocator_) InterferenceNode(sibling, register_allocator_->liveness_);
- interval_node_map_.insert(std::make_pair(sibling, node));
-
- if (sibling->HasRegister()) {
- // Fixed nodes should alias the canonical node for the corresponding register.
- node->stage = NodeStage::kPrecolored;
- InterferenceNode* physical_node = physical_nodes[sibling->GetRegister()];
- node->SetAlias(physical_node);
- DCHECK_EQ(node->GetInterval()->GetRegister(),
- physical_node->GetInterval()->GetRegister());
- } else {
- node->stage = NodeStage::kPrunable;
- prunable_nodes_.push_back(node);
- }
-
- while (range != nullptr) {
- range_endpoints.push_back(std::make_tuple(range->GetStart(), true, node));
- range_endpoints.push_back(std::make_tuple(range->GetEnd(), false, node));
- range = range->GetNext();
- }
- }
- }
- }
-
- // Sort the endpoints.
- // We explicitly ignore the third entry of each tuple (the node pointer) in order
- // to maintain determinism.
- std::sort(range_endpoints.begin(), range_endpoints.end(),
- [] (const std::tuple<size_t, bool, InterferenceNode*>& lhs,
- const std::tuple<size_t, bool, InterferenceNode*>& rhs) {
- return std::tie(std::get<0>(lhs), std::get<1>(lhs))
- < std::tie(std::get<0>(rhs), std::get<1>(rhs));
- });
-
- // Nodes live at the current position in the linear sweep.
- ScopedArenaVector<InterferenceNode*> live(allocator_->Adapter(kArenaAllocRegisterAllocator));
-
- // Linear sweep. When we encounter the beginning of a range, we add the corresponding node to the
- // live set. When we encounter the end of a range, we remove the corresponding node
- // from the live set. Nodes interfere if they are in the live set at the same time.
- for (auto it = range_endpoints.begin(); it != range_endpoints.end(); ++it) {
- bool is_range_beginning;
- InterferenceNode* node;
- size_t position;
- // Extract information from the tuple, including the node this tuple represents.
- std::tie(position, is_range_beginning, node) = *it;
-
- if (is_range_beginning) {
- bool guaranteed_not_interfering_yet = position == node->GetInterval()->GetStart();
- for (InterferenceNode* conflicting : live) {
- DCHECK_NE(node, conflicting);
- if (CheckInputOutputCanOverlap(conflicting, node)) {
- // We do not add an interference, because the instruction represented by `node` allows
- // its output to share a register with an input, represented here by `conflicting`.
- } else {
- AddPotentialInterference(node, conflicting, guaranteed_not_interfering_yet);
- }
- }
- DCHECK(std::find(live.begin(), live.end(), node) == live.end());
- live.push_back(node);
- } else {
- // End of range.
- auto live_it = std::find(live.begin(), live.end(), node);
- DCHECK(live_it != live.end());
- live.erase(live_it);
- }
- }
- DCHECK(live.empty());
-}
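
The endpoint sweep above can be illustrated in isolation. The following standalone sketch (toy ranges and indices, no allocator types) sorts (position, is_beginning) endpoints so that endings come before beginnings at the same position, then reports every pair of values that is live simultaneously:

#include <algorithm>
#include <cstddef>
#include <iostream>
#include <set>
#include <tuple>
#include <utility>
#include <vector>

int main() {
  // Half-open live ranges [start, end) for three toy values.
  std::vector<std::pair<size_t, size_t>> ranges = {{0, 6}, {4, 10}, {6, 8}};

  // (position, is_beginning, value). Sorting puts 'false' (a range ending) before
  // 'true' (a range beginning) at the same position, so values 0 and 2 below,
  // which merely touch at position 6, do not interfere.
  std::vector<std::tuple<size_t, bool, size_t>> endpoints;
  for (size_t i = 0; i < ranges.size(); ++i) {
    endpoints.emplace_back(ranges[i].first, true, i);
    endpoints.emplace_back(ranges[i].second, false, i);
  }
  std::sort(endpoints.begin(), endpoints.end());

  std::set<size_t> live;
  for (const auto& [position, is_beginning, value] : endpoints) {
    if (is_beginning) {
      for (size_t other : live) {
        std::cout << "interference: " << value << " <-> " << other << "\n";
      }
      live.insert(value);
    } else {
      live.erase(value);
    }
  }
  return 0;
}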
-
-void ColoringIteration::CreateCoalesceOpportunity(InterferenceNode* a,
- InterferenceNode* b,
- CoalesceKind kind,
- size_t position) {
- DCHECK_EQ(a->IsPair(), b->IsPair())
- << "Nodes of different memory widths should never be coalesced";
- CoalesceOpportunity* opportunity =
- new (allocator_) CoalesceOpportunity(a, b, kind, position, register_allocator_->liveness_);
- a->AddCoalesceOpportunity(opportunity, &coalesce_opportunities_links_);
- b->AddCoalesceOpportunity(opportunity, &coalesce_opportunities_links_);
- coalesce_worklist_.push(opportunity);
-}
-
-// When looking for coalesce opportunities, we use the interval_node_map_ to find the node
-// corresponding to an interval. Note that not all intervals are in this map, notably the parents
- // of constants and stack arguments. (However, these intervals should not be involved in coalesce
-// opportunities anyway, because they're not going to be in registers.)
-void ColoringIteration::FindCoalesceOpportunities() {
- DCHECK(coalesce_worklist_.empty());
-
- for (InterferenceNode* node : prunable_nodes_) {
- LiveInterval* interval = node->GetInterval();
-
- // Coalesce siblings.
- LiveInterval* next_sibling = interval->GetNextSibling();
- if (next_sibling != nullptr && interval->GetEnd() == next_sibling->GetStart()) {
- auto it = interval_node_map_.find(next_sibling);
- if (it != interval_node_map_.end()) {
- InterferenceNode* sibling_node = it->second;
- CreateCoalesceOpportunity(node,
- sibling_node,
- CoalesceKind::kAdjacentSibling,
- interval->GetEnd());
- }
- }
-
- // Coalesce fixed outputs with this interval if this interval is an adjacent sibling.
- LiveInterval* parent = interval->GetParent();
- if (parent->HasRegister()
- && parent->GetNextSibling() == interval
- && parent->GetEnd() == interval->GetStart()) {
- auto it = interval_node_map_.find(parent);
- if (it != interval_node_map_.end()) {
- InterferenceNode* parent_node = it->second;
- CreateCoalesceOpportunity(node,
- parent_node,
- CoalesceKind::kFixedOutputSibling,
- parent->GetEnd());
- }
- }
-
- // Try to prevent moves across blocks.
- // Note that this does not lead to many succeeding coalesce attempts, so it could be removed
- // if found to add to compile time.
- const SsaLivenessAnalysis& liveness = register_allocator_->liveness_;
- if (interval->IsSplit() && liveness.IsAtBlockBoundary(interval->GetStart() / 2)) {
- // If the start of this interval is at a block boundary, we look at the
- // location of the interval in blocks preceding the block this interval
- // starts at. This can avoid a move between the two blocks.
- HBasicBlock* block = liveness.GetBlockFromPosition(interval->GetStart() / 2);
- for (HBasicBlock* predecessor : block->GetPredecessors()) {
- size_t position = predecessor->GetLifetimeEnd() - 1;
- LiveInterval* existing = interval->GetParent()->GetSiblingAt(position);
- if (existing != nullptr) {
- auto it = interval_node_map_.find(existing);
- if (it != interval_node_map_.end()) {
- InterferenceNode* existing_node = it->second;
- CreateCoalesceOpportunity(node,
- existing_node,
- CoalesceKind::kNonlinearControlFlow,
- position);
- }
- }
- }
- }
-
- // Coalesce phi inputs with the corresponding output.
- HInstruction* defined_by = interval->GetDefinedBy();
- if (defined_by != nullptr && defined_by->IsPhi()) {
- ArrayRef<HBasicBlock* const> predecessors(defined_by->GetBlock()->GetPredecessors());
- HInputsRef inputs = defined_by->GetInputs();
-
- for (size_t i = 0, e = inputs.size(); i < e; ++i) {
- // We want the sibling at the end of the appropriate predecessor block.
- size_t position = predecessors[i]->GetLifetimeEnd() - 1;
- LiveInterval* input_interval = inputs[i]->GetLiveInterval()->GetSiblingAt(position);
-
- auto it = interval_node_map_.find(input_interval);
- if (it != interval_node_map_.end()) {
- InterferenceNode* input_node = it->second;
- CreateCoalesceOpportunity(node, input_node, CoalesceKind::kPhi, position);
- }
- }
- }
-
- // Coalesce output with first input when policy is kSameAsFirstInput.
- if (defined_by != nullptr) {
- Location out = defined_by->GetLocations()->Out();
- if (out.IsUnallocated() && out.GetPolicy() == Location::kSameAsFirstInput) {
- LiveInterval* input_interval
- = defined_by->InputAt(0)->GetLiveInterval()->GetSiblingAt(interval->GetStart() - 1);
- // TODO: Could we consider lifetime holes here?
- if (input_interval->GetEnd() == interval->GetStart()) {
- auto it = interval_node_map_.find(input_interval);
- if (it != interval_node_map_.end()) {
- InterferenceNode* input_node = it->second;
- CreateCoalesceOpportunity(node,
- input_node,
- CoalesceKind::kFirstInput,
- interval->GetStart());
- }
- }
- }
- }
-
- // An interval that starts at its defining instruction (that is, one that is not split) may
- // reuse the registers used by the inputs of that instruction, based on the
- // location summary.
- if (defined_by != nullptr) {
- DCHECK(!interval->IsSplit());
- LocationSummary* locations = defined_by->GetLocations();
- if (!locations->OutputCanOverlapWithInputs()) {
- HInputsRef inputs = defined_by->GetInputs();
- for (size_t i = 0; i < inputs.size(); ++i) {
- size_t def_point = defined_by->GetLifetimePosition();
- // TODO: Getting the sibling at the def_point might not be quite what we want
- // for fixed inputs, since the use will be *at* the def_point rather than after.
- LiveInterval* input_interval = inputs[i]->GetLiveInterval()->GetSiblingAt(def_point);
- if (input_interval != nullptr &&
- input_interval->HasHighInterval() == interval->HasHighInterval()) {
- auto it = interval_node_map_.find(input_interval);
- if (it != interval_node_map_.end()) {
- InterferenceNode* input_node = it->second;
- CreateCoalesceOpportunity(node,
- input_node,
- CoalesceKind::kAnyInput,
- interval->GetStart());
- }
- }
- }
- }
- }
-
- // Try to prevent moves into fixed input locations.
- // Process uses in the range (interval->GetStart(), interval->GetEnd()], i.e.
- // [interval->GetStart() + 1, interval->GetEnd() + 1)
- auto matching_use_range = FindMatchingUseRange(interval->GetUses().begin(),
- interval->GetUses().end(),
- interval->GetStart() + 1u,
- interval->GetEnd() + 1u);
- for (const UsePosition& use : matching_use_range) {
- HInstruction* user = use.GetUser();
- if (user == nullptr) {
- // User may be null for certain intervals, such as temp intervals.
- continue;
- }
- LocationSummary* locations = user->GetLocations();
- Location input = locations->InAt(use.GetInputIndex());
- if (input.IsRegister() || input.IsFpuRegister()) {
- // TODO: Could try to handle pair interval too, but coalescing with fixed pair nodes
- // is currently not supported.
- InterferenceNode* fixed_node = input.IsRegister()
- ? register_allocator_->physical_core_nodes_[input.reg()]
- : register_allocator_->physical_fp_nodes_[input.reg()];
- CreateCoalesceOpportunity(node,
- fixed_node,
- CoalesceKind::kFixedInput,
- user->GetLifetimePosition());
- }
- }
- } // for node in prunable_nodes
-}
-
-static bool IsLowDegreeNode(InterferenceNode* node, size_t num_regs) {
- return node->GetOutDegree() < num_regs;
-}
-
-static bool IsHighDegreeNode(InterferenceNode* node, size_t num_regs) {
- return !IsLowDegreeNode(node, num_regs);
-}
-
-void ColoringIteration::PruneInterferenceGraph() {
- DCHECK(pruned_nodes_.empty()
- && simplify_worklist_.empty()
- && freeze_worklist_.empty()
- && spill_worklist_.empty());
- // When pruning the graph, we refer to nodes with degree less than num_regs as low degree nodes,
- // and all others as high degree nodes. The distinction is important: low degree nodes are
- // guaranteed a color, while high degree nodes are not.
-
- // Build worklists. Note that the coalesce worklist has already been
- // filled by FindCoalesceOpportunities().
- for (InterferenceNode* node : prunable_nodes_) {
- DCHECK(!node->IsPrecolored()) << "Fixed nodes should never be pruned";
- if (IsLowDegreeNode(node, num_regs_)) {
- if (node->GetCoalesceOpportunities().empty()) {
- // Simplify Worklist.
- node->stage = NodeStage::kSimplifyWorklist;
- simplify_worklist_.push_back(node);
- } else {
- // Freeze Worklist.
- node->stage = NodeStage::kFreezeWorklist;
- freeze_worklist_.push_back(node);
- }
- } else {
- // Spill worklist.
- node->stage = NodeStage::kSpillWorklist;
- spill_worklist_.push(node);
- }
- }
-
- // Prune graph.
- // Note that we do not remove a node from its current worklist if it moves to another, so it may
- // be in multiple worklists at once; the node's `stage` says which worklist it is really in.
- while (true) {
- if (!simplify_worklist_.empty()) {
- // Prune low-degree nodes.
- // TODO: pop_back() should work as well, but it didn't; we get a
- // failed check while pruning. We should look into this.
- InterferenceNode* node = simplify_worklist_.front();
- simplify_worklist_.pop_front();
- DCHECK_EQ(node->stage, NodeStage::kSimplifyWorklist) << "Cannot move from simplify list";
- DCHECK_LT(node->GetOutDegree(), num_regs_) << "Nodes in simplify list should be low degree";
- DCHECK(!node->IsMoveRelated()) << "Nodes in simplify list should not be move related";
- PruneNode(node);
- } else if (!coalesce_worklist_.empty()) {
- // Coalesce.
- CoalesceOpportunity* opportunity = coalesce_worklist_.top();
- coalesce_worklist_.pop();
- if (opportunity->stage == CoalesceStage::kWorklist) {
- Coalesce(opportunity);
- }
- } else if (!freeze_worklist_.empty()) {
- // Freeze moves and prune a low-degree move-related node.
- InterferenceNode* node = freeze_worklist_.front();
- freeze_worklist_.pop_front();
- if (node->stage == NodeStage::kFreezeWorklist) {
- DCHECK_LT(node->GetOutDegree(), num_regs_) << "Nodes in freeze list should be low degree";
- DCHECK(node->IsMoveRelated()) << "Nodes in freeze list should be move related";
- FreezeMoves(node);
- PruneNode(node);
- }
- } else if (!spill_worklist_.empty()) {
- // We spill the lowest-priority node, because pruning a node earlier
- // gives it a higher chance of being spilled.
- InterferenceNode* node = spill_worklist_.top();
- spill_worklist_.pop();
- if (node->stage == NodeStage::kSpillWorklist) {
- DCHECK_GE(node->GetOutDegree(), num_regs_) << "Nodes in spill list should be high degree";
- FreezeMoves(node);
- PruneNode(node);
- }
- } else {
- // Pruning complete.
- break;
- }
- }
- DCHECK_EQ(prunable_nodes_.size(), pruned_nodes_.size());
-}
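
For reference, the simplify part of the loop above follows the classic Kempe-style argument: any node with fewer than `num_regs` live neighbors can be set aside and is guaranteed a color later. A standalone sketch on a toy graph (no coalescing, freezing, or spilling, and invented names throughout) shows the cascade:

#include <cstddef>
#include <iostream>
#include <vector>

int main() {
  constexpr size_t kNumRegs = 3;
  // Toy undirected interference graph: node 3 conflicts with everyone,
  // nodes 0-1-2 form a path. Adjacency lists are symmetric.
  std::vector<std::vector<size_t>> adj = {{1, 3}, {0, 2, 3}, {1, 3}, {0, 1, 2}};
  std::vector<size_t> degree;
  for (const std::vector<size_t>& neighbors : adj) {
    degree.push_back(neighbors.size());
  }
  std::vector<bool> pruned(adj.size(), false);
  std::vector<size_t> stack;  // Pruned nodes; colored later in reverse order.

  bool progress = true;
  while (progress) {
    progress = false;
    for (size_t n = 0; n < adj.size(); ++n) {
      if (!pruned[n] && degree[n] < kNumRegs) {
        pruned[n] = true;
        stack.push_back(n);
        for (size_t other : adj[n]) {
          if (!pruned[other]) {
            --degree[other];  // Pruning n may turn a neighbor into a low degree node.
          }
        }
        progress = true;
      }
    }
  }

  // Each node was pruned with fewer than kNumRegs unpruned neighbors, so popping
  // the stack and picking any free color is guaranteed to succeed.
  for (size_t n : stack) {
    std::cout << "pruned " << n << "\n";
  }
  return 0;
}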
-
-void ColoringIteration::EnableCoalesceOpportunities(InterferenceNode* node) {
- for (CoalesceOpportunity* opportunity : node->GetCoalesceOpportunities()) {
- if (opportunity->stage == CoalesceStage::kActive) {
- opportunity->stage = CoalesceStage::kWorklist;
- coalesce_worklist_.push(opportunity);
- }
- }
-}
-
-void ColoringIteration::PruneNode(InterferenceNode* node) {
- DCHECK_NE(node->stage, NodeStage::kPruned);
- DCHECK(!node->IsPrecolored());
- node->stage = NodeStage::kPruned;
- pruned_nodes_.push(node);
-
- for (InterferenceNode* adj : node->GetAdjacentNodes()) {
- DCHECK_NE(adj->stage, NodeStage::kPruned) << "Should be no interferences with pruned nodes";
-
- if (adj->IsPrecolored()) {
- // No effect on pre-colored nodes; they're never pruned.
- } else {
- // Remove the interference.
- bool was_high_degree = IsHighDegreeNode(adj, num_regs_);
- DCHECK(adj->ContainsInterference(node))
- << "Missing reflexive interference from non-fixed node";
- adj->RemoveInterference(node);
-
- // Handle transitions from high degree to low degree.
- if (was_high_degree && IsLowDegreeNode(adj, num_regs_)) {
- EnableCoalesceOpportunities(adj);
- for (InterferenceNode* adj_adj : adj->GetAdjacentNodes()) {
- EnableCoalesceOpportunities(adj_adj);
- }
-
- DCHECK_EQ(adj->stage, NodeStage::kSpillWorklist);
- if (adj->IsMoveRelated()) {
- adj->stage = NodeStage::kFreezeWorklist;
- freeze_worklist_.push_back(adj);
- } else {
- adj->stage = NodeStage::kSimplifyWorklist;
- simplify_worklist_.push_back(adj);
- }
- }
- }
- }
-}
-
-void ColoringIteration::CheckTransitionFromFreezeWorklist(InterferenceNode* node) {
- if (IsLowDegreeNode(node, num_regs_) && !node->IsMoveRelated()) {
- DCHECK_EQ(node->stage, NodeStage::kFreezeWorklist);
- node->stage = NodeStage::kSimplifyWorklist;
- simplify_worklist_.push_back(node);
- }
-}
-
-void ColoringIteration::FreezeMoves(InterferenceNode* node) {
- for (CoalesceOpportunity* opportunity : node->GetCoalesceOpportunities()) {
- if (opportunity->stage == CoalesceStage::kDefunct) {
- // Constrained moves should remain constrained, since they will not be considered
- // during last-chance coalescing.
- } else {
- opportunity->stage = CoalesceStage::kInactive;
- }
- InterferenceNode* other = opportunity->node_a->GetAlias() == node
- ? opportunity->node_b->GetAlias()
- : opportunity->node_a->GetAlias();
- if (other != node && other->stage == NodeStage::kFreezeWorklist) {
- DCHECK(IsLowDegreeNode(node, num_regs_));
- CheckTransitionFromFreezeWorklist(other);
- }
- }
-}
-
-bool ColoringIteration::PrecoloredHeuristic(InterferenceNode* from,
- InterferenceNode* into) {
- if (!into->IsPrecolored()) {
- // The uncolored heuristic will cover this case.
- return false;
- }
- if (from->IsPair() || into->IsPair()) {
- // TODO: Merging from a pair node is currently not supported, since fixed pair nodes
- // are currently represented as two single fixed nodes in the graph, and `into` is
- // only one of them. (We may lose the implicit connections to the second one in a merge.)
- return false;
- }
-
- // If all adjacent nodes of `from` are "ok", then we can conservatively merge with `into`.
- // Reasons an adjacent node `adj` can be "ok":
- // (1) If `adj` is low degree, interference with `into` will not affect its existing
- // colorable guarantee. (Notice that coalescing cannot increase its degree.)
- // (2) If `adj` is pre-colored, it already interferes with `into`. See (3).
- // (3) If there's already an interference with `into`, coalescing will not add interferences.
- for (InterferenceNode* adj : from->GetAdjacentNodes()) {
- if (IsLowDegreeNode(adj, num_regs_) || adj->IsPrecolored() || adj->ContainsInterference(into)) {
- // Ok.
- } else {
- return false;
- }
- }
- return true;
-}
-
-bool ColoringIteration::UncoloredHeuristic(InterferenceNode* from,
- InterferenceNode* into) {
- if (into->IsPrecolored()) {
- // The pre-colored heuristic will handle this case.
- return false;
- }
-
-  // Arbitrary cap to improve compile time. Tests show that this has a negligible effect
- // on generated code.
- if (from->GetOutDegree() + into->GetOutDegree() > 2 * num_regs_) {
- return false;
- }
-
- // It's safe to coalesce two nodes if the resulting node has fewer than `num_regs` neighbors
- // of high degree. (Low degree neighbors can be ignored, because they will eventually be
- // pruned from the interference graph in the simplify stage.)
- size_t high_degree_interferences = 0;
- for (InterferenceNode* adj : from->GetAdjacentNodes()) {
- if (IsHighDegreeNode(adj, num_regs_)) {
- high_degree_interferences += from->EdgeWeightWith(adj);
- }
- }
- for (InterferenceNode* adj : into->GetAdjacentNodes()) {
- if (IsHighDegreeNode(adj, num_regs_)) {
- if (from->ContainsInterference(adj)) {
- // We've already counted this adjacent node.
- // Furthermore, its degree will decrease if coalescing succeeds. Thus, it's possible that
- // we should not have counted it at all. (This extends the textbook Briggs coalescing test,
- // but remains conservative.)
- if (adj->GetOutDegree() - into->EdgeWeightWith(adj) < num_regs_) {
- high_degree_interferences -= from->EdgeWeightWith(adj);
- }
- } else {
- high_degree_interferences += into->EdgeWeightWith(adj);
- }
- }
- }
-
- return high_degree_interferences < num_regs_;
-}
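(Aside, not part of the deleted file: PrecoloredHeuristic and UncoloredHeuristic above specialize the textbook George and Briggs conservative coalescing tests. A minimal sketch of those tests, assuming a hypothetical `Node` type rather than ART's `InterferenceNode`, might look like this.)

#include <cstddef>
#include <unordered_set>

// Hypothetical interference-graph node used only for this sketch.
struct Node {
  std::unordered_set<Node*> adjacent;
  bool precolored = false;
  size_t Degree() const { return adjacent.size(); }
};

// Briggs test (cf. UncoloredHeuristic): coalescing `a` and `b` is conservative
// if the combined node would have fewer than `k` neighbors of degree >= k.
bool BriggsCanCoalesce(Node* a, Node* b, size_t k) {
  std::unordered_set<Node*> neighbors(a->adjacent.begin(), a->adjacent.end());
  neighbors.insert(b->adjacent.begin(), b->adjacent.end());
  size_t significant = 0;
  for (Node* n : neighbors) {
    if (n->Degree() >= k) {
      ++significant;
    }
  }
  return significant < k;
}

// George test (cf. PrecoloredHeuristic): merging `from` into `into` is safe if
// every neighbor of `from` is low degree, pre-colored, or already interferes
// with `into`, so the merge adds no new coloring constraint.
bool GeorgeCanCoalesce(Node* from, Node* into, size_t k) {
  for (Node* adj : from->adjacent) {
    if (adj->Degree() < k || adj->precolored || adj->adjacent.count(into) != 0) {
      continue;
    }
    return false;
  }
  return true;
}

The deleted heuristics additionally weight edges for pair nodes and cap the combined degree for compile time, but the conservatism argument is the same: coalescing only happens when the merged node is still guaranteed a color.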
-
-void ColoringIteration::Combine(InterferenceNode* from,
- InterferenceNode* into) {
- from->SetAlias(into);
-
- // Add interferences.
- for (InterferenceNode* adj : from->GetAdjacentNodes()) {
- bool was_low_degree = IsLowDegreeNode(adj, num_regs_);
- AddPotentialInterference(adj, into, /*guaranteed_not_interfering_yet*/ false);
- if (was_low_degree && IsHighDegreeNode(adj, num_regs_)) {
- // This is a (temporary) transition to a high degree node. Its degree will decrease again
- // when we prune `from`, but it's best to be consistent about the current worklist.
- adj->stage = NodeStage::kSpillWorklist;
- spill_worklist_.push(adj);
- }
- }
-
- // Add coalesce opportunities.
- for (CoalesceOpportunity* opportunity : from->GetCoalesceOpportunities()) {
- if (opportunity->stage != CoalesceStage::kDefunct) {
- into->AddCoalesceOpportunity(opportunity, &coalesce_opportunities_links_);
- }
- }
- EnableCoalesceOpportunities(from);
-
- // Prune and update worklists.
- PruneNode(from);
- if (IsLowDegreeNode(into, num_regs_)) {
- // Coalesce(...) takes care of checking for a transition to the simplify worklist.
- DCHECK_EQ(into->stage, NodeStage::kFreezeWorklist);
- } else if (into->stage == NodeStage::kFreezeWorklist) {
- // This is a transition to a high degree node.
- into->stage = NodeStage::kSpillWorklist;
- spill_worklist_.push(into);
- } else {
- DCHECK(into->stage == NodeStage::kSpillWorklist || into->stage == NodeStage::kPrecolored);
- }
-}
-
-void ColoringIteration::Coalesce(CoalesceOpportunity* opportunity) {
- InterferenceNode* from = opportunity->node_a->GetAlias();
- InterferenceNode* into = opportunity->node_b->GetAlias();
- DCHECK_NE(from->stage, NodeStage::kPruned);
- DCHECK_NE(into->stage, NodeStage::kPruned);
-
- if (from->IsPrecolored()) {
- // If we have one pre-colored node, make sure it's the `into` node.
- std::swap(from, into);
- }
-
- if (from == into) {
- // These nodes have already been coalesced.
- opportunity->stage = CoalesceStage::kDefunct;
- CheckTransitionFromFreezeWorklist(from);
- } else if (from->IsPrecolored() || from->ContainsInterference(into)) {
- // These nodes interfere.
- opportunity->stage = CoalesceStage::kDefunct;
- CheckTransitionFromFreezeWorklist(from);
- CheckTransitionFromFreezeWorklist(into);
- } else if (PrecoloredHeuristic(from, into)
- || UncoloredHeuristic(from, into)) {
- // We can coalesce these nodes.
- opportunity->stage = CoalesceStage::kDefunct;
- Combine(from, into);
- CheckTransitionFromFreezeWorklist(into);
- } else {
- // We cannot coalesce, but we may be able to later.
- opportunity->stage = CoalesceStage::kActive;
- }
-}
-
-// Build a mask with a bit set for each register assigned to some
-// interval in `intervals`.
-template <typename Container>
-static std::bitset<kMaxNumRegs> BuildConflictMask(const Container& intervals) {
- std::bitset<kMaxNumRegs> conflict_mask;
- for (InterferenceNode* adjacent : intervals) {
- LiveInterval* conflicting = adjacent->GetInterval();
- if (conflicting->HasRegister()) {
- conflict_mask.set(conflicting->GetRegister());
- if (conflicting->HasHighInterval()) {
- DCHECK(conflicting->GetHighInterval()->HasRegister());
- conflict_mask.set(conflicting->GetHighInterval()->GetRegister());
- }
- } else {
- DCHECK(!conflicting->HasHighInterval()
- || !conflicting->GetHighInterval()->HasRegister());
- }
- }
- return conflict_mask;
-}
-
-bool RegisterAllocatorGraphColor::IsCallerSave(size_t reg, bool processing_core_regs) {
- return processing_core_regs
- ? !codegen_->IsCoreCalleeSaveRegister(reg)
- : !codegen_->IsFloatingPointCalleeSaveRegister(reg);
-}
-
-static bool RegisterIsAligned(size_t reg) {
- return reg % 2 == 0;
-}
-
-static size_t FindFirstZeroInConflictMask(std::bitset<kMaxNumRegs> conflict_mask) {
- // We use CTZ (count trailing zeros) to quickly find the lowest 0 bit.
-  // Note that CTZ is undefined when its input is 0 (an all-ones mask), so we special-case it.
- return conflict_mask.all() ? conflict_mask.size() : CTZ(~conflict_mask.to_ulong());
-}
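(Aside: a throwaway illustration of the special case above, using the GCC/Clang `__builtin_ctzl` intrinsic as a stand-in for ART's `CTZ` helper; a sketch under those assumptions, not the allocator's code.)

#include <bitset>
#include <cstddef>

// If every bit is set, there is no free register and the complement could be
// zero within the word, which would make the count-trailing-zeros call
// undefined; return the mask size instead. Otherwise the lowest clear bit of
// the mask is the lowest set bit of its complement.
template <size_t N>
size_t FirstFreeRegister(std::bitset<N> mask) {
  return mask.all() ? N : static_cast<size_t>(__builtin_ctzl(~mask.to_ulong()));
}

// FirstFreeRegister(std::bitset<4>("1111")) == 4  (no register free)
// FirstFreeRegister(std::bitset<4>("0111")) == 3  (register 3 is free)
// FirstFreeRegister(std::bitset<4>("0101")) == 1  (register 1 is free)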
-
-bool ColoringIteration::ColorInterferenceGraph() {
- DCHECK_LE(num_regs_, kMaxNumRegs) << "kMaxNumRegs is too small";
- ScopedArenaVector<LiveInterval*> colored_intervals(
- allocator_->Adapter(kArenaAllocRegisterAllocator));
- bool successful = true;
-
- while (!pruned_nodes_.empty()) {
- InterferenceNode* node = pruned_nodes_.top();
- pruned_nodes_.pop();
- LiveInterval* interval = node->GetInterval();
- size_t reg = 0;
-
- InterferenceNode* alias = node->GetAlias();
- if (alias != node) {
- // This node was coalesced with another.
- LiveInterval* alias_interval = alias->GetInterval();
- if (alias_interval->HasRegister()) {
- reg = alias_interval->GetRegister();
- DCHECK(!BuildConflictMask(node->GetAdjacentNodes())[reg])
- << "This node conflicts with the register it was coalesced with";
- } else {
- DCHECK(false) << node->GetOutDegree() << " " << alias->GetOutDegree() << " "
- << "Move coalescing was not conservative, causing a node to be coalesced "
- << "with another node that could not be colored";
- if (interval->RequiresRegister()) {
- successful = false;
- }
- }
- } else {
- // Search for free register(s).
- std::bitset<kMaxNumRegs> conflict_mask = BuildConflictMask(node->GetAdjacentNodes());
- if (interval->HasHighInterval()) {
- // Note that the graph coloring allocator assumes that pair intervals are aligned here,
- // excluding pre-colored pair intervals (which can currently be unaligned on x86). If we
- // change the alignment requirements here, we will have to update the algorithm (e.g.,
- // be more conservative about the weight of edges adjacent to pair nodes.)
- while (reg < num_regs_ - 1 && (conflict_mask[reg] || conflict_mask[reg + 1])) {
- reg += 2;
- }
-
- // Try to use a caller-save register first.
- for (size_t i = 0; i < num_regs_ - 1; i += 2) {
- bool low_caller_save = register_allocator_->IsCallerSave(i, processing_core_regs_);
- bool high_caller_save = register_allocator_->IsCallerSave(i + 1, processing_core_regs_);
- if (!conflict_mask[i] && !conflict_mask[i + 1]) {
- if (low_caller_save && high_caller_save) {
- reg = i;
- break;
- } else if (low_caller_save || high_caller_save) {
- reg = i;
- // Keep looking to try to get both parts in caller-save registers.
- }
- }
- }
- } else {
- // Not a pair interval.
- reg = FindFirstZeroInConflictMask(conflict_mask);
-
- // Try to use caller-save registers first.
- for (size_t i = 0; i < num_regs_; ++i) {
- if (!conflict_mask[i] && register_allocator_->IsCallerSave(i, processing_core_regs_)) {
- reg = i;
- break;
- }
- }
- }
-
- // Last-chance coalescing.
- for (CoalesceOpportunity* opportunity : node->GetCoalesceOpportunities()) {
- if (opportunity->stage == CoalesceStage::kDefunct) {
- continue;
- }
- LiveInterval* other_interval = opportunity->node_a->GetAlias() == node
- ? opportunity->node_b->GetAlias()->GetInterval()
- : opportunity->node_a->GetAlias()->GetInterval();
- if (other_interval->HasRegister()) {
- size_t coalesce_register = other_interval->GetRegister();
- if (interval->HasHighInterval()) {
- if (!conflict_mask[coalesce_register] &&
- !conflict_mask[coalesce_register + 1] &&
- RegisterIsAligned(coalesce_register)) {
- reg = coalesce_register;
- break;
- }
- } else if (!conflict_mask[coalesce_register]) {
- reg = coalesce_register;
- break;
- }
- }
- }
- }
-
- if (reg < (interval->HasHighInterval() ? num_regs_ - 1 : num_regs_)) {
- // Assign register.
- DCHECK(!interval->HasRegister());
- interval->SetRegister(reg);
- colored_intervals.push_back(interval);
- if (interval->HasHighInterval()) {
- DCHECK(!interval->GetHighInterval()->HasRegister());
- interval->GetHighInterval()->SetRegister(reg + 1);
- colored_intervals.push_back(interval->GetHighInterval());
- }
- } else if (interval->RequiresRegister()) {
- // The interference graph is too dense to color. Make it sparser by
- // splitting this live interval.
- successful = false;
- register_allocator_->SplitAtRegisterUses(interval);
- // We continue coloring, because there may be additional intervals that cannot
- // be colored, and that we should split.
- } else {
- // Spill.
- node->SetNeedsSpillSlot();
- }
- }
-
- // If unsuccessful, reset all register assignments.
- if (!successful) {
- for (LiveInterval* interval : colored_intervals) {
- interval->ClearRegister();
- }
- }
-
- return successful;
-}
-
-void RegisterAllocatorGraphColor::AllocateSpillSlots(ArrayRef<InterferenceNode* const> nodes) {
- // The register allocation resolver will organize the stack based on value type,
- // so we assign stack slots for each value type separately.
- ScopedArenaAllocator allocator(allocator_->GetArenaStack());
- ScopedArenaAllocatorAdapter<void> adapter = allocator.Adapter(kArenaAllocRegisterAllocator);
- ScopedArenaVector<LiveInterval*> double_intervals(adapter);
- ScopedArenaVector<LiveInterval*> long_intervals(adapter);
- ScopedArenaVector<LiveInterval*> float_intervals(adapter);
- ScopedArenaVector<LiveInterval*> int_intervals(adapter);
-
- // The set of parent intervals already handled.
- ScopedArenaSet<LiveInterval*> seen(adapter);
-
- // Find nodes that need spill slots.
- for (InterferenceNode* node : nodes) {
- if (!node->NeedsSpillSlot()) {
- continue;
- }
-
- LiveInterval* parent = node->GetInterval()->GetParent();
- if (seen.find(parent) != seen.end()) {
- // We've already handled this interval.
- // This can happen if multiple siblings of the same interval request a stack slot.
- continue;
- }
- seen.insert(parent);
-
- HInstruction* defined_by = parent->GetDefinedBy();
- if (parent->HasSpillSlot()) {
- // We already have a spill slot for this value that we can reuse.
- } else if (defined_by->IsParameterValue()) {
- // Parameters already have a stack slot.
- parent->SetSpillSlot(codegen_->GetStackSlotOfParameter(defined_by->AsParameterValue()));
- } else if (defined_by->IsCurrentMethod()) {
- // The current method is always at stack slot 0.
- parent->SetSpillSlot(0);
- } else if (defined_by->IsConstant()) {
- // Constants don't need a spill slot.
- } else {
- // We need to find a spill slot for this interval. Place it in the correct
- // worklist to be processed later.
- switch (node->GetInterval()->GetType()) {
- case DataType::Type::kFloat64:
- double_intervals.push_back(parent);
- break;
- case DataType::Type::kInt64:
- long_intervals.push_back(parent);
- break;
- case DataType::Type::kFloat32:
- float_intervals.push_back(parent);
- break;
- case DataType::Type::kReference:
- case DataType::Type::kInt32:
- case DataType::Type::kUint16:
- case DataType::Type::kUint8:
- case DataType::Type::kInt8:
- case DataType::Type::kBool:
- case DataType::Type::kInt16:
- int_intervals.push_back(parent);
- break;
- case DataType::Type::kUint32:
- case DataType::Type::kUint64:
- case DataType::Type::kVoid:
- LOG(FATAL) << "Unexpected type for interval " << node->GetInterval()->GetType();
- UNREACHABLE();
- }
- }
- }
-
- // Color spill slots for each value type.
- ColorSpillSlots(ArrayRef<LiveInterval* const>(double_intervals), &num_double_spill_slots_);
- ColorSpillSlots(ArrayRef<LiveInterval* const>(long_intervals), &num_long_spill_slots_);
- ColorSpillSlots(ArrayRef<LiveInterval* const>(float_intervals), &num_float_spill_slots_);
- ColorSpillSlots(ArrayRef<LiveInterval* const>(int_intervals), &num_int_spill_slots_);
-}
-
-void RegisterAllocatorGraphColor::ColorSpillSlots(ArrayRef<LiveInterval* const> intervals,
- /* out */ size_t* num_stack_slots_used) {
- // We cannot use the original interference graph here because spill slots are assigned to
- // all of the siblings of an interval, whereas an interference node represents only a single
- // sibling. So, we assign spill slots linear-scan-style by sorting all the interval endpoints
- // by position, and assigning the lowest spill slot available when we encounter an interval
- // beginning. We ignore lifetime holes for simplicity.
- ScopedArenaAllocator allocator(allocator_->GetArenaStack());
- ScopedArenaVector<std::tuple<size_t, bool, LiveInterval*>> interval_endpoints(
- allocator.Adapter(kArenaAllocRegisterAllocator));
-
- for (LiveInterval* parent_interval : intervals) {
- DCHECK(parent_interval->IsParent());
- DCHECK(!parent_interval->HasSpillSlot());
- size_t start = parent_interval->GetStart();
- size_t end = parent_interval->GetLastSibling()->GetEnd();
- DCHECK_LT(start, end);
- interval_endpoints.push_back(std::make_tuple(start, true, parent_interval));
- interval_endpoints.push_back(std::make_tuple(end, false, parent_interval));
- }
-
- // Sort by position.
- // We explicitly ignore the third entry of each tuple (the interval pointer) in order
- // to maintain determinism.
- std::sort(interval_endpoints.begin(), interval_endpoints.end(),
- [] (const std::tuple<size_t, bool, LiveInterval*>& lhs,
- const std::tuple<size_t, bool, LiveInterval*>& rhs) {
- return std::tie(std::get<0>(lhs), std::get<1>(lhs))
- < std::tie(std::get<0>(rhs), std::get<1>(rhs));
- });
-
- ArenaBitVector taken(&allocator, 0, true, kArenaAllocRegisterAllocator);
- for (auto it = interval_endpoints.begin(), end = interval_endpoints.end(); it != end; ++it) {
- // Extract information from the current tuple.
- LiveInterval* parent_interval;
- bool is_interval_beginning;
- size_t position;
- std::tie(position, is_interval_beginning, parent_interval) = *it;
- size_t number_of_spill_slots_needed = parent_interval->NumberOfSpillSlotsNeeded();
-
- if (is_interval_beginning) {
- DCHECK(!parent_interval->HasSpillSlot());
- DCHECK_EQ(position, parent_interval->GetStart());
-
- // Find first available free stack slot(s).
- size_t slot = 0;
- for (; ; ++slot) {
- bool found = true;
- for (size_t s = slot, u = slot + number_of_spill_slots_needed; s < u; s++) {
- if (taken.IsBitSet(s)) {
- found = false;
- break; // failure
- }
- }
- if (found) {
- break; // success
- }
- }
-
- parent_interval->SetSpillSlot(slot);
-
- *num_stack_slots_used = std::max(*num_stack_slots_used, slot + number_of_spill_slots_needed);
- if (number_of_spill_slots_needed > 1 && *num_stack_slots_used % 2 != 0) {
- // The parallel move resolver requires that there be an even number of spill slots
- // allocated for pair value types.
- ++(*num_stack_slots_used);
- }
-
- for (size_t s = slot, u = slot + number_of_spill_slots_needed; s < u; s++) {
- taken.SetBit(s);
- }
- } else {
- DCHECK_EQ(position, parent_interval->GetLastSibling()->GetEnd());
- DCHECK(parent_interval->HasSpillSlot());
-
- // Free up the stack slot(s) used by this interval.
- size_t slot = parent_interval->GetSpillSlot();
- for (size_t s = slot, u = slot + number_of_spill_slots_needed; s < u; s++) {
- DCHECK(taken.IsBitSet(s));
- taken.ClearBit(s);
- }
- }
- }
- DCHECK_EQ(taken.NumSetBits(), 0u);
-}
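(Aside: a compressed sketch of the endpoint-sweep idea used by ColorSpillSlots above, assuming a hypothetical `Interval` type that needs exactly one slot and omitting the pair-slot alignment handling.)

#include <algorithm>
#include <cstddef>
#include <tuple>
#include <vector>

// Hypothetical interval needing exactly one spill slot.
struct Interval {
  size_t start;
  size_t end;
  size_t slot = 0;
};

// Sort starts and ends by position (ends before starts at equal positions) and
// hand each interval the lowest slot that is free when it begins.
void AssignSlots(const std::vector<Interval*>& intervals, size_t* slots_used) {
  std::vector<std::tuple<size_t, bool, Interval*>> endpoints;
  for (Interval* interval : intervals) {
    endpoints.emplace_back(interval->start, /*is_start=*/ true, interval);
    endpoints.emplace_back(interval->end, /*is_start=*/ false, interval);
  }
  std::sort(endpoints.begin(), endpoints.end(),
            [](const auto& lhs, const auto& rhs) {
              // Ignore the interval pointer so the ordering stays deterministic.
              return std::tie(std::get<0>(lhs), std::get<1>(lhs)) <
                     std::tie(std::get<0>(rhs), std::get<1>(rhs));
            });
  std::vector<bool> taken;
  for (const auto& endpoint : endpoints) {
    Interval* interval = std::get<2>(endpoint);
    if (std::get<1>(endpoint)) {  // Interval start: claim the lowest free slot.
      size_t slot = 0;
      while (slot < taken.size() && taken[slot]) {
        ++slot;
      }
      if (slot == taken.size()) {
        taken.push_back(false);
      }
      taken[slot] = true;
      interval->slot = slot;
      *slots_used = std::max(*slots_used, slot + 1);
    } else {  // Interval end: release the slot.
      taken[interval->slot] = false;
    }
  }
}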
-
-} // namespace art
diff --git a/compiler/optimizing/register_allocator_graph_color.h b/compiler/optimizing/register_allocator_graph_color.h
deleted file mode 100644
index 0e10152049..0000000000
--- a/compiler/optimizing/register_allocator_graph_color.h
+++ /dev/null
@@ -1,195 +0,0 @@
-/*
- * Copyright (C) 2016 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef ART_COMPILER_OPTIMIZING_REGISTER_ALLOCATOR_GRAPH_COLOR_H_
-#define ART_COMPILER_OPTIMIZING_REGISTER_ALLOCATOR_GRAPH_COLOR_H_
-
-#include "arch/instruction_set.h"
-#include "base/arena_object.h"
-#include "base/array_ref.h"
-#include "base/macros.h"
-#include "base/scoped_arena_containers.h"
-#include "register_allocator.h"
-
-namespace art HIDDEN {
-
-class CodeGenerator;
-class HBasicBlock;
-class HGraph;
-class HInstruction;
-class HParallelMove;
-class Location;
-class SsaLivenessAnalysis;
-class InterferenceNode;
-struct CoalesceOpportunity;
-enum class CoalesceKind;
-
-/**
- * A graph coloring register allocator.
- *
- * The algorithm proceeds as follows:
- * (1) Build an interference graph, where nodes represent live intervals, and edges represent
- * interferences between two intervals. Coloring this graph with k colors is isomorphic to
- * finding a valid register assignment with k registers.
- * (2) To color the graph, first prune all nodes with degree less than k, since these nodes are
- * guaranteed a color. (No matter how we color their adjacent nodes, we can give them a
- * different color.) As we prune nodes from the graph, more nodes may drop below degree k,
- * enabling further pruning. The key is to maintain the pruning order in a stack, so that we
- * can color the nodes in the reverse order.
- * When there are no more nodes with degree less than k, we start pruning alternate nodes based
- * on heuristics. Since these nodes are not guaranteed a color, we are careful to
- * prioritize nodes that require a register. We also prioritize short intervals, because
- * short intervals cannot be split very much if coloring fails (see below). "Prioritizing"
- * a node amounts to pruning it later, since it will have fewer interferences if we prune other
- * nodes first.
- * (3) We color nodes in the reverse order in which we pruned them. If we cannot assign
- * a node a color, we do one of two things:
- * - If the node requires a register, we consider the current coloring attempt a failure.
- * However, we split the node's live interval in order to make the interference graph
- * sparser, so that future coloring attempts may succeed.
- * - If the node does not require a register, we simply assign it a location on the stack.
- *
- * If iterative move coalescing is enabled, the algorithm also attempts to conservatively
- * combine nodes in the graph that would prefer to have the same color. (For example, the output
- * of a phi instruction would prefer to have the same register as at least one of its inputs.)
- * There are several additional steps involved with this:
- * - We look for coalesce opportunities by examining each live interval, a step similar to that
- * used by linear scan when looking for register hints.
- * - When pruning the graph, we maintain a worklist of coalesce opportunities, as well as a worklist
- * of low degree nodes that have associated coalesce opportunities. Only when we run out of
- * coalesce opportunities do we start pruning coalesce-associated nodes.
- * - When pruning a node, if any nodes transition from high degree to low degree, we add
- * associated coalesce opportunities to the worklist, since these opportunities may now succeed.
- * - Whether two nodes can be combined is decided by two different heuristics--one used when
- * coalescing uncolored nodes, and one used for coalescing an uncolored node with a colored node.
- * It is vital that we only combine two nodes if the node that remains is guaranteed to receive
- * a color. This is because additional spilling is more costly than failing to coalesce.
- * - Even if nodes are not coalesced while pruning, we keep the coalesce opportunities around
- * to be used as last-chance register hints when coloring. If nothing else, we try to use
- * caller-save registers before callee-save registers.
- *
- * A good reference for graph coloring register allocation is
- * "Modern Compiler Implementation in Java" (Andrew W. Appel, 2nd Edition).
- */
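(Aside, not part of the deleted header: steps (2) and (3) above are the classic Chaitin-Briggs simplify/select loop. A bare-bones sketch with a hypothetical `GraphNode` type, and none of the coalescing, splitting, or spill machinery, might look like the following.)

#include <cstddef>
#include <stack>
#include <unordered_set>
#include <vector>

// Hypothetical interference-graph node used only for this sketch.
struct GraphNode {
  std::unordered_set<GraphNode*> adjacent;
  int color = -1;  // -1 means "not colored".
};

bool ColorGraph(const std::vector<GraphNode*>& nodes, size_t k) {
  std::unordered_set<GraphNode*> remaining(nodes.begin(), nodes.end());
  std::stack<GraphNode*> pruned;
  // Simplify: repeatedly prune a node whose degree among remaining nodes is
  // below k; if none exists, prune an arbitrary node as a potential spill.
  while (!remaining.empty()) {
    GraphNode* candidate = nullptr;
    for (GraphNode* node : remaining) {
      size_t degree = 0;
      for (GraphNode* adj : node->adjacent) {
        degree += remaining.count(adj);
      }
      if (degree < k) {
        candidate = node;
        break;
      }
    }
    if (candidate == nullptr) {
      candidate = *remaining.begin();  // Potential spill.
    }
    remaining.erase(candidate);
    pruned.push(candidate);
  }
  // Select: color nodes in reverse pruning order; later-pruned neighbors are
  // already colored, earlier-pruned ones are still uncolored.
  bool success = true;
  while (!pruned.empty()) {
    GraphNode* node = pruned.top();
    pruned.pop();
    std::vector<bool> used(k, false);
    for (GraphNode* adj : node->adjacent) {
      if (adj->color >= 0) {
        used[static_cast<size_t>(adj->color)] = true;
      }
    }
    size_t color = 0;
    while (color < k && used[color]) {
      ++color;
    }
    if (color < k) {
      node->color = static_cast<int>(color);
    } else {
      success = false;  // Would be split or spilled by a real allocator.
    }
  }
  return success;
}

In the real allocator, a node that cannot be colored either forces interval splitting (if it requires a register) or receives a spill slot, as seen in ColorInterferenceGraph in the .cc file above.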
-class RegisterAllocatorGraphColor : public RegisterAllocator {
- public:
- RegisterAllocatorGraphColor(ScopedArenaAllocator* allocator,
- CodeGenerator* codegen,
- const SsaLivenessAnalysis& analysis,
- bool iterative_move_coalescing = true);
- ~RegisterAllocatorGraphColor() override;
-
- void AllocateRegisters() override;
-
- bool Validate(bool log_fatal_on_failure) override;
-
- private:
- // Collect all intervals and prepare for register allocation.
- void ProcessInstructions();
- void ProcessInstruction(HInstruction* instruction);
-
- // If any inputs require specific registers, block those registers
- // at the position of this instruction.
- void CheckForFixedInputs(HInstruction* instruction);
-
- // If the output of an instruction requires a specific register, split
- // the interval and assign the register to the first part.
- void CheckForFixedOutput(HInstruction* instruction);
-
- // Add all applicable safepoints to a live interval.
- // Currently depends on instruction processing order.
- void AddSafepointsFor(HInstruction* instruction);
-
- // Collect all live intervals associated with the temporary locations
- // needed by an instruction.
- void CheckForTempLiveIntervals(HInstruction* instruction);
-
- // If a safe point is needed, add a synthesized interval to later record
- // the number of live registers at this point.
- void CheckForSafepoint(HInstruction* instruction);
-
- // Try to remove the SuspendCheck at function entry. Returns true if it was successful.
- bool TryRemoveSuspendCheckEntry(HInstruction* instruction);
-
- // Split an interval, but only if `position` is inside of `interval`.
- // Return either the new interval, or the original interval if not split.
- static LiveInterval* TrySplit(LiveInterval* interval, size_t position);
-
- // To ensure every graph can be colored, split live intervals
- // at their register defs and uses. This creates short intervals with low
- // degree in the interference graph, which are prioritized during graph
- // coloring.
- void SplitAtRegisterUses(LiveInterval* interval);
-
- // If the given instruction is a catch phi, give it a spill slot.
- void AllocateSpillSlotForCatchPhi(HInstruction* instruction);
-
- // Ensure that the given register cannot be allocated for a given range.
- void BlockRegister(Location location, size_t start, size_t end);
- void BlockRegisters(size_t start, size_t end, bool caller_save_only = false);
-
- bool IsCallerSave(size_t reg, bool processing_core_regs);
-
- // Assigns stack slots to a list of intervals, ensuring that interfering intervals are not
- // assigned the same stack slot.
- void ColorSpillSlots(ArrayRef<LiveInterval* const> nodes, /* out */ size_t* num_stack_slots_used);
-
- // Provide stack slots to nodes that need them.
- void AllocateSpillSlots(ArrayRef<InterferenceNode* const> nodes);
-
- // Whether iterative move coalescing should be performed. Iterative move coalescing
- // improves code quality, but increases compile time.
- const bool iterative_move_coalescing_;
-
- // Live intervals, split by kind (core and floating point).
- // These should not contain high intervals, as those are represented by
- // the corresponding low interval throughout register allocation.
- ScopedArenaVector<LiveInterval*> core_intervals_;
- ScopedArenaVector<LiveInterval*> fp_intervals_;
-
- // Intervals for temporaries, saved for special handling in the resolution phase.
- ScopedArenaVector<LiveInterval*> temp_intervals_;
-
- // Safepoints, saved for special handling while processing instructions.
- ScopedArenaVector<HInstruction*> safepoints_;
-
- // Interference nodes representing specific registers. These are "pre-colored" nodes
- // in the interference graph.
- ScopedArenaVector<InterferenceNode*> physical_core_nodes_;
- ScopedArenaVector<InterferenceNode*> physical_fp_nodes_;
-
- // Allocated stack slot counters.
- size_t num_int_spill_slots_;
- size_t num_double_spill_slots_;
- size_t num_float_spill_slots_;
- size_t num_long_spill_slots_;
- size_t catch_phi_spill_slot_counter_;
-
- // Number of stack slots needed for the pointer to the current method.
- // This is 1 for 32-bit architectures, and 2 for 64-bit architectures.
- const size_t reserved_art_method_slots_;
-
- // Number of stack slots needed for outgoing arguments.
- const size_t reserved_out_slots_;
-
- friend class ColoringIteration;
-
- DISALLOW_COPY_AND_ASSIGN(RegisterAllocatorGraphColor);
-};
-
-} // namespace art
-
-#endif // ART_COMPILER_OPTIMIZING_REGISTER_ALLOCATOR_GRAPH_COLOR_H_
diff --git a/compiler/optimizing/register_allocator_linear_scan.cc b/compiler/optimizing/register_allocator_linear_scan.cc
index fcdaa2d34f..a3029f56c6 100644
--- a/compiler/optimizing/register_allocator_linear_scan.cc
+++ b/compiler/optimizing/register_allocator_linear_scan.cc
@@ -355,9 +355,14 @@ void RegisterAllocatorLinearScan::CheckForFixedInputs(HInstruction* instruction)
Location input = locations->InAt(i);
if (input.IsRegister() || input.IsFpuRegister()) {
BlockRegister(input, position, position + 1);
+ // Ensure that an explicit input register is marked as being allocated.
+ codegen_->AddAllocatedRegister(input);
} else if (input.IsPair()) {
BlockRegister(input.ToLow(), position, position + 1);
BlockRegister(input.ToHigh(), position, position + 1);
+ // Ensure that an explicit input register pair is marked as being allocated.
+ codegen_->AddAllocatedRegister(input.ToLow());
+ codegen_->AddAllocatedRegister(input.ToHigh());
}
}
}
@@ -417,6 +422,8 @@ void RegisterAllocatorLinearScan::CheckForFixedOutput(HInstruction* instruction)
current->SetFrom(position + 1);
current->SetRegister(output.reg());
BlockRegister(output, position, position + 1);
+ // Ensure that an explicit output register is marked as being allocated.
+ codegen_->AddAllocatedRegister(output);
} else if (output.IsPair()) {
current->SetFrom(position + 1);
current->SetRegister(output.low());
@@ -425,6 +432,9 @@ void RegisterAllocatorLinearScan::CheckForFixedOutput(HInstruction* instruction)
high->SetFrom(position + 1);
BlockRegister(output.ToLow(), position, position + 1);
BlockRegister(output.ToHigh(), position, position + 1);
+ // Ensure that an explicit output register pair is marked as being allocated.
+ codegen_->AddAllocatedRegister(output.ToLow());
+ codegen_->AddAllocatedRegister(output.ToHigh());
} else if (output.IsStackSlot() || output.IsDoubleStackSlot()) {
current->SetSpillSlot(output.GetStackIndex());
} else {
@@ -1208,8 +1218,7 @@ void RegisterAllocatorLinearScan::AllocateSpillSlotForCatchPhi(HPhi* phi) {
LiveInterval* interval = phi->GetLiveInterval();
HInstruction* previous_phi = phi->GetPrevious();
- DCHECK(previous_phi == nullptr ||
- previous_phi->AsPhi()->GetRegNumber() <= phi->GetRegNumber())
+ DCHECK(previous_phi == nullptr || previous_phi->AsPhi()->GetRegNumber() <= phi->GetRegNumber())
<< "Phis expected to be sorted by vreg number, so that equivalent phis are adjacent.";
if (phi->IsVRegEquivalentOf(previous_phi)) {
diff --git a/compiler/optimizing/register_allocator_test.cc b/compiler/optimizing/register_allocator_test.cc
index d316aa5dc2..0d2d20682d 100644
--- a/compiler/optimizing/register_allocator_test.cc
+++ b/compiler/optimizing/register_allocator_test.cc
@@ -84,7 +84,8 @@ class RegisterAllocatorTest : public CommonCompilerTest, public OptimizingUnitTe
TEST_F(RegisterAllocatorTest, test_name##_LinearScan) {\
test_name(Strategy::kRegisterAllocatorLinearScan);\
}\
-TEST_F(RegisterAllocatorTest, test_name##_GraphColor) {\
+/* Note: The graph coloring register allocator has been removed, so the test is DISABLED. */ \
+TEST_F(RegisterAllocatorTest, DISABLED_##test_name##_GraphColor) {\
test_name(Strategy::kRegisterAllocatorGraphColor);\
}
diff --git a/compiler/optimizing/scheduler.cc b/compiler/optimizing/scheduler.cc
index 116f52605e..4c68844dbb 100644
--- a/compiler/optimizing/scheduler.cc
+++ b/compiler/optimizing/scheduler.cc
@@ -108,7 +108,6 @@ static bool IsArrayAccess(const HInstruction* instruction) {
static bool IsInstanceFieldAccess(const HInstruction* instruction) {
return instruction->IsInstanceFieldGet() ||
instruction->IsInstanceFieldSet() ||
- instruction->IsPredicatedInstanceFieldGet() ||
instruction->IsUnresolvedInstanceFieldGet() ||
instruction->IsUnresolvedInstanceFieldSet();
}
@@ -123,7 +122,6 @@ static bool IsStaticFieldAccess(const HInstruction* instruction) {
static bool IsResolvedFieldAccess(const HInstruction* instruction) {
return instruction->IsInstanceFieldGet() ||
instruction->IsInstanceFieldSet() ||
- instruction->IsPredicatedInstanceFieldGet() ||
instruction->IsStaticFieldGet() ||
instruction->IsStaticFieldSet();
}
@@ -149,9 +147,7 @@ size_t SideEffectDependencyAnalysis::MemoryDependencyAnalysis::FieldAccessHeapLo
DCHECK(GetFieldInfo(instr) != nullptr);
DCHECK(heap_location_collector_ != nullptr);
- HInstruction* ref = instr->IsPredicatedInstanceFieldGet()
- ? instr->AsPredicatedInstanceFieldGet()->GetTarget()
- : instr->InputAt(0);
+ HInstruction* ref = instr->InputAt(0);
size_t heap_loc = heap_location_collector_->GetFieldHeapLocation(ref, GetFieldInfo(instr));
// This field access should be analyzed and added to HeapLocationCollector before.
DCHECK(heap_loc != HeapLocationCollector::kHeapLocationNotFound);
@@ -490,9 +486,9 @@ SchedulingNode* CriticalPathSchedulingNodeSelector::SelectMaterializedCondition(
DCHECK(instruction != nullptr);
if (instruction->IsIf()) {
- condition = instruction->AsIf()->InputAt(0)->AsCondition();
+ condition = instruction->AsIf()->InputAt(0)->AsConditionOrNull();
} else if (instruction->IsSelect()) {
- condition = instruction->AsSelect()->GetCondition()->AsCondition();
+ condition = instruction->AsSelect()->GetCondition()->AsConditionOrNull();
}
SchedulingNode* condition_node = (condition != nullptr) ? graph.GetNode(condition) : nullptr;
@@ -554,7 +550,7 @@ void HScheduler::Schedule(HGraph* graph) {
// should run the analysis or not.
const HeapLocationCollector* heap_location_collector = nullptr;
ScopedArenaAllocator allocator(graph->GetArenaStack());
- LoadStoreAnalysis lsa(graph, /*stats=*/nullptr, &allocator, LoadStoreAnalysisType::kBasic);
+ LoadStoreAnalysis lsa(graph, /*stats=*/nullptr, &allocator);
if (!only_optimize_loop_blocks_ || graph->HasLoops()) {
lsa.Run();
heap_location_collector = &lsa.GetHeapLocationCollector();
@@ -734,8 +730,6 @@ bool HScheduler::IsSchedulable(const HInstruction* instruction) const {
instruction->IsCurrentMethod() ||
instruction->IsDivZeroCheck() ||
(instruction->IsInstanceFieldGet() && !instruction->AsInstanceFieldGet()->IsVolatile()) ||
- (instruction->IsPredicatedInstanceFieldGet() &&
- !instruction->AsPredicatedInstanceFieldGet()->IsVolatile()) ||
(instruction->IsInstanceFieldSet() && !instruction->AsInstanceFieldSet()->IsVolatile()) ||
instruction->IsInstanceOf() ||
instruction->IsInvokeInterface() ||
diff --git a/compiler/optimizing/scheduler_arm.cc b/compiler/optimizing/scheduler_arm.cc
index 3f931c4c49..cafb0f5da6 100644
--- a/compiler/optimizing/scheduler_arm.cc
+++ b/compiler/optimizing/scheduler_arm.cc
@@ -610,7 +610,7 @@ void SchedulingLatencyVisitorARM::VisitDataProcWithShifterOp(HDataProcWithShifte
}
}
-void SchedulingLatencyVisitorARM::VisitIntermediateAddress(HIntermediateAddress* ATTRIBUTE_UNUSED) {
+void SchedulingLatencyVisitorARM::VisitIntermediateAddress([[maybe_unused]] HIntermediateAddress*) {
// Although the code generated is a simple `add` instruction, we found through empirical results
// that spacing it from its use in memory accesses was beneficial.
last_visited_internal_latency_ = kArmNopLatency;
@@ -618,11 +618,11 @@ void SchedulingLatencyVisitorARM::VisitIntermediateAddress(HIntermediateAddress*
}
void SchedulingLatencyVisitorARM::VisitIntermediateAddressIndex(
- HIntermediateAddressIndex* ATTRIBUTE_UNUSED) {
+ [[maybe_unused]] HIntermediateAddressIndex*) {
UNIMPLEMENTED(FATAL) << "IntermediateAddressIndex is not implemented for ARM";
}
-void SchedulingLatencyVisitorARM::VisitMultiplyAccumulate(HMultiplyAccumulate* ATTRIBUTE_UNUSED) {
+void SchedulingLatencyVisitorARM::VisitMultiplyAccumulate([[maybe_unused]] HMultiplyAccumulate*) {
last_visited_latency_ = kArmMulIntegerLatency;
}
@@ -669,7 +669,7 @@ void SchedulingLatencyVisitorARM::VisitArrayGet(HArrayGet* instruction) {
}
case DataType::Type::kReference: {
- if (gUseReadBarrier && kUseBakerReadBarrier) {
+ if (codegen_->EmitBakerReadBarrier()) {
last_visited_latency_ = kArmLoadWithBakerReadBarrierLatency;
} else {
if (index->IsConstant()) {
@@ -806,7 +806,7 @@ void SchedulingLatencyVisitorARM::VisitArraySet(HArraySet* instruction) {
}
}
-void SchedulingLatencyVisitorARM::VisitBoundsCheck(HBoundsCheck* ATTRIBUTE_UNUSED) {
+void SchedulingLatencyVisitorARM::VisitBoundsCheck([[maybe_unused]] HBoundsCheck*) {
last_visited_internal_latency_ = kArmIntegerOpLatency;
// Users do not use any data results.
last_visited_latency_ = 0;
@@ -853,11 +853,6 @@ void SchedulingLatencyVisitorARM::VisitDiv(HDiv* instruction) {
}
}
-void SchedulingLatencyVisitorARM::VisitPredicatedInstanceFieldGet(
- HPredicatedInstanceFieldGet* instruction) {
- HandleFieldGetLatencies(instruction, instruction->GetFieldInfo());
-}
-
void SchedulingLatencyVisitorARM::VisitInstanceFieldGet(HInstanceFieldGet* instruction) {
HandleFieldGetLatencies(instruction, instruction->GetFieldInfo());
}
@@ -866,22 +861,22 @@ void SchedulingLatencyVisitorARM::VisitInstanceFieldSet(HInstanceFieldSet* instr
HandleFieldSetLatencies(instruction, instruction->GetFieldInfo());
}
-void SchedulingLatencyVisitorARM::VisitInstanceOf(HInstanceOf* ATTRIBUTE_UNUSED) {
+void SchedulingLatencyVisitorARM::VisitInstanceOf([[maybe_unused]] HInstanceOf*) {
last_visited_internal_latency_ = kArmCallInternalLatency;
last_visited_latency_ = kArmIntegerOpLatency;
}
-void SchedulingLatencyVisitorARM::VisitInvoke(HInvoke* ATTRIBUTE_UNUSED) {
+void SchedulingLatencyVisitorARM::VisitInvoke([[maybe_unused]] HInvoke*) {
last_visited_internal_latency_ = kArmCallInternalLatency;
last_visited_latency_ = kArmCallLatency;
}
-void SchedulingLatencyVisitorARM::VisitLoadString(HLoadString* ATTRIBUTE_UNUSED) {
+void SchedulingLatencyVisitorARM::VisitLoadString([[maybe_unused]] HLoadString*) {
last_visited_internal_latency_ = kArmLoadStringInternalLatency;
last_visited_latency_ = kArmMemoryLoadLatency;
}
-void SchedulingLatencyVisitorARM::VisitNewArray(HNewArray* ATTRIBUTE_UNUSED) {
+void SchedulingLatencyVisitorARM::VisitNewArray([[maybe_unused]] HNewArray*) {
last_visited_internal_latency_ = kArmIntegerOpLatency + kArmCallInternalLatency;
last_visited_latency_ = kArmCallLatency;
}
@@ -918,9 +913,7 @@ void SchedulingLatencyVisitorARM::VisitRem(HRem* instruction) {
void SchedulingLatencyVisitorARM::HandleFieldGetLatencies(HInstruction* instruction,
const FieldInfo& field_info) {
- DCHECK(instruction->IsInstanceFieldGet() ||
- instruction->IsStaticFieldGet() ||
- instruction->IsPredicatedInstanceFieldGet());
+ DCHECK(instruction->IsInstanceFieldGet() || instruction->IsStaticFieldGet());
DCHECK(codegen_ != nullptr);
bool is_volatile = field_info.IsVolatile();
DataType::Type field_type = field_info.GetFieldType();
@@ -937,7 +930,7 @@ void SchedulingLatencyVisitorARM::HandleFieldGetLatencies(HInstruction* instruct
break;
case DataType::Type::kReference:
- if (gUseReadBarrier && kUseBakerReadBarrier) {
+ if (codegen_->EmitBakerReadBarrier()) {
last_visited_internal_latency_ = kArmMemoryLoadLatency + kArmIntegerOpLatency;
last_visited_latency_ = kArmMemoryLoadLatency;
} else {
diff --git a/compiler/optimizing/scheduler_arm.h b/compiler/optimizing/scheduler_arm.h
index 0da21c187f..cf00fa12a3 100644
--- a/compiler/optimizing/scheduler_arm.h
+++ b/compiler/optimizing/scheduler_arm.h
@@ -53,7 +53,7 @@ class SchedulingLatencyVisitorARM final : public SchedulingLatencyVisitor {
: codegen_(down_cast<CodeGeneratorARMVIXL*>(codegen)) {}
// Default visitor for instructions not handled specifically below.
- void VisitInstruction(HInstruction* ATTRIBUTE_UNUSED) override {
+ void VisitInstruction([[maybe_unused]] HInstruction*) override {
last_visited_latency_ = kArmIntegerOpLatency;
}
@@ -77,7 +77,6 @@ class SchedulingLatencyVisitorARM final : public SchedulingLatencyVisitor {
M(Condition, unused) \
M(Compare, unused) \
M(BoundsCheck, unused) \
- M(PredicatedInstanceFieldGet, unused) \
M(InstanceFieldGet, unused) \
M(InstanceFieldSet, unused) \
M(InstanceOf, unused) \
diff --git a/compiler/optimizing/scheduler_arm64.cc b/compiler/optimizing/scheduler_arm64.cc
index 3071afd951..5113cf446d 100644
--- a/compiler/optimizing/scheduler_arm64.cc
+++ b/compiler/optimizing/scheduler_arm64.cc
@@ -30,30 +30,30 @@ void SchedulingLatencyVisitorARM64::VisitBinaryOperation(HBinaryOperation* instr
}
void SchedulingLatencyVisitorARM64::VisitBitwiseNegatedRight(
- HBitwiseNegatedRight* ATTRIBUTE_UNUSED) {
+ [[maybe_unused]] HBitwiseNegatedRight*) {
last_visited_latency_ = kArm64IntegerOpLatency;
}
void SchedulingLatencyVisitorARM64::VisitDataProcWithShifterOp(
- HDataProcWithShifterOp* ATTRIBUTE_UNUSED) {
+ [[maybe_unused]] HDataProcWithShifterOp*) {
last_visited_latency_ = kArm64DataProcWithShifterOpLatency;
}
void SchedulingLatencyVisitorARM64::VisitIntermediateAddress(
- HIntermediateAddress* ATTRIBUTE_UNUSED) {
+ [[maybe_unused]] HIntermediateAddress*) {
// Although the code generated is a simple `add` instruction, we found through empirical results
// that spacing it from its use in memory accesses was beneficial.
last_visited_latency_ = kArm64IntegerOpLatency + 2;
}
void SchedulingLatencyVisitorARM64::VisitIntermediateAddressIndex(
- HIntermediateAddressIndex* instr ATTRIBUTE_UNUSED) {
+ [[maybe_unused]] HIntermediateAddressIndex* instr) {
// Although the code generated is a simple `add` instruction, we found through empirical results
// that spacing it from its use in memory accesses was beneficial.
last_visited_latency_ = kArm64DataProcWithShifterOpLatency + 2;
}
-void SchedulingLatencyVisitorARM64::VisitMultiplyAccumulate(HMultiplyAccumulate* ATTRIBUTE_UNUSED) {
+void SchedulingLatencyVisitorARM64::VisitMultiplyAccumulate([[maybe_unused]] HMultiplyAccumulate*) {
last_visited_latency_ = kArm64MulIntegerLatency;
}
@@ -65,15 +65,15 @@ void SchedulingLatencyVisitorARM64::VisitArrayGet(HArrayGet* instruction) {
last_visited_latency_ = kArm64MemoryLoadLatency;
}
-void SchedulingLatencyVisitorARM64::VisitArrayLength(HArrayLength* ATTRIBUTE_UNUSED) {
+void SchedulingLatencyVisitorARM64::VisitArrayLength([[maybe_unused]] HArrayLength*) {
last_visited_latency_ = kArm64MemoryLoadLatency;
}
-void SchedulingLatencyVisitorARM64::VisitArraySet(HArraySet* ATTRIBUTE_UNUSED) {
+void SchedulingLatencyVisitorARM64::VisitArraySet([[maybe_unused]] HArraySet*) {
last_visited_latency_ = kArm64MemoryStoreLatency;
}
-void SchedulingLatencyVisitorARM64::VisitBoundsCheck(HBoundsCheck* ATTRIBUTE_UNUSED) {
+void SchedulingLatencyVisitorARM64::VisitBoundsCheck([[maybe_unused]] HBoundsCheck*) {
last_visited_internal_latency_ = kArm64IntegerOpLatency;
// Users do not use any data results.
last_visited_latency_ = 0;
@@ -113,21 +113,21 @@ void SchedulingLatencyVisitorARM64::VisitDiv(HDiv* instr) {
}
}
-void SchedulingLatencyVisitorARM64::VisitInstanceFieldGet(HInstanceFieldGet* ATTRIBUTE_UNUSED) {
+void SchedulingLatencyVisitorARM64::VisitInstanceFieldGet([[maybe_unused]] HInstanceFieldGet*) {
last_visited_latency_ = kArm64MemoryLoadLatency;
}
-void SchedulingLatencyVisitorARM64::VisitInstanceOf(HInstanceOf* ATTRIBUTE_UNUSED) {
+void SchedulingLatencyVisitorARM64::VisitInstanceOf([[maybe_unused]] HInstanceOf*) {
last_visited_internal_latency_ = kArm64CallInternalLatency;
last_visited_latency_ = kArm64IntegerOpLatency;
}
-void SchedulingLatencyVisitorARM64::VisitInvoke(HInvoke* ATTRIBUTE_UNUSED) {
+void SchedulingLatencyVisitorARM64::VisitInvoke([[maybe_unused]] HInvoke*) {
last_visited_internal_latency_ = kArm64CallInternalLatency;
last_visited_latency_ = kArm64CallLatency;
}
-void SchedulingLatencyVisitorARM64::VisitLoadString(HLoadString* ATTRIBUTE_UNUSED) {
+void SchedulingLatencyVisitorARM64::VisitLoadString([[maybe_unused]] HLoadString*) {
last_visited_internal_latency_ = kArm64LoadStringInternalLatency;
last_visited_latency_ = kArm64MemoryLoadLatency;
}
@@ -138,7 +138,7 @@ void SchedulingLatencyVisitorARM64::VisitMul(HMul* instr) {
: kArm64MulIntegerLatency;
}
-void SchedulingLatencyVisitorARM64::VisitNewArray(HNewArray* ATTRIBUTE_UNUSED) {
+void SchedulingLatencyVisitorARM64::VisitNewArray([[maybe_unused]] HNewArray*) {
last_visited_internal_latency_ = kArm64IntegerOpLatency + kArm64CallInternalLatency;
last_visited_latency_ = kArm64CallLatency;
}
@@ -181,7 +181,7 @@ void SchedulingLatencyVisitorARM64::VisitRem(HRem* instruction) {
}
}
-void SchedulingLatencyVisitorARM64::VisitStaticFieldGet(HStaticFieldGet* ATTRIBUTE_UNUSED) {
+void SchedulingLatencyVisitorARM64::VisitStaticFieldGet([[maybe_unused]] HStaticFieldGet*) {
last_visited_latency_ = kArm64MemoryLoadLatency;
}
@@ -211,7 +211,7 @@ void SchedulingLatencyVisitorARM64::HandleSimpleArithmeticSIMD(HVecOperation *in
}
void SchedulingLatencyVisitorARM64::VisitVecReplicateScalar(
- HVecReplicateScalar* instr ATTRIBUTE_UNUSED) {
+ [[maybe_unused]] HVecReplicateScalar* instr) {
last_visited_latency_ = kArm64SIMDReplicateOpLatency;
}
@@ -223,7 +223,7 @@ void SchedulingLatencyVisitorARM64::VisitVecReduce(HVecReduce* instr) {
HandleSimpleArithmeticSIMD(instr);
}
-void SchedulingLatencyVisitorARM64::VisitVecCnv(HVecCnv* instr ATTRIBUTE_UNUSED) {
+void SchedulingLatencyVisitorARM64::VisitVecCnv([[maybe_unused]] HVecCnv* instr) {
last_visited_latency_ = kArm64SIMDTypeConversionInt2FPLatency;
}
@@ -279,19 +279,19 @@ void SchedulingLatencyVisitorARM64::VisitVecMax(HVecMax* instr) {
HandleSimpleArithmeticSIMD(instr);
}
-void SchedulingLatencyVisitorARM64::VisitVecAnd(HVecAnd* instr ATTRIBUTE_UNUSED) {
+void SchedulingLatencyVisitorARM64::VisitVecAnd([[maybe_unused]] HVecAnd* instr) {
last_visited_latency_ = kArm64SIMDIntegerOpLatency;
}
-void SchedulingLatencyVisitorARM64::VisitVecAndNot(HVecAndNot* instr ATTRIBUTE_UNUSED) {
+void SchedulingLatencyVisitorARM64::VisitVecAndNot([[maybe_unused]] HVecAndNot* instr) {
last_visited_latency_ = kArm64SIMDIntegerOpLatency;
}
-void SchedulingLatencyVisitorARM64::VisitVecOr(HVecOr* instr ATTRIBUTE_UNUSED) {
+void SchedulingLatencyVisitorARM64::VisitVecOr([[maybe_unused]] HVecOr* instr) {
last_visited_latency_ = kArm64SIMDIntegerOpLatency;
}
-void SchedulingLatencyVisitorARM64::VisitVecXor(HVecXor* instr ATTRIBUTE_UNUSED) {
+void SchedulingLatencyVisitorARM64::VisitVecXor([[maybe_unused]] HVecXor* instr) {
last_visited_latency_ = kArm64SIMDIntegerOpLatency;
}
@@ -312,13 +312,12 @@ void SchedulingLatencyVisitorARM64::VisitVecSetScalars(HVecSetScalars* instr) {
}
void SchedulingLatencyVisitorARM64::VisitVecMultiplyAccumulate(
- HVecMultiplyAccumulate* instr ATTRIBUTE_UNUSED) {
+ [[maybe_unused]] HVecMultiplyAccumulate* instr) {
last_visited_latency_ = kArm64SIMDMulIntegerLatency;
}
-void SchedulingLatencyVisitorARM64::HandleVecAddress(
- HVecMemoryOperation* instruction,
- size_t size ATTRIBUTE_UNUSED) {
+void SchedulingLatencyVisitorARM64::HandleVecAddress(HVecMemoryOperation* instruction,
+ [[maybe_unused]] size_t size) {
HInstruction* index = instruction->InputAt(1);
if (!index->IsConstant()) {
last_visited_internal_latency_ += kArm64DataProcWithShifterOpLatency;
diff --git a/compiler/optimizing/scheduler_arm64.h b/compiler/optimizing/scheduler_arm64.h
index ec41577e9d..7ce00e00ab 100644
--- a/compiler/optimizing/scheduler_arm64.h
+++ b/compiler/optimizing/scheduler_arm64.h
@@ -59,7 +59,7 @@ static constexpr uint32_t kArm64SIMDTypeConversionInt2FPLatency = 10;
class SchedulingLatencyVisitorARM64 final : public SchedulingLatencyVisitor {
public:
// Default visitor for instructions not handled specifically below.
- void VisitInstruction(HInstruction* ATTRIBUTE_UNUSED) override {
+ void VisitInstruction([[maybe_unused]] HInstruction*) override {
last_visited_latency_ = kArm64IntegerOpLatency;
}
diff --git a/compiler/optimizing/scheduler_test.cc b/compiler/optimizing/scheduler_test.cc
index 165bfe3d94..c2b1fd6f7c 100644
--- a/compiler/optimizing/scheduler_test.cc
+++ b/compiler/optimizing/scheduler_test.cc
@@ -274,8 +274,7 @@ class SchedulerTest : public CommonCompilerTest, public OptimizingUnitTestHelper
entry->AddInstruction(instr);
}
- HeapLocationCollector heap_location_collector(
- graph_, GetScopedAllocator(), LoadStoreAnalysisType::kBasic);
+ HeapLocationCollector heap_location_collector(graph_, GetScopedAllocator());
heap_location_collector.VisitBasicBlock(entry);
heap_location_collector.BuildAliasingMatrix();
TestSchedulingGraph scheduling_graph(GetScopedAllocator(), &heap_location_collector);
diff --git a/compiler/optimizing/select_generator.cc b/compiler/optimizing/select_generator.cc
index 6a10440d11..07065efbb7 100644
--- a/compiler/optimizing/select_generator.cc
+++ b/compiler/optimizing/select_generator.cc
@@ -46,8 +46,7 @@ static bool IsSimpleBlock(HBasicBlock* block) {
} else if (instruction->CanBeMoved() &&
!instruction->HasSideEffects() &&
!instruction->CanThrow()) {
- if (instruction->IsSelect() &&
- instruction->AsSelect()->GetCondition()->GetBlock() == block) {
+ if (instruction->IsSelect() && instruction->AsSelect()->GetCondition()->GetBlock() == block) {
// Count one HCondition and HSelect in the same block as a single instruction.
// This enables finding nested selects.
continue;
diff --git a/compiler/optimizing/ssa_builder.cc b/compiler/optimizing/ssa_builder.cc
index a658252e69..2179bf50b5 100644
--- a/compiler/optimizing/ssa_builder.cc
+++ b/compiler/optimizing/ssa_builder.cc
@@ -604,7 +604,7 @@ GraphAnalysisResult SsaBuilder::BuildSsa() {
*/
HFloatConstant* SsaBuilder::GetFloatEquivalent(HIntConstant* constant) {
// We place the floating point constant next to this constant.
- HFloatConstant* result = constant->GetNext()->AsFloatConstant();
+ HFloatConstant* result = constant->GetNext()->AsFloatConstantOrNull();
if (result == nullptr) {
float value = bit_cast<float, int32_t>(constant->GetValue());
result = new (graph_->GetAllocator()) HFloatConstant(value);
@@ -626,7 +626,7 @@ HFloatConstant* SsaBuilder::GetFloatEquivalent(HIntConstant* constant) {
*/
HDoubleConstant* SsaBuilder::GetDoubleEquivalent(HLongConstant* constant) {
// We place the floating point constant next to this constant.
- HDoubleConstant* result = constant->GetNext()->AsDoubleConstant();
+ HDoubleConstant* result = constant->GetNext()->AsDoubleConstantOrNull();
if (result == nullptr) {
double value = bit_cast<double, int64_t>(constant->GetValue());
result = new (graph_->GetAllocator()) HDoubleConstant(value);
@@ -652,16 +652,16 @@ HPhi* SsaBuilder::GetFloatDoubleOrReferenceEquivalentOfPhi(HPhi* phi, DataType::
// We place the floating point /reference phi next to this phi.
HInstruction* next = phi->GetNext();
- if (next != nullptr
- && next->AsPhi()->GetRegNumber() == phi->GetRegNumber()
- && next->GetType() != type) {
+ if (next != nullptr &&
+ next->AsPhi()->GetRegNumber() == phi->GetRegNumber() &&
+ next->GetType() != type) {
// Move to the next phi to see if it is the one we are looking for.
next = next->GetNext();
}
- if (next == nullptr
- || (next->AsPhi()->GetRegNumber() != phi->GetRegNumber())
- || (next->GetType() != type)) {
+ if (next == nullptr ||
+ (next->AsPhi()->GetRegNumber() != phi->GetRegNumber()) ||
+ (next->GetType() != type)) {
ArenaAllocator* allocator = graph_->GetAllocator();
HInputsRef inputs = phi->GetInputs();
HPhi* new_phi = new (allocator) HPhi(allocator, phi->GetRegNumber(), inputs.size(), type);
diff --git a/compiler/optimizing/ssa_liveness_analysis_test.cc b/compiler/optimizing/ssa_liveness_analysis_test.cc
index 2df0f34c7d..18c945381d 100644
--- a/compiler/optimizing/ssa_liveness_analysis_test.cc
+++ b/compiler/optimizing/ssa_liveness_analysis_test.cc
@@ -31,6 +31,7 @@ namespace art HIDDEN {
class SsaLivenessAnalysisTest : public OptimizingUnitTest {
protected:
void SetUp() override {
+ TEST_SETUP_DISABLED_FOR_RISCV64();
OptimizingUnitTest::SetUp();
graph_ = CreateGraph();
compiler_options_ = CommonCompilerTest::CreateCompilerOptions(kRuntimeISA, "default");
@@ -42,6 +43,11 @@ class SsaLivenessAnalysisTest : public OptimizingUnitTest {
graph_->SetEntryBlock(entry_);
}
+ void TearDown() override {
+ TEST_TEARDOWN_DISABLED_FOR_RISCV64();
+ OptimizingUnitTest::TearDown();
+ }
+
protected:
HBasicBlock* CreateSuccessor(HBasicBlock* block) {
HGraph* graph = block->GetGraph();
@@ -58,6 +64,7 @@ class SsaLivenessAnalysisTest : public OptimizingUnitTest {
};
TEST_F(SsaLivenessAnalysisTest, TestReturnArg) {
+ TEST_DISABLED_FOR_RISCV64();
HInstruction* arg = new (GetAllocator()) HParameterValue(
graph_->GetDexFile(), dex::TypeIndex(0), 0, DataType::Type::kInt32);
entry_->AddInstruction(arg);
@@ -78,6 +85,7 @@ TEST_F(SsaLivenessAnalysisTest, TestReturnArg) {
}
TEST_F(SsaLivenessAnalysisTest, TestAput) {
+ TEST_DISABLED_FOR_RISCV64();
HInstruction* array = new (GetAllocator()) HParameterValue(
graph_->GetDexFile(), dex::TypeIndex(0), 0, DataType::Type::kReference);
HInstruction* index = new (GetAllocator()) HParameterValue(
@@ -147,6 +155,7 @@ TEST_F(SsaLivenessAnalysisTest, TestAput) {
}
TEST_F(SsaLivenessAnalysisTest, TestDeoptimize) {
+ TEST_DISABLED_FOR_RISCV64();
HInstruction* array = new (GetAllocator()) HParameterValue(
graph_->GetDexFile(), dex::TypeIndex(0), 0, DataType::Type::kReference);
HInstruction* index = new (GetAllocator()) HParameterValue(
diff --git a/compiler/optimizing/ssa_phi_elimination.cc b/compiler/optimizing/ssa_phi_elimination.cc
index ce343dffec..1d9be3956a 100644
--- a/compiler/optimizing/ssa_phi_elimination.cc
+++ b/compiler/optimizing/ssa_phi_elimination.cc
@@ -76,7 +76,7 @@ void SsaDeadPhiElimination::MarkDeadPhis() {
HPhi* phi = worklist.back();
worklist.pop_back();
for (HInstruction* raw_input : phi->GetInputs()) {
- HPhi* input = raw_input->AsPhi();
+ HPhi* input = raw_input->AsPhiOrNull();
if (input != nullptr && input->IsDead()) {
// Input is a dead phi. Revive it and add to the worklist. We make sure
// that the phi was not dead initially (see definition of `initially_live`).
diff --git a/compiler/optimizing/stack_map_stream.cc b/compiler/optimizing/stack_map_stream.cc
index 1a368ed347..2ecda7610e 100644
--- a/compiler/optimizing/stack_map_stream.cc
+++ b/compiler/optimizing/stack_map_stream.cc
@@ -51,7 +51,8 @@ void StackMapStream::BeginMethod(size_t frame_size_in_bytes,
size_t fp_spill_mask,
uint32_t num_dex_registers,
bool baseline,
- bool debuggable) {
+ bool debuggable,
+ bool has_should_deoptimize_flag) {
DCHECK(!in_method_) << "Mismatched Begin/End calls";
in_method_ = true;
DCHECK_EQ(packed_frame_size_, 0u) << "BeginMethod was already called";
@@ -63,6 +64,7 @@ void StackMapStream::BeginMethod(size_t frame_size_in_bytes,
num_dex_registers_ = num_dex_registers;
baseline_ = baseline;
debuggable_ = debuggable;
+ has_should_deoptimize_flag_ = has_should_deoptimize_flag;
if (kVerifyStackMaps) {
dchecks_.emplace_back([=](const CodeInfo& code_info) {
@@ -152,8 +154,10 @@ void StackMapStream::BeginStackMapEntry(
// Create lambda method, which will be executed at the very end to verify data.
// Parameters and local variables will be captured(stored) by the lambda "[=]".
dchecks_.emplace_back([=](const CodeInfo& code_info) {
+ // The `native_pc_offset` may have been overridden using `SetStackMapNativePcOffset(.)`.
+ uint32_t final_native_pc_offset = GetStackMapNativePcOffset(stack_map_index);
if (kind == StackMap::Kind::Default || kind == StackMap::Kind::OSR) {
- StackMap stack_map = code_info.GetStackMapForNativePcOffset(native_pc_offset,
+ StackMap stack_map = code_info.GetStackMapForNativePcOffset(final_native_pc_offset,
instruction_set_);
CHECK_EQ(stack_map.Row(), stack_map_index);
} else if (kind == StackMap::Kind::Catch) {
@@ -162,7 +166,7 @@ void StackMapStream::BeginStackMapEntry(
CHECK_EQ(stack_map.Row(), stack_map_index);
}
StackMap stack_map = code_info.GetStackMapAt(stack_map_index);
- CHECK_EQ(stack_map.GetNativePcOffset(instruction_set_), native_pc_offset);
+ CHECK_EQ(stack_map.GetNativePcOffset(instruction_set_), final_native_pc_offset);
CHECK_EQ(stack_map.GetKind(), static_cast<uint32_t>(kind));
CHECK_EQ(stack_map.GetDexPc(), dex_pc);
CHECK_EQ(code_info.GetRegisterMaskOf(stack_map), register_mask);
@@ -374,10 +378,12 @@ ScopedArenaVector<uint8_t> StackMapStream::Encode() {
DCHECK(in_stack_map_ == false) << "Mismatched Begin/End calls";
DCHECK(in_inline_info_ == false) << "Mismatched Begin/End calls";
- uint32_t flags = (inline_infos_.size() > 0) ? CodeInfo::kHasInlineInfo : 0;
+ uint32_t flags = 0;
+ flags |= (inline_infos_.size() > 0) ? CodeInfo::kHasInlineInfo : 0;
flags |= baseline_ ? CodeInfo::kIsBaseline : 0;
flags |= debuggable_ ? CodeInfo::kIsDebuggable : 0;
- DCHECK_LE(flags, kVarintMax); // Ensure flags can be read directly as byte.
+ flags |= has_should_deoptimize_flag_ ? CodeInfo::kHasShouldDeoptimizeFlag : 0;
+
uint32_t bit_table_flags = 0;
ForEachBitTable([&bit_table_flags](size_t i, auto bit_table) {
if (bit_table->size() != 0) { // Record which bit-tables are stored.
@@ -409,6 +415,8 @@ ScopedArenaVector<uint8_t> StackMapStream::Encode() {
CHECK_EQ(code_info.GetNumberOfStackMaps(), stack_maps_.size());
CHECK_EQ(CodeInfo::HasInlineInfo(buffer.data()), inline_infos_.size() > 0);
CHECK_EQ(CodeInfo::IsBaseline(buffer.data()), baseline_);
+ CHECK_EQ(CodeInfo::IsDebuggable(buffer.data()), debuggable_);
+ CHECK_EQ(CodeInfo::HasShouldDeoptimizeFlag(buffer.data()), has_should_deoptimize_flag_);
// Verify all written data (usually only in debug builds).
if (kVerifyStackMaps) {
diff --git a/compiler/optimizing/stack_map_stream.h b/compiler/optimizing/stack_map_stream.h
index 643af2da94..f027850ce6 100644
--- a/compiler/optimizing/stack_map_stream.h
+++ b/compiler/optimizing/stack_map_stream.h
@@ -66,7 +66,8 @@ class StackMapStream : public DeletableArenaObject<kArenaAllocStackMapStream> {
size_t fp_spill_mask,
uint32_t num_dex_registers,
bool baseline,
- bool debuggable);
+ bool debuggable,
+ bool has_should_deoptimize_flag = false);
void EndMethod(size_t code_size);
void BeginStackMapEntry(
@@ -129,8 +130,9 @@ class StackMapStream : public DeletableArenaObject<kArenaAllocStackMapStream> {
uint32_t core_spill_mask_ = 0;
uint32_t fp_spill_mask_ = 0;
uint32_t num_dex_registers_ = 0;
- bool baseline_;
- bool debuggable_;
+ bool baseline_ = false;
+ bool debuggable_ = false;
+ bool has_should_deoptimize_flag_ = false;
BitTableBuilder<StackMap> stack_maps_;
BitTableBuilder<RegisterMask> register_masks_;
BitmapTableBuilder stack_masks_;
diff --git a/compiler/optimizing/write_barrier_elimination.cc b/compiler/optimizing/write_barrier_elimination.cc
index eb70b670fe..6182125b74 100644
--- a/compiler/optimizing/write_barrier_elimination.cc
+++ b/compiler/optimizing/write_barrier_elimination.cc
@@ -21,6 +21,9 @@
#include "base/scoped_arena_containers.h"
#include "optimizing/nodes.h"
+// TODO(b/310755375, solanes): Disable WBE while we investigate crashes.
+constexpr bool kWBEEnabled = false;
+
namespace art HIDDEN {
class WBEVisitor final : public HGraphVisitor {
@@ -153,8 +156,10 @@ class WBEVisitor final : public HGraphVisitor {
};
bool WriteBarrierElimination::Run() {
- WBEVisitor wbe_visitor(graph_, stats_);
- wbe_visitor.VisitReversePostOrder();
+ if (kWBEEnabled) {
+ WBEVisitor wbe_visitor(graph_, stats_);
+ wbe_visitor.VisitReversePostOrder();
+ }
return true;
}
diff --git a/compiler/optimizing/x86_memory_gen.cc b/compiler/optimizing/x86_memory_gen.cc
index e266618980..d86869ce0f 100644
--- a/compiler/optimizing/x86_memory_gen.cc
+++ b/compiler/optimizing/x86_memory_gen.cc
@@ -33,7 +33,7 @@ class MemoryOperandVisitor final : public HGraphVisitor {
private:
void VisitBoundsCheck(HBoundsCheck* check) override {
// Replace the length by the array itself, so that we can do compares to memory.
- HArrayLength* array_len = check->InputAt(1)->AsArrayLength();
+ HArrayLength* array_len = check->InputAt(1)->AsArrayLengthOrNull();
// We only want to replace an ArrayLength.
if (array_len == nullptr) {