Diffstat (limited to 'compiler/optimizing')
127 files changed, 19551 insertions, 16393 deletions
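A pattern that recurs throughout the hunks below, starting with bounds_check_elimination.cc, is the switch from As<Kind>() to As<Kind>OrNull() at call sites that null-check the result. What follows is a reduced, self-contained sketch of that split, not ART's real node hierarchy; it assumes the plain accessor now asserts the kind while the OrNull variant returns nullptr on a mismatch.

    // Illustrative sketch, not ART code: the As<Kind>() / As<Kind>OrNull() split.
    #include <cassert>
    #include <cstdio>

    struct HMul;

    struct HInstruction {
      virtual ~HInstruction() {}
      virtual bool IsMul() const { return false; }
      HMul* AsMulOrNull();  // Checked cast: nullptr when this is not an HMul.
      HMul* AsMul();        // Unchecked cast: asserts the kind instead of testing it.
    };

    struct HMul : HInstruction {
      bool IsMul() const override { return true; }
    };

    HMul* HInstruction::AsMulOrNull() {
      return IsMul() ? static_cast<HMul*>(this) : nullptr;
    }

    HMul* HInstruction::AsMul() {
      assert(IsMul());
      return static_cast<HMul*>(this);
    }

    int main() {
      HMul mul;
      HInstruction other;
      // Call sites that test the result, as in BCEVisitor below, use the OrNull form:
      if (HMul* m = mul.AsMulOrNull()) {
        std::printf("right-hand side is a multiply: %p\n", static_cast<void*>(m));
      }
      std::printf("non-multiply maps to null: %d\n", other.AsMulOrNull() == nullptr);
      return 0;
    }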
diff --git a/compiler/optimizing/block_builder.cc b/compiler/optimizing/block_builder.cc index 703584c537..9da2bfb8ef 100644 --- a/compiler/optimizing/block_builder.cc +++ b/compiler/optimizing/block_builder.cc @@ -20,7 +20,6 @@ #include "dex/bytecode_utils.h" #include "dex/code_item_accessors-inl.h" #include "dex/dex_file_exception_helpers.h" -#include "quicken_info.h" namespace art HIDDEN { @@ -40,9 +39,7 @@ HBasicBlockBuilder::HBasicBlockBuilder(HGraph* graph, local_allocator->Adapter(kArenaAllocGraphBuilder)), throwing_blocks_(kDefaultNumberOfThrowingBlocks, local_allocator->Adapter(kArenaAllocGraphBuilder)), - number_of_branches_(0u), - quicken_index_for_dex_pc_(std::less<uint32_t>(), - local_allocator->Adapter(kArenaAllocGraphBuilder)) {} + number_of_branches_(0u) {} HBasicBlock* HBasicBlockBuilder::MaybeCreateBlockAt(uint32_t dex_pc) { return MaybeCreateBlockAt(dex_pc, dex_pc); @@ -147,7 +144,6 @@ void HBasicBlockBuilder::ConnectBasicBlocks() { HBasicBlock* block = graph_->GetEntryBlock(); graph_->AddBlock(block); - size_t quicken_index = 0; bool is_throwing_block = false; // Calculate the qucikening index here instead of CreateBranchTargets since it's easier to // calculate in dex_pc order. @@ -158,8 +154,6 @@ void HBasicBlockBuilder::ConnectBasicBlocks() { // Check if this dex_pc address starts a new basic block. HBasicBlock* next_block = GetBlockAt(dex_pc); if (next_block != nullptr) { - // We only need quicken index entries for basic block boundaries. - quicken_index_for_dex_pc_.Put(dex_pc, quicken_index); if (block != nullptr) { // Last instruction did not end its basic block but a new one starts here. // It must have been a block falling through into the next one. @@ -169,10 +163,6 @@ void HBasicBlockBuilder::ConnectBasicBlocks() { is_throwing_block = false; graph_->AddBlock(block); } - // Make sure to increment this before the continues. - if (QuickenInfoTable::NeedsIndexForInstruction(&instruction)) { - ++quicken_index; - } if (block == nullptr) { // Ignore dead code. @@ -483,8 +473,4 @@ void HBasicBlockBuilder::BuildIntrinsic() { body->AddSuccessor(exit_block); } -size_t HBasicBlockBuilder::GetQuickenIndex(uint32_t dex_pc) const { - return quicken_index_for_dex_pc_.Get(dex_pc); -} - } // namespace art diff --git a/compiler/optimizing/block_builder.h b/compiler/optimizing/block_builder.h index 8668ef8221..1aa9375e5a 100644 --- a/compiler/optimizing/block_builder.h +++ b/compiler/optimizing/block_builder.h @@ -45,8 +45,6 @@ class HBasicBlockBuilder : public ValueObject { size_t GetNumberOfBranches() const { return number_of_branches_; } HBasicBlock* GetBlockAt(uint32_t dex_pc) const { return branch_targets_[dex_pc]; } - size_t GetQuickenIndex(uint32_t dex_pc) const; - private: // Creates a basic block starting at given `dex_pc`. HBasicBlock* MaybeCreateBlockAt(uint32_t dex_pc); @@ -83,9 +81,6 @@ class HBasicBlockBuilder : public ValueObject { ScopedArenaVector<HBasicBlock*> throwing_blocks_; size_t number_of_branches_; - // A table to quickly find the quicken index for the first instruction of a basic block. 
- ScopedArenaSafeMap<uint32_t, uint32_t> quicken_index_for_dex_pc_; - static constexpr size_t kDefaultNumberOfThrowingBlocks = 2u; DISALLOW_COPY_AND_ASSIGN(HBasicBlockBuilder); diff --git a/compiler/optimizing/bounds_check_elimination.cc b/compiler/optimizing/bounds_check_elimination.cc index 919abfdc49..c0d4c37659 100644 --- a/compiler/optimizing/bounds_check_elimination.cc +++ b/compiler/optimizing/bounds_check_elimination.cc @@ -1047,14 +1047,14 @@ class BCEVisitor final : public HGraphVisitor { HDiv* div = nullptr; int64_t const_divisor = 0; - if (HMul* mul = instruction->GetRight()->AsMul()) { + if (HMul* mul = instruction->GetRight()->AsMulOrNull()) { if (!mul->GetLeft()->IsDiv() || !mul->GetRight()->IsConstant()) { return false; } div = mul->GetLeft()->AsDiv(); const_divisor = Int64FromConstant(mul->GetRight()->AsConstant()); - } else if (HAdd* add = instruction->GetRight()->AsAdd()) { - HShl* shl = add->GetRight()->AsShl(); + } else if (HAdd* add = instruction->GetRight()->AsAddOrNull()) { + HShl* shl = add->GetRight()->AsShlOrNull(); if (!is_needed_shl(shl)) { return false; } @@ -1070,8 +1070,8 @@ class BCEVisitor final : public HGraphVisitor { return false; } const_divisor = (1LL << n) + 1; - } else if (HSub* sub = instruction->GetRight()->AsSub()) { - HShl* shl = sub->GetLeft()->AsShl(); + } else if (HSub* sub = instruction->GetRight()->AsSubOrNull()) { + HShl* shl = sub->GetLeft()->AsShlOrNull(); if (!is_needed_shl(shl)) { return false; } @@ -1378,8 +1378,7 @@ class BCEVisitor final : public HGraphVisitor { HInstruction* array_length, HInstruction* base, int32_t min_c, int32_t max_c) { - HBoundsCheck* bounds_check = - first_index_bounds_check_map_.Get(array_length->GetId())->AsBoundsCheck(); + HBoundsCheck* bounds_check = first_index_bounds_check_map_.Get(array_length->GetId()); // Construct deoptimization on single or double bounds on range [base-min_c,base+max_c], // for example either for a[0]..a[3] just 3 or for a[base-1]..a[base+3] both base-1 // and base+3, since we made the assumption any in between value may occur too. diff --git a/compiler/optimizing/code_generation_data.cc b/compiler/optimizing/code_generation_data.cc new file mode 100644 index 0000000000..7b23d46dc5 --- /dev/null +++ b/compiler/optimizing/code_generation_data.cc @@ -0,0 +1,57 @@ +/* + * Copyright (C) 2023 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "class_linker.h" +#include "code_generation_data.h" +#include "code_generator.h" +#include "intern_table.h" +#include "mirror/object-inl.h" +#include "runtime.h" + +namespace art HIDDEN { + +void CodeGenerationData::EmitJitRoots( + /*out*/std::vector<Handle<mirror::Object>>* roots) { + DCHECK(roots->empty()); + roots->reserve(GetNumberOfJitRoots()); + ClassLinker* class_linker = Runtime::Current()->GetClassLinker(); + size_t index = 0; + for (auto& entry : jit_string_roots_) { + // Update the `roots` with the string, and replace the address temporarily + // stored to the index in the table. 
+ uint64_t address = entry.second; + roots->emplace_back(reinterpret_cast<StackReference<mirror::Object>*>(address)); + DCHECK(roots->back() != nullptr); + DCHECK(roots->back()->IsString()); + entry.second = index; + // Ensure the string is strongly interned. This is a requirement on how the JIT + // handles strings. b/32995596 + class_linker->GetInternTable()->InternStrong(roots->back()->AsString()); + ++index; + } + for (auto& entry : jit_class_roots_) { + // Update the `roots` with the class, and replace the address temporarily + // stored to the index in the table. + uint64_t address = entry.second; + roots->emplace_back(reinterpret_cast<StackReference<mirror::Object>*>(address)); + DCHECK(roots->back() != nullptr); + DCHECK(roots->back()->IsClass()); + entry.second = index; + ++index; + } +} + +} // namespace art diff --git a/compiler/optimizing/code_generation_data.h b/compiler/optimizing/code_generation_data.h new file mode 100644 index 0000000000..e78ba8f574 --- /dev/null +++ b/compiler/optimizing/code_generation_data.h @@ -0,0 +1,123 @@ +/* + * Copyright (C) 2023 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef ART_COMPILER_OPTIMIZING_CODE_GENERATION_DATA_H_ +#define ART_COMPILER_OPTIMIZING_CODE_GENERATION_DATA_H_ + +#include <memory> + +#include "arch/instruction_set.h" +#include "base/scoped_arena_allocator.h" +#include "base/scoped_arena_containers.h" +#include "code_generator.h" +#include "dex/string_reference.h" +#include "dex/type_reference.h" +#include "handle.h" +#include "mirror/class.h" +#include "mirror/object.h" +#include "mirror/string.h" +#include "stack_map_stream.h" + +namespace art HIDDEN { + +class CodeGenerationData : public DeletableArenaObject<kArenaAllocCodeGenerator> { + public: + static std::unique_ptr<CodeGenerationData> Create(ArenaStack* arena_stack, + InstructionSet instruction_set) { + ScopedArenaAllocator allocator(arena_stack); + void* memory = allocator.Alloc<CodeGenerationData>(kArenaAllocCodeGenerator); + return std::unique_ptr<CodeGenerationData>( + ::new (memory) CodeGenerationData(std::move(allocator), instruction_set)); + } + + ScopedArenaAllocator* GetScopedAllocator() { + return &allocator_; + } + + void AddSlowPath(SlowPathCode* slow_path) { + slow_paths_.emplace_back(std::unique_ptr<SlowPathCode>(slow_path)); + } + + ArrayRef<const std::unique_ptr<SlowPathCode>> GetSlowPaths() const { + return ArrayRef<const std::unique_ptr<SlowPathCode>>(slow_paths_); + } + + StackMapStream* GetStackMapStream() { return &stack_map_stream_; } + + void ReserveJitStringRoot(StringReference string_reference, Handle<mirror::String> string) { + jit_string_roots_.Overwrite(string_reference, + reinterpret_cast64<uint64_t>(string.GetReference())); + } + + uint64_t GetJitStringRootIndex(StringReference string_reference) const { + return jit_string_roots_.Get(string_reference); + } + + size_t GetNumberOfJitStringRoots() const { + return jit_string_roots_.size(); + } + + void ReserveJitClassRoot(TypeReference 
type_reference, Handle<mirror::Class> klass) { + jit_class_roots_.Overwrite(type_reference, reinterpret_cast64<uint64_t>(klass.GetReference())); + } + + uint64_t GetJitClassRootIndex(TypeReference type_reference) const { + return jit_class_roots_.Get(type_reference); + } + + size_t GetNumberOfJitClassRoots() const { + return jit_class_roots_.size(); + } + + size_t GetNumberOfJitRoots() const { + return GetNumberOfJitStringRoots() + GetNumberOfJitClassRoots(); + } + + void EmitJitRoots(/*out*/std::vector<Handle<mirror::Object>>* roots) + REQUIRES_SHARED(Locks::mutator_lock_); + + private: + CodeGenerationData(ScopedArenaAllocator&& allocator, InstructionSet instruction_set) + : allocator_(std::move(allocator)), + stack_map_stream_(&allocator_, instruction_set), + slow_paths_(allocator_.Adapter(kArenaAllocCodeGenerator)), + jit_string_roots_(StringReferenceValueComparator(), + allocator_.Adapter(kArenaAllocCodeGenerator)), + jit_class_roots_(TypeReferenceValueComparator(), + allocator_.Adapter(kArenaAllocCodeGenerator)) { + slow_paths_.reserve(kDefaultSlowPathsCapacity); + } + + static constexpr size_t kDefaultSlowPathsCapacity = 8; + + ScopedArenaAllocator allocator_; + StackMapStream stack_map_stream_; + ScopedArenaVector<std::unique_ptr<SlowPathCode>> slow_paths_; + + // Maps a StringReference (dex_file, string_index) to the index in the literal table. + // Entries are initially added with a pointer in the handle zone, and `EmitJitRoots` + // will compute all the indices. + ScopedArenaSafeMap<StringReference, uint64_t, StringReferenceValueComparator> jit_string_roots_; + + // Maps a ClassReference (dex_file, type_index) to the index in the literal table. + // Entries are initially added with a pointer in the handle zone, and `EmitJitRoots` + // will compute all the indices. 
+ ScopedArenaSafeMap<TypeReference, uint64_t, TypeReferenceValueComparator> jit_class_roots_; +}; + +} // namespace art + +#endif // ART_COMPILER_OPTIMIZING_CODE_GENERATION_DATA_H_ diff --git a/compiler/optimizing/code_generator.cc b/compiler/optimizing/code_generator.cc index c9f42b52f5..b0e07e32ea 100644 --- a/compiler/optimizing/code_generator.cc +++ b/compiler/optimizing/code_generator.cc @@ -44,6 +44,7 @@ #include "base/leb128.h" #include "class_linker.h" #include "class_root-inl.h" +#include "code_generation_data.h" #include "dex/bytecode_utils.h" #include "dex/code_item_accessors-inl.h" #include "graph_visualizer.h" @@ -141,120 +142,20 @@ static bool CheckTypeConsistency(HInstruction* instruction) { return true; } -class CodeGenerator::CodeGenerationData : public DeletableArenaObject<kArenaAllocCodeGenerator> { - public: - static std::unique_ptr<CodeGenerationData> Create(ArenaStack* arena_stack, - InstructionSet instruction_set) { - ScopedArenaAllocator allocator(arena_stack); - void* memory = allocator.Alloc<CodeGenerationData>(kArenaAllocCodeGenerator); - return std::unique_ptr<CodeGenerationData>( - ::new (memory) CodeGenerationData(std::move(allocator), instruction_set)); - } - - ScopedArenaAllocator* GetScopedAllocator() { - return &allocator_; - } - - void AddSlowPath(SlowPathCode* slow_path) { - slow_paths_.emplace_back(std::unique_ptr<SlowPathCode>(slow_path)); - } - - ArrayRef<const std::unique_ptr<SlowPathCode>> GetSlowPaths() const { - return ArrayRef<const std::unique_ptr<SlowPathCode>>(slow_paths_); - } - - StackMapStream* GetStackMapStream() { return &stack_map_stream_; } - - void ReserveJitStringRoot(StringReference string_reference, Handle<mirror::String> string) { - jit_string_roots_.Overwrite(string_reference, - reinterpret_cast64<uint64_t>(string.GetReference())); - } - - uint64_t GetJitStringRootIndex(StringReference string_reference) const { - return jit_string_roots_.Get(string_reference); - } - - size_t GetNumberOfJitStringRoots() const { - return jit_string_roots_.size(); - } - - void ReserveJitClassRoot(TypeReference type_reference, Handle<mirror::Class> klass) { - jit_class_roots_.Overwrite(type_reference, reinterpret_cast64<uint64_t>(klass.GetReference())); - } - - uint64_t GetJitClassRootIndex(TypeReference type_reference) const { - return jit_class_roots_.Get(type_reference); - } - - size_t GetNumberOfJitClassRoots() const { - return jit_class_roots_.size(); - } - - size_t GetNumberOfJitRoots() const { - return GetNumberOfJitStringRoots() + GetNumberOfJitClassRoots(); - } +bool CodeGenerator::EmitReadBarrier() const { + return GetCompilerOptions().EmitReadBarrier(); +} - void EmitJitRoots(/*out*/std::vector<Handle<mirror::Object>>* roots) - REQUIRES_SHARED(Locks::mutator_lock_); +bool CodeGenerator::EmitBakerReadBarrier() const { + return kUseBakerReadBarrier && GetCompilerOptions().EmitReadBarrier(); +} - private: - CodeGenerationData(ScopedArenaAllocator&& allocator, InstructionSet instruction_set) - : allocator_(std::move(allocator)), - stack_map_stream_(&allocator_, instruction_set), - slow_paths_(allocator_.Adapter(kArenaAllocCodeGenerator)), - jit_string_roots_(StringReferenceValueComparator(), - allocator_.Adapter(kArenaAllocCodeGenerator)), - jit_class_roots_(TypeReferenceValueComparator(), - allocator_.Adapter(kArenaAllocCodeGenerator)) { - slow_paths_.reserve(kDefaultSlowPathsCapacity); - } - - static constexpr size_t kDefaultSlowPathsCapacity = 8; - - ScopedArenaAllocator allocator_; - StackMapStream stack_map_stream_; - 
ScopedArenaVector<std::unique_ptr<SlowPathCode>> slow_paths_; - - // Maps a StringReference (dex_file, string_index) to the index in the literal table. - // Entries are intially added with a pointer in the handle zone, and `EmitJitRoots` - // will compute all the indices. - ScopedArenaSafeMap<StringReference, uint64_t, StringReferenceValueComparator> jit_string_roots_; - - // Maps a ClassReference (dex_file, type_index) to the index in the literal table. - // Entries are intially added with a pointer in the handle zone, and `EmitJitRoots` - // will compute all the indices. - ScopedArenaSafeMap<TypeReference, uint64_t, TypeReferenceValueComparator> jit_class_roots_; -}; +bool CodeGenerator::EmitNonBakerReadBarrier() const { + return !kUseBakerReadBarrier && GetCompilerOptions().EmitReadBarrier(); +} -void CodeGenerator::CodeGenerationData::EmitJitRoots( - /*out*/std::vector<Handle<mirror::Object>>* roots) { - DCHECK(roots->empty()); - roots->reserve(GetNumberOfJitRoots()); - ClassLinker* class_linker = Runtime::Current()->GetClassLinker(); - size_t index = 0; - for (auto& entry : jit_string_roots_) { - // Update the `roots` with the string, and replace the address temporarily - // stored to the index in the table. - uint64_t address = entry.second; - roots->emplace_back(reinterpret_cast<StackReference<mirror::Object>*>(address)); - DCHECK(roots->back() != nullptr); - DCHECK(roots->back()->IsString()); - entry.second = index; - // Ensure the string is strongly interned. This is a requirement on how the JIT - // handles strings. b/32995596 - class_linker->GetInternTable()->InternStrong(roots->back()->AsString()); - ++index; - } - for (auto& entry : jit_class_roots_) { - // Update the `roots` with the class, and replace the address temporarily - // stored to the index in the table. - uint64_t address = entry.second; - roots->emplace_back(reinterpret_cast<StackReference<mirror::Object>*>(address)); - DCHECK(roots->back() != nullptr); - DCHECK(roots->back()->IsClass()); - entry.second = index; - ++index; - } +ReadBarrierOption CodeGenerator::GetCompilerReadBarrierOption() const { + return EmitReadBarrier() ? 
kWithReadBarrier : kWithoutReadBarrier; } ScopedArenaAllocator* CodeGenerator::GetScopedAllocator() { @@ -288,8 +189,8 @@ uint64_t CodeGenerator::GetJitClassRootIndex(TypeReference type_reference) { return code_generation_data_->GetJitClassRootIndex(type_reference); } -void CodeGenerator::EmitJitRootPatches(uint8_t* code ATTRIBUTE_UNUSED, - const uint8_t* roots_data ATTRIBUTE_UNUSED) { +void CodeGenerator::EmitJitRootPatches([[maybe_unused]] uint8_t* code, + [[maybe_unused]] const uint8_t* roots_data) { DCHECK(code_generation_data_ != nullptr); DCHECK_EQ(code_generation_data_->GetNumberOfJitStringRoots(), 0u); DCHECK_EQ(code_generation_data_->GetNumberOfJitClassRoots(), 0u); @@ -378,7 +279,7 @@ void CodeGenerator::InitializeCodeGenerationData() { code_generation_data_ = CodeGenerationData::Create(graph_->GetArenaStack(), GetInstructionSet()); } -void CodeGenerator::Compile(CodeAllocator* allocator) { +void CodeGenerator::Compile() { InitializeCodeGenerationData(); // The register allocator already called `InitializeCodeGeneration`, @@ -394,7 +295,8 @@ void CodeGenerator::Compile(CodeAllocator* allocator) { fpu_spill_mask_, GetGraph()->GetNumberOfVRegs(), GetGraph()->IsCompilingBaseline(), - GetGraph()->IsDebuggable()); + GetGraph()->IsDebuggable(), + GetGraph()->HasShouldDeoptimizeFlag()); size_t frame_start = GetAssembler()->CodeSize(); GenerateFrameEntry(); @@ -443,32 +345,28 @@ void CodeGenerator::Compile(CodeAllocator* allocator) { } // Finalize instructions in assember; - Finalize(allocator); + Finalize(); GetStackMapStream()->EndMethod(GetAssembler()->CodeSize()); } -void CodeGenerator::Finalize(CodeAllocator* allocator) { - size_t code_size = GetAssembler()->CodeSize(); - uint8_t* buffer = allocator->Allocate(code_size); - - MemoryRegion code(buffer, code_size); - GetAssembler()->FinalizeInstructions(code); +void CodeGenerator::Finalize() { + GetAssembler()->FinalizeCode(); } void CodeGenerator::EmitLinkerPatches( - ArenaVector<linker::LinkerPatch>* linker_patches ATTRIBUTE_UNUSED) { + [[maybe_unused]] ArenaVector<linker::LinkerPatch>* linker_patches) { // No linker patches by default. } -bool CodeGenerator::NeedsThunkCode(const linker::LinkerPatch& patch ATTRIBUTE_UNUSED) const { +bool CodeGenerator::NeedsThunkCode([[maybe_unused]] const linker::LinkerPatch& patch) const { // Code generators that create patches requiring thunk compilation should override this function. return false; } -void CodeGenerator::EmitThunkCode(const linker::LinkerPatch& patch ATTRIBUTE_UNUSED, - /*out*/ ArenaVector<uint8_t>* code ATTRIBUTE_UNUSED, - /*out*/ std::string* debug_name ATTRIBUTE_UNUSED) { +void CodeGenerator::EmitThunkCode([[maybe_unused]] const linker::LinkerPatch& patch, + [[maybe_unused]] /*out*/ ArenaVector<uint8_t>* code, + [[maybe_unused]] /*out*/ std::string* debug_name) { // Code generators that create patches requiring thunk compilation should override this function. LOG(FATAL) << "Unexpected call to EmitThunkCode()."; } @@ -730,7 +628,7 @@ void CodeGenerator::CreateUnresolvedFieldLocationSummary( } // Note that pSetXXStatic/pGetXXStatic always takes/returns an int or int64 - // regardless of the the type. Because of that we forced to special case + // regardless of the type. Because of that we forced to special case // the access to floating point values. 
if (is_get) { if (DataType::IsFloatingPointType(field_type)) { @@ -745,8 +643,8 @@ void CodeGenerator::CreateUnresolvedFieldLocationSummary( locations->SetOut(calling_convention.GetReturnLocation(field_type)); } } else { - size_t set_index = is_instance ? 1 : 0; - if (DataType::IsFloatingPointType(field_type)) { + size_t set_index = is_instance ? 1 : 0; + if (DataType::IsFloatingPointType(field_type)) { // The set value comes from a float location while the calling convention // expects it in a regular register location. Allocate a temp for it and // make the transfer at codegen. @@ -1028,6 +926,12 @@ std::unique_ptr<CodeGenerator> CodeGenerator::Create(HGraph* graph, new (allocator) arm64::CodeGeneratorARM64(graph, compiler_options, stats)); } #endif +#ifdef ART_ENABLE_CODEGEN_riscv64 + case InstructionSet::kRiscv64: { + return std::unique_ptr<CodeGenerator>( + new (allocator) riscv64::CodeGeneratorRISCV64(graph, compiler_options, stats)); + } +#endif #ifdef ART_ENABLE_CODEGEN_x86 case InstructionSet::kX86: { return std::unique_ptr<CodeGenerator>( @@ -1736,10 +1640,8 @@ void CodeGenerator::ValidateInvokeRuntime(QuickEntrypointEnum entrypoint, // When (non-Baker) read barriers are enabled, some instructions // use a slow path to emit a read barrier, which does not trigger // GC. - (gUseReadBarrier && - !kUseBakerReadBarrier && + (EmitNonBakerReadBarrier() && (instruction->IsInstanceFieldGet() || - instruction->IsPredicatedInstanceFieldGet() || instruction->IsStaticFieldGet() || instruction->IsArrayGet() || instruction->IsLoadClass() || @@ -1776,11 +1678,11 @@ void CodeGenerator::ValidateInvokeRuntimeWithoutRecordingPcInfo(HInstruction* in // PC-related information. DCHECK(kUseBakerReadBarrier); DCHECK(instruction->IsInstanceFieldGet() || - instruction->IsPredicatedInstanceFieldGet() || instruction->IsStaticFieldGet() || instruction->IsArrayGet() || instruction->IsArraySet() || instruction->IsLoadClass() || + instruction->IsLoadMethodType() || instruction->IsLoadString() || instruction->IsInstanceOf() || instruction->IsCheckCast() || @@ -1831,26 +1733,28 @@ void SlowPathCode::RestoreLiveRegisters(CodeGenerator* codegen, LocationSummary* } } -void CodeGenerator::CreateSystemArrayCopyLocationSummary(HInvoke* invoke) { +LocationSummary* CodeGenerator::CreateSystemArrayCopyLocationSummary( + HInvoke* invoke, int32_t length_threshold, size_t num_temps) { // Check to see if we have known failures that will cause us to have to bail out // to the runtime, and just generate the runtime call directly. - HIntConstant* src_pos = invoke->InputAt(1)->AsIntConstant(); - HIntConstant* dest_pos = invoke->InputAt(3)->AsIntConstant(); + HIntConstant* src_pos = invoke->InputAt(1)->AsIntConstantOrNull(); + HIntConstant* dest_pos = invoke->InputAt(3)->AsIntConstantOrNull(); // The positions must be non-negative. if ((src_pos != nullptr && src_pos->GetValue() < 0) || (dest_pos != nullptr && dest_pos->GetValue() < 0)) { // We will have to fail anyways. - return; + return nullptr; } - // The length must be >= 0. - HIntConstant* length = invoke->InputAt(4)->AsIntConstant(); + // The length must be >= 0. If a positive `length_threshold` is provided, lengths + // greater or equal to the threshold are also handled by the normal implementation. + HIntConstant* length = invoke->InputAt(4)->AsIntConstantOrNull(); if (length != nullptr) { int32_t len = length->GetValue(); - if (len < 0) { + if (len < 0 || (length_threshold > 0 && len >= length_threshold)) { // Just call as normal. 
- return; + return nullptr; } } @@ -1859,13 +1763,13 @@ void CodeGenerator::CreateSystemArrayCopyLocationSummary(HInvoke* invoke) { if (optimizations.GetDestinationIsSource()) { if (src_pos != nullptr && dest_pos != nullptr && src_pos->GetValue() < dest_pos->GetValue()) { // We only support backward copying if source and destination are the same. - return; + return nullptr; } } if (optimizations.GetDestinationIsPrimitiveArray() || optimizations.GetSourceIsPrimitiveArray()) { // We currently don't intrinsify primitive copying. - return; + return nullptr; } ArenaAllocator* allocator = invoke->GetBlock()->GetGraph()->GetAllocator(); @@ -1879,9 +1783,10 @@ void CodeGenerator::CreateSystemArrayCopyLocationSummary(HInvoke* invoke) { locations->SetInAt(3, Location::RegisterOrConstant(invoke->InputAt(3))); locations->SetInAt(4, Location::RegisterOrConstant(invoke->InputAt(4))); - locations->AddTemp(Location::RequiresRegister()); - locations->AddTemp(Location::RequiresRegister()); - locations->AddTemp(Location::RequiresRegister()); + if (num_temps != 0u) { + locations->AddRegisterTemps(num_temps); + } + return locations; } void CodeGenerator::EmitJitRoots(uint8_t* code, diff --git a/compiler/optimizing/code_generator.h b/compiler/optimizing/code_generator.h index 9872efaa4a..de6fc85da4 100644 --- a/compiler/optimizing/code_generator.h +++ b/compiler/optimizing/code_generator.h @@ -59,9 +59,6 @@ static int32_t constexpr kPrimIntMax = 0x7fffffff; // Maximum value for a primitive long. static int64_t constexpr kPrimLongMax = INT64_C(0x7fffffffffffffff); -static const ReadBarrierOption gCompilerReadBarrierOption = - gUseReadBarrier ? kWithReadBarrier : kWithoutReadBarrier; - constexpr size_t status_lsb_position = SubtypeCheckBits::BitStructSizeOf(); constexpr size_t status_byte_offset = mirror::Class::StatusOffset().SizeValue() + (status_lsb_position / kBitsPerByte); @@ -73,6 +70,7 @@ constexpr uint32_t shifted_initialized_value = enum_cast<uint32_t>(ClassStatus::kInitialized) << (status_lsb_position % kBitsPerByte); class Assembler; +class CodeGenerationData; class CodeGenerator; class CompilerOptions; class StackMapStream; @@ -82,18 +80,6 @@ namespace linker { class LinkerPatch; } // namespace linker -class CodeAllocator { - public: - CodeAllocator() {} - virtual ~CodeAllocator() {} - - virtual uint8_t* Allocate(size_t size) = 0; - virtual ArrayRef<const uint8_t> GetMemory() const = 0; - - private: - DISALLOW_COPY_AND_ASSIGN(CodeAllocator); -}; - class SlowPathCode : public DeletableArenaObject<kArenaAllocSlowPaths> { public: explicit SlowPathCode(HInstruction* instruction) : instruction_(instruction) { @@ -200,7 +186,7 @@ class FieldAccessCallingConvention { class CodeGenerator : public DeletableArenaObject<kArenaAllocCodeGenerator> { public: // Compiles the graph to executable instructions. 
- void Compile(CodeAllocator* allocator); + void Compile(); static std::unique_ptr<CodeGenerator> Create(HGraph* graph, const CompilerOptions& compiler_options, OptimizingCompilerStats* stats = nullptr); @@ -221,7 +207,7 @@ class CodeGenerator : public DeletableArenaObject<kArenaAllocCodeGenerator> { } virtual void Initialize() = 0; - virtual void Finalize(CodeAllocator* allocator); + virtual void Finalize(); virtual void EmitLinkerPatches(ArenaVector<linker::LinkerPatch>* linker_patches); virtual bool NeedsThunkCode(const linker::LinkerPatch& patch) const; virtual void EmitThunkCode(const linker::LinkerPatch& patch, @@ -278,20 +264,10 @@ class CodeGenerator : public DeletableArenaObject<kArenaAllocCodeGenerator> { fpu_spill_mask_ = allocated_registers_.GetFloatingPointRegisters() & fpu_callee_save_mask_; } - static uint32_t ComputeRegisterMask(const int* registers, size_t length) { - uint32_t mask = 0; - for (size_t i = 0, e = length; i < e; ++i) { - mask |= (1 << registers[i]); - } - return mask; - } - virtual void DumpCoreRegister(std::ostream& stream, int reg) const = 0; virtual void DumpFloatingPointRegister(std::ostream& stream, int reg) const = 0; virtual InstructionSet GetInstructionSet() const = 0; - const CompilerOptions& GetCompilerOptions() const { return compiler_options_; } - // Saves the register in the stack. Returns the size taken on stack. virtual size_t SaveCoreRegister(size_t stack_index, uint32_t reg_id) = 0; // Restores the register from the stack. Returns the size taken on stack. @@ -398,6 +374,12 @@ class CodeGenerator : public DeletableArenaObject<kArenaAllocCodeGenerator> { // TODO: Replace with a catch-entering instruction that records the environment. void RecordCatchBlockInfo(); + const CompilerOptions& GetCompilerOptions() const { return compiler_options_; } + bool EmitReadBarrier() const; + bool EmitBakerReadBarrier() const; + bool EmitNonBakerReadBarrier() const; + ReadBarrierOption GetCompilerReadBarrierOption() const; + // Get the ScopedArenaAllocator used for codegen memory allocation. ScopedArenaAllocator* GetScopedAllocator(); @@ -469,7 +451,7 @@ class CodeGenerator : public DeletableArenaObject<kArenaAllocCodeGenerator> { Location to2, DataType::Type type2); - static bool InstanceOfNeedsReadBarrier(HInstanceOf* instance_of) { + bool InstanceOfNeedsReadBarrier(HInstanceOf* instance_of) { // Used only for kExactCheck, kAbstractClassCheck, kClassHierarchyCheck and kArrayObjectCheck. DCHECK(instance_of->GetTypeCheckKind() == TypeCheckKind::kExactCheck || instance_of->GetTypeCheckKind() == TypeCheckKind::kAbstractClassCheck || @@ -479,14 +461,14 @@ class CodeGenerator : public DeletableArenaObject<kArenaAllocCodeGenerator> { // If the target class is in the boot image, it's non-moveable and it doesn't matter // if we compare it with a from-space or to-space reference, the result is the same. // It's OK to traverse a class hierarchy jumping between from-space and to-space. - return gUseReadBarrier && !instance_of->GetTargetClass()->IsInBootImage(); + return EmitReadBarrier() && !instance_of->GetTargetClass()->IsInBootImage(); } - static ReadBarrierOption ReadBarrierOptionForInstanceOf(HInstanceOf* instance_of) { + ReadBarrierOption ReadBarrierOptionForInstanceOf(HInstanceOf* instance_of) { return InstanceOfNeedsReadBarrier(instance_of) ? 
kWithReadBarrier : kWithoutReadBarrier; } - static bool IsTypeCheckSlowPathFatal(HCheckCast* check_cast) { + bool IsTypeCheckSlowPathFatal(HCheckCast* check_cast) { switch (check_cast->GetTypeCheckKind()) { case TypeCheckKind::kExactCheck: case TypeCheckKind::kAbstractClassCheck: @@ -494,7 +476,7 @@ class CodeGenerator : public DeletableArenaObject<kArenaAllocCodeGenerator> { case TypeCheckKind::kArrayObjectCheck: case TypeCheckKind::kInterfaceCheck: { bool needs_read_barrier = - gUseReadBarrier && !check_cast->GetTargetClass()->IsInBootImage(); + EmitReadBarrier() && !check_cast->GetTargetClass()->IsInBootImage(); // We do not emit read barriers for HCheckCast, so we can get false negatives // and the slow path shall re-check and simply return if the cast is actually OK. return !needs_read_barrier; @@ -509,7 +491,7 @@ class CodeGenerator : public DeletableArenaObject<kArenaAllocCodeGenerator> { UNREACHABLE(); } - static LocationSummary::CallKind GetCheckCastCallKind(HCheckCast* check_cast) { + LocationSummary::CallKind GetCheckCastCallKind(HCheckCast* check_cast) { return (IsTypeCheckSlowPathFatal(check_cast) && !check_cast->CanThrowIntoCatchBlock()) ? LocationSummary::kNoCall // In fact, call on a fatal (non-returning) slow path. : LocationSummary::kCallOnSlowPath; @@ -613,7 +595,7 @@ class CodeGenerator : public DeletableArenaObject<kArenaAllocCodeGenerator> { if (kIsDebugBuild) { uint32_t shorty_len; const char* shorty = GetCriticalNativeShorty(invoke, &shorty_len); - DCHECK_EQ(GetCriticalNativeDirectCallFrameSize(shorty, shorty_len), out_frame_size); + CHECK_EQ(GetCriticalNativeDirectCallFrameSize(shorty, shorty_len), out_frame_size); } if (out_frame_size != 0u) { FinishCriticalNativeFrameSetup(out_frame_size, ¶llel_move); @@ -667,7 +649,8 @@ class CodeGenerator : public DeletableArenaObject<kArenaAllocCodeGenerator> { static uint32_t GetBootImageOffset(ClassRoot class_root); static uint32_t GetBootImageOffsetOfIntrinsicDeclaringClass(HInvoke* invoke); - static void CreateSystemArrayCopyLocationSummary(HInvoke* invoke); + static LocationSummary* CreateSystemArrayCopyLocationSummary( + HInvoke* invoke, int32_t length_threshold = -1, size_t num_temps = 3); void SetDisassemblyInformation(DisassemblyInformation* info) { disasm_info_ = info; } DisassemblyInformation* GetDisassemblyInformation() const { return disasm_info_; } @@ -687,7 +670,7 @@ class CodeGenerator : public DeletableArenaObject<kArenaAllocCodeGenerator> { virtual HLoadClass::LoadKind GetSupportedLoadClassKind( HLoadClass::LoadKind desired_class_load_kind) = 0; - static LocationSummary::CallKind GetLoadStringCallKind(HLoadString* load) { + LocationSummary::CallKind GetLoadStringCallKind(HLoadString* load) { switch (load->GetLoadKind()) { case HLoadString::LoadKind::kBssEntry: DCHECK(load->NeedsEnvironment()); @@ -697,7 +680,7 @@ class CodeGenerator : public DeletableArenaObject<kArenaAllocCodeGenerator> { return LocationSummary::kCallOnMainOnly; case HLoadString::LoadKind::kJitTableAddress: DCHECK(!load->NeedsEnvironment()); - return gUseReadBarrier + return EmitReadBarrier() ? 
LocationSummary::kCallOnSlowPath : LocationSummary::kNoCall; break; @@ -731,6 +714,11 @@ class CodeGenerator : public DeletableArenaObject<kArenaAllocCodeGenerator> { static QuickEntrypointEnum GetArrayAllocationEntrypoint(HNewArray* new_array); static ScaleFactor ScaleFactorForType(DataType::Type type); + ArrayRef<const uint8_t> GetCode() const { + return ArrayRef<const uint8_t>(GetAssembler().CodeBufferBaseAddress(), + GetAssembler().CodeSize()); + } + protected: // Patch info used for recording locations of required linker patches and their targets, // i.e. target method, string, type or code identified by their dex file and index, @@ -761,6 +749,15 @@ class CodeGenerator : public DeletableArenaObject<kArenaAllocCodeGenerator> { virtual HGraphVisitor* GetLocationBuilder() = 0; virtual HGraphVisitor* GetInstructionVisitor() = 0; + template <typename RegType> + static uint32_t ComputeRegisterMask(const RegType* registers, size_t length) { + uint32_t mask = 0; + for (size_t i = 0, e = length; i < e; ++i) { + mask |= (1 << registers[i]); + } + return mask; + } + // Returns the location of the first spilled entry for floating point registers, // relative to the stack pointer. uint32_t GetFpuSpillStart() const { @@ -814,6 +811,10 @@ class CodeGenerator : public DeletableArenaObject<kArenaAllocCodeGenerator> { StackMapStream* GetStackMapStream(); + CodeGenerationData* GetCodeGenerationData() { + return code_generation_data_.get(); + } + void ReserveJitStringRoot(StringReference string_reference, Handle<mirror::String> string); uint64_t GetJitStringRootIndex(StringReference string_reference); void ReserveJitClassRoot(TypeReference type_reference, Handle<mirror::Class> klass); @@ -848,8 +849,6 @@ class CodeGenerator : public DeletableArenaObject<kArenaAllocCodeGenerator> { DisassemblyInformation* disasm_info_; private: - class CodeGenerationData; - void InitializeCodeGenerationData(); size_t GetStackOffsetOfSavedRegister(size_t index); void GenerateSlowPaths(); diff --git a/compiler/optimizing/code_generator_arm64.cc b/compiler/optimizing/code_generator_arm64.cc index 41db9a2542..9027976165 100644 --- a/compiler/optimizing/code_generator_arm64.cc +++ b/compiler/optimizing/code_generator_arm64.cc @@ -35,6 +35,7 @@ #include "interpreter/mterp/nterp.h" #include "intrinsics.h" #include "intrinsics_arm64.h" +#include "intrinsics_list.h" #include "intrinsics_utils.h" #include "linker/linker_patch.h" #include "lock_word.h" @@ -44,7 +45,9 @@ #include "offsets.h" #include "optimizing/common_arm64.h" #include "optimizing/nodes.h" +#include "profiling_info_builder.h" #include "thread.h" +#include "trace.h" #include "utils/arm64/assembler_arm64.h" #include "utils/assembler.h" #include "utils/stack_checks.h" @@ -88,6 +91,9 @@ using helpers::VIXLRegCodeFromART; using helpers::WRegisterFrom; using helpers::XRegisterFrom; +// TODO(mythria): Expand SystemRegister in vixl to include this value. +uint16_t SYS_CNTVCT_EL0 = SystemRegisterEncoder<1, 3, 14, 0, 2>::value; + // The compare/jump sequence will generate about (1.5 * num_entries + 3) instructions. While jump // table version generates 7 instructions and num_entries literals. Compare/jump sequence will // generates less code/data with a small num_entries. 
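The code_generator.cc and code_generator.h hunks above replace the process-wide gUseReadBarrier / gCompilerReadBarrierOption checks with per-CodeGenerator queries (EmitReadBarrier, EmitBakerReadBarrier, EmitNonBakerReadBarrier, GetCompilerReadBarrierOption), and the ARM64 hunks below rewrite call sites such as HandleFieldGet and VisitArrayGet to ask the codegen instead. Below is a reduced, self-contained sketch of that shape; the CompilerOptions layout is an assumption for illustration, while the helper names and the Baker/non-Baker split come from the diff.

    // Illustrative sketch, not ART code: read-barrier emission becomes a query on
    // the per-compilation options instead of a process-wide constant.
    #include <cstdio>

    enum ReadBarrierOption { kWithoutReadBarrier, kWithReadBarrier };
    constexpr bool kUseBakerReadBarrier = true;  // build-time choice, as in ART

    struct CompilerOptions {
      bool emit_read_barrier;
      bool EmitReadBarrier() const { return emit_read_barrier; }
    };

    class CodeGenerator {
     public:
      explicit CodeGenerator(CompilerOptions options) : options_(options) {}
      const CompilerOptions& GetCompilerOptions() const { return options_; }
      bool EmitReadBarrier() const { return GetCompilerOptions().EmitReadBarrier(); }
      bool EmitBakerReadBarrier() const { return kUseBakerReadBarrier && EmitReadBarrier(); }
      bool EmitNonBakerReadBarrier() const { return !kUseBakerReadBarrier && EmitReadBarrier(); }
      ReadBarrierOption GetCompilerReadBarrierOption() const {
        return EmitReadBarrier() ? kWithReadBarrier : kWithoutReadBarrier;
      }
     private:
      CompilerOptions options_;
    };

    int main() {
      CodeGenerator with_rb(CompilerOptions{/*emit_read_barrier=*/true});
      CodeGenerator without_rb(CompilerOptions{/*emit_read_barrier=*/false});
      // A call site like HandleFieldGet now asks the codegen instead of a global:
      std::printf("baker barrier on reference loads: %d vs %d\n",
                  with_rb.EmitBakerReadBarrier(), without_rb.EmitBakerReadBarrier());
      return 0;
    }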
@@ -582,7 +588,6 @@ class ReadBarrierForHeapReferenceSlowPathARM64 : public SlowPathCodeARM64 { obj_(obj), offset_(offset), index_(index) { - DCHECK(gUseReadBarrier); // If `obj` is equal to `out` or `ref`, it means the initial object // has been overwritten by (or after) the heap object reference load // to be instrumented, e.g.: @@ -597,13 +602,13 @@ class ReadBarrierForHeapReferenceSlowPathARM64 : public SlowPathCodeARM64 { } void EmitNativeCode(CodeGenerator* codegen) override { + DCHECK(codegen->EmitReadBarrier()); CodeGeneratorARM64* arm64_codegen = down_cast<CodeGeneratorARM64*>(codegen); LocationSummary* locations = instruction_->GetLocations(); DataType::Type type = DataType::Type::kReference; DCHECK(locations->CanCall()); DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(out_.reg())); DCHECK(instruction_->IsInstanceFieldGet() || - instruction_->IsPredicatedInstanceFieldGet() || instruction_->IsStaticFieldGet() || instruction_->IsArrayGet() || instruction_->IsInstanceOf() || @@ -680,9 +685,9 @@ class ReadBarrierForHeapReferenceSlowPathARM64 : public SlowPathCodeARM64 { DCHECK(instruction_->IsInvoke()) << instruction_->DebugName(); DCHECK(instruction_->GetLocations()->Intrinsified()); HInvoke* invoke = instruction_->AsInvoke(); - DCHECK(IsUnsafeGetObject(invoke) || + DCHECK(IsUnsafeGetReference(invoke) || IsVarHandleGet(invoke) || - IsUnsafeCASObject(invoke) || + IsUnsafeCASReference(invoke) || IsVarHandleCASFamily(invoke)) << invoke->GetIntrinsic(); DCHECK_EQ(offset_, 0u); DCHECK(index_.IsRegister()); @@ -761,10 +766,10 @@ class ReadBarrierForRootSlowPathARM64 : public SlowPathCodeARM64 { public: ReadBarrierForRootSlowPathARM64(HInstruction* instruction, Location out, Location root) : SlowPathCodeARM64(instruction), out_(out), root_(root) { - DCHECK(gUseReadBarrier); } void EmitNativeCode(CodeGenerator* codegen) override { + DCHECK(codegen->EmitReadBarrier()); LocationSummary* locations = instruction_->GetLocations(); DataType::Type type = DataType::Type::kReference; DCHECK(locations->CanCall()); @@ -842,12 +847,20 @@ class MethodEntryExitHooksSlowPathARM64 : public SlowPathCodeARM64 { class CompileOptimizedSlowPathARM64 : public SlowPathCodeARM64 { public: - CompileOptimizedSlowPathARM64() : SlowPathCodeARM64(/* instruction= */ nullptr) {} + explicit CompileOptimizedSlowPathARM64(Register profiling_info) + : SlowPathCodeARM64(/* instruction= */ nullptr), + profiling_info_(profiling_info) {} void EmitNativeCode(CodeGenerator* codegen) override { uint32_t entrypoint_offset = GetThreadOffset<kArm64PointerSize>(kQuickCompileOptimized).Int32Value(); __ Bind(GetEntryLabel()); + CodeGeneratorARM64* arm64_codegen = down_cast<CodeGeneratorARM64*>(codegen); + UseScratchRegisterScope temps(arm64_codegen->GetVIXLAssembler()); + Register counter = temps.AcquireW(); + __ Mov(counter, ProfilingInfo::GetOptimizeThreshold()); + __ Strh(counter, + MemOperand(profiling_info_, ProfilingInfo::BaselineHotnessCountOffset().Int32Value())); __ Ldr(lr, MemOperand(tr, entrypoint_offset)); // Note: we don't record the call here (and therefore don't generate a stack // map), as the entrypoint should never be suspended. @@ -860,6 +873,10 @@ class CompileOptimizedSlowPathARM64 : public SlowPathCodeARM64 { } private: + // The register where the profiling info is stored when entering the slow + // path. 
+ Register profiling_info_; + DISALLOW_COPY_AND_ASSIGN(CompileOptimizedSlowPathARM64); }; @@ -936,6 +953,7 @@ Location CriticalNativeCallingConventionVisitorARM64::GetMethodLocation() const } namespace detail { + // Mark which intrinsics we don't have handcrafted code for. template <Intrinsics T> struct IsUnimplemented { @@ -950,15 +968,13 @@ struct IsUnimplemented { UNIMPLEMENTED_INTRINSIC_LIST_ARM64(TRUE_OVERRIDE) #undef TRUE_OVERRIDE -#include "intrinsics_list.h" static constexpr bool kIsIntrinsicUnimplemented[] = { - false, // kNone + false, // kNone #define IS_UNIMPLEMENTED(Intrinsic, ...) \ - IsUnimplemented<Intrinsics::k##Intrinsic>().is_unimplemented, - INTRINSICS_LIST(IS_UNIMPLEMENTED) + IsUnimplemented<Intrinsics::k##Intrinsic>().is_unimplemented, + ART_INTRINSICS_LIST(IS_UNIMPLEMENTED) #undef IS_UNIMPLEMENTED }; -#undef INTRINSICS_LIST } // namespace detail @@ -995,14 +1011,7 @@ CodeGeneratorARM64::CodeGeneratorARM64(HGraph* graph, boot_image_other_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)), call_entrypoint_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)), baker_read_barrier_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)), - uint32_literals_(std::less<uint32_t>(), - graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)), - uint64_literals_(std::less<uint64_t>(), - graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)), - jit_string_patches_(StringReferenceValueComparator(), - graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)), - jit_class_patches_(TypeReferenceValueComparator(), - graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)), + jit_patches_(&assembler_, graph->GetAllocator()), jit_baker_read_barrier_slow_paths_(std::less<uint32_t>(), graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)) { // Save the link register (containing the return address) to mimic Quick. @@ -1036,7 +1045,7 @@ void CodeGeneratorARM64::EmitJumpTables() { } } -void CodeGeneratorARM64::Finalize(CodeAllocator* allocator) { +void CodeGeneratorARM64::Finalize() { EmitJumpTables(); // Emit JIT baker read barrier slow paths. @@ -1051,11 +1060,11 @@ void CodeGeneratorARM64::Finalize(CodeAllocator* allocator) { // Ensure we emit the literal pool. __ FinalizeCode(); - CodeGenerator::Finalize(allocator); + CodeGenerator::Finalize(); // Verify Baker read barrier linker patches. if (kIsDebugBuild) { - ArrayRef<const uint8_t> code = allocator->GetMemory(); + ArrayRef<const uint8_t> code(GetCode()); for (const BakerReadBarrierPatchInfo& info : baker_read_barrier_patches_) { DCHECK(info.label.IsBound()); uint32_t literal_offset = info.label.GetLocation(); @@ -1192,8 +1201,9 @@ void LocationsBuilderARM64::VisitMethodExitHook(HMethodExitHook* method_hook) { void InstructionCodeGeneratorARM64::GenerateMethodEntryExitHook(HInstruction* instruction) { MacroAssembler* masm = GetVIXLAssembler(); UseScratchRegisterScope temps(masm); - Register temp = temps.AcquireX(); - Register value = temps.AcquireW(); + Register addr = temps.AcquireX(); + Register index = temps.AcquireX(); + Register value = index.W(); SlowPathCodeARM64* slow_path = new (codegen_->GetScopedAllocator()) MethodEntryExitHooksSlowPathARM64(instruction); @@ -1213,9 +1223,44 @@ void InstructionCodeGeneratorARM64::GenerateMethodEntryExitHook(HInstruction* in MemberOffset offset = instruction->IsMethodExitHook() ? 
instrumentation::Instrumentation::HaveMethodExitListenersOffset() : instrumentation::Instrumentation::HaveMethodEntryListenersOffset(); - __ Mov(temp, address + offset.Int32Value()); - __ Ldrb(value, MemOperand(temp, 0)); - __ Cbnz(value, slow_path->GetEntryLabel()); + __ Mov(addr, address + offset.Int32Value()); + __ Ldrb(value, MemOperand(addr, 0)); + __ Cmp(value, Operand(instrumentation::Instrumentation::kFastTraceListeners)); + // Check if there are any method entry / exit listeners. If no, continue. + __ B(lt, slow_path->GetExitLabel()); + // Check if there are any slow (jvmti / trace with thread cpu time) method entry / exit listeners. + // If yes, just take the slow path. + __ B(gt, slow_path->GetEntryLabel()); + + // Check if there is place in the buffer to store a new entry, if no, take slow path. + uint32_t trace_buffer_index_offset = + Thread::TraceBufferIndexOffset<kArm64PointerSize>().Int32Value(); + __ Ldr(index, MemOperand(tr, trace_buffer_index_offset)); + __ Subs(index, index, kNumEntriesForWallClock); + __ B(lt, slow_path->GetEntryLabel()); + + // Update the index in the `Thread`. + __ Str(index, MemOperand(tr, trace_buffer_index_offset)); + // Calculate the entry address in the buffer. + // addr = base_addr + sizeof(void*) * index; + __ Ldr(addr, MemOperand(tr, Thread::TraceBufferPtrOffset<kArm64PointerSize>().SizeValue())); + __ ComputeAddress(addr, MemOperand(addr, index, LSL, TIMES_8)); + + Register tmp = index; + // Record method pointer and trace action. + __ Ldr(tmp, MemOperand(sp, 0)); + // Use last two bits to encode trace method action. For MethodEntry it is 0 + // so no need to set the bits since they are 0 already. + if (instruction->IsMethodExitHook()) { + DCHECK_GE(ArtMethod::Alignment(kRuntimePointerSize), static_cast<size_t>(4)); + static_assert(enum_cast<int32_t>(TraceAction::kTraceMethodEnter) == 0); + static_assert(enum_cast<int32_t>(TraceAction::kTraceMethodExit) == 1); + __ Orr(tmp, tmp, Operand(enum_cast<int32_t>(TraceAction::kTraceMethodExit))); + } + __ Str(tmp, MemOperand(addr, kMethodOffsetInBytes)); + // Record the timestamp. 
+ __ Mrs(tmp, (SystemRegister)SYS_CNTVCT_EL0); + __ Str(tmp, MemOperand(addr, kTimestampOffsetInBytes)); __ Bind(slow_path->GetExitLabel()); } @@ -1254,21 +1299,21 @@ void CodeGeneratorARM64::MaybeIncrementHotness(bool is_frame_entry) { } if (GetGraph()->IsCompilingBaseline() && !Runtime::Current()->IsAotCompiler()) { - SlowPathCodeARM64* slow_path = new (GetScopedAllocator()) CompileOptimizedSlowPathARM64(); - AddSlowPath(slow_path); ProfilingInfo* info = GetGraph()->GetProfilingInfo(); DCHECK(info != nullptr); DCHECK(!HasEmptyFrame()); uint64_t address = reinterpret_cast64<uint64_t>(info); vixl::aarch64::Label done; UseScratchRegisterScope temps(masm); - Register temp = temps.AcquireX(); Register counter = temps.AcquireW(); - __ Ldr(temp, DeduplicateUint64Literal(address)); - __ Ldrh(counter, MemOperand(temp, ProfilingInfo::BaselineHotnessCountOffset().Int32Value())); + SlowPathCodeARM64* slow_path = + new (GetScopedAllocator()) CompileOptimizedSlowPathARM64(/* profiling_info= */ lr); + AddSlowPath(slow_path); + __ Ldr(lr, jit_patches_.DeduplicateUint64Literal(address)); + __ Ldrh(counter, MemOperand(lr, ProfilingInfo::BaselineHotnessCountOffset().Int32Value())); __ Cbz(counter, slow_path->GetEntryLabel()); __ Add(counter, counter, -1); - __ Strh(counter, MemOperand(temp, ProfilingInfo::BaselineHotnessCountOffset().Int32Value())); + __ Strh(counter, MemOperand(lr, ProfilingInfo::BaselineHotnessCountOffset().Int32Value())); __ Bind(slow_path->GetExitLabel()); } } @@ -1532,15 +1577,15 @@ size_t CodeGeneratorARM64::RestoreCoreRegister(size_t stack_index, uint32_t reg_ return kArm64WordSize; } -size_t CodeGeneratorARM64::SaveFloatingPointRegister(size_t stack_index ATTRIBUTE_UNUSED, - uint32_t reg_id ATTRIBUTE_UNUSED) { +size_t CodeGeneratorARM64::SaveFloatingPointRegister([[maybe_unused]] size_t stack_index, + [[maybe_unused]] uint32_t reg_id) { LOG(FATAL) << "FP registers shouldn't be saved/restored individually, " << "use SaveRestoreLiveRegistersHelper"; UNREACHABLE(); } -size_t CodeGeneratorARM64::RestoreFloatingPointRegister(size_t stack_index ATTRIBUTE_UNUSED, - uint32_t reg_id ATTRIBUTE_UNUSED) { +size_t CodeGeneratorARM64::RestoreFloatingPointRegister([[maybe_unused]] size_t stack_index, + [[maybe_unused]] uint32_t reg_id) { LOG(FATAL) << "FP registers shouldn't be saved/restored individually, " << "use SaveRestoreLiveRegistersHelper"; UNREACHABLE(); @@ -2136,14 +2181,10 @@ void LocationsBuilderARM64::HandleBinaryOp(HBinaryOperation* instr) { void LocationsBuilderARM64::HandleFieldGet(HInstruction* instruction, const FieldInfo& field_info) { - DCHECK(instruction->IsInstanceFieldGet() || - instruction->IsStaticFieldGet() || - instruction->IsPredicatedInstanceFieldGet()); - - bool is_predicated = instruction->IsPredicatedInstanceFieldGet(); + DCHECK(instruction->IsInstanceFieldGet() || instruction->IsStaticFieldGet()); bool object_field_get_with_read_barrier = - gUseReadBarrier && (instruction->GetType() == DataType::Type::kReference); + (instruction->GetType() == DataType::Type::kReference) && codegen_->EmitReadBarrier(); LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(instruction, object_field_get_with_read_barrier @@ -2160,37 +2201,24 @@ void LocationsBuilderARM64::HandleFieldGet(HInstruction* instruction, } } // Input for object receiver. - locations->SetInAt(is_predicated ? 
1 : 0, Location::RequiresRegister()); + locations->SetInAt(0, Location::RequiresRegister()); if (DataType::IsFloatingPointType(instruction->GetType())) { - if (is_predicated) { - locations->SetInAt(0, Location::RequiresFpuRegister()); - locations->SetOut(Location::SameAsFirstInput()); - } else { - locations->SetOut(Location::RequiresFpuRegister()); - } + locations->SetOut(Location::RequiresFpuRegister()); } else { - if (is_predicated) { - locations->SetInAt(0, Location::RequiresRegister()); - locations->SetOut(Location::SameAsFirstInput()); - } else { - // The output overlaps for an object field get when read barriers - // are enabled: we do not want the load to overwrite the object's - // location, as we need it to emit the read barrier. - locations->SetOut(Location::RequiresRegister(), - object_field_get_with_read_barrier ? Location::kOutputOverlap - : Location::kNoOutputOverlap); - } + // The output overlaps for an object field get when read barriers + // are enabled: we do not want the load to overwrite the object's + // location, as we need it to emit the read barrier. + locations->SetOut( + Location::RequiresRegister(), + object_field_get_with_read_barrier ? Location::kOutputOverlap : Location::kNoOutputOverlap); } } void InstructionCodeGeneratorARM64::HandleFieldGet(HInstruction* instruction, const FieldInfo& field_info) { - DCHECK(instruction->IsInstanceFieldGet() || - instruction->IsStaticFieldGet() || - instruction->IsPredicatedInstanceFieldGet()); - bool is_predicated = instruction->IsPredicatedInstanceFieldGet(); + DCHECK(instruction->IsInstanceFieldGet() || instruction->IsStaticFieldGet()); LocationSummary* locations = instruction->GetLocations(); - uint32_t receiver_input = is_predicated ? 1 : 0; + uint32_t receiver_input = 0; Location base_loc = locations->InAt(receiver_input); Location out = locations->Out(); uint32_t offset = field_info.GetFieldOffset().Uint32Value(); @@ -2199,8 +2227,7 @@ void InstructionCodeGeneratorARM64::HandleFieldGet(HInstruction* instruction, MemOperand field = HeapOperand(InputRegisterAt(instruction, receiver_input), field_info.GetFieldOffset()); - if (gUseReadBarrier && kUseBakerReadBarrier && - load_type == DataType::Type::kReference) { + if (load_type == DataType::Type::kReference && codegen_->EmitBakerReadBarrier()) { // Object FieldGet with Baker's read barrier case. // /* HeapReference<Object> */ out = *(base + offset) Register base = RegisterFrom(base_loc, DataType::Type::kReference); @@ -2261,20 +2288,12 @@ void InstructionCodeGeneratorARM64::HandleFieldSet(HInstruction* instruction, bool value_can_be_null, WriteBarrierKind write_barrier_kind) { DCHECK(instruction->IsInstanceFieldSet() || instruction->IsStaticFieldSet()); - bool is_predicated = - instruction->IsInstanceFieldSet() && instruction->AsInstanceFieldSet()->GetIsPredicatedSet(); Register obj = InputRegisterAt(instruction, 0); CPURegister value = InputCPURegisterOrZeroRegAt(instruction, 1); CPURegister source = value; Offset offset = field_info.GetFieldOffset(); DataType::Type field_type = field_info.GetFieldType(); - std::optional<vixl::aarch64::Label> pred_is_null; - if (is_predicated) { - pred_is_null.emplace(); - __ Cbz(obj, &*pred_is_null); - } - { // We use a block to end the scratch scope before the write barrier, thus // freeing the temporary registers so they can be used in `MarkGCCard`. 
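The method entry/exit hook hunk above adds a fast path that, when only fast trace listeners are installed, claims space in a per-thread trace buffer by decrementing an index, stores the method pointer with the trace action encoded in its low bits, and records a CNTVCT_EL0 timestamp, falling back to the slow path when slow listeners are present or the buffer is full. Here is a plain C++ sketch of that record-keeping only; the two-word record size and the buffer layout are assumptions for illustration, and ReadCycleCounter stands in for the Mrs of SYS_CNTVCT_EL0.

    // Illustrative sketch, not ART code: the buffer protocol behind the method
    // entry/exit fast path. Each record is assumed to take two words, one for the
    // method pointer tagged with the trace action and one for the timestamp.
    #include <cstdint>
    #include <cstdio>

    enum TraceAction : uintptr_t { kTraceMethodEnter = 0, kTraceMethodExit = 1 };

    constexpr intptr_t kNumEntriesForWallClock = 2;  // assumption: method word + timestamp word

    struct TraceBuffer {       // placeholder for the Thread's trace-buffer fields
      uintptr_t* base;
      intptr_t index;          // counts down; too small means the buffer is full
    };

    uint64_t ReadCycleCounter() { return 12345; }  // stands in for reading SYS_CNTVCT_EL0

    // Returns false when the record does not fit, i.e. the slow path must handle it.
    bool RecordMethodEvent(TraceBuffer* buf, const void* method, bool is_exit) {
      intptr_t index = buf->index - kNumEntriesForWallClock;
      if (index < 0) {
        return false;
      }
      buf->index = index;                       // claim the slot (Str of the new index)
      uintptr_t* entry = buf->base + index;     // base + sizeof(void*) * index
      uintptr_t tagged = reinterpret_cast<uintptr_t>(method);
      if (is_exit) {
        tagged |= kTraceMethodExit;             // low bits are free due to method alignment
      }
      entry[0] = tagged;                                      // method pointer + action
      entry[1] = static_cast<uintptr_t>(ReadCycleCounter());  // timestamp
      return true;
    }

    int main() {
      uintptr_t storage[8] = {};
      TraceBuffer buf{storage, 8};
      static int fake_method;                   // placeholder, aligned like a real method
      int recorded = 0;
      while (RecordMethodEvent(&buf, &fake_method, /*is_exit=*/false)) ++recorded;
      std::printf("recorded %d records before hitting the slow path\n", recorded);  // 4
      return 0;
    }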
@@ -2306,10 +2325,6 @@ void InstructionCodeGeneratorARM64::HandleFieldSet(HInstruction* instruction, Register(value), value_can_be_null && write_barrier_kind == WriteBarrierKind::kEmitWithNullCheck); } - - if (is_predicated) { - __ Bind(&*pred_is_null); - } } void InstructionCodeGeneratorARM64::HandleBinaryOp(HBinaryOperation* instr) { @@ -2647,7 +2662,7 @@ void InstructionCodeGeneratorARM64::VisitMultiplyAccumulate(HMultiplyAccumulate* void LocationsBuilderARM64::VisitArrayGet(HArrayGet* instruction) { bool object_array_get_with_read_barrier = - gUseReadBarrier && (instruction->GetType() == DataType::Type::kReference); + (instruction->GetType() == DataType::Type::kReference) && codegen_->EmitReadBarrier(); LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(instruction, object_array_get_with_read_barrier @@ -2703,10 +2718,9 @@ void InstructionCodeGeneratorARM64::VisitArrayGet(HArrayGet* instruction) { // does not support the HIntermediateAddress instruction. DCHECK(!((type == DataType::Type::kReference) && instruction->GetArray()->IsIntermediateAddress() && - gUseReadBarrier && - !kUseBakerReadBarrier)); + codegen_->EmitNonBakerReadBarrier())); - if (type == DataType::Type::kReference && gUseReadBarrier && kUseBakerReadBarrier) { + if (type == DataType::Type::kReference && codegen_->EmitBakerReadBarrier()) { // Object ArrayGet with Baker's read barrier case. // Note that a potential implicit null check is handled in the // CodeGeneratorARM64::GenerateArrayLoadWithBakerReadBarrier call. @@ -3647,7 +3661,7 @@ void LocationsBuilderARM64::VisitDoubleConstant(HDoubleConstant* constant) { } void InstructionCodeGeneratorARM64::VisitDoubleConstant( - HDoubleConstant* constant ATTRIBUTE_UNUSED) { + [[maybe_unused]] HDoubleConstant* constant) { // Will be generated at use site. } @@ -3655,8 +3669,7 @@ void LocationsBuilderARM64::VisitExit(HExit* exit) { exit->SetLocations(nullptr); } -void InstructionCodeGeneratorARM64::VisitExit(HExit* exit ATTRIBUTE_UNUSED) { -} +void InstructionCodeGeneratorARM64::VisitExit([[maybe_unused]] HExit* exit) {} void LocationsBuilderARM64::VisitFloatConstant(HFloatConstant* constant) { LocationSummary* locations = @@ -3664,7 +3677,7 @@ void LocationsBuilderARM64::VisitFloatConstant(HFloatConstant* constant) { locations->SetOut(Location::ConstantLocation(constant)); } -void InstructionCodeGeneratorARM64::VisitFloatConstant(HFloatConstant* constant ATTRIBUTE_UNUSED) { +void InstructionCodeGeneratorARM64::VisitFloatConstant([[maybe_unused]] HFloatConstant* constant) { // Will be generated at use site. } @@ -3747,7 +3760,7 @@ void InstructionCodeGeneratorARM64::GenerateTestAndBranch(HInstruction* instruct // The condition instruction has been materialized, compare the output to 0. 
Location cond_val = instruction->GetLocations()->InAt(condition_input_index); DCHECK(cond_val.IsRegister()); - if (true_target == nullptr) { + if (true_target == nullptr) { __ Cbz(InputRegisterAt(instruction, condition_input_index), false_target); } else { __ Cbnz(InputRegisterAt(instruction, condition_input_index), true_target); @@ -3835,6 +3848,35 @@ void InstructionCodeGeneratorARM64::VisitIf(HIf* if_instr) { if (codegen_->GoesToNextBlock(if_instr->GetBlock(), false_successor)) { false_target = nullptr; } + if (IsBooleanValueOrMaterializedCondition(if_instr->InputAt(0))) { + if (GetGraph()->IsCompilingBaseline() && + codegen_->GetCompilerOptions().ProfileBranches() && + !Runtime::Current()->IsAotCompiler()) { + DCHECK(if_instr->InputAt(0)->IsCondition()); + ProfilingInfo* info = GetGraph()->GetProfilingInfo(); + DCHECK(info != nullptr); + BranchCache* cache = info->GetBranchCache(if_instr->GetDexPc()); + // Currently, not all If branches are profiled. + if (cache != nullptr) { + uint64_t address = + reinterpret_cast64<uint64_t>(cache) + BranchCache::FalseOffset().Int32Value(); + static_assert( + BranchCache::TrueOffset().Int32Value() - BranchCache::FalseOffset().Int32Value() == 2, + "Unexpected offsets for BranchCache"); + vixl::aarch64::Label done; + UseScratchRegisterScope temps(GetVIXLAssembler()); + Register temp = temps.AcquireX(); + Register counter = temps.AcquireW(); + Register condition = InputRegisterAt(if_instr, 0).X(); + __ Mov(temp, address); + __ Ldrh(counter, MemOperand(temp, condition, LSL, 1)); + __ Add(counter, counter, 1); + __ Tbnz(counter, 16, &done); + __ Strh(counter, MemOperand(temp, condition, LSL, 1)); + __ Bind(&done); + } + } + } GenerateTestAndBranch(if_instr, /* condition_input_index= */ 0, true_target, false_target); } @@ -3876,7 +3918,7 @@ static inline bool IsConditionOnFloatingPointValues(HInstruction* condition) { } static inline Condition GetConditionForSelect(HCondition* condition) { - IfCondition cond = condition->AsCondition()->GetCondition(); + IfCondition cond = condition->GetCondition(); return IsConditionOnFloatingPointValues(condition) ? ARM64FPCondition(cond, condition->IsGtBias()) : ARM64Condition(cond); } @@ -3888,8 +3930,8 @@ void LocationsBuilderARM64::VisitSelect(HSelect* select) { locations->SetInAt(1, Location::RequiresFpuRegister()); locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap); } else { - HConstant* cst_true_value = select->GetTrueValue()->AsConstant(); - HConstant* cst_false_value = select->GetFalseValue()->AsConstant(); + HConstant* cst_true_value = select->GetTrueValue()->AsConstantOrNull(); + HConstant* cst_false_value = select->GetFalseValue()->AsConstantOrNull(); bool is_true_value_constant = cst_true_value != nullptr; bool is_false_value_constant = cst_false_value != nullptr; // Ask VIXL whether we should synthesize constants in registers. 
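The VisitIf hunk above adds baseline branch profiling: a BranchCache holds two 16-bit counters laid out as [false, true] (the static_assert pins TrueOffset - FalseOffset to 2), the materialized condition indexes the pair, and the Tbnz on bit 16 skips the store so a counter saturates at 0xFFFF instead of wrapping. Below is a self-contained sketch of that counter logic, with the BranchCache reduced to what the hunk shows.

    // Illustrative sketch, not ART code: the saturating two-counter branch profile
    // updated by the baseline VisitIf path.
    #include <cstdint>
    #include <cstdio>

    struct BranchCache {
      // Layout implied by the diff: false counter at offset 0, true counter at +2.
      uint16_t counters[2] = {0, 0};

      void Record(bool condition) {
        uint32_t counter = counters[condition];  // Ldrh at (condition << 1)
        counter += 1;
        if (counter & (1u << 16)) {
          return;  // Tbnz on bit 16: saturate at 0xFFFF instead of wrapping to 0.
        }
        counters[condition] = static_cast<uint16_t>(counter);
      }
    };

    int main() {
      BranchCache cache;
      for (int i = 0; i < 70000; ++i) cache.Record(true);
      cache.Record(false);
      std::printf("false=%u true=%u\n",
                  static_cast<unsigned>(cache.counters[0]),
                  static_cast<unsigned>(cache.counters[1]));  // 1, 65535
      return 0;
    }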
@@ -3972,23 +4014,10 @@ void CodeGeneratorARM64::GenerateNop() { __ Nop(); } -void LocationsBuilderARM64::VisitPredicatedInstanceFieldGet( - HPredicatedInstanceFieldGet* instruction) { - HandleFieldGet(instruction, instruction->GetFieldInfo()); -} - void LocationsBuilderARM64::VisitInstanceFieldGet(HInstanceFieldGet* instruction) { HandleFieldGet(instruction, instruction->GetFieldInfo()); } -void InstructionCodeGeneratorARM64::VisitPredicatedInstanceFieldGet( - HPredicatedInstanceFieldGet* instruction) { - vixl::aarch64::Label finish; - __ Cbz(InputRegisterAt(instruction, 1), &finish); - HandleFieldGet(instruction, instruction->GetFieldInfo()); - __ Bind(&finish); -} - void InstructionCodeGeneratorARM64::VisitInstanceFieldGet(HInstanceFieldGet* instruction) { HandleFieldGet(instruction, instruction->GetFieldInfo()); } @@ -4005,8 +4034,8 @@ void InstructionCodeGeneratorARM64::VisitInstanceFieldSet(HInstanceFieldSet* ins } // Temp is used for read barrier. -static size_t NumberOfInstanceOfTemps(TypeCheckKind type_check_kind) { - if (gUseReadBarrier && +static size_t NumberOfInstanceOfTemps(bool emit_read_barrier, TypeCheckKind type_check_kind) { + if (emit_read_barrier && (kUseBakerReadBarrier || type_check_kind == TypeCheckKind::kAbstractClassCheck || type_check_kind == TypeCheckKind::kClassHierarchyCheck || @@ -4019,11 +4048,11 @@ static size_t NumberOfInstanceOfTemps(TypeCheckKind type_check_kind) { // Interface case has 3 temps, one for holding the number of interfaces, one for the current // interface pointer, one for loading the current interface. // The other checks have one temp for loading the object's class. -static size_t NumberOfCheckCastTemps(TypeCheckKind type_check_kind) { +static size_t NumberOfCheckCastTemps(bool emit_read_barrier, TypeCheckKind type_check_kind) { if (type_check_kind == TypeCheckKind::kInterfaceCheck) { return 3; } - return 1 + NumberOfInstanceOfTemps(type_check_kind); + return 1 + NumberOfInstanceOfTemps(emit_read_barrier, type_check_kind); } void LocationsBuilderARM64::VisitInstanceOf(HInstanceOf* instruction) { @@ -4035,7 +4064,7 @@ void LocationsBuilderARM64::VisitInstanceOf(HInstanceOf* instruction) { case TypeCheckKind::kAbstractClassCheck: case TypeCheckKind::kClassHierarchyCheck: case TypeCheckKind::kArrayObjectCheck: { - bool needs_read_barrier = CodeGenerator::InstanceOfNeedsReadBarrier(instruction); + bool needs_read_barrier = codegen_->InstanceOfNeedsReadBarrier(instruction); call_kind = needs_read_barrier ? LocationSummary::kCallOnSlowPath : LocationSummary::kNoCall; baker_read_barrier_slow_path = kUseBakerReadBarrier && needs_read_barrier; break; @@ -4066,7 +4095,8 @@ void LocationsBuilderARM64::VisitInstanceOf(HInstanceOf* instruction) { // Note that TypeCheckSlowPathARM64 uses this register too. locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap); // Add temps if necessary for read barriers. 
- locations->AddRegisterTemps(NumberOfInstanceOfTemps(type_check_kind)); + locations->AddRegisterTemps( + NumberOfInstanceOfTemps(codegen_->EmitReadBarrier(), type_check_kind)); } void InstructionCodeGeneratorARM64::VisitInstanceOf(HInstanceOf* instruction) { @@ -4079,7 +4109,7 @@ void InstructionCodeGeneratorARM64::VisitInstanceOf(HInstanceOf* instruction) { : InputRegisterAt(instruction, 1); Location out_loc = locations->Out(); Register out = OutputRegister(instruction); - const size_t num_temps = NumberOfInstanceOfTemps(type_check_kind); + const size_t num_temps = NumberOfInstanceOfTemps(codegen_->EmitReadBarrier(), type_check_kind); DCHECK_LE(num_temps, 1u); Location maybe_temp_loc = (num_temps >= 1) ? locations->GetTemp(0) : Location::NoLocation(); uint32_t class_offset = mirror::Object::ClassOffset().Int32Value(); @@ -4099,7 +4129,7 @@ void InstructionCodeGeneratorARM64::VisitInstanceOf(HInstanceOf* instruction) { switch (type_check_kind) { case TypeCheckKind::kExactCheck: { ReadBarrierOption read_barrier_option = - CodeGenerator::ReadBarrierOptionForInstanceOf(instruction); + codegen_->ReadBarrierOptionForInstanceOf(instruction); // /* HeapReference<Class> */ out = obj->klass_ GenerateReferenceLoadTwoRegisters(instruction, out_loc, @@ -4117,7 +4147,7 @@ void InstructionCodeGeneratorARM64::VisitInstanceOf(HInstanceOf* instruction) { case TypeCheckKind::kAbstractClassCheck: { ReadBarrierOption read_barrier_option = - CodeGenerator::ReadBarrierOptionForInstanceOf(instruction); + codegen_->ReadBarrierOptionForInstanceOf(instruction); // /* HeapReference<Class> */ out = obj->klass_ GenerateReferenceLoadTwoRegisters(instruction, out_loc, @@ -4148,7 +4178,7 @@ void InstructionCodeGeneratorARM64::VisitInstanceOf(HInstanceOf* instruction) { case TypeCheckKind::kClassHierarchyCheck: { ReadBarrierOption read_barrier_option = - CodeGenerator::ReadBarrierOptionForInstanceOf(instruction); + codegen_->ReadBarrierOptionForInstanceOf(instruction); // /* HeapReference<Class> */ out = obj->klass_ GenerateReferenceLoadTwoRegisters(instruction, out_loc, @@ -4180,7 +4210,7 @@ void InstructionCodeGeneratorARM64::VisitInstanceOf(HInstanceOf* instruction) { case TypeCheckKind::kArrayObjectCheck: { ReadBarrierOption read_barrier_option = - CodeGenerator::ReadBarrierOptionForInstanceOf(instruction); + codegen_->ReadBarrierOptionForInstanceOf(instruction); // /* HeapReference<Class> */ out = obj->klass_ GenerateReferenceLoadTwoRegisters(instruction, out_loc, @@ -4297,7 +4327,7 @@ void InstructionCodeGeneratorARM64::VisitInstanceOf(HInstanceOf* instruction) { void LocationsBuilderARM64::VisitCheckCast(HCheckCast* instruction) { TypeCheckKind type_check_kind = instruction->GetTypeCheckKind(); - LocationSummary::CallKind call_kind = CodeGenerator::GetCheckCastCallKind(instruction); + LocationSummary::CallKind call_kind = codegen_->GetCheckCastCallKind(instruction); LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(instruction, call_kind); locations->SetInAt(0, Location::RequiresRegister()); @@ -4308,8 +4338,7 @@ void LocationsBuilderARM64::VisitCheckCast(HCheckCast* instruction) { } else { locations->SetInAt(1, Location::RequiresRegister()); } - // Add temps for read barriers and other uses. One is used by TypeCheckSlowPathARM64. 
- locations->AddRegisterTemps(NumberOfCheckCastTemps(type_check_kind)); + locations->AddRegisterTemps(NumberOfCheckCastTemps(codegen_->EmitReadBarrier(), type_check_kind)); } void InstructionCodeGeneratorARM64::VisitCheckCast(HCheckCast* instruction) { @@ -4320,7 +4349,7 @@ void InstructionCodeGeneratorARM64::VisitCheckCast(HCheckCast* instruction) { Register cls = (type_check_kind == TypeCheckKind::kBitstringCheck) ? Register() : InputRegisterAt(instruction, 1); - const size_t num_temps = NumberOfCheckCastTemps(type_check_kind); + const size_t num_temps = NumberOfCheckCastTemps(codegen_->EmitReadBarrier(), type_check_kind); DCHECK_GE(num_temps, 1u); DCHECK_LE(num_temps, 3u); Location temp_loc = locations->GetTemp(0); @@ -4336,7 +4365,7 @@ void InstructionCodeGeneratorARM64::VisitCheckCast(HCheckCast* instruction) { const uint32_t object_array_data_offset = mirror::Array::DataOffset(kHeapReferenceSize).Uint32Value(); - bool is_type_check_slow_path_fatal = CodeGenerator::IsTypeCheckSlowPathFatal(instruction); + bool is_type_check_slow_path_fatal = codegen_->IsTypeCheckSlowPathFatal(instruction); SlowPathCodeARM64* type_check_slow_path = new (codegen_->GetScopedAllocator()) TypeCheckSlowPathARM64( instruction, is_type_check_slow_path_fatal); @@ -4478,12 +4507,11 @@ void InstructionCodeGeneratorARM64::VisitCheckCast(HCheckCast* instruction) { kWithoutReadBarrier); // /* HeapReference<Class> */ temp = temp->iftable_ - GenerateReferenceLoadTwoRegisters(instruction, - temp_loc, - temp_loc, - iftable_offset, - maybe_temp2_loc, - kWithoutReadBarrier); + GenerateReferenceLoadOneRegister(instruction, + temp_loc, + iftable_offset, + maybe_temp2_loc, + kWithoutReadBarrier); // Iftable is never null. __ Ldr(WRegisterFrom(maybe_temp2_loc), HeapOperand(temp.W(), array_length_offset)); // Loop through the iftable and check if any class matches. @@ -4525,7 +4553,7 @@ void LocationsBuilderARM64::VisitIntConstant(HIntConstant* constant) { locations->SetOut(Location::ConstantLocation(constant)); } -void InstructionCodeGeneratorARM64::VisitIntConstant(HIntConstant* constant ATTRIBUTE_UNUSED) { +void InstructionCodeGeneratorARM64::VisitIntConstant([[maybe_unused]] HIntConstant* constant) { // Will be generated at use site. } @@ -4534,7 +4562,7 @@ void LocationsBuilderARM64::VisitNullConstant(HNullConstant* constant) { locations->SetOut(Location::ConstantLocation(constant)); } -void InstructionCodeGeneratorARM64::VisitNullConstant(HNullConstant* constant ATTRIBUTE_UNUSED) { +void InstructionCodeGeneratorARM64::VisitNullConstant([[maybe_unused]] HNullConstant* constant) { // Will be generated at use site. } @@ -4566,24 +4594,26 @@ void LocationsBuilderARM64::VisitInvokeInterface(HInvokeInterface* invoke) { void CodeGeneratorARM64::MaybeGenerateInlineCacheCheck(HInstruction* instruction, Register klass) { DCHECK_EQ(klass.GetCode(), 0u); - // We know the destination of an intrinsic, so no need to record inline - // caches. 
- if (!instruction->GetLocations()->Intrinsified() && - GetGraph()->IsCompilingBaseline() && - !Runtime::Current()->IsAotCompiler()) { - DCHECK(!instruction->GetEnvironment()->IsFromInlinedInvoke()); + if (ProfilingInfoBuilder::IsInlineCacheUseful(instruction->AsInvoke(), this)) { ProfilingInfo* info = GetGraph()->GetProfilingInfo(); DCHECK(info != nullptr); - InlineCache* cache = info->GetInlineCache(instruction->GetDexPc()); - uint64_t address = reinterpret_cast64<uint64_t>(cache); - vixl::aarch64::Label done; - __ Mov(x8, address); - __ Ldr(x9, MemOperand(x8, InlineCache::ClassesOffset().Int32Value())); - // Fast path for a monomorphic cache. - __ Cmp(klass, x9); - __ B(eq, &done); - InvokeRuntime(kQuickUpdateInlineCache, instruction, instruction->GetDexPc()); - __ Bind(&done); + InlineCache* cache = ProfilingInfoBuilder::GetInlineCache(info, instruction->AsInvoke()); + if (cache != nullptr) { + uint64_t address = reinterpret_cast64<uint64_t>(cache); + vixl::aarch64::Label done; + __ Mov(x8, address); + __ Ldr(w9, MemOperand(x8, InlineCache::ClassesOffset().Int32Value())); + // Fast path for a monomorphic cache. + __ Cmp(klass.W(), w9); + __ B(eq, &done); + InvokeRuntime(kQuickUpdateInlineCache, instruction, instruction->GetDexPc()); + __ Bind(&done); + } else { + // This is unexpected, but we don't guarantee stable compilation across + // JIT runs so just warn about it. + ScopedObjectAccess soa(Thread::Current()); + LOG(WARNING) << "Missing inline cache for " << GetGraph()->GetArtMethod()->PrettyMethod(); + } } } @@ -4709,8 +4739,8 @@ static bool TryGenerateIntrinsicCode(HInvoke* invoke, CodeGeneratorARM64* codege } HInvokeStaticOrDirect::DispatchInfo CodeGeneratorARM64::GetSupportedInvokeStaticOrDirectDispatch( - const HInvokeStaticOrDirect::DispatchInfo& desired_dispatch_info, - ArtMethod* method ATTRIBUTE_UNUSED) { + const HInvokeStaticOrDirect::DispatchInfo& desired_dispatch_info, + [[maybe_unused]] ArtMethod* method) { // On ARM64 we support all dispatch types. return desired_dispatch_info; } @@ -4749,7 +4779,8 @@ void CodeGeneratorARM64::LoadMethod(MethodLoadKind load_kind, Location temp, HIn case MethodLoadKind::kJitDirectAddress: { // Load method address from literal pool. __ Ldr(XRegisterFrom(temp), - DeduplicateUint64Literal(reinterpret_cast<uint64_t>(invoke->GetResolvedMethod()))); + jit_patches_.DeduplicateUint64Literal( + reinterpret_cast<uint64_t>(invoke->GetResolvedMethod()))); break; } case MethodLoadKind::kRuntimeCall: { @@ -4775,14 +4806,12 @@ void CodeGeneratorARM64::GenerateStaticOrDirectCall( __ Ldr(XRegisterFrom(temp), MemOperand(tr, offset)); break; } - case MethodLoadKind::kRecursive: { + case MethodLoadKind::kRecursive: callee_method = invoke->GetLocations()->InAt(invoke->GetCurrentMethodIndex()); break; - } - case MethodLoadKind::kRuntimeCall: { + case MethodLoadKind::kRuntimeCall: GenerateInvokeStaticOrDirectRuntimeCall(invoke, temp, slow_path); return; // No code pointer retrieval; the runtime performs the call directly. 
- } case MethodLoadKind::kBootImageLinkTimePcRelative: DCHECK(GetCompilerOptions().IsBootImage() || GetCompilerOptions().IsBootImageExtension()); if (invoke->GetCodePtrLocation() == CodePtrLocation::kCallCriticalNative) { @@ -4798,10 +4827,9 @@ void CodeGeneratorARM64::GenerateStaticOrDirectCall( break; } FALLTHROUGH_INTENDED; - default: { + default: LoadMethod(invoke->GetMethodLoadKind(), temp, invoke); break; - } } auto call_lr = [&]() { @@ -4906,6 +4934,7 @@ void CodeGeneratorARM64::GenerateVirtualCall( } // Instead of simply (possibly) unpoisoning `temp` here, we should // emit a read barrier for the previous class reference load. + // However this is not required in practice, as this is an // intermediate/temporary reference and because the current // concurrent copying collector keeps the from-space memory // intact/accessible until the end of the marking phase (the @@ -5090,25 +5119,8 @@ vixl::aarch64::Label* CodeGeneratorARM64::NewPcRelativePatch( return label; } -vixl::aarch64::Literal<uint32_t>* CodeGeneratorARM64::DeduplicateBootImageAddressLiteral( - uint64_t address) { - return DeduplicateUint32Literal(dchecked_integral_cast<uint32_t>(address)); -} - -vixl::aarch64::Literal<uint32_t>* CodeGeneratorARM64::DeduplicateJitStringLiteral( - const DexFile& dex_file, dex::StringIndex string_index, Handle<mirror::String> handle) { - ReserveJitStringRoot(StringReference(&dex_file, string_index), handle); - return jit_string_patches_.GetOrCreate( - StringReference(&dex_file, string_index), - [this]() { return __ CreateLiteralDestroyedWithPool<uint32_t>(/* value= */ 0u); }); -} - -vixl::aarch64::Literal<uint32_t>* CodeGeneratorARM64::DeduplicateJitClassLiteral( - const DexFile& dex_file, dex::TypeIndex type_index, Handle<mirror::Class> handle) { - ReserveJitClassRoot(TypeReference(&dex_file, type_index), handle); - return jit_class_patches_.GetOrCreate( - TypeReference(&dex_file, type_index), - [this]() { return __ CreateLiteralDestroyedWithPool<uint32_t>(/* value= */ 0u); }); +void CodeGeneratorARM64::EmitJitRootPatches(uint8_t* code, const uint8_t* roots_data) { + jit_patches_.EmitJitRootPatches(code, roots_data, *GetCodeGenerationData()); } void CodeGeneratorARM64::EmitAdrpPlaceholder(vixl::aarch64::Label* fixup_label, @@ -5171,7 +5183,7 @@ void CodeGeneratorARM64::LoadBootImageAddress(vixl::aarch64::Register reg, void CodeGeneratorARM64::LoadTypeForBootImageIntrinsic(vixl::aarch64::Register reg, TypeReference target_type) { - // Load the class the same way as for HLoadClass::LoadKind::kBootImageLinkTimePcRelative. + // Load the type the same way as for HLoadClass::LoadKind::kBootImageLinkTimePcRelative. DCHECK(GetCompilerOptions().IsBootImage() || GetCompilerOptions().IsBootImageExtension()); // Add ADRP with its PC-relative type patch. 
vixl::aarch64::Label* adrp_label = @@ -5332,19 +5344,7 @@ void CodeGeneratorARM64::EmitThunkCode(const linker::LinkerPatch& patch, assembler.FinalizeCode(); code->resize(assembler.CodeSize()); MemoryRegion code_region(code->data(), code->size()); - assembler.FinalizeInstructions(code_region); -} - -vixl::aarch64::Literal<uint32_t>* CodeGeneratorARM64::DeduplicateUint32Literal(uint32_t value) { - return uint32_literals_.GetOrCreate( - value, - [this, value]() { return __ CreateLiteralDestroyedWithPool<uint32_t>(value); }); -} - -vixl::aarch64::Literal<uint64_t>* CodeGeneratorARM64::DeduplicateUint64Literal(uint64_t value) { - return uint64_literals_.GetOrCreate( - value, - [this, value]() { return __ CreateLiteralDestroyedWithPool<uint64_t>(value); }); + assembler.CopyInstructions(code_region); } void InstructionCodeGeneratorARM64::VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) { @@ -5370,13 +5370,8 @@ void InstructionCodeGeneratorARM64::VisitInvokeVirtual(HInvokeVirtual* invoke) { return; } - { - // Ensure that between the BLR (emitted by GenerateVirtualCall) and RecordPcInfo there - // are no pools emitted. - EmissionCheckScope guard(GetVIXLAssembler(), kInvokeCodeMarginSizeInBytes); - codegen_->GenerateVirtualCall(invoke, invoke->GetLocations()->GetTemp(0)); - DCHECK(!codegen_->IsLeafMethod()); - } + codegen_->GenerateVirtualCall(invoke, invoke->GetLocations()->GetTemp(0)); + DCHECK(!codegen_->IsLeafMethod()); codegen_->MaybeGenerateMarkingRegisterCheck(/* code= */ __LINE__); } @@ -5421,7 +5416,7 @@ void LocationsBuilderARM64::VisitLoadClass(HLoadClass* cls) { load_kind == HLoadClass::LoadKind::kBssEntryPublic || load_kind == HLoadClass::LoadKind::kBssEntryPackage); - const bool requires_read_barrier = gUseReadBarrier && !cls->IsInBootImage(); + const bool requires_read_barrier = !cls->IsInBootImage() && codegen_->EmitReadBarrier(); LocationSummary::CallKind call_kind = (cls->NeedsEnvironment() || requires_read_barrier) ? LocationSummary::kCallOnSlowPath : LocationSummary::kNoCall; @@ -5434,12 +5429,14 @@ void LocationsBuilderARM64::VisitLoadClass(HLoadClass* cls) { locations->SetInAt(0, Location::RequiresRegister()); } locations->SetOut(Location::RequiresRegister()); - if (cls->GetLoadKind() == HLoadClass::LoadKind::kBssEntry) { - if (!gUseReadBarrier || kUseBakerReadBarrier) { + if (load_kind == HLoadClass::LoadKind::kBssEntry || + load_kind == HLoadClass::LoadKind::kBssEntryPublic || + load_kind == HLoadClass::LoadKind::kBssEntryPackage) { + if (codegen_->EmitNonBakerReadBarrier()) { + // For non-Baker read barrier we have a temp-clobbering call. + } else { // Rely on the type resolution or initialization and marking to save everything we need. locations->SetCustomSlowPathCallerSaves(OneRegInReferenceOutSaveEverythingCallerSaves()); - } else { - // For non-Baker read barrier we have a temp-clobbering call. } } } @@ -5460,9 +5457,8 @@ void InstructionCodeGeneratorARM64::VisitLoadClass(HLoadClass* cls) NO_THREAD_SA Location out_loc = cls->GetLocations()->Out(); Register out = OutputRegister(cls); - const ReadBarrierOption read_barrier_option = cls->IsInBootImage() - ? kWithoutReadBarrier - : gCompilerReadBarrierOption; + const ReadBarrierOption read_barrier_option = + cls->IsInBootImage() ? 
kWithoutReadBarrier : codegen_->GetCompilerReadBarrierOption(); bool generate_null_check = false; switch (load_kind) { case HLoadClass::LoadKind::kReferrersClass: { @@ -5600,7 +5596,7 @@ void LocationsBuilderARM64::VisitClearException(HClearException* clear) { new (GetGraph()->GetAllocator()) LocationSummary(clear, LocationSummary::kNoCall); } -void InstructionCodeGeneratorARM64::VisitClearException(HClearException* clear ATTRIBUTE_UNUSED) { +void InstructionCodeGeneratorARM64::VisitClearException([[maybe_unused]] HClearException* clear) { __ Str(wzr, GetExceptionTlsAddress()); } @@ -5623,7 +5619,7 @@ HLoadString::LoadKind CodeGeneratorARM64::GetSupportedLoadStringKind( } void LocationsBuilderARM64::VisitLoadString(HLoadString* load) { - LocationSummary::CallKind call_kind = CodeGenerator::GetLoadStringCallKind(load); + LocationSummary::CallKind call_kind = codegen_->GetLoadStringCallKind(load); LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(load, call_kind); if (load->GetLoadKind() == HLoadString::LoadKind::kRuntimeCall) { InvokeRuntimeCallingConvention calling_convention; @@ -5631,11 +5627,11 @@ void LocationsBuilderARM64::VisitLoadString(HLoadString* load) { } else { locations->SetOut(Location::RequiresRegister()); if (load->GetLoadKind() == HLoadString::LoadKind::kBssEntry) { - if (!gUseReadBarrier || kUseBakerReadBarrier) { + if (codegen_->EmitNonBakerReadBarrier()) { + // For non-Baker read barrier we have a temp-clobbering call. + } else { // Rely on the pResolveString and marking to save everything we need. locations->SetCustomSlowPathCallerSaves(OneRegInReferenceOutSaveEverythingCallerSaves()); - } else { - // For non-Baker read barrier we have a temp-clobbering call. } } } @@ -5685,7 +5681,7 @@ void InstructionCodeGeneratorARM64::VisitLoadString(HLoadString* load) NO_THREAD temp, /* offset placeholder */ 0u, ldr_label, - gCompilerReadBarrierOption); + codegen_->GetCompilerReadBarrierOption()); SlowPathCodeARM64* slow_path = new (codegen_->GetScopedAllocator()) LoadStringSlowPathARM64(load); codegen_->AddSlowPath(slow_path); @@ -5709,14 +5705,13 @@ void InstructionCodeGeneratorARM64::VisitLoadString(HLoadString* load) NO_THREAD out.X(), /* offset= */ 0, /* fixup_label= */ nullptr, - gCompilerReadBarrierOption); + codegen_->GetCompilerReadBarrierOption()); return; } default: break; } - // TODO: Re-add the compiler code to do string dex cache lookup again. InvokeRuntimeCallingConvention calling_convention; DCHECK_EQ(calling_convention.GetRegisterAt(0).GetCode(), out.GetCode()); __ Mov(calling_convention.GetRegisterAt(0).W(), load->GetStringIndex().index_); @@ -5730,7 +5725,7 @@ void LocationsBuilderARM64::VisitLongConstant(HLongConstant* constant) { locations->SetOut(Location::ConstantLocation(constant)); } -void InstructionCodeGeneratorARM64::VisitLongConstant(HLongConstant* constant ATTRIBUTE_UNUSED) { +void InstructionCodeGeneratorARM64::VisitLongConstant([[maybe_unused]] HLongConstant* constant) { // Will be generated at use site. 
} @@ -5930,7 +5925,7 @@ void InstructionCodeGeneratorARM64::VisitOr(HOr* instruction) { HandleBinaryOp(instruction); } -void LocationsBuilderARM64::VisitParallelMove(HParallelMove* instruction ATTRIBUTE_UNUSED) { +void LocationsBuilderARM64::VisitParallelMove([[maybe_unused]] HParallelMove* instruction) { LOG(FATAL) << "Unreachable"; } @@ -5957,7 +5952,7 @@ void LocationsBuilderARM64::VisitParameterValue(HParameterValue* instruction) { } void InstructionCodeGeneratorARM64::VisitParameterValue( - HParameterValue* instruction ATTRIBUTE_UNUSED) { + [[maybe_unused]] HParameterValue* instruction) { // Nothing to do, the parameter is already at its location. } @@ -5968,7 +5963,7 @@ void LocationsBuilderARM64::VisitCurrentMethod(HCurrentMethod* instruction) { } void InstructionCodeGeneratorARM64::VisitCurrentMethod( - HCurrentMethod* instruction ATTRIBUTE_UNUSED) { + [[maybe_unused]] HCurrentMethod* instruction) { // Nothing to do, the method is already at its location. } @@ -5980,7 +5975,7 @@ void LocationsBuilderARM64::VisitPhi(HPhi* instruction) { locations->SetOut(Location::Any()); } -void InstructionCodeGeneratorARM64::VisitPhi(HPhi* instruction ATTRIBUTE_UNUSED) { +void InstructionCodeGeneratorARM64::VisitPhi([[maybe_unused]] HPhi* instruction) { LOG(FATAL) << "Unreachable"; } @@ -6175,7 +6170,7 @@ void LocationsBuilderARM64::VisitConstructorFence(HConstructorFence* constructor } void InstructionCodeGeneratorARM64::VisitConstructorFence( - HConstructorFence* constructor_fence ATTRIBUTE_UNUSED) { + [[maybe_unused]] HConstructorFence* constructor_fence) { codegen_->GenerateMemoryBarrier(MemBarrierKind::kStoreStore); } @@ -6215,7 +6210,7 @@ void LocationsBuilderARM64::VisitReturnVoid(HReturnVoid* instruction) { instruction->SetLocations(nullptr); } -void InstructionCodeGeneratorARM64::VisitReturnVoid(HReturnVoid* instruction ATTRIBUTE_UNUSED) { +void InstructionCodeGeneratorARM64::VisitReturnVoid([[maybe_unused]] HReturnVoid* instruction) { codegen_->GenerateFrameExit(); } @@ -6353,6 +6348,9 @@ void LocationsBuilderARM64::VisitSuspendCheck(HSuspendCheck* instruction) { // In suspend check slow path, usually there are no caller-save registers at all. // If SIMD instructions are present, however, we force spilling all live SIMD // registers in full width (since the runtime only saves/restores lower part). + // Note that only a suspend check can see live SIMD registers. In the + // loop optimization, we make sure this does not happen for any other slow + // path. locations->SetCustomSlowPathCallerSaves( GetGraph()->HasSIMD() ? RegisterSet::AllFpu() : RegisterSet::Empty()); } @@ -6467,12 +6465,12 @@ void InstructionCodeGeneratorARM64::VisitXor(HXor* instruction) { HandleBinaryOp(instruction); } -void LocationsBuilderARM64::VisitBoundType(HBoundType* instruction ATTRIBUTE_UNUSED) { +void LocationsBuilderARM64::VisitBoundType([[maybe_unused]] HBoundType* instruction) { // Nothing to do, this should be removed during prepare for register allocator. LOG(FATAL) << "Unreachable"; } -void InstructionCodeGeneratorARM64::VisitBoundType(HBoundType* instruction ATTRIBUTE_UNUSED) { +void InstructionCodeGeneratorARM64::VisitBoundType([[maybe_unused]] HBoundType* instruction) { // Nothing to do, this should be removed during prepare for register allocator. 
LOG(FATAL) << "Unreachable"; } @@ -6573,7 +6571,7 @@ void InstructionCodeGeneratorARM64::GenerateReferenceLoadOneRegister( DataType::Type type = DataType::Type::kReference; Register out_reg = RegisterFrom(out, type); if (read_barrier_option == kWithReadBarrier) { - CHECK(gUseReadBarrier); + DCHECK(codegen_->EmitReadBarrier()); if (kUseBakerReadBarrier) { // Load with fast path based Baker's read barrier. // /* HeapReference<Object> */ out = *(out + offset) @@ -6614,7 +6612,7 @@ void InstructionCodeGeneratorARM64::GenerateReferenceLoadTwoRegisters( Register out_reg = RegisterFrom(out, type); Register obj_reg = RegisterFrom(obj, type); if (read_barrier_option == kWithReadBarrier) { - CHECK(gUseReadBarrier); + DCHECK(codegen_->EmitReadBarrier()); if (kUseBakerReadBarrier) { // Load with fast path based Baker's read barrier. // /* HeapReference<Object> */ out = *(obj + offset) @@ -6649,7 +6647,7 @@ void CodeGeneratorARM64::GenerateGcRootFieldLoad( DCHECK(fixup_label == nullptr || offset == 0u); Register root_reg = RegisterFrom(root, DataType::Type::kReference); if (read_barrier_option == kWithReadBarrier) { - DCHECK(gUseReadBarrier); + DCHECK(EmitReadBarrier()); if (kUseBakerReadBarrier) { // Fast path implementation of art::ReadBarrier::BarrierForRoot when // Baker's read barrier are used. @@ -6712,11 +6710,10 @@ void CodeGeneratorARM64::GenerateGcRootFieldLoad( MaybeGenerateMarkingRegisterCheck(/* code= */ __LINE__); } -void CodeGeneratorARM64::GenerateIntrinsicCasMoveWithBakerReadBarrier( +void CodeGeneratorARM64::GenerateIntrinsicMoveWithBakerReadBarrier( vixl::aarch64::Register marked_old_value, vixl::aarch64::Register old_value) { - DCHECK(gUseReadBarrier); - DCHECK(kUseBakerReadBarrier); + DCHECK(EmitBakerReadBarrier()); // Similar to the Baker RB path in GenerateGcRootFieldLoad(), with a MOV instead of LDR. uint32_t custom_data = EncodeBakerReadBarrierGcRootData(marked_old_value.GetCode()); @@ -6737,8 +6734,7 @@ void CodeGeneratorARM64::GenerateFieldLoadWithBakerReadBarrier(HInstruction* ins const vixl::aarch64::MemOperand& src, bool needs_null_check, bool use_load_acquire) { - DCHECK(gUseReadBarrier); - DCHECK(kUseBakerReadBarrier); + DCHECK(EmitBakerReadBarrier()); // Query `art::Thread::Current()->GetIsGcMarking()` (stored in the // Marking Register) to decide whether we need to enter the slow @@ -6833,8 +6829,7 @@ void CodeGeneratorARM64::GenerateArrayLoadWithBakerReadBarrier(HArrayGet* instru uint32_t data_offset, Location index, bool needs_null_check) { - DCHECK(gUseReadBarrier); - DCHECK(kUseBakerReadBarrier); + DCHECK(EmitBakerReadBarrier()); static_assert( sizeof(mirror::HeapReference<mirror::Object>) == sizeof(int32_t), @@ -6911,7 +6906,7 @@ void CodeGeneratorARM64::GenerateArrayLoadWithBakerReadBarrier(HArrayGet* instru void CodeGeneratorARM64::MaybeGenerateMarkingRegisterCheck(int code, Location temp_loc) { // The following condition is a compile-time one, so it does not have a run-time cost. - if (kIsDebugBuild && gUseReadBarrier && kUseBakerReadBarrier) { + if (kIsDebugBuild && EmitBakerReadBarrier()) { // The following condition is a run-time one; it is executed after the // previous compile-time test, to avoid penalizing non-debug builds. if (GetCompilerOptions().EmitRunTimeChecksInDebugMode()) { @@ -6940,7 +6935,7 @@ void CodeGeneratorARM64::GenerateReadBarrierSlow(HInstruction* instruction, Location obj, uint32_t offset, Location index) { - DCHECK(gUseReadBarrier); + DCHECK(EmitReadBarrier()); // Insert a slow path based read barrier *after* the reference load. 
// @@ -6965,7 +6960,7 @@ void CodeGeneratorARM64::MaybeGenerateReadBarrierSlow(HInstruction* instruction, Location obj, uint32_t offset, Location index) { - if (gUseReadBarrier) { + if (EmitReadBarrier()) { // Baker's read barriers shall be handled by the fast path // (CodeGeneratorARM64::GenerateReferenceLoadWithBakerReadBarrier). DCHECK(!kUseBakerReadBarrier); @@ -6980,7 +6975,7 @@ void CodeGeneratorARM64::MaybeGenerateReadBarrierSlow(HInstruction* instruction, void CodeGeneratorARM64::GenerateReadBarrierForRootSlow(HInstruction* instruction, Location out, Location root) { - DCHECK(gUseReadBarrier); + DCHECK(EmitReadBarrier()); // Insert a slow path based read barrier *after* the GC root load. // @@ -7018,32 +7013,6 @@ void InstructionCodeGeneratorARM64::VisitClassTableGet(HClassTableGet* instructi } } -static void PatchJitRootUse(uint8_t* code, - const uint8_t* roots_data, - vixl::aarch64::Literal<uint32_t>* literal, - uint64_t index_in_table) { - uint32_t literal_offset = literal->GetOffset(); - uintptr_t address = - reinterpret_cast<uintptr_t>(roots_data) + index_in_table * sizeof(GcRoot<mirror::Object>); - uint8_t* data = code + literal_offset; - reinterpret_cast<uint32_t*>(data)[0] = dchecked_integral_cast<uint32_t>(address); -} - -void CodeGeneratorARM64::EmitJitRootPatches(uint8_t* code, const uint8_t* roots_data) { - for (const auto& entry : jit_string_patches_) { - const StringReference& string_reference = entry.first; - vixl::aarch64::Literal<uint32_t>* table_entry_literal = entry.second; - uint64_t index_in_table = GetJitStringRootIndex(string_reference); - PatchJitRootUse(code, roots_data, table_entry_literal, index_in_table); - } - for (const auto& entry : jit_class_patches_) { - const TypeReference& type_reference = entry.first; - vixl::aarch64::Literal<uint32_t>* table_entry_literal = entry.second; - uint64_t index_in_table = GetJitClassRootIndex(type_reference); - PatchJitRootUse(code, roots_data, table_entry_literal, index_in_table); - } -} - MemOperand InstructionCodeGeneratorARM64::VecNEONAddress( HVecMemoryOperation* instruction, UseScratchRegisterScope* temps_scope, diff --git a/compiler/optimizing/code_generator_arm64.h b/compiler/optimizing/code_generator_arm64.h index 6190364d1d..7ff08f55cb 100644 --- a/compiler/optimizing/code_generator_arm64.h +++ b/compiler/optimizing/code_generator_arm64.h @@ -26,6 +26,7 @@ #include "dex/string_reference.h" #include "dex/type_reference.h" #include "driver/compiler_options.h" +#include "jit_patches_arm64.h" #include "nodes.h" #include "parallel_move_resolver.h" #include "utils/arm64/assembler_arm64.h" @@ -50,30 +51,29 @@ class CodeGeneratorARM64; // Use a local definition to prevent copying mistakes. static constexpr size_t kArm64WordSize = static_cast<size_t>(kArm64PointerSize); -// These constants are used as an approximate margin when emission of veneer and literal pools +// This constant is used as an approximate margin when emission of veneer and literal pools // must be blocked. 
static constexpr int kMaxMacroInstructionSizeInBytes = 15 * vixl::aarch64::kInstructionSize; -static constexpr int kInvokeCodeMarginSizeInBytes = 6 * kMaxMacroInstructionSizeInBytes; static const vixl::aarch64::Register kParameterCoreRegisters[] = { - vixl::aarch64::x1, - vixl::aarch64::x2, - vixl::aarch64::x3, - vixl::aarch64::x4, - vixl::aarch64::x5, - vixl::aarch64::x6, - vixl::aarch64::x7 + vixl::aarch64::x1, + vixl::aarch64::x2, + vixl::aarch64::x3, + vixl::aarch64::x4, + vixl::aarch64::x5, + vixl::aarch64::x6, + vixl::aarch64::x7 }; static constexpr size_t kParameterCoreRegistersLength = arraysize(kParameterCoreRegisters); static const vixl::aarch64::VRegister kParameterFPRegisters[] = { - vixl::aarch64::d0, - vixl::aarch64::d1, - vixl::aarch64::d2, - vixl::aarch64::d3, - vixl::aarch64::d4, - vixl::aarch64::d5, - vixl::aarch64::d6, - vixl::aarch64::d7 + vixl::aarch64::d0, + vixl::aarch64::d1, + vixl::aarch64::d2, + vixl::aarch64::d3, + vixl::aarch64::d4, + vixl::aarch64::d5, + vixl::aarch64::d6, + vixl::aarch64::d7 }; static constexpr size_t kParameterFPRegistersLength = arraysize(kParameterFPRegisters); @@ -116,7 +116,7 @@ const vixl::aarch64::CPURegList callee_saved_core_registers( vixl::aarch64::CPURegister::kRegister, vixl::aarch64::kXRegSize, (kReserveMarkingRegister ? vixl::aarch64::x21.GetCode() : vixl::aarch64::x20.GetCode()), - vixl::aarch64::x30.GetCode()); + vixl::aarch64::x30.GetCode()); const vixl::aarch64::CPURegList callee_saved_fp_registers(vixl::aarch64::CPURegister::kVRegister, vixl::aarch64::kDRegSize, vixl::aarch64::d8.GetCode(), @@ -144,19 +144,8 @@ Location ARM64ReturnLocation(DataType::Type return_type); V(SystemArrayCopyByte) \ V(SystemArrayCopyInt) \ /* 1.8 */ \ - V(UnsafeGetAndAddInt) \ - V(UnsafeGetAndAddLong) \ - V(UnsafeGetAndSetInt) \ - V(UnsafeGetAndSetLong) \ - V(UnsafeGetAndSetObject) \ V(MethodHandleInvokeExact) \ - V(MethodHandleInvoke) \ - /* OpenJDK 11 */ \ - V(JdkUnsafeGetAndAddInt) \ - V(JdkUnsafeGetAndAddLong) \ - V(JdkUnsafeGetAndSetInt) \ - V(JdkUnsafeGetAndSetLong) \ - V(JdkUnsafeGetAndSetObject) + V(MethodHandleInvoke) class SlowPathCodeARM64 : public SlowPathCode { public: @@ -192,34 +181,34 @@ class JumpTableARM64 : public DeletableArenaObject<kArenaAllocSwitchTable> { DISALLOW_COPY_AND_ASSIGN(JumpTableARM64); }; -static const vixl::aarch64::Register kRuntimeParameterCoreRegisters[] = - { vixl::aarch64::x0, - vixl::aarch64::x1, - vixl::aarch64::x2, - vixl::aarch64::x3, - vixl::aarch64::x4, - vixl::aarch64::x5, - vixl::aarch64::x6, - vixl::aarch64::x7 }; +static const vixl::aarch64::Register kRuntimeParameterCoreRegisters[] = { + vixl::aarch64::x0, + vixl::aarch64::x1, + vixl::aarch64::x2, + vixl::aarch64::x3, + vixl::aarch64::x4, + vixl::aarch64::x5, + vixl::aarch64::x6, + vixl::aarch64::x7 +}; static constexpr size_t kRuntimeParameterCoreRegistersLength = arraysize(kRuntimeParameterCoreRegisters); -static const vixl::aarch64::VRegister kRuntimeParameterFpuRegisters[] = - { vixl::aarch64::d0, - vixl::aarch64::d1, - vixl::aarch64::d2, - vixl::aarch64::d3, - vixl::aarch64::d4, - vixl::aarch64::d5, - vixl::aarch64::d6, - vixl::aarch64::d7 }; +static const vixl::aarch64::VRegister kRuntimeParameterFpuRegisters[] = { + vixl::aarch64::d0, + vixl::aarch64::d1, + vixl::aarch64::d2, + vixl::aarch64::d3, + vixl::aarch64::d4, + vixl::aarch64::d5, + vixl::aarch64::d6, + vixl::aarch64::d7 +}; static constexpr size_t kRuntimeParameterFpuRegistersLength = arraysize(kRuntimeParameterCoreRegisters); class InvokeRuntimeCallingConvention : public 
CallingConvention<vixl::aarch64::Register, vixl::aarch64::VRegister> { public: - static constexpr size_t kParameterCoreRegistersLength = arraysize(kParameterCoreRegisters); - InvokeRuntimeCallingConvention() : CallingConvention(kRuntimeParameterCoreRegisters, kRuntimeParameterCoreRegistersLength, @@ -304,16 +293,16 @@ class FieldAccessCallingConventionARM64 : public FieldAccessCallingConvention { Location GetFieldIndexLocation() const override { return helpers::LocationFrom(vixl::aarch64::x0); } - Location GetReturnLocation(DataType::Type type ATTRIBUTE_UNUSED) const override { + Location GetReturnLocation([[maybe_unused]] DataType::Type type) const override { return helpers::LocationFrom(vixl::aarch64::x0); } - Location GetSetValueLocation(DataType::Type type ATTRIBUTE_UNUSED, + Location GetSetValueLocation([[maybe_unused]] DataType::Type type, bool is_instance) const override { return is_instance ? helpers::LocationFrom(vixl::aarch64::x2) : helpers::LocationFrom(vixl::aarch64::x1); } - Location GetFpuLocation(DataType::Type type ATTRIBUTE_UNUSED) const override { + Location GetFpuLocation([[maybe_unused]] DataType::Type type) const override { return helpers::LocationFrom(vixl::aarch64::d0); } @@ -551,12 +540,31 @@ class InstructionCodeGeneratorARM64Sve : public InstructionCodeGeneratorARM64 { // register size (full SIMD register is used). void ValidateVectorLength(HVecOperation* instr) const; - // Returns default predicate register which is used as governing vector predicate - // to implement predicated loop execution. + vixl::aarch64::PRegister GetVecGoverningPReg(HVecOperation* instr) { + return GetVecPredSetFixedOutPReg(instr->GetGoverningPredicate()); + } + + // Returns a fixed p-reg for predicate setting instruction. + // + // Currently we only support diamond CF loops for predicated vectorization; also we don't have + // register allocator support for vector predicates. Thus we use fixed P-regs for loop main, + // True and False predicates as a temporary solution. // - // TODO: This is a hack to be addressed when register allocator supports SIMD types. - static vixl::aarch64::PRegister LoopPReg() { - return vixl::aarch64::p0; + // TODO: Support SIMD types and registers in ART. + static vixl::aarch64::PRegister GetVecPredSetFixedOutPReg(HVecPredSetOperation* instr) { + if (instr->IsVecPredWhile() || instr->IsVecPredSetAll()) { + // VecPredWhile and VecPredSetAll live ranges never overlap due to the current vectorization + // scheme: the former is only live inside a vectorized loop and the latter is never in a + // loop and never spans across loops. + return vixl::aarch64::p0; + } else if (instr->IsVecPredNot()) { + // This relies on the fact that we only use PredNot manually in the autovectorizer, + // so there is only one of them in each loop. + return vixl::aarch64::p1; + } else { + DCHECK(instr->IsVecCondition()); + return vixl::aarch64::p2; + } } }; @@ -698,7 +706,7 @@ class CodeGeneratorARM64 : public CodeGenerator { return jump_tables_.back().get(); } - void Finalize(CodeAllocator* allocator) override; + void Finalize() override; // Code generation helpers.
void MoveConstant(vixl::aarch64::CPURegister destination, HConstant* constant); @@ -737,9 +745,7 @@ class CodeGeneratorARM64 : public CodeGenerator { ParallelMoveResolverARM64* GetMoveResolver() override { return &move_resolver_; } - bool NeedsTwoRegisters(DataType::Type type ATTRIBUTE_UNUSED) const override { - return false; - } + bool NeedsTwoRegisters([[maybe_unused]] DataType::Type type) const override { return false; } // Check if the desired_string_load_kind is supported. If it is, return it, // otherwise return a fall-back kind that should be used instead. @@ -838,13 +844,21 @@ class CodeGeneratorARM64 : public CodeGenerator { // the associated patch for AOT or slow path for JIT. void EmitBakerReadBarrierCbnz(uint32_t custom_data); - vixl::aarch64::Literal<uint32_t>* DeduplicateBootImageAddressLiteral(uint64_t address); + vixl::aarch64::Literal<uint32_t>* DeduplicateBootImageAddressLiteral(uint64_t address) { + return jit_patches_.DeduplicateBootImageAddressLiteral(address); + } vixl::aarch64::Literal<uint32_t>* DeduplicateJitStringLiteral(const DexFile& dex_file, dex::StringIndex string_index, - Handle<mirror::String> handle); + Handle<mirror::String> handle) { + return jit_patches_.DeduplicateJitStringLiteral( + dex_file, string_index, handle, GetCodeGenerationData()); + } vixl::aarch64::Literal<uint32_t>* DeduplicateJitClassLiteral(const DexFile& dex_file, - dex::TypeIndex string_index, - Handle<mirror::Class> handle); + dex::TypeIndex class_index, + Handle<mirror::Class> handle) { + return jit_patches_.DeduplicateJitClassLiteral( + dex_file, class_index, handle, GetCodeGenerationData()); + } void EmitAdrpPlaceholder(vixl::aarch64::Label* fixup_label, vixl::aarch64::Register reg); void EmitAddPlaceholder(vixl::aarch64::Label* fixup_label, @@ -879,9 +893,9 @@ class CodeGeneratorARM64 : public CodeGenerator { uint32_t offset, vixl::aarch64::Label* fixup_label, ReadBarrierOption read_barrier_option); - // Generate MOV for the `old_value` in intrinsic CAS and mark it with Baker read barrier. - void GenerateIntrinsicCasMoveWithBakerReadBarrier(vixl::aarch64::Register marked_old_value, - vixl::aarch64::Register old_value); + // Generate MOV for the `old_value` in intrinsic and mark it with Baker read barrier. + void GenerateIntrinsicMoveWithBakerReadBarrier(vixl::aarch64::Register marked_old_value, + vixl::aarch64::Register old_value); // Fast path implementation of ReadBarrier::Barrier for a heap // reference field load when Baker's read barriers are used. // Overload suitable for Unsafe.getObject/-Volatile() intrinsic. @@ -1074,18 +1088,6 @@ class CodeGeneratorARM64 : public CodeGenerator { uint32_t encoded_data, /*out*/ std::string* debug_name); - using Uint64ToLiteralMap = ArenaSafeMap<uint64_t, vixl::aarch64::Literal<uint64_t>*>; - using Uint32ToLiteralMap = ArenaSafeMap<uint32_t, vixl::aarch64::Literal<uint32_t>*>; - using StringToLiteralMap = ArenaSafeMap<StringReference, - vixl::aarch64::Literal<uint32_t>*, - StringReferenceValueComparator>; - using TypeToLiteralMap = ArenaSafeMap<TypeReference, - vixl::aarch64::Literal<uint32_t>*, - TypeReferenceValueComparator>; - - vixl::aarch64::Literal<uint32_t>* DeduplicateUint32Literal(uint32_t value); - vixl::aarch64::Literal<uint64_t>* DeduplicateUint64Literal(uint64_t value); - // The PcRelativePatchInfo is used for PC-relative addressing of methods/strings/types, // whether through .data.bimg.rel.ro, .bss, or directly in the boot image. 
struct PcRelativePatchInfo : PatchInfo<vixl::aarch64::Label> { @@ -1158,14 +1160,7 @@ class CodeGeneratorARM64 : public CodeGenerator { // Baker read barrier patch info. ArenaDeque<BakerReadBarrierPatchInfo> baker_read_barrier_patches_; - // Deduplication map for 32-bit literals, used for JIT for boot image addresses. - Uint32ToLiteralMap uint32_literals_; - // Deduplication map for 64-bit literals, used for JIT for method address or method code. - Uint64ToLiteralMap uint64_literals_; - // Patches for string literals in JIT compiled code. - StringToLiteralMap jit_string_patches_; - // Patches for class literals in JIT compiled code. - TypeToLiteralMap jit_class_patches_; + JitPatchesARM64 jit_patches_; // Baker read barrier slow paths, mapping custom data (uint32_t) to label. // Wrap the label to work around vixl::aarch64::Label being non-copyable diff --git a/compiler/optimizing/code_generator_arm_vixl.cc b/compiler/optimizing/code_generator_arm_vixl.cc index d69e77045b..00c14b0b46 100644 --- a/compiler/optimizing/code_generator_arm_vixl.cc +++ b/compiler/optimizing/code_generator_arm_vixl.cc @@ -33,13 +33,16 @@ #include "interpreter/mterp/nterp.h" #include "intrinsics.h" #include "intrinsics_arm_vixl.h" +#include "intrinsics_list.h" #include "intrinsics_utils.h" #include "linker/linker_patch.h" #include "mirror/array-inl.h" #include "mirror/class-inl.h" #include "mirror/var_handle.h" +#include "profiling_info_builder.h" #include "scoped_thread_state_change-inl.h" #include "thread.h" +#include "trace.h" #include "utils/arm/assembler_arm_vixl.h" #include "utils/arm/managed_register_arm.h" #include "utils/assembler.h" @@ -147,7 +150,7 @@ static RegisterSet OneRegInReferenceOutSaveEverythingCallerSaves() { RegisterSet caller_saves = RegisterSet::Empty(); caller_saves.Add(LocationFrom(calling_convention.GetRegisterAt(0))); // TODO: Add GetReturnLocation() to the calling convention so that we can DCHECK() - // that the the kPrimNot result register is the same as the first argument register. + // that the kPrimNot result register is the same as the first argument register. 
return caller_saves; } @@ -295,31 +298,6 @@ static LoadOperandType GetLoadOperandType(DataType::Type type) { } } -static StoreOperandType GetStoreOperandType(DataType::Type type) { - switch (type) { - case DataType::Type::kReference: - return kStoreWord; - case DataType::Type::kBool: - case DataType::Type::kUint8: - case DataType::Type::kInt8: - return kStoreByte; - case DataType::Type::kUint16: - case DataType::Type::kInt16: - return kStoreHalfword; - case DataType::Type::kInt32: - return kStoreWord; - case DataType::Type::kInt64: - return kStoreWordPair; - case DataType::Type::kFloat32: - return kStoreSWord; - case DataType::Type::kFloat64: - return kStoreDWord; - default: - LOG(FATAL) << "Unreachable type " << type; - UNREACHABLE(); - } -} - void SlowPathCodeARMVIXL::SaveLiveRegisters(CodeGenerator* codegen, LocationSummary* locations) { size_t stack_offset = codegen->GetFirstRegisterSlotInSlowPath(); size_t orig_offset = stack_offset; @@ -743,7 +721,6 @@ class ReadBarrierForHeapReferenceSlowPathARMVIXL : public SlowPathCodeARMVIXL { obj_(obj), offset_(offset), index_(index) { - DCHECK(gUseReadBarrier); // If `obj` is equal to `out` or `ref`, it means the initial object // has been overwritten by (or after) the heap object reference load // to be instrumented, e.g.: @@ -758,13 +735,13 @@ class ReadBarrierForHeapReferenceSlowPathARMVIXL : public SlowPathCodeARMVIXL { } void EmitNativeCode(CodeGenerator* codegen) override { + DCHECK(codegen->EmitReadBarrier()); CodeGeneratorARMVIXL* arm_codegen = down_cast<CodeGeneratorARMVIXL*>(codegen); LocationSummary* locations = instruction_->GetLocations(); vixl32::Register reg_out = RegisterFrom(out_); DCHECK(locations->CanCall()); DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(reg_out.GetCode())); DCHECK(instruction_->IsInstanceFieldGet() || - instruction_->IsPredicatedInstanceFieldGet() || instruction_->IsStaticFieldGet() || instruction_->IsArrayGet() || instruction_->IsInstanceOf() || @@ -840,7 +817,9 @@ class ReadBarrierForHeapReferenceSlowPathARMVIXL : public SlowPathCodeARMVIXL { DCHECK(instruction_->IsInvoke()) << instruction_->DebugName(); DCHECK(instruction_->GetLocations()->Intrinsified()); HInvoke* invoke = instruction_->AsInvoke(); - DCHECK(IsUnsafeGetObject(invoke) || IsVarHandleGet(invoke) || IsVarHandleCASFamily(invoke)) + DCHECK(IsUnsafeGetReference(invoke) || + IsVarHandleGet(invoke) || + IsVarHandleCASFamily(invoke)) << invoke->GetIntrinsic(); DCHECK_EQ(offset_, 0U); // Though UnsafeGet's offset location is a register pair, we only pass the low @@ -921,10 +900,10 @@ class ReadBarrierForRootSlowPathARMVIXL : public SlowPathCodeARMVIXL { public: ReadBarrierForRootSlowPathARMVIXL(HInstruction* instruction, Location out, Location root) : SlowPathCodeARMVIXL(instruction), out_(out), root_(root) { - DCHECK(gUseReadBarrier); } void EmitNativeCode(CodeGenerator* codegen) override { + DCHECK(codegen->EmitReadBarrier()); LocationSummary* locations = instruction_->GetLocations(); vixl32::Register reg_out = RegisterFrom(out_); DCHECK(locations->CanCall()); @@ -992,12 +971,20 @@ class MethodEntryExitHooksSlowPathARMVIXL : public SlowPathCodeARMVIXL { class CompileOptimizedSlowPathARMVIXL : public SlowPathCodeARMVIXL { public: - CompileOptimizedSlowPathARMVIXL() : SlowPathCodeARMVIXL(/* instruction= */ nullptr) {} + explicit CompileOptimizedSlowPathARMVIXL(vixl32::Register profiling_info) + : SlowPathCodeARMVIXL(/* instruction= */ nullptr), + profiling_info_(profiling_info) {} void EmitNativeCode(CodeGenerator* codegen) override { 
uint32_t entry_point_offset = GetThreadOffset<kArmPointerSize>(kQuickCompileOptimized).Int32Value(); __ Bind(GetEntryLabel()); + CodeGeneratorARMVIXL* arm_codegen = down_cast<CodeGeneratorARMVIXL*>(codegen); + UseScratchRegisterScope temps(arm_codegen->GetVIXLAssembler()); + vixl32::Register tmp = temps.Acquire(); + __ Mov(tmp, ProfilingInfo::GetOptimizeThreshold()); + __ Strh(tmp, + MemOperand(profiling_info_, ProfilingInfo::BaselineHotnessCountOffset().Int32Value())); __ Ldr(lr, MemOperand(tr, entry_point_offset)); // Note: we don't record the call here (and therefore don't generate a stack // map), as the entrypoint should never be suspended. @@ -1010,6 +997,8 @@ class CompileOptimizedSlowPathARMVIXL : public SlowPathCodeARMVIXL { } private: + vixl32::Register profiling_info_; + DISALLOW_COPY_AND_ASSIGN(CompileOptimizedSlowPathARMVIXL); }; @@ -1102,27 +1091,27 @@ static uint32_t ComputeSRegisterListMask(const SRegisterList& regs) { } // Saves the register in the stack. Returns the size taken on stack. -size_t CodeGeneratorARMVIXL::SaveCoreRegister(size_t stack_index ATTRIBUTE_UNUSED, - uint32_t reg_id ATTRIBUTE_UNUSED) { +size_t CodeGeneratorARMVIXL::SaveCoreRegister([[maybe_unused]] size_t stack_index, + [[maybe_unused]] uint32_t reg_id) { TODO_VIXL32(FATAL); UNREACHABLE(); } // Restores the register from the stack. Returns the size taken on stack. -size_t CodeGeneratorARMVIXL::RestoreCoreRegister(size_t stack_index ATTRIBUTE_UNUSED, - uint32_t reg_id ATTRIBUTE_UNUSED) { +size_t CodeGeneratorARMVIXL::RestoreCoreRegister([[maybe_unused]] size_t stack_index, + [[maybe_unused]] uint32_t reg_id) { TODO_VIXL32(FATAL); UNREACHABLE(); } -size_t CodeGeneratorARMVIXL::SaveFloatingPointRegister(size_t stack_index ATTRIBUTE_UNUSED, - uint32_t reg_id ATTRIBUTE_UNUSED) { +size_t CodeGeneratorARMVIXL::SaveFloatingPointRegister([[maybe_unused]] size_t stack_index, + [[maybe_unused]] uint32_t reg_id) { TODO_VIXL32(FATAL); UNREACHABLE(); } -size_t CodeGeneratorARMVIXL::RestoreFloatingPointRegister(size_t stack_index ATTRIBUTE_UNUSED, - uint32_t reg_id ATTRIBUTE_UNUSED) { +size_t CodeGeneratorARMVIXL::RestoreFloatingPointRegister([[maybe_unused]] size_t stack_index, + [[maybe_unused]] uint32_t reg_id) { TODO_VIXL32(FATAL); UNREACHABLE(); } @@ -1908,6 +1897,7 @@ vixl32::Label* CodeGeneratorARMVIXL::GetFinalLabel(HInstruction* instruction, } namespace detail { + // Mark which intrinsics we don't have handcrafted code for. template <Intrinsics T> struct IsUnimplemented { @@ -1922,15 +1912,13 @@ struct IsUnimplemented { UNIMPLEMENTED_INTRINSIC_LIST_ARM(TRUE_OVERRIDE) #undef TRUE_OVERRIDE -#include "intrinsics_list.h" static constexpr bool kIsIntrinsicUnimplemented[] = { - false, // kNone + false, // kNone #define IS_UNIMPLEMENTED(Intrinsic, ...) \ - IsUnimplemented<Intrinsics::k##Intrinsic>().is_unimplemented, - INTRINSICS_LIST(IS_UNIMPLEMENTED) + IsUnimplemented<Intrinsics::k##Intrinsic>().is_unimplemented, + ART_INTRINSICS_LIST(IS_UNIMPLEMENTED) #undef IS_UNIMPLEMENTED }; -#undef INTRINSICS_LIST } // namespace detail @@ -2024,7 +2012,7 @@ void CodeGeneratorARMVIXL::FixJumpTables() { #define __ reinterpret_cast<ArmVIXLAssembler*>(GetAssembler())->GetVIXLAssembler()-> // NOLINT -void CodeGeneratorARMVIXL::Finalize(CodeAllocator* allocator) { +void CodeGeneratorARMVIXL::Finalize() { FixJumpTables(); // Emit JIT baker read barrier slow paths. 
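Returning to the CompileOptimizedSlowPathARMVIXL change above: the slow path now re-arms the baseline hotness counter with ProfilingInfo::GetOptimizeThreshold() before calling the kQuickCompileOptimized entrypoint. A hedged sketch of that behavior; `counter` stands for the 16-bit field behind ProfilingInfo::BaselineHotnessCountOffset() and the function pointer for the entrypoint, both simplified for illustration:

    #include <cstdint>

    // Sketch of the rewritten slow path: reset the counter first so the
    // baseline code does not immediately trip this path again, then call the
    // compile-optimized entrypoint. No stack map is recorded because the
    // entrypoint never suspends (see the comment in the emitted code).
    inline void CompileOptimizedSlowPath(uint16_t* counter,
                                         uint16_t optimize_threshold,
                                         void (*quick_compile_optimized)()) {
      *counter = optimize_threshold;
      quick_compile_optimized();
    }

The matching countdown is the Ldrh/Adds ... -1 sequence in MaybeIncrementHotness further down, which is what eventually sends a hot method into this slow path.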
@@ -2037,11 +2025,11 @@ void CodeGeneratorARMVIXL::Finalize(CodeAllocator* allocator) { } GetAssembler()->FinalizeCode(); - CodeGenerator::Finalize(allocator); + CodeGenerator::Finalize(); // Verify Baker read barrier linker patches. if (kIsDebugBuild) { - ArrayRef<const uint8_t> code = allocator->GetMemory(); + ArrayRef<const uint8_t> code(GetCode()); for (const BakerReadBarrierPatchInfo& info : baker_read_barrier_patches_) { DCHECK(info.label.IsBound()); uint32_t literal_offset = info.label.GetLocation(); @@ -2188,11 +2176,16 @@ void LocationsBuilderARMVIXL::VisitMethodExitHook(HMethodExitHook* method_hook) LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(method_hook, LocationSummary::kCallOnSlowPath); locations->SetInAt(0, parameter_visitor_.GetReturnLocation(method_hook->InputAt(0)->GetType())); + // We need three temporary registers, two to load the timestamp counter (64-bit value) and one to + // compute the address to store the timestamp counter. + locations->AddRegisterTemps(3); } void InstructionCodeGeneratorARMVIXL::GenerateMethodEntryExitHook(HInstruction* instruction) { - UseScratchRegisterScope temps(GetVIXLAssembler()); - vixl32::Register temp = temps.Acquire(); + LocationSummary* locations = instruction->GetLocations(); + vixl32::Register addr = RegisterFrom(locations->GetTemp(0)); + vixl32::Register value = RegisterFrom(locations->GetTemp(1)); + vixl32::Register tmp = RegisterFrom(locations->GetTemp(2)); SlowPathCodeARMVIXL* slow_path = new (codegen_->GetScopedAllocator()) MethodEntryExitHooksSlowPathARMVIXL(instruction); @@ -2204,20 +2197,61 @@ void InstructionCodeGeneratorARMVIXL::GenerateMethodEntryExitHook(HInstruction* // if it is just non-zero. kCHA bit isn't used in debuggable runtimes as cha optimization is // disabled in debuggable runtime. The other bit is used when this method itself requires a // deoptimization due to redefinition. So it is safe to just check for non-zero value here. - GetAssembler()->LoadFromOffset(kLoadWord, - temp, - sp, - codegen_->GetStackOffsetOfShouldDeoptimizeFlag()); - __ CompareAndBranchIfNonZero(temp, slow_path->GetEntryLabel()); + GetAssembler()->LoadFromOffset( + kLoadWord, value, sp, codegen_->GetStackOffsetOfShouldDeoptimizeFlag()); + __ CompareAndBranchIfNonZero(value, slow_path->GetEntryLabel()); } MemberOffset offset = instruction->IsMethodExitHook() ? instrumentation::Instrumentation::HaveMethodExitListenersOffset() : instrumentation::Instrumentation::HaveMethodEntryListenersOffset(); uint32_t address = reinterpret_cast32<uint32_t>(Runtime::Current()->GetInstrumentation()); - __ Mov(temp, address + offset.Int32Value()); - __ Ldrb(temp, MemOperand(temp, 0)); - __ CompareAndBranchIfNonZero(temp, slow_path->GetEntryLabel()); + __ Mov(addr, address + offset.Int32Value()); + __ Ldrb(value, MemOperand(addr, 0)); + __ Cmp(value, instrumentation::Instrumentation::kFastTraceListeners); + // Check if there are any trace method entry / exit listeners. If no, continue. + __ B(lt, slow_path->GetExitLabel()); + // Check if there are any slow (jvmti / trace with thread cpu time) method entry / exit listeners. + // If yes, just take the slow path. + __ B(gt, slow_path->GetEntryLabel()); + + // Check if there is place in the buffer to store a new entry, if no, take slow path. 
+ uint32_t trace_buffer_index_offset = + Thread::TraceBufferIndexOffset<kArmPointerSize>().Int32Value(); + vixl32::Register index = value; + __ Ldr(index, MemOperand(tr, trace_buffer_index_offset)); + __ Subs(index, index, kNumEntriesForWallClock); + __ B(lt, slow_path->GetEntryLabel()); + + // Update the index in the `Thread`. + __ Str(index, MemOperand(tr, trace_buffer_index_offset)); + // Calculate the entry address in the buffer. + // addr = base_addr + sizeof(void*) * index + __ Ldr(addr, MemOperand(tr, Thread::TraceBufferPtrOffset<kArmPointerSize>().SizeValue())); + __ Add(addr, addr, Operand(index, LSL, TIMES_4)); + + // Record method pointer and trace action. + __ Ldr(tmp, MemOperand(sp, 0)); + // Use last two bits to encode trace method action. For MethodEntry it is 0 + // so no need to set the bits since they are 0 already. + if (instruction->IsMethodExitHook()) { + DCHECK_GE(ArtMethod::Alignment(kRuntimePointerSize), static_cast<size_t>(4)); + static_assert(enum_cast<int32_t>(TraceAction::kTraceMethodEnter) == 0); + static_assert(enum_cast<int32_t>(TraceAction::kTraceMethodExit) == 1); + __ Orr(tmp, tmp, Operand(enum_cast<int32_t>(TraceAction::kTraceMethodExit))); + } + __ Str(tmp, MemOperand(addr, kMethodOffsetInBytes)); + + vixl32::Register tmp1 = index; + // See Architecture Reference Manual ARMv7-A and ARMv7-R edition section B4.1.34. + __ Mrrc(/* lower 32-bit */ tmp, + /* higher 32-bit */ tmp1, + /* coproc= */ 15, + /* opc1= */ 1, + /* crm= */ 14); + static_assert(kHighTimestampOffsetInBytes == + kTimestampOffsetInBytes + static_cast<uint32_t>(kRuntimePointerSize)); + __ Strd(tmp, tmp1, MemOperand(addr, kTimestampOffsetInBytes)); __ Bind(slow_path->GetExitLabel()); } @@ -2228,7 +2262,11 @@ void InstructionCodeGeneratorARMVIXL::VisitMethodExitHook(HMethodExitHook* instr } void LocationsBuilderARMVIXL::VisitMethodEntryHook(HMethodEntryHook* method_hook) { - new (GetGraph()->GetAllocator()) LocationSummary(method_hook, LocationSummary::kCallOnSlowPath); + LocationSummary* locations = new (GetGraph()->GetAllocator()) + LocationSummary(method_hook, LocationSummary::kCallOnSlowPath); + // We need three temporary registers, two to load the timestamp counter (64-bit value) and one to + // compute the address to store the timestamp counter. 
+ locations->AddRegisterTemps(3); } void InstructionCodeGeneratorARMVIXL::VisitMethodEntryHook(HMethodEntryHook* instruction) { @@ -2262,14 +2300,15 @@ void CodeGeneratorARMVIXL::MaybeIncrementHotness(bool is_frame_entry) { } if (GetGraph()->IsCompilingBaseline() && !Runtime::Current()->IsAotCompiler()) { - SlowPathCodeARMVIXL* slow_path = new (GetScopedAllocator()) CompileOptimizedSlowPathARMVIXL(); - AddSlowPath(slow_path); ProfilingInfo* info = GetGraph()->GetProfilingInfo(); DCHECK(info != nullptr); DCHECK(!HasEmptyFrame()); uint32_t address = reinterpret_cast32<uint32_t>(info); UseScratchRegisterScope temps(GetVIXLAssembler()); vixl32::Register tmp = temps.Acquire(); + SlowPathCodeARMVIXL* slow_path = + new (GetScopedAllocator()) CompileOptimizedSlowPathARMVIXL(/* profiling_info= */ lr); + AddSlowPath(slow_path); __ Mov(lr, address); __ Ldrh(tmp, MemOperand(lr, ProfilingInfo::BaselineHotnessCountOffset().Int32Value())); __ Adds(tmp, tmp, -1); @@ -2824,8 +2863,7 @@ void LocationsBuilderARMVIXL::VisitExit(HExit* exit) { exit->SetLocations(nullptr); } -void InstructionCodeGeneratorARMVIXL::VisitExit(HExit* exit ATTRIBUTE_UNUSED) { -} +void InstructionCodeGeneratorARMVIXL::VisitExit([[maybe_unused]] HExit* exit) {} void InstructionCodeGeneratorARMVIXL::GenerateCompareTestAndBranch(HCondition* condition, vixl32::Label* true_target, @@ -2963,6 +3001,11 @@ void LocationsBuilderARMVIXL::VisitIf(HIf* if_instr) { LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(if_instr); if (IsBooleanValueOrMaterializedCondition(if_instr->InputAt(0))) { locations->SetInAt(0, Location::RequiresRegister()); + if (GetGraph()->IsCompilingBaseline() && + codegen_->GetCompilerOptions().ProfileBranches() && + !Runtime::Current()->IsAotCompiler()) { + locations->AddTemp(Location::RequiresRegister()); + } } } @@ -2973,6 +3016,36 @@ void InstructionCodeGeneratorARMVIXL::VisitIf(HIf* if_instr) { nullptr : codegen_->GetLabelOf(true_successor); vixl32::Label* false_target = codegen_->GoesToNextBlock(if_instr->GetBlock(), false_successor) ? nullptr : codegen_->GetLabelOf(false_successor); + if (IsBooleanValueOrMaterializedCondition(if_instr->InputAt(0))) { + if (GetGraph()->IsCompilingBaseline() && + codegen_->GetCompilerOptions().ProfileBranches() && + !Runtime::Current()->IsAotCompiler()) { + DCHECK(if_instr->InputAt(0)->IsCondition()); + ProfilingInfo* info = GetGraph()->GetProfilingInfo(); + DCHECK(info != nullptr); + BranchCache* cache = info->GetBranchCache(if_instr->GetDexPc()); + // Currently, not all If branches are profiled. 
+ if (cache != nullptr) { + uint32_t address = + reinterpret_cast32<uint32_t>(cache) + BranchCache::FalseOffset().Int32Value(); + static_assert( + BranchCache::TrueOffset().Int32Value() - BranchCache::FalseOffset().Int32Value() == 2, + "Unexpected offsets for BranchCache"); + vixl32::Label done; + UseScratchRegisterScope temps(GetVIXLAssembler()); + vixl32::Register temp = temps.Acquire(); + vixl32::Register counter = RegisterFrom(if_instr->GetLocations()->GetTemp(0)); + vixl32::Register condition = InputRegisterAt(if_instr, 0); + __ Mov(temp, address); + __ Ldrh(counter, MemOperand(temp, condition, LSL, 1)); + __ Adds(counter, counter, 1); + __ Uxth(counter, counter); + __ CompareAndBranchIfZero(counter, &done); + __ Strh(counter, MemOperand(temp, condition, LSL, 1)); + __ Bind(&done); + } + } + } GenerateTestAndBranch(if_instr, /* condition_input_index= */ 0, true_target, false_target); } @@ -3422,7 +3495,7 @@ void LocationsBuilderARMVIXL::VisitIntConstant(HIntConstant* constant) { locations->SetOut(Location::ConstantLocation(constant)); } -void InstructionCodeGeneratorARMVIXL::VisitIntConstant(HIntConstant* constant ATTRIBUTE_UNUSED) { +void InstructionCodeGeneratorARMVIXL::VisitIntConstant([[maybe_unused]] HIntConstant* constant) { // Will be generated at use site. } @@ -3432,7 +3505,7 @@ void LocationsBuilderARMVIXL::VisitNullConstant(HNullConstant* constant) { locations->SetOut(Location::ConstantLocation(constant)); } -void InstructionCodeGeneratorARMVIXL::VisitNullConstant(HNullConstant* constant ATTRIBUTE_UNUSED) { +void InstructionCodeGeneratorARMVIXL::VisitNullConstant([[maybe_unused]] HNullConstant* constant) { // Will be generated at use site. } @@ -3442,7 +3515,7 @@ void LocationsBuilderARMVIXL::VisitLongConstant(HLongConstant* constant) { locations->SetOut(Location::ConstantLocation(constant)); } -void InstructionCodeGeneratorARMVIXL::VisitLongConstant(HLongConstant* constant ATTRIBUTE_UNUSED) { +void InstructionCodeGeneratorARMVIXL::VisitLongConstant([[maybe_unused]] HLongConstant* constant) { // Will be generated at use site. } @@ -3453,7 +3526,7 @@ void LocationsBuilderARMVIXL::VisitFloatConstant(HFloatConstant* constant) { } void InstructionCodeGeneratorARMVIXL::VisitFloatConstant( - HFloatConstant* constant ATTRIBUTE_UNUSED) { + [[maybe_unused]] HFloatConstant* constant) { // Will be generated at use site. } @@ -3464,7 +3537,7 @@ void LocationsBuilderARMVIXL::VisitDoubleConstant(HDoubleConstant* constant) { } void InstructionCodeGeneratorARMVIXL::VisitDoubleConstant( - HDoubleConstant* constant ATTRIBUTE_UNUSED) { + [[maybe_unused]] HDoubleConstant* constant) { // Will be generated at use site. 
} @@ -3473,7 +3546,7 @@ void LocationsBuilderARMVIXL::VisitConstructorFence(HConstructorFence* construct } void InstructionCodeGeneratorARMVIXL::VisitConstructorFence( - HConstructorFence* constructor_fence ATTRIBUTE_UNUSED) { + [[maybe_unused]] HConstructorFence* constructor_fence) { codegen_->GenerateMemoryBarrier(MemBarrierKind::kStoreStore); } @@ -3489,7 +3562,7 @@ void LocationsBuilderARMVIXL::VisitReturnVoid(HReturnVoid* ret) { ret->SetLocations(nullptr); } -void InstructionCodeGeneratorARMVIXL::VisitReturnVoid(HReturnVoid* ret ATTRIBUTE_UNUSED) { +void InstructionCodeGeneratorARMVIXL::VisitReturnVoid([[maybe_unused]] HReturnVoid* ret) { codegen_->GenerateFrameExit(); } @@ -3612,26 +3685,27 @@ void LocationsBuilderARMVIXL::VisitInvokeInterface(HInvokeInterface* invoke) { void CodeGeneratorARMVIXL::MaybeGenerateInlineCacheCheck(HInstruction* instruction, vixl32::Register klass) { DCHECK_EQ(r0.GetCode(), klass.GetCode()); - // We know the destination of an intrinsic, so no need to record inline - // caches. - if (!instruction->GetLocations()->Intrinsified() && - GetGraph()->IsCompilingBaseline() && - !Runtime::Current()->IsAotCompiler()) { - DCHECK(!instruction->GetEnvironment()->IsFromInlinedInvoke()); + if (ProfilingInfoBuilder::IsInlineCacheUseful(instruction->AsInvoke(), this)) { ProfilingInfo* info = GetGraph()->GetProfilingInfo(); - DCHECK(info != nullptr); - InlineCache* cache = info->GetInlineCache(instruction->GetDexPc()); - uint32_t address = reinterpret_cast32<uint32_t>(cache); - vixl32::Label done; - UseScratchRegisterScope temps(GetVIXLAssembler()); - temps.Exclude(ip); - __ Mov(r4, address); - __ Ldr(ip, MemOperand(r4, InlineCache::ClassesOffset().Int32Value())); - // Fast path for a monomorphic cache. - __ Cmp(klass, ip); - __ B(eq, &done, /* is_far_target= */ false); - InvokeRuntime(kQuickUpdateInlineCache, instruction, instruction->GetDexPc()); - __ Bind(&done); + InlineCache* cache = ProfilingInfoBuilder::GetInlineCache(info, instruction->AsInvoke()); + if (cache != nullptr) { + uint32_t address = reinterpret_cast32<uint32_t>(cache); + vixl32::Label done; + UseScratchRegisterScope temps(GetVIXLAssembler()); + temps.Exclude(ip); + __ Mov(r4, address); + __ Ldr(ip, MemOperand(r4, InlineCache::ClassesOffset().Int32Value())); + // Fast path for a monomorphic cache. + __ Cmp(klass, ip); + __ B(eq, &done, /* is_far_target= */ false); + InvokeRuntime(kQuickUpdateInlineCache, instruction, instruction->GetDexPc()); + __ Bind(&done); + } else { + // This is unexpected, but we don't guarantee stable compilation across + // JIT runs so just warn about it. + ScopedObjectAccess soa(Thread::Current()); + LOG(WARNING) << "Missing inline cache for " << GetGraph()->GetArtMethod()->PrettyMethod(); + } } } @@ -5617,7 +5691,7 @@ void LocationsBuilderARMVIXL::VisitParameterValue(HParameterValue* instruction) } void InstructionCodeGeneratorARMVIXL::VisitParameterValue( - HParameterValue* instruction ATTRIBUTE_UNUSED) { + [[maybe_unused]] HParameterValue* instruction) { // Nothing to do, the parameter is already at its location. } @@ -5628,7 +5702,7 @@ void LocationsBuilderARMVIXL::VisitCurrentMethod(HCurrentMethod* instruction) { } void InstructionCodeGeneratorARMVIXL::VisitCurrentMethod( - HCurrentMethod* instruction ATTRIBUTE_UNUSED) { + [[maybe_unused]] HCurrentMethod* instruction) { // Nothing to do, the method is already at its location. 
} @@ -5769,7 +5843,7 @@ void LocationsBuilderARMVIXL::VisitPhi(HPhi* instruction) { locations->SetOut(Location::Any()); } -void InstructionCodeGeneratorARMVIXL::VisitPhi(HPhi* instruction ATTRIBUTE_UNUSED) { +void InstructionCodeGeneratorARMVIXL::VisitPhi([[maybe_unused]] HPhi* instruction) { LOG(FATAL) << "Unreachable"; } @@ -5893,10 +5967,7 @@ void InstructionCodeGeneratorARMVIXL::HandleFieldSet(HInstruction* instruction, LocationSummary* locations = instruction->GetLocations(); vixl32::Register base = InputRegisterAt(instruction, 0); Location value = locations->InAt(1); - std::optional<vixl::aarch32::Label> pred_is_null; - bool is_predicated = - instruction->IsInstanceFieldSet() && instruction->AsInstanceFieldSet()->GetIsPredicatedSet(); bool is_volatile = field_info.IsVolatile(); bool atomic_ldrd_strd = codegen_->GetInstructionSetFeatures().HasAtomicLdrdAndStrd(); DataType::Type field_type = field_info.GetFieldType(); @@ -5904,11 +5975,6 @@ void InstructionCodeGeneratorARMVIXL::HandleFieldSet(HInstruction* instruction, bool needs_write_barrier = CodeGenerator::StoreNeedsWriteBarrier(field_type, instruction->InputAt(1)); - if (is_predicated) { - pred_is_null.emplace(); - __ CompareAndBranchIfZero(base, &*pred_is_null, /* is_far_target= */ false); - } - if (is_volatile) { codegen_->GenerateMemoryBarrier(MemBarrierKind::kAnyStore); } @@ -6018,21 +6084,14 @@ void InstructionCodeGeneratorARMVIXL::HandleFieldSet(HInstruction* instruction, if (is_volatile) { codegen_->GenerateMemoryBarrier(MemBarrierKind::kAnyAny); } - - if (is_predicated) { - __ Bind(&*pred_is_null); - } } void LocationsBuilderARMVIXL::HandleFieldGet(HInstruction* instruction, const FieldInfo& field_info) { - DCHECK(instruction->IsInstanceFieldGet() || - instruction->IsStaticFieldGet() || - instruction->IsPredicatedInstanceFieldGet()); + DCHECK(instruction->IsInstanceFieldGet() || instruction->IsStaticFieldGet()); bool object_field_get_with_read_barrier = - gUseReadBarrier && (field_info.GetFieldType() == DataType::Type::kReference); - bool is_predicated = instruction->IsPredicatedInstanceFieldGet(); + (field_info.GetFieldType() == DataType::Type::kReference) && codegen_->EmitReadBarrier(); LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(instruction, object_field_get_with_read_barrier @@ -6042,7 +6101,7 @@ void LocationsBuilderARMVIXL::HandleFieldGet(HInstruction* instruction, locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty()); // No caller-save registers. } // Input for object receiver. - locations->SetInAt(is_predicated ? 1 : 0, Location::RequiresRegister()); + locations->SetInAt(0, Location::RequiresRegister()); bool volatile_for_double = field_info.IsVolatile() && (field_info.GetFieldType() == DataType::Type::kFloat64) @@ -6057,20 +6116,10 @@ void LocationsBuilderARMVIXL::HandleFieldGet(HInstruction* instruction, object_field_get_with_read_barrier; if (DataType::IsFloatingPointType(instruction->GetType())) { - if (is_predicated) { - locations->SetInAt(0, Location::RequiresFpuRegister()); - locations->SetOut(Location::SameAsFirstInput()); - } else { - locations->SetOut(Location::RequiresFpuRegister()); - } + locations->SetOut(Location::RequiresFpuRegister()); } else { - if (is_predicated) { - locations->SetInAt(0, Location::RequiresRegister()); - locations->SetOut(Location::SameAsFirstInput()); - } else { - locations->SetOut(Location::RequiresRegister(), - (overlap ? 
Location::kOutputOverlap : Location::kNoOutputOverlap)); - } + locations->SetOut(Location::RequiresRegister(), + (overlap ? Location::kOutputOverlap : Location::kNoOutputOverlap)); } if (volatile_for_double) { // ARM encoding have some additional constraints for ldrexd/strexd: @@ -6104,8 +6153,7 @@ Location LocationsBuilderARMVIXL::ArithmeticZeroOrFpuRegister(HInstruction* inpu Location LocationsBuilderARMVIXL::ArmEncodableConstantOrRegister(HInstruction* constant, Opcode opcode) { DCHECK(!DataType::IsFloatingPointType(constant->GetType())); - if (constant->IsConstant() && - CanEncodeConstantAsImmediate(constant->AsConstant(), opcode)) { + if (constant->IsConstant() && CanEncodeConstantAsImmediate(constant->AsConstant(), opcode)) { return Location::ConstantLocation(constant); } return Location::RequiresRegister(); @@ -6171,12 +6219,10 @@ bool LocationsBuilderARMVIXL::CanEncodeConstantAsImmediate(HConstant* input_cst, void InstructionCodeGeneratorARMVIXL::HandleFieldGet(HInstruction* instruction, const FieldInfo& field_info) { - DCHECK(instruction->IsInstanceFieldGet() || - instruction->IsStaticFieldGet() || - instruction->IsPredicatedInstanceFieldGet()); + DCHECK(instruction->IsInstanceFieldGet() || instruction->IsStaticFieldGet()); LocationSummary* locations = instruction->GetLocations(); - uint32_t receiver_input = instruction->IsPredicatedInstanceFieldGet() ? 1 : 0; + uint32_t receiver_input = 0; vixl32::Register base = InputRegisterAt(instruction, receiver_input); Location out = locations->Out(); bool is_volatile = field_info.IsVolatile(); @@ -6202,7 +6248,7 @@ void InstructionCodeGeneratorARMVIXL::HandleFieldGet(HInstruction* instruction, case DataType::Type::kReference: { // /* HeapReference<Object> */ out = *(base + offset) - if (gUseReadBarrier && kUseBakerReadBarrier) { + if (codegen_->EmitBakerReadBarrier()) { Location maybe_temp = (locations->GetTempCount() != 0) ? locations->GetTemp(0) : Location(); // Note that a potential implicit null check is handled in this // CodeGeneratorARMVIXL::GenerateFieldLoadWithBakerReadBarrier call. 
@@ -6299,19 +6345,6 @@ void LocationsBuilderARMVIXL::VisitInstanceFieldGet(HInstanceFieldGet* instructi HandleFieldGet(instruction, instruction->GetFieldInfo()); } -void LocationsBuilderARMVIXL::VisitPredicatedInstanceFieldGet( - HPredicatedInstanceFieldGet* instruction) { - HandleFieldGet(instruction, instruction->GetFieldInfo()); -} - -void InstructionCodeGeneratorARMVIXL::VisitPredicatedInstanceFieldGet( - HPredicatedInstanceFieldGet* instruction) { - vixl::aarch32::Label finish; - __ CompareAndBranchIfZero(InputRegisterAt(instruction, 1), &finish, false); - HandleFieldGet(instruction, instruction->GetFieldInfo()); - __ Bind(&finish); -} - void InstructionCodeGeneratorARMVIXL::VisitInstanceFieldGet(HInstanceFieldGet* instruction) { HandleFieldGet(instruction, instruction->GetFieldInfo()); } @@ -6512,7 +6545,7 @@ void CodeGeneratorARMVIXL::StoreToShiftedRegOffset(DataType::Type type, void LocationsBuilderARMVIXL::VisitArrayGet(HArrayGet* instruction) { bool object_array_get_with_read_barrier = - gUseReadBarrier && (instruction->GetType() == DataType::Type::kReference); + (instruction->GetType() == DataType::Type::kReference) && codegen_->EmitReadBarrier(); LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(instruction, object_array_get_with_read_barrier @@ -6660,14 +6693,14 @@ void InstructionCodeGeneratorARMVIXL::VisitArrayGet(HArrayGet* instruction) { // The read barrier instrumentation of object ArrayGet // instructions does not support the HIntermediateAddress // instruction. - DCHECK(!(has_intermediate_address && gUseReadBarrier)); + DCHECK(!(has_intermediate_address && codegen_->EmitReadBarrier())); static_assert( sizeof(mirror::HeapReference<mirror::Object>) == sizeof(int32_t), "art::mirror::HeapReference<art::mirror::Object> and int32_t have different sizes."); // /* HeapReference<Object> */ out = // *(obj + data_offset + index * sizeof(HeapReference<Object>)) - if (gUseReadBarrier && kUseBakerReadBarrier) { + if (codegen_->EmitBakerReadBarrier()) { // Note that a potential implicit null check is handled in this // CodeGeneratorARMVIXL::GenerateArrayLoadWithBakerReadBarrier call. DCHECK(!instruction->CanDoImplicitNullCheckOn(instruction->InputAt(0))); @@ -7234,7 +7267,7 @@ void CodeGeneratorARMVIXL::MarkGCCard(vixl32::Register temp, } } -void LocationsBuilderARMVIXL::VisitParallelMove(HParallelMove* instruction ATTRIBUTE_UNUSED) { +void LocationsBuilderARMVIXL::VisitParallelMove([[maybe_unused]] HParallelMove* instruction) { LOG(FATAL) << "Unreachable"; } @@ -7591,7 +7624,7 @@ void LocationsBuilderARMVIXL::VisitLoadClass(HLoadClass* cls) { load_kind == HLoadClass::LoadKind::kBssEntryPublic || load_kind == HLoadClass::LoadKind::kBssEntryPackage); - const bool requires_read_barrier = gUseReadBarrier && !cls->IsInBootImage(); + const bool requires_read_barrier = !cls->IsInBootImage() && codegen_->EmitReadBarrier(); LocationSummary::CallKind call_kind = (cls->NeedsEnvironment() || requires_read_barrier) ? 
LocationSummary::kCallOnSlowPath : LocationSummary::kNoCall; @@ -7604,12 +7637,14 @@ void LocationsBuilderARMVIXL::VisitLoadClass(HLoadClass* cls) { locations->SetInAt(0, Location::RequiresRegister()); } locations->SetOut(Location::RequiresRegister()); - if (load_kind == HLoadClass::LoadKind::kBssEntry) { - if (!gUseReadBarrier || kUseBakerReadBarrier) { + if (load_kind == HLoadClass::LoadKind::kBssEntry || + load_kind == HLoadClass::LoadKind::kBssEntryPublic || + load_kind == HLoadClass::LoadKind::kBssEntryPackage) { + if (codegen_->EmitNonBakerReadBarrier()) { + // For non-Baker read barrier we have a temp-clobbering call. + } else { // Rely on the type resolution or initialization and marking to save everything we need. locations->SetCustomSlowPathCallerSaves(OneRegInReferenceOutSaveEverythingCallerSaves()); - } else { - // For non-Baker read barrier we have a temp-clobbering call. } } } @@ -7631,9 +7666,8 @@ void InstructionCodeGeneratorARMVIXL::VisitLoadClass(HLoadClass* cls) NO_THREAD_ Location out_loc = locations->Out(); vixl32::Register out = OutputRegister(cls); - const ReadBarrierOption read_barrier_option = cls->IsInBootImage() - ? kWithoutReadBarrier - : gCompilerReadBarrierOption; + const ReadBarrierOption read_barrier_option = + cls->IsInBootImage() ? kWithoutReadBarrier : codegen_->GetCompilerReadBarrierOption(); bool generate_null_check = false; switch (load_kind) { case HLoadClass::LoadKind::kReferrersClass: { @@ -7840,7 +7874,7 @@ HLoadString::LoadKind CodeGeneratorARMVIXL::GetSupportedLoadStringKind( } void LocationsBuilderARMVIXL::VisitLoadString(HLoadString* load) { - LocationSummary::CallKind call_kind = CodeGenerator::GetLoadStringCallKind(load); + LocationSummary::CallKind call_kind = codegen_->GetLoadStringCallKind(load); LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(load, call_kind); HLoadString::LoadKind load_kind = load->GetLoadKind(); if (load_kind == HLoadString::LoadKind::kRuntimeCall) { @@ -7848,11 +7882,11 @@ void LocationsBuilderARMVIXL::VisitLoadString(HLoadString* load) { } else { locations->SetOut(Location::RequiresRegister()); if (load_kind == HLoadString::LoadKind::kBssEntry) { - if (!gUseReadBarrier || kUseBakerReadBarrier) { + if (codegen_->EmitNonBakerReadBarrier()) { + // For non-Baker read barrier we have a temp-clobbering call. + } else { // Rely on the pResolveString and marking to save everything we need, including temps. locations->SetCustomSlowPathCallerSaves(OneRegInReferenceOutSaveEverythingCallerSaves()); - } else { - // For non-Baker read barrier we have a temp-clobbering call. } } } @@ -7887,7 +7921,7 @@ void InstructionCodeGeneratorARMVIXL::VisitLoadString(HLoadString* load) NO_THRE codegen_->EmitMovwMovtPlaceholder(labels, out); // All aligned loads are implicitly atomic consume operations on ARM. 
codegen_->GenerateGcRootFieldLoad( - load, out_loc, out, /*offset=*/ 0, gCompilerReadBarrierOption); + load, out_loc, out, /*offset=*/0, codegen_->GetCompilerReadBarrierOption()); LoadStringSlowPathARMVIXL* slow_path = new (codegen_->GetScopedAllocator()) LoadStringSlowPathARMVIXL(load); codegen_->AddSlowPath(slow_path); @@ -7908,14 +7942,13 @@ void InstructionCodeGeneratorARMVIXL::VisitLoadString(HLoadString* load) NO_THRE load->GetString())); // /* GcRoot<mirror::String> */ out = *out codegen_->GenerateGcRootFieldLoad( - load, out_loc, out, /*offset=*/ 0, gCompilerReadBarrierOption); + load, out_loc, out, /*offset=*/0, codegen_->GetCompilerReadBarrierOption()); return; } default: break; } - // TODO: Re-add the compiler code to do string dex cache lookup again. DCHECK_EQ(load->GetLoadKind(), HLoadString::LoadKind::kRuntimeCall); InvokeRuntimeCallingConventionARMVIXL calling_convention; __ Mov(calling_convention.GetRegisterAt(0), load->GetStringIndex().index_); @@ -7944,7 +7977,7 @@ void LocationsBuilderARMVIXL::VisitClearException(HClearException* clear) { new (GetGraph()->GetAllocator()) LocationSummary(clear, LocationSummary::kNoCall); } -void InstructionCodeGeneratorARMVIXL::VisitClearException(HClearException* clear ATTRIBUTE_UNUSED) { +void InstructionCodeGeneratorARMVIXL::VisitClearException([[maybe_unused]] HClearException* clear) { UseScratchRegisterScope temps(GetVIXLAssembler()); vixl32::Register temp = temps.Acquire(); __ Mov(temp, 0); @@ -7964,8 +7997,8 @@ void InstructionCodeGeneratorARMVIXL::VisitThrow(HThrow* instruction) { } // Temp is used for read barrier. -static size_t NumberOfInstanceOfTemps(TypeCheckKind type_check_kind) { - if (gUseReadBarrier && +static size_t NumberOfInstanceOfTemps(bool emit_read_barrier, TypeCheckKind type_check_kind) { + if (emit_read_barrier && (kUseBakerReadBarrier || type_check_kind == TypeCheckKind::kAbstractClassCheck || type_check_kind == TypeCheckKind::kClassHierarchyCheck || @@ -7978,11 +8011,11 @@ static size_t NumberOfInstanceOfTemps(TypeCheckKind type_check_kind) { // Interface case has 3 temps, one for holding the number of interfaces, one for the current // interface pointer, one for loading the current interface. // The other checks have one temp for loading the object's class. -static size_t NumberOfCheckCastTemps(TypeCheckKind type_check_kind) { +static size_t NumberOfCheckCastTemps(bool emit_read_barrier, TypeCheckKind type_check_kind) { if (type_check_kind == TypeCheckKind::kInterfaceCheck) { return 3; } - return 1 + NumberOfInstanceOfTemps(type_check_kind); + return 1 + NumberOfInstanceOfTemps(emit_read_barrier, type_check_kind); } void LocationsBuilderARMVIXL::VisitInstanceOf(HInstanceOf* instruction) { @@ -7994,7 +8027,7 @@ void LocationsBuilderARMVIXL::VisitInstanceOf(HInstanceOf* instruction) { case TypeCheckKind::kAbstractClassCheck: case TypeCheckKind::kClassHierarchyCheck: case TypeCheckKind::kArrayObjectCheck: { - bool needs_read_barrier = CodeGenerator::InstanceOfNeedsReadBarrier(instruction); + bool needs_read_barrier = codegen_->InstanceOfNeedsReadBarrier(instruction); call_kind = needs_read_barrier ? LocationSummary::kCallOnSlowPath : LocationSummary::kNoCall; baker_read_barrier_slow_path = kUseBakerReadBarrier && needs_read_barrier; break; @@ -8024,7 +8057,8 @@ void LocationsBuilderARMVIXL::VisitInstanceOf(HInstanceOf* instruction) { // The "out" register is used as a temporary, so it overlaps with the inputs. // Note that TypeCheckSlowPathARM uses this register too. 
locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap); - locations->AddRegisterTemps(NumberOfInstanceOfTemps(type_check_kind)); + locations->AddRegisterTemps( + NumberOfInstanceOfTemps(codegen_->EmitReadBarrier(), type_check_kind)); } void InstructionCodeGeneratorARMVIXL::VisitInstanceOf(HInstanceOf* instruction) { @@ -8037,7 +8071,7 @@ void InstructionCodeGeneratorARMVIXL::VisitInstanceOf(HInstanceOf* instruction) : InputRegisterAt(instruction, 1); Location out_loc = locations->Out(); vixl32::Register out = OutputRegister(instruction); - const size_t num_temps = NumberOfInstanceOfTemps(type_check_kind); + const size_t num_temps = NumberOfInstanceOfTemps(codegen_->EmitReadBarrier(), type_check_kind); DCHECK_LE(num_temps, 1u); Location maybe_temp_loc = (num_temps >= 1) ? locations->GetTemp(0) : Location::NoLocation(); uint32_t class_offset = mirror::Object::ClassOffset().Int32Value(); @@ -8059,7 +8093,7 @@ void InstructionCodeGeneratorARMVIXL::VisitInstanceOf(HInstanceOf* instruction) switch (type_check_kind) { case TypeCheckKind::kExactCheck: { ReadBarrierOption read_barrier_option = - CodeGenerator::ReadBarrierOptionForInstanceOf(instruction); + codegen_->ReadBarrierOptionForInstanceOf(instruction); // /* HeapReference<Class> */ out = obj->klass_ GenerateReferenceLoadTwoRegisters(instruction, out_loc, @@ -8094,7 +8128,7 @@ void InstructionCodeGeneratorARMVIXL::VisitInstanceOf(HInstanceOf* instruction) case TypeCheckKind::kAbstractClassCheck: { ReadBarrierOption read_barrier_option = - CodeGenerator::ReadBarrierOptionForInstanceOf(instruction); + codegen_->ReadBarrierOptionForInstanceOf(instruction); // /* HeapReference<Class> */ out = obj->klass_ GenerateReferenceLoadTwoRegisters(instruction, out_loc, @@ -8122,7 +8156,7 @@ void InstructionCodeGeneratorARMVIXL::VisitInstanceOf(HInstanceOf* instruction) case TypeCheckKind::kClassHierarchyCheck: { ReadBarrierOption read_barrier_option = - CodeGenerator::ReadBarrierOptionForInstanceOf(instruction); + codegen_->ReadBarrierOptionForInstanceOf(instruction); // /* HeapReference<Class> */ out = obj->klass_ GenerateReferenceLoadTwoRegisters(instruction, out_loc, @@ -8178,7 +8212,7 @@ void InstructionCodeGeneratorARMVIXL::VisitInstanceOf(HInstanceOf* instruction) case TypeCheckKind::kArrayObjectCheck: { ReadBarrierOption read_barrier_option = - CodeGenerator::ReadBarrierOptionForInstanceOf(instruction); + codegen_->ReadBarrierOptionForInstanceOf(instruction); // /* HeapReference<Class> */ out = obj->klass_ GenerateReferenceLoadTwoRegisters(instruction, out_loc, @@ -8307,7 +8341,7 @@ void InstructionCodeGeneratorARMVIXL::VisitInstanceOf(HInstanceOf* instruction) void LocationsBuilderARMVIXL::VisitCheckCast(HCheckCast* instruction) { TypeCheckKind type_check_kind = instruction->GetTypeCheckKind(); - LocationSummary::CallKind call_kind = CodeGenerator::GetCheckCastCallKind(instruction); + LocationSummary::CallKind call_kind = codegen_->GetCheckCastCallKind(instruction); LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(instruction, call_kind); locations->SetInAt(0, Location::RequiresRegister()); @@ -8318,7 +8352,8 @@ void LocationsBuilderARMVIXL::VisitCheckCast(HCheckCast* instruction) { } else { locations->SetInAt(1, Location::RequiresRegister()); } - locations->AddRegisterTemps(NumberOfCheckCastTemps(type_check_kind)); + locations->AddRegisterTemps( + NumberOfCheckCastTemps(codegen_->EmitReadBarrier(), type_check_kind)); } void InstructionCodeGeneratorARMVIXL::VisitCheckCast(HCheckCast* instruction) { 
@@ -8331,7 +8366,7 @@ void InstructionCodeGeneratorARMVIXL::VisitCheckCast(HCheckCast* instruction) { : InputRegisterAt(instruction, 1); Location temp_loc = locations->GetTemp(0); vixl32::Register temp = RegisterFrom(temp_loc); - const size_t num_temps = NumberOfCheckCastTemps(type_check_kind); + const size_t num_temps = NumberOfCheckCastTemps(codegen_->EmitReadBarrier(), type_check_kind); DCHECK_LE(num_temps, 3u); Location maybe_temp2_loc = (num_temps >= 2) ? locations->GetTemp(1) : Location::NoLocation(); Location maybe_temp3_loc = (num_temps >= 3) ? locations->GetTemp(2) : Location::NoLocation(); @@ -8344,7 +8379,7 @@ void InstructionCodeGeneratorARMVIXL::VisitCheckCast(HCheckCast* instruction) { const uint32_t object_array_data_offset = mirror::Array::DataOffset(kHeapReferenceSize).Uint32Value(); - bool is_type_check_slow_path_fatal = CodeGenerator::IsTypeCheckSlowPathFatal(instruction); + bool is_type_check_slow_path_fatal = codegen_->IsTypeCheckSlowPathFatal(instruction); SlowPathCodeARMVIXL* type_check_slow_path = new (codegen_->GetScopedAllocator()) TypeCheckSlowPathARMVIXL( instruction, is_type_check_slow_path_fatal); @@ -8490,12 +8525,11 @@ void InstructionCodeGeneratorARMVIXL::VisitCheckCast(HCheckCast* instruction) { kWithoutReadBarrier); // /* HeapReference<Class> */ temp = temp->iftable_ - GenerateReferenceLoadTwoRegisters(instruction, - temp_loc, - temp_loc, - iftable_offset, - maybe_temp2_loc, - kWithoutReadBarrier); + GenerateReferenceLoadOneRegister(instruction, + temp_loc, + iftable_offset, + maybe_temp2_loc, + kWithoutReadBarrier); // Iftable is never null. __ Ldr(RegisterFrom(maybe_temp2_loc), MemOperand(temp, array_length_offset)); // Loop through the iftable and check if any class matches. @@ -8900,7 +8934,7 @@ void InstructionCodeGeneratorARMVIXL::GenerateReferenceLoadOneRegister( ReadBarrierOption read_barrier_option) { vixl32::Register out_reg = RegisterFrom(out); if (read_barrier_option == kWithReadBarrier) { - CHECK(gUseReadBarrier); + DCHECK(codegen_->EmitReadBarrier()); DCHECK(maybe_temp.IsRegister()) << maybe_temp; if (kUseBakerReadBarrier) { // Load with fast path based Baker's read barrier. @@ -8935,7 +8969,7 @@ void InstructionCodeGeneratorARMVIXL::GenerateReferenceLoadTwoRegisters( vixl32::Register out_reg = RegisterFrom(out); vixl32::Register obj_reg = RegisterFrom(obj); if (read_barrier_option == kWithReadBarrier) { - CHECK(gUseReadBarrier); + DCHECK(codegen_->EmitReadBarrier()); if (kUseBakerReadBarrier) { DCHECK(maybe_temp.IsRegister()) << maybe_temp; // Load with fast path based Baker's read barrier. @@ -8964,7 +8998,7 @@ void CodeGeneratorARMVIXL::GenerateGcRootFieldLoad( ReadBarrierOption read_barrier_option) { vixl32::Register root_reg = RegisterFrom(root); if (read_barrier_option == kWithReadBarrier) { - DCHECK(gUseReadBarrier); + DCHECK(EmitReadBarrier()); if (kUseBakerReadBarrier) { // Fast path implementation of art::ReadBarrier::BarrierForRoot when // Baker's read barrier are used. @@ -9025,11 +9059,10 @@ void CodeGeneratorARMVIXL::GenerateGcRootFieldLoad( MaybeGenerateMarkingRegisterCheck(/* code= */ 20); } -void CodeGeneratorARMVIXL::GenerateIntrinsicCasMoveWithBakerReadBarrier( +void CodeGeneratorARMVIXL::GenerateIntrinsicMoveWithBakerReadBarrier( vixl::aarch32::Register marked_old_value, vixl::aarch32::Register old_value) { - DCHECK(gUseReadBarrier); - DCHECK(kUseBakerReadBarrier); + DCHECK(EmitBakerReadBarrier()); // Similar to the Baker RB path in GenerateGcRootFieldLoad(), with a MOV instead of LDR. 
// For low registers, we can reuse the GC root narrow entrypoint, for high registers @@ -9062,8 +9095,7 @@ void CodeGeneratorARMVIXL::GenerateFieldLoadWithBakerReadBarrier(HInstruction* i vixl32::Register obj, const vixl32::MemOperand& src, bool needs_null_check) { - DCHECK(gUseReadBarrier); - DCHECK(kUseBakerReadBarrier); + DCHECK(EmitBakerReadBarrier()); // Query `art::Thread::Current()->GetIsGcMarking()` (stored in the // Marking Register) to decide whether we need to enter the slow @@ -9155,8 +9187,7 @@ void CodeGeneratorARMVIXL::GenerateArrayLoadWithBakerReadBarrier(Location ref, Location index, Location temp, bool needs_null_check) { - DCHECK(gUseReadBarrier); - DCHECK(kUseBakerReadBarrier); + DCHECK(EmitBakerReadBarrier()); static_assert( sizeof(mirror::HeapReference<mirror::Object>) == sizeof(int32_t), @@ -9221,7 +9252,7 @@ void CodeGeneratorARMVIXL::GenerateArrayLoadWithBakerReadBarrier(Location ref, void CodeGeneratorARMVIXL::MaybeGenerateMarkingRegisterCheck(int code, Location temp_loc) { // The following condition is a compile-time one, so it does not have a run-time cost. - if (kIsDebugBuild && gUseReadBarrier && kUseBakerReadBarrier) { + if (kIsDebugBuild && EmitBakerReadBarrier()) { // The following condition is a run-time one; it is executed after the // previous compile-time test, to avoid penalizing non-debug builds. if (GetCompilerOptions().EmitRunTimeChecksInDebugMode()) { @@ -9251,7 +9282,7 @@ void CodeGeneratorARMVIXL::GenerateReadBarrierSlow(HInstruction* instruction, Location obj, uint32_t offset, Location index) { - DCHECK(gUseReadBarrier); + DCHECK(EmitReadBarrier()); // Insert a slow path based read barrier *after* the reference load. // @@ -9277,7 +9308,7 @@ void CodeGeneratorARMVIXL::MaybeGenerateReadBarrierSlow(HInstruction* instructio Location obj, uint32_t offset, Location index) { - if (gUseReadBarrier) { + if (EmitReadBarrier()) { // Baker's read barriers shall be handled by the fast path // (CodeGeneratorARMVIXL::GenerateReferenceLoadWithBakerReadBarrier). DCHECK(!kUseBakerReadBarrier); @@ -9292,7 +9323,7 @@ void CodeGeneratorARMVIXL::MaybeGenerateReadBarrierSlow(HInstruction* instructio void CodeGeneratorARMVIXL::GenerateReadBarrierForRootSlow(HInstruction* instruction, Location out, Location root) { - DCHECK(gUseReadBarrier); + DCHECK(EmitReadBarrier()); // Insert a slow path based read barrier *after* the GC root load. // @@ -9667,7 +9698,7 @@ void CodeGeneratorARMVIXL::LoadBootImageAddress(vixl32::Register reg, void CodeGeneratorARMVIXL::LoadTypeForBootImageIntrinsic(vixl::aarch32::Register reg, TypeReference target_type) { - // Load the class the same way as for HLoadClass::LoadKind::kBootImageLinkTimePcRelative. + // Load the type the same way as for HLoadClass::LoadKind::kBootImageLinkTimePcRelative. 
DCHECK(GetCompilerOptions().IsBootImage() || GetCompilerOptions().IsBootImageExtension()); PcRelativePatchInfo* labels = NewBootImageTypePatch(*target_type.dex_file, target_type.TypeIndex()); @@ -9828,7 +9859,7 @@ void CodeGeneratorARMVIXL::EmitThunkCode(const linker::LinkerPatch& patch, assembler.FinalizeCode(); code->resize(assembler.CodeSize()); MemoryRegion code_region(code->data(), code->size()); - assembler.FinalizeInstructions(code_region); + assembler.CopyInstructions(code_region); } VIXLUInt32Literal* CodeGeneratorARMVIXL::DeduplicateUint32Literal( @@ -9867,12 +9898,12 @@ void InstructionCodeGeneratorARMVIXL::VisitMultiplyAccumulate(HMultiplyAccumulat } } -void LocationsBuilderARMVIXL::VisitBoundType(HBoundType* instruction ATTRIBUTE_UNUSED) { +void LocationsBuilderARMVIXL::VisitBoundType([[maybe_unused]] HBoundType* instruction) { // Nothing to do, this should be removed during prepare for register allocator. LOG(FATAL) << "Unreachable"; } -void InstructionCodeGeneratorARMVIXL::VisitBoundType(HBoundType* instruction ATTRIBUTE_UNUSED) { +void InstructionCodeGeneratorARMVIXL::VisitBoundType([[maybe_unused]] HBoundType* instruction) { // Nothing to do, this should be removed during prepare for register allocator. LOG(FATAL) << "Unreachable"; } diff --git a/compiler/optimizing/code_generator_arm_vixl.h b/compiler/optimizing/code_generator_arm_vixl.h index f5abe6951a..00e0bfa399 100644 --- a/compiler/optimizing/code_generator_arm_vixl.h +++ b/compiler/optimizing/code_generator_arm_vixl.h @@ -162,22 +162,37 @@ using VIXLUInt32Literal = vixl::aarch32::Literal<uint32_t>; /* 1.8 */ \ V(MathFmaDouble) \ V(MathFmaFloat) \ - V(UnsafeGetAndAddInt) \ - V(UnsafeGetAndAddLong) \ - V(UnsafeGetAndSetInt) \ - V(UnsafeGetAndSetLong) \ - V(UnsafeGetAndSetObject) \ V(MethodHandleInvokeExact) \ V(MethodHandleInvoke) \ /* OpenJDK 11 */ \ V(JdkUnsafeCASLong) /* High register pressure */ \ - V(JdkUnsafeGetAndAddInt) \ - V(JdkUnsafeGetAndAddLong) \ - V(JdkUnsafeGetAndSetInt) \ - V(JdkUnsafeGetAndSetLong) \ - V(JdkUnsafeGetAndSetObject) \ V(JdkUnsafeCompareAndSetLong) +ALWAYS_INLINE inline StoreOperandType GetStoreOperandType(DataType::Type type) { + switch (type) { + case DataType::Type::kReference: + return kStoreWord; + case DataType::Type::kBool: + case DataType::Type::kUint8: + case DataType::Type::kInt8: + return kStoreByte; + case DataType::Type::kUint16: + case DataType::Type::kInt16: + return kStoreHalfword; + case DataType::Type::kInt32: + return kStoreWord; + case DataType::Type::kInt64: + return kStoreWordPair; + case DataType::Type::kFloat32: + return kStoreSWord; + case DataType::Type::kFloat64: + return kStoreDWord; + default: + LOG(FATAL) << "Unreachable type " << type; + UNREACHABLE(); + } +} + class JumpTableARMVIXL : public DeletableArenaObject<kArenaAllocSwitchTable> { public: explicit JumpTableARMVIXL(HPackedSwitch* switch_instr) @@ -620,7 +635,7 @@ class CodeGeneratorARMVIXL : public CodeGenerator { block_labels_.resize(GetGraph()->GetBlocks().size()); } - void Finalize(CodeAllocator* allocator) override; + void Finalize() override; bool NeedsTwoRegisters(DataType::Type type) const override { return type == DataType::Type::kFloat64 || type == DataType::Type::kInt64; @@ -725,9 +740,9 @@ class CodeGeneratorARMVIXL : public CodeGenerator { vixl::aarch32::Register obj, uint32_t offset, ReadBarrierOption read_barrier_option); - // Generate MOV for an intrinsic CAS to mark the old value with Baker read barrier. 
- void GenerateIntrinsicCasMoveWithBakerReadBarrier(vixl::aarch32::Register marked_old_value, - vixl::aarch32::Register old_value); + // Generate MOV for an intrinsic to mark the old value with Baker read barrier. + void GenerateIntrinsicMoveWithBakerReadBarrier(vixl::aarch32::Register marked_old_value, + vixl::aarch32::Register old_value); // Fast path implementation of ReadBarrier::Barrier for a heap // reference field load when Baker's read barriers are used. // Overload suitable for Unsafe.getObject/-Volatile() intrinsic. diff --git a/compiler/optimizing/code_generator_riscv64.cc b/compiler/optimizing/code_generator_riscv64.cc new file mode 100644 index 0000000000..182c1d4d05 --- /dev/null +++ b/compiler/optimizing/code_generator_riscv64.cc @@ -0,0 +1,6883 @@ +/* + * Copyright (C) 2023 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "code_generator_riscv64.h" + +#include "android-base/logging.h" +#include "android-base/macros.h" +#include "arch/riscv64/jni_frame_riscv64.h" +#include "arch/riscv64/registers_riscv64.h" +#include "base/arena_containers.h" +#include "base/macros.h" +#include "class_root-inl.h" +#include "code_generator_utils.h" +#include "dwarf/register.h" +#include "gc/heap.h" +#include "gc/space/image_space.h" +#include "heap_poisoning.h" +#include "intrinsics_list.h" +#include "intrinsics_riscv64.h" +#include "jit/profiling_info.h" +#include "linker/linker_patch.h" +#include "mirror/class-inl.h" +#include "optimizing/nodes.h" +#include "optimizing/profiling_info_builder.h" +#include "runtime.h" +#include "scoped_thread_state_change-inl.h" +#include "stack_map_stream.h" +#include "trace.h" +#include "utils/label.h" +#include "utils/riscv64/assembler_riscv64.h" +#include "utils/stack_checks.h" + +namespace art HIDDEN { +namespace riscv64 { + +// Placeholder values embedded in instructions, patched at link time. +constexpr uint32_t kLinkTimeOffsetPlaceholderHigh = 0x12345; +constexpr uint32_t kLinkTimeOffsetPlaceholderLow = 0x678; + +// Compare-and-jump packed switch generates approx. 3 + 1.5 * N 32-bit +// instructions for N cases. +// Table-based packed switch generates approx. 10 32-bit instructions +// and N 32-bit data words for N cases. +// We switch to the table-based method starting with 6 entries. +static constexpr uint32_t kPackedSwitchCompareJumpThreshold = 6; + +static constexpr XRegister kCoreCalleeSaves[] = { + // S1(TR) is excluded as the ART thread register. + S0, S2, S3, S4, S5, S6, S7, S8, S9, S10, S11, RA +}; + +static constexpr FRegister kFpuCalleeSaves[] = { + FS0, FS1, FS2, FS3, FS4, FS5, FS6, FS7, FS8, FS9, FS10, FS11 +}; + +#define QUICK_ENTRY_POINT(x) QUICK_ENTRYPOINT_OFFSET(kRiscv64PointerSize, x).Int32Value() + +Location RegisterOrZeroBitPatternLocation(HInstruction* instruction) { + DCHECK(!DataType::IsFloatingPointType(instruction->GetType())); + return IsZeroBitPattern(instruction) + ? 
Location::ConstantLocation(instruction) + : Location::RequiresRegister(); +} + +Location FpuRegisterOrZeroBitPatternLocation(HInstruction* instruction) { + DCHECK(DataType::IsFloatingPointType(instruction->GetType())); + return IsZeroBitPattern(instruction) + ? Location::ConstantLocation(instruction) + : Location::RequiresFpuRegister(); +} + +XRegister InputXRegisterOrZero(Location location) { + if (location.IsConstant()) { + DCHECK(location.GetConstant()->IsZeroBitPattern()); + return Zero; + } else { + return location.AsRegister<XRegister>(); + } +} + +Location ValueLocationForStore(HInstruction* value) { + if (IsZeroBitPattern(value)) { + return Location::ConstantLocation(value); + } else if (DataType::IsFloatingPointType(value->GetType())) { + return Location::RequiresFpuRegister(); + } else { + return Location::RequiresRegister(); + } +} + +Location Riscv64ReturnLocation(DataType::Type return_type) { + switch (return_type) { + case DataType::Type::kBool: + case DataType::Type::kUint8: + case DataType::Type::kInt8: + case DataType::Type::kUint16: + case DataType::Type::kInt16: + case DataType::Type::kUint32: + case DataType::Type::kInt32: + case DataType::Type::kReference: + case DataType::Type::kUint64: + case DataType::Type::kInt64: + return Location::RegisterLocation(A0); + + case DataType::Type::kFloat32: + case DataType::Type::kFloat64: + return Location::FpuRegisterLocation(FA0); + + case DataType::Type::kVoid: + return Location::NoLocation(); + } + UNREACHABLE(); +} + +static RegisterSet OneRegInReferenceOutSaveEverythingCallerSaves() { + InvokeRuntimeCallingConvention calling_convention; + RegisterSet caller_saves = RegisterSet::Empty(); + caller_saves.Add(Location::RegisterLocation(calling_convention.GetRegisterAt(0))); + DCHECK_EQ( + calling_convention.GetRegisterAt(0), + calling_convention.GetReturnLocation(DataType::Type::kReference).AsRegister<XRegister>()); + return caller_saves; +} + +template <ClassStatus kStatus> +static constexpr int64_t ShiftedSignExtendedClassStatusValue() { + // This is used only for status values that have the highest bit set. + static_assert(CLZ(enum_cast<uint32_t>(kStatus)) == status_lsb_position); + constexpr uint32_t kShiftedStatusValue = enum_cast<uint32_t>(kStatus) << status_lsb_position; + static_assert(kShiftedStatusValue >= 0x80000000u); + return static_cast<int64_t>(kShiftedStatusValue) - (INT64_C(1) << 32); +} + +// Split a 64-bit address used by JIT to the nearest 4KiB-aligned base address and a 12-bit +// signed offset. It is usually cheaper to materialize the aligned address than the full address. +std::pair<uint64_t, int32_t> SplitJitAddress(uint64_t address) { + uint64_t bits0_11 = address & UINT64_C(0xfff); + uint64_t bit11 = address & UINT64_C(0x800); + // Round the address to nearest 4KiB address because the `imm12` has range [-0x800, 0x800). + uint64_t base_address = (address & ~UINT64_C(0xfff)) + (bit11 << 1); + int32_t imm12 = dchecked_integral_cast<int32_t>(bits0_11) - + dchecked_integral_cast<int32_t>(bit11 << 1); + return {base_address, imm12}; +} + +int32_t ReadBarrierMarkEntrypointOffset(Location ref) { + DCHECK(ref.IsRegister()); + int reg = ref.reg(); + DCHECK(T0 <= reg && reg <= T6 && reg != TR) << reg; + // Note: Entrypoints for registers X30 (T5) and X31 (T6) are stored in entries + // for X0 (Zero) and X1 (RA) because these are not valid registers for marking + // and we currently have slots only up to register 29. + int entry_point_number = (reg >= 30) ? 
reg - 30 : reg; + return Thread::ReadBarrierMarkEntryPointsOffset<kRiscv64PointerSize>(entry_point_number); +} + +Location InvokeRuntimeCallingConvention::GetReturnLocation(DataType::Type return_type) { + return Riscv64ReturnLocation(return_type); +} + +Location InvokeDexCallingConventionVisitorRISCV64::GetReturnLocation(DataType::Type type) const { + return Riscv64ReturnLocation(type); +} + +Location InvokeDexCallingConventionVisitorRISCV64::GetMethodLocation() const { + return Location::RegisterLocation(kArtMethodRegister); +} + +Location InvokeDexCallingConventionVisitorRISCV64::GetNextLocation(DataType::Type type) { + Location next_location; + if (type == DataType::Type::kVoid) { + LOG(FATAL) << "Unexpected parameter type " << type; + } + + // Note: Unlike the RISC-V C/C++ calling convention, managed ABI does not use + // GPRs to pass FP args when we run out of FPRs. + if (DataType::IsFloatingPointType(type) && + float_index_ < calling_convention.GetNumberOfFpuRegisters()) { + next_location = + Location::FpuRegisterLocation(calling_convention.GetFpuRegisterAt(float_index_++)); + } else if (!DataType::IsFloatingPointType(type) && + (gp_index_ < calling_convention.GetNumberOfRegisters())) { + next_location = Location::RegisterLocation(calling_convention.GetRegisterAt(gp_index_++)); + } else { + size_t stack_offset = calling_convention.GetStackOffsetOf(stack_index_); + next_location = DataType::Is64BitType(type) ? Location::DoubleStackSlot(stack_offset) : + Location::StackSlot(stack_offset); + } + + // Space on the stack is reserved for all arguments. + stack_index_ += DataType::Is64BitType(type) ? 2 : 1; + + return next_location; +} + +Location CriticalNativeCallingConventionVisitorRiscv64::GetNextLocation(DataType::Type type) { + DCHECK_NE(type, DataType::Type::kReference); + + Location location = Location::NoLocation(); + if (DataType::IsFloatingPointType(type)) { + if (fpr_index_ < kParameterFpuRegistersLength) { + location = Location::FpuRegisterLocation(kParameterFpuRegisters[fpr_index_]); + ++fpr_index_; + } else { + // Native ABI allows passing excessive FP args in GPRs. This is facilitated by + // inserting fake conversion intrinsic calls (`Double.doubleToRawLongBits()` + // or `Float.floatToRawIntBits()`) by `CriticalNativeAbiFixupRiscv64`. + // Remaining FP args shall be passed on the stack. + CHECK_EQ(gpr_index_, kRuntimeParameterCoreRegistersLength); + } + } else { + // Native ABI uses the same core registers as a runtime call. + if (gpr_index_ < kRuntimeParameterCoreRegistersLength) { + location = Location::RegisterLocation(kRuntimeParameterCoreRegisters[gpr_index_]); + ++gpr_index_; + } + } + if (location.IsInvalid()) { + // Only a `float` gets a single slot. Integral args need to be sign-extended to 64 bits. + if (type == DataType::Type::kFloat32) { + location = Location::StackSlot(stack_offset_); + } else { + location = Location::DoubleStackSlot(stack_offset_); + } + stack_offset_ += kFramePointerSize; + + if (for_register_allocation_) { + location = Location::Any(); + } + } + return location; +} + +Location CriticalNativeCallingConventionVisitorRiscv64::GetReturnLocation( + DataType::Type type) const { + // The result is returned the same way in native ABI and managed ABI. No result conversion is + // needed, see comments in `Riscv64JniCallingConvention::RequiresSmallResultTypeExtension()`. 
+ InvokeDexCallingConventionVisitorRISCV64 dex_calling_convention; + return dex_calling_convention.GetReturnLocation(type); +} + +Location CriticalNativeCallingConventionVisitorRiscv64::GetMethodLocation() const { + // Pass the method in the hidden argument T0. + return Location::RegisterLocation(T0); +} + +#define __ down_cast<CodeGeneratorRISCV64*>(codegen)->GetAssembler()-> // NOLINT + +void LocationsBuilderRISCV64::HandleInvoke(HInvoke* instruction) { + InvokeDexCallingConventionVisitorRISCV64 calling_convention_visitor; + CodeGenerator::CreateCommonInvokeLocationSummary(instruction, &calling_convention_visitor); +} + +class CompileOptimizedSlowPathRISCV64 : public SlowPathCodeRISCV64 { + public: + CompileOptimizedSlowPathRISCV64(XRegister base, int32_t imm12) + : SlowPathCodeRISCV64(/*instruction=*/ nullptr), + base_(base), + imm12_(imm12) {} + + void EmitNativeCode(CodeGenerator* codegen) override { + uint32_t entrypoint_offset = + GetThreadOffset<kRiscv64PointerSize>(kQuickCompileOptimized).Int32Value(); + __ Bind(GetEntryLabel()); + CodeGeneratorRISCV64* riscv64_codegen = down_cast<CodeGeneratorRISCV64*>(codegen); + riscv64::ScratchRegisterScope srs(riscv64_codegen->GetAssembler()); + XRegister counter = srs.AllocateXRegister(); + __ LoadConst32(counter, ProfilingInfo::GetOptimizeThreshold()); + __ Sh(counter, base_, imm12_); + __ Loadd(RA, TR, entrypoint_offset); + // Note: we don't record the call here (and therefore don't generate a stack + // map), as the entrypoint should never be suspended. + __ Jalr(RA); + __ J(GetExitLabel()); + } + + const char* GetDescription() const override { return "CompileOptimizedSlowPath"; } + + private: + XRegister base_; + const int32_t imm12_; + + DISALLOW_COPY_AND_ASSIGN(CompileOptimizedSlowPathRISCV64); +}; + +class SuspendCheckSlowPathRISCV64 : public SlowPathCodeRISCV64 { + public: + SuspendCheckSlowPathRISCV64(HSuspendCheck* instruction, HBasicBlock* successor) + : SlowPathCodeRISCV64(instruction), successor_(successor) {} + + void EmitNativeCode(CodeGenerator* codegen) override { + LocationSummary* locations = instruction_->GetLocations(); + CodeGeneratorRISCV64* riscv64_codegen = down_cast<CodeGeneratorRISCV64*>(codegen); + __ Bind(GetEntryLabel()); + SaveLiveRegisters(codegen, locations); // Only saves live vector registers for SIMD. + riscv64_codegen->InvokeRuntime(kQuickTestSuspend, instruction_, instruction_->GetDexPc(), this); + CheckEntrypointTypes<kQuickTestSuspend, void, void>(); + RestoreLiveRegisters(codegen, locations); // Only restores live vector registers for SIMD. + if (successor_ == nullptr) { + __ J(GetReturnLabel()); + } else { + __ J(riscv64_codegen->GetLabelOf(successor_)); + } + } + + Riscv64Label* GetReturnLabel() { + DCHECK(successor_ == nullptr); + return &return_label_; + } + + const char* GetDescription() const override { return "SuspendCheckSlowPathRISCV64"; } + + HBasicBlock* GetSuccessor() const { return successor_; } + + private: + // If not null, the block to branch to after the suspend check. + HBasicBlock* const successor_; + + // If `successor_` is null, the label to branch to after the suspend check. 
+ Riscv64Label return_label_; + + DISALLOW_COPY_AND_ASSIGN(SuspendCheckSlowPathRISCV64); +}; + +class NullCheckSlowPathRISCV64 : public SlowPathCodeRISCV64 { + public: + explicit NullCheckSlowPathRISCV64(HNullCheck* instr) : SlowPathCodeRISCV64(instr) {} + + void EmitNativeCode(CodeGenerator* codegen) override { + CodeGeneratorRISCV64* riscv64_codegen = down_cast<CodeGeneratorRISCV64*>(codegen); + __ Bind(GetEntryLabel()); + if (instruction_->CanThrowIntoCatchBlock()) { + // Live registers will be restored in the catch block if caught. + SaveLiveRegisters(codegen, instruction_->GetLocations()); + } + riscv64_codegen->InvokeRuntime( + kQuickThrowNullPointer, instruction_, instruction_->GetDexPc(), this); + CheckEntrypointTypes<kQuickThrowNullPointer, void, void>(); + } + + bool IsFatal() const override { return true; } + + const char* GetDescription() const override { return "NullCheckSlowPathRISCV64"; } + + private: + DISALLOW_COPY_AND_ASSIGN(NullCheckSlowPathRISCV64); +}; + +class BoundsCheckSlowPathRISCV64 : public SlowPathCodeRISCV64 { + public: + explicit BoundsCheckSlowPathRISCV64(HBoundsCheck* instruction) + : SlowPathCodeRISCV64(instruction) {} + + void EmitNativeCode(CodeGenerator* codegen) override { + LocationSummary* locations = instruction_->GetLocations(); + CodeGeneratorRISCV64* riscv64_codegen = down_cast<CodeGeneratorRISCV64*>(codegen); + __ Bind(GetEntryLabel()); + if (instruction_->CanThrowIntoCatchBlock()) { + // Live registers will be restored in the catch block if caught. + SaveLiveRegisters(codegen, instruction_->GetLocations()); + } + // We're moving two locations to locations that could overlap, so we need a parallel + // move resolver. + InvokeRuntimeCallingConvention calling_convention; + codegen->EmitParallelMoves(locations->InAt(0), + Location::RegisterLocation(calling_convention.GetRegisterAt(0)), + DataType::Type::kInt32, + locations->InAt(1), + Location::RegisterLocation(calling_convention.GetRegisterAt(1)), + DataType::Type::kInt32); + QuickEntrypointEnum entrypoint = instruction_->AsBoundsCheck()->IsStringCharAt() ? 
+ kQuickThrowStringBounds : + kQuickThrowArrayBounds; + riscv64_codegen->InvokeRuntime(entrypoint, instruction_, instruction_->GetDexPc(), this); + CheckEntrypointTypes<kQuickThrowStringBounds, void, int32_t, int32_t>(); + CheckEntrypointTypes<kQuickThrowArrayBounds, void, int32_t, int32_t>(); + } + + bool IsFatal() const override { return true; } + + const char* GetDescription() const override { return "BoundsCheckSlowPathRISCV64"; } + + private: + DISALLOW_COPY_AND_ASSIGN(BoundsCheckSlowPathRISCV64); +}; + +class LoadClassSlowPathRISCV64 : public SlowPathCodeRISCV64 { + public: + LoadClassSlowPathRISCV64(HLoadClass* cls, HInstruction* at) : SlowPathCodeRISCV64(at), cls_(cls) { + DCHECK(at->IsLoadClass() || at->IsClinitCheck()); + DCHECK_EQ(instruction_->IsLoadClass(), cls_ == instruction_); + } + + void EmitNativeCode(CodeGenerator* codegen) override { + LocationSummary* locations = instruction_->GetLocations(); + Location out = locations->Out(); + const uint32_t dex_pc = instruction_->GetDexPc(); + bool must_resolve_type = instruction_->IsLoadClass() && cls_->MustResolveTypeOnSlowPath(); + bool must_do_clinit = instruction_->IsClinitCheck() || cls_->MustGenerateClinitCheck(); + + CodeGeneratorRISCV64* riscv64_codegen = down_cast<CodeGeneratorRISCV64*>(codegen); + __ Bind(GetEntryLabel()); + SaveLiveRegisters(codegen, locations); + + InvokeRuntimeCallingConvention calling_convention; + if (must_resolve_type) { + DCHECK(IsSameDexFile(cls_->GetDexFile(), riscv64_codegen->GetGraph()->GetDexFile()) || + riscv64_codegen->GetCompilerOptions().WithinOatFile(&cls_->GetDexFile()) || + ContainsElement(Runtime::Current()->GetClassLinker()->GetBootClassPath(), + &cls_->GetDexFile())); + dex::TypeIndex type_index = cls_->GetTypeIndex(); + __ LoadConst32(calling_convention.GetRegisterAt(0), type_index.index_); + if (cls_->NeedsAccessCheck()) { + CheckEntrypointTypes<kQuickResolveTypeAndVerifyAccess, void*, uint32_t>(); + riscv64_codegen->InvokeRuntime( + kQuickResolveTypeAndVerifyAccess, instruction_, dex_pc, this); + } else { + CheckEntrypointTypes<kQuickResolveType, void*, uint32_t>(); + riscv64_codegen->InvokeRuntime(kQuickResolveType, instruction_, dex_pc, this); + } + // If we also must_do_clinit, the resolved type is now in the correct register. + } else { + DCHECK(must_do_clinit); + Location source = instruction_->IsLoadClass() ? out : locations->InAt(0); + riscv64_codegen->MoveLocation( + Location::RegisterLocation(calling_convention.GetRegisterAt(0)), source, cls_->GetType()); + } + if (must_do_clinit) { + riscv64_codegen->InvokeRuntime(kQuickInitializeStaticStorage, instruction_, dex_pc, this); + CheckEntrypointTypes<kQuickInitializeStaticStorage, void*, mirror::Class*>(); + } + + // Move the class to the desired location. + if (out.IsValid()) { + DCHECK(out.IsRegister() && !locations->GetLiveRegisters()->ContainsCoreRegister(out.reg())); + DataType::Type type = DataType::Type::kReference; + DCHECK_EQ(type, instruction_->GetType()); + riscv64_codegen->MoveLocation(out, calling_convention.GetReturnLocation(type), type); + } + RestoreLiveRegisters(codegen, locations); + + __ J(GetExitLabel()); + } + + const char* GetDescription() const override { return "LoadClassSlowPathRISCV64"; } + + private: + // The class this slow path will load. 
+ HLoadClass* const cls_; + + DISALLOW_COPY_AND_ASSIGN(LoadClassSlowPathRISCV64); +}; + +class DeoptimizationSlowPathRISCV64 : public SlowPathCodeRISCV64 { + public: + explicit DeoptimizationSlowPathRISCV64(HDeoptimize* instruction) + : SlowPathCodeRISCV64(instruction) {} + + void EmitNativeCode(CodeGenerator* codegen) override { + CodeGeneratorRISCV64* riscv64_codegen = down_cast<CodeGeneratorRISCV64*>(codegen); + __ Bind(GetEntryLabel()); + LocationSummary* locations = instruction_->GetLocations(); + SaveLiveRegisters(codegen, locations); + InvokeRuntimeCallingConvention calling_convention; + __ LoadConst32(calling_convention.GetRegisterAt(0), + static_cast<uint32_t>(instruction_->AsDeoptimize()->GetDeoptimizationKind())); + riscv64_codegen->InvokeRuntime(kQuickDeoptimize, instruction_, instruction_->GetDexPc(), this); + CheckEntrypointTypes<kQuickDeoptimize, void, DeoptimizationKind>(); + } + + const char* GetDescription() const override { return "DeoptimizationSlowPathRISCV64"; } + + private: + DISALLOW_COPY_AND_ASSIGN(DeoptimizationSlowPathRISCV64); +}; + +// Slow path generating a read barrier for a GC root. +class ReadBarrierForRootSlowPathRISCV64 : public SlowPathCodeRISCV64 { + public: + ReadBarrierForRootSlowPathRISCV64(HInstruction* instruction, Location out, Location root) + : SlowPathCodeRISCV64(instruction), out_(out), root_(root) { + } + + void EmitNativeCode(CodeGenerator* codegen) override { + DCHECK(codegen->EmitReadBarrier()); + LocationSummary* locations = instruction_->GetLocations(); + DataType::Type type = DataType::Type::kReference; + XRegister reg_out = out_.AsRegister<XRegister>(); + DCHECK(locations->CanCall()); + DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(reg_out)); + DCHECK(instruction_->IsLoadClass() || + instruction_->IsLoadString() || + (instruction_->IsInvoke() && instruction_->GetLocations()->Intrinsified())) + << "Unexpected instruction in read barrier for GC root slow path: " + << instruction_->DebugName(); + + __ Bind(GetEntryLabel()); + SaveLiveRegisters(codegen, locations); + + InvokeRuntimeCallingConvention calling_convention; + CodeGeneratorRISCV64* riscv64_codegen = down_cast<CodeGeneratorRISCV64*>(codegen); + riscv64_codegen->MoveLocation(Location::RegisterLocation(calling_convention.GetRegisterAt(0)), + root_, + DataType::Type::kReference); + riscv64_codegen->InvokeRuntime(kQuickReadBarrierForRootSlow, + instruction_, + instruction_->GetDexPc(), + this); + CheckEntrypointTypes<kQuickReadBarrierForRootSlow, mirror::Object*, GcRoot<mirror::Object>*>(); + riscv64_codegen->MoveLocation(out_, calling_convention.GetReturnLocation(type), type); + + RestoreLiveRegisters(codegen, locations); + __ J(GetExitLabel()); + } + + const char* GetDescription() const override { return "ReadBarrierForRootSlowPathRISCV64"; } + + private: + const Location out_; + const Location root_; + + DISALLOW_COPY_AND_ASSIGN(ReadBarrierForRootSlowPathRISCV64); +}; + +class MethodEntryExitHooksSlowPathRISCV64 : public SlowPathCodeRISCV64 { + public: + explicit MethodEntryExitHooksSlowPathRISCV64(HInstruction* instruction) + : SlowPathCodeRISCV64(instruction) {} + + void EmitNativeCode(CodeGenerator* codegen) override { + LocationSummary* locations = instruction_->GetLocations(); + QuickEntrypointEnum entry_point = + (instruction_->IsMethodEntryHook()) ? 
kQuickMethodEntryHook : kQuickMethodExitHook;
+    CodeGeneratorRISCV64* riscv64_codegen = down_cast<CodeGeneratorRISCV64*>(codegen);
+    __ Bind(GetEntryLabel());
+    SaveLiveRegisters(codegen, locations);
+    if (instruction_->IsMethodExitHook()) {
+      __ Li(A4, riscv64_codegen->GetFrameSize());
+    }
+    riscv64_codegen->InvokeRuntime(entry_point, instruction_, instruction_->GetDexPc(), this);
+    RestoreLiveRegisters(codegen, locations);
+    __ J(GetExitLabel());
+  }
+
+  const char* GetDescription() const override {
+    return "MethodEntryExitHooksSlowPathRISCV";
+  }
+
+ private:
+  DISALLOW_COPY_AND_ASSIGN(MethodEntryExitHooksSlowPathRISCV64);
+};
+
+class ArraySetSlowPathRISCV64 : public SlowPathCodeRISCV64 {
+ public:
+  explicit ArraySetSlowPathRISCV64(HInstruction* instruction) : SlowPathCodeRISCV64(instruction) {}
+
+  void EmitNativeCode(CodeGenerator* codegen) override {
+    LocationSummary* locations = instruction_->GetLocations();
+    __ Bind(GetEntryLabel());
+    SaveLiveRegisters(codegen, locations);
+
+    InvokeRuntimeCallingConvention calling_convention;
+    HParallelMove parallel_move(codegen->GetGraph()->GetAllocator());
+    parallel_move.AddMove(
+        locations->InAt(0),
+        Location::RegisterLocation(calling_convention.GetRegisterAt(0)),
+        DataType::Type::kReference,
+        nullptr);
+    parallel_move.AddMove(
+        locations->InAt(1),
+        Location::RegisterLocation(calling_convention.GetRegisterAt(1)),
+        DataType::Type::kInt32,
+        nullptr);
+    parallel_move.AddMove(
+        locations->InAt(2),
+        Location::RegisterLocation(calling_convention.GetRegisterAt(2)),
+        DataType::Type::kReference,
+        nullptr);
+    codegen->GetMoveResolver()->EmitNativeCode(&parallel_move);
+
+    CodeGeneratorRISCV64* riscv64_codegen = down_cast<CodeGeneratorRISCV64*>(codegen);
+    riscv64_codegen->InvokeRuntime(kQuickAputObject, instruction_, instruction_->GetDexPc(), this);
+    CheckEntrypointTypes<kQuickAputObject, void, mirror::Array*, int32_t, mirror::Object*>();
+    RestoreLiveRegisters(codegen, locations);
+    __ J(GetExitLabel());
+  }
+
+  const char* GetDescription() const override { return "ArraySetSlowPathRISCV64"; }
+
+ private:
+  DISALLOW_COPY_AND_ASSIGN(ArraySetSlowPathRISCV64);
+};
+
+class TypeCheckSlowPathRISCV64 : public SlowPathCodeRISCV64 {
+ public:
+  explicit TypeCheckSlowPathRISCV64(HInstruction* instruction, bool is_fatal)
+      : SlowPathCodeRISCV64(instruction), is_fatal_(is_fatal) {}
+
+  void EmitNativeCode(CodeGenerator* codegen) override {
+    LocationSummary* locations = instruction_->GetLocations();
+
+    uint32_t dex_pc = instruction_->GetDexPc();
+    DCHECK(instruction_->IsCheckCast()
+           || !locations->GetLiveRegisters()->ContainsCoreRegister(locations->Out().reg()));
+    CodeGeneratorRISCV64* riscv64_codegen = down_cast<CodeGeneratorRISCV64*>(codegen);
+
+    __ Bind(GetEntryLabel());
+    if (!is_fatal_ || instruction_->CanThrowIntoCatchBlock()) {
+      SaveLiveRegisters(codegen, locations);
+    }
+
+    // We're moving two locations to locations that could overlap, so we need a parallel
+    // move resolver.
+ InvokeRuntimeCallingConvention calling_convention; + codegen->EmitParallelMoves(locations->InAt(0), + Location::RegisterLocation(calling_convention.GetRegisterAt(0)), + DataType::Type::kReference, + locations->InAt(1), + Location::RegisterLocation(calling_convention.GetRegisterAt(1)), + DataType::Type::kReference); + if (instruction_->IsInstanceOf()) { + riscv64_codegen->InvokeRuntime(kQuickInstanceofNonTrivial, instruction_, dex_pc, this); + CheckEntrypointTypes<kQuickInstanceofNonTrivial, size_t, mirror::Object*, mirror::Class*>(); + DataType::Type ret_type = instruction_->GetType(); + Location ret_loc = calling_convention.GetReturnLocation(ret_type); + riscv64_codegen->MoveLocation(locations->Out(), ret_loc, ret_type); + } else { + DCHECK(instruction_->IsCheckCast()); + riscv64_codegen->InvokeRuntime(kQuickCheckInstanceOf, instruction_, dex_pc, this); + CheckEntrypointTypes<kQuickCheckInstanceOf, void, mirror::Object*, mirror::Class*>(); + } + + if (!is_fatal_) { + RestoreLiveRegisters(codegen, locations); + __ J(GetExitLabel()); + } + } + + const char* GetDescription() const override { return "TypeCheckSlowPathRISCV64"; } + + bool IsFatal() const override { return is_fatal_; } + + private: + const bool is_fatal_; + + DISALLOW_COPY_AND_ASSIGN(TypeCheckSlowPathRISCV64); +}; + +class DivZeroCheckSlowPathRISCV64 : public SlowPathCodeRISCV64 { + public: + explicit DivZeroCheckSlowPathRISCV64(HDivZeroCheck* instruction) + : SlowPathCodeRISCV64(instruction) {} + + void EmitNativeCode(CodeGenerator* codegen) override { + CodeGeneratorRISCV64* riscv64_codegen = down_cast<CodeGeneratorRISCV64*>(codegen); + __ Bind(GetEntryLabel()); + riscv64_codegen->InvokeRuntime( + kQuickThrowDivZero, instruction_, instruction_->GetDexPc(), this); + CheckEntrypointTypes<kQuickThrowDivZero, void, void>(); + } + + bool IsFatal() const override { return true; } + + const char* GetDescription() const override { return "DivZeroCheckSlowPathRISCV64"; } + + private: + DISALLOW_COPY_AND_ASSIGN(DivZeroCheckSlowPathRISCV64); +}; + +class ReadBarrierMarkSlowPathRISCV64 : public SlowPathCodeRISCV64 { + public: + ReadBarrierMarkSlowPathRISCV64(HInstruction* instruction, Location ref, Location entrypoint) + : SlowPathCodeRISCV64(instruction), ref_(ref), entrypoint_(entrypoint) { + DCHECK(entrypoint.IsRegister()); + } + + const char* GetDescription() const override { return "ReadBarrierMarkSlowPathRISCV64"; } + + void EmitNativeCode(CodeGenerator* codegen) override { + DCHECK(codegen->EmitReadBarrier()); + LocationSummary* locations = instruction_->GetLocations(); + XRegister ref_reg = ref_.AsRegister<XRegister>(); + DCHECK(locations->CanCall()); + DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(ref_reg)) << ref_reg; + DCHECK(instruction_->IsInstanceFieldGet() || + instruction_->IsStaticFieldGet() || + instruction_->IsArrayGet() || + instruction_->IsArraySet() || + instruction_->IsLoadClass() || + instruction_->IsLoadString() || + instruction_->IsInstanceOf() || + instruction_->IsCheckCast() || + (instruction_->IsInvoke() && instruction_->GetLocations()->Intrinsified())) + << "Unexpected instruction in read barrier marking slow path: " + << instruction_->DebugName(); + + __ Bind(GetEntryLabel()); + // No need to save live registers; it's taken care of by the + // entrypoint. Also, there is no need to update the stack mask, + // as this runtime call will not trigger a garbage collection. 
+ CodeGeneratorRISCV64* riscv64_codegen = down_cast<CodeGeneratorRISCV64*>(codegen); + DCHECK(ref_reg >= T0 && ref_reg != TR); + + // "Compact" slow path, saving two moves. + // + // Instead of using the standard runtime calling convention (input + // and output in A0 and V0 respectively): + // + // A0 <- ref + // V0 <- ReadBarrierMark(A0) + // ref <- V0 + // + // we just use rX (the register containing `ref`) as input and output + // of a dedicated entrypoint: + // + // rX <- ReadBarrierMarkRegX(rX) + // + riscv64_codegen->ValidateInvokeRuntimeWithoutRecordingPcInfo(instruction_, this); + DCHECK_NE(entrypoint_.AsRegister<XRegister>(), TMP); // A taken branch can clobber `TMP`. + __ Jalr(entrypoint_.AsRegister<XRegister>()); // Clobbers `RA` (used as the `entrypoint_`). + __ J(GetExitLabel()); + } + + private: + // The location (register) of the marked object reference. + const Location ref_; + + // The location of the already loaded entrypoint. + const Location entrypoint_; + + DISALLOW_COPY_AND_ASSIGN(ReadBarrierMarkSlowPathRISCV64); +}; + +class LoadStringSlowPathRISCV64 : public SlowPathCodeRISCV64 { + public: + explicit LoadStringSlowPathRISCV64(HLoadString* instruction) + : SlowPathCodeRISCV64(instruction) {} + + void EmitNativeCode(CodeGenerator* codegen) override { + DCHECK(instruction_->IsLoadString()); + DCHECK_EQ(instruction_->AsLoadString()->GetLoadKind(), HLoadString::LoadKind::kBssEntry); + LocationSummary* locations = instruction_->GetLocations(); + DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(locations->Out().reg())); + const dex::StringIndex string_index = instruction_->AsLoadString()->GetStringIndex(); + CodeGeneratorRISCV64* riscv64_codegen = down_cast<CodeGeneratorRISCV64*>(codegen); + InvokeRuntimeCallingConvention calling_convention; + __ Bind(GetEntryLabel()); + SaveLiveRegisters(codegen, locations); + + __ LoadConst32(calling_convention.GetRegisterAt(0), string_index.index_); + riscv64_codegen->InvokeRuntime( + kQuickResolveString, instruction_, instruction_->GetDexPc(), this); + CheckEntrypointTypes<kQuickResolveString, void*, uint32_t>(); + + DataType::Type type = DataType::Type::kReference; + DCHECK_EQ(type, instruction_->GetType()); + riscv64_codegen->MoveLocation( + locations->Out(), calling_convention.GetReturnLocation(type), type); + RestoreLiveRegisters(codegen, locations); + + __ J(GetExitLabel()); + } + + const char* GetDescription() const override { return "LoadStringSlowPathRISCV64"; } + + private: + DISALLOW_COPY_AND_ASSIGN(LoadStringSlowPathRISCV64); +}; + +#undef __ +#define __ down_cast<Riscv64Assembler*>(GetAssembler())-> // NOLINT + +template <typename Reg, + void (Riscv64Assembler::*opS)(Reg, FRegister, FRegister), + void (Riscv64Assembler::*opD)(Reg, FRegister, FRegister)> +inline void InstructionCodeGeneratorRISCV64::FpBinOp( + Reg rd, FRegister rs1, FRegister rs2, DataType::Type type) { + Riscv64Assembler* assembler = down_cast<CodeGeneratorRISCV64*>(codegen_)->GetAssembler(); + if (type == DataType::Type::kFloat32) { + (assembler->*opS)(rd, rs1, rs2); + } else { + DCHECK_EQ(type, DataType::Type::kFloat64); + (assembler->*opD)(rd, rs1, rs2); + } +} + +void InstructionCodeGeneratorRISCV64::FAdd( + FRegister rd, FRegister rs1, FRegister rs2, DataType::Type type) { + FpBinOp<FRegister, &Riscv64Assembler::FAddS, &Riscv64Assembler::FAddD>(rd, rs1, rs2, type); +} + +inline void InstructionCodeGeneratorRISCV64::FSub( + FRegister rd, FRegister rs1, FRegister rs2, DataType::Type type) { + FpBinOp<FRegister, &Riscv64Assembler::FSubS, 
&Riscv64Assembler::FSubD>(rd, rs1, rs2, type); +} + +inline void InstructionCodeGeneratorRISCV64::FDiv( + FRegister rd, FRegister rs1, FRegister rs2, DataType::Type type) { + FpBinOp<FRegister, &Riscv64Assembler::FDivS, &Riscv64Assembler::FDivD>(rd, rs1, rs2, type); +} + +inline void InstructionCodeGeneratorRISCV64::FMul( + FRegister rd, FRegister rs1, FRegister rs2, DataType::Type type) { + FpBinOp<FRegister, &Riscv64Assembler::FMulS, &Riscv64Assembler::FMulD>(rd, rs1, rs2, type); +} + +inline void InstructionCodeGeneratorRISCV64::FMin( + FRegister rd, FRegister rs1, FRegister rs2, DataType::Type type) { + FpBinOp<FRegister, &Riscv64Assembler::FMinS, &Riscv64Assembler::FMinD>(rd, rs1, rs2, type); +} + +inline void InstructionCodeGeneratorRISCV64::FMax( + FRegister rd, FRegister rs1, FRegister rs2, DataType::Type type) { + FpBinOp<FRegister, &Riscv64Assembler::FMaxS, &Riscv64Assembler::FMaxD>(rd, rs1, rs2, type); +} + +inline void InstructionCodeGeneratorRISCV64::FEq( + XRegister rd, FRegister rs1, FRegister rs2, DataType::Type type) { + FpBinOp<XRegister, &Riscv64Assembler::FEqS, &Riscv64Assembler::FEqD>(rd, rs1, rs2, type); +} + +inline void InstructionCodeGeneratorRISCV64::FLt( + XRegister rd, FRegister rs1, FRegister rs2, DataType::Type type) { + FpBinOp<XRegister, &Riscv64Assembler::FLtS, &Riscv64Assembler::FLtD>(rd, rs1, rs2, type); +} + +inline void InstructionCodeGeneratorRISCV64::FLe( + XRegister rd, FRegister rs1, FRegister rs2, DataType::Type type) { + FpBinOp<XRegister, &Riscv64Assembler::FLeS, &Riscv64Assembler::FLeD>(rd, rs1, rs2, type); +} + +template <typename Reg, + void (Riscv64Assembler::*opS)(Reg, FRegister), + void (Riscv64Assembler::*opD)(Reg, FRegister)> +inline void InstructionCodeGeneratorRISCV64::FpUnOp( + Reg rd, FRegister rs1, DataType::Type type) { + Riscv64Assembler* assembler = down_cast<CodeGeneratorRISCV64*>(codegen_)->GetAssembler(); + if (type == DataType::Type::kFloat32) { + (assembler->*opS)(rd, rs1); + } else { + DCHECK_EQ(type, DataType::Type::kFloat64); + (assembler->*opD)(rd, rs1); + } +} + +inline void InstructionCodeGeneratorRISCV64::FAbs( + FRegister rd, FRegister rs1, DataType::Type type) { + FpUnOp<FRegister, &Riscv64Assembler::FAbsS, &Riscv64Assembler::FAbsD>(rd, rs1, type); +} + +inline void InstructionCodeGeneratorRISCV64::FNeg( + FRegister rd, FRegister rs1, DataType::Type type) { + FpUnOp<FRegister, &Riscv64Assembler::FNegS, &Riscv64Assembler::FNegD>(rd, rs1, type); +} + +inline void InstructionCodeGeneratorRISCV64::FMv( + FRegister rd, FRegister rs1, DataType::Type type) { + FpUnOp<FRegister, &Riscv64Assembler::FMvS, &Riscv64Assembler::FMvD>(rd, rs1, type); +} + +inline void InstructionCodeGeneratorRISCV64::FMvX( + XRegister rd, FRegister rs1, DataType::Type type) { + FpUnOp<XRegister, &Riscv64Assembler::FMvXW, &Riscv64Assembler::FMvXD>(rd, rs1, type); +} + +void InstructionCodeGeneratorRISCV64::FClass( + XRegister rd, FRegister rs1, DataType::Type type) { + FpUnOp<XRegister, &Riscv64Assembler::FClassS, &Riscv64Assembler::FClassD>(rd, rs1, type); +} + +void InstructionCodeGeneratorRISCV64::Load( + Location out, XRegister rs1, int32_t offset, DataType::Type type) { + switch (type) { + case DataType::Type::kBool: + case DataType::Type::kUint8: + __ Loadbu(out.AsRegister<XRegister>(), rs1, offset); + break; + case DataType::Type::kInt8: + __ Loadb(out.AsRegister<XRegister>(), rs1, offset); + break; + case DataType::Type::kUint16: + __ Loadhu(out.AsRegister<XRegister>(), rs1, offset); + break; + case DataType::Type::kInt16: + __ 
Loadh(out.AsRegister<XRegister>(), rs1, offset); + break; + case DataType::Type::kInt32: + __ Loadw(out.AsRegister<XRegister>(), rs1, offset); + break; + case DataType::Type::kInt64: + __ Loadd(out.AsRegister<XRegister>(), rs1, offset); + break; + case DataType::Type::kReference: + __ Loadwu(out.AsRegister<XRegister>(), rs1, offset); + break; + case DataType::Type::kFloat32: + __ FLoadw(out.AsFpuRegister<FRegister>(), rs1, offset); + break; + case DataType::Type::kFloat64: + __ FLoadd(out.AsFpuRegister<FRegister>(), rs1, offset); + break; + case DataType::Type::kUint32: + case DataType::Type::kUint64: + case DataType::Type::kVoid: + LOG(FATAL) << "Unreachable type " << type; + UNREACHABLE(); + } +} + +void InstructionCodeGeneratorRISCV64::Store( + Location value, XRegister rs1, int32_t offset, DataType::Type type) { + DCHECK_IMPLIES(value.IsConstant(), IsZeroBitPattern(value.GetConstant())); + if (kPoisonHeapReferences && type == DataType::Type::kReference && !value.IsConstant()) { + riscv64::ScratchRegisterScope srs(GetAssembler()); + XRegister tmp = srs.AllocateXRegister(); + __ Mv(tmp, value.AsRegister<XRegister>()); + codegen_->PoisonHeapReference(tmp); + __ Storew(tmp, rs1, offset); + return; + } + switch (type) { + case DataType::Type::kBool: + case DataType::Type::kUint8: + case DataType::Type::kInt8: + __ Storeb(InputXRegisterOrZero(value), rs1, offset); + break; + case DataType::Type::kUint16: + case DataType::Type::kInt16: + __ Storeh(InputXRegisterOrZero(value), rs1, offset); + break; + case DataType::Type::kFloat32: + if (!value.IsConstant()) { + __ FStorew(value.AsFpuRegister<FRegister>(), rs1, offset); + break; + } + FALLTHROUGH_INTENDED; + case DataType::Type::kInt32: + case DataType::Type::kReference: + __ Storew(InputXRegisterOrZero(value), rs1, offset); + break; + case DataType::Type::kFloat64: + if (!value.IsConstant()) { + __ FStored(value.AsFpuRegister<FRegister>(), rs1, offset); + break; + } + FALLTHROUGH_INTENDED; + case DataType::Type::kInt64: + __ Stored(InputXRegisterOrZero(value), rs1, offset); + break; + case DataType::Type::kUint32: + case DataType::Type::kUint64: + case DataType::Type::kVoid: + LOG(FATAL) << "Unreachable type " << type; + UNREACHABLE(); + } +} + +void InstructionCodeGeneratorRISCV64::StoreSeqCst(Location value, + XRegister rs1, + int32_t offset, + DataType::Type type, + HInstruction* instruction) { + if (DataType::Size(type) >= 4u) { + // Use AMOSWAP for 32-bit and 64-bit data types. + ScratchRegisterScope srs(GetAssembler()); + XRegister swap_src = kNoXRegister; + if (kPoisonHeapReferences && type == DataType::Type::kReference && !value.IsConstant()) { + swap_src = srs.AllocateXRegister(); + __ Mv(swap_src, value.AsRegister<XRegister>()); + codegen_->PoisonHeapReference(swap_src); + } else if (DataType::IsFloatingPointType(type) && !value.IsConstant()) { + swap_src = srs.AllocateXRegister(); + FMvX(swap_src, value.AsFpuRegister<FRegister>(), type); + } else { + swap_src = InputXRegisterOrZero(value); + } + XRegister addr = rs1; + if (offset != 0) { + addr = srs.AllocateXRegister(); + __ AddConst64(addr, rs1, offset); + } + if (DataType::Is64BitType(type)) { + __ AmoSwapD(Zero, swap_src, addr, AqRl::kRelease); + } else { + __ AmoSwapW(Zero, swap_src, addr, AqRl::kRelease); + } + if (instruction != nullptr) { + codegen_->MaybeRecordImplicitNullCheck(instruction); + } + } else { + // Use fences for smaller data types. 
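    // Rough shape of the two paths in this function (register names and the exact fence
    // operands are illustrative, not literal output):
    //   32/64-bit:  addi  addr, rs1, offset                    // only if offset != 0
    //               amoswap.w.rl / amoswap.d.rl  zero, src, (addr)
    //   sub-word:   fence rw, w                                // kAnyStore
    //               sb/sh  src, offset(rs1)
    //               fence rw, rw                               // kAnyAny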
+ codegen_->GenerateMemoryBarrier(MemBarrierKind::kAnyStore); + Store(value, rs1, offset, type); + if (instruction != nullptr) { + codegen_->MaybeRecordImplicitNullCheck(instruction); + } + codegen_->GenerateMemoryBarrier(MemBarrierKind::kAnyAny); + } +} + +void InstructionCodeGeneratorRISCV64::ShNAdd( + XRegister rd, XRegister rs1, XRegister rs2, DataType::Type type) { + switch (type) { + case DataType::Type::kBool: + case DataType::Type::kUint8: + case DataType::Type::kInt8: + DCHECK_EQ(DataType::SizeShift(type), 0u); + __ Add(rd, rs1, rs2); + break; + case DataType::Type::kUint16: + case DataType::Type::kInt16: + DCHECK_EQ(DataType::SizeShift(type), 1u); + __ Sh1Add(rd, rs1, rs2); + break; + case DataType::Type::kInt32: + case DataType::Type::kReference: + case DataType::Type::kFloat32: + DCHECK_EQ(DataType::SizeShift(type), 2u); + __ Sh2Add(rd, rs1, rs2); + break; + case DataType::Type::kInt64: + case DataType::Type::kFloat64: + DCHECK_EQ(DataType::SizeShift(type), 3u); + __ Sh3Add(rd, rs1, rs2); + break; + case DataType::Type::kUint32: + case DataType::Type::kUint64: + case DataType::Type::kVoid: + LOG(FATAL) << "Unreachable type " << type; + UNREACHABLE(); + } +} + +Riscv64Assembler* ParallelMoveResolverRISCV64::GetAssembler() const { + return codegen_->GetAssembler(); +} + +void ParallelMoveResolverRISCV64::EmitMove(size_t index) { + MoveOperands* move = moves_[index]; + codegen_->MoveLocation(move->GetDestination(), move->GetSource(), move->GetType()); +} + +void ParallelMoveResolverRISCV64::EmitSwap(size_t index) { + MoveOperands* move = moves_[index]; + codegen_->SwapLocations(move->GetDestination(), move->GetSource(), move->GetType()); +} + +void ParallelMoveResolverRISCV64::SpillScratch([[maybe_unused]] int reg) { + LOG(FATAL) << "Unimplemented"; + UNREACHABLE(); +} + +void ParallelMoveResolverRISCV64::RestoreScratch([[maybe_unused]] int reg) { + LOG(FATAL) << "Unimplemented"; + UNREACHABLE(); +} + +void ParallelMoveResolverRISCV64::Exchange(int index1, int index2, bool double_slot) { + // We have 2 scratch X registers and 1 scratch F register that we can use. We prefer + // to use X registers for the swap but if both offsets are too big, we need to reserve + // one of the X registers for address adjustment and use an F register. + bool use_fp_tmp2 = false; + if (!IsInt<12>(index2)) { + if (!IsInt<12>(index1)) { + use_fp_tmp2 = true; + } else { + std::swap(index1, index2); + } + } + DCHECK_IMPLIES(!IsInt<12>(index2), use_fp_tmp2); + + Location loc1(double_slot ? Location::DoubleStackSlot(index1) : Location::StackSlot(index1)); + Location loc2(double_slot ? Location::DoubleStackSlot(index2) : Location::StackSlot(index2)); + riscv64::ScratchRegisterScope srs(GetAssembler()); + Location tmp = Location::RegisterLocation(srs.AllocateXRegister()); + DataType::Type tmp_type = double_slot ? DataType::Type::kInt64 : DataType::Type::kInt32; + Location tmp2 = use_fp_tmp2 + ? Location::FpuRegisterLocation(srs.AllocateFRegister()) + : Location::RegisterLocation(srs.AllocateXRegister()); + DataType::Type tmp2_type = use_fp_tmp2 + ? (double_slot ? DataType::Type::kFloat64 : DataType::Type::kFloat32) + : tmp_type; + + codegen_->MoveLocation(tmp, loc1, tmp_type); + codegen_->MoveLocation(tmp2, loc2, tmp2_type); + if (use_fp_tmp2) { + codegen_->MoveLocation(loc2, tmp, tmp_type); + } else { + // We cannot use `Stored()` or `Storew()` via `MoveLocation()` because we have + // no more scratch registers available. Use `Sd()` or `Sw()` explicitly. 
+ DCHECK(IsInt<12>(index2)); + if (double_slot) { + __ Sd(tmp.AsRegister<XRegister>(), SP, index2); + } else { + __ Sw(tmp.AsRegister<XRegister>(), SP, index2); + } + srs.FreeXRegister(tmp.AsRegister<XRegister>()); // Free a temporary for `MoveLocation()`. + } + codegen_->MoveLocation(loc1, tmp2, tmp2_type); +} + +InstructionCodeGeneratorRISCV64::InstructionCodeGeneratorRISCV64(HGraph* graph, + CodeGeneratorRISCV64* codegen) + : InstructionCodeGenerator(graph, codegen), + assembler_(codegen->GetAssembler()), + codegen_(codegen) {} + +void InstructionCodeGeneratorRISCV64::GenerateClassInitializationCheck( + SlowPathCodeRISCV64* slow_path, XRegister class_reg) { + ScratchRegisterScope srs(GetAssembler()); + XRegister tmp = srs.AllocateXRegister(); + XRegister tmp2 = srs.AllocateXRegister(); + + // We shall load the full 32-bit status word with sign-extension and compare as unsigned + // to a sign-extended shifted status value. This yields the same comparison as loading and + // materializing unsigned but the constant is materialized with a single LUI instruction. + __ Loadw(tmp, class_reg, mirror::Class::StatusOffset().SizeValue()); // Sign-extended. + __ Li(tmp2, ShiftedSignExtendedClassStatusValue<ClassStatus::kVisiblyInitialized>()); + __ Bltu(tmp, tmp2, slow_path->GetEntryLabel()); + __ Bind(slow_path->GetExitLabel()); +} + +void InstructionCodeGeneratorRISCV64::GenerateBitstringTypeCheckCompare( + HTypeCheckInstruction* instruction, XRegister temp) { + UNUSED(instruction); + UNUSED(temp); + LOG(FATAL) << "Unimplemented"; +} + +void InstructionCodeGeneratorRISCV64::GenerateSuspendCheck(HSuspendCheck* instruction, + HBasicBlock* successor) { + if (instruction->IsNoOp()) { + if (successor != nullptr) { + __ J(codegen_->GetLabelOf(successor)); + } + return; + } + + if (codegen_->CanUseImplicitSuspendCheck()) { + LOG(FATAL) << "Unimplemented ImplicitSuspendCheck"; + return; + } + + SuspendCheckSlowPathRISCV64* slow_path = + down_cast<SuspendCheckSlowPathRISCV64*>(instruction->GetSlowPath()); + + if (slow_path == nullptr) { + slow_path = + new (codegen_->GetScopedAllocator()) SuspendCheckSlowPathRISCV64(instruction, successor); + instruction->SetSlowPath(slow_path); + codegen_->AddSlowPath(slow_path); + if (successor != nullptr) { + DCHECK(successor->IsLoopHeader()); + } + } else { + DCHECK_EQ(slow_path->GetSuccessor(), successor); + } + + ScratchRegisterScope srs(GetAssembler()); + XRegister tmp = srs.AllocateXRegister(); + __ Loadw(tmp, TR, Thread::ThreadFlagsOffset<kRiscv64PointerSize>().Int32Value()); + static_assert(Thread::SuspendOrCheckpointRequestFlags() != std::numeric_limits<uint32_t>::max()); + static_assert(IsPowerOfTwo(Thread::SuspendOrCheckpointRequestFlags() + 1u)); + // Shift out other bits. Use an instruction that can be 16-bit with the "C" Standard Extension. + __ Slli(tmp, tmp, CLZ(static_cast<uint64_t>(Thread::SuspendOrCheckpointRequestFlags()))); + if (successor == nullptr) { + __ Bnez(tmp, slow_path->GetEntryLabel()); + __ Bind(slow_path->GetReturnLabel()); + } else { + __ Beqz(tmp, codegen_->GetLabelOf(successor)); + __ J(slow_path->GetEntryLabel()); + // slow_path will return to GetLabelOf(successor). 
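    // Note on the `Slli` above: the suspend and checkpoint request flags occupy the lowest
    // bits of the thread flags word, so shifting left by CLZ of the 64-bit mask discards all
    // other flags and leaves a non-zero value exactly when such a request is pending. For an
    // assumed two-bit mask 0b11, CLZ(0b11) == 62 and `slli tmp, tmp, 62` keeps just those two
    // bits, now in the topmost positions.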
+ } +} + +void InstructionCodeGeneratorRISCV64::GenerateReferenceLoadOneRegister( + HInstruction* instruction, + Location out, + uint32_t offset, + Location maybe_temp, + ReadBarrierOption read_barrier_option) { + XRegister out_reg = out.AsRegister<XRegister>(); + if (read_barrier_option == kWithReadBarrier) { + DCHECK(codegen_->EmitReadBarrier()); + if (kUseBakerReadBarrier) { + // Load with fast path based Baker's read barrier. + // /* HeapReference<Object> */ out = *(out + offset) + codegen_->GenerateFieldLoadWithBakerReadBarrier(instruction, + out, + out_reg, + offset, + maybe_temp, + /* needs_null_check= */ false); + } else { + // Load with slow path based read barrier. + // Save the value of `out` into `maybe_temp` before overwriting it + // in the following move operation, as we will need it for the + // read barrier below. + __ Mv(maybe_temp.AsRegister<XRegister>(), out_reg); + // /* HeapReference<Object> */ out = *(out + offset) + __ Loadwu(out_reg, out_reg, offset); + codegen_->GenerateReadBarrierSlow(instruction, out, out, maybe_temp, offset); + } + } else { + // Plain load with no read barrier. + // /* HeapReference<Object> */ out = *(out + offset) + __ Loadwu(out_reg, out_reg, offset); + codegen_->MaybeUnpoisonHeapReference(out_reg); + } +} + +void InstructionCodeGeneratorRISCV64::GenerateReferenceLoadTwoRegisters( + HInstruction* instruction, + Location out, + Location obj, + uint32_t offset, + Location maybe_temp, + ReadBarrierOption read_barrier_option) { + XRegister out_reg = out.AsRegister<XRegister>(); + XRegister obj_reg = obj.AsRegister<XRegister>(); + if (read_barrier_option == kWithReadBarrier) { + DCHECK(codegen_->EmitReadBarrier()); + if (kUseBakerReadBarrier) { + // Load with fast path based Baker's read barrier. + // /* HeapReference<Object> */ out = *(obj + offset) + codegen_->GenerateFieldLoadWithBakerReadBarrier(instruction, + out, + obj_reg, + offset, + maybe_temp, + /* needs_null_check= */ false); + } else { + // Load with slow path based read barrier. + // /* HeapReference<Object> */ out = *(obj + offset) + __ Loadwu(out_reg, obj_reg, offset); + codegen_->GenerateReadBarrierSlow(instruction, out, out, obj, offset); + } + } else { + // Plain load with no read barrier. + // /* HeapReference<Object> */ out = *(obj + offset) + __ Loadwu(out_reg, obj_reg, offset); + codegen_->MaybeUnpoisonHeapReference(out_reg); + } +} + +SlowPathCodeRISCV64* CodeGeneratorRISCV64::AddGcRootBakerBarrierBarrierSlowPath( + HInstruction* instruction, Location root, Location temp) { + SlowPathCodeRISCV64* slow_path = + new (GetScopedAllocator()) ReadBarrierMarkSlowPathRISCV64(instruction, root, temp); + AddSlowPath(slow_path); + return slow_path; +} + +void CodeGeneratorRISCV64::EmitBakerReadBarierMarkingCheck( + SlowPathCodeRISCV64* slow_path, Location root, Location temp) { + const int32_t entry_point_offset = ReadBarrierMarkEntrypointOffset(root); + // Loading the entrypoint does not require a load acquire since it is only changed when + // threads are suspended or running a checkpoint. 
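  // The check emitted below is roughly (register numbers are illustrative):
  //   ld   temp, <pReadBarrierMarkRegNN>(TR)   // entrypoint is null unless GC is marking
  //   bnez temp, <mark slow path>              // the slow path calls the entrypoint held in
  //   <exit label>:                            // `temp` and jumps back to the exit label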
+ __ Loadd(temp.AsRegister<XRegister>(), TR, entry_point_offset); + __ Bnez(temp.AsRegister<XRegister>(), slow_path->GetEntryLabel()); + __ Bind(slow_path->GetExitLabel()); +} + +void CodeGeneratorRISCV64::GenerateGcRootFieldLoad(HInstruction* instruction, + Location root, + XRegister obj, + uint32_t offset, + ReadBarrierOption read_barrier_option, + Riscv64Label* label_low) { + DCHECK_IMPLIES(label_low != nullptr, offset == kLinkTimeOffsetPlaceholderLow) << offset; + XRegister root_reg = root.AsRegister<XRegister>(); + if (read_barrier_option == kWithReadBarrier) { + DCHECK(EmitReadBarrier()); + if (kUseBakerReadBarrier) { + // Note that we do not actually check the value of `GetIsGcMarking()` + // to decide whether to mark the loaded GC root or not. Instead, we + // load into `temp` (T6) the read barrier mark entry point corresponding + // to register `root`. If `temp` is null, it means that `GetIsGcMarking()` + // is false, and vice versa. + // + // GcRoot<mirror::Object> root = *(obj+offset); // Original reference load. + // temp = Thread::Current()->pReadBarrierMarkReg ## root.reg() + // if (temp != null) { + // root = temp(root) + // } + // + // TODO(riscv64): Introduce a "marking register" that holds the pointer to one of the + // register marking entrypoints if marking (null if not marking) and make sure that + // marking entrypoints for other registers are at known offsets, so that we can call + // them using the "marking register" plus the offset embedded in the JALR instruction. + + if (label_low != nullptr) { + __ Bind(label_low); + } + // /* GcRoot<mirror::Object> */ root = *(obj + offset) + __ Loadwu(root_reg, obj, offset); + static_assert( + sizeof(mirror::CompressedReference<mirror::Object>) == sizeof(GcRoot<mirror::Object>), + "art::mirror::CompressedReference<mirror::Object> and art::GcRoot<mirror::Object> " + "have different sizes."); + static_assert(sizeof(mirror::CompressedReference<mirror::Object>) == sizeof(int32_t), + "art::mirror::CompressedReference<mirror::Object> and int32_t " + "have different sizes."); + + // Use RA as temp. It is clobbered in the slow path anyway. + Location temp = Location::RegisterLocation(RA); + SlowPathCodeRISCV64* slow_path = + AddGcRootBakerBarrierBarrierSlowPath(instruction, root, temp); + EmitBakerReadBarierMarkingCheck(slow_path, root, temp); + } else { + // GC root loaded through a slow path for read barriers other + // than Baker's. + // /* GcRoot<mirror::Object>* */ root = obj + offset + if (label_low != nullptr) { + __ Bind(label_low); + } + __ AddConst32(root_reg, obj, offset); + // /* mirror::Object* */ root = root->Read() + GenerateReadBarrierForRootSlow(instruction, root, root); + } + } else { + // Plain GC root load with no read barrier. + // /* GcRoot<mirror::Object> */ root = *(obj + offset) + if (label_low != nullptr) { + __ Bind(label_low); + } + __ Loadwu(root_reg, obj, offset); + // Note that GC roots are not affected by heap poisoning, thus we + // do not have to unpoison `root_reg` here. + } +} + +void InstructionCodeGeneratorRISCV64::GenerateTestAndBranch(HInstruction* instruction, + size_t condition_input_index, + Riscv64Label* true_target, + Riscv64Label* false_target) { + HInstruction* cond = instruction->InputAt(condition_input_index); + + if (true_target == nullptr && false_target == nullptr) { + // Nothing to do. The code always falls through. + return; + } else if (cond->IsIntConstant()) { + // Constant condition, statically compared against "true" (integer value 1). 
+ if (cond->AsIntConstant()->IsTrue()) { + if (true_target != nullptr) { + __ J(true_target); + } + } else { + DCHECK(cond->AsIntConstant()->IsFalse()) << cond->AsIntConstant()->GetValue(); + if (false_target != nullptr) { + __ J(false_target); + } + } + return; + } + + // The following code generates these patterns: + // (1) true_target == nullptr && false_target != nullptr + // - opposite condition true => branch to false_target + // (2) true_target != nullptr && false_target == nullptr + // - condition true => branch to true_target + // (3) true_target != nullptr && false_target != nullptr + // - condition true => branch to true_target + // - branch to false_target + if (IsBooleanValueOrMaterializedCondition(cond)) { + // The condition instruction has been materialized, compare the output to 0. + Location cond_val = instruction->GetLocations()->InAt(condition_input_index); + DCHECK(cond_val.IsRegister()); + if (true_target == nullptr) { + __ Beqz(cond_val.AsRegister<XRegister>(), false_target); + } else { + __ Bnez(cond_val.AsRegister<XRegister>(), true_target); + } + } else { + // The condition instruction has not been materialized, use its inputs as + // the comparison and its condition as the branch condition. + HCondition* condition = cond->AsCondition(); + DataType::Type type = condition->InputAt(0)->GetType(); + LocationSummary* locations = condition->GetLocations(); + IfCondition if_cond = condition->GetCondition(); + Riscv64Label* branch_target = true_target; + + if (true_target == nullptr) { + if_cond = condition->GetOppositeCondition(); + branch_target = false_target; + } + + switch (type) { + case DataType::Type::kFloat32: + case DataType::Type::kFloat64: + GenerateFpCondition(if_cond, condition->IsGtBias(), type, locations, branch_target); + break; + default: + // Integral types and reference equality. + GenerateIntLongCompareAndBranch(if_cond, locations, branch_target); + break; + } + } + + // If neither branch falls through (case 3), the conditional branch to `true_target` + // was already emitted (case 2) and we need to emit a jump to `false_target`. 
+ if (true_target != nullptr && false_target != nullptr) { + __ J(false_target); + } +} + +void InstructionCodeGeneratorRISCV64::DivRemOneOrMinusOne(HBinaryOperation* instruction) { + DCHECK(instruction->IsDiv() || instruction->IsRem()); + DataType::Type type = instruction->GetResultType(); + + LocationSummary* locations = instruction->GetLocations(); + Location second = locations->InAt(1); + DCHECK(second.IsConstant()); + + XRegister out = locations->Out().AsRegister<XRegister>(); + XRegister dividend = locations->InAt(0).AsRegister<XRegister>(); + int64_t imm = Int64FromConstant(second.GetConstant()); + DCHECK(imm == 1 || imm == -1); + + if (instruction->IsRem()) { + __ Mv(out, Zero); + } else { + if (imm == -1) { + if (type == DataType::Type::kInt32) { + __ Subw(out, Zero, dividend); + } else { + DCHECK_EQ(type, DataType::Type::kInt64); + __ Sub(out, Zero, dividend); + } + } else if (out != dividend) { + __ Mv(out, dividend); + } + } +} + +void InstructionCodeGeneratorRISCV64::DivRemByPowerOfTwo(HBinaryOperation* instruction) { + DCHECK(instruction->IsDiv() || instruction->IsRem()); + DataType::Type type = instruction->GetResultType(); + DCHECK(type == DataType::Type::kInt32 || type == DataType::Type::kInt64) << type; + + LocationSummary* locations = instruction->GetLocations(); + Location second = locations->InAt(1); + DCHECK(second.IsConstant()); + + XRegister out = locations->Out().AsRegister<XRegister>(); + XRegister dividend = locations->InAt(0).AsRegister<XRegister>(); + int64_t imm = Int64FromConstant(second.GetConstant()); + int64_t abs_imm = static_cast<uint64_t>(AbsOrMin(imm)); + int ctz_imm = CTZ(abs_imm); + DCHECK_GE(ctz_imm, 1); // Division by +/-1 is handled by `DivRemOneOrMinusOne()`. + + ScratchRegisterScope srs(GetAssembler()); + XRegister tmp = srs.AllocateXRegister(); + // Calculate the negative dividend adjustment `tmp = dividend < 0 ? abs_imm - 1 : 0`. + // This adjustment is needed for rounding the division result towards zero. + if (type == DataType::Type::kInt32 || ctz_imm == 1) { + // A 32-bit dividend is sign-extended to 64-bit, so we can use the upper bits. + // And for a 64-bit division by +/-2, we need just the sign bit. + DCHECK_IMPLIES(type == DataType::Type::kInt32, ctz_imm < 32); + __ Srli(tmp, dividend, 64 - ctz_imm); + } else { + // For other 64-bit divisions, we need to replicate the sign bit. + __ Srai(tmp, dividend, 63); + __ Srli(tmp, tmp, 64 - ctz_imm); + } + // The rest of the calculation can use 64-bit operations even for 32-bit div/rem. + __ Add(tmp, tmp, dividend); + if (instruction->IsDiv()) { + __ Srai(out, tmp, ctz_imm); + if (imm < 0) { + __ Neg(out, out); + } + } else { + if (ctz_imm <= 11) { + __ Andi(tmp, tmp, -abs_imm); + } else { + ScratchRegisterScope srs2(GetAssembler()); + XRegister tmp2 = srs2.AllocateXRegister(); + __ Li(tmp2, -abs_imm); + __ And(tmp, tmp, tmp2); + } + __ Sub(out, dividend, tmp); + } +} + +void InstructionCodeGeneratorRISCV64::GenerateDivRemWithAnyConstant(HBinaryOperation* instruction) { + DCHECK(instruction->IsDiv() || instruction->IsRem()); + LocationSummary* locations = instruction->GetLocations(); + XRegister dividend = locations->InAt(0).AsRegister<XRegister>(); + XRegister out = locations->Out().AsRegister<XRegister>(); + Location second = locations->InAt(1); + int64_t imm = Int64FromConstant(second.GetConstant()); + DataType::Type type = instruction->GetResultType(); + ScratchRegisterScope srs(GetAssembler()); + XRegister tmp = srs.AllocateXRegister(); + + // TODO: optimize with constant. 
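  // For now the divisor is simply materialized and a hardware DIV/REM is issued, roughly
  // `li tmp, imm` followed by `div[w]`/`rem[w]`. The TODO refers to the usual strength
  // reduction for division by a fixed constant (multiplication by a precomputed "magic"
  // reciprocal plus a correction), as other backends already do for this case.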
+ __ LoadConst64(tmp, imm); + if (instruction->IsDiv()) { + if (type == DataType::Type::kInt32) { + __ Divw(out, dividend, tmp); + } else { + __ Div(out, dividend, tmp); + } + } else { + if (type == DataType::Type::kInt32) { + __ Remw(out, dividend, tmp); + } else { + __ Rem(out, dividend, tmp); + } + } +} + +void InstructionCodeGeneratorRISCV64::GenerateDivRemIntegral(HBinaryOperation* instruction) { + DCHECK(instruction->IsDiv() || instruction->IsRem()); + DataType::Type type = instruction->GetResultType(); + DCHECK(type == DataType::Type::kInt32 || type == DataType::Type::kInt64) << type; + + LocationSummary* locations = instruction->GetLocations(); + XRegister out = locations->Out().AsRegister<XRegister>(); + Location second = locations->InAt(1); + + if (second.IsConstant()) { + int64_t imm = Int64FromConstant(second.GetConstant()); + if (imm == 0) { + // Do not generate anything. DivZeroCheck would prevent any code to be executed. + } else if (imm == 1 || imm == -1) { + DivRemOneOrMinusOne(instruction); + } else if (IsPowerOfTwo(AbsOrMin(imm))) { + DivRemByPowerOfTwo(instruction); + } else { + DCHECK(imm <= -2 || imm >= 2); + GenerateDivRemWithAnyConstant(instruction); + } + } else { + XRegister dividend = locations->InAt(0).AsRegister<XRegister>(); + XRegister divisor = second.AsRegister<XRegister>(); + if (instruction->IsDiv()) { + if (type == DataType::Type::kInt32) { + __ Divw(out, dividend, divisor); + } else { + __ Div(out, dividend, divisor); + } + } else { + if (type == DataType::Type::kInt32) { + __ Remw(out, dividend, divisor); + } else { + __ Rem(out, dividend, divisor); + } + } + } +} + +void InstructionCodeGeneratorRISCV64::GenerateIntLongCondition(IfCondition cond, + LocationSummary* locations) { + XRegister rd = locations->Out().AsRegister<XRegister>(); + GenerateIntLongCondition(cond, locations, rd, /*to_all_bits=*/ false); +} + +void InstructionCodeGeneratorRISCV64::GenerateIntLongCondition(IfCondition cond, + LocationSummary* locations, + XRegister rd, + bool to_all_bits) { + XRegister rs1 = locations->InAt(0).AsRegister<XRegister>(); + Location rs2_location = locations->InAt(1); + bool use_imm = rs2_location.IsConstant(); + int64_t imm = use_imm ? CodeGenerator::GetInt64ValueOf(rs2_location.GetConstant()) : 0; + XRegister rs2 = use_imm ? kNoXRegister : rs2_location.AsRegister<XRegister>(); + bool reverse_condition = false; + switch (cond) { + case kCondEQ: + case kCondNE: + if (!use_imm) { + __ Sub(rd, rs1, rs2); // SUB is OK here even for 32-bit comparison. + } else if (imm != 0) { + DCHECK(IsInt<12>(-imm)); + __ Addi(rd, rs1, -imm); // ADDI is OK here even for 32-bit comparison. + } // else test `rs1` directly without subtraction for `use_imm && imm == 0`. + if (cond == kCondEQ) { + __ Seqz(rd, (use_imm && imm == 0) ? rs1 : rd); + } else { + __ Snez(rd, (use_imm && imm == 0) ? rs1 : rd); + } + break; + + case kCondLT: + case kCondGE: + if (use_imm) { + DCHECK(IsInt<12>(imm)); + __ Slti(rd, rs1, imm); + } else { + __ Slt(rd, rs1, rs2); + } + // Calculate `rs1 >= rhs` as `!(rs1 < rhs)` since there's only the SLT but no SGE. + reverse_condition = (cond == kCondGE); + break; + + case kCondLE: + case kCondGT: + if (use_imm) { + // Calculate `rs1 <= imm` as `rs1 < imm + 1`. + DCHECK(IsInt<12>(imm + 1)); // The value that overflows would fail this check. + __ Slti(rd, rs1, imm + 1); + } else { + __ Slt(rd, rs2, rs1); + } + // Calculate `rs1 > imm` as `!(rs1 < imm + 1)` and calculate + // `rs1 <= rs2` as `!(rs2 < rs1)` since there's only the SLT but no SGE. 
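      // The assignment below follows from what was just computed (illustrative table):
      //   cond     use_imm  value in `rd`             reverse?
      //   kCondLE  yes      rs1 < imm + 1  (i.e. LE)  no
      //   kCondLE  no       rs2 < rs1      (i.e. GT)  yes
      //   kCondGT  yes      rs1 < imm + 1  (i.e. LE)  yes
      //   kCondGT  no       rs2 < rs1      (i.e. GT)  no
      // That is, reverse exactly when `(cond == kCondGT) == use_imm`.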
+ reverse_condition = ((cond == kCondGT) == use_imm); + break; + + case kCondB: + case kCondAE: + if (use_imm) { + // Sltiu sign-extends its 12-bit immediate operand before the comparison + // and thus lets us compare directly with unsigned values in the ranges + // [0, 0x7ff] and [0x[ffffffff]fffff800, 0x[ffffffff]ffffffff]. + DCHECK(IsInt<12>(imm)); + __ Sltiu(rd, rs1, imm); + } else { + __ Sltu(rd, rs1, rs2); + } + // Calculate `rs1 AE rhs` as `!(rs1 B rhs)` since there's only the SLTU but no SGEU. + reverse_condition = (cond == kCondAE); + break; + + case kCondBE: + case kCondA: + if (use_imm) { + // Calculate `rs1 BE imm` as `rs1 B imm + 1`. + // Sltiu sign-extends its 12-bit immediate operand before the comparison + // and thus lets us compare directly with unsigned values in the ranges + // [0, 0x7ff] and [0x[ffffffff]fffff800, 0x[ffffffff]ffffffff]. + DCHECK(IsInt<12>(imm + 1)); // The value that overflows would fail this check. + __ Sltiu(rd, rs1, imm + 1); + } else { + __ Sltu(rd, rs2, rs1); + } + // Calculate `rs1 A imm` as `!(rs1 B imm + 1)` and calculate + // `rs1 BE rs2` as `!(rs2 B rs1)` since there's only the SLTU but no SGEU. + reverse_condition = ((cond == kCondA) == use_imm); + break; + } + if (to_all_bits) { + // Store the result to all bits; in other words, "true" is represented by -1. + if (reverse_condition) { + __ Addi(rd, rd, -1); // 0 -> -1, 1 -> 0 + } else { + __ Neg(rd, rd); // 0 -> 0, 1 -> -1 + } + } else { + if (reverse_condition) { + __ Xori(rd, rd, 1); + } + } +} + +void InstructionCodeGeneratorRISCV64::GenerateIntLongCompareAndBranch(IfCondition cond, + LocationSummary* locations, + Riscv64Label* label) { + XRegister left = locations->InAt(0).AsRegister<XRegister>(); + Location right_location = locations->InAt(1); + if (right_location.IsConstant()) { + DCHECK_EQ(CodeGenerator::GetInt64ValueOf(right_location.GetConstant()), 0); + switch (cond) { + case kCondEQ: + case kCondBE: // <= 0 if zero + __ Beqz(left, label); + break; + case kCondNE: + case kCondA: // > 0 if non-zero + __ Bnez(left, label); + break; + case kCondLT: + __ Bltz(left, label); + break; + case kCondGE: + __ Bgez(left, label); + break; + case kCondLE: + __ Blez(left, label); + break; + case kCondGT: + __ Bgtz(left, label); + break; + case kCondB: // always false + break; + case kCondAE: // always true + __ J(label); + break; + } + } else { + XRegister right_reg = right_location.AsRegister<XRegister>(); + switch (cond) { + case kCondEQ: + __ Beq(left, right_reg, label); + break; + case kCondNE: + __ Bne(left, right_reg, label); + break; + case kCondLT: + __ Blt(left, right_reg, label); + break; + case kCondGE: + __ Bge(left, right_reg, label); + break; + case kCondLE: + __ Ble(left, right_reg, label); + break; + case kCondGT: + __ Bgt(left, right_reg, label); + break; + case kCondB: + __ Bltu(left, right_reg, label); + break; + case kCondAE: + __ Bgeu(left, right_reg, label); + break; + case kCondBE: + __ Bleu(left, right_reg, label); + break; + case kCondA: + __ Bgtu(left, right_reg, label); + break; + } + } +} + +void InstructionCodeGeneratorRISCV64::GenerateFpCondition(IfCondition cond, + bool gt_bias, + DataType::Type type, + LocationSummary* locations, + Riscv64Label* label) { + DCHECK_EQ(label != nullptr, locations->Out().IsInvalid()); + ScratchRegisterScope srs(GetAssembler()); + XRegister rd = + (label != nullptr) ? 
srs.AllocateXRegister() : locations->Out().AsRegister<XRegister>(); + GenerateFpCondition(cond, gt_bias, type, locations, label, rd, /*to_all_bits=*/ false); +} + +void InstructionCodeGeneratorRISCV64::GenerateFpCondition(IfCondition cond, + bool gt_bias, + DataType::Type type, + LocationSummary* locations, + Riscv64Label* label, + XRegister rd, + bool to_all_bits) { + // RISCV-V FP compare instructions yield the following values: + // l<r l=r l>r Unordered + // FEQ l,r 0 1 0 0 + // FLT l,r 1 0 0 0 + // FLT r,l 0 0 1 0 + // FLE l,r 1 1 0 0 + // FLE r,l 0 1 1 0 + // + // We can calculate the `Compare` results using the following formulas: + // l<r l=r l>r Unordered + // Compare/gt_bias -1 0 1 1 = ((FLE l,r) ^ 1) - (FLT l,r) + // Compare/lt_bias -1 0 1 -1 = ((FLE r,l) - 1) + (FLT r,l) + // These are emitted in `VisitCompare()`. + // + // This function emits a fused `Condition(Compare(., .), 0)`. If we compare the + // `Compare` results above with 0, we get the following values and formulas: + // l<r l=r l>r Unordered + // CondEQ/- 0 1 0 0 = (FEQ l, r) + // CondNE/- 1 0 1 1 = (FEQ l, r) ^ 1 + // CondLT/gt_bias 1 0 0 0 = (FLT l,r) + // CondLT/lt_bias 1 0 0 1 = (FLE r,l) ^ 1 + // CondLE/gt_bias 1 1 0 0 = (FLE l,r) + // CondLE/lt_bias 1 1 0 1 = (FLT r,l) ^ 1 + // CondGT/gt_bias 0 0 1 1 = (FLE l,r) ^ 1 + // CondGT/lt_bias 0 0 1 0 = (FLT r,l) + // CondGE/gt_bias 0 1 1 1 = (FLT l,r) ^ 1 + // CondGE/lt_bias 0 1 1 0 = (FLE r,l) + // (CondEQ/CondNE comparison with zero yields the same result with gt_bias and lt_bias.) + // + // If the condition is not materialized, the `^ 1` is not emitted, + // instead the condition is reversed by emitting BEQZ instead of BNEZ. + + FRegister rs1 = locations->InAt(0).AsFpuRegister<FRegister>(); + FRegister rs2 = locations->InAt(1).AsFpuRegister<FRegister>(); + + bool reverse_condition = false; + switch (cond) { + case kCondEQ: + FEq(rd, rs1, rs2, type); + break; + case kCondNE: + FEq(rd, rs1, rs2, type); + reverse_condition = true; + break; + case kCondLT: + if (gt_bias) { + FLt(rd, rs1, rs2, type); + } else { + FLe(rd, rs2, rs1, type); + reverse_condition = true; + } + break; + case kCondLE: + if (gt_bias) { + FLe(rd, rs1, rs2, type); + } else { + FLt(rd, rs2, rs1, type); + reverse_condition = true; + } + break; + case kCondGT: + if (gt_bias) { + FLe(rd, rs1, rs2, type); + reverse_condition = true; + } else { + FLt(rd, rs2, rs1, type); + } + break; + case kCondGE: + if (gt_bias) { + FLt(rd, rs1, rs2, type); + reverse_condition = true; + } else { + FLe(rd, rs2, rs1, type); + } + break; + default: + LOG(FATAL) << "Unexpected floating-point condition " << cond; + UNREACHABLE(); + } + + if (label != nullptr) { + if (reverse_condition) { + __ Beqz(rd, label); + } else { + __ Bnez(rd, label); + } + } else if (to_all_bits) { + // Store the result to all bits; in other words, "true" is represented by -1. 
+ if (reverse_condition) { + __ Addi(rd, rd, -1); // 0 -> -1, 1 -> 0 + } else { + __ Neg(rd, rd); // 0 -> 0, 1 -> -1 + } + } else { + if (reverse_condition) { + __ Xori(rd, rd, 1); + } + } +} + +void CodeGeneratorRISCV64::GenerateFieldLoadWithBakerReadBarrier(HInstruction* instruction, + Location ref, + XRegister obj, + uint32_t offset, + Location temp, + bool needs_null_check) { + GenerateReferenceLoadWithBakerReadBarrier( + instruction, ref, obj, offset, /*index=*/ Location::NoLocation(), temp, needs_null_check); +} + +void CodeGeneratorRISCV64::GenerateArrayLoadWithBakerReadBarrier(HInstruction* instruction, + Location ref, + XRegister obj, + uint32_t data_offset, + Location index, + Location temp, + bool needs_null_check) { + GenerateReferenceLoadWithBakerReadBarrier( + instruction, ref, obj, data_offset, index, temp, needs_null_check); +} + +void CodeGeneratorRISCV64::GenerateReferenceLoadWithBakerReadBarrier(HInstruction* instruction, + Location ref, + XRegister obj, + uint32_t offset, + Location index, + Location temp, + bool needs_null_check) { + // For now, use the same approach as for GC roots plus unpoison the reference if needed. + // TODO(riscv64): Implement checking if the holder is black. + UNUSED(temp); + + DCHECK(EmitBakerReadBarrier()); + XRegister reg = ref.AsRegister<XRegister>(); + if (index.IsValid()) { + DCHECK(!needs_null_check); + DCHECK(index.IsRegister()); + DataType::Type type = DataType::Type::kReference; + DCHECK_EQ(type, instruction->GetType()); + if (instruction->IsArrayGet()) { + // /* HeapReference<Object> */ ref = *(obj + index * element_size + offset) + instruction_visitor_.ShNAdd(reg, index.AsRegister<XRegister>(), obj, type); + } else { + // /* HeapReference<Object> */ ref = *(obj + index + offset) + DCHECK(instruction->IsInvoke()); + DCHECK(instruction->GetLocations()->Intrinsified()); + __ Add(reg, index.AsRegister<XRegister>(), obj); + } + __ Loadwu(reg, reg, offset); + } else { + // /* HeapReference<Object> */ ref = *(obj + offset) + __ Loadwu(reg, obj, offset); + if (needs_null_check) { + MaybeRecordImplicitNullCheck(instruction); + } + } + MaybeUnpoisonHeapReference(reg); + + // Slow path marking the reference. + XRegister tmp = RA; // Use RA as temp. It is clobbered in the slow path anyway. + SlowPathCodeRISCV64* slow_path = new (GetScopedAllocator()) ReadBarrierMarkSlowPathRISCV64( + instruction, ref, Location::RegisterLocation(tmp)); + AddSlowPath(slow_path); + + const int32_t entry_point_offset = ReadBarrierMarkEntrypointOffset(ref); + // Loading the entrypoint does not require a load acquire since it is only changed when + // threads are suspended or running a checkpoint. 
+ __ Loadd(tmp, TR, entry_point_offset); + __ Bnez(tmp, slow_path->GetEntryLabel()); + __ Bind(slow_path->GetExitLabel()); +} + +SlowPathCodeRISCV64* CodeGeneratorRISCV64::AddReadBarrierSlowPath(HInstruction* instruction, + Location out, + Location ref, + Location obj, + uint32_t offset, + Location index) { + UNUSED(instruction); + UNUSED(out); + UNUSED(ref); + UNUSED(obj); + UNUSED(offset); + UNUSED(index); + LOG(FATAL) << "Unimplemented"; + UNREACHABLE(); +} + +void CodeGeneratorRISCV64::GenerateReadBarrierSlow(HInstruction* instruction, + Location out, + Location ref, + Location obj, + uint32_t offset, + Location index) { + UNUSED(instruction); + UNUSED(out); + UNUSED(ref); + UNUSED(obj); + UNUSED(offset); + UNUSED(index); + LOG(FATAL) << "Unimplemented"; +} + +void CodeGeneratorRISCV64::MaybeGenerateReadBarrierSlow(HInstruction* instruction, + Location out, + Location ref, + Location obj, + uint32_t offset, + Location index) { + if (EmitReadBarrier()) { + // Baker's read barriers shall be handled by the fast path + // (CodeGeneratorRISCV64::GenerateReferenceLoadWithBakerReadBarrier). + DCHECK(!kUseBakerReadBarrier); + // If heap poisoning is enabled, unpoisoning will be taken care of + // by the runtime within the slow path. + GenerateReadBarrierSlow(instruction, out, ref, obj, offset, index); + } else if (kPoisonHeapReferences) { + UnpoisonHeapReference(out.AsRegister<XRegister>()); + } +} + +void CodeGeneratorRISCV64::GenerateReadBarrierForRootSlow(HInstruction* instruction, + Location out, + Location root) { + DCHECK(EmitReadBarrier()); + + // Insert a slow path based read barrier *after* the GC root load. + // + // Note that GC roots are not affected by heap poisoning, so we do + // not need to do anything special for this here. + SlowPathCodeRISCV64* slow_path = + new (GetScopedAllocator()) ReadBarrierForRootSlowPathRISCV64(instruction, out, root); + AddSlowPath(slow_path); + + __ J(slow_path->GetEntryLabel()); + __ Bind(slow_path->GetExitLabel()); +} + +void InstructionCodeGeneratorRISCV64::HandleGoto(HInstruction* instruction, + HBasicBlock* successor) { + if (successor->IsExitBlock()) { + DCHECK(instruction->GetPrevious()->AlwaysThrows()); + return; // no code needed + } + + HBasicBlock* block = instruction->GetBlock(); + HInstruction* previous = instruction->GetPrevious(); + HLoopInformation* info = block->GetLoopInformation(); + + if (info != nullptr && info->IsBackEdge(*block) && info->HasSuspendCheck()) { + codegen_->MaybeIncrementHotness(/*is_frame_entry=*/ false); + GenerateSuspendCheck(info->GetSuspendCheck(), successor); + return; // `GenerateSuspendCheck()` emitted the jump. + } + if (block->IsEntryBlock() && previous != nullptr && previous->IsSuspendCheck()) { + GenerateSuspendCheck(previous->AsSuspendCheck(), nullptr); + } + if (!codegen_->GoesToNextBlock(block, successor)) { + __ J(codegen_->GetLabelOf(successor)); + } +} + +void InstructionCodeGeneratorRISCV64::GenPackedSwitchWithCompares(XRegister adjusted, + XRegister temp, + uint32_t num_entries, + HBasicBlock* switch_block) { + // Note: The `adjusted` register holds `value - lower_bound`. If the `lower_bound` is 0, + // `adjusted` is the original `value` register and we must not clobber it. Otherwise, + // `adjusted` is the `temp`. The caller already emitted the `adjusted < num_entries` check. + + // Create a set of compare/jumps. 
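  // Illustrative sequence for num_entries == 4 (L0..L3 denote the successor labels):
  //   beqz adj, L0        // value == lower_bound
  //   addi tmp, adj, -2
  //   bltz tmp, L1        // value == lower_bound + 1
  //   beqz tmp, L2        // value == lower_bound + 2
  //   j    L3             // only remaining possibility; the caller checked the upper bound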
+  ArrayRef<HBasicBlock* const> successors(switch_block->GetSuccessors());
+  uint32_t index = 0;
+  for (; num_entries - index >= 2u; index += 2u) {
+    // Jump to `successors[index]` if `value == lower_bound + index`.
+    // Note that `adjusted` holds `value - lower_bound - index`.
+    __ Beqz(adjusted, codegen_->GetLabelOf(successors[index]));
+    if (num_entries - index == 2u) {
+      break;  // The last entry shall match, so the branch shall be unconditional.
+    }
+    // Jump to `successors[index + 1]` if `value == lower_bound + index + 1`.
+    // Modify `adjusted` to hold `value - lower_bound - index - 2` for this comparison.
+    __ Addi(temp, adjusted, -2);
+    adjusted = temp;
+    __ Bltz(adjusted, codegen_->GetLabelOf(successors[index + 1]));
+  }
+  // For the last entry, unconditionally jump to `successors[num_entries - 1]`.
+  __ J(codegen_->GetLabelOf(successors[num_entries - 1u]));
+}
+
+void InstructionCodeGeneratorRISCV64::GenTableBasedPackedSwitch(XRegister adjusted,
+                                                                XRegister temp,
+                                                                uint32_t num_entries,
+                                                                HBasicBlock* switch_block) {
+  // Note: The `adjusted` register holds `value - lower_bound`. If the `lower_bound` is 0,
+  // `adjusted` is the original `value` register and we must not clobber it. Otherwise,
+  // `adjusted` is the `temp`. The caller already emitted the `adjusted < num_entries` check.
+
+  // Create a jump table.
+  ArenaVector<Riscv64Label*> labels(num_entries,
+                                    __ GetAllocator()->Adapter(kArenaAllocSwitchTable));
+  const ArenaVector<HBasicBlock*>& successors = switch_block->GetSuccessors();
+  for (uint32_t i = 0; i < num_entries; i++) {
+    labels[i] = codegen_->GetLabelOf(successors[i]);
+  }
+  JumpTable* table = __ CreateJumpTable(std::move(labels));
+
+  // Load the address of the jump table.
+  // Note: The `LoadLabelAddress()` emits AUIPC+ADD. It is possible to avoid the ADD and
+  // instead embed that offset in the LW below as well as all jump table entries but
+  // that would need some invasive changes in the jump table handling in the assembler.
+  ScratchRegisterScope srs(GetAssembler());
+  XRegister table_base = srs.AllocateXRegister();
+  __ LoadLabelAddress(table_base, table->GetLabel());
+
+  // Load the PC difference from the jump table.
+  // TODO(riscv64): Use SH2ADD from the Zba extension.
+  __ Slli(temp, adjusted, 2);
+  __ Add(temp, temp, table_base);
+  __ Lw(temp, temp, 0);
+
+  // Compute the absolute target address by adding the table start address
+  // (the table contains offsets to targets relative to its start).
+  __ Add(temp, temp, table_base);
+  // And jump.
+  __ Jr(temp);
+}
+
+int32_t InstructionCodeGeneratorRISCV64::VecAddress(LocationSummary* locations,
+                                                    size_t size,
+                                                    /*out*/ XRegister* adjusted_base) {
+  UNUSED(locations);
+  UNUSED(size);
+  UNUSED(adjusted_base);
+  LOG(FATAL) << "Unimplemented";
+  UNREACHABLE();
+}
+
+void LocationsBuilderRISCV64::HandleBinaryOp(HBinaryOperation* instruction) {
+  DCHECK_EQ(instruction->InputCount(), 2u);
+  LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(instruction);
+  DataType::Type type = instruction->GetResultType();
+  switch (type) {
+    case DataType::Type::kInt32:
+    case DataType::Type::kInt64: {
+      locations->SetInAt(0, Location::RequiresRegister());
+      HInstruction* right = instruction->InputAt(1);
+      bool can_use_imm = false;
+      if (instruction->IsMin() || instruction->IsMax()) {
+        can_use_imm = IsZeroBitPattern(right);
+      } else if (right->IsConstant()) {
+        int64_t imm = CodeGenerator::GetInt64ValueOf(right->AsConstant());
+        can_use_imm = IsInt<12>(instruction->IsSub() ?
-imm : imm); + } + if (can_use_imm) { + locations->SetInAt(1, Location::ConstantLocation(right)); + } else { + locations->SetInAt(1, Location::RequiresRegister()); + } + locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); + break; + } + + case DataType::Type::kFloat32: + case DataType::Type::kFloat64: + locations->SetInAt(0, Location::RequiresFpuRegister()); + locations->SetInAt(1, Location::RequiresFpuRegister()); + if (instruction->IsMin() || instruction->IsMax()) { + locations->SetOut(Location::RequiresFpuRegister(), Location::kOutputOverlap); + } else { + locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap); + } + break; + + default: + LOG(FATAL) << "Unexpected " << instruction->DebugName() << " type " << type; + UNREACHABLE(); + } +} + +void InstructionCodeGeneratorRISCV64::HandleBinaryOp(HBinaryOperation* instruction) { + DataType::Type type = instruction->GetType(); + LocationSummary* locations = instruction->GetLocations(); + + switch (type) { + case DataType::Type::kInt32: + case DataType::Type::kInt64: { + XRegister rd = locations->Out().AsRegister<XRegister>(); + XRegister rs1 = locations->InAt(0).AsRegister<XRegister>(); + Location rs2_location = locations->InAt(1); + + bool use_imm = rs2_location.IsConstant(); + XRegister rs2 = use_imm ? kNoXRegister : rs2_location.AsRegister<XRegister>(); + int64_t imm = use_imm ? CodeGenerator::GetInt64ValueOf(rs2_location.GetConstant()) : 0; + + if (instruction->IsAnd()) { + if (use_imm) { + __ Andi(rd, rs1, imm); + } else { + __ And(rd, rs1, rs2); + } + } else if (instruction->IsOr()) { + if (use_imm) { + __ Ori(rd, rs1, imm); + } else { + __ Or(rd, rs1, rs2); + } + } else if (instruction->IsXor()) { + if (use_imm) { + __ Xori(rd, rs1, imm); + } else { + __ Xor(rd, rs1, rs2); + } + } else if (instruction->IsAdd() || instruction->IsSub()) { + if (type == DataType::Type::kInt32) { + if (use_imm) { + __ Addiw(rd, rs1, instruction->IsSub() ? -imm : imm); + } else if (instruction->IsAdd()) { + __ Addw(rd, rs1, rs2); + } else { + DCHECK(instruction->IsSub()); + __ Subw(rd, rs1, rs2); + } + } else { + if (use_imm) { + __ Addi(rd, rs1, instruction->IsSub() ? -imm : imm); + } else if (instruction->IsAdd()) { + __ Add(rd, rs1, rs2); + } else { + DCHECK(instruction->IsSub()); + __ Sub(rd, rs1, rs2); + } + } + } else if (instruction->IsMin()) { + DCHECK_IMPLIES(use_imm, imm == 0); + __ Min(rd, rs1, use_imm ? Zero : rs2); + } else { + DCHECK(instruction->IsMax()); + DCHECK_IMPLIES(use_imm, imm == 0); + __ Max(rd, rs1, use_imm ? Zero : rs2); + } + break; + } + case DataType::Type::kFloat32: + case DataType::Type::kFloat64: { + FRegister rd = locations->Out().AsFpuRegister<FRegister>(); + FRegister rs1 = locations->InAt(0).AsFpuRegister<FRegister>(); + FRegister rs2 = locations->InAt(1).AsFpuRegister<FRegister>(); + if (instruction->IsAdd()) { + FAdd(rd, rs1, rs2, type); + } else if (instruction->IsSub()) { + FSub(rd, rs1, rs2, type); + } else { + DCHECK(instruction->IsMin() || instruction->IsMax()); + // If one of the operands is NaN and the other is not, riscv64 instructions FMIN/FMAX + // return the other operand while we want to return the NaN operand. + DCHECK_NE(rd, rs1); // Requested `Location::kOutputOverlap`. + DCHECK_NE(rd, rs2); // Requested `Location::kOutputOverlap`. + ScratchRegisterScope srs(GetAssembler()); + XRegister tmp = srs.AllocateXRegister(); + XRegister tmp2 = srs.AllocateXRegister(); + Riscv64Label done; + // Return `rs1` if it's NaN. 
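      // FCLASS sets exactly one class bit and the two NaN classes are the highest ones
      // (bit 8 = signaling NaN, bit 9 = quiet NaN), so an unsigned compare against
      // kFClassNaNMinValue (presumably 1 << 8) detects either kind of NaN.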
+ FClass(tmp, rs1, type); + __ Li(tmp2, kFClassNaNMinValue); + FMv(rd, rs1, type); + __ Bgeu(tmp, tmp2, &done); + // Return `rs2` if it's NaN. + FClass(tmp, rs2, type); + FMv(rd, rs2, type); + __ Bgeu(tmp, tmp2, &done); + // Calculate Min/Max for non-NaN arguments. + if (instruction->IsMin()) { + FMin(rd, rs1, rs2, type); + } else { + FMax(rd, rs1, rs2, type); + } + __ Bind(&done); + } + break; + } + default: + LOG(FATAL) << "Unexpected binary operation type " << type; + UNREACHABLE(); + } +} + +void LocationsBuilderRISCV64::HandleCondition(HCondition* instruction) { + LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(instruction); + switch (instruction->InputAt(0)->GetType()) { + case DataType::Type::kFloat32: + case DataType::Type::kFloat64: + locations->SetInAt(0, Location::RequiresFpuRegister()); + locations->SetInAt(1, Location::RequiresFpuRegister()); + break; + + default: { + locations->SetInAt(0, Location::RequiresRegister()); + HInstruction* rhs = instruction->InputAt(1); + bool use_imm = false; + if (rhs->IsConstant()) { + int64_t imm = CodeGenerator::GetInt64ValueOf(rhs->AsConstant()); + if (instruction->IsEmittedAtUseSite()) { + // For `HIf`, materialize all non-zero constants with an `HParallelMove`. + // Note: For certain constants and conditions, the code could be improved. + // For example, 2048 takes two instructions to materialize but the negative + // -2048 could be embedded in ADDI for EQ/NE comparison. + use_imm = (imm == 0); + } else { + // Constants that cannot be embedded in an instruction's 12-bit immediate shall be + // materialized with an `HParallelMove`. This simplifies the code and avoids cases + // with arithmetic overflow. Adjust the `imm` if needed for a particular instruction. + switch (instruction->GetCondition()) { + case kCondEQ: + case kCondNE: + imm = -imm; // ADDI with negative immediate (there is no SUBI). + break; + case kCondLE: + case kCondGT: + case kCondBE: + case kCondA: + imm += 1; // SLTI/SLTIU with adjusted immediate (there is no SLEI/SLEIU). + break; + default: + break; + } + use_imm = IsInt<12>(imm); + } + } + if (use_imm) { + locations->SetInAt(1, Location::ConstantLocation(rhs)); + } else { + locations->SetInAt(1, Location::RequiresRegister()); + } + break; + } + } + if (!instruction->IsEmittedAtUseSite()) { + locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); + } +} + +void InstructionCodeGeneratorRISCV64::HandleCondition(HCondition* instruction) { + if (instruction->IsEmittedAtUseSite()) { + return; + } + + DataType::Type type = instruction->InputAt(0)->GetType(); + LocationSummary* locations = instruction->GetLocations(); + switch (type) { + case DataType::Type::kFloat32: + case DataType::Type::kFloat64: + GenerateFpCondition(instruction->GetCondition(), instruction->IsGtBias(), type, locations); + return; + default: + // Integral types and reference equality. 
+ GenerateIntLongCondition(instruction->GetCondition(), locations); + return; + } +} + +void LocationsBuilderRISCV64::HandleShift(HBinaryOperation* instruction) { + DCHECK(instruction->IsShl() || + instruction->IsShr() || + instruction->IsUShr() || + instruction->IsRor()); + + LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(instruction); + DataType::Type type = instruction->GetResultType(); + switch (type) { + case DataType::Type::kInt32: + case DataType::Type::kInt64: { + locations->SetInAt(0, Location::RequiresRegister()); + locations->SetInAt(1, Location::RegisterOrConstant(instruction->InputAt(1))); + locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); + break; + } + default: + LOG(FATAL) << "Unexpected shift type " << type; + UNREACHABLE(); + } +} + +void InstructionCodeGeneratorRISCV64::HandleShift(HBinaryOperation* instruction) { + DCHECK(instruction->IsShl() || + instruction->IsShr() || + instruction->IsUShr() || + instruction->IsRor()); + LocationSummary* locations = instruction->GetLocations(); + DataType::Type type = instruction->GetType(); + + switch (type) { + case DataType::Type::kInt32: + case DataType::Type::kInt64: { + XRegister rd = locations->Out().AsRegister<XRegister>(); + XRegister rs1 = locations->InAt(0).AsRegister<XRegister>(); + Location rs2_location = locations->InAt(1); + + if (rs2_location.IsConstant()) { + int64_t imm = CodeGenerator::GetInt64ValueOf(rs2_location.GetConstant()); + uint32_t shamt = + imm & (type == DataType::Type::kInt32 ? kMaxIntShiftDistance : kMaxLongShiftDistance); + + if (shamt == 0) { + if (rd != rs1) { + __ Mv(rd, rs1); + } + } else if (type == DataType::Type::kInt32) { + if (instruction->IsShl()) { + __ Slliw(rd, rs1, shamt); + } else if (instruction->IsShr()) { + __ Sraiw(rd, rs1, shamt); + } else if (instruction->IsUShr()) { + __ Srliw(rd, rs1, shamt); + } else { + DCHECK(instruction->IsRor()); + __ Roriw(rd, rs1, shamt); + } + } else { + if (instruction->IsShl()) { + __ Slli(rd, rs1, shamt); + } else if (instruction->IsShr()) { + __ Srai(rd, rs1, shamt); + } else if (instruction->IsUShr()) { + __ Srli(rd, rs1, shamt); + } else { + DCHECK(instruction->IsRor()); + __ Rori(rd, rs1, shamt); + } + } + } else { + XRegister rs2 = rs2_location.AsRegister<XRegister>(); + if (type == DataType::Type::kInt32) { + if (instruction->IsShl()) { + __ Sllw(rd, rs1, rs2); + } else if (instruction->IsShr()) { + __ Sraw(rd, rs1, rs2); + } else if (instruction->IsUShr()) { + __ Srlw(rd, rs1, rs2); + } else { + DCHECK(instruction->IsRor()); + __ Rorw(rd, rs1, rs2); + } + } else { + if (instruction->IsShl()) { + __ Sll(rd, rs1, rs2); + } else if (instruction->IsShr()) { + __ Sra(rd, rs1, rs2); + } else if (instruction->IsUShr()) { + __ Srl(rd, rs1, rs2); + } else { + DCHECK(instruction->IsRor()); + __ Ror(rd, rs1, rs2); + } + } + } + break; + } + default: + LOG(FATAL) << "Unexpected shift operation type " << type; + } +} + +void CodeGeneratorRISCV64::MarkGCCard(XRegister object, + XRegister value, + bool value_can_be_null) { + Riscv64Label done; + ScratchRegisterScope srs(GetAssembler()); + XRegister card = srs.AllocateXRegister(); + XRegister temp = srs.AllocateXRegister(); + if (value_can_be_null) { + __ Beqz(value, &done); + } + // Load the address of the card table into `card`. + __ Loadd(card, TR, Thread::CardTableOffset<kRiscv64PointerSize>().Int32Value()); + + // Calculate the address of the card corresponding to `object`. 
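+ // The card address is `card_table_base + (object >> kCardShift)`, i.e. one
+ // card table byte per 2^kCardShift bytes of heap; `card` holds the (biased)
+ // table base loaded from the thread above.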
+ __ Srli(temp, object, gc::accounting::CardTable::kCardShift); + __ Add(temp, card, temp); + // Write the `art::gc::accounting::CardTable::kCardDirty` value into the + // `object`'s card. + // + // Register `card` contains the address of the card table. Note that the card + // table's base is biased during its creation so that it always starts at an + // address whose least-significant byte is equal to `kCardDirty` (see + // art::gc::accounting::CardTable::Create). Therefore the SB instruction + // below writes the `kCardDirty` (byte) value into the `object`'s card + // (located at `card + object >> kCardShift`). + // + // This dual use of the value in register `card` (1. to calculate the location + // of the card to mark; and 2. to load the `kCardDirty` value) saves a load + // (no need to explicitly load `kCardDirty` as an immediate value). + __ Sb(card, temp, 0); // No scratch register left for `Storeb()`. + if (value_can_be_null) { + __ Bind(&done); + } +} + +void LocationsBuilderRISCV64::HandleFieldSet(HInstruction* instruction) { + LocationSummary* locations = + new (GetGraph()->GetAllocator()) LocationSummary(instruction, LocationSummary::kNoCall); + locations->SetInAt(0, Location::RequiresRegister()); + locations->SetInAt(1, ValueLocationForStore(instruction->InputAt(1))); +} + +void InstructionCodeGeneratorRISCV64::HandleFieldSet(HInstruction* instruction, + const FieldInfo& field_info, + bool value_can_be_null, + WriteBarrierKind write_barrier_kind) { + DataType::Type type = field_info.GetFieldType(); + LocationSummary* locations = instruction->GetLocations(); + XRegister obj = locations->InAt(0).AsRegister<XRegister>(); + Location value = locations->InAt(1); + DCHECK_IMPLIES(value.IsConstant(), IsZeroBitPattern(value.GetConstant())); + bool is_volatile = field_info.IsVolatile(); + uint32_t offset = field_info.GetFieldOffset().Uint32Value(); + + if (is_volatile) { + StoreSeqCst(value, obj, offset, type, instruction); + } else { + Store(value, obj, offset, type); + codegen_->MaybeRecordImplicitNullCheck(instruction); + } + + if (CodeGenerator::StoreNeedsWriteBarrier(type, instruction->InputAt(1)) && + write_barrier_kind != WriteBarrierKind::kDontEmit) { + codegen_->MarkGCCard( + obj, + value.AsRegister<XRegister>(), + value_can_be_null && write_barrier_kind == WriteBarrierKind::kEmitWithNullCheck); + } +} + +void LocationsBuilderRISCV64::HandleFieldGet(HInstruction* instruction) { + DCHECK(instruction->IsInstanceFieldGet() || instruction->IsStaticFieldGet()); + + bool object_field_get_with_read_barrier = + (instruction->GetType() == DataType::Type::kReference) && codegen_->EmitReadBarrier(); + LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary( + instruction, + object_field_get_with_read_barrier + ? LocationSummary::kCallOnSlowPath + : LocationSummary::kNoCall); + + // Input for object receiver. + locations->SetInAt(0, Location::RequiresRegister()); + + if (DataType::IsFloatingPointType(instruction->GetType())) { + locations->SetOut(Location::RequiresFpuRegister()); + } else { + // The output overlaps for an object field get when read barriers + // are enabled: we do not want the load to overwrite the object's + // location, as we need it to emit the read barrier. + locations->SetOut( + Location::RequiresRegister(), + object_field_get_with_read_barrier ? 
Location::kOutputOverlap : Location::kNoOutputOverlap); + } + + if (object_field_get_with_read_barrier && kUseBakerReadBarrier) { + locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty()); // No caller-save registers. + // We need a temporary register for the read barrier marking slow + // path in CodeGeneratorRISCV64::GenerateFieldLoadWithBakerReadBarrier. + locations->AddTemp(Location::RequiresRegister()); + } +} + +void InstructionCodeGeneratorRISCV64::HandleFieldGet(HInstruction* instruction, + const FieldInfo& field_info) { + DCHECK(instruction->IsInstanceFieldGet() || instruction->IsStaticFieldGet()); + DCHECK_EQ(DataType::Size(field_info.GetFieldType()), DataType::Size(instruction->GetType())); + DataType::Type type = instruction->GetType(); + LocationSummary* locations = instruction->GetLocations(); + Location obj_loc = locations->InAt(0); + XRegister obj = obj_loc.AsRegister<XRegister>(); + Location dst_loc = locations->Out(); + bool is_volatile = field_info.IsVolatile(); + uint32_t offset = field_info.GetFieldOffset().Uint32Value(); + + if (is_volatile) { + codegen_->GenerateMemoryBarrier(MemBarrierKind::kAnyAny); + } + + if (type == DataType::Type::kReference && codegen_->EmitBakerReadBarrier()) { + // /* HeapReference<Object> */ dst = *(obj + offset) + Location temp_loc = locations->GetTemp(0); + // Note that a potential implicit null check is handled in this + // CodeGeneratorRISCV64::GenerateFieldLoadWithBakerReadBarrier call. + codegen_->GenerateFieldLoadWithBakerReadBarrier(instruction, + dst_loc, + obj, + offset, + temp_loc, + /* needs_null_check= */ true); + } else { + Load(dst_loc, obj, offset, type); + codegen_->MaybeRecordImplicitNullCheck(instruction); + } + + if (is_volatile) { + codegen_->GenerateMemoryBarrier(MemBarrierKind::kLoadAny); + } + + if (type == DataType::Type::kReference && !codegen_->EmitBakerReadBarrier()) { + // If read barriers are enabled, emit read barriers other than + // Baker's using a slow path (and also unpoison the loaded + // reference, if heap poisoning is enabled). + codegen_->MaybeGenerateReadBarrierSlow(instruction, dst_loc, dst_loc, obj_loc, offset); + } +} + +void InstructionCodeGeneratorRISCV64::GenerateMethodEntryExitHook(HInstruction* instruction) { + SlowPathCodeRISCV64* slow_path = + new (codegen_->GetScopedAllocator()) MethodEntryExitHooksSlowPathRISCV64(instruction); + codegen_->AddSlowPath(slow_path); + + ScratchRegisterScope temps(GetAssembler()); + XRegister tmp = temps.AllocateXRegister(); + + if (instruction->IsMethodExitHook()) { + // Check if we are required to check if the caller needs a deoptimization. Strictly speaking it + // would be sufficient to check if CheckCallerForDeopt bit is set. Though it is faster to check + // if it is just non-zero. kCHA bit isn't used in debuggable runtimes as cha optimization is + // disabled in debuggable runtime. The other bit is used when this method itself requires a + // deoptimization due to redefinition. So it is safe to just check for non-zero value here. + __ Loadwu(tmp, SP, codegen_->GetStackOffsetOfShouldDeoptimizeFlag()); + __ Bnez(tmp, slow_path->GetEntryLabel()); + } + + uint64_t hook_offset = instruction->IsMethodExitHook() ? 
+ instrumentation::Instrumentation::HaveMethodExitListenersOffset().SizeValue() : + instrumentation::Instrumentation::HaveMethodEntryListenersOffset().SizeValue(); + auto [base_hook_address, hook_imm12] = SplitJitAddress( + reinterpret_cast64<uint64_t>(Runtime::Current()->GetInstrumentation()) + hook_offset); + __ LoadConst64(tmp, base_hook_address); + __ Lbu(tmp, tmp, hook_imm12); + // Check if there are any method entry / exit listeners. If no, continue. + __ Beqz(tmp, slow_path->GetExitLabel()); + // Check if there are any slow (jvmti / trace with thread cpu time) method entry / exit listeners. + // If yes, just take the slow path. + static_assert(instrumentation::Instrumentation::kFastTraceListeners == 1u); + __ Addi(tmp, tmp, -1); + __ Bnez(tmp, slow_path->GetEntryLabel()); + + // Check if there is place in the buffer to store a new entry, if no, take the slow path. + int32_t trace_buffer_index_offset = + Thread::TraceBufferIndexOffset<kRiscv64PointerSize>().Int32Value(); + __ Loadd(tmp, TR, trace_buffer_index_offset); + __ Addi(tmp, tmp, -dchecked_integral_cast<int32_t>(kNumEntriesForWallClock)); + __ Bltz(tmp, slow_path->GetEntryLabel()); + + // Update the index in the `Thread`. + __ Stored(tmp, TR, trace_buffer_index_offset); + + // Allocate second core scratch register. We can no longer use `Stored()` + // and similar macro instructions because there is no core scratch register left. + XRegister tmp2 = temps.AllocateXRegister(); + + // Calculate the entry address in the buffer. + // /*addr*/ tmp = TR->GetMethodTraceBuffer() + sizeof(void*) * /*index*/ tmp; + __ Loadd(tmp2, TR, Thread::TraceBufferPtrOffset<kRiscv64PointerSize>().SizeValue()); + __ Sh3Add(tmp, tmp, tmp2); + + // Record method pointer and trace action. + __ Ld(tmp2, SP, 0); + // Use last two bits to encode trace method action. For MethodEntry it is 0 + // so no need to set the bits since they are 0 already. + DCHECK_GE(ArtMethod::Alignment(kRuntimePointerSize), static_cast<size_t>(4)); + static_assert(enum_cast<int32_t>(TraceAction::kTraceMethodEnter) == 0); + static_assert(enum_cast<int32_t>(TraceAction::kTraceMethodExit) == 1); + if (instruction->IsMethodExitHook()) { + __ Ori(tmp2, tmp2, enum_cast<int32_t>(TraceAction::kTraceMethodExit)); + } + static_assert(IsInt<12>(kMethodOffsetInBytes)); // No free scratch register for `Stored()`. + __ Sd(tmp2, tmp, kMethodOffsetInBytes); + + // Record the timestamp. + __ RdTime(tmp2); + static_assert(IsInt<12>(kTimestampOffsetInBytes)); // No free scratch register for `Stored()`. 
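+ // Store the RDTIME value as the record's timestamp. Together with the
+ // method-and-action word stored above, this presumably fills the
+ // kNumEntriesForWallClock slots reserved by the index update earlier.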
+ __ Sd(tmp2, tmp, kTimestampOffsetInBytes); + + __ Bind(slow_path->GetExitLabel()); +} + +void LocationsBuilderRISCV64::VisitAbove(HAbove* instruction) { + HandleCondition(instruction); +} + +void InstructionCodeGeneratorRISCV64::VisitAbove(HAbove* instruction) { + HandleCondition(instruction); +} + +void LocationsBuilderRISCV64::VisitAboveOrEqual(HAboveOrEqual* instruction) { + HandleCondition(instruction); +} + +void InstructionCodeGeneratorRISCV64::VisitAboveOrEqual(HAboveOrEqual* instruction) { + HandleCondition(instruction); +} + +void LocationsBuilderRISCV64::VisitAbs(HAbs* abs) { + LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(abs); + switch (abs->GetResultType()) { + case DataType::Type::kInt32: + case DataType::Type::kInt64: + locations->SetInAt(0, Location::RequiresRegister()); + locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); + break; + case DataType::Type::kFloat32: + case DataType::Type::kFloat64: + locations->SetInAt(0, Location::RequiresFpuRegister()); + locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap); + break; + default: + LOG(FATAL) << "Unexpected abs type " << abs->GetResultType(); + } +} + +void InstructionCodeGeneratorRISCV64::VisitAbs(HAbs* abs) { + LocationSummary* locations = abs->GetLocations(); + switch (abs->GetResultType()) { + case DataType::Type::kInt32: { + XRegister in = locations->InAt(0).AsRegister<XRegister>(); + XRegister out = locations->Out().AsRegister<XRegister>(); + ScratchRegisterScope srs(GetAssembler()); + XRegister tmp = srs.AllocateXRegister(); + __ Sraiw(tmp, in, 31); + __ Xor(out, in, tmp); + __ Subw(out, out, tmp); + break; + } + case DataType::Type::kInt64: { + XRegister in = locations->InAt(0).AsRegister<XRegister>(); + XRegister out = locations->Out().AsRegister<XRegister>(); + ScratchRegisterScope srs(GetAssembler()); + XRegister tmp = srs.AllocateXRegister(); + __ Srai(tmp, in, 63); + __ Xor(out, in, tmp); + __ Sub(out, out, tmp); + break; + } + case DataType::Type::kFloat32: + case DataType::Type::kFloat64: + FAbs(locations->Out().AsFpuRegister<FRegister>(), + locations->InAt(0).AsFpuRegister<FRegister>(), + abs->GetResultType()); + break; + default: + LOG(FATAL) << "Unexpected abs type " << abs->GetResultType(); + } +} + +void LocationsBuilderRISCV64::VisitAdd(HAdd* instruction) { + HandleBinaryOp(instruction); +} + +void InstructionCodeGeneratorRISCV64::VisitAdd(HAdd* instruction) { + HandleBinaryOp(instruction); +} + +void LocationsBuilderRISCV64::VisitAnd(HAnd* instruction) { + HandleBinaryOp(instruction); +} + +void InstructionCodeGeneratorRISCV64::VisitAnd(HAnd* instruction) { + HandleBinaryOp(instruction); +} + +void LocationsBuilderRISCV64::VisitArrayGet(HArrayGet* instruction) { + DataType::Type type = instruction->GetType(); + bool object_array_get_with_read_barrier = + (type == DataType::Type::kReference) && codegen_->EmitReadBarrier(); + LocationSummary* locations = new (GetGraph()->GetAllocator()) + LocationSummary(instruction, + object_array_get_with_read_barrier ? 
LocationSummary::kCallOnSlowPath : + LocationSummary::kNoCall); + locations->SetInAt(0, Location::RequiresRegister()); + locations->SetInAt(1, Location::RegisterOrConstant(instruction->InputAt(1))); + if (DataType::IsFloatingPointType(type)) { + locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap); + } else { + // The output overlaps in the case of an object array get with + // read barriers enabled: we do not want the move to overwrite the + // array's location, as we need it to emit the read barrier. + locations->SetOut( + Location::RequiresRegister(), + object_array_get_with_read_barrier ? Location::kOutputOverlap : Location::kNoOutputOverlap); + } + if (object_array_get_with_read_barrier && kUseBakerReadBarrier) { + locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty()); // No caller-save registers. + // We need a temporary register for the read barrier marking slow + // path in CodeGeneratorRISCV64::GenerateArrayLoadWithBakerReadBarrier. + locations->AddTemp(Location::RequiresRegister()); + } +} + +void InstructionCodeGeneratorRISCV64::VisitArrayGet(HArrayGet* instruction) { + LocationSummary* locations = instruction->GetLocations(); + Location obj_loc = locations->InAt(0); + XRegister obj = obj_loc.AsRegister<XRegister>(); + Location out_loc = locations->Out(); + Location index = locations->InAt(1); + uint32_t data_offset = CodeGenerator::GetArrayDataOffset(instruction); + DataType::Type type = instruction->GetType(); + const bool maybe_compressed_char_at = + mirror::kUseStringCompression && instruction->IsStringCharAt(); + + Riscv64Label string_char_at_done; + if (maybe_compressed_char_at) { + DCHECK_EQ(type, DataType::Type::kUint16); + uint32_t count_offset = mirror::String::CountOffset().Uint32Value(); + Riscv64Label uncompressed_load; + { + ScratchRegisterScope srs(GetAssembler()); + XRegister tmp = srs.AllocateXRegister(); + __ Loadw(tmp, obj, count_offset); + codegen_->MaybeRecordImplicitNullCheck(instruction); + __ Andi(tmp, tmp, 0x1); + static_assert(static_cast<uint32_t>(mirror::StringCompressionFlag::kCompressed) == 0u, + "Expecting 0=compressed, 1=uncompressed"); + __ Bnez(tmp, &uncompressed_load); + } + XRegister out = out_loc.AsRegister<XRegister>(); + if (index.IsConstant()) { + int32_t const_index = index.GetConstant()->AsIntConstant()->GetValue(); + __ Loadbu(out, obj, data_offset + const_index); + } else { + __ Add(out, obj, index.AsRegister<XRegister>()); + __ Loadbu(out, out, data_offset); + } + __ J(&string_char_at_done); + __ Bind(&uncompressed_load); + } + + if (type == DataType::Type::kReference && codegen_->EmitBakerReadBarrier()) { + static_assert( + sizeof(mirror::HeapReference<mirror::Object>) == sizeof(int32_t), + "art::mirror::HeapReference<art::mirror::Object> and int32_t have different sizes."); + // /* HeapReference<Object> */ out = + // *(obj + data_offset + index * sizeof(HeapReference<Object>)) + // Note that a potential implicit null check could be handled in these + // `CodeGeneratorRISCV64::Generate{Array,Field}LoadWithBakerReadBarrier()` calls + // but we currently do not support implicit null checks on `HArrayGet`. + DCHECK(!instruction->CanDoImplicitNullCheckOn(instruction->InputAt(0))); + Location temp = locations->GetTemp(0); + if (index.IsConstant()) { + // Array load with a constant index can be treated as a field load. 
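+ // The element offset is folded into the field offset: for example, index 2
+ // with 4-byte heap references contributes (2 << shift) = 8 bytes on top of
+ // `data_offset`, so the Baker read barrier field-load path can be reused.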
+ static constexpr size_t shift = DataType::SizeShift(DataType::Type::kReference); + size_t offset = (index.GetConstant()->AsIntConstant()->GetValue() << shift) + data_offset; + codegen_->GenerateFieldLoadWithBakerReadBarrier(instruction, + out_loc, + obj, + offset, + temp, + /* needs_null_check= */ false); + } else { + codegen_->GenerateArrayLoadWithBakerReadBarrier(instruction, + out_loc, + obj, + data_offset, + index, + temp, + /* needs_null_check= */ false); + } + } else if (index.IsConstant()) { + int32_t const_index = index.GetConstant()->AsIntConstant()->GetValue(); + int32_t offset = data_offset + (const_index << DataType::SizeShift(type)); + Load(out_loc, obj, offset, type); + if (!maybe_compressed_char_at) { + codegen_->MaybeRecordImplicitNullCheck(instruction); + } + if (type == DataType::Type::kReference) { + DCHECK(!codegen_->EmitBakerReadBarrier()); + // If read barriers are enabled, emit read barriers other than Baker's using + // a slow path (and also unpoison the loaded reference, if heap poisoning is enabled). + codegen_->MaybeGenerateReadBarrierSlow(instruction, out_loc, out_loc, obj_loc, offset); + } + } else { + ScratchRegisterScope srs(GetAssembler()); + XRegister tmp = srs.AllocateXRegister(); + ShNAdd(tmp, index.AsRegister<XRegister>(), obj, type); + Load(out_loc, tmp, data_offset, type); + if (!maybe_compressed_char_at) { + codegen_->MaybeRecordImplicitNullCheck(instruction); + } + if (type == DataType::Type::kReference) { + DCHECK(!codegen_->EmitBakerReadBarrier()); + // If read barriers are enabled, emit read barriers other than Baker's using + // a slow path (and also unpoison the loaded reference, if heap poisoning is enabled). + codegen_->MaybeGenerateReadBarrierSlow( + instruction, out_loc, out_loc, obj_loc, data_offset, index); + } + } + + if (maybe_compressed_char_at) { + __ Bind(&string_char_at_done); + } +} + +void LocationsBuilderRISCV64::VisitArrayLength(HArrayLength* instruction) { + LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(instruction); + locations->SetInAt(0, Location::RequiresRegister()); + locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); +} + +void InstructionCodeGeneratorRISCV64::VisitArrayLength(HArrayLength* instruction) { + LocationSummary* locations = instruction->GetLocations(); + uint32_t offset = CodeGenerator::GetArrayLengthOffset(instruction); + XRegister obj = locations->InAt(0).AsRegister<XRegister>(); + XRegister out = locations->Out().AsRegister<XRegister>(); + __ Loadwu(out, obj, offset); // Unsigned for string length; does not matter for other arrays. + codegen_->MaybeRecordImplicitNullCheck(instruction); + // Mask out compression flag from String's array length. + if (mirror::kUseStringCompression && instruction->IsStringLength()) { + __ Srli(out, out, 1u); + } +} + +void LocationsBuilderRISCV64::VisitArraySet(HArraySet* instruction) { + bool needs_type_check = instruction->NeedsTypeCheck(); + LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary( + instruction, + needs_type_check ? 
LocationSummary::kCallOnSlowPath : LocationSummary::kNoCall); + locations->SetInAt(0, Location::RequiresRegister()); + locations->SetInAt(1, Location::RegisterOrConstant(instruction->InputAt(1))); + locations->SetInAt(2, ValueLocationForStore(instruction->GetValue())); +} + +void InstructionCodeGeneratorRISCV64::VisitArraySet(HArraySet* instruction) { + LocationSummary* locations = instruction->GetLocations(); + XRegister array = locations->InAt(0).AsRegister<XRegister>(); + Location index = locations->InAt(1); + Location value = locations->InAt(2); + DataType::Type value_type = instruction->GetComponentType(); + bool needs_type_check = instruction->NeedsTypeCheck(); + bool needs_write_barrier = + CodeGenerator::StoreNeedsWriteBarrier(value_type, instruction->GetValue()); + size_t data_offset = mirror::Array::DataOffset(DataType::Size(value_type)).Uint32Value(); + SlowPathCodeRISCV64* slow_path = nullptr; + + if (needs_write_barrier) { + DCHECK_EQ(value_type, DataType::Type::kReference); + DCHECK(!value.IsConstant()); + Riscv64Label do_store; + + bool can_value_be_null = instruction->GetValueCanBeNull(); + if (can_value_be_null) { + __ Beqz(value.AsRegister<XRegister>(), &do_store); + } + + if (needs_type_check) { + slow_path = new (codegen_->GetScopedAllocator()) ArraySetSlowPathRISCV64(instruction); + codegen_->AddSlowPath(slow_path); + + uint32_t class_offset = mirror::Object::ClassOffset().Int32Value(); + uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value(); + uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value(); + + ScratchRegisterScope srs(GetAssembler()); + XRegister temp1 = srs.AllocateXRegister(); + XRegister temp2 = srs.AllocateXRegister(); + + // Note that when read barriers are enabled, the type checks are performed + // without read barriers. This is fine, even in the case where a class object + // is in the from-space after the flip, as a comparison involving such a type + // would not produce a false positive; it may of course produce a false + // negative, in which case we would take the ArraySet slow path. + + // /* HeapReference<Class> */ temp1 = array->klass_ + __ Loadwu(temp1, array, class_offset); + codegen_->MaybeRecordImplicitNullCheck(instruction); + codegen_->MaybeUnpoisonHeapReference(temp1); + + // /* HeapReference<Class> */ temp2 = temp1->component_type_ + __ Loadwu(temp2, temp1, component_offset); + // /* HeapReference<Class> */ temp1 = value->klass_ + __ Loadwu(temp1, value.AsRegister<XRegister>(), class_offset); + // If heap poisoning is enabled, no need to unpoison `temp1` + // nor `temp2`, as we are comparing two poisoned references. + if (instruction->StaticTypeOfArrayIsObjectArray()) { + Riscv64Label do_put; + __ Beq(temp1, temp2, &do_put); + // If heap poisoning is enabled, the `temp2` reference has + // not been unpoisoned yet; unpoison it now. + codegen_->MaybeUnpoisonHeapReference(temp2); + + // /* HeapReference<Class> */ temp1 = temp2->super_class_ + __ Loadwu(temp1, temp2, super_offset); + // If heap poisoning is enabled, no need to unpoison + // `temp1`, as we are comparing against null below. 
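+ // A null super class means the component type is java.lang.Object, so any
+ // reference may be stored and we can fall through to the store; otherwise
+ // defer to the slow path for the full check.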
+ __ Bnez(temp1, slow_path->GetEntryLabel()); + __ Bind(&do_put); + } else { + __ Bne(temp1, temp2, slow_path->GetEntryLabel()); + } + } + + if (instruction->GetWriteBarrierKind() != WriteBarrierKind::kDontEmit) { + DCHECK_EQ(instruction->GetWriteBarrierKind(), WriteBarrierKind::kEmitNoNullCheck) + << " Already null checked so we shouldn't do it again."; + codegen_->MarkGCCard(array, value.AsRegister<XRegister>(), /* value_can_be_null= */ false); + } + + if (can_value_be_null) { + __ Bind(&do_store); + } + } + + if (index.IsConstant()) { + int32_t const_index = index.GetConstant()->AsIntConstant()->GetValue(); + int32_t offset = data_offset + (const_index << DataType::SizeShift(value_type)); + Store(value, array, offset, value_type); + } else { + ScratchRegisterScope srs(GetAssembler()); + XRegister tmp = srs.AllocateXRegister(); + ShNAdd(tmp, index.AsRegister<XRegister>(), array, value_type); + Store(value, tmp, data_offset, value_type); + } + // There must be no instructions between the `Store()` and the `MaybeRecordImplicitNullCheck()`. + // We can avoid this if the type check makes the null check unconditionally. + DCHECK_IMPLIES(needs_type_check, needs_write_barrier); + if (!(needs_type_check && !instruction->GetValueCanBeNull())) { + codegen_->MaybeRecordImplicitNullCheck(instruction); + } + + if (slow_path != nullptr) { + __ Bind(slow_path->GetExitLabel()); + } +} + +void LocationsBuilderRISCV64::VisitBelow(HBelow* instruction) { + HandleCondition(instruction); +} + +void InstructionCodeGeneratorRISCV64::VisitBelow(HBelow* instruction) { + HandleCondition(instruction); +} + +void LocationsBuilderRISCV64::VisitBelowOrEqual(HBelowOrEqual* instruction) { + HandleCondition(instruction); +} + +void InstructionCodeGeneratorRISCV64::VisitBelowOrEqual(HBelowOrEqual* instruction) { + HandleCondition(instruction); +} + +void LocationsBuilderRISCV64::VisitBooleanNot(HBooleanNot* instruction) { + LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(instruction); + locations->SetInAt(0, Location::RequiresRegister()); + locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); +} + +void InstructionCodeGeneratorRISCV64::VisitBooleanNot(HBooleanNot* instruction) { + LocationSummary* locations = instruction->GetLocations(); + __ Xori(locations->Out().AsRegister<XRegister>(), locations->InAt(0).AsRegister<XRegister>(), 1); +} + +void LocationsBuilderRISCV64::VisitBoundsCheck(HBoundsCheck* instruction) { + RegisterSet caller_saves = RegisterSet::Empty(); + InvokeRuntimeCallingConvention calling_convention; + caller_saves.Add(Location::RegisterLocation(calling_convention.GetRegisterAt(0))); + caller_saves.Add(Location::RegisterLocation(calling_convention.GetRegisterAt(1))); + LocationSummary* locations = codegen_->CreateThrowingSlowPathLocations(instruction, caller_saves); + + HInstruction* index = instruction->InputAt(0); + HInstruction* length = instruction->InputAt(1); + + bool const_index = false; + bool const_length = false; + + if (length->IsConstant()) { + if (index->IsConstant()) { + const_index = true; + const_length = true; + } else { + int32_t length_value = length->AsIntConstant()->GetValue(); + if (length_value == 0 || length_value == 1) { + const_length = true; + } + } + } else if (index->IsConstant()) { + int32_t index_value = index->AsIntConstant()->GetValue(); + if (index_value <= 0) { + const_index = true; + } + } + + locations->SetInAt( + 0, + const_index ? 
Location::ConstantLocation(index) : Location::RequiresRegister()); + locations->SetInAt( + 1, + const_length ? Location::ConstantLocation(length) : Location::RequiresRegister()); +} + +void InstructionCodeGeneratorRISCV64::VisitBoundsCheck(HBoundsCheck* instruction) { + LocationSummary* locations = instruction->GetLocations(); + Location index_loc = locations->InAt(0); + Location length_loc = locations->InAt(1); + + if (length_loc.IsConstant()) { + int32_t length = length_loc.GetConstant()->AsIntConstant()->GetValue(); + if (index_loc.IsConstant()) { + int32_t index = index_loc.GetConstant()->AsIntConstant()->GetValue(); + if (index < 0 || index >= length) { + BoundsCheckSlowPathRISCV64* slow_path = + new (codegen_->GetScopedAllocator()) BoundsCheckSlowPathRISCV64(instruction); + codegen_->AddSlowPath(slow_path); + __ J(slow_path->GetEntryLabel()); + } else { + // Nothing to be done. + } + return; + } + + BoundsCheckSlowPathRISCV64* slow_path = + new (codegen_->GetScopedAllocator()) BoundsCheckSlowPathRISCV64(instruction); + codegen_->AddSlowPath(slow_path); + XRegister index = index_loc.AsRegister<XRegister>(); + if (length == 0) { + __ J(slow_path->GetEntryLabel()); + } else { + DCHECK_EQ(length, 1); + __ Bnez(index, slow_path->GetEntryLabel()); + } + } else { + XRegister length = length_loc.AsRegister<XRegister>(); + BoundsCheckSlowPathRISCV64* slow_path = + new (codegen_->GetScopedAllocator()) BoundsCheckSlowPathRISCV64(instruction); + codegen_->AddSlowPath(slow_path); + if (index_loc.IsConstant()) { + int32_t index = index_loc.GetConstant()->AsIntConstant()->GetValue(); + if (index < 0) { + __ J(slow_path->GetEntryLabel()); + } else { + DCHECK_EQ(index, 0); + __ Blez(length, slow_path->GetEntryLabel()); + } + } else { + XRegister index = index_loc.AsRegister<XRegister>(); + __ Bgeu(index, length, slow_path->GetEntryLabel()); + } + } +} + +void LocationsBuilderRISCV64::VisitBoundType([[maybe_unused]] HBoundType* instruction) { + // Nothing to do, this should be removed during prepare for register allocator. + LOG(FATAL) << "Unreachable"; +} + +void InstructionCodeGeneratorRISCV64::VisitBoundType([[maybe_unused]] HBoundType* instruction) { + // Nothing to do, this should be removed during prepare for register allocator. + LOG(FATAL) << "Unreachable"; +} + +// Temp is used for read barrier. +static size_t NumberOfInstanceOfTemps(bool emit_read_barrier, TypeCheckKind type_check_kind) { + if (emit_read_barrier && + (kUseBakerReadBarrier || + type_check_kind == TypeCheckKind::kAbstractClassCheck || + type_check_kind == TypeCheckKind::kClassHierarchyCheck || + type_check_kind == TypeCheckKind::kArrayObjectCheck)) { + return 1; + } + return 0; +} + +// Interface case has 3 temps, one for holding the number of interfaces, one for the current +// interface pointer, one for loading the current interface. +// The other checks have one temp for loading the object's class and maybe a temp for read barrier. 
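+// Under these rules an interface check always gets 3 temps, while the other
+// check kinds get 1 temp plus possibly 1 more for the read barrier, as
+// computed by `NumberOfInstanceOfTemps()` above.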
+static size_t NumberOfCheckCastTemps(bool emit_read_barrier, TypeCheckKind type_check_kind) { + if (type_check_kind == TypeCheckKind::kInterfaceCheck) { + return 3; + } + return 1 + NumberOfInstanceOfTemps(emit_read_barrier, type_check_kind); +} + +void LocationsBuilderRISCV64::VisitCheckCast(HCheckCast* instruction) { + TypeCheckKind type_check_kind = instruction->GetTypeCheckKind(); + LocationSummary::CallKind call_kind = codegen_->GetCheckCastCallKind(instruction); + LocationSummary* locations = + new (GetGraph()->GetAllocator()) LocationSummary(instruction, call_kind); + locations->SetInAt(0, Location::RequiresRegister()); + if (type_check_kind == TypeCheckKind::kBitstringCheck) { + locations->SetInAt(1, Location::ConstantLocation(instruction->InputAt(1))); + locations->SetInAt(2, Location::ConstantLocation(instruction->InputAt(2))); + locations->SetInAt(3, Location::ConstantLocation(instruction->InputAt(3))); + } else { + locations->SetInAt(1, Location::RequiresRegister()); + } + locations->AddRegisterTemps(NumberOfCheckCastTemps(codegen_->EmitReadBarrier(), type_check_kind)); +} + +void InstructionCodeGeneratorRISCV64::VisitCheckCast(HCheckCast* instruction) { +TypeCheckKind type_check_kind = instruction->GetTypeCheckKind(); + LocationSummary* locations = instruction->GetLocations(); + Location obj_loc = locations->InAt(0); + XRegister obj = obj_loc.AsRegister<XRegister>(); + Location cls = (type_check_kind == TypeCheckKind::kBitstringCheck) + ? Location::NoLocation() + : locations->InAt(1); + Location temp_loc = locations->GetTemp(0); + XRegister temp = temp_loc.AsRegister<XRegister>(); + const size_t num_temps = NumberOfCheckCastTemps(codegen_->EmitReadBarrier(), type_check_kind); + DCHECK_GE(num_temps, 1u); + DCHECK_LE(num_temps, 3u); + Location maybe_temp2_loc = (num_temps >= 2) ? locations->GetTemp(1) : Location::NoLocation(); + Location maybe_temp3_loc = (num_temps >= 3) ? locations->GetTemp(2) : Location::NoLocation(); + const uint32_t class_offset = mirror::Object::ClassOffset().Int32Value(); + const uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value(); + const uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value(); + const uint32_t primitive_offset = mirror::Class::PrimitiveTypeOffset().Int32Value(); + const uint32_t iftable_offset = mirror::Class::IfTableOffset().Uint32Value(); + const uint32_t array_length_offset = mirror::Array::LengthOffset().Uint32Value(); + const uint32_t object_array_data_offset = + mirror::Array::DataOffset(kHeapReferenceSize).Uint32Value(); + Riscv64Label done; + + bool is_type_check_slow_path_fatal = codegen_->IsTypeCheckSlowPathFatal(instruction); + SlowPathCodeRISCV64* slow_path = + new (codegen_->GetScopedAllocator()) TypeCheckSlowPathRISCV64( + instruction, is_type_check_slow_path_fatal); + codegen_->AddSlowPath(slow_path); + + // Avoid this check if we know `obj` is not null. + if (instruction->MustDoNullCheck()) { + __ Beqz(obj, &done); + } + + switch (type_check_kind) { + case TypeCheckKind::kExactCheck: + case TypeCheckKind::kArrayCheck: { + // /* HeapReference<Class> */ temp = obj->klass_ + GenerateReferenceLoadTwoRegisters(instruction, + temp_loc, + obj_loc, + class_offset, + maybe_temp2_loc, + kWithoutReadBarrier); + // Jump to slow path for throwing the exception or doing a + // more involved array check. 
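+ // Both kExactCheck and kArrayCheck use a single class-equality compare on
+ // the fast path; everything else is resolved by the slow path entered below.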
+ __ Bne(temp, cls.AsRegister<XRegister>(), slow_path->GetEntryLabel()); + break; + } + + case TypeCheckKind::kAbstractClassCheck: { + // /* HeapReference<Class> */ temp = obj->klass_ + GenerateReferenceLoadTwoRegisters(instruction, + temp_loc, + obj_loc, + class_offset, + maybe_temp2_loc, + kWithoutReadBarrier); + // If the class is abstract, we eagerly fetch the super class of the + // object to avoid doing a comparison we know will fail. + Riscv64Label loop; + __ Bind(&loop); + // /* HeapReference<Class> */ temp = temp->super_class_ + GenerateReferenceLoadOneRegister(instruction, + temp_loc, + super_offset, + maybe_temp2_loc, + kWithoutReadBarrier); + // If the class reference currently in `temp` is null, jump to the slow path to throw the + // exception. + __ Beqz(temp, slow_path->GetEntryLabel()); + // Otherwise, compare the classes. + __ Bne(temp, cls.AsRegister<XRegister>(), &loop); + break; + } + + case TypeCheckKind::kClassHierarchyCheck: { + // /* HeapReference<Class> */ temp = obj->klass_ + GenerateReferenceLoadTwoRegisters(instruction, + temp_loc, + obj_loc, + class_offset, + maybe_temp2_loc, + kWithoutReadBarrier); + // Walk over the class hierarchy to find a match. + Riscv64Label loop; + __ Bind(&loop); + __ Beq(temp, cls.AsRegister<XRegister>(), &done); + // /* HeapReference<Class> */ temp = temp->super_class_ + GenerateReferenceLoadOneRegister(instruction, + temp_loc, + super_offset, + maybe_temp2_loc, + kWithoutReadBarrier); + // If the class reference currently in `temp` is null, jump to the slow path to throw the + // exception. Otherwise, jump to the beginning of the loop. + __ Bnez(temp, &loop); + __ J(slow_path->GetEntryLabel()); + break; + } + + case TypeCheckKind::kArrayObjectCheck: { + // /* HeapReference<Class> */ temp = obj->klass_ + GenerateReferenceLoadTwoRegisters(instruction, + temp_loc, + obj_loc, + class_offset, + maybe_temp2_loc, + kWithoutReadBarrier); + // Do an exact check. + __ Beq(temp, cls.AsRegister<XRegister>(), &done); + // Otherwise, we need to check that the object's class is a non-primitive array. + // /* HeapReference<Class> */ temp = temp->component_type_ + GenerateReferenceLoadOneRegister(instruction, + temp_loc, + component_offset, + maybe_temp2_loc, + kWithoutReadBarrier); + // If the component type is null, jump to the slow path to throw the exception. + __ Beqz(temp, slow_path->GetEntryLabel()); + // Otherwise, the object is indeed an array, further check that this component + // type is not a primitive type. + __ Loadhu(temp, temp, primitive_offset); + static_assert(Primitive::kPrimNot == 0, "Expected 0 for kPrimNot"); + __ Bnez(temp, slow_path->GetEntryLabel()); + break; + } + + case TypeCheckKind::kUnresolvedCheck: + // We always go into the type check slow path for the unresolved check case. + // We cannot directly call the CheckCast runtime entry point + // without resorting to a type checking slow path here (i.e. by + // calling InvokeRuntime directly), as it would require to + // assign fixed registers for the inputs of this HInstanceOf + // instruction (following the runtime calling convention), which + // might be cluttered by the potential first read barrier + // emission at the beginning of this method. + __ J(slow_path->GetEntryLabel()); + break; + + case TypeCheckKind::kInterfaceCheck: { + // Avoid read barriers to improve performance of the fast path. We can not get false + // positives by doing this. False negatives are handled by the slow path. 
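+ // The loop below walks the object's IfTable, which presumably stores
+ // (interface class, method array) pairs; hence the stride of
+ // 2 * kHeapReferenceSize and the decrement of the length counter by 2.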
+ // /* HeapReference<Class> */ temp = obj->klass_ + GenerateReferenceLoadTwoRegisters(instruction, + temp_loc, + obj_loc, + class_offset, + maybe_temp2_loc, + kWithoutReadBarrier); + // /* HeapReference<Class> */ temp = temp->iftable_ + GenerateReferenceLoadOneRegister(instruction, + temp_loc, + iftable_offset, + maybe_temp2_loc, + kWithoutReadBarrier); + XRegister temp2 = maybe_temp2_loc.AsRegister<XRegister>(); + XRegister temp3 = maybe_temp3_loc.AsRegister<XRegister>(); + // Iftable is never null. + __ Loadw(temp2, temp, array_length_offset); + // Loop through the iftable and check if any class matches. + Riscv64Label loop; + __ Bind(&loop); + __ Beqz(temp2, slow_path->GetEntryLabel()); + __ Lwu(temp3, temp, object_array_data_offset); + codegen_->MaybeUnpoisonHeapReference(temp3); + // Go to next interface. + __ Addi(temp, temp, 2 * kHeapReferenceSize); + __ Addi(temp2, temp2, -2); + // Compare the classes and continue the loop if they do not match. + __ Bne(temp3, cls.AsRegister<XRegister>(), &loop); + break; + } + + case TypeCheckKind::kBitstringCheck: { + // /* HeapReference<Class> */ temp = obj->klass_ + GenerateReferenceLoadTwoRegisters(instruction, + temp_loc, + obj_loc, + class_offset, + maybe_temp2_loc, + kWithoutReadBarrier); + + GenerateBitstringTypeCheckCompare(instruction, temp); + __ Bnez(temp, slow_path->GetEntryLabel()); + break; + } + } + + __ Bind(&done); + __ Bind(slow_path->GetExitLabel()); +} + +void LocationsBuilderRISCV64::VisitClassTableGet(HClassTableGet* instruction) { + LocationSummary* locations = + new (GetGraph()->GetAllocator()) LocationSummary(instruction, LocationSummary::kNoCall); + locations->SetInAt(0, Location::RequiresRegister()); + locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); +} + +void InstructionCodeGeneratorRISCV64::VisitClassTableGet(HClassTableGet* instruction) { + LocationSummary* locations = instruction->GetLocations(); + XRegister in = locations->InAt(0).AsRegister<XRegister>(); + XRegister out = locations->Out().AsRegister<XRegister>(); + if (instruction->GetTableKind() == HClassTableGet::TableKind::kVTable) { + MemberOffset method_offset = + mirror::Class::EmbeddedVTableEntryOffset(instruction->GetIndex(), kRiscv64PointerSize); + __ Loadd(out, in, method_offset.SizeValue()); + } else { + uint32_t method_offset = dchecked_integral_cast<uint32_t>( + ImTable::OffsetOfElement(instruction->GetIndex(), kRiscv64PointerSize)); + __ Loadd(out, in, mirror::Class::ImtPtrOffset(kRiscv64PointerSize).Uint32Value()); + __ Loadd(out, out, method_offset); + } +} + +static int32_t GetExceptionTlsOffset() { + return Thread::ExceptionOffset<kRiscv64PointerSize>().Int32Value(); +} + +void LocationsBuilderRISCV64::VisitClearException(HClearException* instruction) { + new (GetGraph()->GetAllocator()) LocationSummary(instruction, LocationSummary::kNoCall); +} + +void InstructionCodeGeneratorRISCV64::VisitClearException( + [[maybe_unused]] HClearException* instruction) { + __ Stored(Zero, TR, GetExceptionTlsOffset()); +} + +void LocationsBuilderRISCV64::VisitClinitCheck(HClinitCheck* instruction) { + LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary( + instruction, LocationSummary::kCallOnSlowPath); + locations->SetInAt(0, Location::RequiresRegister()); + if (instruction->HasUses()) { + locations->SetOut(Location::SameAsFirstInput()); + } + // Rely on the type initialization to save everything we need. 
+ locations->SetCustomSlowPathCallerSaves(OneRegInReferenceOutSaveEverythingCallerSaves()); +} + +void InstructionCodeGeneratorRISCV64::VisitClinitCheck(HClinitCheck* instruction) { + // We assume the class is not null. + SlowPathCodeRISCV64* slow_path = new (codegen_->GetScopedAllocator()) LoadClassSlowPathRISCV64( + instruction->GetLoadClass(), instruction); + codegen_->AddSlowPath(slow_path); + GenerateClassInitializationCheck(slow_path, + instruction->GetLocations()->InAt(0).AsRegister<XRegister>()); +} + +void LocationsBuilderRISCV64::VisitCompare(HCompare* instruction) { + DataType::Type in_type = instruction->InputAt(0)->GetType(); + + LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(instruction); + + switch (in_type) { + case DataType::Type::kBool: + case DataType::Type::kUint8: + case DataType::Type::kInt8: + case DataType::Type::kUint16: + case DataType::Type::kInt16: + case DataType::Type::kInt32: + case DataType::Type::kInt64: + locations->SetInAt(0, Location::RequiresRegister()); + locations->SetInAt(1, RegisterOrZeroBitPatternLocation(instruction->InputAt(1))); + locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); + break; + + case DataType::Type::kFloat32: + case DataType::Type::kFloat64: + locations->SetInAt(0, Location::RequiresFpuRegister()); + locations->SetInAt(1, Location::RequiresFpuRegister()); + locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); + break; + + default: + LOG(FATAL) << "Unexpected type for compare operation " << in_type; + UNREACHABLE(); + } +} + +void InstructionCodeGeneratorRISCV64::VisitCompare(HCompare* instruction) { + LocationSummary* locations = instruction->GetLocations(); + XRegister result = locations->Out().AsRegister<XRegister>(); + DataType::Type in_type = instruction->InputAt(0)->GetType(); + + // 0 if: left == right + // 1 if: left > right + // -1 if: left < right + switch (in_type) { + case DataType::Type::kBool: + case DataType::Type::kUint8: + case DataType::Type::kInt8: + case DataType::Type::kUint16: + case DataType::Type::kInt16: + case DataType::Type::kInt32: + case DataType::Type::kInt64: { + XRegister left = locations->InAt(0).AsRegister<XRegister>(); + XRegister right = InputXRegisterOrZero(locations->InAt(1)); + ScratchRegisterScope srs(GetAssembler()); + XRegister tmp = srs.AllocateXRegister(); + __ Slt(tmp, left, right); + __ Slt(result, right, left); + __ Sub(result, result, tmp); + break; + } + + case DataType::Type::kFloat32: + case DataType::Type::kFloat64: { + FRegister left = locations->InAt(0).AsFpuRegister<FRegister>(); + FRegister right = locations->InAt(1).AsFpuRegister<FRegister>(); + ScratchRegisterScope srs(GetAssembler()); + XRegister tmp = srs.AllocateXRegister(); + if (instruction->IsGtBias()) { + // ((FLE l,r) ^ 1) - (FLT l,r); see `GenerateFpCondition()`. + FLe(tmp, left, right, in_type); + FLt(result, left, right, in_type); + __ Xori(tmp, tmp, 1); + __ Sub(result, tmp, result); + } else { + // ((FLE r,l) - 1) + (FLT r,l); see `GenerateFpCondition()`. 
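+ // Worked through: left < right gives (0 - 1) + 0 = -1, equality gives
+ // (1 - 1) + 0 = 0, left > right gives (1 - 1) + 1 = 1, and an unordered
+ // (NaN) comparison gives (0 - 1) + 0 = -1, the required lt-bias result.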
+ FLe(tmp, right, left, in_type); + FLt(result, right, left, in_type); + __ Addi(tmp, tmp, -1); + __ Add(result, result, tmp); + } + break; + } + + default: + LOG(FATAL) << "Unimplemented compare type " << in_type; + } +} + +void LocationsBuilderRISCV64::VisitConstructorFence(HConstructorFence* instruction) { + instruction->SetLocations(nullptr); +} + +void InstructionCodeGeneratorRISCV64::VisitConstructorFence( + [[maybe_unused]] HConstructorFence* instruction) { + codegen_->GenerateMemoryBarrier(MemBarrierKind::kStoreStore); +} + +void LocationsBuilderRISCV64::VisitCurrentMethod(HCurrentMethod* instruction) { + LocationSummary* locations = + new (GetGraph()->GetAllocator()) LocationSummary(instruction, LocationSummary::kNoCall); + locations->SetOut(Location::RegisterLocation(kArtMethodRegister)); +} + +void InstructionCodeGeneratorRISCV64::VisitCurrentMethod( + [[maybe_unused]] HCurrentMethod* instruction) { + // Nothing to do, the method is already at its location. +} + +void LocationsBuilderRISCV64::VisitShouldDeoptimizeFlag(HShouldDeoptimizeFlag* instruction) { + LocationSummary* locations = + new (GetGraph()->GetAllocator()) LocationSummary(instruction, LocationSummary::kNoCall); + locations->SetOut(Location::RequiresRegister()); +} + +void InstructionCodeGeneratorRISCV64::VisitShouldDeoptimizeFlag( + HShouldDeoptimizeFlag* instruction) { + __ Loadw(instruction->GetLocations()->Out().AsRegister<XRegister>(), + SP, + codegen_->GetStackOffsetOfShouldDeoptimizeFlag()); +} + +void LocationsBuilderRISCV64::VisitDeoptimize(HDeoptimize* instruction) { + LocationSummary* locations = new (GetGraph()->GetAllocator()) + LocationSummary(instruction, LocationSummary::kCallOnSlowPath); + InvokeRuntimeCallingConvention calling_convention; + RegisterSet caller_saves = RegisterSet::Empty(); + caller_saves.Add(Location::RegisterLocation(calling_convention.GetRegisterAt(0))); + locations->SetCustomSlowPathCallerSaves(caller_saves); + if (IsBooleanValueOrMaterializedCondition(instruction->InputAt(0))) { + locations->SetInAt(0, Location::RequiresRegister()); + } +} + +void InstructionCodeGeneratorRISCV64::VisitDeoptimize(HDeoptimize* instruction) { + SlowPathCodeRISCV64* slow_path = + deopt_slow_paths_.NewSlowPath<DeoptimizationSlowPathRISCV64>(instruction); + GenerateTestAndBranch(instruction, + /* condition_input_index= */ 0, + slow_path->GetEntryLabel(), + /* false_target= */ nullptr); +} + +void LocationsBuilderRISCV64::VisitDiv(HDiv* instruction) { + LocationSummary* locations = + new (GetGraph()->GetAllocator()) LocationSummary(instruction, LocationSummary::kNoCall); + switch (instruction->GetResultType()) { + case DataType::Type::kInt32: + case DataType::Type::kInt64: + locations->SetInAt(0, Location::RequiresRegister()); + locations->SetInAt(1, Location::RegisterOrConstant(instruction->InputAt(1))); + locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); + break; + + case DataType::Type::kFloat32: + case DataType::Type::kFloat64: + locations->SetInAt(0, Location::RequiresFpuRegister()); + locations->SetInAt(1, Location::RequiresFpuRegister()); + locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap); + break; + + default: + LOG(FATAL) << "Unexpected div type " << instruction->GetResultType(); + UNREACHABLE(); + } +} + +void InstructionCodeGeneratorRISCV64::VisitDiv(HDiv* instruction) { + DataType::Type type = instruction->GetType(); + LocationSummary* locations = instruction->GetLocations(); + + switch (type) { + case DataType::Type::kInt32: + case 
DataType::Type::kInt64: + GenerateDivRemIntegral(instruction); + break; + case DataType::Type::kFloat32: + case DataType::Type::kFloat64: { + FRegister dst = locations->Out().AsFpuRegister<FRegister>(); + FRegister lhs = locations->InAt(0).AsFpuRegister<FRegister>(); + FRegister rhs = locations->InAt(1).AsFpuRegister<FRegister>(); + FDiv(dst, lhs, rhs, type); + break; + } + default: + LOG(FATAL) << "Unexpected div type " << type; + UNREACHABLE(); + } +} + +void LocationsBuilderRISCV64::VisitDivZeroCheck(HDivZeroCheck* instruction) { + LocationSummary* locations = codegen_->CreateThrowingSlowPathLocations(instruction); + locations->SetInAt(0, Location::RegisterOrConstant(instruction->InputAt(0))); +} + +void InstructionCodeGeneratorRISCV64::VisitDivZeroCheck(HDivZeroCheck* instruction) { + SlowPathCodeRISCV64* slow_path = + new (codegen_->GetScopedAllocator()) DivZeroCheckSlowPathRISCV64(instruction); + codegen_->AddSlowPath(slow_path); + Location value = instruction->GetLocations()->InAt(0); + + DataType::Type type = instruction->GetType(); + + if (!DataType::IsIntegralType(type)) { + LOG(FATAL) << "Unexpected type " << type << " for DivZeroCheck."; + UNREACHABLE(); + } + + if (value.IsConstant()) { + int64_t divisor = codegen_->GetInt64ValueOf(value.GetConstant()->AsConstant()); + if (divisor == 0) { + __ J(slow_path->GetEntryLabel()); + } else { + // A division by a non-null constant is valid. We don't need to perform + // any check, so simply fall through. + } + } else { + __ Beqz(value.AsRegister<XRegister>(), slow_path->GetEntryLabel()); + } +} + +void LocationsBuilderRISCV64::VisitDoubleConstant(HDoubleConstant* instruction) { + LocationSummary* locations = + new (GetGraph()->GetAllocator()) LocationSummary(instruction, LocationSummary::kNoCall); + locations->SetOut(Location::ConstantLocation(instruction)); +} + +void InstructionCodeGeneratorRISCV64::VisitDoubleConstant( + [[maybe_unused]] HDoubleConstant* instruction) { + // Will be generated at use site. +} + +void LocationsBuilderRISCV64::VisitEqual(HEqual* instruction) { + HandleCondition(instruction); +} + +void InstructionCodeGeneratorRISCV64::VisitEqual(HEqual* instruction) { + HandleCondition(instruction); +} + +void LocationsBuilderRISCV64::VisitExit(HExit* instruction) { + instruction->SetLocations(nullptr); +} + +void InstructionCodeGeneratorRISCV64::VisitExit([[maybe_unused]] HExit* instruction) {} + +void LocationsBuilderRISCV64::VisitFloatConstant(HFloatConstant* instruction) { + LocationSummary* locations = + new (GetGraph()->GetAllocator()) LocationSummary(instruction, LocationSummary::kNoCall); + locations->SetOut(Location::ConstantLocation(instruction)); +} + +void InstructionCodeGeneratorRISCV64::VisitFloatConstant( + [[maybe_unused]] HFloatConstant* instruction) { + // Will be generated at use site. 
+} + +void LocationsBuilderRISCV64::VisitGoto(HGoto* instruction) { + instruction->SetLocations(nullptr); +} + +void InstructionCodeGeneratorRISCV64::VisitGoto(HGoto* instruction) { + HandleGoto(instruction, instruction->GetSuccessor()); +} + +void LocationsBuilderRISCV64::VisitGreaterThan(HGreaterThan* instruction) { + HandleCondition(instruction); +} + +void InstructionCodeGeneratorRISCV64::VisitGreaterThan(HGreaterThan* instruction) { + HandleCondition(instruction); +} + +void LocationsBuilderRISCV64::VisitGreaterThanOrEqual(HGreaterThanOrEqual* instruction) { + HandleCondition(instruction); +} + +void InstructionCodeGeneratorRISCV64::VisitGreaterThanOrEqual(HGreaterThanOrEqual* instruction) { + HandleCondition(instruction); +} + +void LocationsBuilderRISCV64::VisitIf(HIf* instruction) { + LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(instruction); + if (IsBooleanValueOrMaterializedCondition(instruction->InputAt(0))) { + locations->SetInAt(0, Location::RequiresRegister()); + if (GetGraph()->IsCompilingBaseline() && + codegen_->GetCompilerOptions().ProfileBranches() && + !Runtime::Current()->IsAotCompiler()) { + DCHECK(instruction->InputAt(0)->IsCondition()); + ProfilingInfo* info = GetGraph()->GetProfilingInfo(); + DCHECK(info != nullptr); + BranchCache* cache = info->GetBranchCache(instruction->GetDexPc()); + if (cache != nullptr) { + locations->AddTemp(Location::RequiresRegister()); + } + } + } +} + +void InstructionCodeGeneratorRISCV64::VisitIf(HIf* instruction) { + HBasicBlock* true_successor = instruction->IfTrueSuccessor(); + HBasicBlock* false_successor = instruction->IfFalseSuccessor(); + Riscv64Label* true_target = codegen_->GoesToNextBlock(instruction->GetBlock(), true_successor) + ? nullptr + : codegen_->GetLabelOf(true_successor); + Riscv64Label* false_target = codegen_->GoesToNextBlock(instruction->GetBlock(), false_successor) + ? nullptr + : codegen_->GetLabelOf(false_successor); + if (IsBooleanValueOrMaterializedCondition(instruction->InputAt(0))) { + if (GetGraph()->IsCompilingBaseline() && + codegen_->GetCompilerOptions().ProfileBranches() && + !Runtime::Current()->IsAotCompiler()) { + DCHECK(instruction->InputAt(0)->IsCondition()); + ProfilingInfo* info = GetGraph()->GetProfilingInfo(); + DCHECK(info != nullptr); + BranchCache* cache = info->GetBranchCache(instruction->GetDexPc()); + // Currently, not all If branches are profiled. 
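+ // The code below bumps one of the two 16-bit counters in the BranchCache
+ // (false then true, two bytes apart as the static_assert checks), indexing
+ // them with Sh1Add on the condition value and saturating rather than
+ // wrapping when a counter would exceed 16 bits.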
+ if (cache != nullptr) { + uint64_t address = + reinterpret_cast64<uint64_t>(cache) + BranchCache::FalseOffset().Int32Value(); + static_assert( + BranchCache::TrueOffset().Int32Value() - BranchCache::FalseOffset().Int32Value() == 2, + "Unexpected offsets for BranchCache"); + Riscv64Label done; + XRegister condition = instruction->GetLocations()->InAt(0).AsRegister<XRegister>(); + XRegister temp = instruction->GetLocations()->GetTemp(0).AsRegister<XRegister>(); + __ LoadConst64(temp, address); + __ Sh1Add(temp, condition, temp); + ScratchRegisterScope srs(GetAssembler()); + XRegister counter = srs.AllocateXRegister(); + __ Loadhu(counter, temp, 0); + __ Addi(counter, counter, 1); + { + ScratchRegisterScope srs2(GetAssembler()); + XRegister overflow = srs2.AllocateXRegister(); + __ Srli(overflow, counter, 16); + __ Bnez(overflow, &done); + } + __ Storeh(counter, temp, 0); + __ Bind(&done); + } + } + } + GenerateTestAndBranch(instruction, /* condition_input_index= */ 0, true_target, false_target); +} + +void LocationsBuilderRISCV64::VisitInstanceFieldGet(HInstanceFieldGet* instruction) { + HandleFieldGet(instruction); +} + +void InstructionCodeGeneratorRISCV64::VisitInstanceFieldGet(HInstanceFieldGet* instruction) { + HandleFieldGet(instruction, instruction->GetFieldInfo()); +} + +void LocationsBuilderRISCV64::VisitInstanceFieldSet(HInstanceFieldSet* instruction) { + HandleFieldSet(instruction); +} + +void InstructionCodeGeneratorRISCV64::VisitInstanceFieldSet(HInstanceFieldSet* instruction) { + HandleFieldSet(instruction, + instruction->GetFieldInfo(), + instruction->GetValueCanBeNull(), + instruction->GetWriteBarrierKind()); +} + +void LocationsBuilderRISCV64::VisitInstanceOf(HInstanceOf* instruction) { + LocationSummary::CallKind call_kind = LocationSummary::kNoCall; + TypeCheckKind type_check_kind = instruction->GetTypeCheckKind(); + bool baker_read_barrier_slow_path = false; + switch (type_check_kind) { + case TypeCheckKind::kExactCheck: + case TypeCheckKind::kAbstractClassCheck: + case TypeCheckKind::kClassHierarchyCheck: + case TypeCheckKind::kArrayObjectCheck: { + bool needs_read_barrier = codegen_->InstanceOfNeedsReadBarrier(instruction); + call_kind = needs_read_barrier ? LocationSummary::kCallOnSlowPath : LocationSummary::kNoCall; + baker_read_barrier_slow_path = kUseBakerReadBarrier && needs_read_barrier; + break; + } + case TypeCheckKind::kArrayCheck: + case TypeCheckKind::kUnresolvedCheck: + case TypeCheckKind::kInterfaceCheck: + call_kind = LocationSummary::kCallOnSlowPath; + break; + case TypeCheckKind::kBitstringCheck: + break; + } + + LocationSummary* locations = + new (GetGraph()->GetAllocator()) LocationSummary(instruction, call_kind); + if (baker_read_barrier_slow_path) { + locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty()); // No caller-save registers. + } + locations->SetInAt(0, Location::RequiresRegister()); + if (type_check_kind == TypeCheckKind::kBitstringCheck) { + locations->SetInAt(1, Location::ConstantLocation(instruction->InputAt(1))); + locations->SetInAt(2, Location::ConstantLocation(instruction->InputAt(2))); + locations->SetInAt(3, Location::ConstantLocation(instruction->InputAt(3))); + } else { + locations->SetInAt(1, Location::RequiresRegister()); + } + // The output does overlap inputs. + // Note that TypeCheckSlowPathRISCV64 uses this register too. 
+ locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap); + locations->AddRegisterTemps( + NumberOfInstanceOfTemps(codegen_->EmitReadBarrier(), type_check_kind)); +} + +void InstructionCodeGeneratorRISCV64::VisitInstanceOf(HInstanceOf* instruction) { + TypeCheckKind type_check_kind = instruction->GetTypeCheckKind(); + LocationSummary* locations = instruction->GetLocations(); + Location obj_loc = locations->InAt(0); + XRegister obj = obj_loc.AsRegister<XRegister>(); + Location cls = (type_check_kind == TypeCheckKind::kBitstringCheck) + ? Location::NoLocation() + : locations->InAt(1); + Location out_loc = locations->Out(); + XRegister out = out_loc.AsRegister<XRegister>(); + const size_t num_temps = NumberOfInstanceOfTemps(codegen_->EmitReadBarrier(), type_check_kind); + DCHECK_LE(num_temps, 1u); + Location maybe_temp_loc = (num_temps >= 1) ? locations->GetTemp(0) : Location::NoLocation(); + uint32_t class_offset = mirror::Object::ClassOffset().Int32Value(); + uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value(); + uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value(); + uint32_t primitive_offset = mirror::Class::PrimitiveTypeOffset().Int32Value(); + Riscv64Label done; + SlowPathCodeRISCV64* slow_path = nullptr; + + // Return 0 if `obj` is null. + // Avoid this check if we know `obj` is not null. + if (instruction->MustDoNullCheck()) { + __ Mv(out, Zero); + __ Beqz(obj, &done); + } + + switch (type_check_kind) { + case TypeCheckKind::kExactCheck: { + ReadBarrierOption read_barrier_option = + codegen_->ReadBarrierOptionForInstanceOf(instruction); + // /* HeapReference<Class> */ out = obj->klass_ + GenerateReferenceLoadTwoRegisters( + instruction, out_loc, obj_loc, class_offset, maybe_temp_loc, read_barrier_option); + // Classes must be equal for the instanceof to succeed. + __ Xor(out, out, cls.AsRegister<XRegister>()); + __ Seqz(out, out); + break; + } + + case TypeCheckKind::kAbstractClassCheck: { + ReadBarrierOption read_barrier_option = + codegen_->ReadBarrierOptionForInstanceOf(instruction); + // /* HeapReference<Class> */ out = obj->klass_ + GenerateReferenceLoadTwoRegisters( + instruction, out_loc, obj_loc, class_offset, maybe_temp_loc, read_barrier_option); + // If the class is abstract, we eagerly fetch the super class of the + // object to avoid doing a comparison we know will fail. + Riscv64Label loop; + __ Bind(&loop); + // /* HeapReference<Class> */ out = out->super_class_ + GenerateReferenceLoadOneRegister( + instruction, out_loc, super_offset, maybe_temp_loc, read_barrier_option); + // If `out` is null, we use it for the result, and jump to `done`. + __ Beqz(out, &done); + __ Bne(out, cls.AsRegister<XRegister>(), &loop); + __ LoadConst32(out, 1); + break; + } + + case TypeCheckKind::kClassHierarchyCheck: { + ReadBarrierOption read_barrier_option = + codegen_->ReadBarrierOptionForInstanceOf(instruction); + // /* HeapReference<Class> */ out = obj->klass_ + GenerateReferenceLoadTwoRegisters( + instruction, out_loc, obj_loc, class_offset, maybe_temp_loc, read_barrier_option); + // Walk over the class hierarchy to find a match. + Riscv64Label loop, success; + __ Bind(&loop); + __ Beq(out, cls.AsRegister<XRegister>(), &success); + // /* HeapReference<Class> */ out = out->super_class_ + GenerateReferenceLoadOneRegister( + instruction, out_loc, super_offset, maybe_temp_loc, read_barrier_option); + __ Bnez(out, &loop); + // If `out` is null, we use it for the result, and jump to `done`. 
+ __ J(&done); + __ Bind(&success); + __ LoadConst32(out, 1); + break; + } + + case TypeCheckKind::kArrayObjectCheck: { + ReadBarrierOption read_barrier_option = + codegen_->ReadBarrierOptionForInstanceOf(instruction); + // FIXME(riscv64): We currently have marking entrypoints for 29 registers. + // We need to either store entrypoint for register `N` in entry `N-A` where + // `A` can be up to 5 (Zero, RA, SP, GP, TP are not valid registers for + // marking), or define two more entrypoints, or request an additional temp + // from the register allocator instead of using a scratch register. + ScratchRegisterScope srs(GetAssembler()); + Location tmp = Location::RegisterLocation(srs.AllocateXRegister()); + // /* HeapReference<Class> */ tmp = obj->klass_ + GenerateReferenceLoadTwoRegisters( + instruction, tmp, obj_loc, class_offset, maybe_temp_loc, read_barrier_option); + // Do an exact check. + __ LoadConst32(out, 1); + __ Beq(tmp.AsRegister<XRegister>(), cls.AsRegister<XRegister>(), &done); + // Otherwise, we need to check that the object's class is a non-primitive array. + // /* HeapReference<Class> */ out = out->component_type_ + GenerateReferenceLoadTwoRegisters( + instruction, out_loc, tmp, component_offset, maybe_temp_loc, read_barrier_option); + // If `out` is null, we use it for the result, and jump to `done`. + __ Beqz(out, &done); + __ Loadhu(out, out, primitive_offset); + static_assert(Primitive::kPrimNot == 0, "Expected 0 for kPrimNot"); + __ Seqz(out, out); + break; + } + + case TypeCheckKind::kArrayCheck: { + // No read barrier since the slow path will retry upon failure. + // /* HeapReference<Class> */ out = obj->klass_ + GenerateReferenceLoadTwoRegisters( + instruction, out_loc, obj_loc, class_offset, maybe_temp_loc, kWithoutReadBarrier); + DCHECK(locations->OnlyCallsOnSlowPath()); + slow_path = new (codegen_->GetScopedAllocator()) + TypeCheckSlowPathRISCV64(instruction, /* is_fatal= */ false); + codegen_->AddSlowPath(slow_path); + __ Bne(out, cls.AsRegister<XRegister>(), slow_path->GetEntryLabel()); + __ LoadConst32(out, 1); + break; + } + + case TypeCheckKind::kUnresolvedCheck: + case TypeCheckKind::kInterfaceCheck: { + // Note that we indeed only call on slow path, but we always go + // into the slow path for the unresolved and interface check + // cases. + // + // We cannot directly call the InstanceofNonTrivial runtime + // entry point without resorting to a type checking slow path + // here (i.e. by calling InvokeRuntime directly), as it would + // require to assign fixed registers for the inputs of this + // HInstanceOf instruction (following the runtime calling + // convention), which might be cluttered by the potential first + // read barrier emission at the beginning of this method. + // + // TODO: Introduce a new runtime entry point taking the object + // to test (instead of its class) as argument, and let it deal + // with the read barrier issues. This will let us refactor this + // case of the `switch` code as it was previously (with a direct + // call to the runtime not using a type checking slow path). + // This should also be beneficial for the other cases above. 
+      DCHECK(locations->OnlyCallsOnSlowPath());
+      slow_path = new (codegen_->GetScopedAllocator()) TypeCheckSlowPathRISCV64(
+          instruction, /* is_fatal= */ false);
+      codegen_->AddSlowPath(slow_path);
+      __ J(slow_path->GetEntryLabel());
+      break;
+    }
+
+    case TypeCheckKind::kBitstringCheck: {
+      // /* HeapReference<Class> */ temp = obj->klass_
+      GenerateReferenceLoadTwoRegisters(
+          instruction, out_loc, obj_loc, class_offset, maybe_temp_loc, kWithoutReadBarrier);
+
+      GenerateBitstringTypeCheckCompare(instruction, out);
+      __ Seqz(out, out);
+      break;
+    }
+  }
+
+  __ Bind(&done);
+
+  if (slow_path != nullptr) {
+    __ Bind(slow_path->GetExitLabel());
+  }
+}
+
+void LocationsBuilderRISCV64::VisitIntConstant(HIntConstant* instruction) {
+  LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(instruction);
+  locations->SetOut(Location::ConstantLocation(instruction));
+}
+
+void InstructionCodeGeneratorRISCV64::VisitIntConstant([[maybe_unused]] HIntConstant* instruction) {
+  // Will be generated at use site.
+}
+
+void LocationsBuilderRISCV64::VisitIntermediateAddress(HIntermediateAddress* instruction) {
+  UNUSED(instruction);
+  LOG(FATAL) << "Unimplemented";
+}
+
+void InstructionCodeGeneratorRISCV64::VisitIntermediateAddress(HIntermediateAddress* instruction) {
+  UNUSED(instruction);
+  LOG(FATAL) << "Unimplemented";
+}
+
+void LocationsBuilderRISCV64::VisitInvokeUnresolved(HInvokeUnresolved* instruction) {
+  // The trampoline uses the same calling convention as dex calling conventions, except
+  // instead of loading arg0/A0 with the target Method*, arg0/A0 will contain the method_idx.
+  HandleInvoke(instruction);
+}
+
+void InstructionCodeGeneratorRISCV64::VisitInvokeUnresolved(HInvokeUnresolved* instruction) {
+  codegen_->GenerateInvokeUnresolvedRuntimeCall(instruction);
+}
+
+void LocationsBuilderRISCV64::VisitInvokeInterface(HInvokeInterface* instruction) {
+  HandleInvoke(instruction);
+  // Use T0 as the hidden argument for `art_quick_imt_conflict_trampoline`.
+  if (instruction->GetHiddenArgumentLoadKind() == MethodLoadKind::kRecursive) {
+    instruction->GetLocations()->SetInAt(instruction->GetNumberOfArguments() - 1,
+                                         Location::RegisterLocation(T0));
+  } else {
+    instruction->GetLocations()->AddTemp(Location::RegisterLocation(T0));
+  }
+}
+
+void InstructionCodeGeneratorRISCV64::VisitInvokeInterface(HInvokeInterface* instruction) {
+  LocationSummary* locations = instruction->GetLocations();
+  XRegister temp = locations->GetTemp(0).AsRegister<XRegister>();
+  XRegister receiver = locations->InAt(0).AsRegister<XRegister>();
+  int32_t class_offset = mirror::Object::ClassOffset().Int32Value();
+  Offset entry_point = ArtMethod::EntryPointFromQuickCompiledCodeOffset(kRiscv64PointerSize);
+
+  // /* HeapReference<Class> */ temp = receiver->klass_
+  __ Loadwu(temp, receiver, class_offset);
+  codegen_->MaybeRecordImplicitNullCheck(instruction);
+  // Instead of simply (possibly) unpoisoning `temp` here, we should
+  // emit a read barrier for the previous class reference load.
+  // However this is not required in practice, as this is an
+  // intermediate/temporary reference and because the current
+  // concurrent copying collector keeps the from-space memory
+  // intact/accessible until the end of the marking phase (the
+  // concurrent copying collector may not in the future).
+  codegen_->MaybeUnpoisonHeapReference(temp);
+
+  // If we're compiling baseline, update the inline cache.
+ codegen_->MaybeGenerateInlineCacheCheck(instruction, temp); + + // The register T0 is required to be used for the hidden argument in + // `art_quick_imt_conflict_trampoline`. + if (instruction->GetHiddenArgumentLoadKind() != MethodLoadKind::kRecursive && + instruction->GetHiddenArgumentLoadKind() != MethodLoadKind::kRuntimeCall) { + Location hidden_reg = instruction->GetLocations()->GetTemp(1); + // Load the resolved interface method in the hidden argument register T0. + DCHECK_EQ(T0, hidden_reg.AsRegister<XRegister>()); + codegen_->LoadMethod(instruction->GetHiddenArgumentLoadKind(), hidden_reg, instruction); + } + + __ Loadd(temp, temp, mirror::Class::ImtPtrOffset(kRiscv64PointerSize).Uint32Value()); + uint32_t method_offset = static_cast<uint32_t>(ImTable::OffsetOfElement( + instruction->GetImtIndex(), kRiscv64PointerSize)); + // temp = temp->GetImtEntryAt(method_offset); + __ Loadd(temp, temp, method_offset); + if (instruction->GetHiddenArgumentLoadKind() == MethodLoadKind::kRuntimeCall) { + // We pass the method from the IMT in case of a conflict. This will ensure + // we go into the runtime to resolve the actual method. + Location hidden_reg = instruction->GetLocations()->GetTemp(1); + DCHECK_EQ(T0, hidden_reg.AsRegister<XRegister>()); + __ Mv(hidden_reg.AsRegister<XRegister>(), temp); + } + // RA = temp->GetEntryPoint(); + __ Loadd(RA, temp, entry_point.Int32Value()); + + // RA(); + __ Jalr(RA); + DCHECK(!codegen_->IsLeafMethod()); + codegen_->RecordPcInfo(instruction, instruction->GetDexPc()); +} + +void LocationsBuilderRISCV64::VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* instruction) { + // Explicit clinit checks triggered by static invokes must have been pruned by + // art::PrepareForRegisterAllocation. + DCHECK(!instruction->IsStaticWithExplicitClinitCheck()); + + IntrinsicLocationsBuilderRISCV64 intrinsic(GetGraph()->GetAllocator(), codegen_); + if (intrinsic.TryDispatch(instruction)) { + return; + } + + if (instruction->GetCodePtrLocation() == CodePtrLocation::kCallCriticalNative) { + CriticalNativeCallingConventionVisitorRiscv64 calling_convention_visitor( + /*for_register_allocation=*/ true); + CodeGenerator::CreateCommonInvokeLocationSummary(instruction, &calling_convention_visitor); + } else { + HandleInvoke(instruction); + } +} + +static bool TryGenerateIntrinsicCode(HInvoke* invoke, CodeGeneratorRISCV64* codegen) { + if (invoke->GetLocations()->Intrinsified()) { + IntrinsicCodeGeneratorRISCV64 intrinsic(codegen); + intrinsic.Dispatch(invoke); + return true; + } + return false; +} + +void InstructionCodeGeneratorRISCV64::VisitInvokeStaticOrDirect( + HInvokeStaticOrDirect* instruction) { + // Explicit clinit checks triggered by static invokes must have been pruned by + // art::PrepareForRegisterAllocation. + DCHECK(!instruction->IsStaticWithExplicitClinitCheck()); + + if (TryGenerateIntrinsicCode(instruction, codegen_)) { + return; + } + + LocationSummary* locations = instruction->GetLocations(); + codegen_->GenerateStaticOrDirectCall( + instruction, locations->HasTemps() ? 
locations->GetTemp(0) : Location::NoLocation()); +} + +void LocationsBuilderRISCV64::VisitInvokeVirtual(HInvokeVirtual* instruction) { + IntrinsicLocationsBuilderRISCV64 intrinsic(GetGraph()->GetAllocator(), codegen_); + if (intrinsic.TryDispatch(instruction)) { + return; + } + + HandleInvoke(instruction); +} + +void InstructionCodeGeneratorRISCV64::VisitInvokeVirtual(HInvokeVirtual* instruction) { + if (TryGenerateIntrinsicCode(instruction, codegen_)) { + return; + } + + codegen_->GenerateVirtualCall(instruction, instruction->GetLocations()->GetTemp(0)); + DCHECK(!codegen_->IsLeafMethod()); +} + +void LocationsBuilderRISCV64::VisitInvokePolymorphic(HInvokePolymorphic* instruction) { + IntrinsicLocationsBuilderRISCV64 intrinsic(GetGraph()->GetAllocator(), codegen_); + if (intrinsic.TryDispatch(instruction)) { + return; + } + HandleInvoke(instruction); +} + +void InstructionCodeGeneratorRISCV64::VisitInvokePolymorphic(HInvokePolymorphic* instruction) { + if (TryGenerateIntrinsicCode(instruction, codegen_)) { + return; + } + codegen_->GenerateInvokePolymorphicCall(instruction); +} + +void LocationsBuilderRISCV64::VisitInvokeCustom(HInvokeCustom* instruction) { + HandleInvoke(instruction); +} + +void InstructionCodeGeneratorRISCV64::VisitInvokeCustom(HInvokeCustom* instruction) { + codegen_->GenerateInvokeCustomCall(instruction); +} + +void LocationsBuilderRISCV64::VisitLessThan(HLessThan* instruction) { + HandleCondition(instruction); +} + +void InstructionCodeGeneratorRISCV64::VisitLessThan(HLessThan* instruction) { + HandleCondition(instruction); +} + +void LocationsBuilderRISCV64::VisitLessThanOrEqual(HLessThanOrEqual* instruction) { + HandleCondition(instruction); +} + +void InstructionCodeGeneratorRISCV64::VisitLessThanOrEqual(HLessThanOrEqual* instruction) { + HandleCondition(instruction); +} + +void LocationsBuilderRISCV64::VisitLoadClass(HLoadClass* instruction) { + HLoadClass::LoadKind load_kind = instruction->GetLoadKind(); + if (load_kind == HLoadClass::LoadKind::kRuntimeCall) { + InvokeRuntimeCallingConvention calling_convention; + Location loc = Location::RegisterLocation(calling_convention.GetRegisterAt(0)); + DCHECK_EQ(DataType::Type::kReference, instruction->GetType()); + DCHECK(loc.Equals(calling_convention.GetReturnLocation(DataType::Type::kReference))); + CodeGenerator::CreateLoadClassRuntimeCallLocationSummary(instruction, loc, loc); + return; + } + DCHECK_EQ(instruction->NeedsAccessCheck(), + load_kind == HLoadClass::LoadKind::kBssEntryPublic || + load_kind == HLoadClass::LoadKind::kBssEntryPackage); + + const bool requires_read_barrier = !instruction->IsInBootImage() && codegen_->EmitReadBarrier(); + LocationSummary::CallKind call_kind = (instruction->NeedsEnvironment() || requires_read_barrier) + ? LocationSummary::kCallOnSlowPath + : LocationSummary::kNoCall; + LocationSummary* locations = + new (GetGraph()->GetAllocator()) LocationSummary(instruction, call_kind); + if (kUseBakerReadBarrier && requires_read_barrier && !instruction->NeedsEnvironment()) { + locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty()); // No caller-save registers. 
+ } + if (load_kind == HLoadClass::LoadKind::kReferrersClass) { + locations->SetInAt(0, Location::RequiresRegister()); + } + locations->SetOut(Location::RequiresRegister()); + if (load_kind == HLoadClass::LoadKind::kBssEntry || + load_kind == HLoadClass::LoadKind::kBssEntryPublic || + load_kind == HLoadClass::LoadKind::kBssEntryPackage) { + if (codegen_->EmitNonBakerReadBarrier()) { + // For non-Baker read barriers we have a temp-clobbering call. + } else { + // Rely on the type resolution or initialization and marking to save everything we need. + locations->SetCustomSlowPathCallerSaves(OneRegInReferenceOutSaveEverythingCallerSaves()); + } + } +} + +// NO_THREAD_SAFETY_ANALYSIS as we manipulate handles whose internal object we know does not +// move. +void InstructionCodeGeneratorRISCV64::VisitLoadClass(HLoadClass* instruction) + NO_THREAD_SAFETY_ANALYSIS { + HLoadClass::LoadKind load_kind = instruction->GetLoadKind(); + if (load_kind == HLoadClass::LoadKind::kRuntimeCall) { + codegen_->GenerateLoadClassRuntimeCall(instruction); + return; + } + DCHECK_EQ(instruction->NeedsAccessCheck(), + load_kind == HLoadClass::LoadKind::kBssEntryPublic || + load_kind == HLoadClass::LoadKind::kBssEntryPackage); + + LocationSummary* locations = instruction->GetLocations(); + Location out_loc = locations->Out(); + XRegister out = out_loc.AsRegister<XRegister>(); + const ReadBarrierOption read_barrier_option = + instruction->IsInBootImage() ? kWithoutReadBarrier : codegen_->GetCompilerReadBarrierOption(); + bool generate_null_check = false; + switch (load_kind) { + case HLoadClass::LoadKind::kReferrersClass: { + DCHECK(!instruction->CanCallRuntime()); + DCHECK(!instruction->MustGenerateClinitCheck()); + // /* GcRoot<mirror::Class> */ out = current_method->declaring_class_ + XRegister current_method = locations->InAt(0).AsRegister<XRegister>(); + codegen_->GenerateGcRootFieldLoad(instruction, + out_loc, + current_method, + ArtMethod::DeclaringClassOffset().Int32Value(), + read_barrier_option); + break; + } + case HLoadClass::LoadKind::kBootImageLinkTimePcRelative: { + DCHECK(codegen_->GetCompilerOptions().IsBootImage() || + codegen_->GetCompilerOptions().IsBootImageExtension()); + DCHECK_EQ(read_barrier_option, kWithoutReadBarrier); + CodeGeneratorRISCV64::PcRelativePatchInfo* info_high = + codegen_->NewBootImageTypePatch(instruction->GetDexFile(), instruction->GetTypeIndex()); + codegen_->EmitPcRelativeAuipcPlaceholder(info_high, out); + CodeGeneratorRISCV64::PcRelativePatchInfo* info_low = + codegen_->NewBootImageTypePatch( + instruction->GetDexFile(), instruction->GetTypeIndex(), info_high); + codegen_->EmitPcRelativeAddiPlaceholder(info_low, out, out); + break; + } + case HLoadClass::LoadKind::kBootImageRelRo: { + DCHECK(!codegen_->GetCompilerOptions().IsBootImage()); + uint32_t boot_image_offset = codegen_->GetBootImageOffset(instruction); + codegen_->LoadBootImageRelRoEntry(out, boot_image_offset); + break; + } + case HLoadClass::LoadKind::kBssEntry: + case HLoadClass::LoadKind::kBssEntryPublic: + case HLoadClass::LoadKind::kBssEntryPackage: { + CodeGeneratorRISCV64::PcRelativePatchInfo* bss_info_high = + codegen_->NewTypeBssEntryPatch(instruction); + codegen_->EmitPcRelativeAuipcPlaceholder(bss_info_high, out); + CodeGeneratorRISCV64::PcRelativePatchInfo* info_low = codegen_->NewTypeBssEntryPatch( + instruction, bss_info_high); + codegen_->GenerateGcRootFieldLoad(instruction, + out_loc, + out, + /* offset= */ kLinkTimeOffsetPlaceholderLow, + read_barrier_option, + &info_low->label); + 
generate_null_check = true; + break; + } + case HLoadClass::LoadKind::kJitBootImageAddress: { + DCHECK_EQ(read_barrier_option, kWithoutReadBarrier); + uint32_t address = reinterpret_cast32<uint32_t>(instruction->GetClass().Get()); + DCHECK_NE(address, 0u); + __ Loadwu(out, codegen_->DeduplicateBootImageAddressLiteral(address)); + break; + } + case HLoadClass::LoadKind::kJitTableAddress: + __ Loadwu(out, codegen_->DeduplicateJitClassLiteral(instruction->GetDexFile(), + instruction->GetTypeIndex(), + instruction->GetClass())); + codegen_->GenerateGcRootFieldLoad( + instruction, out_loc, out, /* offset= */ 0, read_barrier_option); + break; + case HLoadClass::LoadKind::kRuntimeCall: + case HLoadClass::LoadKind::kInvalid: + LOG(FATAL) << "UNREACHABLE"; + UNREACHABLE(); + } + + if (generate_null_check || instruction->MustGenerateClinitCheck()) { + DCHECK(instruction->CanCallRuntime()); + SlowPathCodeRISCV64* slow_path = + new (codegen_->GetScopedAllocator()) LoadClassSlowPathRISCV64(instruction, instruction); + codegen_->AddSlowPath(slow_path); + if (generate_null_check) { + __ Beqz(out, slow_path->GetEntryLabel()); + } + if (instruction->MustGenerateClinitCheck()) { + GenerateClassInitializationCheck(slow_path, out); + } else { + __ Bind(slow_path->GetExitLabel()); + } + } +} + +void LocationsBuilderRISCV64::VisitLoadException(HLoadException* instruction) { + LocationSummary* locations = + new (GetGraph()->GetAllocator()) LocationSummary(instruction, LocationSummary::kNoCall); + locations->SetOut(Location::RequiresRegister()); +} + +void InstructionCodeGeneratorRISCV64::VisitLoadException(HLoadException* instruction) { + XRegister out = instruction->GetLocations()->Out().AsRegister<XRegister>(); + __ Loadwu(out, TR, GetExceptionTlsOffset()); +} + +void LocationsBuilderRISCV64::VisitLoadMethodHandle(HLoadMethodHandle* instruction) { + InvokeRuntimeCallingConvention calling_convention; + Location loc = Location::RegisterLocation(calling_convention.GetRegisterAt(0)); + CodeGenerator::CreateLoadMethodHandleRuntimeCallLocationSummary(instruction, loc, loc); +} + +void InstructionCodeGeneratorRISCV64::VisitLoadMethodHandle(HLoadMethodHandle* instruction) { + codegen_->GenerateLoadMethodHandleRuntimeCall(instruction); +} + +void LocationsBuilderRISCV64::VisitLoadMethodType(HLoadMethodType* instruction) { + InvokeRuntimeCallingConvention calling_convention; + Location loc = Location::RegisterLocation(calling_convention.GetRegisterAt(0)); + CodeGenerator::CreateLoadMethodTypeRuntimeCallLocationSummary(instruction, loc, loc); +} + +void InstructionCodeGeneratorRISCV64::VisitLoadMethodType(HLoadMethodType* instruction) { + codegen_->GenerateLoadMethodTypeRuntimeCall(instruction); +} + +void LocationsBuilderRISCV64::VisitLoadString(HLoadString* instruction) { + HLoadString::LoadKind load_kind = instruction->GetLoadKind(); + LocationSummary::CallKind call_kind = codegen_->GetLoadStringCallKind(instruction); + LocationSummary* locations = + new (GetGraph()->GetAllocator()) LocationSummary(instruction, call_kind); + if (load_kind == HLoadString::LoadKind::kRuntimeCall) { + InvokeRuntimeCallingConvention calling_convention; + DCHECK_EQ(DataType::Type::kReference, instruction->GetType()); + locations->SetOut(calling_convention.GetReturnLocation(DataType::Type::kReference)); + } else { + locations->SetOut(Location::RequiresRegister()); + if (load_kind == HLoadString::LoadKind::kBssEntry) { + if (codegen_->EmitNonBakerReadBarrier()) { + // For non-Baker read barriers we have a temp-clobbering call. 
+ } else { + // Rely on the pResolveString and marking to save everything we need. + locations->SetCustomSlowPathCallerSaves(OneRegInReferenceOutSaveEverythingCallerSaves()); + } + } + } +} + +// NO_THREAD_SAFETY_ANALYSIS as we manipulate handles whose internal object we know does not +// move. +void InstructionCodeGeneratorRISCV64::VisitLoadString(HLoadString* instruction) + NO_THREAD_SAFETY_ANALYSIS { + HLoadString::LoadKind load_kind = instruction->GetLoadKind(); + LocationSummary* locations = instruction->GetLocations(); + Location out_loc = locations->Out(); + XRegister out = out_loc.AsRegister<XRegister>(); + + switch (load_kind) { + case HLoadString::LoadKind::kBootImageLinkTimePcRelative: { + DCHECK(codegen_->GetCompilerOptions().IsBootImage() || + codegen_->GetCompilerOptions().IsBootImageExtension()); + CodeGeneratorRISCV64::PcRelativePatchInfo* info_high = codegen_->NewBootImageStringPatch( + instruction->GetDexFile(), instruction->GetStringIndex()); + codegen_->EmitPcRelativeAuipcPlaceholder(info_high, out); + CodeGeneratorRISCV64::PcRelativePatchInfo* info_low = codegen_->NewBootImageStringPatch( + instruction->GetDexFile(), instruction->GetStringIndex(), info_high); + codegen_->EmitPcRelativeAddiPlaceholder(info_low, out, out); + return; + } + case HLoadString::LoadKind::kBootImageRelRo: { + DCHECK(!codegen_->GetCompilerOptions().IsBootImage()); + uint32_t boot_image_offset = codegen_->GetBootImageOffset(instruction); + codegen_->LoadBootImageRelRoEntry(out, boot_image_offset); + return; + } + case HLoadString::LoadKind::kBssEntry: { + CodeGeneratorRISCV64::PcRelativePatchInfo* info_high = codegen_->NewStringBssEntryPatch( + instruction->GetDexFile(), instruction->GetStringIndex()); + codegen_->EmitPcRelativeAuipcPlaceholder(info_high, out); + CodeGeneratorRISCV64::PcRelativePatchInfo* info_low = codegen_->NewStringBssEntryPatch( + instruction->GetDexFile(), instruction->GetStringIndex(), info_high); + codegen_->GenerateGcRootFieldLoad(instruction, + out_loc, + out, + /* offset= */ kLinkTimeOffsetPlaceholderLow, + codegen_->GetCompilerReadBarrierOption(), + &info_low->label); + SlowPathCodeRISCV64* slow_path = + new (codegen_->GetScopedAllocator()) LoadStringSlowPathRISCV64(instruction); + codegen_->AddSlowPath(slow_path); + __ Beqz(out, slow_path->GetEntryLabel()); + __ Bind(slow_path->GetExitLabel()); + return; + } + case HLoadString::LoadKind::kJitBootImageAddress: { + uint32_t address = reinterpret_cast32<uint32_t>(instruction->GetString().Get()); + DCHECK_NE(address, 0u); + __ Loadwu(out, codegen_->DeduplicateBootImageAddressLiteral(address)); + return; + } + case HLoadString::LoadKind::kJitTableAddress: + __ Loadwu( + out, + codegen_->DeduplicateJitStringLiteral( + instruction->GetDexFile(), instruction->GetStringIndex(), instruction->GetString())); + codegen_->GenerateGcRootFieldLoad( + instruction, out_loc, out, 0, codegen_->GetCompilerReadBarrierOption()); + return; + default: + break; + } + + DCHECK(load_kind == HLoadString::LoadKind::kRuntimeCall); + InvokeRuntimeCallingConvention calling_convention; + DCHECK(calling_convention.GetReturnLocation(DataType::Type::kReference).Equals(out_loc)); + __ LoadConst32(calling_convention.GetRegisterAt(0), instruction->GetStringIndex().index_); + codegen_->InvokeRuntime(kQuickResolveString, instruction, instruction->GetDexPc()); + CheckEntrypointTypes<kQuickResolveString, void*, uint32_t>(); +} + +void LocationsBuilderRISCV64::VisitLongConstant(HLongConstant* instruction) { + LocationSummary* locations = new 
(GetGraph()->GetAllocator()) LocationSummary(instruction); + locations->SetOut(Location::ConstantLocation(instruction)); +} + +void InstructionCodeGeneratorRISCV64::VisitLongConstant( + [[maybe_unused]] HLongConstant* instruction) { + // Will be generated at use site. +} + +void LocationsBuilderRISCV64::VisitMax(HMax* instruction) { + HandleBinaryOp(instruction); +} + +void InstructionCodeGeneratorRISCV64::VisitMax(HMax* instruction) { + HandleBinaryOp(instruction); +} + +void LocationsBuilderRISCV64::VisitMemoryBarrier(HMemoryBarrier* instruction) { + instruction->SetLocations(nullptr); +} + +void InstructionCodeGeneratorRISCV64::VisitMemoryBarrier(HMemoryBarrier* instruction) { + codegen_->GenerateMemoryBarrier(instruction->GetBarrierKind()); +} + +void LocationsBuilderRISCV64::VisitMethodEntryHook(HMethodEntryHook* instruction) { + new (GetGraph()->GetAllocator()) LocationSummary(instruction, LocationSummary::kCallOnSlowPath); +} + +void InstructionCodeGeneratorRISCV64::VisitMethodEntryHook(HMethodEntryHook* instruction) { + DCHECK(codegen_->GetCompilerOptions().IsJitCompiler() && GetGraph()->IsDebuggable()); + DCHECK(codegen_->RequiresCurrentMethod()); + GenerateMethodEntryExitHook(instruction); +} + +void LocationsBuilderRISCV64::VisitMethodExitHook(HMethodExitHook* instruction) { + LocationSummary* locations = new (GetGraph()->GetAllocator()) + LocationSummary(instruction, LocationSummary::kCallOnSlowPath); + DataType::Type return_type = instruction->InputAt(0)->GetType(); + locations->SetInAt(0, Riscv64ReturnLocation(return_type)); +} + +void InstructionCodeGeneratorRISCV64::VisitMethodExitHook(HMethodExitHook* instruction) { + DCHECK(codegen_->GetCompilerOptions().IsJitCompiler() && GetGraph()->IsDebuggable()); + DCHECK(codegen_->RequiresCurrentMethod()); + GenerateMethodEntryExitHook(instruction); +} + +void LocationsBuilderRISCV64::VisitMin(HMin* instruction) { + HandleBinaryOp(instruction); +} + +void InstructionCodeGeneratorRISCV64::VisitMin(HMin* instruction) { + HandleBinaryOp(instruction); +} + +void LocationsBuilderRISCV64::VisitMonitorOperation(HMonitorOperation* instruction) { + LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary( + instruction, LocationSummary::kCallOnMainOnly); + InvokeRuntimeCallingConvention calling_convention; + locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0))); +} + +void InstructionCodeGeneratorRISCV64::VisitMonitorOperation(HMonitorOperation* instruction) { + codegen_->InvokeRuntime(instruction->IsEnter() ? 
kQuickLockObject : kQuickUnlockObject, + instruction, + instruction->GetDexPc()); + if (instruction->IsEnter()) { + CheckEntrypointTypes<kQuickLockObject, void, mirror::Object*>(); + } else { + CheckEntrypointTypes<kQuickUnlockObject, void, mirror::Object*>(); + } +} + +void LocationsBuilderRISCV64::VisitMul(HMul* instruction) { + LocationSummary* locations = + new (GetGraph()->GetAllocator()) LocationSummary(instruction, LocationSummary::kNoCall); + switch (instruction->GetResultType()) { + case DataType::Type::kInt32: + case DataType::Type::kInt64: + locations->SetInAt(0, Location::RequiresRegister()); + locations->SetInAt(1, Location::RequiresRegister()); + locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); + break; + + case DataType::Type::kFloat32: + case DataType::Type::kFloat64: + locations->SetInAt(0, Location::RequiresFpuRegister()); + locations->SetInAt(1, Location::RequiresFpuRegister()); + locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap); + break; + + default: + LOG(FATAL) << "Unexpected mul type " << instruction->GetResultType(); + } +} + +void InstructionCodeGeneratorRISCV64::VisitMul(HMul* instruction) { + LocationSummary* locations = instruction->GetLocations(); + switch (instruction->GetResultType()) { + case DataType::Type::kInt32: + __ Mulw(locations->Out().AsRegister<XRegister>(), + locations->InAt(0).AsRegister<XRegister>(), + locations->InAt(1).AsRegister<XRegister>()); + break; + + case DataType::Type::kInt64: + __ Mul(locations->Out().AsRegister<XRegister>(), + locations->InAt(0).AsRegister<XRegister>(), + locations->InAt(1).AsRegister<XRegister>()); + break; + + case DataType::Type::kFloat32: + case DataType::Type::kFloat64: + FMul(locations->Out().AsFpuRegister<FRegister>(), + locations->InAt(0).AsFpuRegister<FRegister>(), + locations->InAt(1).AsFpuRegister<FRegister>(), + instruction->GetResultType()); + break; + + default: + LOG(FATAL) << "Unexpected mul type " << instruction->GetResultType(); + } +} + +void LocationsBuilderRISCV64::VisitNeg(HNeg* instruction) { + LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(instruction); + switch (instruction->GetResultType()) { + case DataType::Type::kInt32: + case DataType::Type::kInt64: + locations->SetInAt(0, Location::RequiresRegister()); + locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); + break; + + case DataType::Type::kFloat32: + case DataType::Type::kFloat64: + locations->SetInAt(0, Location::RequiresFpuRegister()); + locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap); + break; + + default: + LOG(FATAL) << "Unexpected neg type " << instruction->GetResultType(); + UNREACHABLE(); + } +} + +void InstructionCodeGeneratorRISCV64::VisitNeg(HNeg* instruction) { + LocationSummary* locations = instruction->GetLocations(); + switch (instruction->GetResultType()) { + case DataType::Type::kInt32: + __ NegW(locations->Out().AsRegister<XRegister>(), locations->InAt(0).AsRegister<XRegister>()); + break; + + case DataType::Type::kInt64: + __ Neg(locations->Out().AsRegister<XRegister>(), locations->InAt(0).AsRegister<XRegister>()); + break; + + case DataType::Type::kFloat32: + case DataType::Type::kFloat64: + FNeg(locations->Out().AsFpuRegister<FRegister>(), + locations->InAt(0).AsFpuRegister<FRegister>(), + instruction->GetResultType()); + break; + + default: + LOG(FATAL) << "Unexpected neg type " << instruction->GetResultType(); + UNREACHABLE(); + } +} + +void 
LocationsBuilderRISCV64::VisitNewArray(HNewArray* instruction) { + LocationSummary* locations = new (GetGraph()->GetAllocator()) + LocationSummary(instruction, LocationSummary::kCallOnMainOnly); + InvokeRuntimeCallingConvention calling_convention; + locations->SetOut(calling_convention.GetReturnLocation(DataType::Type::kReference)); + locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0))); + locations->SetInAt(1, Location::RegisterLocation(calling_convention.GetRegisterAt(1))); +} + +void InstructionCodeGeneratorRISCV64::VisitNewArray(HNewArray* instruction) { + QuickEntrypointEnum entrypoint = CodeGenerator::GetArrayAllocationEntrypoint(instruction); + codegen_->InvokeRuntime(entrypoint, instruction, instruction->GetDexPc()); + CheckEntrypointTypes<kQuickAllocArrayResolved, void*, mirror::Class*, int32_t>(); + DCHECK(!codegen_->IsLeafMethod()); +} + +void LocationsBuilderRISCV64::VisitNewInstance(HNewInstance* instruction) { + LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary( + instruction, LocationSummary::kCallOnMainOnly); + InvokeRuntimeCallingConvention calling_convention; + locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0))); + locations->SetOut(calling_convention.GetReturnLocation(DataType::Type::kReference)); +} + +void InstructionCodeGeneratorRISCV64::VisitNewInstance(HNewInstance* instruction) { + codegen_->InvokeRuntime(instruction->GetEntrypoint(), instruction, instruction->GetDexPc()); + CheckEntrypointTypes<kQuickAllocObjectWithChecks, void*, mirror::Class*>(); +} + +void LocationsBuilderRISCV64::VisitNop(HNop* instruction) { + new (GetGraph()->GetAllocator()) LocationSummary(instruction); +} + +void InstructionCodeGeneratorRISCV64::VisitNop([[maybe_unused]] HNop* instruction) { + // The environment recording already happened in CodeGenerator::Compile. +} + +void LocationsBuilderRISCV64::VisitNot(HNot* instruction) { + LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(instruction); + locations->SetInAt(0, Location::RequiresRegister()); + locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); +} + +void InstructionCodeGeneratorRISCV64::VisitNot(HNot* instruction) { + LocationSummary* locations = instruction->GetLocations(); + switch (instruction->GetResultType()) { + case DataType::Type::kInt32: + case DataType::Type::kInt64: + __ Not(locations->Out().AsRegister<XRegister>(), locations->InAt(0).AsRegister<XRegister>()); + break; + + default: + LOG(FATAL) << "Unexpected type for not operation " << instruction->GetResultType(); + UNREACHABLE(); + } +} + +void LocationsBuilderRISCV64::VisitNotEqual(HNotEqual* instruction) { + HandleCondition(instruction); +} + +void InstructionCodeGeneratorRISCV64::VisitNotEqual(HNotEqual* instruction) { + HandleCondition(instruction); +} + +void LocationsBuilderRISCV64::VisitNullConstant(HNullConstant* instruction) { + LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(instruction); + locations->SetOut(Location::ConstantLocation(instruction)); +} + +void InstructionCodeGeneratorRISCV64::VisitNullConstant( + [[maybe_unused]] HNullConstant* instruction) { + // Will be generated at use site. 
+} + +void LocationsBuilderRISCV64::VisitNullCheck(HNullCheck* instruction) { + LocationSummary* locations = codegen_->CreateThrowingSlowPathLocations(instruction); + locations->SetInAt(0, Location::RequiresRegister()); +} + +void InstructionCodeGeneratorRISCV64::VisitNullCheck(HNullCheck* instruction) { + codegen_->GenerateNullCheck(instruction); +} + +void LocationsBuilderRISCV64::VisitOr(HOr* instruction) { + HandleBinaryOp(instruction); +} + +void InstructionCodeGeneratorRISCV64::VisitOr(HOr* instruction) { + HandleBinaryOp(instruction); +} + +void LocationsBuilderRISCV64::VisitPackedSwitch(HPackedSwitch* instruction) { + LocationSummary* locations = + new (GetGraph()->GetAllocator()) LocationSummary(instruction, LocationSummary::kNoCall); + locations->SetInAt(0, Location::RequiresRegister()); +} + +void InstructionCodeGeneratorRISCV64::VisitPackedSwitch(HPackedSwitch* instruction) { + int32_t lower_bound = instruction->GetStartValue(); + uint32_t num_entries = instruction->GetNumEntries(); + LocationSummary* locations = instruction->GetLocations(); + XRegister value = locations->InAt(0).AsRegister<XRegister>(); + HBasicBlock* switch_block = instruction->GetBlock(); + HBasicBlock* default_block = instruction->GetDefaultBlock(); + + // Prepare a temporary register and an adjusted zero-based value. + ScratchRegisterScope srs(GetAssembler()); + XRegister temp = srs.AllocateXRegister(); + XRegister adjusted = value; + if (lower_bound != 0) { + adjusted = temp; + __ AddConst32(temp, value, -lower_bound); + } + + // Jump to the default block if the index is out of the packed switch value range. + // Note: We could save one instruction for `num_entries == 1` with BNEZ but the + // `HInstructionBuilder` transforms that case to an `HIf`, so let's keep the code simple. + CHECK_NE(num_entries, 0u); // `HInstructionBuilder` creates a `HGoto` for empty packed-switch. + { + ScratchRegisterScope srs2(GetAssembler()); + XRegister temp2 = srs2.AllocateXRegister(); + __ LoadConst32(temp2, num_entries); + __ Bgeu(adjusted, temp2, codegen_->GetLabelOf(default_block)); // Can clobber `TMP` if taken. + } + + if (num_entries >= kPackedSwitchCompareJumpThreshold) { + GenTableBasedPackedSwitch(adjusted, temp, num_entries, switch_block); + } else { + GenPackedSwitchWithCompares(adjusted, temp, num_entries, switch_block); + } +} + +void LocationsBuilderRISCV64::VisitParallelMove([[maybe_unused]] HParallelMove* instruction) { + LOG(FATAL) << "Unreachable"; +} + +void InstructionCodeGeneratorRISCV64::VisitParallelMove(HParallelMove* instruction) { + if (instruction->GetNext()->IsSuspendCheck() && + instruction->GetBlock()->GetLoopInformation() != nullptr) { + HSuspendCheck* suspend_check = instruction->GetNext()->AsSuspendCheck(); + // The back edge will generate the suspend check. 
+ codegen_->ClearSpillSlotsFromLoopPhisInStackMap(suspend_check, instruction); + } + + codegen_->GetMoveResolver()->EmitNativeCode(instruction); +} + +void LocationsBuilderRISCV64::VisitParameterValue(HParameterValue* instruction) { + LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(instruction); + Location location = parameter_visitor_.GetNextLocation(instruction->GetType()); + if (location.IsStackSlot()) { + location = Location::StackSlot(location.GetStackIndex() + codegen_->GetFrameSize()); + } else if (location.IsDoubleStackSlot()) { + location = Location::DoubleStackSlot(location.GetStackIndex() + codegen_->GetFrameSize()); + } + locations->SetOut(location); +} + +void InstructionCodeGeneratorRISCV64::VisitParameterValue( + [[maybe_unused]] HParameterValue* instruction) { + // Nothing to do, the parameter is already at its location. +} + +void LocationsBuilderRISCV64::VisitPhi(HPhi* instruction) { + LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(instruction); + for (size_t i = 0, e = locations->GetInputCount(); i < e; ++i) { + locations->SetInAt(i, Location::Any()); + } + locations->SetOut(Location::Any()); +} + +void InstructionCodeGeneratorRISCV64::VisitPhi([[maybe_unused]] HPhi* instruction) { + LOG(FATAL) << "Unreachable"; +} + +void LocationsBuilderRISCV64::VisitRem(HRem* instruction) { + DataType::Type type = instruction->GetResultType(); + LocationSummary::CallKind call_kind = + DataType::IsFloatingPointType(type) ? LocationSummary::kCallOnMainOnly + : LocationSummary::kNoCall; + LocationSummary* locations = + new (GetGraph()->GetAllocator()) LocationSummary(instruction, call_kind); + + switch (type) { + case DataType::Type::kInt32: + case DataType::Type::kInt64: + locations->SetInAt(0, Location::RequiresRegister()); + locations->SetInAt(1, Location::RegisterOrConstant(instruction->InputAt(1))); + locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); + break; + + case DataType::Type::kFloat32: + case DataType::Type::kFloat64: { + InvokeRuntimeCallingConvention calling_convention; + locations->SetInAt(0, Location::FpuRegisterLocation(calling_convention.GetFpuRegisterAt(0))); + locations->SetInAt(1, Location::FpuRegisterLocation(calling_convention.GetFpuRegisterAt(1))); + locations->SetOut(calling_convention.GetReturnLocation(type)); + break; + } + + default: + LOG(FATAL) << "Unexpected rem type " << type; + UNREACHABLE(); + } +} + +void InstructionCodeGeneratorRISCV64::VisitRem(HRem* instruction) { + DataType::Type type = instruction->GetType(); + + switch (type) { + case DataType::Type::kInt32: + case DataType::Type::kInt64: + GenerateDivRemIntegral(instruction); + break; + + case DataType::Type::kFloat32: + case DataType::Type::kFloat64: { + QuickEntrypointEnum entrypoint = + (type == DataType::Type::kFloat32) ? 
kQuickFmodf : kQuickFmod; + codegen_->InvokeRuntime(entrypoint, instruction, instruction->GetDexPc()); + if (type == DataType::Type::kFloat32) { + CheckEntrypointTypes<kQuickFmodf, float, float, float>(); + } else { + CheckEntrypointTypes<kQuickFmod, double, double, double>(); + } + break; + } + default: + LOG(FATAL) << "Unexpected rem type " << type; + UNREACHABLE(); + } +} + +void LocationsBuilderRISCV64::VisitReturn(HReturn* instruction) { + LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(instruction); + DataType::Type return_type = instruction->InputAt(0)->GetType(); + DCHECK_NE(return_type, DataType::Type::kVoid); + locations->SetInAt(0, Riscv64ReturnLocation(return_type)); +} + +void InstructionCodeGeneratorRISCV64::VisitReturn(HReturn* instruction) { + if (GetGraph()->IsCompilingOsr()) { + // To simplify callers of an OSR method, we put a floating point return value + // in both floating point and core return registers. + DataType::Type type = instruction->InputAt(0)->GetType(); + if (DataType::IsFloatingPointType(type)) { + FMvX(A0, FA0, type); + } + } + codegen_->GenerateFrameExit(); +} + +void LocationsBuilderRISCV64::VisitReturnVoid(HReturnVoid* instruction) { + instruction->SetLocations(nullptr); +} + +void InstructionCodeGeneratorRISCV64::VisitReturnVoid([[maybe_unused]] HReturnVoid* instruction) { + codegen_->GenerateFrameExit(); +} + +void LocationsBuilderRISCV64::VisitRor(HRor* instruction) { + HandleShift(instruction); +} + +void InstructionCodeGeneratorRISCV64::VisitRor(HRor* instruction) { + HandleShift(instruction); +} + +void LocationsBuilderRISCV64::VisitShl(HShl* instruction) { + HandleShift(instruction); +} + +void InstructionCodeGeneratorRISCV64::VisitShl(HShl* instruction) { + HandleShift(instruction); +} + +void LocationsBuilderRISCV64::VisitShr(HShr* instruction) { + HandleShift(instruction); +} + +void InstructionCodeGeneratorRISCV64::VisitShr(HShr* instruction) { + HandleShift(instruction); +} + +void LocationsBuilderRISCV64::VisitStaticFieldGet(HStaticFieldGet* instruction) { + HandleFieldGet(instruction); +} + +void InstructionCodeGeneratorRISCV64::VisitStaticFieldGet(HStaticFieldGet* instruction) { + HandleFieldGet(instruction, instruction->GetFieldInfo()); +} + +void LocationsBuilderRISCV64::VisitStaticFieldSet(HStaticFieldSet* instruction) { + HandleFieldSet(instruction); +} + +void InstructionCodeGeneratorRISCV64::VisitStaticFieldSet(HStaticFieldSet* instruction) { + HandleFieldSet(instruction, + instruction->GetFieldInfo(), + instruction->GetValueCanBeNull(), + instruction->GetWriteBarrierKind()); +} + +void LocationsBuilderRISCV64::VisitStringBuilderAppend(HStringBuilderAppend* instruction) { + codegen_->CreateStringBuilderAppendLocations(instruction, Location::RegisterLocation(A0)); +} + +void InstructionCodeGeneratorRISCV64::VisitStringBuilderAppend(HStringBuilderAppend* instruction) { + __ LoadConst32(A0, instruction->GetFormat()->GetValue()); + codegen_->InvokeRuntime(kQuickStringBuilderAppend, instruction, instruction->GetDexPc()); +} + +void LocationsBuilderRISCV64::VisitUnresolvedInstanceFieldGet( + HUnresolvedInstanceFieldGet* instruction) { + FieldAccessCallingConventionRISCV64 calling_convention; + codegen_->CreateUnresolvedFieldLocationSummary( + instruction, instruction->GetFieldType(), calling_convention); +} + +void InstructionCodeGeneratorRISCV64::VisitUnresolvedInstanceFieldGet( + HUnresolvedInstanceFieldGet* instruction) { + FieldAccessCallingConventionRISCV64 calling_convention; + 
codegen_->GenerateUnresolvedFieldAccess(instruction, + instruction->GetFieldType(), + instruction->GetFieldIndex(), + instruction->GetDexPc(), + calling_convention); +} + +void LocationsBuilderRISCV64::VisitUnresolvedInstanceFieldSet( + HUnresolvedInstanceFieldSet* instruction) { + FieldAccessCallingConventionRISCV64 calling_convention; + codegen_->CreateUnresolvedFieldLocationSummary( + instruction, instruction->GetFieldType(), calling_convention); +} + +void InstructionCodeGeneratorRISCV64::VisitUnresolvedInstanceFieldSet( + HUnresolvedInstanceFieldSet* instruction) { + FieldAccessCallingConventionRISCV64 calling_convention; + codegen_->GenerateUnresolvedFieldAccess(instruction, + instruction->GetFieldType(), + instruction->GetFieldIndex(), + instruction->GetDexPc(), + calling_convention); +} + +void LocationsBuilderRISCV64::VisitUnresolvedStaticFieldGet( + HUnresolvedStaticFieldGet* instruction) { + FieldAccessCallingConventionRISCV64 calling_convention; + codegen_->CreateUnresolvedFieldLocationSummary( + instruction, instruction->GetFieldType(), calling_convention); +} + +void InstructionCodeGeneratorRISCV64::VisitUnresolvedStaticFieldGet( + HUnresolvedStaticFieldGet* instruction) { + FieldAccessCallingConventionRISCV64 calling_convention; + codegen_->GenerateUnresolvedFieldAccess(instruction, + instruction->GetFieldType(), + instruction->GetFieldIndex(), + instruction->GetDexPc(), + calling_convention); +} + +void LocationsBuilderRISCV64::VisitUnresolvedStaticFieldSet( + HUnresolvedStaticFieldSet* instruction) { + FieldAccessCallingConventionRISCV64 calling_convention; + codegen_->CreateUnresolvedFieldLocationSummary( + instruction, instruction->GetFieldType(), calling_convention); +} + +void InstructionCodeGeneratorRISCV64::VisitUnresolvedStaticFieldSet( + HUnresolvedStaticFieldSet* instruction) { + FieldAccessCallingConventionRISCV64 calling_convention; + codegen_->GenerateUnresolvedFieldAccess(instruction, + instruction->GetFieldType(), + instruction->GetFieldIndex(), + instruction->GetDexPc(), + calling_convention); +} + +void LocationsBuilderRISCV64::VisitSelect(HSelect* instruction) { + LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(instruction); + if (DataType::IsFloatingPointType(instruction->GetType())) { + locations->SetInAt(0, FpuRegisterOrZeroBitPatternLocation(instruction->GetFalseValue())); + locations->SetInAt(1, FpuRegisterOrZeroBitPatternLocation(instruction->GetTrueValue())); + locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap); + if (!locations->InAt(0).IsConstant() && !locations->InAt(1).IsConstant()) { + locations->AddTemp(Location::RequiresRegister()); + } + } else { + locations->SetInAt(0, RegisterOrZeroBitPatternLocation(instruction->GetFalseValue())); + locations->SetInAt(1, RegisterOrZeroBitPatternLocation(instruction->GetTrueValue())); + locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap); + } + + if (IsBooleanValueOrMaterializedCondition(instruction->GetCondition())) { + locations->SetInAt(2, Location::RequiresRegister()); + } +} + +void InstructionCodeGeneratorRISCV64::VisitSelect(HSelect* instruction) { + LocationSummary* locations = instruction->GetLocations(); + HInstruction* cond = instruction->GetCondition(); + ScratchRegisterScope srs(GetAssembler()); + XRegister tmp = srs.AllocateXRegister(); + if (!IsBooleanValueOrMaterializedCondition(cond)) { + DataType::Type cond_type = cond->InputAt(0)->GetType(); + IfCondition if_cond = cond->AsCondition()->GetCondition(); + if 
(DataType::IsFloatingPointType(cond_type)) { + GenerateFpCondition(if_cond, + cond->AsCondition()->IsGtBias(), + cond_type, + cond->GetLocations(), + /*label=*/ nullptr, + tmp, + /*to_all_bits=*/ true); + } else { + GenerateIntLongCondition(if_cond, cond->GetLocations(), tmp, /*to_all_bits=*/ true); + } + } else { + // TODO(riscv64): Remove the normalizing SNEZ when we can ensure that booleans + // have only values 0 and 1. b/279302742 + __ Snez(tmp, locations->InAt(2).AsRegister<XRegister>()); + __ Neg(tmp, tmp); + } + + XRegister true_reg, false_reg, xor_reg, out_reg; + DataType::Type type = instruction->GetType(); + if (DataType::IsFloatingPointType(type)) { + if (locations->InAt(0).IsConstant()) { + DCHECK(locations->InAt(0).GetConstant()->IsZeroBitPattern()); + false_reg = Zero; + } else { + false_reg = srs.AllocateXRegister(); + FMvX(false_reg, locations->InAt(0).AsFpuRegister<FRegister>(), type); + } + if (locations->InAt(1).IsConstant()) { + DCHECK(locations->InAt(1).GetConstant()->IsZeroBitPattern()); + true_reg = Zero; + } else { + true_reg = (false_reg == Zero) ? srs.AllocateXRegister() + : locations->GetTemp(0).AsRegister<XRegister>(); + FMvX(true_reg, locations->InAt(1).AsFpuRegister<FRegister>(), type); + } + // We can clobber the "true value" with the XOR result. + // Note: The XOR is not emitted if `true_reg == Zero`, see below. + xor_reg = true_reg; + out_reg = tmp; + } else { + false_reg = InputXRegisterOrZero(locations->InAt(0)); + true_reg = InputXRegisterOrZero(locations->InAt(1)); + xor_reg = srs.AllocateXRegister(); + out_reg = locations->Out().AsRegister<XRegister>(); + } + + // We use a branch-free implementation of `HSelect`. + // With `tmp` initialized to 0 for `false` and -1 for `true`: + // xor xor_reg, false_reg, true_reg + // and tmp, tmp, xor_reg + // xor out_reg, tmp, false_reg + if (false_reg == Zero) { + xor_reg = true_reg; + } else if (true_reg == Zero) { + xor_reg = false_reg; + } else { + DCHECK_NE(xor_reg, Zero); + __ Xor(xor_reg, false_reg, true_reg); + } + __ And(tmp, tmp, xor_reg); + __ Xor(out_reg, tmp, false_reg); + + if (type == DataType::Type::kFloat64) { + __ FMvDX(locations->Out().AsFpuRegister<FRegister>(), out_reg); + } else if (type == DataType::Type::kFloat32) { + __ FMvWX(locations->Out().AsFpuRegister<FRegister>(), out_reg); + } +} + +void LocationsBuilderRISCV64::VisitSub(HSub* instruction) { + HandleBinaryOp(instruction); +} + +void InstructionCodeGeneratorRISCV64::VisitSub(HSub* instruction) { + HandleBinaryOp(instruction); +} + +void LocationsBuilderRISCV64::VisitSuspendCheck(HSuspendCheck* instruction) { + LocationSummary* locations = new (GetGraph()->GetAllocator()) + LocationSummary(instruction, LocationSummary::kCallOnSlowPath); + // In suspend check slow path, usually there are no caller-save registers at all. + // If SIMD instructions are present, however, we force spilling all live SIMD + // registers in full width (since the runtime only saves/restores lower part). + locations->SetCustomSlowPathCallerSaves(GetGraph()->HasSIMD() ? RegisterSet::AllFpu() : + RegisterSet::Empty()); +} + +void InstructionCodeGeneratorRISCV64::VisitSuspendCheck(HSuspendCheck* instruction) { + HBasicBlock* block = instruction->GetBlock(); + if (block->GetLoopInformation() != nullptr) { + DCHECK(block->GetLoopInformation()->GetSuspendCheck() == instruction); + // The back edge will generate the suspend check. + return; + } + if (block->IsEntryBlock() && instruction->GetNext()->IsGoto()) { + // The goto will generate the suspend check. 
+ return; + } + GenerateSuspendCheck(instruction, nullptr); +} + +void LocationsBuilderRISCV64::VisitThrow(HThrow* instruction) { + LocationSummary* locations = new (GetGraph()->GetAllocator()) + LocationSummary(instruction, LocationSummary::kCallOnMainOnly); + InvokeRuntimeCallingConvention calling_convention; + locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0))); +} + +void InstructionCodeGeneratorRISCV64::VisitThrow(HThrow* instruction) { + codegen_->InvokeRuntime(kQuickDeliverException, instruction, instruction->GetDexPc()); + CheckEntrypointTypes<kQuickDeliverException, void, mirror::Object*>(); +} + +void LocationsBuilderRISCV64::VisitTryBoundary(HTryBoundary* instruction) { + instruction->SetLocations(nullptr); +} + +void InstructionCodeGeneratorRISCV64::VisitTryBoundary(HTryBoundary* instruction) { + HBasicBlock* successor = instruction->GetNormalFlowSuccessor(); + if (!successor->IsExitBlock()) { + HandleGoto(instruction, successor); + } +} + +void LocationsBuilderRISCV64::VisitTypeConversion(HTypeConversion* instruction) { + DataType::Type input_type = instruction->GetInputType(); + DataType::Type result_type = instruction->GetResultType(); + DCHECK(!DataType::IsTypeConversionImplicit(input_type, result_type)) + << input_type << " -> " << result_type; + + if ((input_type == DataType::Type::kReference) || (input_type == DataType::Type::kVoid) || + (result_type == DataType::Type::kReference) || (result_type == DataType::Type::kVoid)) { + LOG(FATAL) << "Unexpected type conversion from " << input_type << " to " << result_type; + } + + LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(instruction); + + if (DataType::IsFloatingPointType(input_type)) { + locations->SetInAt(0, Location::RequiresFpuRegister()); + } else { + locations->SetInAt(0, Location::RequiresRegister()); + } + + if (DataType::IsFloatingPointType(result_type)) { + locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap); + } else { + locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); + } +} + +void InstructionCodeGeneratorRISCV64::VisitTypeConversion(HTypeConversion* instruction) { + LocationSummary* locations = instruction->GetLocations(); + DataType::Type result_type = instruction->GetResultType(); + DataType::Type input_type = instruction->GetInputType(); + + DCHECK(!DataType::IsTypeConversionImplicit(input_type, result_type)) + << input_type << " -> " << result_type; + + if (DataType::IsIntegralType(result_type) && DataType::IsIntegralType(input_type)) { + XRegister dst = locations->Out().AsRegister<XRegister>(); + XRegister src = locations->InAt(0).AsRegister<XRegister>(); + switch (result_type) { + case DataType::Type::kUint8: + __ ZextB(dst, src); + break; + case DataType::Type::kInt8: + __ SextB(dst, src); + break; + case DataType::Type::kUint16: + __ ZextH(dst, src); + break; + case DataType::Type::kInt16: + __ SextH(dst, src); + break; + case DataType::Type::kInt32: + case DataType::Type::kInt64: + // Sign-extend 32-bit int into bits 32 through 63 for int-to-long and long-to-int + // conversions, except when the input and output registers are the same and we are not + // converting longs to shorter types. In these cases, do nothing. 
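+        // Note: `Addiw(dst, src, 0)` below is the canonical RISC-V `sext.w` idiom; it
+        // sign-extends the low 32 bits of `src` into the full 64-bit `dst` register.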
+ if ((input_type == DataType::Type::kInt64) || (dst != src)) { + __ Addiw(dst, src, 0); + } + break; + + default: + LOG(FATAL) << "Unexpected type conversion from " << input_type + << " to " << result_type; + UNREACHABLE(); + } + } else if (DataType::IsFloatingPointType(result_type) && DataType::IsIntegralType(input_type)) { + FRegister dst = locations->Out().AsFpuRegister<FRegister>(); + XRegister src = locations->InAt(0).AsRegister<XRegister>(); + if (input_type == DataType::Type::kInt64) { + if (result_type == DataType::Type::kFloat32) { + __ FCvtSL(dst, src, FPRoundingMode::kRNE); + } else { + __ FCvtDL(dst, src, FPRoundingMode::kRNE); + } + } else { + if (result_type == DataType::Type::kFloat32) { + __ FCvtSW(dst, src, FPRoundingMode::kRNE); + } else { + __ FCvtDW(dst, src); // No rounding. + } + } + } else if (DataType::IsIntegralType(result_type) && DataType::IsFloatingPointType(input_type)) { + CHECK(result_type == DataType::Type::kInt32 || result_type == DataType::Type::kInt64); + XRegister dst = locations->Out().AsRegister<XRegister>(); + FRegister src = locations->InAt(0).AsFpuRegister<FRegister>(); + if (result_type == DataType::Type::kInt64) { + if (input_type == DataType::Type::kFloat32) { + __ FCvtLS(dst, src, FPRoundingMode::kRTZ); + } else { + __ FCvtLD(dst, src, FPRoundingMode::kRTZ); + } + } else { + if (input_type == DataType::Type::kFloat32) { + __ FCvtWS(dst, src, FPRoundingMode::kRTZ); + } else { + __ FCvtWD(dst, src, FPRoundingMode::kRTZ); + } + } + // For NaN inputs we need to return 0. + ScratchRegisterScope srs(GetAssembler()); + XRegister tmp = srs.AllocateXRegister(); + FClass(tmp, src, input_type); + __ Sltiu(tmp, tmp, kFClassNaNMinValue); // 0 for NaN, 1 otherwise. + __ Neg(tmp, tmp); // 0 for NaN, -1 otherwise. + __ And(dst, dst, tmp); // Cleared for NaN. 
+ } else if (DataType::IsFloatingPointType(result_type) && + DataType::IsFloatingPointType(input_type)) { + FRegister dst = locations->Out().AsFpuRegister<FRegister>(); + FRegister src = locations->InAt(0).AsFpuRegister<FRegister>(); + if (result_type == DataType::Type::kFloat32) { + __ FCvtSD(dst, src); + } else { + __ FCvtDS(dst, src); + } + } else { + LOG(FATAL) << "Unexpected or unimplemented type conversion from " << input_type + << " to " << result_type; + UNREACHABLE(); + } +} + +void LocationsBuilderRISCV64::VisitUShr(HUShr* instruction) { + HandleShift(instruction); +} + +void InstructionCodeGeneratorRISCV64::VisitUShr(HUShr* instruction) { + HandleShift(instruction); +} + +void LocationsBuilderRISCV64::VisitXor(HXor* instruction) { + HandleBinaryOp(instruction); +} + +void InstructionCodeGeneratorRISCV64::VisitXor(HXor* instruction) { + HandleBinaryOp(instruction); +} + +void LocationsBuilderRISCV64::VisitVecReplicateScalar(HVecReplicateScalar* instruction) { + UNUSED(instruction); + LOG(FATAL) << "Unimplemented"; +} + +void InstructionCodeGeneratorRISCV64::VisitVecReplicateScalar(HVecReplicateScalar* instruction) { + UNUSED(instruction); + LOG(FATAL) << "Unimplemented"; +} + +void LocationsBuilderRISCV64::VisitVecExtractScalar(HVecExtractScalar* instruction) { + UNUSED(instruction); + LOG(FATAL) << "Unimplemented"; +} + +void InstructionCodeGeneratorRISCV64::VisitVecExtractScalar(HVecExtractScalar* instruction) { + UNUSED(instruction); + LOG(FATAL) << "Unimplemented"; +} + +void LocationsBuilderRISCV64::VisitVecReduce(HVecReduce* instruction) { + UNUSED(instruction); + LOG(FATAL) << "Unimplemented"; +} + +void InstructionCodeGeneratorRISCV64::VisitVecReduce(HVecReduce* instruction) { + UNUSED(instruction); + LOG(FATAL) << "Unimplemented"; +} + +void LocationsBuilderRISCV64::VisitVecCnv(HVecCnv* instruction) { + UNUSED(instruction); + LOG(FATAL) << "Unimplemented"; +} + +void InstructionCodeGeneratorRISCV64::VisitVecCnv(HVecCnv* instruction) { + UNUSED(instruction); + LOG(FATAL) << "Unimplemented"; +} + +void LocationsBuilderRISCV64::VisitVecNeg(HVecNeg* instruction) { + UNUSED(instruction); + LOG(FATAL) << "Unimplemented"; +} + +void InstructionCodeGeneratorRISCV64::VisitVecNeg(HVecNeg* instruction) { + UNUSED(instruction); + LOG(FATAL) << "Unimplemented"; +} + +void LocationsBuilderRISCV64::VisitVecAbs(HVecAbs* instruction) { + UNUSED(instruction); + LOG(FATAL) << "Unimplemented"; +} + +void InstructionCodeGeneratorRISCV64::VisitVecAbs(HVecAbs* instruction) { + UNUSED(instruction); + LOG(FATAL) << "Unimplemented"; +} + +void LocationsBuilderRISCV64::VisitVecNot(HVecNot* instruction) { + UNUSED(instruction); + LOG(FATAL) << "Unimplemented"; +} + +void InstructionCodeGeneratorRISCV64::VisitVecNot(HVecNot* instruction) { + UNUSED(instruction); + LOG(FATAL) << "Unimplemented"; +} + +void LocationsBuilderRISCV64::VisitVecAdd(HVecAdd* instruction) { + UNUSED(instruction); + LOG(FATAL) << "Unimplemented"; +} + +void InstructionCodeGeneratorRISCV64::VisitVecAdd(HVecAdd* instruction) { + UNUSED(instruction); + LOG(FATAL) << "Unimplemented"; +} + +void LocationsBuilderRISCV64::VisitVecHalvingAdd(HVecHalvingAdd* instruction) { + UNUSED(instruction); + LOG(FATAL) << "Unimplemented"; +} + +void InstructionCodeGeneratorRISCV64::VisitVecHalvingAdd(HVecHalvingAdd* instruction) { + UNUSED(instruction); + LOG(FATAL) << "Unimplemented"; +} + +void LocationsBuilderRISCV64::VisitVecSub(HVecSub* instruction) { + UNUSED(instruction); + LOG(FATAL) << "Unimplemented"; +} + +void 
InstructionCodeGeneratorRISCV64::VisitVecSub(HVecSub* instruction) { + UNUSED(instruction); + LOG(FATAL) << "Unimplemented"; +} + +void LocationsBuilderRISCV64::VisitVecMul(HVecMul* instruction) { + UNUSED(instruction); + LOG(FATAL) << "Unimplemented"; +} + +void InstructionCodeGeneratorRISCV64::VisitVecMul(HVecMul* instruction) { + UNUSED(instruction); + LOG(FATAL) << "Unimplemented"; +} + +void LocationsBuilderRISCV64::VisitVecDiv(HVecDiv* instruction) { + UNUSED(instruction); + LOG(FATAL) << "Unimplemented"; +} + +void InstructionCodeGeneratorRISCV64::VisitVecDiv(HVecDiv* instruction) { + UNUSED(instruction); + LOG(FATAL) << "Unimplemented"; +} + +void LocationsBuilderRISCV64::VisitVecMin(HVecMin* instruction) { + UNUSED(instruction); + LOG(FATAL) << "Unimplemented"; +} + +void InstructionCodeGeneratorRISCV64::VisitVecMin(HVecMin* instruction) { + UNUSED(instruction); + LOG(FATAL) << "Unimplemented"; +} + +void LocationsBuilderRISCV64::VisitVecMax(HVecMax* instruction) { + UNUSED(instruction); + LOG(FATAL) << "Unimplemented"; +} + +void InstructionCodeGeneratorRISCV64::VisitVecMax(HVecMax* instruction) { + UNUSED(instruction); + LOG(FATAL) << "Unimplemented"; +} + +void LocationsBuilderRISCV64::VisitVecAnd(HVecAnd* instruction) { + UNUSED(instruction); + LOG(FATAL) << "Unimplemented"; +} + +void InstructionCodeGeneratorRISCV64::VisitVecAnd(HVecAnd* instruction) { + UNUSED(instruction); + LOG(FATAL) << "Unimplemented"; +} + +void LocationsBuilderRISCV64::VisitVecAndNot(HVecAndNot* instruction) { + UNUSED(instruction); + LOG(FATAL) << "Unimplemented"; +} + +void InstructionCodeGeneratorRISCV64::VisitVecAndNot(HVecAndNot* instruction) { + UNUSED(instruction); + LOG(FATAL) << "Unimplemented"; +} + +void LocationsBuilderRISCV64::VisitVecOr(HVecOr* instruction) { + UNUSED(instruction); + LOG(FATAL) << "Unimplemented"; +} + +void InstructionCodeGeneratorRISCV64::VisitVecOr(HVecOr* instruction) { + UNUSED(instruction); + LOG(FATAL) << "Unimplemented"; +} + +void LocationsBuilderRISCV64::VisitVecXor(HVecXor* instruction) { + UNUSED(instruction); + LOG(FATAL) << "Unimplemented"; +} + +void InstructionCodeGeneratorRISCV64::VisitVecXor(HVecXor* instruction) { + UNUSED(instruction); + LOG(FATAL) << "Unimplemented"; +} + +void LocationsBuilderRISCV64::VisitVecSaturationAdd(HVecSaturationAdd* instruction) { + UNUSED(instruction); + LOG(FATAL) << "Unimplemented"; +} + +void InstructionCodeGeneratorRISCV64::VisitVecSaturationAdd(HVecSaturationAdd* instruction) { + UNUSED(instruction); + LOG(FATAL) << "Unimplemented"; +} + +void LocationsBuilderRISCV64::VisitVecSaturationSub(HVecSaturationSub* instruction) { + UNUSED(instruction); + LOG(FATAL) << "Unimplemented"; +} + +void InstructionCodeGeneratorRISCV64::VisitVecSaturationSub(HVecSaturationSub* instruction) { + UNUSED(instruction); + LOG(FATAL) << "Unimplemented"; +} + +void LocationsBuilderRISCV64::VisitVecShl(HVecShl* instruction) { + UNUSED(instruction); + LOG(FATAL) << "Unimplemented"; +} + +void InstructionCodeGeneratorRISCV64::VisitVecShl(HVecShl* instruction) { + UNUSED(instruction); + LOG(FATAL) << "Unimplemented"; +} + +void LocationsBuilderRISCV64::VisitVecShr(HVecShr* instruction) { + UNUSED(instruction); + LOG(FATAL) << "Unimplemented"; +} + +void InstructionCodeGeneratorRISCV64::VisitVecShr(HVecShr* instruction) { + UNUSED(instruction); + LOG(FATAL) << "Unimplemented"; +} + +void LocationsBuilderRISCV64::VisitVecUShr(HVecUShr* instruction) { + UNUSED(instruction); + LOG(FATAL) << "Unimplemented"; +} + +void 
InstructionCodeGeneratorRISCV64::VisitVecUShr(HVecUShr* instruction) { + UNUSED(instruction); + LOG(FATAL) << "Unimplemented"; +} + +void LocationsBuilderRISCV64::VisitVecSetScalars(HVecSetScalars* instruction) { + UNUSED(instruction); + LOG(FATAL) << "Unimplemented"; +} + +void InstructionCodeGeneratorRISCV64::VisitVecSetScalars(HVecSetScalars* instruction) { + UNUSED(instruction); + LOG(FATAL) << "Unimplemented"; +} + +void LocationsBuilderRISCV64::VisitVecMultiplyAccumulate(HVecMultiplyAccumulate* instruction) { + UNUSED(instruction); + LOG(FATAL) << "Unimplemented"; +} + +void InstructionCodeGeneratorRISCV64::VisitVecMultiplyAccumulate( + HVecMultiplyAccumulate* instruction) { + UNUSED(instruction); + LOG(FATAL) << "Unimplemented"; +} + +void LocationsBuilderRISCV64::VisitVecSADAccumulate(HVecSADAccumulate* instruction) { + UNUSED(instruction); + LOG(FATAL) << "Unimplemented"; +} + +void InstructionCodeGeneratorRISCV64::VisitVecSADAccumulate(HVecSADAccumulate* instruction) { + UNUSED(instruction); + LOG(FATAL) << "Unimplemented"; +} + +void LocationsBuilderRISCV64::VisitVecDotProd(HVecDotProd* instruction) { + UNUSED(instruction); + LOG(FATAL) << "Unimplemented"; +} + +void InstructionCodeGeneratorRISCV64::VisitVecDotProd(HVecDotProd* instruction) { + UNUSED(instruction); + LOG(FATAL) << "Unimplemented"; +} + +void LocationsBuilderRISCV64::VisitVecLoad(HVecLoad* instruction) { + UNUSED(instruction); + LOG(FATAL) << "Unimplemented"; +} + +void InstructionCodeGeneratorRISCV64::VisitVecLoad(HVecLoad* instruction) { + UNUSED(instruction); + LOG(FATAL) << "Unimplemented"; +} + +void LocationsBuilderRISCV64::VisitVecStore(HVecStore* instruction) { + UNUSED(instruction); + LOG(FATAL) << "Unimplemented"; +} + +void InstructionCodeGeneratorRISCV64::VisitVecStore(HVecStore* instruction) { + UNUSED(instruction); + LOG(FATAL) << "Unimplemented"; +} + +void LocationsBuilderRISCV64::VisitVecPredSetAll(HVecPredSetAll* instruction) { + UNUSED(instruction); + LOG(FATAL) << "Unimplemented"; +} + +void InstructionCodeGeneratorRISCV64::VisitVecPredSetAll(HVecPredSetAll* instruction) { + UNUSED(instruction); + LOG(FATAL) << "Unimplemented"; +} + +void LocationsBuilderRISCV64::VisitVecPredWhile(HVecPredWhile* instruction) { + UNUSED(instruction); + LOG(FATAL) << "Unimplemented"; +} + +void InstructionCodeGeneratorRISCV64::VisitVecPredWhile(HVecPredWhile* instruction) { + UNUSED(instruction); + LOG(FATAL) << "Unimplemented"; +} + +void LocationsBuilderRISCV64::VisitVecPredToBoolean(HVecPredToBoolean* instruction) { + UNUSED(instruction); + LOG(FATAL) << "Unimplemented"; +} + +void InstructionCodeGeneratorRISCV64::VisitVecPredToBoolean(HVecPredToBoolean* instruction) { + UNUSED(instruction); + LOG(FATAL) << "Unimplemented"; +} + +void LocationsBuilderRISCV64::VisitVecCondition(HVecCondition* instruction) { + UNUSED(instruction); + LOG(FATAL) << "Unimplemented"; +} + +void InstructionCodeGeneratorRISCV64::VisitVecCondition(HVecCondition* instruction) { + UNUSED(instruction); + LOG(FATAL) << "Unimplemented"; +} + +void LocationsBuilderRISCV64::VisitVecPredNot(HVecPredNot* instruction) { + UNUSED(instruction); + LOG(FATAL) << "Unimplemented"; +} + +void InstructionCodeGeneratorRISCV64::VisitVecPredNot(HVecPredNot* instruction) { + UNUSED(instruction); + LOG(FATAL) << "Unimplemented"; +} + +namespace detail { + +// Mark which intrinsics we don't have handcrafted code for. 
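+// The primary template below reports an intrinsic as implemented; `TRUE_OVERRIDE` specializes
+// it to "unimplemented" for every entry in `UNIMPLEMENTED_INTRINSIC_LIST_RISCV64`, and
+// `kIsIntrinsicUnimplemented` collects the per-intrinsic flags into a constexpr array that is
+// passed to the `CodeGenerator` base class constructor.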
+template <Intrinsics T> +struct IsUnimplemented { + bool is_unimplemented = false; +}; + +#define TRUE_OVERRIDE(Name) \ + template <> \ + struct IsUnimplemented<Intrinsics::k##Name> { \ + bool is_unimplemented = true; \ + }; +UNIMPLEMENTED_INTRINSIC_LIST_RISCV64(TRUE_OVERRIDE) +#undef TRUE_OVERRIDE + +static constexpr bool kIsIntrinsicUnimplemented[] = { + false, // kNone +#define IS_UNIMPLEMENTED(Intrinsic, ...) \ + IsUnimplemented<Intrinsics::k##Intrinsic>().is_unimplemented, + ART_INTRINSICS_LIST(IS_UNIMPLEMENTED) +#undef IS_UNIMPLEMENTED +}; + +} // namespace detail + +CodeGeneratorRISCV64::CodeGeneratorRISCV64(HGraph* graph, + const CompilerOptions& compiler_options, + OptimizingCompilerStats* stats) + : CodeGenerator(graph, + kNumberOfXRegisters, + kNumberOfFRegisters, + /*number_of_register_pairs=*/ 0u, + ComputeRegisterMask(kCoreCalleeSaves, arraysize(kCoreCalleeSaves)), + ComputeRegisterMask(kFpuCalleeSaves, arraysize(kFpuCalleeSaves)), + compiler_options, + stats, + ArrayRef<const bool>(detail::kIsIntrinsicUnimplemented)), + assembler_(graph->GetAllocator(), + compiler_options.GetInstructionSetFeatures()->AsRiscv64InstructionSetFeatures()), + location_builder_(graph, this), + instruction_visitor_(graph, this), + block_labels_(nullptr), + move_resolver_(graph->GetAllocator(), this), + uint32_literals_(std::less<uint32_t>(), + graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)), + uint64_literals_(std::less<uint64_t>(), + graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)), + boot_image_method_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)), + method_bss_entry_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)), + boot_image_type_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)), + type_bss_entry_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)), + public_type_bss_entry_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)), + package_type_bss_entry_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)), + boot_image_string_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)), + string_bss_entry_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)), + boot_image_jni_entrypoint_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)), + boot_image_other_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)), + jit_string_patches_(StringReferenceValueComparator(), + graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)), + jit_class_patches_(TypeReferenceValueComparator(), + graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)) { + // Always mark the RA register to be saved. + AddAllocatedRegister(Location::RegisterLocation(RA)); +} + +void CodeGeneratorRISCV64::MaybeIncrementHotness(bool is_frame_entry) { + if (GetCompilerOptions().CountHotnessInCompiledCode()) { + ScratchRegisterScope srs(GetAssembler()); + XRegister method = is_frame_entry ? kArtMethodRegister : srs.AllocateXRegister(); + if (!is_frame_entry) { + __ Loadd(method, SP, 0); + } + XRegister counter = srs.AllocateXRegister(); + __ Loadhu(counter, method, ArtMethod::HotnessCountOffset().Int32Value()); + Riscv64Label done; + DCHECK_EQ(0u, interpreter::kNterpHotnessValue); + __ Beqz(counter, &done); // Can clobber `TMP` if taken. + __ Addi(counter, counter, -1); + // We may not have another scratch register available for `Storeh`()`, + // so we must use the `Sh()` function directly. 
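+    // `Sh()` takes a plain signed 12-bit immediate offset; the static_assert below guarantees
+    // that the hotness counter offset fits.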
+ static_assert(IsInt<12>(ArtMethod::HotnessCountOffset().Int32Value())); + __ Sh(counter, method, ArtMethod::HotnessCountOffset().Int32Value()); + __ Bind(&done); + } + + if (GetGraph()->IsCompilingBaseline() && !Runtime::Current()->IsAotCompiler()) { + ProfilingInfo* info = GetGraph()->GetProfilingInfo(); + DCHECK(info != nullptr); + DCHECK(!HasEmptyFrame()); + uint64_t address = reinterpret_cast64<uint64_t>(info) + + ProfilingInfo::BaselineHotnessCountOffset().SizeValue(); + auto [base_address, imm12] = SplitJitAddress(address); + ScratchRegisterScope srs(GetAssembler()); + XRegister counter = srs.AllocateXRegister(); + XRegister tmp = RA; + __ LoadConst64(tmp, base_address); + SlowPathCodeRISCV64* slow_path = + new (GetScopedAllocator()) CompileOptimizedSlowPathRISCV64(tmp, imm12); + AddSlowPath(slow_path); + __ Lhu(counter, tmp, imm12); + __ Beqz(counter, slow_path->GetEntryLabel()); // Can clobber `TMP` if taken. + __ Addi(counter, counter, -1); + __ Sh(counter, tmp, imm12); + __ Bind(slow_path->GetExitLabel()); + } +} + +bool CodeGeneratorRISCV64::CanUseImplicitSuspendCheck() const { + // TODO(riscv64): Implement implicit suspend checks to reduce code size. + return false; +} + +void CodeGeneratorRISCV64::GenerateMemoryBarrier(MemBarrierKind kind) { + switch (kind) { + case MemBarrierKind::kAnyAny: + __ Fence(/*pred=*/ kFenceRead | kFenceWrite, /*succ=*/ kFenceRead | kFenceWrite); + break; + case MemBarrierKind::kAnyStore: + __ Fence(/*pred=*/ kFenceRead | kFenceWrite, /*succ=*/ kFenceWrite); + break; + case MemBarrierKind::kLoadAny: + __ Fence(/*pred=*/ kFenceRead, /*succ=*/ kFenceRead | kFenceWrite); + break; + case MemBarrierKind::kStoreStore: + __ Fence(/*pred=*/ kFenceWrite, /*succ=*/ kFenceWrite); + break; + + default: + LOG(FATAL) << "Unexpected memory barrier " << kind; + UNREACHABLE(); + } +} + +void CodeGeneratorRISCV64::GenerateFrameEntry() { + // Check if we need to generate the clinit check. We will jump to the + // resolution stub if the class is not initialized and the executing thread is + // not the thread initializing it. + // We do this before constructing the frame to get the correct stack trace if + // an exception is thrown. + if (GetCompilerOptions().ShouldCompileWithClinitCheck(GetGraph()->GetArtMethod())) { + Riscv64Label resolution; + Riscv64Label memory_barrier; + + ScratchRegisterScope srs(GetAssembler()); + XRegister tmp = srs.AllocateXRegister(); + XRegister tmp2 = srs.AllocateXRegister(); + + // We don't emit a read barrier here to save on code size. We rely on the + // resolution trampoline to do a clinit check before re-entering this code. + __ Loadwu(tmp2, kArtMethodRegister, ArtMethod::DeclaringClassOffset().Int32Value()); + + // We shall load the full 32-bit status word with sign-extension and compare as unsigned + // to sign-extended shifted status values. This yields the same comparison as loading and + // materializing unsigned but the constant is materialized with a single LUI instruction. + __ Loadw(tmp, tmp2, mirror::Class::StatusOffset().SizeValue()); // Sign-extended. + + // Check if we're visibly initialized. + __ Li(tmp2, ShiftedSignExtendedClassStatusValue<ClassStatus::kVisiblyInitialized>()); + __ Bgeu(tmp, tmp2, &frame_entry_label_); // Can clobber `TMP` if taken. + + // Check if we're initialized and jump to code that does a memory barrier if so. + __ Li(tmp2, ShiftedSignExtendedClassStatusValue<ClassStatus::kInitialized>()); + __ Bgeu(tmp, tmp2, &memory_barrier); // Can clobber `TMP` if taken. 
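+    // Note: `ClassStatus` values are ordered, with `kInitializing` < `kInitialized` <
+    // `kVisiblyInitialized`, so these unsigned comparisons against the shifted status
+    // constants form a simple descending cascade.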
+ + // Check if we're initializing and the thread initializing is the one + // executing the code. + __ Li(tmp2, ShiftedSignExtendedClassStatusValue<ClassStatus::kInitializing>()); + __ Bltu(tmp, tmp2, &resolution); // Can clobber `TMP` if taken. + + __ Loadwu(tmp2, kArtMethodRegister, ArtMethod::DeclaringClassOffset().Int32Value()); + __ Loadw(tmp, tmp2, mirror::Class::ClinitThreadIdOffset().Int32Value()); + __ Loadw(tmp2, TR, Thread::TidOffset<kRiscv64PointerSize>().Int32Value()); + __ Beq(tmp, tmp2, &frame_entry_label_); + __ Bind(&resolution); + + // Jump to the resolution stub. + ThreadOffset64 entrypoint_offset = + GetThreadOffset<kRiscv64PointerSize>(kQuickQuickResolutionTrampoline); + __ Loadd(tmp, TR, entrypoint_offset.Int32Value()); + __ Jr(tmp); + + __ Bind(&memory_barrier); + GenerateMemoryBarrier(MemBarrierKind::kAnyAny); + } + __ Bind(&frame_entry_label_); + + bool do_overflow_check = + FrameNeedsStackCheck(GetFrameSize(), InstructionSet::kRiscv64) || !IsLeafMethod(); + + if (do_overflow_check) { + DCHECK(GetCompilerOptions().GetImplicitStackOverflowChecks()); + __ Loadw( + Zero, SP, -static_cast<int32_t>(GetStackOverflowReservedBytes(InstructionSet::kRiscv64))); + RecordPcInfo(nullptr, 0); + } + + if (!HasEmptyFrame()) { + // Make sure the frame size isn't unreasonably large. + if (GetFrameSize() > GetStackOverflowReservedBytes(InstructionSet::kRiscv64)) { + LOG(FATAL) << "Stack frame larger than " + << GetStackOverflowReservedBytes(InstructionSet::kRiscv64) << " bytes"; + } + + // Spill callee-saved registers. + + uint32_t frame_size = GetFrameSize(); + + IncreaseFrame(frame_size); + + uint32_t offset = frame_size; + for (size_t i = arraysize(kCoreCalleeSaves); i != 0; ) { + --i; + XRegister reg = kCoreCalleeSaves[i]; + if (allocated_registers_.ContainsCoreRegister(reg)) { + offset -= kRiscv64DoublewordSize; + __ Stored(reg, SP, offset); + __ cfi().RelOffset(dwarf::Reg::Riscv64Core(reg), offset); + } + } + + for (size_t i = arraysize(kFpuCalleeSaves); i != 0; ) { + --i; + FRegister reg = kFpuCalleeSaves[i]; + if (allocated_registers_.ContainsFloatingPointRegister(reg)) { + offset -= kRiscv64DoublewordSize; + __ FStored(reg, SP, offset); + __ cfi().RelOffset(dwarf::Reg::Riscv64Fp(reg), offset); + } + } + + // Save the current method if we need it. Note that we do not + // do this in HCurrentMethod, as the instruction might have been removed + // in the SSA graph. + if (RequiresCurrentMethod()) { + __ Stored(kArtMethodRegister, SP, 0); + } + + if (GetGraph()->HasShouldDeoptimizeFlag()) { + // Initialize should_deoptimize flag to 0. + __ Storew(Zero, SP, GetStackOffsetOfShouldDeoptimizeFlag()); + } + } + MaybeIncrementHotness(/*is_frame_entry=*/ true); +} + +void CodeGeneratorRISCV64::GenerateFrameExit() { + __ cfi().RememberState(); + + if (!HasEmptyFrame()) { + // Restore callee-saved registers. + + // For better instruction scheduling restore RA before other registers. 
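+    // (The loop below walks `kCoreCalleeSaves` from the end, so the register listed last in
+    // that array is restored first.)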
+ uint32_t offset = GetFrameSize(); + for (size_t i = arraysize(kCoreCalleeSaves); i != 0; ) { + --i; + XRegister reg = kCoreCalleeSaves[i]; + if (allocated_registers_.ContainsCoreRegister(reg)) { + offset -= kRiscv64DoublewordSize; + __ Loadd(reg, SP, offset); + __ cfi().Restore(dwarf::Reg::Riscv64Core(reg)); + } + } + + for (size_t i = arraysize(kFpuCalleeSaves); i != 0; ) { + --i; + FRegister reg = kFpuCalleeSaves[i]; + if (allocated_registers_.ContainsFloatingPointRegister(reg)) { + offset -= kRiscv64DoublewordSize; + __ FLoadd(reg, SP, offset); + __ cfi().Restore(dwarf::Reg::Riscv64Fp(reg)); + } + } + + DecreaseFrame(GetFrameSize()); + } + + __ Jr(RA); + + __ cfi().RestoreState(); + __ cfi().DefCFAOffset(GetFrameSize()); +} + +void CodeGeneratorRISCV64::Bind(HBasicBlock* block) { __ Bind(GetLabelOf(block)); } + +void CodeGeneratorRISCV64::MoveConstant(Location destination, int32_t value) { + DCHECK(destination.IsRegister()); + __ LoadConst32(destination.AsRegister<XRegister>(), value); +} + +void CodeGeneratorRISCV64::MoveLocation(Location destination, + Location source, + DataType::Type dst_type) { + if (source.Equals(destination)) { + return; + } + + // A valid move type can always be inferred from the destination and source locations. + // When moving from and to a register, the `dst_type` can be used to generate 32-bit instead + // of 64-bit moves but it's generally OK to use 64-bit moves for 32-bit values in registers. + bool unspecified_type = (dst_type == DataType::Type::kVoid); + // TODO(riscv64): Is the destination type known in all cases? + // TODO(riscv64): Can unspecified `dst_type` move 32-bit GPR to FPR without NaN-boxing? + CHECK(!unspecified_type); + + if (destination.IsRegister() || destination.IsFpuRegister()) { + if (unspecified_type) { + HConstant* src_cst = source.IsConstant() ? source.GetConstant() : nullptr; + if (source.IsStackSlot() || + (src_cst != nullptr && + (src_cst->IsIntConstant() || src_cst->IsFloatConstant() || src_cst->IsNullConstant()))) { + // For stack slots and 32-bit constants, a 32-bit type is appropriate. + dst_type = destination.IsRegister() ? DataType::Type::kInt32 : DataType::Type::kFloat32; + } else { + // If the source is a double stack slot or a 64-bit constant, a 64-bit type + // is appropriate. Else the source is a register, and since the type has not + // been specified, we chose a 64-bit type to force a 64-bit move. + dst_type = destination.IsRegister() ? DataType::Type::kInt64 : DataType::Type::kFloat64; + } + } + DCHECK((destination.IsFpuRegister() && DataType::IsFloatingPointType(dst_type)) || + (destination.IsRegister() && !DataType::IsFloatingPointType(dst_type))); + + if (source.IsStackSlot() || source.IsDoubleStackSlot()) { + // Move to GPR/FPR from stack + if (DataType::IsFloatingPointType(dst_type)) { + if (DataType::Is64BitType(dst_type)) { + __ FLoadd(destination.AsFpuRegister<FRegister>(), SP, source.GetStackIndex()); + } else { + __ FLoadw(destination.AsFpuRegister<FRegister>(), SP, source.GetStackIndex()); + } + } else { + if (DataType::Is64BitType(dst_type)) { + __ Loadd(destination.AsRegister<XRegister>(), SP, source.GetStackIndex()); + } else if (dst_type == DataType::Type::kReference) { + __ Loadwu(destination.AsRegister<XRegister>(), SP, source.GetStackIndex()); + } else { + __ Loadw(destination.AsRegister<XRegister>(), SP, source.GetStackIndex()); + } + } + } else if (source.IsConstant()) { + // Move to GPR/FPR from constant + // TODO(riscv64): Consider using literals for difficult-to-materialize 64-bit constants. 
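+      // RISC-V has no FP load-immediate, so FP constants are materialized in a GPR first and
+      // then transferred with `FMvWX()`/`FMvDX()` below.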
+ int64_t value = GetInt64ValueOf(source.GetConstant()->AsConstant()); + ScratchRegisterScope srs(GetAssembler()); + XRegister gpr = DataType::IsFloatingPointType(dst_type) + ? srs.AllocateXRegister() + : destination.AsRegister<XRegister>(); + if (DataType::IsFloatingPointType(dst_type) && value == 0) { + gpr = Zero; // Note: The scratch register allocated above shall not be used. + } else { + // Note: For `float` we load the sign-extended value here as it can sometimes yield + // a shorter instruction sequence. The higher 32 bits shall be ignored during the + // transfer to FP reg and the result shall be correctly NaN-boxed. + __ LoadConst64(gpr, value); + } + if (dst_type == DataType::Type::kFloat32) { + __ FMvWX(destination.AsFpuRegister<FRegister>(), gpr); + } else if (dst_type == DataType::Type::kFloat64) { + __ FMvDX(destination.AsFpuRegister<FRegister>(), gpr); + } + } else if (source.IsRegister()) { + if (destination.IsRegister()) { + // Move to GPR from GPR + __ Mv(destination.AsRegister<XRegister>(), source.AsRegister<XRegister>()); + } else { + DCHECK(destination.IsFpuRegister()); + if (DataType::Is64BitType(dst_type)) { + __ FMvDX(destination.AsFpuRegister<FRegister>(), source.AsRegister<XRegister>()); + } else { + __ FMvWX(destination.AsFpuRegister<FRegister>(), source.AsRegister<XRegister>()); + } + } + } else if (source.IsFpuRegister()) { + if (destination.IsFpuRegister()) { + if (GetGraph()->HasSIMD()) { + LOG(FATAL) << "Vector extension is unsupported"; + UNREACHABLE(); + } else { + // Move to FPR from FPR + if (dst_type == DataType::Type::kFloat32) { + __ FMvS(destination.AsFpuRegister<FRegister>(), source.AsFpuRegister<FRegister>()); + } else { + DCHECK_EQ(dst_type, DataType::Type::kFloat64); + __ FMvD(destination.AsFpuRegister<FRegister>(), source.AsFpuRegister<FRegister>()); + } + } + } else { + DCHECK(destination.IsRegister()); + if (DataType::Is64BitType(dst_type)) { + __ FMvXD(destination.AsRegister<XRegister>(), source.AsFpuRegister<FRegister>()); + } else { + __ FMvXW(destination.AsRegister<XRegister>(), source.AsFpuRegister<FRegister>()); + } + } + } + } else if (destination.IsSIMDStackSlot()) { + LOG(FATAL) << "SIMD is unsupported"; + UNREACHABLE(); + } else { // The destination is not a register. It must be a stack slot. + DCHECK(destination.IsStackSlot() || destination.IsDoubleStackSlot()); + if (source.IsRegister() || source.IsFpuRegister()) { + if (unspecified_type) { + if (source.IsRegister()) { + dst_type = destination.IsStackSlot() ? DataType::Type::kInt32 : DataType::Type::kInt64; + } else { + dst_type = + destination.IsStackSlot() ? DataType::Type::kFloat32 : DataType::Type::kFloat64; + } + } + DCHECK_EQ(source.IsFpuRegister(), DataType::IsFloatingPointType(dst_type)); + // For direct @CriticalNative calls, we need to sign-extend narrow integral args + // to 64 bits, so widening integral values is allowed. Narrowing is forbidden. 
+ DCHECK_IMPLIES(DataType::IsFloatingPointType(dst_type) || destination.IsStackSlot(), + destination.IsDoubleStackSlot() == DataType::Is64BitType(dst_type)); + // Move to stack from GPR/FPR + if (destination.IsDoubleStackSlot()) { + if (source.IsRegister()) { + __ Stored(source.AsRegister<XRegister>(), SP, destination.GetStackIndex()); + } else { + __ FStored(source.AsFpuRegister<FRegister>(), SP, destination.GetStackIndex()); + } + } else { + if (source.IsRegister()) { + __ Storew(source.AsRegister<XRegister>(), SP, destination.GetStackIndex()); + } else { + __ FStorew(source.AsFpuRegister<FRegister>(), SP, destination.GetStackIndex()); + } + } + } else if (source.IsConstant()) { + // Move to stack from constant + int64_t value = GetInt64ValueOf(source.GetConstant()); + ScratchRegisterScope srs(GetAssembler()); + XRegister gpr = (value != 0) ? srs.AllocateXRegister() : Zero; + if (value != 0) { + __ LoadConst64(gpr, value); + } + if (destination.IsStackSlot()) { + __ Storew(gpr, SP, destination.GetStackIndex()); + } else { + DCHECK(destination.IsDoubleStackSlot()); + __ Stored(gpr, SP, destination.GetStackIndex()); + } + } else { + DCHECK(source.IsStackSlot() || source.IsDoubleStackSlot()); + // For direct @CriticalNative calls, we need to sign-extend narrow integral args + // to 64 bits, so widening move is allowed. Narrowing move is forbidden. + DCHECK_IMPLIES(destination.IsStackSlot(), source.IsStackSlot()); + // Move to stack from stack + ScratchRegisterScope srs(GetAssembler()); + XRegister tmp = srs.AllocateXRegister(); + if (source.IsStackSlot()) { + __ Loadw(tmp, SP, source.GetStackIndex()); + } else { + __ Loadd(tmp, SP, source.GetStackIndex()); + } + if (destination.IsStackSlot()) { + __ Storew(tmp, SP, destination.GetStackIndex()); + } else { + __ Stored(tmp, SP, destination.GetStackIndex()); + } + } + } +} + +void CodeGeneratorRISCV64::AddLocationAsTemp(Location location, LocationSummary* locations) { + if (location.IsRegister()) { + locations->AddTemp(location); + } else { + UNIMPLEMENTED(FATAL) << "AddLocationAsTemp not implemented for location " << location; + } +} + +void CodeGeneratorRISCV64::SetupBlockedRegisters() const { + // ZERO, GP, SP, RA, TP and TR(S1) are reserved and can't be allocated. + blocked_core_registers_[Zero] = true; + blocked_core_registers_[GP] = true; + blocked_core_registers_[SP] = true; + blocked_core_registers_[RA] = true; + blocked_core_registers_[TP] = true; + blocked_core_registers_[TR] = true; // ART Thread register. + + // TMP(T6), TMP2(T5) and FTMP(FT11) are used as temporary/scratch registers. + blocked_core_registers_[TMP] = true; + blocked_core_registers_[TMP2] = true; + blocked_fpu_registers_[FTMP] = true; + + if (GetGraph()->IsDebuggable()) { + // Stubs do not save callee-save floating point registers. If the graph + // is debuggable, we need to deal with these registers differently. For + // now, just block them. 
+ for (size_t i = 0; i < arraysize(kFpuCalleeSaves); ++i) { + blocked_fpu_registers_[kFpuCalleeSaves[i]] = true; + } + } +} + +size_t CodeGeneratorRISCV64::SaveCoreRegister(size_t stack_index, uint32_t reg_id) { + __ Stored(XRegister(reg_id), SP, stack_index); + return kRiscv64DoublewordSize; +} + +size_t CodeGeneratorRISCV64::RestoreCoreRegister(size_t stack_index, uint32_t reg_id) { + __ Loadd(XRegister(reg_id), SP, stack_index); + return kRiscv64DoublewordSize; +} + +size_t CodeGeneratorRISCV64::SaveFloatingPointRegister(size_t stack_index, uint32_t reg_id) { + if (GetGraph()->HasSIMD()) { + // TODO(riscv64): RISC-V vector extension. + UNIMPLEMENTED(FATAL) << "Vector extension is unsupported"; + UNREACHABLE(); + } + __ FStored(FRegister(reg_id), SP, stack_index); + return kRiscv64FloatRegSizeInBytes; +} + +size_t CodeGeneratorRISCV64::RestoreFloatingPointRegister(size_t stack_index, uint32_t reg_id) { + if (GetGraph()->HasSIMD()) { + // TODO(riscv64): RISC-V vector extension. + UNIMPLEMENTED(FATAL) << "Vector extension is unsupported"; + UNREACHABLE(); + } + __ FLoadd(FRegister(reg_id), SP, stack_index); + return kRiscv64FloatRegSizeInBytes; +} + +void CodeGeneratorRISCV64::DumpCoreRegister(std::ostream& stream, int reg) const { + stream << XRegister(reg); +} + +void CodeGeneratorRISCV64::DumpFloatingPointRegister(std::ostream& stream, int reg) const { + stream << FRegister(reg); +} + +const Riscv64InstructionSetFeatures& CodeGeneratorRISCV64::GetInstructionSetFeatures() const { + return *GetCompilerOptions().GetInstructionSetFeatures()->AsRiscv64InstructionSetFeatures(); +} + +void CodeGeneratorRISCV64::Finalize() { + // Ensure that we fix up branches and literal loads and emit the literal pool. + __ FinalizeCode(); + + // Adjust native pc offsets in stack maps. + StackMapStream* stack_map_stream = GetStackMapStream(); + for (size_t i = 0, num = stack_map_stream->GetNumberOfStackMaps(); i != num; ++i) { + uint32_t old_position = stack_map_stream->GetStackMapNativePcOffset(i); + uint32_t new_position = __ GetAdjustedPosition(old_position); + DCHECK_GE(new_position, old_position); + stack_map_stream->SetStackMapNativePcOffset(i, new_position); + } + + // Adjust pc offsets for the disassembly information. + if (disasm_info_ != nullptr) { + GeneratedCodeInterval* frame_entry_interval = disasm_info_->GetFrameEntryInterval(); + frame_entry_interval->start = __ GetAdjustedPosition(frame_entry_interval->start); + frame_entry_interval->end = __ GetAdjustedPosition(frame_entry_interval->end); + for (auto& entry : *disasm_info_->GetInstructionIntervals()) { + entry.second.start = __ GetAdjustedPosition(entry.second.start); + entry.second.end = __ GetAdjustedPosition(entry.second.end); + } + for (auto& entry : *disasm_info_->GetSlowPathIntervals()) { + entry.code_interval.start = __ GetAdjustedPosition(entry.code_interval.start); + entry.code_interval.end = __ GetAdjustedPosition(entry.code_interval.end); + } + } +} + +// Generate code to invoke a runtime entry point. +void CodeGeneratorRISCV64::InvokeRuntime(QuickEntrypointEnum entrypoint, + HInstruction* instruction, + uint32_t dex_pc, + SlowPathCode* slow_path) { + ValidateInvokeRuntime(entrypoint, instruction, slow_path); + + ThreadOffset64 entrypoint_offset = GetThreadOffset<kRiscv64PointerSize>(entrypoint); + + // TODO(riscv64): Reduce code size for AOT by using shared trampolines for slow path + // runtime calls across the entire oat file. 
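+  // Quick entrypoints live at fixed offsets in the Thread object, so the call target is
+  // loaded TR-relative and invoked indirectly.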
+ __ Loadd(RA, TR, entrypoint_offset.Int32Value()); + __ Jalr(RA); + if (EntrypointRequiresStackMap(entrypoint)) { + RecordPcInfo(instruction, dex_pc, slow_path); + } +} + +// Generate code to invoke a runtime entry point, but do not record +// PC-related information in a stack map. +void CodeGeneratorRISCV64::InvokeRuntimeWithoutRecordingPcInfo(int32_t entry_point_offset, + HInstruction* instruction, + SlowPathCode* slow_path) { + ValidateInvokeRuntimeWithoutRecordingPcInfo(instruction, slow_path); + __ Loadd(RA, TR, entry_point_offset); + __ Jalr(RA); +} + +void CodeGeneratorRISCV64::IncreaseFrame(size_t adjustment) { + int32_t adjustment32 = dchecked_integral_cast<int32_t>(adjustment); + __ AddConst64(SP, SP, -adjustment32); + GetAssembler()->cfi().AdjustCFAOffset(adjustment32); +} + +void CodeGeneratorRISCV64::DecreaseFrame(size_t adjustment) { + int32_t adjustment32 = dchecked_integral_cast<int32_t>(adjustment); + __ AddConst64(SP, SP, adjustment32); + GetAssembler()->cfi().AdjustCFAOffset(-adjustment32); +} + +void CodeGeneratorRISCV64::GenerateNop() { + __ Nop(); +} + +void CodeGeneratorRISCV64::GenerateImplicitNullCheck(HNullCheck* instruction) { + if (CanMoveNullCheckToUser(instruction)) { + return; + } + Location obj = instruction->GetLocations()->InAt(0); + + __ Lw(Zero, obj.AsRegister<XRegister>(), 0); + RecordPcInfo(instruction, instruction->GetDexPc()); +} + +void CodeGeneratorRISCV64::GenerateExplicitNullCheck(HNullCheck* instruction) { + SlowPathCodeRISCV64* slow_path = new (GetScopedAllocator()) NullCheckSlowPathRISCV64(instruction); + AddSlowPath(slow_path); + + Location obj = instruction->GetLocations()->InAt(0); + + __ Beqz(obj.AsRegister<XRegister>(), slow_path->GetEntryLabel()); +} + +HLoadString::LoadKind CodeGeneratorRISCV64::GetSupportedLoadStringKind( + HLoadString::LoadKind desired_string_load_kind) { + switch (desired_string_load_kind) { + case HLoadString::LoadKind::kBootImageLinkTimePcRelative: + case HLoadString::LoadKind::kBootImageRelRo: + case HLoadString::LoadKind::kBssEntry: + DCHECK(!Runtime::Current()->UseJitCompilation()); + break; + case HLoadString::LoadKind::kJitBootImageAddress: + case HLoadString::LoadKind::kJitTableAddress: + DCHECK(Runtime::Current()->UseJitCompilation()); + break; + case HLoadString::LoadKind::kRuntimeCall: + break; + } + return desired_string_load_kind; +} + +HLoadClass::LoadKind CodeGeneratorRISCV64::GetSupportedLoadClassKind( + HLoadClass::LoadKind desired_class_load_kind) { + switch (desired_class_load_kind) { + case HLoadClass::LoadKind::kInvalid: + LOG(FATAL) << "UNREACHABLE"; + UNREACHABLE(); + case HLoadClass::LoadKind::kReferrersClass: + break; + case HLoadClass::LoadKind::kBootImageLinkTimePcRelative: + case HLoadClass::LoadKind::kBootImageRelRo: + case HLoadClass::LoadKind::kBssEntry: + case HLoadClass::LoadKind::kBssEntryPublic: + case HLoadClass::LoadKind::kBssEntryPackage: + DCHECK(!Runtime::Current()->UseJitCompilation()); + break; + case HLoadClass::LoadKind::kJitBootImageAddress: + case HLoadClass::LoadKind::kJitTableAddress: + DCHECK(Runtime::Current()->UseJitCompilation()); + break; + case HLoadClass::LoadKind::kRuntimeCall: + break; + } + return desired_class_load_kind; +} + +HInvokeStaticOrDirect::DispatchInfo CodeGeneratorRISCV64::GetSupportedInvokeStaticOrDirectDispatch( + const HInvokeStaticOrDirect::DispatchInfo& desired_dispatch_info, ArtMethod* method) { + UNUSED(method); + // On RISCV64 we support all dispatch types. 
+ return desired_dispatch_info; +} + +CodeGeneratorRISCV64::PcRelativePatchInfo* CodeGeneratorRISCV64::NewBootImageIntrinsicPatch( + uint32_t intrinsic_data, const PcRelativePatchInfo* info_high) { + return NewPcRelativePatch( + /* dex_file= */ nullptr, intrinsic_data, info_high, &boot_image_other_patches_); +} + +CodeGeneratorRISCV64::PcRelativePatchInfo* CodeGeneratorRISCV64::NewBootImageRelRoPatch( + uint32_t boot_image_offset, const PcRelativePatchInfo* info_high) { + return NewPcRelativePatch( + /* dex_file= */ nullptr, boot_image_offset, info_high, &boot_image_other_patches_); +} + +CodeGeneratorRISCV64::PcRelativePatchInfo* CodeGeneratorRISCV64::NewBootImageMethodPatch( + MethodReference target_method, const PcRelativePatchInfo* info_high) { + return NewPcRelativePatch( + target_method.dex_file, target_method.index, info_high, &boot_image_method_patches_); +} + +CodeGeneratorRISCV64::PcRelativePatchInfo* CodeGeneratorRISCV64::NewMethodBssEntryPatch( + MethodReference target_method, const PcRelativePatchInfo* info_high) { + return NewPcRelativePatch( + target_method.dex_file, target_method.index, info_high, &method_bss_entry_patches_); +} + +CodeGeneratorRISCV64::PcRelativePatchInfo* CodeGeneratorRISCV64::NewBootImageTypePatch( + const DexFile& dex_file, dex::TypeIndex type_index, const PcRelativePatchInfo* info_high) { + return NewPcRelativePatch(&dex_file, type_index.index_, info_high, &boot_image_type_patches_); +} + +CodeGeneratorRISCV64::PcRelativePatchInfo* CodeGeneratorRISCV64::NewBootImageJniEntrypointPatch( + MethodReference target_method, const PcRelativePatchInfo* info_high) { + return NewPcRelativePatch( + target_method.dex_file, target_method.index, info_high, &boot_image_jni_entrypoint_patches_); +} + +CodeGeneratorRISCV64::PcRelativePatchInfo* CodeGeneratorRISCV64::NewTypeBssEntryPatch( + HLoadClass* load_class, + const PcRelativePatchInfo* info_high) { + const DexFile& dex_file = load_class->GetDexFile(); + dex::TypeIndex type_index = load_class->GetTypeIndex(); + ArenaDeque<PcRelativePatchInfo>* patches = nullptr; + switch (load_class->GetLoadKind()) { + case HLoadClass::LoadKind::kBssEntry: + patches = &type_bss_entry_patches_; + break; + case HLoadClass::LoadKind::kBssEntryPublic: + patches = &public_type_bss_entry_patches_; + break; + case HLoadClass::LoadKind::kBssEntryPackage: + patches = &package_type_bss_entry_patches_; + break; + default: + LOG(FATAL) << "Unexpected load kind: " << load_class->GetLoadKind(); + UNREACHABLE(); + } + return NewPcRelativePatch(&dex_file, type_index.index_, info_high, patches); +} + +CodeGeneratorRISCV64::PcRelativePatchInfo* CodeGeneratorRISCV64::NewBootImageStringPatch( + const DexFile& dex_file, dex::StringIndex string_index, const PcRelativePatchInfo* info_high) { + return NewPcRelativePatch(&dex_file, string_index.index_, info_high, &boot_image_string_patches_); +} + +CodeGeneratorRISCV64::PcRelativePatchInfo* CodeGeneratorRISCV64::NewStringBssEntryPatch( + const DexFile& dex_file, dex::StringIndex string_index, const PcRelativePatchInfo* info_high) { + return NewPcRelativePatch(&dex_file, string_index.index_, info_high, &string_bss_entry_patches_); +} + +CodeGeneratorRISCV64::PcRelativePatchInfo* CodeGeneratorRISCV64::NewPcRelativePatch( + const DexFile* dex_file, + uint32_t offset_or_index, + const PcRelativePatchInfo* info_high, + ArenaDeque<PcRelativePatchInfo>* patches) { + patches->emplace_back(dex_file, offset_or_index, info_high); + return &patches->back(); +} + +Literal* 
CodeGeneratorRISCV64::DeduplicateUint32Literal(uint32_t value) { + return uint32_literals_.GetOrCreate(value, + [this, value]() { return __ NewLiteral<uint32_t>(value); }); +} + +Literal* CodeGeneratorRISCV64::DeduplicateUint64Literal(uint64_t value) { + return uint64_literals_.GetOrCreate(value, + [this, value]() { return __ NewLiteral<uint64_t>(value); }); +} + +Literal* CodeGeneratorRISCV64::DeduplicateBootImageAddressLiteral(uint64_t address) { + return DeduplicateUint32Literal(dchecked_integral_cast<uint32_t>(address)); +} + +Literal* CodeGeneratorRISCV64::DeduplicateJitStringLiteral(const DexFile& dex_file, + dex::StringIndex string_index, + Handle<mirror::String> handle) { + ReserveJitStringRoot(StringReference(&dex_file, string_index), handle); + return jit_string_patches_.GetOrCreate( + StringReference(&dex_file, string_index), + [this]() { return __ NewLiteral<uint32_t>(/* value= */ 0u); }); +} + +Literal* CodeGeneratorRISCV64::DeduplicateJitClassLiteral(const DexFile& dex_file, + dex::TypeIndex type_index, + Handle<mirror::Class> handle) { + ReserveJitClassRoot(TypeReference(&dex_file, type_index), handle); + return jit_class_patches_.GetOrCreate( + TypeReference(&dex_file, type_index), + [this]() { return __ NewLiteral<uint32_t>(/* value= */ 0u); }); +} + +void CodeGeneratorRISCV64::PatchJitRootUse(uint8_t* code, + const uint8_t* roots_data, + const Literal* literal, + uint64_t index_in_table) const { + uint32_t literal_offset = GetAssembler().GetLabelLocation(literal->GetLabel()); + uintptr_t address = + reinterpret_cast<uintptr_t>(roots_data) + index_in_table * sizeof(GcRoot<mirror::Object>); + reinterpret_cast<uint32_t*>(code + literal_offset)[0] = dchecked_integral_cast<uint32_t>(address); +} + +void CodeGeneratorRISCV64::EmitJitRootPatches(uint8_t* code, const uint8_t* roots_data) { + for (const auto& entry : jit_string_patches_) { + const StringReference& string_reference = entry.first; + Literal* table_entry_literal = entry.second; + uint64_t index_in_table = GetJitStringRootIndex(string_reference); + PatchJitRootUse(code, roots_data, table_entry_literal, index_in_table); + } + for (const auto& entry : jit_class_patches_) { + const TypeReference& type_reference = entry.first; + Literal* table_entry_literal = entry.second; + uint64_t index_in_table = GetJitClassRootIndex(type_reference); + PatchJitRootUse(code, roots_data, table_entry_literal, index_in_table); + } +} + +void CodeGeneratorRISCV64::EmitPcRelativeAuipcPlaceholder(PcRelativePatchInfo* info_high, + XRegister out) { + DCHECK(info_high->pc_insn_label == &info_high->label); + __ Bind(&info_high->label); + __ Auipc(out, /*imm20=*/ kLinkTimeOffsetPlaceholderHigh); +} + +void CodeGeneratorRISCV64::EmitPcRelativeAddiPlaceholder(PcRelativePatchInfo* info_low, + XRegister rd, + XRegister rs1) { + DCHECK(info_low->pc_insn_label != &info_low->label); + __ Bind(&info_low->label); + __ Addi(rd, rs1, /*imm12=*/ kLinkTimeOffsetPlaceholderLow); +} + +void CodeGeneratorRISCV64::EmitPcRelativeLwuPlaceholder(PcRelativePatchInfo* info_low, + XRegister rd, + XRegister rs1) { + DCHECK(info_low->pc_insn_label != &info_low->label); + __ Bind(&info_low->label); + __ Lwu(rd, rs1, /*offset=*/ kLinkTimeOffsetPlaceholderLow); +} + +void CodeGeneratorRISCV64::EmitPcRelativeLdPlaceholder(PcRelativePatchInfo* info_low, + XRegister rd, + XRegister rs1) { + DCHECK(info_low->pc_insn_label != &info_low->label); + __ Bind(&info_low->label); + __ Ld(rd, rs1, /*offset=*/ kLinkTimeOffsetPlaceholderLow); +} + +template <linker::LinkerPatch 
(*Factory)(size_t, const DexFile*, uint32_t, uint32_t)> +inline void CodeGeneratorRISCV64::EmitPcRelativeLinkerPatches( + const ArenaDeque<PcRelativePatchInfo>& infos, + ArenaVector<linker::LinkerPatch>* linker_patches) { + for (const PcRelativePatchInfo& info : infos) { + linker_patches->push_back(Factory(__ GetLabelLocation(&info.label), + info.target_dex_file, + __ GetLabelLocation(info.pc_insn_label), + info.offset_or_index)); + } +} + +template <linker::LinkerPatch (*Factory)(size_t, uint32_t, uint32_t)> +linker::LinkerPatch NoDexFileAdapter(size_t literal_offset, + const DexFile* target_dex_file, + uint32_t pc_insn_offset, + uint32_t boot_image_offset) { + DCHECK(target_dex_file == nullptr); // Unused for these patches, should be null. + return Factory(literal_offset, pc_insn_offset, boot_image_offset); +} + +void CodeGeneratorRISCV64::EmitLinkerPatches(ArenaVector<linker::LinkerPatch>* linker_patches) { + DCHECK(linker_patches->empty()); + size_t size = + boot_image_method_patches_.size() + + method_bss_entry_patches_.size() + + boot_image_type_patches_.size() + + type_bss_entry_patches_.size() + + public_type_bss_entry_patches_.size() + + package_type_bss_entry_patches_.size() + + boot_image_string_patches_.size() + + string_bss_entry_patches_.size() + + boot_image_jni_entrypoint_patches_.size() + + boot_image_other_patches_.size(); + linker_patches->reserve(size); + if (GetCompilerOptions().IsBootImage() || GetCompilerOptions().IsBootImageExtension()) { + EmitPcRelativeLinkerPatches<linker::LinkerPatch::RelativeMethodPatch>( + boot_image_method_patches_, linker_patches); + EmitPcRelativeLinkerPatches<linker::LinkerPatch::RelativeTypePatch>( + boot_image_type_patches_, linker_patches); + EmitPcRelativeLinkerPatches<linker::LinkerPatch::RelativeStringPatch>( + boot_image_string_patches_, linker_patches); + } else { + DCHECK(boot_image_method_patches_.empty()); + DCHECK(boot_image_type_patches_.empty()); + DCHECK(boot_image_string_patches_.empty()); + } + if (GetCompilerOptions().IsBootImage()) { + EmitPcRelativeLinkerPatches<NoDexFileAdapter<linker::LinkerPatch::IntrinsicReferencePatch>>( + boot_image_other_patches_, linker_patches); + } else { + EmitPcRelativeLinkerPatches<NoDexFileAdapter<linker::LinkerPatch::DataBimgRelRoPatch>>( + boot_image_other_patches_, linker_patches); + } + EmitPcRelativeLinkerPatches<linker::LinkerPatch::MethodBssEntryPatch>( + method_bss_entry_patches_, linker_patches); + EmitPcRelativeLinkerPatches<linker::LinkerPatch::TypeBssEntryPatch>( + type_bss_entry_patches_, linker_patches); + EmitPcRelativeLinkerPatches<linker::LinkerPatch::PublicTypeBssEntryPatch>( + public_type_bss_entry_patches_, linker_patches); + EmitPcRelativeLinkerPatches<linker::LinkerPatch::PackageTypeBssEntryPatch>( + package_type_bss_entry_patches_, linker_patches); + EmitPcRelativeLinkerPatches<linker::LinkerPatch::StringBssEntryPatch>( + string_bss_entry_patches_, linker_patches); + EmitPcRelativeLinkerPatches<linker::LinkerPatch::RelativeJniEntrypointPatch>( + boot_image_jni_entrypoint_patches_, linker_patches); + DCHECK_EQ(size, linker_patches->size()); +} + +void CodeGeneratorRISCV64::LoadTypeForBootImageIntrinsic(XRegister dest, + TypeReference target_type) { + // Load the type the same way as for HLoadClass::LoadKind::kBootImageLinkTimePcRelative. 
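+  // The address is formed by an AUIPC/ADDI pair whose placeholder immediates are patched at
+  // link time (see the `EmitPcRelative*Placeholder()` helpers above).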
+ DCHECK(GetCompilerOptions().IsBootImage() || GetCompilerOptions().IsBootImageExtension()); + PcRelativePatchInfo* info_high = + NewBootImageTypePatch(*target_type.dex_file, target_type.TypeIndex()); + EmitPcRelativeAuipcPlaceholder(info_high, dest); + PcRelativePatchInfo* info_low = + NewBootImageTypePatch(*target_type.dex_file, target_type.TypeIndex(), info_high); + EmitPcRelativeAddiPlaceholder(info_low, dest, dest); +} + +void CodeGeneratorRISCV64::LoadBootImageRelRoEntry(XRegister dest, uint32_t boot_image_offset) { + PcRelativePatchInfo* info_high = NewBootImageRelRoPatch(boot_image_offset); + EmitPcRelativeAuipcPlaceholder(info_high, dest); + PcRelativePatchInfo* info_low = NewBootImageRelRoPatch(boot_image_offset, info_high); + // Note: Boot image is in the low 4GiB and the entry is always 32-bit, so emit a 32-bit load. + EmitPcRelativeLwuPlaceholder(info_low, dest, dest); +} + +void CodeGeneratorRISCV64::LoadBootImageAddress(XRegister dest, uint32_t boot_image_reference) { + if (GetCompilerOptions().IsBootImage()) { + PcRelativePatchInfo* info_high = NewBootImageIntrinsicPatch(boot_image_reference); + EmitPcRelativeAuipcPlaceholder(info_high, dest); + PcRelativePatchInfo* info_low = NewBootImageIntrinsicPatch(boot_image_reference, info_high); + EmitPcRelativeAddiPlaceholder(info_low, dest, dest); + } else if (GetCompilerOptions().GetCompilePic()) { + LoadBootImageRelRoEntry(dest, boot_image_reference); + } else { + DCHECK(GetCompilerOptions().IsJitCompiler()); + gc::Heap* heap = Runtime::Current()->GetHeap(); + DCHECK(!heap->GetBootImageSpaces().empty()); + const uint8_t* address = heap->GetBootImageSpaces()[0]->Begin() + boot_image_reference; + // Note: Boot image is in the low 4GiB (usually the low 2GiB, requiring just LUI+ADDI). + // We may not have an available scratch register for `LoadConst64()` but it never + // emits better code than `Li()` for 32-bit unsigned constants anyway. 
+ __ Li(dest, reinterpret_cast32<uint32_t>(address)); + } +} + +void CodeGeneratorRISCV64::LoadIntrinsicDeclaringClass(XRegister dest, HInvoke* invoke) { + DCHECK_NE(invoke->GetIntrinsic(), Intrinsics::kNone); + if (GetCompilerOptions().IsBootImage()) { + MethodReference target_method = invoke->GetResolvedMethodReference(); + dex::TypeIndex type_idx = target_method.dex_file->GetMethodId(target_method.index).class_idx_; + LoadTypeForBootImageIntrinsic(dest, TypeReference(target_method.dex_file, type_idx)); + } else { + uint32_t boot_image_offset = GetBootImageOffsetOfIntrinsicDeclaringClass(invoke); + LoadBootImageAddress(dest, boot_image_offset); + } +} + +void CodeGeneratorRISCV64::LoadClassRootForIntrinsic(XRegister dest, ClassRoot class_root) { + if (GetCompilerOptions().IsBootImage()) { + ScopedObjectAccess soa(Thread::Current()); + ObjPtr<mirror::Class> klass = GetClassRoot(class_root); + TypeReference target_type(&klass->GetDexFile(), klass->GetDexTypeIndex()); + LoadTypeForBootImageIntrinsic(dest, target_type); + } else { + uint32_t boot_image_offset = GetBootImageOffset(class_root); + LoadBootImageAddress(dest, boot_image_offset); + } +} + +void CodeGeneratorRISCV64::LoadMethod(MethodLoadKind load_kind, Location temp, HInvoke* invoke) { + switch (load_kind) { + case MethodLoadKind::kBootImageLinkTimePcRelative: { + DCHECK(GetCompilerOptions().IsBootImage() || GetCompilerOptions().IsBootImageExtension()); + CodeGeneratorRISCV64::PcRelativePatchInfo* info_high = + NewBootImageMethodPatch(invoke->GetResolvedMethodReference()); + EmitPcRelativeAuipcPlaceholder(info_high, temp.AsRegister<XRegister>()); + CodeGeneratorRISCV64::PcRelativePatchInfo* info_low = + NewBootImageMethodPatch(invoke->GetResolvedMethodReference(), info_high); + EmitPcRelativeAddiPlaceholder( + info_low, temp.AsRegister<XRegister>(), temp.AsRegister<XRegister>()); + break; + } + case MethodLoadKind::kBootImageRelRo: { + uint32_t boot_image_offset = GetBootImageOffset(invoke); + LoadBootImageRelRoEntry(temp.AsRegister<XRegister>(), boot_image_offset); + break; + } + case MethodLoadKind::kBssEntry: { + PcRelativePatchInfo* info_high = NewMethodBssEntryPatch(invoke->GetMethodReference()); + EmitPcRelativeAuipcPlaceholder(info_high, temp.AsRegister<XRegister>()); + PcRelativePatchInfo* info_low = + NewMethodBssEntryPatch(invoke->GetMethodReference(), info_high); + EmitPcRelativeLdPlaceholder( + info_low, temp.AsRegister<XRegister>(), temp.AsRegister<XRegister>()); + break; + } + case MethodLoadKind::kJitDirectAddress: { + __ LoadConst64(temp.AsRegister<XRegister>(), + reinterpret_cast<uint64_t>(invoke->GetResolvedMethod())); + break; + } + case MethodLoadKind::kRuntimeCall: { + // Test situation, don't do anything. + break; + } + default: { + LOG(FATAL) << "Load kind should have already been handled " << load_kind; + UNREACHABLE(); + } + } +} + +void CodeGeneratorRISCV64::GenerateStaticOrDirectCall(HInvokeStaticOrDirect* invoke, + Location temp, + SlowPathCode* slow_path) { + // All registers are assumed to be correctly set up per the calling convention. + Location callee_method = temp; // For all kinds except kRecursive, callee will be in temp. 
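+  // The first switch below materializes the callee method (or its entrypoint); the second
+  // performs the call according to the code pointer location.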
+ + switch (invoke->GetMethodLoadKind()) { + case MethodLoadKind::kStringInit: { + // temp = thread->string_init_entrypoint + uint32_t offset = + GetThreadOffset<kRiscv64PointerSize>(invoke->GetStringInitEntryPoint()).Int32Value(); + __ Loadd(temp.AsRegister<XRegister>(), TR, offset); + break; + } + case MethodLoadKind::kRecursive: + callee_method = invoke->GetLocations()->InAt(invoke->GetCurrentMethodIndex()); + break; + case MethodLoadKind::kRuntimeCall: + GenerateInvokeStaticOrDirectRuntimeCall(invoke, temp, slow_path); + return; // No code pointer retrieval; the runtime performs the call directly. + case MethodLoadKind::kBootImageLinkTimePcRelative: + DCHECK(GetCompilerOptions().IsBootImage() || GetCompilerOptions().IsBootImageExtension()); + if (invoke->GetCodePtrLocation() == CodePtrLocation::kCallCriticalNative) { + // Do not materialize the method pointer, load directly the entrypoint. + CodeGeneratorRISCV64::PcRelativePatchInfo* info_high = + NewBootImageJniEntrypointPatch(invoke->GetResolvedMethodReference()); + EmitPcRelativeAuipcPlaceholder(info_high, RA); + CodeGeneratorRISCV64::PcRelativePatchInfo* info_low = + NewBootImageJniEntrypointPatch(invoke->GetResolvedMethodReference(), info_high); + EmitPcRelativeLdPlaceholder(info_low, RA, RA); + break; + } + FALLTHROUGH_INTENDED; + default: + LoadMethod(invoke->GetMethodLoadKind(), temp, invoke); + break; + } + + switch (invoke->GetCodePtrLocation()) { + case CodePtrLocation::kCallSelf: + DCHECK(!GetGraph()->HasShouldDeoptimizeFlag()); + __ Jal(&frame_entry_label_); + RecordPcInfo(invoke, invoke->GetDexPc(), slow_path); + break; + case CodePtrLocation::kCallArtMethod: + // RA = callee_method->entry_point_from_quick_compiled_code_; + __ Loadd(RA, + callee_method.AsRegister<XRegister>(), + ArtMethod::EntryPointFromQuickCompiledCodeOffset(kRiscv64PointerSize).Int32Value()); + // RA() + __ Jalr(RA); + RecordPcInfo(invoke, invoke->GetDexPc(), slow_path); + break; + case CodePtrLocation::kCallCriticalNative: { + size_t out_frame_size = + PrepareCriticalNativeCall<CriticalNativeCallingConventionVisitorRiscv64, + kNativeStackAlignment, + GetCriticalNativeDirectCallFrameSize>(invoke); + if (invoke->GetMethodLoadKind() == MethodLoadKind::kBootImageLinkTimePcRelative) { + // Entrypoint is already loaded in RA. + } else { + // RA = callee_method->ptr_sized_fields_.data_; // EntryPointFromJni + MemberOffset offset = ArtMethod::EntryPointFromJniOffset(kRiscv64PointerSize); + __ Loadd(RA, callee_method.AsRegister<XRegister>(), offset.Int32Value()); + } + __ Jalr(RA); + RecordPcInfo(invoke, invoke->GetDexPc(), slow_path); + // The result is returned the same way in native ABI and managed ABI. No result conversion is + // needed, see comments in `Riscv64JniCallingConvention::RequiresSmallResultTypeExtension()`. + if (out_frame_size != 0u) { + DecreaseFrame(out_frame_size); + } + break; + } + } + + DCHECK(!IsLeafMethod()); +} + +void CodeGeneratorRISCV64::MaybeGenerateInlineCacheCheck(HInstruction* instruction, + XRegister klass) { + if (ProfilingInfoBuilder::IsInlineCacheUseful(instruction->AsInvoke(), this)) { + ProfilingInfo* info = GetGraph()->GetProfilingInfo(); + DCHECK(info != nullptr); + InlineCache* cache = ProfilingInfoBuilder::GetInlineCache(info, instruction->AsInvoke()); + if (cache != nullptr) { + uint64_t address = reinterpret_cast64<uint64_t>(cache); + Riscv64Label done; + // The `art_quick_update_inline_cache` expects the inline cache in T5. 
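+      // T5 doubles as the TMP2 scratch register, so it is excluded from the scratch scope
+      // below while the cache pointer is live.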
+ XRegister ic_reg = T5; + ScratchRegisterScope srs(GetAssembler()); + DCHECK_EQ(srs.AvailableXRegisters(), 2u); + srs.ExcludeXRegister(ic_reg); + DCHECK_EQ(srs.AvailableXRegisters(), 1u); + __ LoadConst64(ic_reg, address); + { + ScratchRegisterScope srs2(GetAssembler()); + XRegister tmp = srs2.AllocateXRegister(); + __ Loadd(tmp, ic_reg, InlineCache::ClassesOffset().Int32Value()); + // Fast path for a monomorphic cache. + __ Beq(klass, tmp, &done); + } + InvokeRuntime(kQuickUpdateInlineCache, instruction, instruction->GetDexPc()); + __ Bind(&done); + } else { + // This is unexpected, but we don't guarantee stable compilation across + // JIT runs so just warn about it. + ScopedObjectAccess soa(Thread::Current()); + LOG(WARNING) << "Missing inline cache for " << GetGraph()->GetArtMethod()->PrettyMethod(); + } + } +} + +void CodeGeneratorRISCV64::GenerateVirtualCall(HInvokeVirtual* invoke, + Location temp_location, + SlowPathCode* slow_path) { + // Use the calling convention instead of the location of the receiver, as + // intrinsics may have put the receiver in a different register. In the intrinsics + // slow path, the arguments have been moved to the right place, so here we are + // guaranteed that the receiver is the first register of the calling convention. + InvokeDexCallingConvention calling_convention; + XRegister receiver = calling_convention.GetRegisterAt(0); + XRegister temp = temp_location.AsRegister<XRegister>(); + MemberOffset method_offset = + mirror::Class::EmbeddedVTableEntryOffset(invoke->GetVTableIndex(), kRiscv64PointerSize); + MemberOffset class_offset = mirror::Object::ClassOffset(); + Offset entry_point = ArtMethod::EntryPointFromQuickCompiledCodeOffset(kRiscv64PointerSize); + + // temp = object->GetClass(); + __ Loadwu(temp, receiver, class_offset.Int32Value()); + MaybeRecordImplicitNullCheck(invoke); + // Instead of simply (possibly) unpoisoning `temp` here, we should + // emit a read barrier for the previous class reference load. + // However this is not required in practice, as this is an + // intermediate/temporary reference and because the current + // concurrent copying collector keeps the from-space memory + // intact/accessible until the end of the marking phase (the + // concurrent copying collector may not in the future). + MaybeUnpoisonHeapReference(temp); + + // If we're compiling baseline, update the inline cache. + MaybeGenerateInlineCacheCheck(invoke, temp); + + // temp = temp->GetMethodAt(method_offset); + __ Loadd(temp, temp, method_offset.Int32Value()); + // RA = temp->GetEntryPoint(); + __ Loadd(RA, temp, entry_point.Int32Value()); + // RA(); + __ Jalr(RA); + RecordPcInfo(invoke, invoke->GetDexPc(), slow_path); +} + +void CodeGeneratorRISCV64::MoveFromReturnRegister(Location trg, DataType::Type type) { + if (!trg.IsValid()) { + DCHECK_EQ(type, DataType::Type::kVoid); + return; + } + + DCHECK_NE(type, DataType::Type::kVoid); + + if (DataType::IsIntegralType(type) || type == DataType::Type::kReference) { + XRegister trg_reg = trg.AsRegister<XRegister>(); + XRegister res_reg = Riscv64ReturnLocation(type).AsRegister<XRegister>(); + if (trg_reg != res_reg) { + __ Mv(trg_reg, res_reg); + } + } else { + FRegister trg_reg = trg.AsFpuRegister<FRegister>(); + FRegister res_reg = Riscv64ReturnLocation(type).AsFpuRegister<FRegister>(); + if (trg_reg != res_reg) { + __ FMvD(trg_reg, res_reg); // 64-bit move is OK also for `float`. + } + } +} + +void CodeGeneratorRISCV64::PoisonHeapReference(XRegister reg) { + __ Sub(reg, Zero, reg); // Negate the ref. 
+ __ ZextW(reg, reg); // Zero-extend the 32-bit ref. +} + +void CodeGeneratorRISCV64::UnpoisonHeapReference(XRegister reg) { + __ Sub(reg, Zero, reg); // Negate the ref. + __ ZextW(reg, reg); // Zero-extend the 32-bit ref. +} + +void CodeGeneratorRISCV64::MaybePoisonHeapReference(XRegister reg) { + if (kPoisonHeapReferences) { + PoisonHeapReference(reg); + } +} + +void CodeGeneratorRISCV64::MaybeUnpoisonHeapReference(XRegister reg) { + if (kPoisonHeapReferences) { + UnpoisonHeapReference(reg); + } +} + +void CodeGeneratorRISCV64::SwapLocations(Location loc1, Location loc2, DataType::Type type) { + DCHECK(!loc1.IsConstant()); + DCHECK(!loc2.IsConstant()); + + if (loc1.Equals(loc2)) { + return; + } + + bool is_slot1 = loc1.IsStackSlot() || loc1.IsDoubleStackSlot(); + bool is_slot2 = loc2.IsStackSlot() || loc2.IsDoubleStackSlot(); + bool is_simd1 = loc1.IsSIMDStackSlot(); + bool is_simd2 = loc2.IsSIMDStackSlot(); + bool is_fp_reg1 = loc1.IsFpuRegister(); + bool is_fp_reg2 = loc2.IsFpuRegister(); + + if ((is_slot1 != is_slot2) || + (loc2.IsRegister() && loc1.IsRegister()) || + (is_fp_reg2 && is_fp_reg1)) { + if ((is_fp_reg2 && is_fp_reg1) && GetGraph()->HasSIMD()) { + LOG(FATAL) << "Unsupported"; + UNREACHABLE(); + } + ScratchRegisterScope srs(GetAssembler()); + Location tmp = (is_fp_reg2 || is_fp_reg1) + ? Location::FpuRegisterLocation(srs.AllocateFRegister()) + : Location::RegisterLocation(srs.AllocateXRegister()); + MoveLocation(tmp, loc1, type); + MoveLocation(loc1, loc2, type); + MoveLocation(loc2, tmp, type); + } else if (is_slot1 && is_slot2) { + move_resolver_.Exchange(loc1.GetStackIndex(), loc2.GetStackIndex(), loc1.IsDoubleStackSlot()); + } else if (is_simd1 && is_simd2) { + // TODO(riscv64): Add VECTOR/SIMD later. + UNIMPLEMENTED(FATAL) << "Vector extension is unsupported"; + } else if ((is_fp_reg1 && is_simd2) || (is_fp_reg2 && is_simd1)) { + // TODO(riscv64): Add VECTOR/SIMD later. 
+ UNIMPLEMENTED(FATAL) << "Vector extension is unsupported"; + } else { + LOG(FATAL) << "Unimplemented swap between locations " << loc1 << " and " << loc2; + } +} + +} // namespace riscv64 +} // namespace art diff --git a/compiler/optimizing/code_generator_riscv64.h b/compiler/optimizing/code_generator_riscv64.h index 405b39aa0a..1e0eb51258 100644 --- a/compiler/optimizing/code_generator_riscv64.h +++ b/compiler/optimizing/code_generator_riscv64.h @@ -17,7 +17,827 @@ #ifndef ART_COMPILER_OPTIMIZING_CODE_GENERATOR_RISCV64_H_ #define ART_COMPILER_OPTIMIZING_CODE_GENERATOR_RISCV64_H_ +#include "android-base/logging.h" +#include "arch/riscv64/registers_riscv64.h" +#include "base/macros.h" #include "code_generator.h" #include "driver/compiler_options.h" +#include "intrinsics_list.h" +#include "optimizing/locations.h" +#include "parallel_move_resolver.h" +#include "utils/riscv64/assembler_riscv64.h" + +namespace art HIDDEN { +namespace riscv64 { + +// InvokeDexCallingConvention registers +static constexpr XRegister kParameterCoreRegisters[] = {A1, A2, A3, A4, A5, A6, A7}; +static constexpr size_t kParameterCoreRegistersLength = arraysize(kParameterCoreRegisters); + +static constexpr FRegister kParameterFpuRegisters[] = {FA0, FA1, FA2, FA3, FA4, FA5, FA6, FA7}; +static constexpr size_t kParameterFpuRegistersLength = arraysize(kParameterFpuRegisters); + +// InvokeRuntimeCallingConvention registers +static constexpr XRegister kRuntimeParameterCoreRegisters[] = {A0, A1, A2, A3, A4, A5, A6, A7}; +static constexpr size_t kRuntimeParameterCoreRegistersLength = + arraysize(kRuntimeParameterCoreRegisters); + +static constexpr FRegister kRuntimeParameterFpuRegisters[] = { + FA0, FA1, FA2, FA3, FA4, FA5, FA6, FA7 +}; +static constexpr size_t kRuntimeParameterFpuRegistersLength = + arraysize(kRuntimeParameterFpuRegisters); + +// FCLASS returns a 10-bit classification mask with the two highest bits marking NaNs +// (signaling and quiet). To detect a NaN, we can compare (either BGE or BGEU, the sign +// bit is always clear) the result with the `kFClassNaNMinValue`. +static_assert(kSignalingNaN == 0x100); +static_assert(kQuietNaN == 0x200); +static constexpr int32_t kFClassNaNMinValue = 0x100; + +#define UNIMPLEMENTED_INTRINSIC_LIST_RISCV64(V) \ + V(SystemArrayCopyByte) \ + V(SystemArrayCopyChar) \ + V(SystemArrayCopyInt) \ + V(FP16Ceil) \ + V(FP16Compare) \ + V(FP16Floor) \ + V(FP16Rint) \ + V(FP16ToFloat) \ + V(FP16ToHalf) \ + V(FP16Greater) \ + V(FP16GreaterEquals) \ + V(FP16Less) \ + V(FP16LessEquals) \ + V(FP16Min) \ + V(FP16Max) \ + V(StringCompareTo) \ + V(StringEquals) \ + V(StringGetCharsNoCheck) \ + V(StringStringIndexOf) \ + V(StringStringIndexOfAfter) \ + V(StringNewStringFromBytes) \ + V(StringNewStringFromChars) \ + V(StringNewStringFromString) \ + V(StringBufferAppend) \ + V(StringBufferLength) \ + V(StringBufferToString) \ + V(StringBuilderAppendObject) \ + V(StringBuilderAppendString) \ + V(StringBuilderAppendCharSequence) \ + V(StringBuilderAppendCharArray) \ + V(StringBuilderAppendBoolean) \ + V(StringBuilderAppendChar) \ + V(StringBuilderAppendInt) \ + V(StringBuilderAppendLong) \ + V(StringBuilderAppendFloat) \ + V(StringBuilderAppendDouble) \ + V(StringBuilderLength) \ + V(StringBuilderToString) \ + V(ThreadInterrupted) \ + V(CRC32Update) \ + V(CRC32UpdateBytes) \ + V(CRC32UpdateByteBuffer) \ + V(MethodHandleInvokeExact) \ + V(MethodHandleInvoke) + +// Method register on invoke. +static const XRegister kArtMethodRegister = A0; + +// Helper functions used by codegen as well as intrinsics. 
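A note on the FCLASS comment above: the classification result sets exactly one of ten bits, and the two NaN bits are the only values at or above 0x100, so the codegen's NaN test reduces to a single unsigned compare against kFClassNaNMinValue. Below is a host-side sketch of that check, assuming nothing from ART; FClassD is a hypothetical software model of FCLASS.D written for illustration, not a real API.

    #include <cassert>
    #include <cstdint>
    #include <cstring>
    #include <limits>

    // Software model of RISC-V FCLASS.D: returns the 10-bit classification mask.
    // Bits: 0:-inf 1:-normal 2:-subnormal 3:-0 4:+0 5:+subnormal 6:+normal 7:+inf
    //       8:signaling NaN 9:quiet NaN
    uint32_t FClassD(double value) {
      uint64_t bits;
      std::memcpy(&bits, &value, sizeof(bits));
      bool negative = (bits >> 63) != 0;
      uint64_t exponent = (bits >> 52) & 0x7ff;
      uint64_t mantissa = bits & ((uint64_t{1} << 52) - 1);
      if (exponent == 0x7ff) {
        if (mantissa == 0) return negative ? 1u << 0 : 1u << 7;  // infinity
        return (mantissa >> 51) != 0 ? 1u << 9 : 1u << 8;        // quiet : signaling NaN
      }
      if (exponent == 0) {
        if (mantissa == 0) return negative ? 1u << 3 : 1u << 4;  // zero
        return negative ? 1u << 2 : 1u << 5;                     // subnormal
      }
      return negative ? 1u << 1 : 1u << 6;                       // normal
    }

    int main() {
      constexpr uint32_t kFClassNaNMinValue = 0x100;
      // The codegen's test: one unsigned compare of the mask against 0x100,
      // because only the two NaN bits (0x100 and 0x200) reach that value.
      auto is_nan = [](double v) { return FClassD(v) >= kFClassNaNMinValue; };
      assert(is_nan(std::numeric_limits<double>::quiet_NaN()));
      assert(is_nan(std::numeric_limits<double>::signaling_NaN()));
      assert(!is_nan(0.0));
      assert(!is_nan(-1.5));
      assert(!is_nan(std::numeric_limits<double>::infinity()));
      return 0;
    }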
+XRegister InputXRegisterOrZero(Location location); +int32_t ReadBarrierMarkEntrypointOffset(Location ref); + +class CodeGeneratorRISCV64; + +class InvokeRuntimeCallingConvention : public CallingConvention<XRegister, FRegister> { + public: + InvokeRuntimeCallingConvention() + : CallingConvention(kRuntimeParameterCoreRegisters, + kRuntimeParameterCoreRegistersLength, + kRuntimeParameterFpuRegisters, + kRuntimeParameterFpuRegistersLength, + kRiscv64PointerSize) {} + + Location GetReturnLocation(DataType::Type return_type); + + private: + DISALLOW_COPY_AND_ASSIGN(InvokeRuntimeCallingConvention); +}; + +class InvokeDexCallingConvention : public CallingConvention<XRegister, FRegister> { + public: + InvokeDexCallingConvention() + : CallingConvention(kParameterCoreRegisters, + kParameterCoreRegistersLength, + kParameterFpuRegisters, + kParameterFpuRegistersLength, + kRiscv64PointerSize) {} + + private: + DISALLOW_COPY_AND_ASSIGN(InvokeDexCallingConvention); +}; + +class InvokeDexCallingConventionVisitorRISCV64 : public InvokeDexCallingConventionVisitor { + public: + InvokeDexCallingConventionVisitorRISCV64() {} + virtual ~InvokeDexCallingConventionVisitorRISCV64() {} + + Location GetNextLocation(DataType::Type type) override; + Location GetReturnLocation(DataType::Type type) const override; + Location GetMethodLocation() const override; + + private: + InvokeDexCallingConvention calling_convention; + + DISALLOW_COPY_AND_ASSIGN(InvokeDexCallingConventionVisitorRISCV64); +}; + +class CriticalNativeCallingConventionVisitorRiscv64 : public InvokeDexCallingConventionVisitor { + public: + explicit CriticalNativeCallingConventionVisitorRiscv64(bool for_register_allocation) + : for_register_allocation_(for_register_allocation) {} + + virtual ~CriticalNativeCallingConventionVisitorRiscv64() {} + + Location GetNextLocation(DataType::Type type) override; + Location GetReturnLocation(DataType::Type type) const override; + Location GetMethodLocation() const override; + + size_t GetStackOffset() const { return stack_offset_; } + + private: + // Register allocator does not support adjusting frame size, so we cannot provide final locations + // of stack arguments for register allocation. We ask the register allocator for any location and + // move these arguments to the right place after adjusting the SP when generating the call. 
+ const bool for_register_allocation_; + size_t gpr_index_ = 0u; + size_t fpr_index_ = 0u; + size_t stack_offset_ = 0u; + + DISALLOW_COPY_AND_ASSIGN(CriticalNativeCallingConventionVisitorRiscv64); +}; + +class SlowPathCodeRISCV64 : public SlowPathCode { + public: + explicit SlowPathCodeRISCV64(HInstruction* instruction) + : SlowPathCode(instruction), entry_label_(), exit_label_() {} + + Riscv64Label* GetEntryLabel() { return &entry_label_; } + Riscv64Label* GetExitLabel() { return &exit_label_; } + + private: + Riscv64Label entry_label_; + Riscv64Label exit_label_; + + DISALLOW_COPY_AND_ASSIGN(SlowPathCodeRISCV64); +}; + +class ParallelMoveResolverRISCV64 : public ParallelMoveResolverWithSwap { + public: + ParallelMoveResolverRISCV64(ArenaAllocator* allocator, CodeGeneratorRISCV64* codegen) + : ParallelMoveResolverWithSwap(allocator), codegen_(codegen) {} + + void EmitMove(size_t index) override; + void EmitSwap(size_t index) override; + void SpillScratch(int reg) override; + void RestoreScratch(int reg) override; + + void Exchange(int index1, int index2, bool double_slot); + + Riscv64Assembler* GetAssembler() const; + + private: + CodeGeneratorRISCV64* const codegen_; + + DISALLOW_COPY_AND_ASSIGN(ParallelMoveResolverRISCV64); +}; + +class FieldAccessCallingConventionRISCV64 : public FieldAccessCallingConvention { + public: + FieldAccessCallingConventionRISCV64() {} + + Location GetObjectLocation() const override { + return Location::RegisterLocation(A1); + } + Location GetFieldIndexLocation() const override { + return Location::RegisterLocation(A0); + } + Location GetReturnLocation(DataType::Type type ATTRIBUTE_UNUSED) const override { + return Location::RegisterLocation(A0); + } + Location GetSetValueLocation(DataType::Type type ATTRIBUTE_UNUSED, + bool is_instance) const override { + return is_instance + ? 
Location::RegisterLocation(A2) + : Location::RegisterLocation(A1); + } + Location GetFpuLocation(DataType::Type type ATTRIBUTE_UNUSED) const override { + return Location::FpuRegisterLocation(FA0); + } + + private: + DISALLOW_COPY_AND_ASSIGN(FieldAccessCallingConventionRISCV64); +}; + +class LocationsBuilderRISCV64 : public HGraphVisitor { + public: + LocationsBuilderRISCV64(HGraph* graph, CodeGeneratorRISCV64* codegen) + : HGraphVisitor(graph), codegen_(codegen) {} + +#define DECLARE_VISIT_INSTRUCTION(name, super) void Visit##name(H##name* instr) override; + + FOR_EACH_CONCRETE_INSTRUCTION_COMMON(DECLARE_VISIT_INSTRUCTION) + FOR_EACH_CONCRETE_INSTRUCTION_RISCV64(DECLARE_VISIT_INSTRUCTION) + +#undef DECLARE_VISIT_INSTRUCTION + + void VisitInstruction(HInstruction* instruction) override { + LOG(FATAL) << "Unreachable instruction " << instruction->DebugName() << " (id " + << instruction->GetId() << ")"; + } + + protected: + void HandleInvoke(HInvoke* invoke); + void HandleBinaryOp(HBinaryOperation* operation); + void HandleCondition(HCondition* instruction); + void HandleShift(HBinaryOperation* operation); + void HandleFieldSet(HInstruction* instruction); + void HandleFieldGet(HInstruction* instruction); + + InvokeDexCallingConventionVisitorRISCV64 parameter_visitor_; + + CodeGeneratorRISCV64* const codegen_; + + DISALLOW_COPY_AND_ASSIGN(LocationsBuilderRISCV64); +}; + +class InstructionCodeGeneratorRISCV64 : public InstructionCodeGenerator { + public: + InstructionCodeGeneratorRISCV64(HGraph* graph, CodeGeneratorRISCV64* codegen); + +#define DECLARE_VISIT_INSTRUCTION(name, super) void Visit##name(H##name* instr) override; + + FOR_EACH_CONCRETE_INSTRUCTION_COMMON(DECLARE_VISIT_INSTRUCTION) + FOR_EACH_CONCRETE_INSTRUCTION_RISCV64(DECLARE_VISIT_INSTRUCTION) + +#undef DECLARE_VISIT_INSTRUCTION + + void VisitInstruction(HInstruction* instruction) override { + LOG(FATAL) << "Unreachable instruction " << instruction->DebugName() << " (id " + << instruction->GetId() << ")"; + } + + Riscv64Assembler* GetAssembler() const { return assembler_; } + + void GenerateMemoryBarrier(MemBarrierKind kind); + + void FAdd(FRegister rd, FRegister rs1, FRegister rs2, DataType::Type type); + void FClass(XRegister rd, FRegister rs1, DataType::Type type); + + void Load(Location out, XRegister rs1, int32_t offset, DataType::Type type); + void Store(Location value, XRegister rs1, int32_t offset, DataType::Type type); + + // Sequentially consistent store. Used for volatile fields and intrinsics. + // The `instruction` argument is for recording an implicit null check stack map with the + // store instruction which may not be the last instruction emitted by `StoreSeqCst()`. 
+ void StoreSeqCst(Location value, + XRegister rs1, + int32_t offset, + DataType::Type type, + HInstruction* instruction = nullptr); + + void ShNAdd(XRegister rd, XRegister rs1, XRegister rs2, DataType::Type type); + + protected: + void GenerateClassInitializationCheck(SlowPathCodeRISCV64* slow_path, XRegister class_reg); + void GenerateBitstringTypeCheckCompare(HTypeCheckInstruction* check, XRegister temp); + void GenerateSuspendCheck(HSuspendCheck* check, HBasicBlock* successor); + void HandleBinaryOp(HBinaryOperation* operation); + void HandleCondition(HCondition* instruction); + void HandleShift(HBinaryOperation* operation); + void HandleFieldSet(HInstruction* instruction, + const FieldInfo& field_info, + bool value_can_be_null, + WriteBarrierKind write_barrier_kind); + void HandleFieldGet(HInstruction* instruction, const FieldInfo& field_info); + + // Generate a heap reference load using one register `out`: + // + // out <- *(out + offset) + // + // while honoring heap poisoning and/or read barriers (if any). + // + // Location `maybe_temp` is used when generating a read barrier and + // shall be a register in that case; it may be an invalid location + // otherwise. + void GenerateReferenceLoadOneRegister(HInstruction* instruction, + Location out, + uint32_t offset, + Location maybe_temp, + ReadBarrierOption read_barrier_option); + // Generate a heap reference load using two different registers + // `out` and `obj`: + // + // out <- *(obj + offset) + // + // while honoring heap poisoning and/or read barriers (if any). + // + // Location `maybe_temp` is used when generating a Baker's (fast + // path) read barrier and shall be a register in that case; it may + // be an invalid location otherwise. + void GenerateReferenceLoadTwoRegisters(HInstruction* instruction, + Location out, + Location obj, + uint32_t offset, + Location maybe_temp, + ReadBarrierOption read_barrier_option); + + void GenerateTestAndBranch(HInstruction* instruction, + size_t condition_input_index, + Riscv64Label* true_target, + Riscv64Label* false_target); + void DivRemOneOrMinusOne(HBinaryOperation* instruction); + void DivRemByPowerOfTwo(HBinaryOperation* instruction); + void GenerateDivRemWithAnyConstant(HBinaryOperation* instruction); + void GenerateDivRemIntegral(HBinaryOperation* instruction); + void GenerateIntLongCondition(IfCondition cond, LocationSummary* locations); + void GenerateIntLongCondition(IfCondition cond, + LocationSummary* locations, + XRegister rd, + bool to_all_bits); + void GenerateIntLongCompareAndBranch(IfCondition cond, + LocationSummary* locations, + Riscv64Label* label); + void GenerateFpCondition(IfCondition cond, + bool gt_bias, + DataType::Type type, + LocationSummary* locations, + Riscv64Label* label = nullptr); + void GenerateFpCondition(IfCondition cond, + bool gt_bias, + DataType::Type type, + LocationSummary* locations, + Riscv64Label* label, + XRegister rd, + bool to_all_bits); + void GenerateMethodEntryExitHook(HInstruction* instruction); + void HandleGoto(HInstruction* got, HBasicBlock* successor); + void GenPackedSwitchWithCompares(XRegister adjusted, + XRegister temp, + uint32_t num_entries, + HBasicBlock* switch_block); + void GenTableBasedPackedSwitch(XRegister adjusted, + XRegister temp, + uint32_t num_entries, + HBasicBlock* switch_block); + int32_t VecAddress(LocationSummary* locations, + size_t size, + /*out*/ XRegister* adjusted_base); + + template <typename Reg, + void (Riscv64Assembler::*opS)(Reg, FRegister, FRegister), + void (Riscv64Assembler::*opD)(Reg, FRegister, 
FRegister)> + void FpBinOp(Reg rd, FRegister rs1, FRegister rs2, DataType::Type type); + void FSub(FRegister rd, FRegister rs1, FRegister rs2, DataType::Type type); + void FDiv(FRegister rd, FRegister rs1, FRegister rs2, DataType::Type type); + void FMul(FRegister rd, FRegister rs1, FRegister rs2, DataType::Type type); + void FMin(FRegister rd, FRegister rs1, FRegister rs2, DataType::Type type); + void FMax(FRegister rd, FRegister rs1, FRegister rs2, DataType::Type type); + void FEq(XRegister rd, FRegister rs1, FRegister rs2, DataType::Type type); + void FLt(XRegister rd, FRegister rs1, FRegister rs2, DataType::Type type); + void FLe(XRegister rd, FRegister rs1, FRegister rs2, DataType::Type type); + + template <typename Reg, + void (Riscv64Assembler::*opS)(Reg, FRegister), + void (Riscv64Assembler::*opD)(Reg, FRegister)> + void FpUnOp(Reg rd, FRegister rs1, DataType::Type type); + void FAbs(FRegister rd, FRegister rs1, DataType::Type type); + void FNeg(FRegister rd, FRegister rs1, DataType::Type type); + void FMv(FRegister rd, FRegister rs1, DataType::Type type); + void FMvX(XRegister rd, FRegister rs1, DataType::Type type); + + Riscv64Assembler* const assembler_; + CodeGeneratorRISCV64* const codegen_; + + DISALLOW_COPY_AND_ASSIGN(InstructionCodeGeneratorRISCV64); +}; + +class CodeGeneratorRISCV64 : public CodeGenerator { + public: + CodeGeneratorRISCV64(HGraph* graph, + const CompilerOptions& compiler_options, + OptimizingCompilerStats* stats = nullptr); + virtual ~CodeGeneratorRISCV64() {} + + void GenerateFrameEntry() override; + void GenerateFrameExit() override; + + void Bind(HBasicBlock* block) override; + + size_t GetWordSize() const override { + // The "word" for the compiler is the core register size (64-bit for riscv64) while the + // riscv64 assembler uses "word" for 32-bit values and "double word" for 64-bit values. + return kRiscv64DoublewordSize; + } + + bool SupportsPredicatedSIMD() const override { + // TODO(riscv64): Check the vector extension. + return false; + } + + // Get FP register width in bytes for spilling/restoring in the slow paths. + // + // Note: In SIMD graphs this should return SIMD register width as all FP and SIMD registers + // alias and live SIMD registers are forced to be spilled in full size in the slow paths. + size_t GetSlowPathFPWidth() const override { + // Default implementation. + return GetCalleePreservedFPWidth(); + } + + size_t GetCalleePreservedFPWidth() const override { + return kRiscv64FloatRegSizeInBytes; + }; + + size_t GetSIMDRegisterWidth() const override { + // TODO(riscv64): Implement SIMD with the Vector extension. + // Note: HLoopOptimization calls this function even for an ISA without SIMD support. 
+ return kRiscv64FloatRegSizeInBytes; + }; + + uintptr_t GetAddressOf(HBasicBlock* block) override { + return assembler_.GetLabelLocation(GetLabelOf(block)); + }; + + Riscv64Label* GetLabelOf(HBasicBlock* block) const { + return CommonGetLabelOf<Riscv64Label>(block_labels_, block); + } + + void Initialize() override { block_labels_ = CommonInitializeLabels<Riscv64Label>(); } + + void MoveConstant(Location destination, int32_t value) override; + void MoveLocation(Location destination, Location source, DataType::Type dst_type) override; + void AddLocationAsTemp(Location location, LocationSummary* locations) override; + + Riscv64Assembler* GetAssembler() override { return &assembler_; } + const Riscv64Assembler& GetAssembler() const override { return assembler_; } + + HGraphVisitor* GetLocationBuilder() override { return &location_builder_; } + + InstructionCodeGeneratorRISCV64* GetInstructionVisitor() override { + return &instruction_visitor_; + } + + void MaybeGenerateInlineCacheCheck(HInstruction* instruction, XRegister klass); + + void SetupBlockedRegisters() const override; + + size_t SaveCoreRegister(size_t stack_index, uint32_t reg_id) override; + size_t RestoreCoreRegister(size_t stack_index, uint32_t reg_id) override; + size_t SaveFloatingPointRegister(size_t stack_index, uint32_t reg_id) override; + size_t RestoreFloatingPointRegister(size_t stack_index, uint32_t reg_id) override; + + void DumpCoreRegister(std::ostream& stream, int reg) const override; + void DumpFloatingPointRegister(std::ostream& stream, int reg) const override; + + InstructionSet GetInstructionSet() const override { return InstructionSet::kRiscv64; } + + const Riscv64InstructionSetFeatures& GetInstructionSetFeatures() const; + + uint32_t GetPreferredSlotsAlignment() const override { + return static_cast<uint32_t>(kRiscv64PointerSize); + } + + void Finalize() override; + + // Generate code to invoke a runtime entry point. + void InvokeRuntime(QuickEntrypointEnum entrypoint, + HInstruction* instruction, + uint32_t dex_pc, + SlowPathCode* slow_path = nullptr) override; + + // Generate code to invoke a runtime entry point, but do not record + // PC-related information in a stack map. + void InvokeRuntimeWithoutRecordingPcInfo(int32_t entry_point_offset, + HInstruction* instruction, + SlowPathCode* slow_path); + + ParallelMoveResolver* GetMoveResolver() override { return &move_resolver_; } + + bool NeedsTwoRegisters([[maybe_unused]] DataType::Type type) const override { return false; } + + void IncreaseFrame(size_t adjustment) override; + void DecreaseFrame(size_t adjustment) override; + + void GenerateNop() override; + + void GenerateImplicitNullCheck(HNullCheck* instruction) override; + void GenerateExplicitNullCheck(HNullCheck* instruction) override; + + // Check if the desired_string_load_kind is supported. If it is, return it, + // otherwise return a fall-back kind that should be used instead. + HLoadString::LoadKind GetSupportedLoadStringKind( + HLoadString::LoadKind desired_string_load_kind) override; + + // Check if the desired_class_load_kind is supported. If it is, return it, + // otherwise return a fall-back kind that should be used instead. + HLoadClass::LoadKind GetSupportedLoadClassKind( + HLoadClass::LoadKind desired_class_load_kind) override; + + // Check if the desired_dispatch_info is supported. If it is, return it, + // otherwise return a fall-back info that should be used instead. 
+ HInvokeStaticOrDirect::DispatchInfo GetSupportedInvokeStaticOrDirectDispatch( + const HInvokeStaticOrDirect::DispatchInfo& desired_dispatch_info, ArtMethod* method) override; + + // The PcRelativePatchInfo is used for PC-relative addressing of methods/strings/types, + // whether through .data.bimg.rel.ro, .bss, or directly in the boot image. + // + // The 20-bit and 12-bit parts of the 32-bit PC-relative offset are patched separately, + // necessitating two patches/infos. There can be more than two patches/infos if the + // instruction supplying the high part is shared with e.g. a slow path, while the low + // part is supplied by separate instructions, e.g.: + // auipc r1, high // patch + // lwu r2, low(r1) // patch + // beqz r2, slow_path + // back: + // ... + // slow_path: + // ... + // sw r2, low(r1) // patch + // j back + struct PcRelativePatchInfo : PatchInfo<Riscv64Label> { + PcRelativePatchInfo(const DexFile* dex_file, + uint32_t off_or_idx, + const PcRelativePatchInfo* info_high) + : PatchInfo<Riscv64Label>(dex_file, off_or_idx), + pc_insn_label(info_high != nullptr ? &info_high->label : &label) { + DCHECK_IMPLIES(info_high != nullptr, info_high->pc_insn_label == &info_high->label); + } + + // Pointer to the info for the high part patch or nullptr if this is the high part patch info. + const Riscv64Label* pc_insn_label; + + private: + PcRelativePatchInfo(PcRelativePatchInfo&& other) = delete; + DISALLOW_COPY_AND_ASSIGN(PcRelativePatchInfo); + }; + + PcRelativePatchInfo* NewBootImageIntrinsicPatch(uint32_t intrinsic_data, + const PcRelativePatchInfo* info_high = nullptr); + PcRelativePatchInfo* NewBootImageRelRoPatch(uint32_t boot_image_offset, + const PcRelativePatchInfo* info_high = nullptr); + PcRelativePatchInfo* NewBootImageMethodPatch(MethodReference target_method, + const PcRelativePatchInfo* info_high = nullptr); + PcRelativePatchInfo* NewMethodBssEntryPatch(MethodReference target_method, + const PcRelativePatchInfo* info_high = nullptr); + PcRelativePatchInfo* NewBootImageJniEntrypointPatch( + MethodReference target_method, const PcRelativePatchInfo* info_high = nullptr); + + PcRelativePatchInfo* NewBootImageTypePatch(const DexFile& dex_file, + dex::TypeIndex type_index, + const PcRelativePatchInfo* info_high = nullptr); + PcRelativePatchInfo* NewTypeBssEntryPatch(HLoadClass* load_class, + const PcRelativePatchInfo* info_high = nullptr); + PcRelativePatchInfo* NewBootImageStringPatch(const DexFile& dex_file, + dex::StringIndex string_index, + const PcRelativePatchInfo* info_high = nullptr); + PcRelativePatchInfo* NewStringBssEntryPatch(const DexFile& dex_file, + dex::StringIndex string_index, + const PcRelativePatchInfo* info_high = nullptr); + + void EmitPcRelativeAuipcPlaceholder(PcRelativePatchInfo* info_high, XRegister out); + void EmitPcRelativeAddiPlaceholder(PcRelativePatchInfo* info_low, XRegister rd, XRegister rs1); + void EmitPcRelativeLwuPlaceholder(PcRelativePatchInfo* info_low, XRegister rd, XRegister rs1); + void EmitPcRelativeLdPlaceholder(PcRelativePatchInfo* info_low, XRegister rd, XRegister rs1); + + void EmitLinkerPatches(ArenaVector<linker::LinkerPatch>* linker_patches) override; + + Literal* DeduplicateBootImageAddressLiteral(uint64_t address); + void PatchJitRootUse(uint8_t* code, + const uint8_t* roots_data, + const Literal* literal, + uint64_t index_in_table) const; + Literal* DeduplicateJitStringLiteral(const DexFile& dex_file, + dex::StringIndex string_index, + Handle<mirror::String> handle); + Literal* DeduplicateJitClassLiteral(const DexFile& 
dex_file, + dex::TypeIndex type_index, + Handle<mirror::Class> handle); + void EmitJitRootPatches(uint8_t* code, const uint8_t* roots_data) override; + + void LoadTypeForBootImageIntrinsic(XRegister dest, TypeReference target_type); + void LoadBootImageRelRoEntry(XRegister dest, uint32_t boot_image_offset); + void LoadBootImageAddress(XRegister dest, uint32_t boot_image_reference); + void LoadIntrinsicDeclaringClass(XRegister dest, HInvoke* invoke); + void LoadClassRootForIntrinsic(XRegister dest, ClassRoot class_root); + + void LoadMethod(MethodLoadKind load_kind, Location temp, HInvoke* invoke); + void GenerateStaticOrDirectCall(HInvokeStaticOrDirect* invoke, + Location temp, + SlowPathCode* slow_path = nullptr) override; + void GenerateVirtualCall(HInvokeVirtual* invoke, + Location temp, + SlowPathCode* slow_path = nullptr) override; + void MoveFromReturnRegister(Location trg, DataType::Type type) override; + + void GenerateMemoryBarrier(MemBarrierKind kind); + + void MaybeIncrementHotness(bool is_frame_entry); + + bool CanUseImplicitSuspendCheck() const; + + + // Create slow path for a Baker read barrier for a GC root load within `instruction`. + SlowPathCodeRISCV64* AddGcRootBakerBarrierBarrierSlowPath( + HInstruction* instruction, Location root, Location temp); + + // Emit marking check for a Baker read barrier for a GC root load within `instruction`. + void EmitBakerReadBarierMarkingCheck( + SlowPathCodeRISCV64* slow_path, Location root, Location temp); + + // Generate a GC root reference load: + // + // root <- *(obj + offset) + // + // while honoring read barriers (if any). + void GenerateGcRootFieldLoad(HInstruction* instruction, + Location root, + XRegister obj, + uint32_t offset, + ReadBarrierOption read_barrier_option, + Riscv64Label* label_low = nullptr); + + // Fast path implementation of ReadBarrier::Barrier for a heap + // reference field load when Baker's read barriers are used. + void GenerateFieldLoadWithBakerReadBarrier(HInstruction* instruction, + Location ref, + XRegister obj, + uint32_t offset, + Location temp, + bool needs_null_check); + // Fast path implementation of ReadBarrier::Barrier for a heap + // reference array load when Baker's read barriers are used. + void GenerateArrayLoadWithBakerReadBarrier(HInstruction* instruction, + Location ref, + XRegister obj, + uint32_t data_offset, + Location index, + Location temp, + bool needs_null_check); + // Factored implementation, used by GenerateFieldLoadWithBakerReadBarrier, + // GenerateArrayLoadWithBakerReadBarrier and intrinsics. + void GenerateReferenceLoadWithBakerReadBarrier(HInstruction* instruction, + Location ref, + XRegister obj, + uint32_t offset, + Location index, + Location temp, + bool needs_null_check); + + // Create slow path for a read barrier for a heap reference within `instruction`. + // + // This is a helper function for GenerateReadBarrierSlow() that has the same + // arguments. The creation and adding of the slow path is exposed for intrinsics + // that cannot use GenerateReadBarrierSlow() from their own slow paths. + SlowPathCodeRISCV64* AddReadBarrierSlowPath(HInstruction* instruction, + Location out, + Location ref, + Location obj, + uint32_t offset, + Location index); + + // Generate a read barrier for a heap reference within `instruction` + // using a slow path. 
+ // + // A read barrier for an object reference read from the heap is + // implemented as a call to the artReadBarrierSlow runtime entry + // point, which is passed the values in locations `ref`, `obj`, and + // `offset`: + // + // mirror::Object* artReadBarrierSlow(mirror::Object* ref, + // mirror::Object* obj, + // uint32_t offset); + // + // The `out` location contains the value returned by + // artReadBarrierSlow. + // + // When `index` is provided (i.e. for array accesses), the offset + // value passed to artReadBarrierSlow is adjusted to take `index` + // into account. + void GenerateReadBarrierSlow(HInstruction* instruction, + Location out, + Location ref, + Location obj, + uint32_t offset, + Location index = Location::NoLocation()); + + // If read barriers are enabled, generate a read barrier for a heap + // reference using a slow path. If heap poisoning is enabled, also + // unpoison the reference in `out`. + void MaybeGenerateReadBarrierSlow(HInstruction* instruction, + Location out, + Location ref, + Location obj, + uint32_t offset, + Location index = Location::NoLocation()); + + // Generate a read barrier for a GC root within `instruction` using + // a slow path. + // + // A read barrier for an object reference GC root is implemented as + // a call to the artReadBarrierForRootSlow runtime entry point, + // which is passed the value in location `root`: + // + // mirror::Object* artReadBarrierForRootSlow(GcRoot<mirror::Object>* root); + // + // The `out` location contains the value returned by + // artReadBarrierForRootSlow. + void GenerateReadBarrierForRootSlow(HInstruction* instruction, Location out, Location root); + + void MarkGCCard(XRegister object, XRegister value, bool value_can_be_null); + + // + // Heap poisoning. + // + + // Poison a heap reference contained in `reg`. + void PoisonHeapReference(XRegister reg); + + // Unpoison a heap reference contained in `reg`. + void UnpoisonHeapReference(XRegister reg); + + // Poison a heap reference contained in `reg` if heap poisoning is enabled. + void MaybePoisonHeapReference(XRegister reg); + + // Unpoison a heap reference contained in `reg` if heap poisoning is enabled. + void MaybeUnpoisonHeapReference(XRegister reg); + + void SwapLocations(Location loc1, Location loc2, DataType::Type type); + + private: + using Uint32ToLiteralMap = ArenaSafeMap<uint32_t, Literal*>; + using Uint64ToLiteralMap = ArenaSafeMap<uint64_t, Literal*>; + using StringToLiteralMap = + ArenaSafeMap<StringReference, Literal*, StringReferenceValueComparator>; + using TypeToLiteralMap = ArenaSafeMap<TypeReference, Literal*, TypeReferenceValueComparator>; + + Literal* DeduplicateUint32Literal(uint32_t value); + Literal* DeduplicateUint64Literal(uint64_t value); + + PcRelativePatchInfo* NewPcRelativePatch(const DexFile* dex_file, + uint32_t offset_or_index, + const PcRelativePatchInfo* info_high, + ArenaDeque<PcRelativePatchInfo>* patches); + + template <linker::LinkerPatch (*Factory)(size_t, const DexFile*, uint32_t, uint32_t)> + void EmitPcRelativeLinkerPatches(const ArenaDeque<PcRelativePatchInfo>& infos, + ArenaVector<linker::LinkerPatch>* linker_patches); + + Riscv64Assembler assembler_; + LocationsBuilderRISCV64 location_builder_; + InstructionCodeGeneratorRISCV64 instruction_visitor_; + Riscv64Label frame_entry_label_; + + // Labels for each block that will be compiled. + Riscv64Label* block_labels_; // Indexed by block id. 
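The PcRelativePatchInfo comment earlier in this header says the 32-bit PC-relative offset is patched as a 20-bit AUIPC part plus a 12-bit part in the dependent instruction. The sketch below shows that split in isolation, assuming only the standard RISC-V rule that the low 12-bit immediate is sign-extended, so the high part absorbs a +0x800 rounding; SplitOffset and HiLo are illustrative names, not the patching code's API.

    #include <cassert>
    #include <cstdint>
    #include <initializer_list>

    // Split a 32-bit PC-relative offset into the AUIPC immediate (hi20) and the
    // 12-bit immediate of the dependent ADDI/LWU/LD (lo12). Hardware sign-extends
    // the low immediate, hence the +0x800 rounding of the high part.
    struct HiLo {
      int32_t hi20;  // goes into AUIPC imm[31:12]
      int32_t lo12;  // goes into the dependent instruction's imm[11:0]
    };

    HiLo SplitOffset(int32_t offset) {
      // Arithmetic right shift (two's complement) keeps negative offsets correct.
      int32_t hi = (offset + 0x800) >> 12;
      int32_t lo = offset - hi * 4096;  // always in [-0x800, 0x7ff]
      return {hi, lo};
    }

    int main() {
      for (int32_t offset : {0, 1, 0x7ff, 0x800, 0x801, -1, -0x800, -0x801,
                             0x12345678, -0x12345678}) {
        HiLo parts = SplitOffset(offset);
        assert(parts.lo12 >= -0x800 && parts.lo12 <= 0x7ff);
        // AUIPC adds hi20 << 12 to the PC; the dependent instruction adds lo12.
        assert(parts.hi20 * 4096 + parts.lo12 == offset);
      }
      return 0;
    }

The round-trip invariant checked in main() is presumably what the paired high/low patches have to preserve between the AUIPC placeholder and the instruction that consumes its result.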
+ + ParallelMoveResolverRISCV64 move_resolver_; + + // Deduplication map for 32-bit literals, used for non-patchable boot image addresses. + Uint32ToLiteralMap uint32_literals_; + // Deduplication map for 64-bit literals, used for non-patchable method address or method code + // address. + Uint64ToLiteralMap uint64_literals_; + + // PC-relative method patch info for kBootImageLinkTimePcRelative. + ArenaDeque<PcRelativePatchInfo> boot_image_method_patches_; + // PC-relative method patch info for kBssEntry. + ArenaDeque<PcRelativePatchInfo> method_bss_entry_patches_; + // PC-relative type patch info for kBootImageLinkTimePcRelative. + ArenaDeque<PcRelativePatchInfo> boot_image_type_patches_; + // PC-relative type patch info for kBssEntry. + ArenaDeque<PcRelativePatchInfo> type_bss_entry_patches_; + // PC-relative public type patch info for kBssEntryPublic. + ArenaDeque<PcRelativePatchInfo> public_type_bss_entry_patches_; + // PC-relative package type patch info for kBssEntryPackage. + ArenaDeque<PcRelativePatchInfo> package_type_bss_entry_patches_; + // PC-relative String patch info for kBootImageLinkTimePcRelative. + ArenaDeque<PcRelativePatchInfo> boot_image_string_patches_; + // PC-relative String patch info for kBssEntry. + ArenaDeque<PcRelativePatchInfo> string_bss_entry_patches_; + // PC-relative method patch info for kBootImageLinkTimePcRelative+kCallCriticalNative. + ArenaDeque<PcRelativePatchInfo> boot_image_jni_entrypoint_patches_; + // PC-relative patch info for IntrinsicObjects for the boot image, + // and for method/type/string patches for kBootImageRelRo otherwise. + ArenaDeque<PcRelativePatchInfo> boot_image_other_patches_; + + // Patches for string root accesses in JIT compiled code. + StringToLiteralMap jit_string_patches_; + // Patches for class root accesses in JIT compiled code. + TypeToLiteralMap jit_class_patches_; +}; + +} // namespace riscv64 +} // namespace art #endif // ART_COMPILER_OPTIMIZING_CODE_GENERATOR_RISCV64_H_ diff --git a/compiler/optimizing/code_generator_vector_arm64_neon.cc b/compiler/optimizing/code_generator_vector_arm64_neon.cc index 6b6e25cf0c..848b5e7567 100644 --- a/compiler/optimizing/code_generator_vector_arm64_neon.cc +++ b/compiler/optimizing/code_generator_vector_arm64_neon.cc @@ -61,10 +61,8 @@ inline bool NEONCanEncodeConstantAsImmediate(HConstant* constant, HInstruction* // - constant location - if 'constant' is an actual constant and its value can be // encoded into the instruction. // - register location otherwise. 
-inline Location NEONEncodableConstantOrRegister(HInstruction* constant, - HInstruction* instr) { - if (constant->IsConstant() - && NEONCanEncodeConstantAsImmediate(constant->AsConstant(), instr)) { +inline Location NEONEncodableConstantOrRegister(HInstruction* constant, HInstruction* instr) { + if (constant->IsConstant() && NEONCanEncodeConstantAsImmediate(constant->AsConstant(), instr)) { return Location::ConstantLocation(constant); } @@ -1533,12 +1531,32 @@ void InstructionCodeGeneratorARM64Neon::VisitVecPredWhile(HVecPredWhile* instruc UNREACHABLE(); } -void LocationsBuilderARM64Neon::VisitVecPredCondition(HVecPredCondition* instruction) { +void LocationsBuilderARM64Neon::VisitVecPredToBoolean(HVecPredToBoolean* instruction) { LOG(FATAL) << "No SIMD for " << instruction->GetId(); UNREACHABLE(); } -void InstructionCodeGeneratorARM64Neon::VisitVecPredCondition(HVecPredCondition* instruction) { +void InstructionCodeGeneratorARM64Neon::VisitVecPredToBoolean(HVecPredToBoolean* instruction) { + LOG(FATAL) << "No SIMD for " << instruction->GetId(); + UNREACHABLE(); +} + +void LocationsBuilderARM64Neon::VisitVecCondition(HVecCondition* instruction) { + LOG(FATAL) << "No SIMD for " << instruction->GetId(); + UNREACHABLE(); +} + +void InstructionCodeGeneratorARM64Neon::VisitVecCondition(HVecCondition* instruction) { + LOG(FATAL) << "No SIMD for " << instruction->GetId(); + UNREACHABLE(); +} + +void LocationsBuilderARM64Neon::VisitVecPredNot(HVecPredNot* instruction) { + LOG(FATAL) << "No SIMD for " << instruction->GetId(); + UNREACHABLE(); +} + +void InstructionCodeGeneratorARM64Neon::VisitVecPredNot(HVecPredNot* instruction) { LOG(FATAL) << "No SIMD for " << instruction->GetId(); UNREACHABLE(); } diff --git a/compiler/optimizing/code_generator_vector_arm64_sve.cc b/compiler/optimizing/code_generator_vector_arm64_sve.cc index fe15791d3f..ef79932899 100644 --- a/compiler/optimizing/code_generator_vector_arm64_sve.cc +++ b/compiler/optimizing/code_generator_vector_arm64_sve.cc @@ -62,8 +62,7 @@ static bool SVECanEncodeConstantAsImmediate(HConstant* constant, HInstruction* i // encoded into the instruction. // - register location otherwise. 
inline Location SVEEncodableConstantOrRegister(HInstruction* constant, HInstruction* instr) { - if (constant->IsConstant() - && SVECanEncodeConstantAsImmediate(constant->AsConstant(), instr)) { + if (constant->IsConstant() && SVECanEncodeConstantAsImmediate(constant->AsConstant(), instr)) { return Location::ConstantLocation(constant); } @@ -246,7 +245,7 @@ void InstructionCodeGeneratorARM64Sve::VisitVecReduce(HVecReduce* instruction) { LocationSummary* locations = instruction->GetLocations(); const ZRegister src = ZRegisterFrom(locations->InAt(0)); const VRegister dst = DRegisterFrom(locations->Out()); - const PRegister p_reg = LoopPReg(); + const PRegister p_reg = GetVecGoverningPReg(instruction); ValidateVectorLength(instruction); switch (instruction->GetPackedType()) { case DataType::Type::kInt32: @@ -284,7 +283,7 @@ void InstructionCodeGeneratorARM64Sve::VisitVecCnv(HVecCnv* instruction) { LocationSummary* locations = instruction->GetLocations(); const ZRegister src = ZRegisterFrom(locations->InAt(0)); const ZRegister dst = ZRegisterFrom(locations->Out()); - const PRegisterM p_reg = LoopPReg().Merging(); + const PRegisterM p_reg = GetVecGoverningPReg(instruction).Merging(); DataType::Type from = instruction->GetInputType(); DataType::Type to = instruction->GetResultType(); ValidateVectorLength(instruction); @@ -304,7 +303,7 @@ void InstructionCodeGeneratorARM64Sve::VisitVecNeg(HVecNeg* instruction) { LocationSummary* locations = instruction->GetLocations(); const ZRegister src = ZRegisterFrom(locations->InAt(0)); const ZRegister dst = ZRegisterFrom(locations->Out()); - const PRegisterM p_reg = LoopPReg().Merging(); + const PRegisterM p_reg = GetVecGoverningPReg(instruction).Merging(); ValidateVectorLength(instruction); switch (instruction->GetPackedType()) { case DataType::Type::kUint8: @@ -342,7 +341,7 @@ void InstructionCodeGeneratorARM64Sve::VisitVecAbs(HVecAbs* instruction) { LocationSummary* locations = instruction->GetLocations(); const ZRegister src = ZRegisterFrom(locations->InAt(0)); const ZRegister dst = ZRegisterFrom(locations->Out()); - const PRegisterM p_reg = LoopPReg().Merging(); + const PRegisterM p_reg = GetVecGoverningPReg(instruction).Merging(); ValidateVectorLength(instruction); switch (instruction->GetPackedType()) { case DataType::Type::kInt8: @@ -378,7 +377,7 @@ void InstructionCodeGeneratorARM64Sve::VisitVecNot(HVecNot* instruction) { LocationSummary* locations = instruction->GetLocations(); const ZRegister src = ZRegisterFrom(locations->InAt(0)); const ZRegister dst = ZRegisterFrom(locations->Out()); - const PRegisterM p_reg = LoopPReg().Merging(); + const PRegisterM p_reg = GetVecGoverningPReg(instruction).Merging(); ValidateVectorLength(instruction); switch (instruction->GetPackedType()) { case DataType::Type::kBool: // special case boolean-not @@ -438,7 +437,7 @@ void InstructionCodeGeneratorARM64Sve::VisitVecAdd(HVecAdd* instruction) { const ZRegister lhs = ZRegisterFrom(locations->InAt(0)); const ZRegister rhs = ZRegisterFrom(locations->InAt(1)); const ZRegister dst = ZRegisterFrom(locations->Out()); - const PRegisterM p_reg = LoopPReg().Merging(); + const PRegisterM p_reg = GetVecGoverningPReg(instruction).Merging(); ValidateVectorLength(instruction); switch (instruction->GetPackedType()) { case DataType::Type::kUint8: @@ -497,7 +496,7 @@ void InstructionCodeGeneratorARM64Sve::VisitVecSub(HVecSub* instruction) { const ZRegister lhs = ZRegisterFrom(locations->InAt(0)); const ZRegister rhs = ZRegisterFrom(locations->InAt(1)); const ZRegister dst = 
ZRegisterFrom(locations->Out()); - const PRegisterM p_reg = LoopPReg().Merging(); + const PRegisterM p_reg = GetVecGoverningPReg(instruction).Merging(); ValidateVectorLength(instruction); switch (instruction->GetPackedType()) { case DataType::Type::kUint8: @@ -546,7 +545,7 @@ void InstructionCodeGeneratorARM64Sve::VisitVecMul(HVecMul* instruction) { const ZRegister lhs = ZRegisterFrom(locations->InAt(0)); const ZRegister rhs = ZRegisterFrom(locations->InAt(1)); const ZRegister dst = ZRegisterFrom(locations->Out()); - const PRegisterM p_reg = LoopPReg().Merging(); + const PRegisterM p_reg = GetVecGoverningPReg(instruction).Merging(); ValidateVectorLength(instruction); switch (instruction->GetPackedType()) { case DataType::Type::kUint8: @@ -585,7 +584,7 @@ void InstructionCodeGeneratorARM64Sve::VisitVecDiv(HVecDiv* instruction) { const ZRegister lhs = ZRegisterFrom(locations->InAt(0)); const ZRegister rhs = ZRegisterFrom(locations->InAt(1)); const ZRegister dst = ZRegisterFrom(locations->Out()); - const PRegisterM p_reg = LoopPReg().Merging(); + const PRegisterM p_reg = GetVecGoverningPReg(instruction).Merging(); ValidateVectorLength(instruction); // Note: VIXL guarantees StrictNaNPropagation for Fdiv. @@ -633,7 +632,7 @@ void InstructionCodeGeneratorARM64Sve::VisitVecAnd(HVecAnd* instruction) { const ZRegister lhs = ZRegisterFrom(locations->InAt(0)); const ZRegister rhs = ZRegisterFrom(locations->InAt(1)); const ZRegister dst = ZRegisterFrom(locations->Out()); - const PRegisterM p_reg = LoopPReg().Merging(); + const PRegisterM p_reg = GetVecGoverningPReg(instruction).Merging(); ValidateVectorLength(instruction); switch (instruction->GetPackedType()) { case DataType::Type::kBool: @@ -678,7 +677,7 @@ void InstructionCodeGeneratorARM64Sve::VisitVecOr(HVecOr* instruction) { const ZRegister lhs = ZRegisterFrom(locations->InAt(0)); const ZRegister rhs = ZRegisterFrom(locations->InAt(1)); const ZRegister dst = ZRegisterFrom(locations->Out()); - const PRegisterM p_reg = LoopPReg().Merging(); + const PRegisterM p_reg = GetVecGoverningPReg(instruction).Merging(); ValidateVectorLength(instruction); switch (instruction->GetPackedType()) { case DataType::Type::kBool: @@ -714,7 +713,7 @@ void InstructionCodeGeneratorARM64Sve::VisitVecXor(HVecXor* instruction) { const ZRegister lhs = ZRegisterFrom(locations->InAt(0)); const ZRegister rhs = ZRegisterFrom(locations->InAt(1)); const ZRegister dst = ZRegisterFrom(locations->Out()); - const PRegisterM p_reg = LoopPReg().Merging(); + const PRegisterM p_reg = GetVecGoverningPReg(instruction).Merging(); ValidateVectorLength(instruction); switch (instruction->GetPackedType()) { case DataType::Type::kBool: @@ -769,7 +768,7 @@ void InstructionCodeGeneratorARM64Sve::VisitVecShl(HVecShl* instruction) { LocationSummary* locations = instruction->GetLocations(); const ZRegister lhs = ZRegisterFrom(locations->InAt(0)); const ZRegister dst = ZRegisterFrom(locations->Out()); - const PRegisterM p_reg = LoopPReg().Merging(); + const PRegisterM p_reg = GetVecGoverningPReg(instruction).Merging(); int32_t value = locations->InAt(1).GetConstant()->AsIntConstant()->GetValue(); ValidateVectorLength(instruction); switch (instruction->GetPackedType()) { @@ -802,7 +801,7 @@ void InstructionCodeGeneratorARM64Sve::VisitVecShr(HVecShr* instruction) { LocationSummary* locations = instruction->GetLocations(); const ZRegister lhs = ZRegisterFrom(locations->InAt(0)); const ZRegister dst = ZRegisterFrom(locations->Out()); - const PRegisterM p_reg = LoopPReg().Merging(); + const PRegisterM 
p_reg = GetVecGoverningPReg(instruction).Merging(); int32_t value = locations->InAt(1).GetConstant()->AsIntConstant()->GetValue(); ValidateVectorLength(instruction); switch (instruction->GetPackedType()) { @@ -835,7 +834,7 @@ void InstructionCodeGeneratorARM64Sve::VisitVecUShr(HVecUShr* instruction) { LocationSummary* locations = instruction->GetLocations(); const ZRegister lhs = ZRegisterFrom(locations->InAt(0)); const ZRegister dst = ZRegisterFrom(locations->Out()); - const PRegisterM p_reg = LoopPReg().Merging(); + const PRegisterM p_reg = GetVecGoverningPReg(instruction).Merging(); int32_t value = locations->InAt(1).GetConstant()->AsIntConstant()->GetValue(); ValidateVectorLength(instruction); switch (instruction->GetPackedType()) { @@ -966,7 +965,7 @@ void InstructionCodeGeneratorARM64Sve::VisitVecMultiplyAccumulate( const ZRegister acc = ZRegisterFrom(locations->InAt(0)); const ZRegister left = ZRegisterFrom(locations->InAt(1)); const ZRegister right = ZRegisterFrom(locations->InAt(2)); - const PRegisterM p_reg = LoopPReg().Merging(); + const PRegisterM p_reg = GetVecGoverningPReg(instruction).Merging(); DCHECK(locations->InAt(0).Equals(locations->Out())); ValidateVectorLength(instruction); @@ -1029,7 +1028,7 @@ void InstructionCodeGeneratorARM64Sve::VisitVecDotProd(HVecDotProd* instruction) const ZRegister acc = ZRegisterFrom(locations->InAt(0)); const ZRegister left = ZRegisterFrom(locations->InAt(1)); const ZRegister right = ZRegisterFrom(locations->InAt(2)); - const PRegisterM p_reg = LoopPReg().Merging(); + const PRegisterM p_reg = GetVecGoverningPReg(instruction).Merging(); HVecOperation* a = instruction->InputAt(1)->AsVecOperation(); HVecOperation* b = instruction->InputAt(2)->AsVecOperation(); DCHECK_EQ(HVecOperation::ToSignedType(a->GetPackedType()), @@ -1099,7 +1098,7 @@ void InstructionCodeGeneratorARM64Sve::VisitVecLoad(HVecLoad* instruction) { const ZRegister reg = ZRegisterFrom(locations->Out()); UseScratchRegisterScope temps(GetVIXLAssembler()); Register scratch; - const PRegisterZ p_reg = LoopPReg().Zeroing(); + const PRegisterZ p_reg = GetVecGoverningPReg(instruction).Zeroing(); ValidateVectorLength(instruction); switch (instruction->GetPackedType()) { @@ -1141,7 +1140,7 @@ void InstructionCodeGeneratorARM64Sve::VisitVecStore(HVecStore* instruction) { const ZRegister reg = ZRegisterFrom(locations->InAt(2)); UseScratchRegisterScope temps(GetVIXLAssembler()); Register scratch; - const PRegisterZ p_reg = LoopPReg().Zeroing(); + const PRegisterZ p_reg = GetVecGoverningPReg(instruction).Zeroing(); ValidateVectorLength(instruction); switch (instruction->GetPackedType()) { @@ -1182,25 +1181,25 @@ void LocationsBuilderARM64Sve::VisitVecPredSetAll(HVecPredSetAll* instruction) { void InstructionCodeGeneratorARM64Sve::VisitVecPredSetAll(HVecPredSetAll* instruction) { // Instruction is not predicated, see nodes_vector.h DCHECK(!instruction->IsPredicated()); - const PRegister p_reg = LoopPReg(); + const PRegister output_p_reg = GetVecPredSetFixedOutPReg(instruction); switch (instruction->GetPackedType()) { case DataType::Type::kBool: case DataType::Type::kUint8: case DataType::Type::kInt8: - __ Ptrue(p_reg.VnB(), vixl::aarch64::SVE_ALL); + __ Ptrue(output_p_reg.VnB(), vixl::aarch64::SVE_ALL); break; case DataType::Type::kUint16: case DataType::Type::kInt16: - __ Ptrue(p_reg.VnH(), vixl::aarch64::SVE_ALL); + __ Ptrue(output_p_reg.VnH(), vixl::aarch64::SVE_ALL); break; case DataType::Type::kInt32: case DataType::Type::kFloat32: - __ Ptrue(p_reg.VnS(), vixl::aarch64::SVE_ALL); + __ 
Ptrue(output_p_reg.VnS(), vixl::aarch64::SVE_ALL); break; case DataType::Type::kInt64: case DataType::Type::kFloat64: - __ Ptrue(p_reg.VnD(), vixl::aarch64::SVE_ALL); + __ Ptrue(output_p_reg.VnD(), vixl::aarch64::SVE_ALL); break; default: LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); @@ -1208,6 +1207,67 @@ void InstructionCodeGeneratorARM64Sve::VisitVecPredSetAll(HVecPredSetAll* instru } } +void LocationsBuilderARM64Sve::VisitVecCondition(HVecCondition* instruction) { + LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(instruction); + locations->SetInAt(0, Location::RequiresFpuRegister()); + locations->SetInAt(1, Location::RequiresFpuRegister()); + locations->SetOut(Location::RequiresRegister()); +} + +void InstructionCodeGeneratorARM64Sve::VisitVecCondition(HVecCondition* instruction) { + DCHECK(instruction->IsPredicated()); + LocationSummary* locations = instruction->GetLocations(); + const ZRegister left = ZRegisterFrom(locations->InAt(0)); + const ZRegister right = ZRegisterFrom(locations->InAt(1)); + const PRegisterZ p_reg = GetVecGoverningPReg(instruction).Zeroing(); + const PRegister output_p_reg = GetVecPredSetFixedOutPReg(instruction); + + HVecOperation* a = instruction->InputAt(0)->AsVecOperation(); + HVecOperation* b = instruction->InputAt(1)->AsVecOperation(); + DCHECK_EQ(HVecOperation::ToSignedType(a->GetPackedType()), + HVecOperation::ToSignedType(b->GetPackedType())); + ValidateVectorLength(instruction); + + // TODO: Support other condition OPs and types. + switch (instruction->GetPackedType()) { + case DataType::Type::kUint8: + case DataType::Type::kInt8: + __ Cmpeq(output_p_reg.VnB(), p_reg, left.VnB(), right.VnB()); + break; + case DataType::Type::kUint16: + case DataType::Type::kInt16: + __ Cmpeq(output_p_reg.VnH(), p_reg, left.VnH(), right.VnH()); + break; + case DataType::Type::kInt32: + __ Cmpeq(output_p_reg.VnS(), p_reg, left.VnS(), right.VnS()); + break; + case DataType::Type::kInt64: + __ Cmpeq(output_p_reg.VnD(), p_reg, left.VnD(), right.VnD()); + break; + default: + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); + UNREACHABLE(); + } +} + +void LocationsBuilderARM64Sve::VisitVecPredNot(HVecPredNot* instruction) { + LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(instruction); + DCHECK(instruction->InputAt(0)->IsVecPredSetOperation()); + locations->SetInAt(0, Location::NoLocation()); + locations->SetOut(Location::RequiresRegister()); +} + +void InstructionCodeGeneratorARM64Sve::VisitVecPredNot(HVecPredNot* instruction) { + DCHECK(instruction->IsPredicated()); + + const PRegister input_p_reg = GetVecPredSetFixedOutPReg( + instruction->InputAt(0)->AsVecPredSetOperation()); + const PRegister control_p_reg = GetVecGoverningPReg(instruction); + const PRegister output_p_reg = GetVecPredSetFixedOutPReg(instruction); + + __ Not(output_p_reg.VnB(), control_p_reg.Zeroing(), input_p_reg.VnB()); +} + void LocationsBuilderARM64Sve::VisitVecPredWhile(HVecPredWhile* instruction) { LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(instruction); locations->SetInAt(0, Location::RequiresRegister()); @@ -1218,8 +1278,8 @@ void LocationsBuilderARM64Sve::VisitVecPredWhile(HVecPredWhile* instruction) { // Semantically, the out location of this instruction and predicate inputs locations of // its users should be a fixed predicate register (similar to // Location::RegisterLocation(int reg)). 
But the register allocator (RA) doesn't support - // SIMD regs (e.g. predicate), so LoopPReg() is used explicitly without exposing it - // to the RA. + // SIMD regs (e.g. predicate), so fixed registers are used explicitly without exposing it + // to the RA (through GetVecPredSetFixedOutPReg()). // // To make the RA happy Location::NoLocation() was used for all the vector instructions // predicate inputs; but for the PredSetOperations (e.g. VecPredWhile) Location::NoLocation() @@ -1241,21 +1301,22 @@ void InstructionCodeGeneratorARM64Sve::VisitVecPredWhile(HVecPredWhile* instruct DCHECK(instruction->GetCondKind() == HVecPredWhile::CondKind::kLO); Register left = InputRegisterAt(instruction, 0); Register right = InputRegisterAt(instruction, 1); + const PRegister output_p_reg = GetVecPredSetFixedOutPReg(instruction); DCHECK_EQ(codegen_->GetSIMDRegisterWidth() % instruction->GetVectorLength(), 0u); switch (codegen_->GetSIMDRegisterWidth() / instruction->GetVectorLength()) { case 1u: - __ Whilelo(LoopPReg().VnB(), left, right); + __ Whilelo(output_p_reg.VnB(), left, right); break; case 2u: - __ Whilelo(LoopPReg().VnH(), left, right); + __ Whilelo(output_p_reg.VnH(), left, right); break; case 4u: - __ Whilelo(LoopPReg().VnS(), left, right); + __ Whilelo(output_p_reg.VnS(), left, right); break; case 8u: - __ Whilelo(LoopPReg().VnD(), left, right); + __ Whilelo(output_p_reg.VnD(), left, right); break; default: LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); @@ -1263,20 +1324,20 @@ void InstructionCodeGeneratorARM64Sve::VisitVecPredWhile(HVecPredWhile* instruct } } -void LocationsBuilderARM64Sve::VisitVecPredCondition(HVecPredCondition* instruction) { +void LocationsBuilderARM64Sve::VisitVecPredToBoolean(HVecPredToBoolean* instruction) { LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(instruction); locations->SetInAt(0, Location::NoLocation()); // Result of the operation - a boolean value in a core register. locations->SetOut(Location::RequiresRegister()); } -void InstructionCodeGeneratorARM64Sve::VisitVecPredCondition(HVecPredCondition* instruction) { +void InstructionCodeGeneratorARM64Sve::VisitVecPredToBoolean(HVecPredToBoolean* instruction) { // Instruction is not predicated, see nodes_vector.h DCHECK(!instruction->IsPredicated()); Register reg = OutputRegister(instruction); - // Currently VecPredCondition is only used as part of vectorized loop check condition + // Currently VecPredToBoolean is only used as part of vectorized loop check condition // evaluation. 
- DCHECK(instruction->GetPCondKind() == HVecPredCondition::PCondKind::kNFirst); + DCHECK(instruction->GetPCondKind() == HVecPredToBoolean::PCondKind::kNFirst); __ Cset(reg, pl); } diff --git a/compiler/optimizing/code_generator_vector_arm_vixl.cc b/compiler/optimizing/code_generator_vector_arm_vixl.cc index e8ecf28386..70f22af17b 100644 --- a/compiler/optimizing/code_generator_vector_arm_vixl.cc +++ b/compiler/optimizing/code_generator_vector_arm_vixl.cc @@ -1069,12 +1069,32 @@ void InstructionCodeGeneratorARMVIXL::VisitVecPredWhile(HVecPredWhile* instructi UNREACHABLE(); } -void LocationsBuilderARMVIXL::VisitVecPredCondition(HVecPredCondition* instruction) { +void LocationsBuilderARMVIXL::VisitVecPredToBoolean(HVecPredToBoolean* instruction) { LOG(FATAL) << "No SIMD for " << instruction->GetId(); UNREACHABLE(); } -void InstructionCodeGeneratorARMVIXL::VisitVecPredCondition(HVecPredCondition* instruction) { +void InstructionCodeGeneratorARMVIXL::VisitVecPredToBoolean(HVecPredToBoolean* instruction) { + LOG(FATAL) << "No SIMD for " << instruction->GetId(); + UNREACHABLE(); +} + +void LocationsBuilderARMVIXL::VisitVecCondition(HVecCondition* instruction) { + LOG(FATAL) << "No SIMD for " << instruction->GetId(); + UNREACHABLE(); +} + +void InstructionCodeGeneratorARMVIXL::VisitVecCondition(HVecCondition* instruction) { + LOG(FATAL) << "No SIMD for " << instruction->GetId(); + UNREACHABLE(); +} + +void LocationsBuilderARMVIXL::VisitVecPredNot(HVecPredNot* instruction) { + LOG(FATAL) << "No SIMD for " << instruction->GetId(); + UNREACHABLE(); +} + +void InstructionCodeGeneratorARMVIXL::VisitVecPredNot(HVecPredNot* instruction) { LOG(FATAL) << "No SIMD for " << instruction->GetId(); UNREACHABLE(); } diff --git a/compiler/optimizing/code_generator_vector_x86.cc b/compiler/optimizing/code_generator_vector_x86.cc index 343a6e1af4..1f9b2578ac 100644 --- a/compiler/optimizing/code_generator_vector_x86.cc +++ b/compiler/optimizing/code_generator_vector_x86.cc @@ -1401,12 +1401,32 @@ void InstructionCodeGeneratorX86::VisitVecPredWhile(HVecPredWhile* instruction) UNREACHABLE(); } -void LocationsBuilderX86::VisitVecPredCondition(HVecPredCondition* instruction) { +void LocationsBuilderX86::VisitVecPredToBoolean(HVecPredToBoolean* instruction) { LOG(FATAL) << "No SIMD for " << instruction->GetId(); UNREACHABLE(); } -void InstructionCodeGeneratorX86::VisitVecPredCondition(HVecPredCondition* instruction) { +void InstructionCodeGeneratorX86::VisitVecPredToBoolean(HVecPredToBoolean* instruction) { + LOG(FATAL) << "No SIMD for " << instruction->GetId(); + UNREACHABLE(); +} + +void LocationsBuilderX86::VisitVecCondition(HVecCondition* instruction) { + LOG(FATAL) << "No SIMD for " << instruction->GetId(); + UNREACHABLE(); +} + +void InstructionCodeGeneratorX86::VisitVecCondition(HVecCondition* instruction) { + LOG(FATAL) << "No SIMD for " << instruction->GetId(); + UNREACHABLE(); +} + +void LocationsBuilderX86::VisitVecPredNot(HVecPredNot* instruction) { + LOG(FATAL) << "No SIMD for " << instruction->GetId(); + UNREACHABLE(); +} + +void InstructionCodeGeneratorX86::VisitVecPredNot(HVecPredNot* instruction) { LOG(FATAL) << "No SIMD for " << instruction->GetId(); UNREACHABLE(); } diff --git a/compiler/optimizing/code_generator_vector_x86_64.cc b/compiler/optimizing/code_generator_vector_x86_64.cc index fb6e4e753f..47afa3b4a1 100644 --- a/compiler/optimizing/code_generator_vector_x86_64.cc +++ b/compiler/optimizing/code_generator_vector_x86_64.cc @@ -1374,12 +1374,32 @@ void 
InstructionCodeGeneratorX86_64::VisitVecPredWhile(HVecPredWhile* instructio UNREACHABLE(); } -void LocationsBuilderX86_64::VisitVecPredCondition(HVecPredCondition* instruction) { +void LocationsBuilderX86_64::VisitVecPredToBoolean(HVecPredToBoolean* instruction) { LOG(FATAL) << "No SIMD for " << instruction->GetId(); UNREACHABLE(); } -void InstructionCodeGeneratorX86_64::VisitVecPredCondition(HVecPredCondition* instruction) { +void InstructionCodeGeneratorX86_64::VisitVecPredToBoolean(HVecPredToBoolean* instruction) { + LOG(FATAL) << "No SIMD for " << instruction->GetId(); + UNREACHABLE(); +} + +void LocationsBuilderX86_64::VisitVecCondition(HVecCondition* instruction) { + LOG(FATAL) << "No SIMD for " << instruction->GetId(); + UNREACHABLE(); +} + +void InstructionCodeGeneratorX86_64::VisitVecCondition(HVecCondition* instruction) { + LOG(FATAL) << "No SIMD for " << instruction->GetId(); + UNREACHABLE(); +} + +void LocationsBuilderX86_64::VisitVecPredNot(HVecPredNot* instruction) { + LOG(FATAL) << "No SIMD for " << instruction->GetId(); + UNREACHABLE(); +} + +void InstructionCodeGeneratorX86_64::VisitVecPredNot(HVecPredNot* instruction) { LOG(FATAL) << "No SIMD for " << instruction->GetId(); UNREACHABLE(); } diff --git a/compiler/optimizing/code_generator_x86.cc b/compiler/optimizing/code_generator_x86.cc index cb1cecc45a..71db5c99af 100644 --- a/compiler/optimizing/code_generator_x86.cc +++ b/compiler/optimizing/code_generator_x86.cc @@ -27,6 +27,7 @@ #include "heap_poisoning.h" #include "interpreter/mterp/nterp.h" #include "intrinsics.h" +#include "intrinsics_list.h" #include "intrinsics_utils.h" #include "intrinsics_x86.h" #include "jit/profiling_info.h" @@ -36,8 +37,10 @@ #include "mirror/class-inl.h" #include "mirror/var_handle.h" #include "optimizing/nodes.h" +#include "profiling_info_builder.h" #include "scoped_thread_state_change-inl.h" #include "thread.h" +#include "trace.h" #include "utils/assembler.h" #include "utils/stack_checks.h" #include "utils/x86/assembler_x86.h" @@ -66,7 +69,7 @@ static RegisterSet OneRegInReferenceOutSaveEverythingCallerSaves() { RegisterSet caller_saves = RegisterSet::Empty(); caller_saves.Add(Location::RegisterLocation(calling_convention.GetRegisterAt(0))); // TODO: Add GetReturnLocation() to the calling convention so that we can DCHECK() - // that the the kPrimNot result register is the same as the first argument register. + // that the kPrimNot result register is the same as the first argument register. 
return caller_saves; } @@ -503,18 +506,17 @@ class ReadBarrierMarkSlowPathX86 : public SlowPathCode { : SlowPathCode(instruction), ref_(ref), unpoison_ref_before_marking_(unpoison_ref_before_marking) { - DCHECK(gUseReadBarrier); } const char* GetDescription() const override { return "ReadBarrierMarkSlowPathX86"; } void EmitNativeCode(CodeGenerator* codegen) override { + DCHECK(codegen->EmitReadBarrier()); LocationSummary* locations = instruction_->GetLocations(); Register ref_reg = ref_.AsRegister<Register>(); DCHECK(locations->CanCall()); DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(ref_reg)) << ref_reg; DCHECK(instruction_->IsInstanceFieldGet() || - instruction_->IsPredicatedInstanceFieldGet() || instruction_->IsStaticFieldGet() || instruction_->IsArrayGet() || instruction_->IsArraySet() || @@ -590,12 +592,12 @@ class ReadBarrierMarkAndUpdateFieldSlowPathX86 : public SlowPathCode { field_addr_(field_addr), unpoison_ref_before_marking_(unpoison_ref_before_marking), temp_(temp) { - DCHECK(gUseReadBarrier); } const char* GetDescription() const override { return "ReadBarrierMarkAndUpdateFieldSlowPathX86"; } void EmitNativeCode(CodeGenerator* codegen) override { + DCHECK(codegen->EmitReadBarrier()); LocationSummary* locations = instruction_->GetLocations(); Register ref_reg = ref_.AsRegister<Register>(); DCHECK(locations->CanCall()); @@ -604,7 +606,9 @@ class ReadBarrierMarkAndUpdateFieldSlowPathX86 : public SlowPathCode { << "Unexpected instruction in read barrier marking and field updating slow path: " << instruction_->DebugName(); HInvoke* invoke = instruction_->AsInvoke(); - DCHECK(IsUnsafeCASObject(invoke) || IsVarHandleCASFamily(invoke)) << invoke->GetIntrinsic(); + DCHECK(IsUnsafeCASReference(invoke) || + IsUnsafeGetAndSetReference(invoke) || + IsVarHandleCASFamily(invoke)) << invoke->GetIntrinsic(); __ Bind(GetEntryLabel()); if (unpoison_ref_before_marking_) { @@ -650,7 +654,7 @@ class ReadBarrierMarkAndUpdateFieldSlowPathX86 : public SlowPathCode { __ cmpl(temp_, ref_reg); __ j(kEqual, &done); - // Update the the holder's field atomically. This may fail if + // Update the holder's field atomically. This may fail if // mutator updates before us, but it's OK. 
This is achieved // using a strong compare-and-set (CAS) operation with relaxed // memory synchronization ordering, where the expected value is @@ -744,7 +748,6 @@ class ReadBarrierForHeapReferenceSlowPathX86 : public SlowPathCode { obj_(obj), offset_(offset), index_(index) { - DCHECK(gUseReadBarrier); // If `obj` is equal to `out` or `ref`, it means the initial object // has been overwritten by (or after) the heap object reference load // to be instrumented, e.g.: @@ -759,13 +762,13 @@ class ReadBarrierForHeapReferenceSlowPathX86 : public SlowPathCode { } void EmitNativeCode(CodeGenerator* codegen) override { + DCHECK(codegen->EmitReadBarrier()); CodeGeneratorX86* x86_codegen = down_cast<CodeGeneratorX86*>(codegen); LocationSummary* locations = instruction_->GetLocations(); Register reg_out = out_.AsRegister<Register>(); DCHECK(locations->CanCall()); DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(reg_out)); DCHECK(instruction_->IsInstanceFieldGet() || - instruction_->IsPredicatedInstanceFieldGet() || instruction_->IsStaticFieldGet() || instruction_->IsArrayGet() || instruction_->IsInstanceOf() || @@ -838,9 +841,11 @@ class ReadBarrierForHeapReferenceSlowPathX86 : public SlowPathCode { DCHECK(instruction_->GetLocations()->Intrinsified()); DCHECK((instruction_->AsInvoke()->GetIntrinsic() == Intrinsics::kUnsafeGetObject) || (instruction_->AsInvoke()->GetIntrinsic() == Intrinsics::kUnsafeGetObjectVolatile) || - (instruction_->AsInvoke()->GetIntrinsic() == Intrinsics::kJdkUnsafeGetObject) || - (instruction_->AsInvoke()->GetIntrinsic() == Intrinsics::kJdkUnsafeGetObjectVolatile) || - (instruction_->AsInvoke()->GetIntrinsic() == Intrinsics::kJdkUnsafeGetObjectAcquire)) + (instruction_->AsInvoke()->GetIntrinsic() == Intrinsics::kJdkUnsafeGetReference) || + (instruction_->AsInvoke()->GetIntrinsic() == + Intrinsics::kJdkUnsafeGetReferenceVolatile) || + (instruction_->AsInvoke()->GetIntrinsic() == + Intrinsics::kJdkUnsafeGetReferenceAcquire)) << instruction_->AsInvoke()->GetIntrinsic(); DCHECK_EQ(offset_, 0U); DCHECK(index_.IsRegisterPair()); @@ -918,10 +923,10 @@ class ReadBarrierForRootSlowPathX86 : public SlowPathCode { public: ReadBarrierForRootSlowPathX86(HInstruction* instruction, Location out, Location root) : SlowPathCode(instruction), out_(out), root_(root) { - DCHECK(gUseReadBarrier); } void EmitNativeCode(CodeGenerator* codegen) override { + DCHECK(codegen->EmitReadBarrier()); LocationSummary* locations = instruction_->GetLocations(); Register reg_out = out_.AsRegister<Register>(); DCHECK(locations->CanCall()); @@ -985,11 +990,14 @@ class MethodEntryExitHooksSlowPathX86 : public SlowPathCode { class CompileOptimizedSlowPathX86 : public SlowPathCode { public: - CompileOptimizedSlowPathX86() : SlowPathCode(/* instruction= */ nullptr) {} + explicit CompileOptimizedSlowPathX86(uint32_t counter_address) + : SlowPathCode(/* instruction= */ nullptr), + counter_address_(counter_address) {} void EmitNativeCode(CodeGenerator* codegen) override { CodeGeneratorX86* x86_codegen = down_cast<CodeGeneratorX86*>(codegen); __ Bind(GetEntryLabel()); + __ movw(Address::Absolute(counter_address_), Immediate(ProfilingInfo::GetOptimizeThreshold())); x86_codegen->GenerateInvokeRuntime( GetThreadOffset<kX86PointerSize>(kQuickCompileOptimized).Int32Value()); __ jmp(GetExitLabel()); @@ -1000,6 +1008,8 @@ class CompileOptimizedSlowPathX86 : public SlowPathCode { } private: + uint32_t counter_address_; + DISALLOW_COPY_AND_ASSIGN(CompileOptimizedSlowPathX86); }; @@ -1107,6 +1117,7 @@ void 
CodeGeneratorX86::GenerateInvokeRuntime(int32_t entry_point_offset) { } namespace detail { + // Mark which intrinsics we don't have handcrafted code for. template <Intrinsics T> struct IsUnimplemented { @@ -1121,15 +1132,13 @@ struct IsUnimplemented { UNIMPLEMENTED_INTRINSIC_LIST_X86(TRUE_OVERRIDE) #undef TRUE_OVERRIDE -#include "intrinsics_list.h" static constexpr bool kIsIntrinsicUnimplemented[] = { - false, // kNone + false, // kNone #define IS_UNIMPLEMENTED(Intrinsic, ...) \ - IsUnimplemented<Intrinsics::k##Intrinsic>().is_unimplemented, - INTRINSICS_LIST(IS_UNIMPLEMENTED) + IsUnimplemented<Intrinsics::k##Intrinsic>().is_unimplemented, + ART_INTRINSICS_LIST(IS_UNIMPLEMENTED) #undef IS_UNIMPLEMENTED }; -#undef INTRINSICS_LIST } // namespace detail @@ -1140,8 +1149,7 @@ CodeGeneratorX86::CodeGeneratorX86(HGraph* graph, kNumberOfCpuRegisters, kNumberOfXmmRegisters, kNumberOfRegisterPairs, - ComputeRegisterMask(reinterpret_cast<const int*>(kCoreCalleeSaves), - arraysize(kCoreCalleeSaves)) + ComputeRegisterMask(kCoreCalleeSaves, arraysize(kCoreCalleeSaves)) | (1 << kFakeReturnRegister), 0, compiler_options, @@ -1221,12 +1229,18 @@ void LocationsBuilderX86::VisitMethodExitHook(HMethodExitHook* method_hook) { LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(method_hook, LocationSummary::kCallOnSlowPath); SetInForReturnValue(method_hook, locations); + // We use rdtsc to obtain a timestamp for tracing. rdtsc returns the results in EAX + EDX. + locations->AddTemp(Location::RegisterLocation(EAX)); + locations->AddTemp(Location::RegisterLocation(EDX)); + // An additional temporary register to hold address to store the timestamp counter. + locations->AddTemp(Location::RequiresRegister()); } void InstructionCodeGeneratorX86::GenerateMethodEntryExitHook(HInstruction* instruction) { SlowPathCode* slow_path = new (codegen_->GetScopedAllocator()) MethodEntryExitHooksSlowPathX86(instruction); codegen_->AddSlowPath(slow_path); + LocationSummary* locations = instruction->GetLocations(); if (instruction->IsMethodExitHook()) { // Check if we are required to check if the caller needs a deoptimization. Strictly speaking it @@ -1242,8 +1256,51 @@ void InstructionCodeGeneratorX86::GenerateMethodEntryExitHook(HInstruction* inst MemberOffset offset = instruction->IsMethodExitHook() ? instrumentation::Instrumentation::HaveMethodExitListenersOffset() : instrumentation::Instrumentation::HaveMethodEntryListenersOffset(); - __ cmpb(Address::Absolute(address + offset.Int32Value()), Immediate(0)); - __ j(kNotEqual, slow_path->GetEntryLabel()); + __ cmpb(Address::Absolute(address + offset.Int32Value()), + Immediate(instrumentation::Instrumentation::kFastTraceListeners)); + // Check if there are any trace method entry / exit listeners. If no, continue. + __ j(kLess, slow_path->GetExitLabel()); + // Check if there are any slow (jvmti / trace with thread cpu time) method entry / exit listeners. + // If yes, just take the slow path. + __ j(kGreater, slow_path->GetEntryLabel()); + + // For entry_addr use the first temp that isn't EAX or EDX. We need this after + // rdtsc which returns values in EAX + EDX. + Register entry_addr = locations->GetTemp(2).AsRegister<Register>(); + Register index = locations->GetTemp(1).AsRegister<Register>(); + + // Check if there is place in the buffer for a new entry, if no, take slow path. 
+ uint32_t trace_buffer_ptr = Thread::TraceBufferPtrOffset<kX86PointerSize>().Int32Value(); + uint64_t trace_buffer_index_offset = + Thread::TraceBufferIndexOffset<kX86PointerSize>().Int32Value(); + + __ fs()->movl(index, Address::Absolute(trace_buffer_index_offset)); + __ subl(index, Immediate(kNumEntriesForWallClock)); + __ j(kLess, slow_path->GetEntryLabel()); + + // Update the index in the `Thread`. + __ fs()->movl(Address::Absolute(trace_buffer_index_offset), index); + // Calculate the entry address in the buffer. + // entry_addr = base_addr + sizeof(void*) * index + __ fs()->movl(entry_addr, Address::Absolute(trace_buffer_ptr)); + __ leal(entry_addr, Address(entry_addr, index, TIMES_4, 0)); + + // Record method pointer and trace action. + Register method = index; + __ movl(method, Address(ESP, kCurrentMethodStackOffset)); + // Use last two bits to encode trace method action. For MethodEntry it is 0 + // so no need to set the bits since they are 0 already. + if (instruction->IsMethodExitHook()) { + DCHECK_GE(ArtMethod::Alignment(kRuntimePointerSize), static_cast<size_t>(4)); + static_assert(enum_cast<int32_t>(TraceAction::kTraceMethodEnter) == 0); + static_assert(enum_cast<int32_t>(TraceAction::kTraceMethodExit) == 1); + __ orl(method, Immediate(enum_cast<int32_t>(TraceAction::kTraceMethodExit))); + } + __ movl(Address(entry_addr, kMethodOffsetInBytes), method); + // Get the timestamp. rdtsc returns timestamp in EAX + EDX. + __ rdtsc(); + __ movl(Address(entry_addr, kTimestampOffsetInBytes), EAX); + __ movl(Address(entry_addr, kHighTimestampOffsetInBytes), EDX); __ Bind(slow_path->GetExitLabel()); } @@ -1254,7 +1311,13 @@ void InstructionCodeGeneratorX86::VisitMethodExitHook(HMethodExitHook* instructi } void LocationsBuilderX86::VisitMethodEntryHook(HMethodEntryHook* method_hook) { - new (GetGraph()->GetAllocator()) LocationSummary(method_hook, LocationSummary::kCallOnSlowPath); + LocationSummary* locations = new (GetGraph()->GetAllocator()) + LocationSummary(method_hook, LocationSummary::kCallOnSlowPath); + // We use rdtsc to obtain a timestamp for tracing. rdtsc returns the results in EAX + EDX. + locations->AddTemp(Location::RegisterLocation(EAX)); + locations->AddTemp(Location::RegisterLocation(EDX)); + // An additional temporary register to hold address to store the timestamp counter. + locations->AddTemp(Location::RequiresRegister()); } void InstructionCodeGeneratorX86::VisitMethodEntryHook(HMethodEntryHook* instruction) { @@ -1286,13 +1349,13 @@ void CodeGeneratorX86::MaybeIncrementHotness(bool is_frame_entry) { } if (GetGraph()->IsCompilingBaseline() && !Runtime::Current()->IsAotCompiler()) { - SlowPathCode* slow_path = new (GetScopedAllocator()) CompileOptimizedSlowPathX86(); - AddSlowPath(slow_path); ProfilingInfo* info = GetGraph()->GetProfilingInfo(); DCHECK(info != nullptr); uint32_t address = reinterpret_cast32<uint32_t>(info) + ProfilingInfo::BaselineHotnessCountOffset().Int32Value(); DCHECK(!HasEmptyFrame()); + SlowPathCode* slow_path = new (GetScopedAllocator()) CompileOptimizedSlowPathX86(address); + AddSlowPath(slow_path); // With multiple threads, this can overflow. This is OK, we will eventually get to see // it reaching 0. Also, at this point we have no register available to look // at the counter directly. 
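// ----------------------------------------------------------------------------
// Illustrative sketch only (not part of the patch above): what the inlined
// method entry/exit trace fast path records, written as plain C++. The real
// buffer and index live in the Thread and the generated x86 code reads the
// timestamp with rdtsc (result in EAX:EDX); the names and sizes below
// (TraceEntry, kTraceBufferEntries, kEnter/kExit) are made up for this sketch.
#include <cstdint>
#include <cstdio>
#include <x86intrin.h>  // __rdtsc()

enum TraceAction : uintptr_t { kEnter = 0, kExit = 1 };

struct TraceEntry {
  uintptr_t method_and_action;  // method pointer with the action in the low bits
  uint64_t timestamp;           // raw time-stamp counter value
};

constexpr int kTraceBufferEntries = 1024;   // hypothetical capacity
TraceEntry g_buffer[kTraceBufferEntries];
int g_index = kTraceBufferEntries;          // counts down, like the per-thread index

// Returns false when the buffer is full; the generated code takes the slow path
// in that case instead of writing an entry.
bool RecordEvent(void* method, TraceAction action) {
  if (g_index == 0) {
    return false;
  }
  g_index -= 1;
  TraceEntry& entry = g_buffer[g_index];
  // Methods are at least 4-byte aligned, so the low bits are free for the action.
  entry.method_and_action = reinterpret_cast<uintptr_t>(method) | action;
  entry.timestamp = __rdtsc();  // the codegen stores EAX and EDX as two 32-bit words
  return true;
}

int main() {
  static int fake_method;  // stands in for an ArtMethod*
  RecordEvent(&fake_method, kEnter);
  RecordEvent(&fake_method, kExit);
  std::printf("entries used: %d\n", kTraceBufferEntries - g_index);
  return 0;
}
// ----------------------------------------------------------------------------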
@@ -1700,7 +1763,7 @@ void CodeGeneratorX86::LoadFromMemoryNoBarrier(DataType::Type dst_type, __ movsd(dst.AsFpuRegister<XmmRegister>(), src); break; case DataType::Type::kReference: - DCHECK(!gUseReadBarrier); + DCHECK(!EmitReadBarrier()); __ movl(dst.AsRegister<Register>(), src); __ MaybeUnpoisonHeapReference(dst.AsRegister<Register>()); break; @@ -1865,8 +1928,7 @@ void LocationsBuilderX86::VisitExit(HExit* exit) { exit->SetLocations(nullptr); } -void InstructionCodeGeneratorX86::VisitExit(HExit* exit ATTRIBUTE_UNUSED) { -} +void InstructionCodeGeneratorX86::VisitExit([[maybe_unused]] HExit* exit) {} template<class LabelType> void InstructionCodeGeneratorX86::GenerateFPJumps(HCondition* cond, @@ -1981,7 +2043,7 @@ void InstructionCodeGeneratorX86::GenerateFPCompare(Location lhs, Location rhs, HInstruction* insn, bool is_double) { - HX86LoadFromConstantTable* const_area = insn->InputAt(1)->AsX86LoadFromConstantTable(); + HX86LoadFromConstantTable* const_area = insn->InputAt(1)->AsX86LoadFromConstantTableOrNull(); if (is_double) { if (rhs.IsFpuRegister()) { __ ucomisd(lhs.AsFpuRegister<XmmRegister>(), rhs.AsFpuRegister<XmmRegister>()); @@ -2053,14 +2115,18 @@ void InstructionCodeGeneratorX86::GenerateCompareTestAndBranch(HCondition* condi } } -static bool AreEflagsSetFrom(HInstruction* cond, HInstruction* branch) { +static bool AreEflagsSetFrom(HInstruction* cond, + HInstruction* branch, + const CompilerOptions& compiler_options) { // Moves may affect the eflags register (move zero uses xorl), so the EFLAGS // are set only strictly before `branch`. We can't use the eflags on long/FP // conditions if they are materialized due to the complex branching. return cond->IsCondition() && cond->GetNext() == branch && cond->InputAt(0)->GetType() != DataType::Type::kInt64 && - !DataType::IsFloatingPointType(cond->InputAt(0)->GetType()); + !DataType::IsFloatingPointType(cond->InputAt(0)->GetType()) && + !(cond->GetBlock()->GetGraph()->IsCompilingBaseline() && + compiler_options.ProfileBranches()); } template<class LabelType> @@ -2097,7 +2163,7 @@ void InstructionCodeGeneratorX86::GenerateTestAndBranch(HInstruction* instructio // - condition true => branch to true_target // - branch to false_target if (IsBooleanValueOrMaterializedCondition(cond)) { - if (AreEflagsSetFrom(cond, instruction)) { + if (AreEflagsSetFrom(cond, instruction, codegen_->GetCompilerOptions())) { if (true_target == nullptr) { __ j(X86Condition(cond->AsCondition()->GetOppositeCondition()), false_target); } else { @@ -2151,7 +2217,15 @@ void InstructionCodeGeneratorX86::GenerateTestAndBranch(HInstruction* instructio void LocationsBuilderX86::VisitIf(HIf* if_instr) { LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(if_instr); if (IsBooleanValueOrMaterializedCondition(if_instr->InputAt(0))) { - locations->SetInAt(0, Location::Any()); + if (GetGraph()->IsCompilingBaseline() && + codegen_->GetCompilerOptions().ProfileBranches() && + !Runtime::Current()->IsAotCompiler()) { + locations->SetInAt(0, Location::RequiresRegister()); + locations->AddTemp(Location::RequiresRegister()); + locations->AddTemp(Location::RequiresRegister()); + } else { + locations->SetInAt(0, Location::Any()); + } } } @@ -2162,6 +2236,34 @@ void InstructionCodeGeneratorX86::VisitIf(HIf* if_instr) { nullptr : codegen_->GetLabelOf(true_successor); Label* false_target = codegen_->GoesToNextBlock(if_instr->GetBlock(), false_successor) ? 
nullptr : codegen_->GetLabelOf(false_successor); + if (IsBooleanValueOrMaterializedCondition(if_instr->InputAt(0))) { + if (GetGraph()->IsCompilingBaseline() && + codegen_->GetCompilerOptions().ProfileBranches() && + !Runtime::Current()->IsAotCompiler()) { + DCHECK(if_instr->InputAt(0)->IsCondition()); + Register temp = if_instr->GetLocations()->GetTemp(0).AsRegister<Register>(); + Register counter = if_instr->GetLocations()->GetTemp(1).AsRegister<Register>(); + ProfilingInfo* info = GetGraph()->GetProfilingInfo(); + DCHECK(info != nullptr); + BranchCache* cache = info->GetBranchCache(if_instr->GetDexPc()); + // Currently, not all If branches are profiled. + if (cache != nullptr) { + uint64_t address = + reinterpret_cast64<uint64_t>(cache) + BranchCache::FalseOffset().Int32Value(); + static_assert( + BranchCache::TrueOffset().Int32Value() - BranchCache::FalseOffset().Int32Value() == 2, + "Unexpected offsets for BranchCache"); + NearLabel done; + Location lhs = if_instr->GetLocations()->InAt(0); + __ movl(temp, Immediate(address)); + __ movzxw(counter, Address(temp, lhs.AsRegister<Register>(), TIMES_2, 0)); + __ addw(counter, Immediate(1)); + __ j(kEqual, &done); + __ movw(Address(temp, lhs.AsRegister<Register>(), TIMES_2, 0), counter); + __ Bind(&done); + } + } + } GenerateTestAndBranch(if_instr, /* condition_input_index= */ 0, true_target, false_target); } @@ -2257,7 +2359,7 @@ void InstructionCodeGeneratorX86::VisitSelect(HSelect* select) { if (!condition->IsEmittedAtUseSite()) { // This was a previously materialized condition. // Can we use the existing condition code? - if (AreEflagsSetFrom(condition, select)) { + if (AreEflagsSetFrom(condition, select, codegen_->GetCompilerOptions())) { // Materialization was the previous instruction. Condition codes are right. cond = X86Condition(condition->GetCondition()); } else { @@ -2506,7 +2608,7 @@ void LocationsBuilderX86::VisitIntConstant(HIntConstant* constant) { locations->SetOut(Location::ConstantLocation(constant)); } -void InstructionCodeGeneratorX86::VisitIntConstant(HIntConstant* constant ATTRIBUTE_UNUSED) { +void InstructionCodeGeneratorX86::VisitIntConstant([[maybe_unused]] HIntConstant* constant) { // Will be generated at use site. } @@ -2516,7 +2618,7 @@ void LocationsBuilderX86::VisitNullConstant(HNullConstant* constant) { locations->SetOut(Location::ConstantLocation(constant)); } -void InstructionCodeGeneratorX86::VisitNullConstant(HNullConstant* constant ATTRIBUTE_UNUSED) { +void InstructionCodeGeneratorX86::VisitNullConstant([[maybe_unused]] HNullConstant* constant) { // Will be generated at use site. } @@ -2526,7 +2628,7 @@ void LocationsBuilderX86::VisitLongConstant(HLongConstant* constant) { locations->SetOut(Location::ConstantLocation(constant)); } -void InstructionCodeGeneratorX86::VisitLongConstant(HLongConstant* constant ATTRIBUTE_UNUSED) { +void InstructionCodeGeneratorX86::VisitLongConstant([[maybe_unused]] HLongConstant* constant) { // Will be generated at use site. } @@ -2536,7 +2638,7 @@ void LocationsBuilderX86::VisitFloatConstant(HFloatConstant* constant) { locations->SetOut(Location::ConstantLocation(constant)); } -void InstructionCodeGeneratorX86::VisitFloatConstant(HFloatConstant* constant ATTRIBUTE_UNUSED) { +void InstructionCodeGeneratorX86::VisitFloatConstant([[maybe_unused]] HFloatConstant* constant) { // Will be generated at use site. 
} @@ -2546,7 +2648,7 @@ void LocationsBuilderX86::VisitDoubleConstant(HDoubleConstant* constant) { locations->SetOut(Location::ConstantLocation(constant)); } -void InstructionCodeGeneratorX86::VisitDoubleConstant(HDoubleConstant* constant ATTRIBUTE_UNUSED) { +void InstructionCodeGeneratorX86::VisitDoubleConstant([[maybe_unused]] HDoubleConstant* constant) { // Will be generated at use site. } @@ -2555,7 +2657,7 @@ void LocationsBuilderX86::VisitConstructorFence(HConstructorFence* constructor_f } void InstructionCodeGeneratorX86::VisitConstructorFence( - HConstructorFence* constructor_fence ATTRIBUTE_UNUSED) { + [[maybe_unused]] HConstructorFence* constructor_fence) { codegen_->GenerateMemoryBarrier(MemBarrierKind::kStoreStore); } @@ -2571,7 +2673,7 @@ void LocationsBuilderX86::VisitReturnVoid(HReturnVoid* ret) { ret->SetLocations(nullptr); } -void InstructionCodeGeneratorX86::VisitReturnVoid(HReturnVoid* ret ATTRIBUTE_UNUSED) { +void InstructionCodeGeneratorX86::VisitReturnVoid([[maybe_unused]] HReturnVoid* ret) { codegen_->GenerateFrameExit(); } @@ -2697,7 +2799,7 @@ void LocationsBuilderX86::VisitInvokeVirtual(HInvokeVirtual* invoke) { HandleInvoke(invoke); - if (GetGraph()->IsCompilingBaseline() && !Runtime::Current()->IsAotCompiler()) { + if (ProfilingInfoBuilder::IsInlineCacheUseful(invoke, codegen_)) { // Add one temporary for inline cache update. invoke->GetLocations()->AddTemp(Location::RegisterLocation(EBP)); } @@ -2725,7 +2827,7 @@ void LocationsBuilderX86::VisitInvokeInterface(HInvokeInterface* invoke) { // Add the hidden argument. invoke->GetLocations()->AddTemp(Location::FpuRegisterLocation(XMM7)); - if (GetGraph()->IsCompilingBaseline() && !Runtime::Current()->IsAotCompiler()) { + if (ProfilingInfoBuilder::IsInlineCacheUseful(invoke, codegen_)) { // Add one temporary for inline cache update. invoke->GetLocations()->AddTemp(Location::RegisterLocation(EBP)); } @@ -2743,29 +2845,30 @@ void LocationsBuilderX86::VisitInvokeInterface(HInvokeInterface* invoke) { void CodeGeneratorX86::MaybeGenerateInlineCacheCheck(HInstruction* instruction, Register klass) { DCHECK_EQ(EAX, klass); - // We know the destination of an intrinsic, so no need to record inline - // caches (also the intrinsic location builder doesn't request an additional - // temporary). - if (!instruction->GetLocations()->Intrinsified() && - GetGraph()->IsCompilingBaseline() && - !Runtime::Current()->IsAotCompiler()) { - DCHECK(!instruction->GetEnvironment()->IsFromInlinedInvoke()); + if (ProfilingInfoBuilder::IsInlineCacheUseful(instruction->AsInvoke(), this)) { ProfilingInfo* info = GetGraph()->GetProfilingInfo(); DCHECK(info != nullptr); - InlineCache* cache = info->GetInlineCache(instruction->GetDexPc()); - uint32_t address = reinterpret_cast32<uint32_t>(cache); - if (kIsDebugBuild) { - uint32_t temp_index = instruction->GetLocations()->GetTempCount() - 1u; - CHECK_EQ(EBP, instruction->GetLocations()->GetTemp(temp_index).AsRegister<Register>()); + InlineCache* cache = ProfilingInfoBuilder::GetInlineCache(info, instruction->AsInvoke()); + if (cache != nullptr) { + uint32_t address = reinterpret_cast32<uint32_t>(cache); + if (kIsDebugBuild) { + uint32_t temp_index = instruction->GetLocations()->GetTempCount() - 1u; + CHECK_EQ(EBP, instruction->GetLocations()->GetTemp(temp_index).AsRegister<Register>()); + } + Register temp = EBP; + NearLabel done; + __ movl(temp, Immediate(address)); + // Fast path for a monomorphic cache. 
+ __ cmpl(klass, Address(temp, InlineCache::ClassesOffset().Int32Value())); + __ j(kEqual, &done); + GenerateInvokeRuntime(GetThreadOffset<kX86PointerSize>(kQuickUpdateInlineCache).Int32Value()); + __ Bind(&done); + } else { + // This is unexpected, but we don't guarantee stable compilation across + // JIT runs so just warn about it. + ScopedObjectAccess soa(Thread::Current()); + LOG(WARNING) << "Missing inline cache for " << GetGraph()->GetArtMethod()->PrettyMethod(); } - Register temp = EBP; - NearLabel done; - __ movl(temp, Immediate(address)); - // Fast path for a monomorphic cache. - __ cmpl(klass, Address(temp, InlineCache::ClassesOffset().Int32Value())); - __ j(kEqual, &done); - GenerateInvokeRuntime(GetThreadOffset<kX86PointerSize>(kQuickUpdateInlineCache).Int32Value()); - __ Bind(&done); } } @@ -2954,10 +3057,10 @@ void InstructionCodeGeneratorX86::VisitX86FPNeg(HX86FPNeg* neg) { constant_area)); __ xorps(out.AsFpuRegister<XmmRegister>(), mask); } else { - __ movsd(mask, codegen_->LiteralInt64Address(INT64_C(0x8000000000000000), - neg->GetBaseMethodAddress(), - constant_area)); - __ xorpd(out.AsFpuRegister<XmmRegister>(), mask); + __ movsd(mask, codegen_->LiteralInt64Address(INT64_C(0x8000000000000000), + neg->GetBaseMethodAddress(), + constant_area)); + __ xorpd(out.AsFpuRegister<XmmRegister>(), mask); } } @@ -5086,8 +5189,7 @@ void LocationsBuilderX86::VisitParameterValue(HParameterValue* instruction) { } void InstructionCodeGeneratorX86::VisitParameterValue( - HParameterValue* instruction ATTRIBUTE_UNUSED) { -} + [[maybe_unused]] HParameterValue* instruction) {} void LocationsBuilderX86::VisitCurrentMethod(HCurrentMethod* instruction) { LocationSummary* locations = @@ -5095,7 +5197,7 @@ void LocationsBuilderX86::VisitCurrentMethod(HCurrentMethod* instruction) { locations->SetOut(Location::RegisterLocation(kMethodRegisterArgument)); } -void InstructionCodeGeneratorX86::VisitCurrentMethod(HCurrentMethod* instruction ATTRIBUTE_UNUSED) { +void InstructionCodeGeneratorX86::VisitCurrentMethod([[maybe_unused]] HCurrentMethod* instruction) { } void LocationsBuilderX86::VisitClassTableGet(HClassTableGet* instruction) { @@ -5294,7 +5396,7 @@ void LocationsBuilderX86::VisitPhi(HPhi* instruction) { locations->SetOut(Location::Any()); } -void InstructionCodeGeneratorX86::VisitPhi(HPhi* instruction ATTRIBUTE_UNUSED) { +void InstructionCodeGeneratorX86::VisitPhi([[maybe_unused]] HPhi* instruction) { LOG(FATAL) << "Unreachable"; } @@ -5323,8 +5425,8 @@ void CodeGeneratorX86::GenerateMemoryBarrier(MemBarrierKind kind) { } HInvokeStaticOrDirect::DispatchInfo CodeGeneratorX86::GetSupportedInvokeStaticOrDirectDispatch( - const HInvokeStaticOrDirect::DispatchInfo& desired_dispatch_info, - ArtMethod* method ATTRIBUTE_UNUSED) { + const HInvokeStaticOrDirect::DispatchInfo& desired_dispatch_info, + [[maybe_unused]] ArtMethod* method) { return desired_dispatch_info; } @@ -5679,7 +5781,7 @@ void CodeGeneratorX86::LoadBootImageAddress(Register reg, void CodeGeneratorX86::LoadIntrinsicDeclaringClass(Register reg, HInvokeStaticOrDirect* invoke) { DCHECK_NE(invoke->GetIntrinsic(), Intrinsics::kNone); if (GetCompilerOptions().IsBootImage()) { - // Load the class the same way as for HLoadClass::LoadKind::kBootImageLinkTimePcRelative. + // Load the type the same way as for HLoadClass::LoadKind::kBootImageLinkTimePcRelative. 
HX86ComputeBaseMethodAddress* method_address = invoke->InputAt(invoke->GetSpecialInputIndex())->AsX86ComputeBaseMethodAddress(); DCHECK(method_address != nullptr); @@ -5804,45 +5906,33 @@ void CodeGeneratorX86::MarkGCCard( } void LocationsBuilderX86::HandleFieldGet(HInstruction* instruction, const FieldInfo& field_info) { - DCHECK(instruction->IsInstanceFieldGet() || - instruction->IsStaticFieldGet() || - instruction->IsPredicatedInstanceFieldGet()); + DCHECK(instruction->IsInstanceFieldGet() || instruction->IsStaticFieldGet()); bool object_field_get_with_read_barrier = - gUseReadBarrier && (instruction->GetType() == DataType::Type::kReference); - bool is_predicated = instruction->IsPredicatedInstanceFieldGet(); + (instruction->GetType() == DataType::Type::kReference) && codegen_->EmitReadBarrier(); LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(instruction, - gUseReadBarrier + codegen_->EmitReadBarrier() ? LocationSummary::kCallOnSlowPath : LocationSummary::kNoCall); if (object_field_get_with_read_barrier && kUseBakerReadBarrier) { locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty()); // No caller-save registers. } // receiver_input - locations->SetInAt(is_predicated ? 1 : 0, Location::RequiresRegister()); - if (is_predicated) { - if (DataType::IsFloatingPointType(instruction->GetType())) { - locations->SetInAt(0, Location::RequiresFpuRegister()); - } else { - locations->SetInAt(0, Location::RequiresRegister()); - } - } + locations->SetInAt(0, Location::RequiresRegister()); if (DataType::IsFloatingPointType(instruction->GetType())) { - locations->SetOut(is_predicated ? Location::SameAsFirstInput() - : Location::RequiresFpuRegister()); + locations->SetOut(Location::RequiresFpuRegister()); } else { // The output overlaps in case of long: we don't want the low move // to overwrite the object's location. Likewise, in the case of // an object field get with read barriers enabled, we do not want // the move to overwrite the object's location, as we need it to emit // the read barrier. - locations->SetOut(is_predicated ? Location::SameAsFirstInput() : Location::RequiresRegister(), - (object_field_get_with_read_barrier || - instruction->GetType() == DataType::Type::kInt64 || - is_predicated) - ? Location::kOutputOverlap - : Location::kNoOutputOverlap); + locations->SetOut( + Location::RequiresRegister(), + (object_field_get_with_read_barrier || instruction->GetType() == DataType::Type::kInt64) + ? Location::kOutputOverlap + : Location::kNoOutputOverlap); } if (field_info.IsVolatile() && (field_info.GetFieldType() == DataType::Type::kInt64)) { @@ -5856,12 +5946,10 @@ void LocationsBuilderX86::HandleFieldGet(HInstruction* instruction, const FieldI void InstructionCodeGeneratorX86::HandleFieldGet(HInstruction* instruction, const FieldInfo& field_info) { - DCHECK(instruction->IsInstanceFieldGet() || - instruction->IsStaticFieldGet() || - instruction->IsPredicatedInstanceFieldGet()); + DCHECK(instruction->IsInstanceFieldGet() || instruction->IsStaticFieldGet()); LocationSummary* locations = instruction->GetLocations(); - Location base_loc = locations->InAt(instruction->IsPredicatedInstanceFieldGet() ? 
1 : 0); + Location base_loc = locations->InAt(0); Register base = base_loc.AsRegister<Register>(); Location out = locations->Out(); bool is_volatile = field_info.IsVolatile(); @@ -5871,7 +5959,7 @@ void InstructionCodeGeneratorX86::HandleFieldGet(HInstruction* instruction, if (load_type == DataType::Type::kReference) { // /* HeapReference<Object> */ out = *(base + offset) - if (gUseReadBarrier && kUseBakerReadBarrier) { + if (codegen_->EmitBakerReadBarrier()) { // Note that a potential implicit null check is handled in this // CodeGeneratorX86::GenerateFieldLoadWithBakerReadBarrier call. codegen_->GenerateFieldLoadWithBakerReadBarrier( @@ -6099,17 +6187,8 @@ void InstructionCodeGeneratorX86::HandleFieldSet(HInstruction* instruction, bool is_volatile = field_info.IsVolatile(); DataType::Type field_type = field_info.GetFieldType(); uint32_t offset = field_info.GetFieldOffset().Uint32Value(); - bool is_predicated = - instruction->IsInstanceFieldSet() && instruction->AsInstanceFieldSet()->GetIsPredicatedSet(); - Address field_addr(base, offset); - NearLabel pred_is_null; - if (is_predicated) { - __ testl(base, base); - __ j(kEqual, &pred_is_null); - } - HandleFieldSet(instruction, /* value_index= */ 1, field_type, @@ -6118,10 +6197,6 @@ void InstructionCodeGeneratorX86::HandleFieldSet(HInstruction* instruction, is_volatile, value_can_be_null, write_barrier_kind); - - if (is_predicated) { - __ Bind(&pred_is_null); - } } void LocationsBuilderX86::VisitStaticFieldGet(HStaticFieldGet* instruction) { @@ -6154,25 +6229,10 @@ void InstructionCodeGeneratorX86::VisitInstanceFieldSet(HInstanceFieldSet* instr instruction->GetWriteBarrierKind()); } -void LocationsBuilderX86::VisitPredicatedInstanceFieldGet( - HPredicatedInstanceFieldGet* instruction) { - HandleFieldGet(instruction, instruction->GetFieldInfo()); -} - void LocationsBuilderX86::VisitInstanceFieldGet(HInstanceFieldGet* instruction) { HandleFieldGet(instruction, instruction->GetFieldInfo()); } -void InstructionCodeGeneratorX86::VisitPredicatedInstanceFieldGet( - HPredicatedInstanceFieldGet* instruction) { - NearLabel finish; - LocationSummary* locations = instruction->GetLocations(); - Register recv = locations->InAt(1).AsRegister<Register>(); - __ testl(recv, recv); - __ j(kZero, &finish); - HandleFieldGet(instruction, instruction->GetFieldInfo()); - __ Bind(&finish); -} void InstructionCodeGeneratorX86::VisitInstanceFieldGet(HInstanceFieldGet* instruction) { HandleFieldGet(instruction, instruction->GetFieldInfo()); } @@ -6299,7 +6359,7 @@ void InstructionCodeGeneratorX86::VisitNullCheck(HNullCheck* instruction) { void LocationsBuilderX86::VisitArrayGet(HArrayGet* instruction) { bool object_array_get_with_read_barrier = - gUseReadBarrier && (instruction->GetType() == DataType::Type::kReference); + (instruction->GetType() == DataType::Type::kReference) && codegen_->EmitReadBarrier(); LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(instruction, object_array_get_with_read_barrier @@ -6341,7 +6401,7 @@ void InstructionCodeGeneratorX86::VisitArrayGet(HArrayGet* instruction) { "art::mirror::HeapReference<art::mirror::Object> and int32_t have different sizes."); // /* HeapReference<Object> */ out = // *(obj + data_offset + index * sizeof(HeapReference<Object>)) - if (gUseReadBarrier && kUseBakerReadBarrier) { + if (codegen_->EmitBakerReadBarrier()) { // Note that a potential implicit null check is handled in this // CodeGeneratorX86::GenerateArrayLoadWithBakerReadBarrier call. 
codegen_->GenerateArrayLoadWithBakerReadBarrier( @@ -6749,7 +6809,7 @@ void InstructionCodeGeneratorX86::VisitBoundsCheck(HBoundsCheck* instruction) { } } -void LocationsBuilderX86::VisitParallelMove(HParallelMove* instruction ATTRIBUTE_UNUSED) { +void LocationsBuilderX86::VisitParallelMove([[maybe_unused]] HParallelMove* instruction) { LOG(FATAL) << "Unreachable"; } @@ -7163,7 +7223,7 @@ void LocationsBuilderX86::VisitLoadClass(HLoadClass* cls) { load_kind == HLoadClass::LoadKind::kBssEntryPublic || load_kind == HLoadClass::LoadKind::kBssEntryPackage); - const bool requires_read_barrier = gUseReadBarrier && !cls->IsInBootImage(); + const bool requires_read_barrier = !cls->IsInBootImage() && codegen_->EmitReadBarrier(); LocationSummary::CallKind call_kind = (cls->NeedsEnvironment() || requires_read_barrier) ? LocationSummary::kCallOnSlowPath : LocationSummary::kNoCall; @@ -7177,11 +7237,11 @@ void LocationsBuilderX86::VisitLoadClass(HLoadClass* cls) { } locations->SetOut(Location::RequiresRegister()); if (call_kind == LocationSummary::kCallOnSlowPath && cls->HasPcRelativeLoadKind()) { - if (!gUseReadBarrier || kUseBakerReadBarrier) { + if (codegen_->EmitNonBakerReadBarrier()) { + // For non-Baker read barrier we have a temp-clobbering call. + } else { // Rely on the type resolution and/or initialization to save everything. locations->SetCustomSlowPathCallerSaves(OneRegInReferenceOutSaveEverythingCallerSaves()); - } else { - // For non-Baker read barrier we have a temp-clobbering call. } } } @@ -7213,9 +7273,8 @@ void InstructionCodeGeneratorX86::VisitLoadClass(HLoadClass* cls) NO_THREAD_SAFE Register out = out_loc.AsRegister<Register>(); bool generate_null_check = false; - const ReadBarrierOption read_barrier_option = cls->IsInBootImage() - ? kWithoutReadBarrier - : gCompilerReadBarrierOption; + const ReadBarrierOption read_barrier_option = + cls->IsInBootImage() ? kWithoutReadBarrier : codegen_->GetCompilerReadBarrierOption(); switch (load_kind) { case HLoadClass::LoadKind::kReferrersClass: { DCHECK(!cls->CanCallRuntime()); @@ -7383,7 +7442,7 @@ HLoadString::LoadKind CodeGeneratorX86::GetSupportedLoadStringKind( } void LocationsBuilderX86::VisitLoadString(HLoadString* load) { - LocationSummary::CallKind call_kind = CodeGenerator::GetLoadStringCallKind(load); + LocationSummary::CallKind call_kind = codegen_->GetLoadStringCallKind(load); LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(load, call_kind); HLoadString::LoadKind load_kind = load->GetLoadKind(); if (load_kind == HLoadString::LoadKind::kBootImageLinkTimePcRelative || @@ -7396,11 +7455,11 @@ void LocationsBuilderX86::VisitLoadString(HLoadString* load) { } else { locations->SetOut(Location::RequiresRegister()); if (load_kind == HLoadString::LoadKind::kBssEntry) { - if (!gUseReadBarrier || kUseBakerReadBarrier) { + if (codegen_->EmitNonBakerReadBarrier()) { + // For non-Baker read barrier we have a temp-clobbering call. + } else { // Rely on the pResolveString to save everything. locations->SetCustomSlowPathCallerSaves(OneRegInReferenceOutSaveEverythingCallerSaves()); - } else { - // For non-Baker read barrier we have a temp-clobbering call. 
} } } @@ -7445,7 +7504,8 @@ void InstructionCodeGeneratorX86::VisitLoadString(HLoadString* load) NO_THREAD_S Address address = Address(method_address, CodeGeneratorX86::kPlaceholder32BitOffset); Label* fixup_label = codegen_->NewStringBssEntryPatch(load); // /* GcRoot<mirror::String> */ out = *address /* PC-relative */ - GenerateGcRootFieldLoad(load, out_loc, address, fixup_label, gCompilerReadBarrierOption); + GenerateGcRootFieldLoad( + load, out_loc, address, fixup_label, codegen_->GetCompilerReadBarrierOption()); // No need for memory fence, thanks to the x86 memory model. SlowPathCode* slow_path = new (codegen_->GetScopedAllocator()) LoadStringSlowPathX86(load); codegen_->AddSlowPath(slow_path); @@ -7465,14 +7525,14 @@ void InstructionCodeGeneratorX86::VisitLoadString(HLoadString* load) NO_THREAD_S Label* fixup_label = codegen_->NewJitRootStringPatch( load->GetDexFile(), load->GetStringIndex(), load->GetString()); // /* GcRoot<mirror::String> */ out = *address - GenerateGcRootFieldLoad(load, out_loc, address, fixup_label, gCompilerReadBarrierOption); + GenerateGcRootFieldLoad( + load, out_loc, address, fixup_label, codegen_->GetCompilerReadBarrierOption()); return; } default: break; } - // TODO: Re-add the compiler code to do string dex cache lookup again. InvokeRuntimeCallingConvention calling_convention; DCHECK_EQ(calling_convention.GetRegisterAt(0), out); __ movl(calling_convention.GetRegisterAt(0), Immediate(load->GetStringIndex().index_)); @@ -7498,7 +7558,7 @@ void LocationsBuilderX86::VisitClearException(HClearException* clear) { new (GetGraph()->GetAllocator()) LocationSummary(clear, LocationSummary::kNoCall); } -void InstructionCodeGeneratorX86::VisitClearException(HClearException* clear ATTRIBUTE_UNUSED) { +void InstructionCodeGeneratorX86::VisitClearException([[maybe_unused]] HClearException* clear) { __ fs()->movl(GetExceptionTlsAddress(), Immediate(0)); } @@ -7515,8 +7575,8 @@ void InstructionCodeGeneratorX86::VisitThrow(HThrow* instruction) { } // Temp is used for read barrier. -static size_t NumberOfInstanceOfTemps(TypeCheckKind type_check_kind) { - if (gUseReadBarrier && +static size_t NumberOfInstanceOfTemps(bool emit_read_barrier, TypeCheckKind type_check_kind) { + if (emit_read_barrier && !kUseBakerReadBarrier && (type_check_kind == TypeCheckKind::kAbstractClassCheck || type_check_kind == TypeCheckKind::kClassHierarchyCheck || @@ -7529,11 +7589,11 @@ static size_t NumberOfInstanceOfTemps(TypeCheckKind type_check_kind) { // Interface case has 2 temps, one for holding the number of interfaces, one for the current // interface pointer, the current interface is compared in memory. // The other checks have one temp for loading the object's class. 
-static size_t NumberOfCheckCastTemps(TypeCheckKind type_check_kind) { +static size_t NumberOfCheckCastTemps(bool emit_read_barrier, TypeCheckKind type_check_kind) { if (type_check_kind == TypeCheckKind::kInterfaceCheck) { return 2; } - return 1 + NumberOfInstanceOfTemps(type_check_kind); + return 1 + NumberOfInstanceOfTemps(emit_read_barrier, type_check_kind); } void LocationsBuilderX86::VisitInstanceOf(HInstanceOf* instruction) { @@ -7545,7 +7605,7 @@ void LocationsBuilderX86::VisitInstanceOf(HInstanceOf* instruction) { case TypeCheckKind::kAbstractClassCheck: case TypeCheckKind::kClassHierarchyCheck: case TypeCheckKind::kArrayObjectCheck: { - bool needs_read_barrier = CodeGenerator::InstanceOfNeedsReadBarrier(instruction); + bool needs_read_barrier = codegen_->InstanceOfNeedsReadBarrier(instruction); call_kind = needs_read_barrier ? LocationSummary::kCallOnSlowPath : LocationSummary::kNoCall; baker_read_barrier_slow_path = kUseBakerReadBarrier && needs_read_barrier; break; @@ -7575,7 +7635,8 @@ void LocationsBuilderX86::VisitInstanceOf(HInstanceOf* instruction) { // Note that TypeCheckSlowPathX86 uses this "out" register too. locations->SetOut(Location::RequiresRegister()); // When read barriers are enabled, we need a temporary register for some cases. - locations->AddRegisterTemps(NumberOfInstanceOfTemps(type_check_kind)); + locations->AddRegisterTemps( + NumberOfInstanceOfTemps(codegen_->EmitReadBarrier(), type_check_kind)); } void InstructionCodeGeneratorX86::VisitInstanceOf(HInstanceOf* instruction) { @@ -7586,7 +7647,7 @@ void InstructionCodeGeneratorX86::VisitInstanceOf(HInstanceOf* instruction) { Location cls = locations->InAt(1); Location out_loc = locations->Out(); Register out = out_loc.AsRegister<Register>(); - const size_t num_temps = NumberOfInstanceOfTemps(type_check_kind); + const size_t num_temps = NumberOfInstanceOfTemps(codegen_->EmitReadBarrier(), type_check_kind); DCHECK_LE(num_temps, 1u); Location maybe_temp_loc = (num_temps >= 1) ? 
locations->GetTemp(0) : Location::NoLocation(); uint32_t class_offset = mirror::Object::ClassOffset().Int32Value(); @@ -7606,7 +7667,7 @@ void InstructionCodeGeneratorX86::VisitInstanceOf(HInstanceOf* instruction) { switch (type_check_kind) { case TypeCheckKind::kExactCheck: { ReadBarrierOption read_barrier_option = - CodeGenerator::ReadBarrierOptionForInstanceOf(instruction); + codegen_->ReadBarrierOptionForInstanceOf(instruction); // /* HeapReference<Class> */ out = obj->klass_ GenerateReferenceLoadTwoRegisters(instruction, out_loc, @@ -7629,7 +7690,7 @@ void InstructionCodeGeneratorX86::VisitInstanceOf(HInstanceOf* instruction) { case TypeCheckKind::kAbstractClassCheck: { ReadBarrierOption read_barrier_option = - CodeGenerator::ReadBarrierOptionForInstanceOf(instruction); + codegen_->ReadBarrierOptionForInstanceOf(instruction); // /* HeapReference<Class> */ out = obj->klass_ GenerateReferenceLoadTwoRegisters(instruction, out_loc, @@ -7665,7 +7726,7 @@ void InstructionCodeGeneratorX86::VisitInstanceOf(HInstanceOf* instruction) { case TypeCheckKind::kClassHierarchyCheck: { ReadBarrierOption read_barrier_option = - CodeGenerator::ReadBarrierOptionForInstanceOf(instruction); + codegen_->ReadBarrierOptionForInstanceOf(instruction); // /* HeapReference<Class> */ out = obj->klass_ GenerateReferenceLoadTwoRegisters(instruction, out_loc, @@ -7702,7 +7763,7 @@ void InstructionCodeGeneratorX86::VisitInstanceOf(HInstanceOf* instruction) { case TypeCheckKind::kArrayObjectCheck: { ReadBarrierOption read_barrier_option = - CodeGenerator::ReadBarrierOptionForInstanceOf(instruction); + codegen_->ReadBarrierOptionForInstanceOf(instruction); // /* HeapReference<Class> */ out = obj->klass_ GenerateReferenceLoadTwoRegisters(instruction, out_loc, @@ -7825,7 +7886,7 @@ void InstructionCodeGeneratorX86::VisitInstanceOf(HInstanceOf* instruction) { void LocationsBuilderX86::VisitCheckCast(HCheckCast* instruction) { TypeCheckKind type_check_kind = instruction->GetTypeCheckKind(); - LocationSummary::CallKind call_kind = CodeGenerator::GetCheckCastCallKind(instruction); + LocationSummary::CallKind call_kind = codegen_->GetCheckCastCallKind(instruction); LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(instruction, call_kind); locations->SetInAt(0, Location::RequiresRegister()); @@ -7840,8 +7901,7 @@ void LocationsBuilderX86::VisitCheckCast(HCheckCast* instruction) { } else { locations->SetInAt(1, Location::Any()); } - // Add temps for read barriers and other uses. One is used by TypeCheckSlowPathX86. - locations->AddRegisterTemps(NumberOfCheckCastTemps(type_check_kind)); + locations->AddRegisterTemps(NumberOfCheckCastTemps(codegen_->EmitReadBarrier(), type_check_kind)); } void InstructionCodeGeneratorX86::VisitCheckCast(HCheckCast* instruction) { @@ -7852,7 +7912,7 @@ void InstructionCodeGeneratorX86::VisitCheckCast(HCheckCast* instruction) { Location cls = locations->InAt(1); Location temp_loc = locations->GetTemp(0); Register temp = temp_loc.AsRegister<Register>(); - const size_t num_temps = NumberOfCheckCastTemps(type_check_kind); + const size_t num_temps = NumberOfCheckCastTemps(codegen_->EmitReadBarrier(), type_check_kind); DCHECK_GE(num_temps, 1u); DCHECK_LE(num_temps, 2u); Location maybe_temp2_loc = (num_temps >= 2) ? 
locations->GetTemp(1) : Location::NoLocation(); @@ -7865,7 +7925,7 @@ void InstructionCodeGeneratorX86::VisitCheckCast(HCheckCast* instruction) { const uint32_t object_array_data_offset = mirror::Array::DataOffset(kHeapReferenceSize).Uint32Value(); - bool is_type_check_slow_path_fatal = CodeGenerator::IsTypeCheckSlowPathFatal(instruction); + bool is_type_check_slow_path_fatal = codegen_->IsTypeCheckSlowPathFatal(instruction); SlowPathCode* type_check_slow_path = new (codegen_->GetScopedAllocator()) TypeCheckSlowPathX86( instruction, is_type_check_slow_path_fatal); @@ -8028,11 +8088,11 @@ void InstructionCodeGeneratorX86::VisitCheckCast(HCheckCast* instruction) { kWithoutReadBarrier); // /* HeapReference<Class> */ temp = temp->iftable_ - GenerateReferenceLoadTwoRegisters(instruction, - temp_loc, - temp_loc, - iftable_offset, - kWithoutReadBarrier); + GenerateReferenceLoadOneRegister(instruction, + temp_loc, + iftable_offset, + maybe_temp2_loc, + kWithoutReadBarrier); // Iftable is never null. __ movl(maybe_temp2_loc.AsRegister<Register>(), Address(temp, array_length_offset)); // Maybe poison the `cls` for direct comparison with memory. @@ -8288,7 +8348,7 @@ void InstructionCodeGeneratorX86::GenerateReferenceLoadOneRegister( ReadBarrierOption read_barrier_option) { Register out_reg = out.AsRegister<Register>(); if (read_barrier_option == kWithReadBarrier) { - CHECK(gUseReadBarrier); + DCHECK(codegen_->EmitReadBarrier()); if (kUseBakerReadBarrier) { // Load with fast path based Baker's read barrier. // /* HeapReference<Object> */ out = *(out + offset) @@ -8322,7 +8382,7 @@ void InstructionCodeGeneratorX86::GenerateReferenceLoadTwoRegisters( Register out_reg = out.AsRegister<Register>(); Register obj_reg = obj.AsRegister<Register>(); if (read_barrier_option == kWithReadBarrier) { - CHECK(gUseReadBarrier); + DCHECK(codegen_->EmitReadBarrier()); if (kUseBakerReadBarrier) { // Load with fast path based Baker's read barrier. // /* HeapReference<Object> */ out = *(obj + offset) @@ -8350,7 +8410,7 @@ void InstructionCodeGeneratorX86::GenerateGcRootFieldLoad( ReadBarrierOption read_barrier_option) { Register root_reg = root.AsRegister<Register>(); if (read_barrier_option == kWithReadBarrier) { - DCHECK(gUseReadBarrier); + DCHECK(codegen_->EmitReadBarrier()); if (kUseBakerReadBarrier) { // Fast path implementation of art::ReadBarrier::BarrierForRoot when // Baker's read barrier are used: @@ -8414,8 +8474,7 @@ void CodeGeneratorX86::GenerateFieldLoadWithBakerReadBarrier(HInstruction* instr Register obj, uint32_t offset, bool needs_null_check) { - DCHECK(gUseReadBarrier); - DCHECK(kUseBakerReadBarrier); + DCHECK(EmitBakerReadBarrier()); // /* HeapReference<Object> */ ref = *(obj + offset) Address src(obj, offset); @@ -8428,8 +8487,7 @@ void CodeGeneratorX86::GenerateArrayLoadWithBakerReadBarrier(HInstruction* instr uint32_t data_offset, Location index, bool needs_null_check) { - DCHECK(gUseReadBarrier); - DCHECK(kUseBakerReadBarrier); + DCHECK(EmitBakerReadBarrier()); static_assert( sizeof(mirror::HeapReference<mirror::Object>) == sizeof(int32_t), @@ -8447,8 +8505,7 @@ void CodeGeneratorX86::GenerateReferenceLoadWithBakerReadBarrier(HInstruction* i bool needs_null_check, bool always_update_field, Register* temp) { - DCHECK(gUseReadBarrier); - DCHECK(kUseBakerReadBarrier); + DCHECK(EmitBakerReadBarrier()); // In slow path based read barriers, the read barrier call is // inserted after the original load. 
However, in fast path based @@ -8528,7 +8585,7 @@ void CodeGeneratorX86::GenerateReadBarrierSlow(HInstruction* instruction, Location obj, uint32_t offset, Location index) { - DCHECK(gUseReadBarrier); + DCHECK(EmitReadBarrier()); // Insert a slow path based read barrier *after* the reference load. // @@ -8555,7 +8612,7 @@ void CodeGeneratorX86::MaybeGenerateReadBarrierSlow(HInstruction* instruction, Location obj, uint32_t offset, Location index) { - if (gUseReadBarrier) { + if (EmitReadBarrier()) { // Baker's read barriers shall be handled by the fast path // (CodeGeneratorX86::GenerateReferenceLoadWithBakerReadBarrier). DCHECK(!kUseBakerReadBarrier); @@ -8570,7 +8627,7 @@ void CodeGeneratorX86::MaybeGenerateReadBarrierSlow(HInstruction* instruction, void CodeGeneratorX86::GenerateReadBarrierForRootSlow(HInstruction* instruction, Location out, Location root) { - DCHECK(gUseReadBarrier); + DCHECK(EmitReadBarrier()); // Insert a slow path based read barrier *after* the GC root load. // @@ -8584,12 +8641,12 @@ void CodeGeneratorX86::GenerateReadBarrierForRootSlow(HInstruction* instruction, __ Bind(slow_path->GetExitLabel()); } -void LocationsBuilderX86::VisitBoundType(HBoundType* instruction ATTRIBUTE_UNUSED) { +void LocationsBuilderX86::VisitBoundType([[maybe_unused]] HBoundType* instruction) { // Nothing to do, this should be removed during prepare for register allocator. LOG(FATAL) << "Unreachable"; } -void InstructionCodeGeneratorX86::VisitBoundType(HBoundType* instruction ATTRIBUTE_UNUSED) { +void InstructionCodeGeneratorX86::VisitBoundType([[maybe_unused]] HBoundType* instruction) { // Nothing to do, this should be removed during prepare for register allocator. LOG(FATAL) << "Unreachable"; } @@ -8782,13 +8839,15 @@ void InstructionCodeGeneratorX86::VisitX86LoadFromConstantTable(HX86LoadFromCons case DataType::Type::kFloat32: __ movss(out.AsFpuRegister<XmmRegister>(), codegen_->LiteralFloatAddress( - value->AsFloatConstant()->GetValue(), insn->GetBaseMethodAddress(), const_area)); + value->AsFloatConstant()->GetValue(), insn->GetBaseMethodAddress(), const_area)); break; case DataType::Type::kFloat64: __ movsd(out.AsFpuRegister<XmmRegister>(), codegen_->LiteralDoubleAddress( - value->AsDoubleConstant()->GetValue(), insn->GetBaseMethodAddress(), const_area)); + value->AsDoubleConstant()->GetValue(), + insn->GetBaseMethodAddress(), + const_area)); break; case DataType::Type::kInt32: @@ -8877,7 +8936,7 @@ class JumpTableRIPFixup : public RIPFixup { const HX86PackedSwitch* switch_instr_; }; -void CodeGeneratorX86::Finalize(CodeAllocator* allocator) { +void CodeGeneratorX86::Finalize() { // Generate the constant area if needed. X86Assembler* assembler = GetAssembler(); @@ -8897,7 +8956,7 @@ void CodeGeneratorX86::Finalize(CodeAllocator* allocator) { } // And finish up. - CodeGenerator::Finalize(allocator); + CodeGenerator::Finalize(); } Address CodeGeneratorX86::LiteralDoubleAddress(double v, @@ -8968,9 +9027,9 @@ Address CodeGeneratorX86::ArrayAddress(Register obj, Location index, ScaleFactor scale, uint32_t data_offset) { - return index.IsConstant() ? - Address(obj, (index.GetConstant()->AsIntConstant()->GetValue() << scale) + data_offset) : - Address(obj, index.AsRegister<Register>(), scale, data_offset); + return index.IsConstant() + ? 
Address(obj, (index.GetConstant()->AsIntConstant()->GetValue() << scale) + data_offset) + : Address(obj, index.AsRegister<Register>(), scale, data_offset); } Address CodeGeneratorX86::LiteralCaseTable(HX86PackedSwitch* switch_instr, @@ -9025,7 +9084,7 @@ void CodeGeneratorX86::PatchJitRootUse(uint8_t* code, reinterpret_cast<uintptr_t>(roots_data) + index_in_table * sizeof(GcRoot<mirror::Object>); using unaligned_uint32_t __attribute__((__aligned__(1))) = uint32_t; reinterpret_cast<unaligned_uint32_t*>(code + code_offset)[0] = - dchecked_integral_cast<uint32_t>(address); + dchecked_integral_cast<uint32_t>(address); } void CodeGeneratorX86::EmitJitRootPatches(uint8_t* code, const uint8_t* roots_data) { @@ -9042,13 +9101,13 @@ void CodeGeneratorX86::EmitJitRootPatches(uint8_t* code, const uint8_t* roots_da } } -void LocationsBuilderX86::VisitIntermediateAddress(HIntermediateAddress* instruction - ATTRIBUTE_UNUSED) { +void LocationsBuilderX86::VisitIntermediateAddress( + [[maybe_unused]] HIntermediateAddress* instruction) { LOG(FATAL) << "Unreachable"; } -void InstructionCodeGeneratorX86::VisitIntermediateAddress(HIntermediateAddress* instruction - ATTRIBUTE_UNUSED) { +void InstructionCodeGeneratorX86::VisitIntermediateAddress( + [[maybe_unused]] HIntermediateAddress* instruction) { LOG(FATAL) << "Unreachable"; } diff --git a/compiler/optimizing/code_generator_x86.h b/compiler/optimizing/code_generator_x86.h index d27155f31d..5b59bfc7e3 100644 --- a/compiler/optimizing/code_generator_x86.h +++ b/compiler/optimizing/code_generator_x86.h @@ -89,19 +89,8 @@ static constexpr size_t kRuntimeParameterFpuRegistersLength = V(StringBuilderLength) \ V(StringBuilderToString) \ /* 1.8 */ \ - V(UnsafeGetAndAddInt) \ - V(UnsafeGetAndAddLong) \ - V(UnsafeGetAndSetInt) \ - V(UnsafeGetAndSetLong) \ - V(UnsafeGetAndSetObject) \ V(MethodHandleInvokeExact) \ - V(MethodHandleInvoke) \ - /* OpenJDK 11 */ \ - V(JdkUnsafeGetAndAddInt) \ - V(JdkUnsafeGetAndAddLong) \ - V(JdkUnsafeGetAndSetInt) \ - V(JdkUnsafeGetAndSetLong) \ - V(JdkUnsafeGetAndSetObject) + V(MethodHandleInvoke) class InvokeRuntimeCallingConvention : public CallingConvention<Register, XmmRegister> { public: @@ -196,7 +185,7 @@ class FieldAccessCallingConventionX86 : public FieldAccessCallingConvention { ? Location::RegisterLocation(EDX) : Location::RegisterLocation(ECX)); } - Location GetFpuLocation(DataType::Type type ATTRIBUTE_UNUSED) const override { + Location GetFpuLocation([[maybe_unused]] DataType::Type type) const override { return Location::FpuRegisterLocation(XMM0); } @@ -635,7 +624,7 @@ class CodeGeneratorX86 : public CodeGenerator { Address LiteralCaseTable(HX86PackedSwitch* switch_instr, Register reg, Register value); - void Finalize(CodeAllocator* allocator) override; + void Finalize() override; // Fast path implementation of ReadBarrier::Barrier for a heap // reference field load when Baker's read barriers are used. 
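// ----------------------------------------------------------------------------
// Illustrative sketch only (not part of the patch above): the decision
// CodeGeneratorX86::ArrayAddress makes, modelled in plain C++. With a constant
// index the scaled element offset is folded into the displacement; with a
// register index a (base, index, scale, displacement) operand is used instead.
// AddressParts and ArrayElementAddress are hypothetical names for this sketch.
#include <cstdint>
#include <cstdio>

struct AddressParts {
  bool uses_index_register;
  uint32_t displacement;  // data_offset, plus (constant_index << scale) when folded
  int scale_log2;         // meaningful only when uses_index_register is true
};

AddressParts ArrayElementAddress(bool index_is_constant,
                                 int32_t constant_index,
                                 int scale_log2,
                                 uint32_t data_offset) {
  if (index_is_constant) {
    uint32_t folded = data_offset + (static_cast<uint32_t>(constant_index) << scale_log2);
    return {/*uses_index_register=*/false, folded, /*scale_log2=*/0};
  }
  return {/*uses_index_register=*/true, data_offset, scale_log2};
}

int main() {
  // int[] element 3 with a 12-byte data offset: displacement = 12 + (3 << 2) = 24.
  AddressParts parts = ArrayElementAddress(/*index_is_constant=*/true, 3, /*scale_log2=*/2, 12);
  std::printf("folded displacement = %u\n", parts.displacement);
  return 0;
}
// ----------------------------------------------------------------------------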
diff --git a/compiler/optimizing/code_generator_x86_64.cc b/compiler/optimizing/code_generator_x86_64.cc index eea6b204fa..9d010190f7 100644 --- a/compiler/optimizing/code_generator_x86_64.cc +++ b/compiler/optimizing/code_generator_x86_64.cc @@ -27,6 +27,7 @@ #include "heap_poisoning.h" #include "interpreter/mterp/nterp.h" #include "intrinsics.h" +#include "intrinsics_list.h" #include "intrinsics_utils.h" #include "intrinsics_x86_64.h" #include "jit/profiling_info.h" @@ -37,8 +38,10 @@ #include "mirror/object_reference.h" #include "mirror/var_handle.h" #include "optimizing/nodes.h" +#include "profiling_info_builder.h" #include "scoped_thread_state_change-inl.h" #include "thread.h" +#include "trace.h" #include "utils/assembler.h" #include "utils/stack_checks.h" #include "utils/x86_64/assembler_x86_64.h" @@ -267,6 +270,38 @@ class BoundsCheckSlowPathX86_64 : public SlowPathCode { DISALLOW_COPY_AND_ASSIGN(BoundsCheckSlowPathX86_64); }; +class LoadMethodTypeSlowPathX86_64: public SlowPathCode { + public: + explicit LoadMethodTypeSlowPathX86_64(HLoadMethodType* mt) : SlowPathCode(mt) {} + + void EmitNativeCode(CodeGenerator* codegen) override { + LocationSummary* locations = instruction_->GetLocations(); + DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(locations->Out().reg())); + + CodeGeneratorX86_64* x86_64_codegen = down_cast<CodeGeneratorX86_64*>(codegen); + __ Bind(GetEntryLabel()); + SaveLiveRegisters(codegen, locations); + + const dex::ProtoIndex proto_index = instruction_->AsLoadMethodType()->GetProtoIndex(); + // Custom calling convention: RAX serves as both input and output. + __ movl(CpuRegister(RAX), Immediate(proto_index.index_)); + x86_64_codegen->InvokeRuntime(kQuickResolveMethodType, + instruction_, + instruction_->GetDexPc(), + this); + CheckEntrypointTypes<kQuickResolveMethodType, void*, uint32_t>(); + x86_64_codegen->Move(locations->Out(), Location::RegisterLocation(RAX)); + RestoreLiveRegisters(codegen, locations); + + __ jmp(GetExitLabel()); + } + + const char* GetDescription() const override { return "LoadMethodTypeSlowPathX86_64"; } + + private: + DISALLOW_COPY_AND_ASSIGN(LoadMethodTypeSlowPathX86_64); +}; + class LoadClassSlowPathX86_64 : public SlowPathCode { public: LoadClassSlowPathX86_64(HLoadClass* cls, HInstruction* at) @@ -510,23 +545,23 @@ class ReadBarrierMarkSlowPathX86_64 : public SlowPathCode { : SlowPathCode(instruction), ref_(ref), unpoison_ref_before_marking_(unpoison_ref_before_marking) { - DCHECK(gUseReadBarrier); } const char* GetDescription() const override { return "ReadBarrierMarkSlowPathX86_64"; } void EmitNativeCode(CodeGenerator* codegen) override { + DCHECK(codegen->EmitReadBarrier()); LocationSummary* locations = instruction_->GetLocations(); CpuRegister ref_cpu_reg = ref_.AsRegister<CpuRegister>(); Register ref_reg = ref_cpu_reg.AsRegister(); DCHECK(locations->CanCall()); DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(ref_reg)) << ref_reg; DCHECK(instruction_->IsInstanceFieldGet() || - instruction_->IsPredicatedInstanceFieldGet() || instruction_->IsStaticFieldGet() || instruction_->IsArrayGet() || instruction_->IsArraySet() || instruction_->IsLoadClass() || + instruction_->IsLoadMethodType() || instruction_->IsLoadString() || instruction_->IsInstanceOf() || instruction_->IsCheckCast() || @@ -601,7 +636,6 @@ class ReadBarrierMarkAndUpdateFieldSlowPathX86_64 : public SlowPathCode { unpoison_ref_before_marking_(unpoison_ref_before_marking), temp1_(temp1), temp2_(temp2) { - DCHECK(gUseReadBarrier); } const char* 
GetDescription() const override { @@ -609,6 +643,7 @@ class ReadBarrierMarkAndUpdateFieldSlowPathX86_64 : public SlowPathCode { } void EmitNativeCode(CodeGenerator* codegen) override { + DCHECK(codegen->EmitReadBarrier()); LocationSummary* locations = instruction_->GetLocations(); CpuRegister ref_cpu_reg = ref_.AsRegister<CpuRegister>(); Register ref_reg = ref_cpu_reg.AsRegister(); @@ -618,7 +653,9 @@ class ReadBarrierMarkAndUpdateFieldSlowPathX86_64 : public SlowPathCode { << "Unexpected instruction in read barrier marking and field updating slow path: " << instruction_->DebugName(); HInvoke* invoke = instruction_->AsInvoke(); - DCHECK(IsUnsafeCASObject(invoke) || IsVarHandleCASFamily(invoke)) << invoke->GetIntrinsic(); + DCHECK(IsUnsafeCASReference(invoke) || + IsUnsafeGetAndSetReference(invoke) || + IsVarHandleCASFamily(invoke)) << invoke->GetIntrinsic(); __ Bind(GetEntryLabel()); if (unpoison_ref_before_marking_) { @@ -665,7 +702,7 @@ class ReadBarrierMarkAndUpdateFieldSlowPathX86_64 : public SlowPathCode { __ cmpl(temp1_, ref_cpu_reg); __ j(kEqual, &done); - // Update the the holder's field atomically. This may fail if + // Update the holder's field atomically. This may fail if // mutator updates before us, but it's OK. This is achived // using a strong compare-and-set (CAS) operation with relaxed // memory synchronization ordering, where the expected value is @@ -761,7 +798,6 @@ class ReadBarrierForHeapReferenceSlowPathX86_64 : public SlowPathCode { obj_(obj), offset_(offset), index_(index) { - DCHECK(gUseReadBarrier); // If `obj` is equal to `out` or `ref`, it means the initial // object has been overwritten by (or after) the heap object // reference load to be instrumented, e.g.: @@ -776,13 +812,13 @@ class ReadBarrierForHeapReferenceSlowPathX86_64 : public SlowPathCode { } void EmitNativeCode(CodeGenerator* codegen) override { + DCHECK(codegen->EmitReadBarrier()); CodeGeneratorX86_64* x86_64_codegen = down_cast<CodeGeneratorX86_64*>(codegen); LocationSummary* locations = instruction_->GetLocations(); CpuRegister reg_out = out_.AsRegister<CpuRegister>(); DCHECK(locations->CanCall()); DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(reg_out.AsRegister())) << out_; DCHECK(instruction_->IsInstanceFieldGet() || - instruction_->IsPredicatedInstanceFieldGet() || instruction_->IsStaticFieldGet() || instruction_->IsArrayGet() || instruction_->IsInstanceOf() || @@ -855,9 +891,11 @@ class ReadBarrierForHeapReferenceSlowPathX86_64 : public SlowPathCode { DCHECK(instruction_->GetLocations()->Intrinsified()); DCHECK((instruction_->AsInvoke()->GetIntrinsic() == Intrinsics::kUnsafeGetObject) || (instruction_->AsInvoke()->GetIntrinsic() == Intrinsics::kUnsafeGetObjectVolatile) || - (instruction_->AsInvoke()->GetIntrinsic() == Intrinsics::kJdkUnsafeGetObject) || - (instruction_->AsInvoke()->GetIntrinsic() == Intrinsics::kJdkUnsafeGetObjectVolatile) || - (instruction_->AsInvoke()->GetIntrinsic() == Intrinsics::kJdkUnsafeGetObjectAcquire)) + (instruction_->AsInvoke()->GetIntrinsic() == Intrinsics::kJdkUnsafeGetReference) || + (instruction_->AsInvoke()->GetIntrinsic() == + Intrinsics::kJdkUnsafeGetReferenceVolatile) || + (instruction_->AsInvoke()->GetIntrinsic() == + Intrinsics::kJdkUnsafeGetReferenceAcquire)) << instruction_->AsInvoke()->GetIntrinsic(); DCHECK_EQ(offset_, 0U); DCHECK(index_.IsRegister()); @@ -937,10 +975,10 @@ class ReadBarrierForRootSlowPathX86_64 : public SlowPathCode { public: ReadBarrierForRootSlowPathX86_64(HInstruction* instruction, Location out, Location root) : 
SlowPathCode(instruction), out_(out), root_(root) { - DCHECK(gUseReadBarrier); } void EmitNativeCode(CodeGenerator* codegen) override { + DCHECK(codegen->EmitReadBarrier()); LocationSummary* locations = instruction_->GetLocations(); DCHECK(locations->CanCall()); DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(out_.reg())); @@ -1005,11 +1043,15 @@ class MethodEntryExitHooksSlowPathX86_64 : public SlowPathCode { class CompileOptimizedSlowPathX86_64 : public SlowPathCode { public: - CompileOptimizedSlowPathX86_64() : SlowPathCode(/* instruction= */ nullptr) {} + explicit CompileOptimizedSlowPathX86_64(uint64_t counter_address) + : SlowPathCode(/* instruction= */ nullptr), + counter_address_(counter_address) {} void EmitNativeCode(CodeGenerator* codegen) override { CodeGeneratorX86_64* x86_64_codegen = down_cast<CodeGeneratorX86_64*>(codegen); __ Bind(GetEntryLabel()); + __ movq(CpuRegister(TMP), Immediate(counter_address_)); + __ movw(Address(CpuRegister(TMP), 0), Immediate(ProfilingInfo::GetOptimizeThreshold())); x86_64_codegen->GenerateInvokeRuntime( GetThreadOffset<kX86_64PointerSize>(kQuickCompileOptimized).Int32Value()); __ jmp(GetExitLabel()); @@ -1020,6 +1062,8 @@ class CompileOptimizedSlowPathX86_64 : public SlowPathCode { } private: + uint64_t counter_address_; + DISALLOW_COPY_AND_ASSIGN(CompileOptimizedSlowPathX86_64); }; @@ -1070,8 +1114,8 @@ void CodeGeneratorX86_64::BlockNonVolatileXmmRegisters(LocationSummary* location } HInvokeStaticOrDirect::DispatchInfo CodeGeneratorX86_64::GetSupportedInvokeStaticOrDirectDispatch( - const HInvokeStaticOrDirect::DispatchInfo& desired_dispatch_info, - ArtMethod* method ATTRIBUTE_UNUSED) { + const HInvokeStaticOrDirect::DispatchInfo& desired_dispatch_info, + [[maybe_unused]] ArtMethod* method) { return desired_dispatch_info; } @@ -1308,6 +1352,12 @@ Label* CodeGeneratorX86_64::NewStringBssEntryPatch(HLoadString* load_string) { return &string_bss_entry_patches_.back().label; } +Label* CodeGeneratorX86_64::NewMethodTypeBssEntryPatch(HLoadMethodType* load_method_type) { + method_type_bss_entry_patches_.emplace_back( + &load_method_type->GetDexFile(), load_method_type->GetProtoIndex().index_); + return &method_type_bss_entry_patches_.back().label; +} + void CodeGeneratorX86_64::RecordBootImageJniEntrypointPatch(HInvokeStaticOrDirect* invoke) { boot_image_jni_entrypoint_patches_.emplace_back(invoke->GetResolvedMethodReference().dex_file, invoke->GetResolvedMethodReference().index); @@ -1335,7 +1385,7 @@ void CodeGeneratorX86_64::LoadBootImageAddress(CpuRegister reg, uint32_t boot_im void CodeGeneratorX86_64::LoadIntrinsicDeclaringClass(CpuRegister reg, HInvoke* invoke) { DCHECK_NE(invoke->GetIntrinsic(), Intrinsics::kNone); if (GetCompilerOptions().IsBootImage()) { - // Load the class the same way as for HLoadClass::LoadKind::kBootImageLinkTimePcRelative. + // Load the type the same way as for HLoadClass::LoadKind::kBootImageLinkTimePcRelative. 
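Aside: the intrinsic V(...) lists being trimmed in the code generator headers and the kIsIntrinsicUnimplemented table in the hunk that follows are both X-macro lists: a single list macro expanded with different per-entry macros. A self-contained illustration of the mechanism; every name below is invented for the example rather than taken from ART:

#define EXAMPLE_INTRINSICS_LIST(V) \
  V(MethodHandleInvokeExact)       \
  V(MethodHandleInvoke)

enum class ExampleIntrinsics {
  kNone,
#define DECLARE_ENUMERATOR(Name) k##Name,
  EXAMPLE_INTRINSICS_LIST(DECLARE_ENUMERATOR)
#undef DECLARE_ENUMERATOR
};

// A parallel table built from the same list, mirroring kIsIntrinsicUnimplemented
// (the real table queries an IsUnimplemented<> trait instead of hard-coding `true`).
static constexpr bool kIsExampleIntrinsicUnimplemented[] = {
    false,  // kNone
#define IS_UNIMPLEMENTED_ENTRY(Name) true,
    EXAMPLE_INTRINSICS_LIST(IS_UNIMPLEMENTED_ENTRY)
#undef IS_UNIMPLEMENTED_ENTRY
};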
__ leal(reg, Address::Absolute(CodeGeneratorX86_64::kPlaceholder32BitOffset, /* no_rip= */ false)); MethodReference target_method = invoke->GetResolvedMethodReference(); @@ -1395,6 +1445,7 @@ void CodeGeneratorX86_64::EmitLinkerPatches(ArenaVector<linker::LinkerPatch>* li package_type_bss_entry_patches_.size() + boot_image_string_patches_.size() + string_bss_entry_patches_.size() + + method_type_bss_entry_patches_.size() + boot_image_jni_entrypoint_patches_.size() + boot_image_other_patches_.size(); linker_patches->reserve(size); @@ -1427,6 +1478,8 @@ void CodeGeneratorX86_64::EmitLinkerPatches(ArenaVector<linker::LinkerPatch>* li package_type_bss_entry_patches_, linker_patches); EmitPcRelativeLinkerPatches<linker::LinkerPatch::StringBssEntryPatch>( string_bss_entry_patches_, linker_patches); + EmitPcRelativeLinkerPatches<linker::LinkerPatch::MethodTypeBssEntryPatch>( + method_type_bss_entry_patches_, linker_patches); EmitPcRelativeLinkerPatches<linker::LinkerPatch::RelativeJniEntrypointPatch>( boot_image_jni_entrypoint_patches_, linker_patches); DCHECK_EQ(size, linker_patches->size()); @@ -1495,6 +1548,7 @@ void CodeGeneratorX86_64::GenerateInvokeRuntime(int32_t entry_point_offset) { } namespace detail { + // Mark which intrinsics we don't have handcrafted code for. template <Intrinsics T> struct IsUnimplemented { @@ -1509,15 +1563,13 @@ struct IsUnimplemented { UNIMPLEMENTED_INTRINSIC_LIST_X86_64(TRUE_OVERRIDE) #undef TRUE_OVERRIDE -#include "intrinsics_list.h" static constexpr bool kIsIntrinsicUnimplemented[] = { - false, // kNone + false, // kNone #define IS_UNIMPLEMENTED(Intrinsic, ...) \ - IsUnimplemented<Intrinsics::k##Intrinsic>().is_unimplemented, - INTRINSICS_LIST(IS_UNIMPLEMENTED) + IsUnimplemented<Intrinsics::k##Intrinsic>().is_unimplemented, + ART_INTRINSICS_LIST(IS_UNIMPLEMENTED) #undef IS_UNIMPLEMENTED }; -#undef INTRINSICS_LIST } // namespace detail @@ -1531,11 +1583,9 @@ CodeGeneratorX86_64::CodeGeneratorX86_64(HGraph* graph, kNumberOfCpuRegisters, kNumberOfFloatRegisters, kNumberOfCpuRegisterPairs, - ComputeRegisterMask(reinterpret_cast<const int*>(kCoreCalleeSaves), - arraysize(kCoreCalleeSaves)) + ComputeRegisterMask(kCoreCalleeSaves, arraysize(kCoreCalleeSaves)) | (1 << kFakeReturnRegister), - ComputeRegisterMask(reinterpret_cast<const int*>(kFpuCalleeSaves), - arraysize(kFpuCalleeSaves)), + ComputeRegisterMask(kFpuCalleeSaves, arraysize(kFpuCalleeSaves)), compiler_options, stats, ArrayRef<const bool>(detail::kIsIntrinsicUnimplemented)), @@ -1554,6 +1604,7 @@ CodeGeneratorX86_64::CodeGeneratorX86_64(HGraph* graph, package_type_bss_entry_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)), boot_image_string_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)), string_bss_entry_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)), + method_type_bss_entry_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)), boot_image_jni_entrypoint_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)), boot_image_other_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)), jit_string_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)), @@ -1585,12 +1636,18 @@ static dwarf::Reg DWARFReg(FloatRegister reg) { } void LocationsBuilderX86_64::VisitMethodEntryHook(HMethodEntryHook* method_hook) { - new (GetGraph()->GetAllocator()) LocationSummary(method_hook, LocationSummary::kCallOnSlowPath); + LocationSummary* locations = new (GetGraph()->GetAllocator()) + LocationSummary(method_hook, 
LocationSummary::kCallOnSlowPath); + // We use rdtsc to record the timestamp for method profiling. rdtsc returns + // two 32-bit values in EAX + EDX even on 64-bit architectures. + locations->AddTemp(Location::RegisterLocation(RAX)); + locations->AddTemp(Location::RegisterLocation(RDX)); } void InstructionCodeGeneratorX86_64::GenerateMethodEntryExitHook(HInstruction* instruction) { SlowPathCode* slow_path = new (codegen_->GetScopedAllocator()) MethodEntryExitHooksSlowPathX86_64(instruction); + LocationSummary* locations = instruction->GetLocations(); codegen_->AddSlowPath(slow_path); if (instruction->IsMethodExitHook()) { @@ -1609,8 +1666,51 @@ void InstructionCodeGeneratorX86_64::GenerateMethodEntryExitHook(HInstruction* i instrumentation::Instrumentation::HaveMethodExitListenersOffset() : instrumentation::Instrumentation::HaveMethodEntryListenersOffset(); __ movq(CpuRegister(TMP), Immediate(address + offset.Int32Value())); - __ cmpb(Address(CpuRegister(TMP), 0), Immediate(0)); - __ j(kNotEqual, slow_path->GetEntryLabel()); + __ cmpb(Address(CpuRegister(TMP), 0), + Immediate(instrumentation::Instrumentation::kFastTraceListeners)); + // Check if there are any method entry / exit listeners. If no, continue with execution. + __ j(kLess, slow_path->GetExitLabel()); + // Check if there are any slow method entry / exit listeners. If yes, take the slow path. + __ j(kGreater, slow_path->GetEntryLabel()); + + // Check if there is place in the buffer for a new entry, if no, take slow path. + CpuRegister index = locations->GetTemp(0).AsRegister<CpuRegister>(); + CpuRegister entry_addr = CpuRegister(TMP); + uint64_t trace_buffer_index_offset = + Thread::TraceBufferIndexOffset<kX86_64PointerSize>().SizeValue(); + __ gs()->movq(CpuRegister(index), + Address::Absolute(trace_buffer_index_offset, /* no_rip= */ true)); + __ subq(CpuRegister(index), Immediate(kNumEntriesForWallClock)); + __ j(kLess, slow_path->GetEntryLabel()); + + // Update the index in the `Thread`. + __ gs()->movq(Address::Absolute(trace_buffer_index_offset, /* no_rip= */ true), + CpuRegister(index)); + // Calculate the entry address in the buffer. + // entry_addr = base_addr + sizeof(void*) * index + __ gs()->movq(entry_addr, + Address::Absolute(Thread::TraceBufferPtrOffset<kX86_64PointerSize>().SizeValue(), + /* no_rip= */ true)); + __ leaq(CpuRegister(entry_addr), + Address(CpuRegister(entry_addr), CpuRegister(index), TIMES_8, 0)); + + // Record method pointer and action. + CpuRegister method = index; + __ movq(CpuRegister(method), Address(CpuRegister(RSP), kCurrentMethodStackOffset)); + // Use last two bits to encode trace method action. For MethodEntry it is 0 + // so no need to set the bits since they are 0 already. + if (instruction->IsMethodExitHook()) { + DCHECK_GE(ArtMethod::Alignment(kRuntimePointerSize), static_cast<size_t>(4)); + static_assert(enum_cast<int32_t>(TraceAction::kTraceMethodEnter) == 0); + static_assert(enum_cast<int32_t>(TraceAction::kTraceMethodExit) == 1); + __ orq(method, Immediate(enum_cast<int32_t>(TraceAction::kTraceMethodExit))); + } + __ movq(Address(entry_addr, kMethodOffsetInBytes), CpuRegister(method)); + // Get the timestamp. rdtsc returns timestamp in RAX + RDX even in 64-bit architectures. 
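Aside: as the comment above notes, RDTSC delivers its 64-bit timestamp split across EAX and EDX even in 64-bit mode, so the hook code that follows recombines the halves with shlq $32, %rdx; orq %rdx, %rax. A small C++ sketch of the same recombination, assuming a GCC/Clang x86 toolchain for the intrinsic:

#include <cstdint>
#if defined(__x86_64__) || defined(__i386__)
#include <x86intrin.h>

// What the generated sequence computes by hand: timestamp = (edx << 32) | eax.
inline uint64_t CombineRdtscHalves(uint32_t eax, uint32_t edx) {
  return (static_cast<uint64_t>(edx) << 32) | eax;
}

// The intrinsic form; the compiler emits rdtsc and the recombination for us.
inline uint64_t ReadTimestampCounter() {
  return __rdtsc();
}
#endif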
+ __ rdtsc(); + __ shlq(CpuRegister(RDX), Immediate(32)); + __ orq(CpuRegister(RAX), CpuRegister(RDX)); + __ movq(Address(entry_addr, kTimestampOffsetInBytes), CpuRegister(RAX)); __ Bind(slow_path->GetExitLabel()); } @@ -1651,6 +1751,10 @@ void LocationsBuilderX86_64::VisitMethodExitHook(HMethodExitHook* method_hook) { LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(method_hook, LocationSummary::kCallOnSlowPath); SetInForReturnValue(method_hook, locations); + // We use rdtsc to record the timestamp for method profiling. rdtsc returns + // two 32-bit values in EAX + EDX even on 64-bit architectures. + locations->AddTemp(Location::RegisterLocation(RAX)); + locations->AddTemp(Location::RegisterLocation(RDX)); } void InstructionCodeGeneratorX86_64::VisitMethodExitHook(HMethodExitHook* instruction) { @@ -1677,20 +1781,20 @@ void CodeGeneratorX86_64::MaybeIncrementHotness(bool is_frame_entry) { } if (GetGraph()->IsCompilingBaseline() && !Runtime::Current()->IsAotCompiler()) { - SlowPathCode* slow_path = new (GetScopedAllocator()) CompileOptimizedSlowPathX86_64(); - AddSlowPath(slow_path); ProfilingInfo* info = GetGraph()->GetProfilingInfo(); DCHECK(info != nullptr); CHECK(!HasEmptyFrame()); - uint64_t address = reinterpret_cast64<uint64_t>(info); + uint64_t address = reinterpret_cast64<uint64_t>(info) + + ProfilingInfo::BaselineHotnessCountOffset().Int32Value(); + SlowPathCode* slow_path = new (GetScopedAllocator()) CompileOptimizedSlowPathX86_64(address); + AddSlowPath(slow_path); // Note: if the address was in the 32bit range, we could use // Address::Absolute and avoid this movq. __ movq(CpuRegister(TMP), Immediate(address)); // With multiple threads, this can overflow. This is OK, we will eventually get to see // it reaching 0. Also, at this point we have no register available to look // at the counter directly. 
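Aside: the baseline hotness check now folds the counter offset into the address loaded into TMP, decrements the 16-bit counter with addw $-1 in the hunk just below, and branches to CompileOptimizedSlowPathX86_64 on zero; the reworked slow path shown earlier stores ProfilingInfo::GetOptimizeThreshold() back into the counter before calling the compile-optimized entrypoint. A simplified, single-threaded model of that flow (the type and function names are stand-ins, not ART's):

#include <cstdint>

void CompileOptimized() {}  // stand-in for the kQuickCompileOptimized entrypoint

struct ProfilingInfoModel {
  uint16_t baseline_hotness_count;  // the 16-bit counter the generated code addresses directly
};

inline void MaybeIncrementHotness(ProfilingInfoModel* info, uint16_t optimize_threshold) {
  // Generated code: addw $-1, (counter); jz slow_path.
  if (--info->baseline_hotness_count == 0) {
    // Slow path: with this patch it first resets the counter to the optimize threshold,
    // then requests optimized compilation.
    info->baseline_hotness_count = optimize_threshold;
    CompileOptimized();
  }
}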
- __ addw(Address(CpuRegister(TMP), ProfilingInfo::BaselineHotnessCountOffset().Int32Value()), - Immediate(-1)); + __ addw(Address(CpuRegister(TMP), 0), Immediate(-1)); __ j(kEqual, slow_path->GetEntryLabel()); __ Bind(slow_path->GetExitLabel()); } @@ -1949,8 +2053,9 @@ void CodeGeneratorX86_64::MoveConstant(Location location, int32_t value) { Load64BitValue(location.AsRegister<CpuRegister>(), static_cast<int64_t>(value)); } -void CodeGeneratorX86_64::MoveLocation( - Location dst, Location src, DataType::Type dst_type ATTRIBUTE_UNUSED) { +void CodeGeneratorX86_64::MoveLocation(Location dst, + Location src, + [[maybe_unused]] DataType::Type dst_type) { Move(dst, src); } @@ -2009,8 +2114,7 @@ void LocationsBuilderX86_64::VisitExit(HExit* exit) { exit->SetLocations(nullptr); } -void InstructionCodeGeneratorX86_64::VisitExit(HExit* exit ATTRIBUTE_UNUSED) { -} +void InstructionCodeGeneratorX86_64::VisitExit([[maybe_unused]] HExit* exit) {} template<class LabelType> void InstructionCodeGeneratorX86_64::GenerateFPJumps(HCondition* cond, @@ -2051,7 +2155,7 @@ void InstructionCodeGeneratorX86_64::GenerateCompareTest(HCondition* condition) } else if (right.IsConstant()) { __ ucomiss(left.AsFpuRegister<XmmRegister>(), codegen_->LiteralFloatAddress( - right.GetConstant()->AsFloatConstant()->GetValue())); + right.GetConstant()->AsFloatConstant()->GetValue())); } else { DCHECK(right.IsStackSlot()); __ ucomiss(left.AsFpuRegister<XmmRegister>(), @@ -2065,7 +2169,7 @@ void InstructionCodeGeneratorX86_64::GenerateCompareTest(HCondition* condition) } else if (right.IsConstant()) { __ ucomisd(left.AsFpuRegister<XmmRegister>(), codegen_->LiteralDoubleAddress( - right.GetConstant()->AsDoubleConstant()->GetValue())); + right.GetConstant()->AsDoubleConstant()->GetValue())); } else { DCHECK(right.IsDoubleStackSlot()); __ ucomisd(left.AsFpuRegister<XmmRegister>(), @@ -2119,13 +2223,17 @@ void InstructionCodeGeneratorX86_64::GenerateCompareTestAndBranch(HCondition* co } } -static bool AreEflagsSetFrom(HInstruction* cond, HInstruction* branch) { +static bool AreEflagsSetFrom(HInstruction* cond, + HInstruction* branch, + const CompilerOptions& compiler_options) { // Moves may affect the eflags register (move zero uses xorl), so the EFLAGS // are set only strictly before `branch`. We can't use the eflags on long // conditions if they are materialized due to the complex branching. 
return cond->IsCondition() && cond->GetNext() == branch && - !DataType::IsFloatingPointType(cond->InputAt(0)->GetType()); + !DataType::IsFloatingPointType(cond->InputAt(0)->GetType()) && + !(cond->GetBlock()->GetGraph()->IsCompilingBaseline() && + compiler_options.ProfileBranches()); } template<class LabelType> @@ -2162,7 +2270,7 @@ void InstructionCodeGeneratorX86_64::GenerateTestAndBranch(HInstruction* instruc // - condition true => branch to true_target // - branch to false_target if (IsBooleanValueOrMaterializedCondition(cond)) { - if (AreEflagsSetFrom(cond, instruction)) { + if (AreEflagsSetFrom(cond, instruction, codegen_->GetCompilerOptions())) { if (true_target == nullptr) { __ j(X86_64IntegerCondition(cond->AsCondition()->GetOppositeCondition()), false_target); } else { @@ -2215,7 +2323,14 @@ void InstructionCodeGeneratorX86_64::GenerateTestAndBranch(HInstruction* instruc void LocationsBuilderX86_64::VisitIf(HIf* if_instr) { LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(if_instr); if (IsBooleanValueOrMaterializedCondition(if_instr->InputAt(0))) { - locations->SetInAt(0, Location::Any()); + if (GetGraph()->IsCompilingBaseline() && + codegen_->GetCompilerOptions().ProfileBranches() && + !Runtime::Current()->IsAotCompiler()) { + locations->SetInAt(0, Location::RequiresRegister()); + locations->AddTemp(Location::RequiresRegister()); + } else { + locations->SetInAt(0, Location::Any()); + } } } @@ -2226,6 +2341,33 @@ void InstructionCodeGeneratorX86_64::VisitIf(HIf* if_instr) { nullptr : codegen_->GetLabelOf(true_successor); Label* false_target = codegen_->GoesToNextBlock(if_instr->GetBlock(), false_successor) ? nullptr : codegen_->GetLabelOf(false_successor); + if (IsBooleanValueOrMaterializedCondition(if_instr->InputAt(0))) { + if (GetGraph()->IsCompilingBaseline() && + codegen_->GetCompilerOptions().ProfileBranches() && + !Runtime::Current()->IsAotCompiler()) { + DCHECK(if_instr->InputAt(0)->IsCondition()); + CpuRegister temp = if_instr->GetLocations()->GetTemp(0).AsRegister<CpuRegister>(); + ProfilingInfo* info = GetGraph()->GetProfilingInfo(); + DCHECK(info != nullptr); + BranchCache* cache = info->GetBranchCache(if_instr->GetDexPc()); + // Currently, not all If branches are profiled. + if (cache != nullptr) { + uint64_t address = + reinterpret_cast64<uint64_t>(cache) + BranchCache::FalseOffset().Int32Value(); + static_assert( + BranchCache::TrueOffset().Int32Value() - BranchCache::FalseOffset().Int32Value() == 2, + "Unexpected offsets for BranchCache"); + NearLabel done; + Location lhs = if_instr->GetLocations()->InAt(0); + __ movq(CpuRegister(TMP), Immediate(address)); + __ movzxw(temp, Address(CpuRegister(TMP), lhs.AsRegister<CpuRegister>(), TIMES_2, 0)); + __ addw(temp, Immediate(1)); + __ j(kZero, &done); + __ movw(Address(CpuRegister(TMP), lhs.AsRegister<CpuRegister>(), TIMES_2, 0), temp); + __ Bind(&done); + } + } + } GenerateTestAndBranch(if_instr, /* condition_input_index= */ 0, true_target, false_target); } @@ -2318,7 +2460,7 @@ void InstructionCodeGeneratorX86_64::VisitSelect(HSelect* select) { if (!condition->IsEmittedAtUseSite()) { // This was a previously materialized condition. // Can we use the existing condition code? - if (AreEflagsSetFrom(condition, select)) { + if (AreEflagsSetFrom(condition, select, codegen_->GetCompilerOptions())) { // Materialization was the previous instruction. Condition codes are right. 
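Aside: the new baseline VisitIf code above profiles materialized conditions: the condition value (0 or 1) scaled by two bytes selects one of the two 16-bit counters in the BranchCache, and the write-back is skipped when the increment wraps to zero, so each counter saturates at 0xFFFF. A standalone sketch of that counting scheme; the struct layout mirrors the FalseOffset/TrueOffset static_assert, everything else is illustrative:

#include <cstdint>

struct BranchCacheModel {
  uint16_t false_count;  // at FalseOffset()
  uint16_t true_count;   // at TrueOffset() == FalseOffset() + 2
};

inline void ProfileBranch(BranchCacheModel* cache, bool condition) {
  // movzxw temp, [cache + condition * 2]; addw temp, 1; jz done; movw [cache + condition * 2], temp
  uint16_t* slot = condition ? &cache->true_count : &cache->false_count;
  uint16_t incremented = static_cast<uint16_t>(*slot + 1);
  if (incremented != 0) {  // on wrap-around, keep the old value: the counter saturates
    *slot = incremented;
  }
}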
cond = X86_64IntegerCondition(condition->GetCondition()); } else { @@ -2657,7 +2799,7 @@ void LocationsBuilderX86_64::VisitIntConstant(HIntConstant* constant) { locations->SetOut(Location::ConstantLocation(constant)); } -void InstructionCodeGeneratorX86_64::VisitIntConstant(HIntConstant* constant ATTRIBUTE_UNUSED) { +void InstructionCodeGeneratorX86_64::VisitIntConstant([[maybe_unused]] HIntConstant* constant) { // Will be generated at use site. } @@ -2667,7 +2809,7 @@ void LocationsBuilderX86_64::VisitNullConstant(HNullConstant* constant) { locations->SetOut(Location::ConstantLocation(constant)); } -void InstructionCodeGeneratorX86_64::VisitNullConstant(HNullConstant* constant ATTRIBUTE_UNUSED) { +void InstructionCodeGeneratorX86_64::VisitNullConstant([[maybe_unused]] HNullConstant* constant) { // Will be generated at use site. } @@ -2677,7 +2819,7 @@ void LocationsBuilderX86_64::VisitLongConstant(HLongConstant* constant) { locations->SetOut(Location::ConstantLocation(constant)); } -void InstructionCodeGeneratorX86_64::VisitLongConstant(HLongConstant* constant ATTRIBUTE_UNUSED) { +void InstructionCodeGeneratorX86_64::VisitLongConstant([[maybe_unused]] HLongConstant* constant) { // Will be generated at use site. } @@ -2687,7 +2829,7 @@ void LocationsBuilderX86_64::VisitFloatConstant(HFloatConstant* constant) { locations->SetOut(Location::ConstantLocation(constant)); } -void InstructionCodeGeneratorX86_64::VisitFloatConstant(HFloatConstant* constant ATTRIBUTE_UNUSED) { +void InstructionCodeGeneratorX86_64::VisitFloatConstant([[maybe_unused]] HFloatConstant* constant) { // Will be generated at use site. } @@ -2698,7 +2840,7 @@ void LocationsBuilderX86_64::VisitDoubleConstant(HDoubleConstant* constant) { } void InstructionCodeGeneratorX86_64::VisitDoubleConstant( - HDoubleConstant* constant ATTRIBUTE_UNUSED) { + [[maybe_unused]] HDoubleConstant* constant) { // Will be generated at use site. } @@ -2707,7 +2849,7 @@ void LocationsBuilderX86_64::VisitConstructorFence(HConstructorFence* constructo } void InstructionCodeGeneratorX86_64::VisitConstructorFence( - HConstructorFence* constructor_fence ATTRIBUTE_UNUSED) { + [[maybe_unused]] HConstructorFence* constructor_fence) { codegen_->GenerateMemoryBarrier(MemBarrierKind::kStoreStore); } @@ -2723,7 +2865,7 @@ void LocationsBuilderX86_64::VisitReturnVoid(HReturnVoid* ret) { ret->SetLocations(nullptr); } -void InstructionCodeGeneratorX86_64::VisitReturnVoid(HReturnVoid* ret ATTRIBUTE_UNUSED) { +void InstructionCodeGeneratorX86_64::VisitReturnVoid([[maybe_unused]] HReturnVoid* ret) { codegen_->GenerateFrameExit(); } @@ -2996,23 +3138,26 @@ void LocationsBuilderX86_64::VisitInvokeInterface(HInvokeInterface* invoke) { void CodeGeneratorX86_64::MaybeGenerateInlineCacheCheck(HInstruction* instruction, CpuRegister klass) { DCHECK_EQ(RDI, klass.AsRegister()); - // We know the destination of an intrinsic, so no need to record inline - // caches. - if (!instruction->GetLocations()->Intrinsified() && - GetGraph()->IsCompilingBaseline() && - !Runtime::Current()->IsAotCompiler()) { + if (ProfilingInfoBuilder::IsInlineCacheUseful(instruction->AsInvoke(), this)) { ProfilingInfo* info = GetGraph()->GetProfilingInfo(); DCHECK(info != nullptr); - InlineCache* cache = info->GetInlineCache(instruction->GetDexPc()); - uint64_t address = reinterpret_cast64<uint64_t>(cache); - NearLabel done; - __ movq(CpuRegister(TMP), Immediate(address)); - // Fast path for a monomorphic cache. 
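Aside: MaybeGenerateInlineCacheCheck now defers to ProfilingInfoBuilder to decide whether an inline cache is useful, but the emitted fast path stays the same, as the hunk continues below: compare the receiver's class against the first cache entry and call kQuickUpdateInlineCache only on a miss. A simplified model of that check; the cache size and the slow-path policy here are assumptions for illustration, not ART's exact behaviour:

#include <cstddef>

struct Class {};

struct InlineCacheModel {
  static constexpr size_t kNumEntries = 5;  // illustrative size only
  Class* classes[kNumEntries] = {};         // classes[0] is the entry checked inline
};

// Stand-in for the kQuickUpdateInlineCache runtime entrypoint.
void UpdateInlineCacheSlowPath(InlineCacheModel* cache, Class* klass) {
  for (Class*& slot : cache->classes) {
    if (slot == nullptr || slot == klass) {
      slot = klass;
      return;
    }
  }
  // Cache full and megamorphic: drop the update (illustrative policy only).
}

inline void RecordReceiverClass(InlineCacheModel* cache, Class* klass) {
  if (cache->classes[0] == klass) {
    return;  // monomorphic hit: the inline fast path falls through
  }
  UpdateInlineCacheSlowPath(cache, klass);
}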
- __ cmpl(Address(CpuRegister(TMP), InlineCache::ClassesOffset().Int32Value()), klass); - __ j(kEqual, &done); - GenerateInvokeRuntime( - GetThreadOffset<kX86_64PointerSize>(kQuickUpdateInlineCache).Int32Value()); - __ Bind(&done); + InlineCache* cache = ProfilingInfoBuilder::GetInlineCache(info, instruction->AsInvoke()); + if (cache != nullptr) { + uint64_t address = reinterpret_cast64<uint64_t>(cache); + NearLabel done; + __ movq(CpuRegister(TMP), Immediate(address)); + // Fast path for a monomorphic cache. + __ cmpl(Address(CpuRegister(TMP), InlineCache::ClassesOffset().Int32Value()), klass); + __ j(kEqual, &done); + GenerateInvokeRuntime( + GetThreadOffset<kX86_64PointerSize>(kQuickUpdateInlineCache).Int32Value()); + __ Bind(&done); + } else { + // This is unexpected, but we don't guarantee stable compilation across + // JIT runs so just warn about it. + ScopedObjectAccess soa(Thread::Current()); + LOG(WARNING) << "Missing inline cache for " << GetGraph()->GetArtMethod()->PrettyMethod(); + } } } @@ -4972,7 +5117,7 @@ void LocationsBuilderX86_64::VisitParameterValue(HParameterValue* instruction) { } void InstructionCodeGeneratorX86_64::VisitParameterValue( - HParameterValue* instruction ATTRIBUTE_UNUSED) { + [[maybe_unused]] HParameterValue* instruction) { // Nothing to do, the parameter is already at its location. } @@ -4983,7 +5128,7 @@ void LocationsBuilderX86_64::VisitCurrentMethod(HCurrentMethod* instruction) { } void InstructionCodeGeneratorX86_64::VisitCurrentMethod( - HCurrentMethod* instruction ATTRIBUTE_UNUSED) { + [[maybe_unused]] HCurrentMethod* instruction) { // Nothing to do, the method is already at its location. } @@ -5062,7 +5207,7 @@ void LocationsBuilderX86_64::VisitPhi(HPhi* instruction) { locations->SetOut(Location::Any()); } -void InstructionCodeGeneratorX86_64::VisitPhi(HPhi* instruction ATTRIBUTE_UNUSED) { +void InstructionCodeGeneratorX86_64::VisitPhi([[maybe_unused]] HPhi* instruction) { LOG(FATAL) << "Unimplemented"; } @@ -5091,13 +5236,10 @@ void CodeGeneratorX86_64::GenerateMemoryBarrier(MemBarrierKind kind) { } void LocationsBuilderX86_64::HandleFieldGet(HInstruction* instruction) { - DCHECK(instruction->IsInstanceFieldGet() || - instruction->IsStaticFieldGet() || - instruction->IsPredicatedInstanceFieldGet()); + DCHECK(instruction->IsInstanceFieldGet() || instruction->IsStaticFieldGet()); bool object_field_get_with_read_barrier = - gUseReadBarrier && (instruction->GetType() == DataType::Type::kReference); - bool is_predicated = instruction->IsPredicatedInstanceFieldGet(); + (instruction->GetType() == DataType::Type::kReference) && codegen_->EmitReadBarrier(); LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(instruction, object_field_get_with_read_barrier @@ -5107,37 +5249,26 @@ void LocationsBuilderX86_64::HandleFieldGet(HInstruction* instruction) { locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty()); // No caller-save registers. } // receiver_input - locations->SetInAt(is_predicated ? 1 : 0, Location::RequiresRegister()); - if (is_predicated) { - if (DataType::IsFloatingPointType(instruction->GetType())) { - locations->SetInAt(0, Location::RequiresFpuRegister()); - } else { - locations->SetInAt(0, Location::RequiresRegister()); - } - } + locations->SetInAt(0, Location::RequiresRegister()); if (DataType::IsFloatingPointType(instruction->GetType())) { - locations->SetOut(is_predicated ? 
Location::SameAsFirstInput() - : Location::RequiresFpuRegister()); + locations->SetOut(Location::RequiresFpuRegister()); } else { // The output overlaps for an object field get when read barriers are // enabled: we do not want the move to overwrite the object's location, as // we need it to emit the read barrier. For predicated instructions we can // always overlap since the output is SameAsFirst and the default value. - locations->SetOut(is_predicated ? Location::SameAsFirstInput() : Location::RequiresRegister(), - object_field_get_with_read_barrier || is_predicated - ? Location::kOutputOverlap - : Location::kNoOutputOverlap); + locations->SetOut( + Location::RequiresRegister(), + object_field_get_with_read_barrier ? Location::kOutputOverlap : Location::kNoOutputOverlap); } } void InstructionCodeGeneratorX86_64::HandleFieldGet(HInstruction* instruction, const FieldInfo& field_info) { - DCHECK(instruction->IsInstanceFieldGet() || - instruction->IsStaticFieldGet() || - instruction->IsPredicatedInstanceFieldGet()); + DCHECK(instruction->IsInstanceFieldGet() || instruction->IsStaticFieldGet()); LocationSummary* locations = instruction->GetLocations(); - Location base_loc = locations->InAt(instruction->IsPredicatedInstanceFieldGet() ? 1 : 0); + Location base_loc = locations->InAt(0); CpuRegister base = base_loc.AsRegister<CpuRegister>(); Location out = locations->Out(); bool is_volatile = field_info.IsVolatile(); @@ -5147,7 +5278,7 @@ void InstructionCodeGeneratorX86_64::HandleFieldGet(HInstruction* instruction, if (load_type == DataType::Type::kReference) { // /* HeapReference<Object> */ out = *(base + offset) - if (gUseReadBarrier && kUseBakerReadBarrier) { + if (codegen_->EmitBakerReadBarrier()) { // Note that a potential implicit null check is handled in this // CodeGeneratorX86_64::GenerateFieldLoadWithBakerReadBarrier call. 
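Aside: several hunks in this file replace the global gUseReadBarrier/kUseBakerReadBarrier tests with per-code-generator EmitReadBarrier()/EmitBakerReadBarrier() queries around the reference-load fast paths. Conceptually, a Baker-style read barrier keeps those loads cheap: the compiled code performs a plain load and only invokes the mark routine while the collector is concurrently marking. The sketch below is a deliberately coarse model of that idea, not ART's actual mechanism; the flag, types, and mark function are stand-ins:

#include <atomic>

struct Object {
  Object* field = nullptr;
};

std::atomic<bool> gc_is_marking{false};             // stand-in for the "is the GC marking?" check
Object* MarkReference(Object* ref) { return ref; }  // stub for the read-barrier mark slow path

inline Object* LoadReferenceField(Object* holder) {
  Object* ref = holder->field;  // fast path: an ordinary reference load
  if (gc_is_marking.load(std::memory_order_relaxed) && ref != nullptr) {
    ref = MarkReference(ref);   // slow path only while concurrent marking is active
  }
  return ref;
}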
codegen_->GenerateFieldLoadWithBakerReadBarrier( @@ -5413,14 +5544,6 @@ void InstructionCodeGeneratorX86_64::HandleFieldSet(HInstruction* instruction, bool is_volatile = field_info.IsVolatile(); DataType::Type field_type = field_info.GetFieldType(); uint32_t offset = field_info.GetFieldOffset().Uint32Value(); - bool is_predicated = - instruction->IsInstanceFieldSet() && instruction->AsInstanceFieldSet()->GetIsPredicatedSet(); - - NearLabel pred_is_null; - if (is_predicated) { - __ testl(base, base); - __ j(kZero, &pred_is_null); - } HandleFieldSet(instruction, /*value_index=*/ 1, @@ -5433,10 +5556,6 @@ void InstructionCodeGeneratorX86_64::HandleFieldSet(HInstruction* instruction, value_can_be_null, /*byte_swap=*/ false, write_barrier_kind); - - if (is_predicated) { - __ Bind(&pred_is_null); - } } void LocationsBuilderX86_64::VisitInstanceFieldSet(HInstanceFieldSet* instruction) { @@ -5450,26 +5569,10 @@ void InstructionCodeGeneratorX86_64::VisitInstanceFieldSet(HInstanceFieldSet* in instruction->GetWriteBarrierKind()); } -void LocationsBuilderX86_64::VisitPredicatedInstanceFieldGet( - HPredicatedInstanceFieldGet* instruction) { - HandleFieldGet(instruction); -} - void LocationsBuilderX86_64::VisitInstanceFieldGet(HInstanceFieldGet* instruction) { HandleFieldGet(instruction); } -void InstructionCodeGeneratorX86_64::VisitPredicatedInstanceFieldGet( - HPredicatedInstanceFieldGet* instruction) { - NearLabel finish; - LocationSummary* locations = instruction->GetLocations(); - CpuRegister target = locations->InAt(1).AsRegister<CpuRegister>(); - __ testl(target, target); - __ j(kZero, &finish); - HandleFieldGet(instruction, instruction->GetFieldInfo()); - __ Bind(&finish); -} - void InstructionCodeGeneratorX86_64::VisitInstanceFieldGet(HInstanceFieldGet* instruction) { HandleFieldGet(instruction, instruction->GetFieldInfo()); } @@ -5615,7 +5718,7 @@ void InstructionCodeGeneratorX86_64::VisitNullCheck(HNullCheck* instruction) { void LocationsBuilderX86_64::VisitArrayGet(HArrayGet* instruction) { bool object_array_get_with_read_barrier = - gUseReadBarrier && (instruction->GetType() == DataType::Type::kReference); + (instruction->GetType() == DataType::Type::kReference) && codegen_->EmitReadBarrier(); LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(instruction, object_array_get_with_read_barrier @@ -5653,7 +5756,7 @@ void InstructionCodeGeneratorX86_64::VisitArrayGet(HArrayGet* instruction) { "art::mirror::HeapReference<art::mirror::Object> and int32_t have different sizes."); // /* HeapReference<Object> */ out = // *(obj + data_offset + index * sizeof(HeapReference<Object>)) - if (gUseReadBarrier && kUseBakerReadBarrier) { + if (codegen_->EmitBakerReadBarrier()) { // Note that a potential implicit null check is handled in this // CodeGeneratorX86_64::GenerateArrayLoadWithBakerReadBarrier call. 
codegen_->GenerateArrayLoadWithBakerReadBarrier( @@ -5930,8 +6033,7 @@ void InstructionCodeGeneratorX86_64::VisitArraySet(HArraySet* instruction) { __ movsd(address, value.AsFpuRegister<XmmRegister>()); codegen_->MaybeRecordImplicitNullCheck(instruction); } else { - int64_t v = - bit_cast<int64_t, double>(value.GetConstant()->AsDoubleConstant()->GetValue()); + int64_t v = bit_cast<int64_t, double>(value.GetConstant()->AsDoubleConstant()->GetValue()); Address address_high = CodeGeneratorX86_64::ArrayAddress(array, index, TIMES_8, offset + sizeof(int32_t)); codegen_->MoveInt64ToAddress(address, address_high, v, instruction); @@ -6084,7 +6186,7 @@ void CodeGeneratorX86_64::MarkGCCard(CpuRegister temp, } } -void LocationsBuilderX86_64::VisitParallelMove(HParallelMove* instruction ATTRIBUTE_UNUSED) { +void LocationsBuilderX86_64::VisitParallelMove([[maybe_unused]] HParallelMove* instruction) { LOG(FATAL) << "Unimplemented"; } @@ -6458,7 +6560,7 @@ void LocationsBuilderX86_64::VisitLoadClass(HLoadClass* cls) { load_kind == HLoadClass::LoadKind::kBssEntryPublic || load_kind == HLoadClass::LoadKind::kBssEntryPackage); - const bool requires_read_barrier = gUseReadBarrier && !cls->IsInBootImage(); + const bool requires_read_barrier = !cls->IsInBootImage() && codegen_->EmitReadBarrier(); LocationSummary::CallKind call_kind = (cls->NeedsEnvironment() || requires_read_barrier) ? LocationSummary::kCallOnSlowPath : LocationSummary::kNoCall; @@ -6471,12 +6573,14 @@ void LocationsBuilderX86_64::VisitLoadClass(HLoadClass* cls) { locations->SetInAt(0, Location::RequiresRegister()); } locations->SetOut(Location::RequiresRegister()); - if (load_kind == HLoadClass::LoadKind::kBssEntry) { - if (!gUseReadBarrier || kUseBakerReadBarrier) { + if (load_kind == HLoadClass::LoadKind::kBssEntry || + load_kind == HLoadClass::LoadKind::kBssEntryPublic || + load_kind == HLoadClass::LoadKind::kBssEntryPackage) { + if (codegen_->EmitNonBakerReadBarrier()) { + // For non-Baker read barrier we have a temp-clobbering call. + } else { // Rely on the type resolution and/or initialization to save everything. locations->SetCustomSlowPathCallerSaves(OneRegInReferenceOutSaveEverythingCallerSaves()); - } else { - // For non-Baker read barrier we have a temp-clobbering call. } } } @@ -6507,9 +6611,8 @@ void InstructionCodeGeneratorX86_64::VisitLoadClass(HLoadClass* cls) NO_THREAD_S Location out_loc = locations->Out(); CpuRegister out = out_loc.AsRegister<CpuRegister>(); - const ReadBarrierOption read_barrier_option = cls->IsInBootImage() - ? kWithoutReadBarrier - : gCompilerReadBarrierOption; + const ReadBarrierOption read_barrier_option = + cls->IsInBootImage() ? kWithoutReadBarrier : codegen_->GetCompilerReadBarrierOption(); bool generate_null_check = false; switch (load_kind) { case HLoadClass::LoadKind::kReferrersClass: { @@ -6612,13 +6715,50 @@ void InstructionCodeGeneratorX86_64::VisitLoadMethodHandle(HLoadMethodHandle* lo } void LocationsBuilderX86_64::VisitLoadMethodType(HLoadMethodType* load) { - // Custom calling convention: RAX serves as both input and output. 
- Location location = Location::RegisterLocation(RAX); - CodeGenerator::CreateLoadMethodTypeRuntimeCallLocationSummary(load, location, location); + LocationSummary* locations = + new (GetGraph()->GetAllocator()) LocationSummary(load, LocationSummary::kCallOnSlowPath); + if (load->GetLoadKind() == HLoadMethodType::LoadKind::kRuntimeCall) { + Location location = Location::RegisterLocation(RAX); + CodeGenerator::CreateLoadMethodTypeRuntimeCallLocationSummary(load, location, location); + } else { + DCHECK_EQ(load->GetLoadKind(), HLoadMethodType::LoadKind::kBssEntry); + locations->SetOut(Location::RequiresRegister()); + if (codegen_->EmitNonBakerReadBarrier()) { + // For non-Baker read barrier we have a temp-clobbering call. + } else { + // Rely on the pResolveMethodType to save everything. + locations->SetCustomSlowPathCallerSaves(OneRegInReferenceOutSaveEverythingCallerSaves()); + } + } } void InstructionCodeGeneratorX86_64::VisitLoadMethodType(HLoadMethodType* load) { - codegen_->GenerateLoadMethodTypeRuntimeCall(load); + LocationSummary* locations = load->GetLocations(); + Location out_loc = locations->Out(); + CpuRegister out = out_loc.AsRegister<CpuRegister>(); + + switch (load->GetLoadKind()) { + case HLoadMethodType::LoadKind::kBssEntry: { + Address address = Address::Absolute(CodeGeneratorX86_64::kPlaceholder32BitOffset, + /* no_rip= */ false); + Label* fixup_label = codegen_->NewMethodTypeBssEntryPatch(load); + // /* GcRoot<mirror::MethodType> */ out = *address /* PC-relative */ + GenerateGcRootFieldLoad( + load, out_loc, address, fixup_label, codegen_->GetCompilerReadBarrierOption()); + // No need for memory fence, thanks to the x86-64 memory model. + SlowPathCode* slow_path = + new (codegen_->GetScopedAllocator()) LoadMethodTypeSlowPathX86_64(load); + codegen_->AddSlowPath(slow_path); + __ testl(out, out); + __ j(kEqual, slow_path->GetEntryLabel()); + __ Bind(slow_path->GetExitLabel()); + return; + } + default: + DCHECK_EQ(load->GetLoadKind(), HLoadMethodType::LoadKind::kRuntimeCall); + codegen_->GenerateLoadMethodTypeRuntimeCall(load); + break; + } } void InstructionCodeGeneratorX86_64::VisitClinitCheck(HClinitCheck* check) { @@ -6649,18 +6789,18 @@ HLoadString::LoadKind CodeGeneratorX86_64::GetSupportedLoadStringKind( } void LocationsBuilderX86_64::VisitLoadString(HLoadString* load) { - LocationSummary::CallKind call_kind = CodeGenerator::GetLoadStringCallKind(load); + LocationSummary::CallKind call_kind = codegen_->GetLoadStringCallKind(load); LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(load, call_kind); if (load->GetLoadKind() == HLoadString::LoadKind::kRuntimeCall) { locations->SetOut(Location::RegisterLocation(RAX)); } else { locations->SetOut(Location::RequiresRegister()); if (load->GetLoadKind() == HLoadString::LoadKind::kBssEntry) { - if (!gUseReadBarrier || kUseBakerReadBarrier) { + if (codegen_->EmitNonBakerReadBarrier()) { + // For non-Baker read barrier we have a temp-clobbering call. + } else { // Rely on the pResolveString to save everything. locations->SetCustomSlowPathCallerSaves(OneRegInReferenceOutSaveEverythingCallerSaves()); - } else { - // For non-Baker read barrier we have a temp-clobbering call. 
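Aside: the new HLoadMethodType kBssEntry path above follows the existing HLoadString/HLoadClass pattern: load a GC root from a PC-relative .bss slot and, if it is still null, jump to LoadMethodTypeSlowPathX86_64, which calls kQuickResolveMethodType and moves the result into the output register; once the slot is filled, later executions take the fast path. A plain C++ sketch of such a lazily filled slot (the names and the explicit store into the slot are simplifications; in ART the runtime side is responsible for populating the .bss entry):

#include <cstdint>

struct MethodType {};

// Stand-in for the kQuickResolveMethodType entrypoint.
MethodType* ResolveMethodTypeFromRuntime(uint32_t proto_index) {
  static MethodType resolved;
  (void)proto_index;
  return &resolved;
}

// Stand-in for the PC-relatively addressed .bss slot reserved for one proto index.
static MethodType* method_type_bss_slot = nullptr;

MethodType* LoadMethodType(uint32_t proto_index) {
  MethodType* mt = method_type_bss_slot;  // fast path: one load plus a null test
  if (mt == nullptr) {
    mt = ResolveMethodTypeFromRuntime(proto_index);
    method_type_bss_slot = mt;            // cached so later executions skip the slow path
  }
  return mt;
}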
} } } @@ -6704,7 +6844,8 @@ void InstructionCodeGeneratorX86_64::VisitLoadString(HLoadString* load) NO_THREA /* no_rip= */ false); Label* fixup_label = codegen_->NewStringBssEntryPatch(load); // /* GcRoot<mirror::Class> */ out = *address /* PC-relative */ - GenerateGcRootFieldLoad(load, out_loc, address, fixup_label, gCompilerReadBarrierOption); + GenerateGcRootFieldLoad( + load, out_loc, address, fixup_label, codegen_->GetCompilerReadBarrierOption()); // No need for memory fence, thanks to the x86-64 memory model. SlowPathCode* slow_path = new (codegen_->GetScopedAllocator()) LoadStringSlowPathX86_64(load); codegen_->AddSlowPath(slow_path); @@ -6725,14 +6866,14 @@ void InstructionCodeGeneratorX86_64::VisitLoadString(HLoadString* load) NO_THREA Label* fixup_label = codegen_->NewJitRootStringPatch( load->GetDexFile(), load->GetStringIndex(), load->GetString()); // /* GcRoot<mirror::String> */ out = *address - GenerateGcRootFieldLoad(load, out_loc, address, fixup_label, gCompilerReadBarrierOption); + GenerateGcRootFieldLoad( + load, out_loc, address, fixup_label, codegen_->GetCompilerReadBarrierOption()); return; } default: break; } - // TODO: Re-add the compiler code to do string dex cache lookup again. // Custom calling convention: RAX serves as both input and output. __ movl(CpuRegister(RAX), Immediate(load->GetStringIndex().index_)); codegen_->InvokeRuntime(kQuickResolveString, @@ -6760,7 +6901,7 @@ void LocationsBuilderX86_64::VisitClearException(HClearException* clear) { new (GetGraph()->GetAllocator()) LocationSummary(clear, LocationSummary::kNoCall); } -void InstructionCodeGeneratorX86_64::VisitClearException(HClearException* clear ATTRIBUTE_UNUSED) { +void InstructionCodeGeneratorX86_64::VisitClearException([[maybe_unused]] HClearException* clear) { __ gs()->movl(GetExceptionTlsAddress(), Immediate(0)); } @@ -6777,8 +6918,8 @@ void InstructionCodeGeneratorX86_64::VisitThrow(HThrow* instruction) { } // Temp is used for read barrier. -static size_t NumberOfInstanceOfTemps(TypeCheckKind type_check_kind) { - if (gUseReadBarrier && +static size_t NumberOfInstanceOfTemps(bool emit_read_barrier, TypeCheckKind type_check_kind) { + if (emit_read_barrier && !kUseBakerReadBarrier && (type_check_kind == TypeCheckKind::kAbstractClassCheck || type_check_kind == TypeCheckKind::kClassHierarchyCheck || @@ -6791,11 +6932,11 @@ static size_t NumberOfInstanceOfTemps(TypeCheckKind type_check_kind) { // Interface case has 2 temps, one for holding the number of interfaces, one for the current // interface pointer, the current interface is compared in memory. // The other checks have one temp for loading the object's class. -static size_t NumberOfCheckCastTemps(TypeCheckKind type_check_kind) { +static size_t NumberOfCheckCastTemps(bool emit_read_barrier, TypeCheckKind type_check_kind) { if (type_check_kind == TypeCheckKind::kInterfaceCheck) { return 2; } - return 1 + NumberOfInstanceOfTemps(type_check_kind); + return 1 + NumberOfInstanceOfTemps(emit_read_barrier, type_check_kind); } void LocationsBuilderX86_64::VisitInstanceOf(HInstanceOf* instruction) { @@ -6807,7 +6948,7 @@ void LocationsBuilderX86_64::VisitInstanceOf(HInstanceOf* instruction) { case TypeCheckKind::kAbstractClassCheck: case TypeCheckKind::kClassHierarchyCheck: case TypeCheckKind::kArrayObjectCheck: { - bool needs_read_barrier = CodeGenerator::InstanceOfNeedsReadBarrier(instruction); + bool needs_read_barrier = codegen_->InstanceOfNeedsReadBarrier(instruction); call_kind = needs_read_barrier ? 
LocationSummary::kCallOnSlowPath : LocationSummary::kNoCall; baker_read_barrier_slow_path = kUseBakerReadBarrier && needs_read_barrier; break; @@ -6836,7 +6977,8 @@ void LocationsBuilderX86_64::VisitInstanceOf(HInstanceOf* instruction) { } // Note that TypeCheckSlowPathX86_64 uses this "out" register too. locations->SetOut(Location::RequiresRegister()); - locations->AddRegisterTemps(NumberOfInstanceOfTemps(type_check_kind)); + locations->AddRegisterTemps( + NumberOfInstanceOfTemps(codegen_->EmitReadBarrier(), type_check_kind)); } void InstructionCodeGeneratorX86_64::VisitInstanceOf(HInstanceOf* instruction) { @@ -6847,7 +6989,7 @@ void InstructionCodeGeneratorX86_64::VisitInstanceOf(HInstanceOf* instruction) { Location cls = locations->InAt(1); Location out_loc = locations->Out(); CpuRegister out = out_loc.AsRegister<CpuRegister>(); - const size_t num_temps = NumberOfInstanceOfTemps(type_check_kind); + const size_t num_temps = NumberOfInstanceOfTemps(codegen_->EmitReadBarrier(), type_check_kind); DCHECK_LE(num_temps, 1u); Location maybe_temp_loc = (num_temps >= 1u) ? locations->GetTemp(0) : Location::NoLocation(); uint32_t class_offset = mirror::Object::ClassOffset().Int32Value(); @@ -6867,7 +7009,7 @@ void InstructionCodeGeneratorX86_64::VisitInstanceOf(HInstanceOf* instruction) { switch (type_check_kind) { case TypeCheckKind::kExactCheck: { ReadBarrierOption read_barrier_option = - CodeGenerator::ReadBarrierOptionForInstanceOf(instruction); + codegen_->ReadBarrierOptionForInstanceOf(instruction); // /* HeapReference<Class> */ out = obj->klass_ GenerateReferenceLoadTwoRegisters(instruction, out_loc, @@ -6895,7 +7037,7 @@ void InstructionCodeGeneratorX86_64::VisitInstanceOf(HInstanceOf* instruction) { case TypeCheckKind::kAbstractClassCheck: { ReadBarrierOption read_barrier_option = - CodeGenerator::ReadBarrierOptionForInstanceOf(instruction); + codegen_->ReadBarrierOptionForInstanceOf(instruction); // /* HeapReference<Class> */ out = obj->klass_ GenerateReferenceLoadTwoRegisters(instruction, out_loc, @@ -6931,7 +7073,7 @@ void InstructionCodeGeneratorX86_64::VisitInstanceOf(HInstanceOf* instruction) { case TypeCheckKind::kClassHierarchyCheck: { ReadBarrierOption read_barrier_option = - CodeGenerator::ReadBarrierOptionForInstanceOf(instruction); + codegen_->ReadBarrierOptionForInstanceOf(instruction); // /* HeapReference<Class> */ out = obj->klass_ GenerateReferenceLoadTwoRegisters(instruction, out_loc, @@ -6968,7 +7110,7 @@ void InstructionCodeGeneratorX86_64::VisitInstanceOf(HInstanceOf* instruction) { case TypeCheckKind::kArrayObjectCheck: { ReadBarrierOption read_barrier_option = - CodeGenerator::ReadBarrierOptionForInstanceOf(instruction); + codegen_->ReadBarrierOptionForInstanceOf(instruction); // /* HeapReference<Class> */ out = obj->klass_ GenerateReferenceLoadTwoRegisters(instruction, out_loc, @@ -7097,7 +7239,7 @@ void InstructionCodeGeneratorX86_64::VisitInstanceOf(HInstanceOf* instruction) { void LocationsBuilderX86_64::VisitCheckCast(HCheckCast* instruction) { TypeCheckKind type_check_kind = instruction->GetTypeCheckKind(); - LocationSummary::CallKind call_kind = CodeGenerator::GetCheckCastCallKind(instruction); + LocationSummary::CallKind call_kind = codegen_->GetCheckCastCallKind(instruction); LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(instruction, call_kind); locations->SetInAt(0, Location::RequiresRegister()); @@ -7112,8 +7254,7 @@ void LocationsBuilderX86_64::VisitCheckCast(HCheckCast* instruction) { } else { locations->SetInAt(1, 
Location::Any()); } - // Add temps for read barriers and other uses. One is used by TypeCheckSlowPathX86. - locations->AddRegisterTemps(NumberOfCheckCastTemps(type_check_kind)); + locations->AddRegisterTemps(NumberOfCheckCastTemps(codegen_->EmitReadBarrier(), type_check_kind)); } void InstructionCodeGeneratorX86_64::VisitCheckCast(HCheckCast* instruction) { @@ -7124,7 +7265,7 @@ void InstructionCodeGeneratorX86_64::VisitCheckCast(HCheckCast* instruction) { Location cls = locations->InAt(1); Location temp_loc = locations->GetTemp(0); CpuRegister temp = temp_loc.AsRegister<CpuRegister>(); - const size_t num_temps = NumberOfCheckCastTemps(type_check_kind); + const size_t num_temps = NumberOfCheckCastTemps(codegen_->EmitReadBarrier(), type_check_kind); DCHECK_GE(num_temps, 1u); DCHECK_LE(num_temps, 2u); Location maybe_temp2_loc = (num_temps >= 2u) ? locations->GetTemp(1) : Location::NoLocation(); @@ -7137,7 +7278,7 @@ void InstructionCodeGeneratorX86_64::VisitCheckCast(HCheckCast* instruction) { const uint32_t object_array_data_offset = mirror::Array::DataOffset(kHeapReferenceSize).Uint32Value(); - bool is_type_check_slow_path_fatal = CodeGenerator::IsTypeCheckSlowPathFatal(instruction); + bool is_type_check_slow_path_fatal = codegen_->IsTypeCheckSlowPathFatal(instruction); SlowPathCode* type_check_slow_path = new (codegen_->GetScopedAllocator()) TypeCheckSlowPathX86_64( instruction, is_type_check_slow_path_fatal); @@ -7301,11 +7442,11 @@ void InstructionCodeGeneratorX86_64::VisitCheckCast(HCheckCast* instruction) { kWithoutReadBarrier); // /* HeapReference<Class> */ temp = temp->iftable_ - GenerateReferenceLoadTwoRegisters(instruction, - temp_loc, - temp_loc, - iftable_offset, - kWithoutReadBarrier); + GenerateReferenceLoadOneRegister(instruction, + temp_loc, + iftable_offset, + maybe_temp2_loc, + kWithoutReadBarrier); // Iftable is never null. __ movl(maybe_temp2_loc.AsRegister<CpuRegister>(), Address(temp, array_length_offset)); // Maybe poison the `cls` for direct comparison with memory. @@ -7532,7 +7673,7 @@ void InstructionCodeGeneratorX86_64::GenerateReferenceLoadOneRegister( ReadBarrierOption read_barrier_option) { CpuRegister out_reg = out.AsRegister<CpuRegister>(); if (read_barrier_option == kWithReadBarrier) { - CHECK(gUseReadBarrier); + DCHECK(codegen_->EmitReadBarrier()); if (kUseBakerReadBarrier) { // Load with fast path based Baker's read barrier. // /* HeapReference<Object> */ out = *(out + offset) @@ -7566,7 +7707,7 @@ void InstructionCodeGeneratorX86_64::GenerateReferenceLoadTwoRegisters( CpuRegister out_reg = out.AsRegister<CpuRegister>(); CpuRegister obj_reg = obj.AsRegister<CpuRegister>(); if (read_barrier_option == kWithReadBarrier) { - CHECK(gUseReadBarrier); + DCHECK(codegen_->EmitReadBarrier()); if (kUseBakerReadBarrier) { // Load with fast path based Baker's read barrier. 
// /* HeapReference<Object> */ out = *(obj + offset) @@ -7594,7 +7735,7 @@ void InstructionCodeGeneratorX86_64::GenerateGcRootFieldLoad( ReadBarrierOption read_barrier_option) { CpuRegister root_reg = root.AsRegister<CpuRegister>(); if (read_barrier_option == kWithReadBarrier) { - DCHECK(gUseReadBarrier); + DCHECK(codegen_->EmitReadBarrier()); if (kUseBakerReadBarrier) { // Fast path implementation of art::ReadBarrier::BarrierForRoot when // Baker's read barrier are used: @@ -7658,8 +7799,7 @@ void CodeGeneratorX86_64::GenerateFieldLoadWithBakerReadBarrier(HInstruction* in CpuRegister obj, uint32_t offset, bool needs_null_check) { - DCHECK(gUseReadBarrier); - DCHECK(kUseBakerReadBarrier); + DCHECK(EmitBakerReadBarrier()); // /* HeapReference<Object> */ ref = *(obj + offset) Address src(obj, offset); @@ -7672,8 +7812,7 @@ void CodeGeneratorX86_64::GenerateArrayLoadWithBakerReadBarrier(HInstruction* in uint32_t data_offset, Location index, bool needs_null_check) { - DCHECK(gUseReadBarrier); - DCHECK(kUseBakerReadBarrier); + DCHECK(EmitBakerReadBarrier()); static_assert( sizeof(mirror::HeapReference<mirror::Object>) == sizeof(int32_t), @@ -7692,8 +7831,7 @@ void CodeGeneratorX86_64::GenerateReferenceLoadWithBakerReadBarrier(HInstruction bool always_update_field, CpuRegister* temp1, CpuRegister* temp2) { - DCHECK(gUseReadBarrier); - DCHECK(kUseBakerReadBarrier); + DCHECK(EmitBakerReadBarrier()); // In slow path based read barriers, the read barrier call is // inserted after the original load. However, in fast path based @@ -7774,7 +7912,7 @@ void CodeGeneratorX86_64::GenerateReadBarrierSlow(HInstruction* instruction, Location obj, uint32_t offset, Location index) { - DCHECK(gUseReadBarrier); + DCHECK(EmitReadBarrier()); // Insert a slow path based read barrier *after* the reference load. // @@ -7801,7 +7939,7 @@ void CodeGeneratorX86_64::MaybeGenerateReadBarrierSlow(HInstruction* instruction Location obj, uint32_t offset, Location index) { - if (gUseReadBarrier) { + if (EmitReadBarrier()) { // Baker's read barriers shall be handled by the fast path // (CodeGeneratorX86_64::GenerateReferenceLoadWithBakerReadBarrier). DCHECK(!kUseBakerReadBarrier); @@ -7816,7 +7954,7 @@ void CodeGeneratorX86_64::MaybeGenerateReadBarrierSlow(HInstruction* instruction void CodeGeneratorX86_64::GenerateReadBarrierForRootSlow(HInstruction* instruction, Location out, Location root) { - DCHECK(gUseReadBarrier); + DCHECK(EmitReadBarrier()); // Insert a slow path based read barrier *after* the GC root load. // @@ -7830,12 +7968,12 @@ void CodeGeneratorX86_64::GenerateReadBarrierForRootSlow(HInstruction* instructi __ Bind(slow_path->GetExitLabel()); } -void LocationsBuilderX86_64::VisitBoundType(HBoundType* instruction ATTRIBUTE_UNUSED) { +void LocationsBuilderX86_64::VisitBoundType([[maybe_unused]] HBoundType* instruction) { // Nothing to do, this should be removed during prepare for register allocator. LOG(FATAL) << "Unreachable"; } -void InstructionCodeGeneratorX86_64::VisitBoundType(HBoundType* instruction ATTRIBUTE_UNUSED) { +void InstructionCodeGeneratorX86_64::VisitBoundType([[maybe_unused]] HBoundType* instruction) { // Nothing to do, this should be removed during prepare for register allocator. 
LOG(FATAL) << "Unreachable"; } @@ -7930,13 +8068,13 @@ void InstructionCodeGeneratorX86_64::VisitPackedSwitch(HPackedSwitch* switch_ins __ jmp(temp_reg); } -void LocationsBuilderX86_64::VisitIntermediateAddress(HIntermediateAddress* instruction - ATTRIBUTE_UNUSED) { +void LocationsBuilderX86_64::VisitIntermediateAddress( + [[maybe_unused]] HIntermediateAddress* instruction) { LOG(FATAL) << "Unreachable"; } -void InstructionCodeGeneratorX86_64::VisitIntermediateAddress(HIntermediateAddress* instruction - ATTRIBUTE_UNUSED) { +void InstructionCodeGeneratorX86_64::VisitIntermediateAddress( + [[maybe_unused]] HIntermediateAddress* instruction) { LOG(FATAL) << "Unreachable"; } @@ -8037,9 +8175,9 @@ Address CodeGeneratorX86_64::ArrayAddress(CpuRegister obj, Location index, ScaleFactor scale, uint32_t data_offset) { - return index.IsConstant() ? - Address(obj, (index.GetConstant()->AsIntConstant()->GetValue() << scale) + data_offset) : - Address(obj, index.AsRegister<CpuRegister>(), scale, data_offset); + return index.IsConstant() + ? Address(obj, (index.GetConstant()->AsIntConstant()->GetValue() << scale) + data_offset) + : Address(obj, index.AsRegister<CpuRegister>(), scale, data_offset); } void CodeGeneratorX86_64::Store64BitValueToStack(Location dest, int64_t value) { @@ -8119,7 +8257,7 @@ class JumpTableRIPFixup : public RIPFixup { const HPackedSwitch* switch_instr_; }; -void CodeGeneratorX86_64::Finalize(CodeAllocator* allocator) { +void CodeGeneratorX86_64::Finalize() { // Generate the constant area if needed. X86_64Assembler* assembler = GetAssembler(); if (!assembler->IsConstantAreaEmpty() || !fixups_to_jump_tables_.empty()) { @@ -8137,7 +8275,7 @@ void CodeGeneratorX86_64::Finalize(CodeAllocator* allocator) { } // And finish up. - CodeGenerator::Finalize(allocator); + CodeGenerator::Finalize(); } Address CodeGeneratorX86_64::LiteralDoubleAddress(double v) { @@ -8217,7 +8355,7 @@ void CodeGeneratorX86_64::PatchJitRootUse(uint8_t* code, reinterpret_cast<uintptr_t>(roots_data) + index_in_table * sizeof(GcRoot<mirror::Object>); using unaligned_uint32_t __attribute__((__aligned__(1))) = uint32_t; reinterpret_cast<unaligned_uint32_t*>(code + code_offset)[0] = - dchecked_integral_cast<uint32_t>(address); + dchecked_integral_cast<uint32_t>(address); } void CodeGeneratorX86_64::EmitJitRootPatches(uint8_t* code, const uint8_t* roots_data) { diff --git a/compiler/optimizing/code_generator_x86_64.h b/compiler/optimizing/code_generator_x86_64.h index dff2e799e0..e4d3eac6bc 100644 --- a/compiler/optimizing/code_generator_x86_64.h +++ b/compiler/optimizing/code_generator_x86_64.h @@ -87,19 +87,8 @@ static constexpr FloatRegister non_volatile_xmm_regs[] = { XMM12, XMM13, XMM14, V(StringBuilderLength) \ V(StringBuilderToString) \ /* 1.8 */ \ - V(UnsafeGetAndAddInt) \ - V(UnsafeGetAndAddLong) \ - V(UnsafeGetAndSetInt) \ - V(UnsafeGetAndSetLong) \ - V(UnsafeGetAndSetObject) \ V(MethodHandleInvokeExact) \ - V(MethodHandleInvoke) \ - /* OpenJDK 11 */ \ - V(JdkUnsafeGetAndAddInt) \ - V(JdkUnsafeGetAndAddLong) \ - V(JdkUnsafeGetAndSetInt) \ - V(JdkUnsafeGetAndSetLong) \ - V(JdkUnsafeGetAndSetObject) + V(MethodHandleInvoke) class InvokeRuntimeCallingConvention : public CallingConvention<Register, FloatRegister> { public: @@ -162,16 +151,16 @@ class FieldAccessCallingConventionX86_64 : public FieldAccessCallingConvention { Location GetFieldIndexLocation() const override { return Location::RegisterLocation(RDI); } - Location GetReturnLocation(DataType::Type type ATTRIBUTE_UNUSED) const override { + Location 
GetReturnLocation([[maybe_unused]] DataType::Type type) const override { return Location::RegisterLocation(RAX); } - Location GetSetValueLocation(DataType::Type type ATTRIBUTE_UNUSED, bool is_instance) - const override { + Location GetSetValueLocation([[maybe_unused]] DataType::Type type, + bool is_instance) const override { return is_instance ? Location::RegisterLocation(RDX) : Location::RegisterLocation(RSI); } - Location GetFpuLocation(DataType::Type type ATTRIBUTE_UNUSED) const override { + Location GetFpuLocation([[maybe_unused]] DataType::Type type) const override { return Location::FpuRegisterLocation(XMM0); } @@ -468,7 +457,7 @@ class CodeGeneratorX86_64 : public CodeGenerator { void SetupBlockedRegisters() const override; void DumpCoreRegister(std::ostream& stream, int reg) const override; void DumpFloatingPointRegister(std::ostream& stream, int reg) const override; - void Finalize(CodeAllocator* allocator) override; + void Finalize() override; InstructionSet GetInstructionSet() const override { return InstructionSet::kX86_64; @@ -502,9 +491,7 @@ class CodeGeneratorX86_64 : public CodeGenerator { block_labels_ = CommonInitializeLabels<Label>(); } - bool NeedsTwoRegisters(DataType::Type type ATTRIBUTE_UNUSED) const override { - return false; - } + bool NeedsTwoRegisters([[maybe_unused]] DataType::Type type) const override { return false; } // Check if the desired_string_load_kind is supported. If it is, return it, // otherwise return a fall-back kind that should be used instead. @@ -536,6 +523,7 @@ class CodeGeneratorX86_64 : public CodeGenerator { Label* NewTypeBssEntryPatch(HLoadClass* load_class); void RecordBootImageStringPatch(HLoadString* load_string); Label* NewStringBssEntryPatch(HLoadString* load_string); + Label* NewMethodTypeBssEntryPatch(HLoadMethodType* load_method_type); void RecordBootImageJniEntrypointPatch(HInvokeStaticOrDirect* invoke); Label* NewJitRootStringPatch(const DexFile& dex_file, dex::StringIndex string_index, @@ -748,6 +736,8 @@ class CodeGeneratorX86_64 : public CodeGenerator { ArenaDeque<PatchInfo<Label>> boot_image_string_patches_; // PC-relative String patch info for kBssEntry. ArenaDeque<PatchInfo<Label>> string_bss_entry_patches_; + // PC-relative MethodType patch info for kBssEntry. + ArenaDeque<PatchInfo<Label>> method_type_bss_entry_patches_; // PC-relative method patch info for kBootImageLinkTimePcRelative+kCallCriticalNative. ArenaDeque<PatchInfo<Label>> boot_image_jni_entrypoint_patches_; // PC-relative patch info for IntrinsicObjects for the boot image, diff --git a/compiler/optimizing/code_sinking.cc b/compiler/optimizing/code_sinking.cc index d759a16f48..a2371817ee 100644 --- a/compiler/optimizing/code_sinking.cc +++ b/compiler/optimizing/code_sinking.cc @@ -16,6 +16,9 @@ #include "code_sinking.h" +#include <sstream> + +#include "android-base/logging.h" #include "base/arena_bit_vector.h" #include "base/array_ref.h" #include "base/bit_vector-inl.h" @@ -134,7 +137,6 @@ static bool IsInterestingInstruction(HInstruction* instruction) { // hard to test, as LSE removes them. 
if (instruction->IsStaticFieldGet() || instruction->IsInstanceFieldGet() || - instruction->IsPredicatedInstanceFieldGet() || instruction->IsArrayGet()) { return false; } @@ -335,10 +337,6 @@ void CodeSinking::SinkCodeToUncommonBranch(HBasicBlock* end_block) { processed_instructions.ClearAllBits(); ArenaBitVector post_dominated(&allocator, graph_->GetBlocks().size(), /* expandable= */ false); post_dominated.ClearAllBits(); - ArenaBitVector instructions_that_can_move( - &allocator, number_of_instructions, /* expandable= */ false); - instructions_that_can_move.ClearAllBits(); - ScopedArenaVector<HInstruction*> move_in_order(allocator.Adapter(kArenaAllocMisc)); // Step (1): Visit post order to get a subset of blocks post dominated by `end_block`. // TODO(ngeoffray): Getting the full set of post-dominated should be done by @@ -411,6 +409,13 @@ void CodeSinking::SinkCodeToUncommonBranch(HBasicBlock* end_block) { HBasicBlock* common_dominator = finder.Get(); // Step (2): iterate over the worklist to find sinking candidates. + ArenaBitVector instructions_that_can_move( + &allocator, number_of_instructions, /* expandable= */ false); + instructions_that_can_move.ClearAllBits(); + ScopedArenaVector<ScopedArenaVector<HInstruction*>> instructions_to_move( + graph_->GetBlocks().size(), + ScopedArenaVector<HInstruction*>(allocator.Adapter(kArenaAllocMisc)), + allocator.Adapter(kArenaAllocMisc)); while (!worklist.empty()) { HInstruction* instruction = worklist.back(); if (processed_instructions.IsBitSet(instruction->GetId())) { @@ -467,7 +472,7 @@ void CodeSinking::SinkCodeToUncommonBranch(HBasicBlock* end_block) { // Instruction is a candidate for being sunk. Mark it as such, remove it from the // work list, and add its inputs to the work list. instructions_that_can_move.SetBit(instruction->GetId()); - move_in_order.push_back(instruction); + instructions_to_move[instruction->GetBlock()->GetBlockId()].push_back(instruction); processed_instructions.SetBit(instruction->GetId()); worklist.pop_back(); AddInputs(instruction, processed_instructions, post_dominated, &worklist); @@ -493,14 +498,50 @@ void CodeSinking::SinkCodeToUncommonBranch(HBasicBlock* end_block) { } } - // Make sure we process instructions in dominated order. This is required for heap - // stores. - std::sort(move_in_order.begin(), move_in_order.end(), [](HInstruction* a, HInstruction* b) { - return b->StrictlyDominates(a); - }); + // We want to process the instructions in reverse dominated order. This is required for heap + // stores. To guarantee this (including the transitivity of incomparability) we have some extra + // bookkeeping. + ScopedArenaVector<HInstruction*> instructions_to_move_sorted(allocator.Adapter(kArenaAllocMisc)); + for (HBasicBlock* block : graph_->GetPostOrder()) { + const int block_id = block->GetBlockId(); + + // Order the block itself first. + std::sort(instructions_to_move[block_id].begin(), + instructions_to_move[block_id].end(), + [&block](HInstruction* a, HInstruction* b) { + return block->GetInstructions().FoundBefore(b, a); + }); + + for (HInstruction* instruction : instructions_to_move[block_id]) { + instructions_to_move_sorted.push_back(instruction); + } + } + + if (kIsDebugBuild) { + // We should have ordered the instructions in reverse dominated order. This means that + // instructions shouldn't dominate instructions that come after it in the vector. 
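As an aside, a minimal standalone sketch of the two-level ordering used here, with toy types (ToyInstr and the hard-coded post order are illustrative stand-ins, not ART's HInstruction/HBasicBlock): sorting each block's candidates from last to first and then concatenating the blocks in post order yields the property that the debug check below asserts.

    #include <algorithm>
    #include <cstdio>
    #include <vector>

    struct ToyInstr { int block; int pos; };  // pos = position inside the block

    int main() {
      std::vector<std::vector<ToyInstr>> per_block = {
          {{0, 3}, {0, 7}},   // candidates in block 0 (dominates 1 and 2)
          {{1, 2}},           // candidates in block 1
          {{2, 5}, {2, 1}}};  // candidates in block 2
      const int post_order[] = {2, 1, 0};  // stands in for GetPostOrder()
      std::vector<ToyInstr> sorted;
      for (int block : post_order) {
        auto& list = per_block[block];
        // Within a block: later instructions first, mirroring FoundBefore(b, a).
        std::sort(list.begin(), list.end(),
                  [](const ToyInstr& a, const ToyInstr& b) { return a.pos > b.pos; });
        sorted.insert(sorted.end(), list.begin(), list.end());
      }
      for (const ToyInstr& i : sorted) std::printf("b%d:%d ", i.block, i.pos);
      // Prints "b2:5 b2:1 b1:2 b0:7 b0:3": no element dominates one that appears
      // after it, which is exactly what the kIsDebugBuild check below verifies.
      return 0;
    }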
+ for (size_t i = 0; i < instructions_to_move_sorted.size(); ++i) { + for (size_t j = i + 1; j < instructions_to_move_sorted.size(); ++j) { + if (instructions_to_move_sorted[i]->StrictlyDominates(instructions_to_move_sorted[j])) { + std::stringstream ss; + graph_->Dump(ss, nullptr); + ss << "\n" + << "{"; + for (HInstruction* instr : instructions_to_move_sorted) { + ss << *instr << " in block: " << instr->GetBlock() << ", "; + } + ss << "}\n"; + ss << "i = " << i << " which is " << *instructions_to_move_sorted[i] + << "strictly dominates j = " << j << " which is " << *instructions_to_move_sorted[j] + << "\n"; + LOG(FATAL) << "Unexpected ordering of code sinking instructions: " << ss.str(); + } + } + } + } // Step (3): Try to move sinking candidates. - for (HInstruction* instruction : move_in_order) { + for (HInstruction* instruction : instructions_to_move_sorted) { HInstruction* position = nullptr; if (instruction->IsArraySet() || instruction->IsInstanceFieldSet() diff --git a/compiler/optimizing/codegen_test.cc b/compiler/optimizing/codegen_test.cc index 2d9acc49b3..c72d3ea24a 100644 --- a/compiler/optimizing/codegen_test.cc +++ b/compiler/optimizing/codegen_test.cc @@ -733,8 +733,7 @@ TEST_F(CodegenTest, ARMVIXLParallelMoveResolver) { move->AddMove(Location::StackSlot(8192), Location::StackSlot(0), DataType::Type::kInt32, nullptr); codegen.GetMoveResolver()->EmitNativeCode(move); - InternalCodeAllocator code_allocator; - codegen.Finalize(&code_allocator); + codegen.Finalize(); } #endif @@ -785,8 +784,7 @@ TEST_F(CodegenTest, ARM64ParallelMoveResolverB34760542) { nullptr); codegen.GetMoveResolver()->EmitNativeCode(move); - InternalCodeAllocator code_allocator; - codegen.Finalize(&code_allocator); + codegen.Finalize(); } // Check that ParallelMoveResolver works fine for ARM64 for both cases when SIMD is on and off. @@ -798,7 +796,7 @@ TEST_F(CodegenTest, ARM64ParallelMoveResolverSIMD) { codegen.Initialize(); - graph->SetHasSIMD(true); + graph->SetHasTraditionalSIMD(true); for (int i = 0; i < 2; i++) { HParallelMove* move = new (graph->GetAllocator()) HParallelMove(graph->GetAllocator()); move->AddMove(Location::SIMDStackSlot(0), @@ -818,11 +816,10 @@ TEST_F(CodegenTest, ARM64ParallelMoveResolverSIMD) { DataType::Type::kFloat64, nullptr); codegen.GetMoveResolver()->EmitNativeCode(move); - graph->SetHasSIMD(false); + graph->SetHasTraditionalSIMD(false); } - InternalCodeAllocator code_allocator; - codegen.Finalize(&code_allocator); + codegen.Finalize(); } // Check that ART ISA Features are propagated to VIXL for arm64 (using cortex-a75 as example). 
@@ -867,7 +864,7 @@ TEST_F(CodegenTest, ARM64FrameSizeSIMD) { arm64::CodeGeneratorARM64 codegen(graph, *compiler_options); codegen.Initialize(); - graph->SetHasSIMD(true); + graph->SetHasTraditionalSIMD(true); DCHECK_EQ(arm64::callee_saved_fp_registers.GetCount(), 8); vixl::aarch64::CPURegList reg_list = arm64::callee_saved_fp_registers; @@ -887,7 +884,8 @@ TEST_F(CodegenTest, ARM64FrameSizeNoSIMD) { arm64::CodeGeneratorARM64 codegen(graph, *compiler_options); codegen.Initialize(); - graph->SetHasSIMD(false); + graph->SetHasTraditionalSIMD(false); + graph->SetHasPredicatedSIMD(false); DCHECK_EQ(arm64::callee_saved_fp_registers.GetCount(), 8); vixl::aarch64::CPURegList reg_list = arm64::callee_saved_fp_registers; diff --git a/compiler/optimizing/codegen_test_utils.h b/compiler/optimizing/codegen_test_utils.h index 7af9d0f44c..a8425c9915 100644 --- a/compiler/optimizing/codegen_test_utils.h +++ b/compiler/optimizing/codegen_test_utils.h @@ -103,8 +103,8 @@ class TestCodeGeneratorARMVIXL : public arm::CodeGeneratorARMVIXL { blocked_core_registers_[arm::R7] = false; } - void MaybeGenerateMarkingRegisterCheck(int code ATTRIBUTE_UNUSED, - Location temp_loc ATTRIBUTE_UNUSED) override { + void MaybeGenerateMarkingRegisterCheck([[maybe_unused]] int code, + [[maybe_unused]] Location temp_loc) override { // When turned on, the marking register checks in // CodeGeneratorARMVIXL::MaybeGenerateMarkingRegisterCheck expects the // Thread Register and the Marking Register to be set to @@ -135,8 +135,8 @@ class TestCodeGeneratorARM64 : public arm64::CodeGeneratorARM64 { TestCodeGeneratorARM64(HGraph* graph, const CompilerOptions& compiler_options) : arm64::CodeGeneratorARM64(graph, compiler_options) {} - void MaybeGenerateMarkingRegisterCheck(int codem ATTRIBUTE_UNUSED, - Location temp_loc ATTRIBUTE_UNUSED) override { + void MaybeGenerateMarkingRegisterCheck([[maybe_unused]] int codem, + [[maybe_unused]] Location temp_loc) override { // When turned on, the marking register checks in // CodeGeneratorARM64::MaybeGenerateMarkingRegisterCheck expect the // Thread Register and the Marking Register to be set to @@ -167,28 +167,6 @@ class TestCodeGeneratorX86 : public x86::CodeGeneratorX86 { }; #endif -class InternalCodeAllocator : public CodeAllocator { - public: - InternalCodeAllocator() : size_(0) { } - - uint8_t* Allocate(size_t size) override { - size_ = size; - memory_.reset(new uint8_t[size]); - return memory_.get(); - } - - size_t GetSize() const { return size_; } - ArrayRef<const uint8_t> GetMemory() const override { - return ArrayRef<const uint8_t>(memory_.get(), size_); - } - - private: - size_t size_; - std::unique_ptr<uint8_t[]> memory_; - - DISALLOW_COPY_AND_ASSIGN(InternalCodeAllocator); -}; - static bool CanExecuteOnHardware(InstructionSet target_isa) { return (target_isa == kRuntimeISA) // Handle the special case of ARM, with two instructions sets (ARM32 and Thumb-2). 
@@ -247,8 +225,7 @@ static void VerifyGeneratedCode(InstructionSet target_isa, } template <typename Expected> -static void Run(const InternalCodeAllocator& allocator, - const CodeGenerator& codegen, +static void Run(const CodeGenerator& codegen, bool has_result, Expected expected) { InstructionSet target_isa = codegen.GetInstructionSet(); @@ -260,7 +237,7 @@ static void Run(const InternalCodeAllocator& allocator, }; CodeHolder code_holder; const void* method_code = - code_holder.MakeExecutable(allocator.GetMemory(), ArrayRef<const uint8_t>(), target_isa); + code_holder.MakeExecutable(codegen.GetCode(), ArrayRef<const uint8_t>(), target_isa); using fptr = Expected (*)(); fptr f = reinterpret_cast<fptr>(reinterpret_cast<uintptr_t>(method_code)); @@ -294,9 +271,8 @@ static void RunCodeNoCheck(CodeGenerator* codegen, register_allocator->AllocateRegisters(); } hook_before_codegen(graph); - InternalCodeAllocator allocator; - codegen->Compile(&allocator); - Run(allocator, *codegen, has_result, expected); + codegen->Compile(); + Run(*codegen, has_result, expected); } template <typename Expected> diff --git a/compiler/optimizing/common_arm64.h b/compiler/optimizing/common_arm64.h index 20b0e38af5..e2ef8d52f2 100644 --- a/compiler/optimizing/common_arm64.h +++ b/compiler/optimizing/common_arm64.h @@ -311,10 +311,8 @@ inline bool Arm64CanEncodeConstantAsImmediate(HConstant* constant, HInstruction* } } -inline Location ARM64EncodableConstantOrRegister(HInstruction* constant, - HInstruction* instr) { - if (constant->IsConstant() - && Arm64CanEncodeConstantAsImmediate(constant->AsConstant(), instr)) { +inline Location ARM64EncodableConstantOrRegister(HInstruction* constant, HInstruction* instr) { + if (constant->IsConstant() && Arm64CanEncodeConstantAsImmediate(constant->AsConstant(), instr)) { return Location::ConstantLocation(constant); } diff --git a/compiler/optimizing/constant_folding.cc b/compiler/optimizing/constant_folding.cc index 06d19e3f29..66bbf548bb 100644 --- a/compiler/optimizing/constant_folding.cc +++ b/compiler/optimizing/constant_folding.cc @@ -18,7 +18,11 @@ #include <algorithm> +#include "base/bit_utils.h" +#include "base/casts.h" +#include "base/logging.h" #include "dex/dex_file-inl.h" +#include "intrinsics_enum.h" #include "optimizing/data_type.h" #include "optimizing/nodes.h" @@ -37,13 +41,31 @@ class HConstantFoldingVisitor final : public HGraphDelegateVisitor { void VisitUnaryOperation(HUnaryOperation* inst) override; void VisitBinaryOperation(HBinaryOperation* inst) override; + // Tries to replace constants in binary operations like: + // * BinaryOp(Select(false_constant, true_constant, condition), other_constant), or + // * BinaryOp(other_constant, Select(false_constant, true_constant, condition)) + // with consolidated constants. For example, Add(Select(10, 20, condition), 5) can be replaced + // with Select(15, 25, condition). 
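A standalone sketch of the equivalence behind this rewrite, using plain integers rather than HIR nodes (SelectThenAdd and FoldedSelect are illustrative names, not part of the pass):

    #include <cassert>

    // Add(Select(10, 20, cond), 5): select first, then add the other constant.
    int SelectThenAdd(bool cond) {
      int selected = cond ? 20 : 10;  // HSelect keeps (false_value, true_value, condition)
      return selected + 5;
    }

    // Select(15, 25, cond): the addend folded into both select inputs.
    int FoldedSelect(bool cond) { return cond ? 25 : 15; }

    int main() {
      const bool conds[] = {false, true};
      for (bool cond : conds) {
        assert(SelectThenAdd(cond) == FoldedSelect(cond));
      }
      return 0;
    }

The single-use requirement matters because the select's inputs are rewritten in place; any other user of the select would otherwise observe 15/25 where it expected 10/20.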
+ bool TryRemoveBinaryOperationViaSelect(HBinaryOperation* inst); + void VisitArrayLength(HArrayLength* inst) override; void VisitDivZeroCheck(HDivZeroCheck* inst) override; void VisitIf(HIf* inst) override; + void VisitInvoke(HInvoke* inst) override; void VisitTypeConversion(HTypeConversion* inst) override; void PropagateValue(HBasicBlock* starting_block, HInstruction* variable, HConstant* constant); + // Intrinsics foldings + void FoldReverseIntrinsic(HInvoke* invoke); + void FoldReverseBytesIntrinsic(HInvoke* invoke); + void FoldBitCountIntrinsic(HInvoke* invoke); + void FoldDivideUnsignedIntrinsic(HInvoke* invoke); + void FoldHighestOneBitIntrinsic(HInvoke* invoke); + void FoldLowestOneBitIntrinsic(HInvoke* invoke); + void FoldNumberOfLeadingZerosIntrinsic(HInvoke* invoke); + void FoldNumberOfTrailingZerosIntrinsic(HInvoke* invoke); + // Use all optimizations without restrictions. bool use_all_optimizations_; @@ -113,7 +135,67 @@ void HConstantFoldingVisitor::VisitUnaryOperation(HUnaryOperation* inst) { if (constant != nullptr) { inst->ReplaceWith(constant); inst->GetBlock()->RemoveInstruction(inst); + } else if (inst->InputAt(0)->IsSelect() && inst->InputAt(0)->HasOnlyOneNonEnvironmentUse()) { + // Try to replace the select's inputs in Select+UnaryOperation cases. We can do this if both + // inputs to the select are constants, and this is the only use of the select. + HSelect* select = inst->InputAt(0)->AsSelect(); + HConstant* false_constant = inst->TryStaticEvaluation(select->GetFalseValue()); + if (false_constant == nullptr) { + return; + } + HConstant* true_constant = inst->TryStaticEvaluation(select->GetTrueValue()); + if (true_constant == nullptr) { + return; + } + DCHECK_EQ(select->InputAt(0), select->GetFalseValue()); + DCHECK_EQ(select->InputAt(1), select->GetTrueValue()); + select->ReplaceInput(false_constant, 0); + select->ReplaceInput(true_constant, 1); + select->UpdateType(); + inst->ReplaceWith(select); + inst->GetBlock()->RemoveInstruction(inst); + } +} + +bool HConstantFoldingVisitor::TryRemoveBinaryOperationViaSelect(HBinaryOperation* inst) { + if (inst->GetLeft()->IsSelect() == inst->GetRight()->IsSelect()) { + // If both of them are constants, VisitBinaryOperation already tried the static evaluation. If + // both of them are selects, then we can't simplify. + // TODO(solanes): Technically, if both of them are selects we could simplify iff both select's + // conditions are equal e.g. Add(Select(1, 2, cond), Select(3, 4, cond)) could be replaced with + // Select(4, 6, cond). This seems very unlikely to happen so we don't implement it. + return false; + } + + const bool left_is_select = inst->GetLeft()->IsSelect(); + HSelect* select = left_is_select ? inst->GetLeft()->AsSelect() : inst->GetRight()->AsSelect(); + HInstruction* maybe_constant = left_is_select ? inst->GetRight() : inst->GetLeft(); + + if (select->HasOnlyOneNonEnvironmentUse()) { + // Try to replace the select's inputs in Select+BinaryOperation. We can do this if both + // inputs to the select are constants, and this is the only use of the select. + HConstant* false_constant = + inst->TryStaticEvaluation(left_is_select ? select->GetFalseValue() : maybe_constant, + left_is_select ? maybe_constant : select->GetFalseValue()); + if (false_constant == nullptr) { + return false; + } + HConstant* true_constant = + inst->TryStaticEvaluation(left_is_select ? select->GetTrueValue() : maybe_constant, + left_is_select ? 
maybe_constant : select->GetTrueValue()); + if (true_constant == nullptr) { + return false; + } + DCHECK_EQ(select->InputAt(0), select->GetFalseValue()); + DCHECK_EQ(select->InputAt(1), select->GetTrueValue()); + select->ReplaceInput(false_constant, 0); + select->ReplaceInput(true_constant, 1); + select->UpdateType(); + inst->ReplaceWith(select); + inst->GetBlock()->RemoveInstruction(inst); + return true; } + return false; } void HConstantFoldingVisitor::VisitBinaryOperation(HBinaryOperation* inst) { @@ -123,6 +205,8 @@ void HConstantFoldingVisitor::VisitBinaryOperation(HBinaryOperation* inst) { if (constant != nullptr) { inst->ReplaceWith(constant); inst->GetBlock()->RemoveInstruction(inst); + } else if (TryRemoveBinaryOperationViaSelect(inst)) { + // Already replaced inside TryRemoveBinaryOperationViaSelect. } else { InstructionWithAbsorbingInputSimplifier simplifier(GetGraph()); inst->Accept(&simplifier); @@ -281,6 +365,245 @@ void HConstantFoldingVisitor::VisitIf(HIf* inst) { } } +void HConstantFoldingVisitor::VisitInvoke(HInvoke* inst) { + switch (inst->GetIntrinsic()) { + case Intrinsics::kIntegerReverse: + case Intrinsics::kLongReverse: + FoldReverseIntrinsic(inst); + break; + case Intrinsics::kIntegerReverseBytes: + case Intrinsics::kLongReverseBytes: + case Intrinsics::kShortReverseBytes: + FoldReverseBytesIntrinsic(inst); + break; + case Intrinsics::kIntegerBitCount: + case Intrinsics::kLongBitCount: + FoldBitCountIntrinsic(inst); + break; + case Intrinsics::kIntegerDivideUnsigned: + case Intrinsics::kLongDivideUnsigned: + FoldDivideUnsignedIntrinsic(inst); + break; + case Intrinsics::kIntegerHighestOneBit: + case Intrinsics::kLongHighestOneBit: + FoldHighestOneBitIntrinsic(inst); + break; + case Intrinsics::kIntegerLowestOneBit: + case Intrinsics::kLongLowestOneBit: + FoldLowestOneBitIntrinsic(inst); + break; + case Intrinsics::kIntegerNumberOfLeadingZeros: + case Intrinsics::kLongNumberOfLeadingZeros: + FoldNumberOfLeadingZerosIntrinsic(inst); + break; + case Intrinsics::kIntegerNumberOfTrailingZeros: + case Intrinsics::kLongNumberOfTrailingZeros: + FoldNumberOfTrailingZerosIntrinsic(inst); + break; + default: + break; + } +} + +void HConstantFoldingVisitor::FoldReverseIntrinsic(HInvoke* inst) { + DCHECK(inst->GetIntrinsic() == Intrinsics::kIntegerReverse || + inst->GetIntrinsic() == Intrinsics::kLongReverse); + + HInstruction* input = inst->InputAt(0); + if (!input->IsConstant()) { + return; + } + + // Integer and Long intrinsics have different return types. + if (inst->GetIntrinsic() == Intrinsics::kIntegerReverse) { + DCHECK(input->IsIntConstant()); + inst->ReplaceWith( + GetGraph()->GetIntConstant(ReverseBits32(input->AsIntConstant()->GetValue()))); + } else { + DCHECK(input->IsLongConstant()); + inst->ReplaceWith( + GetGraph()->GetLongConstant(ReverseBits64(input->AsLongConstant()->GetValue()))); + } + inst->GetBlock()->RemoveInstruction(inst); +} + +void HConstantFoldingVisitor::FoldReverseBytesIntrinsic(HInvoke* inst) { + DCHECK(inst->GetIntrinsic() == Intrinsics::kIntegerReverseBytes || + inst->GetIntrinsic() == Intrinsics::kLongReverseBytes || + inst->GetIntrinsic() == Intrinsics::kShortReverseBytes); + + HInstruction* input = inst->InputAt(0); + if (!input->IsConstant()) { + return; + } + + // Integer, Long, and Short intrinsics have different return types. 
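For reference, a standalone sketch of the values these byte-reversal foldings produce; __builtin_bswap16/32/64 are GCC/Clang builtins used here only as a stand-in for ART's BSWAP helper:

    #include <cassert>
    #include <cstdint>

    int main() {
      // Integer.reverseBytes(0x12345678) == 0x78563412
      assert(__builtin_bswap32(0x12345678u) == 0x78563412u);
      // Long.reverseBytes(0x0123456789abcdefL) == 0xefcdab8967452301L
      assert(__builtin_bswap64(0x0123456789abcdefULL) == 0xefcdab8967452301ULL);
      // Short.reverseBytes(0x00ff) == (short) 0xff00 == -256; the result is
      // sign-extended, which is what the int16_t cast below models.
      assert(static_cast<int16_t>(__builtin_bswap16(0x00ffu)) == -256);
      return 0;
    }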
+ if (inst->GetIntrinsic() == Intrinsics::kIntegerReverseBytes) { + DCHECK(input->IsIntConstant()); + inst->ReplaceWith(GetGraph()->GetIntConstant(BSWAP(input->AsIntConstant()->GetValue()))); + } else if (inst->GetIntrinsic() == Intrinsics::kLongReverseBytes) { + DCHECK(input->IsLongConstant()); + inst->ReplaceWith(GetGraph()->GetLongConstant(BSWAP(input->AsLongConstant()->GetValue()))); + } else { + DCHECK(input->IsIntConstant()); + inst->ReplaceWith(GetGraph()->GetIntConstant( + BSWAP(dchecked_integral_cast<int16_t>(input->AsIntConstant()->GetValue())))); + } + inst->GetBlock()->RemoveInstruction(inst); +} + +void HConstantFoldingVisitor::FoldBitCountIntrinsic(HInvoke* inst) { + DCHECK(inst->GetIntrinsic() == Intrinsics::kIntegerBitCount || + inst->GetIntrinsic() == Intrinsics::kLongBitCount); + + HInstruction* input = inst->InputAt(0); + if (!input->IsConstant()) { + return; + } + + DCHECK_IMPLIES(inst->GetIntrinsic() == Intrinsics::kIntegerBitCount, input->IsIntConstant()); + DCHECK_IMPLIES(inst->GetIntrinsic() == Intrinsics::kLongBitCount, input->IsLongConstant()); + + // Note that both the Integer and Long intrinsics return an int as a result. + int result = inst->GetIntrinsic() == Intrinsics::kIntegerBitCount ? + POPCOUNT(input->AsIntConstant()->GetValue()) : + POPCOUNT(input->AsLongConstant()->GetValue()); + inst->ReplaceWith(GetGraph()->GetIntConstant(result)); + inst->GetBlock()->RemoveInstruction(inst); +} + +void HConstantFoldingVisitor::FoldDivideUnsignedIntrinsic(HInvoke* inst) { + DCHECK(inst->GetIntrinsic() == Intrinsics::kIntegerDivideUnsigned || + inst->GetIntrinsic() == Intrinsics::kLongDivideUnsigned); + + HInstruction* divisor = inst->InputAt(1); + if (!divisor->IsConstant()) { + return; + } + DCHECK_IMPLIES(inst->GetIntrinsic() == Intrinsics::kIntegerDivideUnsigned, + divisor->IsIntConstant()); + DCHECK_IMPLIES(inst->GetIntrinsic() == Intrinsics::kLongDivideUnsigned, + divisor->IsLongConstant()); + const bool is_int_intrinsic = inst->GetIntrinsic() == Intrinsics::kIntegerDivideUnsigned; + if ((is_int_intrinsic && divisor->AsIntConstant()->IsArithmeticZero()) || + (!is_int_intrinsic && divisor->AsLongConstant()->IsArithmeticZero())) { + // We will be throwing, don't constant fold. 
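A standalone sketch of the unsigned-division semantics being folded here; DivideUnsigned32 is an illustrative helper mirroring Integer.divideUnsigned, not an ART function:

    #include <cassert>
    #include <cstdint>

    int32_t DivideUnsigned32(int32_t dividend, int32_t divisor) {
      // Both operands are reinterpreted as unsigned before dividing, just like
      // the GetValueAsUint64()-based folding; a zero divisor is never folded
      // because the runtime must throw ArithmeticException instead.
      return static_cast<int32_t>(static_cast<uint32_t>(dividend) /
                                  static_cast<uint32_t>(divisor));
    }

    int main() {
      assert(DivideUnsigned32(10, 3) == 3);
      assert(DivideUnsigned32(-2, 3) == 1431655764);  // 0xFFFFFFFEu / 3
      return 0;
    }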
+ inst->SetAlwaysThrows(true); + GetGraph()->SetHasAlwaysThrowingInvokes(true); + return; + } + + HInstruction* dividend = inst->InputAt(0); + if (!dividend->IsConstant()) { + return; + } + DCHECK_IMPLIES(inst->GetIntrinsic() == Intrinsics::kIntegerDivideUnsigned, + dividend->IsIntConstant()); + DCHECK_IMPLIES(inst->GetIntrinsic() == Intrinsics::kLongDivideUnsigned, + dividend->IsLongConstant()); + + if (is_int_intrinsic) { + uint32_t dividend_val = + dchecked_integral_cast<uint32_t>(dividend->AsIntConstant()->GetValueAsUint64()); + uint32_t divisor_val = + dchecked_integral_cast<uint32_t>(divisor->AsIntConstant()->GetValueAsUint64()); + inst->ReplaceWith(GetGraph()->GetIntConstant(static_cast<int32_t>(dividend_val / divisor_val))); + } else { + uint64_t dividend_val = dividend->AsLongConstant()->GetValueAsUint64(); + uint64_t divisor_val = divisor->AsLongConstant()->GetValueAsUint64(); + inst->ReplaceWith( + GetGraph()->GetLongConstant(static_cast<int64_t>(dividend_val / divisor_val))); + } + + inst->GetBlock()->RemoveInstruction(inst); +} + +void HConstantFoldingVisitor::FoldHighestOneBitIntrinsic(HInvoke* inst) { + DCHECK(inst->GetIntrinsic() == Intrinsics::kIntegerHighestOneBit || + inst->GetIntrinsic() == Intrinsics::kLongHighestOneBit); + + HInstruction* input = inst->InputAt(0); + if (!input->IsConstant()) { + return; + } + + // Integer and Long intrinsics have different return types. + if (inst->GetIntrinsic() == Intrinsics::kIntegerHighestOneBit) { + DCHECK(input->IsIntConstant()); + inst->ReplaceWith( + GetGraph()->GetIntConstant(HighestOneBitValue(input->AsIntConstant()->GetValue()))); + } else { + DCHECK(input->IsLongConstant()); + inst->ReplaceWith( + GetGraph()->GetLongConstant(HighestOneBitValue(input->AsLongConstant()->GetValue()))); + } + inst->GetBlock()->RemoveInstruction(inst); +} + +void HConstantFoldingVisitor::FoldLowestOneBitIntrinsic(HInvoke* inst) { + DCHECK(inst->GetIntrinsic() == Intrinsics::kIntegerLowestOneBit || + inst->GetIntrinsic() == Intrinsics::kLongLowestOneBit); + + HInstruction* input = inst->InputAt(0); + if (!input->IsConstant()) { + return; + } + + // Integer and Long intrinsics have different return types. + if (inst->GetIntrinsic() == Intrinsics::kIntegerLowestOneBit) { + DCHECK(input->IsIntConstant()); + inst->ReplaceWith( + GetGraph()->GetIntConstant(LowestOneBitValue(input->AsIntConstant()->GetValue()))); + } else { + DCHECK(input->IsLongConstant()); + inst->ReplaceWith( + GetGraph()->GetLongConstant(LowestOneBitValue(input->AsLongConstant()->GetValue()))); + } + inst->GetBlock()->RemoveInstruction(inst); +} + +void HConstantFoldingVisitor::FoldNumberOfLeadingZerosIntrinsic(HInvoke* inst) { + DCHECK(inst->GetIntrinsic() == Intrinsics::kIntegerNumberOfLeadingZeros || + inst->GetIntrinsic() == Intrinsics::kLongNumberOfLeadingZeros); + + HInstruction* input = inst->InputAt(0); + if (!input->IsConstant()) { + return; + } + + DCHECK_IMPLIES(inst->GetIntrinsic() == Intrinsics::kIntegerNumberOfLeadingZeros, + input->IsIntConstant()); + DCHECK_IMPLIES(inst->GetIntrinsic() == Intrinsics::kLongNumberOfLeadingZeros, + input->IsLongConstant()); + + // Note that both the Integer and Long intrinsics return an int as a result. + int result = input->IsIntConstant() ? 
JAVASTYLE_CLZ(input->AsIntConstant()->GetValue()) : + JAVASTYLE_CLZ(input->AsLongConstant()->GetValue()); + inst->ReplaceWith(GetGraph()->GetIntConstant(result)); + inst->GetBlock()->RemoveInstruction(inst); +} + +void HConstantFoldingVisitor::FoldNumberOfTrailingZerosIntrinsic(HInvoke* inst) { + DCHECK(inst->GetIntrinsic() == Intrinsics::kIntegerNumberOfTrailingZeros || + inst->GetIntrinsic() == Intrinsics::kLongNumberOfTrailingZeros); + + HInstruction* input = inst->InputAt(0); + if (!input->IsConstant()) { + return; + } + + DCHECK_IMPLIES(inst->GetIntrinsic() == Intrinsics::kIntegerNumberOfTrailingZeros, + input->IsIntConstant()); + DCHECK_IMPLIES(inst->GetIntrinsic() == Intrinsics::kLongNumberOfTrailingZeros, + input->IsLongConstant()); + + // Note that both the Integer and Long intrinsics return an int as a result. + int result = input->IsIntConstant() ? JAVASTYLE_CTZ(input->AsIntConstant()->GetValue()) : + JAVASTYLE_CTZ(input->AsLongConstant()->GetValue()); + inst->ReplaceWith(GetGraph()->GetIntConstant(result)); + inst->GetBlock()->RemoveInstruction(inst); +} + void HConstantFoldingVisitor::VisitArrayLength(HArrayLength* inst) { HInstruction* input = inst->InputAt(0); if (input->IsLoadString()) { @@ -299,6 +622,25 @@ void HConstantFoldingVisitor::VisitTypeConversion(HTypeConversion* inst) { if (constant != nullptr) { inst->ReplaceWith(constant); inst->GetBlock()->RemoveInstruction(inst); + } else if (inst->InputAt(0)->IsSelect() && inst->InputAt(0)->HasOnlyOneNonEnvironmentUse()) { + // Try to replace the select's inputs in Select+TypeConversion. We can do this if both + // inputs to the select are constants, and this is the only use of the select. + HSelect* select = inst->InputAt(0)->AsSelect(); + HConstant* false_constant = inst->TryStaticEvaluation(select->GetFalseValue()); + if (false_constant == nullptr) { + return; + } + HConstant* true_constant = inst->TryStaticEvaluation(select->GetTrueValue()); + if (true_constant == nullptr) { + return; + } + DCHECK_EQ(select->InputAt(0), select->GetFalseValue()); + DCHECK_EQ(select->InputAt(1), select->GetTrueValue()); + select->ReplaceInput(false_constant, 0); + select->ReplaceInput(true_constant, 1); + select->UpdateType(); + inst->ReplaceWith(select); + inst->GetBlock()->RemoveInstruction(inst); } } @@ -583,7 +925,7 @@ void InstructionWithAbsorbingInputSimplifier::VisitRem(HRem* instruction) { block->RemoveInstruction(instruction); } - HConstant* cst_right = instruction->GetRight()->AsConstant(); + HConstant* cst_right = instruction->GetRight()->AsConstantOrNull(); if (((cst_right != nullptr) && (cst_right->IsOne() || cst_right->IsMinusOne())) || (instruction->GetLeft() == instruction->GetRight())) { diff --git a/compiler/optimizing/constant_folding_test.cc b/compiler/optimizing/constant_folding_test.cc index 741fd3f822..acdc8e6d3c 100644 --- a/compiler/optimizing/constant_folding_test.cc +++ b/compiler/optimizing/constant_folding_test.cc @@ -551,7 +551,7 @@ TEST_F(ConstantFoldingTest, LongConstantFoldingOnSubtraction) { * * The intent of this test is to ensure that all constant expressions * are actually evaluated at compile-time, thanks to the reverse - * (forward) post-order traversal of the the dominator tree. + * (forward) post-order traversal of the dominator tree. 
* * 16-bit * offset diff --git a/compiler/optimizing/constructor_fence_redundancy_elimination.cc b/compiler/optimizing/constructor_fence_redundancy_elimination.cc index d9b7652f32..48635cfd15 100644 --- a/compiler/optimizing/constructor_fence_redundancy_elimination.cc +++ b/compiler/optimizing/constructor_fence_redundancy_elimination.cc @@ -78,7 +78,7 @@ class CFREVisitor final : public HGraphVisitor { VisitSetLocation(instruction, value); } - void VisitDeoptimize(HDeoptimize* instruction ATTRIBUTE_UNUSED) override { + void VisitDeoptimize([[maybe_unused]] HDeoptimize* instruction) override { // Pessimize: Merge all fences. MergeCandidateFences(); } @@ -151,7 +151,7 @@ class CFREVisitor final : public HGraphVisitor { } } - void VisitSetLocation(HInstruction* inst ATTRIBUTE_UNUSED, HInstruction* store_input) { + void VisitSetLocation([[maybe_unused]] HInstruction* inst, HInstruction* store_input) { // An object is considered "published" if it's stored onto the heap. // Sidenote: A later "LSE" pass can still remove the fence if it proves the // object doesn't actually escape. diff --git a/compiler/optimizing/critical_native_abi_fixup_arm.cc b/compiler/optimizing/critical_native_abi_fixup_arm.cc index 77e156608b..4b1dec05b5 100644 --- a/compiler/optimizing/critical_native_abi_fixup_arm.cc +++ b/compiler/optimizing/critical_native_abi_fixup_arm.cc @@ -16,12 +16,8 @@ #include "critical_native_abi_fixup_arm.h" -#include "art_method-inl.h" #include "intrinsics.h" -#include "jni/jni_internal.h" #include "nodes.h" -#include "scoped_thread_state_change-inl.h" -#include "well_known_classes.h" namespace art HIDDEN { namespace arm { @@ -43,46 +39,7 @@ static void FixUpArguments(HInvokeStaticOrDirect* invoke) { break; // Remaining arguments are passed on stack. } if (DataType::IsFloatingPointType(input_type)) { - bool is_double = (input_type == DataType::Type::kFloat64); - DataType::Type converted_type = is_double ? DataType::Type::kInt64 : DataType::Type::kInt32; - ArtMethod* resolved_method = is_double - ? WellKnownClasses::java_lang_Double_doubleToRawLongBits - : WellKnownClasses::java_lang_Float_floatToRawIntBits; - DCHECK(resolved_method != nullptr); - DCHECK(resolved_method->IsIntrinsic()); - MethodReference target_method(nullptr, 0); - { - ScopedObjectAccess soa(Thread::Current()); - target_method = - MethodReference(resolved_method->GetDexFile(), resolved_method->GetDexMethodIndex()); - } - // Use arbitrary dispatch info that does not require the method argument. - HInvokeStaticOrDirect::DispatchInfo dispatch_info = { - MethodLoadKind::kBssEntry, - CodePtrLocation::kCallArtMethod, - /*method_load_data=*/ 0u - }; - HBasicBlock* block = invoke->GetBlock(); - ArenaAllocator* allocator = block->GetGraph()->GetAllocator(); - HInvokeStaticOrDirect* new_input = new (allocator) HInvokeStaticOrDirect( - allocator, - /*number_of_arguments=*/ 1u, - converted_type, - invoke->GetDexPc(), - /*method_reference=*/ MethodReference(nullptr, dex::kDexNoIndex), - resolved_method, - dispatch_info, - kStatic, - target_method, - HInvokeStaticOrDirect::ClinitCheckRequirement::kNone, - !block->GetGraph()->IsDebuggable()); - // The intrinsic has no side effects and does not need environment or dex cache on ARM. 
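The fake call that this removed block built, and that the shared InsertFpToIntegralIntrinsic helper now inserts below, just reinterprets the FP argument's bits so the value can legally travel in a core register. A minimal C++20 sketch of that reinterpretation, using std::bit_cast rather than anything from ART:

    #include <bit>
    #include <cassert>
    #include <cstdint>

    int main() {
      // Float.floatToRawIntBits(1.0f) == 0x3f800000
      assert(std::bit_cast<int32_t>(1.0f) == 0x3f800000);
      // Double.doubleToRawLongBits(1.0) == 0x3ff0000000000000L
      assert(std::bit_cast<int64_t>(1.0) == 0x3ff0000000000000LL);
      return 0;
    }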
- new_input->SetSideEffects(SideEffects::None()); - IntrinsicOptimizations opt(new_input); - opt.SetDoesNotNeedEnvironment(); - new_input->SetRawInputAt(0u, input); - block->InsertInstructionBefore(new_input, invoke); - invoke->ReplaceInput(new_input, i); + InsertFpToIntegralIntrinsic(invoke, i); } reg = next_reg; } diff --git a/compiler/optimizing/critical_native_abi_fixup_riscv64.cc b/compiler/optimizing/critical_native_abi_fixup_riscv64.cc new file mode 100644 index 0000000000..c2c98d1df9 --- /dev/null +++ b/compiler/optimizing/critical_native_abi_fixup_riscv64.cc @@ -0,0 +1,71 @@ +/* + * Copyright (C) 2023 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "critical_native_abi_fixup_riscv64.h" + +#include "arch/riscv64/jni_frame_riscv64.h" +#include "intrinsics.h" +#include "nodes.h" + +namespace art HIDDEN { +namespace riscv64 { + +// Fix up FP arguments passed in core registers for call to @CriticalNative by inserting fake calls +// to Float.floatToRawIntBits() or Double.doubleToRawLongBits() to satisfy type consistency checks. +static void FixUpArguments(HInvokeStaticOrDirect* invoke) { + DCHECK_EQ(invoke->GetCodePtrLocation(), CodePtrLocation::kCallCriticalNative); + size_t core_reg = 0u; + size_t fp_reg = 0u; + for (size_t i = 0, num_args = invoke->GetNumberOfArguments(); i != num_args; ++i) { + if (core_reg == kMaxIntLikeArgumentRegisters) { + break; // Remaining arguments are passed in FP regs or on the stack. + } + HInstruction* input = invoke->InputAt(i); + DataType::Type input_type = input->GetType(); + if (DataType::IsFloatingPointType(input_type)) { + if (fp_reg < kMaxFloatOrDoubleArgumentRegisters) { + ++fp_reg; + } else { + DCHECK_LT(core_reg, kMaxIntLikeArgumentRegisters); + InsertFpToIntegralIntrinsic(invoke, i); + ++core_reg; + } + } else { + ++core_reg; + } + } +} + +bool CriticalNativeAbiFixupRiscv64::Run() { + if (!graph_->HasDirectCriticalNativeCall()) { + return false; + } + + for (HBasicBlock* block : graph_->GetReversePostOrder()) { + for (HInstructionIterator it(block->GetInstructions()); !it.Done(); it.Advance()) { + HInstruction* instruction = it.Current(); + if (instruction->IsInvokeStaticOrDirect() && + instruction->AsInvokeStaticOrDirect()->GetCodePtrLocation() == + CodePtrLocation::kCallCriticalNative) { + FixUpArguments(instruction->AsInvokeStaticOrDirect()); + } + } + } + return true; +} + +} // namespace riscv64 +} // namespace art diff --git a/compiler/optimizing/critical_native_abi_fixup_riscv64.h b/compiler/optimizing/critical_native_abi_fixup_riscv64.h new file mode 100644 index 0000000000..dc76cff2b8 --- /dev/null +++ b/compiler/optimizing/critical_native_abi_fixup_riscv64.h @@ -0,0 +1,41 @@ +/* + * Copyright (C) 2023 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef ART_COMPILER_OPTIMIZING_CRITICAL_NATIVE_ABI_FIXUP_RISCV64_H_ +#define ART_COMPILER_OPTIMIZING_CRITICAL_NATIVE_ABI_FIXUP_RISCV64_H_ + +#include "base/macros.h" +#include "nodes.h" +#include "optimization.h" + +namespace art HIDDEN { +namespace riscv64 { + +class CriticalNativeAbiFixupRiscv64 : public HOptimization { + public: + CriticalNativeAbiFixupRiscv64(HGraph* graph, OptimizingCompilerStats* stats) + : HOptimization(graph, kCriticalNativeAbiFixupRiscv64PassName, stats) {} + + static constexpr const char* kCriticalNativeAbiFixupRiscv64PassName = + "critical_native_abi_fixup_riscv64"; + + bool Run() override; +}; + +} // namespace riscv64 +} // namespace art + +#endif // ART_COMPILER_OPTIMIZING_CRITICAL_NATIVE_ABI_FIXUP_RISCV64_H_ diff --git a/compiler/optimizing/dead_code_elimination.cc b/compiler/optimizing/dead_code_elimination.cc index cf49e39849..5b420db5be 100644 --- a/compiler/optimizing/dead_code_elimination.cc +++ b/compiler/optimizing/dead_code_elimination.cc @@ -24,6 +24,7 @@ #include "base/scoped_arena_containers.h" #include "base/stl_util.h" #include "optimizing/nodes.h" +#include "optimizing/nodes_vector.h" #include "ssa_phi_elimination.h" namespace art HIDDEN { @@ -311,9 +312,7 @@ bool HDeadCodeElimination::SimplifyAlwaysThrows() { // We need to re-analyze the graph in order to run DCE afterwards. if (rerun_dominance_and_loop_analysis) { - graph_->ClearLoopInformation(); - graph_->ClearDominanceInformation(); - graph_->BuildDominatorTree(); + graph_->RecomputeDominatorTree(); return true; } return false; @@ -437,9 +436,7 @@ bool HDeadCodeElimination::SimplifyIfs() { // We need to re-analyze the graph in order to run DCE afterwards. if (simplified_one_or_more_ifs) { if (rerun_dominance_and_loop_analysis) { - graph_->ClearLoopInformation(); - graph_->ClearDominanceInformation(); - graph_->BuildDominatorTree(); + graph_->RecomputeDominatorTree(); } else { graph_->ClearDominanceInformation(); // We have introduced critical edges, remove them. @@ -773,6 +770,93 @@ bool HDeadCodeElimination::RemoveUnneededTries() { } } +bool HDeadCodeElimination::RemoveEmptyIfs() { + bool did_opt = false; + for (HBasicBlock* block : graph_->GetPostOrder()) { + if (!block->EndsWithIf()) { + continue; + } + + HIf* if_instr = block->GetLastInstruction()->AsIf(); + HBasicBlock* true_block = if_instr->IfTrueSuccessor(); + HBasicBlock* false_block = if_instr->IfFalseSuccessor(); + + // We can use `visited_blocks` to detect cases like + // 1 + // / \ + // 2 3 + // \ / + // 4 ... + // | / + // 5 + // where 2, 3, and 4 are single HGoto blocks, and block 5 has Phis. 
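A standalone sketch of this meet-point walk on a toy graph (ToyBlock and IsEmptyIf are illustrative stand-ins, not ART's HBasicBlock API): follow the true branch through single-goto blocks recording what was seen, then follow the false branch until it either reaches one of those blocks (an empty if) or stops.

    #include <cassert>
    #include <unordered_set>
    #include <vector>

    struct ToyBlock {
      std::vector<int> successors;
      bool single_goto;  // contains nothing but an unconditional goto
      bool has_phis;
    };

    bool IsEmptyIf(const std::vector<ToyBlock>& g, int true_succ, int false_succ) {
      std::unordered_set<int> visited;
      int merge_true = true_succ;
      visited.insert(merge_true);
      while (g[merge_true].single_goto) {
        merge_true = g[merge_true].successors[0];
        visited.insert(merge_true);
      }
      int merge_false = false_succ;
      while (visited.count(merge_false) == 0 && g[merge_false].single_goto) {
        merge_false = g[merge_false].successors[0];
      }
      // The branches must meet in an already-visited block that carries no Phis.
      return visited.count(merge_false) != 0 && !g[merge_false].has_phis;
    }

    int main() {
      // Diamond: 0 -> {1, 2}, 1 -> 3, 2 -> 3, where 1 and 2 are single gotos.
      std::vector<ToyBlock> g(4);
      g[0].successors = {1, 2};
      g[1].successors = {3};  g[1].single_goto = true;
      g[2].successors = {3};  g[2].single_goto = true;
      assert(IsEmptyIf(g, /*true_succ=*/1, /*false_succ=*/2));
      g[3].has_phis = true;   // a Phi at the merge point blocks the rewrite
      assert(!IsEmptyIf(g, /*true_succ=*/1, /*false_succ=*/2));
      return 0;
    }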
+ ScopedArenaAllocator allocator(graph_->GetArenaStack()); + ScopedArenaHashSet<HBasicBlock*> visited_blocks(allocator.Adapter(kArenaAllocDCE)); + HBasicBlock* merge_true = true_block; + visited_blocks.insert(merge_true); + while (merge_true->IsSingleGoto()) { + merge_true = merge_true->GetSuccessors()[0]; + visited_blocks.insert(merge_true); + } + + HBasicBlock* merge_false = false_block; + while (visited_blocks.find(merge_false) == visited_blocks.end() && + merge_false->IsSingleGoto()) { + merge_false = merge_false->GetSuccessors()[0]; + } + + if (visited_blocks.find(merge_false) == visited_blocks.end() || + !merge_false->GetPhis().IsEmpty()) { + // TODO(solanes): We could allow Phis iff both branches have the same value for all Phis. This + // may not be covered by SsaRedundantPhiElimination in cases like `HPhi[A,A,B]` where the Phi + // itself is not redundant for the general case but it is for a pair of branches. + continue; + } + + // Data structures to help remove now-dead instructions. + ScopedArenaQueue<HInstruction*> maybe_remove(allocator.Adapter(kArenaAllocDCE)); + ScopedArenaHashSet<HInstruction*> visited(allocator.Adapter(kArenaAllocDCE)); + maybe_remove.push(if_instr->InputAt(0)); + + // Swap HIf with HGoto + block->ReplaceAndRemoveInstructionWith( + if_instr, new (graph_->GetAllocator()) HGoto(if_instr->GetDexPc())); + + // Reconnect blocks + block->RemoveSuccessor(true_block); + block->RemoveSuccessor(false_block); + true_block->RemovePredecessor(block); + false_block->RemovePredecessor(block); + block->AddSuccessor(merge_false); + + // Remove now dead instructions e.g. comparisons that are only used as input to the if + // instruction. This can allow for further removal of other empty ifs. + while (!maybe_remove.empty()) { + HInstruction* instr = maybe_remove.front(); + maybe_remove.pop(); + if (visited.find(instr) != visited.end()) { + continue; + } + visited.insert(instr); + if (instr->IsDeadAndRemovable()) { + for (HInstruction* input : instr->GetInputs()) { + maybe_remove.push(input); + } + instr->GetBlock()->RemoveInstructionOrPhi(instr); + MaybeRecordStat(stats_, MethodCompilationStat::kRemovedDeadInstruction); + } + } + + did_opt = true; + } + + if (did_opt) { + graph_->RecomputeDominatorTree(); + } + + return did_opt; +} + bool HDeadCodeElimination::RemoveDeadBlocks(bool force_recomputation, bool force_loop_recomputation) { DCHECK_IMPLIES(force_loop_recomputation, force_recomputation); @@ -807,9 +891,7 @@ bool HDeadCodeElimination::RemoveDeadBlocks(bool force_recomputation, // dominator tree and try block membership. if (removed_one_or_more_blocks || force_recomputation) { if (rerun_dominance_and_loop_analysis || force_loop_recomputation) { - graph_->ClearLoopInformation(); - graph_->ClearDominanceInformation(); - graph_->BuildDominatorTree(); + graph_->RecomputeDominatorTree(); } else { graph_->ClearDominanceInformation(); graph_->ComputeDominanceInformation(); @@ -837,12 +919,23 @@ void HDeadCodeElimination::RemoveDeadInstructions() { MaybeRecordStat(stats_, MethodCompilationStat::kRemovedDeadInstruction); } } + + // Same for Phis. 
+ for (HBackwardInstructionIterator phi_it(block->GetPhis()); !phi_it.Done(); phi_it.Advance()) { + DCHECK(phi_it.Current()->IsPhi()); + HPhi* phi = phi_it.Current()->AsPhi(); + if (phi->IsDeadAndRemovable()) { + block->RemovePhi(phi); + MaybeRecordStat(stats_, MethodCompilationStat::kRemovedDeadPhi); + } + } } } void HDeadCodeElimination::UpdateGraphFlags() { bool has_monitor_operations = false; - bool has_simd = false; + bool has_traditional_simd = false; + bool has_predicated_simd = false; bool has_bounds_checks = false; bool has_always_throwing_invokes = false; @@ -852,7 +945,12 @@ void HDeadCodeElimination::UpdateGraphFlags() { if (instruction->IsMonitorOperation()) { has_monitor_operations = true; } else if (instruction->IsVecOperation()) { - has_simd = true; + HVecOperation* vec_instruction = instruction->AsVecOperation(); + if (vec_instruction->IsPredicated()) { + has_predicated_simd = true; + } else { + has_traditional_simd = true; + } } else if (instruction->IsBoundsCheck()) { has_bounds_checks = true; } else if (instruction->IsInvoke() && instruction->AsInvoke()->AlwaysThrows()) { @@ -862,7 +960,8 @@ void HDeadCodeElimination::UpdateGraphFlags() { } graph_->SetHasMonitorOperations(has_monitor_operations); - graph_->SetHasSIMD(has_simd); + graph_->SetHasTraditionalSIMD(has_traditional_simd); + graph_->SetHasPredicatedSIMD(has_predicated_simd); graph_->SetHasBoundsChecks(has_bounds_checks); graph_->SetHasAlwaysThrowingInvokes(has_always_throwing_invokes); } @@ -877,6 +976,7 @@ bool HDeadCodeElimination::Run() { bool did_any_simplification = false; did_any_simplification |= SimplifyAlwaysThrows(); did_any_simplification |= SimplifyIfs(); + did_any_simplification |= RemoveEmptyIfs(); did_any_simplification |= RemoveDeadBlocks(); // We call RemoveDeadBlocks before RemoveUnneededTries to remove the dead blocks from the // previous optimizations. Otherwise, we might detect that a try has throwing instructions but diff --git a/compiler/optimizing/dead_code_elimination.h b/compiler/optimizing/dead_code_elimination.h index ddd01f7103..789962f93c 100644 --- a/compiler/optimizing/dead_code_elimination.h +++ b/compiler/optimizing/dead_code_elimination.h @@ -40,6 +40,17 @@ class HDeadCodeElimination : public HOptimization { private: void MaybeRecordDeadBlock(HBasicBlock* block); void MaybeRecordSimplifyIf(); + // Detects and remove ifs that are empty e.g. it turns + // 1 + // / \ + // 2 3 + // \ / + // 4 + // where 2 and 3 are single goto blocks and 4 doesn't contain a Phi into: + // 1 + // | + // 4 + bool RemoveEmptyIfs(); // If `force_recomputation` is true, we will recompute the dominance information even when we // didn't delete any blocks. `force_loop_recomputation` is similar but it also forces the loop // information recomputation. diff --git a/compiler/optimizing/dead_code_elimination_test.cc b/compiler/optimizing/dead_code_elimination_test.cc index b789434add..4082ec58fc 100644 --- a/compiler/optimizing/dead_code_elimination_test.cc +++ b/compiler/optimizing/dead_code_elimination_test.cc @@ -99,8 +99,9 @@ TEST_F(DeadCodeEliminationTest, AdditionAndConditionalJump) { // Expected difference after dead code elimination. 
diff_t expected_diff = { - { " 3: IntConstant [9, 8, 5]\n", " 3: IntConstant [8, 5]\n" }, - { " 8: Phi(4, 3) [9]\n", " 8: Phi(4, 3)\n" }, + { " 3: IntConstant [9, 8, 5]\n", " 3: IntConstant [5]\n" }, + { " 4: IntConstant [8, 5]\n", " 4: IntConstant [5]\n" }, + { " 8: Phi(4, 3) [9]\n", removed }, { " 9: Add(8, 3)\n", removed } }; std::string expected_after = Patch(expected_before, expected_diff); @@ -114,7 +115,7 @@ TEST_F(DeadCodeEliminationTest, AdditionAndConditionalJump) { * * The intent of this test is to ensure that all dead instructions are * actually pruned at compile-time, thanks to the (backward) - * post-order traversal of the the dominator tree. + * post-order traversal of the dominator tree. * * 16-bit * offset diff --git a/compiler/optimizing/execution_subgraph.cc b/compiler/optimizing/execution_subgraph.cc deleted file mode 100644 index 06aabbe040..0000000000 --- a/compiler/optimizing/execution_subgraph.cc +++ /dev/null @@ -1,359 +0,0 @@ -/* - * Copyright (C) 2020 The Android Open Source Project - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "execution_subgraph.h" - -#include <algorithm> -#include <unordered_set> - -#include "android-base/macros.h" -#include "base/arena_allocator.h" -#include "base/arena_bit_vector.h" -#include "base/globals.h" -#include "base/scoped_arena_allocator.h" -#include "nodes.h" - -namespace art HIDDEN { - -ExecutionSubgraph::ExecutionSubgraph(HGraph* graph, ScopedArenaAllocator* allocator) - : graph_(graph), - allocator_(allocator), - allowed_successors_(graph_->GetBlocks().size(), - ~(std::bitset<kMaxFilterableSuccessors> {}), - allocator_->Adapter(kArenaAllocLSA)), - unreachable_blocks_( - allocator_, graph_->GetBlocks().size(), /*expandable=*/ false, kArenaAllocLSA), - valid_(true), - needs_prune_(false), - finalized_(false) { - if (valid_) { - DCHECK(std::all_of(graph->GetBlocks().begin(), graph->GetBlocks().end(), [](HBasicBlock* it) { - return it == nullptr || it->GetSuccessors().size() <= kMaxFilterableSuccessors; - })); - } -} - -void ExecutionSubgraph::RemoveBlock(const HBasicBlock* to_remove) { - if (!valid_) { - return; - } - uint32_t id = to_remove->GetBlockId(); - if (unreachable_blocks_.IsBitSet(id)) { - if (kIsDebugBuild) { - // This isn't really needed but it's good to have this so it functions as - // a DCHECK that we always call Prune after removing any block. - needs_prune_ = true; - } - return; - } - unreachable_blocks_.SetBit(id); - for (HBasicBlock* pred : to_remove->GetPredecessors()) { - std::bitset<kMaxFilterableSuccessors> allowed_successors {}; - // ZipCount iterates over both the successors and the index of them at the same time. - for (auto [succ, i] : ZipCount(MakeIterationRange(pred->GetSuccessors()))) { - if (succ != to_remove) { - allowed_successors.set(i); - } - } - LimitBlockSuccessors(pred, allowed_successors); - } -} - -// Removes sink nodes. 
-void ExecutionSubgraph::Prune() { - if (UNLIKELY(!valid_)) { - return; - } - needs_prune_ = false; - // This is the record of the edges that were both (1) explored and (2) reached - // the exit node. - { - // Allocator for temporary values. - ScopedArenaAllocator temporaries(graph_->GetArenaStack()); - ScopedArenaVector<std::bitset<kMaxFilterableSuccessors>> results( - graph_->GetBlocks().size(), temporaries.Adapter(kArenaAllocLSA)); - unreachable_blocks_.ClearAllBits(); - // Fills up the 'results' map with what we need to add to update - // allowed_successors in order to prune sink nodes. - bool start_reaches_end = false; - // This is basically a DFS of the graph with some edges skipped. - { - const size_t num_blocks = graph_->GetBlocks().size(); - constexpr ssize_t kUnvisitedSuccIdx = -1; - ArenaBitVector visiting(&temporaries, num_blocks, false, kArenaAllocLSA); - // How many of the successors of each block we have already examined. This - // has three states. - // (1) kUnvisitedSuccIdx: we have not examined any edges, - // (2) 0 <= val < # of successors: we have examined 'val' successors/are - // currently examining successors_[val], - // (3) kMaxFilterableSuccessors: We have examined all of the successors of - // the block (the 'result' is final). - ScopedArenaVector<ssize_t> last_succ_seen( - num_blocks, kUnvisitedSuccIdx, temporaries.Adapter(kArenaAllocLSA)); - // A stack of which blocks we are visiting in this DFS traversal. Does not - // include the current-block. Used with last_succ_seen to figure out which - // bits to set if we find a path to the end/loop. - ScopedArenaVector<uint32_t> current_path(temporaries.Adapter(kArenaAllocLSA)); - // Just ensure we have enough space. The allocator will be cleared shortly - // anyway so this is fast. - current_path.reserve(num_blocks); - // Current block we are examining. Modified only by 'push_block' and 'pop_block' - const HBasicBlock* cur_block = graph_->GetEntryBlock(); - // Used to note a recur where we will start iterating on 'blk' and save - // where we are. We must 'continue' immediately after this. - auto push_block = [&](const HBasicBlock* blk) { - DCHECK(std::find(current_path.cbegin(), current_path.cend(), cur_block->GetBlockId()) == - current_path.end()); - if (kIsDebugBuild) { - std::for_each(current_path.cbegin(), current_path.cend(), [&](auto id) { - DCHECK_GT(last_succ_seen[id], kUnvisitedSuccIdx) << id; - DCHECK_LT(last_succ_seen[id], static_cast<ssize_t>(kMaxFilterableSuccessors)) << id; - }); - } - current_path.push_back(cur_block->GetBlockId()); - visiting.SetBit(cur_block->GetBlockId()); - cur_block = blk; - }; - // Used to note that we have fully explored a block and should return back - // up. Sets cur_block appropriately. We must 'continue' immediately after - // calling this. - auto pop_block = [&]() { - if (UNLIKELY(current_path.empty())) { - // Should only happen if entry-blocks successors are exhausted. - DCHECK_GE(last_succ_seen[graph_->GetEntryBlock()->GetBlockId()], - static_cast<ssize_t>(graph_->GetEntryBlock()->GetSuccessors().size())); - cur_block = nullptr; - } else { - const HBasicBlock* last = graph_->GetBlocks()[current_path.back()]; - visiting.ClearBit(current_path.back()); - current_path.pop_back(); - cur_block = last; - } - }; - // Mark the current path as a path to the end. This is in contrast to paths - // that end in (eg) removed blocks. 
- auto propagate_true = [&]() { - for (uint32_t id : current_path) { - DCHECK_GT(last_succ_seen[id], kUnvisitedSuccIdx); - DCHECK_LT(last_succ_seen[id], static_cast<ssize_t>(kMaxFilterableSuccessors)); - results[id].set(last_succ_seen[id]); - } - }; - ssize_t num_entry_succ = graph_->GetEntryBlock()->GetSuccessors().size(); - // As long as the entry-block has not explored all successors we still have - // work to do. - const uint32_t entry_block_id = graph_->GetEntryBlock()->GetBlockId(); - while (num_entry_succ > last_succ_seen[entry_block_id]) { - DCHECK(cur_block != nullptr); - uint32_t id = cur_block->GetBlockId(); - DCHECK((current_path.empty() && cur_block == graph_->GetEntryBlock()) || - current_path.front() == graph_->GetEntryBlock()->GetBlockId()) - << "current path size: " << current_path.size() - << " cur_block id: " << cur_block->GetBlockId() << " entry id " - << graph_->GetEntryBlock()->GetBlockId(); - if (visiting.IsBitSet(id)) { - // TODO We should support infinite loops as well. - start_reaches_end = false; - break; - } - std::bitset<kMaxFilterableSuccessors>& result = results[id]; - if (cur_block == graph_->GetExitBlock()) { - start_reaches_end = true; - propagate_true(); - pop_block(); - continue; - } else if (last_succ_seen[id] == kMaxFilterableSuccessors) { - // Already fully explored. - if (result.any()) { - propagate_true(); - } - pop_block(); - continue; - } - // NB This is a pointer. Modifications modify the last_succ_seen. - ssize_t* cur_succ = &last_succ_seen[id]; - std::bitset<kMaxFilterableSuccessors> succ_bitmap = GetAllowedSuccessors(cur_block); - // Get next successor allowed. - while (++(*cur_succ) < static_cast<ssize_t>(kMaxFilterableSuccessors) && - !succ_bitmap.test(*cur_succ)) { - DCHECK_GE(*cur_succ, 0); - } - if (*cur_succ >= static_cast<ssize_t>(cur_block->GetSuccessors().size())) { - // No more successors. Mark that we've checked everything. Later visits - // to this node can use the existing data. - DCHECK_LE(*cur_succ, static_cast<ssize_t>(kMaxFilterableSuccessors)); - *cur_succ = kMaxFilterableSuccessors; - pop_block(); - continue; - } - const HBasicBlock* nxt = cur_block->GetSuccessors()[*cur_succ]; - DCHECK(nxt != nullptr) << "id: " << *cur_succ - << " max: " << cur_block->GetSuccessors().size(); - if (visiting.IsBitSet(nxt->GetBlockId())) { - // This is a loop. Mark it and continue on. Mark allowed-successor on - // this block's results as well. - result.set(*cur_succ); - propagate_true(); - } else { - // Not a loop yet. Recur. - push_block(nxt); - } - } - } - // If we can't reach the end then there is no path through the graph without - // hitting excluded blocks - if (UNLIKELY(!start_reaches_end)) { - valid_ = false; - return; - } - // Mark blocks we didn't see in the ReachesEnd flood-fill - for (const HBasicBlock* blk : graph_->GetBlocks()) { - if (blk != nullptr && - results[blk->GetBlockId()].none() && - blk != graph_->GetExitBlock() && - blk != graph_->GetEntryBlock()) { - // We never visited this block, must be unreachable. - unreachable_blocks_.SetBit(blk->GetBlockId()); - } - } - // write the new data. 
- memcpy(allowed_successors_.data(), - results.data(), - results.size() * sizeof(std::bitset<kMaxFilterableSuccessors>)); - } - RecalculateExcludedCohort(); -} - -void ExecutionSubgraph::RemoveConcavity() { - if (UNLIKELY(!valid_)) { - return; - } - DCHECK(!needs_prune_); - for (const HBasicBlock* blk : graph_->GetBlocks()) { - if (blk == nullptr || unreachable_blocks_.IsBitSet(blk->GetBlockId())) { - continue; - } - uint32_t blkid = blk->GetBlockId(); - if (std::any_of(unreachable_blocks_.Indexes().begin(), - unreachable_blocks_.Indexes().end(), - [&](uint32_t skipped) { return graph_->PathBetween(skipped, blkid); }) && - std::any_of(unreachable_blocks_.Indexes().begin(), - unreachable_blocks_.Indexes().end(), - [&](uint32_t skipped) { return graph_->PathBetween(blkid, skipped); })) { - RemoveBlock(blk); - } - } - Prune(); -} - -void ExecutionSubgraph::RecalculateExcludedCohort() { - DCHECK(!needs_prune_); - excluded_list_.emplace(allocator_->Adapter(kArenaAllocLSA)); - ScopedArenaVector<ExcludedCohort>& res = excluded_list_.value(); - // Make a copy of unreachable_blocks_; - ArenaBitVector unreachable(allocator_, graph_->GetBlocks().size(), false, kArenaAllocLSA); - unreachable.Copy(&unreachable_blocks_); - // Split cohorts with union-find - while (unreachable.IsAnyBitSet()) { - res.emplace_back(allocator_, graph_); - ExcludedCohort& cohort = res.back(); - // We don't allocate except for the queue beyond here so create another arena to save memory. - ScopedArenaAllocator alloc(graph_->GetArenaStack()); - ScopedArenaQueue<const HBasicBlock*> worklist(alloc.Adapter(kArenaAllocLSA)); - // Select an arbitrary node - const HBasicBlock* first = graph_->GetBlocks()[unreachable.GetHighestBitSet()]; - worklist.push(first); - do { - // Flood-fill both forwards and backwards. - const HBasicBlock* cur = worklist.front(); - worklist.pop(); - if (!unreachable.IsBitSet(cur->GetBlockId())) { - // Already visited or reachable somewhere else. - continue; - } - unreachable.ClearBit(cur->GetBlockId()); - cohort.blocks_.SetBit(cur->GetBlockId()); - // don't bother filtering here, it's done next go-around - for (const HBasicBlock* pred : cur->GetPredecessors()) { - worklist.push(pred); - } - for (const HBasicBlock* succ : cur->GetSuccessors()) { - worklist.push(succ); - } - } while (!worklist.empty()); - } - // Figure out entry & exit nodes. 
- for (ExcludedCohort& cohort : res) { - DCHECK(cohort.blocks_.IsAnyBitSet()); - auto is_external = [&](const HBasicBlock* ext) -> bool { - return !cohort.blocks_.IsBitSet(ext->GetBlockId()); - }; - for (const HBasicBlock* blk : cohort.Blocks()) { - const auto& preds = blk->GetPredecessors(); - const auto& succs = blk->GetSuccessors(); - if (std::any_of(preds.cbegin(), preds.cend(), is_external)) { - cohort.entry_blocks_.SetBit(blk->GetBlockId()); - } - if (std::any_of(succs.cbegin(), succs.cend(), is_external)) { - cohort.exit_blocks_.SetBit(blk->GetBlockId()); - } - } - } -} - -std::ostream& operator<<(std::ostream& os, const ExecutionSubgraph::ExcludedCohort& ex) { - ex.Dump(os); - return os; -} - -void ExecutionSubgraph::ExcludedCohort::Dump(std::ostream& os) const { - auto dump = [&](BitVecBlockRange arr) { - os << "["; - bool first = true; - for (const HBasicBlock* b : arr) { - if (!first) { - os << ", "; - } - first = false; - os << b->GetBlockId(); - } - os << "]"; - }; - auto dump_blocks = [&]() { - os << "["; - bool first = true; - for (const HBasicBlock* b : Blocks()) { - if (!entry_blocks_.IsBitSet(b->GetBlockId()) && !exit_blocks_.IsBitSet(b->GetBlockId())) { - if (!first) { - os << ", "; - } - first = false; - os << b->GetBlockId(); - } - } - os << "]"; - }; - - os << "{ entry: "; - dump(EntryBlocks()); - os << ", interior: "; - dump_blocks(); - os << ", exit: "; - dump(ExitBlocks()); - os << "}"; -} - -} // namespace art diff --git a/compiler/optimizing/execution_subgraph.h b/compiler/optimizing/execution_subgraph.h deleted file mode 100644 index 5ddf17de60..0000000000 --- a/compiler/optimizing/execution_subgraph.h +++ /dev/null @@ -1,365 +0,0 @@ -/* - * Copyright (C) 2020 The Android Open Source Project - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef ART_COMPILER_OPTIMIZING_EXECUTION_SUBGRAPH_H_ -#define ART_COMPILER_OPTIMIZING_EXECUTION_SUBGRAPH_H_ - -#include <algorithm> -#include <sstream> - -#include "base/arena_allocator.h" -#include "base/arena_bit_vector.h" -#include "base/arena_containers.h" -#include "base/array_ref.h" -#include "base/bit_vector-inl.h" -#include "base/globals.h" -#include "base/iteration_range.h" -#include "base/macros.h" -#include "base/mutex.h" -#include "base/scoped_arena_allocator.h" -#include "base/scoped_arena_containers.h" -#include "base/stl_util.h" -#include "base/transform_iterator.h" -#include "nodes.h" - -namespace art HIDDEN { - -// Helper for transforming blocks to block_ids. -class BlockToBlockIdTransformer { - public: - BlockToBlockIdTransformer(BlockToBlockIdTransformer&&) = default; - BlockToBlockIdTransformer(const BlockToBlockIdTransformer&) = default; - BlockToBlockIdTransformer() {} - - inline uint32_t operator()(const HBasicBlock* b) const { - return b->GetBlockId(); - } -}; - -// Helper for transforming block ids to blocks. 
-class BlockIdToBlockTransformer { - public: - BlockIdToBlockTransformer(BlockIdToBlockTransformer&&) = default; - BlockIdToBlockTransformer(const BlockIdToBlockTransformer&) = default; - explicit BlockIdToBlockTransformer(const HGraph* graph) : graph_(graph) {} - - inline const HGraph* GetGraph() const { - return graph_; - } - - inline HBasicBlock* GetBlock(uint32_t id) const { - DCHECK_LT(id, graph_->GetBlocks().size()) << graph_->PrettyMethod(); - HBasicBlock* blk = graph_->GetBlocks()[id]; - DCHECK(blk != nullptr); - return blk; - } - - inline HBasicBlock* operator()(uint32_t id) const { - return GetBlock(id); - } - - private: - const HGraph* const graph_; -}; - -class BlockIdFilterThunk { - public: - explicit BlockIdFilterThunk(const BitVector& i) : inner_(i) {} - BlockIdFilterThunk(BlockIdFilterThunk&& other) noexcept = default; - BlockIdFilterThunk(const BlockIdFilterThunk&) = default; - - bool operator()(const HBasicBlock* b) const { - return inner_.IsBitSet(b->GetBlockId()); - } - - private: - const BitVector& inner_; -}; - -// A representation of a particular section of the graph. The graph is split -// into an excluded and included area and is used to track escapes. -// -// This object is a view of the graph and is not updated as the graph is -// changed. -// -// This is implemented by removing various escape points from the subgraph using -// the 'RemoveBlock' function. Once all required blocks are removed one will -// 'Finalize' the subgraph. This will extend the removed area to include: -// (1) Any block which inevitably leads to (post-dominates) a removed block -// (2) any block which is between 2 removed blocks -// -// This allows us to create a set of 'ExcludedCohorts' which are the -// well-connected subsets of the graph made up of removed blocks. These cohorts -// have a set of entry and exit blocks which act as the boundary of the cohort. -// Since we removed blocks between 2 excluded blocks it is impossible for any -// cohort-exit block to reach any cohort-entry block. This means we can use the -// boundary between the cohort and the rest of the graph to insert -// materialization blocks for partial LSE. -// -// TODO We really should expand this to take into account where the object -// allocation takes place directly. Currently we always act as though it were -// allocated in the entry block. This is a massively simplifying assumption but -// means we can't partially remove objects that are repeatedly allocated in a -// loop. -class ExecutionSubgraph : public DeletableArenaObject<kArenaAllocLSA> { - public: - using BitVecBlockRange = - IterationRange<TransformIterator<BitVector::IndexIterator, BlockIdToBlockTransformer>>; - using FilteredBitVecBlockRange = IterationRange< - FilterIterator<ArenaVector<HBasicBlock*>::const_iterator, BlockIdFilterThunk>>; - - // A set of connected blocks which are connected and removed from the - // ExecutionSubgraph. See above comment for explanation. - class ExcludedCohort : public ArenaObject<kArenaAllocLSA> { - public: - ExcludedCohort(ExcludedCohort&&) = default; - ExcludedCohort(const ExcludedCohort&) = delete; - explicit ExcludedCohort(ScopedArenaAllocator* allocator, HGraph* graph) - : graph_(graph), - entry_blocks_(allocator, graph_->GetBlocks().size(), false, kArenaAllocLSA), - exit_blocks_(allocator, graph_->GetBlocks().size(), false, kArenaAllocLSA), - blocks_(allocator, graph_->GetBlocks().size(), false, kArenaAllocLSA) {} - - ~ExcludedCohort() = default; - - // All blocks in the cohort. 
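Editor's note: stepping back to the class-level comment above, the documented workflow can be put in code form. This is only a sketch of client usage built from the API declared in this header; it assumes the surrounding ART headers, and the graph, allocator and escape-block arguments are placeholders a real pass would already have.

    void ExampleSubgraphUsage(HGraph* graph,
                              ScopedArenaAllocator* allocator,
                              HBasicBlock* escape_block) {
      if (!ExecutionSubgraph::CanAnalyse(graph)) {
        return;  // Branching factor too high; the analysis opts out.
      }
      ExecutionSubgraph subgraph(graph, allocator);
      subgraph.RemoveBlock(escape_block);  // One call per block where the object escapes.
      subgraph.Finalize();                 // Extends removal per rules (1) and (2) above.
      if (!subgraph.IsValid()) {
        return;  // Every execution hits an escape; nothing can be handled partially.
      }
      for (const ExecutionSubgraph::ExcludedCohort& cohort : subgraph.GetExcludedCohorts()) {
        for (const HBasicBlock* entry : cohort.EntryBlocks()) {
          // A partial-LSE style client would materialize the object on the
          // edges entering `entry` from outside the cohort.
          (void)entry;
        }
      }
    }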
- BitVecBlockRange Blocks() const { - return BlockIterRange(blocks_); - } - - // Blocks that have predecessors outside of the cohort. These blocks will - // need to have PHIs/control-flow added to create the escaping value. - BitVecBlockRange EntryBlocks() const { - return BlockIterRange(entry_blocks_); - } - - FilteredBitVecBlockRange EntryBlocksReversePostOrder() const { - return Filter(MakeIterationRange(graph_->GetReversePostOrder()), - BlockIdFilterThunk(entry_blocks_)); - } - - bool IsEntryBlock(const HBasicBlock* blk) const { - return entry_blocks_.IsBitSet(blk->GetBlockId()); - } - - // Blocks that have successors outside of the cohort. The successors of - // these blocks will need to have PHI's to restore state. - BitVecBlockRange ExitBlocks() const { - return BlockIterRange(exit_blocks_); - } - - bool operator==(const ExcludedCohort& other) const { - return blocks_.Equal(&other.blocks_); - } - - bool ContainsBlock(const HBasicBlock* blk) const { - return blocks_.IsBitSet(blk->GetBlockId()); - } - - // Returns true if there is a path from 'blk' to any block in this cohort. - // NB blocks contained within the cohort are not considered to be succeeded - // by the cohort (i.e. this function will return false). - bool SucceedsBlock(const HBasicBlock* blk) const { - if (ContainsBlock(blk)) { - return false; - } - auto idxs = entry_blocks_.Indexes(); - return std::any_of(idxs.begin(), idxs.end(), [&](uint32_t entry) -> bool { - return blk->GetGraph()->PathBetween(blk->GetBlockId(), entry); - }); - } - - // Returns true if there is a path from any block in this cohort to 'blk'. - // NB blocks contained within the cohort are not considered to be preceded - // by the cohort (i.e. this function will return false). - bool PrecedesBlock(const HBasicBlock* blk) const { - if (ContainsBlock(blk)) { - return false; - } - auto idxs = exit_blocks_.Indexes(); - return std::any_of(idxs.begin(), idxs.end(), [&](uint32_t exit) -> bool { - return blk->GetGraph()->PathBetween(exit, blk->GetBlockId()); - }); - } - - void Dump(std::ostream& os) const; - - private: - BitVecBlockRange BlockIterRange(const ArenaBitVector& bv) const { - auto indexes = bv.Indexes(); - BitVecBlockRange res = MakeTransformRange(indexes, BlockIdToBlockTransformer(graph_)); - return res; - } - - ExcludedCohort() = delete; - - HGraph* graph_; - ArenaBitVector entry_blocks_; - ArenaBitVector exit_blocks_; - ArenaBitVector blocks_; - - friend class ExecutionSubgraph; - friend class LoadStoreAnalysisTest; - }; - - // The number of successors we can track on a single block. Graphs which - // contain a block with a branching factor greater than this will not be - // analysed. This is used to both limit the memory usage of analysis to - // reasonable levels and ensure that the analysis will complete in a - // reasonable amount of time. It also simplifies the implementation somewhat - // to have a constant branching factor. - static constexpr uint32_t kMaxFilterableSuccessors = 8; - - // Instantiate a subgraph. The subgraph can be instantiated only if partial-escape - // analysis is desired (eg not when being used for instruction scheduling) and - // when the branching factor in the graph is not too high. These conditions - // are determined once and passed down for performance reasons. - ExecutionSubgraph(HGraph* graph, ScopedArenaAllocator* allocator); - - void Invalidate() { - valid_ = false; - } - - // A block is contained by the ExecutionSubgraph if it is reachable. 
This - // means it has not been removed explicitly or via pruning/concavity removal. - // Finalization is needed to call this function. - // See RemoveConcavity and Prune for more information. - bool ContainsBlock(const HBasicBlock* blk) const { - DCHECK_IMPLIES(finalized_, !needs_prune_); - if (!valid_) { - return false; - } - return !unreachable_blocks_.IsBitSet(blk->GetBlockId()); - } - - // Mark the block as removed from the subgraph. - void RemoveBlock(const HBasicBlock* to_remove); - - // Called when no more updates will be done to the subgraph. Calculate the - // final subgraph - void Finalize() { - Prune(); - RemoveConcavity(); - finalized_ = true; - } - - BitVecBlockRange UnreachableBlocks() const { - auto idxs = unreachable_blocks_.Indexes(); - return MakeTransformRange(idxs, BlockIdToBlockTransformer(graph_)); - } - - // Returns true if all allowed execution paths from start eventually reach the - // graph's exit block (or diverge). - bool IsValid() const { - return valid_; - } - - ArrayRef<const ExcludedCohort> GetExcludedCohorts() const { - DCHECK_IMPLIES(valid_, !needs_prune_); - if (!valid_ || !unreachable_blocks_.IsAnyBitSet()) { - return ArrayRef<const ExcludedCohort>(); - } else { - return ArrayRef<const ExcludedCohort>(*excluded_list_); - } - } - - // Helper class to create reachable blocks iterator. - class ContainsFunctor { - public: - bool operator()(HBasicBlock* blk) const { - return subgraph_->ContainsBlock(blk); - } - - private: - explicit ContainsFunctor(const ExecutionSubgraph* subgraph) : subgraph_(subgraph) {} - const ExecutionSubgraph* const subgraph_; - friend class ExecutionSubgraph; - }; - // Returns an iterator over reachable blocks (filtered as we go). This is primarilly for testing. - IterationRange< - FilterIterator<typename ArenaVector<HBasicBlock*>::const_iterator, ContainsFunctor>> - ReachableBlocks() const { - return Filter(MakeIterationRange(graph_->GetBlocks()), ContainsFunctor(this)); - } - - static bool CanAnalyse(HGraph* graph) { - // If there are any blocks with more than kMaxFilterableSuccessors we can't - // analyse the graph. We avoid this case to prevent excessive memory and - // time usage while allowing a simpler algorithm with a fixed-width - // branching factor. - return std::all_of(graph->GetBlocks().begin(), graph->GetBlocks().end(), [](HBasicBlock* blk) { - return blk == nullptr || blk->GetSuccessors().size() <= kMaxFilterableSuccessors; - }); - } - - private: - std::bitset<kMaxFilterableSuccessors> GetAllowedSuccessors(const HBasicBlock* blk) const { - DCHECK(valid_); - return allowed_successors_[blk->GetBlockId()]; - } - - void LimitBlockSuccessors(const HBasicBlock* block, - std::bitset<kMaxFilterableSuccessors> allowed) { - needs_prune_ = true; - allowed_successors_[block->GetBlockId()] &= allowed; - } - - // Remove nodes which both precede and follow any exclusions. This ensures we don't need to deal - // with only conditionally materializing objects depending on if we already materialized them - // Ensure that for all blocks A, B, C: Unreachable(A) && Unreachable(C) && PathBetween(A, B) && - // PathBetween(A, C) implies Unreachable(B). This simplifies later transforms since it ensures - // that no execution can leave and then re-enter any exclusion. - void RemoveConcavity(); - - // Removes sink nodes. Sink nodes are nodes where there is no execution which - // avoids all removed nodes. 
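Editor's note: the representation behind these helpers is compact: one fixed-width bitset of allowed outgoing edges per block, which is also why CanAnalyse refuses graphs whose branching factor exceeds kMaxFilterableSuccessors. A standalone sketch of that scheme with plain STL types (names are illustrative), ahead of the Prune() declaration that the comment above introduces:

    #include <bitset>
    #include <cstddef>
    #include <vector>

    constexpr size_t kMaxSuccessors = 8;  // Stand-in for kMaxFilterableSuccessors.

    // Per-block edge mask: bit i set => the i-th successor edge is traversable.
    struct SimpleSubgraph {
      std::vector<std::bitset<kMaxSuccessors>> allowed;
      bool needs_prune = false;

      explicit SimpleSubgraph(size_t num_blocks)
          : allowed(num_blocks, std::bitset<kMaxSuccessors>().set()) {}

      // Mirrors LimitBlockSuccessors: masking is a single AND, and the cached
      // reachability information becomes stale until the next prune.
      void LimitSuccessors(size_t block_id, std::bitset<kMaxSuccessors> mask) {
        allowed[block_id] &= mask;
        needs_prune = true;
      }
    };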
- void Prune(); - - void RecalculateExcludedCohort(); - - HGraph* graph_; - ScopedArenaAllocator* allocator_; - // The map from block_id -> allowed-successors. - // This is the canonical representation of this subgraph. If a bit in the - // bitset is not set then the corresponding outgoing edge of that block is not - // considered traversable. - ScopedArenaVector<std::bitset<kMaxFilterableSuccessors>> allowed_successors_; - // Helper that holds which blocks we are able to reach. Only valid if - // 'needs_prune_ == false'. - ArenaBitVector unreachable_blocks_; - // A list of the excluded-cohorts of this subgraph. This is only valid when - // 'needs_prune_ == false' - std::optional<ScopedArenaVector<ExcludedCohort>> excluded_list_; - // Bool to hold if there is at least one known path from the start block to - // the end in this graph. Used to short-circuit computation. - bool valid_; - // True if the subgraph is consistent and can be queried. Modifying the - // subgraph clears this and requires a prune to restore. - bool needs_prune_; - // True if no more modification of the subgraph is permitted. - bool finalized_; - - friend class ExecutionSubgraphTest; - friend class LoadStoreAnalysisTest; - - DISALLOW_COPY_AND_ASSIGN(ExecutionSubgraph); -}; - -std::ostream& operator<<(std::ostream& os, const ExecutionSubgraph::ExcludedCohort& ex); - -} // namespace art - -#endif // ART_COMPILER_OPTIMIZING_EXECUTION_SUBGRAPH_H_ diff --git a/compiler/optimizing/execution_subgraph_test.cc b/compiler/optimizing/execution_subgraph_test.cc deleted file mode 100644 index 921ef056ba..0000000000 --- a/compiler/optimizing/execution_subgraph_test.cc +++ /dev/null @@ -1,975 +0,0 @@ -/* - * Copyright (C) 2020 The Android Open Source Project - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "execution_subgraph_test.h" - -#include <array> -#include <sstream> -#include <string_view> -#include <unordered_map> -#include <unordered_set> - -#include "base/scoped_arena_allocator.h" -#include "base/stl_util.h" -#include "class_root.h" -#include "dex/dex_file_types.h" -#include "dex/method_reference.h" -#include "entrypoints/quick/quick_entrypoints_enum.h" -#include "execution_subgraph.h" -#include "gtest/gtest.h" -#include "handle.h" -#include "handle_scope.h" -#include "nodes.h" -#include "optimizing/data_type.h" -#include "optimizing_unit_test.h" -#include "scoped_thread_state_change.h" - -namespace art HIDDEN { - -using BlockSet = std::unordered_set<const HBasicBlock*>; - -// Helper that checks validity directly. 
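Editor's note: the helper defined next validates a subgraph by brute force. In the same spirit, here is a deliberately simplified picture of what the Prune()/Finalize() pipeline above establishes: a block can only stay if some allowed execution from the entry still passes through it. This is not the actual algorithm; the real Prune() works from the per-block successor bitsets, treats executions that diverge in a loop as legitimate, and Finalize() additionally runs RemoveConcavity(). The sketch below approximates "allowed execution" by "lies on an entry-to-exit path over allowed edges" and assumes the caller supplies both forward and reversed allowed adjacency lists.

    #include <cstdint>
    #include <vector>

    std::vector<bool> BlocksOnAllowedPath(
        const std::vector<std::vector<uint32_t>>& allowed_succs,
        const std::vector<std::vector<uint32_t>>& allowed_preds,  // Reversed allowed edges.
        uint32_t entry,
        uint32_t exit) {
      auto reach = [](const std::vector<std::vector<uint32_t>>& edges, uint32_t start) {
        std::vector<bool> seen(edges.size(), false);
        std::vector<uint32_t> stack = {start};
        while (!stack.empty()) {
          uint32_t cur = stack.back();
          stack.pop_back();
          if (seen[cur]) continue;
          seen[cur] = true;
          for (uint32_t next : edges[cur]) stack.push_back(next);
        }
        return seen;
      };
      std::vector<bool> from_entry = reach(allowed_succs, entry);
      std::vector<bool> to_exit = reach(allowed_preds, exit);  // Backwards reachability.
      std::vector<bool> kept(allowed_succs.size(), false);
      for (uint32_t b = 0; b < kept.size(); ++b) {
        kept[b] = from_entry[b] && to_exit[b];
      }
      return kept;
    }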
-bool ExecutionSubgraphTestHelper::CalculateValidity(HGraph* graph, const ExecutionSubgraph* esg) { - bool reached_end = false; - std::queue<const HBasicBlock*> worklist; - std::unordered_set<const HBasicBlock*> visited; - worklist.push(graph->GetEntryBlock()); - while (!worklist.empty()) { - const HBasicBlock* cur = worklist.front(); - worklist.pop(); - if (visited.find(cur) != visited.end()) { - continue; - } else { - visited.insert(cur); - } - if (cur == graph->GetExitBlock()) { - reached_end = true; - continue; - } - bool has_succ = false; - for (const HBasicBlock* succ : cur->GetSuccessors()) { - DCHECK(succ != nullptr) << "Bad successors on block " << cur->GetBlockId(); - if (!esg->ContainsBlock(succ)) { - continue; - } - has_succ = true; - worklist.push(succ); - } - if (!has_succ) { - // We aren't at the end and have nowhere to go so fail. - return false; - } - } - return reached_end; -} - -class ExecutionSubgraphTest : public OptimizingUnitTest { - public: - ExecutionSubgraphTest() : graph_(CreateGraph()) {} - - AdjacencyListGraph SetupFromAdjacencyList(const std::string_view entry_name, - const std::string_view exit_name, - const std::vector<AdjacencyListGraph::Edge>& adj) { - return AdjacencyListGraph(graph_, GetAllocator(), entry_name, exit_name, adj); - } - - bool IsValidSubgraph(const ExecutionSubgraph* esg) { - return ExecutionSubgraphTestHelper::CalculateValidity(graph_, esg); - } - - bool IsValidSubgraph(const ExecutionSubgraph& esg) { - return ExecutionSubgraphTestHelper::CalculateValidity(graph_, &esg); - } - - HGraph* graph_; -}; - -// Some comparators used by these tests to avoid having to deal with various set types. -template <typename BLKS, typename = std::enable_if_t<!std::is_same_v<BlockSet, BLKS>>> -bool operator==(const BlockSet& bs, const BLKS& sas) { - std::unordered_set<const HBasicBlock*> us(sas.begin(), sas.end()); - return bs == us; -} -template <typename BLKS, typename = std::enable_if_t<!std::is_same_v<BlockSet, BLKS>>> -bool operator==(const BLKS& sas, const BlockSet& bs) { - return bs == sas; -} -template <typename BLKS, typename = std::enable_if_t<!std::is_same_v<BlockSet, BLKS>>> -bool operator!=(const BlockSet& bs, const BLKS& sas) { - return !(bs == sas); -} -template <typename BLKS, typename = std::enable_if_t<!std::is_same_v<BlockSet, BLKS>>> -bool operator!=(const BLKS& sas, const BlockSet& bs) { - return !(bs == sas); -} - -// +-------+ +-------+ -// | right | <-- | entry | -// +-------+ +-------+ -// | | -// | | -// | v -// | + - - - - - + -// | ' removed ' -// | ' ' -// | ' +-------+ ' -// | ' | left | ' -// | ' +-------+ ' -// | ' ' -// | + - - - - - + -// | | -// | | -// | v -// | +-------+ -// +---------> | exit | -// +-------+ -TEST_F(ExecutionSubgraphTest, Basic) { - AdjacencyListGraph blks(SetupFromAdjacencyList( - "entry", - "exit", - { { "entry", "left" }, { "entry", "right" }, { "left", "exit" }, { "right", "exit" } })); - ASSERT_TRUE(ExecutionSubgraph::CanAnalyse(graph_)); - ExecutionSubgraph esg(graph_, GetScopedAllocator()); - esg.RemoveBlock(blks.Get("left")); - esg.Finalize(); - ASSERT_TRUE(esg.IsValid()); - ASSERT_TRUE(IsValidSubgraph(esg)); - std::unordered_set<const HBasicBlock*> contents(esg.ReachableBlocks().begin(), - esg.ReachableBlocks().end()); - - ASSERT_EQ(contents.size(), 3u); - ASSERT_TRUE(contents.find(blks.Get("left")) == contents.end()); - - ASSERT_TRUE(contents.find(blks.Get("right")) != contents.end()); - ASSERT_TRUE(contents.find(blks.Get("entry")) != contents.end()); - 
ASSERT_TRUE(contents.find(blks.Get("exit")) != contents.end()); - esg.RemoveBlock(blks.Get("right")); - esg.Finalize(); - std::unordered_set<const HBasicBlock*> contents_2(esg.ReachableBlocks().begin(), - esg.ReachableBlocks().end()); - ASSERT_EQ(contents_2.size(), 0u); -} - -// +-------+ +-------+ -// | right | <-- | entry | -// +-------+ +-------+ -// | | -// | | -// | v -// | + - - - - - - - - - - - - - - - - - - - -+ -// | ' indirectly_removed ' -// | ' ' -// | ' +-------+ +-----+ ' -// | ' | l1 | -------------------> | l1r | ' -// | ' +-------+ +-----+ ' -// | ' | | ' -// | ' | | ' -// | ' v | ' -// | ' +-------+ | ' -// | ' | l1l | | ' -// | ' +-------+ | ' -// | ' | | ' -// | ' | | ' -// | ' | | ' -// + - - - - - - - -+ | +- - - | | ' -// ' ' | +- v | ' -// ' +-----+ | +----------------+ | ' -// ' | l2r | <---------+-------------- | l2 (removed) | <-------------+ ' -// ' +-----+ | +----------------+ ' -// ' | ' | +- | ' -// ' | - - -+ | +- - - | - - - - - - - - - - - - - -+ -// ' | ' | ' | ' -// ' | ' | ' | ' -// ' | ' | ' v ' -// ' | ' | ' +-------+ ' -// ' | ' | ' | l2l | ' -// ' | ' | ' +-------+ ' -// ' | ' | ' | ' -// ' | ' | ' | ' -// ' | ' | ' | ' -// ' | - - -+ | +- - - | ' -// ' | ' | +- v ' -// ' | | +-------+ ' -// ' +---------------+-------------> | l3 | ' -// ' | +-------+ ' -// ' ' | +- ' -// + - - - - - - - -+ | +- - - - - - - - - + -// | | -// | | -// | v -// | +-------+ -// +-----------> | exit | -// +-------+ -TEST_F(ExecutionSubgraphTest, Propagation) { - AdjacencyListGraph blks(SetupFromAdjacencyList("entry", - "exit", - { { "entry", "l1" }, - { "l1", "l1l" }, - { "l1", "l1r" }, - { "l1l", "l2" }, - { "l1r", "l2" }, - { "l2", "l2l" }, - { "l2", "l2r" }, - { "l2l", "l3" }, - { "l2r", "l3" }, - { "l3", "exit" }, - { "entry", "right" }, - { "right", "exit" } })); - ASSERT_TRUE(ExecutionSubgraph::CanAnalyse(graph_)); - ExecutionSubgraph esg(graph_, GetScopedAllocator()); - esg.RemoveBlock(blks.Get("l2")); - esg.Finalize(); - ASSERT_TRUE(esg.IsValid()); - ASSERT_TRUE(IsValidSubgraph(esg)); - std::unordered_set<const HBasicBlock*> contents(esg.ReachableBlocks().begin(), - esg.ReachableBlocks().end()); - - // ASSERT_EQ(contents.size(), 3u); - // Not present, no path through. - ASSERT_TRUE(contents.find(blks.Get("l1")) == contents.end()); - ASSERT_TRUE(contents.find(blks.Get("l2")) == contents.end()); - ASSERT_TRUE(contents.find(blks.Get("l3")) == contents.end()); - ASSERT_TRUE(contents.find(blks.Get("l1l")) == contents.end()); - ASSERT_TRUE(contents.find(blks.Get("l1r")) == contents.end()); - ASSERT_TRUE(contents.find(blks.Get("l2l")) == contents.end()); - ASSERT_TRUE(contents.find(blks.Get("l2r")) == contents.end()); - - // present, path through. 
- ASSERT_TRUE(contents.find(blks.Get("right")) != contents.end()); - ASSERT_TRUE(contents.find(blks.Get("entry")) != contents.end()); - ASSERT_TRUE(contents.find(blks.Get("exit")) != contents.end()); -} - -// +------------------------------------+ -// | | -// | +-------+ +-------+ | -// | | right | <-- | entry | | -// | +-------+ +-------+ | -// | | | | -// | | | | -// | | v | -// | | +-------+ +--------+ -// +----+---------> | l1 | --> | l1loop | -// | +-------+ +--------+ -// | | -// | | -// | v -// | +- - - - - -+ -// | ' removed ' -// | ' ' -// | ' +-------+ ' -// | ' | l2 | ' -// | ' +-------+ ' -// | ' ' -// | +- - - - - -+ -// | | -// | | -// | v -// | +-------+ -// +---------> | exit | -// +-------+ -TEST_F(ExecutionSubgraphTest, PropagationLoop) { - AdjacencyListGraph blks(SetupFromAdjacencyList("entry", - "exit", - { { "entry", "l1" }, - { "l1", "l2" }, - { "l1", "l1loop" }, - { "l1loop", "l1" }, - { "l2", "exit" }, - { "entry", "right" }, - { "right", "exit" } })); - ASSERT_TRUE(ExecutionSubgraph::CanAnalyse(graph_)); - ExecutionSubgraph esg(graph_, GetScopedAllocator()); - esg.RemoveBlock(blks.Get("l2")); - esg.Finalize(); - ASSERT_TRUE(esg.IsValid()); - ASSERT_TRUE(IsValidSubgraph(esg)); - std::unordered_set<const HBasicBlock*> contents(esg.ReachableBlocks().begin(), - esg.ReachableBlocks().end()); - - ASSERT_EQ(contents.size(), 5u); - - // Not present, no path through. - ASSERT_TRUE(contents.find(blks.Get("l2")) == contents.end()); - - // present, path through. - // Since the loop can diverge we should leave it in the execution subgraph. - ASSERT_TRUE(contents.find(blks.Get("l1")) != contents.end()); - ASSERT_TRUE(contents.find(blks.Get("l1loop")) != contents.end()); - ASSERT_TRUE(contents.find(blks.Get("right")) != contents.end()); - ASSERT_TRUE(contents.find(blks.Get("entry")) != contents.end()); - ASSERT_TRUE(contents.find(blks.Get("exit")) != contents.end()); -} - -// +--------------------------------+ -// | | -// | +-------+ +-------+ | -// | | right | <-- | entry | | -// | +-------+ +-------+ | -// | | | | -// | | | | -// | | v | -// | | +-------+ +--------+ -// +----+---------> | l1 | --> | l1loop | -// | +-------+ +--------+ -// | | -// | | -// | v -// | +-------+ -// | | l2 | -// | +-------+ -// | | -// | | -// | v -// | +-------+ -// +---------> | exit | -// +-------+ -TEST_F(ExecutionSubgraphTest, PropagationLoop2) { - AdjacencyListGraph blks(SetupFromAdjacencyList("entry", - "exit", - { { "entry", "l1" }, - { "l1", "l2" }, - { "l1", "l1loop" }, - { "l1loop", "l1" }, - { "l2", "exit" }, - { "entry", "right" }, - { "right", "exit" } })); - ASSERT_TRUE(ExecutionSubgraph::CanAnalyse(graph_)); - ExecutionSubgraph esg(graph_, GetScopedAllocator()); - esg.RemoveBlock(blks.Get("l1")); - esg.Finalize(); - ASSERT_TRUE(esg.IsValid()); - ASSERT_TRUE(IsValidSubgraph(esg)); - std::unordered_set<const HBasicBlock*> contents(esg.ReachableBlocks().begin(), - esg.ReachableBlocks().end()); - - ASSERT_EQ(contents.size(), 3u); - - // Not present, no path through. - ASSERT_TRUE(contents.find(blks.Get("l1")) == contents.end()); - ASSERT_TRUE(contents.find(blks.Get("l1loop")) == contents.end()); - ASSERT_TRUE(contents.find(blks.Get("l2")) == contents.end()); - - // present, path through. 
- ASSERT_TRUE(contents.find(blks.Get("right")) != contents.end()); - ASSERT_TRUE(contents.find(blks.Get("entry")) != contents.end()); - ASSERT_TRUE(contents.find(blks.Get("exit")) != contents.end()); -} - -// +--------------------------------+ -// | | -// | +-------+ +-------+ | -// | | right | <-- | entry | | -// | +-------+ +-------+ | -// | | | | -// | | | | -// | | v | -// | | +-------+ +--------+ -// +----+---------> | l1 | --> | l1loop | -// | +-------+ +--------+ -// | | -// | | -// | v -// | +-------+ -// | | l2 | -// | +-------+ -// | | -// | | -// | v -// | +-------+ -// +---------> | exit | -// +-------+ -TEST_F(ExecutionSubgraphTest, PropagationLoop3) { - AdjacencyListGraph blks(SetupFromAdjacencyList("entry", - "exit", - { { "entry", "l1" }, - { "l1", "l2" }, - { "l1", "l1loop" }, - { "l1loop", "l1" }, - { "l2", "exit" }, - { "entry", "right" }, - { "right", "exit" } })); - ASSERT_TRUE(ExecutionSubgraph::CanAnalyse(graph_)); - ExecutionSubgraph esg(graph_, GetScopedAllocator()); - esg.RemoveBlock(blks.Get("l1loop")); - esg.Finalize(); - ASSERT_TRUE(esg.IsValid()); - ASSERT_TRUE(IsValidSubgraph(esg)); - std::unordered_set<const HBasicBlock*> contents(esg.ReachableBlocks().begin(), - esg.ReachableBlocks().end()); - - ASSERT_EQ(contents.size(), 3u); - - // Not present, no path through. If we got to l1 loop then we must merge back - // with l1 and l2 so they're bad too. - ASSERT_TRUE(contents.find(blks.Get("l1loop")) == contents.end()); - ASSERT_TRUE(contents.find(blks.Get("l1")) == contents.end()); - ASSERT_TRUE(contents.find(blks.Get("l2")) == contents.end()); - - // present, path through. - ASSERT_TRUE(contents.find(blks.Get("right")) != contents.end()); - ASSERT_TRUE(contents.find(blks.Get("entry")) != contents.end()); - ASSERT_TRUE(contents.find(blks.Get("exit")) != contents.end()); -} - -// ┌───────┐ ┌──────────────┐ -// │ right │ ◀── │ entry │ -// └───────┘ └──────────────┘ -// │ │ -// │ │ -// ▼ ▼ -// ┌────┐ ┌───────┐ ┌──────────────┐ -// │ l2 │ ──▶ │ exit │ ┌─ │ l1 │ ◀┐ -// └────┘ └───────┘ │ └──────────────┘ │ -// ▲ │ │ │ -// └───────────────────┘ │ │ -// ▼ │ -// ┌──────────────┐ │ ┌──────────────┐ -// ┌─ │ l1loop │ │ │ l1loop_right │ ◀┐ -// │ └──────────────┘ │ └──────────────┘ │ -// │ │ │ │ │ -// │ │ │ │ │ -// │ ▼ │ │ │ -// │ ┌−−−−−−−−−−−−−−−−−−┐ │ │ │ -// │ ╎ removed ╎ │ │ │ -// │ ╎ ╎ │ │ │ -// │ ╎ ┌──────────────┐ ╎ │ │ │ -// │ ╎ │ l1loop_left │ ╎ │ │ │ -// │ ╎ └──────────────┘ ╎ │ │ │ -// │ ╎ ╎ │ │ │ -// │ └−−−−−−−−−−−−−−−−−−┘ │ │ │ -// │ │ │ │ │ -// │ │ │ │ │ -// │ ▼ │ │ │ -// │ ┌──────────────┐ │ │ │ -// │ │ l1loop_merge │ ─┘ │ │ -// │ └──────────────┘ │ │ -// │ ▲ │ │ -// │ └──────────────────────┘ │ -// │ │ -// │ │ -// └─────────────────────────────────────────────┘ - -TEST_F(ExecutionSubgraphTest, PropagationLoop4) { - AdjacencyListGraph blks(SetupFromAdjacencyList("entry", - "exit", - {{"entry", "l1"}, - {"l1", "l2"}, - {"l1", "l1loop"}, - {"l1loop", "l1loop_left"}, - {"l1loop", "l1loop_right"}, - {"l1loop_left", "l1loop_merge"}, - {"l1loop_right", "l1loop_merge"}, - {"l1loop_merge", "l1"}, - {"l2", "exit"}, - {"entry", "right"}, - {"right", "exit"}})); - ASSERT_TRUE(ExecutionSubgraph::CanAnalyse(graph_)); - ExecutionSubgraph esg(graph_, GetScopedAllocator()); - esg.RemoveBlock(blks.Get("l1loop_left")); - esg.Finalize(); - ASSERT_TRUE(esg.IsValid()); - ASSERT_TRUE(IsValidSubgraph(esg)); - std::unordered_set<const HBasicBlock*> contents(esg.ReachableBlocks().begin(), - esg.ReachableBlocks().end()); - - ASSERT_EQ(contents.size(), 3u); - - // Not present, no path 
through. If we got to l1 loop then we must merge back - // with l1 and l2 so they're bad too. - ASSERT_TRUE(contents.find(blks.Get("l1loop")) == contents.end()); - ASSERT_TRUE(contents.find(blks.Get("l1")) == contents.end()); - ASSERT_TRUE(contents.find(blks.Get("l1loop_left")) == contents.end()); - ASSERT_TRUE(contents.find(blks.Get("l1loop_right")) == contents.end()); - ASSERT_TRUE(contents.find(blks.Get("l1loop_merge")) == contents.end()); - ASSERT_TRUE(contents.find(blks.Get("l2")) == contents.end()); - - // present, path through. - ASSERT_TRUE(contents.find(blks.Get("right")) != contents.end()); - ASSERT_TRUE(contents.find(blks.Get("entry")) != contents.end()); - ASSERT_TRUE(contents.find(blks.Get("exit")) != contents.end()); -} - -// +------------------------------------------------------+ -// | | -// | +--------------+ +-------------+ | -// | | right | <-- | entry | | -// | +--------------+ +-------------+ | -// | | | | -// | | | | -// | v v | -// | +--------------+ +--------------------+ +----+ -// +> | exit | +> | l1 | --> | l2 | -// +--------------+ | +--------------------+ +----+ -// | | ^ -// +---------------+ | | -// | v | -// +--------------+ +-------------+ | -// | l1loop_right | <-- | l1loop | | -// +--------------+ +-------------+ | -// | | -// | | -// v | -// + - - - - - - - - + | -// ' removed ' | -// ' ' | -// ' +-------------+ ' | -// ' | l1loop_left | ' -+ -// ' +-------------+ ' -// ' ' -// + - - - - - - - - + -TEST_F(ExecutionSubgraphTest, PropagationLoop5) { - AdjacencyListGraph blks(SetupFromAdjacencyList("entry", - "exit", - {{"entry", "l1"}, - {"l1", "l2"}, - {"l1", "l1loop"}, - {"l1loop", "l1loop_left"}, - {"l1loop", "l1loop_right"}, - {"l1loop_left", "l1"}, - {"l1loop_right", "l1"}, - {"l2", "exit"}, - {"entry", "right"}, - {"right", "exit"}})); - ASSERT_TRUE(ExecutionSubgraph::CanAnalyse(graph_)); - ExecutionSubgraph esg(graph_, GetScopedAllocator()); - esg.RemoveBlock(blks.Get("l1loop_left")); - esg.Finalize(); - ASSERT_TRUE(esg.IsValid()); - ASSERT_TRUE(IsValidSubgraph(esg)); - std::unordered_set<const HBasicBlock*> contents(esg.ReachableBlocks().begin(), - esg.ReachableBlocks().end()); - - ASSERT_EQ(contents.size(), 3u); - - // Not present, no path through. If we got to l1 loop then we must merge back - // with l1 and l2 so they're bad too. - ASSERT_TRUE(contents.find(blks.Get("l1loop")) == contents.end()); - ASSERT_TRUE(contents.find(blks.Get("l1")) == contents.end()); - ASSERT_TRUE(contents.find(blks.Get("l1loop_left")) == contents.end()); - ASSERT_TRUE(contents.find(blks.Get("l1loop_right")) == contents.end()); - ASSERT_TRUE(contents.find(blks.Get("l2")) == contents.end()); - - // present, path through. 
- ASSERT_TRUE(contents.find(blks.Get("right")) != contents.end()); - ASSERT_TRUE(contents.find(blks.Get("entry")) != contents.end()); - ASSERT_TRUE(contents.find(blks.Get("exit")) != contents.end()); -} - -TEST_F(ExecutionSubgraphTest, Invalid) { - AdjacencyListGraph blks(SetupFromAdjacencyList( - "entry", - "exit", - { { "entry", "left" }, { "entry", "right" }, { "left", "exit" }, { "right", "exit" } })); - ASSERT_TRUE(ExecutionSubgraph::CanAnalyse(graph_)); - ExecutionSubgraph esg(graph_, GetScopedAllocator()); - esg.RemoveBlock(blks.Get("left")); - esg.RemoveBlock(blks.Get("right")); - esg.Finalize(); - - ASSERT_FALSE(esg.IsValid()); - ASSERT_FALSE(IsValidSubgraph(esg)); - std::unordered_set<const HBasicBlock*> contents(esg.ReachableBlocks().begin(), - esg.ReachableBlocks().end()); - - ASSERT_EQ(contents.size(), 0u); -} -// Sibling branches are disconnected. -TEST_F(ExecutionSubgraphTest, Exclusions) { - AdjacencyListGraph blks(SetupFromAdjacencyList("entry", - "exit", - { { "entry", "a" }, - { "entry", "b" }, - { "entry", "c" }, - { "a", "exit" }, - { "b", "exit" }, - { "c", "exit" } })); - ASSERT_TRUE(ExecutionSubgraph::CanAnalyse(graph_)); - ExecutionSubgraph esg(graph_, GetScopedAllocator()); - esg.RemoveBlock(blks.Get("a")); - esg.RemoveBlock(blks.Get("c")); - esg.Finalize(); - ASSERT_TRUE(esg.IsValid()); - ASSERT_TRUE(IsValidSubgraph(esg)); - std::unordered_set<const HBasicBlock*> contents(esg.ReachableBlocks().begin(), - esg.ReachableBlocks().end()); - - ASSERT_EQ(contents.size(), 3u); - // Not present, no path through. - ASSERT_TRUE(contents.find(blks.Get("a")) == contents.end()); - ASSERT_TRUE(contents.find(blks.Get("c")) == contents.end()); - - // present, path through. - ASSERT_TRUE(contents.find(blks.Get("entry")) != contents.end()); - ASSERT_TRUE(contents.find(blks.Get("exit")) != contents.end()); - ASSERT_TRUE(contents.find(blks.Get("b")) != contents.end()); - - ArrayRef<const ExecutionSubgraph::ExcludedCohort> exclusions(esg.GetExcludedCohorts()); - ASSERT_EQ(exclusions.size(), 2u); - std::unordered_set<const HBasicBlock*> exclude_a({ blks.Get("a") }); - std::unordered_set<const HBasicBlock*> exclude_c({ blks.Get("c") }); - ASSERT_TRUE(std::find_if(exclusions.cbegin(), - exclusions.cend(), - [&](const ExecutionSubgraph::ExcludedCohort& it) { - return it.Blocks() == exclude_a; - }) != exclusions.cend()); - ASSERT_TRUE(std::find_if(exclusions.cbegin(), - exclusions.cend(), - [&](const ExecutionSubgraph::ExcludedCohort& it) { - return it.Blocks() == exclude_c; - }) != exclusions.cend()); -} - -// Sibling branches are disconnected. 
-// +- - - - - - - - - - - - - - - - - - - - - - + -// ' remove_c ' -// ' ' -// ' +-----------+ ' -// ' | c_begin_2 | -------------------------+ ' -// ' +-----------+ | ' -// ' | ' -// +- - - - - - - - - - - - - - - - - - | ' -// ^ ' | ' -// | ' | ' -// | ' | ' -// + - - - - - -+ ' | ' -// ' remove_a ' ' | ' -// ' ' ' | ' -// ' +--------+ ' +-----------+ +---+' | ' -// ' | **a** | ' <-- | entry | --> | b |' | ' -// ' +--------+ ' +-----------+ +---+' | ' -// ' ' ' | ' -// + - - - - - -+ ' | ' -// | | | ' | ' -// | | | ' | ' -// | v | ' | ' -// | +- - - - - - - -+ | ' | ' -// | ' ' | ' | ' -// | ' +-----------+ ' | ' | ' -// | ' | c_begin_1 | ' | ' | ' -// | ' +-----------+ ' | ' | ' -// | ' | ' | ' | ' -// | ' | ' | ' | ' -// | ' | ' | ' | ' -// + - - - - - - - - -+ | + - - - | - - - - - - - + | ' | ' -// ' ' | + v ' | + | ' -// ' +---------+ | +-----------+ | | ' -// ' | c_end_2 | <-------+--------------- | **c_mid** | <-----------------+------+ ' -// ' +---------+ | +-----------+ | ' -// ' ' | + | ' | + ' -// + - - - - - - - - -+ | + - - - | - - - - - - - + | + - - - + -// | | ' | ' | -// | | ' | ' | -// | | ' v ' | -// | | ' +-----------+ ' | -// | | ' | c_end_1 | ' | -// | | ' +-----------+ ' | -// | | ' ' | -// | | +- - - - - - - -+ | -// | | | | -// | | | | -// | | v v -// | | +---------------------------------+ -// | +------------> | exit | -// | +---------------------------------+ -// | ^ -// +------------------------------------+ -TEST_F(ExecutionSubgraphTest, ExclusionExtended) { - AdjacencyListGraph blks(SetupFromAdjacencyList("entry", - "exit", - { { "entry", "a" }, - { "entry", "b" }, - { "entry", "c_begin_1" }, - { "entry", "c_begin_2" }, - { "c_begin_1", "c_mid" }, - { "c_begin_2", "c_mid" }, - { "c_mid", "c_end_1" }, - { "c_mid", "c_end_2" }, - { "a", "exit" }, - { "b", "exit" }, - { "c_end_1", "exit" }, - { "c_end_2", "exit" } })); - ASSERT_TRUE(ExecutionSubgraph::CanAnalyse(graph_)); - ExecutionSubgraph esg(graph_, GetScopedAllocator()); - esg.RemoveBlock(blks.Get("a")); - esg.RemoveBlock(blks.Get("c_mid")); - esg.Finalize(); - ASSERT_TRUE(esg.IsValid()); - ASSERT_TRUE(IsValidSubgraph(esg)); - std::unordered_set<const HBasicBlock*> contents(esg.ReachableBlocks().begin(), - esg.ReachableBlocks().end()); - - ASSERT_EQ(contents.size(), 3u); - // Not present, no path through. - ASSERT_TRUE(contents.find(blks.Get("a")) == contents.end()); - ASSERT_TRUE(contents.find(blks.Get("c_begin_1")) == contents.end()); - ASSERT_TRUE(contents.find(blks.Get("c_begin_2")) == contents.end()); - ASSERT_TRUE(contents.find(blks.Get("c_mid")) == contents.end()); - ASSERT_TRUE(contents.find(blks.Get("c_end_1")) == contents.end()); - ASSERT_TRUE(contents.find(blks.Get("c_end_2")) == contents.end()); - - // present, path through. 
- ASSERT_TRUE(contents.find(blks.Get("entry")) != contents.end()); - ASSERT_TRUE(contents.find(blks.Get("exit")) != contents.end()); - ASSERT_TRUE(contents.find(blks.Get("b")) != contents.end()); - - ArrayRef<const ExecutionSubgraph::ExcludedCohort> exclusions(esg.GetExcludedCohorts()); - ASSERT_EQ(exclusions.size(), 2u); - BlockSet exclude_a({ blks.Get("a") }); - BlockSet exclude_c({ blks.Get("c_begin_1"), - blks.Get("c_begin_2"), - blks.Get("c_mid"), - blks.Get("c_end_1"), - blks.Get("c_end_2") }); - ASSERT_TRUE(std::find_if(exclusions.cbegin(), - exclusions.cend(), - [&](const ExecutionSubgraph::ExcludedCohort& it) { - return it.Blocks() == exclude_a; - }) != exclusions.cend()); - ASSERT_TRUE( - std::find_if( - exclusions.cbegin(), exclusions.cend(), [&](const ExecutionSubgraph::ExcludedCohort& it) { - return it.Blocks() == exclude_c && - BlockSet({ blks.Get("c_begin_1"), blks.Get("c_begin_2") }) == it.EntryBlocks() && - BlockSet({ blks.Get("c_end_1"), blks.Get("c_end_2") }) == it.ExitBlocks(); - }) != exclusions.cend()); -} - -// ┌───────┐ ┌────────────┐ -// ┌─ │ right │ ◀── │ entry │ -// │ └───────┘ └────────────┘ -// │ │ -// │ │ -// │ ▼ -// │ ┌────────────┐ -// │ │ esc_top │ -// │ └────────────┘ -// │ │ -// │ │ -// │ ▼ -// │ ┌────────────┐ -// └──────────────▶ │ middle │ ─┐ -// └────────────┘ │ -// │ │ -// │ │ -// ▼ │ -// ┌────────────┐ │ -// │ esc_bottom │ │ -// └────────────┘ │ -// │ │ -// │ │ -// ▼ │ -// ┌────────────┐ │ -// │ exit │ ◀┘ -// └────────────┘ -TEST_F(ExecutionSubgraphTest, InAndOutEscape) { - AdjacencyListGraph blks(SetupFromAdjacencyList("entry", - "exit", - { { "entry", "esc_top" }, - { "entry", "right" }, - { "esc_top", "middle" }, - { "right", "middle" }, - { "middle", "exit" }, - { "middle", "esc_bottom" }, - { "esc_bottom", "exit" } })); - - ASSERT_TRUE(ExecutionSubgraph::CanAnalyse(graph_)); - ExecutionSubgraph esg(graph_, GetScopedAllocator()); - esg.RemoveBlock(blks.Get("esc_top")); - esg.RemoveBlock(blks.Get("esc_bottom")); - esg.Finalize(); - - std::unordered_set<const HBasicBlock*> contents(esg.ReachableBlocks().begin(), - esg.ReachableBlocks().end()); - ASSERT_EQ(contents.size(), 0u); - ASSERT_FALSE(esg.IsValid()); - ASSERT_FALSE(IsValidSubgraph(esg)); - - ASSERT_EQ(contents.size(), 0u); -} - -// Test with max number of successors and no removals. -TEST_F(ExecutionSubgraphTest, BigNodes) { - std::vector<std::string> mid_blocks; - for (auto i : Range(ExecutionSubgraph::kMaxFilterableSuccessors)) { - std::ostringstream oss; - oss << "blk" << i; - mid_blocks.push_back(oss.str().c_str()); - } - ASSERT_EQ(mid_blocks.size(), ExecutionSubgraph::kMaxFilterableSuccessors); - std::vector<AdjacencyListGraph::Edge> edges; - for (const auto& mid : mid_blocks) { - edges.emplace_back("entry", mid); - edges.emplace_back(mid, "exit"); - } - AdjacencyListGraph blks(SetupFromAdjacencyList("entry", "exit", edges)); - ASSERT_TRUE(ExecutionSubgraph::CanAnalyse(graph_)); - ExecutionSubgraph esg(graph_, GetScopedAllocator()); - esg.Finalize(); - ASSERT_TRUE(esg.IsValid()); - ASSERT_TRUE(IsValidSubgraph(esg)); - std::unordered_set<const HBasicBlock*> contents(esg.ReachableBlocks().begin(), - esg.ReachableBlocks().end()); - - for (const auto& mid : mid_blocks) { - EXPECT_TRUE(contents.find(blks.Get(mid)) != contents.end()) << mid; - } - // + 2 for entry and exit nodes. - ASSERT_EQ(contents.size(), ExecutionSubgraph::kMaxFilterableSuccessors + 2); -} - -// Test with max number of successors and some removals. 
-TEST_F(ExecutionSubgraphTest, BigNodesMissing) { - std::vector<std::string> mid_blocks; - for (auto i : Range(ExecutionSubgraph::kMaxFilterableSuccessors)) { - std::ostringstream oss; - oss << "blk" << i; - mid_blocks.push_back(oss.str()); - } - std::vector<AdjacencyListGraph::Edge> edges; - for (const auto& mid : mid_blocks) { - edges.emplace_back("entry", mid); - edges.emplace_back(mid, "exit"); - } - AdjacencyListGraph blks(SetupFromAdjacencyList("entry", "exit", edges)); - ASSERT_TRUE(ExecutionSubgraph::CanAnalyse(graph_)); - ExecutionSubgraph esg(graph_, GetScopedAllocator()); - esg.RemoveBlock(blks.Get("blk2")); - esg.RemoveBlock(blks.Get("blk4")); - esg.Finalize(); - ASSERT_TRUE(esg.IsValid()); - ASSERT_TRUE(IsValidSubgraph(esg)); - std::unordered_set<const HBasicBlock*> contents(esg.ReachableBlocks().begin(), - esg.ReachableBlocks().end()); - - ASSERT_EQ(contents.size(), ExecutionSubgraph::kMaxFilterableSuccessors + 2 - 2); - - // Not present, no path through. - ASSERT_TRUE(contents.find(blks.Get("blk2")) == contents.end()); - ASSERT_TRUE(contents.find(blks.Get("blk4")) == contents.end()); -} - -// Test with max number of successors and all successors removed. -TEST_F(ExecutionSubgraphTest, BigNodesNoPath) { - std::vector<std::string> mid_blocks; - for (auto i : Range(ExecutionSubgraph::kMaxFilterableSuccessors)) { - std::ostringstream oss; - oss << "blk" << i; - mid_blocks.push_back(oss.str()); - } - std::vector<AdjacencyListGraph::Edge> edges; - for (const auto& mid : mid_blocks) { - edges.emplace_back("entry", mid); - edges.emplace_back(mid, "exit"); - } - AdjacencyListGraph blks(SetupFromAdjacencyList("entry", "exit", edges)); - ASSERT_TRUE(ExecutionSubgraph::CanAnalyse(graph_)); - ExecutionSubgraph esg(graph_, GetScopedAllocator()); - for (const auto& mid : mid_blocks) { - esg.RemoveBlock(blks.Get(mid)); - } - esg.Finalize(); - ASSERT_FALSE(esg.IsValid()); - ASSERT_FALSE(IsValidSubgraph(esg)); -} - -// Test with max number of successors -TEST_F(ExecutionSubgraphTest, CanAnalyseBig) { - // Make an absurdly huge and well connected graph. This should be pretty worst-case scenario. - constexpr size_t kNumBlocks = ExecutionSubgraph::kMaxFilterableSuccessors + 1000; - std::vector<std::string> mid_blocks; - for (auto i : Range(kNumBlocks)) { - std::ostringstream oss; - oss << "blk" << i; - mid_blocks.push_back(oss.str()); - } - std::vector<AdjacencyListGraph::Edge> edges; - for (auto cur : Range(kNumBlocks)) { - for (auto nxt : - Range(cur + 1, - std::min(cur + ExecutionSubgraph::kMaxFilterableSuccessors + 1, kNumBlocks))) { - edges.emplace_back(mid_blocks[cur], mid_blocks[nxt]); - } - } - AdjacencyListGraph blks(SetupFromAdjacencyList(mid_blocks.front(), mid_blocks.back(), edges)); - ASSERT_TRUE(ExecutionSubgraph::CanAnalyse(graph_)); - - ExecutionSubgraph esg(graph_, GetScopedAllocator()); - esg.Finalize(); - ASSERT_TRUE(esg.IsValid()); - ASSERT_TRUE(IsValidSubgraph(esg)); - std::unordered_set<const HBasicBlock*> contents(esg.ReachableBlocks().begin(), - esg.ReachableBlocks().end()); - - ASSERT_EQ(contents.size(), kNumBlocks); -} - -// Test with many successors -TEST_F(ExecutionSubgraphTest, CanAnalyseBig2) { - // Make an absurdly huge and well connected graph. This should be pretty worst-case scenario. 
- constexpr size_t kNumBlocks = ExecutionSubgraph::kMaxFilterableSuccessors + 1000; - constexpr size_t kTestMaxSuccessors = ExecutionSubgraph::kMaxFilterableSuccessors - 1; - std::vector<std::string> mid_blocks; - for (auto i : Range(kNumBlocks)) { - std::ostringstream oss; - oss << "blk" << i; - mid_blocks.push_back(oss.str()); - } - std::vector<AdjacencyListGraph::Edge> edges; - for (auto cur : Range(kNumBlocks)) { - for (auto nxt : Range(cur + 1, std::min(cur + 1 + kTestMaxSuccessors, kNumBlocks))) { - edges.emplace_back(mid_blocks[cur], mid_blocks[nxt]); - } - } - edges.emplace_back(mid_blocks.front(), mid_blocks.back()); - AdjacencyListGraph blks(SetupFromAdjacencyList(mid_blocks.front(), mid_blocks.back(), edges)); - ASSERT_TRUE(ExecutionSubgraph::CanAnalyse(graph_)); - ExecutionSubgraph esg(graph_, GetScopedAllocator()); - constexpr size_t kToRemoveIdx = kNumBlocks / 2; - HBasicBlock* remove_implicit = blks.Get(mid_blocks[kToRemoveIdx]); - for (HBasicBlock* pred : remove_implicit->GetPredecessors()) { - esg.RemoveBlock(pred); - } - esg.Finalize(); - EXPECT_TRUE(esg.IsValid()); - EXPECT_TRUE(IsValidSubgraph(esg)); - std::unordered_set<const HBasicBlock*> contents(esg.ReachableBlocks().begin(), - esg.ReachableBlocks().end()); - - // Only entry and exit. The middle ones should eliminate everything else. - EXPECT_EQ(contents.size(), 2u); - EXPECT_TRUE(contents.find(remove_implicit) == contents.end()); - EXPECT_TRUE(contents.find(blks.Get(mid_blocks.front())) != contents.end()); - EXPECT_TRUE(contents.find(blks.Get(mid_blocks.back())) != contents.end()); -} - -// Test with too many successors -TEST_F(ExecutionSubgraphTest, CanNotAnalyseBig) { - std::vector<std::string> mid_blocks; - for (auto i : Range(ExecutionSubgraph::kMaxFilterableSuccessors + 4)) { - std::ostringstream oss; - oss << "blk" << i; - mid_blocks.push_back(oss.str()); - } - std::vector<AdjacencyListGraph::Edge> edges; - for (const auto& mid : mid_blocks) { - edges.emplace_back("entry", mid); - edges.emplace_back(mid, "exit"); - } - AdjacencyListGraph blks(SetupFromAdjacencyList("entry", "exit", edges)); - ASSERT_FALSE(ExecutionSubgraph::CanAnalyse(graph_)); -} -} // namespace art diff --git a/compiler/optimizing/execution_subgraph_test.h b/compiler/optimizing/execution_subgraph_test.h deleted file mode 100644 index cee105a045..0000000000 --- a/compiler/optimizing/execution_subgraph_test.h +++ /dev/null @@ -1,40 +0,0 @@ -/* - * Copyright (C) 2020 The Android Open Source Project - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#ifndef ART_COMPILER_OPTIMIZING_EXECUTION_SUBGRAPH_TEST_H_ -#define ART_COMPILER_OPTIMIZING_EXECUTION_SUBGRAPH_TEST_H_ - -#include "android-base/macros.h" - -#include "base/macros.h" - -namespace art HIDDEN { - -class HGraph; -class ExecutionSubgraph; - -class ExecutionSubgraphTestHelper { - public: - static bool CalculateValidity(HGraph* graph, const ExecutionSubgraph* subgraph); - - private: - ExecutionSubgraphTestHelper() = delete; - - DISALLOW_COPY_AND_ASSIGN(ExecutionSubgraphTestHelper); -}; -} // namespace art - -#endif // ART_COMPILER_OPTIMIZING_EXECUTION_SUBGRAPH_TEST_H_ diff --git a/compiler/optimizing/graph_checker.cc b/compiler/optimizing/graph_checker.cc index 190b362145..e8c94dd6b4 100644 --- a/compiler/optimizing/graph_checker.cc +++ b/compiler/optimizing/graph_checker.cc @@ -28,6 +28,7 @@ #include "code_generator.h" #include "handle.h" #include "mirror/class.h" +#include "nodes.h" #include "obj_ptr-inl.h" #include "scoped_thread_state_change-inl.h" #include "subtype_check.h" @@ -168,52 +169,68 @@ void GraphChecker::CheckGraphFlags() { void GraphChecker::VisitBasicBlock(HBasicBlock* block) { current_block_ = block; - // Use local allocator for allocating memory. - ScopedArenaAllocator allocator(GetGraph()->GetArenaStack()); - - // Check consistency with respect to predecessors of `block`. - // Note: Counting duplicates with a sorted vector uses up to 6x less memory - // than ArenaSafeMap<HBasicBlock*, size_t> and also allows storage reuse. - ScopedArenaVector<HBasicBlock*> sorted_predecessors(allocator.Adapter(kArenaAllocGraphChecker)); - sorted_predecessors.assign(block->GetPredecessors().begin(), block->GetPredecessors().end()); - std::sort(sorted_predecessors.begin(), sorted_predecessors.end()); - for (auto it = sorted_predecessors.begin(), end = sorted_predecessors.end(); it != end; ) { - HBasicBlock* p = *it++; - size_t p_count_in_block_predecessors = 1u; - for (; it != end && *it == p; ++it) { - ++p_count_in_block_predecessors; - } - size_t block_count_in_p_successors = - std::count(p->GetSuccessors().begin(), p->GetSuccessors().end(), block); - if (p_count_in_block_predecessors != block_count_in_p_successors) { - AddError(StringPrintf( - "Block %d lists %zu occurrences of block %d in its predecessors, whereas " - "block %d lists %zu occurrences of block %d in its successors.", - block->GetBlockId(), p_count_in_block_predecessors, p->GetBlockId(), - p->GetBlockId(), block_count_in_p_successors, block->GetBlockId())); - } - } + { + // Use local allocator for allocating memory. We use C++ scopes (i.e. `{}`) to reclaim the + // memory as soon as possible, and to end the scope of this `ScopedArenaAllocator`. + ScopedArenaAllocator allocator(GetGraph()->GetArenaStack()); - // Check consistency with respect to successors of `block`. - // Note: Counting duplicates with a sorted vector uses up to 6x less memory - // than ArenaSafeMap<HBasicBlock*, size_t> and also allows storage reuse. - ScopedArenaVector<HBasicBlock*> sorted_successors(allocator.Adapter(kArenaAllocGraphChecker)); - sorted_successors.assign(block->GetSuccessors().begin(), block->GetSuccessors().end()); - std::sort(sorted_successors.begin(), sorted_successors.end()); - for (auto it = sorted_successors.begin(), end = sorted_successors.end(); it != end; ) { - HBasicBlock* s = *it++; - size_t s_count_in_block_successors = 1u; - for (; it != end && *it == s; ++it) { - ++s_count_in_block_successors; + { + // Check consistency with respect to predecessors of `block`. 
+ // Note: Counting duplicates with a sorted vector uses up to 6x less memory + // than ArenaSafeMap<HBasicBlock*, size_t> and also allows storage reuse. + ScopedArenaVector<HBasicBlock*> sorted_predecessors( + allocator.Adapter(kArenaAllocGraphChecker)); + sorted_predecessors.assign(block->GetPredecessors().begin(), block->GetPredecessors().end()); + std::sort(sorted_predecessors.begin(), sorted_predecessors.end()); + for (auto it = sorted_predecessors.begin(), end = sorted_predecessors.end(); it != end;) { + HBasicBlock* p = *it++; + size_t p_count_in_block_predecessors = 1u; + for (; it != end && *it == p; ++it) { + ++p_count_in_block_predecessors; + } + size_t block_count_in_p_successors = + std::count(p->GetSuccessors().begin(), p->GetSuccessors().end(), block); + if (p_count_in_block_predecessors != block_count_in_p_successors) { + AddError(StringPrintf( + "Block %d lists %zu occurrences of block %d in its predecessors, whereas " + "block %d lists %zu occurrences of block %d in its successors.", + block->GetBlockId(), + p_count_in_block_predecessors, + p->GetBlockId(), + p->GetBlockId(), + block_count_in_p_successors, + block->GetBlockId())); + } + } } - size_t block_count_in_s_predecessors = - std::count(s->GetPredecessors().begin(), s->GetPredecessors().end(), block); - if (s_count_in_block_successors != block_count_in_s_predecessors) { - AddError(StringPrintf( - "Block %d lists %zu occurrences of block %d in its successors, whereas " - "block %d lists %zu occurrences of block %d in its predecessors.", - block->GetBlockId(), s_count_in_block_successors, s->GetBlockId(), - s->GetBlockId(), block_count_in_s_predecessors, block->GetBlockId())); + + { + // Check consistency with respect to successors of `block`. + // Note: Counting duplicates with a sorted vector uses up to 6x less memory + // than ArenaSafeMap<HBasicBlock*, size_t> and also allows storage reuse. + ScopedArenaVector<HBasicBlock*> sorted_successors(allocator.Adapter(kArenaAllocGraphChecker)); + sorted_successors.assign(block->GetSuccessors().begin(), block->GetSuccessors().end()); + std::sort(sorted_successors.begin(), sorted_successors.end()); + for (auto it = sorted_successors.begin(), end = sorted_successors.end(); it != end;) { + HBasicBlock* s = *it++; + size_t s_count_in_block_successors = 1u; + for (; it != end && *it == s; ++it) { + ++s_count_in_block_successors; + } + size_t block_count_in_s_predecessors = + std::count(s->GetPredecessors().begin(), s->GetPredecessors().end(), block); + if (s_count_in_block_successors != block_count_in_s_predecessors) { + AddError( + StringPrintf("Block %d lists %zu occurrences of block %d in its successors, whereas " + "block %d lists %zu occurrences of block %d in its predecessors.", + block->GetBlockId(), + s_count_in_block_successors, + s->GetBlockId(), + s->GetBlockId(), + block_count_in_s_predecessors, + block->GetBlockId())); + } + } } } @@ -506,6 +523,26 @@ void GraphChecker::VisitMonitorOperation(HMonitorOperation* monitor_op) { flag_info_.seen_monitor_operation = true; } +bool GraphChecker::ContainedInItsBlockList(HInstruction* instruction) { + HBasicBlock* block = instruction->GetBlock(); + ScopedArenaSafeMap<HBasicBlock*, ScopedArenaHashSet<HInstruction*>>& instruction_set = + instruction->IsPhi() ? phis_per_block_ : instructions_per_block_; + auto map_it = instruction_set.find(block); + if (map_it == instruction_set.end()) { + // Populate extra bookkeeping. 
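Editor's note: before the bookkeeping helper continues below, a standalone look at the duplicate-counting idiom noted above (a sorted vector instead of an ArenaSafeMap, using up to 6x less memory): sort once, then count runs of equal elements in a single pass. Plain STL types, illustrative names.

    #include <algorithm>
    #include <cstddef>
    #include <utility>
    #include <vector>

    // Count how many times each distinct element occurs by sorting and then
    // walking runs of equal values, instead of building a map of counters.
    std::vector<std::pair<int, size_t>> CountDuplicates(std::vector<int> values) {
      std::sort(values.begin(), values.end());
      std::vector<std::pair<int, size_t>> counts;
      for (auto it = values.begin(), end = values.end(); it != end; ) {
        int current = *it++;
        size_t count = 1u;
        for (; it != end && *it == current; ++it) {
          ++count;
        }
        counts.emplace_back(current, count);
      }
      return counts;
    }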
+ map_it = instruction_set.insert( + {block, ScopedArenaHashSet<HInstruction*>(allocator_.Adapter(kArenaAllocGraphChecker))}) + .first; + const HInstructionList& instruction_list = instruction->IsPhi() ? + instruction->GetBlock()->GetPhis() : + instruction->GetBlock()->GetInstructions(); + for (HInstructionIterator list_it(instruction_list); !list_it.Done(); list_it.Advance()) { + map_it->second.insert(list_it.Current()); + } + } + return map_it->second.find(instruction) != map_it->second.end(); +} + void GraphChecker::VisitInstruction(HInstruction* instruction) { if (seen_ids_.IsBitSet(instruction->GetId())) { AddError(StringPrintf("Instruction id %d is duplicate in graph.", @@ -528,23 +565,19 @@ void GraphChecker::VisitInstruction(HInstruction* instruction) { instruction->GetBlock()->GetBlockId())); } - // Ensure the inputs of `instruction` are defined in a block of the graph. + // Ensure the inputs of `instruction` are defined in a block of the graph, and the entry in the + // use list is consistent. for (HInstruction* input : instruction->GetInputs()) { if (input->GetBlock() == nullptr) { AddError(StringPrintf("Input %d of instruction %d is not in any " "basic block of the control-flow graph.", input->GetId(), instruction->GetId())); - } else { - const HInstructionList& list = input->IsPhi() - ? input->GetBlock()->GetPhis() - : input->GetBlock()->GetInstructions(); - if (!list.Contains(input)) { + } else if (!ContainedInItsBlockList(input)) { AddError(StringPrintf("Input %d of instruction %d is not defined " "in a basic block of the control-flow graph.", input->GetId(), instruction->GetId())); - } } } @@ -552,10 +585,7 @@ void GraphChecker::VisitInstruction(HInstruction* instruction) { // and the entry in the use list is consistent. for (const HUseListNode<HInstruction*>& use : instruction->GetUses()) { HInstruction* user = use.GetUser(); - const HInstructionList& list = user->IsPhi() - ? user->GetBlock()->GetPhis() - : user->GetBlock()->GetInstructions(); - if (!list.Contains(user)) { + if (!ContainedInItsBlockList(user)) { AddError(StringPrintf("User %s:%d of instruction %d is not defined " "in a basic block of the control-flow graph.", user->DebugName(), @@ -587,21 +617,38 @@ void GraphChecker::VisitInstruction(HInstruction* instruction) { } // Ensure 'instruction' has pointers to its inputs' use entries. - auto&& input_records = instruction->GetInputRecords(); - for (size_t i = 0; i < input_records.size(); ++i) { - const HUserRecord<HInstruction*>& input_record = input_records[i]; - HInstruction* input = input_record.GetInstruction(); - if ((input_record.GetBeforeUseNode() == input->GetUses().end()) || - (input_record.GetUseNode() == input->GetUses().end()) || - !input->GetUses().ContainsNode(*input_record.GetUseNode()) || - (input_record.GetUseNode()->GetIndex() != i)) { - AddError(StringPrintf("Instruction %s:%d has an invalid iterator before use entry " - "at input %u (%s:%d).", - instruction->DebugName(), - instruction->GetId(), - static_cast<unsigned>(i), - input->DebugName(), - input->GetId())); + { + auto&& input_records = instruction->GetInputRecords(); + for (size_t i = 0; i < input_records.size(); ++i) { + const HUserRecord<HInstruction*>& input_record = input_records[i]; + HInstruction* input = input_record.GetInstruction(); + + // Populate bookkeeping, if needed. See comment in graph_checker.h for uses_per_instruction_. 
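Editor's note: ContainedInItsBlockList above, and the uses_per_instruction_ bookkeeping populated just below, both rely on the same memoization pattern: build a hash set lazily on first query and reuse it for every later membership check. A self-contained version of the per-block variant, with plain STL containers and hypothetical Block/Instruction types:

    #include <unordered_map>
    #include <unordered_set>
    #include <vector>

    struct Instruction;
    struct Block { std::vector<Instruction*> instructions; };

    // Membership checker that builds one hash set per block on first query and
    // reuses it afterwards, turning repeated O(n) list scans into O(1) lookups.
    class BlockMembershipCache {
     public:
      bool Contains(Block* block, Instruction* instruction) {
        auto it = cache_.find(block);
        if (it == cache_.end()) {
          it = cache_.emplace(block, std::unordered_set<Instruction*>(
                                         block->instructions.begin(),
                                         block->instructions.end())).first;
        }
        return it->second.count(instruction) != 0;
      }

     private:
      std::unordered_map<Block*, std::unordered_set<Instruction*>> cache_;
    };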
+ auto it = uses_per_instruction_.find(input->GetId()); + if (it == uses_per_instruction_.end()) { + it = uses_per_instruction_ + .insert({input->GetId(), + ScopedArenaSet<const art::HUseListNode<art::HInstruction*>*>( + allocator_.Adapter(kArenaAllocGraphChecker))}) + .first; + for (auto&& use : input->GetUses()) { + it->second.insert(std::addressof(use)); + } + } + + if ((input_record.GetBeforeUseNode() == input->GetUses().end()) || + (input_record.GetUseNode() == input->GetUses().end()) || + (it->second.find(std::addressof(*input_record.GetUseNode())) == it->second.end()) || + (input_record.GetUseNode()->GetIndex() != i)) { + AddError( + StringPrintf("Instruction %s:%d has an invalid iterator before use entry " + "at input %u (%s:%d).", + instruction->DebugName(), + instruction->GetId(), + static_cast<unsigned>(i), + input->DebugName(), + input->GetId())); + } } } @@ -688,10 +735,59 @@ void GraphChecker::VisitInvoke(HInvoke* invoke) { } flag_info_.seen_always_throwing_invokes = true; } + + // Check for intrinsics which should have been replaced by intermediate representation in the + // instruction builder. + switch (invoke->GetIntrinsic()) { + case Intrinsics::kIntegerRotateRight: + case Intrinsics::kLongRotateRight: + case Intrinsics::kIntegerRotateLeft: + case Intrinsics::kLongRotateLeft: + case Intrinsics::kIntegerCompare: + case Intrinsics::kLongCompare: + case Intrinsics::kIntegerSignum: + case Intrinsics::kLongSignum: + case Intrinsics::kFloatIsNaN: + case Intrinsics::kDoubleIsNaN: + case Intrinsics::kStringIsEmpty: + case Intrinsics::kUnsafeLoadFence: + case Intrinsics::kUnsafeStoreFence: + case Intrinsics::kUnsafeFullFence: + case Intrinsics::kJdkUnsafeLoadFence: + case Intrinsics::kJdkUnsafeStoreFence: + case Intrinsics::kJdkUnsafeFullFence: + case Intrinsics::kVarHandleFullFence: + case Intrinsics::kVarHandleAcquireFence: + case Intrinsics::kVarHandleReleaseFence: + case Intrinsics::kVarHandleLoadLoadFence: + case Intrinsics::kVarHandleStoreStoreFence: + case Intrinsics::kMathMinIntInt: + case Intrinsics::kMathMinLongLong: + case Intrinsics::kMathMinFloatFloat: + case Intrinsics::kMathMinDoubleDouble: + case Intrinsics::kMathMaxIntInt: + case Intrinsics::kMathMaxLongLong: + case Intrinsics::kMathMaxFloatFloat: + case Intrinsics::kMathMaxDoubleDouble: + case Intrinsics::kMathAbsInt: + case Intrinsics::kMathAbsLong: + case Intrinsics::kMathAbsFloat: + case Intrinsics::kMathAbsDouble: + AddError( + StringPrintf("The graph contains an instrinsic which should have been replaced in the " + "instruction builder: %s:%d in block %d.", + invoke->DebugName(), + invoke->GetId(), + invoke->GetBlock()->GetBlockId())); + break; + default: + break; + } } void GraphChecker::VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) { - // We call VisitInvoke and not VisitInstruction to de-duplicate the always throwing code check. + // We call VisitInvoke and not VisitInstruction to de-duplicate the common code: always throwing + // and instrinsic checks. 
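Editor's note on the new intrinsics check above: the error fires because the instruction builder is expected to have lowered these calls to plain IR before later phases run. For the rotation family, the lowered semantics are simple enough to state directly; the sketch below only illustrates that equivalence (which exact HIR nodes the builder emits is outside this diff), and the delegating VisitInvoke call resumes right after it.

    #include <cstdint>

    // Java's Integer.rotateLeft has a direct shift/or expansion, which is the
    // kind of simple IR a builder can emit instead of keeping an intrinsic call
    // around. (32-bit shown; the Long variants are analogous.)
    constexpr uint32_t RotateLeft32(uint32_t value, uint32_t distance) {
      distance &= 31u;  // Java only uses the low 5 bits of the distance.
      if (distance == 0u) {
        return value;
      }
      return (value << distance) | (value >> (32u - distance));
    }

    static_assert(RotateLeft32(0x80000001u, 1) == 0x00000003u, "wrap-around");
    static_assert(RotateLeft32(0x12345678u, 0) == 0x12345678u, "identity");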
VisitInvoke(invoke); if (invoke->IsStaticWithExplicitClinitCheck()) { @@ -944,8 +1040,7 @@ static bool IsSameSizeConstant(const HInstruction* insn1, const HInstruction* in static bool IsConstantEquivalent(const HInstruction* insn1, const HInstruction* insn2, BitVector* visited) { - if (insn1->IsPhi() && - insn1->AsPhi()->IsVRegEquivalentOf(insn2)) { + if (insn1->IsPhi() && insn1->AsPhi()->IsVRegEquivalentOf(insn2)) { HConstInputsRef insn1_inputs = insn1->GetInputs(); HConstInputsRef insn2_inputs = insn2->GetInputs(); if (insn1_inputs.size() != insn2_inputs.size()) { diff --git a/compiler/optimizing/graph_checker.h b/compiler/optimizing/graph_checker.h index d6644f3b50..38e2d7ced9 100644 --- a/compiler/optimizing/graph_checker.h +++ b/compiler/optimizing/graph_checker.h @@ -22,7 +22,7 @@ #include "base/arena_bit_vector.h" #include "base/bit_vector-inl.h" #include "base/macros.h" -#include "base/scoped_arena_allocator.h" +#include "base/scoped_arena_containers.h" #include "nodes.h" namespace art HIDDEN { @@ -35,12 +35,15 @@ class GraphChecker : public HGraphDelegateVisitor { explicit GraphChecker(HGraph* graph, CodeGenerator* codegen = nullptr, const char* dump_prefix = "art::GraphChecker: ") - : HGraphDelegateVisitor(graph), - errors_(graph->GetAllocator()->Adapter(kArenaAllocGraphChecker)), - dump_prefix_(dump_prefix), - allocator_(graph->GetArenaStack()), - seen_ids_(&allocator_, graph->GetCurrentInstructionId(), false, kArenaAllocGraphChecker), - codegen_(codegen) { + : HGraphDelegateVisitor(graph), + errors_(graph->GetAllocator()->Adapter(kArenaAllocGraphChecker)), + dump_prefix_(dump_prefix), + allocator_(graph->GetArenaStack()), + seen_ids_(&allocator_, graph->GetCurrentInstructionId(), false, kArenaAllocGraphChecker), + uses_per_instruction_(allocator_.Adapter(kArenaAllocGraphChecker)), + instructions_per_block_(allocator_.Adapter(kArenaAllocGraphChecker)), + phis_per_block_(allocator_.Adapter(kArenaAllocGraphChecker)), + codegen_(codegen) { seen_ids_.ClearAllBits(); } @@ -107,7 +110,7 @@ class GraphChecker : public HGraphDelegateVisitor { } } - protected: + private: // Report a new error. void AddError(const std::string& error) { errors_.push_back(error); @@ -118,17 +121,33 @@ class GraphChecker : public HGraphDelegateVisitor { // Errors encountered while checking the graph. ArenaVector<std::string> errors_; - private: void VisitReversePostOrder(); // Checks that the graph's flags are set correctly. void CheckGraphFlags(); + // Checks if `instruction` is in its block's instruction/phi list. To do so, it searches + // instructions_per_block_/phis_per_block_ which are set versions of that. If the set to + // check hasn't been populated yet, it does so now. + bool ContainedInItsBlockList(HInstruction* instruction); + // String displayed before dumped errors. const char* const dump_prefix_; ScopedArenaAllocator allocator_; ArenaBitVector seen_ids_; + // As part of VisitInstruction, we verify that the instruction's input_record is present in the + // corresponding input's GetUses. If an instruction is used in many places (e.g. 200K+ uses), the + // linear search through GetUses is too slow. We can use bookkeeping to search in a set, instead + // of a list. + ScopedArenaSafeMap<int, ScopedArenaSet<const art::HUseListNode<art::HInstruction*>*>> + uses_per_instruction_; + + // Extra bookkeeping to increase GraphChecker's speed while asking if an instruction is contained + // in a list of instructions/phis. 
+ ScopedArenaSafeMap<HBasicBlock*, ScopedArenaHashSet<HInstruction*>> instructions_per_block_; + ScopedArenaSafeMap<HBasicBlock*, ScopedArenaHashSet<HInstruction*>> phis_per_block_; + // Used to access target information. CodeGenerator* codegen_; diff --git a/compiler/optimizing/graph_visualizer.cc b/compiler/optimizing/graph_visualizer.cc index 73bdd1e223..b7f7a0f550 100644 --- a/compiler/optimizing/graph_visualizer.cc +++ b/compiler/optimizing/graph_visualizer.cc @@ -115,7 +115,9 @@ std::ostream& operator<<(std::ostream& os, const StringList& list) { } } -#ifndef ART_STATIC_LIBART_COMPILER +// On target: load `libart-disassembler` only when required (to save on memory). +// On host: `libart-disassembler` should be linked directly (either as a static or dynamic lib) +#ifdef ART_TARGET using create_disasm_prototype = Disassembler*(InstructionSet, DisassemblerOptions*); #endif @@ -125,7 +127,7 @@ class HGraphVisualizerDisassembler { const uint8_t* base_address, const uint8_t* end_address) : instruction_set_(instruction_set), disassembler_(nullptr) { -#ifndef ART_STATIC_LIBART_COMPILER +#ifdef ART_TARGET constexpr const char* libart_disassembler_so_name = kIsDebugBuild ? "libartd-disassembler.so" : "libart-disassembler.so"; libart_disassembler_handle_ = dlopen(libart_disassembler_so_name, RTLD_NOW); @@ -159,7 +161,7 @@ class HGraphVisualizerDisassembler { ~HGraphVisualizerDisassembler() { // We need to call ~Disassembler() before we close the library. disassembler_.reset(); -#ifndef ART_STATIC_LIBART_COMPILER +#ifdef ART_TARGET if (libart_disassembler_handle_ != nullptr) { dlclose(libart_disassembler_handle_); } @@ -184,7 +186,7 @@ class HGraphVisualizerDisassembler { InstructionSet instruction_set_; std::unique_ptr<Disassembler> disassembler_; -#ifndef ART_STATIC_LIBART_COMPILER +#ifdef ART_TARGET void* libart_disassembler_handle_; #endif }; @@ -494,6 +496,11 @@ class HGraphVisualizerPrinter : public HGraphDelegateVisitor { StartAttributeStream("bias") << condition->GetBias(); } + void VisitIf(HIf* if_instr) override { + StartAttributeStream("true_count") << if_instr->GetTrueCount(); + StartAttributeStream("false_count") << if_instr->GetFalseCount(); + } + void VisitInvoke(HInvoke* invoke) override { StartAttributeStream("dex_file_index") << invoke->GetMethodReference().index; ArtMethod* method = invoke->GetResolvedMethod(); @@ -538,13 +545,6 @@ class HGraphVisualizerPrinter : public HGraphDelegateVisitor { StartAttributeStream("invoke_type") << "InvokePolymorphic"; } - void VisitPredicatedInstanceFieldGet(HPredicatedInstanceFieldGet* iget) override { - StartAttributeStream("field_name") << - iget->GetFieldInfo().GetDexFile().PrettyField(iget->GetFieldInfo().GetFieldIndex(), - /* with type */ false); - StartAttributeStream("field_type") << iget->GetFieldType(); - } - void VisitInstanceFieldGet(HInstanceFieldGet* iget) override { StartAttributeStream("field_name") << iget->GetFieldInfo().GetDexFile().PrettyField(iget->GetFieldInfo().GetFieldIndex(), @@ -557,8 +557,6 @@ class HGraphVisualizerPrinter : public HGraphDelegateVisitor { iset->GetFieldInfo().GetDexFile().PrettyField(iset->GetFieldInfo().GetFieldIndex(), /* with type */ false); StartAttributeStream("field_type") << iset->GetFieldType(); - StartAttributeStream("predicated") - << std::boolalpha << iset->GetIsPredicatedSet() << std::noboolalpha; StartAttributeStream("write_barrier_kind") << iset->GetWriteBarrierKind(); } @@ -610,6 +608,7 @@ class HGraphVisualizerPrinter : public HGraphDelegateVisitor { } void 
VisitVecMemoryOperation(HVecMemoryOperation* vec_mem_operation) override { + VisitVecOperation(vec_mem_operation); StartAttributeStream("alignment") << vec_mem_operation->GetAlignment().ToString(); } diff --git a/compiler/optimizing/induction_var_range.cc b/compiler/optimizing/induction_var_range.cc index 9b78699ead..8568062933 100644 --- a/compiler/optimizing/induction_var_range.cc +++ b/compiler/optimizing/induction_var_range.cc @@ -255,8 +255,8 @@ bool InductionVarRange::CanGenerateRange(const HBasicBlock* context, nullptr, // nothing generated yet &stride_value, needs_finite_test, - needs_taken_test) - && (stride_value == -1 || + needs_taken_test) && + (stride_value == -1 || stride_value == 0 || stride_value == 1); // avoid arithmetic wrap-around anomalies. } @@ -280,7 +280,10 @@ void InductionVarRange::GenerateRange(const HBasicBlock* context, nullptr, &stride_value, &b1, - &b2)) { + &b2) || + (stride_value != -1 && + stride_value != 0 && + stride_value != 1)) { LOG(FATAL) << "Failed precondition: CanGenerateRange()"; } } @@ -303,7 +306,10 @@ HInstruction* InductionVarRange::GenerateTakenTest(HInstruction* loop_control, &taken_test, &stride_value, &b1, - &b2)) { + &b2) || + (stride_value != -1 && + stride_value != 0 && + stride_value != 1)) { LOG(FATAL) << "Failed precondition: CanGenerateRange()"; } return taken_test; @@ -336,7 +342,8 @@ HInstruction* InductionVarRange::GenerateLastValue(HInstruction* instruction, HInstruction* last_value = nullptr; bool is_last_value = true; int64_t stride_value = 0; - bool b1, b2; // unused + bool needs_finite_test = false; + bool needs_taken_test = false; if (!GenerateRangeOrLastValue(context, instruction, is_last_value, @@ -346,8 +353,10 @@ HInstruction* InductionVarRange::GenerateLastValue(HInstruction* instruction, &last_value, nullptr, &stride_value, - &b1, - &b2)) { + &needs_finite_test, + &needs_taken_test) || + needs_finite_test || + needs_taken_test) { LOG(FATAL) << "Failed precondition: CanGenerateLastValue()"; } return last_value; @@ -1066,11 +1075,11 @@ bool InductionVarRange::GenerateRangeOrLastValue(const HBasicBlock* context, if (*stride_value > 0) { lower = nullptr; return GenerateLastValueLinear( - context, loop, info, trip, graph, block, /*is_min=*/false, upper); + context, loop, info, trip, graph, block, /*is_min=*/false, upper, needs_taken_test); } else { upper = nullptr; return GenerateLastValueLinear( - context, loop, info, trip, graph, block, /*is_min=*/true, lower); + context, loop, info, trip, graph, block, /*is_min=*/true, lower, needs_taken_test); } case HInductionVarAnalysis::kPolynomial: return GenerateLastValuePolynomial(context, loop, info, trip, graph, block, lower); @@ -1124,7 +1133,8 @@ bool InductionVarRange::GenerateLastValueLinear(const HBasicBlock* context, HGraph* graph, HBasicBlock* block, bool is_min, - /*out*/ HInstruction** result) const { + /*out*/ HInstruction** result, + /*inout*/ bool* needs_taken_test) const { DataType::Type type = info->type; // Avoid any narrowing linear induction or any type mismatch between the linear induction and the // trip count expression. @@ -1132,18 +1142,27 @@ bool InductionVarRange::GenerateLastValueLinear(const HBasicBlock* context, return false; } - // Stride value must be a known constant that fits into int32. + // Stride value must be a known constant that fits into int32. The stride will be the `i` in `a * + // i + b`. 
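As a rough illustration of the last value generated above for a linear induction `a * i + b` (the stride being the constant coefficient `a`, with `tc` the loop's trip count): the exit value is `a * tc + b` when the loop is taken and the initial `b` otherwise, and the taken-test guard added further below selects between the two. A hedged sketch with invented names, not ART code:

#include <cstdint>

// Illustrative only: exit value of a linear induction v = a*i + b after a loop
// with trip count tc. If the loop is never entered, v keeps its initial value b.
int64_t LastValueLinearSketch(int64_t a, int64_t b, int64_t tc, bool loop_taken) {
  return loop_taken ? a * tc + b : b;
}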
int64_t stride_value = 0; if (!IsConstant(context, loop, info->op_a, kExact, &stride_value) || !CanLongValueFitIntoInt(stride_value)) { return false; } - // We require `a` to be a constant value that didn't overflow. + // We require the calculation of `a` to not overflow. const bool is_min_a = stride_value >= 0 ? is_min : !is_min; - Value val_a = GetVal(context, loop, trip, trip, is_min_a); + HInstruction* opa; HInstruction* opb; - if (!IsConstantValue(val_a) || + if (!GenerateCode(context, + loop, + trip, + trip, + graph, + block, + is_min_a, + &opa, + /*allow_potential_overflow=*/false) || !GenerateCode(context, loop, info->op_b, trip, graph, block, is_min, &opb)) { return false; } @@ -1151,7 +1170,8 @@ bool InductionVarRange::GenerateLastValueLinear(const HBasicBlock* context, if (graph != nullptr) { ArenaAllocator* allocator = graph->GetAllocator(); HInstruction* oper; - HInstruction* opa = graph->GetConstant(type, val_a.b_constant); + // Emit instructions for `a * i + b`. These are fine to overflow as they would have overflown + // also if we had kept the loop. if (stride_value == 1) { oper = new (allocator) HAdd(type, opa, opb); } else if (stride_value == -1) { @@ -1162,6 +1182,15 @@ bool InductionVarRange::GenerateLastValueLinear(const HBasicBlock* context, } *result = Insert(block, oper); } + + if (*needs_taken_test) { + if (TryGenerateTakenTest(context, loop, trip->op_b, graph, block, result, opb)) { + *needs_taken_test = false; // taken care of + } else { + return false; + } + } + return true; } @@ -1298,8 +1327,8 @@ bool InductionVarRange::GenerateLastValuePeriodic(const HBasicBlock* context, HInductionVarAnalysis::InductionInfo* trip, HGraph* graph, HBasicBlock* block, - /*out*/HInstruction** result, - /*out*/bool* needs_taken_test) const { + /*out*/ HInstruction** result, + /*inout*/ bool* needs_taken_test) const { DCHECK(info != nullptr); DCHECK_EQ(info->induction_class, HInductionVarAnalysis::kPeriodic); // Count period and detect all-invariants. @@ -1339,6 +1368,15 @@ bool InductionVarRange::GenerateLastValuePeriodic(const HBasicBlock* context, HInstruction* x = nullptr; HInstruction* y = nullptr; HInstruction* t = nullptr; + + // Overflows when the stride is equal to `1` are fine since the periodicity is + // `2` and the lowest bit is the same. Similar with `-1`. + auto allow_potential_overflow = [&]() { + int64_t stride_value = 0; + return IsConstant(context, loop, trip->op_a->op_b, kExact, &stride_value) && + (stride_value == 1 || stride_value == -1); + }; + if (period == 2 && GenerateCode(context, loop, @@ -1363,7 +1401,8 @@ bool InductionVarRange::GenerateLastValuePeriodic(const HBasicBlock* context, graph, block, /*is_min=*/ false, - graph ? &t : nullptr)) { + graph ? &t : nullptr, + allow_potential_overflow())) { // During actual code generation (graph != nullptr), generate is_even ? x : y. if (graph != nullptr) { DataType::Type type = trip->type; @@ -1374,21 +1413,9 @@ bool InductionVarRange::GenerateLastValuePeriodic(const HBasicBlock* context, Insert(block, new (allocator) HEqual(msk, graph->GetConstant(type, 0), kNoDexPc)); *result = Insert(block, new (graph->GetAllocator()) HSelect(is_even, x, y, kNoDexPc)); } - // Guard select with taken test if needed. + if (*needs_taken_test) { - HInstruction* is_taken = nullptr; - if (GenerateCode(context, - loop, - trip->op_b, - /*trip=*/ nullptr, - graph, - block, - /*is_min=*/ false, - graph ? 
&is_taken : nullptr)) { - if (graph != nullptr) { - ArenaAllocator* allocator = graph->GetAllocator(); - *result = Insert(block, new (allocator) HSelect(is_taken, *result, x, kNoDexPc)); - } + if (TryGenerateTakenTest(context, loop, trip->op_b, graph, block, result, x)) { *needs_taken_test = false; // taken care of } else { return false; @@ -1406,7 +1433,8 @@ bool InductionVarRange::GenerateCode(const HBasicBlock* context, HGraph* graph, // when set, code is generated HBasicBlock* block, bool is_min, - /*out*/HInstruction** result) const { + /*out*/ HInstruction** result, + bool allow_potential_overflow) const { if (info != nullptr) { // If during codegen, the result is not needed (nullptr), simply return success. if (graph != nullptr && result == nullptr) { @@ -1431,8 +1459,41 @@ bool InductionVarRange::GenerateCode(const HBasicBlock* context, case HInductionVarAnalysis::kLE: case HInductionVarAnalysis::kGT: case HInductionVarAnalysis::kGE: - if (GenerateCode(context, loop, info->op_a, trip, graph, block, is_min, &opa) && - GenerateCode(context, loop, info->op_b, trip, graph, block, is_min, &opb)) { + if (GenerateCode(context, + loop, + info->op_a, + trip, + graph, + block, + is_min, + &opa, + allow_potential_overflow) && + GenerateCode(context, + loop, + info->op_b, + trip, + graph, + block, + is_min, + &opb, + allow_potential_overflow)) { + // Check for potentially invalid operations. + if (!allow_potential_overflow) { + switch (info->operation) { + case HInductionVarAnalysis::kAdd: + return TryGenerateAddWithoutOverflow( + context, loop, info, graph, opa, opb, result); + case HInductionVarAnalysis::kSub: + return TryGenerateSubWithoutOverflow(context, loop, info, graph, opa, result); + default: + // The rest of the operations are not relevant in the cases where + // `allow_potential_overflow` is false. Fall through to the allowed overflow + // case. + break; + } + } + + // Overflows here are accepted. if (graph != nullptr) { HInstruction* operation = nullptr; switch (info->operation) { @@ -1465,7 +1526,15 @@ bool InductionVarRange::GenerateCode(const HBasicBlock* context, } break; case HInductionVarAnalysis::kNeg: - if (GenerateCode(context, loop, info->op_b, trip, graph, block, !is_min, &opb)) { + if (GenerateCode(context, + loop, + info->op_b, + trip, + graph, + block, + !is_min, + &opb, + allow_potential_overflow)) { if (graph != nullptr) { *result = Insert(block, new (graph->GetAllocator()) HNeg(type, opb)); } @@ -1481,8 +1550,15 @@ bool InductionVarRange::GenerateCode(const HBasicBlock* context, case HInductionVarAnalysis::kTripCountInLoopUnsafe: if (UseFullTripCount(context, loop, is_min)) { // Generate the full trip count (do not subtract 1 as we do in loop body). 
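A small worked example (not ART code) of the period-2 case handled above: a value that flips between `x` and `y` every iteration ends on `x` when the trip count is even and on `y` when it is odd, which is what the generated `is_even ? x : y` select encodes; per the comment above, a wrap-around of a +/-1 stride leaves the lowest bit unchanged, and only that parity bit feeds the select.

#include <cstdint>

// Illustrative only: last value of a period-2 ("flip-flop") induction that
// starts at x. Only the parity of the trip count matters.
int32_t LastValuePeriod2Sketch(int32_t x, int32_t y, int64_t tc) {
  return (tc & 1) == 0 ? x : y;
}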
- return GenerateCode( - context, loop, info->op_a, trip, graph, block, /*is_min=*/ false, result); + return GenerateCode(context, + loop, + info->op_a, + trip, + graph, + block, + /*is_min=*/false, + result, + allow_potential_overflow); } FALLTHROUGH_INTENDED; case HInductionVarAnalysis::kTripCountInBody: @@ -1492,12 +1568,31 @@ bool InductionVarRange::GenerateCode(const HBasicBlock* context, *result = graph->GetConstant(type, 0); } return true; - } else if (IsContextInBody(context, loop)) { - if (GenerateCode(context, loop, info->op_a, trip, graph, block, is_min, &opb)) { + } else if (IsContextInBody(context, loop) || + (context == loop->GetHeader() && !allow_potential_overflow)) { + if (GenerateCode(context, + loop, + info->op_a, + trip, + graph, + block, + is_min, + &opb, + allow_potential_overflow)) { if (graph != nullptr) { - ArenaAllocator* allocator = graph->GetAllocator(); - *result = - Insert(block, new (allocator) HSub(type, opb, graph->GetConstant(type, 1))); + if (IsContextInBody(context, loop)) { + ArenaAllocator* allocator = graph->GetAllocator(); + *result = + Insert(block, new (allocator) HSub(type, opb, graph->GetConstant(type, 1))); + } else { + // We want to generate the full trip count since we want the last value. This + // will be combined with an `is_taken` test so we don't want to subtract one. + DCHECK(context == loop->GetHeader()); + // TODO(solanes): Remove the !allow_potential_overflow restriction and allow + // other parts e.g. BCE to take advantage of this. + DCHECK(!allow_potential_overflow); + *result = opb; + } } return true; } @@ -1519,8 +1614,24 @@ bool InductionVarRange::GenerateCode(const HBasicBlock* context, if (IsConstant(context, loop, info->op_a, kExact, &stride_value) && CanLongValueFitIntoInt(stride_value)) { const bool is_min_a = stride_value >= 0 ? is_min : !is_min; - if (GenerateCode(context, loop, trip, trip, graph, block, is_min_a, &opa) && - GenerateCode(context, loop, info->op_b, trip, graph, block, is_min, &opb)) { + if (GenerateCode(context, + loop, + trip, + trip, + graph, + block, + is_min_a, + &opa, + allow_potential_overflow) && + GenerateCode(context, + loop, + info->op_b, + trip, + graph, + block, + is_min, + &opb, + allow_potential_overflow)) { if (graph != nullptr) { ArenaAllocator* allocator = graph->GetAllocator(); HInstruction* oper; @@ -1562,6 +1673,119 @@ bool InductionVarRange::GenerateCode(const HBasicBlock* context, return false; } +bool InductionVarRange::TryGenerateAddWithoutOverflow(const HBasicBlock* context, + const HLoopInformation* loop, + HInductionVarAnalysis::InductionInfo* info, + HGraph* graph, + /*in*/ HInstruction* opa, + /*in*/ HInstruction* opb, + /*out*/ HInstruction** result) const { + // Calculate `a + b` making sure we can't overflow. + int64_t val_a; + const bool a_is_const = IsConstant(context, loop, info->op_a, kExact, &val_a); + int64_t val_b; + const bool b_is_const = IsConstant(context, loop, info->op_b, kExact, &val_b); + if (a_is_const && b_is_const) { + // Calculate `a + b` and use that. Note that even when the values are known, + // their addition can still overflow. + Value add_val = AddValue(Value(val_a), Value(val_b)); + if (add_val.is_known) { + DCHECK(IsConstantValue(add_val)); + // Known value not overflowing. + if (graph != nullptr) { + *result = graph->GetConstant(info->type, add_val.b_constant); + } + return true; + } + } + + // When `a` is `0`, we can just use `b`. 
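A minimal sketch (not ART code) of the guarded fold performed by TryGenerateAddWithoutOverflow above: fold `a + b` only when the 64-bit addition provably does not overflow, otherwise report failure so the caller keeps the conservative path. The helper name is invented for illustration:

#include <cstdint>
#include <optional>

// Returns a + b only when the signed 64-bit addition cannot overflow; the
// compiler builtin reports overflow without invoking undefined behavior.
std::optional<int64_t> AddNoOverflowSketch(int64_t a, int64_t b) {
  int64_t sum;
  if (__builtin_add_overflow(a, b, &sum)) {
    return std::nullopt;  // Would overflow: do not fold.
  }
  return sum;
}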
+ if (a_is_const && val_a == 0) { + if (graph != nullptr) { + *result = opb; + } + return true; + } + + if (b_is_const && val_b == 0) { + if (graph != nullptr) { + *result = opa; + } + return true; + } + + // Couldn't safely calculate the addition. + return false; +} + +bool InductionVarRange::TryGenerateSubWithoutOverflow(const HBasicBlock* context, + const HLoopInformation* loop, + HInductionVarAnalysis::InductionInfo* info, + HGraph* graph, + /*in*/ HInstruction* opa, + /*out*/ HInstruction** result) const { + // Calculate `a - b` making sure we can't overflow. + int64_t val_b; + if (!IsConstant(context, loop, info->op_b, kExact, &val_b)) { + // If b is unknown, a - b can potentially overflow for any value of a since b + // can be Integer.MIN_VALUE. + return false; + } + + int64_t val_a; + if (IsConstant(context, loop, info->op_a, kExact, &val_a)) { + // Calculate `a - b` and use that. Note that even when the values are known, + // their subtraction can still overflow. + Value sub_val = SubValue(Value(val_a), Value(val_b)); + if (sub_val.is_known) { + DCHECK(IsConstantValue(sub_val)); + // Known value not overflowing. + if (graph != nullptr) { + *result = graph->GetConstant(info->type, sub_val.b_constant); + } + return true; + } + } + + // When `b` is `0`, we can just use `a`. + if (val_b == 0) { + if (graph != nullptr) { + *result = opa; + } + return true; + } + + // Couldn't safely calculate the subtraction. + return false; +} + +bool InductionVarRange::TryGenerateTakenTest(const HBasicBlock* context, + const HLoopInformation* loop, + HInductionVarAnalysis::InductionInfo* info, + HGraph* graph, + HBasicBlock* block, + /*inout*/ HInstruction** result, + /*inout*/ HInstruction* not_taken_result) const { + HInstruction* is_taken = nullptr; + if (GenerateCode(context, + loop, + info, + /*trip=*/nullptr, + graph, + block, + /*is_min=*/false, + graph != nullptr ? &is_taken : nullptr)) { + if (graph != nullptr) { + ArenaAllocator* allocator = graph->GetAllocator(); + *result = + Insert(block, new (allocator) HSelect(is_taken, *result, not_taken_result, kNoDexPc)); + } + return true; + } else { + return false; + } +} + void InductionVarRange::ReplaceInduction(HInductionVarAnalysis::InductionInfo* info, HInstruction* fetch, HInstruction* replacement) { diff --git a/compiler/optimizing/induction_var_range.h b/compiler/optimizing/induction_var_range.h index 3e1212bec8..a81227b41b 100644 --- a/compiler/optimizing/induction_var_range.h +++ b/compiler/optimizing/induction_var_range.h @@ -325,7 +325,8 @@ class InductionVarRange { HGraph* graph, HBasicBlock* block, bool is_min, - /*out*/ HInstruction** result) const; + /*out*/ HInstruction** result, + /*inout*/ bool* needs_taken_test) const; bool GenerateLastValuePolynomial(const HBasicBlock* context, const HLoopInformation* loop, @@ -357,8 +358,8 @@ class InductionVarRange { HInductionVarAnalysis::InductionInfo* trip, HGraph* graph, HBasicBlock* block, - /*out*/HInstruction** result, - /*out*/ bool* needs_taken_test) const; + /*out*/ HInstruction** result, + /*inout*/ bool* needs_taken_test) const; bool GenerateCode(const HBasicBlock* context, const HLoopInformation* loop, @@ -367,7 +368,34 @@ class InductionVarRange { HGraph* graph, HBasicBlock* block, bool is_min, - /*out*/ HInstruction** result) const; + /*out*/ HInstruction** result, + // TODO(solanes): Remove default value when all cases have been assessed. 
+ bool allow_potential_overflow = true) const; + + bool TryGenerateAddWithoutOverflow(const HBasicBlock* context, + const HLoopInformation* loop, + HInductionVarAnalysis::InductionInfo* info, + HGraph* graph, + /*in*/ HInstruction* opa, + /*in*/ HInstruction* opb, + /*out*/ HInstruction** result) const; + + bool TryGenerateSubWithoutOverflow(const HBasicBlock* context, + const HLoopInformation* loop, + HInductionVarAnalysis::InductionInfo* info, + HGraph* graph, + /*in*/ HInstruction* opa, + /*out*/ HInstruction** result) const; + + // Try to guard the taken test with an HSelect instruction. Returns true if it can generate the + // code, or false otherwise. The caller is responsible of updating `needs_taken_test`. + bool TryGenerateTakenTest(const HBasicBlock* context, + const HLoopInformation* loop, + HInductionVarAnalysis::InductionInfo* info, + HGraph* graph, + HBasicBlock* block, + /*inout*/ HInstruction** result, + /*inout*/ HInstruction* not_taken_result) const; void ReplaceInduction(HInductionVarAnalysis::InductionInfo* info, HInstruction* fetch, diff --git a/compiler/optimizing/induction_var_range_test.cc b/compiler/optimizing/induction_var_range_test.cc index d879897959..40fb0d6092 100644 --- a/compiler/optimizing/induction_var_range_test.cc +++ b/compiler/optimizing/induction_var_range_test.cc @@ -1061,11 +1061,13 @@ TEST_F(InductionVarRangeTest, ConstantTripCountDown) { range_.CanGenerateRange(exit->GetBlock(), exit, &needs_finite_test, &needs_taken_test)); EXPECT_FALSE(range_.CanGenerateLastValue(exit)); - // Last value (unsimplified). + // Last value (unsimplified). We expect Sub(1000, Neg(-1000)) which is equivalent to Sub(1000, + // 1000) aka 0. HInstruction* last = range_.GenerateLastValue(phi, graph_, loop_preheader_); ASSERT_TRUE(last->IsSub()); ExpectInt(1000, last->InputAt(0)); - ExpectInt(1000, last->InputAt(1)); + ASSERT_TRUE(last->InputAt(1)->IsNeg()); + ExpectInt(-1000, last->InputAt(1)->AsNeg()->InputAt(0)); // Loop logic. int64_t tc = 0; diff --git a/compiler/optimizing/inliner.cc b/compiler/optimizing/inliner.cc index 5a4478dc14..35582297f3 100644 --- a/compiler/optimizing/inliner.cc +++ b/compiler/optimizing/inliner.cc @@ -144,11 +144,6 @@ bool HInliner::Run() { } bool did_inline = false; - // The inliner is the only phase that sets invokes as `always throwing`, and since we only run the - // inliner once per graph this value should always be false at the beginning of the inlining - // phase. This is important since we use `HasAlwaysThrowingInvokes` to know whether the inliner - // phase performed a relevant change in the graph. - DCHECK(!graph_->HasAlwaysThrowingInvokes()); // Initialize the number of instructions for the method being compiled. Recursive calls // to HInliner::Run have already updated the instruction count. @@ -180,7 +175,7 @@ bool HInliner::Run() { for (HBasicBlock* block : blocks) { for (HInstruction* instruction = block->GetFirstInstruction(); instruction != nullptr;) { HInstruction* next = instruction->GetNext(); - HInvoke* call = instruction->AsInvoke(); + HInvoke* call = instruction->AsInvokeOrNull(); // As long as the call is not intrinsified, it is worth trying to inline. if (call != nullptr && !codegen_->IsImplementedIntrinsic(call)) { if (honor_noinline_directives) { @@ -210,6 +205,16 @@ bool HInliner::Run() { // We return true if we either inlined at least one method, or we marked one of our methods as // always throwing. 
+ // To check if we added an always throwing method we can either: + // 1) Pass a boolean throughout the pipeline and get an accurate result, or + // 2) Just check that the `HasAlwaysThrowingInvokes()` flag is true now. This is not 100% + // accurate but the only other part where we set `HasAlwaysThrowingInvokes` is constant + // folding the DivideUnsigned intrinsics for when the divisor is known to be 0. This case is + // rare enough that changing the pipeline for this is not worth it. In the case of the false + // positive (i.e. A) we didn't inline at all, B) the graph already had an always throwing + // invoke, and C) we didn't set any new always throwing invokes), we will be running constant + // folding, instruction simplifier, and dead code elimination one more time even though it + // shouldn't change things. There's no false negative case. return did_inline || graph_->HasAlwaysThrowingInvokes(); } @@ -223,7 +228,7 @@ static bool IsMethodOrDeclaringClassFinal(ArtMethod* method) * the actual runtime target of an interface or virtual call. * Return nullptr if the runtime target cannot be proven. */ -static ArtMethod* FindVirtualOrInterfaceTarget(HInvoke* invoke) +static ArtMethod* FindVirtualOrInterfaceTarget(HInvoke* invoke, ReferenceTypeInfo info) REQUIRES_SHARED(Locks::mutator_lock_) { ArtMethod* resolved_method = invoke->GetResolvedMethod(); if (IsMethodOrDeclaringClassFinal(resolved_method)) { @@ -231,20 +236,7 @@ static ArtMethod* FindVirtualOrInterfaceTarget(HInvoke* invoke) return resolved_method; } - HInstruction* receiver = invoke->InputAt(0); - if (receiver->IsNullCheck()) { - // Due to multiple levels of inlining within the same pass, it might be that - // null check does not have the reference type of the actual receiver. - receiver = receiver->InputAt(0); - } - ReferenceTypeInfo info = receiver->GetReferenceTypeInfo(); - DCHECK(info.IsValid()) << "Invalid RTI for " << receiver->DebugName(); - if (!info.IsExact()) { - // We currently only support inlining with known receivers. - // TODO: Remove this check, we should be able to inline final methods - // on unknown receivers. - return nullptr; - } else if (info.GetTypeHandle()->IsInterface()) { + if (info.GetTypeHandle()->IsInterface()) { // Statically knowing that the receiver has an interface type cannot // help us find what is the target method. return nullptr; @@ -336,8 +328,8 @@ static dex::TypeIndex FindClassIndexIn(ObjPtr<mirror::Class> cls, HInliner::InlineCacheType HInliner::GetInlineCacheType( const StackHandleScope<InlineCache::kIndividualCacheSize>& classes) { - DCHECK_EQ(classes.NumberOfReferences(), InlineCache::kIndividualCacheSize); - uint8_t number_of_types = InlineCache::kIndividualCacheSize - classes.RemainingSlots(); + DCHECK_EQ(classes.Capacity(), InlineCache::kIndividualCacheSize); + uint8_t number_of_types = classes.Size(); if (number_of_types == 0) { return kInlineCacheUninitialized; } else if (number_of_types == 1) { @@ -472,15 +464,31 @@ bool HInliner::TryInline(HInvoke* invoke_instruction) { return false; } - ArtMethod* actual_method = invoke_instruction->IsInvokeStaticOrDirect() - ? 
invoke_instruction->GetResolvedMethod() - : FindVirtualOrInterfaceTarget(invoke_instruction); + ArtMethod* actual_method = nullptr; + ReferenceTypeInfo receiver_info = ReferenceTypeInfo::CreateInvalid(); + if (invoke_instruction->GetInvokeType() == kStatic) { + actual_method = invoke_instruction->GetResolvedMethod(); + } else { + HInstruction* receiver = invoke_instruction->InputAt(0); + while (receiver->IsNullCheck()) { + // Due to multiple levels of inlining within the same pass, it might be that + // null check does not have the reference type of the actual receiver. + receiver = receiver->InputAt(0); + } + receiver_info = receiver->GetReferenceTypeInfo(); + DCHECK(receiver_info.IsValid()) << "Invalid RTI for " << receiver->DebugName(); + if (invoke_instruction->IsInvokeStaticOrDirect()) { + actual_method = invoke_instruction->GetResolvedMethod(); + } else { + actual_method = FindVirtualOrInterfaceTarget(invoke_instruction, receiver_info); + } + } if (actual_method != nullptr) { // Single target. bool result = TryInlineAndReplace(invoke_instruction, actual_method, - ReferenceTypeInfo::CreateInvalid(), + receiver_info, /* do_rtp= */ true, /* is_speculative= */ false); if (result) { @@ -541,9 +549,10 @@ bool HInliner::TryInlineFromCHA(HInvoke* invoke_instruction) { uint32_t dex_pc = invoke_instruction->GetDexPc(); HInstruction* cursor = invoke_instruction->GetPrevious(); HBasicBlock* bb_cursor = invoke_instruction->GetBlock(); + Handle<mirror::Class> cls = graph_->GetHandleCache()->NewHandle(method->GetDeclaringClass()); if (!TryInlineAndReplace(invoke_instruction, method, - ReferenceTypeInfo::CreateInvalid(), + ReferenceTypeInfo::Create(cls), /* do_rtp= */ true, /* is_speculative= */ true)) { return false; @@ -660,17 +669,23 @@ HInliner::InlineCacheType HInliner::GetInlineCacheJIT( return kInlineCacheNoData; } - Runtime::Current()->GetJit()->GetCodeCache()->CopyInlineCacheInto( - *profiling_info->GetInlineCache(invoke_instruction->GetDexPc()), - classes); + InlineCache* cache = profiling_info->GetInlineCache(invoke_instruction->GetDexPc()); + if (cache == nullptr) { + // This shouldn't happen, but we don't guarantee that method resolution + // between baseline compilation and optimizing compilation is identical. Be robust, + // warn about it, and return that we don't have any inline cache data. + LOG(WARNING) << "No inline cache found for " << caller->PrettyMethod(); + return kInlineCacheNoData; + } + Runtime::Current()->GetJit()->GetCodeCache()->CopyInlineCacheInto(*cache, classes); return GetInlineCacheType(*classes); } HInliner::InlineCacheType HInliner::GetInlineCacheAOT( HInvoke* invoke_instruction, /*out*/StackHandleScope<InlineCache::kIndividualCacheSize>* classes) { - DCHECK_EQ(classes->NumberOfReferences(), InlineCache::kIndividualCacheSize); - DCHECK_EQ(classes->RemainingSlots(), InlineCache::kIndividualCacheSize); + DCHECK_EQ(classes->Capacity(), InlineCache::kIndividualCacheSize); + DCHECK_EQ(classes->Size(), 0u); const ProfileCompilationInfo* pci = codegen_->GetCompilerOptions().GetProfileCompilationInfo(); if (pci == nullptr) { @@ -702,19 +717,21 @@ HInliner::InlineCacheType HInliner::GetInlineCacheAOT( // Walk over the class descriptors and look up the actual classes. // If we cannot find a type we return kInlineCacheMissingTypes. 
ClassLinker* class_linker = caller_compilation_unit_.GetClassLinker(); + Thread* self = Thread::Current(); for (const dex::TypeIndex& type_index : dex_pc_data.classes) { const DexFile* dex_file = caller_compilation_unit_.GetDexFile(); const char* descriptor = pci->GetTypeDescriptor(dex_file, type_index); - ObjPtr<mirror::ClassLoader> class_loader = caller_compilation_unit_.GetClassLoader().Get(); - ObjPtr<mirror::Class> clazz = class_linker->LookupResolvedType(descriptor, class_loader); + ObjPtr<mirror::Class> clazz = + class_linker->FindClass(self, descriptor, caller_compilation_unit_.GetClassLoader()); if (clazz == nullptr) { + self->ClearException(); // Clean up the exception left by type resolution. VLOG(compiler) << "Could not find class from inline cache in AOT mode " << invoke_instruction->GetMethodReference().PrettyMethod() << " : " << descriptor; return kInlineCacheMissingTypes; } - DCHECK_NE(classes->RemainingSlots(), 0u); + DCHECK_LT(classes->Size(), classes->Capacity()); classes->NewHandle(clazz); } @@ -965,8 +982,8 @@ bool HInliner::TryInlinePolymorphicCall( bool all_targets_inlined = true; bool one_target_inlined = false; - DCHECK_EQ(classes.NumberOfReferences(), InlineCache::kIndividualCacheSize); - uint8_t number_of_types = InlineCache::kIndividualCacheSize - classes.RemainingSlots(); + DCHECK_EQ(classes.Capacity(), InlineCache::kIndividualCacheSize); + uint8_t number_of_types = classes.Size(); for (size_t i = 0; i != number_of_types; ++i) { DCHECK(classes.GetReference(i) != nullptr); Handle<mirror::Class> handle = @@ -1152,8 +1169,8 @@ bool HInliner::TryInlinePolymorphicCallToSameTarget( // Check whether we are actually calling the same method among // the different types seen. - DCHECK_EQ(classes.NumberOfReferences(), InlineCache::kIndividualCacheSize); - uint8_t number_of_types = InlineCache::kIndividualCacheSize - classes.RemainingSlots(); + DCHECK_EQ(classes.Capacity(), InlineCache::kIndividualCacheSize); + uint8_t number_of_types = classes.Size(); for (size_t i = 0; i != number_of_types; ++i) { DCHECK(classes.GetReference(i) != nullptr); ArtMethod* new_method = nullptr; @@ -1184,9 +1201,11 @@ bool HInliner::TryInlinePolymorphicCallToSameTarget( HBasicBlock* bb_cursor = invoke_instruction->GetBlock(); HInstruction* return_replacement = nullptr; + Handle<mirror::Class> cls = + graph_->GetHandleCache()->NewHandle(actual_method->GetDeclaringClass()); if (!TryBuildAndInline(invoke_instruction, actual_method, - ReferenceTypeInfo::CreateInvalid(), + ReferenceTypeInfo::Create(cls), &return_replacement, /* is_speculative= */ true)) { return false; @@ -2062,7 +2081,8 @@ bool HInliner::TryBuildAndInlineHelper(HInvoke* invoke_instruction, ReferenceTypeInfo receiver_type, HInstruction** return_replacement, bool is_speculative) { - DCHECK(!(resolved_method->IsStatic() && receiver_type.IsValid())); + DCHECK_IMPLIES(resolved_method->IsStatic(), !receiver_type.IsValid()); + DCHECK_IMPLIES(!resolved_method->IsStatic(), receiver_type.IsValid()); const dex::CodeItem* code_item = resolved_method->GetCodeItem(); const DexFile& callee_dex_file = *resolved_method->GetDexFile(); uint32_t method_index = resolved_method->GetDexMethodIndex(); diff --git a/compiler/optimizing/instruction_builder.cc b/compiler/optimizing/instruction_builder.cc index fee9091145..fe0f3fe319 100644 --- a/compiler/optimizing/instruction_builder.cc +++ b/compiler/optimizing/instruction_builder.cc @@ -665,22 +665,31 @@ void HInstructionBuilder::InitializeParameters() { } } -template<typename T> -void 
HInstructionBuilder::If_22t(const Instruction& instruction, uint32_t dex_pc) { - HInstruction* first = LoadLocal(instruction.VRegA(), DataType::Type::kInt32); - HInstruction* second = LoadLocal(instruction.VRegB(), DataType::Type::kInt32); - T* comparison = new (allocator_) T(first, second, dex_pc); - AppendInstruction(comparison); - AppendInstruction(new (allocator_) HIf(comparison, dex_pc)); - current_block_ = nullptr; -} - -template<typename T> -void HInstructionBuilder::If_21t(const Instruction& instruction, uint32_t dex_pc) { +template<typename T, bool kCompareWithZero> +void HInstructionBuilder::If_21_22t(const Instruction& instruction, uint32_t dex_pc) { HInstruction* value = LoadLocal(instruction.VRegA(), DataType::Type::kInt32); - T* comparison = new (allocator_) T(value, graph_->GetIntConstant(0, dex_pc), dex_pc); + T* comparison = nullptr; + if (kCompareWithZero) { + comparison = new (allocator_) T(value, graph_->GetIntConstant(0, dex_pc), dex_pc); + } else { + HInstruction* second = LoadLocal(instruction.VRegB(), DataType::Type::kInt32); + comparison = new (allocator_) T(value, second, dex_pc); + } AppendInstruction(comparison); - AppendInstruction(new (allocator_) HIf(comparison, dex_pc)); + HIf* if_instr = new (allocator_) HIf(comparison, dex_pc); + + ProfilingInfo* info = graph_->GetProfilingInfo(); + if (info != nullptr && !graph_->IsCompilingBaseline()) { + BranchCache* cache = info->GetBranchCache(dex_pc); + if (cache != nullptr) { + if_instr->SetTrueCount(cache->GetTrue()); + if_instr->SetFalseCount(cache->GetFalse()); + } + } + + // Append after setting true/false count, so that the builder knows if the + // instruction needs an environment. + AppendInstruction(if_instr); current_block_ = nullptr; } @@ -1364,8 +1373,7 @@ bool HInstructionBuilder::BuildInvokePolymorphic(uint32_t dex_pc, method_reference, resolved_method, resolved_method_reference, - proto_idx, - !graph_->IsDebuggable()); + proto_idx); if (!HandleInvoke(invoke, operands, shorty, /* is_unresolved= */ false)) { return false; } @@ -2365,9 +2373,9 @@ void HInstructionBuilder::BuildCheckedDivRem(uint16_t out_vreg, second = LoadLocal(second_vreg_or_constant, type); } - if (!second_is_constant - || (type == DataType::Type::kInt32 && second->AsIntConstant()->GetValue() == 0) - || (type == DataType::Type::kInt64 && second->AsLongConstant()->GetValue() == 0)) { + if (!second_is_constant || + (type == DataType::Type::kInt32 && second->AsIntConstant()->GetValue() == 0) || + (type == DataType::Type::kInt64 && second->AsLongConstant()->GetValue() == 0)) { second = new (allocator_) HDivZeroCheck(second, dex_pc); AppendInstruction(second); } @@ -2691,6 +2699,9 @@ void HInstructionBuilder::BuildLoadMethodType(dex::ProtoIndex proto_index, uint3 const DexFile& dex_file = *dex_compilation_unit_->GetDexFile(); HLoadMethodType* load_method_type = new (allocator_) HLoadMethodType(graph_->GetCurrentMethod(), proto_index, dex_file, dex_pc); + if (!code_generator_->GetCompilerOptions().IsJitCompiler()) { + load_method_type->SetLoadKind(HLoadMethodType::LoadKind::kBssEntry); + } AppendInstruction(load_method_type); } @@ -2880,8 +2891,12 @@ bool HInstructionBuilder::ProcessDexInstruction(const Instruction& instruction, } #define IF_XX(comparison, cond) \ - case Instruction::IF_##cond: If_22t<comparison>(instruction, dex_pc); break; \ - case Instruction::IF_##cond##Z: If_21t<comparison>(instruction, dex_pc); break + case Instruction::IF_##cond: \ + If_21_22t<comparison, /* kCompareWithZero= */ false>(instruction, dex_pc); \ + break; 
\ + case Instruction::IF_##cond##Z: \ + If_21_22t<comparison, /* kCompareWithZero= */ true>(instruction, dex_pc); \ + break; IF_XX(HEqual, EQ); IF_XX(HNotEqual, NE); diff --git a/compiler/optimizing/instruction_builder.h b/compiler/optimizing/instruction_builder.h index 3d65d8fb54..5c165d7bf9 100644 --- a/compiler/optimizing/instruction_builder.h +++ b/compiler/optimizing/instruction_builder.h @@ -116,8 +116,8 @@ class HInstructionBuilder : public ValueObject { template<typename T> void Binop_22s(const Instruction& instruction, bool reverse, uint32_t dex_pc); - template<typename T> void If_21t(const Instruction& instruction, uint32_t dex_pc); - template<typename T> void If_22t(const Instruction& instruction, uint32_t dex_pc); + template<typename T, bool kCompareWithZero> + void If_21_22t(const Instruction& instruction, uint32_t dex_pc); void Conversion_12x(const Instruction& instruction, DataType::Type input_type, diff --git a/compiler/optimizing/instruction_simplifier.cc b/compiler/optimizing/instruction_simplifier.cc index 0c2fd5de56..5d552411db 100644 --- a/compiler/optimizing/instruction_simplifier.cc +++ b/compiler/optimizing/instruction_simplifier.cc @@ -22,6 +22,7 @@ #include "data_type-inl.h" #include "driver/compiler_options.h" #include "escape.h" +#include "intrinsic_objects.h" #include "intrinsics.h" #include "intrinsics_utils.h" #include "mirror/class-inl.h" @@ -30,6 +31,7 @@ #include "scoped_thread_state_change-inl.h" #include "sharpening.h" #include "string_builder_append.h" +#include "well_known_classes.h" namespace art HIDDEN { @@ -113,7 +115,7 @@ class InstructionSimplifierVisitor final : public HGraphDelegateVisitor { void VisitInvoke(HInvoke* invoke) override; void VisitDeoptimize(HDeoptimize* deoptimize) override; void VisitVecMul(HVecMul* instruction) override; - void VisitPredicatedInstanceFieldGet(HPredicatedInstanceFieldGet* instruction) override; + void SimplifyBoxUnbox(HInvoke* instruction, ArtField* field, DataType::Type type); void SimplifySystemArrayCopy(HInvoke* invoke); void SimplifyStringEquals(HInvoke* invoke); void SimplifyFP2Int(HInvoke* invoke); @@ -947,67 +949,6 @@ static HInstruction* AllowInMinMax(IfCondition cmp, return nullptr; } -// TODO This should really be done by LSE itself since there is significantly -// more information available there. 
-void InstructionSimplifierVisitor::VisitPredicatedInstanceFieldGet( - HPredicatedInstanceFieldGet* pred_get) { - HInstruction* target = pred_get->GetTarget(); - HInstruction* default_val = pred_get->GetDefaultValue(); - if (target->IsNullConstant()) { - pred_get->ReplaceWith(default_val); - pred_get->GetBlock()->RemoveInstruction(pred_get); - RecordSimplification(); - return; - } else if (!target->CanBeNull()) { - HInstruction* replace_with = new (GetGraph()->GetAllocator()) - HInstanceFieldGet(pred_get->GetTarget(), - pred_get->GetFieldInfo().GetField(), - pred_get->GetFieldType(), - pred_get->GetFieldOffset(), - pred_get->IsVolatile(), - pred_get->GetFieldInfo().GetFieldIndex(), - pred_get->GetFieldInfo().GetDeclaringClassDefIndex(), - pred_get->GetFieldInfo().GetDexFile(), - pred_get->GetDexPc()); - if (pred_get->GetType() == DataType::Type::kReference) { - replace_with->SetReferenceTypeInfoIfValid(pred_get->GetReferenceTypeInfo()); - } - pred_get->GetBlock()->InsertInstructionBefore(replace_with, pred_get); - pred_get->ReplaceWith(replace_with); - pred_get->GetBlock()->RemoveInstruction(pred_get); - RecordSimplification(); - return; - } - if (!target->IsPhi() || !default_val->IsPhi() || default_val->GetBlock() != target->GetBlock()) { - // The iget has already been reduced. We know the target or the phi - // selection will differ between the target and default. - return; - } - DCHECK_EQ(default_val->InputCount(), target->InputCount()); - // In the same block both phis only one non-null we can remove the phi from default_val. - HInstruction* single_value = nullptr; - auto inputs = target->GetInputs(); - for (auto [input, idx] : ZipCount(MakeIterationRange(inputs))) { - if (input->CanBeNull()) { - if (single_value == nullptr) { - single_value = default_val->InputAt(idx); - } else if (single_value != default_val->InputAt(idx) && - !single_value->Equals(default_val->InputAt(idx))) { - // Multiple values are associated with potential nulls, can't combine. - return; - } - } - } - DCHECK(single_value != nullptr) << "All target values are non-null but the phi as a whole still" - << " can be null? This should not be possible." << std::endl - << pred_get->DumpWithArgs(); - if (single_value->StrictlyDominates(pred_get)) { - // Combine all the maybe null values into one. - pred_get->ReplaceInput(single_value, 0); - RecordSimplification(); - } -} - void InstructionSimplifierVisitor::VisitSelect(HSelect* select) { HInstruction* replace_with = nullptr; HInstruction* condition = select->GetCondition(); @@ -1050,51 +991,60 @@ void InstructionSimplifierVisitor::VisitSelect(HSelect* select) { HInstruction* b = condition->InputAt(1); DataType::Type t_type = true_value->GetType(); DataType::Type f_type = false_value->GetType(); - // Here we have a <cmp> b ? true_value : false_value. - // Test if both values are compatible integral types (resulting MIN/MAX/ABS - // type will be int or long, like the condition). Replacements are general, - // but assume conditions prefer constants on the right. if (DataType::IsIntegralType(t_type) && DataType::Kind(t_type) == DataType::Kind(f_type)) { - // Allow a < 100 ? max(a, -100) : .. - // or a > -100 ? min(a, 100) : .. - // to use min/max instead of a to detect nested min/max expressions. - HInstruction* new_a = AllowInMinMax(cmp, a, b, true_value); - if (new_a != nullptr) { - a = new_a; - } - // Try to replace typical integral MIN/MAX/ABS constructs. 
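A plain-C++ worked example of the Select-on-EQ/NE rewrite introduced just below: when the select's two values are exactly the operands of the comparison, the select always evaluates to one of them, so both the comparison and the select can be dropped. Function names here are invented for illustration:

// (x == y) ? y : x always yields x: when they differ we pick x, and when they
// are equal, y is the same value anyway. Likewise (x != y) ? x : y yields x.
int SelectEqSketch(int x, int y) { return (x == y) ? y : x; }  // equivalent to: return x;
int SelectNeSketch(int x, int y) { return (x != y) ? x : y; }  // equivalent to: return x;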
- if ((cmp == kCondLT || cmp == kCondLE || cmp == kCondGT || cmp == kCondGE) && - ((a == true_value && b == false_value) || - (b == true_value && a == false_value))) { - // Found a < b ? a : b (MIN) or a < b ? b : a (MAX) - // or a > b ? a : b (MAX) or a > b ? b : a (MIN). - bool is_min = (cmp == kCondLT || cmp == kCondLE) == (a == true_value); - replace_with = NewIntegralMinMax(GetGraph()->GetAllocator(), a, b, select, is_min); - } else if (((cmp == kCondLT || cmp == kCondLE) && true_value->IsNeg()) || - ((cmp == kCondGT || cmp == kCondGE) && false_value->IsNeg())) { - bool negLeft = (cmp == kCondLT || cmp == kCondLE); - HInstruction* the_negated = negLeft ? true_value->InputAt(0) : false_value->InputAt(0); - HInstruction* not_negated = negLeft ? false_value : true_value; - if (a == the_negated && a == not_negated && IsInt64Value(b, 0)) { - // Found a < 0 ? -a : a - // or a > 0 ? a : -a - // which can be replaced by ABS(a). - replace_with = NewIntegralAbs(GetGraph()->GetAllocator(), a, select); + if (cmp == kCondEQ || cmp == kCondNE) { + // Turns + // * Select[a, b, EQ(a,b)] / Select[a, b, EQ(b,a)] into a + // * Select[a, b, NE(a,b)] / Select[a, b, NE(b,a)] into b + // Note that the order in EQ/NE is irrelevant. + if ((a == true_value && b == false_value) || (a == false_value && b == true_value)) { + replace_with = cmp == kCondEQ ? false_value : true_value; + } + } else { + // Test if both values are compatible integral types (resulting MIN/MAX/ABS + // type will be int or long, like the condition). Replacements are general, + // but assume conditions prefer constants on the right. + + // Allow a < 100 ? max(a, -100) : .. + // or a > -100 ? min(a, 100) : .. + // to use min/max instead of a to detect nested min/max expressions. + HInstruction* new_a = AllowInMinMax(cmp, a, b, true_value); + if (new_a != nullptr) { + a = new_a; } - } else if (true_value->IsSub() && false_value->IsSub()) { - HInstruction* true_sub1 = true_value->InputAt(0); - HInstruction* true_sub2 = true_value->InputAt(1); - HInstruction* false_sub1 = false_value->InputAt(0); - HInstruction* false_sub2 = false_value->InputAt(1); - if ((((cmp == kCondGT || cmp == kCondGE) && - (a == true_sub1 && b == true_sub2 && a == false_sub2 && b == false_sub1)) || - ((cmp == kCondLT || cmp == kCondLE) && - (a == true_sub2 && b == true_sub1 && a == false_sub1 && b == false_sub2))) && - AreLowerPrecisionArgs(t_type, a, b)) { - // Found a > b ? a - b : b - a - // or a < b ? b - a : a - b - // which can be replaced by ABS(a - b) for lower precision operands a, b. - replace_with = NewIntegralAbs(GetGraph()->GetAllocator(), true_value, select); + // Try to replace typical integral MIN/MAX/ABS constructs. + if ((cmp == kCondLT || cmp == kCondLE || cmp == kCondGT || cmp == kCondGE) && + ((a == true_value && b == false_value) || (b == true_value && a == false_value))) { + // Found a < b ? a : b (MIN) or a < b ? b : a (MAX) + // or a > b ? a : b (MAX) or a > b ? b : a (MIN). + bool is_min = (cmp == kCondLT || cmp == kCondLE) == (a == true_value); + replace_with = NewIntegralMinMax(GetGraph()->GetAllocator(), a, b, select, is_min); + } else if (((cmp == kCondLT || cmp == kCondLE) && true_value->IsNeg()) || + ((cmp == kCondGT || cmp == kCondGE) && false_value->IsNeg())) { + bool negLeft = (cmp == kCondLT || cmp == kCondLE); + HInstruction* the_negated = negLeft ? true_value->InputAt(0) : false_value->InputAt(0); + HInstruction* not_negated = negLeft ? 
false_value : true_value; + if (a == the_negated && a == not_negated && IsInt64Value(b, 0)) { + // Found a < 0 ? -a : a + // or a > 0 ? a : -a + // which can be replaced by ABS(a). + replace_with = NewIntegralAbs(GetGraph()->GetAllocator(), a, select); + } + } else if (true_value->IsSub() && false_value->IsSub()) { + HInstruction* true_sub1 = true_value->InputAt(0); + HInstruction* true_sub2 = true_value->InputAt(1); + HInstruction* false_sub1 = false_value->InputAt(0); + HInstruction* false_sub2 = false_value->InputAt(1); + if ((((cmp == kCondGT || cmp == kCondGE) && + (a == true_sub1 && b == true_sub2 && a == false_sub2 && b == false_sub1)) || + ((cmp == kCondLT || cmp == kCondLE) && + (a == true_sub2 && b == true_sub1 && a == false_sub1 && b == false_sub2))) && + AreLowerPrecisionArgs(t_type, a, b)) { + // Found a > b ? a - b : b - a + // or a < b ? b - a : a - b + // which can be replaced by ABS(a - b) for lower precision operands a, b. + replace_with = NewIntegralAbs(GetGraph()->GetAllocator(), true_value, select); + } } } } @@ -1222,9 +1172,6 @@ static inline bool TryReplaceFieldOrArrayGetType(HInstruction* maybe_get, DataTy if (maybe_get->IsInstanceFieldGet()) { maybe_get->AsInstanceFieldGet()->SetType(new_type); return true; - } else if (maybe_get->IsPredicatedInstanceFieldGet()) { - maybe_get->AsPredicatedInstanceFieldGet()->SetType(new_type); - return true; } else if (maybe_get->IsStaticFieldGet()) { maybe_get->AsStaticFieldGet()->SetType(new_type); return true; @@ -1456,24 +1403,26 @@ void InstructionSimplifierVisitor::VisitAdd(HAdd* instruction) { } } - HNeg* neg = left_is_neg ? left->AsNeg() : right->AsNeg(); - if (left_is_neg != right_is_neg && neg->HasOnlyOneNonEnvironmentUse()) { - // Replace code looking like - // NEG tmp, b - // ADD dst, a, tmp - // with - // SUB dst, a, b - // We do not perform the optimization if the input negation has environment - // uses or multiple non-environment uses as it could lead to worse code. In - // particular, we do not want the live range of `b` to be extended if we are - // not sure the initial 'NEG' instruction can be removed. - HInstruction* other = left_is_neg ? right : left; - HSub* sub = - new(GetGraph()->GetAllocator()) HSub(instruction->GetType(), other, neg->GetInput()); - instruction->GetBlock()->ReplaceAndRemoveInstructionWith(instruction, sub); - RecordSimplification(); - neg->GetBlock()->RemoveInstruction(neg); - return; + if (left_is_neg != right_is_neg) { + HNeg* neg = left_is_neg ? left->AsNeg() : right->AsNeg(); + if (neg->HasOnlyOneNonEnvironmentUse()) { + // Replace code looking like + // NEG tmp, b + // ADD dst, a, tmp + // with + // SUB dst, a, b + // We do not perform the optimization if the input negation has environment + // uses or multiple non-environment uses as it could lead to worse code. In + // particular, we do not want the live range of `b` to be extended if we are + // not sure the initial 'NEG' instruction can be removed. + HInstruction* other = left_is_neg ? right : left; + HSub* sub = + new(GetGraph()->GetAllocator()) HSub(instruction->GetType(), other, neg->GetInput()); + instruction->GetBlock()->ReplaceAndRemoveInstructionWith(instruction, sub); + RecordSimplification(); + neg->GetBlock()->RemoveInstruction(neg); + return; + } } if (TryReplaceWithRotate(instruction)) { @@ -1676,7 +1625,7 @@ static bool RecognizeAndSimplifyClassCheck(HCondition* condition) { HInstruction* input_two = condition->InputAt(1); HLoadClass* load_class = input_one->IsLoadClass() ? 
input_one->AsLoadClass() - : input_two->AsLoadClass(); + : input_two->AsLoadClassOrNull(); if (load_class == nullptr) { return false; } @@ -1688,8 +1637,8 @@ static bool RecognizeAndSimplifyClassCheck(HCondition* condition) { } HInstanceFieldGet* field_get = (load_class == input_one) - ? input_two->AsInstanceFieldGet() - : input_one->AsInstanceFieldGet(); + ? input_two->AsInstanceFieldGetOrNull() + : input_one->AsInstanceFieldGetOrNull(); if (field_get == nullptr) { return false; } @@ -2240,6 +2189,7 @@ void InstructionSimplifierVisitor::VisitSub(HSub* instruction) { } if (left->IsAdd()) { + // Cases (x + y) - y = x, and (x + y) - x = y. // Replace code patterns looking like // ADD dst1, x, y ADD dst1, x, y // SUB dst2, dst1, y SUB dst2, dst1, x @@ -2248,14 +2198,75 @@ void InstructionSimplifierVisitor::VisitSub(HSub* instruction) { // SUB instruction is not needed in this case, we may use // one of inputs of ADD instead. // It is applicable to integral types only. + HAdd* add = left->AsAdd(); DCHECK(DataType::IsIntegralType(type)); - if (left->InputAt(1) == right) { - instruction->ReplaceWith(left->InputAt(0)); + if (add->GetRight() == right) { + instruction->ReplaceWith(add->GetLeft()); RecordSimplification(); instruction->GetBlock()->RemoveInstruction(instruction); return; - } else if (left->InputAt(0) == right) { - instruction->ReplaceWith(left->InputAt(1)); + } else if (add->GetLeft() == right) { + instruction->ReplaceWith(add->GetRight()); + RecordSimplification(); + instruction->GetBlock()->RemoveInstruction(instruction); + return; + } + } else if (right->IsAdd()) { + // Cases y - (x + y) = -x, and x - (x + y) = -y. + // Replace code patterns looking like + // ADD dst1, x, y ADD dst1, x, y + // SUB dst2, y, dst1 SUB dst2, x, dst1 + // with + // ADD dst1, x, y ADD dst1, x, y + // NEG x NEG y + // SUB instruction is not needed in this case, we may use + // one of inputs of ADD instead with a NEG. + // It is applicable to integral types only. + HAdd* add = right->AsAdd(); + DCHECK(DataType::IsIntegralType(type)); + if (add->GetRight() == left) { + HNeg* neg = new (GetGraph()->GetAllocator()) HNeg(add->GetType(), add->GetLeft()); + instruction->GetBlock()->ReplaceAndRemoveInstructionWith(instruction, neg); + RecordSimplification(); + return; + } else if (add->GetLeft() == left) { + HNeg* neg = new (GetGraph()->GetAllocator()) HNeg(add->GetType(), add->GetRight()); + instruction->GetBlock()->ReplaceAndRemoveInstructionWith(instruction, neg); + RecordSimplification(); + return; + } + } else if (left->IsSub()) { + // Case (x - y) - x = -y. + // Replace code patterns looking like + // SUB dst1, x, y + // SUB dst2, dst1, x + // with + // SUB dst1, x, y + // NEG y + // The second SUB is not needed in this case, we may use the second input of the first SUB + // instead with a NEG. + // It is applicable to integral types only. + HSub* sub = left->AsSub(); + DCHECK(DataType::IsIntegralType(type)); + if (sub->GetLeft() == right) { + HNeg* neg = new (GetGraph()->GetAllocator()) HNeg(sub->GetType(), sub->GetRight()); + instruction->GetBlock()->ReplaceAndRemoveInstructionWith(instruction, neg); + RecordSimplification(); + return; + } + } else if (right->IsSub()) { + // Case x - (x - y) = y. + // Replace code patterns looking like + // SUB dst1, x, y + // SUB dst2, x, dst1 + // with + // SUB dst1, x, y + // The second SUB is not needed in this case, we may use the second input of the first SUB. + // It is applicable to integral types only. 
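The ADD/SUB chain rewrites above and below rest on identities that hold under two's-complement wrap-around, which matches the HIR's semantics for integral types. A standalone sketch using unsigned arithmetic so the wrap-around is well defined in C++; names are made up:

#include <cstdint>

// Each equality mirrors one of the simplifier's rewrites:
//   (x + y) - y  ==>  x        y - (x + y)  ==>  -x
//   (x + y) - x  ==>  y        x - (x + y)  ==>  -y
//   (x - y) - x  ==>  -y       x - (x - y)  ==>  y
bool SubChainIdentitiesHold(uint32_t x, uint32_t y) {
  return (x + y) - y == x &&
         (x + y) - x == y &&
         y - (x + y) == 0u - x &&
         x - (x + y) == 0u - y &&
         (x - y) - x == 0u - y &&
         x - (x - y) == y;
}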
+ HSub* sub = right->AsSub(); + DCHECK(DataType::IsIntegralType(type)); + if (sub->GetLeft() == left) { + instruction->ReplaceWith(sub->GetRight()); RecordSimplification(); instruction->GetBlock()->RemoveInstruction(instruction); return; @@ -2334,6 +2345,29 @@ void InstructionSimplifierVisitor::VisitXor(HXor* instruction) { TryHandleAssociativeAndCommutativeOperation(instruction); } +void InstructionSimplifierVisitor::SimplifyBoxUnbox( + HInvoke* instruction, ArtField* field, DataType::Type type) { + DCHECK(instruction->GetIntrinsic() == Intrinsics::kByteValueOf || + instruction->GetIntrinsic() == Intrinsics::kShortValueOf || + instruction->GetIntrinsic() == Intrinsics::kCharacterValueOf || + instruction->GetIntrinsic() == Intrinsics::kIntegerValueOf); + const HUseList<HInstruction*>& uses = instruction->GetUses(); + for (auto it = uses.begin(), end = uses.end(); it != end;) { + HInstruction* user = it->GetUser(); + ++it; // Increment the iterator before we potentially remove the node from the list. + if (user->IsInstanceFieldGet() && + user->AsInstanceFieldGet()->GetFieldInfo().GetField() == field && + // Note: Due to other simplifications, we may have an `HInstanceFieldGet` with + // a different type (Int8 vs. Uint8, Int16 vs. Uint16) for the same field. + // Do not optimize that case for now. (We would need to insert a `HTypeConversion`.) + user->GetType() == type) { + user->ReplaceWith(instruction->InputAt(0)); + RecordSimplification(); + // Do not remove `user` while we're iterating over the block's instructions. Let DCE do it. + } + } +} + void InstructionSimplifierVisitor::SimplifyStringEquals(HInvoke* instruction) { HInstruction* argument = instruction->InputAt(1); HInstruction* receiver = instruction->InputAt(0); @@ -2372,7 +2406,9 @@ static bool IsArrayLengthOf(HInstruction* potential_length, HInstruction* potent void InstructionSimplifierVisitor::SimplifySystemArrayCopy(HInvoke* instruction) { HInstruction* source = instruction->InputAt(0); + HInstruction* source_pos = instruction->InputAt(1); HInstruction* destination = instruction->InputAt(2); + HInstruction* destination_pos = instruction->InputAt(3); HInstruction* count = instruction->InputAt(4); SystemArrayCopyOptimizations optimizations(instruction); if (CanEnsureNotNullAt(source, instruction)) { @@ -2385,6 +2421,10 @@ void InstructionSimplifierVisitor::SimplifySystemArrayCopy(HInvoke* instruction) optimizations.SetDestinationIsSource(); } + if (source_pos == destination_pos) { + optimizations.SetSourcePositionIsDestinationPosition(); + } + if (IsArrayLengthOf(count, source)) { optimizations.SetCountIsSourceLength(); } @@ -2985,6 +3025,12 @@ bool InstructionSimplifierVisitor::CanUseKnownBootImageVarHandle(HInvoke* invoke void InstructionSimplifierVisitor::VisitInvoke(HInvoke* instruction) { switch (instruction->GetIntrinsic()) { +#define SIMPLIFY_BOX_UNBOX(name, low, high, type, start_index) \ + case Intrinsics::k ## name ## ValueOf: \ + SimplifyBoxUnbox(instruction, WellKnownClasses::java_lang_##name##_value, type); \ + break; + BOXED_TYPES(SIMPLIFY_BOX_UNBOX) +#undef SIMPLIFY_BOX_UNBOX case Intrinsics::kStringEquals: SimplifyStringEquals(instruction); break; @@ -3063,43 +3109,6 @@ void InstructionSimplifierVisitor::VisitInvoke(HInvoke* instruction) { case Intrinsics::kVarHandleWeakCompareAndSetRelease: SimplifyVarHandleIntrinsic(instruction); break; - case Intrinsics::kIntegerRotateRight: - case Intrinsics::kLongRotateRight: - case Intrinsics::kIntegerRotateLeft: - case Intrinsics::kLongRotateLeft: - case 
Intrinsics::kIntegerCompare: - case Intrinsics::kLongCompare: - case Intrinsics::kIntegerSignum: - case Intrinsics::kLongSignum: - case Intrinsics::kFloatIsNaN: - case Intrinsics::kDoubleIsNaN: - case Intrinsics::kStringIsEmpty: - case Intrinsics::kUnsafeLoadFence: - case Intrinsics::kUnsafeStoreFence: - case Intrinsics::kUnsafeFullFence: - case Intrinsics::kJdkUnsafeLoadFence: - case Intrinsics::kJdkUnsafeStoreFence: - case Intrinsics::kJdkUnsafeFullFence: - case Intrinsics::kVarHandleFullFence: - case Intrinsics::kVarHandleAcquireFence: - case Intrinsics::kVarHandleReleaseFence: - case Intrinsics::kVarHandleLoadLoadFence: - case Intrinsics::kVarHandleStoreStoreFence: - case Intrinsics::kMathMinIntInt: - case Intrinsics::kMathMinLongLong: - case Intrinsics::kMathMinFloatFloat: - case Intrinsics::kMathMinDoubleDouble: - case Intrinsics::kMathMaxIntInt: - case Intrinsics::kMathMaxLongLong: - case Intrinsics::kMathMaxFloatFloat: - case Intrinsics::kMathMaxDoubleDouble: - case Intrinsics::kMathAbsInt: - case Intrinsics::kMathAbsLong: - case Intrinsics::kMathAbsFloat: - case Intrinsics::kMathAbsDouble: - // These are replaced by intermediate representation in the instruction builder. - LOG(FATAL) << "Unexpected " << static_cast<Intrinsics>(instruction->GetIntrinsic()); - UNREACHABLE(); default: break; } @@ -3215,7 +3224,7 @@ bool InstructionSimplifierVisitor::TrySubtractionChainSimplification( HInstruction* left = instruction->GetLeft(); HInstruction* right = instruction->GetRight(); // Variable names as described above. - HConstant* const2 = right->IsConstant() ? right->AsConstant() : left->AsConstant(); + HConstant* const2 = right->IsConstant() ? right->AsConstant() : left->AsConstantOrNull(); if (const2 == nullptr) { return false; } @@ -3231,7 +3240,7 @@ bool InstructionSimplifierVisitor::TrySubtractionChainSimplification( } left = y->GetLeft(); - HConstant* const1 = left->IsConstant() ? left->AsConstant() : y->GetRight()->AsConstant(); + HConstant* const1 = left->IsConstant() ? 
left->AsConstant() : y->GetRight()->AsConstantOrNull(); if (const1 == nullptr) { return false; } diff --git a/compiler/optimizing/instruction_simplifier_arm.cc b/compiler/optimizing/instruction_simplifier_arm.cc index 05a518d544..be4371f734 100644 --- a/compiler/optimizing/instruction_simplifier_arm.cc +++ b/compiler/optimizing/instruction_simplifier_arm.cc @@ -33,8 +33,9 @@ namespace arm { class InstructionSimplifierArmVisitor final : public HGraphVisitor { public: - InstructionSimplifierArmVisitor(HGraph* graph, OptimizingCompilerStats* stats) - : HGraphVisitor(graph), stats_(stats) {} + InstructionSimplifierArmVisitor( + HGraph* graph, CodeGenerator* codegen, OptimizingCompilerStats* stats) + : HGraphVisitor(graph), codegen_(codegen), stats_(stats) {} private: void RecordSimplification() { @@ -78,6 +79,7 @@ class InstructionSimplifierArmVisitor final : public HGraphVisitor { void VisitTypeConversion(HTypeConversion* instruction) override; void VisitUShr(HUShr* instruction) override; + CodeGenerator* codegen_; OptimizingCompilerStats* stats_; }; @@ -217,7 +219,8 @@ void InstructionSimplifierArmVisitor::VisitArrayGet(HArrayGet* instruction) { return; } - if (TryExtractArrayAccessAddress(instruction, + if (TryExtractArrayAccessAddress(codegen_, + instruction, instruction->GetArray(), instruction->GetIndex(), data_offset)) { @@ -238,7 +241,8 @@ void InstructionSimplifierArmVisitor::VisitArraySet(HArraySet* instruction) { return; } - if (TryExtractArrayAccessAddress(instruction, + if (TryExtractArrayAccessAddress(codegen_, + instruction, instruction->GetArray(), instruction->GetIndex(), data_offset)) { @@ -300,7 +304,7 @@ void InstructionSimplifierArmVisitor::VisitUShr(HUShr* instruction) { } bool InstructionSimplifierArm::Run() { - InstructionSimplifierArmVisitor visitor(graph_, stats_); + InstructionSimplifierArmVisitor visitor(graph_, codegen_, stats_); visitor.VisitReversePostOrder(); return true; } diff --git a/compiler/optimizing/instruction_simplifier_arm.h b/compiler/optimizing/instruction_simplifier_arm.h index 0517e4f49e..25cea7c829 100644 --- a/compiler/optimizing/instruction_simplifier_arm.h +++ b/compiler/optimizing/instruction_simplifier_arm.h @@ -22,16 +22,23 @@ #include "optimization.h" namespace art HIDDEN { + +class CodeGenerator; + namespace arm { class InstructionSimplifierArm : public HOptimization { public: - InstructionSimplifierArm(HGraph* graph, OptimizingCompilerStats* stats) - : HOptimization(graph, kInstructionSimplifierArmPassName, stats) {} + InstructionSimplifierArm(HGraph* graph, CodeGenerator* codegen, OptimizingCompilerStats* stats) + : HOptimization(graph, kInstructionSimplifierArmPassName, stats), + codegen_(codegen) {} static constexpr const char* kInstructionSimplifierArmPassName = "instruction_simplifier_arm"; bool Run() override; + + private: + CodeGenerator* codegen_; }; } // namespace arm diff --git a/compiler/optimizing/instruction_simplifier_arm64.cc b/compiler/optimizing/instruction_simplifier_arm64.cc index 671900bd9d..2c191dc3f4 100644 --- a/compiler/optimizing/instruction_simplifier_arm64.cc +++ b/compiler/optimizing/instruction_simplifier_arm64.cc @@ -33,8 +33,9 @@ using helpers::ShifterOperandSupportsExtension; class InstructionSimplifierArm64Visitor final : public HGraphVisitor { public: - InstructionSimplifierArm64Visitor(HGraph* graph, OptimizingCompilerStats* stats) - : HGraphVisitor(graph), stats_(stats) {} + InstructionSimplifierArm64Visitor( + HGraph* graph, CodeGenerator* codegen, OptimizingCompilerStats* stats) + : 
HGraphVisitor(graph), codegen_(codegen), stats_(stats) {} private: void RecordSimplification() { @@ -84,6 +85,7 @@ class InstructionSimplifierArm64Visitor final : public HGraphVisitor { void VisitVecLoad(HVecLoad* instruction) override; void VisitVecStore(HVecStore* instruction) override; + CodeGenerator* codegen_; OptimizingCompilerStats* stats_; }; @@ -198,7 +200,8 @@ void InstructionSimplifierArm64Visitor::VisitAnd(HAnd* instruction) { void InstructionSimplifierArm64Visitor::VisitArrayGet(HArrayGet* instruction) { size_t data_offset = CodeGenerator::GetArrayDataOffset(instruction); - if (TryExtractArrayAccessAddress(instruction, + if (TryExtractArrayAccessAddress(codegen_, + instruction, instruction->GetArray(), instruction->GetIndex(), data_offset)) { @@ -209,7 +212,8 @@ void InstructionSimplifierArm64Visitor::VisitArrayGet(HArrayGet* instruction) { void InstructionSimplifierArm64Visitor::VisitArraySet(HArraySet* instruction) { size_t access_size = DataType::Size(instruction->GetComponentType()); size_t data_offset = mirror::Array::DataOffset(access_size).Uint32Value(); - if (TryExtractArrayAccessAddress(instruction, + if (TryExtractArrayAccessAddress(codegen_, + instruction, instruction->GetArray(), instruction->GetIndex(), data_offset)) { @@ -284,7 +288,7 @@ void InstructionSimplifierArm64Visitor::VisitVecLoad(HVecLoad* instruction) { size_t size = DataType::Size(instruction->GetPackedType()); size_t offset = mirror::Array::DataOffset(size).Uint32Value(); if (TryExtractArrayAccessAddress( - instruction, instruction->GetArray(), instruction->GetIndex(), offset)) { + codegen_, instruction, instruction->GetArray(), instruction->GetIndex(), offset)) { RecordSimplification(); } } @@ -298,14 +302,14 @@ void InstructionSimplifierArm64Visitor::VisitVecStore(HVecStore* instruction) { size_t size = DataType::Size(instruction->GetPackedType()); size_t offset = mirror::Array::DataOffset(size).Uint32Value(); if (TryExtractArrayAccessAddress( - instruction, instruction->GetArray(), instruction->GetIndex(), offset)) { + codegen_, instruction, instruction->GetArray(), instruction->GetIndex(), offset)) { RecordSimplification(); } } } bool InstructionSimplifierArm64::Run() { - InstructionSimplifierArm64Visitor visitor(graph_, stats_); + InstructionSimplifierArm64Visitor visitor(graph_, codegen_, stats_); visitor.VisitReversePostOrder(); return true; } diff --git a/compiler/optimizing/instruction_simplifier_arm64.h b/compiler/optimizing/instruction_simplifier_arm64.h index 374638ab9e..5c57484b24 100644 --- a/compiler/optimizing/instruction_simplifier_arm64.h +++ b/compiler/optimizing/instruction_simplifier_arm64.h @@ -22,16 +22,23 @@ #include "optimization.h" namespace art HIDDEN { + +class CodeGenerator; + namespace arm64 { class InstructionSimplifierArm64 : public HOptimization { public: - InstructionSimplifierArm64(HGraph* graph, OptimizingCompilerStats* stats) - : HOptimization(graph, kInstructionSimplifierArm64PassName, stats) {} + InstructionSimplifierArm64(HGraph* graph, CodeGenerator* codegen, OptimizingCompilerStats* stats) + : HOptimization(graph, kInstructionSimplifierArm64PassName, stats), + codegen_(codegen) {} static constexpr const char* kInstructionSimplifierArm64PassName = "instruction_simplifier_arm64"; bool Run() override; + + private: + CodeGenerator* codegen_; }; } // namespace arm64 diff --git a/compiler/optimizing/instruction_simplifier_shared.cc b/compiler/optimizing/instruction_simplifier_shared.cc index 34daae21ee..50ea2b929b 100644 --- 
a/compiler/optimizing/instruction_simplifier_shared.cc +++ b/compiler/optimizing/instruction_simplifier_shared.cc @@ -16,6 +16,7 @@ #include "instruction_simplifier_shared.h" +#include "code_generator.h" #include "mirror/array-inl.h" namespace art HIDDEN { @@ -229,7 +230,8 @@ bool TryMergeNegatedInput(HBinaryOperation* op) { } -bool TryExtractArrayAccessAddress(HInstruction* access, +bool TryExtractArrayAccessAddress(CodeGenerator* codegen, + HInstruction* access, HInstruction* array, HInstruction* index, size_t data_offset) { @@ -244,8 +246,7 @@ bool TryExtractArrayAccessAddress(HInstruction* access, // The access may require a runtime call or the original array pointer. return false; } - if (gUseReadBarrier && - !kUseBakerReadBarrier && + if (codegen->EmitNonBakerReadBarrier() && access->IsArrayGet() && access->GetType() == DataType::Type::kReference) { // For object arrays, the non-Baker read barrier instrumentation requires diff --git a/compiler/optimizing/instruction_simplifier_shared.h b/compiler/optimizing/instruction_simplifier_shared.h index ddc3a867b8..68148cff7e 100644 --- a/compiler/optimizing/instruction_simplifier_shared.h +++ b/compiler/optimizing/instruction_simplifier_shared.h @@ -22,6 +22,8 @@ namespace art HIDDEN { +class CodeGenerator; + namespace helpers { inline bool CanFitInShifterOperand(HInstruction* instruction) { @@ -54,7 +56,7 @@ inline bool HasShifterOperand(HInstruction* instr, InstructionSet isa) { // t3 = Sub(*, t2) inline bool IsSubRightSubLeftShl(HSub *sub) { HInstruction* right = sub->GetRight(); - return right->IsSub() && right->AsSub()->GetLeft()->IsShl();; + return right->IsSub() && right->AsSub()->GetLeft()->IsShl(); } } // namespace helpers @@ -64,7 +66,8 @@ bool TryCombineMultiplyAccumulate(HMul* mul, InstructionSet isa); // a negated bitwise instruction. 
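TryMergeNegatedInput, declared just below, folds an HNot input into the bitwise operation itself. As a hedged source-level illustration (the mnemonics are the usual ARM ones, not taken from this diff), this is the kind of pattern that benefits:

    // With the Not merged into the And, `a & ~b` can be selected as a single
    // BIC (bit-clear) instruction on ARM/ARM64 instead of MVN followed by AND.
    static inline int32_t ClearMaskedBits(int32_t a, int32_t b) {
      return a & ~b;  // HAnd(a, HNot(b)) in the HIR - a merge candidate
    }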
bool TryMergeNegatedInput(HBinaryOperation* op); -bool TryExtractArrayAccessAddress(HInstruction* access, +bool TryExtractArrayAccessAddress(CodeGenerator* codegen, + HInstruction* access, HInstruction* array, HInstruction* index, size_t data_offset); diff --git a/compiler/optimizing/instruction_simplifier_test.cc b/compiler/optimizing/instruction_simplifier_test.cc index 966f5b91cf..9f47995cf5 100644 --- a/compiler/optimizing/instruction_simplifier_test.cc +++ b/compiler/optimizing/instruction_simplifier_test.cc @@ -134,260 +134,6 @@ class InstanceOfInstructionSimplifierTestGroup }; // // ENTRY -// switch (param) { -// case 1: -// obj1 = param2; break; -// case 2: -// obj1 = param3; break; -// default: -// obj2 = new Obj(); -// } -// val_phi = PHI[3,4,10] -// target_phi = PHI[param2, param3, obj2] -// return PredFieldGet[val_phi, target_phi] => PredFieldGet[val_phi, target_phi] -TEST_F(InstructionSimplifierTest, SimplifyPredicatedFieldGetNoMerge) { - ScopedObjectAccess soa(Thread::Current()); - VariableSizedHandleScope vshs(soa.Self()); - CreateGraph(&vshs); - AdjacencyListGraph blks(SetupFromAdjacencyList("entry", - "exit", - {{"entry", "case1"}, - {"entry", "case2"}, - {"entry", "case3"}, - {"case1", "breturn"}, - {"case2", "breturn"}, - {"case3", "breturn"}, - {"breturn", "exit"}})); -#define GET_BLOCK(name) HBasicBlock* name = blks.Get(#name) - GET_BLOCK(entry); - GET_BLOCK(exit); - GET_BLOCK(case1); - GET_BLOCK(case2); - GET_BLOCK(case3); - GET_BLOCK(breturn); -#undef GET_BLOCK - - HInstruction* bool_value = MakeParam(DataType::Type::kInt32); - HInstruction* obj1_param = MakeParam(DataType::Type::kReference); - HInstruction* obj2_param = MakeParam(DataType::Type::kReference); - HInstruction* c3 = graph_->GetIntConstant(3); - HInstruction* c4 = graph_->GetIntConstant(4); - HInstruction* c10 = graph_->GetIntConstant(10); - - HInstruction* cls = MakeClassLoad(); - HInstruction* switch_inst = new (GetAllocator()) HPackedSwitch(0, 2, bool_value); - entry->AddInstruction(cls); - entry->AddInstruction(switch_inst); - ManuallyBuildEnvFor(cls, {}); - - HInstruction* goto_c1 = new (GetAllocator()) HGoto(); - case1->AddInstruction(goto_c1); - - HInstruction* goto_c2 = new (GetAllocator()) HGoto(); - case2->AddInstruction(goto_c2); - - HInstruction* obj3 = MakeNewInstance(cls); - HInstruction* goto_c3 = new (GetAllocator()) HGoto(); - case3->AddInstruction(obj3); - case3->AddInstruction(goto_c3); - - HPhi* val_phi = MakePhi({c3, c4, c10}); - HPhi* obj_phi = MakePhi({obj1_param, obj2_param, obj3}); - HPredicatedInstanceFieldGet* read_end = - new (GetAllocator()) HPredicatedInstanceFieldGet(obj_phi, - nullptr, - val_phi, - val_phi->GetType(), - MemberOffset(10), - false, - 42, - 0, - graph_->GetDexFile(), - 0); - HInstruction* return_exit = new (GetAllocator()) HReturn(read_end); - breturn->AddPhi(val_phi); - breturn->AddPhi(obj_phi); - breturn->AddInstruction(read_end); - breturn->AddInstruction(return_exit); - - SetupExit(exit); - - PerformSimplification(blks); - - EXPECT_INS_RETAINED(read_end); - - EXPECT_INS_EQ(read_end->GetTarget(), obj_phi); - EXPECT_INS_EQ(read_end->GetDefaultValue(), val_phi); -} - -// // ENTRY -// switch (param) { -// case 1: -// obj1 = param2; break; -// case 2: -// obj1 = param3; break; -// default: -// obj2 = new Obj(); -// } -// val_phi = PHI[3,3,10] -// target_phi = PHI[param2, param3, obj2] -// return PredFieldGet[val_phi, target_phi] => PredFieldGet[3, target_phi] -TEST_F(InstructionSimplifierTest, SimplifyPredicatedFieldGetMerge) { - ScopedObjectAccess 
soa(Thread::Current()); - VariableSizedHandleScope vshs(soa.Self()); - CreateGraph(&vshs); - AdjacencyListGraph blks(SetupFromAdjacencyList("entry", - "exit", - {{"entry", "case1"}, - {"entry", "case2"}, - {"entry", "case3"}, - {"case1", "breturn"}, - {"case2", "breturn"}, - {"case3", "breturn"}, - {"breturn", "exit"}})); -#define GET_BLOCK(name) HBasicBlock* name = blks.Get(#name) - GET_BLOCK(entry); - GET_BLOCK(exit); - GET_BLOCK(case1); - GET_BLOCK(case2); - GET_BLOCK(case3); - GET_BLOCK(breturn); -#undef GET_BLOCK - - HInstruction* bool_value = MakeParam(DataType::Type::kInt32); - HInstruction* obj1_param = MakeParam(DataType::Type::kReference); - HInstruction* obj2_param = MakeParam(DataType::Type::kReference); - HInstruction* c3 = graph_->GetIntConstant(3); - HInstruction* c10 = graph_->GetIntConstant(10); - - HInstruction* cls = MakeClassLoad(); - HInstruction* switch_inst = new (GetAllocator()) HPackedSwitch(0, 2, bool_value); - entry->AddInstruction(cls); - entry->AddInstruction(switch_inst); - ManuallyBuildEnvFor(cls, {}); - - HInstruction* goto_c1 = new (GetAllocator()) HGoto(); - case1->AddInstruction(goto_c1); - - HInstruction* goto_c2 = new (GetAllocator()) HGoto(); - case2->AddInstruction(goto_c2); - - HInstruction* obj3 = MakeNewInstance(cls); - HInstruction* goto_c3 = new (GetAllocator()) HGoto(); - case3->AddInstruction(obj3); - case3->AddInstruction(goto_c3); - - HPhi* val_phi = MakePhi({c3, c3, c10}); - HPhi* obj_phi = MakePhi({obj1_param, obj2_param, obj3}); - HPredicatedInstanceFieldGet* read_end = - new (GetAllocator()) HPredicatedInstanceFieldGet(obj_phi, - nullptr, - val_phi, - val_phi->GetType(), - MemberOffset(10), - false, - 42, - 0, - graph_->GetDexFile(), - 0); - HInstruction* return_exit = new (GetAllocator()) HReturn(read_end); - breturn->AddPhi(val_phi); - breturn->AddPhi(obj_phi); - breturn->AddInstruction(read_end); - breturn->AddInstruction(return_exit); - - SetupExit(exit); - - PerformSimplification(blks); - - EXPECT_FALSE(obj3->CanBeNull()); - EXPECT_INS_RETAINED(read_end); - - EXPECT_INS_EQ(read_end->GetTarget(), obj_phi); - EXPECT_INS_EQ(read_end->GetDefaultValue(), c3); -} - -// // ENTRY -// if (param) { -// obj1 = new Obj(); -// } else { -// obj2 = new Obj(); -// } -// val_phi = PHI[3,10] -// target_phi = PHI[obj1, obj2] -// return PredFieldGet[val_phi, target_phi] => FieldGet[target_phi] -TEST_F(InstructionSimplifierTest, SimplifyPredicatedFieldGetNoNull) { - ScopedObjectAccess soa(Thread::Current()); - VariableSizedHandleScope vshs(soa.Self()); - CreateGraph(&vshs); - AdjacencyListGraph blks(SetupFromAdjacencyList("entry", - "exit", - {{"entry", "left"}, - {"entry", "right"}, - {"left", "breturn"}, - {"right", "breturn"}, - {"breturn", "exit"}})); -#define GET_BLOCK(name) HBasicBlock* name = blks.Get(#name) - GET_BLOCK(entry); - GET_BLOCK(exit); - GET_BLOCK(left); - GET_BLOCK(right); - GET_BLOCK(breturn); -#undef GET_BLOCK - - HInstruction* bool_value = MakeParam(DataType::Type::kBool); - HInstruction* c3 = graph_->GetIntConstant(3); - HInstruction* c10 = graph_->GetIntConstant(10); - - HInstruction* cls = MakeClassLoad(); - HInstruction* if_inst = new (GetAllocator()) HIf(bool_value); - entry->AddInstruction(cls); - entry->AddInstruction(if_inst); - ManuallyBuildEnvFor(cls, {}); - - HInstruction* obj1 = MakeNewInstance(cls); - HInstruction* goto_left = new (GetAllocator()) HGoto(); - left->AddInstruction(obj1); - left->AddInstruction(goto_left); - - HInstruction* obj2 = MakeNewInstance(cls); - HInstruction* goto_right = new (GetAllocator()) 
HGoto(); - right->AddInstruction(obj2); - right->AddInstruction(goto_right); - - HPhi* val_phi = MakePhi({c3, c10}); - HPhi* obj_phi = MakePhi({obj1, obj2}); - obj_phi->SetCanBeNull(false); - HInstruction* read_end = new (GetAllocator()) HPredicatedInstanceFieldGet(obj_phi, - nullptr, - val_phi, - val_phi->GetType(), - MemberOffset(10), - false, - 42, - 0, - graph_->GetDexFile(), - 0); - HInstruction* return_exit = new (GetAllocator()) HReturn(read_end); - breturn->AddPhi(val_phi); - breturn->AddPhi(obj_phi); - breturn->AddInstruction(read_end); - breturn->AddInstruction(return_exit); - - SetupExit(exit); - - PerformSimplification(blks); - - EXPECT_FALSE(obj1->CanBeNull()); - EXPECT_FALSE(obj2->CanBeNull()); - EXPECT_INS_REMOVED(read_end); - - HInstanceFieldGet* ifget = FindSingleInstruction<HInstanceFieldGet>(graph_, breturn); - ASSERT_NE(ifget, nullptr); - EXPECT_INS_EQ(ifget->InputAt(0), obj_phi); -} - -// // ENTRY // obj = new Obj(); // // Make sure this graph isn't broken // if (obj instanceof <other>) { diff --git a/compiler/optimizing/intrinsic_objects.cc b/compiler/optimizing/intrinsic_objects.cc index 7e542117a9..cf49f50d91 100644 --- a/compiler/optimizing/intrinsic_objects.cc +++ b/compiler/optimizing/intrinsic_objects.cc @@ -20,28 +20,54 @@ #include "base/casts.h" #include "base/logging.h" #include "image.h" +#include "intrinsics.h" #include "obj_ptr-inl.h" +#include "well_known_classes.h" namespace art HIDDEN { static constexpr size_t kIntrinsicObjectsOffset = enum_cast<size_t>(ImageHeader::kIntrinsicObjectsStart); -ObjPtr<mirror::ObjectArray<mirror::Object>> IntrinsicObjects::LookupIntegerCache( - Thread* self, ClassLinker* class_linker) { - ObjPtr<mirror::Class> integer_cache_class = class_linker->LookupClass( - self, "Ljava/lang/Integer$IntegerCache;", /* class_loader= */ nullptr); - if (integer_cache_class == nullptr || !integer_cache_class->IsInitialized()) { - return nullptr; - } - ArtField* cache_field = - integer_cache_class->FindDeclaredStaticField("cache", "[Ljava/lang/Integer;"); - CHECK(cache_field != nullptr); - ObjPtr<mirror::ObjectArray<mirror::Object>> integer_cache = +template <typename T> +static int32_t FillIntrinsicsObjects( + ArtField* cache_field, + ObjPtr<mirror::ObjectArray<mirror::Object>> live_objects, + int32_t expected_low, + int32_t expected_high, + T type_check, + int32_t index) + REQUIRES_SHARED(Locks::mutator_lock_) { + ObjPtr<mirror::ObjectArray<mirror::Object>> cache = ObjPtr<mirror::ObjectArray<mirror::Object>>::DownCast( - cache_field->GetObject(integer_cache_class)); - CHECK(integer_cache != nullptr); - return integer_cache; + cache_field->GetObject(cache_field->GetDeclaringClass())); + int32_t length = expected_high - expected_low + 1; + DCHECK_EQ(length, cache->GetLength()); + for (int32_t i = 0; i != length; ++i) { + ObjPtr<mirror::Object> value = cache->GetWithoutChecks(i); + live_objects->Set(index + i, value); + type_check(value, expected_low + i); + } + return index + length; +} + +void IntrinsicObjects::FillIntrinsicObjects( + ObjPtr<mirror::ObjectArray<mirror::Object>> boot_image_live_objects, size_t start_index) { + DCHECK_EQ(start_index, ImageHeader::kIntrinsicObjectsStart); + int32_t index = dchecked_integral_cast<int32_t>(start_index); +#define FILL_OBJECTS(name, low, high, type, offset) \ + index = FillIntrinsicsObjects( \ + WellKnownClasses::java_lang_ ##name ##_ ##name ##Cache_cache, \ + boot_image_live_objects, \ + low, \ + high, \ + [](ObjPtr<mirror::Object> obj, int32_t expected) REQUIRES_SHARED(Locks::mutator_lock_) { \ + 
CHECK_EQ(expected, WellKnownClasses::java_lang_ ##name ##_value->Get ##name(obj)); \ + }, \ + index); + BOXED_TYPES(FILL_OBJECTS) +#undef FILL_OBJECTS + DCHECK_EQ(dchecked_integral_cast<size_t>(index), start_index + GetNumberOfIntrinsicObjects()); } static bool HasIntrinsicObjects( @@ -53,43 +79,26 @@ static bool HasIntrinsicObjects( return length != kIntrinsicObjectsOffset; } -ObjPtr<mirror::ObjectArray<mirror::Object>> IntrinsicObjects::GetIntegerValueOfCache( - ObjPtr<mirror::ObjectArray<mirror::Object>> boot_image_live_objects) { - if (!HasIntrinsicObjects(boot_image_live_objects)) { - return nullptr; // No intrinsic objects. - } - // No need for read barrier for boot image object or for verifying the value that was just stored. - ObjPtr<mirror::Object> result = - boot_image_live_objects->GetWithoutChecks<kVerifyNone, kWithoutReadBarrier>( - kIntrinsicObjectsOffset); - DCHECK(result != nullptr); - DCHECK(result->IsObjectArray()); - DCHECK(result->GetClass()->DescriptorEquals("[Ljava/lang/Integer;")); - return ObjPtr<mirror::ObjectArray<mirror::Object>>::DownCast(result); -} - -ObjPtr<mirror::Object> IntrinsicObjects::GetIntegerValueOfObject( +ObjPtr<mirror::Object> IntrinsicObjects::GetValueOfObject( ObjPtr<mirror::ObjectArray<mirror::Object>> boot_image_live_objects, + size_t start_index, uint32_t index) { DCHECK(HasIntrinsicObjects(boot_image_live_objects)); - DCHECK_LT(index, - static_cast<uint32_t>(GetIntegerValueOfCache(boot_image_live_objects)->GetLength())); - // No need for read barrier for boot image object or for verifying the value that was just stored. ObjPtr<mirror::Object> result = boot_image_live_objects->GetWithoutChecks<kVerifyNone, kWithoutReadBarrier>( - kIntrinsicObjectsOffset + /* skip the IntegerCache.cache */ 1u + index); + kIntrinsicObjectsOffset + start_index + index); DCHECK(result != nullptr); - DCHECK(result->GetClass()->DescriptorEquals("Ljava/lang/Integer;")); return result; } -MemberOffset IntrinsicObjects::GetIntegerValueOfArrayDataOffset( - ObjPtr<mirror::ObjectArray<mirror::Object>> boot_image_live_objects) { +MemberOffset IntrinsicObjects::GetValueOfArrayDataOffset( + ObjPtr<mirror::ObjectArray<mirror::Object>> boot_image_live_objects, + size_t start_index) { DCHECK(HasIntrinsicObjects(boot_image_live_objects)); MemberOffset result = - mirror::ObjectArray<mirror::Object>::OffsetOfElement(kIntrinsicObjectsOffset + 1u); - DCHECK_EQ(GetIntegerValueOfObject(boot_image_live_objects, 0u), + mirror::ObjectArray<mirror::Object>::OffsetOfElement(kIntrinsicObjectsOffset + start_index); + DCHECK_EQ(GetValueOfObject(boot_image_live_objects, start_index, 0u), (boot_image_live_objects ->GetFieldObject<mirror::Object, kVerifyNone, kWithoutReadBarrier>(result))); return result; diff --git a/compiler/optimizing/intrinsic_objects.h b/compiler/optimizing/intrinsic_objects.h index d750f2934b..52a6b81f0e 100644 --- a/compiler/optimizing/intrinsic_objects.h +++ b/compiler/optimizing/intrinsic_objects.h @@ -21,11 +21,12 @@ #include "base/bit_utils.h" #include "base/macros.h" #include "base/mutex.h" +#include "obj_ptr.h" +#include "offsets.h" namespace art HIDDEN { class ClassLinker; -template <class MirrorType> class ObjPtr; class MemberOffset; class Thread; @@ -34,17 +35,30 @@ class Object; template <class T> class ObjectArray; } // namespace mirror +#define BOXED_TYPES(V) \ + V(Byte, -128, 127, DataType::Type::kInt8, 0) \ + V(Short, -128, 127, DataType::Type::kInt16, kByteCacheLastIndex) \ + V(Character, 0, 127, DataType::Type::kUint16, kShortCacheLastIndex) \ + 
V(Integer, -128, 127, DataType::Type::kInt32, kCharacterCacheLastIndex) + +#define DEFINE_BOXED_CONSTANTS(name, low, high, unused, start_index) \ + static constexpr size_t k ##name ##CacheLastIndex = start_index + (high - low + 1); \ + static constexpr size_t k ##name ##CacheFirstIndex = start_index; + BOXED_TYPES(DEFINE_BOXED_CONSTANTS) + + static constexpr size_t kNumberOfBoxedCaches = kIntegerCacheLastIndex; +#undef DEFINE_BOXED_CONSTANTS + class IntrinsicObjects { public: enum class PatchType { - kIntegerValueOfObject, - kIntegerValueOfArray, + kValueOfObject, + kValueOfArray, - kLast = kIntegerValueOfArray + kLast = kValueOfArray }; static uint32_t EncodePatch(PatchType patch_type, uint32_t index = 0u) { - DCHECK(patch_type == PatchType::kIntegerValueOfObject || index == 0u); return PatchTypeField::Encode(static_cast<uint32_t>(patch_type)) | IndexField::Encode(index); } @@ -56,18 +70,37 @@ class IntrinsicObjects { return IndexField::Decode(intrinsic_data); } - // Functions for retrieving data for Integer.valueOf(). - EXPORT static ObjPtr<mirror::ObjectArray<mirror::Object>> LookupIntegerCache( - Thread* self, ClassLinker* class_linker) REQUIRES_SHARED(Locks::mutator_lock_); - EXPORT static ObjPtr<mirror::ObjectArray<mirror::Object>> GetIntegerValueOfCache( - ObjPtr<mirror::ObjectArray<mirror::Object>> boot_image_live_objects) + // Helpers returning addresses of objects, suitable for embedding in generated code. +#define DEFINE_BOXED_ACCESSES(name, unused1, unused2, unused3, start_index) \ + static ObjPtr<mirror::Object> Get ##name ##ValueOfObject( \ + ObjPtr<mirror::ObjectArray<mirror::Object>> boot_image_live_objects, \ + uint32_t index) REQUIRES_SHARED(Locks::mutator_lock_) { \ + return GetValueOfObject(boot_image_live_objects, k ##name ##CacheFirstIndex, index); \ + } \ + static MemberOffset Get ##name ##ValueOfArrayDataOffset( \ + ObjPtr<mirror::ObjectArray<mirror::Object>> boot_image_live_objects) \ + REQUIRES_SHARED(Locks::mutator_lock_) { \ + return GetValueOfArrayDataOffset(boot_image_live_objects, k ##name ##CacheFirstIndex); \ + } + BOXED_TYPES(DEFINE_BOXED_ACCESSES) +#undef DEFINED_BOXED_ACCESSES + + EXPORT static void FillIntrinsicObjects( + ObjPtr<mirror::ObjectArray<mirror::Object>> boot_image_live_objects, size_t start_index) REQUIRES_SHARED(Locks::mutator_lock_); - EXPORT static ObjPtr<mirror::Object> GetIntegerValueOfObject( + + static constexpr size_t GetNumberOfIntrinsicObjects() { + return kNumberOfBoxedCaches; + } + + EXPORT static ObjPtr<mirror::Object> GetValueOfObject( ObjPtr<mirror::ObjectArray<mirror::Object>> boot_image_live_objects, + size_t start_index, uint32_t index) REQUIRES_SHARED(Locks::mutator_lock_); - EXPORT static MemberOffset GetIntegerValueOfArrayDataOffset( - ObjPtr<mirror::ObjectArray<mirror::Object>> boot_image_live_objects) - REQUIRES_SHARED(Locks::mutator_lock_); + + EXPORT static MemberOffset GetValueOfArrayDataOffset( + ObjPtr<mirror::ObjectArray<mirror::Object>> boot_image_live_objects, + size_t start_index) REQUIRES_SHARED(Locks::mutator_lock_); private: static constexpr size_t kPatchTypeBits = diff --git a/compiler/optimizing/intrinsics.cc b/compiler/optimizing/intrinsics.cc index 774deec438..8330a973ff 100644 --- a/compiler/optimizing/intrinsics.cc +++ b/compiler/optimizing/intrinsics.cc @@ -27,10 +27,12 @@ #include "gc/space/image_space.h" #include "image-inl.h" #include "intrinsic_objects.h" +#include "intrinsics_list.h" #include "nodes.h" #include "obj_ptr-inl.h" #include "scoped_thread_state_change-inl.h" #include 
"thread-current-inl.h" +#include "well_known_classes-inl.h" namespace art HIDDEN { @@ -43,22 +45,12 @@ std::ostream& operator<<(std::ostream& os, const Intrinsics& intrinsic) { case Intrinsics::k ## Name: \ os << # Name; \ break; -#include "intrinsics_list.h" - INTRINSICS_LIST(OPTIMIZING_INTRINSICS) -#undef STATIC_INTRINSICS_LIST -#undef VIRTUAL_INTRINSICS_LIST + ART_INTRINSICS_LIST(OPTIMIZING_INTRINSICS) #undef OPTIMIZING_INTRINSICS } return os; } -static const char kIntegerCacheDescriptor[] = "Ljava/lang/Integer$IntegerCache;"; -static const char kIntegerDescriptor[] = "Ljava/lang/Integer;"; -static const char kIntegerArrayDescriptor[] = "[Ljava/lang/Integer;"; -static const char kLowFieldName[] = "low"; -static const char kHighFieldName[] = "high"; -static const char kValueFieldName[] = "value"; - static ObjPtr<mirror::ObjectArray<mirror::Object>> GetBootImageLiveObjects() REQUIRES_SHARED(Locks::mutator_lock_) { gc::Heap* heap = Runtime::Current()->GetHeap(); @@ -73,79 +65,6 @@ static ObjPtr<mirror::ObjectArray<mirror::Object>> GetBootImageLiveObjects() return boot_image_live_objects; } -static ObjPtr<mirror::Class> LookupInitializedClass(Thread* self, - ClassLinker* class_linker, - const char* descriptor) - REQUIRES_SHARED(Locks::mutator_lock_) { - ObjPtr<mirror::Class> klass = - class_linker->LookupClass(self, descriptor, /* class_loader= */ nullptr); - DCHECK(klass != nullptr); - DCHECK(klass->IsInitialized()); - return klass; -} - -static ObjPtr<mirror::ObjectArray<mirror::Object>> GetIntegerCacheArray( - ObjPtr<mirror::Class> cache_class) REQUIRES_SHARED(Locks::mutator_lock_) { - ArtField* cache_field = cache_class->FindDeclaredStaticField("cache", kIntegerArrayDescriptor); - DCHECK(cache_field != nullptr); - return ObjPtr<mirror::ObjectArray<mirror::Object>>::DownCast(cache_field->GetObject(cache_class)); -} - -static int32_t GetIntegerCacheField(ObjPtr<mirror::Class> cache_class, const char* field_name) - REQUIRES_SHARED(Locks::mutator_lock_) { - ArtField* field = cache_class->FindDeclaredStaticField(field_name, "I"); - DCHECK(field != nullptr); - return field->GetInt(cache_class); -} - -static bool CheckIntegerCache(Thread* self, - ClassLinker* class_linker, - ObjPtr<mirror::ObjectArray<mirror::Object>> boot_image_live_objects, - ObjPtr<mirror::ObjectArray<mirror::Object>> boot_image_cache) - REQUIRES_SHARED(Locks::mutator_lock_) { - DCHECK(boot_image_cache != nullptr); - - // Since we have a cache in the boot image, both java.lang.Integer and - // java.lang.Integer$IntegerCache must be initialized in the boot image. - ObjPtr<mirror::Class> cache_class = - LookupInitializedClass(self, class_linker, kIntegerCacheDescriptor); - ObjPtr<mirror::Class> integer_class = - LookupInitializedClass(self, class_linker, kIntegerDescriptor); - - // Check that the current cache is the same as the `boot_image_cache`. - ObjPtr<mirror::ObjectArray<mirror::Object>> current_cache = GetIntegerCacheArray(cache_class); - if (current_cache != boot_image_cache) { - return false; // Messed up IntegerCache.cache. - } - - // Check that the range matches the boot image cache length. - int32_t low = GetIntegerCacheField(cache_class, kLowFieldName); - int32_t high = GetIntegerCacheField(cache_class, kHighFieldName); - if (boot_image_cache->GetLength() != high - low + 1) { - return false; // Messed up IntegerCache.low or IntegerCache.high. - } - - // Check that the elements match the boot image intrinsic objects and check their values as well. 
- ArtField* value_field = integer_class->FindDeclaredInstanceField(kValueFieldName, "I"); - DCHECK(value_field != nullptr); - for (int32_t i = 0, len = boot_image_cache->GetLength(); i != len; ++i) { - ObjPtr<mirror::Object> boot_image_object = - IntrinsicObjects::GetIntegerValueOfObject(boot_image_live_objects, i); - DCHECK(Runtime::Current()->GetHeap()->ObjectIsInBootImageSpace(boot_image_object)); - // No need for read barrier for comparison with a boot image object. - ObjPtr<mirror::Object> current_object = - boot_image_cache->GetWithoutChecks<kVerifyNone, kWithoutReadBarrier>(i); - if (boot_image_object != current_object) { - return false; // Messed up IntegerCache.cache[i] - } - if (value_field->GetInt(boot_image_object) != low + i) { - return false; // Messed up IntegerCache.cache[i].value. - } - } - - return true; -} - static bool CanReferenceBootImageObjects(HInvoke* invoke, const CompilerOptions& compiler_options) { // Piggyback on the method load kind to determine whether we can use PC-relative addressing // for AOT. This should cover both the testing config (non-PIC boot image) and codegens that @@ -161,95 +80,24 @@ static bool CanReferenceBootImageObjects(HInvoke* invoke, const CompilerOptions& return true; } -void IntrinsicVisitor::ComputeIntegerValueOfLocations(HInvoke* invoke, - CodeGenerator* codegen, - Location return_location, - Location first_argument_location) { - // The intrinsic will call if it needs to allocate a j.l.Integer. +void IntrinsicVisitor::ComputeValueOfLocations(HInvoke* invoke, + CodeGenerator* codegen, + int32_t low, + int32_t length, + Location return_location, + Location first_argument_location) { + // The intrinsic will call if it needs to allocate a boxed object. LocationSummary::CallKind call_kind = LocationSummary::kCallOnMainOnly; const CompilerOptions& compiler_options = codegen->GetCompilerOptions(); if (!CanReferenceBootImageObjects(invoke, compiler_options)) { return; } HInstruction* const input = invoke->InputAt(0); - if (compiler_options.IsBootImage()) { - if (!compiler_options.IsImageClass(kIntegerCacheDescriptor) || - !compiler_options.IsImageClass(kIntegerDescriptor)) { - return; - } - ClassLinker* class_linker = Runtime::Current()->GetClassLinker(); - Thread* self = Thread::Current(); - ScopedObjectAccess soa(self); - ObjPtr<mirror::Class> cache_class = class_linker->LookupClass( - self, kIntegerCacheDescriptor, /* class_loader= */ nullptr); - DCHECK(cache_class != nullptr); - if (UNLIKELY(!cache_class->IsInitialized())) { - LOG(WARNING) << "Image class " << cache_class->PrettyDescriptor() << " is uninitialized."; - return; - } - ObjPtr<mirror::Class> integer_class = - class_linker->LookupClass(self, kIntegerDescriptor, /* class_loader= */ nullptr); - DCHECK(integer_class != nullptr); - if (UNLIKELY(!integer_class->IsInitialized())) { - LOG(WARNING) << "Image class " << integer_class->PrettyDescriptor() << " is uninitialized."; - return; - } - int32_t low = GetIntegerCacheField(cache_class, kLowFieldName); - int32_t high = GetIntegerCacheField(cache_class, kHighFieldName); - if (kIsDebugBuild) { - ObjPtr<mirror::ObjectArray<mirror::Object>> current_cache = GetIntegerCacheArray(cache_class); - CHECK(current_cache != nullptr); - CHECK_EQ(current_cache->GetLength(), high - low + 1); - ArtField* value_field = integer_class->FindDeclaredInstanceField(kValueFieldName, "I"); - CHECK(value_field != nullptr); - for (int32_t i = 0, len = current_cache->GetLength(); i != len; ++i) { - ObjPtr<mirror::Object> current_object = 
current_cache->GetWithoutChecks(i); - CHECK(current_object != nullptr); - CHECK_EQ(value_field->GetInt(current_object), low + i); - } - } - if (input->IsIntConstant()) { - int32_t value = input->AsIntConstant()->GetValue(); - if (static_cast<uint32_t>(value) - static_cast<uint32_t>(low) < - static_cast<uint32_t>(high - low + 1)) { - // No call, we shall use direct pointer to the Integer object. - call_kind = LocationSummary::kNoCall; - } - } - } else { - Runtime* runtime = Runtime::Current(); - Thread* self = Thread::Current(); - ScopedObjectAccess soa(self); - ObjPtr<mirror::ObjectArray<mirror::Object>> boot_image_live_objects = GetBootImageLiveObjects(); - ObjPtr<mirror::ObjectArray<mirror::Object>> cache = - IntrinsicObjects::GetIntegerValueOfCache(boot_image_live_objects); - if (cache == nullptr) { - return; // No cache in the boot image. - } - if (compiler_options.IsJitCompiler()) { - if (!CheckIntegerCache(self, runtime->GetClassLinker(), boot_image_live_objects, cache)) { - return; // The cache was somehow messed up, probably by using reflection. - } - } else { - DCHECK(compiler_options.IsAotCompiler()); - DCHECK(CheckIntegerCache(self, runtime->GetClassLinker(), boot_image_live_objects, cache)); - if (input->IsIntConstant()) { - int32_t value = input->AsIntConstant()->GetValue(); - // Retrieve the `value` from the lowest cached Integer. - ObjPtr<mirror::Object> low_integer = - IntrinsicObjects::GetIntegerValueOfObject(boot_image_live_objects, 0u); - ObjPtr<mirror::Class> integer_class = - low_integer->GetClass<kVerifyNone, kWithoutReadBarrier>(); - ArtField* value_field = integer_class->FindDeclaredInstanceField(kValueFieldName, "I"); - DCHECK(value_field != nullptr); - int32_t low = value_field->GetInt(low_integer); - if (static_cast<uint32_t>(value) - static_cast<uint32_t>(low) < - static_cast<uint32_t>(cache->GetLength())) { - // No call, we shall use direct pointer to the Integer object. Note that we cannot - // do this for JIT as the "low" can change through reflection before emitting the code. - call_kind = LocationSummary::kNoCall; - } - } + if (input->IsIntConstant()) { + int32_t value = input->AsIntConstant()->GetValue(); + if (static_cast<uint32_t>(value) - static_cast<uint32_t>(low) < static_cast<uint32_t>(length)) { + // No call, we shall use direct pointer to the boxed object. + call_kind = LocationSummary::kNoCall; } } @@ -265,98 +113,58 @@ void IntrinsicVisitor::ComputeIntegerValueOfLocations(HInvoke* invoke, } } -static int32_t GetIntegerCacheLowFromIntegerCache(Thread* self, ClassLinker* class_linker) - REQUIRES_SHARED(Locks::mutator_lock_) { - ObjPtr<mirror::Class> cache_class = - LookupInitializedClass(self, class_linker, kIntegerCacheDescriptor); - return GetIntegerCacheField(cache_class, kLowFieldName); -} - -inline IntrinsicVisitor::IntegerValueOfInfo::IntegerValueOfInfo() +inline IntrinsicVisitor::ValueOfInfo::ValueOfInfo() : value_offset(0), low(0), length(0u), value_boot_image_reference(kInvalidReference) {} -IntrinsicVisitor::IntegerValueOfInfo IntrinsicVisitor::ComputeIntegerValueOfInfo( - HInvoke* invoke, const CompilerOptions& compiler_options) { - // Note that we could cache all of the data looked up here. but there's no good - // location for it. We don't want to add it to WellKnownClasses, to avoid creating global - // jni values. Adding it as state to the compiler singleton seems like wrong - // separation of concerns. - // The need for this data should be pretty rare though. - - // Note that at this point we can no longer abort the code generation. 
Therefore, - // we need to provide data that shall not lead to a crash even if the fields were - // modified through reflection since ComputeIntegerValueOfLocations() when JITting. - - ClassLinker* class_linker = Runtime::Current()->GetClassLinker(); - Thread* self = Thread::Current(); - ScopedObjectAccess soa(self); - - IntegerValueOfInfo info; +IntrinsicVisitor::ValueOfInfo IntrinsicVisitor::ComputeValueOfInfo( + HInvoke* invoke, + const CompilerOptions& compiler_options, + ArtField* value_field, + int32_t low, + int32_t length, + size_t base) { + ValueOfInfo info; + info.low = low; + info.length = length; + info.value_offset = value_field->GetOffset().Uint32Value(); if (compiler_options.IsBootImage()) { - ObjPtr<mirror::Class> integer_class = invoke->GetResolvedMethod()->GetDeclaringClass(); - DCHECK(integer_class->DescriptorEquals(kIntegerDescriptor)); - ArtField* value_field = integer_class->FindDeclaredInstanceField(kValueFieldName, "I"); - DCHECK(value_field != nullptr); - info.value_offset = value_field->GetOffset().Uint32Value(); - ObjPtr<mirror::Class> cache_class = - LookupInitializedClass(self, class_linker, kIntegerCacheDescriptor); - info.low = GetIntegerCacheField(cache_class, kLowFieldName); - int32_t high = GetIntegerCacheField(cache_class, kHighFieldName); - info.length = dchecked_integral_cast<uint32_t>(high - info.low + 1); - if (invoke->InputAt(0)->IsIntConstant()) { int32_t input_value = invoke->InputAt(0)->AsIntConstant()->GetValue(); uint32_t index = static_cast<uint32_t>(input_value) - static_cast<uint32_t>(info.low); if (index < static_cast<uint32_t>(info.length)) { info.value_boot_image_reference = IntrinsicObjects::EncodePatch( - IntrinsicObjects::PatchType::kIntegerValueOfObject, index); + IntrinsicObjects::PatchType::kValueOfObject, index + base); } else { // Not in the cache. - info.value_boot_image_reference = IntegerValueOfInfo::kInvalidReference; + info.value_boot_image_reference = ValueOfInfo::kInvalidReference; } } else { info.array_data_boot_image_reference = - IntrinsicObjects::EncodePatch(IntrinsicObjects::PatchType::kIntegerValueOfArray); + IntrinsicObjects::EncodePatch(IntrinsicObjects::PatchType::kValueOfArray, base); } } else { + ScopedObjectAccess soa(Thread::Current()); ObjPtr<mirror::ObjectArray<mirror::Object>> boot_image_live_objects = GetBootImageLiveObjects(); - ObjPtr<mirror::Object> low_integer = - IntrinsicObjects::GetIntegerValueOfObject(boot_image_live_objects, 0u); - ObjPtr<mirror::Class> integer_class = low_integer->GetClass<kVerifyNone, kWithoutReadBarrier>(); - ArtField* value_field = integer_class->FindDeclaredInstanceField(kValueFieldName, "I"); - DCHECK(value_field != nullptr); - info.value_offset = value_field->GetOffset().Uint32Value(); - if (compiler_options.IsJitCompiler()) { - // Use the current `IntegerCache.low` for JIT to avoid truly surprising behavior if the - // code messes up the `value` field in the lowest cached Integer using reflection. - info.low = GetIntegerCacheLowFromIntegerCache(self, class_linker); - } else { - // For app AOT, the `low_integer->value` should be the same as `IntegerCache.low`. - info.low = value_field->GetInt(low_integer); - DCHECK_EQ(info.low, GetIntegerCacheLowFromIntegerCache(self, class_linker)); - } - // Do not look at `IntegerCache.high`, use the immutable length of the cache array instead. 
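Both ComputeValueOfLocations and ComputeValueOfInfo test cache membership with a single unsigned comparison, static_cast<uint32_t>(value) - static_cast<uint32_t>(low) < static_cast<uint32_t>(length). A small illustrative sketch of why one compare covers both bounds:

    #include <cstdint>

    // One unsigned compare replaces the two signed checks `low <= value` and
    // `value < low + length`: if value < low, the subtraction wraps around to a
    // very large unsigned number, which then fails the `< length` test.
    constexpr bool InCacheRange(int32_t value, int32_t low, int32_t length) {
      return static_cast<uint32_t>(value) - static_cast<uint32_t>(low) <
             static_cast<uint32_t>(length);
    }

    static_assert(InCacheRange(0, -128, 256), "0 is in the Integer cache [-128, 127]");
    static_assert(InCacheRange(-128, -128, 256), "low bound included");
    static_assert(!InCacheRange(128, -128, 256), "high + 1 excluded");
    static_assert(!InCacheRange(-129, -128, 256), "below low excluded (wraps to a large value)");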
- info.length = dchecked_integral_cast<uint32_t>( - IntrinsicObjects::GetIntegerValueOfCache(boot_image_live_objects)->GetLength()); if (invoke->InputAt(0)->IsIntConstant()) { int32_t input_value = invoke->InputAt(0)->AsIntConstant()->GetValue(); uint32_t index = static_cast<uint32_t>(input_value) - static_cast<uint32_t>(info.low); if (index < static_cast<uint32_t>(info.length)) { - ObjPtr<mirror::Object> integer = - IntrinsicObjects::GetIntegerValueOfObject(boot_image_live_objects, index); - info.value_boot_image_reference = CodeGenerator::GetBootImageOffset(integer); + ObjPtr<mirror::Object> object = + IntrinsicObjects::GetValueOfObject(boot_image_live_objects, base, index); + info.value_boot_image_reference = CodeGenerator::GetBootImageOffset(object); } else { // Not in the cache. - info.value_boot_image_reference = IntegerValueOfInfo::kInvalidReference; + info.value_boot_image_reference = ValueOfInfo::kInvalidReference; } } else { info.array_data_boot_image_reference = CodeGenerator::GetBootImageOffset(boot_image_live_objects) + - IntrinsicObjects::GetIntegerValueOfArrayDataOffset(boot_image_live_objects).Uint32Value(); + IntrinsicObjects::GetValueOfArrayDataOffset( + boot_image_live_objects, base).Uint32Value(); } } @@ -392,8 +200,8 @@ void IntrinsicVisitor::CreateReferenceGetReferentLocations(HInvoke* invoke, locations->SetOut(Location::RequiresRegister()); } -void IntrinsicVisitor::CreateReferenceRefersToLocations(HInvoke* invoke) { - if (gUseReadBarrier && !kUseBakerReadBarrier) { +void IntrinsicVisitor::CreateReferenceRefersToLocations(HInvoke* invoke, CodeGenerator* codegen) { + if (codegen->EmitNonBakerReadBarrier()) { // Unimplemented for non-Baker read barrier. return; } @@ -414,4 +222,54 @@ void IntrinsicVisitor::AssertNonMovableStringClass() { } } +void InsertFpToIntegralIntrinsic(HInvokeStaticOrDirect* invoke, size_t input_index) { + DCHECK_EQ(invoke->GetCodePtrLocation(), CodePtrLocation::kCallCriticalNative); + DCHECK(!invoke->GetBlock()->GetGraph()->IsDebuggable()) + << "Unexpected direct @CriticalNative call in a debuggable graph!"; + DCHECK_LT(input_index, invoke->GetNumberOfArguments()); + HInstruction* input = invoke->InputAt(input_index); + DataType::Type input_type = input->GetType(); + DCHECK(DataType::IsFloatingPointType(input_type)); + bool is_double = (input_type == DataType::Type::kFloat64); + DataType::Type converted_type = is_double ? DataType::Type::kInt64 : DataType::Type::kInt32; + ArtMethod* resolved_method = is_double + ? WellKnownClasses::java_lang_Double_doubleToRawLongBits + : WellKnownClasses::java_lang_Float_floatToRawIntBits; + DCHECK(resolved_method != nullptr); + DCHECK(resolved_method->IsIntrinsic()); + MethodReference target_method(nullptr, 0); + { + ScopedObjectAccess soa(Thread::Current()); + target_method = + MethodReference(resolved_method->GetDexFile(), resolved_method->GetDexMethodIndex()); + } + // Use arbitrary dispatch info that does not require the method argument. 
+ HInvokeStaticOrDirect::DispatchInfo dispatch_info = { + MethodLoadKind::kBssEntry, + CodePtrLocation::kCallArtMethod, + /*method_load_data=*/ 0u + }; + HBasicBlock* block = invoke->GetBlock(); + ArenaAllocator* allocator = block->GetGraph()->GetAllocator(); + HInvokeStaticOrDirect* new_input = new (allocator) HInvokeStaticOrDirect( + allocator, + /*number_of_arguments=*/ 1u, + converted_type, + invoke->GetDexPc(), + /*method_reference=*/ MethodReference(nullptr, dex::kDexNoIndex), + resolved_method, + dispatch_info, + kStatic, + target_method, + HInvokeStaticOrDirect::ClinitCheckRequirement::kNone, + /*enable_intrinsic_opt=*/ true); + // The intrinsic has no side effects and does not need the environment. + new_input->SetSideEffects(SideEffects::None()); + IntrinsicOptimizations opt(new_input); + opt.SetDoesNotNeedEnvironment(); + new_input->SetRawInputAt(0u, input); + block->InsertInstructionBefore(new_input, invoke); + invoke->ReplaceInput(new_input, input_index); +} + } // namespace art diff --git a/compiler/optimizing/intrinsics.h b/compiler/optimizing/intrinsics.h index 893cd04411..d74d5d2a40 100644 --- a/compiler/optimizing/intrinsics.h +++ b/compiler/optimizing/intrinsics.h @@ -19,6 +19,7 @@ #include "base/macros.h" #include "code_generator.h" +#include "intrinsics_list.h" #include "nodes.h" #include "optimization.h" #include "parallel_move_resolver.h" @@ -48,9 +49,7 @@ class IntrinsicVisitor : public ValueObject { case Intrinsics::k ## Name: \ Visit ## Name(invoke); \ return; -#include "intrinsics_list.h" - INTRINSICS_LIST(OPTIMIZING_INTRINSICS) -#undef INTRINSICS_LIST + ART_INTRINSICS_LIST(OPTIMIZING_INTRINSICS) #undef OPTIMIZING_INTRINSICS // Do not put a default case. That way the compiler will complain if we missed a case. @@ -60,11 +59,8 @@ class IntrinsicVisitor : public ValueObject { // Define visitor methods. #define OPTIMIZING_INTRINSICS(Name, ...) \ - virtual void Visit ## Name(HInvoke* invoke ATTRIBUTE_UNUSED) { \ - } -#include "intrinsics_list.h" - INTRINSICS_LIST(OPTIMIZING_INTRINSICS) -#undef INTRINSICS_LIST + virtual void Visit##Name([[maybe_unused]] HInvoke* invoke) {} + ART_INTRINSICS_LIST(OPTIMIZING_INTRINSICS) #undef OPTIMIZING_INTRINSICS static void MoveArguments(HInvoke* invoke, @@ -99,19 +95,20 @@ class IntrinsicVisitor : public ValueObject { codegen->GetMoveResolver()->EmitNativeCode(¶llel_move); } - static void ComputeIntegerValueOfLocations(HInvoke* invoke, - CodeGenerator* codegen, - Location return_location, - Location first_argument_location); + static void ComputeValueOfLocations(HInvoke* invoke, + CodeGenerator* codegen, + int32_t low, + int32_t length, + Location return_location, + Location first_argument_location); - // Temporary data structure for holding Integer.valueOf data for generating code. - // We only use it if the boot image contains the IntegerCache objects. - struct IntegerValueOfInfo { + // Temporary data structure for holding BoxedType.valueOf data for generating code. + struct ValueOfInfo { static constexpr uint32_t kInvalidReference = static_cast<uint32_t>(-1); - IntegerValueOfInfo(); + ValueOfInfo(); - // Offset of the Integer.value field for initializing a newly allocated instance. + // Offset of the value field of the boxed object for initializing a newly allocated instance. uint32_t value_offset; // The low value in the cache. 
int32_t low; @@ -134,13 +131,18 @@ class IntrinsicVisitor : public ValueObject { }; }; - static IntegerValueOfInfo ComputeIntegerValueOfInfo( - HInvoke* invoke, const CompilerOptions& compiler_options); + static ValueOfInfo ComputeValueOfInfo( + HInvoke* invoke, + const CompilerOptions& compiler_options, + ArtField* value_field, + int32_t low, + int32_t length, + size_t base); static MemberOffset GetReferenceDisableIntrinsicOffset(); static MemberOffset GetReferenceSlowPathEnabledOffset(); static void CreateReferenceGetReferentLocations(HInvoke* invoke, CodeGenerator* codegen); - static void CreateReferenceRefersToLocations(HInvoke* invoke); + static void CreateReferenceRefersToLocations(HInvoke* invoke, CodeGenerator* codegen); protected: IntrinsicVisitor() {} @@ -220,6 +222,7 @@ class SystemArrayCopyOptimizations : public IntrinsicOptimizations { INTRINSIC_OPTIMIZATION(DestinationIsPrimitiveArray, 8); INTRINSIC_OPTIMIZATION(SourceIsNonPrimitiveArray, 9); INTRINSIC_OPTIMIZATION(SourceIsPrimitiveArray, 10); + INTRINSIC_OPTIMIZATION(SourcePositionIsDestinationPosition, 11); private: DISALLOW_COPY_AND_ASSIGN(SystemArrayCopyOptimizations); @@ -254,11 +257,9 @@ class VarHandleOptimizations : public IntrinsicOptimizations { // intrinsic to exploit e.g. no side-effects or exceptions, but otherwise not handled // by this architecture-specific intrinsics code generator. Eventually it is implemented // as a true method call. -#define UNIMPLEMENTED_INTRINSIC(Arch, Name) \ -void IntrinsicLocationsBuilder ## Arch::Visit ## Name(HInvoke* invoke ATTRIBUTE_UNUSED) { \ -} \ -void IntrinsicCodeGenerator ## Arch::Visit ## Name(HInvoke* invoke ATTRIBUTE_UNUSED) { \ -} +#define UNIMPLEMENTED_INTRINSIC(Arch, Name) \ + void IntrinsicLocationsBuilder##Arch::Visit##Name([[maybe_unused]] HInvoke* invoke) {} \ + void IntrinsicCodeGenerator##Arch::Visit##Name([[maybe_unused]] HInvoke* invoke) {} // Defines a list of unreached intrinsics: that is, method calls that are recognized as // an intrinsic, and then always converted into HIR instructions before they reach any @@ -334,6 +335,11 @@ bool IsCallFreeIntrinsic(HInvoke* invoke, Codegenerator* codegen) { return false; } +// Insert a `Float.floatToRawIntBits()` or `Double.doubleToRawLongBits()` intrinsic for a +// given input. These fake calls are needed on arm and riscv64 to satisfy type consistency +// checks while passing certain FP args in core registers for direct @CriticalNative calls. 
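The helper declared next inserts a raw-bits conversion so that a float or double argument can legally travel in a core register. At the source level the conversion is just a bit-for-bit reinterpretation; a minimal sketch of the equivalent operation, using C++20 std::bit_cast purely as an illustration rather than the code generator's actual mechanism:

    #include <bit>
    #include <cstdint>

    // Same bit pattern, different static type: this is what
    // Float.floatToRawIntBits() / Double.doubleToRawLongBits() compute, and it
    // is all that is needed to pass the value in a core register.
    inline int32_t FloatToRawIntBits(float value) {
      return std::bit_cast<int32_t>(value);   // e.g. 1.0f -> 0x3F800000
    }

    inline int64_t DoubleToRawLongBits(double value) {
      return std::bit_cast<int64_t>(value);   // e.g. 1.0  -> 0x3FF0000000000000
    }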
+void InsertFpToIntegralIntrinsic(HInvokeStaticOrDirect* invoke, size_t input_index); + } // namespace art #endif // ART_COMPILER_OPTIMIZING_INTRINSICS_H_ diff --git a/compiler/optimizing/intrinsics_arm64.cc b/compiler/optimizing/intrinsics_arm64.cc index d2dbaa32e3..3183dac348 100644 --- a/compiler/optimizing/intrinsics_arm64.cc +++ b/compiler/optimizing/intrinsics_arm64.cc @@ -25,6 +25,7 @@ #include "data_type-inl.h" #include "entrypoints/quick/quick_entrypoints.h" #include "heap_poisoning.h" +#include "intrinsic_objects.h" #include "intrinsics.h" #include "intrinsics_utils.h" #include "lock_word.h" @@ -36,6 +37,7 @@ #include "scoped_thread_state_change-inl.h" #include "thread-current-inl.h" #include "utils/arm64/assembler_arm64.h" +#include "well_known_classes.h" using namespace vixl::aarch64; // NOLINT(build/namespaces) @@ -91,11 +93,10 @@ class ReadBarrierSystemArrayCopySlowPathARM64 : public SlowPathCodeARM64 { public: ReadBarrierSystemArrayCopySlowPathARM64(HInstruction* instruction, Location tmp) : SlowPathCodeARM64(instruction), tmp_(tmp) { - DCHECK(gUseReadBarrier); - DCHECK(kUseBakerReadBarrier); } void EmitNativeCode(CodeGenerator* codegen_in) override { + DCHECK(codegen_in->EmitBakerReadBarrier()); CodeGeneratorARM64* codegen = down_cast<CodeGeneratorARM64*>(codegen_in); LocationSummary* locations = instruction_->GetLocations(); DCHECK(locations->CanCall()); @@ -700,7 +701,8 @@ static void GenUnsafeGet(HInvoke* invoke, bool is_volatile, CodeGeneratorARM64* codegen) { LocationSummary* locations = invoke->GetLocations(); - DCHECK((type == DataType::Type::kInt32) || + DCHECK((type == DataType::Type::kInt8) || + (type == DataType::Type::kInt32) || (type == DataType::Type::kInt64) || (type == DataType::Type::kReference)); Location base_loc = locations->InAt(1); @@ -710,7 +712,7 @@ static void GenUnsafeGet(HInvoke* invoke, Location trg_loc = locations->Out(); Register trg = RegisterFrom(trg_loc, type); - if (type == DataType::Type::kReference && gUseReadBarrier && kUseBakerReadBarrier) { + if (type == DataType::Type::kReference && codegen->EmitBakerReadBarrier()) { // UnsafeGetObject/UnsafeGetObjectVolatile with Baker's read barrier case. 
Register temp = WRegisterFrom(locations->GetTemp(0)); MacroAssembler* masm = codegen->GetVIXLAssembler(); @@ -738,22 +740,10 @@ static void GenUnsafeGet(HInvoke* invoke, } } -static bool UnsafeGetIntrinsicOnCallList(Intrinsics intrinsic) { - switch (intrinsic) { - case Intrinsics::kUnsafeGetObject: - case Intrinsics::kUnsafeGetObjectVolatile: - case Intrinsics::kJdkUnsafeGetObject: - case Intrinsics::kJdkUnsafeGetObjectVolatile: - case Intrinsics::kJdkUnsafeGetObjectAcquire: - return true; - default: - break; - } - return false; -} - -static void CreateIntIntIntToIntLocations(ArenaAllocator* allocator, HInvoke* invoke) { - bool can_call = gUseReadBarrier && UnsafeGetIntrinsicOnCallList(invoke->GetIntrinsic()); +static void CreateUnsafeGetLocations(ArenaAllocator* allocator, + HInvoke* invoke, + CodeGeneratorARM64* codegen) { + bool can_call = codegen->EmitReadBarrier() && IsUnsafeGetReference(invoke); LocationSummary* locations = new (allocator) LocationSummary(invoke, can_call @@ -786,38 +776,44 @@ void IntrinsicLocationsBuilderARM64::VisitUnsafeGetLongVolatile(HInvoke* invoke) VisitJdkUnsafeGetLongVolatile(invoke); } void IntrinsicLocationsBuilderARM64::VisitUnsafeGetObject(HInvoke* invoke) { - VisitJdkUnsafeGetObject(invoke); + VisitJdkUnsafeGetReference(invoke); } void IntrinsicLocationsBuilderARM64::VisitUnsafeGetObjectVolatile(HInvoke* invoke) { - VisitJdkUnsafeGetObjectVolatile(invoke); + VisitJdkUnsafeGetReferenceVolatile(invoke); +} +void IntrinsicLocationsBuilderARM64::VisitUnsafeGetByte(HInvoke* invoke) { + VisitJdkUnsafeGetByte(invoke); } void IntrinsicLocationsBuilderARM64::VisitJdkUnsafeGet(HInvoke* invoke) { - CreateIntIntIntToIntLocations(allocator_, invoke); + CreateUnsafeGetLocations(allocator_, invoke, codegen_); } void IntrinsicLocationsBuilderARM64::VisitJdkUnsafeGetVolatile(HInvoke* invoke) { - CreateIntIntIntToIntLocations(allocator_, invoke); + CreateUnsafeGetLocations(allocator_, invoke, codegen_); } void IntrinsicLocationsBuilderARM64::VisitJdkUnsafeGetAcquire(HInvoke* invoke) { - CreateIntIntIntToIntLocations(allocator_, invoke); + CreateUnsafeGetLocations(allocator_, invoke, codegen_); } void IntrinsicLocationsBuilderARM64::VisitJdkUnsafeGetLong(HInvoke* invoke) { - CreateIntIntIntToIntLocations(allocator_, invoke); + CreateUnsafeGetLocations(allocator_, invoke, codegen_); } void IntrinsicLocationsBuilderARM64::VisitJdkUnsafeGetLongVolatile(HInvoke* invoke) { - CreateIntIntIntToIntLocations(allocator_, invoke); + CreateUnsafeGetLocations(allocator_, invoke, codegen_); } void IntrinsicLocationsBuilderARM64::VisitJdkUnsafeGetLongAcquire(HInvoke* invoke) { - CreateIntIntIntToIntLocations(allocator_, invoke); + CreateUnsafeGetLocations(allocator_, invoke, codegen_); +} +void IntrinsicLocationsBuilderARM64::VisitJdkUnsafeGetReference(HInvoke* invoke) { + CreateUnsafeGetLocations(allocator_, invoke, codegen_); } -void IntrinsicLocationsBuilderARM64::VisitJdkUnsafeGetObject(HInvoke* invoke) { - CreateIntIntIntToIntLocations(allocator_, invoke); +void IntrinsicLocationsBuilderARM64::VisitJdkUnsafeGetReferenceVolatile(HInvoke* invoke) { + CreateUnsafeGetLocations(allocator_, invoke, codegen_); } -void IntrinsicLocationsBuilderARM64::VisitJdkUnsafeGetObjectVolatile(HInvoke* invoke) { - CreateIntIntIntToIntLocations(allocator_, invoke); +void IntrinsicLocationsBuilderARM64::VisitJdkUnsafeGetReferenceAcquire(HInvoke* invoke) { + CreateUnsafeGetLocations(allocator_, invoke, codegen_); } -void IntrinsicLocationsBuilderARM64::VisitJdkUnsafeGetObjectAcquire(HInvoke* invoke) { - 
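Throughout this change the global gUseReadBarrier / kUseBakerReadBarrier tests are replaced by queries on the CodeGenerator. Based on the one-for-one substitutions visible in the hunks above, the predicates behave like the following sketch; this is an assumption about their definition, shown only to make the rewrites easier to read:

    // Hypothetical shape of the CodeGenerator read-barrier queries, inferred
    // from the replacements in this diff:
    //   gUseReadBarrier && kUseBakerReadBarrier   ->  EmitBakerReadBarrier()
    //   gUseReadBarrier && !kUseBakerReadBarrier  ->  EmitNonBakerReadBarrier()
    class CodeGeneratorSketch {
     public:
      bool EmitReadBarrier() const { return emit_read_barrier_; }
      bool EmitBakerReadBarrier() const { return emit_read_barrier_ && kUseBakerReadBarrier; }
      bool EmitNonBakerReadBarrier() const { return emit_read_barrier_ && !kUseBakerReadBarrier; }

     private:
      static constexpr bool kUseBakerReadBarrier = true;  // placeholder for the build-time flag
      bool emit_read_barrier_ = true;  // per-compilation decision instead of the old global
    };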
CreateIntIntIntToIntLocations(allocator_, invoke); +void IntrinsicLocationsBuilderARM64::VisitJdkUnsafeGetByte(HInvoke* invoke) { + CreateUnsafeGetLocations(allocator_, invoke, codegen_); } void IntrinsicCodeGeneratorARM64::VisitUnsafeGet(HInvoke* invoke) { @@ -833,10 +829,13 @@ void IntrinsicCodeGeneratorARM64::VisitUnsafeGetLongVolatile(HInvoke* invoke) { VisitJdkUnsafeGetLongVolatile(invoke); } void IntrinsicCodeGeneratorARM64::VisitUnsafeGetObject(HInvoke* invoke) { - VisitJdkUnsafeGetObject(invoke); + VisitJdkUnsafeGetReference(invoke); } void IntrinsicCodeGeneratorARM64::VisitUnsafeGetObjectVolatile(HInvoke* invoke) { - VisitJdkUnsafeGetObjectVolatile(invoke); + VisitJdkUnsafeGetReferenceVolatile(invoke); +} +void IntrinsicCodeGeneratorARM64::VisitUnsafeGetByte(HInvoke* invoke) { + VisitJdkUnsafeGetByte(invoke); } void IntrinsicCodeGeneratorARM64::VisitJdkUnsafeGet(HInvoke* invoke) { @@ -857,17 +856,20 @@ void IntrinsicCodeGeneratorARM64::VisitJdkUnsafeGetLongVolatile(HInvoke* invoke) void IntrinsicCodeGeneratorARM64::VisitJdkUnsafeGetLongAcquire(HInvoke* invoke) { GenUnsafeGet(invoke, DataType::Type::kInt64, /*is_volatile=*/ true, codegen_); } -void IntrinsicCodeGeneratorARM64::VisitJdkUnsafeGetObject(HInvoke* invoke) { +void IntrinsicCodeGeneratorARM64::VisitJdkUnsafeGetReference(HInvoke* invoke) { GenUnsafeGet(invoke, DataType::Type::kReference, /*is_volatile=*/ false, codegen_); } -void IntrinsicCodeGeneratorARM64::VisitJdkUnsafeGetObjectVolatile(HInvoke* invoke) { +void IntrinsicCodeGeneratorARM64::VisitJdkUnsafeGetReferenceVolatile(HInvoke* invoke) { GenUnsafeGet(invoke, DataType::Type::kReference, /*is_volatile=*/ true, codegen_); } -void IntrinsicCodeGeneratorARM64::VisitJdkUnsafeGetObjectAcquire(HInvoke* invoke) { +void IntrinsicCodeGeneratorARM64::VisitJdkUnsafeGetReferenceAcquire(HInvoke* invoke) { GenUnsafeGet(invoke, DataType::Type::kReference, /*is_volatile=*/ true, codegen_); } +void IntrinsicCodeGeneratorARM64::VisitJdkUnsafeGetByte(HInvoke* invoke) { + GenUnsafeGet(invoke, DataType::Type::kInt8, /*is_volatile=*/ false, codegen_); +} -static void CreateIntIntIntIntToVoid(ArenaAllocator* allocator, HInvoke* invoke) { +static void CreateUnsafePutLocations(ArenaAllocator* allocator, HInvoke* invoke) { LocationSummary* locations = new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified); locations->SetInAt(0, Location::NoLocation()); // Unused receiver. 
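A note on two helpers the hunks above rely on but do not define. `CreateUnsafeGetLocations` now asks a shared `IsUnsafeGetReference` predicate instead of the per-file `UnsafeGetIntrinsicOnCallList` switch that this change deletes; going by that deleted switch and the `Object` → `Reference` renaming applied throughout the change, the predicate is assumed to be essentially the same check under a shared name (a sketch, not the actual definition):

    static bool IsUnsafeGetReference(HInvoke* invoke) {
      switch (invoke->GetIntrinsic()) {
        case Intrinsics::kUnsafeGetObject:
        case Intrinsics::kUnsafeGetObjectVolatile:
        case Intrinsics::kJdkUnsafeGetReference:
        case Intrinsics::kJdkUnsafeGetReferenceVolatile:
        case Intrinsics::kJdkUnsafeGetReferenceAcquire:
          return true;
        default:
          return false;
      }
    }

Likewise, the former global tests `gUseReadBarrier`, `gUseReadBarrier && kUseBakerReadBarrier` and `gUseReadBarrier && !kUseBakerReadBarrier` are now queried through the code generator. The substitutions in this file only hold if the three predicates relate roughly as follows (assumed relationship, not the actual CodeGenerator code):

    bool CodeGenerator::EmitBakerReadBarrier() const {
      // EmitReadBarrier() reflects whether this compilation emits read barriers at all.
      return EmitReadBarrier() && kUseBakerReadBarrier;
    }
    bool CodeGenerator::EmitNonBakerReadBarrier() const {
      return EmitReadBarrier() && !kUseBakerReadBarrier;
    }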
@@ -886,13 +888,13 @@ void IntrinsicLocationsBuilderARM64::VisitUnsafePutVolatile(HInvoke* invoke) { VisitJdkUnsafePutVolatile(invoke); } void IntrinsicLocationsBuilderARM64::VisitUnsafePutObject(HInvoke* invoke) { - VisitJdkUnsafePutObject(invoke); + VisitJdkUnsafePutReference(invoke); } void IntrinsicLocationsBuilderARM64::VisitUnsafePutObjectOrdered(HInvoke* invoke) { VisitJdkUnsafePutObjectOrdered(invoke); } void IntrinsicLocationsBuilderARM64::VisitUnsafePutObjectVolatile(HInvoke* invoke) { - VisitJdkUnsafePutObjectVolatile(invoke); + VisitJdkUnsafePutReferenceVolatile(invoke); } void IntrinsicLocationsBuilderARM64::VisitUnsafePutLong(HInvoke* invoke) { VisitJdkUnsafePutLong(invoke); @@ -903,42 +905,48 @@ void IntrinsicLocationsBuilderARM64::VisitUnsafePutLongOrdered(HInvoke* invoke) void IntrinsicLocationsBuilderARM64::VisitUnsafePutLongVolatile(HInvoke* invoke) { VisitJdkUnsafePutLongVolatile(invoke); } +void IntrinsicLocationsBuilderARM64::VisitUnsafePutByte(HInvoke* invoke) { + VisitJdkUnsafePutByte(invoke); +} void IntrinsicLocationsBuilderARM64::VisitJdkUnsafePut(HInvoke* invoke) { - CreateIntIntIntIntToVoid(allocator_, invoke); + CreateUnsafePutLocations(allocator_, invoke); } void IntrinsicLocationsBuilderARM64::VisitJdkUnsafePutOrdered(HInvoke* invoke) { - CreateIntIntIntIntToVoid(allocator_, invoke); + CreateUnsafePutLocations(allocator_, invoke); } void IntrinsicLocationsBuilderARM64::VisitJdkUnsafePutVolatile(HInvoke* invoke) { - CreateIntIntIntIntToVoid(allocator_, invoke); + CreateUnsafePutLocations(allocator_, invoke); } void IntrinsicLocationsBuilderARM64::VisitJdkUnsafePutRelease(HInvoke* invoke) { - CreateIntIntIntIntToVoid(allocator_, invoke); + CreateUnsafePutLocations(allocator_, invoke); } -void IntrinsicLocationsBuilderARM64::VisitJdkUnsafePutObject(HInvoke* invoke) { - CreateIntIntIntIntToVoid(allocator_, invoke); +void IntrinsicLocationsBuilderARM64::VisitJdkUnsafePutReference(HInvoke* invoke) { + CreateUnsafePutLocations(allocator_, invoke); } void IntrinsicLocationsBuilderARM64::VisitJdkUnsafePutObjectOrdered(HInvoke* invoke) { - CreateIntIntIntIntToVoid(allocator_, invoke); + CreateUnsafePutLocations(allocator_, invoke); } -void IntrinsicLocationsBuilderARM64::VisitJdkUnsafePutObjectVolatile(HInvoke* invoke) { - CreateIntIntIntIntToVoid(allocator_, invoke); +void IntrinsicLocationsBuilderARM64::VisitJdkUnsafePutReferenceVolatile(HInvoke* invoke) { + CreateUnsafePutLocations(allocator_, invoke); } -void IntrinsicLocationsBuilderARM64::VisitJdkUnsafePutObjectRelease(HInvoke* invoke) { - CreateIntIntIntIntToVoid(allocator_, invoke); +void IntrinsicLocationsBuilderARM64::VisitJdkUnsafePutReferenceRelease(HInvoke* invoke) { + CreateUnsafePutLocations(allocator_, invoke); } void IntrinsicLocationsBuilderARM64::VisitJdkUnsafePutLong(HInvoke* invoke) { - CreateIntIntIntIntToVoid(allocator_, invoke); + CreateUnsafePutLocations(allocator_, invoke); } void IntrinsicLocationsBuilderARM64::VisitJdkUnsafePutLongOrdered(HInvoke* invoke) { - CreateIntIntIntIntToVoid(allocator_, invoke); + CreateUnsafePutLocations(allocator_, invoke); } void IntrinsicLocationsBuilderARM64::VisitJdkUnsafePutLongVolatile(HInvoke* invoke) { - CreateIntIntIntIntToVoid(allocator_, invoke); + CreateUnsafePutLocations(allocator_, invoke); } void IntrinsicLocationsBuilderARM64::VisitJdkUnsafePutLongRelease(HInvoke* invoke) { - CreateIntIntIntIntToVoid(allocator_, invoke); + CreateUnsafePutLocations(allocator_, invoke); +} +void IntrinsicLocationsBuilderARM64::VisitJdkUnsafePutByte(HInvoke* invoke) 
{ + CreateUnsafePutLocations(allocator_, invoke); } static void GenUnsafePut(HInvoke* invoke, @@ -991,13 +999,13 @@ void IntrinsicCodeGeneratorARM64::VisitUnsafePutVolatile(HInvoke* invoke) { VisitJdkUnsafePutVolatile(invoke); } void IntrinsicCodeGeneratorARM64::VisitUnsafePutObject(HInvoke* invoke) { - VisitJdkUnsafePutObject(invoke); + VisitJdkUnsafePutReference(invoke); } void IntrinsicCodeGeneratorARM64::VisitUnsafePutObjectOrdered(HInvoke* invoke) { VisitJdkUnsafePutObjectOrdered(invoke); } void IntrinsicCodeGeneratorARM64::VisitUnsafePutObjectVolatile(HInvoke* invoke) { - VisitJdkUnsafePutObjectVolatile(invoke); + VisitJdkUnsafePutReferenceVolatile(invoke); } void IntrinsicCodeGeneratorARM64::VisitUnsafePutLong(HInvoke* invoke) { VisitJdkUnsafePutLong(invoke); @@ -1008,6 +1016,9 @@ void IntrinsicCodeGeneratorARM64::VisitUnsafePutLongOrdered(HInvoke* invoke) { void IntrinsicCodeGeneratorARM64::VisitUnsafePutLongVolatile(HInvoke* invoke) { VisitJdkUnsafePutLongVolatile(invoke); } +void IntrinsicCodeGeneratorARM64::VisitUnsafePutByte(HInvoke* invoke) { + VisitJdkUnsafePutByte(invoke); +} void IntrinsicCodeGeneratorARM64::VisitJdkUnsafePut(HInvoke* invoke) { GenUnsafePut(invoke, @@ -1037,7 +1048,7 @@ void IntrinsicCodeGeneratorARM64::VisitJdkUnsafePutRelease(HInvoke* invoke) { /*is_ordered=*/ false, codegen_); } -void IntrinsicCodeGeneratorARM64::VisitJdkUnsafePutObject(HInvoke* invoke) { +void IntrinsicCodeGeneratorARM64::VisitJdkUnsafePutReference(HInvoke* invoke) { GenUnsafePut(invoke, DataType::Type::kReference, /*is_volatile=*/ false, @@ -1051,14 +1062,14 @@ void IntrinsicCodeGeneratorARM64::VisitJdkUnsafePutObjectOrdered(HInvoke* invoke /*is_ordered=*/ true, codegen_); } -void IntrinsicCodeGeneratorARM64::VisitJdkUnsafePutObjectVolatile(HInvoke* invoke) { +void IntrinsicCodeGeneratorARM64::VisitJdkUnsafePutReferenceVolatile(HInvoke* invoke) { GenUnsafePut(invoke, DataType::Type::kReference, /*is_volatile=*/ true, /*is_ordered=*/ false, codegen_); } -void IntrinsicCodeGeneratorARM64::VisitJdkUnsafePutObjectRelease(HInvoke* invoke) { +void IntrinsicCodeGeneratorARM64::VisitJdkUnsafePutReferenceRelease(HInvoke* invoke) { GenUnsafePut(invoke, DataType::Type::kReference, /*is_volatile=*/ true, @@ -1093,9 +1104,18 @@ void IntrinsicCodeGeneratorARM64::VisitJdkUnsafePutLongRelease(HInvoke* invoke) /*is_ordered=*/ false, codegen_); } +void IntrinsicCodeGeneratorARM64::VisitJdkUnsafePutByte(HInvoke* invoke) { + GenUnsafePut(invoke, + DataType::Type::kInt8, + /*is_volatile=*/ false, + /*is_ordered=*/ false, + codegen_); +} -static void CreateUnsafeCASLocations(ArenaAllocator* allocator, HInvoke* invoke) { - const bool can_call = gUseReadBarrier && IsUnsafeCASObject(invoke); +static void CreateUnsafeCASLocations(ArenaAllocator* allocator, + HInvoke* invoke, + CodeGeneratorARM64* codegen) { + const bool can_call = codegen->EmitReadBarrier() && IsUnsafeCASReference(invoke); LocationSummary* locations = new (allocator) LocationSummary(invoke, can_call @@ -1257,7 +1277,7 @@ static void GenerateCompareAndSet(CodeGeneratorARM64* codegen, // } // // Flag Z indicates whether `old_value == expected || old_value == expected2`. - // (Is `expected2` is not valid, the `old_value == expected2` part is not emitted.) + // (If `expected2` is not valid, the `old_value == expected2` part is not emitted.) vixl::aarch64::Label loop_head; if (strong) { @@ -1340,7 +1360,7 @@ class ReadBarrierCasSlowPathARM64 : public SlowPathCodeARM64 { // Mark the `old_value_` from the main path and compare with `expected_`. 
if (kUseBakerReadBarrier) { DCHECK(mark_old_value_slow_path_ == nullptr); - arm64_codegen->GenerateIntrinsicCasMoveWithBakerReadBarrier(old_value_temp_, old_value_); + arm64_codegen->GenerateIntrinsicMoveWithBakerReadBarrier(old_value_temp_, old_value_); } else { DCHECK(mark_old_value_slow_path_ != nullptr); __ B(mark_old_value_slow_path_->GetEntryLabel()); @@ -1394,7 +1414,7 @@ class ReadBarrierCasSlowPathARM64 : public SlowPathCodeARM64 { __ Bind(&mark_old_value); if (kUseBakerReadBarrier) { DCHECK(update_old_value_slow_path_ == nullptr); - arm64_codegen->GenerateIntrinsicCasMoveWithBakerReadBarrier(old_value_, old_value_temp_); + arm64_codegen->GenerateIntrinsicMoveWithBakerReadBarrier(old_value_, old_value_temp_); } else { // Note: We could redirect the `failure` above directly to the entry label and bind // the exit label in the main path, but the main path would need to access the @@ -1447,7 +1467,7 @@ static void GenUnsafeCas(HInvoke* invoke, DataType::Type type, CodeGeneratorARM6 vixl::aarch64::Label* exit_loop = &exit_loop_label; vixl::aarch64::Label* cmp_failure = &exit_loop_label; - if (gUseReadBarrier && type == DataType::Type::kReference) { + if (type == DataType::Type::kReference && codegen->EmitReadBarrier()) { // We need to store the `old_value` in a non-scratch register to make sure // the read barrier in the slow path does not clobber it. old_value = WRegisterFrom(locations->GetTemp(0)); // The old value from main path. @@ -1511,23 +1531,23 @@ void IntrinsicLocationsBuilderARM64::VisitJdkUnsafeCASLong(HInvoke* invoke) { } void IntrinsicLocationsBuilderARM64::VisitJdkUnsafeCASObject(HInvoke* invoke) { // `jdk.internal.misc.Unsafe.compareAndSwapObject` has compare-and-set semantics (see javadoc). - VisitJdkUnsafeCompareAndSetObject(invoke); + VisitJdkUnsafeCompareAndSetReference(invoke); } void IntrinsicLocationsBuilderARM64::VisitJdkUnsafeCompareAndSetInt(HInvoke* invoke) { - CreateUnsafeCASLocations(allocator_, invoke); + CreateUnsafeCASLocations(allocator_, invoke, codegen_); } void IntrinsicLocationsBuilderARM64::VisitJdkUnsafeCompareAndSetLong(HInvoke* invoke) { - CreateUnsafeCASLocations(allocator_, invoke); + CreateUnsafeCASLocations(allocator_, invoke, codegen_); } -void IntrinsicLocationsBuilderARM64::VisitJdkUnsafeCompareAndSetObject(HInvoke* invoke) { +void IntrinsicLocationsBuilderARM64::VisitJdkUnsafeCompareAndSetReference(HInvoke* invoke) { // The only supported read barrier implementation is the Baker-style read barriers. - if (gUseReadBarrier && !kUseBakerReadBarrier) { + if (codegen_->EmitNonBakerReadBarrier()) { return; } - CreateUnsafeCASLocations(allocator_, invoke); - if (gUseReadBarrier) { + CreateUnsafeCASLocations(allocator_, invoke, codegen_); + if (codegen_->EmitReadBarrier()) { // We need two non-scratch temporary registers for read barrier. LocationSummary* locations = invoke->GetLocations(); if (kUseBakerReadBarrier) { @@ -1557,7 +1577,7 @@ void IntrinsicCodeGeneratorARM64::VisitUnsafeCASObject(HInvoke* invoke) { } void IntrinsicCodeGeneratorARM64::VisitJdkUnsafeCASInt(HInvoke* invoke) { - // `jdk.internal.misc.Unsafe.compareAndSwapLong` has compare-and-set semantics (see javadoc). + // `jdk.internal.misc.Unsafe.compareAndSwapInt` has compare-and-set semantics (see javadoc). 
   VisitJdkUnsafeCompareAndSetInt(invoke);
 }
 void IntrinsicCodeGeneratorARM64::VisitJdkUnsafeCASLong(HInvoke* invoke) {
@@ -1566,7 +1586,7 @@ void IntrinsicCodeGeneratorARM64::VisitJdkUnsafeCASLong(HInvoke* invoke) {
 }
 void IntrinsicCodeGeneratorARM64::VisitJdkUnsafeCASObject(HInvoke* invoke) {
   // `jdk.internal.misc.Unsafe.compareAndSwapObject` has compare-and-set semantics (see javadoc).
-  VisitJdkUnsafeCompareAndSetObject(invoke);
+  VisitJdkUnsafeCompareAndSetReference(invoke);
 }
 void IntrinsicCodeGeneratorARM64::VisitJdkUnsafeCompareAndSetInt(HInvoke* invoke) {
@@ -1575,9 +1595,9 @@ void IntrinsicCodeGeneratorARM64::VisitJdkUnsafeCompareAndSetInt(HInvoke
 void IntrinsicCodeGeneratorARM64::VisitJdkUnsafeCompareAndSetLong(HInvoke* invoke) {
   GenUnsafeCas(invoke, DataType::Type::kInt64, codegen_);
 }
-void IntrinsicCodeGeneratorARM64::VisitJdkUnsafeCompareAndSetObject(HInvoke* invoke) {
+void IntrinsicCodeGeneratorARM64::VisitJdkUnsafeCompareAndSetReference(HInvoke* invoke) {
   // The only supported read barrier implementation is the Baker-style read barriers.
-  DCHECK_IMPLIES(gUseReadBarrier, kUseBakerReadBarrier);
+  DCHECK_IMPLIES(codegen_->EmitReadBarrier(), kUseBakerReadBarrier);
   GenUnsafeCas(invoke, DataType::Type::kReference, codegen_);
 }
@@ -1672,6 +1692,138 @@ static void GenerateGetAndUpdate(CodeGeneratorARM64* codegen,
   __ Cbnz(store_result, &loop_label);
 }
+static void CreateUnsafeGetAndUpdateLocations(ArenaAllocator* allocator,
+                                              HInvoke* invoke,
+                                              CodeGeneratorARM64* codegen) {
+  const bool can_call = codegen->EmitReadBarrier() && IsUnsafeGetAndSetReference(invoke);
+  LocationSummary* locations =
+      new (allocator) LocationSummary(invoke,
+                                      can_call
+                                          ? LocationSummary::kCallOnSlowPath
+                                          : LocationSummary::kNoCall,
+                                      kIntrinsified);
+  if (can_call && kUseBakerReadBarrier) {
+    locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty());  // No caller-save registers.
+  }
+  locations->SetInAt(0, Location::NoLocation());  // Unused receiver.
+  locations->SetInAt(1, Location::RequiresRegister());
+  locations->SetInAt(2, Location::RequiresRegister());
+  locations->SetInAt(3, Location::RequiresRegister());
+  locations->AddTemp(Location::RequiresRegister());
+
+  locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap);
+}
+
+static void GenUnsafeGetAndUpdate(HInvoke* invoke,
+                                  DataType::Type type,
+                                  CodeGeneratorARM64* codegen,
+                                  GetAndUpdateOp get_and_update_op) {
+  MacroAssembler* masm = codegen->GetVIXLAssembler();
+  LocationSummary* locations = invoke->GetLocations();
+
+  Register out = RegisterFrom(locations->Out(), type);      // Result.
+  Register base = WRegisterFrom(locations->InAt(1));        // Object pointer.
+  Register offset = XRegisterFrom(locations->InAt(2));      // Long offset.
+  Register arg = RegisterFrom(locations->InAt(3), type);    // New value or addend.
+  Register tmp_ptr = XRegisterFrom(locations->GetTemp(0));  // Pointer to actual memory.
+
+  // This needs to be before the temp registers, as MarkGCCard also uses VIXL temps.
+  if (type == DataType::Type::kReference) {
+    DCHECK(get_and_update_op == GetAndUpdateOp::kSet);
+    // Mark card for object as a new value shall be stored.
+    bool new_value_can_be_null = true;  // TODO: Worth finding out this information?
+ codegen->MarkGCCard(base, /*value=*/ arg, new_value_can_be_null); + } + + __ Add(tmp_ptr, base.X(), Operand(offset)); + GenerateGetAndUpdate(codegen, + get_and_update_op, + type, + std::memory_order_seq_cst, + tmp_ptr, + arg, + /*old_value=*/ out); + + if (type == DataType::Type::kReference && codegen->EmitReadBarrier()) { + DCHECK(get_and_update_op == GetAndUpdateOp::kSet); + if (kUseBakerReadBarrier) { + codegen->GenerateIntrinsicMoveWithBakerReadBarrier(out.W(), out.W()); + } else { + codegen->GenerateReadBarrierSlow( + invoke, + Location::RegisterLocation(out.GetCode()), + Location::RegisterLocation(out.GetCode()), + Location::RegisterLocation(base.GetCode()), + /*offset=*/ 0u, + /*index=*/ Location::RegisterLocation(offset.GetCode())); + } + } +} + +void IntrinsicLocationsBuilderARM64::VisitUnsafeGetAndAddInt(HInvoke* invoke) { + VisitJdkUnsafeGetAndAddInt(invoke); +} +void IntrinsicLocationsBuilderARM64::VisitUnsafeGetAndAddLong(HInvoke* invoke) { + VisitJdkUnsafeGetAndAddLong(invoke); +} +void IntrinsicLocationsBuilderARM64::VisitUnsafeGetAndSetInt(HInvoke* invoke) { + VisitJdkUnsafeGetAndSetInt(invoke); +} +void IntrinsicLocationsBuilderARM64::VisitUnsafeGetAndSetLong(HInvoke* invoke) { + VisitJdkUnsafeGetAndSetLong(invoke); +} +void IntrinsicLocationsBuilderARM64::VisitUnsafeGetAndSetObject(HInvoke* invoke) { + VisitJdkUnsafeGetAndSetReference(invoke); +} + +void IntrinsicLocationsBuilderARM64::VisitJdkUnsafeGetAndAddInt(HInvoke* invoke) { + CreateUnsafeGetAndUpdateLocations(allocator_, invoke, codegen_); +} +void IntrinsicLocationsBuilderARM64::VisitJdkUnsafeGetAndAddLong(HInvoke* invoke) { + CreateUnsafeGetAndUpdateLocations(allocator_, invoke, codegen_); +} +void IntrinsicLocationsBuilderARM64::VisitJdkUnsafeGetAndSetInt(HInvoke* invoke) { + CreateUnsafeGetAndUpdateLocations(allocator_, invoke, codegen_); +} +void IntrinsicLocationsBuilderARM64::VisitJdkUnsafeGetAndSetLong(HInvoke* invoke) { + CreateUnsafeGetAndUpdateLocations(allocator_, invoke, codegen_); +} +void IntrinsicLocationsBuilderARM64::VisitJdkUnsafeGetAndSetReference(HInvoke* invoke) { + CreateUnsafeGetAndUpdateLocations(allocator_, invoke, codegen_); +} + +void IntrinsicCodeGeneratorARM64::VisitUnsafeGetAndAddInt(HInvoke* invoke) { + VisitJdkUnsafeGetAndAddInt(invoke); +} +void IntrinsicCodeGeneratorARM64::VisitUnsafeGetAndAddLong(HInvoke* invoke) { + VisitJdkUnsafeGetAndAddLong(invoke); +} +void IntrinsicCodeGeneratorARM64::VisitUnsafeGetAndSetInt(HInvoke* invoke) { + VisitJdkUnsafeGetAndSetInt(invoke); +} +void IntrinsicCodeGeneratorARM64::VisitUnsafeGetAndSetLong(HInvoke* invoke) { + VisitJdkUnsafeGetAndSetLong(invoke); +} +void IntrinsicCodeGeneratorARM64::VisitUnsafeGetAndSetObject(HInvoke* invoke) { + VisitJdkUnsafeGetAndSetReference(invoke); +} + +void IntrinsicCodeGeneratorARM64::VisitJdkUnsafeGetAndAddInt(HInvoke* invoke) { + GenUnsafeGetAndUpdate(invoke, DataType::Type::kInt32, codegen_, GetAndUpdateOp::kAdd); +} +void IntrinsicCodeGeneratorARM64::VisitJdkUnsafeGetAndAddLong(HInvoke* invoke) { + GenUnsafeGetAndUpdate(invoke, DataType::Type::kInt64, codegen_, GetAndUpdateOp::kAdd); +} +void IntrinsicCodeGeneratorARM64::VisitJdkUnsafeGetAndSetInt(HInvoke* invoke) { + GenUnsafeGetAndUpdate(invoke, DataType::Type::kInt32, codegen_, GetAndUpdateOp::kSet); +} +void IntrinsicCodeGeneratorARM64::VisitJdkUnsafeGetAndSetLong(HInvoke* invoke) { + GenUnsafeGetAndUpdate(invoke, DataType::Type::kInt64, codegen_, GetAndUpdateOp::kSet); +} +void IntrinsicCodeGeneratorARM64::VisitJdkUnsafeGetAndSetReference(HInvoke* 
invoke) { + GenUnsafeGetAndUpdate(invoke, DataType::Type::kReference, codegen_, GetAndUpdateOp::kSet); +} + void IntrinsicLocationsBuilderARM64::VisitStringCompareTo(HInvoke* invoke) { LocationSummary* locations = new (allocator_) LocationSummary(invoke, @@ -2272,7 +2424,7 @@ static void CreateFPFPToFPCallLocations(ArenaAllocator* allocator, HInvoke* invo locations->SetOut(calling_convention.GetReturnLocation(invoke->GetType())); } -static void CreateFPFPFPToFPCallLocations(ArenaAllocator* allocator, HInvoke* invoke) { +static void CreateFPFPFPToFPLocations(ArenaAllocator* allocator, HInvoke* invoke) { DCHECK_EQ(invoke->GetNumberOfArguments(), 3U); DCHECK(DataType::IsFloatingPointType(invoke->InputAt(0)->GetType())); DCHECK(DataType::IsFloatingPointType(invoke->InputAt(1)->GetType())); @@ -2582,7 +2734,7 @@ static constexpr int32_t kSystemArrayCopyCharThreshold = 192; static void SetSystemArrayCopyLocationRequires(LocationSummary* locations, uint32_t at, HInstruction* input) { - HIntConstant* const_input = input->AsIntConstant(); + HIntConstant* const_input = input->AsIntConstantOrNull(); if (const_input != nullptr && !vixl::aarch64::Assembler::IsImmAddSub(const_input->GetValue())) { locations->SetInAt(at, Location::RequiresRegister()); } else { @@ -2593,8 +2745,8 @@ static void SetSystemArrayCopyLocationRequires(LocationSummary* locations, void IntrinsicLocationsBuilderARM64::VisitSystemArrayCopyChar(HInvoke* invoke) { // Check to see if we have known failures that will cause us to have to bail out // to the runtime, and just generate the runtime call directly. - HIntConstant* src_pos = invoke->InputAt(1)->AsIntConstant(); - HIntConstant* dst_pos = invoke->InputAt(3)->AsIntConstant(); + HIntConstant* src_pos = invoke->InputAt(1)->AsIntConstantOrNull(); + HIntConstant* dst_pos = invoke->InputAt(3)->AsIntConstantOrNull(); // The positions must be non-negative. if ((src_pos != nullptr && src_pos->GetValue() < 0) || @@ -2605,7 +2757,7 @@ void IntrinsicLocationsBuilderARM64::VisitSystemArrayCopyChar(HInvoke* invoke) { // The length must be >= 0 and not so long that we would (currently) prefer libcore's // native implementation. - HIntConstant* length = invoke->InputAt(4)->AsIntConstant(); + HIntConstant* length = invoke->InputAt(4)->AsIntConstantOrNull(); if (length != nullptr) { int32_t len = length->GetValue(); if (len < 0 || len > kSystemArrayCopyCharThreshold) { @@ -2897,14 +3049,14 @@ static constexpr int32_t kSystemArrayCopyThreshold = 128; void IntrinsicLocationsBuilderARM64::VisitSystemArrayCopy(HInvoke* invoke) { // The only read barrier implementation supporting the // SystemArrayCopy intrinsic is the Baker-style read barriers. - if (gUseReadBarrier && !kUseBakerReadBarrier) { + if (codegen_->EmitNonBakerReadBarrier()) { return; } // Check to see if we have known failures that will cause us to have to bail out // to the runtime, and just generate the runtime call directly. - HIntConstant* src_pos = invoke->InputAt(1)->AsIntConstant(); - HIntConstant* dest_pos = invoke->InputAt(3)->AsIntConstant(); + HIntConstant* src_pos = invoke->InputAt(1)->AsIntConstantOrNull(); + HIntConstant* dest_pos = invoke->InputAt(3)->AsIntConstantOrNull(); // The positions must be non-negative. if ((src_pos != nullptr && src_pos->GetValue() < 0) || @@ -2914,7 +3066,7 @@ void IntrinsicLocationsBuilderARM64::VisitSystemArrayCopy(HInvoke* invoke) { } // The length must be >= 0. 
- HIntConstant* length = invoke->InputAt(4)->AsIntConstant(); + HIntConstant* length = invoke->InputAt(4)->AsIntConstantOrNull(); if (length != nullptr) { int32_t len = length->GetValue(); if (len < 0 || len >= kSystemArrayCopyThreshold) { @@ -2949,7 +3101,7 @@ void IntrinsicLocationsBuilderARM64::VisitSystemArrayCopy(HInvoke* invoke) { locations->AddTemp(Location::RequiresRegister()); locations->AddTemp(Location::RequiresRegister()); - if (gUseReadBarrier && kUseBakerReadBarrier) { + if (codegen_->EmitBakerReadBarrier()) { // Temporary register IP0, obtained from the VIXL scratch register // pool, cannot be used in ReadBarrierSystemArrayCopySlowPathARM64 // (because that register is clobbered by ReadBarrierMarkRegX @@ -2967,7 +3119,7 @@ void IntrinsicLocationsBuilderARM64::VisitSystemArrayCopy(HInvoke* invoke) { void IntrinsicCodeGeneratorARM64::VisitSystemArrayCopy(HInvoke* invoke) { // The only read barrier implementation supporting the // SystemArrayCopy intrinsic is the Baker-style read barriers. - DCHECK_IMPLIES(gUseReadBarrier, kUseBakerReadBarrier); + DCHECK_IMPLIES(codegen_->EmitReadBarrier(), kUseBakerReadBarrier); MacroAssembler* masm = GetVIXLAssembler(); LocationSummary* locations = invoke->GetLocations(); @@ -3009,8 +3161,8 @@ void IntrinsicCodeGeneratorARM64::VisitSystemArrayCopy(HInvoke* invoke) { __ B(intrinsic_slow_path->GetEntryLabel(), eq); } // Checked when building locations. - DCHECK(!optimizations.GetDestinationIsSource() - || (src_pos_constant >= dest_pos.GetConstant()->AsIntConstant()->GetValue())); + DCHECK(!optimizations.GetDestinationIsSource() || + (src_pos_constant >= dest_pos.GetConstant()->AsIntConstant()->GetValue())); } else { if (!optimizations.GetDestinationIsSource()) { __ Cmp(src, dest); @@ -3074,7 +3226,7 @@ void IntrinsicCodeGeneratorARM64::VisitSystemArrayCopy(HInvoke* invoke) { UseScratchRegisterScope temps(masm); Location temp3_loc; // Used only for Baker read barrier. Register temp3; - if (gUseReadBarrier && kUseBakerReadBarrier) { + if (codegen_->EmitBakerReadBarrier()) { temp3_loc = locations->GetTemp(2); temp3 = WRegisterFrom(temp3_loc); } else { @@ -3087,7 +3239,7 @@ void IntrinsicCodeGeneratorARM64::VisitSystemArrayCopy(HInvoke* invoke) { // or the destination is Object[]. If none of these checks succeed, we go to the // slow path. - if (gUseReadBarrier && kUseBakerReadBarrier) { + if (codegen_->EmitBakerReadBarrier()) { if (!optimizations.GetSourceIsNonPrimitiveArray()) { // /* HeapReference<Class> */ temp1 = src->klass_ codegen_->GenerateFieldLoadWithBakerReadBarrier(invoke, @@ -3108,7 +3260,7 @@ void IntrinsicCodeGeneratorARM64::VisitSystemArrayCopy(HInvoke* invoke) { /* use_load_acquire= */ false); __ Cbz(temp1, intrinsic_slow_path->GetEntryLabel()); // If heap poisoning is enabled, `temp1` has been unpoisoned - // by the the previous call to GenerateFieldLoadWithBakerReadBarrier. + // by the previous call to GenerateFieldLoadWithBakerReadBarrier. // /* uint16_t */ temp1 = static_cast<uint16>(temp1->primitive_type_); __ Ldrh(temp1, HeapOperand(temp1, primitive_offset)); static_assert(Primitive::kPrimNot == 0, "Expected 0 for kPrimNot"); @@ -3142,7 +3294,7 @@ void IntrinsicCodeGeneratorARM64::VisitSystemArrayCopy(HInvoke* invoke) { /* use_load_acquire= */ false); __ Cbz(temp2, intrinsic_slow_path->GetEntryLabel()); // If heap poisoning is enabled, `temp2` has been unpoisoned - // by the the previous call to GenerateFieldLoadWithBakerReadBarrier. + // by the previous call to GenerateFieldLoadWithBakerReadBarrier. 
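The `AsIntConstant()` → `AsIntConstantOrNull()` switches in these SystemArrayCopy hunks follow one pattern: the `...OrNull()` flavour is used where the input may or may not be a constant and the result is null-checked, while the plain accessor remains where the kind is already established (e.g. `dest_pos.GetConstant()->AsIntConstant()`). A minimal sketch of the assumed contract of the two accessors (illustrative, not the actual HInstruction definitions):

    HIntConstant* HInstruction::AsIntConstantOrNull() {
      return IsIntConstant() ? down_cast<HIntConstant*>(this) : nullptr;
    }

    HIntConstant* HInstruction::AsIntConstant() {
      DCHECK(IsIntConstant());  // Callers must already know the instruction kind.
      return down_cast<HIntConstant*>(this);
    }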
// /* uint16_t */ temp2 = static_cast<uint16>(temp2->primitive_type_); __ Ldrh(temp2, HeapOperand(temp2, primitive_offset)); static_assert(Primitive::kPrimNot == 0, "Expected 0 for kPrimNot"); @@ -3248,7 +3400,7 @@ void IntrinsicCodeGeneratorARM64::VisitSystemArrayCopy(HInvoke* invoke) { } else if (!optimizations.GetSourceIsNonPrimitiveArray()) { DCHECK(optimizations.GetDestinationIsNonPrimitiveArray()); // Bail out if the source is not a non primitive array. - if (gUseReadBarrier && kUseBakerReadBarrier) { + if (codegen_->EmitBakerReadBarrier()) { // /* HeapReference<Class> */ temp1 = src->klass_ codegen_->GenerateFieldLoadWithBakerReadBarrier(invoke, temp1_loc, @@ -3267,7 +3419,7 @@ void IntrinsicCodeGeneratorARM64::VisitSystemArrayCopy(HInvoke* invoke) { /* use_load_acquire= */ false); __ Cbz(temp2, intrinsic_slow_path->GetEntryLabel()); // If heap poisoning is enabled, `temp2` has been unpoisoned - // by the the previous call to GenerateFieldLoadWithBakerReadBarrier. + // by the previous call to GenerateFieldLoadWithBakerReadBarrier. } else { // /* HeapReference<Class> */ temp1 = src->klass_ __ Ldr(temp1, HeapOperand(src.W(), class_offset)); @@ -3298,7 +3450,7 @@ void IntrinsicCodeGeneratorARM64::VisitSystemArrayCopy(HInvoke* invoke) { __ Cbz(WRegisterFrom(length), &done); } - if (gUseReadBarrier && kUseBakerReadBarrier) { + if (codegen_->EmitBakerReadBarrier()) { // TODO: Also convert this intrinsic to the IsGcMarking strategy? // SystemArrayCopy implementation for Baker read barriers (see @@ -3465,18 +3617,34 @@ void IntrinsicCodeGeneratorARM64::VisitDoubleIsInfinite(HInvoke* invoke) { GenIsInfinite(invoke->GetLocations(), /* is64bit= */ true, GetVIXLAssembler()); } -void IntrinsicLocationsBuilderARM64::VisitIntegerValueOf(HInvoke* invoke) { - InvokeRuntimeCallingConvention calling_convention; - IntrinsicVisitor::ComputeIntegerValueOfLocations( - invoke, - codegen_, - calling_convention.GetReturnLocation(DataType::Type::kReference), - Location::RegisterLocation(calling_convention.GetRegisterAt(0).GetCode())); -} - -void IntrinsicCodeGeneratorARM64::VisitIntegerValueOf(HInvoke* invoke) { - IntrinsicVisitor::IntegerValueOfInfo info = - IntrinsicVisitor::ComputeIntegerValueOfInfo(invoke, codegen_->GetCompilerOptions()); +#define VISIT_INTRINSIC(name, low, high, type, start_index) \ + void IntrinsicLocationsBuilderARM64::Visit ##name ##ValueOf(HInvoke* invoke) { \ + InvokeRuntimeCallingConvention calling_convention; \ + IntrinsicVisitor::ComputeValueOfLocations( \ + invoke, \ + codegen_, \ + low, \ + high - low + 1, \ + calling_convention.GetReturnLocation(DataType::Type::kReference), \ + Location::RegisterLocation(calling_convention.GetRegisterAt(0).GetCode())); \ + } \ + void IntrinsicCodeGeneratorARM64::Visit ##name ##ValueOf(HInvoke* invoke) { \ + IntrinsicVisitor::ValueOfInfo info = \ + IntrinsicVisitor::ComputeValueOfInfo( \ + invoke, \ + codegen_->GetCompilerOptions(), \ + WellKnownClasses::java_lang_ ##name ##_value, \ + low, \ + high - low + 1, \ + start_index); \ + HandleValueOf(invoke, info, type); \ + } + BOXED_TYPES(VISIT_INTRINSIC) +#undef VISIT_INTRINSIC + +void IntrinsicCodeGeneratorARM64::HandleValueOf(HInvoke* invoke, + const IntrinsicVisitor::ValueOfInfo& info, + DataType::Type type) { LocationSummary* locations = invoke->GetLocations(); MacroAssembler* masm = GetVIXLAssembler(); @@ -3489,20 +3657,20 @@ void IntrinsicCodeGeneratorARM64::VisitIntegerValueOf(HInvoke* invoke) { codegen_->InvokeRuntime(kQuickAllocObjectInitialized, invoke, invoke->GetDexPc()); 
CheckEntrypointTypes<kQuickAllocObjectWithChecks, void*, mirror::Class*>(); }; - if (invoke->InputAt(0)->IsConstant()) { + if (invoke->InputAt(0)->IsIntConstant()) { int32_t value = invoke->InputAt(0)->AsIntConstant()->GetValue(); if (static_cast<uint32_t>(value - info.low) < info.length) { - // Just embed the j.l.Integer in the code. - DCHECK_NE(info.value_boot_image_reference, IntegerValueOfInfo::kInvalidReference); + // Just embed the object in the code. + DCHECK_NE(info.value_boot_image_reference, ValueOfInfo::kInvalidReference); codegen_->LoadBootImageAddress(out, info.value_boot_image_reference); } else { DCHECK(locations->CanCall()); - // Allocate and initialize a new j.l.Integer. - // TODO: If we JIT, we could allocate the j.l.Integer now, and store it in the + // Allocate and initialize a new object. + // TODO: If we JIT, we could allocate the object now, and store it in the // JIT object table. allocate_instance(); __ Mov(temp.W(), value); - __ Str(temp.W(), HeapOperand(out.W(), info.value_offset)); + codegen_->Store(type, temp.W(), HeapOperand(out.W(), info.value_offset)); // Class pointer and `value` final field stores require a barrier before publication. codegen_->GenerateMemoryBarrier(MemBarrierKind::kStoreStore); } @@ -3514,7 +3682,7 @@ void IntrinsicCodeGeneratorARM64::VisitIntegerValueOf(HInvoke* invoke) { __ Cmp(out.W(), info.length); vixl::aarch64::Label allocate, done; __ B(&allocate, hs); - // If the value is within the bounds, load the j.l.Integer directly from the array. + // If the value is within the bounds, load the object directly from the array. codegen_->LoadBootImageAddress(temp, info.array_data_boot_image_reference); MemOperand source = HeapOperand( temp, out.X(), LSL, DataType::SizeShift(DataType::Type::kReference)); @@ -3522,9 +3690,9 @@ void IntrinsicCodeGeneratorARM64::VisitIntegerValueOf(HInvoke* invoke) { codegen_->GetAssembler()->MaybeUnpoisonHeapReference(out); __ B(&done); __ Bind(&allocate); - // Otherwise allocate and initialize a new j.l.Integer. + // Otherwise allocate and initialize a new object. allocate_instance(); - __ Str(in.W(), HeapOperand(out.W(), info.value_offset)); + codegen_->Store(type, in.W(), HeapOperand(out.W(), info.value_offset)); // Class pointer and `value` final field stores require a barrier before publication. codegen_->GenerateMemoryBarrier(MemBarrierKind::kStoreStore); __ Bind(&done); @@ -3534,7 +3702,7 @@ void IntrinsicCodeGeneratorARM64::VisitIntegerValueOf(HInvoke* invoke) { void IntrinsicLocationsBuilderARM64::VisitReferenceGetReferent(HInvoke* invoke) { IntrinsicVisitor::CreateReferenceGetReferentLocations(invoke, codegen_); - if (gUseReadBarrier && kUseBakerReadBarrier && invoke->GetLocations() != nullptr) { + if (codegen_->EmitBakerReadBarrier() && invoke->GetLocations() != nullptr) { invoke->GetLocations()->AddTemp(Location::RequiresRegister()); } } @@ -3549,7 +3717,7 @@ void IntrinsicCodeGeneratorARM64::VisitReferenceGetReferent(HInvoke* invoke) { SlowPathCodeARM64* slow_path = new (GetAllocator()) IntrinsicSlowPathARM64(invoke); codegen_->AddSlowPath(slow_path); - if (gUseReadBarrier) { + if (codegen_->EmitReadBarrier()) { // Check self->GetWeakRefAccessEnabled(). UseScratchRegisterScope temps(masm); Register temp = temps.AcquireW(); @@ -3576,7 +3744,7 @@ void IntrinsicCodeGeneratorARM64::VisitReferenceGetReferent(HInvoke* invoke) { // Load the value from the field. 
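The `BOXED_TYPES(VISIT_INTRINSIC)` invocation above stamps out one locations-builder/code-generator pair per boxed type, replacing the hand-written Integer-only versions. For a hypothetical list entry that supplies `Integer` with cache bounds `kLow`/`kHigh`, an int32 value field and a `kStartIndex` (the concrete arguments come from the boxed-types list, which is not part of this section), the macro expands to, in effect:

    void IntrinsicLocationsBuilderARM64::VisitIntegerValueOf(HInvoke* invoke) {
      InvokeRuntimeCallingConvention calling_convention;
      IntrinsicVisitor::ComputeValueOfLocations(
          invoke,
          codegen_,
          kLow,
          kHigh - kLow + 1,
          calling_convention.GetReturnLocation(DataType::Type::kReference),
          Location::RegisterLocation(calling_convention.GetRegisterAt(0).GetCode()));
    }
    void IntrinsicCodeGeneratorARM64::VisitIntegerValueOf(HInvoke* invoke) {
      IntrinsicVisitor::ValueOfInfo info =
          IntrinsicVisitor::ComputeValueOfInfo(invoke,
                                               codegen_->GetCompilerOptions(),
                                               WellKnownClasses::java_lang_Integer_value,
                                               kLow,
                                               kHigh - kLow + 1,
                                               kStartIndex);
      HandleValueOf(invoke, info, DataType::Type::kInt32);
    }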
uint32_t referent_offset = mirror::Reference::ReferentOffset().Uint32Value(); - if (gUseReadBarrier && kUseBakerReadBarrier) { + if (codegen_->EmitBakerReadBarrier()) { codegen_->GenerateFieldLoadWithBakerReadBarrier(invoke, out, WRegisterFrom(obj), @@ -3594,7 +3762,7 @@ void IntrinsicCodeGeneratorARM64::VisitReferenceGetReferent(HInvoke* invoke) { } void IntrinsicLocationsBuilderARM64::VisitReferenceRefersTo(HInvoke* invoke) { - IntrinsicVisitor::CreateReferenceRefersToLocations(invoke); + IntrinsicVisitor::CreateReferenceRefersToLocations(invoke, codegen_); } void IntrinsicCodeGeneratorARM64::VisitReferenceRefersTo(HInvoke* invoke) { @@ -3616,7 +3784,7 @@ void IntrinsicCodeGeneratorARM64::VisitReferenceRefersTo(HInvoke* invoke) { __ Cmp(tmp, other); - if (gUseReadBarrier) { + if (codegen_->EmitReadBarrier()) { DCHECK(kUseBakerReadBarrier); vixl::aarch64::Label calculate_result; @@ -3676,7 +3844,7 @@ void IntrinsicLocationsBuilderARM64::VisitReachabilityFence(HInvoke* invoke) { locations->SetInAt(0, Location::Any()); } -void IntrinsicCodeGeneratorARM64::VisitReachabilityFence(HInvoke* invoke ATTRIBUTE_UNUSED) { } +void IntrinsicCodeGeneratorARM64::VisitReachabilityFence([[maybe_unused]] HInvoke* invoke) {} void IntrinsicLocationsBuilderARM64::VisitCRC32Update(HInvoke* invoke) { if (!codegen_->GetInstructionSetFeatures().HasCRC()) { @@ -4305,7 +4473,7 @@ static void GenerateMathFma(HInvoke* invoke, CodeGeneratorARM64* codegen) { } void IntrinsicLocationsBuilderARM64::VisitMathFmaDouble(HInvoke* invoke) { - CreateFPFPFPToFPCallLocations(allocator_, invoke); + CreateFPFPFPToFPLocations(allocator_, invoke); } void IntrinsicCodeGeneratorARM64::VisitMathFmaDouble(HInvoke* invoke) { @@ -4313,7 +4481,7 @@ void IntrinsicCodeGeneratorARM64::VisitMathFmaDouble(HInvoke* invoke) { } void IntrinsicLocationsBuilderARM64::VisitMathFmaFloat(HInvoke* invoke) { - CreateFPFPFPToFPCallLocations(allocator_, invoke); + CreateFPFPFPToFPLocations(allocator_, invoke); } void IntrinsicCodeGeneratorARM64::VisitMathFmaFloat(HInvoke* invoke) { @@ -4695,24 +4863,24 @@ static void GenerateVarHandleTarget(HInvoke* invoke, __ Mov(target.offset, target_field->GetOffset().Uint32Value()); } else { // For static fields, we need to fill the `target.object` with the declaring class, - // so we can use `target.object` as temporary for the `ArtMethod*`. For instance fields, - // we do not need the declaring class, so we can forget the `ArtMethod*` when - // we load the `target.offset`, so use the `target.offset` to hold the `ArtMethod*`. - Register method = (expected_coordinates_count == 0) ? target.object : target.offset; + // so we can use `target.object` as temporary for the `ArtField*`. For instance fields, + // we do not need the declaring class, so we can forget the `ArtField*` when + // we load the `target.offset`, so use the `target.offset` to hold the `ArtField*`. + Register field = (expected_coordinates_count == 0) ? target.object : target.offset; const MemberOffset art_field_offset = mirror::FieldVarHandle::ArtFieldOffset(); const MemberOffset offset_offset = ArtField::OffsetOffset(); - // Load the ArtField, the offset and, if needed, declaring class. - __ Ldr(method.X(), HeapOperand(varhandle, art_field_offset.Int32Value())); - __ Ldr(target.offset, MemOperand(method.X(), offset_offset.Int32Value())); + // Load the ArtField*, the offset and, if needed, declaring class. 
+ __ Ldr(field.X(), HeapOperand(varhandle, art_field_offset.Int32Value())); + __ Ldr(target.offset, MemOperand(field.X(), offset_offset.Int32Value())); if (expected_coordinates_count == 0u) { codegen->GenerateGcRootFieldLoad(invoke, LocationFrom(target.object), - method.X(), + field.X(), ArtField::DeclaringClassOffset().Int32Value(), - /*fixup_label=*/ nullptr, - gCompilerReadBarrierOption); + /*fixup_label=*/nullptr, + codegen->GetCompilerReadBarrierOption()); } } } else { @@ -4732,7 +4900,8 @@ static void GenerateVarHandleTarget(HInvoke* invoke, } } -static LocationSummary* CreateVarHandleCommonLocations(HInvoke* invoke) { +static LocationSummary* CreateVarHandleCommonLocations(HInvoke* invoke, + CodeGeneratorARM64* codegen) { size_t expected_coordinates_count = GetExpectedVarHandleCoordinatesCount(invoke); DataType::Type return_type = invoke->GetType(); @@ -4766,7 +4935,7 @@ static LocationSummary* CreateVarHandleCommonLocations(HInvoke* invoke) { } // Add a temporary for offset. - if ((gUseReadBarrier && !kUseBakerReadBarrier) && + if (codegen->EmitNonBakerReadBarrier() && GetExpectedVarHandleCoordinatesCount(invoke) == 0u) { // For static fields. // To preserve the offset value across the non-Baker read barrier slow path // for loading the declaring class, use a fixed callee-save register. @@ -4783,13 +4952,13 @@ static LocationSummary* CreateVarHandleCommonLocations(HInvoke* invoke) { return locations; } -static void CreateVarHandleGetLocations(HInvoke* invoke) { +static void CreateVarHandleGetLocations(HInvoke* invoke, CodeGeneratorARM64* codegen) { VarHandleOptimizations optimizations(invoke); if (optimizations.GetDoNotIntrinsify()) { return; } - if ((gUseReadBarrier && !kUseBakerReadBarrier) && + if (codegen->EmitNonBakerReadBarrier() && invoke->GetType() == DataType::Type::kReference && invoke->GetIntrinsic() != Intrinsics::kVarHandleGet && invoke->GetIntrinsic() != Intrinsics::kVarHandleGetOpaque) { @@ -4799,7 +4968,7 @@ static void CreateVarHandleGetLocations(HInvoke* invoke) { return; } - CreateVarHandleCommonLocations(invoke); + CreateVarHandleCommonLocations(invoke, codegen); } static void GenerateVarHandleGet(HInvoke* invoke, @@ -4829,7 +4998,7 @@ static void GenerateVarHandleGet(HInvoke* invoke, DCHECK(use_load_acquire || order == std::memory_order_relaxed); // Load the value from the target location. - if (type == DataType::Type::kReference && gUseReadBarrier && kUseBakerReadBarrier) { + if (type == DataType::Type::kReference && codegen->EmitBakerReadBarrier()) { // Piggy-back on the field load path using introspection for the Baker read barrier. // The `target.offset` is a temporary, use it for field address. 
Register tmp_ptr = target.offset.X(); @@ -4882,7 +5051,7 @@ static void GenerateVarHandleGet(HInvoke* invoke, } void IntrinsicLocationsBuilderARM64::VisitVarHandleGet(HInvoke* invoke) { - CreateVarHandleGetLocations(invoke); + CreateVarHandleGetLocations(invoke, codegen_); } void IntrinsicCodeGeneratorARM64::VisitVarHandleGet(HInvoke* invoke) { @@ -4890,7 +5059,7 @@ void IntrinsicCodeGeneratorARM64::VisitVarHandleGet(HInvoke* invoke) { } void IntrinsicLocationsBuilderARM64::VisitVarHandleGetOpaque(HInvoke* invoke) { - CreateVarHandleGetLocations(invoke); + CreateVarHandleGetLocations(invoke, codegen_); } void IntrinsicCodeGeneratorARM64::VisitVarHandleGetOpaque(HInvoke* invoke) { @@ -4898,7 +5067,7 @@ void IntrinsicCodeGeneratorARM64::VisitVarHandleGetOpaque(HInvoke* invoke) { } void IntrinsicLocationsBuilderARM64::VisitVarHandleGetAcquire(HInvoke* invoke) { - CreateVarHandleGetLocations(invoke); + CreateVarHandleGetLocations(invoke, codegen_); } void IntrinsicCodeGeneratorARM64::VisitVarHandleGetAcquire(HInvoke* invoke) { @@ -4906,20 +5075,20 @@ void IntrinsicCodeGeneratorARM64::VisitVarHandleGetAcquire(HInvoke* invoke) { } void IntrinsicLocationsBuilderARM64::VisitVarHandleGetVolatile(HInvoke* invoke) { - CreateVarHandleGetLocations(invoke); + CreateVarHandleGetLocations(invoke, codegen_); } void IntrinsicCodeGeneratorARM64::VisitVarHandleGetVolatile(HInvoke* invoke) { GenerateVarHandleGet(invoke, codegen_, std::memory_order_seq_cst); } -static void CreateVarHandleSetLocations(HInvoke* invoke) { +static void CreateVarHandleSetLocations(HInvoke* invoke, CodeGeneratorARM64* codegen) { VarHandleOptimizations optimizations(invoke); if (optimizations.GetDoNotIntrinsify()) { return; } - CreateVarHandleCommonLocations(invoke); + CreateVarHandleCommonLocations(invoke, codegen); } static void GenerateVarHandleSet(HInvoke* invoke, @@ -4991,7 +5160,7 @@ static void GenerateVarHandleSet(HInvoke* invoke, } void IntrinsicLocationsBuilderARM64::VisitVarHandleSet(HInvoke* invoke) { - CreateVarHandleSetLocations(invoke); + CreateVarHandleSetLocations(invoke, codegen_); } void IntrinsicCodeGeneratorARM64::VisitVarHandleSet(HInvoke* invoke) { @@ -4999,7 +5168,7 @@ void IntrinsicCodeGeneratorARM64::VisitVarHandleSet(HInvoke* invoke) { } void IntrinsicLocationsBuilderARM64::VisitVarHandleSetOpaque(HInvoke* invoke) { - CreateVarHandleSetLocations(invoke); + CreateVarHandleSetLocations(invoke, codegen_); } void IntrinsicCodeGeneratorARM64::VisitVarHandleSetOpaque(HInvoke* invoke) { @@ -5007,7 +5176,7 @@ void IntrinsicCodeGeneratorARM64::VisitVarHandleSetOpaque(HInvoke* invoke) { } void IntrinsicLocationsBuilderARM64::VisitVarHandleSetRelease(HInvoke* invoke) { - CreateVarHandleSetLocations(invoke); + CreateVarHandleSetLocations(invoke, codegen_); } void IntrinsicCodeGeneratorARM64::VisitVarHandleSetRelease(HInvoke* invoke) { @@ -5015,14 +5184,16 @@ void IntrinsicCodeGeneratorARM64::VisitVarHandleSetRelease(HInvoke* invoke) { } void IntrinsicLocationsBuilderARM64::VisitVarHandleSetVolatile(HInvoke* invoke) { - CreateVarHandleSetLocations(invoke); + CreateVarHandleSetLocations(invoke, codegen_); } void IntrinsicCodeGeneratorARM64::VisitVarHandleSetVolatile(HInvoke* invoke) { GenerateVarHandleSet(invoke, codegen_, std::memory_order_seq_cst); } -static void CreateVarHandleCompareAndSetOrExchangeLocations(HInvoke* invoke, bool return_success) { +static void CreateVarHandleCompareAndSetOrExchangeLocations(HInvoke* invoke, + CodeGeneratorARM64* codegen, + bool return_success) { VarHandleOptimizations 
optimizations(invoke); if (optimizations.GetDoNotIntrinsify()) { return; @@ -5030,8 +5201,7 @@ static void CreateVarHandleCompareAndSetOrExchangeLocations(HInvoke* invoke, boo uint32_t number_of_arguments = invoke->GetNumberOfArguments(); DataType::Type value_type = GetDataTypeFromShorty(invoke, number_of_arguments - 1u); - if ((gUseReadBarrier && !kUseBakerReadBarrier) && - value_type == DataType::Type::kReference) { + if (value_type == DataType::Type::kReference && codegen->EmitNonBakerReadBarrier()) { // Unsupported for non-Baker read barrier because the artReadBarrierSlow() ignores // the passed reference and reloads it from the field. This breaks the read barriers // in slow path in different ways. The marked old value may not actually be a to-space @@ -5042,9 +5212,9 @@ static void CreateVarHandleCompareAndSetOrExchangeLocations(HInvoke* invoke, boo return; } - LocationSummary* locations = CreateVarHandleCommonLocations(invoke); + LocationSummary* locations = CreateVarHandleCommonLocations(invoke, codegen); - if (gUseReadBarrier && !kUseBakerReadBarrier) { + if (codegen->EmitNonBakerReadBarrier()) { // We need callee-save registers for both the class object and offset instead of // the temporaries reserved in CreateVarHandleCommonLocations(). static_assert(POPCOUNT(kArm64CalleeSaveRefSpills) >= 2u); @@ -5085,7 +5255,7 @@ static void CreateVarHandleCompareAndSetOrExchangeLocations(HInvoke* invoke, boo locations->AddTemp(Location::RequiresRegister()); } } - if (gUseReadBarrier && value_type == DataType::Type::kReference) { + if (value_type == DataType::Type::kReference && codegen->EmitReadBarrier()) { // Add a temporary for the `old_value_temp` in slow path. locations->AddTemp(Location::RequiresRegister()); } @@ -5151,7 +5321,7 @@ static void GenerateVarHandleCompareAndSetOrExchange(HInvoke* invoke, // except for references that need the offset for the read barrier. UseScratchRegisterScope temps(masm); Register tmp_ptr = target.offset.X(); - if (gUseReadBarrier && value_type == DataType::Type::kReference) { + if (value_type == DataType::Type::kReference && codegen->EmitReadBarrier()) { tmp_ptr = temps.AcquireX(); } __ Add(tmp_ptr, target.object.X(), target.offset.X()); @@ -5234,7 +5404,7 @@ static void GenerateVarHandleCompareAndSetOrExchange(HInvoke* invoke, vixl::aarch64::Label* exit_loop = &exit_loop_label; vixl::aarch64::Label* cmp_failure = &exit_loop_label; - if (gUseReadBarrier && value_type == DataType::Type::kReference) { + if (value_type == DataType::Type::kReference && codegen->EmitReadBarrier()) { // The `old_value_temp` is used first for the marked `old_value` and then for the unmarked // reloaded old value for subsequent CAS in the slow path. It cannot be a scratch register. 
size_t expected_coordinates_count = GetExpectedVarHandleCoordinatesCount(invoke); @@ -5301,7 +5471,7 @@ static void GenerateVarHandleCompareAndSetOrExchange(HInvoke* invoke, } void IntrinsicLocationsBuilderARM64::VisitVarHandleCompareAndExchange(HInvoke* invoke) { - CreateVarHandleCompareAndSetOrExchangeLocations(invoke, /*return_success=*/ false); + CreateVarHandleCompareAndSetOrExchangeLocations(invoke, codegen_, /*return_success=*/ false); } void IntrinsicCodeGeneratorARM64::VisitVarHandleCompareAndExchange(HInvoke* invoke) { @@ -5310,7 +5480,7 @@ void IntrinsicCodeGeneratorARM64::VisitVarHandleCompareAndExchange(HInvoke* invo } void IntrinsicLocationsBuilderARM64::VisitVarHandleCompareAndExchangeAcquire(HInvoke* invoke) { - CreateVarHandleCompareAndSetOrExchangeLocations(invoke, /*return_success=*/ false); + CreateVarHandleCompareAndSetOrExchangeLocations(invoke, codegen_, /*return_success=*/ false); } void IntrinsicCodeGeneratorARM64::VisitVarHandleCompareAndExchangeAcquire(HInvoke* invoke) { @@ -5319,7 +5489,7 @@ void IntrinsicCodeGeneratorARM64::VisitVarHandleCompareAndExchangeAcquire(HInvok } void IntrinsicLocationsBuilderARM64::VisitVarHandleCompareAndExchangeRelease(HInvoke* invoke) { - CreateVarHandleCompareAndSetOrExchangeLocations(invoke, /*return_success=*/ false); + CreateVarHandleCompareAndSetOrExchangeLocations(invoke, codegen_, /*return_success=*/ false); } void IntrinsicCodeGeneratorARM64::VisitVarHandleCompareAndExchangeRelease(HInvoke* invoke) { @@ -5328,7 +5498,7 @@ void IntrinsicCodeGeneratorARM64::VisitVarHandleCompareAndExchangeRelease(HInvok } void IntrinsicLocationsBuilderARM64::VisitVarHandleCompareAndSet(HInvoke* invoke) { - CreateVarHandleCompareAndSetOrExchangeLocations(invoke, /*return_success=*/ true); + CreateVarHandleCompareAndSetOrExchangeLocations(invoke, codegen_, /*return_success=*/ true); } void IntrinsicCodeGeneratorARM64::VisitVarHandleCompareAndSet(HInvoke* invoke) { @@ -5337,7 +5507,7 @@ void IntrinsicCodeGeneratorARM64::VisitVarHandleCompareAndSet(HInvoke* invoke) { } void IntrinsicLocationsBuilderARM64::VisitVarHandleWeakCompareAndSet(HInvoke* invoke) { - CreateVarHandleCompareAndSetOrExchangeLocations(invoke, /*return_success=*/ true); + CreateVarHandleCompareAndSetOrExchangeLocations(invoke, codegen_, /*return_success=*/ true); } void IntrinsicCodeGeneratorARM64::VisitVarHandleWeakCompareAndSet(HInvoke* invoke) { @@ -5346,7 +5516,7 @@ void IntrinsicCodeGeneratorARM64::VisitVarHandleWeakCompareAndSet(HInvoke* invok } void IntrinsicLocationsBuilderARM64::VisitVarHandleWeakCompareAndSetAcquire(HInvoke* invoke) { - CreateVarHandleCompareAndSetOrExchangeLocations(invoke, /*return_success=*/ true); + CreateVarHandleCompareAndSetOrExchangeLocations(invoke, codegen_, /*return_success=*/ true); } void IntrinsicCodeGeneratorARM64::VisitVarHandleWeakCompareAndSetAcquire(HInvoke* invoke) { @@ -5355,7 +5525,7 @@ void IntrinsicCodeGeneratorARM64::VisitVarHandleWeakCompareAndSetAcquire(HInvoke } void IntrinsicLocationsBuilderARM64::VisitVarHandleWeakCompareAndSetPlain(HInvoke* invoke) { - CreateVarHandleCompareAndSetOrExchangeLocations(invoke, /*return_success=*/ true); + CreateVarHandleCompareAndSetOrExchangeLocations(invoke, codegen_, /*return_success=*/ true); } void IntrinsicCodeGeneratorARM64::VisitVarHandleWeakCompareAndSetPlain(HInvoke* invoke) { @@ -5364,7 +5534,7 @@ void IntrinsicCodeGeneratorARM64::VisitVarHandleWeakCompareAndSetPlain(HInvoke* } void IntrinsicLocationsBuilderARM64::VisitVarHandleWeakCompareAndSetRelease(HInvoke* invoke) { - 
CreateVarHandleCompareAndSetOrExchangeLocations(invoke, /*return_success=*/ true); + CreateVarHandleCompareAndSetOrExchangeLocations(invoke, codegen_, /*return_success=*/ true); } void IntrinsicCodeGeneratorARM64::VisitVarHandleWeakCompareAndSetRelease(HInvoke* invoke) { @@ -5373,21 +5543,21 @@ void IntrinsicCodeGeneratorARM64::VisitVarHandleWeakCompareAndSetRelease(HInvoke } static void CreateVarHandleGetAndUpdateLocations(HInvoke* invoke, + CodeGeneratorARM64* codegen, GetAndUpdateOp get_and_update_op) { VarHandleOptimizations optimizations(invoke); if (optimizations.GetDoNotIntrinsify()) { return; } - if ((gUseReadBarrier && !kUseBakerReadBarrier) && - invoke->GetType() == DataType::Type::kReference) { + if (invoke->GetType() == DataType::Type::kReference && codegen->EmitNonBakerReadBarrier()) { // Unsupported for non-Baker read barrier because the artReadBarrierSlow() ignores // the passed reference and reloads it from the field, thus seeing the new value // that we have just stored. (And it also gets the memory visibility wrong.) b/173104084 return; } - LocationSummary* locations = CreateVarHandleCommonLocations(invoke); + LocationSummary* locations = CreateVarHandleCommonLocations(invoke, codegen); size_t old_temp_count = locations->GetTempCount(); DCHECK_EQ(old_temp_count, (GetExpectedVarHandleCoordinatesCount(invoke) == 0) ? 2u : 1u); @@ -5455,8 +5625,7 @@ static void GenerateVarHandleGetAndUpdate(HInvoke* invoke, // except for references that need the offset for the non-Baker read barrier. UseScratchRegisterScope temps(masm); Register tmp_ptr = target.offset.X(); - if ((gUseReadBarrier && !kUseBakerReadBarrier) && - value_type == DataType::Type::kReference) { + if (value_type == DataType::Type::kReference && codegen->EmitNonBakerReadBarrier()) { tmp_ptr = temps.AcquireX(); } __ Add(tmp_ptr, target.object.X(), target.offset.X()); @@ -5485,8 +5654,7 @@ static void GenerateVarHandleGetAndUpdate(HInvoke* invoke, // the new value unless it is zero bit pattern (+0.0f or +0.0) and need another one // in GenerateGetAndUpdate(). We have allocated a normal temporary to handle that. old_value = CPURegisterFrom(locations->GetTemp(1u), load_store_type); - } else if ((gUseReadBarrier && kUseBakerReadBarrier) && - value_type == DataType::Type::kReference) { + } else if (value_type == DataType::Type::kReference && codegen->EmitBakerReadBarrier()) { // Load the old value initially to a scratch register. // We shall move it to `out` later with a read barrier. 
old_value = temps.AcquireW(); @@ -5533,9 +5701,9 @@ static void GenerateVarHandleGetAndUpdate(HInvoke* invoke, __ Sxtb(out.W(), old_value.W()); } else if (value_type == DataType::Type::kInt16) { __ Sxth(out.W(), old_value.W()); - } else if (gUseReadBarrier && value_type == DataType::Type::kReference) { + } else if (value_type == DataType::Type::kReference && codegen->EmitReadBarrier()) { if (kUseBakerReadBarrier) { - codegen->GenerateIntrinsicCasMoveWithBakerReadBarrier(out.W(), old_value.W()); + codegen->GenerateIntrinsicMoveWithBakerReadBarrier(out.W(), old_value.W()); } else { codegen->GenerateReadBarrierSlow( invoke, @@ -5554,7 +5722,7 @@ static void GenerateVarHandleGetAndUpdate(HInvoke* invoke, } void IntrinsicLocationsBuilderARM64::VisitVarHandleGetAndSet(HInvoke* invoke) { - CreateVarHandleGetAndUpdateLocations(invoke, GetAndUpdateOp::kSet); + CreateVarHandleGetAndUpdateLocations(invoke, codegen_, GetAndUpdateOp::kSet); } void IntrinsicCodeGeneratorARM64::VisitVarHandleGetAndSet(HInvoke* invoke) { @@ -5562,7 +5730,7 @@ void IntrinsicCodeGeneratorARM64::VisitVarHandleGetAndSet(HInvoke* invoke) { } void IntrinsicLocationsBuilderARM64::VisitVarHandleGetAndSetAcquire(HInvoke* invoke) { - CreateVarHandleGetAndUpdateLocations(invoke, GetAndUpdateOp::kSet); + CreateVarHandleGetAndUpdateLocations(invoke, codegen_, GetAndUpdateOp::kSet); } void IntrinsicCodeGeneratorARM64::VisitVarHandleGetAndSetAcquire(HInvoke* invoke) { @@ -5570,7 +5738,7 @@ void IntrinsicCodeGeneratorARM64::VisitVarHandleGetAndSetAcquire(HInvoke* invoke } void IntrinsicLocationsBuilderARM64::VisitVarHandleGetAndSetRelease(HInvoke* invoke) { - CreateVarHandleGetAndUpdateLocations(invoke, GetAndUpdateOp::kSet); + CreateVarHandleGetAndUpdateLocations(invoke, codegen_, GetAndUpdateOp::kSet); } void IntrinsicCodeGeneratorARM64::VisitVarHandleGetAndSetRelease(HInvoke* invoke) { @@ -5578,7 +5746,7 @@ void IntrinsicCodeGeneratorARM64::VisitVarHandleGetAndSetRelease(HInvoke* invoke } void IntrinsicLocationsBuilderARM64::VisitVarHandleGetAndAdd(HInvoke* invoke) { - CreateVarHandleGetAndUpdateLocations(invoke, GetAndUpdateOp::kAdd); + CreateVarHandleGetAndUpdateLocations(invoke, codegen_, GetAndUpdateOp::kAdd); } void IntrinsicCodeGeneratorARM64::VisitVarHandleGetAndAdd(HInvoke* invoke) { @@ -5586,7 +5754,7 @@ void IntrinsicCodeGeneratorARM64::VisitVarHandleGetAndAdd(HInvoke* invoke) { } void IntrinsicLocationsBuilderARM64::VisitVarHandleGetAndAddAcquire(HInvoke* invoke) { - CreateVarHandleGetAndUpdateLocations(invoke, GetAndUpdateOp::kAdd); + CreateVarHandleGetAndUpdateLocations(invoke, codegen_, GetAndUpdateOp::kAdd); } void IntrinsicCodeGeneratorARM64::VisitVarHandleGetAndAddAcquire(HInvoke* invoke) { @@ -5594,7 +5762,7 @@ void IntrinsicCodeGeneratorARM64::VisitVarHandleGetAndAddAcquire(HInvoke* invoke } void IntrinsicLocationsBuilderARM64::VisitVarHandleGetAndAddRelease(HInvoke* invoke) { - CreateVarHandleGetAndUpdateLocations(invoke, GetAndUpdateOp::kAdd); + CreateVarHandleGetAndUpdateLocations(invoke, codegen_, GetAndUpdateOp::kAdd); } void IntrinsicCodeGeneratorARM64::VisitVarHandleGetAndAddRelease(HInvoke* invoke) { @@ -5602,7 +5770,7 @@ void IntrinsicCodeGeneratorARM64::VisitVarHandleGetAndAddRelease(HInvoke* invoke } void IntrinsicLocationsBuilderARM64::VisitVarHandleGetAndBitwiseAnd(HInvoke* invoke) { - CreateVarHandleGetAndUpdateLocations(invoke, GetAndUpdateOp::kAnd); + CreateVarHandleGetAndUpdateLocations(invoke, codegen_, GetAndUpdateOp::kAnd); } void 
IntrinsicCodeGeneratorARM64::VisitVarHandleGetAndBitwiseAnd(HInvoke* invoke) { @@ -5610,7 +5778,7 @@ void IntrinsicCodeGeneratorARM64::VisitVarHandleGetAndBitwiseAnd(HInvoke* invoke } void IntrinsicLocationsBuilderARM64::VisitVarHandleGetAndBitwiseAndAcquire(HInvoke* invoke) { - CreateVarHandleGetAndUpdateLocations(invoke, GetAndUpdateOp::kAnd); + CreateVarHandleGetAndUpdateLocations(invoke, codegen_, GetAndUpdateOp::kAnd); } void IntrinsicCodeGeneratorARM64::VisitVarHandleGetAndBitwiseAndAcquire(HInvoke* invoke) { @@ -5618,7 +5786,7 @@ void IntrinsicCodeGeneratorARM64::VisitVarHandleGetAndBitwiseAndAcquire(HInvoke* } void IntrinsicLocationsBuilderARM64::VisitVarHandleGetAndBitwiseAndRelease(HInvoke* invoke) { - CreateVarHandleGetAndUpdateLocations(invoke, GetAndUpdateOp::kAnd); + CreateVarHandleGetAndUpdateLocations(invoke, codegen_, GetAndUpdateOp::kAnd); } void IntrinsicCodeGeneratorARM64::VisitVarHandleGetAndBitwiseAndRelease(HInvoke* invoke) { @@ -5626,7 +5794,7 @@ void IntrinsicCodeGeneratorARM64::VisitVarHandleGetAndBitwiseAndRelease(HInvoke* } void IntrinsicLocationsBuilderARM64::VisitVarHandleGetAndBitwiseOr(HInvoke* invoke) { - CreateVarHandleGetAndUpdateLocations(invoke, GetAndUpdateOp::kOr); + CreateVarHandleGetAndUpdateLocations(invoke, codegen_, GetAndUpdateOp::kOr); } void IntrinsicCodeGeneratorARM64::VisitVarHandleGetAndBitwiseOr(HInvoke* invoke) { @@ -5634,7 +5802,7 @@ void IntrinsicCodeGeneratorARM64::VisitVarHandleGetAndBitwiseOr(HInvoke* invoke) } void IntrinsicLocationsBuilderARM64::VisitVarHandleGetAndBitwiseOrAcquire(HInvoke* invoke) { - CreateVarHandleGetAndUpdateLocations(invoke, GetAndUpdateOp::kOr); + CreateVarHandleGetAndUpdateLocations(invoke, codegen_, GetAndUpdateOp::kOr); } void IntrinsicCodeGeneratorARM64::VisitVarHandleGetAndBitwiseOrAcquire(HInvoke* invoke) { @@ -5642,7 +5810,7 @@ void IntrinsicCodeGeneratorARM64::VisitVarHandleGetAndBitwiseOrAcquire(HInvoke* } void IntrinsicLocationsBuilderARM64::VisitVarHandleGetAndBitwiseOrRelease(HInvoke* invoke) { - CreateVarHandleGetAndUpdateLocations(invoke, GetAndUpdateOp::kOr); + CreateVarHandleGetAndUpdateLocations(invoke, codegen_, GetAndUpdateOp::kOr); } void IntrinsicCodeGeneratorARM64::VisitVarHandleGetAndBitwiseOrRelease(HInvoke* invoke) { @@ -5650,7 +5818,7 @@ void IntrinsicCodeGeneratorARM64::VisitVarHandleGetAndBitwiseOrRelease(HInvoke* } void IntrinsicLocationsBuilderARM64::VisitVarHandleGetAndBitwiseXor(HInvoke* invoke) { - CreateVarHandleGetAndUpdateLocations(invoke, GetAndUpdateOp::kXor); + CreateVarHandleGetAndUpdateLocations(invoke, codegen_, GetAndUpdateOp::kXor); } void IntrinsicCodeGeneratorARM64::VisitVarHandleGetAndBitwiseXor(HInvoke* invoke) { @@ -5658,7 +5826,7 @@ void IntrinsicCodeGeneratorARM64::VisitVarHandleGetAndBitwiseXor(HInvoke* invoke } void IntrinsicLocationsBuilderARM64::VisitVarHandleGetAndBitwiseXorAcquire(HInvoke* invoke) { - CreateVarHandleGetAndUpdateLocations(invoke, GetAndUpdateOp::kXor); + CreateVarHandleGetAndUpdateLocations(invoke, codegen_, GetAndUpdateOp::kXor); } void IntrinsicCodeGeneratorARM64::VisitVarHandleGetAndBitwiseXorAcquire(HInvoke* invoke) { @@ -5666,7 +5834,7 @@ void IntrinsicCodeGeneratorARM64::VisitVarHandleGetAndBitwiseXorAcquire(HInvoke* } void IntrinsicLocationsBuilderARM64::VisitVarHandleGetAndBitwiseXorRelease(HInvoke* invoke) { - CreateVarHandleGetAndUpdateLocations(invoke, GetAndUpdateOp::kXor); + CreateVarHandleGetAndUpdateLocations(invoke, codegen_, GetAndUpdateOp::kXor); } void 
IntrinsicCodeGeneratorARM64::VisitVarHandleGetAndBitwiseXorRelease(HInvoke* invoke) { diff --git a/compiler/optimizing/intrinsics_arm64.h b/compiler/optimizing/intrinsics_arm64.h index a0ccf87f7b..50e2e43f5f 100644 --- a/compiler/optimizing/intrinsics_arm64.h +++ b/compiler/optimizing/intrinsics_arm64.h @@ -19,6 +19,7 @@ #include "base/macros.h" #include "intrinsics.h" +#include "intrinsics_list.h" namespace vixl { namespace aarch64 { @@ -47,9 +48,7 @@ class IntrinsicLocationsBuilderARM64 final : public IntrinsicVisitor { #define OPTIMIZING_INTRINSICS(Name, IsStatic, NeedsEnvironmentOrCache, SideEffects, Exceptions, ...) \ void Visit ## Name(HInvoke* invoke) override; -#include "intrinsics_list.h" - INTRINSICS_LIST(OPTIMIZING_INTRINSICS) -#undef INTRINSICS_LIST + ART_INTRINSICS_LIST(OPTIMIZING_INTRINSICS) #undef OPTIMIZING_INTRINSICS // Check whether an invoke is an intrinsic, and if so, create a location summary. Returns whether @@ -72,9 +71,7 @@ class IntrinsicCodeGeneratorARM64 final : public IntrinsicVisitor { #define OPTIMIZING_INTRINSICS(Name, IsStatic, NeedsEnvironmentOrCache, SideEffects, Exceptions, ...) \ void Visit ## Name(HInvoke* invoke) override; -#include "intrinsics_list.h" - INTRINSICS_LIST(OPTIMIZING_INTRINSICS) -#undef INTRINSICS_LIST + ART_INTRINSICS_LIST(OPTIMIZING_INTRINSICS) #undef OPTIMIZING_INTRINSICS private: @@ -82,6 +79,10 @@ class IntrinsicCodeGeneratorARM64 final : public IntrinsicVisitor { ArenaAllocator* GetAllocator(); + void HandleValueOf(HInvoke* invoke, + const IntrinsicVisitor::ValueOfInfo& info, + DataType::Type type); + CodeGeneratorARM64* const codegen_; DISALLOW_COPY_AND_ASSIGN(IntrinsicCodeGeneratorARM64); diff --git a/compiler/optimizing/intrinsics_arm_vixl.cc b/compiler/optimizing/intrinsics_arm_vixl.cc index 266b5bc799..5f4de8cda2 100644 --- a/compiler/optimizing/intrinsics_arm_vixl.cc +++ b/compiler/optimizing/intrinsics_arm_vixl.cc @@ -22,6 +22,7 @@ #include "code_generator_arm_vixl.h" #include "common_arm.h" #include "heap_poisoning.h" +#include "intrinsic_objects.h" #include "intrinsics.h" #include "intrinsics_utils.h" #include "lock_word.h" @@ -31,6 +32,7 @@ #include "mirror/string-inl.h" #include "scoped_thread_state_change-inl.h" #include "thread-current-inl.h" +#include "well_known_classes.h" #include "aarch32/constants-aarch32.h" @@ -120,11 +122,10 @@ class ReadBarrierSystemArrayCopySlowPathARMVIXL : public SlowPathCodeARMVIXL { public: explicit ReadBarrierSystemArrayCopySlowPathARMVIXL(HInstruction* instruction) : SlowPathCodeARMVIXL(instruction) { - DCHECK(gUseReadBarrier); - DCHECK(kUseBakerReadBarrier); } void EmitNativeCode(CodeGenerator* codegen) override { + DCHECK(codegen->EmitBakerReadBarrier()); CodeGeneratorARMVIXL* arm_codegen = down_cast<CodeGeneratorARMVIXL*>(codegen); ArmVIXLAssembler* assembler = arm_codegen->GetAssembler(); LocationSummary* locations = instruction_->GetLocations(); @@ -1242,7 +1243,7 @@ void IntrinsicCodeGeneratorARMVIXL::VisitStringNewStringFromString(HInvoke* invo void IntrinsicLocationsBuilderARMVIXL::VisitSystemArrayCopy(HInvoke* invoke) { // The only read barrier implementation supporting the // SystemArrayCopy intrinsic is the Baker-style read barriers. 
- if (gUseReadBarrier && !kUseBakerReadBarrier) { + if (codegen_->EmitNonBakerReadBarrier()) { return; } @@ -1252,9 +1253,9 @@ void IntrinsicLocationsBuilderARMVIXL::VisitSystemArrayCopy(HInvoke* invoke) { return; } - HIntConstant* src_pos = invoke->InputAt(1)->AsIntConstant(); - HIntConstant* dest_pos = invoke->InputAt(3)->AsIntConstant(); - HIntConstant* length = invoke->InputAt(4)->AsIntConstant(); + HIntConstant* src_pos = invoke->InputAt(1)->AsIntConstantOrNull(); + HIntConstant* dest_pos = invoke->InputAt(3)->AsIntConstantOrNull(); + HIntConstant* length = invoke->InputAt(4)->AsIntConstantOrNull(); if (src_pos != nullptr && !assembler_->ShifterOperandCanAlwaysHold(src_pos->GetValue())) { locations->SetInAt(1, Location::RequiresRegister()); @@ -1265,7 +1266,7 @@ void IntrinsicLocationsBuilderARMVIXL::VisitSystemArrayCopy(HInvoke* invoke) { if (length != nullptr && !assembler_->ShifterOperandCanAlwaysHold(length->GetValue())) { locations->SetInAt(4, Location::RequiresRegister()); } - if (gUseReadBarrier && kUseBakerReadBarrier) { + if (codegen_->EmitBakerReadBarrier()) { // Temporary register IP cannot be used in // ReadBarrierSystemArrayCopySlowPathARM (because that register // is clobbered by ReadBarrierMarkRegX entry points). Get an extra @@ -1339,7 +1340,7 @@ static void CheckPosition(ArmVIXLAssembler* assembler, void IntrinsicCodeGeneratorARMVIXL::VisitSystemArrayCopy(HInvoke* invoke) { // The only read barrier implementation supporting the // SystemArrayCopy intrinsic is the Baker-style read barriers. - DCHECK_IMPLIES(gUseReadBarrier, kUseBakerReadBarrier); + DCHECK_IMPLIES(codegen_->EmitReadBarrier(), kUseBakerReadBarrier); ArmVIXLAssembler* assembler = GetAssembler(); LocationSummary* locations = invoke->GetLocations(); @@ -1453,7 +1454,7 @@ void IntrinsicCodeGeneratorARMVIXL::VisitSystemArrayCopy(HInvoke* invoke) { // or the destination is Object[]. If none of these checks succeed, we go to the // slow path. - if (gUseReadBarrier && kUseBakerReadBarrier) { + if (codegen_->EmitBakerReadBarrier()) { if (!optimizations.GetSourceIsNonPrimitiveArray()) { // /* HeapReference<Class> */ temp1 = src->klass_ codegen_->GenerateFieldLoadWithBakerReadBarrier( @@ -1464,7 +1465,7 @@ void IntrinsicCodeGeneratorARMVIXL::VisitSystemArrayCopy(HInvoke* invoke) { invoke, temp1_loc, temp1, component_offset, temp2_loc, /* needs_null_check= */ false); __ CompareAndBranchIfZero(temp1, intrinsic_slow_path->GetEntryLabel()); // If heap poisoning is enabled, `temp1` has been unpoisoned - // by the the previous call to GenerateFieldLoadWithBakerReadBarrier. + // by the previous call to GenerateFieldLoadWithBakerReadBarrier. // /* uint16_t */ temp1 = static_cast<uint16>(temp1->primitive_type_); __ Ldrh(temp1, MemOperand(temp1, primitive_offset)); static_assert(Primitive::kPrimNot == 0, "Expected 0 for kPrimNot"); @@ -1488,7 +1489,7 @@ void IntrinsicCodeGeneratorARMVIXL::VisitSystemArrayCopy(HInvoke* invoke) { invoke, temp2_loc, temp1, component_offset, temp3_loc, /* needs_null_check= */ false); __ CompareAndBranchIfZero(temp2, intrinsic_slow_path->GetEntryLabel()); // If heap poisoning is enabled, `temp2` has been unpoisoned - // by the the previous call to GenerateFieldLoadWithBakerReadBarrier. + // by the previous call to GenerateFieldLoadWithBakerReadBarrier. 
// /* uint16_t */ temp2 = static_cast<uint16>(temp2->primitive_type_); __ Ldrh(temp2, MemOperand(temp2, primitive_offset)); static_assert(Primitive::kPrimNot == 0, "Expected 0 for kPrimNot"); @@ -1584,7 +1585,7 @@ void IntrinsicCodeGeneratorARMVIXL::VisitSystemArrayCopy(HInvoke* invoke) { } else if (!optimizations.GetSourceIsNonPrimitiveArray()) { DCHECK(optimizations.GetDestinationIsNonPrimitiveArray()); // Bail out if the source is not a non primitive array. - if (gUseReadBarrier && kUseBakerReadBarrier) { + if (codegen_->EmitBakerReadBarrier()) { // /* HeapReference<Class> */ temp1 = src->klass_ codegen_->GenerateFieldLoadWithBakerReadBarrier( invoke, temp1_loc, src, class_offset, temp2_loc, /* needs_null_check= */ false); @@ -1593,7 +1594,7 @@ void IntrinsicCodeGeneratorARMVIXL::VisitSystemArrayCopy(HInvoke* invoke) { invoke, temp3_loc, temp1, component_offset, temp2_loc, /* needs_null_check= */ false); __ CompareAndBranchIfZero(temp3, intrinsic_slow_path->GetEntryLabel()); // If heap poisoning is enabled, `temp3` has been unpoisoned - // by the the previous call to GenerateFieldLoadWithBakerReadBarrier. + // by the previous call to GenerateFieldLoadWithBakerReadBarrier. } else { // /* HeapReference<Class> */ temp1 = src->klass_ __ Ldr(temp1, MemOperand(src, class_offset)); @@ -1621,7 +1622,7 @@ void IntrinsicCodeGeneratorARMVIXL::VisitSystemArrayCopy(HInvoke* invoke) { __ CompareAndBranchIfZero(RegisterFrom(length), &done, /* is_far_target= */ false); } - if (gUseReadBarrier && kUseBakerReadBarrier) { + if (codegen_->EmitBakerReadBarrier()) { // TODO: Also convert this intrinsic to the IsGcMarking strategy? // SystemArrayCopy implementation for Baker read barriers (see @@ -2433,18 +2434,35 @@ void IntrinsicCodeGeneratorARMVIXL::VisitMathFloor(HInvoke* invoke) { __ Vrintm(F64, OutputDRegister(invoke), InputDRegisterAt(invoke, 0)); } -void IntrinsicLocationsBuilderARMVIXL::VisitIntegerValueOf(HInvoke* invoke) { - InvokeRuntimeCallingConventionARMVIXL calling_convention; - IntrinsicVisitor::ComputeIntegerValueOfLocations( - invoke, - codegen_, - LocationFrom(r0), - LocationFrom(calling_convention.GetRegisterAt(0))); -} - -void IntrinsicCodeGeneratorARMVIXL::VisitIntegerValueOf(HInvoke* invoke) { - IntrinsicVisitor::IntegerValueOfInfo info = - IntrinsicVisitor::ComputeIntegerValueOfInfo(invoke, codegen_->GetCompilerOptions()); +#define VISIT_INTRINSIC(name, low, high, type, start_index) \ + void IntrinsicLocationsBuilderARMVIXL::Visit ##name ##ValueOf(HInvoke* invoke) { \ + InvokeRuntimeCallingConventionARMVIXL calling_convention; \ + IntrinsicVisitor::ComputeValueOfLocations( \ + invoke, \ + codegen_, \ + low, \ + high - low + 1, \ + LocationFrom(r0), \ + LocationFrom(calling_convention.GetRegisterAt(0))); \ + } \ + void IntrinsicCodeGeneratorARMVIXL::Visit ##name ##ValueOf(HInvoke* invoke) { \ + IntrinsicVisitor::ValueOfInfo info = \ + IntrinsicVisitor::ComputeValueOfInfo( \ + invoke, \ + codegen_->GetCompilerOptions(), \ + WellKnownClasses::java_lang_ ##name ##_value, \ + low, \ + high - low + 1, \ + start_index); \ + HandleValueOf(invoke, info, type); \ + } + BOXED_TYPES(VISIT_INTRINSIC) +#undef VISIT_INTRINSIC + + +void IntrinsicCodeGeneratorARMVIXL::HandleValueOf(HInvoke* invoke, + const IntrinsicVisitor::ValueOfInfo& info, + DataType::Type type) { LocationSummary* locations = invoke->GetLocations(); ArmVIXLAssembler* const assembler = GetAssembler(); @@ -2457,20 +2475,20 @@ void IntrinsicCodeGeneratorARMVIXL::VisitIntegerValueOf(HInvoke* invoke) { 
codegen_->InvokeRuntime(kQuickAllocObjectInitialized, invoke, invoke->GetDexPc()); CheckEntrypointTypes<kQuickAllocObjectWithChecks, void*, mirror::Class*>(); }; - if (invoke->InputAt(0)->IsConstant()) { + if (invoke->InputAt(0)->IsIntConstant()) { int32_t value = invoke->InputAt(0)->AsIntConstant()->GetValue(); if (static_cast<uint32_t>(value - info.low) < info.length) { - // Just embed the j.l.Integer in the code. - DCHECK_NE(info.value_boot_image_reference, IntegerValueOfInfo::kInvalidReference); + // Just embed the object in the code. + DCHECK_NE(info.value_boot_image_reference, ValueOfInfo::kInvalidReference); codegen_->LoadBootImageAddress(out, info.value_boot_image_reference); } else { DCHECK(locations->CanCall()); - // Allocate and initialize a new j.l.Integer. - // TODO: If we JIT, we could allocate the j.l.Integer now, and store it in the + // Allocate and initialize a new object. + // TODO: If we JIT, we could allocate the object now, and store it in the // JIT object table. allocate_instance(); __ Mov(temp, value); - assembler->StoreToOffset(kStoreWord, temp, out, info.value_offset); + assembler->StoreToOffset(GetStoreOperandType(type), temp, out, info.value_offset); // Class pointer and `value` final field stores require a barrier before publication. codegen_->GenerateMemoryBarrier(MemBarrierKind::kStoreStore); } @@ -2482,15 +2500,15 @@ void IntrinsicCodeGeneratorARMVIXL::VisitIntegerValueOf(HInvoke* invoke) { __ Cmp(out, info.length); vixl32::Label allocate, done; __ B(hs, &allocate, /* is_far_target= */ false); - // If the value is within the bounds, load the j.l.Integer directly from the array. + // If the value is within the bounds, load the object directly from the array. codegen_->LoadBootImageAddress(temp, info.array_data_boot_image_reference); codegen_->LoadFromShiftedRegOffset(DataType::Type::kReference, locations->Out(), temp, out); assembler->MaybeUnpoisonHeapReference(out); __ B(&done); __ Bind(&allocate); - // Otherwise allocate and initialize a new j.l.Integer. + // Otherwise allocate and initialize a new object. allocate_instance(); - assembler->StoreToOffset(kStoreWord, in, out, info.value_offset); + assembler->StoreToOffset(GetStoreOperandType(type), in, out, info.value_offset); // Class pointer and `value` final field stores require a barrier before publication. codegen_->GenerateMemoryBarrier(MemBarrierKind::kStoreStore); __ Bind(&done); @@ -2511,7 +2529,7 @@ void IntrinsicCodeGeneratorARMVIXL::VisitReferenceGetReferent(HInvoke* invoke) { SlowPathCodeARMVIXL* slow_path = new (GetAllocator()) IntrinsicSlowPathARMVIXL(invoke); codegen_->AddSlowPath(slow_path); - if (gUseReadBarrier) { + if (codegen_->EmitReadBarrier()) { // Check self->GetWeakRefAccessEnabled(). UseScratchRegisterScope temps(assembler->GetVIXLAssembler()); vixl32::Register temp = temps.Acquire(); @@ -2539,7 +2557,7 @@ void IntrinsicCodeGeneratorARMVIXL::VisitReferenceGetReferent(HInvoke* invoke) { // Load the value from the field. 
uint32_t referent_offset = mirror::Reference::ReferentOffset().Uint32Value(); - if (gUseReadBarrier && kUseBakerReadBarrier) { + if (codegen_->EmitBakerReadBarrier()) { codegen_->GenerateFieldLoadWithBakerReadBarrier(invoke, out, RegisterFrom(obj), @@ -2560,7 +2578,7 @@ void IntrinsicCodeGeneratorARMVIXL::VisitReferenceGetReferent(HInvoke* invoke) { } void IntrinsicLocationsBuilderARMVIXL::VisitReferenceRefersTo(HInvoke* invoke) { - IntrinsicVisitor::CreateReferenceRefersToLocations(invoke); + IntrinsicVisitor::CreateReferenceRefersToLocations(invoke, codegen_); } void IntrinsicCodeGeneratorARMVIXL::VisitReferenceRefersTo(HInvoke* invoke) { @@ -2587,7 +2605,7 @@ void IntrinsicCodeGeneratorARMVIXL::VisitReferenceRefersTo(HInvoke* invoke) { assembler->MaybeUnpoisonHeapReference(tmp); codegen_->GenerateMemoryBarrier(MemBarrierKind::kLoadAny); // `referent` is volatile. - if (gUseReadBarrier) { + if (codegen_->EmitReadBarrier()) { DCHECK(kUseBakerReadBarrier); vixl32::Label calculate_result; @@ -2613,7 +2631,7 @@ void IntrinsicCodeGeneratorARMVIXL::VisitReferenceRefersTo(HInvoke* invoke) { __ Bind(&calculate_result); } else { - DCHECK(!gUseReadBarrier); + DCHECK(!codegen_->EmitReadBarrier()); __ Sub(out, tmp, other); } @@ -2653,7 +2671,7 @@ void IntrinsicLocationsBuilderARMVIXL::VisitReachabilityFence(HInvoke* invoke) { locations->SetInAt(0, Location::Any()); } -void IntrinsicCodeGeneratorARMVIXL::VisitReachabilityFence(HInvoke* invoke ATTRIBUTE_UNUSED) { } +void IntrinsicCodeGeneratorARMVIXL::VisitReachabilityFence([[maybe_unused]] HInvoke* invoke) {} void IntrinsicLocationsBuilderARMVIXL::VisitIntegerDivideUnsigned(HInvoke* invoke) { CreateIntIntToIntSlowPathCallLocations(allocator_, invoke); @@ -2732,7 +2750,7 @@ static void GenerateIntrinsicGet(HInvoke* invoke, } break; case DataType::Type::kReference: - if (gUseReadBarrier && kUseBakerReadBarrier) { + if (codegen->EmitBakerReadBarrier()) { // Piggy-back on the field load path using introspection for the Baker read barrier. vixl32::Register temp = RegisterFrom(maybe_temp); __ Add(temp, base, offset); @@ -2777,32 +2795,18 @@ static void GenerateIntrinsicGet(HInvoke* invoke, codegen->GenerateMemoryBarrier( seq_cst_barrier ? 
MemBarrierKind::kAnyAny : MemBarrierKind::kLoadAny); } - if (type == DataType::Type::kReference && !(gUseReadBarrier && kUseBakerReadBarrier)) { + if (type == DataType::Type::kReference && !codegen->EmitBakerReadBarrier()) { Location base_loc = LocationFrom(base); Location index_loc = LocationFrom(offset); codegen->MaybeGenerateReadBarrierSlow(invoke, out, out, base_loc, /* offset=*/ 0u, index_loc); } } -static bool UnsafeGetIntrinsicOnCallList(Intrinsics intrinsic) { - switch (intrinsic) { - case Intrinsics::kUnsafeGetObject: - case Intrinsics::kUnsafeGetObjectVolatile: - case Intrinsics::kJdkUnsafeGetObject: - case Intrinsics::kJdkUnsafeGetObjectVolatile: - case Intrinsics::kJdkUnsafeGetObjectAcquire: - return true; - default: - break; - } - return false; -} - static void CreateUnsafeGetLocations(HInvoke* invoke, CodeGeneratorARMVIXL* codegen, DataType::Type type, bool atomic) { - bool can_call = gUseReadBarrier && UnsafeGetIntrinsicOnCallList(invoke->GetIntrinsic()); + bool can_call = codegen->EmitReadBarrier() && IsUnsafeGetReference(invoke); ArenaAllocator* allocator = invoke->GetBlock()->GetGraph()->GetAllocator(); LocationSummary* locations = new (allocator) LocationSummary(invoke, @@ -2818,7 +2822,7 @@ static void CreateUnsafeGetLocations(HInvoke* invoke, locations->SetInAt(2, Location::RequiresRegister()); locations->SetOut(Location::RequiresRegister(), (can_call ? Location::kOutputOverlap : Location::kNoOutputOverlap)); - if ((gUseReadBarrier && kUseBakerReadBarrier && type == DataType::Type::kReference) || + if ((type == DataType::Type::kReference && codegen->EmitBakerReadBarrier()) || (type == DataType::Type::kInt64 && Use64BitExclusiveLoadStore(atomic, codegen))) { // We need a temporary register for the read barrier marking slow // path in CodeGeneratorARMVIXL::GenerateReferenceLoadWithBakerReadBarrier, @@ -2837,7 +2841,7 @@ static void GenUnsafeGet(HInvoke* invoke, vixl32::Register offset = LowRegisterFrom(locations->InAt(2)); // Long offset, lo part only. 
Location out = locations->Out(); Location maybe_temp = Location::NoLocation(); - if ((gUseReadBarrier && kUseBakerReadBarrier && type == DataType::Type::kReference) || + if ((type == DataType::Type::kReference && codegen->EmitBakerReadBarrier()) || (type == DataType::Type::kInt64 && Use64BitExclusiveLoadStore(atomic, codegen))) { maybe_temp = locations->GetTemp(0); } @@ -2887,19 +2891,27 @@ void IntrinsicCodeGeneratorARMVIXL::VisitUnsafeGetLongVolatile(HInvoke* invoke) } void IntrinsicLocationsBuilderARMVIXL::VisitUnsafeGetObject(HInvoke* invoke) { - VisitJdkUnsafeGetObject(invoke); + VisitJdkUnsafeGetReference(invoke); } void IntrinsicCodeGeneratorARMVIXL::VisitUnsafeGetObject(HInvoke* invoke) { - VisitJdkUnsafeGetObject(invoke); + VisitJdkUnsafeGetReference(invoke); } void IntrinsicLocationsBuilderARMVIXL::VisitUnsafeGetObjectVolatile(HInvoke* invoke) { - VisitJdkUnsafeGetObjectVolatile(invoke); + VisitJdkUnsafeGetReferenceVolatile(invoke); } void IntrinsicCodeGeneratorARMVIXL::VisitUnsafeGetObjectVolatile(HInvoke* invoke) { - VisitJdkUnsafeGetObjectVolatile(invoke); + VisitJdkUnsafeGetReferenceVolatile(invoke); +} + +void IntrinsicLocationsBuilderARMVIXL::VisitUnsafeGetByte(HInvoke* invoke) { + VisitJdkUnsafeGetByte(invoke); +} + +void IntrinsicCodeGeneratorARMVIXL::VisitUnsafeGetByte(HInvoke* invoke) { + VisitJdkUnsafeGetByte(invoke); } void IntrinsicLocationsBuilderARMVIXL::VisitJdkUnsafeGet(HInvoke* invoke) { @@ -2956,33 +2968,42 @@ void IntrinsicCodeGeneratorARMVIXL::VisitJdkUnsafeGetLongAcquire(HInvoke* invoke invoke, codegen_, DataType::Type::kInt64, std::memory_order_acquire, /*atomic=*/ true); } -void IntrinsicLocationsBuilderARMVIXL::VisitJdkUnsafeGetObject(HInvoke* invoke) { +void IntrinsicLocationsBuilderARMVIXL::VisitJdkUnsafeGetReference(HInvoke* invoke) { CreateUnsafeGetLocations(invoke, codegen_, DataType::Type::kReference, /*atomic=*/ false); } -void IntrinsicCodeGeneratorARMVIXL::VisitJdkUnsafeGetObject(HInvoke* invoke) { +void IntrinsicCodeGeneratorARMVIXL::VisitJdkUnsafeGetReference(HInvoke* invoke) { GenUnsafeGet( invoke, codegen_, DataType::Type::kReference, std::memory_order_relaxed, /*atomic=*/ false); } -void IntrinsicLocationsBuilderARMVIXL::VisitJdkUnsafeGetObjectVolatile(HInvoke* invoke) { +void IntrinsicLocationsBuilderARMVIXL::VisitJdkUnsafeGetReferenceVolatile(HInvoke* invoke) { CreateUnsafeGetLocations(invoke, codegen_, DataType::Type::kReference, /*atomic=*/ true); } -void IntrinsicCodeGeneratorARMVIXL::VisitJdkUnsafeGetObjectVolatile(HInvoke* invoke) { +void IntrinsicCodeGeneratorARMVIXL::VisitJdkUnsafeGetReferenceVolatile(HInvoke* invoke) { GenUnsafeGet( invoke, codegen_, DataType::Type::kReference, std::memory_order_seq_cst, /*atomic=*/ true); } -void IntrinsicLocationsBuilderARMVIXL::VisitJdkUnsafeGetObjectAcquire(HInvoke* invoke) { +void IntrinsicLocationsBuilderARMVIXL::VisitJdkUnsafeGetReferenceAcquire(HInvoke* invoke) { CreateUnsafeGetLocations(invoke, codegen_, DataType::Type::kReference, /*atomic=*/ true); } -void IntrinsicCodeGeneratorARMVIXL::VisitJdkUnsafeGetObjectAcquire(HInvoke* invoke) { +void IntrinsicCodeGeneratorARMVIXL::VisitJdkUnsafeGetReferenceAcquire(HInvoke* invoke) { GenUnsafeGet( invoke, codegen_, DataType::Type::kReference, std::memory_order_acquire, /*atomic=*/ true); } +void IntrinsicLocationsBuilderARMVIXL::VisitJdkUnsafeGetByte(HInvoke* invoke) { + CreateUnsafeGetLocations(invoke, codegen_, DataType::Type::kInt8, /*atomic=*/ false); +} + +void IntrinsicCodeGeneratorARMVIXL::VisitJdkUnsafeGetByte(HInvoke* invoke) { + 
GenUnsafeGet( + invoke, codegen_, DataType::Type::kInt8, std::memory_order_relaxed, /*atomic=*/ false); +} + static void GenerateIntrinsicSet(CodeGeneratorARMVIXL* codegen, DataType::Type type, std::memory_order order, @@ -3156,11 +3177,11 @@ void IntrinsicCodeGeneratorARMVIXL::VisitUnsafePutVolatile(HInvoke* invoke) { VisitJdkUnsafePutVolatile(invoke); } void IntrinsicLocationsBuilderARMVIXL::VisitUnsafePutObject(HInvoke* invoke) { - VisitJdkUnsafePutObject(invoke); + VisitJdkUnsafePutReference(invoke); } void IntrinsicCodeGeneratorARMVIXL::VisitUnsafePutObject(HInvoke* invoke) { - VisitJdkUnsafePutObject(invoke); + VisitJdkUnsafePutReference(invoke); } void IntrinsicLocationsBuilderARMVIXL::VisitUnsafePutObjectOrdered(HInvoke* invoke) { @@ -3172,11 +3193,11 @@ void IntrinsicCodeGeneratorARMVIXL::VisitUnsafePutObjectOrdered(HInvoke* invoke) } void IntrinsicLocationsBuilderARMVIXL::VisitUnsafePutObjectVolatile(HInvoke* invoke) { - VisitJdkUnsafePutObjectVolatile(invoke); + VisitJdkUnsafePutReferenceVolatile(invoke); } void IntrinsicCodeGeneratorARMVIXL::VisitUnsafePutObjectVolatile(HInvoke* invoke) { - VisitJdkUnsafePutObjectVolatile(invoke); + VisitJdkUnsafePutReferenceVolatile(invoke); } void IntrinsicLocationsBuilderARMVIXL::VisitUnsafePutLong(HInvoke* invoke) { @@ -3203,6 +3224,14 @@ void IntrinsicCodeGeneratorARMVIXL::VisitUnsafePutLongVolatile(HInvoke* invoke) VisitJdkUnsafePutLongVolatile(invoke); } +void IntrinsicLocationsBuilderARMVIXL::VisitUnsafePutByte(HInvoke* invoke) { + VisitJdkUnsafePutByte(invoke); +} + +void IntrinsicCodeGeneratorARMVIXL::VisitUnsafePutByte(HInvoke* invoke) { + VisitJdkUnsafePutByte(invoke); +} + void IntrinsicLocationsBuilderARMVIXL::VisitJdkUnsafePut(HInvoke* invoke) { CreateUnsafePutLocations(invoke, codegen_, DataType::Type::kInt32, /*atomic=*/ false); } @@ -3215,6 +3244,18 @@ void IntrinsicCodeGeneratorARMVIXL::VisitJdkUnsafePut(HInvoke* invoke) { codegen_); } +void IntrinsicLocationsBuilderARMVIXL::VisitJdkUnsafePutByte(HInvoke* invoke) { + CreateUnsafePutLocations(invoke, codegen_, DataType::Type::kInt8, /*atomic=*/ false); +} + +void IntrinsicCodeGeneratorARMVIXL::VisitJdkUnsafePutByte(HInvoke* invoke) { + GenUnsafePut(invoke, + DataType::Type::kInt8, + std::memory_order_relaxed, + /*atomic=*/ false, + codegen_); +} + void IntrinsicLocationsBuilderARMVIXL::VisitJdkUnsafePutOrdered(HInvoke* invoke) { CreateUnsafePutLocations(invoke, codegen_, DataType::Type::kInt32, /*atomic=*/ true); } @@ -3251,11 +3292,11 @@ void IntrinsicCodeGeneratorARMVIXL::VisitJdkUnsafePutRelease(HInvoke* invoke) { codegen_); } -void IntrinsicLocationsBuilderARMVIXL::VisitJdkUnsafePutObject(HInvoke* invoke) { +void IntrinsicLocationsBuilderARMVIXL::VisitJdkUnsafePutReference(HInvoke* invoke) { CreateUnsafePutLocations(invoke, codegen_, DataType::Type::kReference, /*atomic=*/ false); } -void IntrinsicCodeGeneratorARMVIXL::VisitJdkUnsafePutObject(HInvoke* invoke) { +void IntrinsicCodeGeneratorARMVIXL::VisitJdkUnsafePutReference(HInvoke* invoke) { GenUnsafePut(invoke, DataType::Type::kReference, std::memory_order_relaxed, @@ -3275,11 +3316,11 @@ void IntrinsicCodeGeneratorARMVIXL::VisitJdkUnsafePutObjectOrdered(HInvoke* invo codegen_); } -void IntrinsicLocationsBuilderARMVIXL::VisitJdkUnsafePutObjectVolatile(HInvoke* invoke) { +void IntrinsicLocationsBuilderARMVIXL::VisitJdkUnsafePutReferenceVolatile(HInvoke* invoke) { CreateUnsafePutLocations(invoke, codegen_, DataType::Type::kReference, /*atomic=*/ true); } -void 
IntrinsicCodeGeneratorARMVIXL::VisitJdkUnsafePutObjectVolatile(HInvoke* invoke) { +void IntrinsicCodeGeneratorARMVIXL::VisitJdkUnsafePutReferenceVolatile(HInvoke* invoke) { GenUnsafePut(invoke, DataType::Type::kReference, std::memory_order_seq_cst, @@ -3287,11 +3328,11 @@ void IntrinsicCodeGeneratorARMVIXL::VisitJdkUnsafePutObjectVolatile(HInvoke* inv codegen_); } -void IntrinsicLocationsBuilderARMVIXL::VisitJdkUnsafePutObjectRelease(HInvoke* invoke) { +void IntrinsicLocationsBuilderARMVIXL::VisitJdkUnsafePutReferenceRelease(HInvoke* invoke) { CreateUnsafePutLocations(invoke, codegen_, DataType::Type::kReference, /*atomic=*/ true); } -void IntrinsicCodeGeneratorARMVIXL::VisitJdkUnsafePutObjectRelease(HInvoke* invoke) { +void IntrinsicCodeGeneratorARMVIXL::VisitJdkUnsafePutReferenceRelease(HInvoke* invoke) { GenUnsafePut(invoke, DataType::Type::kReference, std::memory_order_release, @@ -3470,7 +3511,7 @@ static void GenerateCompareAndSet(CodeGeneratorARMVIXL* codegen, // branch goes to the read barrier slow path that clobbers `success` anyway. bool init_failure_for_cmp = success.IsValid() && - !(gUseReadBarrier && type == DataType::Type::kReference && expected.IsRegister()); + !(type == DataType::Type::kReference && codegen->EmitReadBarrier() && expected.IsRegister()); // Instruction scheduling: Loading a constant between LDREX* and using the loaded value // is essentially free, so prepare the failure value here if we can. bool init_failure_for_cmp_early = @@ -3574,7 +3615,7 @@ class ReadBarrierCasSlowPathARMVIXL : public SlowPathCodeARMVIXL { // Mark the `old_value_` from the main path and compare with `expected_`. if (kUseBakerReadBarrier) { DCHECK(mark_old_value_slow_path_ == nullptr); - arm_codegen->GenerateIntrinsicCasMoveWithBakerReadBarrier(old_value_temp_, old_value_); + arm_codegen->GenerateIntrinsicMoveWithBakerReadBarrier(old_value_temp_, old_value_); } else { DCHECK(mark_old_value_slow_path_ != nullptr); __ B(mark_old_value_slow_path_->GetEntryLabel()); @@ -3627,7 +3668,7 @@ class ReadBarrierCasSlowPathARMVIXL : public SlowPathCodeARMVIXL { __ Bind(&mark_old_value); if (kUseBakerReadBarrier) { DCHECK(update_old_value_slow_path_ == nullptr); - arm_codegen->GenerateIntrinsicCasMoveWithBakerReadBarrier(old_value_, old_value_temp_); + arm_codegen->GenerateIntrinsicMoveWithBakerReadBarrier(old_value_, old_value_temp_); } else { // Note: We could redirect the `failure` above directly to the entry label and bind // the exit label in the main path, but the main path would need to access the @@ -3654,8 +3695,9 @@ class ReadBarrierCasSlowPathARMVIXL : public SlowPathCodeARMVIXL { SlowPathCodeARMVIXL* update_old_value_slow_path_; }; -static void CreateUnsafeCASLocations(ArenaAllocator* allocator, HInvoke* invoke) { - const bool can_call = gUseReadBarrier && IsUnsafeCASObject(invoke); +static void CreateUnsafeCASLocations(HInvoke* invoke, CodeGeneratorARMVIXL* codegen) { + const bool can_call = codegen->EmitReadBarrier() && IsUnsafeCASReference(invoke); + ArenaAllocator* allocator = invoke->GetBlock()->GetGraph()->GetAllocator(); LocationSummary* locations = new (allocator) LocationSummary(invoke, can_call @@ -3706,7 +3748,7 @@ static void GenUnsafeCas(HInvoke* invoke, DataType::Type type, CodeGeneratorARMV vixl32::Label* exit_loop = &exit_loop_label; vixl32::Label* cmp_failure = &exit_loop_label; - if (gUseReadBarrier && type == DataType::Type::kReference) { + if (type == DataType::Type::kReference && codegen->EmitReadBarrier()) { // If marking, check if the stored reference is a 
from-space reference to the same // object as the to-space reference `expected`. If so, perform a custom CAS loop. ReadBarrierCasSlowPathARMVIXL* slow_path = @@ -3762,19 +3804,19 @@ void IntrinsicLocationsBuilderARMVIXL::VisitJdkUnsafeCASInt(HInvoke* invoke) { } void IntrinsicLocationsBuilderARMVIXL::VisitJdkUnsafeCASObject(HInvoke* invoke) { // `jdk.internal.misc.Unsafe.compareAndSwapObject` has compare-and-set semantics (see javadoc). - VisitJdkUnsafeCompareAndSetObject(invoke); + VisitJdkUnsafeCompareAndSetReference(invoke); } void IntrinsicLocationsBuilderARMVIXL::VisitJdkUnsafeCompareAndSetInt(HInvoke* invoke) { - CreateUnsafeCASLocations(allocator_, invoke); + CreateUnsafeCASLocations(invoke, codegen_); } -void IntrinsicLocationsBuilderARMVIXL::VisitJdkUnsafeCompareAndSetObject(HInvoke* invoke) { +void IntrinsicLocationsBuilderARMVIXL::VisitJdkUnsafeCompareAndSetReference(HInvoke* invoke) { // The only supported read barrier implementation is the Baker-style read barriers (b/173104084). - if (gUseReadBarrier && !kUseBakerReadBarrier) { + if (codegen_->EmitNonBakerReadBarrier()) { return; } - CreateUnsafeCASLocations(allocator_, invoke); + CreateUnsafeCASLocations(invoke, codegen_); } void IntrinsicCodeGeneratorARMVIXL::VisitUnsafeCASInt(HInvoke* invoke) { @@ -3790,15 +3832,15 @@ void IntrinsicCodeGeneratorARMVIXL::VisitJdkUnsafeCASInt(HInvoke* invoke) { } void IntrinsicCodeGeneratorARMVIXL::VisitJdkUnsafeCASObject(HInvoke* invoke) { // `jdk.internal.misc.Unsafe.compareAndSwapObject` has compare-and-set semantics (see javadoc). - VisitJdkUnsafeCompareAndSetObject(invoke); + VisitJdkUnsafeCompareAndSetReference(invoke); } void IntrinsicCodeGeneratorARMVIXL::VisitJdkUnsafeCompareAndSetInt(HInvoke* invoke) { GenUnsafeCas(invoke, DataType::Type::kInt32, codegen_); } -void IntrinsicCodeGeneratorARMVIXL::VisitJdkUnsafeCompareAndSetObject(HInvoke* invoke) { +void IntrinsicCodeGeneratorARMVIXL::VisitJdkUnsafeCompareAndSetReference(HInvoke* invoke) { // The only supported read barrier implementation is the Baker-style read barriers (b/173104084). - DCHECK_IMPLIES(gUseReadBarrier, kUseBakerReadBarrier); + DCHECK_IMPLIES(codegen_->EmitReadBarrier(), kUseBakerReadBarrier); GenUnsafeCas(invoke, DataType::Type::kReference, codegen_); } @@ -3944,6 +3986,172 @@ static void GenerateGetAndUpdate(CodeGeneratorARMVIXL* codegen, __ B(ne, &loop_label); } +static void CreateUnsafeGetAndUpdateLocations(HInvoke* invoke, + CodeGeneratorARMVIXL* codegen, + DataType::Type type, + GetAndUpdateOp get_and_update_op) { + const bool can_call = codegen->EmitReadBarrier() && IsUnsafeGetAndSetReference(invoke); + ArenaAllocator* allocator = invoke->GetBlock()->GetGraph()->GetAllocator(); + LocationSummary* locations = + new (allocator) LocationSummary(invoke, + can_call + ? LocationSummary::kCallOnSlowPath + : LocationSummary::kNoCall, + kIntrinsified); + if (can_call && kUseBakerReadBarrier) { + locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty()); // No caller-save registers. + } + locations->SetInAt(0, Location::NoLocation()); // Unused receiver. + locations->SetInAt(1, Location::RequiresRegister()); + locations->SetInAt(2, Location::RequiresRegister()); + locations->SetInAt(3, Location::RequiresRegister()); + + locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap); + + size_t num_temps = 1u; // We always need `tmp_ptr`. + if (get_and_update_op == GetAndUpdateOp::kAdd) { + // Add `maybe_temp` used for the new value in `GenerateGetAndUpdate()`. 
+ num_temps += (type == DataType::Type::kInt64) ? 2u : 1u; + if (type == DataType::Type::kInt64) { + // There are enough available registers but the register allocator can fail to allocate + // them correctly because it can block register pairs by single-register inputs and temps. + // To work around this limitation, use a fixed register pair for both the output as well + // as the offset which is not needed anymore after the address calculation. + // (Alternatively, we could set up distinct fixed locations for `offset`, `arg` and `out`.) + locations->SetInAt(2, LocationFrom(r0, r1)); + locations->UpdateOut(LocationFrom(r0, r1)); + } + } + locations->AddRegisterTemps(num_temps); +} + +static void GenUnsafeGetAndUpdate(HInvoke* invoke, + CodeGeneratorARMVIXL* codegen, + DataType::Type type, + GetAndUpdateOp get_and_update_op) { + ArmVIXLAssembler* assembler = codegen->GetAssembler(); + LocationSummary* locations = invoke->GetLocations(); + + Location out = locations->Out(); // Result. + vixl32::Register base = InputRegisterAt(invoke, 1); // Object pointer. + vixl32::Register offset = LowRegisterFrom(locations->InAt(2)); // Offset (discard high 4B). + Location arg = locations->InAt(3); // New value or addend. + vixl32::Register tmp_ptr = RegisterFrom(locations->GetTemp(0)); // Pointer to actual memory. + Location maybe_temp = Location::NoLocation(); + if (get_and_update_op == GetAndUpdateOp::kAdd) { + maybe_temp = (type == DataType::Type::kInt64) + ? LocationFrom(RegisterFrom(locations->GetTemp(1)), RegisterFrom(locations->GetTemp(2))) + : locations->GetTemp(1); + } + + UseScratchRegisterScope temps(assembler->GetVIXLAssembler()); + vixl32::Register temp = temps.Acquire(); + + if (type == DataType::Type::kReference) { + DCHECK(get_and_update_op == GetAndUpdateOp::kSet); + // Mark card for object as a new value shall be stored. + bool new_value_can_be_null = true; // TODO: Worth finding out this information? + vixl32::Register card = tmp_ptr; // Use the `tmp_ptr` also as the `card` temporary. + codegen->MarkGCCard(temp, card, base, /*value=*/ RegisterFrom(arg), new_value_can_be_null); + } + + // Note: UnsafeGetAndUpdate operations are sequentially consistent, requiring + // a barrier before and after the raw load/store-exclusive operation. 
+ + __ Add(tmp_ptr, base, Operand(offset)); + codegen->GenerateMemoryBarrier(MemBarrierKind::kAnyAny); + GenerateGetAndUpdate(codegen, + get_and_update_op, + type, + tmp_ptr, + arg, + /*old_value=*/ out, + /*store_result=*/ temp, + maybe_temp, + /*maybe_vreg_temp=*/ Location::NoLocation()); + codegen->GenerateMemoryBarrier(MemBarrierKind::kAnyAny); + + if (type == DataType::Type::kReference && codegen->EmitReadBarrier()) { + DCHECK(get_and_update_op == GetAndUpdateOp::kSet); + if (kUseBakerReadBarrier) { + codegen->GenerateIntrinsicMoveWithBakerReadBarrier(RegisterFrom(out), RegisterFrom(out)); + } else { + codegen->GenerateReadBarrierSlow( + invoke, + out, + out, + Location::RegisterLocation(base.GetCode()), + /*offset=*/ 0u, + /*index=*/ Location::RegisterLocation(offset.GetCode())); + } + } +} + +void IntrinsicLocationsBuilderARMVIXL::VisitUnsafeGetAndAddInt(HInvoke* invoke) { + VisitJdkUnsafeGetAndAddInt(invoke); +} +void IntrinsicLocationsBuilderARMVIXL::VisitUnsafeGetAndAddLong(HInvoke* invoke) { + VisitJdkUnsafeGetAndAddLong(invoke); +} +void IntrinsicLocationsBuilderARMVIXL::VisitUnsafeGetAndSetInt(HInvoke* invoke) { + VisitJdkUnsafeGetAndSetInt(invoke); +} +void IntrinsicLocationsBuilderARMVIXL::VisitUnsafeGetAndSetLong(HInvoke* invoke) { + VisitJdkUnsafeGetAndSetLong(invoke); +} +void IntrinsicLocationsBuilderARMVIXL::VisitUnsafeGetAndSetObject(HInvoke* invoke) { + VisitJdkUnsafeGetAndSetReference(invoke); +} + +void IntrinsicLocationsBuilderARMVIXL::VisitJdkUnsafeGetAndAddInt(HInvoke* invoke) { + CreateUnsafeGetAndUpdateLocations(invoke, codegen_, DataType::Type::kInt32, GetAndUpdateOp::kAdd); +} +void IntrinsicLocationsBuilderARMVIXL::VisitJdkUnsafeGetAndAddLong(HInvoke* invoke) { + CreateUnsafeGetAndUpdateLocations(invoke, codegen_, DataType::Type::kInt64, GetAndUpdateOp::kAdd); +} +void IntrinsicLocationsBuilderARMVIXL::VisitJdkUnsafeGetAndSetInt(HInvoke* invoke) { + CreateUnsafeGetAndUpdateLocations(invoke, codegen_, DataType::Type::kInt32, GetAndUpdateOp::kSet); +} +void IntrinsicLocationsBuilderARMVIXL::VisitJdkUnsafeGetAndSetLong(HInvoke* invoke) { + CreateUnsafeGetAndUpdateLocations(invoke, codegen_, DataType::Type::kInt64, GetAndUpdateOp::kSet); +} +void IntrinsicLocationsBuilderARMVIXL::VisitJdkUnsafeGetAndSetReference(HInvoke* invoke) { + CreateUnsafeGetAndUpdateLocations( + invoke, codegen_, DataType::Type::kReference, GetAndUpdateOp::kSet); +} + +void IntrinsicCodeGeneratorARMVIXL::VisitUnsafeGetAndAddInt(HInvoke* invoke) { + VisitJdkUnsafeGetAndAddInt(invoke); +} +void IntrinsicCodeGeneratorARMVIXL::VisitUnsafeGetAndAddLong(HInvoke* invoke) { + VisitJdkUnsafeGetAndAddLong(invoke); +} +void IntrinsicCodeGeneratorARMVIXL::VisitUnsafeGetAndSetInt(HInvoke* invoke) { + VisitJdkUnsafeGetAndSetInt(invoke); +} +void IntrinsicCodeGeneratorARMVIXL::VisitUnsafeGetAndSetLong(HInvoke* invoke) { + VisitJdkUnsafeGetAndSetLong(invoke); +} +void IntrinsicCodeGeneratorARMVIXL::VisitUnsafeGetAndSetObject(HInvoke* invoke) { + VisitJdkUnsafeGetAndSetReference(invoke); +} + +void IntrinsicCodeGeneratorARMVIXL::VisitJdkUnsafeGetAndAddInt(HInvoke* invoke) { + GenUnsafeGetAndUpdate(invoke, codegen_, DataType::Type::kInt32, GetAndUpdateOp::kAdd); +} +void IntrinsicCodeGeneratorARMVIXL::VisitJdkUnsafeGetAndAddLong(HInvoke* invoke) { + GenUnsafeGetAndUpdate(invoke, codegen_, DataType::Type::kInt64, GetAndUpdateOp::kAdd); +} +void IntrinsicCodeGeneratorARMVIXL::VisitJdkUnsafeGetAndSetInt(HInvoke* invoke) { + GenUnsafeGetAndUpdate(invoke, codegen_, DataType::Type::kInt32, GetAndUpdateOp::kSet); 
+} +void IntrinsicCodeGeneratorARMVIXL::VisitJdkUnsafeGetAndSetLong(HInvoke* invoke) { + GenUnsafeGetAndUpdate(invoke, codegen_, DataType::Type::kInt64, GetAndUpdateOp::kSet); +} +void IntrinsicCodeGeneratorARMVIXL::VisitJdkUnsafeGetAndSetReference(HInvoke* invoke) { + GenUnsafeGetAndUpdate(invoke, codegen_, DataType::Type::kReference, GetAndUpdateOp::kSet); +} + class VarHandleSlowPathARMVIXL : public IntrinsicSlowPathARMVIXL { public: VarHandleSlowPathARMVIXL(HInvoke* invoke, std::memory_order order) @@ -4335,23 +4543,23 @@ static void GenerateVarHandleTarget(HInvoke* invoke, __ Mov(target.offset, target_field->GetOffset().Uint32Value()); } else { // For static fields, we need to fill the `target.object` with the declaring class, - // so we can use `target.object` as temporary for the `ArtMethod*`. For instance fields, - // we do not need the declaring class, so we can forget the `ArtMethod*` when - // we load the `target.offset`, so use the `target.offset` to hold the `ArtMethod*`. - vixl32::Register method = (expected_coordinates_count == 0) ? target.object : target.offset; + // so we can use `target.object` as temporary for the `ArtField*`. For instance fields, + // we do not need the declaring class, so we can forget the `ArtField*` when + // we load the `target.offset`, so use the `target.offset` to hold the `ArtField*`. + vixl32::Register field = (expected_coordinates_count == 0) ? target.object : target.offset; const MemberOffset art_field_offset = mirror::FieldVarHandle::ArtFieldOffset(); const MemberOffset offset_offset = ArtField::OffsetOffset(); - // Load the ArtField, the offset and, if needed, declaring class. - __ Ldr(method, MemOperand(varhandle, art_field_offset.Int32Value())); - __ Ldr(target.offset, MemOperand(method, offset_offset.Int32Value())); + // Load the ArtField*, the offset and, if needed, declaring class. + __ Ldr(field, MemOperand(varhandle, art_field_offset.Int32Value())); + __ Ldr(target.offset, MemOperand(field, offset_offset.Int32Value())); if (expected_coordinates_count == 0u) { codegen->GenerateGcRootFieldLoad(invoke, LocationFrom(target.object), - method, + field, ArtField::DeclaringClassOffset().Int32Value(), - gCompilerReadBarrierOption); + codegen->GetCompilerReadBarrierOption()); } } } else { @@ -4371,7 +4579,8 @@ static void GenerateVarHandleTarget(HInvoke* invoke, } } -static LocationSummary* CreateVarHandleCommonLocations(HInvoke* invoke) { +static LocationSummary* CreateVarHandleCommonLocations(HInvoke* invoke, + CodeGeneratorARMVIXL* codegen) { size_t expected_coordinates_count = GetExpectedVarHandleCoordinatesCount(invoke); DataType::Type return_type = invoke->GetType(); @@ -4403,7 +4612,7 @@ static LocationSummary* CreateVarHandleCommonLocations(HInvoke* invoke) { } // Add a temporary for offset. - if ((gUseReadBarrier && !kUseBakerReadBarrier) && + if (codegen->EmitNonBakerReadBarrier() && GetExpectedVarHandleCoordinatesCount(invoke) == 0u) { // For static fields. // To preserve the offset value across the non-Baker read barrier slow path // for loading the declaring class, use a fixed callee-save register. 
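
The hunks above and below all apply one pattern: checks of the global `gUseReadBarrier` flag (alone or combined with `kUseBakerReadBarrier`) are replaced by queries on the code generator, `EmitReadBarrier()`, `EmitBakerReadBarrier()` and `EmitNonBakerReadBarrier()`, which is why helpers such as `CreateVarHandleCommonLocations()`, `CreateUnsafeCASLocations()` and `CreateVarHandleGetAndUpdateLocations()` now take the codegen as an extra parameter. The sketch below shows one plausible shape for such predicates; the class and member names are illustrative assumptions for this sketch, not the actual ART definitions.

// Minimal sketch, assuming the read barrier decision lives in per-compilation
// compiler options rather than in a process-global flag.
static constexpr bool kUseBakerReadBarrier = true;  // Build-time choice (assumed here).

class CompilerOptionsSketch {
 public:
  explicit CompilerOptionsSketch(bool emit_read_barrier)
      : emit_read_barrier_(emit_read_barrier) {}
  bool EmitReadBarrier() const { return emit_read_barrier_; }
 private:
  const bool emit_read_barrier_;
};

class CodeGeneratorSketch {
 public:
  explicit CodeGeneratorSketch(const CompilerOptionsSketch& options) : options_(options) {}
  // Replaces checks of the former global `gUseReadBarrier`.
  bool EmitReadBarrier() const { return options_.EmitReadBarrier(); }
  // Replaces `gUseReadBarrier && kUseBakerReadBarrier`.
  bool EmitBakerReadBarrier() const { return kUseBakerReadBarrier && EmitReadBarrier(); }
  // Replaces `gUseReadBarrier && !kUseBakerReadBarrier`.
  bool EmitNonBakerReadBarrier() const { return !kUseBakerReadBarrier && EmitReadBarrier(); }
 private:
  const CompilerOptionsSketch& options_;
};

Routing the decision through the codegen presumably lets read barrier emission vary per compilation (for example between JIT and AOT code generated in the same process) instead of being fixed process-wide; the remaining churn in this file is the mechanical threading of the codegen pointer to every place where that decision is consulted.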
@@ -4428,7 +4637,7 @@ static void CreateVarHandleGetLocations(HInvoke* invoke, return; } - if ((gUseReadBarrier && !kUseBakerReadBarrier) && + if (codegen->EmitNonBakerReadBarrier() && invoke->GetType() == DataType::Type::kReference && invoke->GetIntrinsic() != Intrinsics::kVarHandleGet && invoke->GetIntrinsic() != Intrinsics::kVarHandleGetOpaque) { @@ -4438,7 +4647,7 @@ static void CreateVarHandleGetLocations(HInvoke* invoke, return; } - LocationSummary* locations = CreateVarHandleCommonLocations(invoke); + LocationSummary* locations = CreateVarHandleCommonLocations(invoke, codegen); DataType::Type type = invoke->GetType(); if (type == DataType::Type::kFloat64 && Use64BitExclusiveLoadStore(atomic, codegen)) { @@ -4476,7 +4685,7 @@ static void GenerateVarHandleGet(HInvoke* invoke, Location maybe_temp = Location::NoLocation(); Location maybe_temp2 = Location::NoLocation(); Location maybe_temp3 = Location::NoLocation(); - if (gUseReadBarrier && kUseBakerReadBarrier && type == DataType::Type::kReference) { + if (type == DataType::Type::kReference && codegen->EmitBakerReadBarrier()) { // Reuse the offset temporary. maybe_temp = LocationFrom(target.offset); } else if (DataType::Is64BitType(type) && Use64BitExclusiveLoadStore(atomic, codegen)) { @@ -4580,7 +4789,7 @@ static void CreateVarHandleSetLocations(HInvoke* invoke, return; } - LocationSummary* locations = CreateVarHandleCommonLocations(invoke); + LocationSummary* locations = CreateVarHandleCommonLocations(invoke, codegen); uint32_t number_of_arguments = invoke->GetNumberOfArguments(); DataType::Type value_type = GetDataTypeFromShorty(invoke, number_of_arguments - 1u); @@ -4741,7 +4950,9 @@ void IntrinsicCodeGeneratorARMVIXL::VisitVarHandleSetVolatile(HInvoke* invoke) { GenerateVarHandleSet(invoke, codegen_, std::memory_order_seq_cst, /*atomic=*/ true); } -static void CreateVarHandleCompareAndSetOrExchangeLocations(HInvoke* invoke, bool return_success) { +static void CreateVarHandleCompareAndSetOrExchangeLocations(HInvoke* invoke, + CodeGeneratorARMVIXL* codegen, + bool return_success) { VarHandleOptimizations optimizations(invoke); if (optimizations.GetDoNotIntrinsify()) { return; @@ -4749,8 +4960,7 @@ static void CreateVarHandleCompareAndSetOrExchangeLocations(HInvoke* invoke, boo uint32_t number_of_arguments = invoke->GetNumberOfArguments(); DataType::Type value_type = GetDataTypeFromShorty(invoke, number_of_arguments - 1u); - if ((gUseReadBarrier && !kUseBakerReadBarrier) && - value_type == DataType::Type::kReference) { + if (value_type == DataType::Type::kReference && codegen->EmitNonBakerReadBarrier()) { // Unsupported for non-Baker read barrier because the artReadBarrierSlow() ignores // the passed reference and reloads it from the field. This breaks the read barriers // in slow path in different ways. The marked old value may not actually be a to-space @@ -4761,9 +4971,9 @@ static void CreateVarHandleCompareAndSetOrExchangeLocations(HInvoke* invoke, boo return; } - LocationSummary* locations = CreateVarHandleCommonLocations(invoke); + LocationSummary* locations = CreateVarHandleCommonLocations(invoke, codegen); - if (gUseReadBarrier && !kUseBakerReadBarrier) { + if (codegen->EmitNonBakerReadBarrier()) { // We need callee-save registers for both the class object and offset instead of // the temporaries reserved in CreateVarHandleCommonLocations(). 
static_assert(POPCOUNT(kArmCalleeSaveRefSpills) >= 2u); @@ -4799,7 +5009,7 @@ static void CreateVarHandleCompareAndSetOrExchangeLocations(HInvoke* invoke, boo locations->AddRegisterTemps(2u); } } - if (gUseReadBarrier && value_type == DataType::Type::kReference) { + if (value_type == DataType::Type::kReference && codegen->EmitReadBarrier()) { // Add a temporary for store result, also used for the `old_value_temp` in slow path. locations->AddTemp(Location::RequiresRegister()); } @@ -4930,7 +5140,7 @@ static void GenerateVarHandleCompareAndSetOrExchange(HInvoke* invoke, vixl32::Label* exit_loop = &exit_loop_label; vixl32::Label* cmp_failure = &exit_loop_label; - if (gUseReadBarrier && value_type == DataType::Type::kReference) { + if (value_type == DataType::Type::kReference && codegen->EmitReadBarrier()) { // The `old_value_temp` is used first for the marked `old_value` and then for the unmarked // reloaded old value for subsequent CAS in the slow path. This must not clobber `old_value`. vixl32::Register old_value_temp = return_success ? RegisterFrom(out) : store_result; @@ -5008,7 +5218,7 @@ static void GenerateVarHandleCompareAndSetOrExchange(HInvoke* invoke, } void IntrinsicLocationsBuilderARMVIXL::VisitVarHandleCompareAndExchange(HInvoke* invoke) { - CreateVarHandleCompareAndSetOrExchangeLocations(invoke, /*return_success=*/ false); + CreateVarHandleCompareAndSetOrExchangeLocations(invoke, codegen_, /*return_success=*/ false); } void IntrinsicCodeGeneratorARMVIXL::VisitVarHandleCompareAndExchange(HInvoke* invoke) { @@ -5017,7 +5227,7 @@ void IntrinsicCodeGeneratorARMVIXL::VisitVarHandleCompareAndExchange(HInvoke* in } void IntrinsicLocationsBuilderARMVIXL::VisitVarHandleCompareAndExchangeAcquire(HInvoke* invoke) { - CreateVarHandleCompareAndSetOrExchangeLocations(invoke, /*return_success=*/ false); + CreateVarHandleCompareAndSetOrExchangeLocations(invoke, codegen_, /*return_success=*/ false); } void IntrinsicCodeGeneratorARMVIXL::VisitVarHandleCompareAndExchangeAcquire(HInvoke* invoke) { @@ -5026,7 +5236,7 @@ void IntrinsicCodeGeneratorARMVIXL::VisitVarHandleCompareAndExchangeAcquire(HInv } void IntrinsicLocationsBuilderARMVIXL::VisitVarHandleCompareAndExchangeRelease(HInvoke* invoke) { - CreateVarHandleCompareAndSetOrExchangeLocations(invoke, /*return_success=*/ false); + CreateVarHandleCompareAndSetOrExchangeLocations(invoke, codegen_, /*return_success=*/ false); } void IntrinsicCodeGeneratorARMVIXL::VisitVarHandleCompareAndExchangeRelease(HInvoke* invoke) { @@ -5035,7 +5245,7 @@ void IntrinsicCodeGeneratorARMVIXL::VisitVarHandleCompareAndExchangeRelease(HInv } void IntrinsicLocationsBuilderARMVIXL::VisitVarHandleCompareAndSet(HInvoke* invoke) { - CreateVarHandleCompareAndSetOrExchangeLocations(invoke, /*return_success=*/ true); + CreateVarHandleCompareAndSetOrExchangeLocations(invoke, codegen_, /*return_success=*/ true); } void IntrinsicCodeGeneratorARMVIXL::VisitVarHandleCompareAndSet(HInvoke* invoke) { @@ -5044,7 +5254,7 @@ void IntrinsicCodeGeneratorARMVIXL::VisitVarHandleCompareAndSet(HInvoke* invoke) } void IntrinsicLocationsBuilderARMVIXL::VisitVarHandleWeakCompareAndSet(HInvoke* invoke) { - CreateVarHandleCompareAndSetOrExchangeLocations(invoke, /*return_success=*/ true); + CreateVarHandleCompareAndSetOrExchangeLocations(invoke, codegen_, /*return_success=*/ true); } void IntrinsicCodeGeneratorARMVIXL::VisitVarHandleWeakCompareAndSet(HInvoke* invoke) { @@ -5053,7 +5263,7 @@ void IntrinsicCodeGeneratorARMVIXL::VisitVarHandleWeakCompareAndSet(HInvoke* inv } void 
IntrinsicLocationsBuilderARMVIXL::VisitVarHandleWeakCompareAndSetAcquire(HInvoke* invoke) { - CreateVarHandleCompareAndSetOrExchangeLocations(invoke, /*return_success=*/ true); + CreateVarHandleCompareAndSetOrExchangeLocations(invoke, codegen_, /*return_success=*/ true); } void IntrinsicCodeGeneratorARMVIXL::VisitVarHandleWeakCompareAndSetAcquire(HInvoke* invoke) { @@ -5062,7 +5272,7 @@ void IntrinsicCodeGeneratorARMVIXL::VisitVarHandleWeakCompareAndSetAcquire(HInvo } void IntrinsicLocationsBuilderARMVIXL::VisitVarHandleWeakCompareAndSetPlain(HInvoke* invoke) { - CreateVarHandleCompareAndSetOrExchangeLocations(invoke, /*return_success=*/ true); + CreateVarHandleCompareAndSetOrExchangeLocations(invoke, codegen_, /*return_success=*/ true); } void IntrinsicCodeGeneratorARMVIXL::VisitVarHandleWeakCompareAndSetPlain(HInvoke* invoke) { @@ -5071,7 +5281,7 @@ void IntrinsicCodeGeneratorARMVIXL::VisitVarHandleWeakCompareAndSetPlain(HInvoke } void IntrinsicLocationsBuilderARMVIXL::VisitVarHandleWeakCompareAndSetRelease(HInvoke* invoke) { - CreateVarHandleCompareAndSetOrExchangeLocations(invoke, /*return_success=*/ true); + CreateVarHandleCompareAndSetOrExchangeLocations(invoke, codegen_, /*return_success=*/ true); } void IntrinsicCodeGeneratorARMVIXL::VisitVarHandleWeakCompareAndSetRelease(HInvoke* invoke) { @@ -5080,21 +5290,21 @@ void IntrinsicCodeGeneratorARMVIXL::VisitVarHandleWeakCompareAndSetRelease(HInvo } static void CreateVarHandleGetAndUpdateLocations(HInvoke* invoke, + CodeGeneratorARMVIXL* codegen, GetAndUpdateOp get_and_update_op) { VarHandleOptimizations optimizations(invoke); if (optimizations.GetDoNotIntrinsify()) { return; } - if ((gUseReadBarrier && !kUseBakerReadBarrier) && - invoke->GetType() == DataType::Type::kReference) { + if (invoke->GetType() == DataType::Type::kReference && codegen->EmitNonBakerReadBarrier()) { // Unsupported for non-Baker read barrier because the artReadBarrierSlow() ignores // the passed reference and reloads it from the field, thus seeing the new value // that we have just stored. (And it also gets the memory visibility wrong.) b/173104084 return; } - LocationSummary* locations = CreateVarHandleCommonLocations(invoke); + LocationSummary* locations = CreateVarHandleCommonLocations(invoke, codegen); // We can reuse the declaring class (if present) and offset temporary, except for // non-Baker read barriers that need them for the slow path. @@ -5107,8 +5317,7 @@ static void CreateVarHandleGetAndUpdateLocations(HInvoke* invoke, // Add temps needed to do the GenerateGetAndUpdate() with core registers. size_t temps_needed = (value_type == DataType::Type::kFloat64) ? 5u : 3u; locations->AddRegisterTemps(temps_needed - locations->GetTempCount()); - } else if ((gUseReadBarrier && !kUseBakerReadBarrier) && - value_type == DataType::Type::kReference) { + } else if (value_type == DataType::Type::kReference && codegen->EmitNonBakerReadBarrier()) { // We need to preserve the declaring class (if present) and offset for read barrier // slow paths, so we must use a separate temporary for the exclusive store result. 
locations->AddTemp(Location::RequiresRegister()); @@ -5213,7 +5422,7 @@ static void GenerateVarHandleGetAndUpdate(HInvoke* invoke, if (byte_swap) { GenerateReverseBytes(assembler, DataType::Type::kInt32, arg, arg); } - } else if (gUseReadBarrier && value_type == DataType::Type::kReference) { + } else if (value_type == DataType::Type::kReference && codegen->EmitReadBarrier()) { if (kUseBakerReadBarrier) { // Load the old value initially to a temporary register. // We shall move it to `out` later with a read barrier. @@ -5296,10 +5505,10 @@ static void GenerateVarHandleGetAndUpdate(HInvoke* invoke, } else { __ Vmov(SRegisterFrom(out), RegisterFrom(old_value)); } - } else if (gUseReadBarrier && value_type == DataType::Type::kReference) { + } else if (value_type == DataType::Type::kReference && codegen->EmitReadBarrier()) { if (kUseBakerReadBarrier) { - codegen->GenerateIntrinsicCasMoveWithBakerReadBarrier(RegisterFrom(out), - RegisterFrom(old_value)); + codegen->GenerateIntrinsicMoveWithBakerReadBarrier(RegisterFrom(out), + RegisterFrom(old_value)); } else { codegen->GenerateReadBarrierSlow( invoke, @@ -5327,7 +5536,7 @@ static void GenerateVarHandleGetAndUpdate(HInvoke* invoke, } void IntrinsicLocationsBuilderARMVIXL::VisitVarHandleGetAndSet(HInvoke* invoke) { - CreateVarHandleGetAndUpdateLocations(invoke, GetAndUpdateOp::kSet); + CreateVarHandleGetAndUpdateLocations(invoke, codegen_, GetAndUpdateOp::kSet); } void IntrinsicCodeGeneratorARMVIXL::VisitVarHandleGetAndSet(HInvoke* invoke) { @@ -5335,7 +5544,7 @@ void IntrinsicCodeGeneratorARMVIXL::VisitVarHandleGetAndSet(HInvoke* invoke) { } void IntrinsicLocationsBuilderARMVIXL::VisitVarHandleGetAndSetAcquire(HInvoke* invoke) { - CreateVarHandleGetAndUpdateLocations(invoke, GetAndUpdateOp::kSet); + CreateVarHandleGetAndUpdateLocations(invoke, codegen_, GetAndUpdateOp::kSet); } void IntrinsicCodeGeneratorARMVIXL::VisitVarHandleGetAndSetAcquire(HInvoke* invoke) { @@ -5343,7 +5552,7 @@ void IntrinsicCodeGeneratorARMVIXL::VisitVarHandleGetAndSetAcquire(HInvoke* invo } void IntrinsicLocationsBuilderARMVIXL::VisitVarHandleGetAndSetRelease(HInvoke* invoke) { - CreateVarHandleGetAndUpdateLocations(invoke, GetAndUpdateOp::kSet); + CreateVarHandleGetAndUpdateLocations(invoke, codegen_, GetAndUpdateOp::kSet); } void IntrinsicCodeGeneratorARMVIXL::VisitVarHandleGetAndSetRelease(HInvoke* invoke) { @@ -5351,7 +5560,7 @@ void IntrinsicCodeGeneratorARMVIXL::VisitVarHandleGetAndSetRelease(HInvoke* invo } void IntrinsicLocationsBuilderARMVIXL::VisitVarHandleGetAndAdd(HInvoke* invoke) { - CreateVarHandleGetAndUpdateLocations(invoke, GetAndUpdateOp::kAdd); + CreateVarHandleGetAndUpdateLocations(invoke, codegen_, GetAndUpdateOp::kAdd); } void IntrinsicCodeGeneratorARMVIXL::VisitVarHandleGetAndAdd(HInvoke* invoke) { @@ -5359,7 +5568,7 @@ void IntrinsicCodeGeneratorARMVIXL::VisitVarHandleGetAndAdd(HInvoke* invoke) { } void IntrinsicLocationsBuilderARMVIXL::VisitVarHandleGetAndAddAcquire(HInvoke* invoke) { - CreateVarHandleGetAndUpdateLocations(invoke, GetAndUpdateOp::kAdd); + CreateVarHandleGetAndUpdateLocations(invoke, codegen_, GetAndUpdateOp::kAdd); } void IntrinsicCodeGeneratorARMVIXL::VisitVarHandleGetAndAddAcquire(HInvoke* invoke) { @@ -5367,7 +5576,7 @@ void IntrinsicCodeGeneratorARMVIXL::VisitVarHandleGetAndAddAcquire(HInvoke* invo } void IntrinsicLocationsBuilderARMVIXL::VisitVarHandleGetAndAddRelease(HInvoke* invoke) { - CreateVarHandleGetAndUpdateLocations(invoke, GetAndUpdateOp::kAdd); + CreateVarHandleGetAndUpdateLocations(invoke, codegen_, 
GetAndUpdateOp::kAdd); } void IntrinsicCodeGeneratorARMVIXL::VisitVarHandleGetAndAddRelease(HInvoke* invoke) { @@ -5375,7 +5584,7 @@ void IntrinsicCodeGeneratorARMVIXL::VisitVarHandleGetAndAddRelease(HInvoke* invo } void IntrinsicLocationsBuilderARMVIXL::VisitVarHandleGetAndBitwiseAnd(HInvoke* invoke) { - CreateVarHandleGetAndUpdateLocations(invoke, GetAndUpdateOp::kAnd); + CreateVarHandleGetAndUpdateLocations(invoke, codegen_, GetAndUpdateOp::kAnd); } void IntrinsicCodeGeneratorARMVIXL::VisitVarHandleGetAndBitwiseAnd(HInvoke* invoke) { @@ -5383,7 +5592,7 @@ void IntrinsicCodeGeneratorARMVIXL::VisitVarHandleGetAndBitwiseAnd(HInvoke* invo } void IntrinsicLocationsBuilderARMVIXL::VisitVarHandleGetAndBitwiseAndAcquire(HInvoke* invoke) { - CreateVarHandleGetAndUpdateLocations(invoke, GetAndUpdateOp::kAnd); + CreateVarHandleGetAndUpdateLocations(invoke, codegen_, GetAndUpdateOp::kAnd); } void IntrinsicCodeGeneratorARMVIXL::VisitVarHandleGetAndBitwiseAndAcquire(HInvoke* invoke) { @@ -5391,7 +5600,7 @@ void IntrinsicCodeGeneratorARMVIXL::VisitVarHandleGetAndBitwiseAndAcquire(HInvok } void IntrinsicLocationsBuilderARMVIXL::VisitVarHandleGetAndBitwiseAndRelease(HInvoke* invoke) { - CreateVarHandleGetAndUpdateLocations(invoke, GetAndUpdateOp::kAnd); + CreateVarHandleGetAndUpdateLocations(invoke, codegen_, GetAndUpdateOp::kAnd); } void IntrinsicCodeGeneratorARMVIXL::VisitVarHandleGetAndBitwiseAndRelease(HInvoke* invoke) { @@ -5399,7 +5608,7 @@ void IntrinsicCodeGeneratorARMVIXL::VisitVarHandleGetAndBitwiseAndRelease(HInvok } void IntrinsicLocationsBuilderARMVIXL::VisitVarHandleGetAndBitwiseOr(HInvoke* invoke) { - CreateVarHandleGetAndUpdateLocations(invoke, GetAndUpdateOp::kOr); + CreateVarHandleGetAndUpdateLocations(invoke, codegen_, GetAndUpdateOp::kOr); } void IntrinsicCodeGeneratorARMVIXL::VisitVarHandleGetAndBitwiseOr(HInvoke* invoke) { @@ -5407,7 +5616,7 @@ void IntrinsicCodeGeneratorARMVIXL::VisitVarHandleGetAndBitwiseOr(HInvoke* invok } void IntrinsicLocationsBuilderARMVIXL::VisitVarHandleGetAndBitwiseOrAcquire(HInvoke* invoke) { - CreateVarHandleGetAndUpdateLocations(invoke, GetAndUpdateOp::kOr); + CreateVarHandleGetAndUpdateLocations(invoke, codegen_, GetAndUpdateOp::kOr); } void IntrinsicCodeGeneratorARMVIXL::VisitVarHandleGetAndBitwiseOrAcquire(HInvoke* invoke) { @@ -5415,7 +5624,7 @@ void IntrinsicCodeGeneratorARMVIXL::VisitVarHandleGetAndBitwiseOrAcquire(HInvoke } void IntrinsicLocationsBuilderARMVIXL::VisitVarHandleGetAndBitwiseOrRelease(HInvoke* invoke) { - CreateVarHandleGetAndUpdateLocations(invoke, GetAndUpdateOp::kOr); + CreateVarHandleGetAndUpdateLocations(invoke, codegen_, GetAndUpdateOp::kOr); } void IntrinsicCodeGeneratorARMVIXL::VisitVarHandleGetAndBitwiseOrRelease(HInvoke* invoke) { @@ -5423,7 +5632,7 @@ void IntrinsicCodeGeneratorARMVIXL::VisitVarHandleGetAndBitwiseOrRelease(HInvoke } void IntrinsicLocationsBuilderARMVIXL::VisitVarHandleGetAndBitwiseXor(HInvoke* invoke) { - CreateVarHandleGetAndUpdateLocations(invoke, GetAndUpdateOp::kXor); + CreateVarHandleGetAndUpdateLocations(invoke, codegen_, GetAndUpdateOp::kXor); } void IntrinsicCodeGeneratorARMVIXL::VisitVarHandleGetAndBitwiseXor(HInvoke* invoke) { @@ -5431,7 +5640,7 @@ void IntrinsicCodeGeneratorARMVIXL::VisitVarHandleGetAndBitwiseXor(HInvoke* invo } void IntrinsicLocationsBuilderARMVIXL::VisitVarHandleGetAndBitwiseXorAcquire(HInvoke* invoke) { - CreateVarHandleGetAndUpdateLocations(invoke, GetAndUpdateOp::kXor); + CreateVarHandleGetAndUpdateLocations(invoke, codegen_, GetAndUpdateOp::kXor); } void 
IntrinsicCodeGeneratorARMVIXL::VisitVarHandleGetAndBitwiseXorAcquire(HInvoke* invoke) { @@ -5439,7 +5648,7 @@ void IntrinsicCodeGeneratorARMVIXL::VisitVarHandleGetAndBitwiseXorAcquire(HInvok } void IntrinsicLocationsBuilderARMVIXL::VisitVarHandleGetAndBitwiseXorRelease(HInvoke* invoke) { - CreateVarHandleGetAndUpdateLocations(invoke, GetAndUpdateOp::kXor); + CreateVarHandleGetAndUpdateLocations(invoke, codegen_, GetAndUpdateOp::kXor); } void IntrinsicCodeGeneratorARMVIXL::VisitVarHandleGetAndBitwiseXorRelease(HInvoke* invoke) { diff --git a/compiler/optimizing/intrinsics_arm_vixl.h b/compiler/optimizing/intrinsics_arm_vixl.h index 54475bcc7e..fd86866d4e 100644 --- a/compiler/optimizing/intrinsics_arm_vixl.h +++ b/compiler/optimizing/intrinsics_arm_vixl.h @@ -19,6 +19,7 @@ #include "base/macros.h" #include "intrinsics.h" +#include "intrinsics_list.h" #include "utils/arm/assembler_arm_vixl.h" namespace art HIDDEN { @@ -36,9 +37,7 @@ class IntrinsicLocationsBuilderARMVIXL final : public IntrinsicVisitor { #define OPTIMIZING_INTRINSICS(Name, IsStatic, NeedsEnvironmentOrCache, SideEffects, Exceptions, ...) \ void Visit ## Name(HInvoke* invoke) override; -#include "intrinsics_list.h" - INTRINSICS_LIST(OPTIMIZING_INTRINSICS) -#undef INTRINSICS_LIST + ART_INTRINSICS_LIST(OPTIMIZING_INTRINSICS) #undef OPTIMIZING_INTRINSICS // Check whether an invoke is an intrinsic, and if so, create a location summary. Returns whether @@ -63,15 +62,17 @@ class IntrinsicCodeGeneratorARMVIXL final : public IntrinsicVisitor { #define OPTIMIZING_INTRINSICS(Name, IsStatic, NeedsEnvironmentOrCache, SideEffects, Exceptions, ...) \ void Visit ## Name(HInvoke* invoke) override; -#include "intrinsics_list.h" - INTRINSICS_LIST(OPTIMIZING_INTRINSICS) -#undef INTRINSICS_LIST + ART_INTRINSICS_LIST(OPTIMIZING_INTRINSICS) #undef OPTIMIZING_INTRINSICS private: ArenaAllocator* GetAllocator(); ArmVIXLAssembler* GetAssembler(); + void HandleValueOf(HInvoke* invoke, + const IntrinsicVisitor::ValueOfInfo& info, + DataType::Type type); + CodeGeneratorARMVIXL* const codegen_; DISALLOW_COPY_AND_ASSIGN(IntrinsicCodeGeneratorARMVIXL); diff --git a/compiler/optimizing/intrinsics_riscv64.cc b/compiler/optimizing/intrinsics_riscv64.cc new file mode 100644 index 0000000000..7f99f91374 --- /dev/null +++ b/compiler/optimizing/intrinsics_riscv64.cc @@ -0,0 +1,4584 @@ +/* + * Copyright (C) 2023 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "intrinsics_riscv64.h" + +#include "code_generator_riscv64.h" +#include "intrinsic_objects.h" +#include "intrinsics_utils.h" +#include "well_known_classes.h" + +namespace art HIDDEN { +namespace riscv64 { + +using IntrinsicSlowPathRISCV64 = IntrinsicSlowPath<InvokeDexCallingConventionVisitorRISCV64, + SlowPathCodeRISCV64, + Riscv64Assembler>; + +#define __ assembler-> + +// Slow path implementing the SystemArrayCopy intrinsic copy loop with read barriers. 
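
The class that follows emits that loop in RISC-V assembly: load one reference, call the read-barrier mark entrypoint on it, store it, advance both pointers, and repeat until the end of the source range. A rough portable sketch of the same shape, where Mark() is only a placeholder for the runtime entrypoint (written here as an identity function so the snippet is self-contained):

#include <cstdint>

// Placeholder for the ReadBarrier::Mark entrypoint; the real call may return
// a different (to-space) reference for the same object.
static uint32_t Mark(uint32_t ref) { return ref; }

// Shape of the slow copy loop over 32-bit heap references.
static void SlowCopyLoop(uint32_t* src, uint32_t* dst, uint32_t* src_end) {
  do {
    uint32_t ref = *src;      // Loadwu tmp, [src_curr_addr]
    ref = Mark(ref);          // runtime mark call, no stack map required
    *dst = ref;               // Storew tmp, [dst_curr_addr]
    ++src;                    // Addi src_curr_addr, src_curr_addr, element_size
    ++dst;                    // Addi dst_curr_addr, dst_curr_addr, element_size
  } while (src != src_end);   // Bne src_curr_addr, src_stop_addr, slow_copy_loop
}
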
+class ReadBarrierSystemArrayCopySlowPathRISCV64 : public SlowPathCodeRISCV64 { + public: + ReadBarrierSystemArrayCopySlowPathRISCV64(HInstruction* instruction, Location tmp) + : SlowPathCodeRISCV64(instruction), tmp_(tmp) {} + + void EmitNativeCode(CodeGenerator* codegen_in) override { + DCHECK(codegen_in->EmitBakerReadBarrier()); + CodeGeneratorRISCV64* codegen = down_cast<CodeGeneratorRISCV64*>(codegen_in); + Riscv64Assembler* assembler = codegen->GetAssembler(); + LocationSummary* locations = instruction_->GetLocations(); + DCHECK(locations->CanCall()); + DCHECK(instruction_->IsInvokeStaticOrDirect()) + << "Unexpected instruction in read barrier arraycopy slow path: " + << instruction_->DebugName(); + DCHECK(instruction_->GetLocations()->Intrinsified()); + DCHECK_EQ(instruction_->AsInvoke()->GetIntrinsic(), Intrinsics::kSystemArrayCopy); + + const int32_t element_size = DataType::Size(DataType::Type::kReference); + + XRegister src_curr_addr = locations->GetTemp(0).AsRegister<XRegister>(); + XRegister dst_curr_addr = locations->GetTemp(1).AsRegister<XRegister>(); + XRegister src_stop_addr = locations->GetTemp(2).AsRegister<XRegister>(); + XRegister tmp_reg = tmp_.AsRegister<XRegister>(); + + __ Bind(GetEntryLabel()); + Riscv64Label slow_copy_loop; + __ Bind(&slow_copy_loop); + __ Loadwu(tmp_reg, src_curr_addr, 0); + codegen->MaybeUnpoisonHeapReference(tmp_reg); + // TODO: Inline the mark bit check before calling the runtime? + // tmp_reg = ReadBarrier::Mark(tmp_reg); + // No need to save live registers; it's taken care of by the + // entrypoint. Also, there is no need to update the stack mask, + // as this runtime call will not trigger a garbage collection. + // (See ReadBarrierMarkSlowPathRISCV64::EmitNativeCode for more + // explanations.) + int32_t entry_point_offset = ReadBarrierMarkEntrypointOffset(tmp_); + // This runtime call does not require a stack map. 
+ codegen->InvokeRuntimeWithoutRecordingPcInfo(entry_point_offset, instruction_, this); + codegen->MaybePoisonHeapReference(tmp_reg); + __ Storew(tmp_reg, dst_curr_addr, 0); + __ Addi(src_curr_addr, src_curr_addr, element_size); + __ Addi(dst_curr_addr, dst_curr_addr, element_size); + __ Bne(src_curr_addr, src_stop_addr, &slow_copy_loop); + __ J(GetExitLabel()); + } + + const char* GetDescription() const override { + return "ReadBarrierSystemArrayCopySlowPathRISCV64"; + } + + private: + Location tmp_; + + DISALLOW_COPY_AND_ASSIGN(ReadBarrierSystemArrayCopySlowPathRISCV64); +}; + +bool IntrinsicLocationsBuilderRISCV64::TryDispatch(HInvoke* invoke) { + Dispatch(invoke); + LocationSummary* res = invoke->GetLocations(); + if (res == nullptr) { + return false; + } + return res->Intrinsified(); +} + +Riscv64Assembler* IntrinsicCodeGeneratorRISCV64::GetAssembler() { + return codegen_->GetAssembler(); +} + +static void CreateFPToIntLocations(ArenaAllocator* allocator, HInvoke* invoke) { + LocationSummary* locations = + new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified); + locations->SetInAt(0, Location::RequiresFpuRegister()); + locations->SetOut(Location::RequiresRegister()); +} + +static void CreateIntToFPLocations(ArenaAllocator* allocator, HInvoke* invoke) { + LocationSummary* locations = + new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified); + locations->SetInAt(0, Location::RequiresRegister()); + locations->SetOut(Location::RequiresFpuRegister()); +} + +static void CreateFPToFPCallLocations(ArenaAllocator* allocator, HInvoke* invoke) { + DCHECK_EQ(invoke->GetNumberOfArguments(), 1U); + DCHECK(DataType::IsFloatingPointType(invoke->InputAt(0)->GetType())); + DCHECK(DataType::IsFloatingPointType(invoke->GetType())); + + LocationSummary* const locations = + new (allocator) LocationSummary(invoke, LocationSummary::kCallOnMainOnly, kIntrinsified); + InvokeRuntimeCallingConvention calling_convention; + + locations->SetInAt(0, Location::FpuRegisterLocation(calling_convention.GetFpuRegisterAt(0))); + locations->SetOut(calling_convention.GetReturnLocation(invoke->GetType())); +} + +static void CreateFPFPToFPCallLocations(ArenaAllocator* allocator, HInvoke* invoke) { + DCHECK_EQ(invoke->GetNumberOfArguments(), 2U); + DCHECK(DataType::IsFloatingPointType(invoke->InputAt(0)->GetType())); + DCHECK(DataType::IsFloatingPointType(invoke->InputAt(1)->GetType())); + DCHECK(DataType::IsFloatingPointType(invoke->GetType())); + + LocationSummary* const locations = + new (allocator) LocationSummary(invoke, LocationSummary::kCallOnMainOnly, kIntrinsified); + InvokeRuntimeCallingConvention calling_convention; + + locations->SetInAt(0, Location::FpuRegisterLocation(calling_convention.GetFpuRegisterAt(0))); + locations->SetInAt(1, Location::FpuRegisterLocation(calling_convention.GetFpuRegisterAt(1))); + locations->SetOut(calling_convention.GetReturnLocation(invoke->GetType())); +} + +static void CreateFpFpFpToFpNoOverlapLocations(ArenaAllocator* allocator, HInvoke* invoke) { + DCHECK_EQ(invoke->GetNumberOfArguments(), 3U); + DCHECK(DataType::IsFloatingPointType(invoke->InputAt(0)->GetType())); + DCHECK(DataType::IsFloatingPointType(invoke->InputAt(1)->GetType())); + DCHECK(DataType::IsFloatingPointType(invoke->InputAt(2)->GetType())); + DCHECK(DataType::IsFloatingPointType(invoke->GetType())); + + LocationSummary* const locations = + new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified); + + locations->SetInAt(0, 
Location::RequiresFpuRegister()); + locations->SetInAt(1, Location::RequiresFpuRegister()); + locations->SetInAt(2, Location::RequiresFpuRegister()); + locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap); +} + +static void CreateFPToFPLocations(ArenaAllocator* allocator, HInvoke* invoke) { + LocationSummary* locations = + new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified); + locations->SetInAt(0, Location::RequiresFpuRegister()); + locations->SetOut(Location::RequiresFpuRegister()); +} + +void IntrinsicLocationsBuilderRISCV64::VisitDoubleDoubleToRawLongBits(HInvoke* invoke) { + CreateFPToIntLocations(allocator_, invoke); +} + +void IntrinsicCodeGeneratorRISCV64::VisitDoubleDoubleToRawLongBits(HInvoke* invoke) { + LocationSummary* locations = invoke->GetLocations(); + Riscv64Assembler* assembler = GetAssembler(); + __ FMvXD(locations->Out().AsRegister<XRegister>(), locations->InAt(0).AsFpuRegister<FRegister>()); +} + +void IntrinsicLocationsBuilderRISCV64::VisitDoubleLongBitsToDouble(HInvoke* invoke) { + CreateIntToFPLocations(allocator_, invoke); +} + +void IntrinsicCodeGeneratorRISCV64::VisitDoubleLongBitsToDouble(HInvoke* invoke) { + LocationSummary* locations = invoke->GetLocations(); + Riscv64Assembler* assembler = GetAssembler(); + __ FMvDX(locations->Out().AsFpuRegister<FRegister>(), locations->InAt(0).AsRegister<XRegister>()); +} + +void IntrinsicLocationsBuilderRISCV64::VisitFloatFloatToRawIntBits(HInvoke* invoke) { + CreateFPToIntLocations(allocator_, invoke); +} + +void IntrinsicCodeGeneratorRISCV64::VisitFloatFloatToRawIntBits(HInvoke* invoke) { + LocationSummary* locations = invoke->GetLocations(); + Riscv64Assembler* assembler = GetAssembler(); + __ FMvXW(locations->Out().AsRegister<XRegister>(), locations->InAt(0).AsFpuRegister<FRegister>()); +} + +void IntrinsicLocationsBuilderRISCV64::VisitFloatIntBitsToFloat(HInvoke* invoke) { + CreateIntToFPLocations(allocator_, invoke); +} + +void IntrinsicCodeGeneratorRISCV64::VisitFloatIntBitsToFloat(HInvoke* invoke) { + LocationSummary* locations = invoke->GetLocations(); + Riscv64Assembler* assembler = GetAssembler(); + __ FMvWX(locations->Out().AsFpuRegister<FRegister>(), locations->InAt(0).AsRegister<XRegister>()); +} + +void IntrinsicLocationsBuilderRISCV64::VisitDoubleIsInfinite(HInvoke* invoke) { + CreateFPToIntLocations(allocator_, invoke); +} + +void IntrinsicCodeGeneratorRISCV64::VisitDoubleIsInfinite(HInvoke* invoke) { + LocationSummary* locations = invoke->GetLocations(); + Riscv64Assembler* assembler = GetAssembler(); + XRegister out = locations->Out().AsRegister<XRegister>(); + __ FClassD(out, locations->InAt(0).AsFpuRegister<FRegister>()); + __ Andi(out, out, kPositiveInfinity | kNegativeInfinity); + __ Snez(out, out); +} + +void IntrinsicLocationsBuilderRISCV64::VisitFloatIsInfinite(HInvoke* invoke) { + CreateFPToIntLocations(allocator_, invoke); +} + +void IntrinsicCodeGeneratorRISCV64::VisitFloatIsInfinite(HInvoke* invoke) { + LocationSummary* locations = invoke->GetLocations(); + Riscv64Assembler* assembler = GetAssembler(); + XRegister out = locations->Out().AsRegister<XRegister>(); + __ FClassS(out, locations->InAt(0).AsFpuRegister<FRegister>()); + __ Andi(out, out, kPositiveInfinity | kNegativeInfinity); + __ Snez(out, out); +} + +static void CreateIntToIntNoOverlapLocations(ArenaAllocator* allocator, HInvoke* invoke) { + LocationSummary* locations = + new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified); + locations->SetInAt(0, 
Location::RequiresRegister()); + locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); +} + +template <typename EmitOp> +void EmitMemoryPeek(HInvoke* invoke, EmitOp&& emit_op) { + LocationSummary* locations = invoke->GetLocations(); + emit_op(locations->Out().AsRegister<XRegister>(), locations->InAt(0).AsRegister<XRegister>()); +} + +void IntrinsicLocationsBuilderRISCV64::VisitMemoryPeekByte(HInvoke* invoke) { + CreateIntToIntNoOverlapLocations(allocator_, invoke); +} + +void IntrinsicCodeGeneratorRISCV64::VisitMemoryPeekByte(HInvoke* invoke) { + Riscv64Assembler* assembler = GetAssembler(); + EmitMemoryPeek(invoke, [&](XRegister rd, XRegister rs1) { __ Lb(rd, rs1, 0); }); +} + +void IntrinsicLocationsBuilderRISCV64::VisitMemoryPeekIntNative(HInvoke* invoke) { + CreateIntToIntNoOverlapLocations(allocator_, invoke); +} + +void IntrinsicCodeGeneratorRISCV64::VisitMemoryPeekIntNative(HInvoke* invoke) { + Riscv64Assembler* assembler = GetAssembler(); + EmitMemoryPeek(invoke, [&](XRegister rd, XRegister rs1) { __ Lw(rd, rs1, 0); }); +} + +void IntrinsicLocationsBuilderRISCV64::VisitMemoryPeekLongNative(HInvoke* invoke) { + CreateIntToIntNoOverlapLocations(allocator_, invoke); +} + +void IntrinsicCodeGeneratorRISCV64::VisitMemoryPeekLongNative(HInvoke* invoke) { + Riscv64Assembler* assembler = GetAssembler(); + EmitMemoryPeek(invoke, [&](XRegister rd, XRegister rs1) { __ Ld(rd, rs1, 0); }); +} + +void IntrinsicLocationsBuilderRISCV64::VisitMemoryPeekShortNative(HInvoke* invoke) { + CreateIntToIntNoOverlapLocations(allocator_, invoke); +} + +void IntrinsicCodeGeneratorRISCV64::VisitMemoryPeekShortNative(HInvoke* invoke) { + Riscv64Assembler* assembler = GetAssembler(); + EmitMemoryPeek(invoke, [&](XRegister rd, XRegister rs1) { __ Lh(rd, rs1, 0); }); +} + +static void CreateIntIntToVoidLocations(ArenaAllocator* allocator, HInvoke* invoke) { + LocationSummary* locations = + new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified); + locations->SetInAt(0, Location::RequiresRegister()); + locations->SetInAt(1, Location::RequiresRegister()); +} + +static void CreateIntIntToIntSlowPathCallLocations(ArenaAllocator* allocator, HInvoke* invoke) { + LocationSummary* locations = + new (allocator) LocationSummary(invoke, LocationSummary::kCallOnSlowPath, kIntrinsified); + locations->SetInAt(0, Location::RequiresRegister()); + locations->SetInAt(1, Location::RequiresRegister()); + // Force kOutputOverlap; see comments in IntrinsicSlowPath::EmitNativeCode. 
+ locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap); +} + +template <typename EmitOp> +void EmitMemoryPoke(HInvoke* invoke, EmitOp&& emit_op) { + LocationSummary* locations = invoke->GetLocations(); + emit_op(locations->InAt(1).AsRegister<XRegister>(), locations->InAt(0).AsRegister<XRegister>()); +} + +void IntrinsicLocationsBuilderRISCV64::VisitMemoryPokeByte(HInvoke* invoke) { + CreateIntIntToVoidLocations(allocator_, invoke); +} + +void IntrinsicCodeGeneratorRISCV64::VisitMemoryPokeByte(HInvoke* invoke) { + Riscv64Assembler* assembler = GetAssembler(); + EmitMemoryPoke(invoke, [&](XRegister rs2, XRegister rs1) { __ Sb(rs2, rs1, 0); }); +} + +void IntrinsicLocationsBuilderRISCV64::VisitMemoryPokeIntNative(HInvoke* invoke) { + CreateIntIntToVoidLocations(allocator_, invoke); +} + +void IntrinsicCodeGeneratorRISCV64::VisitMemoryPokeIntNative(HInvoke* invoke) { + Riscv64Assembler* assembler = GetAssembler(); + EmitMemoryPoke(invoke, [&](XRegister rs2, XRegister rs1) { __ Sw(rs2, rs1, 0); }); +} + +void IntrinsicLocationsBuilderRISCV64::VisitMemoryPokeLongNative(HInvoke* invoke) { + CreateIntIntToVoidLocations(allocator_, invoke); +} + +void IntrinsicCodeGeneratorRISCV64::VisitMemoryPokeLongNative(HInvoke* invoke) { + Riscv64Assembler* assembler = GetAssembler(); + EmitMemoryPoke(invoke, [&](XRegister rs2, XRegister rs1) { __ Sd(rs2, rs1, 0); }); +} + +void IntrinsicLocationsBuilderRISCV64::VisitMemoryPokeShortNative(HInvoke* invoke) { + CreateIntIntToVoidLocations(allocator_, invoke); +} + +void IntrinsicCodeGeneratorRISCV64::VisitMemoryPokeShortNative(HInvoke* invoke) { + Riscv64Assembler* assembler = GetAssembler(); + EmitMemoryPoke(invoke, [&](XRegister rs2, XRegister rs1) { __ Sh(rs2, rs1, 0); }); +} + +static void GenerateReverseBytes(CodeGeneratorRISCV64* codegen, + Location rd, + XRegister rs1, + DataType::Type type) { + Riscv64Assembler* assembler = codegen->GetAssembler(); + switch (type) { + case DataType::Type::kUint16: + // There is no 16-bit reverse bytes instruction. + __ Rev8(rd.AsRegister<XRegister>(), rs1); + __ Srli(rd.AsRegister<XRegister>(), rd.AsRegister<XRegister>(), 48); + break; + case DataType::Type::kInt16: + // There is no 16-bit reverse bytes instruction. + __ Rev8(rd.AsRegister<XRegister>(), rs1); + __ Srai(rd.AsRegister<XRegister>(), rd.AsRegister<XRegister>(), 48); + break; + case DataType::Type::kInt32: + // There is no 32-bit reverse bytes instruction. + __ Rev8(rd.AsRegister<XRegister>(), rs1); + __ Srai(rd.AsRegister<XRegister>(), rd.AsRegister<XRegister>(), 32); + break; + case DataType::Type::kInt64: + __ Rev8(rd.AsRegister<XRegister>(), rs1); + break; + case DataType::Type::kFloat32: + // There is no 32-bit reverse bytes instruction. + __ Rev8(rs1, rs1); // Note: Clobbers `rs1`. + __ Srai(rs1, rs1, 32); + __ FMvWX(rd.AsFpuRegister<FRegister>(), rs1); + break; + case DataType::Type::kFloat64: + __ Rev8(rs1, rs1); // Note: Clobbers `rs1`. 
+ __ FMvDX(rd.AsFpuRegister<FRegister>(), rs1); + break; + default: + LOG(FATAL) << "Unexpected type: " << type; + UNREACHABLE(); + } +} + +static void GenerateReverseBytes(CodeGeneratorRISCV64* codegen, + HInvoke* invoke, + DataType::Type type) { + DCHECK_EQ(type, invoke->GetType()); + LocationSummary* locations = invoke->GetLocations(); + GenerateReverseBytes(codegen, locations->Out(), locations->InAt(0).AsRegister<XRegister>(), type); +} + +static void GenerateReverse(CodeGeneratorRISCV64* codegen, HInvoke* invoke, DataType::Type type) { + DCHECK_EQ(type, invoke->GetType()); + Riscv64Assembler* assembler = codegen->GetAssembler(); + LocationSummary* locations = invoke->GetLocations(); + XRegister in = locations->InAt(0).AsRegister<XRegister>(); + XRegister out = locations->Out().AsRegister<XRegister>(); + ScratchRegisterScope srs(assembler); + XRegister temp1 = srs.AllocateXRegister(); + XRegister temp2 = srs.AllocateXRegister(); + + auto maybe_extend_mask = [type, assembler](XRegister mask, XRegister temp) { + if (type == DataType::Type::kInt64) { + __ Slli(temp, mask, 32); + __ Add(mask, mask, temp); + } + }; + + // Swap bits in bit pairs. + __ Li(temp1, 0x55555555); + maybe_extend_mask(temp1, temp2); + __ Srli(temp2, in, 1); + __ And(out, in, temp1); + __ And(temp2, temp2, temp1); + __ Sh1Add(out, out, temp2); + + // Swap bit pairs in 4-bit groups. + __ Li(temp1, 0x33333333); + maybe_extend_mask(temp1, temp2); + __ Srli(temp2, out, 2); + __ And(out, out, temp1); + __ And(temp2, temp2, temp1); + __ Sh2Add(out, out, temp2); + + // Swap 4-bit groups in 8-bit groups. + __ Li(temp1, 0x0f0f0f0f); + maybe_extend_mask(temp1, temp2); + __ Srli(temp2, out, 4); + __ And(out, out, temp1); + __ And(temp2, temp2, temp1); + __ Slli(out, out, 4); + __ Add(out, out, temp2); + + GenerateReverseBytes(codegen, Location::RegisterLocation(out), out, type); +} + +void IntrinsicLocationsBuilderRISCV64::VisitIntegerReverse(HInvoke* invoke) { + CreateIntToIntNoOverlapLocations(allocator_, invoke); +} + +void IntrinsicCodeGeneratorRISCV64::VisitIntegerReverse(HInvoke* invoke) { + GenerateReverse(codegen_, invoke, DataType::Type::kInt32); +} + +void IntrinsicLocationsBuilderRISCV64::VisitLongReverse(HInvoke* invoke) { + CreateIntToIntNoOverlapLocations(allocator_, invoke); +} + +void IntrinsicCodeGeneratorRISCV64::VisitLongReverse(HInvoke* invoke) { + GenerateReverse(codegen_, invoke, DataType::Type::kInt64); +} + +void IntrinsicLocationsBuilderRISCV64::VisitIntegerReverseBytes(HInvoke* invoke) { + CreateIntToIntNoOverlapLocations(allocator_, invoke); +} + +void IntrinsicCodeGeneratorRISCV64::VisitIntegerReverseBytes(HInvoke* invoke) { + GenerateReverseBytes(codegen_, invoke, DataType::Type::kInt32); +} + +void IntrinsicLocationsBuilderRISCV64::VisitLongReverseBytes(HInvoke* invoke) { + CreateIntToIntNoOverlapLocations(allocator_, invoke); +} + +void IntrinsicCodeGeneratorRISCV64::VisitLongReverseBytes(HInvoke* invoke) { + GenerateReverseBytes(codegen_, invoke, DataType::Type::kInt64); +} + +void IntrinsicLocationsBuilderRISCV64::VisitShortReverseBytes(HInvoke* invoke) { + CreateIntToIntNoOverlapLocations(allocator_, invoke); +} + +void IntrinsicCodeGeneratorRISCV64::VisitShortReverseBytes(HInvoke* invoke) { + GenerateReverseBytes(codegen_, invoke, DataType::Type::kInt16); +} + +template <typename EmitOp> +void EmitIntegralUnOp(HInvoke* invoke, EmitOp&& emit_op) { + LocationSummary* locations = invoke->GetLocations(); + emit_op(locations->Out().AsRegister<XRegister>(), 
locations->InAt(0).AsRegister<XRegister>()); +} + +void IntrinsicLocationsBuilderRISCV64::VisitIntegerBitCount(HInvoke* invoke) { + CreateIntToIntNoOverlapLocations(allocator_, invoke); +} + +void IntrinsicCodeGeneratorRISCV64::VisitIntegerBitCount(HInvoke* invoke) { + Riscv64Assembler* assembler = GetAssembler(); + EmitIntegralUnOp(invoke, [&](XRegister rd, XRegister rs1) { __ Cpopw(rd, rs1); }); +} + +void IntrinsicLocationsBuilderRISCV64::VisitLongBitCount(HInvoke* invoke) { + CreateIntToIntNoOverlapLocations(allocator_, invoke); +} + +void IntrinsicCodeGeneratorRISCV64::VisitLongBitCount(HInvoke* invoke) { + Riscv64Assembler* assembler = GetAssembler(); + EmitIntegralUnOp(invoke, [&](XRegister rd, XRegister rs1) { __ Cpop(rd, rs1); }); +} + +void IntrinsicLocationsBuilderRISCV64::VisitIntegerHighestOneBit(HInvoke* invoke) { + CreateIntToIntNoOverlapLocations(allocator_, invoke); +} + +void IntrinsicCodeGeneratorRISCV64::VisitIntegerHighestOneBit(HInvoke* invoke) { + Riscv64Assembler* assembler = GetAssembler(); + EmitIntegralUnOp(invoke, [&](XRegister rd, XRegister rs1) { + ScratchRegisterScope srs(assembler); + XRegister tmp = srs.AllocateXRegister(); + XRegister tmp2 = srs.AllocateXRegister(); + __ Clzw(tmp, rs1); + __ Li(tmp2, INT64_C(-0x80000000)); + __ Srlw(tmp2, tmp2, tmp); + __ And(rd, rs1, tmp2); // Make sure the result is zero if the input is zero. + }); +} + +void IntrinsicLocationsBuilderRISCV64::VisitLongHighestOneBit(HInvoke* invoke) { + CreateIntToIntNoOverlapLocations(allocator_, invoke); +} + +void IntrinsicCodeGeneratorRISCV64::VisitLongHighestOneBit(HInvoke* invoke) { + Riscv64Assembler* assembler = GetAssembler(); + EmitIntegralUnOp(invoke, [&](XRegister rd, XRegister rs1) { + ScratchRegisterScope srs(assembler); + XRegister tmp = srs.AllocateXRegister(); + XRegister tmp2 = srs.AllocateXRegister(); + __ Clz(tmp, rs1); + __ Li(tmp2, INT64_C(-0x8000000000000000)); + __ Srl(tmp2, tmp2, tmp); + __ And(rd, rs1, tmp2); // Make sure the result is zero if the input is zero. 
+ }); +} + +void IntrinsicLocationsBuilderRISCV64::VisitIntegerLowestOneBit(HInvoke* invoke) { + CreateIntToIntNoOverlapLocations(allocator_, invoke); +} + +void IntrinsicCodeGeneratorRISCV64::VisitIntegerLowestOneBit(HInvoke* invoke) { + Riscv64Assembler* assembler = GetAssembler(); + EmitIntegralUnOp(invoke, [&](XRegister rd, XRegister rs1) { + ScratchRegisterScope srs(assembler); + XRegister tmp = srs.AllocateXRegister(); + __ NegW(tmp, rs1); + __ And(rd, rs1, tmp); + }); +} + +void IntrinsicLocationsBuilderRISCV64::VisitLongLowestOneBit(HInvoke* invoke) { + CreateIntToIntNoOverlapLocations(allocator_, invoke); +} + +void IntrinsicCodeGeneratorRISCV64::VisitLongLowestOneBit(HInvoke* invoke) { + Riscv64Assembler* assembler = GetAssembler(); + EmitIntegralUnOp(invoke, [&](XRegister rd, XRegister rs1) { + ScratchRegisterScope srs(assembler); + XRegister tmp = srs.AllocateXRegister(); + __ Neg(tmp, rs1); + __ And(rd, rs1, tmp); + }); +} + +void IntrinsicLocationsBuilderRISCV64::VisitIntegerNumberOfLeadingZeros(HInvoke* invoke) { + CreateIntToIntNoOverlapLocations(allocator_, invoke); +} + +void IntrinsicCodeGeneratorRISCV64::VisitIntegerNumberOfLeadingZeros(HInvoke* invoke) { + Riscv64Assembler* assembler = GetAssembler(); + EmitIntegralUnOp(invoke, [&](XRegister rd, XRegister rs1) { __ Clzw(rd, rs1); }); +} + +void IntrinsicLocationsBuilderRISCV64::VisitLongNumberOfLeadingZeros(HInvoke* invoke) { + CreateIntToIntNoOverlapLocations(allocator_, invoke); +} + +void IntrinsicCodeGeneratorRISCV64::VisitLongNumberOfLeadingZeros(HInvoke* invoke) { + Riscv64Assembler* assembler = GetAssembler(); + EmitIntegralUnOp(invoke, [&](XRegister rd, XRegister rs1) { __ Clz(rd, rs1); }); +} + +void IntrinsicLocationsBuilderRISCV64::VisitIntegerNumberOfTrailingZeros(HInvoke* invoke) { + CreateIntToIntNoOverlapLocations(allocator_, invoke); +} + +void IntrinsicCodeGeneratorRISCV64::VisitIntegerNumberOfTrailingZeros(HInvoke* invoke) { + Riscv64Assembler* assembler = GetAssembler(); + EmitIntegralUnOp(invoke, [&](XRegister rd, XRegister rs1) { __ Ctzw(rd, rs1); }); +} + +void IntrinsicLocationsBuilderRISCV64::VisitLongNumberOfTrailingZeros(HInvoke* invoke) { + CreateIntToIntNoOverlapLocations(allocator_, invoke); +} + +void IntrinsicCodeGeneratorRISCV64::VisitLongNumberOfTrailingZeros(HInvoke* invoke) { + Riscv64Assembler* assembler = GetAssembler(); + EmitIntegralUnOp(invoke, [&](XRegister rd, XRegister rs1) { __ Ctz(rd, rs1); }); +} + +static void GenerateDivideUnsigned(HInvoke* invoke, CodeGeneratorRISCV64* codegen) { + LocationSummary* locations = invoke->GetLocations(); + Riscv64Assembler* assembler = codegen->GetAssembler(); + DataType::Type type = invoke->GetType(); + DCHECK(type == DataType::Type::kInt32 || type == DataType::Type::kInt64); + + XRegister dividend = locations->InAt(0).AsRegister<XRegister>(); + XRegister divisor = locations->InAt(1).AsRegister<XRegister>(); + XRegister out = locations->Out().AsRegister<XRegister>(); + + // Check if divisor is zero, bail to managed implementation to handle. 
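
The zero check below exists because Integer/Long.divideUnsigned must throw ArithmeticException for a zero divisor, while the RISC-V DIVU/DIVUW instructions simply return an all-ones result rather than trapping; the slow path reaches the managed implementation, which throws. For a non-zero divisor the intrinsic is just an unsigned division, sketched here for the 32-bit case:

#include <cstdint>

// Integer.divideUnsigned(dividend, divisor) for divisor != 0; the divisor == 0
// case is delegated to the managed code (which throws ArithmeticException).
static int32_t DivideUnsigned32(int32_t dividend, int32_t divisor) {
  return static_cast<int32_t>(
      static_cast<uint32_t>(dividend) / static_cast<uint32_t>(divisor));  // Divuw
}
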
+ SlowPathCodeRISCV64* slow_path = + new (codegen->GetScopedAllocator()) IntrinsicSlowPathRISCV64(invoke); + codegen->AddSlowPath(slow_path); + __ Beqz(divisor, slow_path->GetEntryLabel()); + + if (type == DataType::Type::kInt32) { + __ Divuw(out, dividend, divisor); + } else { + __ Divu(out, dividend, divisor); + } + + __ Bind(slow_path->GetExitLabel()); +} + +void IntrinsicLocationsBuilderRISCV64::VisitIntegerDivideUnsigned(HInvoke* invoke) { + CreateIntIntToIntSlowPathCallLocations(allocator_, invoke); +} + +void IntrinsicCodeGeneratorRISCV64::VisitIntegerDivideUnsigned(HInvoke* invoke) { + GenerateDivideUnsigned(invoke, codegen_); +} + +void IntrinsicLocationsBuilderRISCV64::VisitLongDivideUnsigned(HInvoke* invoke) { + CreateIntIntToIntSlowPathCallLocations(allocator_, invoke); +} + +void IntrinsicCodeGeneratorRISCV64::VisitLongDivideUnsigned(HInvoke* invoke) { + GenerateDivideUnsigned(invoke, codegen_); +} + +#define VISIT_INTRINSIC(name, low, high, type, start_index) \ + void IntrinsicLocationsBuilderRISCV64::Visit ##name ##ValueOf(HInvoke* invoke) { \ + InvokeRuntimeCallingConvention calling_convention; \ + IntrinsicVisitor::ComputeValueOfLocations( \ + invoke, \ + codegen_, \ + low, \ + high - low + 1, \ + calling_convention.GetReturnLocation(DataType::Type::kReference), \ + Location::RegisterLocation(calling_convention.GetRegisterAt(0))); \ + } \ + void IntrinsicCodeGeneratorRISCV64::Visit ##name ##ValueOf(HInvoke* invoke) { \ + IntrinsicVisitor::ValueOfInfo info = \ + IntrinsicVisitor::ComputeValueOfInfo( \ + invoke, \ + codegen_->GetCompilerOptions(), \ + WellKnownClasses::java_lang_ ##name ##_value, \ + low, \ + high - low + 1, \ + start_index); \ + HandleValueOf(invoke, info, type); \ + } + BOXED_TYPES(VISIT_INTRINSIC) +#undef VISIT_INTRINSIC + +void IntrinsicCodeGeneratorRISCV64::HandleValueOf(HInvoke* invoke, + const IntrinsicVisitor::ValueOfInfo& info, + DataType::Type type) { + Riscv64Assembler* assembler = codegen_->GetAssembler(); + LocationSummary* locations = invoke->GetLocations(); + XRegister out = locations->Out().AsRegister<XRegister>(); + ScratchRegisterScope srs(assembler); + XRegister temp = srs.AllocateXRegister(); + auto allocate_instance = [&]() { + DCHECK_EQ(out, InvokeRuntimeCallingConvention().GetRegisterAt(0)); + codegen_->LoadIntrinsicDeclaringClass(out, invoke); + codegen_->InvokeRuntime(kQuickAllocObjectInitialized, invoke, invoke->GetDexPc()); + CheckEntrypointTypes<kQuickAllocObjectWithChecks, void*, mirror::Class*>(); + }; + if (invoke->InputAt(0)->IsIntConstant()) { + int32_t value = invoke->InputAt(0)->AsIntConstant()->GetValue(); + if (static_cast<uint32_t>(value - info.low) < info.length) { + // Just embed the object in the code. + DCHECK_NE(info.value_boot_image_reference, ValueOfInfo::kInvalidReference); + codegen_->LoadBootImageAddress(out, info.value_boot_image_reference); + } else { + DCHECK(locations->CanCall()); + // Allocate and initialize a new object. + // TODO: If we JIT, we could allocate the object now, and store it in the + // JIT object table. + allocate_instance(); + __ Li(temp, value); + codegen_->GetInstructionVisitor()->Store( + Location::RegisterLocation(temp), out, info.value_offset, type); + // Class pointer and `value` final field stores require a barrier before publication. + codegen_->GenerateMemoryBarrier(MemBarrierKind::kStoreStore); + } + } else { + DCHECK(locations->CanCall()); + XRegister in = locations->InAt(0).AsRegister<XRegister>(); + Riscv64Label allocate, done; + // Check bounds of our cache. 
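
The code below implements the usual valueOf cache lookup: subtract the low bound, compare unsigned against the cache length, then either load the pre-boxed object from the boot-image array or allocate a fresh one. A minimal sketch of the same logic; BoxedInt, kLow, kLength and the cache array are illustrative stand-ins for the mirror classes and ValueOfInfo fields, and the -128..255-element defaults merely mirror Integer's usual cache rather than anything read off this patch:

#include <cstdint>

struct BoxedInt { int32_t value; };

constexpr int32_t kLow = -128;         // info.low (assumed default)
constexpr uint32_t kLength = 256;      // info.length (assumed default)
static BoxedInt* cache[kLength] = {};  // boot-image array of pre-boxed values

static BoxedInt* ValueOf(int32_t v) {
  uint32_t index = static_cast<uint32_t>(v - kLow);  // AddConst32 out, in, -info.low
  if (index < kLength) {                             // Bgeu out, temp, &allocate
    return cache[index];                             // Sh2Add + Loadwu
  }
  BoxedInt* obj = new BoxedInt{v};  // allocate_instance() + store of `value`
  // The generated code additionally emits a StoreStore barrier before publishing.
  return obj;
}
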
+ __ AddConst32(out, in, -info.low); + __ Li(temp, info.length); + __ Bgeu(out, temp, &allocate); + // If the value is within the bounds, load the object directly from the array. + codegen_->LoadBootImageAddress(temp, info.array_data_boot_image_reference); + __ Sh2Add(temp, out, temp); + __ Loadwu(out, temp, 0); + codegen_->MaybeUnpoisonHeapReference(out); + __ J(&done); + __ Bind(&allocate); + // Otherwise allocate and initialize a new object. + allocate_instance(); + codegen_->GetInstructionVisitor()->Store( + Location::RegisterLocation(in), out, info.value_offset, type); + // Class pointer and `value` final field stores require a barrier before publication. + codegen_->GenerateMemoryBarrier(MemBarrierKind::kStoreStore); + __ Bind(&done); + } +} + +void IntrinsicLocationsBuilderRISCV64::VisitReferenceGetReferent(HInvoke* invoke) { + IntrinsicVisitor::CreateReferenceGetReferentLocations(invoke, codegen_); + + if (codegen_->EmitBakerReadBarrier() && invoke->GetLocations() != nullptr) { + invoke->GetLocations()->AddTemp(Location::RequiresRegister()); + } +} + +void IntrinsicCodeGeneratorRISCV64::VisitReferenceGetReferent(HInvoke* invoke) { + Riscv64Assembler* assembler = GetAssembler(); + LocationSummary* locations = invoke->GetLocations(); + Location obj = locations->InAt(0); + Location out = locations->Out(); + + SlowPathCodeRISCV64* slow_path = + new (codegen_->GetScopedAllocator()) IntrinsicSlowPathRISCV64(invoke); + codegen_->AddSlowPath(slow_path); + + if (codegen_->EmitReadBarrier()) { + // Check self->GetWeakRefAccessEnabled(). + ScratchRegisterScope srs(assembler); + XRegister temp = srs.AllocateXRegister(); + __ Loadwu(temp, TR, Thread::WeakRefAccessEnabledOffset<kRiscv64PointerSize>().Int32Value()); + static_assert(enum_cast<int32_t>(WeakRefAccessState::kVisiblyEnabled) == 0); + __ Bnez(temp, slow_path->GetEntryLabel()); + } + + { + // Load the java.lang.ref.Reference class. + ScratchRegisterScope srs(assembler); + XRegister temp = srs.AllocateXRegister(); + codegen_->LoadIntrinsicDeclaringClass(temp, invoke); + + // Check static fields java.lang.ref.Reference.{disableIntrinsic,slowPathEnabled} together. + MemberOffset disable_intrinsic_offset = IntrinsicVisitor::GetReferenceDisableIntrinsicOffset(); + DCHECK_ALIGNED(disable_intrinsic_offset.Uint32Value(), 2u); + DCHECK_EQ(disable_intrinsic_offset.Uint32Value() + 1u, + IntrinsicVisitor::GetReferenceSlowPathEnabledOffset().Uint32Value()); + __ Loadhu(temp, temp, disable_intrinsic_offset.Int32Value()); + __ Bnez(temp, slow_path->GetEntryLabel()); + } + + // Load the value from the field. + uint32_t referent_offset = mirror::Reference::ReferentOffset().Uint32Value(); + if (codegen_->EmitBakerReadBarrier()) { + codegen_->GenerateFieldLoadWithBakerReadBarrier(invoke, + out, + obj.AsRegister<XRegister>(), + referent_offset, + /*maybe_temp=*/ locations->GetTemp(0), + /*needs_null_check=*/ false); + } else { + codegen_->GetInstructionVisitor()->Load( + out, obj.AsRegister<XRegister>(), referent_offset, DataType::Type::kReference); + codegen_->MaybeGenerateReadBarrierSlow(invoke, out, out, obj, referent_offset); + } + // Emit memory barrier for load-acquire. 
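
The referent load above together with the kLoadAny barrier emitted just below acts as a load-acquire. In C++ terms that is a relaxed atomic load followed by an acquire fence; assuming (not shown in this hunk) that kLoadAny lowers to something like "fence r, rw" on RISC-V:

#include <atomic>
#include <cstdint>

// A relaxed load followed by an acquire fence gives the load-acquire
// behavior the comment above relies on; the generated code uses a plain
// load plus a kLoadAny barrier in the same way.
static uint32_t LoadAcquire(const std::atomic<uint32_t>& ref_field) {
  uint32_t value = ref_field.load(std::memory_order_relaxed);
  std::atomic_thread_fence(std::memory_order_acquire);
  return value;
}
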
+ codegen_->GenerateMemoryBarrier(MemBarrierKind::kLoadAny); + __ Bind(slow_path->GetExitLabel()); +} + +void IntrinsicLocationsBuilderRISCV64::VisitReferenceRefersTo(HInvoke* invoke) { + IntrinsicVisitor::CreateReferenceRefersToLocations(invoke, codegen_); +} + +void IntrinsicCodeGeneratorRISCV64::VisitReferenceRefersTo(HInvoke* invoke) { + Riscv64Assembler* assembler = GetAssembler(); + LocationSummary* locations = invoke->GetLocations(); + XRegister obj = locations->InAt(0).AsRegister<XRegister>(); + XRegister other = locations->InAt(1).AsRegister<XRegister>(); + XRegister out = locations->Out().AsRegister<XRegister>(); + + uint32_t referent_offset = mirror::Reference::ReferentOffset().Uint32Value(); + uint32_t monitor_offset = mirror::Object::MonitorOffset().Int32Value(); + + codegen_->GetInstructionVisitor()->Load( + Location::RegisterLocation(out), obj, referent_offset, DataType::Type::kReference); + codegen_->MaybeRecordImplicitNullCheck(invoke); + codegen_->MaybeUnpoisonHeapReference(out); + + // Emit memory barrier for load-acquire. + codegen_->GenerateMemoryBarrier(MemBarrierKind::kLoadAny); + + if (codegen_->EmitReadBarrier()) { + DCHECK(kUseBakerReadBarrier); + + Riscv64Label calculate_result; + + // If equal to `other`, the loaded reference is final (it cannot be a from-space reference). + __ Beq(out, other, &calculate_result); + + // If the GC is not marking, the loaded reference is final. + ScratchRegisterScope srs(assembler); + XRegister tmp = srs.AllocateXRegister(); + __ Loadwu(tmp, TR, Thread::IsGcMarkingOffset<kRiscv64PointerSize>().Int32Value()); + __ Beqz(tmp, &calculate_result); + + // Check if the loaded reference is null. + __ Beqz(out, &calculate_result); + + // For correct memory visibility, we need a barrier before loading the lock word to + // synchronize with the publishing of `other` by the CC GC. However, as long as the + // load-acquire above is implemented as a plain load followed by a barrier (rather + // than an atomic load-acquire instruction which synchronizes only with other + // instructions on the same memory location), that barrier is sufficient. + + // Load the lockword and check if it is a forwarding address. + static_assert(LockWord::kStateShift == 30u); + static_assert(LockWord::kStateForwardingAddress == 3u); + // Load the lock word sign-extended. Comparing it to the sign-extended forwarding + // address bits as unsigned is the same as comparing both zero-extended. + __ Loadw(tmp, out, monitor_offset); + // Materialize sign-extended forwarding address bits. This is a single LUI instruction. + XRegister tmp2 = srs.AllocateXRegister(); + __ Li(tmp2, INT64_C(-1) & ~static_cast<int64_t>((1 << LockWord::kStateShift) - 1)); + // If we do not have a forwarding address, the loaded reference cannot be the same as `other`, + // so we proceed to calculate the result with `out != other`. + __ Bltu(tmp, tmp2, &calculate_result); + + // Extract the forwarding address for comparison with `other`. + // Note that the high 32 bits shall not be used for the result calculation. + __ Slliw(out, tmp, LockWord::kForwardingAddressShift); + + __ Bind(&calculate_result); + } + + // Calculate the result `out == other`. + __ Subw(out, out, other); + __ Seqz(out, out); +} + +static void GenerateVisitStringIndexOf(HInvoke* invoke, + Riscv64Assembler* assembler, + CodeGeneratorRISCV64* codegen, + bool start_at_zero) { + LocationSummary* locations = invoke->GetLocations(); + + // Note that the null check must have been done earlier. 
+ DCHECK(!invoke->CanDoImplicitNullCheckOn(invoke->InputAt(0))); + + // Check for code points > 0xFFFF. Either a slow-path check when we don't know statically, + // or directly dispatch for a large constant, or omit slow-path for a small constant or a char. + SlowPathCodeRISCV64* slow_path = nullptr; + HInstruction* code_point = invoke->InputAt(1); + if (code_point->IsIntConstant()) { + if (static_cast<uint32_t>(code_point->AsIntConstant()->GetValue()) > 0xFFFFU) { + // Always needs the slow-path. We could directly dispatch to it, but this case should be + // rare, so for simplicity just put the full slow-path down and branch unconditionally. + slow_path = new (codegen->GetScopedAllocator()) IntrinsicSlowPathRISCV64(invoke); + codegen->AddSlowPath(slow_path); + __ J(slow_path->GetEntryLabel()); + __ Bind(slow_path->GetExitLabel()); + return; + } + } else if (code_point->GetType() != DataType::Type::kUint16) { + slow_path = new (codegen->GetScopedAllocator()) IntrinsicSlowPathRISCV64(invoke); + codegen->AddSlowPath(slow_path); + ScratchRegisterScope srs(assembler); + XRegister tmp = srs.AllocateXRegister(); + __ Srliw(tmp, locations->InAt(1).AsRegister<XRegister>(), 16); + __ Bnez(tmp, slow_path->GetEntryLabel()); + } + + if (start_at_zero) { + // Start-index = 0. + XRegister tmp_reg = locations->GetTemp(0).AsRegister<XRegister>(); + __ Li(tmp_reg, 0); + } + + codegen->InvokeRuntime(kQuickIndexOf, invoke, invoke->GetDexPc(), slow_path); + CheckEntrypointTypes<kQuickIndexOf, int32_t, void*, uint32_t, uint32_t>(); + + if (slow_path != nullptr) { + __ Bind(slow_path->GetExitLabel()); + } +} + +void IntrinsicLocationsBuilderRISCV64::VisitStringIndexOf(HInvoke* invoke) { + LocationSummary* locations = new (allocator_) LocationSummary( + invoke, LocationSummary::kCallOnMainAndSlowPath, kIntrinsified); + // We have a hand-crafted assembly stub that follows the runtime calling convention. So it's + // best to align the inputs accordingly. + InvokeRuntimeCallingConvention calling_convention; + locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0))); + locations->SetInAt(1, Location::RegisterLocation(calling_convention.GetRegisterAt(1))); + locations->SetOut(calling_convention.GetReturnLocation(DataType::Type::kInt32)); + + // Need to send start_index=0. + locations->AddTemp(Location::RegisterLocation(calling_convention.GetRegisterAt(2))); +} + +void IntrinsicCodeGeneratorRISCV64::VisitStringIndexOf(HInvoke* invoke) { + GenerateVisitStringIndexOf(invoke, GetAssembler(), codegen_, /* start_at_zero= */ true); +} + +void IntrinsicLocationsBuilderRISCV64::VisitStringIndexOfAfter(HInvoke* invoke) { + LocationSummary* locations = new (allocator_) LocationSummary( + invoke, LocationSummary::kCallOnMainAndSlowPath, kIntrinsified); + // We have a hand-crafted assembly stub that follows the runtime calling convention. So it's + // best to align the inputs accordingly. 
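
Pinning the inputs to the runtime calling-convention registers below lets the intrinsic call the IndexOf stub directly; the only extra work for the one-argument String.indexOf(ch) is materializing a start index of 0 in a temp, since it is simply indexOf(ch, 0). A sketch of that equivalence, using std::u16string as a stand-in for the managed String:

#include <cstdint>
#include <string>

// indexOf(ch, fromIndex): the operation the kQuickIndexOf entrypoint performs.
static int32_t IndexOfAfter(const std::u16string& s, char16_t ch, int32_t from) {
  size_t pos = s.find(ch, static_cast<size_t>(from));
  return pos == std::u16string::npos ? -1 : static_cast<int32_t>(pos);
}

// indexOf(ch) == indexOf(ch, 0); the intrinsic loads 0 into the third
// argument register (start_at_zero) before the runtime call.
static int32_t IndexOf(const std::u16string& s, char16_t ch) {
  return IndexOfAfter(s, ch, /*from=*/ 0);
}
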
+ InvokeRuntimeCallingConvention calling_convention; + locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0))); + locations->SetInAt(1, Location::RegisterLocation(calling_convention.GetRegisterAt(1))); + locations->SetInAt(2, Location::RegisterLocation(calling_convention.GetRegisterAt(2))); + locations->SetOut(calling_convention.GetReturnLocation(DataType::Type::kInt32)); +} + +void IntrinsicCodeGeneratorRISCV64::VisitStringIndexOfAfter(HInvoke* invoke) { + GenerateVisitStringIndexOf(invoke, GetAssembler(), codegen_, /* start_at_zero= */ false); +} + +static void GenerateSet(CodeGeneratorRISCV64* codegen, + std::memory_order order, + Location value, + XRegister rs1, + int32_t offset, + DataType::Type type) { + if (order == std::memory_order_seq_cst) { + codegen->GetInstructionVisitor()->StoreSeqCst(value, rs1, offset, type); + } else { + if (order == std::memory_order_release) { + codegen->GenerateMemoryBarrier(MemBarrierKind::kAnyStore); + } else { + DCHECK(order == std::memory_order_relaxed); + } + codegen->GetInstructionVisitor()->Store(value, rs1, offset, type); + } +} + +std::pair<AqRl, AqRl> GetLrScAqRl(std::memory_order order) { + AqRl load_aqrl = AqRl::kNone; + AqRl store_aqrl = AqRl::kNone; + if (order == std::memory_order_acquire) { + load_aqrl = AqRl::kAcquire; + } else if (order == std::memory_order_release) { + store_aqrl = AqRl::kRelease; + } else if (order == std::memory_order_seq_cst) { + load_aqrl = AqRl::kAqRl; + store_aqrl = AqRl::kRelease; + } else { + DCHECK(order == std::memory_order_relaxed); + } + return {load_aqrl, store_aqrl}; +} + +AqRl GetAmoAqRl(std::memory_order order) { + AqRl amo_aqrl = AqRl::kNone; + if (order == std::memory_order_acquire) { + amo_aqrl = AqRl::kAcquire; + } else if (order == std::memory_order_release) { + amo_aqrl = AqRl::kRelease; + } else { + DCHECK(order == std::memory_order_seq_cst); + amo_aqrl = AqRl::kAqRl; + } + return amo_aqrl; +} + +static void EmitLoadReserved(Riscv64Assembler* assembler, + DataType::Type type, + XRegister ptr, + XRegister old_value, + AqRl aqrl) { + switch (type) { + case DataType::Type::kInt32: + __ LrW(old_value, ptr, aqrl); + break; + case DataType::Type::kReference: + __ LrW(old_value, ptr, aqrl); + // TODO(riscv64): The `ZextW()` macro currently emits `SLLI+SRLI` which are from the + // base "I" instruction set. When the assembler is updated to use a single-instruction + // `ZextW()` macro, either the ADD.UW, or the C.ZEXT.W (16-bit encoding), we need to + // rewrite this to avoid these non-"I" instructions. We could, for example, sign-extend + // the reference and do the CAS as `Int32`. 
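
GetLrScAqRl and GetAmoAqRl above translate a std::memory_order into the aq/rl bits of an LR/SC pair or of a single AMO. As a rough illustration of the correspondence (the AMOSWAP.D mnemonic is an assumption about how a 64-bit getAndSet would lower; it is not shown in this hunk):

#include <atomic>
#include <cstdint>

// A seq_cst exchange is the std::atomic counterpart of an AMO with both
// acquire and release bits set (GetAmoAqRl(seq_cst) == AqRl::kAqRl).
static int64_t ExchangeSeqCst(std::atomic<int64_t>& target, int64_t value) {
  return target.exchange(value, std::memory_order_seq_cst);  // e.g. amoswap.d.aqrl
}
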
+ __ ZextW(old_value, old_value); + break; + case DataType::Type::kInt64: + __ LrD(old_value, ptr, aqrl); + break; + default: + LOG(FATAL) << "Unexpected type: " << type; + UNREACHABLE(); + } +} + +static void EmitStoreConditional(Riscv64Assembler* assembler, + DataType::Type type, + XRegister ptr, + XRegister store_result, + XRegister to_store, + AqRl aqrl) { + switch (type) { + case DataType::Type::kInt32: + case DataType::Type::kReference: + __ ScW(store_result, to_store, ptr, aqrl); + break; + case DataType::Type::kInt64: + __ ScD(store_result, to_store, ptr, aqrl); + break; + default: + LOG(FATAL) << "Unexpected type: " << type; + UNREACHABLE(); + } +} + +static void GenerateCompareAndSet(Riscv64Assembler* assembler, + DataType::Type type, + std::memory_order order, + bool strong, + Riscv64Label* cmp_failure, + XRegister ptr, + XRegister new_value, + XRegister old_value, + XRegister mask, + XRegister masked, + XRegister store_result, + XRegister expected, + XRegister expected2 = kNoXRegister) { + DCHECK(!DataType::IsFloatingPointType(type)); + DCHECK_GE(DataType::Size(type), 4u); + + // The `expected2` is valid only for reference slow path and represents the unmarked old value + // from the main path attempt to emit CAS when the marked old value matched `expected`. + DCHECK_IMPLIES(expected2 != kNoXRegister, type == DataType::Type::kReference); + + auto [load_aqrl, store_aqrl] = GetLrScAqRl(order); + + // repeat: { + // old_value = [ptr]; // Load exclusive. + // cmp_value = old_value & mask; // Extract relevant bits if applicable. + // if (cmp_value != expected && cmp_value != expected2) goto cmp_failure; + // store_result = failed([ptr] <- new_value); // Store exclusive. + // } + // if (strong) { + // if (store_result) goto repeat; // Repeat until compare fails or store exclusive succeeds. + // } else { + // store_result = store_result ^ 1; // Report success as 1, failure as 0. + // } + // + // (If `mask` is not valid, `expected` is compared with `old_value` instead of `cmp_value`.) + // (If `expected2` is not valid, the `cmp_value == expected2` part is not emitted.) + + // Note: We're using "bare" local branches to enforce that they shall not be expanded + // and the scrach register `TMP` shall not be clobbered if taken. Taking the branch to + // `cmp_failure` can theoretically clobber `TMP` (if outside the 1 MiB range). + Riscv64Label loop; + if (strong) { + __ Bind(&loop); + } + EmitLoadReserved(assembler, type, ptr, old_value, load_aqrl); + XRegister to_store = new_value; + if (mask != kNoXRegister) { + DCHECK_EQ(expected2, kNoXRegister); + DCHECK_NE(masked, kNoXRegister); + __ And(masked, old_value, mask); + __ Bne(masked, expected, cmp_failure); + // The `old_value` does not need to be preserved as the caller shall use `masked` + // to return the old value if needed. + to_store = old_value; + // TODO(riscv64): We could XOR the old and new value before the loop and use a single XOR here + // instead of the XOR+OR. (The `new_value` is either Zero or a temporary we can clobber.) 
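
The Xor+Or that follows rebuilds the word to store: it clears the field selected by `mask` (whose old contents equal `masked`, which the branch above has just compared against `expected`) and inserts the pre-shifted `new_value`. A small worked example of that identity, also showing the single-XOR variant mentioned in the TODO; the concrete constants are only for illustration:

#include <cassert>
#include <cstdint>

int main() {
  uint32_t old_value = 0x1234ABCDu;
  uint32_t mask      = 0x0000FF00u;       // field being updated
  uint32_t expected  = old_value & mask;  // equals `masked` after the Bne check
  uint32_t new_value = 0x00005600u;       // new bits, already shifted into the mask

  uint32_t xor_or = (old_value ^ expected) | new_value;  // Xor + Or, as emitted
  uint32_t single = old_value ^ (expected ^ new_value);  // single-XOR variant from the TODO

  assert(xor_or == 0x123456CDu);  // old word with only the masked field replaced
  assert(single == xor_or);
  return 0;
}
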
+ __ Xor(to_store, old_value, masked); + __ Or(to_store, to_store, new_value); + } else if (expected2 != kNoXRegister) { + Riscv64Label match2; + __ Beq(old_value, expected2, &match2, /*is_bare=*/ true); + __ Bne(old_value, expected, cmp_failure); + __ Bind(&match2); + } else { + __ Bne(old_value, expected, cmp_failure); + } + EmitStoreConditional(assembler, type, ptr, store_result, to_store, store_aqrl); + if (strong) { + __ Bnez(store_result, &loop, /*is_bare=*/ true); + } else { + // Flip the `store_result` register to indicate success by 1 and failure by 0. + __ Xori(store_result, store_result, 1); + } +} + +class ReadBarrierCasSlowPathRISCV64 : public SlowPathCodeRISCV64 { + public: + ReadBarrierCasSlowPathRISCV64(HInvoke* invoke, + std::memory_order order, + bool strong, + XRegister base, + XRegister offset, + XRegister expected, + XRegister new_value, + XRegister old_value, + XRegister old_value_temp, + XRegister store_result, + bool update_old_value, + CodeGeneratorRISCV64* riscv64_codegen) + : SlowPathCodeRISCV64(invoke), + order_(order), + strong_(strong), + base_(base), + offset_(offset), + expected_(expected), + new_value_(new_value), + old_value_(old_value), + old_value_temp_(old_value_temp), + store_result_(store_result), + update_old_value_(update_old_value), + mark_old_value_slow_path_(nullptr), + update_old_value_slow_path_(nullptr) { + // We need to add slow paths now, it is too late when emitting slow path code. + Location old_value_loc = Location::RegisterLocation(old_value); + Location old_value_temp_loc = Location::RegisterLocation(old_value_temp); + if (kUseBakerReadBarrier) { + mark_old_value_slow_path_ = riscv64_codegen->AddGcRootBakerBarrierBarrierSlowPath( + invoke, old_value_temp_loc, kBakerReadBarrierTemp); + if (update_old_value_) { + update_old_value_slow_path_ = riscv64_codegen->AddGcRootBakerBarrierBarrierSlowPath( + invoke, old_value_loc, kBakerReadBarrierTemp); + } + } else { + Location base_loc = Location::RegisterLocation(base); + Location index = Location::RegisterLocation(offset); + mark_old_value_slow_path_ = riscv64_codegen->AddReadBarrierSlowPath( + invoke, old_value_temp_loc, old_value_loc, base_loc, /*offset=*/ 0u, index); + if (update_old_value_) { + update_old_value_slow_path_ = riscv64_codegen->AddReadBarrierSlowPath( + invoke, old_value_loc, old_value_temp_loc, base_loc, /*offset=*/ 0u, index); + } + } + } + + const char* GetDescription() const override { return "ReadBarrierCasSlowPathRISCV64"; } + + // We return to a different label on success for a strong CAS that does not return old value. + Riscv64Label* GetSuccessExitLabel() { + return &success_exit_label_; + } + + void EmitNativeCode(CodeGenerator* codegen) override { + CodeGeneratorRISCV64* riscv64_codegen = down_cast<CodeGeneratorRISCV64*>(codegen); + Riscv64Assembler* assembler = riscv64_codegen->GetAssembler(); + __ Bind(GetEntryLabel()); + + // Mark the `old_value_` from the main path and compare with `expected_`. + DCHECK(mark_old_value_slow_path_ != nullptr); + if (kUseBakerReadBarrier) { + __ Mv(old_value_temp_, old_value_); + riscv64_codegen->EmitBakerReadBarierMarkingCheck(mark_old_value_slow_path_, + Location::RegisterLocation(old_value_temp_), + kBakerReadBarrierTemp); + } else { + __ J(mark_old_value_slow_path_->GetEntryLabel()); + __ Bind(mark_old_value_slow_path_->GetExitLabel()); + } + Riscv64Label move_marked_old_value; + __ Bne(old_value_temp_, expected_, update_old_value_ ? 
&move_marked_old_value : GetExitLabel()); + + // The `old_value` we have read did not match `expected` (which is always a to-space + // reference) but after the read barrier the marked to-space value matched, so the + // `old_value` must be a from-space reference to the same object. Do the same CAS loop + // as the main path but check for both `expected` and the unmarked old value + // representing the to-space and from-space references for the same object. + + ScratchRegisterScope srs(assembler); + XRegister tmp_ptr = srs.AllocateXRegister(); + XRegister store_result = + store_result_ != kNoXRegister ? store_result_ : srs.AllocateXRegister(); + + // Recalculate the `tmp_ptr` from main path potentially clobbered by the read barrier above + // or by an expanded conditional branch (clobbers `TMP` if beyond 1MiB). + __ Add(tmp_ptr, base_, offset_); + + Riscv64Label mark_old_value; + GenerateCompareAndSet(riscv64_codegen->GetAssembler(), + DataType::Type::kReference, + order_, + strong_, + /*cmp_failure=*/ update_old_value_ ? &mark_old_value : GetExitLabel(), + tmp_ptr, + new_value_, + /*old_value=*/ old_value_temp_, + /*mask=*/ kNoXRegister, + /*masked=*/ kNoXRegister, + store_result, + expected_, + /*expected2=*/ old_value_); + if (update_old_value_) { + // To reach this point, the `old_value_temp_` must be either a from-space or a to-space + // reference of the `expected_` object. Update the `old_value_` to the to-space reference. + __ Mv(old_value_, expected_); + } + if (!update_old_value_ && strong_) { + // Load success value to the result register. + // We must jump to the instruction that loads the success value in the main path. + // Note that a SC failure in the CAS loop sets the `store_result` to 1, so the main + // path must not use the `store_result` as an indication of success. + __ J(GetSuccessExitLabel()); + } else { + __ J(GetExitLabel()); + } + + if (update_old_value_) { + // TODO(riscv64): If we initially saw a from-space reference and then saw + // a different reference, can the latter be also a from-space reference? + // (Shouldn't every reference write store a to-space reference?) + DCHECK(update_old_value_slow_path_ != nullptr); + __ Bind(&mark_old_value); + if (kUseBakerReadBarrier) { + DCHECK(update_old_value_slow_path_ == nullptr); + __ Mv(old_value_, old_value_temp_); + riscv64_codegen->EmitBakerReadBarierMarkingCheck(update_old_value_slow_path_, + Location::RegisterLocation(old_value_), + kBakerReadBarrierTemp); + } else { + // Note: We could redirect the `failure` above directly to the entry label and bind + // the exit label in the main path, but the main path would need to access the + // `update_old_value_slow_path_`. To keep the code simple, keep the extra jumps. + __ J(update_old_value_slow_path_->GetEntryLabel()); + __ Bind(update_old_value_slow_path_->GetExitLabel()); + } + __ J(GetExitLabel()); + + __ Bind(&move_marked_old_value); + __ Mv(old_value_, old_value_temp_); + __ J(GetExitLabel()); + } + } + + private: + // Use RA as temp. It is clobbered in the slow path anyway. 
+ static constexpr Location kBakerReadBarrierTemp = Location::RegisterLocation(RA); + + std::memory_order order_; + bool strong_; + XRegister base_; + XRegister offset_; + XRegister expected_; + XRegister new_value_; + XRegister old_value_; + XRegister old_value_temp_; + XRegister store_result_; + bool update_old_value_; + SlowPathCodeRISCV64* mark_old_value_slow_path_; + SlowPathCodeRISCV64* update_old_value_slow_path_; + Riscv64Label success_exit_label_; +}; + +static void EmitBlt32(Riscv64Assembler* assembler, + XRegister rs1, + Location rs2, + Riscv64Label* label, + XRegister temp) { + if (rs2.IsConstant()) { + __ Li(temp, rs2.GetConstant()->AsIntConstant()->GetValue()); + __ Blt(rs1, temp, label); + } else { + __ Blt(rs1, rs2.AsRegister<XRegister>(), label); + } +} + +static void CheckSystemArrayCopyPosition(Riscv64Assembler* assembler, + XRegister array, + Location pos, + Location length, + SlowPathCodeRISCV64* slow_path, + XRegister temp1, + XRegister temp2, + bool length_is_array_length, + bool position_sign_checked) { + const int32_t length_offset = mirror::Array::LengthOffset().Int32Value(); + if (pos.IsConstant()) { + int32_t pos_const = pos.GetConstant()->AsIntConstant()->GetValue(); + DCHECK_GE(pos_const, 0); // Checked in location builder. + if (pos_const == 0) { + if (!length_is_array_length) { + // Check that length(array) >= length. + __ Loadw(temp1, array, length_offset); + EmitBlt32(assembler, temp1, length, slow_path->GetEntryLabel(), temp2); + } + } else { + // Calculate length(array) - pos. + // Both operands are known to be non-negative `int32_t`, so the difference cannot underflow + // as `int32_t`. If the result is negative, the BLT below shall go to the slow path. + __ Loadw(temp1, array, length_offset); + __ AddConst32(temp1, temp1, -pos_const); + + // Check that (length(array) - pos) >= length. + EmitBlt32(assembler, temp1, length, slow_path->GetEntryLabel(), temp2); + } + } else if (length_is_array_length) { + // The only way the copy can succeed is if pos is zero. + __ Bnez(pos.AsRegister<XRegister>(), slow_path->GetEntryLabel()); + } else { + // Check that pos >= 0. + XRegister pos_reg = pos.AsRegister<XRegister>(); + if (!position_sign_checked) { + __ Bltz(pos_reg, slow_path->GetEntryLabel()); + } + + // Calculate length(array) - pos. + // Both operands are known to be non-negative `int32_t`, so the difference cannot underflow + // as `int32_t`. If the result is negative, the BLT below shall go to the slow path. + __ Loadw(temp1, array, length_offset); + __ Sub(temp1, temp1, pos_reg); + + // Check that (length(array) - pos) >= length. + EmitBlt32(assembler, temp1, length, slow_path->GetEntryLabel(), temp2); + } +} + +static void GenArrayAddress(CodeGeneratorRISCV64* codegen, + XRegister dest, + XRegister base, + Location pos, + DataType::Type type, + int32_t data_offset) { + Riscv64Assembler* assembler = codegen->GetAssembler(); + if (pos.IsConstant()) { + int32_t constant = pos.GetConstant()->AsIntConstant()->GetValue(); + __ AddConst64(dest, base, DataType::Size(type) * constant + data_offset); + } else { + codegen->GetInstructionVisitor()->ShNAdd(dest, pos.AsRegister<XRegister>(), base, type); + if (data_offset != 0) { + __ AddConst64(dest, dest, data_offset); + } + } +} + +// Compute base source address, base destination address, and end +// source address for System.arraycopy* intrinsics in `src_base`, +// `dst_base` and `src_end` respectively. 
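
The function that follows is pure address arithmetic; spelled out, assuming the array elements start `data_offset` bytes past the array pointer:

#include <cstddef>
#include <cstdint>

struct CopyAddresses {
  uintptr_t src_base;
  uintptr_t dst_base;
  uintptr_t src_end;
};

static CopyAddresses ComputeCopyAddresses(uintptr_t src, size_t src_pos,
                                          uintptr_t dst, size_t dst_pos,
                                          size_t length, size_t element_size,
                                          size_t data_offset) {
  CopyAddresses a;
  a.src_base = src + data_offset + src_pos * element_size;  // GenArrayAddress(src, src_pos)
  a.dst_base = dst + data_offset + dst_pos * element_size;  // GenArrayAddress(dst, dst_pos)
  a.src_end  = a.src_base + length * element_size;          // GenArrayAddress(src_base, length)
  return a;
}
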
+static void GenSystemArrayCopyAddresses(CodeGeneratorRISCV64* codegen, + DataType::Type type, + XRegister src, + Location src_pos, + XRegister dst, + Location dst_pos, + Location copy_length, + XRegister src_base, + XRegister dst_base, + XRegister src_end) { + // This routine is used by the SystemArrayCopy and the SystemArrayCopyChar intrinsics. + DCHECK(type == DataType::Type::kReference || type == DataType::Type::kUint16) + << "Unexpected element type: " << type; + const int32_t element_size = DataType::Size(type); + const uint32_t data_offset = mirror::Array::DataOffset(element_size).Uint32Value(); + + GenArrayAddress(codegen, src_base, src, src_pos, type, data_offset); + GenArrayAddress(codegen, dst_base, dst, dst_pos, type, data_offset); + GenArrayAddress(codegen, src_end, src_base, copy_length, type, /*data_offset=*/ 0); +} + +static Location LocationForSystemArrayCopyInput(HInstruction* input) { + HIntConstant* const_input = input->AsIntConstantOrNull(); + if (const_input != nullptr && IsInt<12>(const_input->GetValue())) { + return Location::ConstantLocation(const_input); + } else { + return Location::RequiresRegister(); + } +} + +// We can choose to use the native implementation there for longer copy lengths. +static constexpr int32_t kSystemArrayCopyThreshold = 128; + +void IntrinsicLocationsBuilderRISCV64::VisitSystemArrayCopy(HInvoke* invoke) { + // The only read barrier implementation supporting the + // SystemArrayCopy intrinsic is the Baker-style read barriers. + if (codegen_->EmitNonBakerReadBarrier()) { + return; + } + + size_t num_temps = codegen_->EmitBakerReadBarrier() ? 4u : 2u; + LocationSummary* locations = CodeGenerator::CreateSystemArrayCopyLocationSummary( + invoke, kSystemArrayCopyThreshold, num_temps); + if (locations != nullptr) { + // We request position and length as constants only for small integral values. + locations->SetInAt(1, LocationForSystemArrayCopyInput(invoke->InputAt(1))); + locations->SetInAt(3, LocationForSystemArrayCopyInput(invoke->InputAt(3))); + locations->SetInAt(4, LocationForSystemArrayCopyInput(invoke->InputAt(4))); + } +} + +void IntrinsicCodeGeneratorRISCV64::VisitSystemArrayCopy(HInvoke* invoke) { + // The only read barrier implementation supporting the + // SystemArrayCopy intrinsic is the Baker-style read barriers. 
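
Before any copying, the code below rejects the one overlap case a forward copy cannot handle: the same array with the source range starting before the destination range. The condition it establishes, in plain terms:

#include <cstddef>

// A forward element-by-element copy is safe unless it would overwrite source
// elements that have not been read yet; that can only happen within a single
// array when src_pos < dest_pos. Such calls fall back to the runtime.
static bool ForwardCopyIsSafe(const void* src, size_t src_pos,
                              const void* dest, size_t dest_pos) {
  return src != dest || src_pos >= dest_pos;
}
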
+ DCHECK_IMPLIES(codegen_->EmitReadBarrier(), kUseBakerReadBarrier); + + Riscv64Assembler* assembler = GetAssembler(); + LocationSummary* locations = invoke->GetLocations(); + + uint32_t class_offset = mirror::Object::ClassOffset().Int32Value(); + uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value(); + uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value(); + uint32_t primitive_offset = mirror::Class::PrimitiveTypeOffset().Int32Value(); + uint32_t monitor_offset = mirror::Object::MonitorOffset().Int32Value(); + + XRegister src = locations->InAt(0).AsRegister<XRegister>(); + Location src_pos = locations->InAt(1); + XRegister dest = locations->InAt(2).AsRegister<XRegister>(); + Location dest_pos = locations->InAt(3); + Location length = locations->InAt(4); + XRegister temp1 = locations->GetTemp(0).AsRegister<XRegister>(); + XRegister temp2 = locations->GetTemp(1).AsRegister<XRegister>(); + + SlowPathCodeRISCV64* intrinsic_slow_path = + new (codegen_->GetScopedAllocator()) IntrinsicSlowPathRISCV64(invoke); + codegen_->AddSlowPath(intrinsic_slow_path); + + Riscv64Label conditions_on_positions_validated; + SystemArrayCopyOptimizations optimizations(invoke); + + // If source and destination are the same, we go to slow path if we need to do forward copying. + // We do not need to do this check if the source and destination positions are the same. + if (!optimizations.GetSourcePositionIsDestinationPosition()) { + if (src_pos.IsConstant()) { + int32_t src_pos_constant = src_pos.GetConstant()->AsIntConstant()->GetValue(); + if (dest_pos.IsConstant()) { + int32_t dest_pos_constant = dest_pos.GetConstant()->AsIntConstant()->GetValue(); + if (optimizations.GetDestinationIsSource()) { + // Checked when building locations. + DCHECK_GE(src_pos_constant, dest_pos_constant); + } else if (src_pos_constant < dest_pos_constant) { + __ Beq(src, dest, intrinsic_slow_path->GetEntryLabel()); + } + } else { + if (!optimizations.GetDestinationIsSource()) { + __ Bne(src, dest, &conditions_on_positions_validated); + } + __ Li(temp1, src_pos_constant); + __ Bgt(dest_pos.AsRegister<XRegister>(), temp1, intrinsic_slow_path->GetEntryLabel()); + } + } else { + if (!optimizations.GetDestinationIsSource()) { + __ Bne(src, dest, &conditions_on_positions_validated); + } + XRegister src_pos_reg = src_pos.AsRegister<XRegister>(); + EmitBlt32(assembler, src_pos_reg, dest_pos, intrinsic_slow_path->GetEntryLabel(), temp2); + } + } + + __ Bind(&conditions_on_positions_validated); + + if (!optimizations.GetSourceIsNotNull()) { + // Bail out if the source is null. + __ Beqz(src, intrinsic_slow_path->GetEntryLabel()); + } + + if (!optimizations.GetDestinationIsNotNull() && !optimizations.GetDestinationIsSource()) { + // Bail out if the destination is null. + __ Beqz(dest, intrinsic_slow_path->GetEntryLabel()); + } + + // We have already checked in the LocationsBuilder for the constant case. + if (!length.IsConstant()) { + // Merge the following two comparisons into one: + // If the length is negative, bail out (delegate to libcore's native implementation). + // If the length >= 128 then (currently) prefer native implementation. + __ Li(temp1, kSystemArrayCopyThreshold); + __ Bgeu(length.AsRegister<XRegister>(), temp1, intrinsic_slow_path->GetEntryLabel()); + } + // Validity checks: source. 
+ CheckSystemArrayCopyPosition(assembler, + src, + src_pos, + length, + intrinsic_slow_path, + temp1, + temp2, + optimizations.GetCountIsSourceLength(), + /*position_sign_checked=*/ false); + + // Validity checks: dest. + bool dest_position_sign_checked = optimizations.GetSourcePositionIsDestinationPosition(); + CheckSystemArrayCopyPosition(assembler, + dest, + dest_pos, + length, + intrinsic_slow_path, + temp1, + temp2, + optimizations.GetCountIsDestinationLength(), + dest_position_sign_checked); + { + // We use a block to end the scratch scope before the write barrier, thus + // freeing the temporary registers so they can be used in `MarkGCCard`. + ScratchRegisterScope srs(assembler); + bool emit_rb = codegen_->EmitBakerReadBarrier(); + XRegister temp3 = + emit_rb ? locations->GetTemp(2).AsRegister<XRegister>() : srs.AllocateXRegister(); + + auto check_non_primitive_array_class = [&](XRegister klass, XRegister temp) { + // No read barrier is needed for reading a chain of constant references for comparing + // with null, or for reading a constant primitive value, see `ReadBarrierOption`. + // /* HeapReference<Class> */ temp = klass->component_type_ + __ Loadwu(temp, klass, component_offset); + codegen_->MaybeUnpoisonHeapReference(temp); + __ Beqz(temp, intrinsic_slow_path->GetEntryLabel()); + // /* uint16_t */ temp = static_cast<uint16>(klass->primitive_type_); + __ Loadhu(temp, temp, primitive_offset); + static_assert(Primitive::kPrimNot == 0, "Expected 0 for kPrimNot"); + __ Bnez(temp, intrinsic_slow_path->GetEntryLabel()); + }; + + if (!optimizations.GetDoesNotNeedTypeCheck()) { + // Check whether all elements of the source array are assignable to the component + // type of the destination array. We do two checks: the classes are the same, + // or the destination is Object[]. If none of these checks succeed, we go to the + // slow path. + + if (emit_rb) { + // /* HeapReference<Class> */ temp1 = dest->klass_ + codegen_->GenerateFieldLoadWithBakerReadBarrier(invoke, + Location::RegisterLocation(temp1), + dest, + class_offset, + Location::RegisterLocation(temp3), + /* needs_null_check= */ false); + // /* HeapReference<Class> */ temp2 = src->klass_ + codegen_->GenerateFieldLoadWithBakerReadBarrier(invoke, + Location::RegisterLocation(temp2), + src, + class_offset, + Location::RegisterLocation(temp3), + /* needs_null_check= */ false); + } else { + // /* HeapReference<Class> */ temp1 = dest->klass_ + __ Loadwu(temp1, dest, class_offset); + codegen_->MaybeUnpoisonHeapReference(temp1); + // /* HeapReference<Class> */ temp2 = src->klass_ + __ Loadwu(temp2, src, class_offset); + codegen_->MaybeUnpoisonHeapReference(temp2); + } + + if (optimizations.GetDestinationIsTypedObjectArray()) { + DCHECK(optimizations.GetDestinationIsNonPrimitiveArray()); + Riscv64Label do_copy; + // For class match, we can skip the source type check regardless of the optimization flag. + __ Beq(temp1, temp2, &do_copy); + // /* HeapReference<Class> */ temp1 = temp1->component_type_ + // No read barrier is needed for reading a chain of constant references + // for comparing with null, see `ReadBarrierOption`. + __ Loadwu(temp1, temp1, component_offset); + codegen_->MaybeUnpoisonHeapReference(temp1); + // /* HeapReference<Class> */ temp1 = temp1->super_class_ + __ Loadwu(temp1, temp1, super_offset); + // No need to unpoison the result, we're comparing against null. + __ Bnez(temp1, intrinsic_slow_path->GetEntryLabel()); + // Bail out if the source is not a non primitive array. 
+        if (!optimizations.GetSourceIsNonPrimitiveArray()) {
+          check_non_primitive_array_class(temp2, temp3);
+        }
+        __ Bind(&do_copy);
+      } else {
+        DCHECK(!optimizations.GetDestinationIsTypedObjectArray());
+        // For class match, we can skip the array type check completely if at least one of source
+        // and destination is known to be a non primitive array, otherwise one check is enough.
+        __ Bne(temp1, temp2, intrinsic_slow_path->GetEntryLabel());
+        if (!optimizations.GetDestinationIsNonPrimitiveArray() &&
+            !optimizations.GetSourceIsNonPrimitiveArray()) {
+          check_non_primitive_array_class(temp2, temp3);
+        }
+      }
+    } else if (!optimizations.GetSourceIsNonPrimitiveArray()) {
+      DCHECK(optimizations.GetDestinationIsNonPrimitiveArray());
+      // Bail out if the source is not a non primitive array.
+      // No read barrier is needed for reading a chain of constant references for comparing
+      // with null, or for reading a constant primitive value, see `ReadBarrierOption`.
+      // /* HeapReference<Class> */ temp2 = src->klass_
+      __ Loadwu(temp2, src, class_offset);
+      codegen_->MaybeUnpoisonHeapReference(temp2);
+      check_non_primitive_array_class(temp2, temp3);
+    }
+
+    if (length.IsConstant() && length.GetConstant()->AsIntConstant()->GetValue() == 0) {
+      // Zero constant length: no need to emit the loop code at all.
+    } else {
+      XRegister src_curr_addr = temp1;
+      XRegister dst_curr_addr = temp2;
+      XRegister src_stop_addr = temp3;
+      Riscv64Label done;
+      const DataType::Type type = DataType::Type::kReference;
+      const int32_t element_size = DataType::Size(type);
+
+      if (length.IsRegister()) {
+        // Don't enter the copy loop if the length is zero.
+        __ Beqz(length.AsRegister<XRegister>(), &done);
+      }
+
+      XRegister tmp = kNoXRegister;
+      SlowPathCodeRISCV64* read_barrier_slow_path = nullptr;
+      if (emit_rb) {
+        // TODO: Also convert this intrinsic to the IsGcMarking strategy?
+
+        // SystemArrayCopy implementation for Baker read barriers (see
+        // also CodeGeneratorRISCV64::GenerateReferenceLoadWithBakerReadBarrier):
+        //
+        //   uint32_t rb_state = Lockword(src->monitor_).ReadBarrierState();
+        //   lfence;  // Load fence or artificial data dependency to prevent load-load reordering
+        //   bool is_gray = (rb_state == ReadBarrier::GrayState());
+        //   if (is_gray) {
+        //     // Slow-path copy.
+        //     do {
+        //       *dest_ptr++ = MaybePoison(ReadBarrier::Mark(MaybeUnpoison(*src_ptr++)));
+        //     } while (src_ptr != end_ptr)
+        //   } else {
+        //     // Fast-path copy.
+        //     do {
+        //       *dest_ptr++ = *src_ptr++;
+        //     } while (src_ptr != end_ptr)
+        //   }
+
+        // /* uint32_t */ monitor = src->monitor_
+        tmp = locations->GetTemp(3).AsRegister<XRegister>();
+        __ Loadwu(tmp, src, monitor_offset);
+        // /* LockWord */ lock_word = LockWord(monitor)
+        static_assert(sizeof(LockWord) == sizeof(int32_t),
+                      "art::LockWord and int32_t have different sizes.");
+
+        // Shift the RB state bit to the sign bit while also clearing the low 32 bits
+        // for the fake dependency below.
+        static_assert(LockWord::kReadBarrierStateShift < 31);
+        __ Slli(tmp, tmp, 63 - LockWord::kReadBarrierStateShift);
+
+        // Introduce a dependency on the lock_word including rb_state, to prevent load-load
+        // reordering, and without using a memory barrier (which would be more expensive).
+        // `src` is unchanged by this operation (since Adduw adds low 32 bits
+        // which are zero after left shift), but its value now depends on `tmp`.
+        __ AddUw(src, tmp, src);
+
+        // Slow path used to copy array when `src` is gray.
+        read_barrier_slow_path = new (codegen_->GetScopedAllocator())
+            ReadBarrierSystemArrayCopySlowPathRISCV64(invoke, Location::RegisterLocation(tmp));
+        codegen_->AddSlowPath(read_barrier_slow_path);
+      }
+
+      // Compute base source address, base destination address, and end source address for
+      // System.arraycopy* intrinsics in `src_base`, `dst_base` and `src_end` respectively.
+      // Note that `src_curr_addr` is computed from `src` (and `src_pos`) here, and
+      // thus honors the artificial dependency of `src` on `tmp` for read barriers.
+      GenSystemArrayCopyAddresses(codegen_,
+                                  type,
+                                  src,
+                                  src_pos,
+                                  dest,
+                                  dest_pos,
+                                  length,
+                                  src_curr_addr,
+                                  dst_curr_addr,
+                                  src_stop_addr);
+
+      if (emit_rb) {
+        // Given the numeric representation, it's enough to check the low bit of the RB state.
+        static_assert(ReadBarrier::NonGrayState() == 0, "Expecting non-gray to have value 0");
+        static_assert(ReadBarrier::GrayState() == 1, "Expecting gray to have value 1");
+        DCHECK_NE(tmp, kNoXRegister);
+        __ Bltz(tmp, read_barrier_slow_path->GetEntryLabel());
+      } else {
+        // After allocating the last scratch register, we cannot use macro load/store instructions
+        // such as `Loadwu()` and need to use raw instructions. However, all offsets below are 0.
+        DCHECK_EQ(tmp, kNoXRegister);
+        tmp = srs.AllocateXRegister();
+      }
+
+      // Iterate over the arrays and do a raw copy of the objects. We don't need to
+      // poison/unpoison.
+      Riscv64Label loop;
+      __ Bind(&loop);
+      __ Lwu(tmp, src_curr_addr, 0);
+      __ Sw(tmp, dst_curr_addr, 0);
+      __ Addi(src_curr_addr, src_curr_addr, element_size);
+      __ Addi(dst_curr_addr, dst_curr_addr, element_size);
+      // Bare: `TMP` shall not be clobbered.
+      __ Bne(src_curr_addr, src_stop_addr, &loop, /*is_bare=*/ true);
+      __ Bind(&done);
+
+      if (emit_rb) {
+        DCHECK(read_barrier_slow_path != nullptr);
+        __ Bind(read_barrier_slow_path->GetExitLabel());
+      }
+    }
+  }
+
+  // We only need one card marking on the destination array.
+  codegen_->MarkGCCard(dest, XRegister(kNoXRegister), /* emit_null_check= */ false);
+
+  __ Bind(intrinsic_slow_path->GetExitLabel());
+}
+
+enum class GetAndUpdateOp {
+  kSet,
+  kAdd,
+  kAnd,
+  kOr,
+  kXor
+};
+
+// Generate a GetAndUpdate operation.
+//
+// Only 32-bit and 64-bit atomics are currently supported, therefore smaller types need
+// special handling. The caller emits code to prepare aligned `ptr` and adjusted `arg`
+// and extract the needed bits from `old_value`. For bitwise operations, no extra
+// handling is needed here. For `GetAndUpdateOp::kSet` and `GetAndUpdateOp::kAdd` we
+// also use a special LR/SC sequence that uses a `mask` to update only the desired bits.
+// Note: The `mask` must contain the bits to keep for `GetAndUpdateOp::kSet` and
+// the bits to replace for `GetAndUpdateOp::kAdd`.
+static void GenerateGetAndUpdate(CodeGeneratorRISCV64* codegen, + GetAndUpdateOp get_and_update_op, + DataType::Type type, + std::memory_order order, + XRegister ptr, + XRegister arg, + XRegister old_value, + XRegister mask, + XRegister temp) { + DCHECK_EQ(mask != kNoXRegister, temp != kNoXRegister); + DCHECK_IMPLIES(mask != kNoXRegister, type == DataType::Type::kInt32); + DCHECK_IMPLIES( + mask != kNoXRegister, + (get_and_update_op == GetAndUpdateOp::kSet) || (get_and_update_op == GetAndUpdateOp::kAdd)); + Riscv64Assembler* assembler = codegen->GetAssembler(); + AqRl amo_aqrl = GetAmoAqRl(order); + switch (get_and_update_op) { + case GetAndUpdateOp::kSet: + if (type == DataType::Type::kInt64) { + __ AmoSwapD(old_value, arg, ptr, amo_aqrl); + } else if (mask == kNoXRegister) { + DCHECK_EQ(type, DataType::Type::kInt32); + __ AmoSwapW(old_value, arg, ptr, amo_aqrl); + } else { + DCHECK_EQ(type, DataType::Type::kInt32); + DCHECK_NE(temp, kNoXRegister); + auto [load_aqrl, store_aqrl] = GetLrScAqRl(order); + Riscv64Label retry; + __ Bind(&retry); + __ LrW(old_value, ptr, load_aqrl); + __ And(temp, old_value, mask); + __ Or(temp, temp, arg); + __ ScW(temp, temp, ptr, store_aqrl); + __ Bnez(temp, &retry, /*is_bare=*/ true); // Bare: `TMP` shall not be clobbered. + } + break; + case GetAndUpdateOp::kAdd: + if (type == DataType::Type::kInt64) { + __ AmoAddD(old_value, arg, ptr, amo_aqrl); + } else if (mask == kNoXRegister) { + DCHECK_EQ(type, DataType::Type::kInt32); + __ AmoAddW(old_value, arg, ptr, amo_aqrl); + } else { + DCHECK_EQ(type, DataType::Type::kInt32); + DCHECK_NE(temp, kNoXRegister); + auto [load_aqrl, store_aqrl] = GetLrScAqRl(order); + Riscv64Label retry; + __ Bind(&retry); + __ LrW(old_value, ptr, load_aqrl); + __ Add(temp, old_value, arg); + // We use `(A ^ B) ^ A == B` and with the masking `((A ^ B) & mask) ^ A`, the result + // contains bits from `B` for bits specified in `mask` and bits from `A` elsewhere. + // Note: These instructions directly depend on each other, so it's not necessarily the + // fastest approach but for `(A ^ ~mask) | (B & mask)` we would need an extra register for + // `~mask` because ANDN is not in the "I" instruction set as required for a LR/SC sequence. + __ Xor(temp, temp, old_value); + __ And(temp, temp, mask); + __ Xor(temp, temp, old_value); + __ ScW(temp, temp, ptr, store_aqrl); + __ Bnez(temp, &retry, /*is_bare=*/ true); // Bare: `TMP` shall not be clobbered. + } + break; + case GetAndUpdateOp::kAnd: + if (type == DataType::Type::kInt64) { + __ AmoAndD(old_value, arg, ptr, amo_aqrl); + } else { + DCHECK_EQ(type, DataType::Type::kInt32); + __ AmoAndW(old_value, arg, ptr, amo_aqrl); + } + break; + case GetAndUpdateOp::kOr: + if (type == DataType::Type::kInt64) { + __ AmoOrD(old_value, arg, ptr, amo_aqrl); + } else { + DCHECK_EQ(type, DataType::Type::kInt32); + __ AmoOrW(old_value, arg, ptr, amo_aqrl); + } + break; + case GetAndUpdateOp::kXor: + if (type == DataType::Type::kInt64) { + __ AmoXorD(old_value, arg, ptr, amo_aqrl); + } else { + DCHECK_EQ(type, DataType::Type::kInt32); + __ AmoXorW(old_value, arg, ptr, amo_aqrl); + } + break; + } +} + +static void CreateUnsafeGetLocations(ArenaAllocator* allocator, + HInvoke* invoke, + CodeGeneratorRISCV64* codegen) { + bool can_call = codegen->EmitReadBarrier() && IsUnsafeGetReference(invoke); + LocationSummary* locations = new (allocator) LocationSummary( + invoke, + can_call ? 
LocationSummary::kCallOnSlowPath : LocationSummary::kNoCall, + kIntrinsified); + if (can_call && kUseBakerReadBarrier) { + locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty()); // No caller-save registers. + } + locations->SetInAt(0, Location::NoLocation()); // Unused receiver. + locations->SetInAt(1, Location::RequiresRegister()); + locations->SetInAt(2, Location::RequiresRegister()); + locations->SetOut(Location::RequiresRegister(), + (can_call ? Location::kOutputOverlap : Location::kNoOutputOverlap)); +} + +static void GenUnsafeGet(HInvoke* invoke, + CodeGeneratorRISCV64* codegen, + std::memory_order order, + DataType::Type type) { + DCHECK((type == DataType::Type::kInt8) || + (type == DataType::Type::kInt32) || + (type == DataType::Type::kInt64) || + (type == DataType::Type::kReference)); + LocationSummary* locations = invoke->GetLocations(); + Location object_loc = locations->InAt(1); + XRegister object = object_loc.AsRegister<XRegister>(); // Object pointer. + Location offset_loc = locations->InAt(2); + XRegister offset = offset_loc.AsRegister<XRegister>(); // Long offset. + Location out_loc = locations->Out(); + XRegister out = out_loc.AsRegister<XRegister>(); + + bool seq_cst_barrier = (order == std::memory_order_seq_cst); + bool acquire_barrier = seq_cst_barrier || (order == std::memory_order_acquire); + DCHECK(acquire_barrier || order == std::memory_order_relaxed); + + if (seq_cst_barrier) { + codegen->GenerateMemoryBarrier(MemBarrierKind::kAnyAny); + } + + if (type == DataType::Type::kReference && codegen->EmitBakerReadBarrier()) { + // JdkUnsafeGetReference/JdkUnsafeGetReferenceVolatile with Baker's read barrier case. + // TODO(riscv64): Revisit when we add checking if the holder is black. + Location temp = Location::NoLocation(); + codegen->GenerateReferenceLoadWithBakerReadBarrier(invoke, + out_loc, + object, + /*offset=*/ 0, + /*index=*/ offset_loc, + temp, + /*needs_null_check=*/ false); + } else { + // Other cases. 
+ Riscv64Assembler* assembler = codegen->GetAssembler(); + __ Add(out, object, offset); + codegen->GetInstructionVisitor()->Load(out_loc, out, /*offset=*/ 0, type); + + if (type == DataType::Type::kReference) { + codegen->MaybeGenerateReadBarrierSlow( + invoke, out_loc, out_loc, object_loc, /*offset=*/ 0u, /*index=*/ offset_loc); + } + } + + if (acquire_barrier) { + codegen->GenerateMemoryBarrier(MemBarrierKind::kLoadAny); + } +} + +void IntrinsicLocationsBuilderRISCV64::VisitUnsafeGet(HInvoke* invoke) { + VisitJdkUnsafeGet(invoke); +} + +void IntrinsicCodeGeneratorRISCV64::VisitUnsafeGet(HInvoke* invoke) { + VisitJdkUnsafeGet(invoke); +} + +void IntrinsicLocationsBuilderRISCV64::VisitUnsafeGetVolatile(HInvoke* invoke) { + VisitJdkUnsafeGetVolatile(invoke); +} + +void IntrinsicCodeGeneratorRISCV64::VisitUnsafeGetVolatile(HInvoke* invoke) { + VisitJdkUnsafeGetVolatile(invoke); +} + +void IntrinsicLocationsBuilderRISCV64::VisitUnsafeGetObject(HInvoke* invoke) { + VisitJdkUnsafeGetReference(invoke); +} + +void IntrinsicCodeGeneratorRISCV64::VisitUnsafeGetObject(HInvoke* invoke) { + VisitJdkUnsafeGetReference(invoke); +} + +void IntrinsicLocationsBuilderRISCV64::VisitUnsafeGetObjectVolatile(HInvoke* invoke) { + VisitJdkUnsafeGetReferenceVolatile(invoke); +} + +void IntrinsicCodeGeneratorRISCV64::VisitUnsafeGetObjectVolatile(HInvoke* invoke) { + VisitJdkUnsafeGetReferenceVolatile(invoke); +} + +void IntrinsicLocationsBuilderRISCV64::VisitUnsafeGetLong(HInvoke* invoke) { + VisitJdkUnsafeGetLong(invoke); +} + +void IntrinsicCodeGeneratorRISCV64::VisitUnsafeGetLong(HInvoke* invoke) { + VisitJdkUnsafeGetLong(invoke); +} + +void IntrinsicLocationsBuilderRISCV64::VisitUnsafeGetLongVolatile(HInvoke* invoke) { + VisitJdkUnsafeGetLongVolatile(invoke); +} + +void IntrinsicCodeGeneratorRISCV64::VisitUnsafeGetLongVolatile(HInvoke* invoke) { + VisitJdkUnsafeGetLongVolatile(invoke); +} + +void IntrinsicLocationsBuilderRISCV64::VisitUnsafeGetByte(HInvoke* invoke) { + VisitJdkUnsafeGetByte(invoke); +} + +void IntrinsicCodeGeneratorRISCV64::VisitUnsafeGetByte(HInvoke* invoke) { + VisitJdkUnsafeGetByte(invoke); +} + +void IntrinsicLocationsBuilderRISCV64::VisitJdkUnsafeGet(HInvoke* invoke) { + CreateUnsafeGetLocations(allocator_, invoke, codegen_); +} + +void IntrinsicCodeGeneratorRISCV64::VisitJdkUnsafeGet(HInvoke* invoke) { + GenUnsafeGet(invoke, codegen_, std::memory_order_relaxed, DataType::Type::kInt32); +} + +void IntrinsicLocationsBuilderRISCV64::VisitJdkUnsafeGetAcquire(HInvoke* invoke) { + CreateUnsafeGetLocations(allocator_, invoke, codegen_); +} + +void IntrinsicCodeGeneratorRISCV64::VisitJdkUnsafeGetAcquire(HInvoke* invoke) { + GenUnsafeGet(invoke, codegen_, std::memory_order_acquire, DataType::Type::kInt32); +} + +void IntrinsicLocationsBuilderRISCV64::VisitJdkUnsafeGetVolatile(HInvoke* invoke) { + CreateUnsafeGetLocations(allocator_, invoke, codegen_); +} + +void IntrinsicCodeGeneratorRISCV64::VisitJdkUnsafeGetVolatile(HInvoke* invoke) { + GenUnsafeGet(invoke, codegen_, std::memory_order_seq_cst, DataType::Type::kInt32); +} + +void IntrinsicLocationsBuilderRISCV64::VisitJdkUnsafeGetReference(HInvoke* invoke) { + CreateUnsafeGetLocations(allocator_, invoke, codegen_); +} + +void IntrinsicCodeGeneratorRISCV64::VisitJdkUnsafeGetReference(HInvoke* invoke) { + GenUnsafeGet(invoke, codegen_, std::memory_order_relaxed, DataType::Type::kReference); +} + +void IntrinsicLocationsBuilderRISCV64::VisitJdkUnsafeGetReferenceAcquire(HInvoke* invoke) { + CreateUnsafeGetLocations(allocator_, invoke, codegen_); +} + 
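A minimal standalone C++ sketch (not part of the patch; the name `LoadWithOrder` is illustrative only) of the barrier placement that `GenUnsafeGet` above mirrors: a full fence before the load for seq_cst, an acquire fence after the load for both acquire and seq_cst, and no fences for relaxed. It shows the emitted fence pattern under those assumptions, not a portable data-race-free C++ idiom.

#include <atomic>
#include <cstdint>

// Illustrative only: mirrors GenUnsafeGet's fence placement, using standard C++
// fences in place of ART's kAnyAny / kLoadAny memory barriers.
inline int32_t LoadWithOrder(const volatile int32_t* addr, std::memory_order order) {
  const bool seq_cst_barrier = (order == std::memory_order_seq_cst);
  const bool acquire_barrier = seq_cst_barrier || (order == std::memory_order_acquire);
  if (seq_cst_barrier) {
    std::atomic_thread_fence(std::memory_order_seq_cst);  // Full barrier before the load.
  }
  int32_t value = *addr;  // The plain load emitted by the intrinsic.
  if (acquire_barrier) {
    std::atomic_thread_fence(std::memory_order_acquire);  // Load-acquire barrier after the load.
  }
  return value;
}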
+void IntrinsicCodeGeneratorRISCV64::VisitJdkUnsafeGetReferenceAcquire(HInvoke* invoke) { + GenUnsafeGet(invoke, codegen_, std::memory_order_acquire, DataType::Type::kReference); +} + +void IntrinsicLocationsBuilderRISCV64::VisitJdkUnsafeGetReferenceVolatile(HInvoke* invoke) { + CreateUnsafeGetLocations(allocator_, invoke, codegen_); +} + +void IntrinsicCodeGeneratorRISCV64::VisitJdkUnsafeGetReferenceVolatile(HInvoke* invoke) { + GenUnsafeGet(invoke, codegen_, std::memory_order_seq_cst, DataType::Type::kReference); +} + +void IntrinsicLocationsBuilderRISCV64::VisitJdkUnsafeGetLong(HInvoke* invoke) { + CreateUnsafeGetLocations(allocator_, invoke, codegen_); +} + +void IntrinsicCodeGeneratorRISCV64::VisitJdkUnsafeGetLong(HInvoke* invoke) { + GenUnsafeGet(invoke, codegen_, std::memory_order_relaxed, DataType::Type::kInt64); +} + +void IntrinsicLocationsBuilderRISCV64::VisitJdkUnsafeGetLongAcquire(HInvoke* invoke) { + CreateUnsafeGetLocations(allocator_, invoke, codegen_); +} + +void IntrinsicCodeGeneratorRISCV64::VisitJdkUnsafeGetLongAcquire(HInvoke* invoke) { + GenUnsafeGet(invoke, codegen_, std::memory_order_acquire, DataType::Type::kInt64); +} + +void IntrinsicLocationsBuilderRISCV64::VisitJdkUnsafeGetLongVolatile(HInvoke* invoke) { + CreateUnsafeGetLocations(allocator_, invoke, codegen_); +} + +void IntrinsicCodeGeneratorRISCV64::VisitJdkUnsafeGetLongVolatile(HInvoke* invoke) { + GenUnsafeGet(invoke, codegen_, std::memory_order_seq_cst, DataType::Type::kInt64); +} + +void IntrinsicLocationsBuilderRISCV64::VisitJdkUnsafeGetByte(HInvoke* invoke) { + CreateUnsafeGetLocations(allocator_, invoke, codegen_); +} + +void IntrinsicCodeGeneratorRISCV64::VisitJdkUnsafeGetByte(HInvoke* invoke) { + GenUnsafeGet(invoke, codegen_, std::memory_order_relaxed, DataType::Type::kInt8); +} + +static void CreateUnsafePutLocations(ArenaAllocator* allocator, HInvoke* invoke) { + LocationSummary* locations = + new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified); + locations->SetInAt(0, Location::NoLocation()); // Unused receiver. + locations->SetInAt(1, Location::RequiresRegister()); + locations->SetInAt(2, Location::RequiresRegister()); + locations->SetInAt(3, Location::RequiresRegister()); +} + +static void GenUnsafePut(HInvoke* invoke, + CodeGeneratorRISCV64* codegen, + std::memory_order order, + DataType::Type type) { + Riscv64Assembler* assembler = codegen->GetAssembler(); + LocationSummary* locations = invoke->GetLocations(); + XRegister base = locations->InAt(1).AsRegister<XRegister>(); // Object pointer. + XRegister offset = locations->InAt(2).AsRegister<XRegister>(); // Long offset. + Location value = locations->InAt(3); + + { + // We use a block to end the scratch scope before the write barrier, thus + // freeing the temporary registers so they can be used in `MarkGCCard()`. + ScratchRegisterScope srs(assembler); + XRegister address = srs.AllocateXRegister(); + __ Add(address, base, offset); + GenerateSet(codegen, order, value, address, /*offset=*/ 0, type); + } + + if (type == DataType::Type::kReference) { + bool value_can_be_null = true; // TODO: Worth finding out this information? 
+ codegen->MarkGCCard(base, value.AsRegister<XRegister>(), value_can_be_null); + } +} + +void IntrinsicLocationsBuilderRISCV64::VisitUnsafePut(HInvoke* invoke) { + VisitJdkUnsafePut(invoke); +} + +void IntrinsicCodeGeneratorRISCV64::VisitUnsafePut(HInvoke* invoke) { + VisitJdkUnsafePut(invoke); +} + +void IntrinsicLocationsBuilderRISCV64::VisitUnsafePutOrdered(HInvoke* invoke) { + VisitJdkUnsafePutOrdered(invoke); +} + +void IntrinsicCodeGeneratorRISCV64::VisitUnsafePutOrdered(HInvoke* invoke) { + VisitJdkUnsafePutOrdered(invoke); +} + +void IntrinsicLocationsBuilderRISCV64::VisitUnsafePutVolatile(HInvoke* invoke) { + VisitJdkUnsafePutVolatile(invoke); +} + +void IntrinsicCodeGeneratorRISCV64::VisitUnsafePutVolatile(HInvoke* invoke) { + VisitJdkUnsafePutVolatile(invoke); +} + +void IntrinsicLocationsBuilderRISCV64::VisitUnsafePutObject(HInvoke* invoke) { + VisitJdkUnsafePutReference(invoke); +} + +void IntrinsicCodeGeneratorRISCV64::VisitUnsafePutObject(HInvoke* invoke) { + VisitJdkUnsafePutReference(invoke); +} + +void IntrinsicLocationsBuilderRISCV64::VisitUnsafePutObjectOrdered(HInvoke* invoke) { + VisitJdkUnsafePutObjectOrdered(invoke); +} + +void IntrinsicCodeGeneratorRISCV64::VisitUnsafePutObjectOrdered(HInvoke* invoke) { + VisitJdkUnsafePutObjectOrdered(invoke); +} + +void IntrinsicLocationsBuilderRISCV64::VisitUnsafePutObjectVolatile(HInvoke* invoke) { + VisitJdkUnsafePutReferenceVolatile(invoke); +} + +void IntrinsicCodeGeneratorRISCV64::VisitUnsafePutObjectVolatile(HInvoke* invoke) { + VisitJdkUnsafePutReferenceVolatile(invoke); +} + +void IntrinsicLocationsBuilderRISCV64::VisitUnsafePutLong(HInvoke* invoke) { + VisitJdkUnsafePutLong(invoke); +} + +void IntrinsicCodeGeneratorRISCV64::VisitUnsafePutLong(HInvoke* invoke) { + VisitJdkUnsafePutLong(invoke); +} + +void IntrinsicLocationsBuilderRISCV64::VisitUnsafePutLongOrdered(HInvoke* invoke) { + VisitJdkUnsafePutLongOrdered(invoke); +} + +void IntrinsicCodeGeneratorRISCV64::VisitUnsafePutLongOrdered(HInvoke* invoke) { + VisitJdkUnsafePutLongOrdered(invoke); +} + +void IntrinsicLocationsBuilderRISCV64::VisitUnsafePutLongVolatile(HInvoke* invoke) { + VisitJdkUnsafePutLongVolatile(invoke); +} + +void IntrinsicCodeGeneratorRISCV64::VisitUnsafePutLongVolatile(HInvoke* invoke) { + VisitJdkUnsafePutLongVolatile(invoke); +} + +void IntrinsicLocationsBuilderRISCV64::VisitUnsafePutByte(HInvoke* invoke) { + VisitJdkUnsafePutByte(invoke); +} + +void IntrinsicCodeGeneratorRISCV64::VisitUnsafePutByte(HInvoke* invoke) { + VisitJdkUnsafePutByte(invoke); +} + +void IntrinsicLocationsBuilderRISCV64::VisitJdkUnsafePut(HInvoke* invoke) { + CreateUnsafePutLocations(allocator_, invoke); +} + +void IntrinsicCodeGeneratorRISCV64::VisitJdkUnsafePut(HInvoke* invoke) { + GenUnsafePut(invoke, codegen_, std::memory_order_relaxed, DataType::Type::kInt32); +} + +void IntrinsicLocationsBuilderRISCV64::VisitJdkUnsafePutOrdered(HInvoke* invoke) { + CreateUnsafePutLocations(allocator_, invoke); +} + +void IntrinsicCodeGeneratorRISCV64::VisitJdkUnsafePutOrdered(HInvoke* invoke) { + GenUnsafePut(invoke, codegen_, std::memory_order_release, DataType::Type::kInt32); +} + +void IntrinsicLocationsBuilderRISCV64::VisitJdkUnsafePutRelease(HInvoke* invoke) { + CreateUnsafePutLocations(allocator_, invoke); +} + +void IntrinsicCodeGeneratorRISCV64::VisitJdkUnsafePutRelease(HInvoke* invoke) { + GenUnsafePut(invoke, codegen_, std::memory_order_release, DataType::Type::kInt32); +} + +void IntrinsicLocationsBuilderRISCV64::VisitJdkUnsafePutVolatile(HInvoke* invoke) { + 
CreateUnsafePutLocations(allocator_, invoke); +} + +void IntrinsicCodeGeneratorRISCV64::VisitJdkUnsafePutVolatile(HInvoke* invoke) { + GenUnsafePut(invoke, codegen_, std::memory_order_seq_cst, DataType::Type::kInt32); +} + +void IntrinsicLocationsBuilderRISCV64::VisitJdkUnsafePutReference(HInvoke* invoke) { + CreateUnsafePutLocations(allocator_, invoke); +} + +void IntrinsicCodeGeneratorRISCV64::VisitJdkUnsafePutReference(HInvoke* invoke) { + GenUnsafePut(invoke, codegen_, std::memory_order_relaxed, DataType::Type::kReference); +} + +void IntrinsicLocationsBuilderRISCV64::VisitJdkUnsafePutObjectOrdered(HInvoke* invoke) { + CreateUnsafePutLocations(allocator_, invoke); +} + +void IntrinsicCodeGeneratorRISCV64::VisitJdkUnsafePutObjectOrdered(HInvoke* invoke) { + GenUnsafePut(invoke, codegen_, std::memory_order_release, DataType::Type::kReference); +} + +void IntrinsicLocationsBuilderRISCV64::VisitJdkUnsafePutReferenceRelease(HInvoke* invoke) { + CreateUnsafePutLocations(allocator_, invoke); +} + +void IntrinsicCodeGeneratorRISCV64::VisitJdkUnsafePutReferenceRelease(HInvoke* invoke) { + GenUnsafePut(invoke, codegen_, std::memory_order_release, DataType::Type::kReference); +} + +void IntrinsicLocationsBuilderRISCV64::VisitJdkUnsafePutReferenceVolatile(HInvoke* invoke) { + CreateUnsafePutLocations(allocator_, invoke); +} + +void IntrinsicCodeGeneratorRISCV64::VisitJdkUnsafePutReferenceVolatile(HInvoke* invoke) { + GenUnsafePut(invoke, codegen_, std::memory_order_seq_cst, DataType::Type::kReference); +} + +void IntrinsicLocationsBuilderRISCV64::VisitJdkUnsafePutLong(HInvoke* invoke) { + CreateUnsafePutLocations(allocator_, invoke); +} + +void IntrinsicCodeGeneratorRISCV64::VisitJdkUnsafePutLong(HInvoke* invoke) { + GenUnsafePut(invoke, codegen_, std::memory_order_relaxed, DataType::Type::kInt64); +} + +void IntrinsicLocationsBuilderRISCV64::VisitJdkUnsafePutLongOrdered(HInvoke* invoke) { + CreateUnsafePutLocations(allocator_, invoke); +} + +void IntrinsicCodeGeneratorRISCV64::VisitJdkUnsafePutLongOrdered(HInvoke* invoke) { + GenUnsafePut(invoke, codegen_, std::memory_order_release, DataType::Type::kInt64); +} + +void IntrinsicLocationsBuilderRISCV64::VisitJdkUnsafePutLongRelease(HInvoke* invoke) { + CreateUnsafePutLocations(allocator_, invoke); +} + +void IntrinsicCodeGeneratorRISCV64::VisitJdkUnsafePutLongRelease(HInvoke* invoke) { + GenUnsafePut(invoke, codegen_, std::memory_order_release, DataType::Type::kInt64); +} + +void IntrinsicLocationsBuilderRISCV64::VisitJdkUnsafePutLongVolatile(HInvoke* invoke) { + CreateUnsafePutLocations(allocator_, invoke); +} + +void IntrinsicCodeGeneratorRISCV64::VisitJdkUnsafePutLongVolatile(HInvoke* invoke) { + GenUnsafePut(invoke, codegen_, std::memory_order_seq_cst, DataType::Type::kInt64); +} + +void IntrinsicLocationsBuilderRISCV64::VisitJdkUnsafePutByte(HInvoke* invoke) { + CreateUnsafePutLocations(allocator_, invoke); +} + +void IntrinsicCodeGeneratorRISCV64::VisitJdkUnsafePutByte(HInvoke* invoke) { + GenUnsafePut(invoke, codegen_, std::memory_order_relaxed, DataType::Type::kInt8); +} + +static void CreateUnsafeCASLocations(ArenaAllocator* allocator, + HInvoke* invoke, + CodeGeneratorRISCV64* codegen) { + const bool can_call = codegen->EmitReadBarrier() && IsUnsafeCASReference(invoke); + LocationSummary* locations = new (allocator) LocationSummary( + invoke, + can_call ? 
LocationSummary::kCallOnSlowPath : LocationSummary::kNoCall, + kIntrinsified); + if (can_call && kUseBakerReadBarrier) { + locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty()); // No caller-save registers. + } + locations->SetInAt(0, Location::NoLocation()); // Unused receiver. + locations->SetInAt(1, Location::RequiresRegister()); + locations->SetInAt(2, Location::RequiresRegister()); + locations->SetInAt(3, Location::RequiresRegister()); + locations->SetInAt(4, Location::RequiresRegister()); + + locations->SetOut(Location::RequiresRegister()); +} + +static void GenUnsafeCas(HInvoke* invoke, CodeGeneratorRISCV64* codegen, DataType::Type type) { + Riscv64Assembler* assembler = codegen->GetAssembler(); + LocationSummary* locations = invoke->GetLocations(); + XRegister out = locations->Out().AsRegister<XRegister>(); // Boolean result. + XRegister object = locations->InAt(1).AsRegister<XRegister>(); // Object pointer. + XRegister offset = locations->InAt(2).AsRegister<XRegister>(); // Long offset. + XRegister expected = locations->InAt(3).AsRegister<XRegister>(); // Expected. + XRegister new_value = locations->InAt(4).AsRegister<XRegister>(); // New value. + + // This needs to be before the temp registers, as MarkGCCard also uses scratch registers. + if (type == DataType::Type::kReference) { + // Mark card for object assuming new value is stored. + bool new_value_can_be_null = true; // TODO: Worth finding out this information? + codegen->MarkGCCard(object, new_value, new_value_can_be_null); + } + + ScratchRegisterScope srs(assembler); + XRegister tmp_ptr = srs.AllocateXRegister(); // Pointer to actual memory. + XRegister old_value; // Value in memory. + + Riscv64Label exit_loop_label; + Riscv64Label* exit_loop = &exit_loop_label; + Riscv64Label* cmp_failure = &exit_loop_label; + + ReadBarrierCasSlowPathRISCV64* slow_path = nullptr; + if (type == DataType::Type::kReference && codegen->EmitReadBarrier()) { + // We need to store the `old_value` in a non-scratch register to make sure + // the read barrier in the slow path does not clobber it. + old_value = locations->GetTemp(0).AsRegister<XRegister>(); // The old value from main path. + // The `old_value_temp` is used first for marking the `old_value` and then for the unmarked + // reloaded old value for subsequent CAS in the slow path. We make this a scratch register + // as we do have marking entrypoints on riscv64 even for scratch registers. + XRegister old_value_temp = srs.AllocateXRegister(); + slow_path = new (codegen->GetScopedAllocator()) ReadBarrierCasSlowPathRISCV64( + invoke, + std::memory_order_seq_cst, + /*strong=*/ true, + object, + offset, + expected, + new_value, + old_value, + old_value_temp, + /*store_result=*/ old_value_temp, // Let the SC result clobber the reloaded old_value. + /*update_old_value=*/ false, + codegen); + codegen->AddSlowPath(slow_path); + exit_loop = slow_path->GetExitLabel(); + cmp_failure = slow_path->GetEntryLabel(); + } else { + old_value = srs.AllocateXRegister(); + } + + __ Add(tmp_ptr, object, offset); + + // Pre-populate the result register with failure. + __ Li(out, 0); + + GenerateCompareAndSet(assembler, + type, + std::memory_order_seq_cst, + /*strong=*/ true, + cmp_failure, + tmp_ptr, + new_value, + old_value, + /*mask=*/ kNoXRegister, + /*masked=*/ kNoXRegister, + /*store_result=*/ old_value, // Let the SC result clobber the `old_value`. 
+ expected); + + DCHECK_EQ(slow_path != nullptr, type == DataType::Type::kReference && codegen->EmitReadBarrier()); + if (slow_path != nullptr) { + __ Bind(slow_path->GetSuccessExitLabel()); + } + + // Indicate success if we successfully execute the SC. + __ Li(out, 1); + + __ Bind(exit_loop); +} + +void IntrinsicLocationsBuilderRISCV64::VisitUnsafeCASInt(HInvoke* invoke) { + VisitJdkUnsafeCASInt(invoke); +} + +void IntrinsicCodeGeneratorRISCV64::VisitUnsafeCASInt(HInvoke* invoke) { + VisitJdkUnsafeCASInt(invoke); +} + +void IntrinsicLocationsBuilderRISCV64::VisitUnsafeCASLong(HInvoke* invoke) { + VisitJdkUnsafeCASLong(invoke); +} + +void IntrinsicCodeGeneratorRISCV64::VisitUnsafeCASLong(HInvoke* invoke) { + VisitJdkUnsafeCASLong(invoke); +} + +void IntrinsicLocationsBuilderRISCV64::VisitUnsafeCASObject(HInvoke* invoke) { + VisitJdkUnsafeCASObject(invoke); +} + +void IntrinsicCodeGeneratorRISCV64::VisitUnsafeCASObject(HInvoke* invoke) { + VisitJdkUnsafeCASObject(invoke); +} + +void IntrinsicLocationsBuilderRISCV64::VisitJdkUnsafeCASInt(HInvoke* invoke) { + // `jdk.internal.misc.Unsafe.compareAndSwapInt` has compare-and-set semantics (see javadoc). + VisitJdkUnsafeCompareAndSetInt(invoke); +} + +void IntrinsicCodeGeneratorRISCV64::VisitJdkUnsafeCASInt(HInvoke* invoke) { + // `jdk.internal.misc.Unsafe.compareAndSwapInt` has compare-and-set semantics (see javadoc). + VisitJdkUnsafeCompareAndSetInt(invoke); +} + +void IntrinsicLocationsBuilderRISCV64::VisitJdkUnsafeCASLong(HInvoke* invoke) { + // `jdk.internal.misc.Unsafe.compareAndSwapLong` has compare-and-set semantics (see javadoc). + VisitJdkUnsafeCompareAndSetLong(invoke); +} + +void IntrinsicCodeGeneratorRISCV64::VisitJdkUnsafeCASLong(HInvoke* invoke) { + // `jdk.internal.misc.Unsafe.compareAndSwapLong` has compare-and-set semantics (see javadoc). + VisitJdkUnsafeCompareAndSetLong(invoke); +} + +void IntrinsicLocationsBuilderRISCV64::VisitJdkUnsafeCASObject(HInvoke* invoke) { + // `jdk.internal.misc.Unsafe.compareAndSwapObject` has compare-and-set semantics (see javadoc). + VisitJdkUnsafeCompareAndSetReference(invoke); +} + +void IntrinsicCodeGeneratorRISCV64::VisitJdkUnsafeCASObject(HInvoke* invoke) { + // `jdk.internal.misc.Unsafe.compareAndSwapObject` has compare-and-set semantics (see javadoc). + VisitJdkUnsafeCompareAndSetReference(invoke); +} + +void IntrinsicLocationsBuilderRISCV64::VisitJdkUnsafeCompareAndSetInt(HInvoke* invoke) { + CreateUnsafeCASLocations(allocator_, invoke, codegen_); +} + +void IntrinsicCodeGeneratorRISCV64::VisitJdkUnsafeCompareAndSetInt(HInvoke* invoke) { + GenUnsafeCas(invoke, codegen_, DataType::Type::kInt32); +} + +void IntrinsicLocationsBuilderRISCV64::VisitJdkUnsafeCompareAndSetLong(HInvoke* invoke) { + CreateUnsafeCASLocations(allocator_, invoke, codegen_); +} + +void IntrinsicCodeGeneratorRISCV64::VisitJdkUnsafeCompareAndSetLong(HInvoke* invoke) { + GenUnsafeCas(invoke, codegen_, DataType::Type::kInt64); +} + +void IntrinsicLocationsBuilderRISCV64::VisitJdkUnsafeCompareAndSetReference(HInvoke* invoke) { + // The only supported read barrier implementation is the Baker-style read barriers. + if (codegen_->EmitNonBakerReadBarrier()) { + return; + } + + CreateUnsafeCASLocations(allocator_, invoke, codegen_); + if (codegen_->EmitReadBarrier()) { + DCHECK(kUseBakerReadBarrier); + // We need one non-scratch temporary register for read barrier. 
+ LocationSummary* locations = invoke->GetLocations(); + locations->AddTemp(Location::RequiresRegister()); + } +} + +void IntrinsicCodeGeneratorRISCV64::VisitJdkUnsafeCompareAndSetReference(HInvoke* invoke) { + GenUnsafeCas(invoke, codegen_, DataType::Type::kReference); +} + +static void CreateUnsafeGetAndUpdateLocations(ArenaAllocator* allocator, + HInvoke* invoke, + CodeGeneratorRISCV64* codegen) { + const bool can_call = codegen->EmitReadBarrier() && IsUnsafeGetAndSetReference(invoke); + LocationSummary* locations = new (allocator) LocationSummary( + invoke, + can_call ? LocationSummary::kCallOnSlowPath : LocationSummary::kNoCall, + kIntrinsified); + if (can_call && kUseBakerReadBarrier) { + locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty()); // No caller-save registers. + } + locations->SetInAt(0, Location::NoLocation()); // Unused receiver. + locations->SetInAt(1, Location::RequiresRegister()); + locations->SetInAt(2, Location::RequiresRegister()); + locations->SetInAt(3, Location::RequiresRegister()); + + locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap); +} + +static void GenUnsafeGetAndUpdate(HInvoke* invoke, + DataType::Type type, + CodeGeneratorRISCV64* codegen, + GetAndUpdateOp get_and_update_op) { + Riscv64Assembler* assembler = codegen->GetAssembler(); + LocationSummary* locations = invoke->GetLocations(); + Location out_loc = locations->Out(); + XRegister out = out_loc.AsRegister<XRegister>(); // Result. + XRegister base = locations->InAt(1).AsRegister<XRegister>(); // Object pointer. + XRegister offset = locations->InAt(2).AsRegister<XRegister>(); // Long offset. + XRegister arg = locations->InAt(3).AsRegister<XRegister>(); // New value or addend. + + // This needs to be before the temp registers, as MarkGCCard also uses scratch registers. + if (type == DataType::Type::kReference) { + DCHECK(get_and_update_op == GetAndUpdateOp::kSet); + // Mark card for object as a new value shall be stored. + bool new_value_can_be_null = true; // TODO: Worth finding out this information? + codegen->MarkGCCard(base, /*value=*/ arg, new_value_can_be_null); + } + + ScratchRegisterScope srs(assembler); + XRegister tmp_ptr = srs.AllocateXRegister(); // Pointer to actual memory. + __ Add(tmp_ptr, base, offset); + GenerateGetAndUpdate(codegen, + get_and_update_op, + (type == DataType::Type::kReference) ? DataType::Type::kInt32 : type, + std::memory_order_seq_cst, + tmp_ptr, + arg, + /*old_value=*/ out, + /*mask=*/ kNoXRegister, + /*temp=*/ kNoXRegister); + + if (type == DataType::Type::kReference) { + __ ZextW(out, out); + if (codegen->EmitReadBarrier()) { + DCHECK(get_and_update_op == GetAndUpdateOp::kSet); + if (kUseBakerReadBarrier) { + // Use RA as temp. It is clobbered in the slow path anyway. 
+ static constexpr Location kBakerReadBarrierTemp = Location::RegisterLocation(RA); + SlowPathCodeRISCV64* rb_slow_path = + codegen->AddGcRootBakerBarrierBarrierSlowPath(invoke, out_loc, kBakerReadBarrierTemp); + codegen->EmitBakerReadBarierMarkingCheck(rb_slow_path, out_loc, kBakerReadBarrierTemp); + } else { + codegen->GenerateReadBarrierSlow( + invoke, + out_loc, + out_loc, + Location::RegisterLocation(base), + /*offset=*/ 0u, + /*index=*/ Location::RegisterLocation(offset)); + } + } + } +} + +void IntrinsicLocationsBuilderRISCV64::VisitUnsafeGetAndAddInt(HInvoke* invoke) { + VisitJdkUnsafeGetAndAddInt(invoke); +} + +void IntrinsicCodeGeneratorRISCV64::VisitUnsafeGetAndAddInt(HInvoke* invoke) { + VisitJdkUnsafeGetAndAddInt(invoke); +} + +void IntrinsicLocationsBuilderRISCV64::VisitUnsafeGetAndAddLong(HInvoke* invoke) { + VisitJdkUnsafeGetAndAddLong(invoke); +} + +void IntrinsicCodeGeneratorRISCV64::VisitUnsafeGetAndAddLong(HInvoke* invoke) { + VisitJdkUnsafeGetAndAddLong(invoke); +} + +void IntrinsicLocationsBuilderRISCV64::VisitUnsafeGetAndSetInt(HInvoke* invoke) { + VisitJdkUnsafeGetAndSetInt(invoke); +} + +void IntrinsicCodeGeneratorRISCV64::VisitUnsafeGetAndSetInt(HInvoke* invoke) { + VisitJdkUnsafeGetAndSetInt(invoke); +} + +void IntrinsicLocationsBuilderRISCV64::VisitUnsafeGetAndSetLong(HInvoke* invoke) { + VisitJdkUnsafeGetAndSetLong(invoke); +} + +void IntrinsicCodeGeneratorRISCV64::VisitUnsafeGetAndSetLong(HInvoke* invoke) { + VisitJdkUnsafeGetAndSetLong(invoke); +} + +void IntrinsicLocationsBuilderRISCV64::VisitUnsafeGetAndSetObject(HInvoke* invoke) { + VisitJdkUnsafeGetAndSetReference(invoke); +} + +void IntrinsicCodeGeneratorRISCV64::VisitUnsafeGetAndSetObject(HInvoke* invoke) { + VisitJdkUnsafeGetAndSetReference(invoke); +} + +void IntrinsicLocationsBuilderRISCV64::VisitJdkUnsafeGetAndAddInt(HInvoke* invoke) { + CreateUnsafeGetAndUpdateLocations(allocator_, invoke, codegen_); +} + +void IntrinsicCodeGeneratorRISCV64::VisitJdkUnsafeGetAndAddInt(HInvoke* invoke) { + GenUnsafeGetAndUpdate(invoke, DataType::Type::kInt32, codegen_, GetAndUpdateOp::kAdd); +} + +void IntrinsicLocationsBuilderRISCV64::VisitJdkUnsafeGetAndAddLong(HInvoke* invoke) { + CreateUnsafeGetAndUpdateLocations(allocator_, invoke, codegen_); +} + +void IntrinsicCodeGeneratorRISCV64::VisitJdkUnsafeGetAndAddLong(HInvoke* invoke) { + GenUnsafeGetAndUpdate(invoke, DataType::Type::kInt64, codegen_, GetAndUpdateOp::kAdd); +} + +void IntrinsicLocationsBuilderRISCV64::VisitJdkUnsafeGetAndSetInt(HInvoke* invoke) { + CreateUnsafeGetAndUpdateLocations(allocator_, invoke, codegen_); +} + +void IntrinsicCodeGeneratorRISCV64::VisitJdkUnsafeGetAndSetInt(HInvoke* invoke) { + GenUnsafeGetAndUpdate(invoke, DataType::Type::kInt32, codegen_, GetAndUpdateOp::kSet); +} + +void IntrinsicLocationsBuilderRISCV64::VisitJdkUnsafeGetAndSetLong(HInvoke* invoke) { + CreateUnsafeGetAndUpdateLocations(allocator_, invoke, codegen_); +} + +void IntrinsicCodeGeneratorRISCV64::VisitJdkUnsafeGetAndSetLong(HInvoke* invoke) { + GenUnsafeGetAndUpdate(invoke, DataType::Type::kInt64, codegen_, GetAndUpdateOp::kSet); +} + +void IntrinsicLocationsBuilderRISCV64::VisitJdkUnsafeGetAndSetReference(HInvoke* invoke) { + CreateUnsafeGetAndUpdateLocations(allocator_, invoke, codegen_); +} + +void IntrinsicCodeGeneratorRISCV64::VisitJdkUnsafeGetAndSetReference(HInvoke* invoke) { + GenUnsafeGetAndUpdate(invoke, DataType::Type::kReference, codegen_, GetAndUpdateOp::kSet); +} + +class VarHandleSlowPathRISCV64 : public IntrinsicSlowPathRISCV64 { + public: + 
  VarHandleSlowPathRISCV64(HInvoke* invoke, std::memory_order order)
+      : IntrinsicSlowPathRISCV64(invoke),
+        order_(order),
+        return_success_(false),
+        strong_(false),
+        get_and_update_op_(GetAndUpdateOp::kAdd) {
+  }
+
+  Riscv64Label* GetByteArrayViewCheckLabel() {
+    return &byte_array_view_check_label_;
+  }
+
+  Riscv64Label* GetNativeByteOrderLabel() {
+    return &native_byte_order_label_;
+  }
+
+  void SetCompareAndSetOrExchangeArgs(bool return_success, bool strong) {
+    if (return_success) {
+      DCHECK(GetAccessModeTemplate() == mirror::VarHandle::AccessModeTemplate::kCompareAndSet);
+    } else {
+      DCHECK(GetAccessModeTemplate() == mirror::VarHandle::AccessModeTemplate::kCompareAndExchange);
+    }
+    return_success_ = return_success;
+    strong_ = strong;
+  }
+
+  void SetGetAndUpdateOp(GetAndUpdateOp get_and_update_op) {
+    DCHECK(GetAccessModeTemplate() == mirror::VarHandle::AccessModeTemplate::kGetAndUpdate);
+    get_and_update_op_ = get_and_update_op;
+  }
+
+  void EmitNativeCode(CodeGenerator* codegen_in) override {
+    if (GetByteArrayViewCheckLabel()->IsLinked()) {
+      EmitByteArrayViewCode(codegen_in);
+    }
+    IntrinsicSlowPathRISCV64::EmitNativeCode(codegen_in);
+  }
+
+ private:
+  HInvoke* GetInvoke() const {
+    return GetInstruction()->AsInvoke();
+  }
+
+  mirror::VarHandle::AccessModeTemplate GetAccessModeTemplate() const {
+    return mirror::VarHandle::GetAccessModeTemplateByIntrinsic(GetInvoke()->GetIntrinsic());
+  }
+
+  void EmitByteArrayViewCode(CodeGenerator* codegen_in);
+
+  Riscv64Label byte_array_view_check_label_;
+  Riscv64Label native_byte_order_label_;
+  // Shared parameter for all VarHandle intrinsics.
+  std::memory_order order_;
+  // Extra arguments for GenerateVarHandleCompareAndSetOrExchange().
+  bool return_success_;
+  bool strong_;
+  // Extra argument for GenerateVarHandleGetAndUpdate().
+  GetAndUpdateOp get_and_update_op_;
+};
+
+// Generate subtype check without read barriers.
+static void GenerateSubTypeObjectCheckNoReadBarrier(CodeGeneratorRISCV64* codegen,
+                                                    SlowPathCodeRISCV64* slow_path,
+                                                    XRegister object,
+                                                    XRegister type,
+                                                    bool object_can_be_null = true) {
+  Riscv64Assembler* assembler = codegen->GetAssembler();
+
+  const MemberOffset class_offset = mirror::Object::ClassOffset();
+  const MemberOffset super_class_offset = mirror::Class::SuperClassOffset();
+
+  Riscv64Label success;
+  if (object_can_be_null) {
+    __ Beqz(object, &success);
+  }
+
+  ScratchRegisterScope srs(assembler);
+  XRegister temp = srs.AllocateXRegister();
+
+  // Note: The `type` can be `TMP`. We're using "bare" local branches to enforce that they shall
+  // not be expanded and the scratch register `TMP` shall not be clobbered if taken. Taking the
+  // branch to the slow path can theoretically clobber `TMP` (if outside the 1 MiB range).
+  __ Loadwu(temp, object, class_offset.Int32Value());
+  codegen->MaybeUnpoisonHeapReference(temp);
+  Riscv64Label loop;
+  __ Bind(&loop);
+  __ Beq(type, temp, &success, /*is_bare=*/ true);
+  // We may not have another scratch register for `Loadwu()`. Use `Lwu()` directly.
+  DCHECK(IsInt<12>(super_class_offset.Int32Value()));
+  __ Lwu(temp, temp, super_class_offset.Int32Value());
+  codegen->MaybeUnpoisonHeapReference(temp);
+  __ Beqz(temp, slow_path->GetEntryLabel());
+  __ J(&loop, /*is_bare=*/ true);
+  __ Bind(&success);
+}
+
+// Check access mode and the primitive type from VarHandle.varType.
+// Check reference arguments against the VarHandle.varType; for references this is a subclass +// check without read barrier, so it can have false negatives which we handle in the slow path. +static void GenerateVarHandleAccessModeAndVarTypeChecks(HInvoke* invoke, + CodeGeneratorRISCV64* codegen, + SlowPathCodeRISCV64* slow_path, + DataType::Type type) { + mirror::VarHandle::AccessMode access_mode = + mirror::VarHandle::GetAccessModeByIntrinsic(invoke->GetIntrinsic()); + Primitive::Type primitive_type = DataTypeToPrimitive(type); + + Riscv64Assembler* assembler = codegen->GetAssembler(); + LocationSummary* locations = invoke->GetLocations(); + XRegister varhandle = locations->InAt(0).AsRegister<XRegister>(); + + const MemberOffset var_type_offset = mirror::VarHandle::VarTypeOffset(); + const MemberOffset access_mode_bit_mask_offset = mirror::VarHandle::AccessModesBitMaskOffset(); + const MemberOffset primitive_type_offset = mirror::Class::PrimitiveTypeOffset(); + + ScratchRegisterScope srs(assembler); + XRegister temp = srs.AllocateXRegister(); + XRegister temp2 = srs.AllocateXRegister(); + + // Check that the operation is permitted. + __ Loadw(temp, varhandle, access_mode_bit_mask_offset.Int32Value()); + DCHECK_LT(enum_cast<uint32_t>(access_mode), 31u); // We cannot avoid the shift below. + __ Slliw(temp, temp, 31 - enum_cast<uint32_t>(access_mode)); // Shift tested bit to sign bit. + __ Bgez(temp, slow_path->GetEntryLabel()); // If not permitted, go to slow path. + + // For primitive types, we do not need a read barrier when loading a reference only for loading + // constant field through the reference. For reference types, we deliberately avoid the read + // barrier, letting the slow path handle the false negatives. + __ Loadwu(temp, varhandle, var_type_offset.Int32Value()); + codegen->MaybeUnpoisonHeapReference(temp); + + // Check the varType.primitiveType field against the type we're trying to use. + __ Loadhu(temp2, temp, primitive_type_offset.Int32Value()); + if (primitive_type == Primitive::kPrimNot) { + static_assert(Primitive::kPrimNot == 0); + __ Bnez(temp2, slow_path->GetEntryLabel()); + } else { + __ Li(temp, enum_cast<int32_t>(primitive_type)); // `temp` can be clobbered. + __ Bne(temp2, temp, slow_path->GetEntryLabel()); + } + + srs.FreeXRegister(temp2); + + if (type == DataType::Type::kReference) { + // Check reference arguments against the varType. + // False negatives due to varType being an interface or array type + // or due to the missing read barrier are handled by the slow path. 
+ size_t expected_coordinates_count = GetExpectedVarHandleCoordinatesCount(invoke); + uint32_t arguments_start = /* VarHandle object */ 1u + expected_coordinates_count; + uint32_t number_of_arguments = invoke->GetNumberOfArguments(); + for (size_t arg_index = arguments_start; arg_index != number_of_arguments; ++arg_index) { + HInstruction* arg = invoke->InputAt(arg_index); + DCHECK_EQ(arg->GetType(), DataType::Type::kReference); + if (!arg->IsNullConstant()) { + XRegister arg_reg = locations->InAt(arg_index).AsRegister<XRegister>(); + GenerateSubTypeObjectCheckNoReadBarrier(codegen, slow_path, arg_reg, temp); + } + } + } +} + +static void GenerateVarHandleStaticFieldCheck(HInvoke* invoke, + CodeGeneratorRISCV64* codegen, + SlowPathCodeRISCV64* slow_path) { + Riscv64Assembler* assembler = codegen->GetAssembler(); + XRegister varhandle = invoke->GetLocations()->InAt(0).AsRegister<XRegister>(); + + const MemberOffset coordinate_type0_offset = mirror::VarHandle::CoordinateType0Offset(); + + ScratchRegisterScope srs(assembler); + XRegister temp = srs.AllocateXRegister(); + + // Check that the VarHandle references a static field by checking that coordinateType0 == null. + // Do not emit read barrier (or unpoison the reference) for comparing to null. + __ Loadwu(temp, varhandle, coordinate_type0_offset.Int32Value()); + __ Bnez(temp, slow_path->GetEntryLabel()); +} + +static void GenerateVarHandleInstanceFieldChecks(HInvoke* invoke, + CodeGeneratorRISCV64* codegen, + SlowPathCodeRISCV64* slow_path) { + VarHandleOptimizations optimizations(invoke); + Riscv64Assembler* assembler = codegen->GetAssembler(); + LocationSummary* locations = invoke->GetLocations(); + XRegister varhandle = locations->InAt(0).AsRegister<XRegister>(); + XRegister object = locations->InAt(1).AsRegister<XRegister>(); + + const MemberOffset coordinate_type0_offset = mirror::VarHandle::CoordinateType0Offset(); + const MemberOffset coordinate_type1_offset = mirror::VarHandle::CoordinateType1Offset(); + + // Null-check the object. + if (!optimizations.GetSkipObjectNullCheck()) { + __ Beqz(object, slow_path->GetEntryLabel()); + } + + if (!optimizations.GetUseKnownBootImageVarHandle()) { + ScratchRegisterScope srs(assembler); + XRegister temp = srs.AllocateXRegister(); + + // Check that the VarHandle references an instance field by checking that + // coordinateType1 == null. coordinateType0 should not be null, but this is handled by the + // type compatibility check with the source object's type, which will fail for null. + __ Loadwu(temp, varhandle, coordinate_type1_offset.Int32Value()); + // No need for read barrier or unpoisoning of coordinateType1 for comparison with null. + __ Bnez(temp, slow_path->GetEntryLabel()); + + // Check that the object has the correct type. + // We deliberately avoid the read barrier, letting the slow path handle the false negatives. 
+ __ Loadwu(temp, varhandle, coordinate_type0_offset.Int32Value()); + codegen->MaybeUnpoisonHeapReference(temp); + GenerateSubTypeObjectCheckNoReadBarrier( + codegen, slow_path, object, temp, /*object_can_be_null=*/ false); + } +} + +static void GenerateVarHandleArrayChecks(HInvoke* invoke, + CodeGeneratorRISCV64* codegen, + VarHandleSlowPathRISCV64* slow_path) { + VarHandleOptimizations optimizations(invoke); + Riscv64Assembler* assembler = codegen->GetAssembler(); + LocationSummary* locations = invoke->GetLocations(); + XRegister varhandle = locations->InAt(0).AsRegister<XRegister>(); + XRegister object = locations->InAt(1).AsRegister<XRegister>(); + XRegister index = locations->InAt(2).AsRegister<XRegister>(); + DataType::Type value_type = + GetVarHandleExpectedValueType(invoke, /*expected_coordinates_count=*/ 2u); + Primitive::Type primitive_type = DataTypeToPrimitive(value_type); + + const MemberOffset coordinate_type0_offset = mirror::VarHandle::CoordinateType0Offset(); + const MemberOffset coordinate_type1_offset = mirror::VarHandle::CoordinateType1Offset(); + const MemberOffset component_type_offset = mirror::Class::ComponentTypeOffset(); + const MemberOffset primitive_type_offset = mirror::Class::PrimitiveTypeOffset(); + const MemberOffset class_offset = mirror::Object::ClassOffset(); + const MemberOffset array_length_offset = mirror::Array::LengthOffset(); + + // Null-check the object. + if (!optimizations.GetSkipObjectNullCheck()) { + __ Beqz(object, slow_path->GetEntryLabel()); + } + + ScratchRegisterScope srs(assembler); + XRegister temp = srs.AllocateXRegister(); + XRegister temp2 = srs.AllocateXRegister(); + + // Check that the VarHandle references an array, byte array view or ByteBuffer by checking + // that coordinateType1 != null. If that's true, coordinateType1 shall be int.class and + // coordinateType0 shall not be null but we do not explicitly verify that. + __ Loadwu(temp, varhandle, coordinate_type1_offset.Int32Value()); + // No need for read barrier or unpoisoning of coordinateType1 for comparison with null. + __ Beqz(temp, slow_path->GetEntryLabel()); + + // Check object class against componentType0. + // + // This is an exact check and we defer other cases to the runtime. This includes + // conversion to array of superclass references, which is valid but subsequently + // requires all update operations to check that the value can indeed be stored. + // We do not want to perform such extra checks in the intrinsified code. + // + // We do this check without read barrier, so there can be false negatives which we + // defer to the slow path. There shall be no false negatives for array classes in the + // boot image (including Object[] and primitive arrays) because they are non-movable. + __ Loadwu(temp, varhandle, coordinate_type0_offset.Int32Value()); + __ Loadwu(temp2, object, class_offset.Int32Value()); + __ Bne(temp, temp2, slow_path->GetEntryLabel()); + + // Check that the coordinateType0 is an array type. We do not need a read barrier + // for loading constant reference fields (or chains of them) for comparison with null, + // nor for finally loading a constant primitive field (primitive type) below. + codegen->MaybeUnpoisonHeapReference(temp); + __ Loadwu(temp2, temp, component_type_offset.Int32Value()); + codegen->MaybeUnpoisonHeapReference(temp2); + __ Beqz(temp2, slow_path->GetEntryLabel()); + + // Check that the array component type matches the primitive type. 
+ __ Loadhu(temp, temp2, primitive_type_offset.Int32Value()); + if (primitive_type == Primitive::kPrimNot) { + static_assert(Primitive::kPrimNot == 0); + __ Bnez(temp, slow_path->GetEntryLabel()); + } else { + // With the exception of `kPrimNot` (handled above), `kPrimByte` and `kPrimBoolean`, + // we shall check for a byte array view in the slow path. + // The check requires the ByteArrayViewVarHandle.class to be in the boot image, + // so we cannot emit that if we're JITting without boot image. + bool boot_image_available = + codegen->GetCompilerOptions().IsBootImage() || + !Runtime::Current()->GetHeap()->GetBootImageSpaces().empty(); + bool can_be_view = (DataType::Size(value_type) != 1u) && boot_image_available; + Riscv64Label* slow_path_label = + can_be_view ? slow_path->GetByteArrayViewCheckLabel() : slow_path->GetEntryLabel(); + __ Li(temp2, enum_cast<int32_t>(primitive_type)); + __ Bne(temp, temp2, slow_path_label); + } + + // Check for array index out of bounds. + __ Loadw(temp, object, array_length_offset.Int32Value()); + __ Bgeu(index, temp, slow_path->GetEntryLabel()); +} + +static void GenerateVarHandleCoordinateChecks(HInvoke* invoke, + CodeGeneratorRISCV64* codegen, + VarHandleSlowPathRISCV64* slow_path) { + size_t expected_coordinates_count = GetExpectedVarHandleCoordinatesCount(invoke); + if (expected_coordinates_count == 0u) { + GenerateVarHandleStaticFieldCheck(invoke, codegen, slow_path); + } else if (expected_coordinates_count == 1u) { + GenerateVarHandleInstanceFieldChecks(invoke, codegen, slow_path); + } else { + DCHECK_EQ(expected_coordinates_count, 2u); + GenerateVarHandleArrayChecks(invoke, codegen, slow_path); + } +} + +static VarHandleSlowPathRISCV64* GenerateVarHandleChecks(HInvoke* invoke, + CodeGeneratorRISCV64* codegen, + std::memory_order order, + DataType::Type type) { + size_t expected_coordinates_count = GetExpectedVarHandleCoordinatesCount(invoke); + VarHandleOptimizations optimizations(invoke); + if (optimizations.GetUseKnownBootImageVarHandle()) { + DCHECK_NE(expected_coordinates_count, 2u); + if (expected_coordinates_count == 0u || optimizations.GetSkipObjectNullCheck()) { + return nullptr; + } + } + + VarHandleSlowPathRISCV64* slow_path = + new (codegen->GetScopedAllocator()) VarHandleSlowPathRISCV64(invoke, order); + codegen->AddSlowPath(slow_path); + + if (!optimizations.GetUseKnownBootImageVarHandle()) { + GenerateVarHandleAccessModeAndVarTypeChecks(invoke, codegen, slow_path, type); + } + GenerateVarHandleCoordinateChecks(invoke, codegen, slow_path); + + return slow_path; +} + +struct VarHandleTarget { + XRegister object; // The object holding the value to operate on. + XRegister offset; // The offset of the value to operate on. +}; + +static VarHandleTarget GetVarHandleTarget(HInvoke* invoke) { + size_t expected_coordinates_count = GetExpectedVarHandleCoordinatesCount(invoke); + LocationSummary* locations = invoke->GetLocations(); + + VarHandleTarget target; + // The temporary allocated for loading the offset. + target.offset = locations->GetTemp(0u).AsRegister<XRegister>(); + // The reference to the object that holds the value to operate on. + target.object = (expected_coordinates_count == 0u) + ? 
locations->GetTemp(1u).AsRegister<XRegister>() + : locations->InAt(1).AsRegister<XRegister>(); + return target; +} + +static void GenerateVarHandleTarget(HInvoke* invoke, + const VarHandleTarget& target, + CodeGeneratorRISCV64* codegen) { + Riscv64Assembler* assembler = codegen->GetAssembler(); + LocationSummary* locations = invoke->GetLocations(); + XRegister varhandle = locations->InAt(0).AsRegister<XRegister>(); + size_t expected_coordinates_count = GetExpectedVarHandleCoordinatesCount(invoke); + + if (expected_coordinates_count <= 1u) { + if (VarHandleOptimizations(invoke).GetUseKnownBootImageVarHandle()) { + ScopedObjectAccess soa(Thread::Current()); + ArtField* target_field = GetBootImageVarHandleField(invoke); + if (expected_coordinates_count == 0u) { + ObjPtr<mirror::Class> declaring_class = target_field->GetDeclaringClass(); + if (Runtime::Current()->GetHeap()->ObjectIsInBootImageSpace(declaring_class)) { + uint32_t boot_image_offset = CodeGenerator::GetBootImageOffset(declaring_class); + codegen->LoadBootImageRelRoEntry(target.object, boot_image_offset); + } else { + codegen->LoadTypeForBootImageIntrinsic( + target.object, + TypeReference(&declaring_class->GetDexFile(), declaring_class->GetDexTypeIndex())); + } + } + __ Li(target.offset, target_field->GetOffset().Uint32Value()); + } else { + // For static fields, we need to fill the `target.object` with the declaring class, + // so we can use `target.object` as temporary for the `ArtField*`. For instance fields, + // we do not need the declaring class, so we can forget the `ArtField*` when + // we load the `target.offset`, so use the `target.offset` to hold the `ArtField*`. + XRegister field = (expected_coordinates_count == 0) ? target.object : target.offset; + + const MemberOffset art_field_offset = mirror::FieldVarHandle::ArtFieldOffset(); + const MemberOffset offset_offset = ArtField::OffsetOffset(); + + // Load the ArtField*, the offset and, if needed, declaring class. + __ Loadd(field, varhandle, art_field_offset.Int32Value()); + __ Loadwu(target.offset, field, offset_offset.Int32Value()); + if (expected_coordinates_count == 0u) { + codegen->GenerateGcRootFieldLoad( + invoke, + Location::RegisterLocation(target.object), + field, + ArtField::DeclaringClassOffset().Int32Value(), + codegen->GetCompilerReadBarrierOption()); + } + } + } else { + DCHECK_EQ(expected_coordinates_count, 2u); + DataType::Type value_type = + GetVarHandleExpectedValueType(invoke, /*expected_coordinates_count=*/ 2u); + MemberOffset data_offset = mirror::Array::DataOffset(DataType::Size(value_type)); + + XRegister index = locations->InAt(2).AsRegister<XRegister>(); + __ Li(target.offset, data_offset.Int32Value()); + codegen->GetInstructionVisitor()->ShNAdd(target.offset, index, target.offset, value_type); + } +} + +static LocationSummary* CreateVarHandleCommonLocations(HInvoke* invoke, + CodeGeneratorRISCV64* codegen) { + size_t expected_coordinates_count = GetExpectedVarHandleCoordinatesCount(invoke); + DataType::Type return_type = invoke->GetType(); + + ArenaAllocator* allocator = invoke->GetBlock()->GetGraph()->GetAllocator(); + LocationSummary* locations = + new (allocator) LocationSummary(invoke, LocationSummary::kCallOnSlowPath, kIntrinsified); + locations->SetInAt(0, Location::RequiresRegister()); + // Require coordinates in registers. These are the object holding the value + // to operate on (except for static fields) and index (for arrays and views). 
+ for (size_t i = 0; i != expected_coordinates_count; ++i) { + locations->SetInAt(/* VarHandle object */ 1u + i, Location::RequiresRegister()); + } + if (return_type != DataType::Type::kVoid) { + if (DataType::IsFloatingPointType(return_type)) { + locations->SetOut(Location::RequiresFpuRegister()); + } else { + locations->SetOut(Location::RequiresRegister()); + } + } + uint32_t arguments_start = /* VarHandle object */ 1u + expected_coordinates_count; + uint32_t number_of_arguments = invoke->GetNumberOfArguments(); + for (size_t arg_index = arguments_start; arg_index != number_of_arguments; ++arg_index) { + HInstruction* arg = invoke->InputAt(arg_index); + if (IsZeroBitPattern(arg)) { + locations->SetInAt(arg_index, Location::ConstantLocation(arg)); + } else if (DataType::IsFloatingPointType(arg->GetType())) { + locations->SetInAt(arg_index, Location::RequiresFpuRegister()); + } else { + locations->SetInAt(arg_index, Location::RequiresRegister()); + } + } + + // Add a temporary for offset. + if (codegen->EmitNonBakerReadBarrier() && + GetExpectedVarHandleCoordinatesCount(invoke) == 0u) { // For static fields. + // To preserve the offset value across the non-Baker read barrier slow path + // for loading the declaring class, use a fixed callee-save register. + constexpr int first_callee_save = CTZ(kRiscv64CalleeSaveRefSpills); + locations->AddTemp(Location::RegisterLocation(first_callee_save)); + } else { + locations->AddTemp(Location::RequiresRegister()); + } + if (expected_coordinates_count == 0u) { + // Add a temporary to hold the declaring class. + locations->AddTemp(Location::RequiresRegister()); + } + + return locations; +} + +static void CreateVarHandleGetLocations(HInvoke* invoke, CodeGeneratorRISCV64* codegen) { + VarHandleOptimizations optimizations(invoke); + if (optimizations.GetDoNotIntrinsify()) { + return; + } + + if (codegen->EmitNonBakerReadBarrier() && + invoke->GetType() == DataType::Type::kReference && + invoke->GetIntrinsic() != Intrinsics::kVarHandleGet && + invoke->GetIntrinsic() != Intrinsics::kVarHandleGetOpaque) { + // Unsupported for non-Baker read barrier because the artReadBarrierSlow() ignores + // the passed reference and reloads it from the field. This gets the memory visibility + // wrong for Acquire/Volatile operations. b/173104084 + return; + } + + CreateVarHandleCommonLocations(invoke, codegen); +} + +DataType::Type IntTypeForFloatingPointType(DataType::Type fp_type) { + DCHECK(DataType::IsFloatingPointType(fp_type)); + return (fp_type == DataType::Type::kFloat32) ? 
DataType::Type::kInt32 : DataType::Type::kInt64; +} + +static void GenerateVarHandleGet(HInvoke* invoke, + CodeGeneratorRISCV64* codegen, + std::memory_order order, + bool byte_swap = false) { + DataType::Type type = invoke->GetType(); + DCHECK_NE(type, DataType::Type::kVoid); + + LocationSummary* locations = invoke->GetLocations(); + Riscv64Assembler* assembler = codegen->GetAssembler(); + Location out = locations->Out(); + + VarHandleTarget target = GetVarHandleTarget(invoke); + VarHandleSlowPathRISCV64* slow_path = nullptr; + if (!byte_swap) { + slow_path = GenerateVarHandleChecks(invoke, codegen, order, type); + GenerateVarHandleTarget(invoke, target, codegen); + if (slow_path != nullptr) { + __ Bind(slow_path->GetNativeByteOrderLabel()); + } + } + + bool seq_cst_barrier = (order == std::memory_order_seq_cst); + bool acquire_barrier = seq_cst_barrier || (order == std::memory_order_acquire); + DCHECK(acquire_barrier || order == std::memory_order_relaxed); + + if (seq_cst_barrier) { + codegen->GenerateMemoryBarrier(MemBarrierKind::kAnyAny); + } + + // Load the value from the target location. + if (type == DataType::Type::kReference && codegen->EmitBakerReadBarrier()) { + Location index = Location::RegisterLocation(target.offset); + // TODO(riscv64): Revisit when we add checking if the holder is black. + Location temp = Location::NoLocation(); + codegen->GenerateReferenceLoadWithBakerReadBarrier(invoke, + out, + target.object, + /*offset=*/ 0, + index, + temp, + /*needs_null_check=*/ false); + DCHECK(!byte_swap); + } else { + ScratchRegisterScope srs(assembler); + XRegister address = srs.AllocateXRegister(); + __ Add(address, target.object, target.offset); + Location load_loc = out; + DataType::Type load_type = type; + if (byte_swap && DataType::IsFloatingPointType(type)) { + load_loc = Location::RegisterLocation(target.offset); // Load to the offset temporary. 
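+      // The load and the byte swap are done on the integer representation;
+      // `GenerateReverseBytes()` below then moves the swapped value to the FP output register.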
+ load_type = IntTypeForFloatingPointType(type); + } + codegen->GetInstructionVisitor()->Load(load_loc, address, /*offset=*/ 0, load_type); + if (type == DataType::Type::kReference) { + DCHECK(!byte_swap); + Location object_loc = Location::RegisterLocation(target.object); + Location offset_loc = Location::RegisterLocation(target.offset); + codegen->MaybeGenerateReadBarrierSlow( + invoke, out, out, object_loc, /*offset=*/ 0u, /*index=*/ offset_loc); + } else if (byte_swap) { + GenerateReverseBytes(codegen, out, load_loc.AsRegister<XRegister>(), type); + } + } + + if (acquire_barrier) { + codegen->GenerateMemoryBarrier(MemBarrierKind::kLoadAny); + } + + if (slow_path != nullptr) { + DCHECK(!byte_swap); + __ Bind(slow_path->GetExitLabel()); + } +} + +void IntrinsicLocationsBuilderRISCV64::VisitVarHandleGet(HInvoke* invoke) { + CreateVarHandleGetLocations(invoke, codegen_); +} + +void IntrinsicCodeGeneratorRISCV64::VisitVarHandleGet(HInvoke* invoke) { + GenerateVarHandleGet(invoke, codegen_, std::memory_order_relaxed); +} + +void IntrinsicLocationsBuilderRISCV64::VisitVarHandleGetOpaque(HInvoke* invoke) { + CreateVarHandleGetLocations(invoke, codegen_); +} + +void IntrinsicCodeGeneratorRISCV64::VisitVarHandleGetOpaque(HInvoke* invoke) { + GenerateVarHandleGet(invoke, codegen_, std::memory_order_relaxed); +} + +void IntrinsicLocationsBuilderRISCV64::VisitVarHandleGetAcquire(HInvoke* invoke) { + CreateVarHandleGetLocations(invoke, codegen_); +} + +void IntrinsicCodeGeneratorRISCV64::VisitVarHandleGetAcquire(HInvoke* invoke) { + GenerateVarHandleGet(invoke, codegen_, std::memory_order_acquire); +} + +void IntrinsicLocationsBuilderRISCV64::VisitVarHandleGetVolatile(HInvoke* invoke) { + CreateVarHandleGetLocations(invoke, codegen_); +} + +void IntrinsicCodeGeneratorRISCV64::VisitVarHandleGetVolatile(HInvoke* invoke) { + GenerateVarHandleGet(invoke, codegen_, std::memory_order_seq_cst); +} + +static void CreateVarHandleSetLocations(HInvoke* invoke, CodeGeneratorRISCV64* codegen) { + VarHandleOptimizations optimizations(invoke); + if (optimizations.GetDoNotIntrinsify()) { + return; + } + + CreateVarHandleCommonLocations(invoke, codegen); +} + +static void GenerateVarHandleSet(HInvoke* invoke, + CodeGeneratorRISCV64* codegen, + std::memory_order order, + bool byte_swap = false) { + uint32_t value_index = invoke->GetNumberOfArguments() - 1; + DataType::Type value_type = GetDataTypeFromShorty(invoke, value_index); + + Riscv64Assembler* assembler = codegen->GetAssembler(); + Location value = invoke->GetLocations()->InAt(value_index); + + VarHandleTarget target = GetVarHandleTarget(invoke); + VarHandleSlowPathRISCV64* slow_path = nullptr; + if (!byte_swap) { + slow_path = GenerateVarHandleChecks(invoke, codegen, order, value_type); + GenerateVarHandleTarget(invoke, target, codegen); + if (slow_path != nullptr) { + __ Bind(slow_path->GetNativeByteOrderLabel()); + } + } + + { + ScratchRegisterScope srs(assembler); + XRegister address = srs.AllocateXRegister(); + __ Add(address, target.object, target.offset); + + if (byte_swap) { + DCHECK(!value.IsConstant()); // Zero uses the main path as it does not need a byte swap. + // The offset is no longer needed, so reuse the offset temporary for the byte-swapped value. 
+ Location new_value = Location::RegisterLocation(target.offset); + if (DataType::IsFloatingPointType(value_type)) { + value_type = IntTypeForFloatingPointType(value_type); + codegen->MoveLocation(new_value, value, value_type); + value = new_value; + } + GenerateReverseBytes(codegen, new_value, value.AsRegister<XRegister>(), value_type); + value = new_value; + } + + GenerateSet(codegen, order, value, address, /*offset=*/ 0, value_type); + } + + if (CodeGenerator::StoreNeedsWriteBarrier(value_type, invoke->InputAt(value_index))) { + codegen->MarkGCCard(target.object, value.AsRegister<XRegister>(), /* emit_null_check= */ true); + } + + if (slow_path != nullptr) { + DCHECK(!byte_swap); + __ Bind(slow_path->GetExitLabel()); + } +} + +void IntrinsicLocationsBuilderRISCV64::VisitVarHandleSet(HInvoke* invoke) { + CreateVarHandleSetLocations(invoke, codegen_); +} + +void IntrinsicCodeGeneratorRISCV64::VisitVarHandleSet(HInvoke* invoke) { + GenerateVarHandleSet(invoke, codegen_, std::memory_order_relaxed); +} + +void IntrinsicLocationsBuilderRISCV64::VisitVarHandleSetOpaque(HInvoke* invoke) { + CreateVarHandleSetLocations(invoke, codegen_); +} + +void IntrinsicCodeGeneratorRISCV64::VisitVarHandleSetOpaque(HInvoke* invoke) { + GenerateVarHandleSet(invoke, codegen_, std::memory_order_relaxed); +} + +void IntrinsicLocationsBuilderRISCV64::VisitVarHandleSetRelease(HInvoke* invoke) { + CreateVarHandleSetLocations(invoke, codegen_); +} + +void IntrinsicCodeGeneratorRISCV64::VisitVarHandleSetRelease(HInvoke* invoke) { + GenerateVarHandleSet(invoke, codegen_, std::memory_order_release); +} + +void IntrinsicLocationsBuilderRISCV64::VisitVarHandleSetVolatile(HInvoke* invoke) { + CreateVarHandleSetLocations(invoke, codegen_); +} + +void IntrinsicCodeGeneratorRISCV64::VisitVarHandleSetVolatile(HInvoke* invoke) { + GenerateVarHandleSet(invoke, codegen_, std::memory_order_seq_cst); +} + +static bool ScratchXRegisterNeeded(Location loc, DataType::Type type, bool byte_swap) { + if (loc.IsConstant()) { + DCHECK(loc.GetConstant()->IsZeroBitPattern()); + return false; + } + return DataType::IsFloatingPointType(type) || DataType::Size(type) < 4u || byte_swap; +} + +static void CreateVarHandleCompareAndSetOrExchangeLocations(HInvoke* invoke, + CodeGeneratorRISCV64* codegen, + bool return_success) { + VarHandleOptimizations optimizations(invoke); + if (optimizations.GetDoNotIntrinsify()) { + return; + } + + uint32_t expected_index = invoke->GetNumberOfArguments() - 2; + uint32_t new_value_index = invoke->GetNumberOfArguments() - 1; + DataType::Type value_type = GetDataTypeFromShorty(invoke, new_value_index); + DCHECK_EQ(value_type, GetDataTypeFromShorty(invoke, expected_index)); + + bool is_reference = (value_type == DataType::Type::kReference); + if (is_reference && codegen->EmitNonBakerReadBarrier()) { + // Unsupported for non-Baker read barrier because the artReadBarrierSlow() ignores + // the passed reference and reloads it from the field. This breaks the read barriers + // in slow path in different ways. The marked old value may not actually be a to-space + // reference to the same object as `old_value`, breaking slow path assumptions. And + // for CompareAndExchange, marking the old value after comparison failure may actually + // return the reference to `expected`, erroneously indicating success even though we + // did not set the new value. (And it also gets the memory visibility wrong.) 
b/173104084 + return; + } + + LocationSummary* locations = CreateVarHandleCommonLocations(invoke, codegen); + DCHECK_EQ(expected_index, 1u + GetExpectedVarHandleCoordinatesCount(invoke)); + + if (codegen->EmitNonBakerReadBarrier()) { + // We need callee-save registers for both the class object and offset instead of + // the temporaries reserved in CreateVarHandleCommonLocations(). + static_assert(POPCOUNT(kRiscv64CalleeSaveRefSpills) >= 2u); + uint32_t first_callee_save = CTZ(kRiscv64CalleeSaveRefSpills); + uint32_t second_callee_save = CTZ(kRiscv64CalleeSaveRefSpills ^ (1u << first_callee_save)); + if (expected_index == 1u) { // For static fields. + DCHECK_EQ(locations->GetTempCount(), 2u); + DCHECK(locations->GetTemp(0u).Equals(Location::RequiresRegister())); + DCHECK(locations->GetTemp(1u).Equals(Location::RegisterLocation(first_callee_save))); + locations->SetTempAt(0u, Location::RegisterLocation(second_callee_save)); + } else { + DCHECK_EQ(locations->GetTempCount(), 1u); + DCHECK(locations->GetTemp(0u).Equals(Location::RequiresRegister())); + locations->SetTempAt(0u, Location::RegisterLocation(first_callee_save)); + } + } + + size_t old_temp_count = locations->GetTempCount(); + DCHECK_EQ(old_temp_count, (expected_index == 1u) ? 2u : 1u); + Location expected = locations->InAt(expected_index); + Location new_value = locations->InAt(new_value_index); + size_t data_size = DataType::Size(value_type); + bool is_small = (data_size < 4u); + bool can_byte_swap = + (expected_index == 3u) && (value_type != DataType::Type::kReference && data_size != 1u); + bool is_fp = DataType::IsFloatingPointType(value_type); + size_t temps_needed = + // The offset temp is used for the `tmp_ptr`, except for the read barrier case. For read + // barrier we must preserve the offset and class pointer (if any) for the slow path and + // use a separate temp for `tmp_ptr` and we also need another temp for `old_value_temp`. + ((is_reference && codegen->EmitReadBarrier()) ? old_temp_count + 2u : 1u) + + // For small values, we need a temp for the `mask`, `masked` and maybe also for the `shift`. + (is_small ? (return_success ? 2u : 3u) : 0u) + + // Some cases need modified copies of `new_value` and `expected`. + (ScratchXRegisterNeeded(expected, value_type, can_byte_swap) ? 1u : 0u) + + (ScratchXRegisterNeeded(new_value, value_type, can_byte_swap) ? 1u : 0u) + + // We need a scratch register either for the old value or for the result of SC. + // If we need to return a floating point old value, we need a temp for each. + ((!return_success && is_fp) ? 2u : 1u); + size_t scratch_registers_available = 2u; + DCHECK_EQ(scratch_registers_available, + ScratchRegisterScope(codegen->GetAssembler()).AvailableXRegisters()); + if (temps_needed > old_temp_count + scratch_registers_available) { + locations->AddRegisterTemps(temps_needed - (old_temp_count + scratch_registers_available)); + } +} + +static XRegister PrepareXRegister(CodeGeneratorRISCV64* codegen, + Location loc, + DataType::Type type, + XRegister shift, + XRegister mask, + bool byte_swap, + ScratchRegisterScope* srs) { + DCHECK_IMPLIES(mask != kNoXRegister, shift != kNoXRegister); + DCHECK_EQ(shift == kNoXRegister, DataType::Size(type) >= 4u); + if (loc.IsConstant()) { + // The `shift`/`mask` and `byte_swap` are irrelevant for zero input. 
+ DCHECK(loc.GetConstant()->IsZeroBitPattern()); + return Zero; + } + + Location result = loc; + if (DataType::IsFloatingPointType(type)) { + type = IntTypeForFloatingPointType(type); + result = Location::RegisterLocation(srs->AllocateXRegister()); + codegen->MoveLocation(result, loc, type); + loc = result; + } else if (byte_swap || shift != kNoXRegister) { + result = Location::RegisterLocation(srs->AllocateXRegister()); + } + if (byte_swap) { + if (type == DataType::Type::kInt16) { + type = DataType::Type::kUint16; // Do the masking as part of the byte swap. + } + GenerateReverseBytes(codegen, result, loc.AsRegister<XRegister>(), type); + loc = result; + } + if (shift != kNoXRegister) { + Riscv64Assembler* assembler = codegen->GetAssembler(); + __ Sllw(result.AsRegister<XRegister>(), loc.AsRegister<XRegister>(), shift); + DCHECK_NE(type, DataType::Type::kUint8); + if (mask != kNoXRegister && type != DataType::Type::kUint16 && type != DataType::Type::kBool) { + __ And(result.AsRegister<XRegister>(), result.AsRegister<XRegister>(), mask); + } + } + return result.AsRegister<XRegister>(); +} + +static void GenerateByteSwapAndExtract(CodeGeneratorRISCV64* codegen, + Location rd, + XRegister rs1, + XRegister shift, + DataType::Type type) { + // Apply shift before `GenerateReverseBytes()` for small types. + DCHECK_EQ(shift != kNoXRegister, DataType::Size(type) < 4u); + if (shift != kNoXRegister) { + Riscv64Assembler* assembler = codegen->GetAssembler(); + __ Srlw(rd.AsRegister<XRegister>(), rs1, shift); + rs1 = rd.AsRegister<XRegister>(); + } + // Also handles moving to FP registers. + GenerateReverseBytes(codegen, rd, rs1, type); +} + +static void GenerateVarHandleCompareAndSetOrExchange(HInvoke* invoke, + CodeGeneratorRISCV64* codegen, + std::memory_order order, + bool return_success, + bool strong, + bool byte_swap = false) { + DCHECK(return_success || strong); + + uint32_t expected_index = invoke->GetNumberOfArguments() - 2; + uint32_t new_value_index = invoke->GetNumberOfArguments() - 1; + DataType::Type value_type = GetDataTypeFromShorty(invoke, new_value_index); + DCHECK_EQ(value_type, GetDataTypeFromShorty(invoke, expected_index)); + + Riscv64Assembler* assembler = codegen->GetAssembler(); + LocationSummary* locations = invoke->GetLocations(); + Location expected = locations->InAt(expected_index); + Location new_value = locations->InAt(new_value_index); + Location out = locations->Out(); + + VarHandleTarget target = GetVarHandleTarget(invoke); + VarHandleSlowPathRISCV64* slow_path = nullptr; + if (!byte_swap) { + slow_path = GenerateVarHandleChecks(invoke, codegen, order, value_type); + GenerateVarHandleTarget(invoke, target, codegen); + if (slow_path != nullptr) { + slow_path->SetCompareAndSetOrExchangeArgs(return_success, strong); + __ Bind(slow_path->GetNativeByteOrderLabel()); + } + } + + // This needs to be before we allocate the scratch registers, as MarkGCCard also uses them. + if (CodeGenerator::StoreNeedsWriteBarrier(value_type, invoke->InputAt(new_value_index))) { + // Mark card for object assuming new value is stored. + bool new_value_can_be_null = true; // TODO: Worth finding out this information? + codegen->MarkGCCard(target.object, new_value.AsRegister<XRegister>(), new_value_can_be_null); + } + + // Scratch registers may be needed for `new_value` and `expected`. + ScratchRegisterScope srs(assembler); + DCHECK_EQ(srs.AvailableXRegisters(), 2u); + size_t available_scratch_registers = + (ScratchXRegisterNeeded(expected, value_type, byte_swap) ? 
0u : 1u) + + (ScratchXRegisterNeeded(new_value, value_type, byte_swap) ? 0u : 1u); + + // Reuse the `offset` temporary for the pointer to the target location, + // except for references that need the offset for the read barrier. + DCHECK_EQ(target.offset, locations->GetTemp(0u).AsRegister<XRegister>()); + size_t next_temp = 1u; + XRegister tmp_ptr = target.offset; + bool is_reference = (value_type == DataType::Type::kReference); + if (is_reference && codegen->EmitReadBarrier()) { + // Reserve scratch registers for `tmp_ptr` and `old_value_temp`. + DCHECK_EQ(available_scratch_registers, 2u); + available_scratch_registers = 0u; + DCHECK_EQ(expected_index, 1u + GetExpectedVarHandleCoordinatesCount(invoke)); + next_temp = expected_index == 1u ? 2u : 1u; // Preserve the class register for static field. + tmp_ptr = srs.AllocateXRegister(); + } + __ Add(tmp_ptr, target.object, target.offset); + + auto get_temp = [&]() { + if (available_scratch_registers != 0u) { + available_scratch_registers -= 1u; + return srs.AllocateXRegister(); + } else { + XRegister temp = locations->GetTemp(next_temp).AsRegister<XRegister>(); + next_temp += 1u; + return temp; + } + }; + + XRegister shift = kNoXRegister; + XRegister mask = kNoXRegister; + XRegister masked = kNoXRegister; + size_t data_size = DataType::Size(value_type); + bool is_small = (data_size < 4u); + if (is_small) { + // When returning "success" and not the old value, we shall not need the `shift` after + // the raw CAS operation, so use the output register as a temporary here. + shift = return_success ? locations->Out().AsRegister<XRegister>() : get_temp(); + mask = get_temp(); + masked = get_temp(); + // Upper bits of the shift are not used, so we do not need to clear them. + __ Slli(shift, tmp_ptr, WhichPowerOf2(kBitsPerByte)); + __ Andi(tmp_ptr, tmp_ptr, -4); + __ Li(mask, (1 << (data_size * kBitsPerByte)) - 1); + __ Sllw(mask, mask, shift); + } + + // Move floating point values to scratch registers and apply shift, mask and byte swap if needed. + // Note that float/double CAS uses bitwise comparison, rather than the operator==. + XRegister expected_reg = + PrepareXRegister(codegen, expected, value_type, shift, mask, byte_swap, &srs); + XRegister new_value_reg = + PrepareXRegister(codegen, new_value, value_type, shift, mask, byte_swap, &srs); + bool is_fp = DataType::IsFloatingPointType(value_type); + DataType::Type cas_type = is_fp + ? IntTypeForFloatingPointType(value_type) + : (is_small ? DataType::Type::kInt32 : value_type); + + // Prepare registers for old value and the result of the store conditional. + XRegister old_value; + XRegister store_result; + if (return_success) { + // Use a temp for the old value. + old_value = get_temp(); + // For strong CAS, use the `old_value` temp also for the SC result. + // For weak CAS, put the SC result directly to `out`. + store_result = strong ? old_value : out.AsRegister<XRegister>(); + } else if (is_fp) { + // We need two temporary registers. + old_value = get_temp(); + store_result = get_temp(); + } else { + // Use the output register for the old value and a temp for the store conditional result. 
+ old_value = out.AsRegister<XRegister>(); + store_result = get_temp(); + } + + Riscv64Label exit_loop_label; + Riscv64Label* exit_loop = &exit_loop_label; + Riscv64Label* cmp_failure = &exit_loop_label; + + ReadBarrierCasSlowPathRISCV64* rb_slow_path = nullptr; + if (is_reference && codegen->EmitReadBarrier()) { + // The `old_value_temp` is used first for marking the `old_value` and then for the unmarked + // reloaded old value for subsequent CAS in the slow path. We make this a scratch register + // as we do have marking entrypoints on riscv64 even for scratch registers. + XRegister old_value_temp = srs.AllocateXRegister(); + // For strong CAS, use the `old_value_temp` also for the SC result as the reloaded old value + // is no longer needed after the comparison. For weak CAS, store the SC result in the same + // result register as the main path. + // Note that for a strong CAS, a SC failure in the slow path can set the register to 1, so + // we cannot use that register to indicate success without resetting it to 0 at the start of + // the retry loop. Instead, we return to the success indicating instruction in the main path. + XRegister slow_path_store_result = strong ? old_value_temp : store_result; + rb_slow_path = new (codegen->GetScopedAllocator()) ReadBarrierCasSlowPathRISCV64( + invoke, + order, + strong, + target.object, + target.offset, + expected_reg, + new_value_reg, + old_value, + old_value_temp, + slow_path_store_result, + /*update_old_value=*/ !return_success, + codegen); + codegen->AddSlowPath(rb_slow_path); + exit_loop = rb_slow_path->GetExitLabel(); + cmp_failure = rb_slow_path->GetEntryLabel(); + } + + if (return_success) { + // Pre-populate the output register with failure for the case when the old value + // differs and we do not execute the store conditional. + __ Li(out.AsRegister<XRegister>(), 0); + } + GenerateCompareAndSet(codegen->GetAssembler(), + cas_type, + order, + strong, + cmp_failure, + tmp_ptr, + new_value_reg, + old_value, + mask, + masked, + store_result, + expected_reg); + if (return_success && strong) { + if (rb_slow_path != nullptr) { + // Slow path returns here on success. + __ Bind(rb_slow_path->GetSuccessExitLabel()); + } + // Load success value to the output register. + // `GenerateCompareAndSet()` does not emit code to indicate success for a strong CAS. + __ Li(out.AsRegister<XRegister>(), 1); + } else if (rb_slow_path != nullptr) { + DCHECK(!rb_slow_path->GetSuccessExitLabel()->IsLinked()); + } + __ Bind(exit_loop); + + if (return_success) { + // Nothing to do, the result register already contains 1 on success and 0 on failure. + } else if (byte_swap) { + DCHECK_IMPLIES(is_small, out.AsRegister<XRegister>() == old_value) + << " " << value_type << " " << out.AsRegister<XRegister>() << "!=" << old_value; + GenerateByteSwapAndExtract(codegen, out, old_value, shift, value_type); + } else if (is_fp) { + codegen->MoveLocation(out, Location::RegisterLocation(old_value), value_type); + } else if (is_small) { + __ Srlw(old_value, masked, shift); + if (value_type == DataType::Type::kInt8) { + __ SextB(old_value, old_value); + } else if (value_type == DataType::Type::kInt16) { + __ SextH(old_value, old_value); + } + } + + if (slow_path != nullptr) { + DCHECK(!byte_swap); + __ Bind(slow_path->GetExitLabel()); + } + + // Check that we have allocated the right number of temps. We may need more registers + // for byte swapped CAS in the slow path, so skip this check for the main path in that case. 
+ bool has_byte_swap = (expected_index == 3u) && (!is_reference && data_size != 1u); + if ((!has_byte_swap || byte_swap) && next_temp != locations->GetTempCount()) { + // We allocate a temporary register for the class object for a static field `VarHandle` but + // we do not update the `next_temp` if it's otherwise unused after the address calculation. + CHECK_EQ(expected_index, 1u); + CHECK_EQ(next_temp, 1u); + CHECK_EQ(locations->GetTempCount(), 2u); + } +} + +void IntrinsicLocationsBuilderRISCV64::VisitVarHandleCompareAndExchange(HInvoke* invoke) { + CreateVarHandleCompareAndSetOrExchangeLocations(invoke, codegen_, /*return_success=*/ false); +} + +void IntrinsicCodeGeneratorRISCV64::VisitVarHandleCompareAndExchange(HInvoke* invoke) { + GenerateVarHandleCompareAndSetOrExchange( + invoke, codegen_, std::memory_order_seq_cst, /*return_success=*/ false, /*strong=*/ true); +} + +void IntrinsicLocationsBuilderRISCV64::VisitVarHandleCompareAndExchangeAcquire(HInvoke* invoke) { + CreateVarHandleCompareAndSetOrExchangeLocations(invoke, codegen_, /*return_success=*/ false); +} + +void IntrinsicCodeGeneratorRISCV64::VisitVarHandleCompareAndExchangeAcquire(HInvoke* invoke) { + GenerateVarHandleCompareAndSetOrExchange( + invoke, codegen_, std::memory_order_acquire, /*return_success=*/ false, /*strong=*/ true); +} + +void IntrinsicLocationsBuilderRISCV64::VisitVarHandleCompareAndExchangeRelease(HInvoke* invoke) { + CreateVarHandleCompareAndSetOrExchangeLocations(invoke, codegen_, /*return_success=*/ false); +} + +void IntrinsicCodeGeneratorRISCV64::VisitVarHandleCompareAndExchangeRelease(HInvoke* invoke) { + GenerateVarHandleCompareAndSetOrExchange( + invoke, codegen_, std::memory_order_release, /*return_success=*/ false, /*strong=*/ true); +} + +void IntrinsicLocationsBuilderRISCV64::VisitVarHandleCompareAndSet(HInvoke* invoke) { + CreateVarHandleCompareAndSetOrExchangeLocations(invoke, codegen_, /*return_success=*/ true); +} + +void IntrinsicCodeGeneratorRISCV64::VisitVarHandleCompareAndSet(HInvoke* invoke) { + GenerateVarHandleCompareAndSetOrExchange( + invoke, codegen_, std::memory_order_seq_cst, /*return_success=*/ true, /*strong=*/ true); +} + +void IntrinsicLocationsBuilderRISCV64::VisitVarHandleWeakCompareAndSet(HInvoke* invoke) { + CreateVarHandleCompareAndSetOrExchangeLocations(invoke, codegen_, /*return_success=*/ true); +} + +void IntrinsicCodeGeneratorRISCV64::VisitVarHandleWeakCompareAndSet(HInvoke* invoke) { + GenerateVarHandleCompareAndSetOrExchange( + invoke, codegen_, std::memory_order_seq_cst, /*return_success=*/ true, /*strong=*/ false); +} + +void IntrinsicLocationsBuilderRISCV64::VisitVarHandleWeakCompareAndSetAcquire(HInvoke* invoke) { + CreateVarHandleCompareAndSetOrExchangeLocations(invoke, codegen_, /*return_success=*/ true); +} + +void IntrinsicCodeGeneratorRISCV64::VisitVarHandleWeakCompareAndSetAcquire(HInvoke* invoke) { + GenerateVarHandleCompareAndSetOrExchange( + invoke, codegen_, std::memory_order_acquire, /*return_success=*/ true, /*strong=*/ false); +} + +void IntrinsicLocationsBuilderRISCV64::VisitVarHandleWeakCompareAndSetPlain(HInvoke* invoke) { + CreateVarHandleCompareAndSetOrExchangeLocations(invoke, codegen_, /*return_success=*/ true); +} + +void IntrinsicCodeGeneratorRISCV64::VisitVarHandleWeakCompareAndSetPlain(HInvoke* invoke) { + GenerateVarHandleCompareAndSetOrExchange( + invoke, codegen_, std::memory_order_relaxed, /*return_success=*/ true, /*strong=*/ false); +} + +void IntrinsicLocationsBuilderRISCV64::VisitVarHandleWeakCompareAndSetRelease(HInvoke* 
invoke) { + CreateVarHandleCompareAndSetOrExchangeLocations(invoke, codegen_, /*return_success=*/ true); +} + +void IntrinsicCodeGeneratorRISCV64::VisitVarHandleWeakCompareAndSetRelease(HInvoke* invoke) { + GenerateVarHandleCompareAndSetOrExchange( + invoke, codegen_, std::memory_order_release, /*return_success=*/ true, /*strong=*/ false); +} + +static void CreateVarHandleGetAndUpdateLocations(HInvoke* invoke, + CodeGeneratorRISCV64* codegen, + GetAndUpdateOp get_and_update_op) { + VarHandleOptimizations optimizations(invoke); + if (optimizations.GetDoNotIntrinsify()) { + return; + } + + if (invoke->GetType() == DataType::Type::kReference && codegen->EmitNonBakerReadBarrier()) { + // Unsupported for non-Baker read barrier because the artReadBarrierSlow() ignores + // the passed reference and reloads it from the field, thus seeing the new value + // that we have just stored. (And it also gets the memory visibility wrong.) b/173104084 + return; + } + + LocationSummary* locations = CreateVarHandleCommonLocations(invoke, codegen); + uint32_t arg_index = invoke->GetNumberOfArguments() - 1; + DCHECK_EQ(arg_index, 1u + GetExpectedVarHandleCoordinatesCount(invoke)); + DataType::Type value_type = invoke->GetType(); + DCHECK_EQ(value_type, GetDataTypeFromShorty(invoke, arg_index)); + Location arg = locations->InAt(arg_index); + + bool is_fp = DataType::IsFloatingPointType(value_type); + if (is_fp) { + if (get_and_update_op == GetAndUpdateOp::kAdd) { + // For ADD, do not use ZR for zero bit pattern (+0.0f or +0.0). + locations->SetInAt(invoke->GetNumberOfArguments() - 1u, Location::RequiresFpuRegister()); + } else { + DCHECK(get_and_update_op == GetAndUpdateOp::kSet); + } + } + + size_t data_size = DataType::Size(value_type); + bool can_byte_swap = + (arg_index == 3u) && (value_type != DataType::Type::kReference && data_size != 1u); + bool can_use_cas = (get_and_update_op == GetAndUpdateOp::kAdd) && (can_byte_swap || is_fp); + bool is_small = (data_size < 4u); + bool is_small_and = is_small && (get_and_update_op == GetAndUpdateOp::kAnd); + bool is_bitwise = + (get_and_update_op != GetAndUpdateOp::kSet && get_and_update_op != GetAndUpdateOp::kAdd); + + size_t temps_needed = + // The offset temp is used for the `tmp_ptr`. + 1u + + // For small values, we need temps for `shift` and maybe also `mask` and `temp`. + (is_small ? (is_bitwise ? 1u : 3u) : 0u) + + // Some cases need modified copies of `arg`. + (is_small_and || ScratchXRegisterNeeded(arg, value_type, can_byte_swap) ? 1u : 0u) + + // For FP types, we need a temp for `old_value` which cannot be loaded directly to `out`. + (is_fp ? 1u : 0u); + if (can_use_cas) { + size_t cas_temps_needed = + // The offset temp is used for the `tmp_ptr`. + 1u + + // For small values, we need a temp for `shift`. + (is_small ? 1u : 0u) + + // And we always need temps for `old_value`, `new_value` and `reloaded_old_value`. + 3u; + DCHECK_GE(cas_temps_needed, temps_needed); + temps_needed = cas_temps_needed; + } + + size_t scratch_registers_available = 2u; + DCHECK_EQ(scratch_registers_available, + ScratchRegisterScope(codegen->GetAssembler()).AvailableXRegisters()); + size_t old_temp_count = locations->GetTempCount(); + DCHECK_EQ(old_temp_count, (arg_index == 1u) ? 
2u : 1u); + if (temps_needed > old_temp_count + scratch_registers_available) { + locations->AddRegisterTemps(temps_needed - (old_temp_count + scratch_registers_available)); + } +} + +static void GenerateVarHandleGetAndUpdate(HInvoke* invoke, + CodeGeneratorRISCV64* codegen, + GetAndUpdateOp get_and_update_op, + std::memory_order order, + bool byte_swap = false) { + uint32_t arg_index = invoke->GetNumberOfArguments() - 1; + DCHECK_EQ(arg_index, 1u + GetExpectedVarHandleCoordinatesCount(invoke)); + DataType::Type value_type = invoke->GetType(); + DCHECK_EQ(value_type, GetDataTypeFromShorty(invoke, arg_index)); + + Riscv64Assembler* assembler = codegen->GetAssembler(); + LocationSummary* locations = invoke->GetLocations(); + Location arg = locations->InAt(arg_index); + DCHECK_IMPLIES(arg.IsConstant(), arg.GetConstant()->IsZeroBitPattern()); + Location out = locations->Out(); + + VarHandleTarget target = GetVarHandleTarget(invoke); + VarHandleSlowPathRISCV64* slow_path = nullptr; + if (!byte_swap) { + slow_path = GenerateVarHandleChecks(invoke, codegen, order, value_type); + GenerateVarHandleTarget(invoke, target, codegen); + if (slow_path != nullptr) { + slow_path->SetGetAndUpdateOp(get_and_update_op); + __ Bind(slow_path->GetNativeByteOrderLabel()); + } + } + + // This needs to be before the temp registers, as MarkGCCard also uses scratch registers. + if (CodeGenerator::StoreNeedsWriteBarrier(value_type, invoke->InputAt(arg_index))) { + DCHECK(get_and_update_op == GetAndUpdateOp::kSet); + // Mark card for object, the new value shall be stored. + bool new_value_can_be_null = true; // TODO: Worth finding out this information? + codegen->MarkGCCard(target.object, arg.AsRegister<XRegister>(), new_value_can_be_null); + } + + size_t data_size = DataType::Size(value_type); + bool is_fp = DataType::IsFloatingPointType(value_type); + bool use_cas = (get_and_update_op == GetAndUpdateOp::kAdd) && (byte_swap || is_fp); + bool is_small = (data_size < 4u); + bool is_small_and = is_small && (get_and_update_op == GetAndUpdateOp::kAnd); + bool is_reference = (value_type == DataType::Type::kReference); + DataType::Type op_type = is_fp + ? IntTypeForFloatingPointType(value_type) + : (is_small || is_reference ? DataType::Type::kInt32 : value_type); + + ScratchRegisterScope srs(assembler); + DCHECK_EQ(srs.AvailableXRegisters(), 2u); + size_t available_scratch_registers = use_cas + // We use scratch registers differently for the CAS path. + ? 0u + // Reserve one scratch register for `PrepareXRegister()` or similar `arg_reg` allocation. + : (is_small_and || ScratchXRegisterNeeded(arg, value_type, byte_swap) ? 1u : 2u); + + // Reuse the `target.offset` temporary for the pointer to the target location, + // except for references that need the offset for the non-Baker read barrier. 
+ DCHECK_EQ(target.offset, locations->GetTemp(0u).AsRegister<XRegister>()); + size_t next_temp = 1u; + XRegister tmp_ptr = target.offset; + if (is_reference && codegen->EmitNonBakerReadBarrier()) { + DCHECK_EQ(available_scratch_registers, 2u); + available_scratch_registers -= 1u; + tmp_ptr = srs.AllocateXRegister(); + } + __ Add(tmp_ptr, target.object, target.offset); + + auto get_temp = [&]() { + if (available_scratch_registers != 0u) { + available_scratch_registers -= 1u; + return srs.AllocateXRegister(); + } else { + XRegister temp = locations->GetTemp(next_temp).AsRegister<XRegister>(); + next_temp += 1u; + return temp; + } + }; + + XRegister shift = kNoXRegister; + XRegister mask = kNoXRegister; + XRegister prepare_mask = kNoXRegister; + XRegister temp = kNoXRegister; + XRegister arg_reg = kNoXRegister; + if (is_small) { + shift = get_temp(); + // Upper bits of the shift are not used, so we do not need to clear them. + __ Slli(shift, tmp_ptr, WhichPowerOf2(kBitsPerByte)); + __ Andi(tmp_ptr, tmp_ptr, -4); + switch (get_and_update_op) { + case GetAndUpdateOp::kAdd: + if (byte_swap) { + // The mask is not needed in the CAS path. + DCHECK(use_cas); + break; + } + FALLTHROUGH_INTENDED; + case GetAndUpdateOp::kSet: + mask = get_temp(); + temp = get_temp(); + __ Li(mask, (1 << (data_size * kBitsPerByte)) - 1); + __ Sllw(mask, mask, shift); + // The argument does not need to be masked for `GetAndUpdateOp::kAdd`, + // the mask shall be applied after the ADD instruction. + prepare_mask = (get_and_update_op == GetAndUpdateOp::kSet) ? mask : kNoXRegister; + break; + case GetAndUpdateOp::kAnd: + // We need to set all other bits, so we always need a temp. + arg_reg = srs.AllocateXRegister(); + if (data_size == 1u) { + __ Ori(arg_reg, InputXRegisterOrZero(arg), ~0xff); + DCHECK(!byte_swap); + } else { + DCHECK_EQ(data_size, 2u); + __ Li(arg_reg, ~0xffff); + __ Or(arg_reg, InputXRegisterOrZero(arg), arg_reg); + if (byte_swap) { + __ Rev8(arg_reg, arg_reg); + __ Rori(arg_reg, arg_reg, 48); + } + } + __ Rolw(arg_reg, arg_reg, shift); + break; + case GetAndUpdateOp::kOr: + case GetAndUpdateOp::kXor: + // Signed values need to be truncated but we're keeping `prepare_mask == kNoXRegister`. + if (value_type == DataType::Type::kInt8 && !arg.IsConstant()) { + DCHECK(!byte_swap); + arg_reg = srs.AllocateXRegister(); + __ ZextB(arg_reg, arg.AsRegister<XRegister>()); + __ Sllw(arg_reg, arg_reg, shift); + } else if (value_type == DataType::Type::kInt16 && !arg.IsConstant() && !byte_swap) { + arg_reg = srs.AllocateXRegister(); + __ ZextH(arg_reg, arg.AsRegister<XRegister>()); + __ Sllw(arg_reg, arg_reg, shift); + } // else handled by `PrepareXRegister()` below. + break; + } + } + if (arg_reg == kNoXRegister && !use_cas) { + arg_reg = PrepareXRegister(codegen, arg, value_type, shift, prepare_mask, byte_swap, &srs); + } + if (mask != kNoXRegister && get_and_update_op == GetAndUpdateOp::kSet) { + __ Not(mask, mask); // We need to flip the mask for `kSet`, see `GenerateGetAndUpdate()`. + } + + if (use_cas) { + // Allocate scratch registers for temps that can theoretically be clobbered on retry. + // (Even though the `retry` label shall never be far enough for `TMP` to be clobbered.) + DCHECK_EQ(available_scratch_registers, 0u); // Reserved for the two uses below. + XRegister old_value = srs.AllocateXRegister(); + XRegister new_value = srs.AllocateXRegister(); + // Allocate other needed temporaries. 
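+    // The reloaded old value is only needed for the comparison inside `GenerateCompareAndSet()`,
+    // so its register can be reused for the store-conditional result afterwards.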
+ XRegister reloaded_old_value = get_temp(); + XRegister store_result = reloaded_old_value; // Clobber reloaded old value by store result. + FRegister ftmp = is_fp ? srs.AllocateFRegister() : kNoFRegister; + + Riscv64Label retry; + __ Bind(&retry); + codegen->GetInstructionVisitor()->Load( + Location::RegisterLocation(old_value), tmp_ptr, /*offset=*/ 0, op_type); + if (byte_swap) { + GenerateByteSwapAndExtract(codegen, out, old_value, shift, value_type); + } else { + DCHECK(is_fp); + codegen->MoveLocation(out, Location::RegisterLocation(old_value), value_type); + } + if (is_fp) { + codegen->GetInstructionVisitor()->FAdd( + ftmp, out.AsFpuRegister<FRegister>(), arg.AsFpuRegister<FRegister>(), value_type); + codegen->MoveLocation( + Location::RegisterLocation(new_value), Location::FpuRegisterLocation(ftmp), op_type); + } else if (value_type == DataType::Type::kInt64) { + __ Add(new_value, out.AsRegister<XRegister>(), arg.AsRegister<XRegister>()); + } else { + DCHECK_EQ(op_type, DataType::Type::kInt32); + __ Addw(new_value, out.AsRegister<XRegister>(), arg.AsRegister<XRegister>()); + } + if (byte_swap) { + DataType::Type swap_type = op_type; + if (is_small) { + DCHECK_EQ(data_size, 2u); + // We want to update only 16 bits of the 32-bit location. The 16 bits we want to replace + // are present in both `old_value` and `out` but in different bits and byte order. + // To update the 16 bits, we can XOR the new value with the `out`, byte swap as Uint16 + // (extracting only the bits we want to update), shift and XOR with the old value. + swap_type = DataType::Type::kUint16; + __ Xor(new_value, new_value, out.AsRegister<XRegister>()); + } + GenerateReverseBytes(codegen, Location::RegisterLocation(new_value), new_value, swap_type); + if (is_small) { + __ Sllw(new_value, new_value, shift); + __ Xor(new_value, new_value, old_value); + } + } + GenerateCompareAndSet(assembler, + op_type, + order, + /*strong=*/ true, + /*cmp_failure=*/ &retry, + tmp_ptr, + new_value, + /*old_value=*/ reloaded_old_value, + /*mask=*/ kNoXRegister, + /*masked=*/ kNoXRegister, + store_result, + /*expected=*/ old_value); + } else { + XRegister old_value = is_fp ? get_temp() : out.AsRegister<XRegister>(); + GenerateGetAndUpdate( + codegen, get_and_update_op, op_type, order, tmp_ptr, arg_reg, old_value, mask, temp); + if (byte_swap) { + DCHECK_IMPLIES(is_small, out.AsRegister<XRegister>() == old_value) + << " " << value_type << " " << out.AsRegister<XRegister>() << "!=" << old_value; + GenerateByteSwapAndExtract(codegen, out, old_value, shift, value_type); + } else if (is_fp) { + codegen->MoveLocation(out, Location::RegisterLocation(old_value), value_type); + } else if (is_small) { + __ Srlw(old_value, old_value, shift); + DCHECK_NE(value_type, DataType::Type::kUint8); + if (value_type == DataType::Type::kInt8) { + __ SextB(old_value, old_value); + } else if (value_type == DataType::Type::kBool) { + __ ZextB(old_value, old_value); + } else if (value_type == DataType::Type::kInt16) { + __ SextH(old_value, old_value); + } else { + DCHECK_EQ(value_type, DataType::Type::kUint16); + __ ZextH(old_value, old_value); + } + } else if (is_reference) { + __ ZextW(old_value, old_value); + if (codegen->EmitBakerReadBarrier()) { + // Use RA as temp. It is clobbered in the slow path anyway. 
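+        // The reference in `out` was obtained by the atomic update without a read barrier,
+        // so a Baker marking check (GC-root style) is emitted on it below.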
+ static constexpr Location kBakerReadBarrierTemp = Location::RegisterLocation(RA); + SlowPathCodeRISCV64* rb_slow_path = + codegen->AddGcRootBakerBarrierBarrierSlowPath(invoke, out, kBakerReadBarrierTemp); + codegen->EmitBakerReadBarierMarkingCheck(rb_slow_path, out, kBakerReadBarrierTemp); + } else if (codegen->EmitNonBakerReadBarrier()) { + Location base_loc = Location::RegisterLocation(target.object); + Location index = Location::RegisterLocation(target.offset); + SlowPathCodeRISCV64* rb_slow_path = codegen->AddReadBarrierSlowPath( + invoke, out, out, base_loc, /*offset=*/ 0u, index); + __ J(rb_slow_path->GetEntryLabel()); + __ Bind(rb_slow_path->GetExitLabel()); + } + } + } + + if (slow_path != nullptr) { + DCHECK(!byte_swap); + __ Bind(slow_path->GetExitLabel()); + } + + // Check that we have allocated the right number of temps. We may need more registers + // for byte swapped CAS in the slow path, so skip this check for the main path in that case. + bool has_byte_swap = (arg_index == 3u) && (!is_reference && data_size != 1u); + if ((!has_byte_swap || byte_swap) && next_temp != locations->GetTempCount()) { + // We allocate a temporary register for the class object for a static field `VarHandle` but + // we do not update the `next_temp` if it's otherwise unused after the address calculation. + CHECK_EQ(arg_index, 1u); + CHECK_EQ(next_temp, 1u); + CHECK_EQ(locations->GetTempCount(), 2u); + } +} + +void IntrinsicLocationsBuilderRISCV64::VisitVarHandleGetAndSet(HInvoke* invoke) { + CreateVarHandleGetAndUpdateLocations(invoke, codegen_, GetAndUpdateOp::kSet); +} + +void IntrinsicCodeGeneratorRISCV64::VisitVarHandleGetAndSet(HInvoke* invoke) { + GenerateVarHandleGetAndUpdate(invoke, codegen_, GetAndUpdateOp::kSet, std::memory_order_seq_cst); +} + +void IntrinsicLocationsBuilderRISCV64::VisitVarHandleGetAndSetAcquire(HInvoke* invoke) { + CreateVarHandleGetAndUpdateLocations(invoke, codegen_, GetAndUpdateOp::kSet); +} + +void IntrinsicCodeGeneratorRISCV64::VisitVarHandleGetAndSetAcquire(HInvoke* invoke) { + GenerateVarHandleGetAndUpdate(invoke, codegen_, GetAndUpdateOp::kSet, std::memory_order_acquire); +} + +void IntrinsicLocationsBuilderRISCV64::VisitVarHandleGetAndSetRelease(HInvoke* invoke) { + CreateVarHandleGetAndUpdateLocations(invoke, codegen_, GetAndUpdateOp::kSet); +} + +void IntrinsicCodeGeneratorRISCV64::VisitVarHandleGetAndSetRelease(HInvoke* invoke) { + GenerateVarHandleGetAndUpdate(invoke, codegen_, GetAndUpdateOp::kSet, std::memory_order_release); +} + +void IntrinsicLocationsBuilderRISCV64::VisitVarHandleGetAndAdd(HInvoke* invoke) { + CreateVarHandleGetAndUpdateLocations(invoke, codegen_, GetAndUpdateOp::kAdd); +} + +void IntrinsicCodeGeneratorRISCV64::VisitVarHandleGetAndAdd(HInvoke* invoke) { + GenerateVarHandleGetAndUpdate(invoke, codegen_, GetAndUpdateOp::kAdd, std::memory_order_seq_cst); +} + +void IntrinsicLocationsBuilderRISCV64::VisitVarHandleGetAndAddAcquire(HInvoke* invoke) { + CreateVarHandleGetAndUpdateLocations(invoke, codegen_, GetAndUpdateOp::kAdd); +} + +void IntrinsicCodeGeneratorRISCV64::VisitVarHandleGetAndAddAcquire(HInvoke* invoke) { + GenerateVarHandleGetAndUpdate(invoke, codegen_, GetAndUpdateOp::kAdd, std::memory_order_acquire); +} + +void IntrinsicLocationsBuilderRISCV64::VisitVarHandleGetAndAddRelease(HInvoke* invoke) { + CreateVarHandleGetAndUpdateLocations(invoke, codegen_, GetAndUpdateOp::kAdd); +} + +void IntrinsicCodeGeneratorRISCV64::VisitVarHandleGetAndAddRelease(HInvoke* invoke) { + GenerateVarHandleGetAndUpdate(invoke, codegen_, 
GetAndUpdateOp::kAdd, std::memory_order_release); +} + +void IntrinsicLocationsBuilderRISCV64::VisitVarHandleGetAndBitwiseAnd(HInvoke* invoke) { + CreateVarHandleGetAndUpdateLocations(invoke, codegen_, GetAndUpdateOp::kAnd); +} + +void IntrinsicCodeGeneratorRISCV64::VisitVarHandleGetAndBitwiseAnd(HInvoke* invoke) { + GenerateVarHandleGetAndUpdate(invoke, codegen_, GetAndUpdateOp::kAnd, std::memory_order_seq_cst); +} + +void IntrinsicLocationsBuilderRISCV64::VisitVarHandleGetAndBitwiseAndAcquire(HInvoke* invoke) { + CreateVarHandleGetAndUpdateLocations(invoke, codegen_, GetAndUpdateOp::kAnd); +} + +void IntrinsicCodeGeneratorRISCV64::VisitVarHandleGetAndBitwiseAndAcquire(HInvoke* invoke) { + GenerateVarHandleGetAndUpdate(invoke, codegen_, GetAndUpdateOp::kAnd, std::memory_order_acquire); +} + +void IntrinsicLocationsBuilderRISCV64::VisitVarHandleGetAndBitwiseAndRelease(HInvoke* invoke) { + CreateVarHandleGetAndUpdateLocations(invoke, codegen_, GetAndUpdateOp::kAnd); +} + +void IntrinsicCodeGeneratorRISCV64::VisitVarHandleGetAndBitwiseAndRelease(HInvoke* invoke) { + GenerateVarHandleGetAndUpdate(invoke, codegen_, GetAndUpdateOp::kAnd, std::memory_order_release); +} + +void IntrinsicLocationsBuilderRISCV64::VisitVarHandleGetAndBitwiseOr(HInvoke* invoke) { + CreateVarHandleGetAndUpdateLocations(invoke, codegen_, GetAndUpdateOp::kOr); +} + +void IntrinsicCodeGeneratorRISCV64::VisitVarHandleGetAndBitwiseOr(HInvoke* invoke) { + GenerateVarHandleGetAndUpdate(invoke, codegen_, GetAndUpdateOp::kOr, std::memory_order_seq_cst); +} + +void IntrinsicLocationsBuilderRISCV64::VisitVarHandleGetAndBitwiseOrAcquire(HInvoke* invoke) { + CreateVarHandleGetAndUpdateLocations(invoke, codegen_, GetAndUpdateOp::kOr); +} + +void IntrinsicCodeGeneratorRISCV64::VisitVarHandleGetAndBitwiseOrAcquire(HInvoke* invoke) { + GenerateVarHandleGetAndUpdate(invoke, codegen_, GetAndUpdateOp::kOr, std::memory_order_acquire); +} + +void IntrinsicLocationsBuilderRISCV64::VisitVarHandleGetAndBitwiseOrRelease(HInvoke* invoke) { + CreateVarHandleGetAndUpdateLocations(invoke, codegen_, GetAndUpdateOp::kOr); +} + +void IntrinsicCodeGeneratorRISCV64::VisitVarHandleGetAndBitwiseOrRelease(HInvoke* invoke) { + GenerateVarHandleGetAndUpdate(invoke, codegen_, GetAndUpdateOp::kOr, std::memory_order_release); +} + +void IntrinsicLocationsBuilderRISCV64::VisitVarHandleGetAndBitwiseXor(HInvoke* invoke) { + CreateVarHandleGetAndUpdateLocations(invoke, codegen_, GetAndUpdateOp::kXor); +} + +void IntrinsicCodeGeneratorRISCV64::VisitVarHandleGetAndBitwiseXor(HInvoke* invoke) { + GenerateVarHandleGetAndUpdate(invoke, codegen_, GetAndUpdateOp::kXor, std::memory_order_seq_cst); +} + +void IntrinsicLocationsBuilderRISCV64::VisitVarHandleGetAndBitwiseXorAcquire(HInvoke* invoke) { + CreateVarHandleGetAndUpdateLocations(invoke, codegen_, GetAndUpdateOp::kXor); +} + +void IntrinsicCodeGeneratorRISCV64::VisitVarHandleGetAndBitwiseXorAcquire(HInvoke* invoke) { + GenerateVarHandleGetAndUpdate(invoke, codegen_, GetAndUpdateOp::kXor, std::memory_order_acquire); +} + +void IntrinsicLocationsBuilderRISCV64::VisitVarHandleGetAndBitwiseXorRelease(HInvoke* invoke) { + CreateVarHandleGetAndUpdateLocations(invoke, codegen_, GetAndUpdateOp::kXor); +} + +void IntrinsicCodeGeneratorRISCV64::VisitVarHandleGetAndBitwiseXorRelease(HInvoke* invoke) { + GenerateVarHandleGetAndUpdate(invoke, codegen_, GetAndUpdateOp::kXor, std::memory_order_release); +} + +void VarHandleSlowPathRISCV64::EmitByteArrayViewCode(CodeGenerator* codegen_in) { + 
DCHECK(GetByteArrayViewCheckLabel()->IsLinked()); + CodeGeneratorRISCV64* codegen = down_cast<CodeGeneratorRISCV64*>(codegen_in); + Riscv64Assembler* assembler = codegen->GetAssembler(); + HInvoke* invoke = GetInvoke(); + mirror::VarHandle::AccessModeTemplate access_mode_template = GetAccessModeTemplate(); + DataType::Type value_type = + GetVarHandleExpectedValueType(invoke, /*expected_coordinates_count=*/ 2u); + DCHECK_NE(value_type, DataType::Type::kReference); + size_t size = DataType::Size(value_type); + DCHECK_GT(size, 1u); + LocationSummary* locations = invoke->GetLocations(); + XRegister varhandle = locations->InAt(0).AsRegister<XRegister>(); + XRegister object = locations->InAt(1).AsRegister<XRegister>(); + XRegister index = locations->InAt(2).AsRegister<XRegister>(); + + MemberOffset class_offset = mirror::Object::ClassOffset(); + MemberOffset array_length_offset = mirror::Array::LengthOffset(); + MemberOffset data_offset = mirror::Array::DataOffset(Primitive::kPrimByte); + MemberOffset native_byte_order_offset = mirror::ByteArrayViewVarHandle::NativeByteOrderOffset(); + + __ Bind(GetByteArrayViewCheckLabel()); + + VarHandleTarget target = GetVarHandleTarget(invoke); + { + ScratchRegisterScope srs(assembler); + XRegister temp = srs.AllocateXRegister(); + XRegister temp2 = srs.AllocateXRegister(); + + // The main path checked that the coordinateType0 is an array class that matches + // the class of the actual coordinate argument but it does not match the value type. + // Check if the `varhandle` references a ByteArrayViewVarHandle instance. + __ Loadwu(temp, varhandle, class_offset.Int32Value()); + codegen->MaybeUnpoisonHeapReference(temp); + codegen->LoadClassRootForIntrinsic(temp2, ClassRoot::kJavaLangInvokeByteArrayViewVarHandle); + __ Bne(temp, temp2, GetEntryLabel()); + + // Check for array index out of bounds. + __ Loadw(temp, object, array_length_offset.Int32Value()); + __ Bgeu(index, temp, GetEntryLabel()); + __ Addi(temp2, index, size - 1u); + __ Bgeu(temp2, temp, GetEntryLabel()); + + // Construct the target. + __ Addi(target.offset, index, data_offset.Int32Value()); + + // Alignment check. For unaligned access, go to the runtime. + DCHECK(IsPowerOfTwo(size)); + __ Andi(temp, target.offset, size - 1u); + __ Bnez(temp, GetEntryLabel()); + + // Byte order check. For native byte order return to the main path. + if (access_mode_template == mirror::VarHandle::AccessModeTemplate::kSet && + IsZeroBitPattern(invoke->InputAt(invoke->GetNumberOfArguments() - 1u))) { + // There is no reason to differentiate between native byte order and byte-swap + // for setting a zero bit pattern. Just return to the main path. 
+ __ J(GetNativeByteOrderLabel()); + return; + } + __ Loadbu(temp, varhandle, native_byte_order_offset.Int32Value()); + __ Bnez(temp, GetNativeByteOrderLabel()); + } + + switch (access_mode_template) { + case mirror::VarHandle::AccessModeTemplate::kGet: + GenerateVarHandleGet(invoke, codegen, order_, /*byte_swap=*/ true); + break; + case mirror::VarHandle::AccessModeTemplate::kSet: + GenerateVarHandleSet(invoke, codegen, order_, /*byte_swap=*/ true); + break; + case mirror::VarHandle::AccessModeTemplate::kCompareAndSet: + case mirror::VarHandle::AccessModeTemplate::kCompareAndExchange: + GenerateVarHandleCompareAndSetOrExchange( + invoke, codegen, order_, return_success_, strong_, /*byte_swap=*/ true); + break; + case mirror::VarHandle::AccessModeTemplate::kGetAndUpdate: + GenerateVarHandleGetAndUpdate( + invoke, codegen, get_and_update_op_, order_, /*byte_swap=*/ true); + break; + } + __ J(GetExitLabel()); +} + +void IntrinsicLocationsBuilderRISCV64::VisitThreadCurrentThread(HInvoke* invoke) { + LocationSummary* locations = + new (allocator_) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified); + locations->SetOut(Location::RequiresRegister()); +} + +void IntrinsicCodeGeneratorRISCV64::VisitThreadCurrentThread(HInvoke* invoke) { + Riscv64Assembler* assembler = GetAssembler(); + XRegister out = invoke->GetLocations()->Out().AsRegister<XRegister>(); + __ Loadwu(out, TR, Thread::PeerOffset<kRiscv64PointerSize>().Int32Value()); +} + +void IntrinsicLocationsBuilderRISCV64::VisitReachabilityFence(HInvoke* invoke) { + LocationSummary* locations = + new (allocator_) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified); + locations->SetInAt(0, Location::Any()); +} + +void IntrinsicCodeGeneratorRISCV64::VisitReachabilityFence([[maybe_unused]] HInvoke* invoke) {} + +void IntrinsicLocationsBuilderRISCV64::VisitMathFmaDouble(HInvoke* invoke) { + CreateFpFpFpToFpNoOverlapLocations(allocator_, invoke); +} + +void IntrinsicCodeGeneratorRISCV64::VisitMathFmaDouble(HInvoke* invoke) { + LocationSummary* locations = invoke->GetLocations(); + Riscv64Assembler* assembler = GetAssembler(); + FRegister n = locations->InAt(0).AsFpuRegister<FRegister>(); + FRegister m = locations->InAt(1).AsFpuRegister<FRegister>(); + FRegister a = locations->InAt(2).AsFpuRegister<FRegister>(); + FRegister out = locations->Out().AsFpuRegister<FRegister>(); + + __ FMAddD(out, n, m, a); +} + +void IntrinsicLocationsBuilderRISCV64::VisitMathFmaFloat(HInvoke* invoke) { + CreateFpFpFpToFpNoOverlapLocations(allocator_, invoke); +} + +void IntrinsicCodeGeneratorRISCV64::VisitMathFmaFloat(HInvoke* invoke) { + LocationSummary* locations = invoke->GetLocations(); + Riscv64Assembler* assembler = GetAssembler(); + FRegister n = locations->InAt(0).AsFpuRegister<FRegister>(); + FRegister m = locations->InAt(1).AsFpuRegister<FRegister>(); + FRegister a = locations->InAt(2).AsFpuRegister<FRegister>(); + FRegister out = locations->Out().AsFpuRegister<FRegister>(); + + __ FMAddS(out, n, m, a); +} + + +void IntrinsicLocationsBuilderRISCV64::VisitMathCos(HInvoke* invoke) { + CreateFPToFPCallLocations(allocator_, invoke); +} + +void IntrinsicCodeGeneratorRISCV64::VisitMathCos(HInvoke* invoke) { + codegen_->InvokeRuntime(kQuickCos, invoke, invoke->GetDexPc()); +} + +void IntrinsicLocationsBuilderRISCV64::VisitMathSin(HInvoke* invoke) { + CreateFPToFPCallLocations(allocator_, invoke); +} + +void IntrinsicCodeGeneratorRISCV64::VisitMathSin(HInvoke* invoke) { + codegen_->InvokeRuntime(kQuickSin, invoke, invoke->GetDexPc()); +} + 
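The byte-array-view slow path above boils down to four guards before control either returns to the native-byte-order main path or falls through to the byte-swapping variants of the access generators. A minimal C++ sketch of that decision, assuming illustrative names (ClassifyByteArrayViewAccess, data_offset and the boolean inputs are stand-ins, not part of the ART sources):

#include <cstdint>

enum class ByteViewPath { kRuntimeCall, kNativeOrder, kByteSwap };

// Mirrors the guards emitted by EmitByteArrayViewCode: VarHandle class check,
// bounds check on [index, index + size), alignment check on the final offset,
// and finally the byte-order check.
ByteViewPath ClassifyByteArrayViewAccess(bool is_byte_array_view_varhandle,
                                         int64_t array_length,
                                         int64_t index,
                                         int64_t value_size,   // power of two, > 1
                                         int64_t data_offset,
                                         bool native_byte_order) {
  if (!is_byte_array_view_varhandle) {
    return ByteViewPath::kRuntimeCall;          // coordinate types did not match
  }
  if (index < 0 || index + value_size > array_length) {
    return ByteViewPath::kRuntimeCall;          // out-of-bounds access
  }
  int64_t offset = data_offset + index;         // `target.offset` in the generated code
  if ((offset & (value_size - 1)) != 0) {
    return ByteViewPath::kRuntimeCall;          // unaligned access goes to the runtime
  }
  return native_byte_order ? ByteViewPath::kNativeOrder : ByteViewPath::kByteSwap;
}

The special case in the generated code, jumping straight back to the main path when a zero bit pattern is being stored, is omitted from this sketch since byte order cannot matter for it.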
+void IntrinsicLocationsBuilderRISCV64::VisitMathAcos(HInvoke* invoke) { + CreateFPToFPCallLocations(allocator_, invoke); +} + +void IntrinsicCodeGeneratorRISCV64::VisitMathAcos(HInvoke* invoke) { + codegen_->InvokeRuntime(kQuickAcos, invoke, invoke->GetDexPc()); +} + +void IntrinsicLocationsBuilderRISCV64::VisitMathAsin(HInvoke* invoke) { + CreateFPToFPCallLocations(allocator_, invoke); +} + +void IntrinsicCodeGeneratorRISCV64::VisitMathAsin(HInvoke* invoke) { + codegen_->InvokeRuntime(kQuickAsin, invoke, invoke->GetDexPc()); +} + +void IntrinsicLocationsBuilderRISCV64::VisitMathAtan(HInvoke* invoke) { + CreateFPToFPCallLocations(allocator_, invoke); +} + +void IntrinsicCodeGeneratorRISCV64::VisitMathAtan(HInvoke* invoke) { + codegen_->InvokeRuntime(kQuickAtan, invoke, invoke->GetDexPc()); +} + +void IntrinsicLocationsBuilderRISCV64::VisitMathAtan2(HInvoke* invoke) { + CreateFPFPToFPCallLocations(allocator_, invoke); +} + +void IntrinsicCodeGeneratorRISCV64::VisitMathAtan2(HInvoke* invoke) { + codegen_->InvokeRuntime(kQuickAtan2, invoke, invoke->GetDexPc()); +} + +void IntrinsicLocationsBuilderRISCV64::VisitMathPow(HInvoke* invoke) { + CreateFPFPToFPCallLocations(allocator_, invoke); +} + +void IntrinsicCodeGeneratorRISCV64::VisitMathPow(HInvoke* invoke) { + codegen_->InvokeRuntime(kQuickPow, invoke, invoke->GetDexPc()); +} + +void IntrinsicLocationsBuilderRISCV64::VisitMathCbrt(HInvoke* invoke) { + CreateFPToFPCallLocations(allocator_, invoke); +} + +void IntrinsicCodeGeneratorRISCV64::VisitMathCbrt(HInvoke* invoke) { + codegen_->InvokeRuntime(kQuickCbrt, invoke, invoke->GetDexPc()); +} + +void IntrinsicLocationsBuilderRISCV64::VisitMathCosh(HInvoke* invoke) { + CreateFPToFPCallLocations(allocator_, invoke); +} + +void IntrinsicCodeGeneratorRISCV64::VisitMathCosh(HInvoke* invoke) { + codegen_->InvokeRuntime(kQuickCosh, invoke, invoke->GetDexPc()); +} + +void IntrinsicLocationsBuilderRISCV64::VisitMathExp(HInvoke* invoke) { + CreateFPToFPCallLocations(allocator_, invoke); +} + +void IntrinsicCodeGeneratorRISCV64::VisitMathExp(HInvoke* invoke) { + codegen_->InvokeRuntime(kQuickExp, invoke, invoke->GetDexPc()); +} + +void IntrinsicLocationsBuilderRISCV64::VisitMathExpm1(HInvoke* invoke) { + CreateFPToFPCallLocations(allocator_, invoke); +} + +void IntrinsicCodeGeneratorRISCV64::VisitMathExpm1(HInvoke* invoke) { + codegen_->InvokeRuntime(kQuickExpm1, invoke, invoke->GetDexPc()); +} + +void IntrinsicLocationsBuilderRISCV64::VisitMathHypot(HInvoke* invoke) { + CreateFPFPToFPCallLocations(allocator_, invoke); +} + +void IntrinsicCodeGeneratorRISCV64::VisitMathHypot(HInvoke* invoke) { + codegen_->InvokeRuntime(kQuickHypot, invoke, invoke->GetDexPc()); +} + +void IntrinsicLocationsBuilderRISCV64::VisitMathLog(HInvoke* invoke) { + CreateFPToFPCallLocations(allocator_, invoke); +} + +void IntrinsicCodeGeneratorRISCV64::VisitMathLog(HInvoke* invoke) { + codegen_->InvokeRuntime(kQuickLog, invoke, invoke->GetDexPc()); +} + +void IntrinsicLocationsBuilderRISCV64::VisitMathLog10(HInvoke* invoke) { + CreateFPToFPCallLocations(allocator_, invoke); +} + +void IntrinsicCodeGeneratorRISCV64::VisitMathLog10(HInvoke* invoke) { + codegen_->InvokeRuntime(kQuickLog10, invoke, invoke->GetDexPc()); +} + +void IntrinsicLocationsBuilderRISCV64::VisitMathNextAfter(HInvoke* invoke) { + CreateFPFPToFPCallLocations(allocator_, invoke); +} + +void IntrinsicCodeGeneratorRISCV64::VisitMathNextAfter(HInvoke* invoke) { + codegen_->InvokeRuntime(kQuickNextAfter, invoke, invoke->GetDexPc()); +} + +void 
IntrinsicLocationsBuilderRISCV64::VisitMathSinh(HInvoke* invoke) { + CreateFPToFPCallLocations(allocator_, invoke); +} + +void IntrinsicCodeGeneratorRISCV64::VisitMathSinh(HInvoke* invoke) { + codegen_->InvokeRuntime(kQuickSinh, invoke, invoke->GetDexPc()); +} + +void IntrinsicLocationsBuilderRISCV64::VisitMathTan(HInvoke* invoke) { + CreateFPToFPCallLocations(allocator_, invoke); +} + +void IntrinsicCodeGeneratorRISCV64::VisitMathTan(HInvoke* invoke) { + codegen_->InvokeRuntime(kQuickTan, invoke, invoke->GetDexPc()); +} + +void IntrinsicLocationsBuilderRISCV64::VisitMathTanh(HInvoke* invoke) { + CreateFPToFPCallLocations(allocator_, invoke); +} + +void IntrinsicCodeGeneratorRISCV64::VisitMathTanh(HInvoke* invoke) { + codegen_->InvokeRuntime(kQuickTanh, invoke, invoke->GetDexPc()); +} + +void IntrinsicLocationsBuilderRISCV64::VisitMathSqrt(HInvoke* invoke) { + CreateFPToFPLocations(allocator_, invoke); +} + +void IntrinsicCodeGeneratorRISCV64::VisitMathSqrt(HInvoke* invoke) { + DCHECK_EQ(invoke->InputAt(0)->GetType(), DataType::Type::kFloat64); + DCHECK_EQ(invoke->GetType(), DataType::Type::kFloat64); + + LocationSummary* locations = invoke->GetLocations(); + Riscv64Assembler* assembler = GetAssembler(); + FRegister in = locations->InAt(0).AsFpuRegister<FRegister>(); + FRegister out = locations->Out().AsFpuRegister<FRegister>(); + + __ FSqrtD(out, in); +} + +static void GenDoubleRound(Riscv64Assembler* assembler, HInvoke* invoke, FPRoundingMode mode) { + LocationSummary* locations = invoke->GetLocations(); + FRegister in = locations->InAt(0).AsFpuRegister<FRegister>(); + FRegister out = locations->Out().AsFpuRegister<FRegister>(); + ScratchRegisterScope srs(assembler); + XRegister tmp = srs.AllocateXRegister(); + FRegister ftmp = srs.AllocateFRegister(); + Riscv64Label done; + + // Load 2^52 + __ LoadConst64(tmp, 0x4330000000000000L); + __ FMvDX(ftmp, tmp); + __ FAbsD(out, in); + __ FLtD(tmp, out, ftmp); + + // Set output as the input if input greater than the max + __ FMvD(out, in); + __ Beqz(tmp, &done); + + // Convert with rounding mode + __ FCvtLD(tmp, in, mode); + __ FCvtDL(ftmp, tmp, mode); + + // Set the signed bit + __ FSgnjD(out, ftmp, in); + __ Bind(&done); +} + +void IntrinsicLocationsBuilderRISCV64::VisitMathFloor(HInvoke* invoke) { + CreateFPToFPLocations(allocator_, invoke); +} + +void IntrinsicCodeGeneratorRISCV64::VisitMathFloor(HInvoke* invoke) { + GenDoubleRound(GetAssembler(), invoke, FPRoundingMode::kRDN); +} + +void IntrinsicLocationsBuilderRISCV64::VisitMathCeil(HInvoke* invoke) { + CreateFPToFPLocations(allocator_, invoke); +} + +void IntrinsicCodeGeneratorRISCV64::VisitMathCeil(HInvoke* invoke) { + GenDoubleRound(GetAssembler(), invoke, FPRoundingMode::kRUP); +} + +void IntrinsicLocationsBuilderRISCV64::VisitMathRint(HInvoke* invoke) { + CreateFPToFPLocations(allocator_, invoke); +} + +void IntrinsicCodeGeneratorRISCV64::VisitMathRint(HInvoke* invoke) { + GenDoubleRound(GetAssembler(), invoke, FPRoundingMode::kRNE); +} + +void GenMathRound(CodeGeneratorRISCV64* codegen, HInvoke* invoke, DataType::Type type) { + Riscv64Assembler* assembler = codegen->GetAssembler(); + LocationSummary* locations = invoke->GetLocations(); + FRegister in = locations->InAt(0).AsFpuRegister<FRegister>(); + XRegister out = locations->Out().AsRegister<XRegister>(); + ScratchRegisterScope srs(assembler); + FRegister ftmp = srs.AllocateFRegister(); + Riscv64Label done; + + // Check NaN + codegen->GetInstructionVisitor()->FClass(out, in, type); + __ Slti(out, out, kFClassNaNMinValue); + __ 
Beqz(out, &done); + + if (type == DataType::Type::kFloat64) { + // Add 0.5 (0x3fe0000000000000), rounding down (towards negative infinity). + __ LoadConst64(out, 0x3fe0000000000000L); + __ FMvDX(ftmp, out); + __ FAddD(ftmp, ftmp, in, FPRoundingMode::kRDN); + + // Convert to managed `long`, rounding down (towards negative infinity). + __ FCvtLD(out, ftmp, FPRoundingMode::kRDN); + } else { + // Add 0.5 (0x3f000000), rounding down (towards negative infinity). + __ LoadConst32(out, 0x3f000000); + __ FMvWX(ftmp, out); + __ FAddS(ftmp, ftmp, in, FPRoundingMode::kRDN); + + // Convert to managed `int`, rounding down (towards negative infinity). + __ FCvtWS(out, ftmp, FPRoundingMode::kRDN); + } + + __ Bind(&done); +} + +void IntrinsicLocationsBuilderRISCV64::VisitMathRoundDouble(HInvoke* invoke) { + CreateFPToIntLocations(allocator_, invoke); +} + +void IntrinsicCodeGeneratorRISCV64::VisitMathRoundDouble(HInvoke* invoke) { + GenMathRound(codegen_, invoke, DataType::Type::kFloat64); +} + +void IntrinsicLocationsBuilderRISCV64::VisitMathRoundFloat(HInvoke* invoke) { + CreateFPToIntLocations(allocator_, invoke); +} + +void IntrinsicCodeGeneratorRISCV64::VisitMathRoundFloat(HInvoke* invoke) { + GenMathRound(codegen_, invoke, DataType::Type::kFloat32); +} + +void IntrinsicLocationsBuilderRISCV64::VisitMathMultiplyHigh(HInvoke* invoke) { + LocationSummary* locations = + new (allocator_) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified); + locations->SetInAt(0, Location::RequiresRegister()); + locations->SetInAt(1, Location::RequiresRegister()); + locations->SetOut(Location::RequiresRegister()); +} + +void IntrinsicCodeGeneratorRISCV64::VisitMathMultiplyHigh(HInvoke* invoke) { + LocationSummary* locations = invoke->GetLocations(); + Riscv64Assembler* assembler = GetAssembler(); + DCHECK(invoke->GetType() == DataType::Type::kInt64); + + XRegister x = locations->InAt(0).AsRegister<XRegister>(); + XRegister y = locations->InAt(1).AsRegister<XRegister>(); + XRegister out = locations->Out().AsRegister<XRegister>(); + + // Get high 64 of the multiply + __ Mulh(out, x, y); +} + +#define MARK_UNIMPLEMENTED(Name) UNIMPLEMENTED_INTRINSIC(RISCV64, Name) +UNIMPLEMENTED_INTRINSIC_LIST_RISCV64(MARK_UNIMPLEMENTED); +#undef MARK_UNIMPLEMENTED + +UNREACHABLE_INTRINSICS(RISCV64) + +} // namespace riscv64 +} // namespace art diff --git a/compiler/optimizing/intrinsics_riscv64.h b/compiler/optimizing/intrinsics_riscv64.h new file mode 100644 index 0000000000..8160c054ee --- /dev/null +++ b/compiler/optimizing/intrinsics_riscv64.h @@ -0,0 +1,87 @@ +/* + * Copyright (C) 2023 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef ART_COMPILER_OPTIMIZING_INTRINSICS_RISCV64_H_ +#define ART_COMPILER_OPTIMIZING_INTRINSICS_RISCV64_H_ + +#include "base/macros.h" +#include "intrinsics.h" +#include "intrinsics_list.h" + +namespace art HIDDEN { + +class ArenaAllocator; +class HInvokeStaticOrDirect; +class HInvokeVirtual; + +namespace riscv64 { + +class CodeGeneratorRISCV64; +class Riscv64Assembler; + +class IntrinsicLocationsBuilderRISCV64 final : public IntrinsicVisitor { + public: + explicit IntrinsicLocationsBuilderRISCV64(ArenaAllocator* allocator, + CodeGeneratorRISCV64* codegen) + : allocator_(allocator), codegen_(codegen) {} + + // Define visitor methods. + +#define OPTIMIZING_INTRINSICS(Name, ...) \ + void Visit##Name(HInvoke* invoke) override; + ART_INTRINSICS_LIST(OPTIMIZING_INTRINSICS) +#undef OPTIMIZING_INTRINSICS + + // Check whether an invoke is an intrinsic, and if so, create a location summary. Returns whether + // a corresponding LocationSummary with the intrinsified_ flag set was generated and attached to + // the invoke. + bool TryDispatch(HInvoke* invoke); + + private: + ArenaAllocator* const allocator_; + CodeGeneratorRISCV64* const codegen_; + + DISALLOW_COPY_AND_ASSIGN(IntrinsicLocationsBuilderRISCV64); +}; + +class IntrinsicCodeGeneratorRISCV64 final : public IntrinsicVisitor { + public: + explicit IntrinsicCodeGeneratorRISCV64(CodeGeneratorRISCV64* codegen) : codegen_(codegen) {} + + // Define visitor methods. + +#define OPTIMIZING_INTRINSICS(Name, ...) \ + void Visit##Name(HInvoke* invoke); + ART_INTRINSICS_LIST(OPTIMIZING_INTRINSICS) +#undef OPTIMIZING_INTRINSICS + + private: + Riscv64Assembler* GetAssembler(); + ArenaAllocator* GetAllocator(); + + void HandleValueOf(HInvoke* invoke, + const IntrinsicVisitor::ValueOfInfo& info, + DataType::Type type); + + CodeGeneratorRISCV64* const codegen_; + + DISALLOW_COPY_AND_ASSIGN(IntrinsicCodeGeneratorRISCV64); +}; + +} // namespace riscv64 +} // namespace art + +#endif // ART_COMPILER_OPTIMIZING_INTRINSICS_RISCV64_H_ diff --git a/compiler/optimizing/intrinsics_utils.h b/compiler/optimizing/intrinsics_utils.h index 13cabdafed..590bc34ee9 100644 --- a/compiler/optimizing/intrinsics_utils.h +++ b/compiler/optimizing/intrinsics_utils.h @@ -153,24 +153,34 @@ static inline bool IsVarHandleGet(HInvoke* invoke) { return access_mode == mirror::VarHandle::AccessModeTemplate::kGet; } -static inline bool IsUnsafeGetObject(HInvoke* invoke) { +static inline bool IsUnsafeGetReference(HInvoke* invoke) { switch (invoke->GetIntrinsic()) { case Intrinsics::kUnsafeGetObject: case Intrinsics::kUnsafeGetObjectVolatile: - case Intrinsics::kJdkUnsafeGetObject: - case Intrinsics::kJdkUnsafeGetObjectVolatile: - case Intrinsics::kJdkUnsafeGetObjectAcquire: + case Intrinsics::kJdkUnsafeGetReference: + case Intrinsics::kJdkUnsafeGetReferenceVolatile: + case Intrinsics::kJdkUnsafeGetReferenceAcquire: return true; default: return false; } } -static inline bool IsUnsafeCASObject(HInvoke* invoke) { +static inline bool IsUnsafeCASReference(HInvoke* invoke) { switch (invoke->GetIntrinsic()) { case Intrinsics::kUnsafeCASObject: case Intrinsics::kJdkUnsafeCASObject: - case Intrinsics::kJdkUnsafeCompareAndSetObject: + case Intrinsics::kJdkUnsafeCompareAndSetReference: + return true; + default: + return false; + } +} + +static inline bool IsUnsafeGetAndSetReference(HInvoke* invoke) { + switch (invoke->GetIntrinsic()) { + case Intrinsics::kUnsafeGetAndSetObject: + case Intrinsics::kJdkUnsafeGetAndSetReference: return true; default: return false; diff --git 
a/compiler/optimizing/intrinsics_x86.cc b/compiler/optimizing/intrinsics_x86.cc index d2072201f8..1823bd4b4c 100644 --- a/compiler/optimizing/intrinsics_x86.cc +++ b/compiler/optimizing/intrinsics_x86.cc @@ -25,6 +25,7 @@ #include "data_type-inl.h" #include "entrypoints/quick/quick_entrypoints.h" #include "heap_poisoning.h" +#include "intrinsic_objects.h" #include "intrinsics.h" #include "intrinsics_utils.h" #include "lock_word.h" @@ -37,6 +38,7 @@ #include "thread-current-inl.h" #include "utils/x86/assembler_x86.h" #include "utils/x86/constants_x86.h" +#include "well_known_classes.h" namespace art HIDDEN { @@ -75,11 +77,10 @@ class ReadBarrierSystemArrayCopySlowPathX86 : public SlowPathCode { public: explicit ReadBarrierSystemArrayCopySlowPathX86(HInstruction* instruction) : SlowPathCode(instruction) { - DCHECK(gUseReadBarrier); - DCHECK(kUseBakerReadBarrier); } void EmitNativeCode(CodeGenerator* codegen) override { + DCHECK(codegen->EmitBakerReadBarrier()); CodeGeneratorX86* x86_codegen = down_cast<CodeGeneratorX86*>(codegen); LocationSummary* locations = instruction_->GetLocations(); DCHECK(locations->CanCall()); @@ -394,7 +395,6 @@ void IntrinsicLocationsBuilderX86::VisitMathRoundFloat(HInvoke* invoke) { } HInvokeStaticOrDirect* static_or_direct = invoke->AsInvokeStaticOrDirect(); - DCHECK(static_or_direct != nullptr); LocationSummary* locations = new (allocator_) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified); locations->SetInAt(0, Location::RequiresFpuRegister()); @@ -774,9 +774,9 @@ void IntrinsicCodeGeneratorX86::VisitMathNextAfter(HInvoke* invoke) { static void CreateSystemArrayCopyLocations(HInvoke* invoke) { // We need at least two of the positions or length to be an integer constant, // or else we won't have enough free registers. - HIntConstant* src_pos = invoke->InputAt(1)->AsIntConstant(); - HIntConstant* dest_pos = invoke->InputAt(3)->AsIntConstant(); - HIntConstant* length = invoke->InputAt(4)->AsIntConstant(); + HIntConstant* src_pos = invoke->InputAt(1)->AsIntConstantOrNull(); + HIntConstant* dest_pos = invoke->InputAt(3)->AsIntConstantOrNull(); + HIntConstant* length = invoke->InputAt(4)->AsIntConstantOrNull(); int num_constants = ((src_pos != nullptr) ? 1 : 0) @@ -1205,7 +1205,7 @@ static void GenerateStringIndexOf(HInvoke* invoke, HInstruction* code_point = invoke->InputAt(1); if (code_point->IsIntConstant()) { if (static_cast<uint32_t>(code_point->AsIntConstant()->GetValue()) > - std::numeric_limits<uint16_t>::max()) { + std::numeric_limits<uint16_t>::max()) { // Always needs the slow-path. We could directly dispatch to it, but this case should be // rare, so for simplicity just put the full slow-path down and branch unconditionally. slow_path = new (codegen->GetScopedAllocator()) IntrinsicSlowPathX86(invoke); @@ -1445,7 +1445,7 @@ void IntrinsicCodeGeneratorX86::VisitStringGetCharsNoCheck(HInvoke* invoke) { Register obj = locations->InAt(0).AsRegister<Register>(); Location srcBegin = locations->InAt(1); int srcBegin_value = - srcBegin.IsConstant() ? srcBegin.GetConstant()->AsIntConstant()->GetValue() : 0; + srcBegin.IsConstant() ? 
srcBegin.GetConstant()->AsIntConstant()->GetValue() : 0; Register srcEnd = locations->InAt(2).AsRegister<Register>(); Register dst = locations->InAt(3).AsRegister<Register>(); Register dstBegin = locations->InAt(4).AsRegister<Register>(); @@ -1691,6 +1691,12 @@ static void GenUnsafeGet(HInvoke* invoke, Location output_loc = locations->Out(); switch (type) { + case DataType::Type::kInt8: { + Register output = output_loc.AsRegister<Register>(); + __ movsxb(output, Address(base, offset, ScaleFactor::TIMES_1, 0)); + break; + } + case DataType::Type::kInt32: { Register output = output_loc.AsRegister<Register>(); __ movl(output, Address(base, offset, ScaleFactor::TIMES_1, 0)); @@ -1699,7 +1705,7 @@ static void GenUnsafeGet(HInvoke* invoke, case DataType::Type::kReference: { Register output = output_loc.AsRegister<Register>(); - if (gUseReadBarrier) { + if (codegen->EmitReadBarrier()) { if (kUseBakerReadBarrier) { Address src(base, offset, ScaleFactor::TIMES_1, 0); codegen->GenerateReferenceLoadWithBakerReadBarrier( @@ -1739,25 +1745,12 @@ static void GenUnsafeGet(HInvoke* invoke, } } -static bool UnsafeGetIntrinsicOnCallList(Intrinsics intrinsic) { - switch (intrinsic) { - case Intrinsics::kUnsafeGetObject: - case Intrinsics::kUnsafeGetObjectVolatile: - case Intrinsics::kJdkUnsafeGetObject: - case Intrinsics::kJdkUnsafeGetObjectVolatile: - case Intrinsics::kJdkUnsafeGetObjectAcquire: - return true; - default: - break; - } - return false; -} - static void CreateIntIntIntToIntLocations(ArenaAllocator* allocator, HInvoke* invoke, + CodeGeneratorX86* codegen, DataType::Type type, bool is_volatile) { - bool can_call = gUseReadBarrier && UnsafeGetIntrinsicOnCallList(invoke->GetIntrinsic()); + bool can_call = codegen->EmitReadBarrier() && IsUnsafeGetReference(invoke); LocationSummary* locations = new (allocator) LocationSummary(invoke, can_call @@ -1797,12 +1790,14 @@ void IntrinsicLocationsBuilderX86::VisitUnsafeGetLongVolatile(HInvoke* invoke) { VisitJdkUnsafeGetLongVolatile(invoke); } void IntrinsicLocationsBuilderX86::VisitUnsafeGetObject(HInvoke* invoke) { - VisitJdkUnsafeGetObject(invoke); + VisitJdkUnsafeGetReference(invoke); } void IntrinsicLocationsBuilderX86::VisitUnsafeGetObjectVolatile(HInvoke* invoke) { - VisitJdkUnsafeGetObjectVolatile(invoke); + VisitJdkUnsafeGetReferenceVolatile(invoke); +} +void IntrinsicLocationsBuilderX86::VisitUnsafeGetByte(HInvoke* invoke) { + VisitJdkUnsafeGetByte(invoke); } - void IntrinsicCodeGeneratorX86::VisitUnsafeGet(HInvoke* invoke) { VisitJdkUnsafeGet(invoke); @@ -1817,44 +1812,54 @@ void IntrinsicCodeGeneratorX86::VisitUnsafeGetLongVolatile(HInvoke* invoke) { VisitJdkUnsafeGetLongVolatile(invoke); } void IntrinsicCodeGeneratorX86::VisitUnsafeGetObject(HInvoke* invoke) { - VisitJdkUnsafeGetObject(invoke); + VisitJdkUnsafeGetReference(invoke); } void IntrinsicCodeGeneratorX86::VisitUnsafeGetObjectVolatile(HInvoke* invoke) { - VisitJdkUnsafeGetObjectVolatile(invoke); + VisitJdkUnsafeGetReferenceVolatile(invoke); +} +void IntrinsicCodeGeneratorX86::VisitUnsafeGetByte(HInvoke* invoke) { + VisitJdkUnsafeGetByte(invoke); } - void IntrinsicLocationsBuilderX86::VisitJdkUnsafeGet(HInvoke* invoke) { CreateIntIntIntToIntLocations( - allocator_, invoke, DataType::Type::kInt32, /*is_volatile=*/ false); + allocator_, invoke, codegen_, DataType::Type::kInt32, /*is_volatile=*/ false); } void IntrinsicLocationsBuilderX86::VisitJdkUnsafeGetVolatile(HInvoke* invoke) { - CreateIntIntIntToIntLocations(allocator_, invoke, DataType::Type::kInt32, /*is_volatile=*/ true); + 
CreateIntIntIntToIntLocations( + allocator_, invoke, codegen_, DataType::Type::kInt32, /*is_volatile=*/ true); } void IntrinsicLocationsBuilderX86::VisitJdkUnsafeGetAcquire(HInvoke* invoke) { - CreateIntIntIntToIntLocations(allocator_, invoke, DataType::Type::kInt32, /*is_volatile=*/ true); + CreateIntIntIntToIntLocations( + allocator_, invoke, codegen_, DataType::Type::kInt32, /*is_volatile=*/ true); } void IntrinsicLocationsBuilderX86::VisitJdkUnsafeGetLong(HInvoke* invoke) { CreateIntIntIntToIntLocations( - allocator_, invoke, DataType::Type::kInt64, /*is_volatile=*/ false); + allocator_, invoke, codegen_, DataType::Type::kInt64, /*is_volatile=*/ false); } void IntrinsicLocationsBuilderX86::VisitJdkUnsafeGetLongVolatile(HInvoke* invoke) { - CreateIntIntIntToIntLocations(allocator_, invoke, DataType::Type::kInt64, /*is_volatile=*/ true); + CreateIntIntIntToIntLocations( + allocator_, invoke, codegen_, DataType::Type::kInt64, /*is_volatile=*/ true); } void IntrinsicLocationsBuilderX86::VisitJdkUnsafeGetLongAcquire(HInvoke* invoke) { - CreateIntIntIntToIntLocations(allocator_, invoke, DataType::Type::kInt64, /*is_volatile=*/ true); + CreateIntIntIntToIntLocations( + allocator_, invoke, codegen_, DataType::Type::kInt64, /*is_volatile=*/ true); +} +void IntrinsicLocationsBuilderX86::VisitJdkUnsafeGetReference(HInvoke* invoke) { + CreateIntIntIntToIntLocations( + allocator_, invoke, codegen_, DataType::Type::kReference, /*is_volatile=*/ false); } -void IntrinsicLocationsBuilderX86::VisitJdkUnsafeGetObject(HInvoke* invoke) { +void IntrinsicLocationsBuilderX86::VisitJdkUnsafeGetReferenceVolatile(HInvoke* invoke) { CreateIntIntIntToIntLocations( - allocator_, invoke, DataType::Type::kReference, /*is_volatile=*/ false); + allocator_, invoke, codegen_, DataType::Type::kReference, /*is_volatile=*/ true); } -void IntrinsicLocationsBuilderX86::VisitJdkUnsafeGetObjectVolatile(HInvoke* invoke) { +void IntrinsicLocationsBuilderX86::VisitJdkUnsafeGetReferenceAcquire(HInvoke* invoke) { CreateIntIntIntToIntLocations( - allocator_, invoke, DataType::Type::kReference, /*is_volatile=*/ true); + allocator_, invoke, codegen_, DataType::Type::kReference, /*is_volatile=*/ true); } -void IntrinsicLocationsBuilderX86::VisitJdkUnsafeGetObjectAcquire(HInvoke* invoke) { +void IntrinsicLocationsBuilderX86::VisitJdkUnsafeGetByte(HInvoke* invoke) { CreateIntIntIntToIntLocations( - allocator_, invoke, DataType::Type::kReference, /*is_volatile=*/ true); + allocator_, invoke, codegen_, DataType::Type::kInt8, /*is_volatile=*/ false); } void IntrinsicCodeGeneratorX86::VisitJdkUnsafeGet(HInvoke* invoke) { @@ -1875,15 +1880,18 @@ void IntrinsicCodeGeneratorX86::VisitJdkUnsafeGetLongVolatile(HInvoke* invoke) { void IntrinsicCodeGeneratorX86::VisitJdkUnsafeGetLongAcquire(HInvoke* invoke) { GenUnsafeGet(invoke, DataType::Type::kInt64, /*is_volatile=*/ true, codegen_); } -void IntrinsicCodeGeneratorX86::VisitJdkUnsafeGetObject(HInvoke* invoke) { +void IntrinsicCodeGeneratorX86::VisitJdkUnsafeGetReference(HInvoke* invoke) { GenUnsafeGet(invoke, DataType::Type::kReference, /*is_volatile=*/ false, codegen_); } -void IntrinsicCodeGeneratorX86::VisitJdkUnsafeGetObjectVolatile(HInvoke* invoke) { +void IntrinsicCodeGeneratorX86::VisitJdkUnsafeGetReferenceVolatile(HInvoke* invoke) { GenUnsafeGet(invoke, DataType::Type::kReference, /*is_volatile=*/ true, codegen_); } -void IntrinsicCodeGeneratorX86::VisitJdkUnsafeGetObjectAcquire(HInvoke* invoke) { +void IntrinsicCodeGeneratorX86::VisitJdkUnsafeGetReferenceAcquire(HInvoke* invoke) { 
GenUnsafeGet(invoke, DataType::Type::kReference, /*is_volatile=*/ true, codegen_); } +void IntrinsicCodeGeneratorX86::VisitJdkUnsafeGetByte(HInvoke* invoke) { + GenUnsafeGet(invoke, DataType::Type::kInt8, /*is_volatile=*/ false, codegen_); +} static void CreateIntIntIntIntToVoidPlusTempsLocations(ArenaAllocator* allocator, DataType::Type type, @@ -1916,13 +1924,13 @@ void IntrinsicLocationsBuilderX86::VisitUnsafePutVolatile(HInvoke* invoke) { VisitJdkUnsafePutVolatile(invoke); } void IntrinsicLocationsBuilderX86::VisitUnsafePutObject(HInvoke* invoke) { - VisitJdkUnsafePutObject(invoke); + VisitJdkUnsafePutReference(invoke); } void IntrinsicLocationsBuilderX86::VisitUnsafePutObjectOrdered(HInvoke* invoke) { VisitJdkUnsafePutObjectOrdered(invoke); } void IntrinsicLocationsBuilderX86::VisitUnsafePutObjectVolatile(HInvoke* invoke) { - VisitJdkUnsafePutObjectVolatile(invoke); + VisitJdkUnsafePutReferenceVolatile(invoke); } void IntrinsicLocationsBuilderX86::VisitUnsafePutLong(HInvoke* invoke) { VisitJdkUnsafePutLong(invoke); @@ -1933,6 +1941,9 @@ void IntrinsicLocationsBuilderX86::VisitUnsafePutLongOrdered(HInvoke* invoke) { void IntrinsicLocationsBuilderX86::VisitUnsafePutLongVolatile(HInvoke* invoke) { VisitJdkUnsafePutLongVolatile(invoke); } +void IntrinsicLocationsBuilderX86::VisitUnsafePutByte(HInvoke* invoke) { + VisitJdkUnsafePutByte(invoke); +} void IntrinsicLocationsBuilderX86::VisitJdkUnsafePut(HInvoke* invoke) { CreateIntIntIntIntToVoidPlusTempsLocations( @@ -1950,7 +1961,7 @@ void IntrinsicLocationsBuilderX86::VisitJdkUnsafePutRelease(HInvoke* invoke) { CreateIntIntIntIntToVoidPlusTempsLocations( allocator_, DataType::Type::kInt32, invoke, /*is_volatile=*/ true); } -void IntrinsicLocationsBuilderX86::VisitJdkUnsafePutObject(HInvoke* invoke) { +void IntrinsicLocationsBuilderX86::VisitJdkUnsafePutReference(HInvoke* invoke) { CreateIntIntIntIntToVoidPlusTempsLocations( allocator_, DataType::Type::kReference, invoke, /*is_volatile=*/ false); } @@ -1958,11 +1969,11 @@ void IntrinsicLocationsBuilderX86::VisitJdkUnsafePutObjectOrdered(HInvoke* invok CreateIntIntIntIntToVoidPlusTempsLocations( allocator_, DataType::Type::kReference, invoke, /*is_volatile=*/ false); } -void IntrinsicLocationsBuilderX86::VisitJdkUnsafePutObjectVolatile(HInvoke* invoke) { +void IntrinsicLocationsBuilderX86::VisitJdkUnsafePutReferenceVolatile(HInvoke* invoke) { CreateIntIntIntIntToVoidPlusTempsLocations( allocator_, DataType::Type::kReference, invoke, /*is_volatile=*/ true); } -void IntrinsicLocationsBuilderX86::VisitJdkUnsafePutObjectRelease(HInvoke* invoke) { +void IntrinsicLocationsBuilderX86::VisitJdkUnsafePutReferenceRelease(HInvoke* invoke) { CreateIntIntIntIntToVoidPlusTempsLocations( allocator_, DataType::Type::kReference, invoke, /*is_volatile=*/ true); } @@ -1982,6 +1993,10 @@ void IntrinsicLocationsBuilderX86::VisitJdkUnsafePutLongRelease(HInvoke* invoke) CreateIntIntIntIntToVoidPlusTempsLocations( allocator_, DataType::Type::kInt64, invoke, /*is_volatile=*/ true); } +void IntrinsicLocationsBuilderX86::VisitJdkUnsafePutByte(HInvoke* invoke) { + CreateIntIntIntIntToVoidPlusTempsLocations( + allocator_, DataType::Type::kInt8, invoke, /*is_volatile=*/ false); +} // We don't care for ordered: it requires an AnyStore barrier, which is already given by the x86 // memory model. 
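The closing comment of this hunk, that "ordered" puts need no extra code because x86 already provides an AnyStore barrier, can be seen directly with std::atomic: a release store compiles to a plain mov on x86, and only the sequentially consistent ("volatile") store pays for a StoreLoad barrier. This standalone snippet is only an illustration of the memory-model point, not code from the patch:

#include <atomic>
#include <cstdint>

std::atomic<int32_t> field{0};

// Release ("ordered") store: on x86/x86-64 this is a plain `mov`, because the
// hardware's TSO model already keeps earlier stores ordered before it.
void PutOrdered(int32_t value) {
  field.store(value, std::memory_order_release);
}

// Sequentially consistent ("volatile") store: the one flavor that needs a
// StoreLoad barrier, typically emitted as `xchg` or `mov` plus `mfence`.
void PutVolatile(int32_t value) {
  field.store(value, std::memory_order_seq_cst);
}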
@@ -2041,13 +2056,13 @@ void IntrinsicCodeGeneratorX86::VisitUnsafePutVolatile(HInvoke* invoke) { VisitJdkUnsafePutVolatile(invoke); } void IntrinsicCodeGeneratorX86::VisitUnsafePutObject(HInvoke* invoke) { - VisitJdkUnsafePutObject(invoke); + VisitJdkUnsafePutReference(invoke); } void IntrinsicCodeGeneratorX86::VisitUnsafePutObjectOrdered(HInvoke* invoke) { VisitJdkUnsafePutObjectOrdered(invoke); } void IntrinsicCodeGeneratorX86::VisitUnsafePutObjectVolatile(HInvoke* invoke) { - VisitJdkUnsafePutObjectVolatile(invoke); + VisitJdkUnsafePutReferenceVolatile(invoke); } void IntrinsicCodeGeneratorX86::VisitUnsafePutLong(HInvoke* invoke) { VisitJdkUnsafePutLong(invoke); @@ -2058,6 +2073,9 @@ void IntrinsicCodeGeneratorX86::VisitUnsafePutLongOrdered(HInvoke* invoke) { void IntrinsicCodeGeneratorX86::VisitUnsafePutLongVolatile(HInvoke* invoke) { VisitJdkUnsafePutLongVolatile(invoke); } +void IntrinsicCodeGeneratorX86::VisitUnsafePutByte(HInvoke* invoke) { + VisitJdkUnsafePutByte(invoke); +} void IntrinsicCodeGeneratorX86::VisitJdkUnsafePut(HInvoke* invoke) { GenUnsafePut(invoke->GetLocations(), DataType::Type::kInt32, /*is_volatile=*/ false, codegen_); @@ -2071,7 +2089,7 @@ void IntrinsicCodeGeneratorX86::VisitJdkUnsafePutVolatile(HInvoke* invoke) { void IntrinsicCodeGeneratorX86::VisitJdkUnsafePutRelease(HInvoke* invoke) { GenUnsafePut(invoke->GetLocations(), DataType::Type::kInt32, /*is_volatile=*/ true, codegen_); } -void IntrinsicCodeGeneratorX86::VisitJdkUnsafePutObject(HInvoke* invoke) { +void IntrinsicCodeGeneratorX86::VisitJdkUnsafePutReference(HInvoke* invoke) { GenUnsafePut( invoke->GetLocations(), DataType::Type::kReference, /*is_volatile=*/ false, codegen_); } @@ -2079,11 +2097,11 @@ void IntrinsicCodeGeneratorX86::VisitJdkUnsafePutObjectOrdered(HInvoke* invoke) GenUnsafePut( invoke->GetLocations(), DataType::Type::kReference, /*is_volatile=*/ false, codegen_); } -void IntrinsicCodeGeneratorX86::VisitJdkUnsafePutObjectVolatile(HInvoke* invoke) { +void IntrinsicCodeGeneratorX86::VisitJdkUnsafePutReferenceVolatile(HInvoke* invoke) { GenUnsafePut( invoke->GetLocations(), DataType::Type::kReference, /*is_volatile=*/ true, codegen_); } -void IntrinsicCodeGeneratorX86::VisitJdkUnsafePutObjectRelease(HInvoke* invoke) { +void IntrinsicCodeGeneratorX86::VisitJdkUnsafePutReferenceRelease(HInvoke* invoke) { GenUnsafePut( invoke->GetLocations(), DataType::Type::kReference, /*is_volatile=*/ true, codegen_); } @@ -2099,13 +2117,15 @@ void IntrinsicCodeGeneratorX86::VisitJdkUnsafePutLongVolatile(HInvoke* invoke) { void IntrinsicCodeGeneratorX86::VisitJdkUnsafePutLongRelease(HInvoke* invoke) { GenUnsafePut(invoke->GetLocations(), DataType::Type::kInt64, /*is_volatile=*/ true, codegen_); } +void IntrinsicCodeGeneratorX86::VisitJdkUnsafePutByte(HInvoke* invoke) { + GenUnsafePut(invoke->GetLocations(), DataType::Type::kInt8, /*is_volatile=*/ false, codegen_); +} static void CreateIntIntIntIntIntToInt(ArenaAllocator* allocator, + CodeGeneratorX86* codegen, DataType::Type type, HInvoke* invoke) { - const bool can_call = gUseReadBarrier && - kUseBakerReadBarrier && - IsUnsafeCASObject(invoke); + const bool can_call = codegen->EmitBakerReadBarrier() && IsUnsafeCASReference(invoke); LocationSummary* locations = new (allocator) LocationSummary(invoke, can_call @@ -2162,24 +2182,24 @@ void IntrinsicLocationsBuilderX86::VisitJdkUnsafeCASLong(HInvoke* invoke) { void IntrinsicLocationsBuilderX86::VisitJdkUnsafeCASObject(HInvoke* invoke) { // `jdk.internal.misc.Unsafe.compareAndSwapObject` has compare-and-set 
semantics (see javadoc). - VisitJdkUnsafeCompareAndSetObject(invoke); + VisitJdkUnsafeCompareAndSetReference(invoke); } void IntrinsicLocationsBuilderX86::VisitJdkUnsafeCompareAndSetInt(HInvoke* invoke) { - CreateIntIntIntIntIntToInt(allocator_, DataType::Type::kInt32, invoke); + CreateIntIntIntIntIntToInt(allocator_, codegen_, DataType::Type::kInt32, invoke); } void IntrinsicLocationsBuilderX86::VisitJdkUnsafeCompareAndSetLong(HInvoke* invoke) { - CreateIntIntIntIntIntToInt(allocator_, DataType::Type::kInt64, invoke); + CreateIntIntIntIntIntToInt(allocator_, codegen_, DataType::Type::kInt64, invoke); } -void IntrinsicLocationsBuilderX86::VisitJdkUnsafeCompareAndSetObject(HInvoke* invoke) { +void IntrinsicLocationsBuilderX86::VisitJdkUnsafeCompareAndSetReference(HInvoke* invoke) { // The only supported read barrier implementation is the Baker-style read barriers. - if (gUseReadBarrier && !kUseBakerReadBarrier) { + if (codegen_->EmitNonBakerReadBarrier()) { return; } - CreateIntIntIntIntIntToInt(allocator_, DataType::Type::kReference, invoke); + CreateIntIntIntIntIntToInt(allocator_, codegen_, DataType::Type::kReference, invoke); } static void GenPrimitiveLockedCmpxchg(DataType::Type type, @@ -2304,7 +2324,7 @@ static void GenReferenceCAS(HInvoke* invoke, DCHECK_EQ(expected, EAX); DCHECK_NE(temp, temp2); - if (gUseReadBarrier && kUseBakerReadBarrier) { + if (codegen->EmitBakerReadBarrier()) { // Need to make sure the reference stored in the field is a to-space // one before attempting the CAS or the CAS could fail incorrectly. codegen->GenerateReferenceLoadWithBakerReadBarrier( @@ -2391,7 +2411,7 @@ static void GenCAS(DataType::Type type, HInvoke* invoke, CodeGeneratorX86* codeg if (type == DataType::Type::kReference) { // The only read barrier implementation supporting the // UnsafeCASObject intrinsic is the Baker-style read barriers. - DCHECK_IMPLIES(gUseReadBarrier, kUseBakerReadBarrier); + DCHECK_IMPLIES(codegen->EmitReadBarrier(), kUseBakerReadBarrier); Register temp = locations->GetTemp(0).AsRegister<Register>(); Register temp2 = locations->GetTemp(1).AsRegister<Register>(); @@ -2413,7 +2433,7 @@ void IntrinsicCodeGeneratorX86::VisitUnsafeCASLong(HInvoke* invoke) { void IntrinsicCodeGeneratorX86::VisitUnsafeCASObject(HInvoke* invoke) { // The only read barrier implementation supporting the // UnsafeCASObject intrinsic is the Baker-style read barriers. - DCHECK_IMPLIES(gUseReadBarrier, kUseBakerReadBarrier); + DCHECK_IMPLIES(codegen_->EmitReadBarrier(), kUseBakerReadBarrier); GenCAS(DataType::Type::kReference, invoke, codegen_); } @@ -2430,7 +2450,7 @@ void IntrinsicCodeGeneratorX86::VisitJdkUnsafeCASLong(HInvoke* invoke) { void IntrinsicCodeGeneratorX86::VisitJdkUnsafeCASObject(HInvoke* invoke) { // `jdk.internal.misc.Unsafe.compareAndSwapObject` has compare-and-set semantics (see javadoc). - VisitJdkUnsafeCompareAndSetObject(invoke); + VisitJdkUnsafeCompareAndSetReference(invoke); } void IntrinsicCodeGeneratorX86::VisitJdkUnsafeCompareAndSetInt(HInvoke* invoke) { @@ -2441,13 +2461,245 @@ void IntrinsicCodeGeneratorX86::VisitJdkUnsafeCompareAndSetLong(HInvoke* invoke) GenCAS(DataType::Type::kInt64, invoke, codegen_); } -void IntrinsicCodeGeneratorX86::VisitJdkUnsafeCompareAndSetObject(HInvoke* invoke) { +void IntrinsicCodeGeneratorX86::VisitJdkUnsafeCompareAndSetReference(HInvoke* invoke) { // The only supported read barrier implementation is the Baker-style read barriers. 
- DCHECK_IMPLIES(gUseReadBarrier, kUseBakerReadBarrier); + DCHECK_IMPLIES(codegen_->EmitReadBarrier(), kUseBakerReadBarrier); GenCAS(DataType::Type::kReference, invoke, codegen_); } +// Note: Unlike other architectures that use corresponding enums for the `VarHandle` +// implementation, x86 is currently using it only for `Unsafe`. +enum class GetAndUpdateOp { + kSet, + kAdd, +}; + +void CreateUnsafeGetAndUpdateLocations(ArenaAllocator* allocator, + HInvoke* invoke, + CodeGeneratorX86* codegen, + DataType::Type type, + GetAndUpdateOp get_and_unsafe_op) { + const bool can_call = codegen->EmitReadBarrier() && IsUnsafeGetAndSetReference(invoke); + LocationSummary* locations = + new (allocator) LocationSummary(invoke, + can_call + ? LocationSummary::kCallOnSlowPath + : LocationSummary::kNoCall, + kIntrinsified); + if (can_call && kUseBakerReadBarrier) { + locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty()); // No caller-save registers. + } + locations->SetInAt(0, Location::NoLocation()); // Unused receiver. + if (type == DataType::Type::kInt64) { + // Explicitly allocate all registers. + locations->SetInAt(1, Location::RegisterLocation(EBP)); + if (get_and_unsafe_op == GetAndUpdateOp::kAdd) { + locations->AddTemp(Location::RegisterLocation(EBP)); // We shall clobber EBP. + locations->SetInAt(2, Location::Any()); // Offset shall be on the stack. + locations->SetInAt(3, Location::RegisterPairLocation(ESI, EDI)); + locations->AddTemp(Location::RegisterLocation(EBX)); + locations->AddTemp(Location::RegisterLocation(ECX)); + } else { + locations->SetInAt(2, Location::RegisterPairLocation(ESI, EDI)); + locations->SetInAt(3, Location::RegisterPairLocation(EBX, ECX)); + } + locations->SetOut(Location::RegisterPairLocation(EAX, EDX), Location::kOutputOverlap); + } else { + locations->SetInAt(1, Location::RequiresRegister()); + locations->SetInAt(2, Location::RequiresRegister()); + // Use the same register for both the output and the new value or addend + // to take advantage of XCHG or XADD. Arbitrarily pick EAX. 
+ locations->SetInAt(3, Location::RegisterLocation(EAX)); + locations->SetOut(Location::RegisterLocation(EAX)); + } +} + +void IntrinsicLocationsBuilderX86::VisitUnsafeGetAndAddInt(HInvoke* invoke) { + VisitJdkUnsafeGetAndAddInt(invoke); +} + +void IntrinsicLocationsBuilderX86::VisitUnsafeGetAndAddLong(HInvoke* invoke) { + VisitJdkUnsafeGetAndAddLong(invoke); +} + +void IntrinsicLocationsBuilderX86::VisitUnsafeGetAndSetInt(HInvoke* invoke) { + VisitJdkUnsafeGetAndSetInt(invoke); +} + +void IntrinsicLocationsBuilderX86::VisitUnsafeGetAndSetLong(HInvoke* invoke) { + VisitJdkUnsafeGetAndSetLong(invoke); +} + +void IntrinsicLocationsBuilderX86::VisitUnsafeGetAndSetObject(HInvoke* invoke) { + VisitJdkUnsafeGetAndSetReference(invoke); +} + +void IntrinsicLocationsBuilderX86::VisitJdkUnsafeGetAndAddInt(HInvoke* invoke) { + CreateUnsafeGetAndUpdateLocations( + allocator_, invoke, codegen_, DataType::Type::kInt32, GetAndUpdateOp::kAdd); +} + +void IntrinsicLocationsBuilderX86::VisitJdkUnsafeGetAndAddLong(HInvoke* invoke) { + CreateUnsafeGetAndUpdateLocations( + allocator_, invoke, codegen_, DataType::Type::kInt64, GetAndUpdateOp::kAdd); +} + +void IntrinsicLocationsBuilderX86::VisitJdkUnsafeGetAndSetInt(HInvoke* invoke) { + CreateUnsafeGetAndUpdateLocations( + allocator_, invoke, codegen_, DataType::Type::kInt32, GetAndUpdateOp::kSet); +} + +void IntrinsicLocationsBuilderX86::VisitJdkUnsafeGetAndSetLong(HInvoke* invoke) { + CreateUnsafeGetAndUpdateLocations( + allocator_, invoke, codegen_, DataType::Type::kInt64, GetAndUpdateOp::kSet); +} + +void IntrinsicLocationsBuilderX86::VisitJdkUnsafeGetAndSetReference(HInvoke* invoke) { + // The only supported read barrier implementation is the Baker-style read barriers. + if (codegen_->EmitNonBakerReadBarrier()) { + return; + } + + CreateUnsafeGetAndUpdateLocations( + allocator_, invoke, codegen_, DataType::Type::kReference, GetAndUpdateOp::kSet); + LocationSummary* locations = invoke->GetLocations(); + locations->AddTemp(Location::RequiresRegister()); + locations->AddTemp(Location::RegisterLocation(ECX)); // Byte register for `MarkGCCard()`. +} + +static void GenUnsafeGetAndUpdate(HInvoke* invoke, + DataType::Type type, + CodeGeneratorX86* codegen, + GetAndUpdateOp get_and_update_op) { + X86Assembler* assembler = down_cast<X86Assembler*>(codegen->GetAssembler()); + LocationSummary* locations = invoke->GetLocations(); + + Location out = locations->Out(); // Result. + Register base = locations->InAt(1).AsRegister<Register>(); // Object pointer. + Location offset = locations->InAt(2); // Long offset. + Location arg = locations->InAt(3); // New value or addend. + + if (type == DataType::Type::kInt32) { + DCHECK(out.Equals(arg)); + Register out_reg = out.AsRegister<Register>(); + Address field_address(base, offset.AsRegisterPairLow<Register>(), TIMES_1, 0); + if (get_and_update_op == GetAndUpdateOp::kAdd) { + __ LockXaddl(field_address, out_reg); + } else { + DCHECK(get_and_update_op == GetAndUpdateOp::kSet); + __ xchgl(out_reg, field_address); + } + } else if (type == DataType::Type::kInt64) { + // Prepare the field address. Ignore the high 32 bits of the `offset`. + Address field_address_low(kNoRegister, 0), field_address_high(kNoRegister, 0); + if (get_and_update_op == GetAndUpdateOp::kAdd) { + DCHECK(offset.IsDoubleStackSlot()); + __ addl(base, Address(ESP, offset.GetStackIndex())); // Clobbers `base`. 
+ DCHECK(Location::RegisterLocation(base).Equals(locations->GetTemp(0))); + field_address_low = Address(base, 0); + field_address_high = Address(base, 4); + } else { + field_address_low = Address(base, offset.AsRegisterPairLow<Register>(), TIMES_1, 0); + field_address_high = Address(base, offset.AsRegisterPairLow<Register>(), TIMES_1, 4); + } + // Load the old value to EDX:EAX and use LOCK CMPXCHG8B to set the new value. + NearLabel loop; + __ Bind(&loop); + __ movl(EAX, field_address_low); + __ movl(EDX, field_address_high); + if (get_and_update_op == GetAndUpdateOp::kAdd) { + DCHECK(Location::RegisterPairLocation(ESI, EDI).Equals(arg)); + __ movl(EBX, EAX); + __ movl(ECX, EDX); + __ addl(EBX, ESI); + __ adcl(ECX, EDI); + } else { + DCHECK(get_and_update_op == GetAndUpdateOp::kSet); + DCHECK(Location::RegisterPairLocation(EBX, ECX).Equals(arg)); + } + __ LockCmpxchg8b(field_address_low); + __ j(kNotEqual, &loop); // Repeat on failure. + } else { + DCHECK_EQ(type, DataType::Type::kReference); + DCHECK(get_and_update_op == GetAndUpdateOp::kSet); + Register out_reg = out.AsRegister<Register>(); + Address field_address(base, offset.AsRegisterPairLow<Register>(), TIMES_1, 0); + Register temp1 = locations->GetTemp(0).AsRegister<Register>(); + Register temp2 = locations->GetTemp(1).AsRegister<Register>(); + + if (codegen->EmitReadBarrier()) { + DCHECK(kUseBakerReadBarrier); + // Ensure that the field contains a to-space reference. + codegen->GenerateReferenceLoadWithBakerReadBarrier( + invoke, + Location::RegisterLocation(temp2), + base, + field_address, + /*needs_null_check=*/ false, + /*always_update_field=*/ true, + &temp1); + } + + // Mark card for object as a new value shall be stored. + bool new_value_can_be_null = true; // TODO: Worth finding out this information? + DCHECK_EQ(temp2, ECX); // Byte register for `MarkGCCard()`. + codegen->MarkGCCard(temp1, temp2, base, /*value=*/ out_reg, new_value_can_be_null); + + if (kPoisonHeapReferences) { + // Use a temp to avoid poisoning base of the field address, which might happen if `out` + // is the same as `base` (for code like `unsafe.getAndSet(obj, offset, obj)`). 
+ __ movl(temp1, out_reg); + __ PoisonHeapReference(temp1); + __ xchgl(temp1, field_address); + __ UnpoisonHeapReference(temp1); + __ movl(out_reg, temp1); + } else { + __ xchgl(out_reg, field_address); + } + } +} + +void IntrinsicCodeGeneratorX86::VisitUnsafeGetAndAddInt(HInvoke* invoke) { + VisitJdkUnsafeGetAndAddInt(invoke); +} + +void IntrinsicCodeGeneratorX86::VisitUnsafeGetAndAddLong(HInvoke* invoke) { + VisitJdkUnsafeGetAndAddLong(invoke); +} + +void IntrinsicCodeGeneratorX86::VisitUnsafeGetAndSetInt(HInvoke* invoke) { + VisitJdkUnsafeGetAndSetInt(invoke); +} + +void IntrinsicCodeGeneratorX86::VisitUnsafeGetAndSetLong(HInvoke* invoke) { + VisitJdkUnsafeGetAndSetLong(invoke); +} + +void IntrinsicCodeGeneratorX86::VisitUnsafeGetAndSetObject(HInvoke* invoke) { + VisitJdkUnsafeGetAndSetReference(invoke); +} + +void IntrinsicCodeGeneratorX86::VisitJdkUnsafeGetAndAddInt(HInvoke* invoke) { + GenUnsafeGetAndUpdate(invoke, DataType::Type::kInt32, codegen_, GetAndUpdateOp::kAdd); +} + +void IntrinsicCodeGeneratorX86::VisitJdkUnsafeGetAndAddLong(HInvoke* invoke) { + GenUnsafeGetAndUpdate(invoke, DataType::Type::kInt64, codegen_, GetAndUpdateOp::kAdd); +} + +void IntrinsicCodeGeneratorX86::VisitJdkUnsafeGetAndSetInt(HInvoke* invoke) { + GenUnsafeGetAndUpdate(invoke, DataType::Type::kInt32, codegen_, GetAndUpdateOp::kSet); +} + +void IntrinsicCodeGeneratorX86::VisitJdkUnsafeGetAndSetLong(HInvoke* invoke) { + GenUnsafeGetAndUpdate(invoke, DataType::Type::kInt64, codegen_, GetAndUpdateOp::kSet); +} + +void IntrinsicCodeGeneratorX86::VisitJdkUnsafeGetAndSetReference(HInvoke* invoke) { + GenUnsafeGetAndUpdate(invoke, DataType::Type::kReference, codegen_, GetAndUpdateOp::kSet); +} + void IntrinsicLocationsBuilderX86::VisitIntegerReverse(HInvoke* invoke) { LocationSummary* locations = new (allocator_) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified); @@ -2843,7 +3095,7 @@ static void GenSystemArrayCopyEndAddress(X86Assembler* assembler, void IntrinsicLocationsBuilderX86::VisitSystemArrayCopy(HInvoke* invoke) { // The only read barrier implementation supporting the // SystemArrayCopy intrinsic is the Baker-style read barriers. - if (gUseReadBarrier && !kUseBakerReadBarrier) { + if (codegen_->EmitNonBakerReadBarrier()) { return; } @@ -2875,7 +3127,7 @@ void IntrinsicLocationsBuilderX86::VisitSystemArrayCopy(HInvoke* invoke) { void IntrinsicCodeGeneratorX86::VisitSystemArrayCopy(HInvoke* invoke) { // The only read barrier implementation supporting the // SystemArrayCopy intrinsic is the Baker-style read barriers. - DCHECK_IMPLIES(gUseReadBarrier, kUseBakerReadBarrier); + DCHECK_IMPLIES(codegen_->EmitReadBarrier(), kUseBakerReadBarrier); X86Assembler* assembler = GetAssembler(); LocationSummary* locations = invoke->GetLocations(); @@ -2995,7 +3247,7 @@ void IntrinsicCodeGeneratorX86::VisitSystemArrayCopy(HInvoke* invoke) { // slow path. if (!optimizations.GetSourceIsNonPrimitiveArray()) { - if (gUseReadBarrier && kUseBakerReadBarrier) { + if (codegen_->EmitBakerReadBarrier()) { // /* HeapReference<Class> */ temp1 = src->klass_ codegen_->GenerateFieldLoadWithBakerReadBarrier( invoke, temp1_loc, src, class_offset, /* needs_null_check= */ false); @@ -3006,7 +3258,7 @@ void IntrinsicCodeGeneratorX86::VisitSystemArrayCopy(HInvoke* invoke) { __ testl(temp1, temp1); __ j(kEqual, intrinsic_slow_path->GetEntryLabel()); // If heap poisoning is enabled, `temp1` has been unpoisoned - // by the the previous call to GenerateFieldLoadWithBakerReadBarrier. 
+ // by the previous call to GenerateFieldLoadWithBakerReadBarrier. } else { // /* HeapReference<Class> */ temp1 = src->klass_ __ movl(temp1, Address(src, class_offset)); @@ -3022,7 +3274,7 @@ void IntrinsicCodeGeneratorX86::VisitSystemArrayCopy(HInvoke* invoke) { __ j(kNotEqual, intrinsic_slow_path->GetEntryLabel()); } - if (gUseReadBarrier && kUseBakerReadBarrier) { + if (codegen_->EmitBakerReadBarrier()) { if (length.Equals(Location::RegisterLocation(temp3))) { // When Baker read barriers are enabled, register `temp3`, // which in the present case contains the `length` parameter, @@ -3051,7 +3303,7 @@ void IntrinsicCodeGeneratorX86::VisitSystemArrayCopy(HInvoke* invoke) { __ testl(temp2, temp2); __ j(kEqual, intrinsic_slow_path->GetEntryLabel()); // If heap poisoning is enabled, `temp2` has been unpoisoned - // by the the previous call to GenerateFieldLoadWithBakerReadBarrier. + // by the previous call to GenerateFieldLoadWithBakerReadBarrier. __ cmpw(Address(temp2, primitive_offset), Immediate(Primitive::kPrimNot)); __ j(kNotEqual, intrinsic_slow_path->GetEntryLabel()); } @@ -3120,7 +3372,7 @@ void IntrinsicCodeGeneratorX86::VisitSystemArrayCopy(HInvoke* invoke) { } else if (!optimizations.GetSourceIsNonPrimitiveArray()) { DCHECK(optimizations.GetDestinationIsNonPrimitiveArray()); // Bail out if the source is not a non primitive array. - if (gUseReadBarrier && kUseBakerReadBarrier) { + if (codegen_->EmitBakerReadBarrier()) { // /* HeapReference<Class> */ temp1 = src->klass_ codegen_->GenerateFieldLoadWithBakerReadBarrier( invoke, temp1_loc, src, class_offset, /* needs_null_check= */ false); @@ -3130,7 +3382,7 @@ void IntrinsicCodeGeneratorX86::VisitSystemArrayCopy(HInvoke* invoke) { __ testl(temp1, temp1); __ j(kEqual, intrinsic_slow_path->GetEntryLabel()); // If heap poisoning is enabled, `temp1` has been unpoisoned - // by the the previous call to GenerateFieldLoadWithBakerReadBarrier. + // by the previous call to GenerateFieldLoadWithBakerReadBarrier. } else { // /* HeapReference<Class> */ temp1 = src->klass_ __ movl(temp1, Address(src, class_offset)); @@ -3151,7 +3403,7 @@ void IntrinsicCodeGeneratorX86::VisitSystemArrayCopy(HInvoke* invoke) { // Compute the base source address in `temp1`. GenSystemArrayCopyBaseAddress(GetAssembler(), type, src, src_pos, temp1); - if (gUseReadBarrier && kUseBakerReadBarrier) { + if (codegen_->EmitBakerReadBarrier()) { // If it is needed (in the case of the fast-path loop), the base // destination address is computed later, as `temp2` is used for // intermediate computations. 
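Whether the read-barrier state is queried through the new codegen_->EmitBakerReadBarrier() predicate or the old global flag, the shape of the SystemArrayCopy fast-path type check stays the same: both source and destination must be arrays of references before the inline copy loop may run. A rough sketch of that logic with illustrative types (ClassLike and kPrimNotTag are stand-ins, not the mirror:: API):

#include <cstdint>

struct ClassLike {
  const ClassLike* component_type;   // nullptr when the class is not an array class
  uint16_t primitive_type;           // primitive tag of the element type
};

constexpr uint16_t kPrimNotTag = 0;  // stand-in for Primitive::kPrimNot ("not a primitive")

// Returns true when the intrinsic may use its inline copy loop; any failure
// here corresponds to a branch to the intrinsic slow path.
bool CanUseFastReferenceCopy(const ClassLike* src_klass,
                             const ClassLike* dst_klass,
                             bool src_known_ref_array,
                             bool dst_known_ref_array) {
  if (!dst_known_ref_array) {
    const ClassLike* component = dst_klass->component_type;
    if (component == nullptr || component->primitive_type != kPrimNotTag) {
      return false;                  // destination is not an array of references
    }
  }
  if (!src_known_ref_array) {
    const ClassLike* component = src_klass->component_type;
    if (component == nullptr || component->primitive_type != kPrimNotTag) {
      return false;                  // source is not an array of references
    }
  }
  // Any remaining src-vs-dst element-type compatibility concerns are likewise
  // resolved by branching to the slow path in the generated code.
  return true;
}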
@@ -3279,21 +3531,36 @@ static void RequestBaseMethodAddressInRegister(HInvoke* invoke) { } } -void IntrinsicLocationsBuilderX86::VisitIntegerValueOf(HInvoke* invoke) { +#define VISIT_INTRINSIC(name, low, high, type, start_index) \ + void IntrinsicLocationsBuilderX86::Visit ##name ##ValueOf(HInvoke* invoke) { \ + InvokeRuntimeCallingConvention calling_convention; \ + IntrinsicVisitor::ComputeValueOfLocations( \ + invoke, \ + codegen_, \ + low, \ + high - low + 1, \ + Location::RegisterLocation(EAX), \ + Location::RegisterLocation(calling_convention.GetRegisterAt(0))); \ + RequestBaseMethodAddressInRegister(invoke); \ + } \ + void IntrinsicCodeGeneratorX86::Visit ##name ##ValueOf(HInvoke* invoke) { \ + IntrinsicVisitor::ValueOfInfo info = \ + IntrinsicVisitor::ComputeValueOfInfo( \ + invoke, \ + codegen_->GetCompilerOptions(), \ + WellKnownClasses::java_lang_ ##name ##_value, \ + low, \ + high - low + 1, \ + start_index); \ + HandleValueOf(invoke, info, type); \ + } + BOXED_TYPES(VISIT_INTRINSIC) +#undef VISIT_INTRINSIC + +void IntrinsicCodeGeneratorX86::HandleValueOf(HInvoke* invoke, + const IntrinsicVisitor::ValueOfInfo& info, + DataType::Type type) { DCHECK(invoke->IsInvokeStaticOrDirect()); - InvokeRuntimeCallingConvention calling_convention; - IntrinsicVisitor::ComputeIntegerValueOfLocations( - invoke, - codegen_, - Location::RegisterLocation(EAX), - Location::RegisterLocation(calling_convention.GetRegisterAt(0))); - RequestBaseMethodAddressInRegister(invoke); -} - -void IntrinsicCodeGeneratorX86::VisitIntegerValueOf(HInvoke* invoke) { - DCHECK(invoke->IsInvokeStaticOrDirect()); - IntrinsicVisitor::IntegerValueOfInfo info = - IntrinsicVisitor::ComputeIntegerValueOfInfo(invoke, codegen_->GetCompilerOptions()); LocationSummary* locations = invoke->GetLocations(); X86Assembler* assembler = GetAssembler(); @@ -3304,20 +3571,25 @@ void IntrinsicCodeGeneratorX86::VisitIntegerValueOf(HInvoke* invoke) { codegen_->InvokeRuntime(kQuickAllocObjectInitialized, invoke, invoke->GetDexPc()); CheckEntrypointTypes<kQuickAllocObjectWithChecks, void*, mirror::Class*>(); }; - if (invoke->InputAt(0)->IsConstant()) { + if (invoke->InputAt(0)->IsIntConstant()) { int32_t value = invoke->InputAt(0)->AsIntConstant()->GetValue(); if (static_cast<uint32_t>(value - info.low) < info.length) { - // Just embed the j.l.Integer in the code. - DCHECK_NE(info.value_boot_image_reference, IntegerValueOfInfo::kInvalidReference); + // Just embed the object in the code. + DCHECK_NE(info.value_boot_image_reference, ValueOfInfo::kInvalidReference); codegen_->LoadBootImageAddress( out, info.value_boot_image_reference, invoke->AsInvokeStaticOrDirect()); } else { DCHECK(locations->CanCall()); // Allocate and initialize a new j.l.Integer. - // TODO: If we JIT, we could allocate the j.l.Integer now, and store it in the + // TODO: If we JIT, we could allocate the object now, and store it in the // JIT object table. allocate_instance(); - __ movl(Address(out, info.value_offset), Immediate(value)); + codegen_->MoveToMemory(type, + Location::ConstantLocation(invoke->InputAt(0)->AsIntConstant()), + out, + /* dst_index= */ Register::kNoRegister, + /* dst_scale= */ TIMES_1, + /* dst_disp= */ info.value_offset); } } else { DCHECK(locations->CanCall()); @@ -3327,7 +3599,7 @@ void IntrinsicCodeGeneratorX86::VisitIntegerValueOf(HInvoke* invoke) { __ cmpl(out, Immediate(info.length)); NearLabel allocate, done; __ j(kAboveEqual, &allocate); - // If the value is within the bounds, load the j.l.Integer directly from the array. 
+ // If the value is within the bounds, load the object directly from the array. constexpr size_t kElementSize = sizeof(mirror::HeapReference<mirror::Object>); static_assert((1u << TIMES_4) == sizeof(mirror::HeapReference<mirror::Object>), "Check heap reference size."); @@ -3355,9 +3627,14 @@ void IntrinsicCodeGeneratorX86::VisitIntegerValueOf(HInvoke* invoke) { __ MaybeUnpoisonHeapReference(out); __ jmp(&done); __ Bind(&allocate); - // Otherwise allocate and initialize a new j.l.Integer. + // Otherwise allocate and initialize a new object. allocate_instance(); - __ movl(Address(out, info.value_offset), in); + codegen_->MoveToMemory(type, + Location::RegisterLocation(in), + out, + /* dst_index= */ Register::kNoRegister, + /* dst_scale= */ TIMES_1, + /* dst_disp= */ info.value_offset); __ Bind(&done); } } @@ -3377,7 +3654,7 @@ void IntrinsicCodeGeneratorX86::VisitReferenceGetReferent(HInvoke* invoke) { SlowPathCode* slow_path = new (GetAllocator()) IntrinsicSlowPathX86(invoke); codegen_->AddSlowPath(slow_path); - if (gUseReadBarrier) { + if (codegen_->EmitReadBarrier()) { // Check self->GetWeakRefAccessEnabled(). ThreadOffset32 offset = Thread::WeakRefAccessEnabledOffset<kX86PointerSize>(); __ fs()->cmpl(Address::Absolute(offset), @@ -3400,7 +3677,7 @@ void IntrinsicCodeGeneratorX86::VisitReferenceGetReferent(HInvoke* invoke) { // Load the value from the field. uint32_t referent_offset = mirror::Reference::ReferentOffset().Uint32Value(); - if (gUseReadBarrier && kUseBakerReadBarrier) { + if (codegen_->EmitBakerReadBarrier()) { codegen_->GenerateFieldLoadWithBakerReadBarrier(invoke, out, obj.AsRegister<Register>(), @@ -3419,7 +3696,7 @@ void IntrinsicCodeGeneratorX86::VisitReferenceGetReferent(HInvoke* invoke) { } void IntrinsicLocationsBuilderX86::VisitReferenceRefersTo(HInvoke* invoke) { - IntrinsicVisitor::CreateReferenceRefersToLocations(invoke); + IntrinsicVisitor::CreateReferenceRefersToLocations(invoke, codegen_); } void IntrinsicCodeGeneratorX86::VisitReferenceRefersTo(HInvoke* invoke) { @@ -3442,7 +3719,7 @@ void IntrinsicCodeGeneratorX86::VisitReferenceRefersTo(HInvoke* invoke) { NearLabel end, return_true, return_false; __ cmpl(out, other); - if (gUseReadBarrier) { + if (codegen_->EmitReadBarrier()) { DCHECK(kUseBakerReadBarrier); __ j(kEqual, &return_true); @@ -3504,7 +3781,7 @@ void IntrinsicLocationsBuilderX86::VisitReachabilityFence(HInvoke* invoke) { locations->SetInAt(0, Location::Any()); } -void IntrinsicCodeGeneratorX86::VisitReachabilityFence(HInvoke* invoke ATTRIBUTE_UNUSED) { } +void IntrinsicCodeGeneratorX86::VisitReachabilityFence([[maybe_unused]] HInvoke* invoke) {} void IntrinsicLocationsBuilderX86::VisitIntegerDivideUnsigned(HInvoke* invoke) { LocationSummary* locations = new (allocator_) LocationSummary(invoke, @@ -3769,7 +4046,7 @@ static Register GenerateVarHandleFieldReference(HInvoke* invoke, const uint32_t declaring_class_offset = ArtField::DeclaringClassOffset().Uint32Value(); Register varhandle_object = locations->InAt(0).AsRegister<Register>(); - // Load the ArtField and the offset + // Load the ArtField* and the offset. 
__ movl(temp, Address(varhandle_object, artfield_offset)); __ movl(offset, Address(temp, offset_offset)); size_t expected_coordinates_count = GetExpectedVarHandleCoordinatesCount(invoke); @@ -3781,7 +4058,7 @@ static Register GenerateVarHandleFieldReference(HInvoke* invoke, Location::RegisterLocation(temp), Address(temp, declaring_class_offset), /* fixup_label= */ nullptr, - gCompilerReadBarrierOption); + codegen->GetCompilerReadBarrierOption()); return temp; } @@ -3791,10 +4068,10 @@ static Register GenerateVarHandleFieldReference(HInvoke* invoke, return locations->InAt(1).AsRegister<Register>(); } -static void CreateVarHandleGetLocations(HInvoke* invoke) { +static void CreateVarHandleGetLocations(HInvoke* invoke, CodeGeneratorX86* codegen) { // The only read barrier implementation supporting the // VarHandleGet intrinsic is the Baker-style read barriers. - if (gUseReadBarrier && !kUseBakerReadBarrier) { + if (codegen->EmitNonBakerReadBarrier()) { return; } @@ -3836,7 +4113,7 @@ static void CreateVarHandleGetLocations(HInvoke* invoke) { static void GenerateVarHandleGet(HInvoke* invoke, CodeGeneratorX86* codegen) { // The only read barrier implementation supporting the // VarHandleGet intrinsic is the Baker-style read barriers. - DCHECK_IMPLIES(gUseReadBarrier, kUseBakerReadBarrier); + DCHECK_IMPLIES(codegen->EmitReadBarrier(), kUseBakerReadBarrier); X86Assembler* assembler = codegen->GetAssembler(); LocationSummary* locations = invoke->GetLocations(); @@ -3860,7 +4137,7 @@ static void GenerateVarHandleGet(HInvoke* invoke, CodeGeneratorX86* codegen) { Address field_addr(ref, offset, TIMES_1, 0); // Load the value from the field - if (type == DataType::Type::kReference && gCompilerReadBarrierOption == kWithReadBarrier) { + if (type == DataType::Type::kReference && codegen->EmitReadBarrier()) { codegen->GenerateReferenceLoadWithBakerReadBarrier( invoke, out, ref, field_addr, /* needs_null_check= */ false); } else if (type == DataType::Type::kInt64 && @@ -3883,7 +4160,7 @@ static void GenerateVarHandleGet(HInvoke* invoke, CodeGeneratorX86* codegen) { } void IntrinsicLocationsBuilderX86::VisitVarHandleGet(HInvoke* invoke) { - CreateVarHandleGetLocations(invoke); + CreateVarHandleGetLocations(invoke, codegen_); } void IntrinsicCodeGeneratorX86::VisitVarHandleGet(HInvoke* invoke) { @@ -3891,7 +4168,7 @@ void IntrinsicCodeGeneratorX86::VisitVarHandleGet(HInvoke* invoke) { } void IntrinsicLocationsBuilderX86::VisitVarHandleGetVolatile(HInvoke* invoke) { - CreateVarHandleGetLocations(invoke); + CreateVarHandleGetLocations(invoke, codegen_); } void IntrinsicCodeGeneratorX86::VisitVarHandleGetVolatile(HInvoke* invoke) { @@ -3899,7 +4176,7 @@ void IntrinsicCodeGeneratorX86::VisitVarHandleGetVolatile(HInvoke* invoke) { } void IntrinsicLocationsBuilderX86::VisitVarHandleGetAcquire(HInvoke* invoke) { - CreateVarHandleGetLocations(invoke); + CreateVarHandleGetLocations(invoke, codegen_); } void IntrinsicCodeGeneratorX86::VisitVarHandleGetAcquire(HInvoke* invoke) { @@ -3907,17 +4184,17 @@ void IntrinsicCodeGeneratorX86::VisitVarHandleGetAcquire(HInvoke* invoke) { } void IntrinsicLocationsBuilderX86::VisitVarHandleGetOpaque(HInvoke* invoke) { - CreateVarHandleGetLocations(invoke); + CreateVarHandleGetLocations(invoke, codegen_); } void IntrinsicCodeGeneratorX86::VisitVarHandleGetOpaque(HInvoke* invoke) { GenerateVarHandleGet(invoke, codegen_); } -static void CreateVarHandleSetLocations(HInvoke* invoke) { +static void CreateVarHandleSetLocations(HInvoke* invoke, CodeGeneratorX86* codegen) { // The only read 
barrier implementation supporting the // VarHandleGet intrinsic is the Baker-style read barriers. - if (gUseReadBarrier && !kUseBakerReadBarrier) { + if (codegen->EmitNonBakerReadBarrier()) { return; } @@ -3990,7 +4267,7 @@ static void CreateVarHandleSetLocations(HInvoke* invoke) { static void GenerateVarHandleSet(HInvoke* invoke, CodeGeneratorX86* codegen) { // The only read barrier implementation supporting the // VarHandleGet intrinsic is the Baker-style read barriers. - DCHECK_IMPLIES(gUseReadBarrier, kUseBakerReadBarrier); + DCHECK_IMPLIES(codegen->EmitReadBarrier(), kUseBakerReadBarrier); X86Assembler* assembler = codegen->GetAssembler(); LocationSummary* locations = invoke->GetLocations(); @@ -4056,7 +4333,7 @@ static void GenerateVarHandleSet(HInvoke* invoke, CodeGeneratorX86* codegen) { } void IntrinsicLocationsBuilderX86::VisitVarHandleSet(HInvoke* invoke) { - CreateVarHandleSetLocations(invoke); + CreateVarHandleSetLocations(invoke, codegen_); } void IntrinsicCodeGeneratorX86::VisitVarHandleSet(HInvoke* invoke) { @@ -4064,7 +4341,7 @@ void IntrinsicCodeGeneratorX86::VisitVarHandleSet(HInvoke* invoke) { } void IntrinsicLocationsBuilderX86::VisitVarHandleSetVolatile(HInvoke* invoke) { - CreateVarHandleSetLocations(invoke); + CreateVarHandleSetLocations(invoke, codegen_); } void IntrinsicCodeGeneratorX86::VisitVarHandleSetVolatile(HInvoke* invoke) { @@ -4072,7 +4349,7 @@ void IntrinsicCodeGeneratorX86::VisitVarHandleSetVolatile(HInvoke* invoke) { } void IntrinsicLocationsBuilderX86::VisitVarHandleSetRelease(HInvoke* invoke) { - CreateVarHandleSetLocations(invoke); + CreateVarHandleSetLocations(invoke, codegen_); } void IntrinsicCodeGeneratorX86::VisitVarHandleSetRelease(HInvoke* invoke) { @@ -4080,17 +4357,17 @@ void IntrinsicCodeGeneratorX86::VisitVarHandleSetRelease(HInvoke* invoke) { } void IntrinsicLocationsBuilderX86::VisitVarHandleSetOpaque(HInvoke* invoke) { - CreateVarHandleSetLocations(invoke); + CreateVarHandleSetLocations(invoke, codegen_); } void IntrinsicCodeGeneratorX86::VisitVarHandleSetOpaque(HInvoke* invoke) { GenerateVarHandleSet(invoke, codegen_); } -static void CreateVarHandleGetAndSetLocations(HInvoke* invoke) { +static void CreateVarHandleGetAndSetLocations(HInvoke* invoke, CodeGeneratorX86* codegen) { // The only read barrier implementation supporting the // VarHandleGet intrinsic is the Baker-style read barriers. - if (gUseReadBarrier && !kUseBakerReadBarrier) { + if (codegen->EmitNonBakerReadBarrier()) { return; } @@ -4138,7 +4415,7 @@ static void CreateVarHandleGetAndSetLocations(HInvoke* invoke) { static void GenerateVarHandleGetAndSet(HInvoke* invoke, CodeGeneratorX86* codegen) { // The only read barrier implementation supporting the // VarHandleGet intrinsic is the Baker-style read barriers. - DCHECK_IMPLIES(gUseReadBarrier, kUseBakerReadBarrier); + DCHECK_IMPLIES(codegen->EmitReadBarrier(), kUseBakerReadBarrier); X86Assembler* assembler = codegen->GetAssembler(); LocationSummary* locations = invoke->GetLocations(); @@ -4197,7 +4474,7 @@ static void GenerateVarHandleGetAndSet(HInvoke* invoke, CodeGeneratorX86* codege __ movd(locations->Out().AsFpuRegister<XmmRegister>(), EAX); break; case DataType::Type::kReference: { - if (gUseReadBarrier && kUseBakerReadBarrier) { + if (codegen->EmitBakerReadBarrier()) { // Need to make sure the reference stored in the field is a to-space // one before attempting the CAS or the CAS could fail incorrectly. 
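
The comment above (the GenerateReferenceLoadWithBakerReadBarrier call it introduces continues just below) describes a subtle interaction between a reference CAS and a copying collector: if the field still holds a from-space pointer while the expected value the compiler sees is the to-space copy, a raw pointer compare-and-set fails even though the field logically holds the expected object. The following toy model reproduces that failure and the "heal the field first" fix; the forwarding-pointer scheme is invented for the example and is not ART's heap representation.

    // Toy model: a moved object carries a forwarding pointer.
    // Not ART code; the forwarding representation is made up for the example.
    #include <atomic>
    #include <cassert>

    struct Obj {
      Obj* forward = nullptr;  // non-null once the collector has moved the object
    };

    // "Read barrier": return the to-space copy and heal the field in place,
    // loosely mimicking what the Baker-style barrier achieves before the CAS.
    Obj* LoadWithBarrier(std::atomic<Obj*>& field) {
      Obj* ref = field.load();
      if (ref != nullptr && ref->forward != nullptr) {
        Obj* to_space = ref->forward;
        field.compare_exchange_strong(ref, to_space);  // heal stale from-space ref
        return to_space;
      }
      return ref;
    }

    int main() {
      Obj from_space;
      Obj to_space;
      from_space.forward = &to_space;        // collector moved the object

      std::atomic<Obj*> field{&from_space};  // field still holds the old pointer
      Obj* expected = &to_space;             // mutator only ever sees to-space refs
      Obj* new_value = nullptr;

      // Raw CAS fails spuriously: the bits differ even though the object matches.
      Obj* e = expected;
      assert(!field.compare_exchange_strong(e, new_value));

      // Heal the field through the barrier, then the CAS succeeds.
      LoadWithBarrier(field);
      e = expected;
      assert(field.compare_exchange_strong(e, new_value));
      return 0;
    }
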
codegen->GenerateReferenceLoadWithBakerReadBarrier( @@ -4235,7 +4512,7 @@ static void GenerateVarHandleGetAndSet(HInvoke* invoke, CodeGeneratorX86* codege } void IntrinsicLocationsBuilderX86::VisitVarHandleGetAndSet(HInvoke* invoke) { - CreateVarHandleGetAndSetLocations(invoke); + CreateVarHandleGetAndSetLocations(invoke, codegen_); } void IntrinsicCodeGeneratorX86::VisitVarHandleGetAndSet(HInvoke* invoke) { @@ -4243,7 +4520,7 @@ void IntrinsicCodeGeneratorX86::VisitVarHandleGetAndSet(HInvoke* invoke) { } void IntrinsicLocationsBuilderX86::VisitVarHandleGetAndSetAcquire(HInvoke* invoke) { - CreateVarHandleGetAndSetLocations(invoke); + CreateVarHandleGetAndSetLocations(invoke, codegen_); } void IntrinsicCodeGeneratorX86::VisitVarHandleGetAndSetAcquire(HInvoke* invoke) { @@ -4251,17 +4528,18 @@ void IntrinsicCodeGeneratorX86::VisitVarHandleGetAndSetAcquire(HInvoke* invoke) } void IntrinsicLocationsBuilderX86::VisitVarHandleGetAndSetRelease(HInvoke* invoke) { - CreateVarHandleGetAndSetLocations(invoke); + CreateVarHandleGetAndSetLocations(invoke, codegen_); } void IntrinsicCodeGeneratorX86::VisitVarHandleGetAndSetRelease(HInvoke* invoke) { GenerateVarHandleGetAndSet(invoke, codegen_); } -static void CreateVarHandleCompareAndSetOrExchangeLocations(HInvoke* invoke) { +static void CreateVarHandleCompareAndSetOrExchangeLocations(HInvoke* invoke, + CodeGeneratorX86* codegen) { // The only read barrier implementation supporting the // VarHandleGet intrinsic is the Baker-style read barriers. - if (gUseReadBarrier && !kUseBakerReadBarrier) { + if (codegen->EmitNonBakerReadBarrier()) { return; } @@ -4325,7 +4603,7 @@ static void CreateVarHandleCompareAndSetOrExchangeLocations(HInvoke* invoke) { static void GenerateVarHandleCompareAndSetOrExchange(HInvoke* invoke, CodeGeneratorX86* codegen) { // The only read barrier implementation supporting the // VarHandleGet intrinsic is the Baker-style read barriers. 
- DCHECK_IMPLIES(gUseReadBarrier, kUseBakerReadBarrier); + DCHECK_IMPLIES(codegen->EmitReadBarrier(), kUseBakerReadBarrier); X86Assembler* assembler = codegen->GetAssembler(); LocationSummary* locations = invoke->GetLocations(); @@ -4378,7 +4656,7 @@ static void GenerateVarHandleCompareAndSetOrExchange(HInvoke* invoke, CodeGenera } void IntrinsicLocationsBuilderX86::VisitVarHandleCompareAndSet(HInvoke* invoke) { - CreateVarHandleCompareAndSetOrExchangeLocations(invoke); + CreateVarHandleCompareAndSetOrExchangeLocations(invoke, codegen_); } void IntrinsicCodeGeneratorX86::VisitVarHandleCompareAndSet(HInvoke* invoke) { @@ -4386,7 +4664,7 @@ void IntrinsicCodeGeneratorX86::VisitVarHandleCompareAndSet(HInvoke* invoke) { } void IntrinsicLocationsBuilderX86::VisitVarHandleWeakCompareAndSet(HInvoke* invoke) { - CreateVarHandleCompareAndSetOrExchangeLocations(invoke); + CreateVarHandleCompareAndSetOrExchangeLocations(invoke, codegen_); } void IntrinsicCodeGeneratorX86::VisitVarHandleWeakCompareAndSet(HInvoke* invoke) { @@ -4394,7 +4672,7 @@ void IntrinsicCodeGeneratorX86::VisitVarHandleWeakCompareAndSet(HInvoke* invoke) } void IntrinsicLocationsBuilderX86::VisitVarHandleWeakCompareAndSetPlain(HInvoke* invoke) { - CreateVarHandleCompareAndSetOrExchangeLocations(invoke); + CreateVarHandleCompareAndSetOrExchangeLocations(invoke, codegen_); } void IntrinsicCodeGeneratorX86::VisitVarHandleWeakCompareAndSetPlain(HInvoke* invoke) { @@ -4402,7 +4680,7 @@ void IntrinsicCodeGeneratorX86::VisitVarHandleWeakCompareAndSetPlain(HInvoke* in } void IntrinsicLocationsBuilderX86::VisitVarHandleWeakCompareAndSetAcquire(HInvoke* invoke) { - CreateVarHandleCompareAndSetOrExchangeLocations(invoke); + CreateVarHandleCompareAndSetOrExchangeLocations(invoke, codegen_); } void IntrinsicCodeGeneratorX86::VisitVarHandleWeakCompareAndSetAcquire(HInvoke* invoke) { @@ -4410,7 +4688,7 @@ void IntrinsicCodeGeneratorX86::VisitVarHandleWeakCompareAndSetAcquire(HInvoke* } void IntrinsicLocationsBuilderX86::VisitVarHandleWeakCompareAndSetRelease(HInvoke* invoke) { - CreateVarHandleCompareAndSetOrExchangeLocations(invoke); + CreateVarHandleCompareAndSetOrExchangeLocations(invoke, codegen_); } void IntrinsicCodeGeneratorX86::VisitVarHandleWeakCompareAndSetRelease(HInvoke* invoke) { @@ -4418,7 +4696,7 @@ void IntrinsicCodeGeneratorX86::VisitVarHandleWeakCompareAndSetRelease(HInvoke* } void IntrinsicLocationsBuilderX86::VisitVarHandleCompareAndExchange(HInvoke* invoke) { - CreateVarHandleCompareAndSetOrExchangeLocations(invoke); + CreateVarHandleCompareAndSetOrExchangeLocations(invoke, codegen_); } void IntrinsicCodeGeneratorX86::VisitVarHandleCompareAndExchange(HInvoke* invoke) { @@ -4426,7 +4704,7 @@ void IntrinsicCodeGeneratorX86::VisitVarHandleCompareAndExchange(HInvoke* invoke } void IntrinsicLocationsBuilderX86::VisitVarHandleCompareAndExchangeAcquire(HInvoke* invoke) { - CreateVarHandleCompareAndSetOrExchangeLocations(invoke); + CreateVarHandleCompareAndSetOrExchangeLocations(invoke, codegen_); } void IntrinsicCodeGeneratorX86::VisitVarHandleCompareAndExchangeAcquire(HInvoke* invoke) { @@ -4434,17 +4712,17 @@ void IntrinsicCodeGeneratorX86::VisitVarHandleCompareAndExchangeAcquire(HInvoke* } void IntrinsicLocationsBuilderX86::VisitVarHandleCompareAndExchangeRelease(HInvoke* invoke) { - CreateVarHandleCompareAndSetOrExchangeLocations(invoke); + CreateVarHandleCompareAndSetOrExchangeLocations(invoke, codegen_); } void IntrinsicCodeGeneratorX86::VisitVarHandleCompareAndExchangeRelease(HInvoke* invoke) { 
GenerateVarHandleCompareAndSetOrExchange(invoke, codegen_); } -static void CreateVarHandleGetAndAddLocations(HInvoke* invoke) { +static void CreateVarHandleGetAndAddLocations(HInvoke* invoke, CodeGeneratorX86* codegen) { // The only read barrier implementation supporting the // VarHandleGet intrinsic is the Baker-style read barriers. - if (gUseReadBarrier && !kUseBakerReadBarrier) { + if (codegen->EmitNonBakerReadBarrier()) { return; } @@ -4493,7 +4771,7 @@ static void CreateVarHandleGetAndAddLocations(HInvoke* invoke) { static void GenerateVarHandleGetAndAdd(HInvoke* invoke, CodeGeneratorX86* codegen) { // The only read barrier implementation supporting the // VarHandleGet intrinsic is the Baker-style read barriers. - DCHECK_IMPLIES(gUseReadBarrier, kUseBakerReadBarrier); + DCHECK_IMPLIES(codegen->EmitReadBarrier(), kUseBakerReadBarrier); X86Assembler* assembler = codegen->GetAssembler(); LocationSummary* locations = invoke->GetLocations(); @@ -4568,7 +4846,7 @@ static void GenerateVarHandleGetAndAdd(HInvoke* invoke, CodeGeneratorX86* codege } void IntrinsicLocationsBuilderX86::VisitVarHandleGetAndAdd(HInvoke* invoke) { - CreateVarHandleGetAndAddLocations(invoke); + CreateVarHandleGetAndAddLocations(invoke, codegen_); } void IntrinsicCodeGeneratorX86::VisitVarHandleGetAndAdd(HInvoke* invoke) { @@ -4576,7 +4854,7 @@ void IntrinsicCodeGeneratorX86::VisitVarHandleGetAndAdd(HInvoke* invoke) { } void IntrinsicLocationsBuilderX86::VisitVarHandleGetAndAddAcquire(HInvoke* invoke) { - CreateVarHandleGetAndAddLocations(invoke); + CreateVarHandleGetAndAddLocations(invoke, codegen_); } void IntrinsicCodeGeneratorX86::VisitVarHandleGetAndAddAcquire(HInvoke* invoke) { @@ -4584,17 +4862,17 @@ void IntrinsicCodeGeneratorX86::VisitVarHandleGetAndAddAcquire(HInvoke* invoke) } void IntrinsicLocationsBuilderX86::VisitVarHandleGetAndAddRelease(HInvoke* invoke) { - CreateVarHandleGetAndAddLocations(invoke); + CreateVarHandleGetAndAddLocations(invoke, codegen_); } void IntrinsicCodeGeneratorX86::VisitVarHandleGetAndAddRelease(HInvoke* invoke) { GenerateVarHandleGetAndAdd(invoke, codegen_); } -static void CreateVarHandleGetAndBitwiseOpLocations(HInvoke* invoke) { +static void CreateVarHandleGetAndBitwiseOpLocations(HInvoke* invoke, CodeGeneratorX86* codegen) { // The only read barrier implementation supporting the // VarHandleGet intrinsic is the Baker-style read barriers. - if (gUseReadBarrier && !kUseBakerReadBarrier) { + if (codegen->EmitNonBakerReadBarrier()) { return; } @@ -4662,7 +4940,7 @@ static void GenerateBitwiseOp(HInvoke* invoke, static void GenerateVarHandleGetAndBitwiseOp(HInvoke* invoke, CodeGeneratorX86* codegen) { // The only read barrier implementation supporting the // VarHandleGet intrinsic is the Baker-style read barriers. 
- DCHECK_IMPLIES(gUseReadBarrier, kUseBakerReadBarrier); + DCHECK_IMPLIES(codegen->EmitReadBarrier(), kUseBakerReadBarrier); X86Assembler* assembler = codegen->GetAssembler(); LocationSummary* locations = invoke->GetLocations(); @@ -4723,7 +5001,7 @@ static void GenerateVarHandleGetAndBitwiseOp(HInvoke* invoke, CodeGeneratorX86* } void IntrinsicLocationsBuilderX86::VisitVarHandleGetAndBitwiseOr(HInvoke* invoke) { - CreateVarHandleGetAndBitwiseOpLocations(invoke); + CreateVarHandleGetAndBitwiseOpLocations(invoke, codegen_); } void IntrinsicCodeGeneratorX86::VisitVarHandleGetAndBitwiseOr(HInvoke* invoke) { @@ -4731,7 +5009,7 @@ void IntrinsicCodeGeneratorX86::VisitVarHandleGetAndBitwiseOr(HInvoke* invoke) { } void IntrinsicLocationsBuilderX86::VisitVarHandleGetAndBitwiseOrAcquire(HInvoke* invoke) { - CreateVarHandleGetAndBitwiseOpLocations(invoke); + CreateVarHandleGetAndBitwiseOpLocations(invoke, codegen_); } void IntrinsicCodeGeneratorX86::VisitVarHandleGetAndBitwiseOrAcquire(HInvoke* invoke) { @@ -4739,7 +5017,7 @@ void IntrinsicCodeGeneratorX86::VisitVarHandleGetAndBitwiseOrAcquire(HInvoke* in } void IntrinsicLocationsBuilderX86::VisitVarHandleGetAndBitwiseOrRelease(HInvoke* invoke) { - CreateVarHandleGetAndBitwiseOpLocations(invoke); + CreateVarHandleGetAndBitwiseOpLocations(invoke, codegen_); } void IntrinsicCodeGeneratorX86::VisitVarHandleGetAndBitwiseOrRelease(HInvoke* invoke) { @@ -4747,7 +5025,7 @@ void IntrinsicCodeGeneratorX86::VisitVarHandleGetAndBitwiseOrRelease(HInvoke* in } void IntrinsicLocationsBuilderX86::VisitVarHandleGetAndBitwiseXor(HInvoke* invoke) { - CreateVarHandleGetAndBitwiseOpLocations(invoke); + CreateVarHandleGetAndBitwiseOpLocations(invoke, codegen_); } void IntrinsicCodeGeneratorX86::VisitVarHandleGetAndBitwiseXor(HInvoke* invoke) { @@ -4755,7 +5033,7 @@ void IntrinsicCodeGeneratorX86::VisitVarHandleGetAndBitwiseXor(HInvoke* invoke) } void IntrinsicLocationsBuilderX86::VisitVarHandleGetAndBitwiseXorAcquire(HInvoke* invoke) { - CreateVarHandleGetAndBitwiseOpLocations(invoke); + CreateVarHandleGetAndBitwiseOpLocations(invoke, codegen_); } void IntrinsicCodeGeneratorX86::VisitVarHandleGetAndBitwiseXorAcquire(HInvoke* invoke) { @@ -4763,7 +5041,7 @@ void IntrinsicCodeGeneratorX86::VisitVarHandleGetAndBitwiseXorAcquire(HInvoke* i } void IntrinsicLocationsBuilderX86::VisitVarHandleGetAndBitwiseXorRelease(HInvoke* invoke) { - CreateVarHandleGetAndBitwiseOpLocations(invoke); + CreateVarHandleGetAndBitwiseOpLocations(invoke, codegen_); } void IntrinsicCodeGeneratorX86::VisitVarHandleGetAndBitwiseXorRelease(HInvoke* invoke) { @@ -4771,7 +5049,7 @@ void IntrinsicCodeGeneratorX86::VisitVarHandleGetAndBitwiseXorRelease(HInvoke* i } void IntrinsicLocationsBuilderX86::VisitVarHandleGetAndBitwiseAnd(HInvoke* invoke) { - CreateVarHandleGetAndBitwiseOpLocations(invoke); + CreateVarHandleGetAndBitwiseOpLocations(invoke, codegen_); } void IntrinsicCodeGeneratorX86::VisitVarHandleGetAndBitwiseAnd(HInvoke* invoke) { @@ -4779,7 +5057,7 @@ void IntrinsicCodeGeneratorX86::VisitVarHandleGetAndBitwiseAnd(HInvoke* invoke) } void IntrinsicLocationsBuilderX86::VisitVarHandleGetAndBitwiseAndAcquire(HInvoke* invoke) { - CreateVarHandleGetAndBitwiseOpLocations(invoke); + CreateVarHandleGetAndBitwiseOpLocations(invoke, codegen_); } void IntrinsicCodeGeneratorX86::VisitVarHandleGetAndBitwiseAndAcquire(HInvoke* invoke) { @@ -4787,7 +5065,7 @@ void IntrinsicCodeGeneratorX86::VisitVarHandleGetAndBitwiseAndAcquire(HInvoke* i } void 
IntrinsicLocationsBuilderX86::VisitVarHandleGetAndBitwiseAndRelease(HInvoke* invoke) { - CreateVarHandleGetAndBitwiseOpLocations(invoke); + CreateVarHandleGetAndBitwiseOpLocations(invoke, codegen_); } void IntrinsicCodeGeneratorX86::VisitVarHandleGetAndBitwiseAndRelease(HInvoke* invoke) { diff --git a/compiler/optimizing/intrinsics_x86.h b/compiler/optimizing/intrinsics_x86.h index 77c236d244..289a3c342c 100644 --- a/compiler/optimizing/intrinsics_x86.h +++ b/compiler/optimizing/intrinsics_x86.h @@ -19,6 +19,7 @@ #include "base/macros.h" #include "intrinsics.h" +#include "intrinsics_list.h" namespace art HIDDEN { @@ -39,9 +40,7 @@ class IntrinsicLocationsBuilderX86 final : public IntrinsicVisitor { #define OPTIMIZING_INTRINSICS(Name, IsStatic, NeedsEnvironmentOrCache, SideEffects, Exceptions, ...) \ void Visit ## Name(HInvoke* invoke) override; -#include "intrinsics_list.h" - INTRINSICS_LIST(OPTIMIZING_INTRINSICS) -#undef INTRINSICS_LIST + ART_INTRINSICS_LIST(OPTIMIZING_INTRINSICS) #undef OPTIMIZING_INTRINSICS // Check whether an invoke is an intrinsic, and if so, create a location summary. Returns whether @@ -64,9 +63,7 @@ class IntrinsicCodeGeneratorX86 final : public IntrinsicVisitor { #define OPTIMIZING_INTRINSICS(Name, IsStatic, NeedsEnvironmentOrCache, SideEffects, Exceptions, ...) \ void Visit ## Name(HInvoke* invoke) override; -#include "intrinsics_list.h" - INTRINSICS_LIST(OPTIMIZING_INTRINSICS) -#undef INTRINSICS_LIST + ART_INTRINSICS_LIST(OPTIMIZING_INTRINSICS) #undef OPTIMIZING_INTRINSICS private: @@ -74,6 +71,10 @@ class IntrinsicCodeGeneratorX86 final : public IntrinsicVisitor { ArenaAllocator* GetAllocator(); + void HandleValueOf(HInvoke* invoke, + const IntrinsicVisitor::ValueOfInfo& info, + DataType::Type type); + CodeGeneratorX86* const codegen_; DISALLOW_COPY_AND_ASSIGN(IntrinsicCodeGeneratorX86); diff --git a/compiler/optimizing/intrinsics_x86_64.cc b/compiler/optimizing/intrinsics_x86_64.cc index 9d0d5f155e..493cd67c27 100644 --- a/compiler/optimizing/intrinsics_x86_64.cc +++ b/compiler/optimizing/intrinsics_x86_64.cc @@ -25,6 +25,7 @@ #include "entrypoints/quick/quick_entrypoints.h" #include "heap_poisoning.h" #include "intrinsics.h" +#include "intrinsic_objects.h" #include "intrinsics_utils.h" #include "lock_word.h" #include "mirror/array-inl.h" @@ -35,6 +36,7 @@ #include "thread-current-inl.h" #include "utils/x86_64/assembler_x86_64.h" #include "utils/x86_64/constants_x86_64.h" +#include "well_known_classes.h" namespace art HIDDEN { @@ -71,11 +73,10 @@ class ReadBarrierSystemArrayCopySlowPathX86_64 : public SlowPathCode { public: explicit ReadBarrierSystemArrayCopySlowPathX86_64(HInstruction* instruction) : SlowPathCode(instruction) { - DCHECK(gUseReadBarrier); - DCHECK(kUseBakerReadBarrier); } void EmitNativeCode(CodeGenerator* codegen) override { + DCHECK(codegen->EmitBakerReadBarrier()); CodeGeneratorX86_64* x86_64_codegen = down_cast<CodeGeneratorX86_64*>(codegen); LocationSummary* locations = instruction_->GetLocations(); DCHECK(locations->CanCall()); @@ -222,34 +223,34 @@ static void GenIsInfinite(LocationSummary* locations, double kPositiveInfinity = std::numeric_limits<double>::infinity(); double kNegativeInfinity = -1 * kPositiveInfinity; - __ xorq(output, output); - __ comisd(input, codegen->LiteralDoubleAddress(kPositiveInfinity)); - __ j(kNotEqual, &done1); - __ j(kParityEven, &done2); - __ movq(output, Immediate(1)); - __ jmp(&done2); - __ Bind(&done1); - __ comisd(input, codegen->LiteralDoubleAddress(kNegativeInfinity)); - __ j(kNotEqual, &done2); - __ 
j(kParityEven, &done2); - __ movq(output, Immediate(1)); - __ Bind(&done2); + __ xorq(output, output); + __ comisd(input, codegen->LiteralDoubleAddress(kPositiveInfinity)); + __ j(kNotEqual, &done1); + __ j(kParityEven, &done2); + __ movq(output, Immediate(1)); + __ jmp(&done2); + __ Bind(&done1); + __ comisd(input, codegen->LiteralDoubleAddress(kNegativeInfinity)); + __ j(kNotEqual, &done2); + __ j(kParityEven, &done2); + __ movq(output, Immediate(1)); + __ Bind(&done2); } else { float kPositiveInfinity = std::numeric_limits<float>::infinity(); float kNegativeInfinity = -1 * kPositiveInfinity; - __ xorl(output, output); - __ comiss(input, codegen->LiteralFloatAddress(kPositiveInfinity)); - __ j(kNotEqual, &done1); - __ j(kParityEven, &done2); - __ movl(output, Immediate(1)); - __ jmp(&done2); - __ Bind(&done1); - __ comiss(input, codegen->LiteralFloatAddress(kNegativeInfinity)); - __ j(kNotEqual, &done2); - __ j(kParityEven, &done2); - __ movl(output, Immediate(1)); - __ Bind(&done2); + __ xorl(output, output); + __ comiss(input, codegen->LiteralFloatAddress(kPositiveInfinity)); + __ j(kNotEqual, &done1); + __ j(kParityEven, &done2); + __ movl(output, Immediate(1)); + __ jmp(&done2); + __ Bind(&done1); + __ comiss(input, codegen->LiteralFloatAddress(kNegativeInfinity)); + __ j(kNotEqual, &done2); + __ j(kParityEven, &done2); + __ movl(output, Immediate(1)); + __ Bind(&done2); } } @@ -617,8 +618,8 @@ void IntrinsicCodeGeneratorX86_64::VisitMathNextAfter(HInvoke* invoke) { static void CreateSystemArrayCopyLocations(HInvoke* invoke) { // Check to see if we have known failures that will cause us to have to bail out // to the runtime, and just generate the runtime call directly. - HIntConstant* src_pos = invoke->InputAt(1)->AsIntConstant(); - HIntConstant* dest_pos = invoke->InputAt(3)->AsIntConstant(); + HIntConstant* src_pos = invoke->InputAt(1)->AsIntConstantOrNull(); + HIntConstant* dest_pos = invoke->InputAt(3)->AsIntConstantOrNull(); // The positions must be non-negative. if ((src_pos != nullptr && src_pos->GetValue() < 0) || @@ -628,7 +629,7 @@ static void CreateSystemArrayCopyLocations(HInvoke* invoke) { } // The length must be > 0. - HIntConstant* length = invoke->InputAt(4)->AsIntConstant(); + HIntConstant* length = invoke->InputAt(4)->AsIntConstantOrNull(); if (length != nullptr) { int32_t len = length->GetValue(); if (len < 0) { @@ -836,7 +837,7 @@ void IntrinsicLocationsBuilderX86_64::VisitSystemArrayCopyInt(HInvoke* invoke) { void IntrinsicLocationsBuilderX86_64::VisitSystemArrayCopy(HInvoke* invoke) { // The only read barrier implementation supporting the // SystemArrayCopy intrinsic is the Baker-style read barriers. - if (gUseReadBarrier && !kUseBakerReadBarrier) { + if (codegen_->EmitNonBakerReadBarrier()) { return; } @@ -887,7 +888,7 @@ static void GenSystemArrayCopyAddresses(X86_64Assembler* assembler, void IntrinsicCodeGeneratorX86_64::VisitSystemArrayCopy(HInvoke* invoke) { // The only read barrier implementation supporting the // SystemArrayCopy intrinsic is the Baker-style read barriers. - DCHECK_IMPLIES(gUseReadBarrier, kUseBakerReadBarrier); + DCHECK_IMPLIES(codegen_->EmitReadBarrier(), kUseBakerReadBarrier); X86_64Assembler* assembler = GetAssembler(); LocationSummary* locations = invoke->GetLocations(); @@ -1002,7 +1003,7 @@ void IntrinsicCodeGeneratorX86_64::VisitSystemArrayCopy(HInvoke* invoke) { // slow path. 
bool did_unpoison = false; - if (gUseReadBarrier && kUseBakerReadBarrier) { + if (codegen_->EmitBakerReadBarrier()) { // /* HeapReference<Class> */ temp1 = dest->klass_ codegen_->GenerateFieldLoadWithBakerReadBarrier( invoke, temp1_loc, dest, class_offset, /* needs_null_check= */ false); @@ -1014,9 +1015,8 @@ void IntrinsicCodeGeneratorX86_64::VisitSystemArrayCopy(HInvoke* invoke) { // /* HeapReference<Class> */ temp2 = src->klass_ codegen_->GenerateFieldLoadWithBakerReadBarrier( invoke, temp2_loc, src, class_offset, /* needs_null_check= */ false); - // If heap poisoning is enabled, `temp1` and `temp2` have been - // unpoisoned by the the previous calls to - // GenerateFieldLoadWithBakerReadBarrier. + // If heap poisoning is enabled, `temp1` and `temp2` have been unpoisoned + // by the previous calls to GenerateFieldLoadWithBakerReadBarrier. } else { // /* HeapReference<Class> */ temp1 = dest->klass_ __ movl(temp1, Address(dest, class_offset)); @@ -1034,14 +1034,14 @@ void IntrinsicCodeGeneratorX86_64::VisitSystemArrayCopy(HInvoke* invoke) { if (!optimizations.GetDestinationIsNonPrimitiveArray()) { // Bail out if the destination is not a non primitive array. - if (gUseReadBarrier && kUseBakerReadBarrier) { + if (codegen_->EmitBakerReadBarrier()) { // /* HeapReference<Class> */ TMP = temp1->component_type_ codegen_->GenerateFieldLoadWithBakerReadBarrier( invoke, TMP_loc, temp1, component_offset, /* needs_null_check= */ false); __ testl(CpuRegister(TMP), CpuRegister(TMP)); __ j(kEqual, intrinsic_slow_path->GetEntryLabel()); // If heap poisoning is enabled, `TMP` has been unpoisoned by - // the the previous call to GenerateFieldLoadWithBakerReadBarrier. + // the previous call to GenerateFieldLoadWithBakerReadBarrier. } else { // /* HeapReference<Class> */ TMP = temp1->component_type_ __ movl(CpuRegister(TMP), Address(temp1, component_offset)); @@ -1055,7 +1055,7 @@ void IntrinsicCodeGeneratorX86_64::VisitSystemArrayCopy(HInvoke* invoke) { if (!optimizations.GetSourceIsNonPrimitiveArray()) { // Bail out if the source is not a non primitive array. - if (gUseReadBarrier && kUseBakerReadBarrier) { + if (codegen_->EmitBakerReadBarrier()) { // For the same reason given earlier, `temp1` is not trashed by the // read barrier emitted by GenerateFieldLoadWithBakerReadBarrier below. // /* HeapReference<Class> */ TMP = temp2->component_type_ @@ -1064,7 +1064,7 @@ void IntrinsicCodeGeneratorX86_64::VisitSystemArrayCopy(HInvoke* invoke) { __ testl(CpuRegister(TMP), CpuRegister(TMP)); __ j(kEqual, intrinsic_slow_path->GetEntryLabel()); // If heap poisoning is enabled, `TMP` has been unpoisoned by - // the the previous call to GenerateFieldLoadWithBakerReadBarrier. + // the previous call to GenerateFieldLoadWithBakerReadBarrier. 
} else { // /* HeapReference<Class> */ TMP = temp2->component_type_ __ movl(CpuRegister(TMP), Address(temp2, component_offset)); @@ -1081,7 +1081,7 @@ void IntrinsicCodeGeneratorX86_64::VisitSystemArrayCopy(HInvoke* invoke) { if (optimizations.GetDestinationIsTypedObjectArray()) { NearLabel do_copy; __ j(kEqual, &do_copy); - if (gUseReadBarrier && kUseBakerReadBarrier) { + if (codegen_->EmitBakerReadBarrier()) { // /* HeapReference<Class> */ temp1 = temp1->component_type_ codegen_->GenerateFieldLoadWithBakerReadBarrier( invoke, temp1_loc, temp1, component_offset, /* needs_null_check= */ false); @@ -1109,7 +1109,7 @@ void IntrinsicCodeGeneratorX86_64::VisitSystemArrayCopy(HInvoke* invoke) { } else if (!optimizations.GetSourceIsNonPrimitiveArray()) { DCHECK(optimizations.GetDestinationIsNonPrimitiveArray()); // Bail out if the source is not a non primitive array. - if (gUseReadBarrier && kUseBakerReadBarrier) { + if (codegen_->EmitBakerReadBarrier()) { // /* HeapReference<Class> */ temp1 = src->klass_ codegen_->GenerateFieldLoadWithBakerReadBarrier( invoke, temp1_loc, src, class_offset, /* needs_null_check= */ false); @@ -1141,7 +1141,7 @@ void IntrinsicCodeGeneratorX86_64::VisitSystemArrayCopy(HInvoke* invoke) { GenSystemArrayCopyAddresses( GetAssembler(), type, src, src_pos, dest, dest_pos, length, temp1, temp2, temp3); - if (gUseReadBarrier && kUseBakerReadBarrier) { + if (codegen_->EmitBakerReadBarrier()) { // SystemArrayCopy implementation for Baker read barriers (see // also CodeGeneratorX86_64::GenerateReferenceLoadWithBakerReadBarrier): // @@ -1424,7 +1424,7 @@ static void GenerateStringIndexOf(HInvoke* invoke, HInstruction* code_point = invoke->InputAt(1); if (code_point->IsIntConstant()) { if (static_cast<uint32_t>(code_point->AsIntConstant()->GetValue()) > - std::numeric_limits<uint16_t>::max()) { + std::numeric_limits<uint16_t>::max()) { // Always needs the slow-path. We could directly dispatch to it, but this case should be // rare, so for simplicity just put the full slow-path down and branch unconditionally. slow_path = new (codegen->GetScopedAllocator()) IntrinsicSlowPathX86_64(invoke); @@ -1655,7 +1655,7 @@ void IntrinsicCodeGeneratorX86_64::VisitStringGetCharsNoCheck(HInvoke* invoke) { CpuRegister obj = locations->InAt(0).AsRegister<CpuRegister>(); Location srcBegin = locations->InAt(1); int srcBegin_value = - srcBegin.IsConstant() ? srcBegin.GetConstant()->AsIntConstant()->GetValue() : 0; + srcBegin.IsConstant() ? 
srcBegin.GetConstant()->AsIntConstant()->GetValue() : 0; CpuRegister srcEnd = locations->InAt(2).AsRegister<CpuRegister>(); CpuRegister dst = locations->InAt(3).AsRegister<CpuRegister>(); CpuRegister dstBegin = locations->InAt(4).AsRegister<CpuRegister>(); @@ -1871,7 +1871,7 @@ void IntrinsicCodeGeneratorX86_64::VisitThreadCurrentThread(HInvoke* invoke) { static void GenUnsafeGet(HInvoke* invoke, DataType::Type type, - bool is_volatile ATTRIBUTE_UNUSED, + [[maybe_unused]] bool is_volatile, CodeGeneratorX86_64* codegen) { X86_64Assembler* assembler = down_cast<X86_64Assembler*>(codegen->GetAssembler()); LocationSummary* locations = invoke->GetLocations(); @@ -1883,12 +1883,16 @@ static void GenUnsafeGet(HInvoke* invoke, CpuRegister output = output_loc.AsRegister<CpuRegister>(); switch (type) { + case DataType::Type::kInt8: + __ movsxb(output, Address(base, offset, ScaleFactor::TIMES_1, 0)); + break; + case DataType::Type::kInt32: __ movl(output, Address(base, offset, ScaleFactor::TIMES_1, 0)); break; case DataType::Type::kReference: { - if (gUseReadBarrier) { + if (codegen->EmitReadBarrier()) { if (kUseBakerReadBarrier) { Address src(base, offset, ScaleFactor::TIMES_1, 0); codegen->GenerateReferenceLoadWithBakerReadBarrier( @@ -1915,22 +1919,10 @@ static void GenUnsafeGet(HInvoke* invoke, } } -static bool UnsafeGetIntrinsicOnCallList(Intrinsics intrinsic) { - switch (intrinsic) { - case Intrinsics::kUnsafeGetObject: - case Intrinsics::kUnsafeGetObjectVolatile: - case Intrinsics::kJdkUnsafeGetObject: - case Intrinsics::kJdkUnsafeGetObjectVolatile: - case Intrinsics::kJdkUnsafeGetObjectAcquire: - return true; - default: - break; - } - return false; -} - -static void CreateIntIntIntToIntLocations(ArenaAllocator* allocator, HInvoke* invoke) { - bool can_call = gUseReadBarrier && UnsafeGetIntrinsicOnCallList(invoke->GetIntrinsic()); +static void CreateIntIntIntToIntLocations(ArenaAllocator* allocator, + HInvoke* invoke, + CodeGeneratorX86_64* codegen) { + bool can_call = codegen->EmitReadBarrier() && IsUnsafeGetReference(invoke); LocationSummary* locations = new (allocator) LocationSummary(invoke, can_call @@ -1960,40 +1952,45 @@ void IntrinsicLocationsBuilderX86_64::VisitUnsafeGetLongVolatile(HInvoke* invoke VisitJdkUnsafeGetLongVolatile(invoke); } void IntrinsicLocationsBuilderX86_64::VisitUnsafeGetObject(HInvoke* invoke) { - VisitJdkUnsafeGetObject(invoke); + VisitJdkUnsafeGetReference(invoke); } void IntrinsicLocationsBuilderX86_64::VisitUnsafeGetObjectVolatile(HInvoke* invoke) { - VisitJdkUnsafeGetObjectVolatile(invoke); + VisitJdkUnsafeGetReferenceVolatile(invoke); +} +void IntrinsicLocationsBuilderX86_64::VisitUnsafeGetByte(HInvoke* invoke) { + VisitJdkUnsafeGetByte(invoke); } void IntrinsicLocationsBuilderX86_64::VisitJdkUnsafeGet(HInvoke* invoke) { - CreateIntIntIntToIntLocations(allocator_, invoke); + CreateIntIntIntToIntLocations(allocator_, invoke, codegen_); } void IntrinsicLocationsBuilderX86_64::VisitJdkUnsafeGetVolatile(HInvoke* invoke) { - CreateIntIntIntToIntLocations(allocator_, invoke); + CreateIntIntIntToIntLocations(allocator_, invoke, codegen_); } void IntrinsicLocationsBuilderX86_64::VisitJdkUnsafeGetAcquire(HInvoke* invoke) { - CreateIntIntIntToIntLocations(allocator_, invoke); + CreateIntIntIntToIntLocations(allocator_, invoke, codegen_); } void IntrinsicLocationsBuilderX86_64::VisitJdkUnsafeGetLong(HInvoke* invoke) { - CreateIntIntIntToIntLocations(allocator_, invoke); + CreateIntIntIntToIntLocations(allocator_, invoke, codegen_); } void 
IntrinsicLocationsBuilderX86_64::VisitJdkUnsafeGetLongVolatile(HInvoke* invoke) { - CreateIntIntIntToIntLocations(allocator_, invoke); + CreateIntIntIntToIntLocations(allocator_, invoke, codegen_); } void IntrinsicLocationsBuilderX86_64::VisitJdkUnsafeGetLongAcquire(HInvoke* invoke) { - CreateIntIntIntToIntLocations(allocator_, invoke); + CreateIntIntIntToIntLocations(allocator_, invoke, codegen_); } -void IntrinsicLocationsBuilderX86_64::VisitJdkUnsafeGetObject(HInvoke* invoke) { - CreateIntIntIntToIntLocations(allocator_, invoke); +void IntrinsicLocationsBuilderX86_64::VisitJdkUnsafeGetReference(HInvoke* invoke) { + CreateIntIntIntToIntLocations(allocator_, invoke, codegen_); } -void IntrinsicLocationsBuilderX86_64::VisitJdkUnsafeGetObjectVolatile(HInvoke* invoke) { - CreateIntIntIntToIntLocations(allocator_, invoke); +void IntrinsicLocationsBuilderX86_64::VisitJdkUnsafeGetReferenceVolatile(HInvoke* invoke) { + CreateIntIntIntToIntLocations(allocator_, invoke, codegen_); } -void IntrinsicLocationsBuilderX86_64::VisitJdkUnsafeGetObjectAcquire(HInvoke* invoke) { - CreateIntIntIntToIntLocations(allocator_, invoke); +void IntrinsicLocationsBuilderX86_64::VisitJdkUnsafeGetReferenceAcquire(HInvoke* invoke) { + CreateIntIntIntToIntLocations(allocator_, invoke, codegen_); +} +void IntrinsicLocationsBuilderX86_64::VisitJdkUnsafeGetByte(HInvoke* invoke) { + CreateIntIntIntToIntLocations(allocator_, invoke, codegen_); } - void IntrinsicCodeGeneratorX86_64::VisitUnsafeGet(HInvoke* invoke) { VisitJdkUnsafeGet(invoke); @@ -2008,10 +2005,13 @@ void IntrinsicCodeGeneratorX86_64::VisitUnsafeGetLongVolatile(HInvoke* invoke) { VisitJdkUnsafeGetLongVolatile(invoke); } void IntrinsicCodeGeneratorX86_64::VisitUnsafeGetObject(HInvoke* invoke) { - VisitJdkUnsafeGetObject(invoke); + VisitJdkUnsafeGetReference(invoke); } void IntrinsicCodeGeneratorX86_64::VisitUnsafeGetObjectVolatile(HInvoke* invoke) { - VisitJdkUnsafeGetObjectVolatile(invoke); + VisitJdkUnsafeGetReferenceVolatile(invoke); +} +void IntrinsicCodeGeneratorX86_64::VisitUnsafeGetByte(HInvoke* invoke) { + VisitJdkUnsafeGetByte(invoke); } void IntrinsicCodeGeneratorX86_64::VisitJdkUnsafeGet(HInvoke* invoke) { @@ -2032,16 +2032,18 @@ void IntrinsicCodeGeneratorX86_64::VisitJdkUnsafeGetLongVolatile(HInvoke* invoke void IntrinsicCodeGeneratorX86_64::VisitJdkUnsafeGetLongAcquire(HInvoke* invoke) { GenUnsafeGet(invoke, DataType::Type::kInt64, /*is_volatile=*/ true, codegen_); } -void IntrinsicCodeGeneratorX86_64::VisitJdkUnsafeGetObject(HInvoke* invoke) { +void IntrinsicCodeGeneratorX86_64::VisitJdkUnsafeGetReference(HInvoke* invoke) { GenUnsafeGet(invoke, DataType::Type::kReference, /*is_volatile=*/ false, codegen_); } -void IntrinsicCodeGeneratorX86_64::VisitJdkUnsafeGetObjectVolatile(HInvoke* invoke) { +void IntrinsicCodeGeneratorX86_64::VisitJdkUnsafeGetReferenceVolatile(HInvoke* invoke) { GenUnsafeGet(invoke, DataType::Type::kReference, /*is_volatile=*/ true, codegen_); } -void IntrinsicCodeGeneratorX86_64::VisitJdkUnsafeGetObjectAcquire(HInvoke* invoke) { +void IntrinsicCodeGeneratorX86_64::VisitJdkUnsafeGetReferenceAcquire(HInvoke* invoke) { GenUnsafeGet(invoke, DataType::Type::kReference, /*is_volatile=*/ true, codegen_); } - +void IntrinsicCodeGeneratorX86_64::VisitJdkUnsafeGetByte(HInvoke* invoke) { + GenUnsafeGet(invoke, DataType::Type::kInt8, /*is_volatile=*/false, codegen_); +} static void CreateIntIntIntIntToVoidPlusTempsLocations(ArenaAllocator* allocator, DataType::Type type, @@ -2069,13 +2071,13 @@ void 
IntrinsicLocationsBuilderX86_64::VisitUnsafePutVolatile(HInvoke* invoke) { VisitJdkUnsafePutVolatile(invoke); } void IntrinsicLocationsBuilderX86_64::VisitUnsafePutObject(HInvoke* invoke) { - VisitJdkUnsafePutObject(invoke); + VisitJdkUnsafePutReference(invoke); } void IntrinsicLocationsBuilderX86_64::VisitUnsafePutObjectOrdered(HInvoke* invoke) { VisitJdkUnsafePutObjectOrdered(invoke); } void IntrinsicLocationsBuilderX86_64::VisitUnsafePutObjectVolatile(HInvoke* invoke) { - VisitJdkUnsafePutObjectVolatile(invoke); + VisitJdkUnsafePutReferenceVolatile(invoke); } void IntrinsicLocationsBuilderX86_64::VisitUnsafePutLong(HInvoke* invoke) { VisitJdkUnsafePutLong(invoke); @@ -2086,6 +2088,9 @@ void IntrinsicLocationsBuilderX86_64::VisitUnsafePutLongOrdered(HInvoke* invoke) void IntrinsicLocationsBuilderX86_64::VisitUnsafePutLongVolatile(HInvoke* invoke) { VisitJdkUnsafePutLongVolatile(invoke); } +void IntrinsicLocationsBuilderX86_64::VisitUnsafePutByte(HInvoke* invoke) { + VisitJdkUnsafePut(invoke); +} void IntrinsicLocationsBuilderX86_64::VisitJdkUnsafePut(HInvoke* invoke) { CreateIntIntIntIntToVoidPlusTempsLocations(allocator_, DataType::Type::kInt32, invoke); @@ -2099,16 +2104,16 @@ void IntrinsicLocationsBuilderX86_64::VisitJdkUnsafePutVolatile(HInvoke* invoke) void IntrinsicLocationsBuilderX86_64::VisitJdkUnsafePutRelease(HInvoke* invoke) { CreateIntIntIntIntToVoidPlusTempsLocations(allocator_, DataType::Type::kInt32, invoke); } -void IntrinsicLocationsBuilderX86_64::VisitJdkUnsafePutObject(HInvoke* invoke) { +void IntrinsicLocationsBuilderX86_64::VisitJdkUnsafePutReference(HInvoke* invoke) { CreateIntIntIntIntToVoidPlusTempsLocations(allocator_, DataType::Type::kReference, invoke); } void IntrinsicLocationsBuilderX86_64::VisitJdkUnsafePutObjectOrdered(HInvoke* invoke) { CreateIntIntIntIntToVoidPlusTempsLocations(allocator_, DataType::Type::kReference, invoke); } -void IntrinsicLocationsBuilderX86_64::VisitJdkUnsafePutObjectVolatile(HInvoke* invoke) { +void IntrinsicLocationsBuilderX86_64::VisitJdkUnsafePutReferenceVolatile(HInvoke* invoke) { CreateIntIntIntIntToVoidPlusTempsLocations(allocator_, DataType::Type::kReference, invoke); } -void IntrinsicLocationsBuilderX86_64::VisitJdkUnsafePutObjectRelease(HInvoke* invoke) { +void IntrinsicLocationsBuilderX86_64::VisitJdkUnsafePutReferenceRelease(HInvoke* invoke) { CreateIntIntIntIntToVoidPlusTempsLocations(allocator_, DataType::Type::kReference, invoke); } void IntrinsicLocationsBuilderX86_64::VisitJdkUnsafePutLong(HInvoke* invoke) { @@ -2123,6 +2128,9 @@ void IntrinsicLocationsBuilderX86_64::VisitJdkUnsafePutLongVolatile(HInvoke* inv void IntrinsicLocationsBuilderX86_64::VisitJdkUnsafePutLongRelease(HInvoke* invoke) { CreateIntIntIntIntToVoidPlusTempsLocations(allocator_, DataType::Type::kInt64, invoke); } +void IntrinsicLocationsBuilderX86_64::VisitJdkUnsafePutByte(HInvoke* invoke) { + CreateIntIntIntIntToVoidPlusTempsLocations(allocator_, DataType::Type::kUint8, invoke); +} // We don't care for ordered: it requires an AnyStore barrier, which is already given by the x86 // memory model. 
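
The remark that ordered puts need only an AnyStore barrier, "which is already given by the x86 memory model", is why the ordered and plain variants share GenUnsafePut here: x86-64's TSO ordering already keeps stores ordered among themselves, so only the volatile flavour pays for a StoreLoad barrier. The snippet below restates that distinction with std::atomic; the note about the emitted instructions is the usual x86-64 lowering, not something asserted by this patch.

    // Store orderings as seen from C++; on x86-64 the first two typically lower
    // to a plain MOV, while the seq_cst store needs XCHG (or MOV + MFENCE).
    #include <atomic>

    std::atomic<int> field{0};

    void PutPlain(int v)    { field.store(v, std::memory_order_relaxed); }
    void PutOrdered(int v)  { field.store(v, std::memory_order_release); }  // "AnyStore"-style ordering
    void PutVolatile(int v) { field.store(v, std::memory_order_seq_cst); }  // needs a StoreLoad barrier

    int main() {
      PutPlain(1);
      PutOrdered(2);
      PutVolatile(3);
      return field.load(std::memory_order_acquire) == 3 ? 0 : 1;
    }
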
@@ -2168,13 +2176,13 @@ void IntrinsicCodeGeneratorX86_64::VisitUnsafePutVolatile(HInvoke* invoke) { VisitJdkUnsafePutVolatile(invoke); } void IntrinsicCodeGeneratorX86_64::VisitUnsafePutObject(HInvoke* invoke) { - VisitJdkUnsafePutObject(invoke); + VisitJdkUnsafePutReference(invoke); } void IntrinsicCodeGeneratorX86_64::VisitUnsafePutObjectOrdered(HInvoke* invoke) { VisitJdkUnsafePutObjectOrdered(invoke); } void IntrinsicCodeGeneratorX86_64::VisitUnsafePutObjectVolatile(HInvoke* invoke) { - VisitJdkUnsafePutObjectVolatile(invoke); + VisitJdkUnsafePutReferenceVolatile(invoke); } void IntrinsicCodeGeneratorX86_64::VisitUnsafePutLong(HInvoke* invoke) { VisitJdkUnsafePutLong(invoke); @@ -2185,6 +2193,9 @@ void IntrinsicCodeGeneratorX86_64::VisitUnsafePutLongOrdered(HInvoke* invoke) { void IntrinsicCodeGeneratorX86_64::VisitUnsafePutLongVolatile(HInvoke* invoke) { VisitJdkUnsafePutLongVolatile(invoke); } +void IntrinsicCodeGeneratorX86_64::VisitUnsafePutByte(HInvoke* invoke) { + VisitJdkUnsafePutByte(invoke); +} void IntrinsicCodeGeneratorX86_64::VisitJdkUnsafePut(HInvoke* invoke) { GenUnsafePut(invoke->GetLocations(), DataType::Type::kInt32, /*is_volatile=*/ false, codegen_); @@ -2198,7 +2209,7 @@ void IntrinsicCodeGeneratorX86_64::VisitJdkUnsafePutVolatile(HInvoke* invoke) { void IntrinsicCodeGeneratorX86_64::VisitJdkUnsafePutRelease(HInvoke* invoke) { GenUnsafePut(invoke->GetLocations(), DataType::Type::kInt32, /* is_volatile= */ true, codegen_); } -void IntrinsicCodeGeneratorX86_64::VisitJdkUnsafePutObject(HInvoke* invoke) { +void IntrinsicCodeGeneratorX86_64::VisitJdkUnsafePutReference(HInvoke* invoke) { GenUnsafePut( invoke->GetLocations(), DataType::Type::kReference, /*is_volatile=*/ false, codegen_); } @@ -2206,11 +2217,11 @@ void IntrinsicCodeGeneratorX86_64::VisitJdkUnsafePutObjectOrdered(HInvoke* invok GenUnsafePut( invoke->GetLocations(), DataType::Type::kReference, /*is_volatile=*/ false, codegen_); } -void IntrinsicCodeGeneratorX86_64::VisitJdkUnsafePutObjectVolatile(HInvoke* invoke) { +void IntrinsicCodeGeneratorX86_64::VisitJdkUnsafePutReferenceVolatile(HInvoke* invoke) { GenUnsafePut( invoke->GetLocations(), DataType::Type::kReference, /*is_volatile=*/ true, codegen_); } -void IntrinsicCodeGeneratorX86_64::VisitJdkUnsafePutObjectRelease(HInvoke* invoke) { +void IntrinsicCodeGeneratorX86_64::VisitJdkUnsafePutReferenceRelease(HInvoke* invoke) { GenUnsafePut( invoke->GetLocations(), DataType::Type::kReference, /*is_volatile=*/ true, codegen_); } @@ -2226,13 +2237,15 @@ void IntrinsicCodeGeneratorX86_64::VisitJdkUnsafePutLongVolatile(HInvoke* invoke void IntrinsicCodeGeneratorX86_64::VisitJdkUnsafePutLongRelease(HInvoke* invoke) { GenUnsafePut(invoke->GetLocations(), DataType::Type::kInt64, /*is_volatile=*/ true, codegen_); } +void IntrinsicCodeGeneratorX86_64::VisitJdkUnsafePutByte(HInvoke* invoke) { + GenUnsafePut(invoke->GetLocations(), DataType::Type::kInt8, /*is_volatile=*/false, codegen_); +} static void CreateUnsafeCASLocations(ArenaAllocator* allocator, - DataType::Type type, - HInvoke* invoke) { - const bool can_call = gUseReadBarrier && - kUseBakerReadBarrier && - IsUnsafeCASObject(invoke); + HInvoke* invoke, + CodeGeneratorX86_64* codegen, + DataType::Type type) { + const bool can_call = codegen->EmitBakerReadBarrier() && IsUnsafeCASReference(invoke); LocationSummary* locations = new (allocator) LocationSummary(invoke, can_call @@ -2253,7 +2266,7 @@ static void CreateUnsafeCASLocations(ArenaAllocator* allocator, // Need two temporaries for MarkGCCard. 
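
The temporaries requested above are for MarkGCCard, i.e. the generational write barrier: after a reference store, the card covering the holder object is dirtied so the GC rescans it, and computing the card address needs scratch registers. As a rough sketch of the general card-table technique only; the card size, dirty value, and table layout below are invented for illustration and are not ART's constants.

    // Generic card-marking sketch: one byte per 2^kCardShift bytes of heap.
    // Illustrative constants only; not ART's card table implementation.
    #include <cstddef>
    #include <cstdint>
    #include <vector>

    constexpr std::size_t kCardShift = 10;  // hypothetical: 1 KiB of heap per card
    constexpr uint8_t kCardDirty = 0x70;    // hypothetical dirty value

    struct CardTable {
      uintptr_t heap_begin;
      std::vector<uint8_t> cards;

      CardTable(uintptr_t begin, std::size_t heap_size)
          : heap_begin(begin), cards((heap_size >> kCardShift) + 1, 0) {}

      // Called after a reference store into the object at `holder`: compute the
      // card index (address arithmetic that needs a scratch register in generated
      // code, hence the temps) and dirty a single byte.
      void MarkCard(uintptr_t holder) {
        cards[(holder - heap_begin) >> kCardShift] = kCardDirty;
      }
    };

    int main() {
      CardTable table(/*begin=*/0x10000000, /*heap_size=*/1 << 20);
      table.MarkCard(0x10002345);  // dirty the card covering this object
      return table.cards[0x2345 >> kCardShift] == kCardDirty ? 0 : 1;
    }
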
locations->AddTemp(Location::RequiresRegister()); // Possibly used for reference poisoning too. locations->AddTemp(Location::RequiresRegister()); - if (gUseReadBarrier) { + if (codegen->EmitReadBarrier()) { // Need three temporaries for GenerateReferenceLoadWithBakerReadBarrier. DCHECK(kUseBakerReadBarrier); locations->AddTemp(Location::RequiresRegister()); @@ -2285,24 +2298,24 @@ void IntrinsicLocationsBuilderX86_64::VisitJdkUnsafeCASLong(HInvoke* invoke) { void IntrinsicLocationsBuilderX86_64::VisitJdkUnsafeCASObject(HInvoke* invoke) { // `jdk.internal.misc.Unsafe.compareAndSwapObject` has compare-and-set semantics (see javadoc). - VisitJdkUnsafeCompareAndSetObject(invoke); + VisitJdkUnsafeCompareAndSetReference(invoke); } void IntrinsicLocationsBuilderX86_64::VisitJdkUnsafeCompareAndSetInt(HInvoke* invoke) { - CreateUnsafeCASLocations(allocator_, DataType::Type::kInt32, invoke); + CreateUnsafeCASLocations(allocator_, invoke, codegen_, DataType::Type::kInt32); } void IntrinsicLocationsBuilderX86_64::VisitJdkUnsafeCompareAndSetLong(HInvoke* invoke) { - CreateUnsafeCASLocations(allocator_, DataType::Type::kInt64, invoke); + CreateUnsafeCASLocations(allocator_, invoke, codegen_, DataType::Type::kInt64); } -void IntrinsicLocationsBuilderX86_64::VisitJdkUnsafeCompareAndSetObject(HInvoke* invoke) { +void IntrinsicLocationsBuilderX86_64::VisitJdkUnsafeCompareAndSetReference(HInvoke* invoke) { // The only supported read barrier implementation is the Baker-style read barriers. - if (gUseReadBarrier && !kUseBakerReadBarrier) { + if (codegen_->EmitNonBakerReadBarrier()) { return; } - CreateUnsafeCASLocations(allocator_, DataType::Type::kReference, invoke); + CreateUnsafeCASLocations(allocator_, invoke, codegen_, DataType::Type::kReference); } // Convert ZF into the Boolean result. @@ -2438,7 +2451,7 @@ static void GenCompareAndSetOrExchangeRef(CodeGeneratorX86_64* codegen, CpuRegister temp3, bool is_cmpxchg) { // The only supported read barrier implementation is the Baker-style read barriers. - DCHECK_IMPLIES(gUseReadBarrier, kUseBakerReadBarrier); + DCHECK_IMPLIES(codegen->EmitReadBarrier(), kUseBakerReadBarrier); X86_64Assembler* assembler = down_cast<X86_64Assembler*>(codegen->GetAssembler()); @@ -2447,7 +2460,7 @@ static void GenCompareAndSetOrExchangeRef(CodeGeneratorX86_64* codegen, codegen->MarkGCCard(temp1, temp2, base, value, value_can_be_null); Address field_addr(base, offset, TIMES_1, 0); - if (gUseReadBarrier && kUseBakerReadBarrier) { + if (codegen->EmitBakerReadBarrier()) { // Need to make sure the reference stored in the field is a to-space // one before attempting the CAS or the CAS could fail incorrectly. codegen->GenerateReferenceLoadWithBakerReadBarrier( @@ -2556,7 +2569,7 @@ static void GenCompareAndSetOrExchange(CodeGeneratorX86_64* codegen, CpuRegister new_value_reg = new_value.AsRegister<CpuRegister>(); CpuRegister temp1 = locations->GetTemp(temp1_index).AsRegister<CpuRegister>(); CpuRegister temp2 = locations->GetTemp(temp2_index).AsRegister<CpuRegister>(); - CpuRegister temp3 = gUseReadBarrier + CpuRegister temp3 = codegen->EmitReadBarrier() ? locations->GetTemp(temp3_index).AsRegister<CpuRegister>() : CpuRegister(kNoRegister); DCHECK(RegsAreAllDifferent({base, offset, temp1, temp2, temp3})); @@ -2611,7 +2624,7 @@ void IntrinsicCodeGeneratorX86_64::VisitJdkUnsafeCASLong(HInvoke* invoke) { void IntrinsicCodeGeneratorX86_64::VisitJdkUnsafeCASObject(HInvoke* invoke) { // `jdk.internal.misc.Unsafe.compareAndSwapObject` has compare-and-set semantics (see javadoc). 
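
The javadoc note above is worth spelling out: compare-and-set returns only a success flag, while compare-and-exchange returns the witness value actually found in the field, which is why both Unsafe/VarHandle families funnel into the same GenCompareAndSetOrExchange helper distinguished by an is_cmpxchg flag. A quick illustration in terms of std::atomic (not the runtime's API), where compare_exchange_strong conveniently exposes both results at once:

    // Compare-and-set vs. compare-and-exchange, expressed with std::atomic.
    #include <atomic>
    #include <cassert>

    // Returns only whether the update happened (compareAndSet-style).
    bool CompareAndSet(std::atomic<int>& f, int expected, int desired) {
      return f.compare_exchange_strong(expected, desired);
    }

    // Returns the witness value observed in the field (compareAndExchange-style);
    // the caller checks `witness == expected` to learn whether the store happened.
    int CompareAndExchange(std::atomic<int>& f, int expected, int desired) {
      f.compare_exchange_strong(expected, desired);  // on failure, `expected` is overwritten
      return expected;                               // equals the old field value either way
    }

    int main() {
      std::atomic<int> field{41};
      assert(!CompareAndSet(field, 7, 42));             // wrong expected value: no store
      assert(CompareAndExchange(field, 41, 42) == 41);  // witness equals expected: stored
      assert(field.load() == 42);
      return 0;
    }
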
- VisitJdkUnsafeCompareAndSetObject(invoke); + VisitJdkUnsafeCompareAndSetReference(invoke); } void IntrinsicCodeGeneratorX86_64::VisitJdkUnsafeCompareAndSetInt(HInvoke* invoke) { @@ -2622,13 +2635,195 @@ void IntrinsicCodeGeneratorX86_64::VisitJdkUnsafeCompareAndSetLong(HInvoke* invo GenCAS(DataType::Type::kInt64, invoke, codegen_); } -void IntrinsicCodeGeneratorX86_64::VisitJdkUnsafeCompareAndSetObject(HInvoke* invoke) { +void IntrinsicCodeGeneratorX86_64::VisitJdkUnsafeCompareAndSetReference(HInvoke* invoke) { // The only supported read barrier implementation is the Baker-style read barriers. - DCHECK_IMPLIES(gUseReadBarrier, kUseBakerReadBarrier); + DCHECK_IMPLIES(codegen_->EmitReadBarrier(), kUseBakerReadBarrier); GenCAS(DataType::Type::kReference, invoke, codegen_); } +static void CreateUnsafeGetAndUpdateLocations(ArenaAllocator* allocator, + HInvoke* invoke, + CodeGeneratorX86_64* codegen) { + const bool can_call = codegen->EmitReadBarrier() && IsUnsafeGetAndSetReference(invoke); + LocationSummary* locations = + new (allocator) LocationSummary(invoke, + can_call + ? LocationSummary::kCallOnSlowPath + : LocationSummary::kNoCall, + kIntrinsified); + if (can_call && kUseBakerReadBarrier) { + locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty()); // No caller-save registers. + } + locations->SetInAt(0, Location::NoLocation()); // Unused receiver. + locations->SetInAt(1, Location::RequiresRegister()); + locations->SetInAt(2, Location::RequiresRegister()); + // Use the same register for both the output and the new value or addend + // to take advantage of XCHG or XADD. Arbitrarily pick RAX. + locations->SetInAt(3, Location::RegisterLocation(RAX)); + locations->SetOut(Location::RegisterLocation(RAX)); +} + +void IntrinsicLocationsBuilderX86_64::VisitUnsafeGetAndAddInt(HInvoke* invoke) { + VisitJdkUnsafeGetAndAddInt(invoke); +} + +void IntrinsicLocationsBuilderX86_64::VisitUnsafeGetAndAddLong(HInvoke* invoke) { + VisitJdkUnsafeGetAndAddLong(invoke); +} + +void IntrinsicLocationsBuilderX86_64::VisitUnsafeGetAndSetInt(HInvoke* invoke) { + VisitJdkUnsafeGetAndSetInt(invoke); +} + +void IntrinsicLocationsBuilderX86_64::VisitUnsafeGetAndSetLong(HInvoke* invoke) { + VisitJdkUnsafeGetAndSetLong(invoke); +} + +void IntrinsicLocationsBuilderX86_64::VisitUnsafeGetAndSetObject(HInvoke* invoke) { + VisitJdkUnsafeGetAndSetReference(invoke); +} + +void IntrinsicLocationsBuilderX86_64::VisitJdkUnsafeGetAndAddInt(HInvoke* invoke) { + CreateUnsafeGetAndUpdateLocations(allocator_, invoke, codegen_); +} + +void IntrinsicLocationsBuilderX86_64::VisitJdkUnsafeGetAndAddLong(HInvoke* invoke) { + CreateUnsafeGetAndUpdateLocations(allocator_, invoke, codegen_); +} + +void IntrinsicLocationsBuilderX86_64::VisitJdkUnsafeGetAndSetInt(HInvoke* invoke) { + CreateUnsafeGetAndUpdateLocations(allocator_, invoke, codegen_); +} + +void IntrinsicLocationsBuilderX86_64::VisitJdkUnsafeGetAndSetLong(HInvoke* invoke) { + CreateUnsafeGetAndUpdateLocations(allocator_, invoke, codegen_); +} + +void IntrinsicLocationsBuilderX86_64::VisitJdkUnsafeGetAndSetReference(HInvoke* invoke) { + // The only supported read barrier implementation is the Baker-style read barriers. 
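
CreateUnsafeGetAndUpdateLocations above pins both the new-value/addend input and the output to RAX because XADD and XCHG are in-place read-modify-write: the register that supplies the operand receives the previous memory contents, so sharing one register saves a move (the read-barrier early-out introduced by the comment just above continues right below). Here is a small std::atomic sketch of the same in/out behaviour; the claim about which instructions these calls lower to is the usual x86-64 mapping, not quoted from this patch.

    // fetch_add/exchange return the prior value through the same slot that
    // supplied the operand -- which is exactly what LOCK XADD / XCHG do.
    #include <atomic>
    #include <cassert>

    int main() {
      std::atomic<long> field{100};

      long operand = 5;                // addend in, old value out (LOCK XADD)
      operand = field.fetch_add(operand);
      assert(operand == 100 && field.load() == 105);

      long value = 7;                  // new value in, old value out (XCHG)
      value = field.exchange(value);
      assert(value == 105 && field.load() == 7);
      return 0;
    }
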
+ if (codegen_->EmitNonBakerReadBarrier()) { + return; + } + + CreateUnsafeGetAndUpdateLocations(allocator_, invoke, codegen_); + invoke->GetLocations()->AddRegisterTemps(3); +} + +enum class GetAndUpdateOp { + kSet, + kAdd, + kBitwiseAnd, + kBitwiseOr, + kBitwiseXor +}; + +static void GenUnsafeGetAndUpdate(HInvoke* invoke, + DataType::Type type, + CodeGeneratorX86_64* codegen, + GetAndUpdateOp get_and_update_op) { + X86_64Assembler* assembler = down_cast<X86_64Assembler*>(codegen->GetAssembler()); + LocationSummary* locations = invoke->GetLocations(); + + CpuRegister out = locations->Out().AsRegister<CpuRegister>(); // Result. + CpuRegister base = locations->InAt(1).AsRegister<CpuRegister>(); // Object pointer. + CpuRegister offset = locations->InAt(2).AsRegister<CpuRegister>(); // Long offset. + DCHECK_EQ(out, locations->InAt(3).AsRegister<CpuRegister>()); // New value or addend. + Address field_address(base, offset, TIMES_1, 0); + + if (type == DataType::Type::kInt32) { + if (get_and_update_op == GetAndUpdateOp::kAdd) { + __ LockXaddl(field_address, out); + } else { + DCHECK(get_and_update_op == GetAndUpdateOp::kSet); + __ xchgl(out, field_address); + } + } else if (type == DataType::Type::kInt64) { + if (get_and_update_op == GetAndUpdateOp::kAdd) { + __ LockXaddq(field_address, out); + } else { + DCHECK(get_and_update_op == GetAndUpdateOp::kSet); + __ xchgq(out, field_address); + } + } else { + DCHECK_EQ(type, DataType::Type::kReference); + DCHECK(get_and_update_op == GetAndUpdateOp::kSet); + CpuRegister temp1 = locations->GetTemp(0).AsRegister<CpuRegister>(); + CpuRegister temp2 = locations->GetTemp(1).AsRegister<CpuRegister>(); + CpuRegister temp3 = locations->GetTemp(2).AsRegister<CpuRegister>(); + + if (codegen->EmitReadBarrier()) { + DCHECK(kUseBakerReadBarrier); + // Ensure that the field contains a to-space reference. + codegen->GenerateReferenceLoadWithBakerReadBarrier( + invoke, + Location::RegisterLocation(temp3.AsRegister()), + base, + field_address, + /*needs_null_check=*/ false, + /*always_update_field=*/ true, + &temp1, + &temp2); + } + + // Mark card for object as a new value shall be stored. + bool new_value_can_be_null = true; // TODO: Worth finding out this information? + codegen->MarkGCCard(temp1, temp2, base, /*value=*/ out, new_value_can_be_null); + + if (kPoisonHeapReferences) { + // Use a temp to avoid poisoning base of the field address, which might happen if `out` + // is the same as `base` (for code like `unsafe.getAndSet(obj, offset, obj)`). 
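
The comment above, whose generated sequence follows immediately below, is an aliasing point: heap-reference poisoning rewrites the value held in a register, and when that register is also the base of the destination address (unsafe.getAndSet(obj, offset, obj) stores an object into itself), poisoning it in place would corrupt the address before the XCHG, hence the copy into temp1. Below is a stand-alone model of the hazard and the fix; the poisoning transform is plain bitwise complement for illustration and is not necessarily ART's encoding.

    // Model of the aliasing hazard: "poisoning" a value that doubles as the
    // base of the store address. The encoding here is illustrative only.
    #include <cassert>
    #include <cstdint>

    uint32_t Poison(uint32_t ref) { return ~ref; }    // stand-in transform
    uint32_t Unpoison(uint32_t ref) { return ~ref; }

    struct FakeHeap {
      uint32_t slots[16] = {};
      uint32_t Swap(uint32_t base, uint32_t disp, uint32_t poisoned_value) {
        uint32_t old = slots[base + disp];
        slots[base + disp] = poisoned_value;          // the XCHG
        return old;
      }
    };

    int main() {
      FakeHeap heap;
      uint32_t out = 3;                        // reference being stored; also the base!
      const uint32_t disp = 2;
      heap.slots[out + disp] = Poison(9);      // field currently holds reference 9 (poisoned)

      // Wrong: poisoning `out` in place before the swap would change the address too.
      // Right: copy to a temp, poison the temp, swap, unpoison the result.
      uint32_t temp = out;
      temp = Poison(temp);
      temp = heap.Swap(/*base=*/out, disp, temp);  // address still computed from `out`
      out = Unpoison(temp);                        // previous field value

      assert(heap.slots[5] == Poison(3));          // new (poisoned) value stored
      assert(out == 9);                            // old value returned, unpoisoned
      return 0;
    }
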
+ __ movl(temp1, out); + __ PoisonHeapReference(temp1); + __ xchgl(temp1, field_address); + __ UnpoisonHeapReference(temp1); + __ movl(out, temp1); + } else { + __ xchgl(out, field_address); + } + } +} + +void IntrinsicCodeGeneratorX86_64::VisitUnsafeGetAndAddInt(HInvoke* invoke) { + VisitJdkUnsafeGetAndAddInt(invoke); +} + +void IntrinsicCodeGeneratorX86_64::VisitUnsafeGetAndAddLong(HInvoke* invoke) { + VisitJdkUnsafeGetAndAddLong(invoke); +} + +void IntrinsicCodeGeneratorX86_64::VisitUnsafeGetAndSetInt(HInvoke* invoke) { + VisitJdkUnsafeGetAndSetInt(invoke); +} + +void IntrinsicCodeGeneratorX86_64::VisitUnsafeGetAndSetLong(HInvoke* invoke) { + VisitJdkUnsafeGetAndSetLong(invoke); +} + +void IntrinsicCodeGeneratorX86_64::VisitUnsafeGetAndSetObject(HInvoke* invoke) { + VisitJdkUnsafeGetAndSetReference(invoke); +} + +void IntrinsicCodeGeneratorX86_64::VisitJdkUnsafeGetAndAddInt(HInvoke* invoke) { + GenUnsafeGetAndUpdate(invoke, DataType::Type::kInt32, codegen_, GetAndUpdateOp::kAdd); +} + +void IntrinsicCodeGeneratorX86_64::VisitJdkUnsafeGetAndAddLong(HInvoke* invoke) { + GenUnsafeGetAndUpdate(invoke, DataType::Type::kInt64, codegen_, GetAndUpdateOp::kAdd); +} + +void IntrinsicCodeGeneratorX86_64::VisitJdkUnsafeGetAndSetInt(HInvoke* invoke) { + GenUnsafeGetAndUpdate(invoke, DataType::Type::kInt32, codegen_, GetAndUpdateOp::kSet); +} + +void IntrinsicCodeGeneratorX86_64::VisitJdkUnsafeGetAndSetLong(HInvoke* invoke) { + GenUnsafeGetAndUpdate(invoke, DataType::Type::kInt64, codegen_, GetAndUpdateOp::kSet); +} + +void IntrinsicCodeGeneratorX86_64::VisitJdkUnsafeGetAndSetReference(HInvoke* invoke) { + GenUnsafeGetAndUpdate(invoke, DataType::Type::kReference, codegen_, GetAndUpdateOp::kSet); +} + void IntrinsicLocationsBuilderX86_64::VisitIntegerReverse(HInvoke* invoke) { LocationSummary* locations = new (allocator_) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified); @@ -3053,18 +3248,60 @@ void IntrinsicCodeGeneratorX86_64::VisitLongNumberOfTrailingZeros(HInvoke* invok GenTrailingZeros(GetAssembler(), codegen_, invoke, /* is_long= */ true); } -void IntrinsicLocationsBuilderX86_64::VisitIntegerValueOf(HInvoke* invoke) { - InvokeRuntimeCallingConvention calling_convention; - IntrinsicVisitor::ComputeIntegerValueOfLocations( - invoke, - codegen_, - Location::RegisterLocation(RAX), - Location::RegisterLocation(calling_convention.GetRegisterAt(0))); +#define VISIT_INTRINSIC(name, low, high, type, start_index) \ + void IntrinsicLocationsBuilderX86_64::Visit ##name ##ValueOf(HInvoke* invoke) { \ + InvokeRuntimeCallingConvention calling_convention; \ + IntrinsicVisitor::ComputeValueOfLocations( \ + invoke, \ + codegen_, \ + low, \ + high - low + 1, \ + Location::RegisterLocation(RAX), \ + Location::RegisterLocation(calling_convention.GetRegisterAt(0))); \ + } \ + void IntrinsicCodeGeneratorX86_64::Visit ##name ##ValueOf(HInvoke* invoke) { \ + IntrinsicVisitor::ValueOfInfo info = \ + IntrinsicVisitor::ComputeValueOfInfo( \ + invoke, \ + codegen_->GetCompilerOptions(), \ + WellKnownClasses::java_lang_ ##name ##_value, \ + low, \ + high - low + 1, \ + start_index); \ + HandleValueOf(invoke, info, type); \ + } + BOXED_TYPES(VISIT_INTRINSIC) +#undef VISIT_INTRINSIC + +template <typename T> +static void Store(X86_64Assembler* assembler, + DataType::Type primitive_type, + const Address& address, + const T& operand) { + switch (primitive_type) { + case DataType::Type::kInt8: + case DataType::Type::kUint8: { + __ movb(address, operand); + break; + } + case DataType::Type::kInt16: + case 
DataType::Type::kUint16: { + __ movw(address, operand); + break; + } + case DataType::Type::kInt32: { + __ movl(address, operand); + break; + } + default: { + LOG(FATAL) << "Unrecognized ValueOf type " << primitive_type; + } + } } -void IntrinsicCodeGeneratorX86_64::VisitIntegerValueOf(HInvoke* invoke) { - IntrinsicVisitor::IntegerValueOfInfo info = - IntrinsicVisitor::ComputeIntegerValueOfInfo(invoke, codegen_->GetCompilerOptions()); +void IntrinsicCodeGeneratorX86_64::HandleValueOf(HInvoke* invoke, + const IntrinsicVisitor::ValueOfInfo& info, + DataType::Type type) { LocationSummary* locations = invoke->GetLocations(); X86_64Assembler* assembler = GetAssembler(); @@ -3079,16 +3316,16 @@ void IntrinsicCodeGeneratorX86_64::VisitIntegerValueOf(HInvoke* invoke) { if (invoke->InputAt(0)->IsIntConstant()) { int32_t value = invoke->InputAt(0)->AsIntConstant()->GetValue(); if (static_cast<uint32_t>(value - info.low) < info.length) { - // Just embed the j.l.Integer in the code. - DCHECK_NE(info.value_boot_image_reference, IntegerValueOfInfo::kInvalidReference); + // Just embed the object in the code. + DCHECK_NE(info.value_boot_image_reference, ValueOfInfo::kInvalidReference); codegen_->LoadBootImageAddress(out, info.value_boot_image_reference); } else { DCHECK(locations->CanCall()); - // Allocate and initialize a new j.l.Integer. - // TODO: If we JIT, we could allocate the j.l.Integer now, and store it in the + // Allocate and initialize a new object. + // TODO: If we JIT, we could allocate the boxed value now, and store it in the // JIT object table. allocate_instance(); - __ movl(Address(out, info.value_offset), Immediate(value)); + Store(assembler, type, Address(out, info.value_offset), Immediate(value)); } } else { DCHECK(locations->CanCall()); @@ -3098,7 +3335,7 @@ void IntrinsicCodeGeneratorX86_64::VisitIntegerValueOf(HInvoke* invoke) { __ cmpl(out, Immediate(info.length)); NearLabel allocate, done; __ j(kAboveEqual, &allocate); - // If the value is within the bounds, load the j.l.Integer directly from the array. + // If the value is within the bounds, load the boxed value directly from the array. DCHECK_NE(out.AsRegister(), argument.AsRegister()); codegen_->LoadBootImageAddress(argument, info.array_data_boot_image_reference); static_assert((1u << TIMES_4) == sizeof(mirror::HeapReference<mirror::Object>), @@ -3107,9 +3344,9 @@ void IntrinsicCodeGeneratorX86_64::VisitIntegerValueOf(HInvoke* invoke) { __ MaybeUnpoisonHeapReference(out); __ jmp(&done); __ Bind(&allocate); - // Otherwise allocate and initialize a new j.l.Integer. + // Otherwise allocate and initialize a new object. allocate_instance(); - __ movl(Address(out, info.value_offset), in); + Store(assembler, type, Address(out, info.value_offset), in); __ Bind(&done); } } @@ -3128,7 +3365,7 @@ void IntrinsicCodeGeneratorX86_64::VisitReferenceGetReferent(HInvoke* invoke) { SlowPathCode* slow_path = new (GetAllocator()) IntrinsicSlowPathX86_64(invoke); codegen_->AddSlowPath(slow_path); - if (gUseReadBarrier) { + if (codegen_->EmitReadBarrier()) { // Check self->GetWeakRefAccessEnabled(). ThreadOffset64 offset = Thread::WeakRefAccessEnabledOffset<kX86_64PointerSize>(); __ gs()->cmpl(Address::Absolute(offset, /* no_rip= */ true), @@ -3150,7 +3387,7 @@ void IntrinsicCodeGeneratorX86_64::VisitReferenceGetReferent(HInvoke* invoke) { // Load the value from the field. 
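// ---- Editorial aside, not part of this patch ----
// Back in HandleValueOf above, the in-cache test for a constant input,
//   static_cast<uint32_t>(value - info.low) < info.length,
// is the usual single-compare encoding of low <= value < low + length: values
// below `low` wrap to large unsigned numbers and fail the unsigned comparison.
// A minimal sketch, assuming the default j.l.Integer cache bounds
// (low = -128, 256 entries); the cast-before-subtract just avoids signed overflow:
#include <cstdint>

bool InIntegerValueOfCache(int32_t value) {
  constexpr int32_t kLow = -128;
  constexpr uint32_t kLength = 256u;
  return (static_cast<uint32_t>(value) - static_cast<uint32_t>(kLow)) < kLength;
  // e.g. 42 -> 170 (cache hit), 300 -> 428 (miss, allocate a new object).
}
// ---- End editorial aside ----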
uint32_t referent_offset = mirror::Reference::ReferentOffset().Uint32Value(); - if (gUseReadBarrier && kUseBakerReadBarrier) { + if (codegen_->EmitBakerReadBarrier()) { codegen_->GenerateFieldLoadWithBakerReadBarrier(invoke, out, obj.AsRegister<CpuRegister>(), @@ -3169,7 +3406,7 @@ void IntrinsicCodeGeneratorX86_64::VisitReferenceGetReferent(HInvoke* invoke) { } void IntrinsicLocationsBuilderX86_64::VisitReferenceRefersTo(HInvoke* invoke) { - IntrinsicVisitor::CreateReferenceRefersToLocations(invoke); + IntrinsicVisitor::CreateReferenceRefersToLocations(invoke, codegen_); } void IntrinsicCodeGeneratorX86_64::VisitReferenceRefersTo(HInvoke* invoke) { @@ -3191,7 +3428,7 @@ void IntrinsicCodeGeneratorX86_64::VisitReferenceRefersTo(HInvoke* invoke) { __ cmpl(out, other); - if (gUseReadBarrier) { + if (codegen_->EmitReadBarrier()) { DCHECK(kUseBakerReadBarrier); NearLabel calculate_result; @@ -3249,7 +3486,7 @@ void IntrinsicLocationsBuilderX86_64::VisitReachabilityFence(HInvoke* invoke) { locations->SetInAt(0, Location::Any()); } -void IntrinsicCodeGeneratorX86_64::VisitReachabilityFence(HInvoke* invoke ATTRIBUTE_UNUSED) { } +void IntrinsicCodeGeneratorX86_64::VisitReachabilityFence([[maybe_unused]] HInvoke* invoke) {} static void CreateDivideUnsignedLocations(HInvoke* invoke, ArenaAllocator* allocator) { LocationSummary* locations = @@ -3332,14 +3569,6 @@ void IntrinsicCodeGeneratorX86_64::VisitMathMultiplyHigh(HInvoke* invoke) { __ imulq(y); } -enum class GetAndUpdateOp { - kSet, - kAdd, - kBitwiseAnd, - kBitwiseOr, - kBitwiseXor -}; - class VarHandleSlowPathX86_64 : public IntrinsicSlowPathX86_64 { public: explicit VarHandleSlowPathX86_64(HInvoke* invoke) @@ -3510,7 +3739,7 @@ static void GenerateVarHandleAccessModeAndVarTypeChecks(HInvoke* invoke, __ movl(temp, Address(varhandle, var_type_offset)); __ MaybeUnpoisonHeapReference(temp); - // Check check the varType.primitiveType field against the type we're trying to retrieve. + // Check the varType.primitiveType field against the type we're trying to use. __ cmpw(Address(temp, primitive_type_offset), Immediate(static_cast<uint16_t>(primitive_type))); __ j(kNotEqual, slow_path->GetEntryLabel()); @@ -3754,24 +3983,24 @@ static void GenerateVarHandleTarget(HInvoke* invoke, __ movl(CpuRegister(target.offset), Immediate(target_field->GetOffset().Uint32Value())); } else { // For static fields, we need to fill the `target.object` with the declaring class, - // so we can use `target.object` as temporary for the `ArtMethod*`. For instance fields, - // we do not need the declaring class, so we can forget the `ArtMethod*` when - // we load the `target.offset`, so use the `target.offset` to hold the `ArtMethod*`. - CpuRegister method((expected_coordinates_count == 0) ? target.object : target.offset); + // so we can use `target.object` as temporary for the `ArtField*`. For instance fields, + // we do not need the declaring class, so we can forget the `ArtField*` when + // we load the `target.offset`, so use the `target.offset` to hold the `ArtField*`. + CpuRegister field((expected_coordinates_count == 0) ? target.object : target.offset); const MemberOffset art_field_offset = mirror::FieldVarHandle::ArtFieldOffset(); const MemberOffset offset_offset = ArtField::OffsetOffset(); - // Load the ArtField, the offset and, if needed, declaring class. - __ movq(method, Address(varhandle, art_field_offset)); - __ movl(CpuRegister(target.offset), Address(method, offset_offset)); + // Load the ArtField*, the offset and, if needed, declaring class. 
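// ---- Editorial aside, not part of this patch ----
// A rough C++ model of the field-VarHandle target computed here: the VarHandle
// holds an ArtField*, the access offset comes from that field, and the base
// object is either the receiver (instance field) or the field's declaring
// class (static field). The types below are simplified stand-ins, not ART's
// real mirror classes:
#include <cstdint>

struct ArtFieldModel { uint32_t offset; void* declaring_class; };
struct FieldVarHandleModel { ArtFieldModel* art_field; };
struct TargetModel { void* object; uint32_t offset; };

TargetModel ResolveFieldTarget(const FieldVarHandleModel& vh, void* receiver_or_null) {
  ArtFieldModel* field = vh.art_field;   // movq field, [varhandle + art_field_offset]
  TargetModel target;
  target.offset = field->offset;         // movl target.offset, [field + offset_offset]
  target.object = (receiver_or_null != nullptr)
                      ? receiver_or_null          // instance field: base is the receiver.
                      : field->declaring_class;   // static field: base is the declaring class.
  return target;
}
// ---- End editorial aside ----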
+ __ movq(field, Address(varhandle, art_field_offset)); + __ movl(CpuRegister(target.offset), Address(field, offset_offset)); if (expected_coordinates_count == 0u) { InstructionCodeGeneratorX86_64* instr_codegen = codegen->GetInstructionCodegen(); instr_codegen->GenerateGcRootFieldLoad(invoke, Location::RegisterLocation(target.object), - Address(method, ArtField::DeclaringClassOffset()), - /*fixup_label=*/ nullptr, - gCompilerReadBarrierOption); + Address(field, ArtField::DeclaringClassOffset()), + /*fixup_label=*/nullptr, + codegen->GetCompilerReadBarrierOption()); } } } else { @@ -3788,9 +4017,9 @@ static void GenerateVarHandleTarget(HInvoke* invoke, } } -static bool HasVarHandleIntrinsicImplementation(HInvoke* invoke) { +static bool HasVarHandleIntrinsicImplementation(HInvoke* invoke, CodeGeneratorX86_64* codegen) { // The only supported read barrier implementation is the Baker-style read barriers. - if (gUseReadBarrier && !kUseBakerReadBarrier) { + if (codegen->EmitNonBakerReadBarrier()) { return false; } @@ -3839,8 +4068,8 @@ static LocationSummary* CreateVarHandleCommonLocations(HInvoke* invoke) { return locations; } -static void CreateVarHandleGetLocations(HInvoke* invoke) { - if (!HasVarHandleIntrinsicImplementation(invoke)) { +static void CreateVarHandleGetLocations(HInvoke* invoke, CodeGeneratorX86_64* codegen) { + if (!HasVarHandleIntrinsicImplementation(invoke, codegen)) { return; } @@ -3876,7 +4105,7 @@ static void GenerateVarHandleGet(HInvoke* invoke, Location out = locations->Out(); if (type == DataType::Type::kReference) { - if (gUseReadBarrier) { + if (codegen->EmitReadBarrier()) { DCHECK(kUseBakerReadBarrier); codegen->GenerateReferenceLoadWithBakerReadBarrier( invoke, out, CpuRegister(target.object), src, /* needs_null_check= */ false); @@ -3900,7 +4129,7 @@ static void GenerateVarHandleGet(HInvoke* invoke, } void IntrinsicLocationsBuilderX86_64::VisitVarHandleGet(HInvoke* invoke) { - CreateVarHandleGetLocations(invoke); + CreateVarHandleGetLocations(invoke, codegen_); } void IntrinsicCodeGeneratorX86_64::VisitVarHandleGet(HInvoke* invoke) { @@ -3908,7 +4137,7 @@ void IntrinsicCodeGeneratorX86_64::VisitVarHandleGet(HInvoke* invoke) { } void IntrinsicLocationsBuilderX86_64::VisitVarHandleGetAcquire(HInvoke* invoke) { - CreateVarHandleGetLocations(invoke); + CreateVarHandleGetLocations(invoke, codegen_); } void IntrinsicCodeGeneratorX86_64::VisitVarHandleGetAcquire(HInvoke* invoke) { @@ -3917,7 +4146,7 @@ void IntrinsicCodeGeneratorX86_64::VisitVarHandleGetAcquire(HInvoke* invoke) { } void IntrinsicLocationsBuilderX86_64::VisitVarHandleGetOpaque(HInvoke* invoke) { - CreateVarHandleGetLocations(invoke); + CreateVarHandleGetLocations(invoke, codegen_); } void IntrinsicCodeGeneratorX86_64::VisitVarHandleGetOpaque(HInvoke* invoke) { @@ -3926,7 +4155,7 @@ void IntrinsicCodeGeneratorX86_64::VisitVarHandleGetOpaque(HInvoke* invoke) { } void IntrinsicLocationsBuilderX86_64::VisitVarHandleGetVolatile(HInvoke* invoke) { - CreateVarHandleGetLocations(invoke); + CreateVarHandleGetLocations(invoke, codegen_); } void IntrinsicCodeGeneratorX86_64::VisitVarHandleGetVolatile(HInvoke* invoke) { @@ -3934,8 +4163,8 @@ void IntrinsicCodeGeneratorX86_64::VisitVarHandleGetVolatile(HInvoke* invoke) { GenerateVarHandleGet(invoke, codegen_); } -static void CreateVarHandleSetLocations(HInvoke* invoke) { - if (!HasVarHandleIntrinsicImplementation(invoke)) { +static void CreateVarHandleSetLocations(HInvoke* invoke, CodeGeneratorX86_64* codegen) { + if (!HasVarHandleIntrinsicImplementation(invoke, 
codegen)) { return; } @@ -4008,7 +4237,7 @@ static void GenerateVarHandleSet(HInvoke* invoke, } void IntrinsicLocationsBuilderX86_64::VisitVarHandleSet(HInvoke* invoke) { - CreateVarHandleSetLocations(invoke); + CreateVarHandleSetLocations(invoke, codegen_); } void IntrinsicCodeGeneratorX86_64::VisitVarHandleSet(HInvoke* invoke) { @@ -4016,7 +4245,7 @@ void IntrinsicCodeGeneratorX86_64::VisitVarHandleSet(HInvoke* invoke) { } void IntrinsicLocationsBuilderX86_64::VisitVarHandleSetOpaque(HInvoke* invoke) { - CreateVarHandleSetLocations(invoke); + CreateVarHandleSetLocations(invoke, codegen_); } void IntrinsicCodeGeneratorX86_64::VisitVarHandleSetOpaque(HInvoke* invoke) { @@ -4024,7 +4253,7 @@ void IntrinsicCodeGeneratorX86_64::VisitVarHandleSetOpaque(HInvoke* invoke) { } void IntrinsicLocationsBuilderX86_64::VisitVarHandleSetRelease(HInvoke* invoke) { - CreateVarHandleSetLocations(invoke); + CreateVarHandleSetLocations(invoke, codegen_); } void IntrinsicCodeGeneratorX86_64::VisitVarHandleSetRelease(HInvoke* invoke) { @@ -4032,15 +4261,16 @@ void IntrinsicCodeGeneratorX86_64::VisitVarHandleSetRelease(HInvoke* invoke) { } void IntrinsicLocationsBuilderX86_64::VisitVarHandleSetVolatile(HInvoke* invoke) { - CreateVarHandleSetLocations(invoke); + CreateVarHandleSetLocations(invoke, codegen_); } void IntrinsicCodeGeneratorX86_64::VisitVarHandleSetVolatile(HInvoke* invoke) { GenerateVarHandleSet(invoke, codegen_, /*is_volatile=*/ true, /*is_atomic=*/ true); } -static void CreateVarHandleCompareAndSetOrExchangeLocations(HInvoke* invoke) { - if (!HasVarHandleIntrinsicImplementation(invoke)) { +static void CreateVarHandleCompareAndSetOrExchangeLocations(HInvoke* invoke, + CodeGeneratorX86_64* codegen) { + if (!HasVarHandleIntrinsicImplementation(invoke, codegen)) { return; } @@ -4073,7 +4303,7 @@ static void CreateVarHandleCompareAndSetOrExchangeLocations(HInvoke* invoke) { // Need two temporaries for MarkGCCard. locations->AddTemp(Location::RequiresRegister()); locations->AddTemp(Location::RequiresRegister()); - if (gUseReadBarrier) { + if (codegen->EmitReadBarrier()) { // Need three temporaries for GenerateReferenceLoadWithBakerReadBarrier. 
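// ---- Editorial aside, not part of this patch ----
// These shared locations feed GenerateVarHandleCompareAndSetOrExchange below,
// where the is_cmpxchg flag selects between the two user-visible shapes of the
// operation: compareAndSet reports success as a boolean, compareAndExchange
// reports the value actually witnessed in the field. A minimal std::atomic
// sketch of that difference (illustrative only, 32-bit case):
#include <atomic>
#include <cstdint>

bool CompareAndSet(std::atomic<int32_t>& field, int32_t expected, int32_t desired) {
  return field.compare_exchange_strong(expected, desired);  // lock cmpxchg; ZF becomes the result.
}

int32_t CompareAndExchange(std::atomic<int32_t>& field, int32_t expected, int32_t desired) {
  field.compare_exchange_strong(expected, desired);  // on failure `expected` is updated to the
  return expected;                                   // witnessed value; on success it is unchanged.
}
// ---- End editorial aside ----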
DCHECK(kUseBakerReadBarrier); locations->AddTemp(Location::RequiresRegister()); @@ -4088,7 +4318,7 @@ static void GenerateVarHandleCompareAndSetOrExchange(HInvoke* invoke, CodeGeneratorX86_64* codegen, bool is_cmpxchg, bool byte_swap = false) { - DCHECK_IMPLIES(gUseReadBarrier, kUseBakerReadBarrier); + DCHECK_IMPLIES(codegen->EmitReadBarrier(), kUseBakerReadBarrier); X86_64Assembler* assembler = codegen->GetAssembler(); LocationSummary* locations = invoke->GetLocations(); @@ -4133,7 +4363,7 @@ static void GenerateVarHandleCompareAndSetOrExchange(HInvoke* invoke, } void IntrinsicLocationsBuilderX86_64::VisitVarHandleCompareAndSet(HInvoke* invoke) { - CreateVarHandleCompareAndSetOrExchangeLocations(invoke); + CreateVarHandleCompareAndSetOrExchangeLocations(invoke, codegen_); } void IntrinsicCodeGeneratorX86_64::VisitVarHandleCompareAndSet(HInvoke* invoke) { @@ -4141,7 +4371,7 @@ void IntrinsicCodeGeneratorX86_64::VisitVarHandleCompareAndSet(HInvoke* invoke) } void IntrinsicLocationsBuilderX86_64::VisitVarHandleWeakCompareAndSet(HInvoke* invoke) { - CreateVarHandleCompareAndSetOrExchangeLocations(invoke); + CreateVarHandleCompareAndSetOrExchangeLocations(invoke, codegen_); } void IntrinsicCodeGeneratorX86_64::VisitVarHandleWeakCompareAndSet(HInvoke* invoke) { @@ -4149,7 +4379,7 @@ void IntrinsicCodeGeneratorX86_64::VisitVarHandleWeakCompareAndSet(HInvoke* invo } void IntrinsicLocationsBuilderX86_64::VisitVarHandleWeakCompareAndSetPlain(HInvoke* invoke) { - CreateVarHandleCompareAndSetOrExchangeLocations(invoke); + CreateVarHandleCompareAndSetOrExchangeLocations(invoke, codegen_); } void IntrinsicCodeGeneratorX86_64::VisitVarHandleWeakCompareAndSetPlain(HInvoke* invoke) { @@ -4157,7 +4387,7 @@ void IntrinsicCodeGeneratorX86_64::VisitVarHandleWeakCompareAndSetPlain(HInvoke* } void IntrinsicLocationsBuilderX86_64::VisitVarHandleWeakCompareAndSetAcquire(HInvoke* invoke) { - CreateVarHandleCompareAndSetOrExchangeLocations(invoke); + CreateVarHandleCompareAndSetOrExchangeLocations(invoke, codegen_); } void IntrinsicCodeGeneratorX86_64::VisitVarHandleWeakCompareAndSetAcquire(HInvoke* invoke) { @@ -4165,7 +4395,7 @@ void IntrinsicCodeGeneratorX86_64::VisitVarHandleWeakCompareAndSetAcquire(HInvok } void IntrinsicLocationsBuilderX86_64::VisitVarHandleWeakCompareAndSetRelease(HInvoke* invoke) { - CreateVarHandleCompareAndSetOrExchangeLocations(invoke); + CreateVarHandleCompareAndSetOrExchangeLocations(invoke, codegen_); } void IntrinsicCodeGeneratorX86_64::VisitVarHandleWeakCompareAndSetRelease(HInvoke* invoke) { @@ -4173,7 +4403,7 @@ void IntrinsicCodeGeneratorX86_64::VisitVarHandleWeakCompareAndSetRelease(HInvok } void IntrinsicLocationsBuilderX86_64::VisitVarHandleCompareAndExchange(HInvoke* invoke) { - CreateVarHandleCompareAndSetOrExchangeLocations(invoke); + CreateVarHandleCompareAndSetOrExchangeLocations(invoke, codegen_); } void IntrinsicCodeGeneratorX86_64::VisitVarHandleCompareAndExchange(HInvoke* invoke) { @@ -4181,7 +4411,7 @@ void IntrinsicCodeGeneratorX86_64::VisitVarHandleCompareAndExchange(HInvoke* inv } void IntrinsicLocationsBuilderX86_64::VisitVarHandleCompareAndExchangeAcquire(HInvoke* invoke) { - CreateVarHandleCompareAndSetOrExchangeLocations(invoke); + CreateVarHandleCompareAndSetOrExchangeLocations(invoke, codegen_); } void IntrinsicCodeGeneratorX86_64::VisitVarHandleCompareAndExchangeAcquire(HInvoke* invoke) { @@ -4189,15 +4419,15 @@ void IntrinsicCodeGeneratorX86_64::VisitVarHandleCompareAndExchangeAcquire(HInvo } void 
IntrinsicLocationsBuilderX86_64::VisitVarHandleCompareAndExchangeRelease(HInvoke* invoke) { - CreateVarHandleCompareAndSetOrExchangeLocations(invoke); + CreateVarHandleCompareAndSetOrExchangeLocations(invoke, codegen_); } void IntrinsicCodeGeneratorX86_64::VisitVarHandleCompareAndExchangeRelease(HInvoke* invoke) { GenerateVarHandleCompareAndSetOrExchange(invoke, codegen_, /*is_cmpxchg=*/ true); } -static void CreateVarHandleGetAndSetLocations(HInvoke* invoke) { - if (!HasVarHandleIntrinsicImplementation(invoke)) { +static void CreateVarHandleGetAndSetLocations(HInvoke* invoke, CodeGeneratorX86_64* codegen) { + if (!HasVarHandleIntrinsicImplementation(invoke, codegen)) { return; } @@ -4221,7 +4451,7 @@ static void CreateVarHandleGetAndSetLocations(HInvoke* invoke) { // Need two temporaries for MarkGCCard. locations->AddTemp(Location::RequiresRegister()); locations->AddTemp(Location::RequiresRegister()); - if (gUseReadBarrier) { + if (codegen->EmitReadBarrier()) { // Need a third temporary for GenerateReferenceLoadWithBakerReadBarrier. DCHECK(kUseBakerReadBarrier); locations->AddTemp(Location::RequiresRegister()); @@ -4270,7 +4500,7 @@ static void GenerateVarHandleGetAndSet(HInvoke* invoke, CpuRegister temp2 = locations->GetTemp(temp_count - 2).AsRegister<CpuRegister>(); CpuRegister valreg = value.AsRegister<CpuRegister>(); - if (gUseReadBarrier && kUseBakerReadBarrier) { + if (codegen->EmitBakerReadBarrier()) { codegen->GenerateReferenceLoadWithBakerReadBarrier( invoke, locations->GetTemp(temp_count - 3), @@ -4339,8 +4569,8 @@ static void GenerateVarHandleGetAndSet(HInvoke* invoke, } } -static void CreateVarHandleGetAndBitwiseOpLocations(HInvoke* invoke) { - if (!HasVarHandleIntrinsicImplementation(invoke)) { +static void CreateVarHandleGetAndBitwiseOpLocations(HInvoke* invoke, CodeGeneratorX86_64* codegen) { + if (!HasVarHandleIntrinsicImplementation(invoke, codegen)) { return; } @@ -4478,8 +4708,8 @@ static void GenerateVarHandleGetAndOp(HInvoke* invoke, } } -static void CreateVarHandleGetAndAddLocations(HInvoke* invoke) { - if (!HasVarHandleIntrinsicImplementation(invoke)) { +static void CreateVarHandleGetAndAddLocations(HInvoke* invoke, CodeGeneratorX86_64* codegen) { + if (!HasVarHandleIntrinsicImplementation(invoke, codegen)) { return; } @@ -4650,7 +4880,7 @@ static void GenerateVarHandleGetAndUpdate(HInvoke* invoke, bool need_any_store_barrier, bool need_any_any_barrier, bool byte_swap = false) { - DCHECK_IMPLIES(gUseReadBarrier, kUseBakerReadBarrier); + DCHECK_IMPLIES(codegen->EmitReadBarrier(), kUseBakerReadBarrier); X86_64Assembler* assembler = codegen->GetAssembler(); LocationSummary* locations = invoke->GetLocations(); @@ -4705,7 +4935,7 @@ static void GenerateVarHandleGetAndUpdate(HInvoke* invoke, } void IntrinsicLocationsBuilderX86_64::VisitVarHandleGetAndSet(HInvoke* invoke) { - CreateVarHandleGetAndSetLocations(invoke); + CreateVarHandleGetAndSetLocations(invoke, codegen_); } void IntrinsicCodeGeneratorX86_64::VisitVarHandleGetAndSet(HInvoke* invoke) { @@ -4718,7 +4948,7 @@ void IntrinsicCodeGeneratorX86_64::VisitVarHandleGetAndSet(HInvoke* invoke) { } void IntrinsicLocationsBuilderX86_64::VisitVarHandleGetAndSetAcquire(HInvoke* invoke) { - CreateVarHandleGetAndSetLocations(invoke); + CreateVarHandleGetAndSetLocations(invoke, codegen_); } void IntrinsicCodeGeneratorX86_64::VisitVarHandleGetAndSetAcquire(HInvoke* invoke) { @@ -4731,7 +4961,7 @@ void IntrinsicCodeGeneratorX86_64::VisitVarHandleGetAndSetAcquire(HInvoke* invok } void 
IntrinsicLocationsBuilderX86_64::VisitVarHandleGetAndSetRelease(HInvoke* invoke) { - CreateVarHandleGetAndSetLocations(invoke); + CreateVarHandleGetAndSetLocations(invoke, codegen_); } void IntrinsicCodeGeneratorX86_64::VisitVarHandleGetAndSetRelease(HInvoke* invoke) { @@ -4744,7 +4974,7 @@ void IntrinsicCodeGeneratorX86_64::VisitVarHandleGetAndSetRelease(HInvoke* invok } void IntrinsicLocationsBuilderX86_64::VisitVarHandleGetAndAdd(HInvoke* invoke) { - CreateVarHandleGetAndAddLocations(invoke); + CreateVarHandleGetAndAddLocations(invoke, codegen_); } void IntrinsicCodeGeneratorX86_64::VisitVarHandleGetAndAdd(HInvoke* invoke) { @@ -4757,7 +4987,7 @@ void IntrinsicCodeGeneratorX86_64::VisitVarHandleGetAndAdd(HInvoke* invoke) { } void IntrinsicLocationsBuilderX86_64::VisitVarHandleGetAndAddAcquire(HInvoke* invoke) { - CreateVarHandleGetAndAddLocations(invoke); + CreateVarHandleGetAndAddLocations(invoke, codegen_); } void IntrinsicCodeGeneratorX86_64::VisitVarHandleGetAndAddAcquire(HInvoke* invoke) { @@ -4770,7 +5000,7 @@ void IntrinsicCodeGeneratorX86_64::VisitVarHandleGetAndAddAcquire(HInvoke* invok } void IntrinsicLocationsBuilderX86_64::VisitVarHandleGetAndAddRelease(HInvoke* invoke) { - CreateVarHandleGetAndAddLocations(invoke); + CreateVarHandleGetAndAddLocations(invoke, codegen_); } void IntrinsicCodeGeneratorX86_64::VisitVarHandleGetAndAddRelease(HInvoke* invoke) { @@ -4783,7 +5013,7 @@ void IntrinsicCodeGeneratorX86_64::VisitVarHandleGetAndAddRelease(HInvoke* invok } void IntrinsicLocationsBuilderX86_64::VisitVarHandleGetAndBitwiseAnd(HInvoke* invoke) { - CreateVarHandleGetAndBitwiseOpLocations(invoke); + CreateVarHandleGetAndBitwiseOpLocations(invoke, codegen_); } void IntrinsicCodeGeneratorX86_64::VisitVarHandleGetAndBitwiseAnd(HInvoke* invoke) { @@ -4796,7 +5026,7 @@ void IntrinsicCodeGeneratorX86_64::VisitVarHandleGetAndBitwiseAnd(HInvoke* invok } void IntrinsicLocationsBuilderX86_64::VisitVarHandleGetAndBitwiseAndAcquire(HInvoke* invoke) { - CreateVarHandleGetAndBitwiseOpLocations(invoke); + CreateVarHandleGetAndBitwiseOpLocations(invoke, codegen_); } void IntrinsicCodeGeneratorX86_64::VisitVarHandleGetAndBitwiseAndAcquire(HInvoke* invoke) { @@ -4809,7 +5039,7 @@ void IntrinsicCodeGeneratorX86_64::VisitVarHandleGetAndBitwiseAndAcquire(HInvoke } void IntrinsicLocationsBuilderX86_64::VisitVarHandleGetAndBitwiseAndRelease(HInvoke* invoke) { - CreateVarHandleGetAndBitwiseOpLocations(invoke); + CreateVarHandleGetAndBitwiseOpLocations(invoke, codegen_); } void IntrinsicCodeGeneratorX86_64::VisitVarHandleGetAndBitwiseAndRelease(HInvoke* invoke) { @@ -4822,7 +5052,7 @@ void IntrinsicCodeGeneratorX86_64::VisitVarHandleGetAndBitwiseAndRelease(HInvoke } void IntrinsicLocationsBuilderX86_64::VisitVarHandleGetAndBitwiseOr(HInvoke* invoke) { - CreateVarHandleGetAndBitwiseOpLocations(invoke); + CreateVarHandleGetAndBitwiseOpLocations(invoke, codegen_); } void IntrinsicCodeGeneratorX86_64::VisitVarHandleGetAndBitwiseOr(HInvoke* invoke) { @@ -4835,7 +5065,7 @@ void IntrinsicCodeGeneratorX86_64::VisitVarHandleGetAndBitwiseOr(HInvoke* invoke } void IntrinsicLocationsBuilderX86_64::VisitVarHandleGetAndBitwiseOrAcquire(HInvoke* invoke) { - CreateVarHandleGetAndBitwiseOpLocations(invoke); + CreateVarHandleGetAndBitwiseOpLocations(invoke, codegen_); } void IntrinsicCodeGeneratorX86_64::VisitVarHandleGetAndBitwiseOrAcquire(HInvoke* invoke) { @@ -4848,7 +5078,7 @@ void IntrinsicCodeGeneratorX86_64::VisitVarHandleGetAndBitwiseOrAcquire(HInvoke* } void 
IntrinsicLocationsBuilderX86_64::VisitVarHandleGetAndBitwiseOrRelease(HInvoke* invoke) { - CreateVarHandleGetAndBitwiseOpLocations(invoke); + CreateVarHandleGetAndBitwiseOpLocations(invoke, codegen_); } void IntrinsicCodeGeneratorX86_64::VisitVarHandleGetAndBitwiseOrRelease(HInvoke* invoke) { @@ -4861,7 +5091,7 @@ void IntrinsicCodeGeneratorX86_64::VisitVarHandleGetAndBitwiseOrRelease(HInvoke* } void IntrinsicLocationsBuilderX86_64::VisitVarHandleGetAndBitwiseXor(HInvoke* invoke) { - CreateVarHandleGetAndBitwiseOpLocations(invoke); + CreateVarHandleGetAndBitwiseOpLocations(invoke, codegen_); } void IntrinsicCodeGeneratorX86_64::VisitVarHandleGetAndBitwiseXor(HInvoke* invoke) { @@ -4874,7 +5104,7 @@ void IntrinsicCodeGeneratorX86_64::VisitVarHandleGetAndBitwiseXor(HInvoke* invok } void IntrinsicLocationsBuilderX86_64::VisitVarHandleGetAndBitwiseXorAcquire(HInvoke* invoke) { - CreateVarHandleGetAndBitwiseOpLocations(invoke); + CreateVarHandleGetAndBitwiseOpLocations(invoke, codegen_); } void IntrinsicCodeGeneratorX86_64::VisitVarHandleGetAndBitwiseXorAcquire(HInvoke* invoke) { @@ -4887,7 +5117,7 @@ void IntrinsicCodeGeneratorX86_64::VisitVarHandleGetAndBitwiseXorAcquire(HInvoke } void IntrinsicLocationsBuilderX86_64::VisitVarHandleGetAndBitwiseXorRelease(HInvoke* invoke) { - CreateVarHandleGetAndBitwiseOpLocations(invoke); + CreateVarHandleGetAndBitwiseOpLocations(invoke, codegen_); } void IntrinsicCodeGeneratorX86_64::VisitVarHandleGetAndBitwiseXorRelease(HInvoke* invoke) { diff --git a/compiler/optimizing/intrinsics_x86_64.h b/compiler/optimizing/intrinsics_x86_64.h index 59fe815a94..4a76c5c8ec 100644 --- a/compiler/optimizing/intrinsics_x86_64.h +++ b/compiler/optimizing/intrinsics_x86_64.h @@ -19,6 +19,7 @@ #include "base/macros.h" #include "intrinsics.h" +#include "intrinsics_list.h" namespace art HIDDEN { @@ -39,9 +40,7 @@ class IntrinsicLocationsBuilderX86_64 final : public IntrinsicVisitor { #define OPTIMIZING_INTRINSICS(Name, IsStatic, NeedsEnvironmentOrCache, SideEffects, Exceptions, ...) \ void Visit ## Name(HInvoke* invoke) override; -#include "intrinsics_list.h" - INTRINSICS_LIST(OPTIMIZING_INTRINSICS) -#undef INTRINSICS_LIST + ART_INTRINSICS_LIST(OPTIMIZING_INTRINSICS) #undef OPTIMIZING_INTRINSICS // Check whether an invoke is an intrinsic, and if so, create a location summary. Returns whether @@ -64,9 +63,7 @@ class IntrinsicCodeGeneratorX86_64 final : public IntrinsicVisitor { #define OPTIMIZING_INTRINSICS(Name, IsStatic, NeedsEnvironmentOrCache, SideEffects, Exceptions, ...) \ void Visit ## Name(HInvoke* invoke) override; -#include "intrinsics_list.h" - INTRINSICS_LIST(OPTIMIZING_INTRINSICS) -#undef INTRINSICS_LIST + ART_INTRINSICS_LIST(OPTIMIZING_INTRINSICS) #undef OPTIMIZING_INTRINSICS private: @@ -74,6 +71,10 @@ class IntrinsicCodeGeneratorX86_64 final : public IntrinsicVisitor { ArenaAllocator* GetAllocator(); + void HandleValueOf(HInvoke* invoke, + const IntrinsicVisitor::ValueOfInfo& info, + DataType::Type type); + CodeGeneratorX86_64* const codegen_; DISALLOW_COPY_AND_ASSIGN(IntrinsicCodeGeneratorX86_64); diff --git a/compiler/optimizing/jit_patches_arm64.cc b/compiler/optimizing/jit_patches_arm64.cc new file mode 100644 index 0000000000..76ba182acb --- /dev/null +++ b/compiler/optimizing/jit_patches_arm64.cc @@ -0,0 +1,104 @@ +/* + * Copyright (C) 2023 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "code_generation_data.h" +#include "gc_root.h" +#include "jit_patches_arm64.h" + +namespace art HIDDEN { + +namespace arm64 { + +vixl::aarch64::Literal<uint32_t>* JitPatchesARM64::DeduplicateUint32Literal( + uint32_t value) { + return uint32_literals_.GetOrCreate( + value, + [this, value]() { + return GetVIXLAssembler()->CreateLiteralDestroyedWithPool<uint32_t>(value); + }); +} + +vixl::aarch64::Literal<uint64_t>* JitPatchesARM64::DeduplicateUint64Literal( + uint64_t value) { + return uint64_literals_.GetOrCreate( + value, + [this, value]() { + return GetVIXLAssembler()->CreateLiteralDestroyedWithPool<uint64_t>(value); + }); +} + +static void PatchJitRootUse(uint8_t* code, + const uint8_t* roots_data, + vixl::aarch64::Literal<uint32_t>* literal, + uint64_t index_in_table) { + uint32_t literal_offset = literal->GetOffset(); + uintptr_t address = + reinterpret_cast<uintptr_t>(roots_data) + index_in_table * sizeof(GcRoot<mirror::Object>); + uint8_t* data = code + literal_offset; + reinterpret_cast<uint32_t*>(data)[0] = dchecked_integral_cast<uint32_t>(address); +} + +void JitPatchesARM64::EmitJitRootPatches( + uint8_t* code, + const uint8_t* roots_data, + const CodeGenerationData& code_generation_data) const { + for (const auto& entry : jit_string_patches_) { + const StringReference& string_reference = entry.first; + vixl::aarch64::Literal<uint32_t>* table_entry_literal = entry.second; + uint64_t index_in_table = code_generation_data.GetJitStringRootIndex(string_reference); + PatchJitRootUse(code, roots_data, table_entry_literal, index_in_table); + } + for (const auto& entry : jit_class_patches_) { + const TypeReference& type_reference = entry.first; + vixl::aarch64::Literal<uint32_t>* table_entry_literal = entry.second; + uint64_t index_in_table = code_generation_data.GetJitClassRootIndex(type_reference); + PatchJitRootUse(code, roots_data, table_entry_literal, index_in_table); + } +} + +vixl::aarch64::Literal<uint32_t>* JitPatchesARM64::DeduplicateBootImageAddressLiteral( + uint64_t address) { + return DeduplicateUint32Literal(dchecked_integral_cast<uint32_t>(address)); +} + +vixl::aarch64::Literal<uint32_t>* JitPatchesARM64::DeduplicateJitStringLiteral( + const DexFile& dex_file, + dex::StringIndex string_index, + Handle<mirror::String> handle, + CodeGenerationData* code_generation_data) { + code_generation_data->ReserveJitStringRoot(StringReference(&dex_file, string_index), handle); + return jit_string_patches_.GetOrCreate( + StringReference(&dex_file, string_index), + [this]() { + return GetVIXLAssembler()->CreateLiteralDestroyedWithPool<uint32_t>(/* value= */ 0u); + }); +} + +vixl::aarch64::Literal<uint32_t>* JitPatchesARM64::DeduplicateJitClassLiteral( + const DexFile& dex_file, + dex::TypeIndex type_index, + Handle<mirror::Class> handle, + CodeGenerationData* code_generation_data) { + code_generation_data->ReserveJitClassRoot(TypeReference(&dex_file, type_index), handle); + return jit_class_patches_.GetOrCreate( + TypeReference(&dex_file, type_index), + [this]() { + return GetVIXLAssembler()->CreateLiteralDestroyedWithPool<uint32_t>(/* value= */ 0u); 
+ }); +} + +} // namespace arm64 +} // namespace art diff --git a/compiler/optimizing/jit_patches_arm64.h b/compiler/optimizing/jit_patches_arm64.h new file mode 100644 index 0000000000..f928723f58 --- /dev/null +++ b/compiler/optimizing/jit_patches_arm64.h @@ -0,0 +1,107 @@ +/* + * Copyright (C) 2023 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef ART_COMPILER_OPTIMIZING_JIT_PATCHES_ARM64_H_ +#define ART_COMPILER_OPTIMIZING_JIT_PATCHES_ARM64_H_ + +#include "base/arena_allocator.h" +#include "base/arena_containers.h" +#include "dex/dex_file.h" +#include "dex/string_reference.h" +#include "dex/type_reference.h" +#include "handle.h" +#include "mirror/class.h" +#include "mirror/string.h" +#include "utils/arm64/assembler_arm64.h" + +// TODO(VIXL): Make VIXL compile with -Wshadow. +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wshadow" +#include "aarch64/disasm-aarch64.h" +#include "aarch64/macro-assembler-aarch64.h" +#pragma GCC diagnostic pop + +namespace art HIDDEN { + +class CodeGenerationData; + +namespace arm64 { + +/** + * Helper for emitting string or class literals into JIT generated code, + * which can be shared between different compilers. + */ +class JitPatchesARM64 { + public: + JitPatchesARM64(Arm64Assembler* assembler, ArenaAllocator* allocator) : + assembler_(assembler), + uint32_literals_(std::less<uint32_t>(), + allocator->Adapter(kArenaAllocCodeGenerator)), + uint64_literals_(std::less<uint64_t>(), + allocator->Adapter(kArenaAllocCodeGenerator)), + jit_string_patches_(StringReferenceValueComparator(), + allocator->Adapter(kArenaAllocCodeGenerator)), + jit_class_patches_(TypeReferenceValueComparator(), + allocator->Adapter(kArenaAllocCodeGenerator)) { + } + + using Uint64ToLiteralMap = ArenaSafeMap<uint64_t, vixl::aarch64::Literal<uint64_t>*>; + using Uint32ToLiteralMap = ArenaSafeMap<uint32_t, vixl::aarch64::Literal<uint32_t>*>; + using StringToLiteralMap = ArenaSafeMap<StringReference, + vixl::aarch64::Literal<uint32_t>*, + StringReferenceValueComparator>; + using TypeToLiteralMap = ArenaSafeMap<TypeReference, + vixl::aarch64::Literal<uint32_t>*, + TypeReferenceValueComparator>; + + vixl::aarch64::Literal<uint32_t>* DeduplicateUint32Literal(uint32_t value); + vixl::aarch64::Literal<uint64_t>* DeduplicateUint64Literal(uint64_t value); + vixl::aarch64::Literal<uint32_t>* DeduplicateBootImageAddressLiteral(uint64_t address); + vixl::aarch64::Literal<uint32_t>* DeduplicateJitStringLiteral( + const DexFile& dex_file, + dex::StringIndex string_index, + Handle<mirror::String> handle, + CodeGenerationData* code_generation_data); + vixl::aarch64::Literal<uint32_t>* DeduplicateJitClassLiteral( + const DexFile& dex_file, + dex::TypeIndex type_index, + Handle<mirror::Class> handle, + CodeGenerationData* code_generation_data); + + void EmitJitRootPatches(uint8_t* code, + const uint8_t* roots_data, + const CodeGenerationData& code_generation_data) const; + + Arm64Assembler* GetAssembler() const { return assembler_; } 
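// ---- Editorial aside, not part of this patch ----
// The Deduplicate*Literal helpers declared above all follow the same
// memoization pattern via ArenaSafeMap::GetOrCreate: reuse an existing literal
// for a value already seen, otherwise create a fresh one in the VIXL literal
// pool. A minimal sketch of the idiom, with std::map and a plain pointer type
// standing in for the arena map and the VIXL literal:
#include <cstdint>
#include <map>

template <typename Key, typename Value, typename Creator>
Value* GetOrCreate(std::map<Key, Value*>& map, const Key& key, Creator&& create) {
  auto it = map.find(key);
  if (it == map.end()) {
    it = map.emplace(key, create()).first;  // create the literal only on first use.
  }
  return it->second;
}
// ---- End editorial aside ----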
+ vixl::aarch64::MacroAssembler* GetVIXLAssembler() { return GetAssembler()->GetVIXLAssembler(); } + + private: + Arm64Assembler* assembler_; + // Deduplication map for 32-bit literals, used for JIT for boot image addresses. + Uint32ToLiteralMap uint32_literals_; + // Deduplication map for 64-bit literals, used for JIT for method address or method code. + Uint64ToLiteralMap uint64_literals_; + // Patches for string literals in JIT compiled code. + StringToLiteralMap jit_string_patches_; + // Patches for class literals in JIT compiled code. + TypeToLiteralMap jit_class_patches_; +}; + +} // namespace arm64 + +} // namespace art + +#endif // ART_COMPILER_OPTIMIZING_JIT_PATCHES_ARM64_H_ diff --git a/compiler/optimizing/linearize_test.cc b/compiler/optimizing/linearize_test.cc index 01daa23511..6f4f2b6cf6 100644 --- a/compiler/optimizing/linearize_test.cc +++ b/compiler/optimizing/linearize_test.cc @@ -55,6 +55,7 @@ void LinearizeTest::TestCode(const std::vector<uint16_t>& data, } TEST_F(LinearizeTest, CFG1) { + TEST_DISABLED_FOR_RISCV64(); // Structure of this graph (+ are back edges) // Block0 // | @@ -80,6 +81,7 @@ TEST_F(LinearizeTest, CFG1) { } TEST_F(LinearizeTest, CFG2) { + TEST_DISABLED_FOR_RISCV64(); // Structure of this graph (+ are back edges) // Block0 // | @@ -105,6 +107,7 @@ TEST_F(LinearizeTest, CFG2) { } TEST_F(LinearizeTest, CFG3) { + TEST_DISABLED_FOR_RISCV64(); // Structure of this graph (+ are back edges) // Block0 // | @@ -132,6 +135,7 @@ TEST_F(LinearizeTest, CFG3) { } TEST_F(LinearizeTest, CFG4) { + TEST_DISABLED_FOR_RISCV64(); /* Structure of this graph (+ are back edges) // Block0 // | @@ -162,6 +166,7 @@ TEST_F(LinearizeTest, CFG4) { } TEST_F(LinearizeTest, CFG5) { + TEST_DISABLED_FOR_RISCV64(); /* Structure of this graph (+ are back edges) // Block0 // | @@ -192,6 +197,7 @@ TEST_F(LinearizeTest, CFG5) { } TEST_F(LinearizeTest, CFG6) { + TEST_DISABLED_FOR_RISCV64(); // Block0 // | // Block1 @@ -218,6 +224,7 @@ TEST_F(LinearizeTest, CFG6) { } TEST_F(LinearizeTest, CFG7) { + TEST_DISABLED_FOR_RISCV64(); // Structure of this graph (+ are back edges) // Block0 // | diff --git a/compiler/optimizing/live_ranges_test.cc b/compiler/optimizing/live_ranges_test.cc index fb1a23eef4..7e488ba41d 100644 --- a/compiler/optimizing/live_ranges_test.cc +++ b/compiler/optimizing/live_ranges_test.cc @@ -47,6 +47,7 @@ HGraph* LiveRangesTest::BuildGraph(const std::vector<uint16_t>& data) { } TEST_F(LiveRangesTest, CFG1) { + TEST_DISABLED_FOR_RISCV64(); /* * Test the following snippet: * return 0; @@ -81,6 +82,7 @@ TEST_F(LiveRangesTest, CFG1) { } TEST_F(LiveRangesTest, CFG2) { + TEST_DISABLED_FOR_RISCV64(); /* * Test the following snippet: * var a = 0; @@ -125,6 +127,7 @@ TEST_F(LiveRangesTest, CFG2) { } TEST_F(LiveRangesTest, CFG3) { + TEST_DISABLED_FOR_RISCV64(); /* * Test the following snippet: * var a = 0; @@ -194,6 +197,7 @@ TEST_F(LiveRangesTest, CFG3) { } TEST_F(LiveRangesTest, Loop1) { + TEST_DISABLED_FOR_RISCV64(); /* * Test the following snippet: * var a = 0; @@ -270,6 +274,7 @@ TEST_F(LiveRangesTest, Loop1) { } TEST_F(LiveRangesTest, Loop2) { + TEST_DISABLED_FOR_RISCV64(); /* * Test the following snippet: * var a = 0; @@ -341,6 +346,7 @@ TEST_F(LiveRangesTest, Loop2) { } TEST_F(LiveRangesTest, CFG4) { + TEST_DISABLED_FOR_RISCV64(); /* * Test the following snippet: * var a = 0; diff --git a/compiler/optimizing/liveness_test.cc b/compiler/optimizing/liveness_test.cc index 0b421cf9e6..6af07aea4e 100644 --- a/compiler/optimizing/liveness_test.cc +++ 
b/compiler/optimizing/liveness_test.cc @@ -70,6 +70,7 @@ void LivenessTest::TestCode(const std::vector<uint16_t>& data, const char* expec } TEST_F(LivenessTest, CFG1) { + TEST_DISABLED_FOR_RISCV64(); const char* expected = "Block 0\n" " live in: (0)\n" @@ -93,6 +94,7 @@ TEST_F(LivenessTest, CFG1) { } TEST_F(LivenessTest, CFG2) { + TEST_DISABLED_FOR_RISCV64(); const char* expected = "Block 0\n" " live in: (0)\n" @@ -115,6 +117,7 @@ TEST_F(LivenessTest, CFG2) { } TEST_F(LivenessTest, CFG3) { + TEST_DISABLED_FOR_RISCV64(); const char* expected = "Block 0\n" // entry block " live in: (000)\n" @@ -144,6 +147,7 @@ TEST_F(LivenessTest, CFG3) { } TEST_F(LivenessTest, CFG4) { + TEST_DISABLED_FOR_RISCV64(); // var a; // if (0 == 0) { // a = 5; @@ -192,6 +196,7 @@ TEST_F(LivenessTest, CFG4) { } TEST_F(LivenessTest, CFG5) { + TEST_DISABLED_FOR_RISCV64(); // var a = 0; // if (0 == 0) { // } else { @@ -237,6 +242,7 @@ TEST_F(LivenessTest, CFG5) { } TEST_F(LivenessTest, Loop1) { + TEST_DISABLED_FOR_RISCV64(); // Simple loop with one preheader and one back edge. // var a = 0; // while (a == a) { @@ -283,6 +289,7 @@ TEST_F(LivenessTest, Loop1) { } TEST_F(LivenessTest, Loop3) { + TEST_DISABLED_FOR_RISCV64(); // Test that the returned value stays live in a preceding loop. // var a = 0; // while (a == a) { @@ -330,6 +337,7 @@ TEST_F(LivenessTest, Loop3) { TEST_F(LivenessTest, Loop4) { + TEST_DISABLED_FOR_RISCV64(); // Make sure we support a preheader of a loop not being the first predecessor // in the predecessor list of the header. // var a = 0; @@ -382,6 +390,7 @@ TEST_F(LivenessTest, Loop4) { } TEST_F(LivenessTest, Loop5) { + TEST_DISABLED_FOR_RISCV64(); // Make sure we create a preheader of a loop when a header originally has two // incoming blocks and one back edge. // Bitsets are made of: @@ -438,6 +447,7 @@ TEST_F(LivenessTest, Loop5) { } TEST_F(LivenessTest, Loop6) { + TEST_DISABLED_FOR_RISCV64(); // Bitsets are made of: // (constant0, constant4, constant5, phi in block 2) const char* expected = @@ -489,6 +499,7 @@ TEST_F(LivenessTest, Loop6) { TEST_F(LivenessTest, Loop7) { + TEST_DISABLED_FOR_RISCV64(); // Bitsets are made of: // (constant0, constant4, constant5, phi in block 2, phi in block 6) const char* expected = @@ -543,6 +554,7 @@ TEST_F(LivenessTest, Loop7) { } TEST_F(LivenessTest, Loop8) { + TEST_DISABLED_FOR_RISCV64(); // var a = 0; // while (a == a) { // a = a + a; diff --git a/compiler/optimizing/load_store_analysis.cc b/compiler/optimizing/load_store_analysis.cc index f1c50ac03c..474c3bd92f 100644 --- a/compiler/optimizing/load_store_analysis.cc +++ b/compiler/optimizing/load_store_analysis.cc @@ -41,7 +41,7 @@ static bool CanBinaryOpAndIndexAlias(const HBinaryOperation* idx1, // We currently only support Add and Sub operations. return true; } - if (idx1->AsBinaryOperation()->GetLeastConstantLeft() != idx2) { + if (idx1->GetLeastConstantLeft() != idx2) { // Cannot analyze [i+CONST1] and [j]. return true; } @@ -51,9 +51,9 @@ static bool CanBinaryOpAndIndexAlias(const HBinaryOperation* idx1, // Since 'i' are the same in [i+CONST] and [i], // further compare [CONST] and [0]. - int64_t l1 = idx1->IsAdd() ? - idx1->GetConstantRight()->AsIntConstant()->GetValue() : - -idx1->GetConstantRight()->AsIntConstant()->GetValue(); + int64_t l1 = idx1->IsAdd() + ? 
idx1->GetConstantRight()->AsIntConstant()->GetValue() + : -idx1->GetConstantRight()->AsIntConstant()->GetValue(); int64_t l2 = 0; int64_t h1 = l1 + (vector_length1 - 1); int64_t h2 = l2 + (vector_length2 - 1); @@ -68,8 +68,7 @@ static bool CanBinaryOpsAlias(const HBinaryOperation* idx1, // We currently only support Add and Sub operations. return true; } - if (idx1->AsBinaryOperation()->GetLeastConstantLeft() != - idx2->AsBinaryOperation()->GetLeastConstantLeft()) { + if (idx1->GetLeastConstantLeft() != idx2->GetLeastConstantLeft()) { // Cannot analyze [i+CONST1] and [j+CONST2]. return true; } @@ -80,54 +79,17 @@ static bool CanBinaryOpsAlias(const HBinaryOperation* idx1, // Since 'i' are the same in [i+CONST1] and [i+CONST2], // further compare [CONST1] and [CONST2]. - int64_t l1 = idx1->IsAdd() ? - idx1->GetConstantRight()->AsIntConstant()->GetValue() : - -idx1->GetConstantRight()->AsIntConstant()->GetValue(); - int64_t l2 = idx2->IsAdd() ? - idx2->GetConstantRight()->AsIntConstant()->GetValue() : - -idx2->GetConstantRight()->AsIntConstant()->GetValue(); + int64_t l1 = idx1->IsAdd() + ? idx1->GetConstantRight()->AsIntConstant()->GetValue() + : -idx1->GetConstantRight()->AsIntConstant()->GetValue(); + int64_t l2 = idx2->IsAdd() + ? idx2->GetConstantRight()->AsIntConstant()->GetValue() + : -idx2->GetConstantRight()->AsIntConstant()->GetValue(); int64_t h1 = l1 + (vector_length1 - 1); int64_t h2 = l2 + (vector_length2 - 1); return CanIntegerRangesOverlap(l1, h1, l2, h2); } -// Make sure we mark any writes/potential writes to heap-locations within partially -// escaped values as escaping. -void ReferenceInfo::PrunePartialEscapeWrites() { - DCHECK(subgraph_ != nullptr); - if (!subgraph_->IsValid()) { - // All paths escape. - return; - } - HGraph* graph = reference_->GetBlock()->GetGraph(); - ArenaBitVector additional_exclusions( - allocator_, graph->GetBlocks().size(), false, kArenaAllocLSA); - for (const HUseListNode<HInstruction*>& use : reference_->GetUses()) { - const HInstruction* user = use.GetUser(); - if (!additional_exclusions.IsBitSet(user->GetBlock()->GetBlockId()) && - subgraph_->ContainsBlock(user->GetBlock()) && - (user->IsUnresolvedInstanceFieldSet() || user->IsUnresolvedStaticFieldSet() || - user->IsInstanceFieldSet() || user->IsStaticFieldSet() || user->IsArraySet()) && - (reference_ == user->InputAt(0)) && - std::any_of(subgraph_->UnreachableBlocks().begin(), - subgraph_->UnreachableBlocks().end(), - [&](const HBasicBlock* excluded) -> bool { - return reference_->GetBlock()->GetGraph()->PathBetween(excluded, - user->GetBlock()); - })) { - // This object had memory written to it somewhere, if it escaped along - // some paths prior to the current block this write also counts as an - // escape. - additional_exclusions.SetBit(user->GetBlock()->GetBlockId()); - } - } - if (UNLIKELY(additional_exclusions.IsAnyBitSet())) { - for (uint32_t exc : additional_exclusions.Indexes()) { - subgraph_->RemoveBlock(graph->GetBlocks()[exc]); - } - } -} - bool HeapLocationCollector::InstructionEligibleForLSERemoval(HInstruction* inst) const { if (inst->IsNewInstance()) { return !inst->AsNewInstance()->NeedsChecks(); @@ -149,37 +111,6 @@ bool HeapLocationCollector::InstructionEligibleForLSERemoval(HInstruction* inst) } } -void ReferenceInfo::CollectPartialEscapes(HGraph* graph) { - ScopedArenaAllocator saa(graph->GetArenaStack()); - ArenaBitVector seen_instructions(&saa, graph->GetCurrentInstructionId(), false, kArenaAllocLSA); - // Get regular escapes. 
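// ---- Editorial aside, not part of this patch ----
// Returning to CanBinaryOpAndIndexAlias / CanBinaryOpsAlias above: once both
// indices are reduced to i + CONST, the aliasing question becomes whether the
// constant ranges [l, l + vector_length - 1] overlap. A minimal sketch of that
// reduction; RangesOverlap uses one standard interval test and is not claimed
// to match ART's exact CanIntegerRangesOverlap implementation:
#include <cstdint>

bool RangesOverlap(int64_t l1, int64_t h1, int64_t l2, int64_t h2) {
  return !(h1 < l2 || h2 < l1);  // overlap unless one range ends before the other starts.
}

bool ConstantOffsetsMayAlias(int64_t c1, int64_t len1, int64_t c2, int64_t len2) {
  // Example: [i+1] over 4 lanes vs [i+3] over 2 lanes -> [1,4] and [3,4] overlap.
  return RangesOverlap(c1, c1 + (len1 - 1), c2, c2 + (len2 - 1));
}
// ---- End editorial aside ----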
- ScopedArenaVector<HInstruction*> additional_escape_vectors(saa.Adapter(kArenaAllocLSA)); - LambdaEscapeVisitor scan_instructions([&](HInstruction* escape) -> bool { - HandleEscape(escape); - // LSE can't track heap-locations through Phi and Select instructions so we - // need to assume all escapes from these are escapes for the base reference. - if ((escape->IsPhi() || escape->IsSelect()) && !seen_instructions.IsBitSet(escape->GetId())) { - seen_instructions.SetBit(escape->GetId()); - additional_escape_vectors.push_back(escape); - } - return true; - }); - additional_escape_vectors.push_back(reference_); - while (!additional_escape_vectors.empty()) { - HInstruction* ref = additional_escape_vectors.back(); - additional_escape_vectors.pop_back(); - DCHECK(ref == reference_ || ref->IsPhi() || ref->IsSelect()) << *ref; - VisitEscapes(ref, scan_instructions); - } - - // Mark irreducible loop headers as escaping since they cannot be tracked through. - for (HBasicBlock* blk : graph->GetActiveBlocks()) { - if (blk->IsLoopHeader() && blk->GetLoopInformation()->IsIrreducible()) { - HandleEscape(blk); - } - } -} - void HeapLocationCollector::DumpReferenceStats(OptimizingCompilerStats* stats) { if (stats == nullptr) { return; @@ -197,14 +128,6 @@ void HeapLocationCollector::DumpReferenceStats(OptimizingCompilerStats* stats) { MaybeRecordStat(stats, MethodCompilationStat::kFullLSEPossible); } } - // TODO This is an estimate of the number of allocations we will be able - // to (partially) remove. As additional work is done this can be refined. - if (ri->IsPartialSingleton() && instruction->IsNewInstance() && - ri->GetNoEscapeSubgraph()->ContainsBlock(instruction->GetBlock()) && - !ri->GetNoEscapeSubgraph()->GetExcludedCohorts().empty() && - InstructionEligibleForLSERemoval(instruction)) { - MaybeRecordStat(stats, MethodCompilationStat::kPartialLSEPossible); - } } } @@ -269,6 +192,13 @@ bool HeapLocationCollector::CanArrayElementsAlias(const HInstruction* idx1, } bool LoadStoreAnalysis::Run() { + // Currently load_store analysis can't handle predicated load/stores; specifically pairs of + // memory operations with different predicates. + // TODO: support predicated SIMD. + if (graph_->HasPredicatedSIMD()) { + return false; + } + for (HBasicBlock* block : graph_->GetReversePostOrder()) { heap_location_collector_.VisitBasicBlock(block); } diff --git a/compiler/optimizing/load_store_analysis.h b/compiler/optimizing/load_store_analysis.h index c46a5b9cc1..4a630ddf8f 100644 --- a/compiler/optimizing/load_store_analysis.h +++ b/compiler/optimizing/load_store_analysis.h @@ -25,65 +25,26 @@ #include "base/scoped_arena_containers.h" #include "base/stl_util.h" #include "escape.h" -#include "execution_subgraph.h" #include "nodes.h" #include "optimizing/optimizing_compiler_stats.h" namespace art HIDDEN { -enum class LoadStoreAnalysisType { - kBasic, - kNoPredicatedInstructions, - kFull, -}; - // A ReferenceInfo contains additional info about a reference such as // whether it's a singleton, returned, etc. class ReferenceInfo : public DeletableArenaObject<kArenaAllocLSA> { public: - ReferenceInfo(HInstruction* reference, - ScopedArenaAllocator* allocator, - size_t pos, - LoadStoreAnalysisType elimination_type) + ReferenceInfo(HInstruction* reference, size_t pos) : reference_(reference), position_(pos), is_singleton_(true), is_singleton_and_not_returned_(true), - is_singleton_and_not_deopt_visible_(true), - allocator_(allocator), - subgraph_(nullptr) { - // TODO We can do this in one pass. 
- // TODO NewArray is possible but will need to get a handle on how to deal with the dynamic loads - // for now just ignore it. - bool can_be_partial = elimination_type != LoadStoreAnalysisType::kBasic && - (/* reference_->IsNewArray() || */ reference_->IsNewInstance()); - if (can_be_partial) { - subgraph_.reset( - new (allocator) ExecutionSubgraph(reference->GetBlock()->GetGraph(), allocator)); - CollectPartialEscapes(reference_->GetBlock()->GetGraph()); - } + is_singleton_and_not_deopt_visible_(true) { CalculateEscape(reference_, nullptr, &is_singleton_, &is_singleton_and_not_returned_, &is_singleton_and_not_deopt_visible_); - if (can_be_partial) { - if (elimination_type == LoadStoreAnalysisType::kNoPredicatedInstructions) { - // This is to mark writes to partially escaped values as also part of the escaped subset. - // TODO We can avoid this if we have a 'ConditionalWrite' instruction. Will require testing - // to see if the additional branches are worth it. - PrunePartialEscapeWrites(); - } - DCHECK(subgraph_ != nullptr); - subgraph_->Finalize(); - } else { - DCHECK(subgraph_ == nullptr); - } - } - - const ExecutionSubgraph* GetNoEscapeSubgraph() const { - DCHECK(IsPartialSingleton()); - return subgraph_.get(); } HInstruction* GetReference() const { @@ -101,16 +62,6 @@ class ReferenceInfo : public DeletableArenaObject<kArenaAllocLSA> { return is_singleton_; } - // This is a singleton and there are paths that don't escape the method - bool IsPartialSingleton() const { - auto ref = GetReference(); - // TODO NewArray is possible but will need to get a handle on how to deal with the dynamic loads - // for now just ignore it. - return (/* ref->IsNewArray() || */ ref->IsNewInstance()) && - subgraph_ != nullptr && - subgraph_->IsValid(); - } - // Returns true if reference_ is a singleton and not returned to the caller or // used as an environment local of an HDeoptimize instruction. // The allocation and stores into reference_ may be eliminated for such cases. @@ -126,19 +77,6 @@ class ReferenceInfo : public DeletableArenaObject<kArenaAllocLSA> { } private: - void CollectPartialEscapes(HGraph* graph); - void HandleEscape(HBasicBlock* escape) { - DCHECK(subgraph_ != nullptr); - subgraph_->RemoveBlock(escape); - } - void HandleEscape(HInstruction* escape) { - HandleEscape(escape->GetBlock()); - } - - // Make sure we mark any writes/potential writes to heap-locations within partially - // escaped values as escaping. - void PrunePartialEscapeWrites(); - HInstruction* const reference_; const size_t position_; // position in HeapLocationCollector's ref_info_array_. @@ -149,10 +87,6 @@ class ReferenceInfo : public DeletableArenaObject<kArenaAllocLSA> { // Is singleton and not used as an environment local of HDeoptimize. bool is_singleton_and_not_deopt_visible_; - ScopedArenaAllocator* allocator_; - - std::unique_ptr<ExecutionSubgraph> subgraph_; - DISALLOW_COPY_AND_ASSIGN(ReferenceInfo); }; @@ -249,16 +183,13 @@ class HeapLocationCollector : public HGraphVisitor { // aliasing matrix of 8 heap locations. 
static constexpr uint32_t kInitialAliasingMatrixBitVectorSize = 32; - HeapLocationCollector(HGraph* graph, - ScopedArenaAllocator* allocator, - LoadStoreAnalysisType lse_type) + HeapLocationCollector(HGraph* graph, ScopedArenaAllocator* allocator) : HGraphVisitor(graph), allocator_(allocator), ref_info_array_(allocator->Adapter(kArenaAllocLSA)), heap_locations_(allocator->Adapter(kArenaAllocLSA)), aliasing_matrix_(allocator, kInitialAliasingMatrixBitVectorSize, true, kArenaAllocLSA), - has_heap_stores_(false), - lse_type_(lse_type) { + has_heap_stores_(false) { aliasing_matrix_.ClearAllBits(); } @@ -272,12 +203,6 @@ class HeapLocationCollector : public HGraphVisitor { ref_info_array_.clear(); } - size_t CountPartialSingletons() const { - return std::count_if(ref_info_array_.begin(), - ref_info_array_.end(), - [](ReferenceInfo* ri) { return ri->IsPartialSingleton(); }); - } - size_t GetNumberOfHeapLocations() const { return heap_locations_.size(); } @@ -507,7 +432,7 @@ class HeapLocationCollector : public HGraphVisitor { ReferenceInfo* ref_info = FindReferenceInfoOf(instruction); if (ref_info == nullptr) { size_t pos = ref_info_array_.size(); - ref_info = new (allocator_) ReferenceInfo(instruction, allocator_, pos, lse_type_); + ref_info = new (allocator_) ReferenceInfo(instruction, pos); ref_info_array_.push_back(ref_info); } return ref_info; @@ -566,10 +491,6 @@ class HeapLocationCollector : public HGraphVisitor { is_vec_op); } - void VisitPredicatedInstanceFieldGet(HPredicatedInstanceFieldGet* instruction) override { - VisitFieldAccess(instruction->GetTarget(), instruction->GetFieldInfo()); - CreateReferenceInfoForReferenceType(instruction); - } void VisitInstanceFieldGet(HInstanceFieldGet* instruction) override { VisitFieldAccess(instruction->InputAt(0), instruction->GetFieldInfo()); CreateReferenceInfoForReferenceType(instruction); @@ -610,6 +531,7 @@ class HeapLocationCollector : public HGraphVisitor { } void VisitVecLoad(HVecLoad* instruction) override { + DCHECK(!instruction->IsPredicated()); HInstruction* array = instruction->InputAt(0); HInstruction* index = instruction->InputAt(1); DataType::Type type = instruction->GetPackedType(); @@ -618,6 +540,7 @@ class HeapLocationCollector : public HGraphVisitor { } void VisitVecStore(HVecStore* instruction) override { + DCHECK(!instruction->IsPredicated()); HInstruction* array = instruction->InputAt(0); HInstruction* index = instruction->InputAt(1); DataType::Type type = instruction->GetPackedType(); @@ -643,25 +566,16 @@ class HeapLocationCollector : public HGraphVisitor { ArenaBitVector aliasing_matrix_; // aliasing info between each pair of locations. bool has_heap_stores_; // If there is no heap stores, LSE acts as GVN with better // alias analysis and won't be as effective. - LoadStoreAnalysisType lse_type_; DISALLOW_COPY_AND_ASSIGN(HeapLocationCollector); }; class LoadStoreAnalysis { public: - // for_elimination controls whether we should keep track of escapes at a per-block level for - // partial LSE. explicit LoadStoreAnalysis(HGraph* graph, OptimizingCompilerStats* stats, - ScopedArenaAllocator* local_allocator, - LoadStoreAnalysisType lse_type) - : graph_(graph), - stats_(stats), - heap_location_collector_( - graph, - local_allocator, - ExecutionSubgraph::CanAnalyse(graph_) ? 
lse_type : LoadStoreAnalysisType::kBasic) {} + ScopedArenaAllocator* local_allocator) + : graph_(graph), stats_(stats), heap_location_collector_(graph, local_allocator) {} const HeapLocationCollector& GetHeapLocationCollector() const { return heap_location_collector_; diff --git a/compiler/optimizing/load_store_analysis_test.cc b/compiler/optimizing/load_store_analysis_test.cc index 865febbd31..947bf04923 100644 --- a/compiler/optimizing/load_store_analysis_test.cc +++ b/compiler/optimizing/load_store_analysis_test.cc @@ -27,8 +27,6 @@ #include "dex/dex_file_types.h" #include "dex/method_reference.h" #include "entrypoints/quick/quick_entrypoints_enum.h" -#include "execution_subgraph.h" -#include "execution_subgraph_test.h" #include "gtest/gtest.h" #include "handle.h" #include "handle_scope.h" @@ -52,13 +50,6 @@ class LoadStoreAnalysisTest : public CommonCompilerTest, public OptimizingUnitTe return AdjacencyListGraph(graph_, GetAllocator(), entry_name, exit_name, adj); } - bool IsValidSubgraph(const ExecutionSubgraph* esg) { - return ExecutionSubgraphTestHelper::CalculateValidity(graph_, esg); - } - - bool IsValidSubgraph(const ExecutionSubgraph& esg) { - return ExecutionSubgraphTestHelper::CalculateValidity(graph_, &esg); - } void CheckReachability(const AdjacencyListGraph& adj, const std::vector<AdjacencyListGraph::Edge>& reach); }; @@ -102,7 +93,7 @@ TEST_F(LoadStoreAnalysisTest, ArrayHeapLocations) { // Test HeapLocationCollector initialization. // Should be no heap locations, no operations on the heap. ScopedArenaAllocator allocator(graph_->GetArenaStack()); - HeapLocationCollector heap_location_collector(graph_, &allocator, LoadStoreAnalysisType::kFull); + HeapLocationCollector heap_location_collector(graph_, &allocator); ASSERT_EQ(heap_location_collector.GetNumberOfHeapLocations(), 0U); ASSERT_FALSE(heap_location_collector.HasHeapStores()); @@ -201,7 +192,7 @@ TEST_F(LoadStoreAnalysisTest, FieldHeapLocations) { // Test HeapLocationCollector initialization. // Should be no heap locations, no operations on the heap. 
ScopedArenaAllocator allocator(graph_->GetArenaStack()); - HeapLocationCollector heap_location_collector(graph_, &allocator, LoadStoreAnalysisType::kFull); + HeapLocationCollector heap_location_collector(graph_, &allocator); ASSERT_EQ(heap_location_collector.GetNumberOfHeapLocations(), 0U); ASSERT_FALSE(heap_location_collector.HasHeapStores()); @@ -283,7 +274,7 @@ TEST_F(LoadStoreAnalysisTest, ArrayIndexAliasingTest) { body->AddInstruction(new (GetAllocator()) HReturnVoid()); ScopedArenaAllocator allocator(graph_->GetArenaStack()); - LoadStoreAnalysis lsa(graph_, nullptr, &allocator, LoadStoreAnalysisType::kBasic); + LoadStoreAnalysis lsa(graph_, nullptr, &allocator); lsa.Run(); const HeapLocationCollector& heap_location_collector = lsa.GetHeapLocationCollector(); @@ -451,7 +442,7 @@ TEST_F(LoadStoreAnalysisTest, ArrayAliasingTest) { entry->AddInstruction(vstore_i_add6_vlen2); ScopedArenaAllocator allocator(graph_->GetArenaStack()); - LoadStoreAnalysis lsa(graph_, nullptr, &allocator, LoadStoreAnalysisType::kBasic); + LoadStoreAnalysis lsa(graph_, nullptr, &allocator); lsa.Run(); const HeapLocationCollector& heap_location_collector = lsa.GetHeapLocationCollector(); @@ -611,7 +602,7 @@ TEST_F(LoadStoreAnalysisTest, ArrayIndexCalculationOverflowTest) { entry->AddInstruction(arr_set_8); ScopedArenaAllocator allocator(graph_->GetArenaStack()); - LoadStoreAnalysis lsa(graph_, nullptr, &allocator, LoadStoreAnalysisType::kBasic); + LoadStoreAnalysis lsa(graph_, nullptr, &allocator); lsa.Run(); const HeapLocationCollector& heap_location_collector = lsa.GetHeapLocationCollector(); @@ -702,7 +693,7 @@ TEST_F(LoadStoreAnalysisTest, TestHuntOriginalRef) { entry->AddInstruction(array_get4); ScopedArenaAllocator allocator(graph_->GetArenaStack()); - HeapLocationCollector heap_location_collector(graph_, &allocator, LoadStoreAnalysisType::kFull); + HeapLocationCollector heap_location_collector(graph_, &allocator); heap_location_collector.VisitBasicBlock(entry); // Test that the HeapLocationCollector should be able to tell @@ -817,756 +808,6 @@ TEST_F(LoadStoreAnalysisTest, ReachabilityTest3) { }); } -static bool AreExclusionsIndependent(HGraph* graph, const ExecutionSubgraph* esg) { - auto excluded = esg->GetExcludedCohorts(); - if (excluded.size() < 2) { - return true; - } - for (auto first = excluded.begin(); first != excluded.end(); ++first) { - for (auto second = excluded.begin(); second != excluded.end(); ++second) { - if (first == second) { - continue; - } - for (const HBasicBlock* entry : first->EntryBlocks()) { - for (const HBasicBlock* exit : second->ExitBlocks()) { - if (graph->PathBetween(exit, entry)) { - return false; - } - } - } - } - } - return true; -} - -// // ENTRY -// obj = new Obj(); -// if (parameter_value) { -// // LEFT -// call_func(obj); -// } else { -// // RIGHT -// obj.field = 1; -// } -// // EXIT -// obj.field; -TEST_F(LoadStoreAnalysisTest, PartialEscape) { - CreateGraph(); - AdjacencyListGraph blks(SetupFromAdjacencyList( - "entry", - "exit", - { { "entry", "left" }, { "entry", "right" }, { "left", "exit" }, { "right", "exit" } })); - HBasicBlock* entry = blks.Get("entry"); - HBasicBlock* left = blks.Get("left"); - HBasicBlock* right = blks.Get("right"); - HBasicBlock* exit = blks.Get("exit"); - - HInstruction* bool_value = new (GetAllocator()) - HParameterValue(graph_->GetDexFile(), dex::TypeIndex(1), 1, DataType::Type::kBool); - HInstruction* c0 = graph_->GetIntConstant(0); - HInstruction* cls = new (GetAllocator()) HLoadClass(graph_->GetCurrentMethod(), - 
dex::TypeIndex(10), - graph_->GetDexFile(), - ScopedNullHandle<mirror::Class>(), - false, - 0, - false); - HInstruction* new_inst = - new (GetAllocator()) HNewInstance(cls, - 0, - dex::TypeIndex(10), - graph_->GetDexFile(), - false, - QuickEntrypointEnum::kQuickAllocObjectInitialized); - HInstruction* if_inst = new (GetAllocator()) HIf(bool_value); - entry->AddInstruction(bool_value); - entry->AddInstruction(cls); - entry->AddInstruction(new_inst); - entry->AddInstruction(if_inst); - - HInstruction* call_left = new (GetAllocator()) - HInvokeStaticOrDirect(GetAllocator(), - 1, - DataType::Type::kVoid, - 0, - { nullptr, 0 }, - nullptr, - {}, - InvokeType::kStatic, - { nullptr, 0 }, - HInvokeStaticOrDirect::ClinitCheckRequirement::kNone, - !graph_->IsDebuggable()); - HInstruction* goto_left = new (GetAllocator()) HGoto(); - call_left->AsInvoke()->SetRawInputAt(0, new_inst); - left->AddInstruction(call_left); - left->AddInstruction(goto_left); - - HInstruction* write_right = new (GetAllocator()) HInstanceFieldSet(new_inst, - c0, - nullptr, - DataType::Type::kInt32, - MemberOffset(32), - false, - 0, - 0, - graph_->GetDexFile(), - 0); - HInstruction* goto_right = new (GetAllocator()) HGoto(); - right->AddInstruction(write_right); - right->AddInstruction(goto_right); - - HInstruction* read_final = new (GetAllocator()) HInstanceFieldGet(new_inst, - nullptr, - DataType::Type::kInt32, - MemberOffset(32), - false, - 0, - 0, - graph_->GetDexFile(), - 0); - exit->AddInstruction(read_final); - - ScopedArenaAllocator allocator(graph_->GetArenaStack()); - LoadStoreAnalysis lsa(graph_, nullptr, &allocator, LoadStoreAnalysisType::kFull); - lsa.Run(); - - const HeapLocationCollector& heap_location_collector = lsa.GetHeapLocationCollector(); - ReferenceInfo* info = heap_location_collector.FindReferenceInfoOf(new_inst); - ASSERT_TRUE(info->IsPartialSingleton()); - const ExecutionSubgraph* esg = info->GetNoEscapeSubgraph(); - - ASSERT_TRUE(esg->IsValid()); - ASSERT_TRUE(IsValidSubgraph(esg)); - ASSERT_TRUE(AreExclusionsIndependent(graph_, esg)); - std::unordered_set<const HBasicBlock*> contents(esg->ReachableBlocks().begin(), - esg->ReachableBlocks().end()); - - ASSERT_EQ(contents.size(), 3u); - ASSERT_TRUE(contents.find(blks.Get("left")) == contents.end()); - - ASSERT_TRUE(contents.find(blks.Get("right")) != contents.end()); - ASSERT_TRUE(contents.find(blks.Get("entry")) != contents.end()); - ASSERT_TRUE(contents.find(blks.Get("exit")) != contents.end()); -} - -// // ENTRY -// obj = new Obj(); -// if (parameter_value) { -// // LEFT -// call_func(obj); -// } else { -// // RIGHT -// obj.field = 1; -// } -// // EXIT -// obj.field2; -TEST_F(LoadStoreAnalysisTest, PartialEscape2) { - CreateGraph(); - AdjacencyListGraph blks(SetupFromAdjacencyList( - "entry", - "exit", - { { "entry", "left" }, { "entry", "right" }, { "left", "exit" }, { "right", "exit" } })); - HBasicBlock* entry = blks.Get("entry"); - HBasicBlock* left = blks.Get("left"); - HBasicBlock* right = blks.Get("right"); - HBasicBlock* exit = blks.Get("exit"); - - HInstruction* bool_value = new (GetAllocator()) - HParameterValue(graph_->GetDexFile(), dex::TypeIndex(1), 1, DataType::Type::kBool); - HInstruction* c0 = graph_->GetIntConstant(0); - HInstruction* cls = new (GetAllocator()) HLoadClass(graph_->GetCurrentMethod(), - dex::TypeIndex(10), - graph_->GetDexFile(), - ScopedNullHandle<mirror::Class>(), - false, - 0, - false); - HInstruction* new_inst = - new (GetAllocator()) HNewInstance(cls, - 0, - dex::TypeIndex(10), - graph_->GetDexFile(), - false, - 
QuickEntrypointEnum::kQuickAllocObjectInitialized); - HInstruction* if_inst = new (GetAllocator()) HIf(bool_value); - entry->AddInstruction(bool_value); - entry->AddInstruction(cls); - entry->AddInstruction(new_inst); - entry->AddInstruction(if_inst); - - HInstruction* call_left = new (GetAllocator()) - HInvokeStaticOrDirect(GetAllocator(), - 1, - DataType::Type::kVoid, - 0, - { nullptr, 0 }, - nullptr, - {}, - InvokeType::kStatic, - { nullptr, 0 }, - HInvokeStaticOrDirect::ClinitCheckRequirement::kNone, - !graph_->IsDebuggable()); - HInstruction* goto_left = new (GetAllocator()) HGoto(); - call_left->AsInvoke()->SetRawInputAt(0, new_inst); - left->AddInstruction(call_left); - left->AddInstruction(goto_left); - - HInstruction* write_right = new (GetAllocator()) HInstanceFieldSet(new_inst, - c0, - nullptr, - DataType::Type::kInt32, - MemberOffset(32), - false, - 0, - 0, - graph_->GetDexFile(), - 0); - HInstruction* goto_right = new (GetAllocator()) HGoto(); - right->AddInstruction(write_right); - right->AddInstruction(goto_right); - - HInstruction* read_final = new (GetAllocator()) HInstanceFieldGet(new_inst, - nullptr, - DataType::Type::kInt32, - MemberOffset(16), - false, - 0, - 0, - graph_->GetDexFile(), - 0); - exit->AddInstruction(read_final); - - ScopedArenaAllocator allocator(graph_->GetArenaStack()); - LoadStoreAnalysis lsa(graph_, nullptr, &allocator, LoadStoreAnalysisType::kFull); - lsa.Run(); - - const HeapLocationCollector& heap_location_collector = lsa.GetHeapLocationCollector(); - ReferenceInfo* info = heap_location_collector.FindReferenceInfoOf(new_inst); - ASSERT_TRUE(info->IsPartialSingleton()); - const ExecutionSubgraph* esg = info->GetNoEscapeSubgraph(); - - ASSERT_TRUE(esg->IsValid()); - ASSERT_TRUE(IsValidSubgraph(esg)); - ASSERT_TRUE(AreExclusionsIndependent(graph_, esg)); - std::unordered_set<const HBasicBlock*> contents(esg->ReachableBlocks().begin(), - esg->ReachableBlocks().end()); - - ASSERT_EQ(contents.size(), 3u); - ASSERT_TRUE(contents.find(blks.Get("left")) == contents.end()); - - ASSERT_TRUE(contents.find(blks.Get("right")) != contents.end()); - ASSERT_TRUE(contents.find(blks.Get("entry")) != contents.end()); - ASSERT_TRUE(contents.find(blks.Get("exit")) != contents.end()); -} - -// // ENTRY -// obj = new Obj(); -// obj.field = 10; -// if (parameter_value) { -// // LEFT -// call_func(obj); -// } else { -// // RIGHT -// obj.field = 20; -// } -// // EXIT -// obj.field; -TEST_F(LoadStoreAnalysisTest, PartialEscape3) { - CreateGraph(); - AdjacencyListGraph blks(SetupFromAdjacencyList( - "entry", - "exit", - { { "entry", "left" }, { "entry", "right" }, { "left", "exit" }, { "right", "exit" } })); - HBasicBlock* entry = blks.Get("entry"); - HBasicBlock* left = blks.Get("left"); - HBasicBlock* right = blks.Get("right"); - HBasicBlock* exit = blks.Get("exit"); - - HInstruction* bool_value = new (GetAllocator()) - HParameterValue(graph_->GetDexFile(), dex::TypeIndex(1), 1, DataType::Type::kBool); - HInstruction* c10 = graph_->GetIntConstant(10); - HInstruction* c20 = graph_->GetIntConstant(20); - HInstruction* cls = new (GetAllocator()) HLoadClass(graph_->GetCurrentMethod(), - dex::TypeIndex(10), - graph_->GetDexFile(), - ScopedNullHandle<mirror::Class>(), - false, - 0, - false); - HInstruction* new_inst = - new (GetAllocator()) HNewInstance(cls, - 0, - dex::TypeIndex(10), - graph_->GetDexFile(), - false, - QuickEntrypointEnum::kQuickAllocObjectInitialized); - - HInstruction* write_entry = new (GetAllocator()) HInstanceFieldSet(new_inst, - c10, - nullptr, - 
DataType::Type::kInt32, - MemberOffset(32), - false, - 0, - 0, - graph_->GetDexFile(), - 0); - HInstruction* if_inst = new (GetAllocator()) HIf(bool_value); - entry->AddInstruction(bool_value); - entry->AddInstruction(cls); - entry->AddInstruction(new_inst); - entry->AddInstruction(write_entry); - entry->AddInstruction(if_inst); - - HInstruction* call_left = new (GetAllocator()) - HInvokeStaticOrDirect(GetAllocator(), - 1, - DataType::Type::kVoid, - 0, - { nullptr, 0 }, - nullptr, - {}, - InvokeType::kStatic, - { nullptr, 0 }, - HInvokeStaticOrDirect::ClinitCheckRequirement::kNone, - !graph_->IsDebuggable()); - HInstruction* goto_left = new (GetAllocator()) HGoto(); - call_left->AsInvoke()->SetRawInputAt(0, new_inst); - left->AddInstruction(call_left); - left->AddInstruction(goto_left); - - HInstruction* write_right = new (GetAllocator()) HInstanceFieldSet(new_inst, - c20, - nullptr, - DataType::Type::kInt32, - MemberOffset(32), - false, - 0, - 0, - graph_->GetDexFile(), - 0); - HInstruction* goto_right = new (GetAllocator()) HGoto(); - right->AddInstruction(write_right); - right->AddInstruction(goto_right); - - HInstruction* read_final = new (GetAllocator()) HInstanceFieldGet(new_inst, - nullptr, - DataType::Type::kInt32, - MemberOffset(32), - false, - 0, - 0, - graph_->GetDexFile(), - 0); - exit->AddInstruction(read_final); - - ScopedArenaAllocator allocator(graph_->GetArenaStack()); - LoadStoreAnalysis lsa(graph_, nullptr, &allocator, LoadStoreAnalysisType::kFull); - lsa.Run(); - - const HeapLocationCollector& heap_location_collector = lsa.GetHeapLocationCollector(); - ReferenceInfo* info = heap_location_collector.FindReferenceInfoOf(new_inst); - ASSERT_TRUE(info->IsPartialSingleton()); - const ExecutionSubgraph* esg = info->GetNoEscapeSubgraph(); - - ASSERT_TRUE(esg->IsValid()); - ASSERT_TRUE(IsValidSubgraph(esg)); - ASSERT_TRUE(AreExclusionsIndependent(graph_, esg)); - std::unordered_set<const HBasicBlock*> contents(esg->ReachableBlocks().begin(), - esg->ReachableBlocks().end()); - - ASSERT_EQ(contents.size(), 3u); - ASSERT_TRUE(contents.find(blks.Get("left")) == contents.end()); - - ASSERT_TRUE(contents.find(blks.Get("right")) != contents.end()); - ASSERT_TRUE(contents.find(blks.Get("entry")) != contents.end()); - ASSERT_TRUE(contents.find(blks.Get("exit")) != contents.end()); -} - -// For simplicity Partial LSE considers check-casts to escape. It means we don't -// need to worry about inserting throws. 
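Note: the PartialEscape tests removed above all build the same HGraph shape — an allocation that escapes on only one branch. As a point of reference, this is a rough source-level sketch of that shape in plain C++; Obj, call_func and partial_escape_shape are illustrative stand-ins, not ART code or IR:

    struct Obj { int field = 0; };
    void call_func(Obj*) {}       // opaque call: this is where the object escapes

    int partial_escape_shape(bool parameter_value) {
      Obj* obj = new Obj();       // ENTRY: candidate partial singleton
      if (parameter_value) {
        call_func(obj);           // LEFT: obj escapes here
      } else {
        obj->field = 1;           // RIGHT: store to a not-yet-escaped object
      }
      int result = obj->field;    // EXIT: the load the partial pass wanted to predicate
      delete obj;
      return result;
    }

This mirrors the commented pseudocode in the removed tests: the object escapes only on the LEFT path, which is why the assertions expect ReachableBlocks() to exclude "left" while keeping "entry", "right" and "exit".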
-// // ENTRY -// obj = new Obj(); -// obj.field = 10; -// if (parameter_value) { -// // LEFT -// (Foo)obj; -// } else { -// // RIGHT -// obj.field = 20; -// } -// // EXIT -// obj.field; -TEST_F(LoadStoreAnalysisTest, PartialEscape4) { - CreateGraph(); - AdjacencyListGraph blks(SetupFromAdjacencyList( - "entry", - "exit", - { { "entry", "left" }, { "entry", "right" }, { "left", "exit" }, { "right", "exit" } })); - HBasicBlock* entry = blks.Get("entry"); - HBasicBlock* left = blks.Get("left"); - HBasicBlock* right = blks.Get("right"); - HBasicBlock* exit = blks.Get("exit"); - - HInstruction* bool_value = new (GetAllocator()) - HParameterValue(graph_->GetDexFile(), dex::TypeIndex(1), 1, DataType::Type::kBool); - HInstruction* c10 = graph_->GetIntConstant(10); - HInstruction* c20 = graph_->GetIntConstant(20); - HInstruction* cls = MakeClassLoad(); - HInstruction* new_inst = MakeNewInstance(cls); - - HInstruction* write_entry = MakeIFieldSet(new_inst, c10, MemberOffset(32)); - HInstruction* if_inst = new (GetAllocator()) HIf(bool_value); - entry->AddInstruction(bool_value); - entry->AddInstruction(cls); - entry->AddInstruction(new_inst); - entry->AddInstruction(write_entry); - entry->AddInstruction(if_inst); - - ScopedNullHandle<mirror::Class> null_klass_; - HInstruction* cls2 = MakeClassLoad(); - HInstruction* check_cast = new (GetAllocator()) HCheckCast( - new_inst, cls2, TypeCheckKind::kExactCheck, null_klass_, 0, GetAllocator(), nullptr, nullptr); - HInstruction* goto_left = new (GetAllocator()) HGoto(); - left->AddInstruction(cls2); - left->AddInstruction(check_cast); - left->AddInstruction(goto_left); - - HInstruction* write_right = MakeIFieldSet(new_inst, c20, MemberOffset(32)); - HInstruction* goto_right = new (GetAllocator()) HGoto(); - right->AddInstruction(write_right); - right->AddInstruction(goto_right); - - HInstruction* read_final = MakeIFieldGet(new_inst, DataType::Type::kInt32, MemberOffset(32)); - exit->AddInstruction(read_final); - - ScopedArenaAllocator allocator(graph_->GetArenaStack()); - LoadStoreAnalysis lsa(graph_, nullptr, &allocator, LoadStoreAnalysisType::kFull); - lsa.Run(); - - const HeapLocationCollector& heap_location_collector = lsa.GetHeapLocationCollector(); - ReferenceInfo* info = heap_location_collector.FindReferenceInfoOf(new_inst); - ASSERT_TRUE(info->IsPartialSingleton()); - const ExecutionSubgraph* esg = info->GetNoEscapeSubgraph(); - - ASSERT_TRUE(esg->IsValid()); - ASSERT_TRUE(IsValidSubgraph(esg)); - ASSERT_TRUE(AreExclusionsIndependent(graph_, esg)); - std::unordered_set<const HBasicBlock*> contents(esg->ReachableBlocks().begin(), - esg->ReachableBlocks().end()); - - ASSERT_EQ(contents.size(), 3u); - ASSERT_TRUE(contents.find(blks.Get("left")) == contents.end()); - - ASSERT_TRUE(contents.find(blks.Get("right")) != contents.end()); - ASSERT_TRUE(contents.find(blks.Get("entry")) != contents.end()); - ASSERT_TRUE(contents.find(blks.Get("exit")) != contents.end()); -} - -// For simplicity Partial LSE considers instance-ofs with bitvectors to escape. 
-// // ENTRY -// obj = new Obj(); -// obj.field = 10; -// if (parameter_value) { -// // LEFT -// obj instanceof /*bitvector*/ Foo; -// } else { -// // RIGHT -// obj.field = 20; -// } -// // EXIT -// obj.field; -TEST_F(LoadStoreAnalysisTest, PartialEscape5) { - ScopedObjectAccess soa(Thread::Current()); - VariableSizedHandleScope vshs(soa.Self()); - CreateGraph(&vshs); - AdjacencyListGraph blks(SetupFromAdjacencyList( - "entry", - "exit", - { { "entry", "left" }, { "entry", "right" }, { "left", "exit" }, { "right", "exit" } })); - HBasicBlock* entry = blks.Get("entry"); - HBasicBlock* left = blks.Get("left"); - HBasicBlock* right = blks.Get("right"); - HBasicBlock* exit = blks.Get("exit"); - - HInstruction* bool_value = new (GetAllocator()) - HParameterValue(graph_->GetDexFile(), dex::TypeIndex(1), 1, DataType::Type::kBool); - HInstruction* c10 = graph_->GetIntConstant(10); - HInstruction* c20 = graph_->GetIntConstant(20); - HIntConstant* bs1 = graph_->GetIntConstant(0xffff); - HIntConstant* bs2 = graph_->GetIntConstant(0x00ff); - HInstruction* cls = MakeClassLoad(); - HInstruction* null_const = graph_->GetNullConstant(); - HInstruction* new_inst = MakeNewInstance(cls); - - HInstruction* write_entry = MakeIFieldSet(new_inst, c10, MemberOffset(32)); - HInstruction* if_inst = new (GetAllocator()) HIf(bool_value); - entry->AddInstruction(bool_value); - entry->AddInstruction(cls); - entry->AddInstruction(new_inst); - entry->AddInstruction(write_entry); - entry->AddInstruction(if_inst); - - ScopedNullHandle<mirror::Class> null_klass_; - HInstruction* instanceof = new (GetAllocator()) HInstanceOf(new_inst, - null_const, - TypeCheckKind::kBitstringCheck, - null_klass_, - 0, - GetAllocator(), - bs1, - bs2); - HInstruction* goto_left = new (GetAllocator()) HGoto(); - left->AddInstruction(instanceof); - left->AddInstruction(goto_left); - - HInstruction* write_right = MakeIFieldSet(new_inst, c20, MemberOffset(32)); - HInstruction* goto_right = new (GetAllocator()) HGoto(); - right->AddInstruction(write_right); - right->AddInstruction(goto_right); - - HInstruction* read_final = MakeIFieldGet(new_inst, DataType::Type::kInt32, MemberOffset(32)); - exit->AddInstruction(read_final); - - ScopedArenaAllocator allocator(graph_->GetArenaStack()); - LoadStoreAnalysis lsa(graph_, nullptr, &allocator, LoadStoreAnalysisType::kFull); - lsa.Run(); - - const HeapLocationCollector& heap_location_collector = lsa.GetHeapLocationCollector(); - ReferenceInfo* info = heap_location_collector.FindReferenceInfoOf(new_inst); - ASSERT_TRUE(info->IsPartialSingleton()); - const ExecutionSubgraph* esg = info->GetNoEscapeSubgraph(); - - ASSERT_TRUE(esg->IsValid()); - ASSERT_TRUE(IsValidSubgraph(esg)); - ASSERT_TRUE(AreExclusionsIndependent(graph_, esg)); - std::unordered_set<const HBasicBlock*> contents(esg->ReachableBlocks().begin(), - esg->ReachableBlocks().end()); - - ASSERT_EQ(contents.size(), 3u); - ASSERT_TRUE(contents.find(blks.Get("left")) == contents.end()); - - ASSERT_TRUE(contents.find(blks.Get("right")) != contents.end()); - ASSERT_TRUE(contents.find(blks.Get("entry")) != contents.end()); - ASSERT_TRUE(contents.find(blks.Get("exit")) != contents.end()); -} - -// before we had predicated-set we needed to be able to remove the store as -// well. This test makes sure that still works. -// // ENTRY -// obj = new Obj(); -// if (parameter_value) { -// // LEFT -// call_func(obj); -// } else { -// // RIGHT -// obj.f1 = 0; -// } -// // EXIT -// // call_func prevents the elimination of this store. 
-// obj.f2 = 0; -TEST_F(LoadStoreAnalysisTest, TotalEscapeAdjacentNoPredicated) { - CreateGraph(); - AdjacencyListGraph blks(SetupFromAdjacencyList( - "entry", - "exit", - {{"entry", "left"}, {"entry", "right"}, {"left", "exit"}, {"right", "exit"}})); - HBasicBlock* entry = blks.Get("entry"); - HBasicBlock* left = blks.Get("left"); - HBasicBlock* right = blks.Get("right"); - HBasicBlock* exit = blks.Get("exit"); - - HInstruction* bool_value = new (GetAllocator()) - HParameterValue(graph_->GetDexFile(), dex::TypeIndex(1), 1, DataType::Type::kBool); - HInstruction* c0 = graph_->GetIntConstant(0); - HInstruction* cls = new (GetAllocator()) HLoadClass(graph_->GetCurrentMethod(), - dex::TypeIndex(10), - graph_->GetDexFile(), - ScopedNullHandle<mirror::Class>(), - false, - 0, - false); - HInstruction* new_inst = - new (GetAllocator()) HNewInstance(cls, - 0, - dex::TypeIndex(10), - graph_->GetDexFile(), - false, - QuickEntrypointEnum::kQuickAllocObjectInitialized); - HInstruction* if_inst = new (GetAllocator()) HIf(bool_value); - entry->AddInstruction(bool_value); - entry->AddInstruction(cls); - entry->AddInstruction(new_inst); - entry->AddInstruction(if_inst); - - HInstruction* call_left = new (GetAllocator()) - HInvokeStaticOrDirect(GetAllocator(), - 1, - DataType::Type::kVoid, - 0, - {nullptr, 0}, - nullptr, - {}, - InvokeType::kStatic, - {nullptr, 0}, - HInvokeStaticOrDirect::ClinitCheckRequirement::kNone, - !graph_->IsDebuggable()); - HInstruction* goto_left = new (GetAllocator()) HGoto(); - call_left->AsInvoke()->SetRawInputAt(0, new_inst); - left->AddInstruction(call_left); - left->AddInstruction(goto_left); - - HInstruction* write_right = new (GetAllocator()) HInstanceFieldSet(new_inst, - c0, - nullptr, - DataType::Type::kInt32, - MemberOffset(32), - false, - 0, - 0, - graph_->GetDexFile(), - 0); - HInstruction* goto_right = new (GetAllocator()) HGoto(); - right->AddInstruction(write_right); - right->AddInstruction(goto_right); - - HInstruction* write_final = new (GetAllocator()) HInstanceFieldSet(new_inst, - c0, - nullptr, - DataType::Type::kInt32, - MemberOffset(16), - false, - 0, - 0, - graph_->GetDexFile(), - 0); - exit->AddInstruction(write_final); - - ScopedArenaAllocator allocator(graph_->GetArenaStack()); - graph_->ClearDominanceInformation(); - graph_->BuildDominatorTree(); - LoadStoreAnalysis lsa( - graph_, nullptr, &allocator, LoadStoreAnalysisType::kNoPredicatedInstructions); - lsa.Run(); - - const HeapLocationCollector& heap_location_collector = lsa.GetHeapLocationCollector(); - ReferenceInfo* info = heap_location_collector.FindReferenceInfoOf(new_inst); - ASSERT_FALSE(info->IsPartialSingleton()); -} - -// With predicated-set we can (partially) remove the store as well. -// // ENTRY -// obj = new Obj(); -// if (parameter_value) { -// // LEFT -// call_func(obj); -// } else { -// // RIGHT -// obj.f1 = 0; -// } -// // EXIT -// // call_func prevents the elimination of this store. 
-// obj.f2 = 0; -TEST_F(LoadStoreAnalysisTest, TotalEscapeAdjacent) { - CreateGraph(); - AdjacencyListGraph blks(SetupFromAdjacencyList( - "entry", - "exit", - { { "entry", "left" }, { "entry", "right" }, { "left", "exit" }, { "right", "exit" } })); - HBasicBlock* entry = blks.Get("entry"); - HBasicBlock* left = blks.Get("left"); - HBasicBlock* right = blks.Get("right"); - HBasicBlock* exit = blks.Get("exit"); - - HInstruction* bool_value = new (GetAllocator()) - HParameterValue(graph_->GetDexFile(), dex::TypeIndex(1), 1, DataType::Type::kBool); - HInstruction* c0 = graph_->GetIntConstant(0); - HInstruction* cls = new (GetAllocator()) HLoadClass(graph_->GetCurrentMethod(), - dex::TypeIndex(10), - graph_->GetDexFile(), - ScopedNullHandle<mirror::Class>(), - false, - 0, - false); - HInstruction* new_inst = - new (GetAllocator()) HNewInstance(cls, - 0, - dex::TypeIndex(10), - graph_->GetDexFile(), - false, - QuickEntrypointEnum::kQuickAllocObjectInitialized); - HInstruction* if_inst = new (GetAllocator()) HIf(bool_value); - entry->AddInstruction(bool_value); - entry->AddInstruction(cls); - entry->AddInstruction(new_inst); - entry->AddInstruction(if_inst); - - HInstruction* call_left = new (GetAllocator()) - HInvokeStaticOrDirect(GetAllocator(), - 1, - DataType::Type::kVoid, - 0, - { nullptr, 0 }, - nullptr, - {}, - InvokeType::kStatic, - { nullptr, 0 }, - HInvokeStaticOrDirect::ClinitCheckRequirement::kNone, - !graph_->IsDebuggable()); - HInstruction* goto_left = new (GetAllocator()) HGoto(); - call_left->AsInvoke()->SetRawInputAt(0, new_inst); - left->AddInstruction(call_left); - left->AddInstruction(goto_left); - - HInstruction* write_right = new (GetAllocator()) HInstanceFieldSet(new_inst, - c0, - nullptr, - DataType::Type::kInt32, - MemberOffset(32), - false, - 0, - 0, - graph_->GetDexFile(), - 0); - HInstruction* goto_right = new (GetAllocator()) HGoto(); - right->AddInstruction(write_right); - right->AddInstruction(goto_right); - - HInstruction* write_final = new (GetAllocator()) HInstanceFieldSet(new_inst, - c0, - nullptr, - DataType::Type::kInt32, - MemberOffset(16), - false, - 0, - 0, - graph_->GetDexFile(), - 0); - exit->AddInstruction(write_final); - - ScopedArenaAllocator allocator(graph_->GetArenaStack()); - graph_->ClearDominanceInformation(); - graph_->BuildDominatorTree(); - LoadStoreAnalysis lsa(graph_, nullptr, &allocator, LoadStoreAnalysisType::kFull); - lsa.Run(); - - const HeapLocationCollector& heap_location_collector = lsa.GetHeapLocationCollector(); - ReferenceInfo* info = heap_location_collector.FindReferenceInfoOf(new_inst); - ASSERT_TRUE(info->IsPartialSingleton()); - const ExecutionSubgraph* esg = info->GetNoEscapeSubgraph(); - - EXPECT_TRUE(esg->IsValid()) << esg->GetExcludedCohorts(); - EXPECT_TRUE(IsValidSubgraph(esg)); - std::unordered_set<const HBasicBlock*> contents(esg->ReachableBlocks().begin(), - esg->ReachableBlocks().end()); - - EXPECT_EQ(contents.size(), 3u); - EXPECT_TRUE(contents.find(blks.Get("left")) == contents.end()); - EXPECT_FALSE(contents.find(blks.Get("right")) == contents.end()); - EXPECT_FALSE(contents.find(blks.Get("entry")) == contents.end()); - EXPECT_FALSE(contents.find(blks.Get("exit")) == contents.end()); -} - // // ENTRY // obj = new Obj(); // if (parameter_value) { @@ -1626,7 +867,7 @@ TEST_F(LoadStoreAnalysisTest, TotalEscape) { HInvokeStaticOrDirect::ClinitCheckRequirement::kNone, !graph_->IsDebuggable()); HInstruction* goto_left = new (GetAllocator()) HGoto(); - call_left->AsInvoke()->SetRawInputAt(0, new_inst); + 
call_left->SetRawInputAt(0, new_inst); left->AddInstruction(call_left); left->AddInstruction(goto_left); @@ -1653,7 +894,7 @@ TEST_F(LoadStoreAnalysisTest, TotalEscape) { graph_->GetDexFile(), 0); HInstruction* goto_right = new (GetAllocator()) HGoto(); - call_right->AsInvoke()->SetRawInputAt(0, new_inst); + call_right->SetRawInputAt(0, new_inst); right->AddInstruction(write_right); right->AddInstruction(call_right); right->AddInstruction(goto_right); @@ -1670,12 +911,12 @@ TEST_F(LoadStoreAnalysisTest, TotalEscape) { exit->AddInstruction(read_final); ScopedArenaAllocator allocator(graph_->GetArenaStack()); - LoadStoreAnalysis lsa(graph_, nullptr, &allocator, LoadStoreAnalysisType::kFull); + LoadStoreAnalysis lsa(graph_, nullptr, &allocator); lsa.Run(); const HeapLocationCollector& heap_location_collector = lsa.GetHeapLocationCollector(); ReferenceInfo* info = heap_location_collector.FindReferenceInfoOf(new_inst); - ASSERT_FALSE(info->IsPartialSingleton()); + ASSERT_FALSE(info->IsSingleton()); } // // ENTRY @@ -1725,12 +966,12 @@ TEST_F(LoadStoreAnalysisTest, TotalEscape2) { exit->AddInstruction(return_final); ScopedArenaAllocator allocator(graph_->GetArenaStack()); - LoadStoreAnalysis lsa(graph_, nullptr, &allocator, LoadStoreAnalysisType::kFull); + LoadStoreAnalysis lsa(graph_, nullptr, &allocator); lsa.Run(); const HeapLocationCollector& heap_location_collector = lsa.GetHeapLocationCollector(); ReferenceInfo* info = heap_location_collector.FindReferenceInfoOf(new_inst); - ASSERT_FALSE(info->IsPartialSingleton()); + ASSERT_TRUE(info->IsSingletonAndNonRemovable()); } // // ENTRY @@ -1813,7 +1054,7 @@ TEST_F(LoadStoreAnalysisTest, DoubleDiamondEscape) { HInvokeStaticOrDirect::ClinitCheckRequirement::kNone, !graph_->IsDebuggable()); HInstruction* goto_left = new (GetAllocator()) HGoto(); - call_left->AsInvoke()->SetRawInputAt(0, new_inst); + call_left->SetRawInputAt(0, new_inst); high_left->AddInstruction(call_left); high_left->AddInstruction(goto_left); @@ -1870,7 +1111,7 @@ TEST_F(LoadStoreAnalysisTest, DoubleDiamondEscape) { HInvokeStaticOrDirect::ClinitCheckRequirement::kNone, !graph_->IsDebuggable()); HInstruction* goto_low_left = new (GetAllocator()) HGoto(); - call_low_left->AsInvoke()->SetRawInputAt(0, new_inst); + call_low_left->SetRawInputAt(0, new_inst); low_left->AddInstruction(call_low_left); low_left->AddInstruction(goto_low_left); @@ -1900,12 +1141,12 @@ TEST_F(LoadStoreAnalysisTest, DoubleDiamondEscape) { exit->AddInstruction(read_final); ScopedArenaAllocator allocator(graph_->GetArenaStack()); - LoadStoreAnalysis lsa(graph_, nullptr, &allocator, LoadStoreAnalysisType::kFull); + LoadStoreAnalysis lsa(graph_, nullptr, &allocator); lsa.Run(); const HeapLocationCollector& heap_location_collector = lsa.GetHeapLocationCollector(); ReferenceInfo* info = heap_location_collector.FindReferenceInfoOf(new_inst); - ASSERT_FALSE(info->IsPartialSingleton()); + ASSERT_FALSE(info->IsSingleton()); } // // ENTRY @@ -2030,7 +1271,7 @@ TEST_F(LoadStoreAnalysisTest, PartialPhiPropagation1) { HInstruction* goto_left_merge = new (GetAllocator()) HGoto(); left_phi->SetRawInputAt(0, obj_param); left_phi->SetRawInputAt(1, new_inst); - call_left->AsInvoke()->SetRawInputAt(0, left_phi); + call_left->SetRawInputAt(0, left_phi); left_merge->AddPhi(left_phi); left_merge->AddInstruction(call_left); left_merge->AddInstruction(goto_left_merge); @@ -2065,11 +1306,11 @@ TEST_F(LoadStoreAnalysisTest, PartialPhiPropagation1) { graph_->BuildDominatorTree(); ScopedArenaAllocator allocator(graph_->GetArenaStack()); 
- LoadStoreAnalysis lsa(graph_, nullptr, &allocator, LoadStoreAnalysisType::kFull); + LoadStoreAnalysis lsa(graph_, nullptr, &allocator); lsa.Run(); const HeapLocationCollector& heap_location_collector = lsa.GetHeapLocationCollector(); ReferenceInfo* info = heap_location_collector.FindReferenceInfoOf(new_inst); - ASSERT_FALSE(info->IsPartialSingleton()); + ASSERT_FALSE(info->IsSingleton()); } } // namespace art diff --git a/compiler/optimizing/load_store_elimination.cc b/compiler/optimizing/load_store_elimination.cc index 9cabb12a9f..2e5ee84d76 100644 --- a/compiler/optimizing/load_store_elimination.cc +++ b/compiler/optimizing/load_store_elimination.cc @@ -33,13 +33,11 @@ #include "base/scoped_arena_containers.h" #include "base/transform_iterator.h" #include "escape.h" -#include "execution_subgraph.h" #include "handle.h" #include "load_store_analysis.h" #include "mirror/class_loader.h" #include "mirror/dex_cache.h" #include "nodes.h" -#include "optimizing/execution_subgraph.h" #include "optimizing_compiler_stats.h" #include "reference_type_propagation.h" #include "side_effects_analysis.h" @@ -94,9 +92,7 @@ * to maintain the validity of all heap locations during the optimization * phase, we only record substitutes at this phase and the real elimination * is delayed till the end of LSE. Loads that require a loop Phi placeholder - * replacement are recorded for processing later. We also keep track of the - * heap-value at the start load so that later partial-LSE can predicate the - * load. + * replacement are recorded for processing later. * - If the instruction is a store, it updates the heap value for the heap * location with the stored value and records the store itself so that we can * mark it for keeping if the value becomes observable. Heap values are @@ -240,79 +236,6 @@ * The time complexity of this phase is * O(instructions + instruction_uses) . * - * 5. Partial LSE - * - * Move allocations closer to their escapes and remove/predicate loads and - * stores as required. - * - * Partial singletons are objects which only escape from the function or have - * multiple names along certain execution paths. In cases where we recognize - * these partial singletons we can move the allocation and initialization - * closer to the actual escape(s). We can then perform a simplified version of - * LSE step 2 to determine the unescaped value of any reads performed after the - * object may have escaped. These are used to replace these reads with - * 'predicated-read' instructions where the value is only read if the object - * has actually escaped. We use the existence of the object itself as the - * marker of whether escape has occurred. - * - * There are several steps in this sub-pass - * - * 5.1 Group references - * - * Since all heap-locations for a single reference escape at the same time, we - * need to group the heap-locations by reference and process them at the same - * time. - * - * O(heap_locations). - * - * FIXME: The time complexity above assumes we can bucket the heap-locations in - * O(1) which is not true since we just perform a linear-scan of the heap-ref - * list. Since there are generally only a small number of heap-references which - * are partial-singletons this is fine and lower real overhead than a hash map. - * - * 5.2 Generate materializations - * - * Once we have the references we add new 'materialization blocks' on the edges - * where escape becomes inevitable. This information is calculated by the - * execution-subgraphs created during load-store-analysis. 
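Note: the header comment being deleted in this hunk describes what the partial pass used to produce — a materialization on each edge where escape becomes inevitable, plus predicated loads that fall back to the locally tracked heap value, with the existence of the materialized object acting as the escape marker. A minimal plain-C++ sketch of that end result, assuming the same two-branch shape as in the sketch above (none of these names are ART IR):

    struct Obj { int field = 0; };
    void call_func(Obj*) {}        // opaque escape

    int after_partial_lse(bool parameter_value) {
      int field_value = 0;         // heap value tracked like a register
      Obj* materialized = nullptr; // non-null only once the object has escaped
      if (parameter_value) {
        materialized = new Obj();  // materialization block on the escaping edge
        materialized->field = field_value;
        call_func(materialized);   // the actual escape
      } else {
        field_value = 1;           // store on the non-escaping path stays local
      }
      // Predicated read: use the escaped object's field if a materialization
      // exists, otherwise the value tracked locally.
      int result = (materialized != nullptr) ? materialized->field : field_value;
      delete materialized;
      return result;
    }

The conditional at the end corresponds to the predicated-get the deleted text refers to: the load goes to memory only when the object was actually materialized.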
We create new - * 'materialization's in these blocks and initialize them with the value of - * each heap-location ignoring side effects (since the object hasn't escaped - * yet). Worst case this is the same time-complexity as step 3 since we may - * need to materialize phis. - * - * O(heap_locations^2 * materialization_edges) - * - * 5.3 Propagate materializations - * - * Since we use the materialization as the marker for escape we need to - * propagate it throughout the graph. Since the subgraph analysis considers any - * lifetime that escapes a loop (and hence would require a loop-phi) to be - * escaping at the loop-header we do not need to create any loop-phis to do - * this. - * - * O(edges) - * - * NB: Currently the subgraph analysis considers all objects to have their - * lifetimes start at the entry block. This simplifies that analysis enormously - * but means that we cannot distinguish between an escape in a loop where the - * lifetime does not escape the loop (in which case this pass could optimize) - * and one where it does escape the loop (in which case the whole loop is - * escaping). This is a shortcoming that would be good to fix at some point. - * - * 5.4 Propagate partial values - * - * We need to replace loads and stores to the partial reference with predicated - * ones that have default non-escaping values. Again this is the same as step 3. - * - * O(heap_locations^2 * edges) - * - * 5.5 Final fixup - * - * Now all we need to do is replace and remove uses of the old reference with the - * appropriate materialization. - * - * O(instructions + uses) - * * FIXME: The time complexities described above assumes that the * HeapLocationCollector finds a heap location for an instruction in O(1) * time but it is currently O(heap_locations); this can be fixed by adding @@ -324,7 +247,6 @@ namespace art HIDDEN { #define LSE_VLOG \ if (::art::LoadStoreElimination::kVerboseLoggingMode && VLOG_IS_ON(compiler)) LOG(INFO) -class PartialLoadStoreEliminationHelper; class HeapRefHolder; // Use HGraphDelegateVisitor for which all VisitInvokeXXX() delegate to VisitInvoke(). @@ -332,7 +254,6 @@ class LSEVisitor final : private HGraphDelegateVisitor { public: LSEVisitor(HGraph* graph, const HeapLocationCollector& heap_location_collector, - bool perform_partial_lse, OptimizingCompilerStats* stats); void Run(); @@ -615,27 +536,7 @@ class LSEVisitor final : private HGraphDelegateVisitor { return PhiPlaceholderIndex(phi_placeholder.GetPhiPlaceholder()); } - bool IsEscapingObject(ReferenceInfo* info, HBasicBlock* block, size_t index) { - return !info->IsSingletonAndRemovable() && - !(info->IsPartialSingleton() && IsPartialNoEscape(block, index)); - } - - bool IsPartialNoEscape(HBasicBlock* blk, size_t idx) { - auto* ri = heap_location_collector_.GetHeapLocation(idx)->GetReferenceInfo(); - if (!ri->IsPartialSingleton()) { - return false; - } - ArrayRef<const ExecutionSubgraph::ExcludedCohort> cohorts = - ri->GetNoEscapeSubgraph()->GetExcludedCohorts(); - return std::none_of(cohorts.cbegin(), - cohorts.cend(), - [&](const ExecutionSubgraph::ExcludedCohort& ex) -> bool { - // Make sure we haven't yet and never will escape. 
- return ex.PrecedesBlock(blk) || - ex.ContainsBlock(blk) || - ex.SucceedsBlock(blk); - }); - } + bool IsEscapingObject(ReferenceInfo* info) { return !info->IsSingletonAndRemovable(); } PhiPlaceholder GetPhiPlaceholderAt(size_t off) const { DCHECK_LT(off, num_phi_placeholders_); @@ -652,9 +553,7 @@ class LSEVisitor final : private HGraphDelegateVisitor { } Value Replacement(Value value) const { - DCHECK(value.NeedsPhi() || - (current_phase_ == Phase::kPartialElimination && value.IsMergedUnknown())) - << value << " phase: " << current_phase_; + DCHECK(value.NeedsPhi()) << value << " phase: " << current_phase_; Value replacement = phi_placeholder_replacements_[PhiPlaceholderIndex(value)]; DCHECK(replacement.IsUnknown() || replacement.IsInstruction()); DCHECK(replacement.IsUnknown() || @@ -663,35 +562,6 @@ class LSEVisitor final : private HGraphDelegateVisitor { } Value ReplacementOrValue(Value value) const { - if (current_phase_ == Phase::kPartialElimination) { - // In this phase we are materializing the default values which are used - // only if the partial singleton did not escape, so we can replace - // a partial unknown with the prior value. - if (value.IsPartialUnknown()) { - value = value.GetPriorValue().ToValue(); - } - if ((value.IsMergedUnknown() || value.NeedsPhi()) && - phi_placeholder_replacements_[PhiPlaceholderIndex(value)].IsValid()) { - value = phi_placeholder_replacements_[PhiPlaceholderIndex(value)]; - DCHECK(!value.IsMergedUnknown()); - DCHECK(!value.NeedsPhi()); - } else if (value.IsMergedUnknown()) { - return Value::ForLoopPhiPlaceholder(value.GetPhiPlaceholder()); - } - if (value.IsInstruction() && value.GetInstruction()->IsInstanceFieldGet()) { - DCHECK_LT(static_cast<size_t>(value.GetInstruction()->GetId()), - substitute_instructions_for_loads_.size()); - HInstruction* substitute = - substitute_instructions_for_loads_[value.GetInstruction()->GetId()]; - if (substitute != nullptr) { - DCHECK(substitute->IsPredicatedInstanceFieldGet()); - return Value::ForInstruction(substitute); - } - } - DCHECK_IMPLIES(value.IsInstruction(), - FindSubstitute(value.GetInstruction()) == value.GetInstruction()); - return value; - } if (value.NeedsPhi() && phi_placeholder_replacements_[PhiPlaceholderIndex(value)].IsValid()) { return Replacement(value); } else { @@ -752,8 +622,8 @@ class LSEVisitor final : private HGraphDelegateVisitor { HInstruction* FindSubstitute(HInstruction* instruction) const { size_t id = static_cast<size_t>(instruction->GetId()); if (id >= substitute_instructions_for_loads_.size()) { - // New Phi (may not be in the graph yet), default value or PredicatedInstanceFieldGet. - DCHECK_IMPLIES(IsLoad(instruction), instruction->IsPredicatedInstanceFieldGet()); + // New Phi (may not be in the graph yet), or default value. + DCHECK(!IsLoad(instruction)); return instruction; } HInstruction* substitute = substitute_instructions_for_loads_[id]; @@ -789,7 +659,6 @@ class LSEVisitor final : private HGraphDelegateVisitor { static bool IsLoad(HInstruction* instruction) { // Unresolved load is not treated as a load. 
return instruction->IsInstanceFieldGet() || - instruction->IsPredicatedInstanceFieldGet() || instruction->IsStaticFieldGet() || instruction->IsVecLoad() || instruction->IsArrayGet(); @@ -818,12 +687,7 @@ class LSEVisitor final : private HGraphDelegateVisitor { if (value.IsPureUnknown() || value.IsPartialUnknown()) { return; } - if (value.IsMergedUnknown()) { - kept_merged_unknowns_.SetBit(PhiPlaceholderIndex(value)); - phi_placeholders_to_search_for_kept_stores_.SetBit(PhiPlaceholderIndex(value)); - return; - } - if (value.NeedsPhi()) { + if (value.IsMergedUnknown() || value.NeedsPhi()) { phi_placeholders_to_search_for_kept_stores_.SetBit(PhiPlaceholderIndex(value)); } else { HInstruction* instruction = value.GetInstruction(); @@ -843,9 +707,7 @@ class LSEVisitor final : private HGraphDelegateVisitor { // We use this function when reading a location with unknown value and // therefore we cannot know what exact store wrote that unknown value. // But we can have a phi placeholder here marking multiple stores to keep. - DCHECK( - !heap_values[i].stored_by.IsInstruction() || - heap_location_collector_.GetHeapLocation(i)->GetReferenceInfo()->IsPartialSingleton()); + DCHECK(!heap_values[i].stored_by.IsInstruction()); KeepStores(heap_values[i].stored_by); heap_values[i].stored_by = Value::PureUnknown(); } else if (heap_location_collector_.MayAlias(i, loc_index)) { @@ -925,7 +787,6 @@ class LSEVisitor final : private HGraphDelegateVisitor { enum class Phase { kLoadElimination, kStoreElimination, - kPartialElimination, }; bool MayAliasOnBackEdge(HBasicBlock* loop_header, size_t idx1, size_t idx2) const; @@ -958,21 +819,6 @@ class LSEVisitor final : private HGraphDelegateVisitor { void FindOldValueForPhiPlaceholder(PhiPlaceholder phi_placeholder, DataType::Type type); void FindStoresWritingOldValues(); void FinishFullLSE(); - void PrepareForPartialPhiComputation(); - // Create materialization block and materialization object for the given predecessor of entry. - HInstruction* SetupPartialMaterialization(PartialLoadStoreEliminationHelper& helper, - HeapRefHolder&& holder, - size_t pred_idx, - HBasicBlock* blk); - // Returns the value that would be read by the 'read' instruction on - // 'orig_new_inst' if 'orig_new_inst' has not escaped. - HInstruction* GetPartialValueAt(HNewInstance* orig_new_inst, HInstruction* read); - void MovePartialEscapes(); - - void VisitPredicatedInstanceFieldGet(HPredicatedInstanceFieldGet* instruction) override { - LOG(FATAL) << "Visited instruction " << instruction->DumpWithoutArgs() - << " but LSE should be the only source of predicated-ifield-gets!"; - } void HandleAcquireLoad(HInstruction* instruction) { DCHECK((instruction->IsInstanceFieldGet() && instruction->AsInstanceFieldGet()->IsVolatile()) || @@ -1080,10 +926,12 @@ class LSEVisitor final : private HGraphDelegateVisitor { } void VisitVecLoad(HVecLoad* instruction) override { + DCHECK(!instruction->IsPredicated()); VisitGetLocation(instruction, heap_location_collector_.GetArrayHeapLocation(instruction)); } void VisitVecStore(HVecStore* instruction) override { + DCHECK(!instruction->IsPredicated()); size_t idx = heap_location_collector_.GetArrayHeapLocation(instruction); VisitSetLocation(instruction, idx, instruction->GetValue()); } @@ -1107,7 +955,7 @@ class LSEVisitor final : private HGraphDelegateVisitor { // Finalizable objects always escape. 
const bool finalizable_object = reference->IsNewInstance() && reference->AsNewInstance()->IsFinalizable(); - if (!finalizable_object && !IsEscapingObject(info, block, i)) { + if (!finalizable_object && !IsEscapingObject(info)) { // Check whether the reference for a store is used by an environment local of // the HDeoptimize. If not, the singleton is not observed after deoptimization. const HUseList<HEnvironment*>& env_uses = reference->GetEnvUses(); @@ -1131,7 +979,7 @@ class LSEVisitor final : private HGraphDelegateVisitor { ScopedArenaVector<ValueRecord>& heap_values = heap_values_for_[block->GetBlockId()]; for (size_t i = 0u, size = heap_values.size(); i != size; ++i) { ReferenceInfo* ref_info = heap_location_collector_.GetHeapLocation(i)->GetReferenceInfo(); - if (must_keep_stores || IsEscapingObject(ref_info, block, i)) { + if (must_keep_stores || IsEscapingObject(ref_info)) { KeepStores(heap_values[i].stored_by); heap_values[i].stored_by = Value::PureUnknown(); } @@ -1214,30 +1062,9 @@ class LSEVisitor final : private HGraphDelegateVisitor { heap_values_for_[instruction->GetBlock()->GetBlockId()]; for (size_t i = 0u, size = heap_values.size(); i != size; ++i) { ReferenceInfo* ref_info = heap_location_collector_.GetHeapLocation(i)->GetReferenceInfo(); - HBasicBlock* blk = instruction->GetBlock(); // We don't need to do anything if the reference has not escaped at this point. - // This is true if either we (1) never escape or (2) sometimes escape but - // there is no possible execution where we have done so at this time. NB - // We count being in the excluded cohort as escaping. Technically, this is - // a bit over-conservative (since we can have multiple non-escaping calls - // before a single escaping one) but this simplifies everything greatly. - auto partial_singleton_did_not_escape = [](ReferenceInfo* ref_info, HBasicBlock* blk) { - DCHECK(ref_info->IsPartialSingleton()); - if (!ref_info->GetNoEscapeSubgraph()->ContainsBlock(blk)) { - return false; - } - ArrayRef<const ExecutionSubgraph::ExcludedCohort> cohorts = - ref_info->GetNoEscapeSubgraph()->GetExcludedCohorts(); - return std::none_of(cohorts.begin(), - cohorts.end(), - [&](const ExecutionSubgraph::ExcludedCohort& cohort) { - return cohort.PrecedesBlock(blk); - }); - }; - if (!can_throw_inside_a_try && - (ref_info->IsSingleton() || - // partial and we aren't currently escaping and we haven't escaped yet. - (ref_info->IsPartialSingleton() && partial_singleton_did_not_escape(ref_info, blk)))) { + // This is true if we never escape. + if (!can_throw_inside_a_try && ref_info->IsSingleton()) { // Singleton references cannot be seen by the callee. } else { if (can_throw || side_effects.DoesAnyRead() || side_effects.DoesAnyWrite()) { @@ -1313,7 +1140,7 @@ class LSEVisitor final : private HGraphDelegateVisitor { heap_values[i].value = Value::ForInstruction(new_instance->GetLoadClass()); heap_values[i].stored_by = Value::PureUnknown(); } - } else if (inside_a_try || IsEscapingObject(info, block, i)) { + } else if (inside_a_try || IsEscapingObject(info)) { // Since NewInstance can throw, we presume all previous stores could be visible. KeepStores(heap_values[i].stored_by); heap_values[i].stored_by = Value::PureUnknown(); @@ -1348,7 +1175,7 @@ class LSEVisitor final : private HGraphDelegateVisitor { // Array elements are set to default heap values. 
heap_values[i].value = Value::Default(); heap_values[i].stored_by = Value::PureUnknown(); - } else if (inside_a_try || IsEscapingObject(info, block, i)) { + } else if (inside_a_try || IsEscapingObject(info)) { // Since NewArray can throw, we presume all previous stores could be visible. KeepStores(heap_values[i].stored_by); heap_values[i].stored_by = Value::PureUnknown(); @@ -1361,12 +1188,6 @@ class LSEVisitor final : private HGraphDelegateVisitor { DCHECK(!instruction->CanThrow()); } - bool ShouldPerformPartialLSE() const { - return perform_partial_lse_ && !GetGraph()->IsCompilingOsr(); - } - - bool perform_partial_lse_; - const HeapLocationCollector& heap_location_collector_; // Use local allocator for allocating memory. @@ -1423,10 +1244,6 @@ class LSEVisitor final : private HGraphDelegateVisitor { // The invalid heap value is used to mark Phi placeholders that cannot be replaced. ScopedArenaVector<Value> phi_placeholder_replacements_; - // Merged-unknowns that must have their predecessor values kept to ensure - // partially escaped values are written - ArenaBitVector kept_merged_unknowns_; - ScopedArenaVector<HInstruction*> singleton_new_instances_; // The field infos for each heap location (if relevant). @@ -1434,7 +1251,6 @@ class LSEVisitor final : private HGraphDelegateVisitor { Phase current_phase_; - friend class PartialLoadStoreEliminationHelper; friend struct ScopedRestoreHeapValues; friend std::ostream& operator<<(std::ostream& os, const Value& v); @@ -1455,8 +1271,6 @@ std::ostream& operator<<(std::ostream& oss, const LSEVisitor::Phase& phase) { return oss << "kLoadElimination"; case LSEVisitor::Phase::kStoreElimination: return oss << "kStoreElimination"; - case LSEVisitor::Phase::kPartialElimination: - return oss << "kPartialElimination"; } } @@ -1580,10 +1394,8 @@ std::ostream& operator<<(std::ostream& os, const LSEVisitor::Value& v) { LSEVisitor::LSEVisitor(HGraph* graph, const HeapLocationCollector& heap_location_collector, - bool perform_partial_lse, OptimizingCompilerStats* stats) : HGraphDelegateVisitor(graph, stats), - perform_partial_lse_(perform_partial_lse), heap_location_collector_(heap_location_collector), allocator_(graph->GetArenaStack()), num_phi_placeholders_(GetGraph()->GetBlocks().size() * @@ -1613,10 +1425,6 @@ LSEVisitor::LSEVisitor(HGraph* graph, phi_placeholder_replacements_(num_phi_placeholders_, Value::Invalid(), allocator_.Adapter(kArenaAllocLSE)), - kept_merged_unknowns_(&allocator_, - /*start_bits=*/num_phi_placeholders_, - /*expandable=*/false, - kArenaAllocLSE), singleton_new_instances_(allocator_.Adapter(kArenaAllocLSE)), field_infos_(heap_location_collector_.GetNumberOfHeapLocations(), allocator_.Adapter(kArenaAllocLSE)), @@ -1856,8 +1664,7 @@ void LSEVisitor::MaterializeNonLoopPhis(PhiPlaceholder phi_placeholder, DataType Value pred_value = ReplacementOrValue(heap_values_for_[predecessor->GetBlockId()][idx].value); DCHECK(!pred_value.IsPureUnknown()) << pred_value << " block " << current_block->GetBlockId() << " pred: " << predecessor->GetBlockId(); - if (pred_value.NeedsNonLoopPhi() || - (current_phase_ == Phase::kPartialElimination && pred_value.IsMergedUnknown())) { + if (pred_value.NeedsNonLoopPhi()) { // We need to process the Phi placeholder first. 
work_queue.push_back(pred_value.GetPhiPlaceholder()); } else if (pred_value.IsDefault()) { @@ -1888,12 +1695,6 @@ void LSEVisitor::VisitGetLocation(HInstruction* instruction, size_t idx) { RecordFieldInfo(&instruction->GetFieldInfo(), idx); } DCHECK(record.value.IsUnknown() || record.value.Equals(ReplacementOrValue(record.value))); - // If we are unknown, we either come from somewhere untracked or we can reconstruct the partial - // value. - DCHECK(!record.value.IsPureUnknown() || - heap_location_collector_.GetHeapLocation(idx)->GetReferenceInfo() == nullptr || - !heap_location_collector_.GetHeapLocation(idx)->GetReferenceInfo()->IsPartialSingleton()) - << "In " << GetGraph()->PrettyMethod() << ": " << record.value << " for " << *instruction; intermediate_values_.insert({instruction, record.value}); loads_and_stores_.push_back({ instruction, idx }); if ((record.value.IsDefault() || record.value.NeedsNonLoopPhi()) && @@ -2302,9 +2103,7 @@ bool LSEVisitor::MaterializeLoopPhis(ArrayRef<const size_t> phi_placeholder_inde for (HBasicBlock* predecessor : block->GetPredecessors()) { Value value = ReplacementOrValue(heap_values_for_[predecessor->GetBlockId()][idx].value); if (value.NeedsNonLoopPhi()) { - DCHECK(current_phase_ == Phase::kLoadElimination || - current_phase_ == Phase::kPartialElimination) - << current_phase_; + DCHECK(current_phase_ == Phase::kLoadElimination) << current_phase_; MaterializeNonLoopPhis(value.GetPhiPlaceholder(), type); value = Replacement(value); } @@ -2765,22 +2564,9 @@ void LSEVisitor::SearchPhiPlaceholdersForKeptStores() { work_queue.push_back(index); } const ArenaVector<HBasicBlock*>& blocks = GetGraph()->GetBlocks(); - std::optional<ArenaBitVector> not_kept_stores; - if (stats_) { - not_kept_stores.emplace(GetGraph()->GetAllocator(), - kept_stores_.GetBitSizeOf(), - false, - ArenaAllocKind::kArenaAllocLSE); - } while (!work_queue.empty()) { uint32_t cur_phi_idx = work_queue.back(); PhiPlaceholder phi_placeholder = GetPhiPlaceholderAt(cur_phi_idx); - // Only writes to partial-escapes need to be specifically kept. - bool is_partial_kept_merged_unknown = - kept_merged_unknowns_.IsBitSet(cur_phi_idx) && - heap_location_collector_.GetHeapLocation(phi_placeholder.GetHeapLocation()) - ->GetReferenceInfo() - ->IsPartialSingleton(); work_queue.pop_back(); size_t idx = phi_placeholder.GetHeapLocation(); HBasicBlock* block = blocks[phi_placeholder.GetBlockId()]; @@ -2800,11 +2586,6 @@ void LSEVisitor::SearchPhiPlaceholdersForKeptStores() { if (!stored_by.IsUnknown() && (i == idx || MayAliasOnBackEdge(block, idx, i))) { if (stored_by.NeedsPhi()) { size_t phi_placeholder_index = PhiPlaceholderIndex(stored_by); - if (is_partial_kept_merged_unknown) { - // Propagate merged-unknown keep since otherwise this might look - // like a partial escape we can remove. 
- kept_merged_unknowns_.SetBit(phi_placeholder_index); - } if (!phi_placeholders_to_search_for_kept_stores_.IsBitSet(phi_placeholder_index)) { phi_placeholders_to_search_for_kept_stores_.SetBit(phi_placeholder_index); work_queue.push_back(phi_placeholder_index); @@ -2815,24 +2596,12 @@ void LSEVisitor::SearchPhiPlaceholdersForKeptStores() { DCHECK(ri != nullptr) << "No heap value for " << stored_by.GetInstruction()->DebugName() << " id: " << stored_by.GetInstruction()->GetId() << " block: " << stored_by.GetInstruction()->GetBlock()->GetBlockId(); - if (!is_partial_kept_merged_unknown && IsPartialNoEscape(predecessor, idx)) { - if (not_kept_stores) { - not_kept_stores->SetBit(stored_by.GetInstruction()->GetId()); - } - } else { - kept_stores_.SetBit(stored_by.GetInstruction()->GetId()); - } + kept_stores_.SetBit(stored_by.GetInstruction()->GetId()); } } } } } - if (not_kept_stores) { - // a - b := (a & ~b) - not_kept_stores->Subtract(&kept_stores_); - auto num_removed = not_kept_stores->NumSetBits(); - MaybeRecordStat(stats_, MethodCompilationStat::kPartialStoreRemoved, num_removed); - } } void LSEVisitor::UpdateValueRecordForStoreElimination(/*inout*/ValueRecord* value_record) { @@ -3022,934 +2791,8 @@ void LSEVisitor::Run() { // 4. Replace loads and remove unnecessary stores and singleton allocations. FinishFullLSE(); - - // 5. Move partial escapes down and fixup with PHIs. - current_phase_ = Phase::kPartialElimination; - MovePartialEscapes(); } -// Clear unknown loop-phi results. Here we'll be able to use partial-unknowns so we need to -// retry all of them with more information about where they come from. -void LSEVisitor::PrepareForPartialPhiComputation() { - std::replace_if( - phi_placeholder_replacements_.begin(), - phi_placeholder_replacements_.end(), - [](const Value& val) { return !val.IsDefault() && !val.IsInstruction(); }, - Value::Invalid()); -} - -class PartialLoadStoreEliminationHelper { - public: - PartialLoadStoreEliminationHelper(LSEVisitor* lse, ScopedArenaAllocator* alloc) - : lse_(lse), - alloc_(alloc), - new_ref_phis_(alloc_->Adapter(kArenaAllocLSE)), - heap_refs_(alloc_->Adapter(kArenaAllocLSE)), - max_preds_per_block_((*std::max_element(GetGraph()->GetActiveBlocks().begin(), - GetGraph()->GetActiveBlocks().end(), - [](HBasicBlock* a, HBasicBlock* b) { - return a->GetNumberOfPredecessors() < - b->GetNumberOfPredecessors(); - })) - ->GetNumberOfPredecessors()), - materialization_blocks_(GetGraph()->GetBlocks().size() * max_preds_per_block_, - nullptr, - alloc_->Adapter(kArenaAllocLSE)), - first_materialization_block_id_(GetGraph()->GetBlocks().size()) { - size_t num_partial_singletons = lse_->heap_location_collector_.CountPartialSingletons(); - heap_refs_.reserve(num_partial_singletons); - new_ref_phis_.reserve(num_partial_singletons * GetGraph()->GetBlocks().size()); - CollectInterestingHeapRefs(); - } - - ~PartialLoadStoreEliminationHelper() { - if (heap_refs_.empty()) { - return; - } - ReferenceTypePropagation rtp_fixup(GetGraph(), - Handle<mirror::DexCache>(), - /* is_first_run= */ false); - rtp_fixup.Visit(ArrayRef<HInstruction* const>(new_ref_phis_)); - GetGraph()->ClearLoopInformation(); - GetGraph()->ClearDominanceInformation(); - GetGraph()->ClearReachabilityInformation(); - GetGraph()->BuildDominatorTree(); - GetGraph()->ComputeReachabilityInformation(); - } - - class IdxToHeapLoc { - public: - explicit IdxToHeapLoc(const HeapLocationCollector* hlc) : collector_(hlc) {} - HeapLocation* operator()(size_t idx) const { - return collector_->GetHeapLocation(idx); 
- } - - private: - const HeapLocationCollector* collector_; - }; - - - class HeapReferenceData { - public: - using LocIterator = IterationRange<TransformIterator<BitVector::IndexIterator, IdxToHeapLoc>>; - HeapReferenceData(PartialLoadStoreEliminationHelper* helper, - HNewInstance* new_inst, - const ExecutionSubgraph* subgraph, - ScopedArenaAllocator* alloc) - : new_instance_(new_inst), - helper_(helper), - heap_locs_(alloc, - helper->lse_->heap_location_collector_.GetNumberOfHeapLocations(), - /* expandable= */ false, - kArenaAllocLSE), - materializations_( - // We generally won't need to create too many materialization blocks and we can expand - // this as needed so just start off with 2x. - 2 * helper->lse_->GetGraph()->GetBlocks().size(), - nullptr, - alloc->Adapter(kArenaAllocLSE)), - collector_(helper->lse_->heap_location_collector_), - subgraph_(subgraph) {} - - LocIterator IterateLocations() { - auto idxs = heap_locs_.Indexes(); - return MakeTransformRange(idxs, IdxToHeapLoc(&collector_)); - } - - void AddHeapLocation(size_t idx) { - heap_locs_.SetBit(idx); - } - - const ExecutionSubgraph* GetNoEscapeSubgraph() const { - return subgraph_; - } - - bool IsPostEscape(HBasicBlock* blk) { - return std::any_of( - subgraph_->GetExcludedCohorts().cbegin(), - subgraph_->GetExcludedCohorts().cend(), - [&](const ExecutionSubgraph::ExcludedCohort& ec) { return ec.PrecedesBlock(blk); }); - } - - bool InEscapeCohort(HBasicBlock* blk) { - return std::any_of( - subgraph_->GetExcludedCohorts().cbegin(), - subgraph_->GetExcludedCohorts().cend(), - [&](const ExecutionSubgraph::ExcludedCohort& ec) { return ec.ContainsBlock(blk); }); - } - - bool BeforeAllEscapes(HBasicBlock* b) { - return std::none_of(subgraph_->GetExcludedCohorts().cbegin(), - subgraph_->GetExcludedCohorts().cend(), - [&](const ExecutionSubgraph::ExcludedCohort& ec) { - return ec.PrecedesBlock(b) || ec.ContainsBlock(b); - }); - } - - HNewInstance* OriginalNewInstance() const { - return new_instance_; - } - - // Collect and replace all uses. We need to perform this twice since we will - // generate PHIs and additional uses as we create the default-values for - // pred-gets. These values might be other references that are also being - // partially eliminated. By running just the replacement part again we are - // able to avoid having to keep another whole in-progress partial map - // around. Since we will have already handled all the other uses in the - // first pass the second one will be quite fast. - void FixupUses(bool first_pass) { - ScopedArenaAllocator saa(GetGraph()->GetArenaStack()); - // Replace uses with materialized values. - ScopedArenaVector<InstructionUse<HInstruction>> to_replace(saa.Adapter(kArenaAllocLSE)); - ScopedArenaVector<HInstruction*> to_remove(saa.Adapter(kArenaAllocLSE)); - // Do we need to add a constructor-fence. - ScopedArenaVector<InstructionUse<HConstructorFence>> constructor_fences( - saa.Adapter(kArenaAllocLSE)); - ScopedArenaVector<InstructionUse<HInstruction>> to_predicate(saa.Adapter(kArenaAllocLSE)); - - CollectReplacements(to_replace, to_remove, constructor_fences, to_predicate); - - if (!first_pass) { - // If another partial creates new references they can only be in Phis or pred-get defaults - // so they must be in the to_replace group. 
- DCHECK(to_predicate.empty()); - DCHECK(constructor_fences.empty()); - DCHECK(to_remove.empty()); - } - - ReplaceInput(to_replace); - RemoveAndReplaceInputs(to_remove); - CreateConstructorFences(constructor_fences); - PredicateInstructions(to_predicate); - - CHECK(OriginalNewInstance()->GetUses().empty()) - << OriginalNewInstance()->GetUses() << ", " << OriginalNewInstance()->GetEnvUses(); - } - - void AddMaterialization(HBasicBlock* blk, HInstruction* ins) { - if (blk->GetBlockId() >= materializations_.size()) { - // Make sure the materialization array is large enough, try to avoid - // re-sizing too many times by giving extra space. - materializations_.resize(blk->GetBlockId() * 2, nullptr); - } - DCHECK(materializations_[blk->GetBlockId()] == nullptr) - << "Already have a materialization in block " << blk->GetBlockId() << ": " - << *materializations_[blk->GetBlockId()] << " when trying to set materialization to " - << *ins; - materializations_[blk->GetBlockId()] = ins; - LSE_VLOG << "In block " << blk->GetBlockId() << " materialization is " << *ins; - helper_->NotifyNewMaterialization(ins); - } - - bool HasMaterialization(HBasicBlock* blk) const { - return blk->GetBlockId() < materializations_.size() && - materializations_[blk->GetBlockId()] != nullptr; - } - - HInstruction* GetMaterialization(HBasicBlock* blk) const { - if (materializations_.size() <= blk->GetBlockId() || - materializations_[blk->GetBlockId()] == nullptr) { - // This must be a materialization block added after the partial LSE of - // the current reference finished. Since every edge can only have at - // most one materialization block added to it we can just check the - // blocks predecessor. - DCHECK(helper_->IsMaterializationBlock(blk)); - blk = helper_->FindDominatingNonMaterializationBlock(blk); - DCHECK(!helper_->IsMaterializationBlock(blk)); - } - DCHECK_GT(materializations_.size(), blk->GetBlockId()); - DCHECK(materializations_[blk->GetBlockId()] != nullptr); - return materializations_[blk->GetBlockId()]; - } - - void GenerateMaterializationValueFromPredecessors(HBasicBlock* blk) { - DCHECK(std::none_of(GetNoEscapeSubgraph()->GetExcludedCohorts().begin(), - GetNoEscapeSubgraph()->GetExcludedCohorts().end(), - [&](const ExecutionSubgraph::ExcludedCohort& cohort) { - return cohort.IsEntryBlock(blk); - })); - DCHECK(!HasMaterialization(blk)); - if (blk->IsExitBlock()) { - return; - } else if (blk->IsLoopHeader()) { - // See comment in execution_subgraph.h. Currently we act as though every - // allocation for partial elimination takes place in the entry block. - // This simplifies the analysis by making it so any escape cohort - // expands to contain any loops it is a part of. This is something that - // we should rectify at some point. In either case however we can still - // special case the loop-header since (1) currently the loop can't have - // any merges between different cohort entries since the pre-header will - // be the earliest place entry can happen and (2) even if the analysis - // is improved to consider lifetime of the object WRT loops any values - // which would require loop-phis would have to make the whole loop - // escape anyway. - // This all means we can always use value from the pre-header when the - // block is the loop-header and we didn't already create a - // materialization block. (NB when we do improve the analysis we will - // need to modify the materialization creation code to deal with this - // correctly.) 
- HInstruction* pre_header_val = - GetMaterialization(blk->GetLoopInformation()->GetPreHeader()); - AddMaterialization(blk, pre_header_val); - return; - } - ScopedArenaAllocator saa(GetGraph()->GetArenaStack()); - ScopedArenaVector<HInstruction*> pred_vals(saa.Adapter(kArenaAllocLSE)); - pred_vals.reserve(blk->GetNumberOfPredecessors()); - for (HBasicBlock* pred : blk->GetPredecessors()) { - DCHECK(HasMaterialization(pred)); - pred_vals.push_back(GetMaterialization(pred)); - } - GenerateMaterializationValueFromPredecessorsDirect(blk, pred_vals); - } - - void GenerateMaterializationValueFromPredecessorsForEntry( - HBasicBlock* entry, const ScopedArenaVector<HInstruction*>& pred_vals) { - DCHECK(std::any_of(GetNoEscapeSubgraph()->GetExcludedCohorts().begin(), - GetNoEscapeSubgraph()->GetExcludedCohorts().end(), - [&](const ExecutionSubgraph::ExcludedCohort& cohort) { - return cohort.IsEntryBlock(entry); - })); - GenerateMaterializationValueFromPredecessorsDirect(entry, pred_vals); - } - - private: - template <typename InstructionType> - struct InstructionUse { - InstructionType* instruction_; - size_t index_; - }; - - void ReplaceInput(const ScopedArenaVector<InstructionUse<HInstruction>>& to_replace) { - for (auto& [ins, idx] : to_replace) { - HInstruction* merged_inst = GetMaterialization(ins->GetBlock()); - if (ins->IsPhi() && merged_inst->IsPhi() && ins->GetBlock() == merged_inst->GetBlock()) { - // Phis we just pass through the appropriate inputs. - ins->ReplaceInput(merged_inst->InputAt(idx), idx); - } else { - ins->ReplaceInput(merged_inst, idx); - } - } - } - - void RemoveAndReplaceInputs(const ScopedArenaVector<HInstruction*>& to_remove) { - for (HInstruction* ins : to_remove) { - if (ins->GetBlock() == nullptr) { - // Already dealt with. - continue; - } - DCHECK(BeforeAllEscapes(ins->GetBlock())) << *ins; - if (ins->IsInstanceFieldGet() || ins->IsInstanceFieldSet()) { - bool instruction_has_users = - ins->IsInstanceFieldGet() && (!ins->GetUses().empty() || !ins->GetEnvUses().empty()); - if (instruction_has_users) { - // Make sure any remaining users of read are replaced. - HInstruction* replacement = - helper_->lse_->GetPartialValueAt(OriginalNewInstance(), ins); - // NB ReplaceInput will remove a use from the list so this is - // guaranteed to finish eventually. - while (!ins->GetUses().empty()) { - const HUseListNode<HInstruction*>& use = ins->GetUses().front(); - use.GetUser()->ReplaceInput(replacement, use.GetIndex()); - } - while (!ins->GetEnvUses().empty()) { - const HUseListNode<HEnvironment*>& use = ins->GetEnvUses().front(); - use.GetUser()->ReplaceInput(replacement, use.GetIndex()); - } - } else { - DCHECK(ins->GetUses().empty()) - << "Instruction has users!\n" - << ins->DumpWithArgs() << "\nUsers are " << ins->GetUses(); - DCHECK(ins->GetEnvUses().empty()) - << "Instruction has users!\n" - << ins->DumpWithArgs() << "\nUsers are " << ins->GetEnvUses(); - } - ins->GetBlock()->RemoveInstruction(ins); - } else { - // Can only be obj == other, obj != other, obj == obj (!?) or, obj != obj (!?) - // Since PHIs are escapes as far as LSE is concerned and we are before - // any escapes these are the only 4 options. - DCHECK(ins->IsEqual() || ins->IsNotEqual()) << *ins; - HInstruction* replacement; - if (UNLIKELY(ins->InputAt(0) == ins->InputAt(1))) { - replacement = ins->IsEqual() ? GetGraph()->GetIntConstant(1) - : GetGraph()->GetIntConstant(0); - } else { - replacement = ins->IsEqual() ? 
GetGraph()->GetIntConstant(0) - : GetGraph()->GetIntConstant(1); - } - ins->ReplaceWith(replacement); - ins->GetBlock()->RemoveInstruction(ins); - } - } - } - - void CreateConstructorFences( - const ScopedArenaVector<InstructionUse<HConstructorFence>>& constructor_fences) { - if (!constructor_fences.empty()) { - uint32_t pc = constructor_fences.front().instruction_->GetDexPc(); - for (auto& [cf, idx] : constructor_fences) { - if (cf->GetInputs().size() == 1) { - cf->GetBlock()->RemoveInstruction(cf); - } else { - cf->RemoveInputAt(idx); - } - } - for (const ExecutionSubgraph::ExcludedCohort& ec : - GetNoEscapeSubgraph()->GetExcludedCohorts()) { - for (HBasicBlock* blk : ec.EntryBlocks()) { - for (HBasicBlock* materializer : - Filter(MakeIterationRange(blk->GetPredecessors()), - [&](HBasicBlock* blk) { return helper_->IsMaterializationBlock(blk); })) { - HInstruction* new_cf = new (GetGraph()->GetAllocator()) HConstructorFence( - GetMaterialization(materializer), pc, GetGraph()->GetAllocator()); - materializer->InsertInstructionBefore(new_cf, materializer->GetLastInstruction()); - } - } - } - } - } - - void PredicateInstructions( - const ScopedArenaVector<InstructionUse<HInstruction>>& to_predicate) { - for (auto& [ins, idx] : to_predicate) { - if (UNLIKELY(ins->GetBlock() == nullptr)) { - // Already handled due to obj == obj; - continue; - } else if (ins->IsInstanceFieldGet()) { - // IFieldGet[obj] => PredicatedIFieldGet[PartialValue, obj] - HInstruction* new_fget = new (GetGraph()->GetAllocator()) HPredicatedInstanceFieldGet( - ins->AsInstanceFieldGet(), - GetMaterialization(ins->GetBlock()), - helper_->lse_->GetPartialValueAt(OriginalNewInstance(), ins)); - MaybeRecordStat(helper_->lse_->stats_, MethodCompilationStat::kPredicatedLoadAdded); - ins->GetBlock()->InsertInstructionBefore(new_fget, ins); - if (ins->GetType() == DataType::Type::kReference) { - // Reference info is the same - new_fget->SetReferenceTypeInfoIfValid(ins->GetReferenceTypeInfo()); - } - // In this phase, substitute instructions are used only for the predicated get - // default values which are used only if the partial singleton did not escape, - // so the out value of the `new_fget` for the relevant cases is the same as - // the default value. - // TODO: Use the default value for materializing default values used by - // other predicated loads to avoid some unnecessary Phis. (This shall - // complicate the search for replacement in `ReplacementOrValue()`.) - DCHECK(helper_->lse_->substitute_instructions_for_loads_[ins->GetId()] == nullptr); - helper_->lse_->substitute_instructions_for_loads_[ins->GetId()] = new_fget; - ins->ReplaceWith(new_fget); - ins->ReplaceEnvUsesDominatedBy(ins, new_fget); - CHECK(ins->GetEnvUses().empty() && ins->GetUses().empty()) - << "Instruction: " << *ins << " uses: " << ins->GetUses() - << ", env: " << ins->GetEnvUses(); - ins->GetBlock()->RemoveInstruction(ins); - } else if (ins->IsInstanceFieldSet()) { - // Any predicated sets shouldn't require movement. - ins->AsInstanceFieldSet()->SetIsPredicatedSet(); - MaybeRecordStat(helper_->lse_->stats_, MethodCompilationStat::kPredicatedStoreAdded); - HInstruction* merged_inst = GetMaterialization(ins->GetBlock()); - ins->ReplaceInput(merged_inst, idx); - } else { - // comparisons need to be split into 2. - DCHECK(ins->IsEqual() || ins->IsNotEqual()) << "bad instruction " << *ins; - bool this_is_first = idx == 0; - if (ins->InputAt(0) == ins->InputAt(1)) { - // This is a obj == obj or obj != obj. 
- // No idea why anyone would do this but whatever. - ins->ReplaceWith(GetGraph()->GetIntConstant(ins->IsEqual() ? 1 : 0)); - ins->GetBlock()->RemoveInstruction(ins); - continue; - } else { - HInstruction* is_escaped = new (GetGraph()->GetAllocator()) - HNotEqual(GetMaterialization(ins->GetBlock()), GetGraph()->GetNullConstant()); - HInstruction* combine_inst = - ins->IsEqual() ? static_cast<HInstruction*>(new (GetGraph()->GetAllocator()) HAnd( - DataType::Type::kBool, is_escaped, ins)) - : static_cast<HInstruction*>(new (GetGraph()->GetAllocator()) HOr( - DataType::Type::kBool, is_escaped, ins)); - ins->ReplaceInput(GetMaterialization(ins->GetBlock()), this_is_first ? 0 : 1); - ins->GetBlock()->InsertInstructionBefore(is_escaped, ins); - ins->GetBlock()->InsertInstructionAfter(combine_inst, ins); - ins->ReplaceWith(combine_inst); - combine_inst->ReplaceInput(ins, 1); - } - } - } - } - - // Figure out all the instructions we need to - // fixup/replace/remove/duplicate. Since this requires an iteration of an - // intrusive linked list we want to do it only once and collect all the data - // here. - void CollectReplacements( - ScopedArenaVector<InstructionUse<HInstruction>>& to_replace, - ScopedArenaVector<HInstruction*>& to_remove, - ScopedArenaVector<InstructionUse<HConstructorFence>>& constructor_fences, - ScopedArenaVector<InstructionUse<HInstruction>>& to_predicate) { - size_t size = new_instance_->GetUses().SizeSlow(); - to_replace.reserve(size); - to_remove.reserve(size); - constructor_fences.reserve(size); - to_predicate.reserve(size); - for (auto& use : new_instance_->GetUses()) { - HBasicBlock* blk = - helper_->FindDominatingNonMaterializationBlock(use.GetUser()->GetBlock()); - if (InEscapeCohort(blk)) { - LSE_VLOG << "Replacing " << *new_instance_ << " use in " << *use.GetUser() << " with " - << *GetMaterialization(blk); - to_replace.push_back({use.GetUser(), use.GetIndex()}); - } else if (IsPostEscape(blk)) { - LSE_VLOG << "User " << *use.GetUser() << " after escapes!"; - // The fields + cmp are normal uses. Phi can only be here if it was - // generated by full LSE so whatever store+load that created the phi - // is the escape. - if (use.GetUser()->IsPhi()) { - to_replace.push_back({use.GetUser(), use.GetIndex()}); - } else { - DCHECK(use.GetUser()->IsFieldAccess() || - use.GetUser()->IsEqual() || - use.GetUser()->IsNotEqual()) - << *use.GetUser() << "@" << use.GetIndex(); - to_predicate.push_back({use.GetUser(), use.GetIndex()}); - } - } else if (use.GetUser()->IsConstructorFence()) { - LSE_VLOG << "User " << *use.GetUser() << " being moved to materialization!"; - constructor_fences.push_back({use.GetUser()->AsConstructorFence(), use.GetIndex()}); - } else { - LSE_VLOG << "User " << *use.GetUser() << " not contained in cohort!"; - to_remove.push_back(use.GetUser()); - } - } - DCHECK_EQ( - to_replace.size() + to_remove.size() + constructor_fences.size() + to_predicate.size(), - size); - } - - void GenerateMaterializationValueFromPredecessorsDirect( - HBasicBlock* blk, const ScopedArenaVector<HInstruction*>& pred_vals) { - DCHECK(!pred_vals.empty()); - bool all_equal = std::all_of(pred_vals.begin() + 1, pred_vals.end(), [&](HInstruction* val) { - return val == pred_vals.front(); - }); - if (LIKELY(all_equal)) { - AddMaterialization(blk, pred_vals.front()); - } else { - // Make a PHI for the predecessors. 
- HPhi* phi = new (GetGraph()->GetAllocator()) HPhi( - GetGraph()->GetAllocator(), kNoRegNumber, pred_vals.size(), DataType::Type::kReference); - for (const auto& [ins, off] : ZipCount(MakeIterationRange(pred_vals))) { - phi->SetRawInputAt(off, ins); - } - blk->AddPhi(phi); - AddMaterialization(blk, phi); - } - } - - HGraph* GetGraph() const { - return helper_->GetGraph(); - } - - HNewInstance* new_instance_; - PartialLoadStoreEliminationHelper* helper_; - ArenaBitVector heap_locs_; - ScopedArenaVector<HInstruction*> materializations_; - const HeapLocationCollector& collector_; - const ExecutionSubgraph* subgraph_; - }; - - ArrayRef<HeapReferenceData> GetHeapRefs() { - return ArrayRef<HeapReferenceData>(heap_refs_); - } - - bool IsMaterializationBlock(HBasicBlock* blk) const { - return blk->GetBlockId() >= first_materialization_block_id_; - } - - HBasicBlock* GetOrCreateMaterializationBlock(HBasicBlock* entry, size_t pred_num) { - size_t idx = GetMaterializationBlockIndex(entry, pred_num); - HBasicBlock* blk = materialization_blocks_[idx]; - if (blk == nullptr) { - blk = new (GetGraph()->GetAllocator()) HBasicBlock(GetGraph()); - GetGraph()->AddBlock(blk); - LSE_VLOG << "creating materialization block " << blk->GetBlockId() << " on edge " - << entry->GetPredecessors()[pred_num]->GetBlockId() << "->" << entry->GetBlockId(); - blk->AddInstruction(new (GetGraph()->GetAllocator()) HGoto()); - materialization_blocks_[idx] = blk; - } - return blk; - } - - HBasicBlock* GetMaterializationBlock(HBasicBlock* entry, size_t pred_num) { - HBasicBlock* out = materialization_blocks_[GetMaterializationBlockIndex(entry, pred_num)]; - DCHECK(out != nullptr) << "No materialization block for edge " << entry->GetBlockId() << "->" - << entry->GetPredecessors()[pred_num]->GetBlockId(); - return out; - } - - IterationRange<ArenaVector<HBasicBlock*>::const_iterator> IterateMaterializationBlocks() { - return MakeIterationRange(GetGraph()->GetBlocks().begin() + first_materialization_block_id_, - GetGraph()->GetBlocks().end()); - } - - void FixupPartialObjectUsers() { - for (PartialLoadStoreEliminationHelper::HeapReferenceData& ref_data : GetHeapRefs()) { - // Use the materialized instances to replace original instance - ref_data.FixupUses(/*first_pass=*/true); - CHECK(ref_data.OriginalNewInstance()->GetUses().empty()) - << ref_data.OriginalNewInstance()->GetUses() << ", " - << ref_data.OriginalNewInstance()->GetEnvUses(); - } - // This can cause new uses to be created due to the creation of phis/pred-get defaults - for (PartialLoadStoreEliminationHelper::HeapReferenceData& ref_data : GetHeapRefs()) { - // Only need to handle new phis/pred-get defaults. DCHECK that's all we find. - ref_data.FixupUses(/*first_pass=*/false); - CHECK(ref_data.OriginalNewInstance()->GetUses().empty()) - << ref_data.OriginalNewInstance()->GetUses() << ", " - << ref_data.OriginalNewInstance()->GetEnvUses(); - } - } - - // Finds the first block which either is or dominates the given block which is - // not a materialization block - HBasicBlock* FindDominatingNonMaterializationBlock(HBasicBlock* blk) { - if (LIKELY(!IsMaterializationBlock(blk))) { - // Not a materialization block so itself. - return blk; - } else if (blk->GetNumberOfPredecessors() != 0) { - // We're far enough along that the materialization blocks have been - // inserted into the graph so no need to go searching. - return blk->GetSinglePredecessor(); - } - // Search through the materialization blocks to find where it will be - // inserted. 
- for (auto [mat, idx] : ZipCount(MakeIterationRange(materialization_blocks_))) { - if (mat == blk) { - size_t cur_pred_idx = idx % max_preds_per_block_; - HBasicBlock* entry = GetGraph()->GetBlocks()[idx / max_preds_per_block_]; - return entry->GetPredecessors()[cur_pred_idx]; - } - } - LOG(FATAL) << "Unable to find materialization block position for " << blk->GetBlockId() << "!"; - return nullptr; - } - - void InsertMaterializationBlocks() { - for (auto [mat, idx] : ZipCount(MakeIterationRange(materialization_blocks_))) { - if (mat == nullptr) { - continue; - } - size_t cur_pred_idx = idx % max_preds_per_block_; - HBasicBlock* entry = GetGraph()->GetBlocks()[idx / max_preds_per_block_]; - HBasicBlock* pred = entry->GetPredecessors()[cur_pred_idx]; - mat->InsertBetween(pred, entry); - LSE_VLOG << "Adding materialization block " << mat->GetBlockId() << " on edge " - << pred->GetBlockId() << "->" << entry->GetBlockId(); - } - } - - // Replace any env-uses remaining of the partial singletons with the - // appropriate phis and remove the instructions. - void RemoveReplacedInstructions() { - for (HeapReferenceData& ref_data : GetHeapRefs()) { - CHECK(ref_data.OriginalNewInstance()->GetUses().empty()) - << ref_data.OriginalNewInstance()->GetUses() << ", " - << ref_data.OriginalNewInstance()->GetEnvUses() - << " inst is: " << ref_data.OriginalNewInstance(); - const auto& env_uses = ref_data.OriginalNewInstance()->GetEnvUses(); - while (!env_uses.empty()) { - const HUseListNode<HEnvironment*>& use = env_uses.front(); - HInstruction* merged_inst = - ref_data.GetMaterialization(use.GetUser()->GetHolder()->GetBlock()); - LSE_VLOG << "Replacing env use of " << *use.GetUser()->GetHolder() << "@" << use.GetIndex() - << " with " << *merged_inst; - use.GetUser()->ReplaceInput(merged_inst, use.GetIndex()); - } - ref_data.OriginalNewInstance()->GetBlock()->RemoveInstruction(ref_data.OriginalNewInstance()); - } - } - - // We need to make sure any allocations dominate their environment uses. - // Technically we could probably remove the env-uses and be fine but this is easy. - void ReorderMaterializationsForEnvDominance() { - for (HBasicBlock* blk : IterateMaterializationBlocks()) { - ScopedArenaAllocator alloc(alloc_->GetArenaStack()); - ArenaBitVector still_unsorted( - &alloc, GetGraph()->GetCurrentInstructionId(), false, kArenaAllocLSE); - // This is guaranteed to be very short (since we will abandon LSE if there - // are >= kMaxNumberOfHeapLocations (32) heap locations so that is the - // absolute maximum size this list can be) so doing a selection sort is - // fine. This avoids the need to do a complicated recursive check to - // ensure transitivity for std::sort. - ScopedArenaVector<HNewInstance*> materializations(alloc.Adapter(kArenaAllocLSE)); - materializations.reserve(GetHeapRefs().size()); - for (HInstruction* ins : - MakeSTLInstructionIteratorRange(HInstructionIterator(blk->GetInstructions()))) { - if (ins->IsNewInstance()) { - materializations.push_back(ins->AsNewInstance()); - still_unsorted.SetBit(ins->GetId()); - } - } - using Iter = ScopedArenaVector<HNewInstance*>::iterator; - Iter unsorted_start = materializations.begin(); - Iter unsorted_end = materializations.end(); - // selection sort. Required since the only check we can easily perform a - // is-before-all-unsorted check. 
- while (unsorted_start != unsorted_end) { - bool found_instruction = false; - for (Iter candidate = unsorted_start; candidate != unsorted_end; ++candidate) { - HNewInstance* ni = *candidate; - if (std::none_of(ni->GetAllEnvironments().cbegin(), - ni->GetAllEnvironments().cend(), - [&](const HEnvironment* env) { - return std::any_of( - env->GetEnvInputs().cbegin(), - env->GetEnvInputs().cend(), - [&](const HInstruction* env_element) { - return env_element != nullptr && - still_unsorted.IsBitSet(env_element->GetId()); - }); - })) { - still_unsorted.ClearBit(ni->GetId()); - std::swap(*unsorted_start, *candidate); - ++unsorted_start; - found_instruction = true; - break; - } - } - CHECK(found_instruction) << "Unable to select next materialization instruction." - << " Environments have a dependency loop!"; - } - // Reverse so we as we prepend them we end up with the correct order. - auto reverse_iter = MakeIterationRange(materializations.rbegin(), materializations.rend()); - for (HNewInstance* ins : reverse_iter) { - if (blk->GetFirstInstruction() != ins) { - // Don't do checks since that makes sure the move is safe WRT - // ins->CanBeMoved which for NewInstance is false. - ins->MoveBefore(blk->GetFirstInstruction(), /*do_checks=*/false); - } - } - } - } - - private: - void CollectInterestingHeapRefs() { - // Get all the partials we need to move around. - for (size_t i = 0; i < lse_->heap_location_collector_.GetNumberOfHeapLocations(); ++i) { - ReferenceInfo* ri = lse_->heap_location_collector_.GetHeapLocation(i)->GetReferenceInfo(); - if (ri->IsPartialSingleton() && - ri->GetReference()->GetBlock() != nullptr && - ri->GetNoEscapeSubgraph()->ContainsBlock(ri->GetReference()->GetBlock())) { - RecordHeapRefField(ri->GetReference()->AsNewInstance(), i); - } - } - } - - void RecordHeapRefField(HNewInstance* ni, size_t loc) { - DCHECK(ni != nullptr); - // This is likely to be very short so just do a linear search. - auto it = std::find_if(heap_refs_.begin(), heap_refs_.end(), [&](HeapReferenceData& data) { - return data.OriginalNewInstance() == ni; - }); - HeapReferenceData& cur_ref = - (it == heap_refs_.end()) - ? heap_refs_.emplace_back(this, - ni, - lse_->heap_location_collector_.GetHeapLocation(loc) - ->GetReferenceInfo() - ->GetNoEscapeSubgraph(), - alloc_) - : *it; - cur_ref.AddHeapLocation(loc); - } - - - void NotifyNewMaterialization(HInstruction* ins) { - if (ins->IsPhi()) { - new_ref_phis_.push_back(ins->AsPhi()); - } - } - - size_t GetMaterializationBlockIndex(HBasicBlock* blk, size_t pred_num) const { - DCHECK_LT(blk->GetBlockId(), first_materialization_block_id_) - << "block is a materialization block!"; - DCHECK_LT(pred_num, max_preds_per_block_); - return blk->GetBlockId() * max_preds_per_block_ + pred_num; - } - - HGraph* GetGraph() const { - return lse_->GetGraph(); - } - - LSEVisitor* lse_; - ScopedArenaAllocator* alloc_; - ScopedArenaVector<HInstruction*> new_ref_phis_; - ScopedArenaVector<HeapReferenceData> heap_refs_; - size_t max_preds_per_block_; - // An array of (# of non-materialization blocks) * max_preds_per_block - // arranged in block-id major order. Since we can only have at most one - // materialization block on each edge this is the maximum possible number of - // materialization blocks. - ScopedArenaVector<HBasicBlock*> materialization_blocks_; - size_t first_materialization_block_id_; - - friend void LSEVisitor::MovePartialEscapes(); -}; - -// Work around c++ type checking annoyances with not being able to forward-declare inner types. 
-class HeapRefHolder - : public std::reference_wrapper<PartialLoadStoreEliminationHelper::HeapReferenceData> {}; - -HInstruction* LSEVisitor::SetupPartialMaterialization(PartialLoadStoreEliminationHelper& helper, - HeapRefHolder&& holder, - size_t pred_idx, - HBasicBlock* entry) { - PartialLoadStoreEliminationHelper::HeapReferenceData& ref_data = holder.get(); - HBasicBlock* old_pred = entry->GetPredecessors()[pred_idx]; - HInstruction* new_inst = ref_data.OriginalNewInstance(); - if (UNLIKELY(!new_inst->GetBlock()->Dominates(entry))) { - LSE_VLOG << "Initial materialization in non-dominating block " << entry->GetBlockId() - << " is null!"; - return GetGraph()->GetNullConstant(); - } - HBasicBlock* bb = helper.GetOrCreateMaterializationBlock(entry, pred_idx); - CHECK(bb != nullptr) << "entry " << entry->GetBlockId() << " -> " << old_pred->GetBlockId(); - HNewInstance* repl_create = new_inst->Clone(GetGraph()->GetAllocator())->AsNewInstance(); - repl_create->SetPartialMaterialization(); - bb->InsertInstructionBefore(repl_create, bb->GetLastInstruction()); - repl_create->CopyEnvironmentFrom(new_inst->GetEnvironment()); - MaybeRecordStat(stats_, MethodCompilationStat::kPartialAllocationMoved); - LSE_VLOG << "In blk " << bb->GetBlockId() << " initial materialization is " << *repl_create; - ref_data.AddMaterialization(bb, repl_create); - const FieldInfo* info = nullptr; - for (const HeapLocation* loc : ref_data.IterateLocations()) { - size_t loc_off = heap_location_collector_.GetHeapLocationIndex(loc); - info = field_infos_[loc_off]; - DCHECK(loc->GetIndex() == nullptr); - Value value = ReplacementOrValue(heap_values_for_[old_pred->GetBlockId()][loc_off].value); - if (value.NeedsLoopPhi() || value.IsMergedUnknown()) { - Value repl = phi_placeholder_replacements_[PhiPlaceholderIndex(value.GetPhiPlaceholder())]; - DCHECK(repl.IsDefault() || repl.IsInvalid() || repl.IsInstruction()) - << repl << " from " << value << " pred is " << old_pred->GetBlockId(); - if (!repl.IsInvalid()) { - value = repl; - } else { - FullyMaterializePhi(value.GetPhiPlaceholder(), info->GetFieldType()); - value = phi_placeholder_replacements_[PhiPlaceholderIndex(value.GetPhiPlaceholder())]; - } - } else if (value.NeedsNonLoopPhi()) { - Value repl = phi_placeholder_replacements_[PhiPlaceholderIndex(value.GetPhiPlaceholder())]; - DCHECK(repl.IsDefault() || repl.IsInvalid() || repl.IsInstruction()) - << repl << " from " << value << " pred is " << old_pred->GetBlockId(); - if (!repl.IsInvalid()) { - value = repl; - } else { - MaterializeNonLoopPhis(value.GetPhiPlaceholder(), info->GetFieldType()); - value = phi_placeholder_replacements_[PhiPlaceholderIndex(value.GetPhiPlaceholder())]; - } - } - DCHECK(value.IsDefault() || value.IsInstruction()) - << GetGraph()->PrettyMethod() << ": " << value; - - if (!value.IsDefault() && - // shadow$_klass_ doesn't need to be manually initialized. 
- MemberOffset(loc->GetOffset()) != mirror::Object::ClassOffset()) { - CHECK(info != nullptr); - HInstruction* set_value = - new (GetGraph()->GetAllocator()) HInstanceFieldSet(repl_create, - value.GetInstruction(), - field_infos_[loc_off]->GetField(), - loc->GetType(), - MemberOffset(loc->GetOffset()), - false, - field_infos_[loc_off]->GetFieldIndex(), - loc->GetDeclaringClassDefIndex(), - field_infos_[loc_off]->GetDexFile(), - 0u); - bb->InsertInstructionAfter(set_value, repl_create); - LSE_VLOG << "Adding " << *set_value << " for materialization setup!"; - } - } - return repl_create; -} - -HInstruction* LSEVisitor::GetPartialValueAt(HNewInstance* orig_new_inst, HInstruction* read) { - size_t loc = heap_location_collector_.GetFieldHeapLocation(orig_new_inst, &read->GetFieldInfo()); - Value pred = ReplacementOrValue(intermediate_values_.find(read)->second); - LSE_VLOG << "using " << pred << " as default value for " << *read; - if (pred.IsInstruction()) { - return pred.GetInstruction(); - } else if (pred.IsMergedUnknown() || pred.NeedsPhi()) { - FullyMaterializePhi(pred.GetPhiPlaceholder(), - heap_location_collector_.GetHeapLocation(loc)->GetType()); - HInstruction* res = Replacement(pred).GetInstruction(); - LSE_VLOG << pred << " materialized to " << res->DumpWithArgs(); - return res; - } else if (pred.IsDefault()) { - HInstruction* res = GetDefaultValue(read->GetType()); - LSE_VLOG << pred << " materialized to " << res->DumpWithArgs(); - return res; - } - LOG(FATAL) << "Unable to find unescaped value at " << read->DumpWithArgs() - << "! This should be impossible! Value is " << pred; - UNREACHABLE(); -} - -void LSEVisitor::MovePartialEscapes() { - if (!ShouldPerformPartialLSE()) { - return; - } - - ScopedArenaAllocator saa(allocator_.GetArenaStack()); - PartialLoadStoreEliminationHelper helper(this, &saa); - - // Since for PHIs we now will have more information (since we know the object - // hasn't escaped) we need to clear the old phi-replacements where we weren't - // able to find the value. - PrepareForPartialPhiComputation(); - - for (PartialLoadStoreEliminationHelper::HeapReferenceData& ref_data : helper.GetHeapRefs()) { - LSE_VLOG << "Creating materializations for " << *ref_data.OriginalNewInstance(); - // Setup entry and exit blocks. - for (const auto& excluded_cohort : ref_data.GetNoEscapeSubgraph()->GetExcludedCohorts()) { - // Setup materialization blocks. - for (HBasicBlock* entry : excluded_cohort.EntryBlocksReversePostOrder()) { - // Setup entries. - // TODO Assuming we correctly break critical edges every entry block - // must have only a single predecessor so we could just put all this - // stuff in there. OTOH simplifier can do it for us and this is simpler - // to implement - giving clean separation between the original graph and - // materialization blocks - so for now we might as well have these new - // blocks. 
- ScopedArenaAllocator pred_alloc(saa.GetArenaStack()); - ScopedArenaVector<HInstruction*> pred_vals(pred_alloc.Adapter(kArenaAllocLSE)); - pred_vals.reserve(entry->GetNumberOfPredecessors()); - for (const auto& [pred, pred_idx] : - ZipCount(MakeIterationRange(entry->GetPredecessors()))) { - DCHECK(!helper.IsMaterializationBlock(pred)); - if (excluded_cohort.IsEntryBlock(pred)) { - pred_vals.push_back(ref_data.GetMaterialization(pred)); - continue; - } else { - pred_vals.push_back(SetupPartialMaterialization(helper, {ref_data}, pred_idx, entry)); - } - } - ref_data.GenerateMaterializationValueFromPredecessorsForEntry(entry, pred_vals); - } - - // Setup exit block heap-values for later phi-generation. - for (HBasicBlock* exit : excluded_cohort.ExitBlocks()) { - // mark every exit of cohorts as having a value so we can easily - // materialize the PHIs. - // TODO By setting this we can easily use the normal MaterializeLoopPhis - // (via FullyMaterializePhis) in order to generate the default-values - // for predicated-gets. This has the unfortunate side effect of creating - // somewhat more phis than are really needed (in some cases). We really - // should try to eventually know that we can lower these PHIs to only - // the non-escaping value in cases where it is possible. Currently this - // is done to some extent in instruction_simplifier but we have more - // information here to do the right thing. - for (const HeapLocation* loc : ref_data.IterateLocations()) { - size_t loc_off = heap_location_collector_.GetHeapLocationIndex(loc); - // This Value::Default() is only used to fill in PHIs used as the - // default value for PredicatedInstanceFieldGets. The actual value - // stored there is meaningless since the Predicated-iget will use the - // actual field value instead on these paths. - heap_values_for_[exit->GetBlockId()][loc_off].value = Value::Default(); - } - } - } - - // string materialization through the graph. - // // Visit RPO to PHI the materialized object through the cohort. - for (HBasicBlock* blk : GetGraph()->GetReversePostOrder()) { - // NB This doesn't include materialization blocks. - DCHECK(!helper.IsMaterializationBlock(blk)) - << "Materialization blocks should not be in RPO yet."; - if (ref_data.HasMaterialization(blk)) { - continue; - } else if (ref_data.BeforeAllEscapes(blk)) { - ref_data.AddMaterialization(blk, GetGraph()->GetNullConstant()); - continue; - } else { - ref_data.GenerateMaterializationValueFromPredecessors(blk); - } - } - } - - // Once we've generated all the materializations we can update the users. - helper.FixupPartialObjectUsers(); - - // Actually put materialization blocks into the graph - helper.InsertMaterializationBlocks(); - - // Get rid of the original instructions. - helper.RemoveReplacedInstructions(); - - // Ensure everything is ordered correctly in the materialization blocks. This - // involves moving every NewInstance to the top and ordering them so that any - // required env-uses are correctly ordered. - helper.ReorderMaterializationsForEnvDominance(); -} void LSEVisitor::FinishFullLSE() { // Remove recorded load instructions that should be eliminated. 
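
For context on the machinery deleted above: partial LSE kept a singleton's field values in locals until the object could escape, cloned the allocation into a materialization block on each escaping edge, flushed the known values into the clone, and then rewrote later reads as predicated gets and rewrote comparisons against the maybe-materialized reference. The following is a minimal, standalone C++ sketch of those runtime semantics, under the assumption that a toy `Obj` type and hypothetical names (`Demo`, `mat`, `escape_fn`) stand in for the compiler IR; it is not ART code and only illustrates the `==` rewrite case shown in `PredicateInstructions`.

#include <iostream>
#include <memory>

struct Obj {
  int field = 0;
};

// One compiled method after the (removed) partial transform:
//  - before any escape the field value lives in a local and the
//    "materialization" pointer is null;
//  - on the escaping path the object is materialized and the known
//    value is flushed into it before the escape;
//  - later reads are predicated: real field if materialized, the
//    tracked default otherwise;
//  - `obj == other` is rewritten to `(mat != null) && (mat == other)`.
int Demo(bool escapes, void (*escape_fn)(Obj*), const Obj* other) {
  int field_value = 1;               // obj.field = 1 (store eliminated)
  std::unique_ptr<Obj> mat;          // null means "has not escaped yet"

  if (escapes) {
    mat = std::make_unique<Obj>();   // materialization block on the edge
    mat->field = field_value;        // flush the known heap value
    escape_fn(mat.get());            // the actual escape
  }

  int read = mat ? mat->field : field_value;              // predicated get
  bool same = (mat != nullptr) && (mat.get() == other);   // rewritten ==
  return read + (same ? 100 : 0);
}

int main() {
  auto escape_fn = [](Obj* o) { o->field += 41; };
  std::cout << Demo(/*escapes=*/false, escape_fn, nullptr) << "\n";  // prints 1
  std::cout << Demo(/*escapes=*/true, escape_fn, nullptr) << "\n";   // prints 42
}
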
@@ -4004,9 +2847,8 @@ class LSEVisitorWrapper : public DeletableArenaObject<kArenaAllocLSE> { public: LSEVisitorWrapper(HGraph* graph, const HeapLocationCollector& heap_location_collector, - bool perform_partial_lse, OptimizingCompilerStats* stats) - : lse_visitor_(graph, heap_location_collector, perform_partial_lse, stats) {} + : lse_visitor_(graph, heap_location_collector, stats) {} void Run() { lse_visitor_.Run(); @@ -4016,7 +2858,7 @@ class LSEVisitorWrapper : public DeletableArenaObject<kArenaAllocLSE> { LSEVisitor lse_visitor_; }; -bool LoadStoreElimination::Run(bool enable_partial_lse) { +bool LoadStoreElimination::Run() { if (graph_->IsDebuggable()) { // Debugger may set heap values or trigger deoptimization of callers. // Skip this optimization. @@ -4029,11 +2871,7 @@ bool LoadStoreElimination::Run(bool enable_partial_lse) { // O(1) though. graph_->ComputeReachabilityInformation(); ScopedArenaAllocator allocator(graph_->GetArenaStack()); - LoadStoreAnalysis lsa(graph_, - stats_, - &allocator, - enable_partial_lse ? LoadStoreAnalysisType::kFull - : LoadStoreAnalysisType::kBasic); + LoadStoreAnalysis lsa(graph_, stats_, &allocator); lsa.Run(); const HeapLocationCollector& heap_location_collector = lsa.GetHeapLocationCollector(); if (heap_location_collector.GetNumberOfHeapLocations() == 0) { @@ -4041,8 +2879,15 @@ bool LoadStoreElimination::Run(bool enable_partial_lse) { return false; } - std::unique_ptr<LSEVisitorWrapper> lse_visitor(new (&allocator) LSEVisitorWrapper( - graph_, heap_location_collector, enable_partial_lse, stats_)); + // Currently load_store analysis can't handle predicated load/stores; specifically pairs of + // memory operations with different predicates. + // TODO: support predicated SIMD. + if (graph_->HasPredicatedSIMD()) { + return false; + } + + std::unique_ptr<LSEVisitorWrapper> lse_visitor( + new (&allocator) LSEVisitorWrapper(graph_, heap_location_collector, stats_)); lse_visitor->Run(); return true; } diff --git a/compiler/optimizing/load_store_elimination.h b/compiler/optimizing/load_store_elimination.h index 42de803ebd..e77168547d 100644 --- a/compiler/optimizing/load_store_elimination.h +++ b/compiler/optimizing/load_store_elimination.h @@ -26,10 +26,6 @@ class SideEffectsAnalysis; class LoadStoreElimination : public HOptimization { public: - // Whether or not we should attempt partial Load-store-elimination which - // requires additional blocks and predicated instructions. - static constexpr bool kEnablePartialLSE = false; - // Controls whether to enable VLOG(compiler) logs explaining the transforms taking place. static constexpr bool kVerboseLoggingMode = false; @@ -38,12 +34,7 @@ class LoadStoreElimination : public HOptimization { const char* name = kLoadStoreEliminationPassName) : HOptimization(graph, name, stats) {} - bool Run() override { - return Run(kEnablePartialLSE); - } - - // Exposed for testing. 
- bool Run(bool enable_partial_lse); + bool Run(); static constexpr const char* kLoadStoreEliminationPassName = "load_store_elimination"; diff --git a/compiler/optimizing/load_store_elimination_test.cc b/compiler/optimizing/load_store_elimination_test.cc index 1ee109980f..0775051eb4 100644 --- a/compiler/optimizing/load_store_elimination_test.cc +++ b/compiler/optimizing/load_store_elimination_test.cc @@ -68,47 +68,27 @@ class LoadStoreEliminationTestBase : public SuperTest, public OptimizingUnitTest } } - void PerformLSE(bool with_partial = true) { + void PerformLSE() { graph_->BuildDominatorTree(); LoadStoreElimination lse(graph_, /*stats=*/nullptr); - lse.Run(with_partial); + lse.Run(); std::ostringstream oss; EXPECT_TRUE(CheckGraph(oss)) << oss.str(); } - void PerformLSEWithPartial(const AdjacencyListGraph& blks) { - // PerformLSE expects this to be empty. + void PerformLSE(const AdjacencyListGraph& blks) { + // PerformLSE expects this to be empty, and the creation of + // an `AdjacencyListGraph` computes it. graph_->ClearDominanceInformation(); if (kDebugLseTests) { LOG(INFO) << "Pre LSE " << blks; } - PerformLSE(/*with_partial=*/ true); + PerformLSE(); if (kDebugLseTests) { LOG(INFO) << "Post LSE " << blks; } } - void PerformLSENoPartial(const AdjacencyListGraph& blks) { - // PerformLSE expects this to be empty. - graph_->ClearDominanceInformation(); - if (kDebugLseTests) { - LOG(INFO) << "Pre LSE " << blks; - } - PerformLSE(/*with_partial=*/ false); - if (kDebugLseTests) { - LOG(INFO) << "Post LSE " << blks; - } - } - - void PerformSimplifications(const AdjacencyListGraph& blks) { - InstructionSimplifier simp(graph_, /*codegen=*/nullptr); - simp.Run(); - - if (kDebugLseTests) { - LOG(INFO) << "Post simplification " << blks; - } - } - // Create instructions shared among tests. void CreateEntryBlockInstructions() { HInstruction* c1 = graph_->GetIntConstant(1); @@ -327,190 +307,6 @@ std::ostream& operator<<(std::ostream& os, const TestOrder& ord) { } } -class OrderDependentTestGroup - : public LoadStoreEliminationTestBase<CommonCompilerTestWithParam<TestOrder>> {}; - -// Various configs we can use for testing. Currently used in PartialComparison tests. -struct PartialComparisonKind { - public: - enum class Type : uint8_t { kEquals, kNotEquals }; - enum class Target : uint8_t { kNull, kValue, kSelf }; - enum class Position : uint8_t { kLeft, kRight }; - - const Type type_; - const Target target_; - const Position position_; - - bool IsDefinitelyFalse() const { - return !IsPossiblyTrue(); - } - bool IsPossiblyFalse() const { - return !IsDefinitelyTrue(); - } - bool IsDefinitelyTrue() const { - if (target_ == Target::kSelf) { - return type_ == Type::kEquals; - } else if (target_ == Target::kNull) { - return type_ == Type::kNotEquals; - } else { - return false; - } - } - bool IsPossiblyTrue() const { - if (target_ == Target::kSelf) { - return type_ == Type::kEquals; - } else if (target_ == Target::kNull) { - return type_ == Type::kNotEquals; - } else { - return true; - } - } - std::ostream& Dump(std::ostream& os) const { - os << "PartialComparisonKind{" << (type_ == Type::kEquals ? "kEquals" : "kNotEquals") << ", " - << (target_ == Target::kNull ? "kNull" : (target_ == Target::kSelf ? "kSelf" : "kValue")) - << ", " << (position_ == Position::kLeft ? 
"kLeft" : "kRight") << "}"; - return os; - } -}; - -std::ostream& operator<<(std::ostream& os, const PartialComparisonKind& comp) { - return comp.Dump(os); -} - -class PartialComparisonTestGroup - : public LoadStoreEliminationTestBase<CommonCompilerTestWithParam<PartialComparisonKind>> { - public: - enum class ComparisonPlacement { - kBeforeEscape, - kInEscape, - kAfterEscape, - }; - void CheckFinalInstruction(HInstruction* ins, ComparisonPlacement placement) { - using Target = PartialComparisonKind::Target; - using Type = PartialComparisonKind::Type; - using Position = PartialComparisonKind::Position; - PartialComparisonKind kind = GetParam(); - if (ins->IsIntConstant()) { - if (kind.IsDefinitelyTrue()) { - EXPECT_TRUE(ins->AsIntConstant()->IsTrue()) << kind << " " << *ins; - } else if (kind.IsDefinitelyFalse()) { - EXPECT_TRUE(ins->AsIntConstant()->IsFalse()) << kind << " " << *ins; - } else { - EXPECT_EQ(placement, ComparisonPlacement::kBeforeEscape); - EXPECT_EQ(kind.target_, Target::kValue); - // We are before escape so value is not the object - if (kind.type_ == Type::kEquals) { - EXPECT_TRUE(ins->AsIntConstant()->IsFalse()) << kind << " " << *ins; - } else { - EXPECT_TRUE(ins->AsIntConstant()->IsTrue()) << kind << " " << *ins; - } - } - return; - } - EXPECT_NE(placement, ComparisonPlacement::kBeforeEscape) - << "For comparisons before escape we should always be able to transform into a constant." - << " Instead we got:" << std::endl << ins->DumpWithArgs(); - if (placement == ComparisonPlacement::kInEscape) { - // Should be the same type. - ASSERT_TRUE(ins->IsEqual() || ins->IsNotEqual()) << *ins; - HInstruction* other = kind.position_ == Position::kLeft ? ins->AsBinaryOperation()->GetRight() - : ins->AsBinaryOperation()->GetLeft(); - if (kind.target_ == Target::kSelf) { - EXPECT_INS_EQ(ins->AsBinaryOperation()->GetLeft(), ins->AsBinaryOperation()->GetRight()) - << " ins is: " << *ins; - } else if (kind.target_ == Target::kNull) { - EXPECT_INS_EQ(other, graph_->GetNullConstant()) << " ins is: " << *ins; - } else { - EXPECT_TRUE(other->IsStaticFieldGet()) << " ins is: " << *ins; - } - if (kind.type_ == Type::kEquals) { - EXPECT_TRUE(ins->IsEqual()) << *ins; - } else { - EXPECT_TRUE(ins->IsNotEqual()) << *ins; - } - } else { - ASSERT_EQ(placement, ComparisonPlacement::kAfterEscape); - if (kind.type_ == Type::kEquals) { - // obj == <anything> can only be true if (1) it's obj == obj or (2) obj has escaped. - ASSERT_TRUE(ins->IsAnd()) << ins->DumpWithArgs(); - EXPECT_TRUE(ins->InputAt(1)->IsEqual()) << ins->DumpWithArgs(); - } else { - // obj != <anything> is true if (2) obj has escaped. - ASSERT_TRUE(ins->IsOr()) << ins->DumpWithArgs(); - EXPECT_TRUE(ins->InputAt(1)->IsNotEqual()) << ins->DumpWithArgs(); - } - // Check the first part of AND is the obj-has-escaped - ASSERT_TRUE(ins->InputAt(0)->IsNotEqual()) << ins->DumpWithArgs(); - EXPECT_TRUE(ins->InputAt(0)->InputAt(0)->IsPhi()) << ins->DumpWithArgs(); - EXPECT_TRUE(ins->InputAt(0)->InputAt(1)->IsNullConstant()) << ins->DumpWithArgs(); - // Check the second part of AND is the eq other - EXPECT_INS_EQ(ins->InputAt(1)->InputAt(kind.position_ == Position::kLeft ? 
0 : 1), - ins->InputAt(0)->InputAt(0)) - << ins->DumpWithArgs(); - } - } - - struct ComparisonInstructions { - void AddSetup(HBasicBlock* blk) const { - for (HInstruction* i : setup_instructions_) { - blk->AddInstruction(i); - } - } - - void AddEnvironment(HEnvironment* env) const { - for (HInstruction* i : setup_instructions_) { - if (i->NeedsEnvironment()) { - i->CopyEnvironmentFrom(env); - } - } - } - - const std::vector<HInstruction*> setup_instructions_; - HInstruction* const cmp_; - }; - - ComparisonInstructions GetComparisonInstructions(HInstruction* partial) { - PartialComparisonKind kind = GetParam(); - std::vector<HInstruction*> setup; - HInstruction* target_other; - switch (kind.target_) { - case PartialComparisonKind::Target::kSelf: - target_other = partial; - break; - case PartialComparisonKind::Target::kNull: - target_other = graph_->GetNullConstant(); - break; - case PartialComparisonKind::Target::kValue: { - HInstruction* cls = MakeClassLoad(); - HInstruction* static_read = - new (GetAllocator()) HStaticFieldGet(cls, - /* field= */ nullptr, - DataType::Type::kReference, - /* field_offset= */ MemberOffset(40), - /* is_volatile= */ false, - /* field_idx= */ 0, - /* declaring_class_def_index= */ 0, - graph_->GetDexFile(), - /* dex_pc= */ 0); - setup.push_back(cls); - setup.push_back(static_read); - target_other = static_read; - break; - } - } - HInstruction* target_left; - HInstruction* target_right; - std::tie(target_left, target_right) = kind.position_ == PartialComparisonKind::Position::kLeft - ? std::pair{partial, target_other} - : std::pair{target_other, partial}; - HInstruction* cmp = - kind.type_ == PartialComparisonKind::Type::kEquals - ? static_cast<HInstruction*>(new (GetAllocator()) HEqual(target_left, target_right)) - : static_cast<HInstruction*>(new (GetAllocator()) HNotEqual(target_left, target_right)); - return {setup, cmp}; - } -}; - TEST_F(LoadStoreEliminationTest, ArrayGetSetElimination) { CreateTestControlFlowGraph(); @@ -573,7 +369,8 @@ TEST_F(LoadStoreEliminationTest, SameHeapValue2) { AddVecStore(entry_block_, array_, j_); HInstruction* vstore = AddVecStore(entry_block_, array_, i_); - graph_->SetHasSIMD(true); + // TODO: enable LSE for graphs with predicated SIMD. + graph_->SetHasTraditionalSIMD(true); PerformLSE(); ASSERT_FALSE(IsRemoved(vstore)); @@ -589,7 +386,8 @@ TEST_F(LoadStoreEliminationTest, SameHeapValue3) { AddVecStore(entry_block_, array_, i_add1_); HInstruction* vstore = AddVecStore(entry_block_, array_, i_); - graph_->SetHasSIMD(true); + // TODO: enable LSE for graphs with predicated SIMD. + graph_->SetHasTraditionalSIMD(true); PerformLSE(); ASSERT_FALSE(IsRemoved(vstore)); @@ -634,7 +432,8 @@ TEST_F(LoadStoreEliminationTest, OverlappingLoadStore) { AddArraySet(entry_block_, array_, i_, c1); HInstruction* vload5 = AddVecLoad(entry_block_, array_, i_); - graph_->SetHasSIMD(true); + // TODO: enable LSE for graphs with predicated SIMD. + graph_->SetHasTraditionalSIMD(true); PerformLSE(); ASSERT_TRUE(IsRemoved(load1)); @@ -668,7 +467,8 @@ TEST_F(LoadStoreEliminationTest, StoreAfterLoopWithoutSideEffects) { // a[j] = 1; HInstruction* array_set = AddArraySet(return_block_, array_, j_, c1); - graph_->SetHasSIMD(true); + // TODO: enable LSE for graphs with predicated SIMD. 
+ graph_->SetHasTraditionalSIMD(true); PerformLSE(); ASSERT_TRUE(IsRemoved(array_set)); @@ -701,12 +501,13 @@ TEST_F(LoadStoreEliminationTest, StoreAfterSIMDLoopWithSideEffects) { // b[phi,phi+1,phi+2,phi+3] = a[phi,phi+1,phi+2,phi+3]; AddVecStore(loop_, array_, phi_); HInstruction* vload = AddVecLoad(loop_, array_, phi_); - AddVecStore(loop_, array_b, phi_, vload->AsVecLoad()); + AddVecStore(loop_, array_b, phi_, vload); // a[j] = 0; HInstruction* a_set = AddArraySet(return_block_, array_, j_, c0); - graph_->SetHasSIMD(true); + // TODO: enable LSE for graphs with predicated SIMD. + graph_->SetHasTraditionalSIMD(true); PerformLSE(); ASSERT_TRUE(IsRemoved(vload)); @@ -740,12 +541,13 @@ TEST_F(LoadStoreEliminationTest, LoadAfterSIMDLoopWithSideEffects) { // b[phi,phi+1,phi+2,phi+3] = a[phi,phi+1,phi+2,phi+3]; AddVecStore(loop_, array_, phi_); HInstruction* vload = AddVecLoad(loop_, array_, phi_); - AddVecStore(loop_, array_b, phi_, vload->AsVecLoad()); + AddVecStore(loop_, array_b, phi_, vload); // x = a[j]; HInstruction* load = AddArrayGet(return_block_, array_, j_); - graph_->SetHasSIMD(true); + // TODO: enable LSE for graphs with predicated SIMD. + graph_->SetHasTraditionalSIMD(true); PerformLSE(); ASSERT_TRUE(IsRemoved(vload)); @@ -786,7 +588,8 @@ TEST_F(LoadStoreEliminationTest, MergePredecessorVecStores) { // down: a[i,... i + 3] = [1,...1] HInstruction* vstore4 = AddVecStore(down, array_, i_, vdata); - graph_->SetHasSIMD(true); + // TODO: enable LSE for graphs with predicated SIMD. + graph_->SetHasTraditionalSIMD(true); PerformLSE(); ASSERT_TRUE(IsRemoved(vstore2)); @@ -874,10 +677,11 @@ TEST_F(LoadStoreEliminationTest, RedundantVStoreVLoadInLoop) { // a[i,... i + 3] = [1,...1] HInstruction* vstore1 = AddVecStore(loop_, array_a, phi_); HInstruction* vload = AddVecLoad(loop_, array_a, phi_); - HInstruction* vstore2 = AddVecStore(loop_, array_b, phi_, vload->AsVecLoad()); + HInstruction* vstore2 = AddVecStore(loop_, array_b, phi_, vload); HInstruction* vstore3 = AddVecStore(loop_, array_a, phi_, vstore1->InputAt(2)); - graph_->SetHasSIMD(true); + // TODO: enable LSE for graphs with predicated SIMD. + graph_->SetHasTraditionalSIMD(true); PerformLSE(); ASSERT_FALSE(IsRemoved(vstore1)); @@ -963,9 +767,10 @@ TEST_F(LoadStoreEliminationTest, VLoadDefaultValueInLoopWithoutWriteSideEffects) // v = a[i,... i + 3] // array[0,... 3] = v HInstruction* vload = AddVecLoad(loop_, array_a, phi_); - HInstruction* vstore = AddVecStore(return_block_, array_, c0, vload->AsVecLoad()); + HInstruction* vstore = AddVecStore(return_block_, array_, c0, vload); - graph_->SetHasSIMD(true); + // TODO: enable LSE for graphs with predicated SIMD. + graph_->SetHasTraditionalSIMD(true); PerformLSE(); ASSERT_FALSE(IsRemoved(vload)); @@ -987,9 +792,10 @@ TEST_F(LoadStoreEliminationTest, VLoadDefaultValue) { // v = a[0,... 3] // array[0,... 3] = v HInstruction* vload = AddVecLoad(pre_header_, array_a, c0); - HInstruction* vstore = AddVecStore(return_block_, array_, c0, vload->AsVecLoad()); + HInstruction* vstore = AddVecStore(return_block_, array_, c0, vload); - graph_->SetHasSIMD(true); + // TODO: enable LSE for graphs with predicated SIMD. 
+ graph_->SetHasTraditionalSIMD(true); PerformLSE(); ASSERT_FALSE(IsRemoved(vload)); @@ -1063,10 +869,11 @@ TEST_F(LoadStoreEliminationTest, VLoadAndLoadDefaultValueInLoopWithoutWriteSideE // array[0] = v1 HInstruction* vload = AddVecLoad(loop_, array_a, phi_); HInstruction* load = AddArrayGet(loop_, array_a, phi_); - HInstruction* vstore = AddVecStore(return_block_, array_, c0, vload->AsVecLoad()); + HInstruction* vstore = AddVecStore(return_block_, array_, c0, vload); HInstruction* store = AddArraySet(return_block_, array_, c0, load); - graph_->SetHasSIMD(true); + // TODO: enable LSE for graphs with predicated SIMD. + graph_->SetHasTraditionalSIMD(true); PerformLSE(); ASSERT_FALSE(IsRemoved(vload)); @@ -1094,10 +901,11 @@ TEST_F(LoadStoreEliminationTest, VLoadAndLoadDefaultValue) { // array[0] = v1 HInstruction* vload = AddVecLoad(pre_header_, array_a, c0); HInstruction* load = AddArrayGet(pre_header_, array_a, c0); - HInstruction* vstore = AddVecStore(return_block_, array_, c0, vload->AsVecLoad()); + HInstruction* vstore = AddVecStore(return_block_, array_, c0, vload); HInstruction* store = AddArraySet(return_block_, array_, c0, load); - graph_->SetHasSIMD(true); + // TODO: enable LSE for graphs with predicated SIMD. + graph_->SetHasTraditionalSIMD(true); PerformLSE(); ASSERT_FALSE(IsRemoved(vload)); @@ -1126,10 +934,11 @@ TEST_F(LoadStoreEliminationTest, VLoadDefaultValueAndVLoadInLoopWithoutWriteSide // array[128,... 131] = v1 HInstruction* vload1 = AddVecLoad(loop_, array_a, phi_); HInstruction* vload2 = AddVecLoad(loop_, array_a, phi_); - HInstruction* vstore1 = AddVecStore(return_block_, array_, c0, vload1->AsVecLoad()); - HInstruction* vstore2 = AddVecStore(return_block_, array_, c128, vload2->AsVecLoad()); + HInstruction* vstore1 = AddVecStore(return_block_, array_, c0, vload1); + HInstruction* vstore2 = AddVecStore(return_block_, array_, c128, vload2); - graph_->SetHasSIMD(true); + // TODO: enable LSE for graphs with predicated SIMD. + graph_->SetHasTraditionalSIMD(true); PerformLSE(); ASSERT_FALSE(IsRemoved(vload1)); @@ -1157,10 +966,11 @@ TEST_F(LoadStoreEliminationTest, VLoadDefaultValueAndVLoad) { // array[128,... 131] = v1 HInstruction* vload1 = AddVecLoad(pre_header_, array_a, c0); HInstruction* vload2 = AddVecLoad(pre_header_, array_a, c0); - HInstruction* vstore1 = AddVecStore(return_block_, array_, c0, vload1->AsVecLoad()); - HInstruction* vstore2 = AddVecStore(return_block_, array_, c128, vload2->AsVecLoad()); + HInstruction* vstore1 = AddVecStore(return_block_, array_, c0, vload1); + HInstruction* vstore2 = AddVecStore(return_block_, array_, c128, vload2); - graph_->SetHasSIMD(true); + // TODO: enable LSE for graphs with predicated SIMD. 
+ graph_->SetHasTraditionalSIMD(true); PerformLSE(); ASSERT_FALSE(IsRemoved(vload1)); @@ -2069,7 +1879,7 @@ TEST_F(LoadStoreEliminationTest, PartialUnknownMerge) { SetupExit(exit); - PerformLSENoPartial(blks); + PerformLSE(blks); EXPECT_INS_RETAINED(read_bottom); EXPECT_INS_RETAINED(write_c1); @@ -2084,84 +1894,6 @@ TEST_F(LoadStoreEliminationTest, PartialUnknownMerge) { // // LEFT // obj.field = 1; // call_func(obj); -// foo_r = obj.field -// } else { -// // TO BE ELIMINATED -// obj.field = 2; -// // RIGHT -// // TO BE ELIMINATED -// foo_l = obj.field; -// } -// EXIT -// return PHI(foo_l, foo_r) -TEST_F(LoadStoreEliminationTest, PartialLoadElimination) { - ScopedObjectAccess soa(Thread::Current()); - VariableSizedHandleScope vshs(soa.Self()); - CreateGraph(&vshs); - AdjacencyListGraph blks(SetupFromAdjacencyList("entry", - "exit_REAL", - { { "entry", "left" }, - { "entry", "right" }, - { "left", "exit" }, - { "right", "exit" }, - { "exit", "exit_REAL" } })); - HBasicBlock* entry = blks.Get("entry"); - HBasicBlock* left = blks.Get("left"); - HBasicBlock* right = blks.Get("right"); - HBasicBlock* exit = blks.Get("exit"); - HInstruction* bool_value = MakeParam(DataType::Type::kBool); - HInstruction* c1 = graph_->GetIntConstant(1); - HInstruction* c2 = graph_->GetIntConstant(2); - - HInstruction* cls = MakeClassLoad(); - HInstruction* new_inst = MakeNewInstance(cls); - HInstruction* if_inst = new (GetAllocator()) HIf(bool_value); - entry->AddInstruction(cls); - entry->AddInstruction(new_inst); - entry->AddInstruction(if_inst); - ManuallyBuildEnvFor(cls, {}); - new_inst->CopyEnvironmentFrom(cls->GetEnvironment()); - - HInstruction* write_left = MakeIFieldSet(new_inst, c1, MemberOffset(32)); - HInstruction* call_left = MakeInvoke(DataType::Type::kVoid, { new_inst }); - HInstruction* read_left = MakeIFieldGet(new_inst, DataType::Type::kInt32, MemberOffset(16)); - HInstruction* goto_left = new (GetAllocator()) HGoto(); - left->AddInstruction(write_left); - left->AddInstruction(call_left); - left->AddInstruction(read_left); - left->AddInstruction(goto_left); - call_left->CopyEnvironmentFrom(cls->GetEnvironment()); - - HInstruction* write_right = MakeIFieldSet(new_inst, c2, MemberOffset(16)); - HInstruction* read_right = MakeIFieldGet(new_inst, DataType::Type::kInt32, MemberOffset(16)); - HInstruction* goto_right = new (GetAllocator()) HGoto(); - right->AddInstruction(write_right); - right->AddInstruction(read_right); - right->AddInstruction(goto_right); - - HInstruction* phi_final = MakePhi({read_left, read_right}); - HInstruction* return_exit = new (GetAllocator()) HReturn(phi_final); - exit->AddPhi(phi_final->AsPhi()); - exit->AddInstruction(return_exit); - - // PerformLSE expects this to be empty. - graph_->ClearDominanceInformation(); - PerformLSE(); - - ASSERT_TRUE(IsRemoved(read_right)); - ASSERT_FALSE(IsRemoved(read_left)); - ASSERT_FALSE(IsRemoved(phi_final)); - ASSERT_TRUE(phi_final->GetInputs()[1] == c2); - ASSERT_TRUE(phi_final->GetInputs()[0] == read_left); - ASSERT_TRUE(IsRemoved(write_right)); -} - -// // ENTRY -// obj = new Obj(); -// if (parameter_value) { -// // LEFT -// obj.field = 1; -// call_func(obj); // // We don't know what obj.field is now we aren't able to eliminate the read below! 
// } else { // // DO NOT ELIMINATE @@ -2217,7 +1949,7 @@ TEST_F(LoadStoreEliminationTest, PartialLoadPreserved) { exit->AddInstruction(read_bottom); exit->AddInstruction(return_exit); - PerformLSENoPartial(blks); + PerformLSE(blks); EXPECT_INS_RETAINED(read_bottom) << *read_bottom; EXPECT_INS_RETAINED(write_right) << *write_right; @@ -2308,7 +2040,7 @@ TEST_F(LoadStoreEliminationTest, PartialLoadPreserved2) { exit->AddInstruction(read_bottom); exit->AddInstruction(return_exit); - PerformLSENoPartial(blks); + PerformLSE(blks); EXPECT_INS_RETAINED(read_bottom); EXPECT_INS_RETAINED(write_right_first); @@ -2320,2090 +2052,6 @@ TEST_F(LoadStoreEliminationTest, PartialLoadPreserved2) { // if (parameter_value) { // // LEFT // // DO NOT ELIMINATE -// escape(obj); -// obj.field = 1; -// } else { -// // RIGHT -// // ELIMINATE -// obj.field = 2; -// } -// EXIT -// ELIMINATE -// return obj.field -TEST_F(LoadStoreEliminationTest, PartialLoadElimination2) { - ScopedObjectAccess soa(Thread::Current()); - VariableSizedHandleScope vshs(soa.Self()); - CreateGraph(&vshs); - AdjacencyListGraph blks(SetupFromAdjacencyList("entry", - "exit", - {{"entry", "left"}, - {"entry", "right"}, - {"left", "breturn"}, - {"right", "breturn"}, - {"breturn", "exit"}})); -#define GET_BLOCK(name) HBasicBlock* name = blks.Get(#name) - GET_BLOCK(entry); - GET_BLOCK(exit); - GET_BLOCK(breturn); - GET_BLOCK(left); - GET_BLOCK(right); -#undef GET_BLOCK - HInstruction* bool_value = MakeParam(DataType::Type::kBool); - HInstruction* c1 = graph_->GetIntConstant(1); - HInstruction* c2 = graph_->GetIntConstant(2); - - HInstruction* cls = MakeClassLoad(); - HInstruction* new_inst = MakeNewInstance(cls); - HInstruction* if_inst = new (GetAllocator()) HIf(bool_value); - entry->AddInstruction(cls); - entry->AddInstruction(new_inst); - entry->AddInstruction(if_inst); - ManuallyBuildEnvFor(cls, {}); - new_inst->CopyEnvironmentFrom(cls->GetEnvironment()); - - HInstruction* call_left = MakeInvoke(DataType::Type::kVoid, { new_inst }); - HInstruction* write_left = MakeIFieldSet(new_inst, c1, MemberOffset(32)); - HInstruction* goto_left = new (GetAllocator()) HGoto(); - left->AddInstruction(call_left); - left->AddInstruction(write_left); - left->AddInstruction(goto_left); - call_left->CopyEnvironmentFrom(cls->GetEnvironment()); - - HInstruction* write_right = MakeIFieldSet(new_inst, c2, MemberOffset(32)); - HInstruction* goto_right = new (GetAllocator()) HGoto(); - right->AddInstruction(write_right); - right->AddInstruction(goto_right); - - HInstruction* read_bottom = MakeIFieldGet(new_inst, DataType::Type::kInt32, MemberOffset(32)); - HInstruction* return_exit = new (GetAllocator()) HReturn(read_bottom); - breturn->AddInstruction(read_bottom); - breturn->AddInstruction(return_exit); - - SetupExit(exit); - - // PerformLSE expects this to be empty. 
- graph_->ClearDominanceInformation(); - PerformLSE(); - - EXPECT_INS_REMOVED(read_bottom); - EXPECT_INS_REMOVED(write_right); - EXPECT_INS_RETAINED(write_left); - EXPECT_INS_RETAINED(call_left); -} - -template<typename Iter, typename Func> -typename Iter::value_type FindOrNull(Iter begin, Iter end, Func func) { - static_assert(std::is_pointer_v<typename Iter::value_type>); - auto it = std::find_if(begin, end, func); - if (it == end) { - return nullptr; - } else { - return *it; - } -} - -// // ENTRY -// Obj new_inst = new Obj(); -// new_inst.foo = 12; -// Obj obj; -// Obj out; -// int first; -// if (param0) { -// // ESCAPE_ROUTE -// if (param1) { -// // LEFT_START -// if (param2) { -// // LEFT_LEFT -// obj = new_inst; -// } else { -// // LEFT_RIGHT -// obj = obj_param; -// } -// // LEFT_MERGE -// // technically the phi is enough to cause an escape but might as well be -// // thorough. -// // obj = phi[new_inst, param] -// escape(obj); -// out = obj; -// } else { -// // RIGHT -// out = obj_param; -// } -// // EXIT -// // Can't do anything with this since we don't have good tracking for the heap-locations -// // out = phi[param, phi[new_inst, param]] -// first = out.foo -// } else { -// new_inst.foo = 15; -// first = 13; -// } -// // first = phi[out.foo, 13] -// return first + new_inst.foo; -TEST_F(LoadStoreEliminationTest, PartialPhiPropagation) { - ScopedObjectAccess soa(Thread::Current()); - VariableSizedHandleScope vshs(soa.Self()); - CreateGraph(&vshs); - AdjacencyListGraph blks(SetupFromAdjacencyList("entry", - "exit", - {{"entry", "escape_route"}, - {"entry", "noescape_route"}, - {"escape_route", "left"}, - {"escape_route", "right"}, - {"left", "left_left"}, - {"left", "left_right"}, - {"left_left", "left_merge"}, - {"left_right", "left_merge"}, - {"left_merge", "escape_end"}, - {"right", "escape_end"}, - {"escape_end", "breturn"}, - {"noescape_route", "breturn"}, - {"breturn", "exit"}})); -#define GET_BLOCK(name) HBasicBlock* name = blks.Get(#name) - GET_BLOCK(entry); - GET_BLOCK(exit); - GET_BLOCK(breturn); - GET_BLOCK(left); - GET_BLOCK(right); - GET_BLOCK(left_left); - GET_BLOCK(left_right); - GET_BLOCK(left_merge); - GET_BLOCK(escape_end); - GET_BLOCK(escape_route); - GET_BLOCK(noescape_route); -#undef GET_BLOCK - EnsurePredecessorOrder(escape_end, {left_merge, right}); - EnsurePredecessorOrder(left_merge, {left_left, left_right}); - EnsurePredecessorOrder(breturn, {escape_end, noescape_route}); - HInstruction* param0 = MakeParam(DataType::Type::kBool); - HInstruction* param1 = MakeParam(DataType::Type::kBool); - HInstruction* param2 = MakeParam(DataType::Type::kBool); - HInstruction* obj_param = MakeParam(DataType::Type::kReference); - HInstruction* c12 = graph_->GetIntConstant(12); - HInstruction* c13 = graph_->GetIntConstant(13); - HInstruction* c15 = graph_->GetIntConstant(15); - - HInstruction* cls = MakeClassLoad(); - HInstruction* new_inst = MakeNewInstance(cls); - HInstruction* store = MakeIFieldSet(new_inst, c12, MemberOffset(32)); - HInstruction* if_param0 = new (GetAllocator()) HIf(param0); - entry->AddInstruction(cls); - entry->AddInstruction(new_inst); - entry->AddInstruction(store); - entry->AddInstruction(if_param0); - ManuallyBuildEnvFor(cls, {}); - new_inst->CopyEnvironmentFrom(cls->GetEnvironment()); - - HInstruction* store_noescape = MakeIFieldSet(new_inst, c15, MemberOffset(32)); - noescape_route->AddInstruction(store_noescape); - noescape_route->AddInstruction(new (GetAllocator()) HGoto()); - - escape_route->AddInstruction(new (GetAllocator()) HIf(param1)); - 
- HInstruction* if_left = new (GetAllocator()) HIf(param2); - left->AddInstruction(if_left); - - HInstruction* goto_left_left = new (GetAllocator()) HGoto(); - left_left->AddInstruction(goto_left_left); - - HInstruction* goto_left_right = new (GetAllocator()) HGoto(); - left_right->AddInstruction(goto_left_right); - - HPhi* left_phi = MakePhi({obj_param, new_inst}); - HInstruction* call_left = MakeInvoke(DataType::Type::kVoid, { left_phi }); - HInstruction* goto_left_merge = new (GetAllocator()) HGoto(); - left_merge->AddPhi(left_phi); - left_merge->AddInstruction(call_left); - left_merge->AddInstruction(goto_left_merge); - left_phi->SetCanBeNull(true); - call_left->CopyEnvironmentFrom(cls->GetEnvironment()); - - HInstruction* goto_right = new (GetAllocator()) HGoto(); - right->AddInstruction(goto_right); - - HPhi* escape_end_phi = MakePhi({left_phi, obj_param}); - HInstruction* read_escape_end = - MakeIFieldGet(escape_end_phi, DataType::Type::kInt32, MemberOffset(32)); - HInstruction* goto_escape_end = new (GetAllocator()) HGoto(); - escape_end->AddPhi(escape_end_phi); - escape_end->AddInstruction(read_escape_end); - escape_end->AddInstruction(goto_escape_end); - - HPhi* return_phi = MakePhi({read_escape_end, c13}); - HInstruction* read_exit = MakeIFieldGet(new_inst, DataType::Type::kInt32, MemberOffset(32)); - HInstruction* add_exit = new (GetAllocator()) HAdd(DataType::Type::kInt32, return_phi, read_exit); - HInstruction* return_exit = new (GetAllocator()) HReturn(add_exit); - breturn->AddPhi(return_phi); - breturn->AddInstruction(read_exit); - breturn->AddInstruction(add_exit); - breturn->AddInstruction(return_exit); - - SetupExit(exit); - - PerformLSEWithPartial(blks); - - HPredicatedInstanceFieldGet* pred_get = - FindSingleInstruction<HPredicatedInstanceFieldGet>(graph_); - std::vector<HPhi*> all_return_phis; - std::tie(all_return_phis) = FindAllInstructions<HPhi>(graph_, breturn); - EXPECT_EQ(all_return_phis.size(), 3u); - EXPECT_INS_RETAINED(return_phi); - EXPECT_TRUE(std::find(all_return_phis.begin(), all_return_phis.end(), return_phi) != - all_return_phis.end()); - HPhi* instance_phi = - FindOrNull(all_return_phis.begin(), all_return_phis.end(), [&](HPhi* phi) { - return phi != return_phi && phi->GetType() == DataType::Type::kReference; - }); - ASSERT_NE(instance_phi, nullptr); - HPhi* value_phi = FindOrNull(all_return_phis.begin(), all_return_phis.end(), [&](HPhi* phi) { - return phi != return_phi && phi->GetType() == DataType::Type::kInt32; - }); - ASSERT_NE(value_phi, nullptr); - EXPECT_INS_EQ( - instance_phi->InputAt(0), - FindSingleInstruction<HNewInstance>(graph_, escape_route->GetSinglePredecessor())); - // Check materialize block - EXPECT_INS_EQ(FindSingleInstruction<HInstanceFieldSet>( - graph_, escape_route->GetSinglePredecessor()) - ->InputAt(1), - c12); - - EXPECT_INS_EQ(instance_phi->InputAt(1), graph_->GetNullConstant()); - EXPECT_INS_EQ(value_phi->InputAt(0), graph_->GetIntConstant(0)); - EXPECT_INS_EQ(value_phi->InputAt(1), c15); - EXPECT_INS_REMOVED(store_noescape); - EXPECT_INS_EQ(pred_get->GetTarget(), instance_phi); - EXPECT_INS_EQ(pred_get->GetDefaultValue(), value_phi); -} - -// // ENTRY -// // To be moved -// // NB Order important. By having alloc and store of obj1 before obj2 that -// // ensure we'll build the materialization for obj1 first (just due to how -// // we iterate.) 
-// obj1 = new Obj(); -// obj2 = new Obj(); // has env[obj1] -// // Swap the order of these -// obj1.foo = param_obj1; -// obj2.foo = param_obj2; -// if (param1) { -// // LEFT -// obj2.foo = obj1; -// if (param2) { -// // LEFT_LEFT -// escape(obj2); -// } else {} -// } else {} -// return select(param3, obj1.foo, obj2.foo); -// EXIT -TEST_P(OrderDependentTestGroup, PredicatedUse) { - ScopedObjectAccess soa(Thread::Current()); - VariableSizedHandleScope vshs(soa.Self()); - CreateGraph(&vshs); - AdjacencyListGraph blks(SetupFromAdjacencyList("entry", - "exit", - {{"entry", "left"}, - {"entry", "right"}, - {"left", "left_left"}, - {"left", "left_right"}, - {"left_left", "left_end"}, - {"left_right", "left_end"}, - {"left_end", "breturn"}, - {"right", "breturn"}, - {"breturn", "exit"}})); -#define GET_BLOCK(name) HBasicBlock* name = blks.Get(#name) - GET_BLOCK(entry); - GET_BLOCK(exit); - GET_BLOCK(breturn); - GET_BLOCK(right); - GET_BLOCK(left); - GET_BLOCK(left_left); - GET_BLOCK(left_right); - GET_BLOCK(left_end); -#undef GET_BLOCK - TestOrder order = GetParam(); - EnsurePredecessorOrder(breturn, {left_end, right}); - EnsurePredecessorOrder(left_end, {left_left, left_right}); - HInstruction* param1 = MakeParam(DataType::Type::kBool); - HInstruction* param2 = MakeParam(DataType::Type::kBool); - HInstruction* param3 = MakeParam(DataType::Type::kBool); - HInstruction* param_obj1 = MakeParam(DataType::Type::kReference); - HInstruction* param_obj2 = MakeParam(DataType::Type::kReference); - - HInstruction* cls1 = MakeClassLoad(); - HInstruction* cls2 = MakeClassLoad(); - HInstruction* new_inst1 = MakeNewInstance(cls1); - HInstruction* new_inst2 = MakeNewInstance(cls2); - HInstruction* store1 = MakeIFieldSet(new_inst1, param_obj1, MemberOffset(32)); - HInstruction* store2 = MakeIFieldSet(new_inst2, param_obj2, MemberOffset(32)); - HInstruction* null_const = graph_->GetNullConstant(); - HInstruction* if_inst = new (GetAllocator()) HIf(param1); - entry->AddInstruction(cls1); - entry->AddInstruction(cls2); - entry->AddInstruction(new_inst1); - entry->AddInstruction(new_inst2); - if (order == TestOrder::kSameAsAlloc) { - entry->AddInstruction(store1); - entry->AddInstruction(store2); - } else { - entry->AddInstruction(store2); - entry->AddInstruction(store1); - } - entry->AddInstruction(if_inst); - ManuallyBuildEnvFor(cls1, {}); - cls2->CopyEnvironmentFrom(cls1->GetEnvironment()); - new_inst1->CopyEnvironmentFrom(cls1->GetEnvironment()); - new_inst2->CopyEnvironmentFrom(cls1->GetEnvironment()); - - // This is the escape of new_inst1 - HInstruction* store_left = MakeIFieldSet(new_inst2, new_inst1, MemberOffset(32)); - HInstruction* if_left = new (GetAllocator()) HIf(param2); - left->AddInstruction(store_left); - left->AddInstruction(if_left); - - HInstruction* call_left_left = MakeInvoke(DataType::Type::kVoid, { new_inst2 }); - HInstruction* goto_left_left = new (GetAllocator()) HGoto(); - left_left->AddInstruction(call_left_left); - left_left->AddInstruction(goto_left_left); - call_left_left->CopyEnvironmentFrom(new_inst2->GetEnvironment()); - - left_right->AddInstruction(new (GetAllocator()) HGoto()); - left_end->AddInstruction(new (GetAllocator()) HGoto()); - - right->AddInstruction(new (GetAllocator()) HGoto()); - - // Used to distinguish the pred-gets without having to dig through the - // multiple phi layers. 
- constexpr uint32_t kRead1DexPc = 10; - constexpr uint32_t kRead2DexPc = 20; - HInstruction* read1 = - MakeIFieldGet(new_inst1, DataType::Type::kReference, MemberOffset(32), kRead1DexPc); - read1->SetReferenceTypeInfo( - ReferenceTypeInfo::CreateUnchecked(graph_->GetHandleCache()->GetObjectClassHandle(), false)); - HInstruction* read2 = - MakeIFieldGet(new_inst2, DataType::Type::kReference, MemberOffset(32), kRead2DexPc); - read2->SetReferenceTypeInfo( - ReferenceTypeInfo::CreateUnchecked(graph_->GetHandleCache()->GetObjectClassHandle(), false)); - HInstruction* sel_return = new (GetAllocator()) HSelect(param3, read1, read2, 0); - HInstruction* return_exit = new (GetAllocator()) HReturn(sel_return); - breturn->AddInstruction(read1); - breturn->AddInstruction(read2); - breturn->AddInstruction(sel_return); - breturn->AddInstruction(return_exit); - - SetupExit(exit); - - PerformLSEWithPartial(blks); - - EXPECT_INS_RETAINED(call_left_left); - EXPECT_INS_REMOVED(read1); - EXPECT_INS_REMOVED(read2); - EXPECT_INS_REMOVED(new_inst1); - EXPECT_INS_REMOVED(new_inst2); - EXPECT_TRUE(new_inst1->GetUses().empty()) << *new_inst1 << " " << new_inst1->GetUses(); - EXPECT_TRUE(new_inst2->GetUses().empty()) << *new_inst2 << " " << new_inst2->GetUses(); - EXPECT_INS_RETAINED(sel_return); - // Make sure the selector is the same - EXPECT_INS_EQ(sel_return->InputAt(2), param3); - std::vector<HPredicatedInstanceFieldGet*> pred_gets; - std::tie(pred_gets) = FindAllInstructions<HPredicatedInstanceFieldGet>(graph_, breturn); - HPredicatedInstanceFieldGet* pred1 = FindOrNull(pred_gets.begin(), pred_gets.end(), [&](auto i) { - return i->GetDexPc() == kRead1DexPc; - }); - HPredicatedInstanceFieldGet* pred2 = FindOrNull(pred_gets.begin(), pred_gets.end(), [&](auto i) { - return i->GetDexPc() == kRead2DexPc; - }); - ASSERT_NE(pred1, nullptr); - ASSERT_NE(pred2, nullptr); - EXPECT_INS_EQ(sel_return->InputAt(0), pred2); - EXPECT_INS_EQ(sel_return->InputAt(1), pred1); - // Check targets - EXPECT_TRUE(pred1->GetTarget()->IsPhi()) << pred1->DumpWithArgs(); - EXPECT_TRUE(pred2->GetTarget()->IsPhi()) << pred2->DumpWithArgs(); - HInstruction* mat1 = FindSingleInstruction<HNewInstance>(graph_, left->GetSinglePredecessor()); - HInstruction* mat2 = - FindSingleInstruction<HNewInstance>(graph_, left_left->GetSinglePredecessor()); - EXPECT_INS_EQ(pred1->GetTarget()->InputAt(0), mat1); - EXPECT_INS_EQ(pred1->GetTarget()->InputAt(1), null_const); - EXPECT_TRUE(pred2->GetTarget()->InputAt(0)->IsPhi()) << pred2->DumpWithArgs(); - EXPECT_INS_EQ(pred2->GetTarget()->InputAt(0)->InputAt(0), mat2); - EXPECT_INS_EQ(pred2->GetTarget()->InputAt(0)->InputAt(1), null_const); - EXPECT_INS_EQ(pred2->GetTarget()->InputAt(1), null_const); - // Check default values. - EXPECT_TRUE(pred1->GetDefaultValue()->IsPhi()) << pred1->DumpWithArgs(); - EXPECT_TRUE(pred2->GetDefaultValue()->IsPhi()) << pred2->DumpWithArgs(); - EXPECT_INS_EQ(pred1->GetDefaultValue()->InputAt(0), null_const); - EXPECT_INS_EQ(pred1->GetDefaultValue()->InputAt(1), param_obj1); - EXPECT_TRUE(pred2->GetDefaultValue()->InputAt(0)->IsPhi()) << pred2->DumpWithArgs(); - EXPECT_INS_EQ(pred2->GetDefaultValue()->InputAt(0)->InputAt(0), null_const); - EXPECT_INS_EQ(pred2->GetDefaultValue()->InputAt(0)->InputAt(1), mat1); - EXPECT_INS_EQ(pred2->GetDefaultValue()->InputAt(1), param_obj2); -} - -// // ENTRY -// // To be moved -// // NB Order important. 
By having alloc and store of obj1 before obj2 that -// // ensure we'll build the materialization for obj1 first (just due to how -// // we iterate.) -// obj1 = new Obj(); -// obj.foo = 12; -// obj2 = new Obj(); // has env[obj1] -// obj2.foo = 15; -// if (param1) { -// // LEFT -// // Need to update env to nullptr -// escape(obj1/2); -// if (param2) { -// // LEFT_LEFT -// escape(obj2/1); -// } else {} -// } else {} -// return obj1.foo + obj2.foo; -// EXIT -TEST_P(OrderDependentTestGroup, PredicatedEnvUse) { - ScopedObjectAccess soa(Thread::Current()); - VariableSizedHandleScope vshs(soa.Self()); - CreateGraph(&vshs); - AdjacencyListGraph blks(SetupFromAdjacencyList("entry", - "exit", - {{"entry", "left"}, - {"entry", "right"}, - {"left", "left_left"}, - {"left", "left_right"}, - {"left_left", "left_end"}, - {"left_right", "left_end"}, - {"left_end", "breturn"}, - {"right", "breturn"}, - {"breturn", "exit"}})); -#define GET_BLOCK(name) HBasicBlock* name = blks.Get(#name) - GET_BLOCK(entry); - GET_BLOCK(exit); - GET_BLOCK(breturn); - GET_BLOCK(right); - GET_BLOCK(left); - GET_BLOCK(left_left); - GET_BLOCK(left_right); - GET_BLOCK(left_end); -#undef GET_BLOCK - TestOrder order = GetParam(); - EnsurePredecessorOrder(breturn, {left_end, right}); - EnsurePredecessorOrder(left_end, {left_left, left_right}); - HInstruction* param1 = MakeParam(DataType::Type::kBool); - HInstruction* param2 = MakeParam(DataType::Type::kBool); - HInstruction* c12 = graph_->GetIntConstant(12); - HInstruction* c15 = graph_->GetIntConstant(15); - - HInstruction* cls1 = MakeClassLoad(); - HInstruction* cls2 = MakeClassLoad(); - HInstruction* new_inst1 = MakeNewInstance(cls1); - HInstruction* store1 = MakeIFieldSet(new_inst1, c12, MemberOffset(32)); - HInstruction* new_inst2 = MakeNewInstance(cls2); - HInstruction* store2 = MakeIFieldSet(new_inst2, c15, MemberOffset(32)); - HInstruction* if_inst = new (GetAllocator()) HIf(param1); - entry->AddInstruction(cls1); - entry->AddInstruction(cls2); - entry->AddInstruction(new_inst1); - entry->AddInstruction(store1); - entry->AddInstruction(new_inst2); - entry->AddInstruction(store2); - entry->AddInstruction(if_inst); - ManuallyBuildEnvFor(cls1, {}); - cls2->CopyEnvironmentFrom(cls1->GetEnvironment()); - new_inst1->CopyEnvironmentFrom(cls1->GetEnvironment()); - ManuallyBuildEnvFor(new_inst2, {new_inst1}); - - HInstruction* first_inst = new_inst1; - HInstruction* second_inst = new_inst2; - - if (order == TestOrder::kReverseOfAlloc) { - std::swap(first_inst, second_inst); - } - - HInstruction* call_left = MakeInvoke(DataType::Type::kVoid, { first_inst }); - HInstruction* if_left = new (GetAllocator()) HIf(param2); - left->AddInstruction(call_left); - left->AddInstruction(if_left); - call_left->CopyEnvironmentFrom(new_inst2->GetEnvironment()); - - HInstruction* call_left_left = MakeInvoke(DataType::Type::kVoid, { second_inst }); - HInstruction* goto_left_left = new (GetAllocator()) HGoto(); - left_left->AddInstruction(call_left_left); - left_left->AddInstruction(goto_left_left); - call_left_left->CopyEnvironmentFrom(new_inst2->GetEnvironment()); - - left_right->AddInstruction(new (GetAllocator()) HGoto()); - left_end->AddInstruction(new (GetAllocator()) HGoto()); - - right->AddInstruction(new (GetAllocator()) HGoto()); - - HInstruction* read1 = MakeIFieldGet(new_inst1, DataType::Type::kInt32, MemberOffset(32)); - HInstruction* read2 = MakeIFieldGet(new_inst2, DataType::Type::kInt32, MemberOffset(32)); - HInstruction* add_return = new (GetAllocator()) HAdd(DataType::Type::kInt32, 
read1, read2); - HInstruction* return_exit = new (GetAllocator()) HReturn(add_return); - breturn->AddInstruction(read1); - breturn->AddInstruction(read2); - breturn->AddInstruction(add_return); - breturn->AddInstruction(return_exit); - - SetupExit(exit); - - PerformLSEWithPartial(blks); - - HNewInstance* moved_new_inst1; - HInstanceFieldSet* moved_set1; - HNewInstance* moved_new_inst2; - HInstanceFieldSet* moved_set2; - HBasicBlock* first_mat_block = left->GetSinglePredecessor(); - HBasicBlock* second_mat_block = left_left->GetSinglePredecessor(); - if (order == TestOrder::kReverseOfAlloc) { - std::swap(first_mat_block, second_mat_block); - } - std::tie(moved_new_inst1, moved_set1) = - FindSingleInstructions<HNewInstance, HInstanceFieldSet>(graph_, first_mat_block); - std::tie(moved_new_inst2, moved_set2) = - FindSingleInstructions<HNewInstance, HInstanceFieldSet>(graph_, second_mat_block); - std::vector<HPredicatedInstanceFieldGet*> pred_gets; - std::vector<HPhi*> phis; - std::tie(pred_gets, phis) = FindAllInstructions<HPredicatedInstanceFieldGet, HPhi>(graph_); - EXPECT_NE(moved_new_inst1, nullptr); - EXPECT_NE(moved_new_inst2, nullptr); - EXPECT_NE(moved_set1, nullptr); - EXPECT_NE(moved_set2, nullptr); - EXPECT_INS_EQ(moved_set1->InputAt(1), c12); - EXPECT_INS_EQ(moved_set2->InputAt(1), c15); - EXPECT_INS_RETAINED(call_left); - EXPECT_INS_RETAINED(call_left_left); - EXPECT_INS_REMOVED(store1); - EXPECT_INS_REMOVED(store2); - EXPECT_INS_REMOVED(read1); - EXPECT_INS_REMOVED(read2); - EXPECT_INS_EQ(moved_new_inst2->GetEnvironment()->GetInstructionAt(0), - order == TestOrder::kSameAsAlloc - ? moved_new_inst1 - : static_cast<HInstruction*>(graph_->GetNullConstant())); -} - -// // ENTRY -// obj1 = new Obj1(); -// obj2 = new Obj2(); -// val1 = 3; -// val2 = 13; -// // The exact order the stores are written affects what the order we perform -// // partial LSE on the values -// obj1/2.field = val1/2; -// obj2/1.field = val2/1; -// if (parameter_value) { -// // LEFT -// escape(obj1); -// escape(obj2); -// } else { -// // RIGHT -// // ELIMINATE -// obj1.field = 2; -// obj2.field = 12; -// } -// EXIT -// predicated-ELIMINATE -// return obj1.field + obj2.field -TEST_P(OrderDependentTestGroup, FieldSetOrderEnv) { - ScopedObjectAccess soa(Thread::Current()); - VariableSizedHandleScope vshs(soa.Self()); - CreateGraph(/*handles=*/&vshs); - AdjacencyListGraph blks(SetupFromAdjacencyList("entry", - "exit", - {{"entry", "left"}, - {"entry", "right"}, - {"left", "breturn"}, - {"right", "breturn"}, - {"breturn", "exit"}})); -#define GET_BLOCK(name) HBasicBlock* name = blks.Get(#name) - GET_BLOCK(entry); - GET_BLOCK(exit); - GET_BLOCK(breturn); - GET_BLOCK(left); - GET_BLOCK(right); -#undef GET_BLOCK - TestOrder order = GetParam(); - EnsurePredecessorOrder(breturn, {left, right}); - HInstruction* bool_value = MakeParam(DataType::Type::kBool); - HInstruction* c2 = graph_->GetIntConstant(2); - HInstruction* c3 = graph_->GetIntConstant(3); - HInstruction* c12 = graph_->GetIntConstant(12); - HInstruction* c13 = graph_->GetIntConstant(13); - - HInstruction* cls1 = MakeClassLoad(); - HInstruction* cls2 = MakeClassLoad(); - HInstruction* new_inst1 = MakeNewInstance(cls1); - HInstruction* new_inst2 = MakeNewInstance(cls2); - HInstruction* write_entry1 = MakeIFieldSet(new_inst1, c3, MemberOffset(32)); - HInstruction* write_entry2 = MakeIFieldSet(new_inst2, c13, MemberOffset(32)); - HInstruction* if_inst = new (GetAllocator()) HIf(bool_value); - entry->AddInstruction(cls1); - entry->AddInstruction(cls2); - 
entry->AddInstruction(new_inst1); - entry->AddInstruction(new_inst2); - if (order == TestOrder::kSameAsAlloc) { - entry->AddInstruction(write_entry1); - entry->AddInstruction(write_entry2); - } else { - entry->AddInstruction(write_entry2); - entry->AddInstruction(write_entry1); - } - entry->AddInstruction(if_inst); - ManuallyBuildEnvFor(cls1, {}); - cls2->CopyEnvironmentFrom(cls1->GetEnvironment()); - new_inst1->CopyEnvironmentFrom(cls1->GetEnvironment()); - ManuallyBuildEnvFor(new_inst2, {new_inst1}); - - HInstruction* call_left1 = MakeInvoke(DataType::Type::kVoid, { new_inst1 }); - HInstruction* call_left2 = MakeInvoke(DataType::Type::kVoid, { new_inst2 }); - HInstruction* goto_left = new (GetAllocator()) HGoto(); - left->AddInstruction(call_left1); - left->AddInstruction(call_left2); - left->AddInstruction(goto_left); - call_left1->CopyEnvironmentFrom(cls1->GetEnvironment()); - call_left2->CopyEnvironmentFrom(cls1->GetEnvironment()); - - HInstruction* write_right1 = MakeIFieldSet(new_inst1, c2, MemberOffset(32)); - HInstruction* write_right2 = MakeIFieldSet(new_inst2, c12, MemberOffset(32)); - HInstruction* goto_right = new (GetAllocator()) HGoto(); - right->AddInstruction(write_right1); - right->AddInstruction(write_right2); - right->AddInstruction(goto_right); - - HInstruction* read_bottom1 = MakeIFieldGet(new_inst1, DataType::Type::kInt32, MemberOffset(32)); - HInstruction* read_bottom2 = MakeIFieldGet(new_inst2, DataType::Type::kInt32, MemberOffset(32)); - HInstruction* combine = - new (GetAllocator()) HAdd(DataType::Type::kInt32, read_bottom1, read_bottom2); - HInstruction* return_exit = new (GetAllocator()) HReturn(combine); - breturn->AddInstruction(read_bottom1); - breturn->AddInstruction(read_bottom2); - breturn->AddInstruction(combine); - breturn->AddInstruction(return_exit); - - SetupExit(exit); - - PerformLSEWithPartial(blks); - - EXPECT_INS_REMOVED(write_entry1); - EXPECT_INS_REMOVED(write_entry2); - EXPECT_INS_REMOVED(read_bottom1); - EXPECT_INS_REMOVED(read_bottom2); - EXPECT_INS_REMOVED(write_right1); - EXPECT_INS_REMOVED(write_right2); - EXPECT_INS_RETAINED(call_left1); - EXPECT_INS_RETAINED(call_left2); - std::vector<HPhi*> merges; - std::vector<HPredicatedInstanceFieldGet*> pred_gets; - std::vector<HNewInstance*> materializations; - std::tie(merges, pred_gets) = - FindAllInstructions<HPhi, HPredicatedInstanceFieldGet>(graph_, breturn); - std::tie(materializations) = FindAllInstructions<HNewInstance>(graph_); - ASSERT_EQ(merges.size(), 4u); - ASSERT_EQ(pred_gets.size(), 2u); - ASSERT_EQ(materializations.size(), 2u); - HPhi* merge_value_return1 = FindOrNull(merges.begin(), merges.end(), [&](HPhi* p) { - return p->GetType() == DataType::Type::kInt32 && p->InputAt(1) == c2; - }); - HPhi* merge_value_return2 = FindOrNull(merges.begin(), merges.end(), [&](HPhi* p) { - return p->GetType() == DataType::Type::kInt32 && p->InputAt(1) == c12; - }); - HNewInstance* mat_alloc1 = FindOrNull(materializations.begin(), - materializations.end(), - [&](HNewInstance* n) { return n->InputAt(0) == cls1; }); - HNewInstance* mat_alloc2 = FindOrNull(materializations.begin(), - materializations.end(), - [&](HNewInstance* n) { return n->InputAt(0) == cls2; }); - ASSERT_NE(mat_alloc1, nullptr); - ASSERT_NE(mat_alloc2, nullptr); - HPhi* merge_alloc1 = FindOrNull(merges.begin(), merges.end(), [&](HPhi* p) { - return p->GetType() == DataType::Type::kReference && p->InputAt(0) == mat_alloc1; - }); - HPhi* merge_alloc2 = FindOrNull(merges.begin(), merges.end(), [&](HPhi* p) { - return p->GetType() == 
DataType::Type::kReference && p->InputAt(0) == mat_alloc2; - }); - ASSERT_NE(merge_alloc1, nullptr); - HPredicatedInstanceFieldGet* pred_get1 = - FindOrNull(pred_gets.begin(), pred_gets.end(), [&](HPredicatedInstanceFieldGet* pg) { - return pg->GetTarget() == merge_alloc1; - }); - ASSERT_NE(merge_alloc2, nullptr); - HPredicatedInstanceFieldGet* pred_get2 = - FindOrNull(pred_gets.begin(), pred_gets.end(), [&](HPredicatedInstanceFieldGet* pg) { - return pg->GetTarget() == merge_alloc2; - }); - ASSERT_NE(merge_value_return1, nullptr); - ASSERT_NE(merge_value_return2, nullptr); - EXPECT_INS_EQ(merge_alloc1->InputAt(1), graph_->GetNullConstant()); - EXPECT_INS_EQ(merge_alloc2->InputAt(1), graph_->GetNullConstant()); - ASSERT_NE(pred_get1, nullptr); - EXPECT_INS_EQ(pred_get1->GetTarget(), merge_alloc1); - EXPECT_INS_EQ(pred_get1->GetDefaultValue(), merge_value_return1) - << " pred-get is: " << *pred_get1; - EXPECT_INS_EQ(merge_value_return1->InputAt(0), graph_->GetIntConstant(0)) - << " merge val is: " << *merge_value_return1; - EXPECT_INS_EQ(merge_value_return1->InputAt(1), c2) << " merge val is: " << *merge_value_return1; - ASSERT_NE(pred_get2, nullptr); - EXPECT_INS_EQ(pred_get2->GetTarget(), merge_alloc2); - EXPECT_INS_EQ(pred_get2->GetDefaultValue(), merge_value_return2) - << " pred-get is: " << *pred_get2; - EXPECT_INS_EQ(merge_value_return2->InputAt(0), graph_->GetIntConstant(0)) - << " merge val is: " << *merge_value_return1; - EXPECT_INS_EQ(merge_value_return2->InputAt(1), c12) << " merge val is: " << *merge_value_return1; - EXPECT_INS_EQ(mat_alloc2->GetEnvironment()->GetInstructionAt(0), mat_alloc1); -} - -// // TODO We can compile this better if we are better able to understand lifetimes. -// // ENTRY -// obj1 = new Obj1(); -// obj2 = new Obj2(); -// // The exact order the stores are written affects what the order we perform -// // partial LSE on the values -// obj{1,2}.var = param_obj; -// obj{2,1}.var = param_obj; -// if (param_1) { -// // EARLY_RETURN -// return; -// } -// // escape of obj1 -// obj2.var = obj1; -// if (param_2) { -// // escape of obj2 with a materialization that uses obj1 -// escape(obj2); -// } -// // EXIT -// return; -TEST_P(OrderDependentTestGroup, MaterializationMovedUse) { - ScopedObjectAccess soa(Thread::Current()); - VariableSizedHandleScope vshs(soa.Self()); - CreateGraph(/*handles=*/&vshs); - AdjacencyListGraph blks(SetupFromAdjacencyList("entry", - "exit", - {{"entry", "early_return"}, - {"early_return", "exit"}, - {"entry", "escape_1"}, - {"escape_1", "escape_2"}, - {"escape_1", "escape_1_crit_break"}, - {"escape_1_crit_break", "exit"}, - {"escape_2", "exit"}})); -#define GET_BLOCK(name) HBasicBlock* name = blks.Get(#name) - GET_BLOCK(entry); - GET_BLOCK(exit); - GET_BLOCK(early_return); - GET_BLOCK(escape_1); - GET_BLOCK(escape_1_crit_break); - GET_BLOCK(escape_2); -#undef GET_BLOCK - TestOrder order = GetParam(); - HInstruction* param_1 = MakeParam(DataType::Type::kBool); - HInstruction* param_2 = MakeParam(DataType::Type::kBool); - HInstruction* param_obj = MakeParam(DataType::Type::kReference); - - HInstruction* cls1 = MakeClassLoad(); - HInstruction* cls2 = MakeClassLoad(); - HInstruction* new_inst1 = MakeNewInstance(cls1); - HInstruction* new_inst2 = MakeNewInstance(cls2); - HInstruction* write_entry1 = MakeIFieldSet(new_inst1, param_obj, MemberOffset(32)); - HInstruction* write_entry2 = MakeIFieldSet(new_inst2, param_obj, MemberOffset(32)); - HInstruction* if_inst = new (GetAllocator()) HIf(param_1); - entry->AddInstruction(cls1); - 
entry->AddInstruction(cls2); - entry->AddInstruction(new_inst1); - entry->AddInstruction(new_inst2); - if (order == TestOrder::kSameAsAlloc) { - entry->AddInstruction(write_entry1); - entry->AddInstruction(write_entry2); - } else { - entry->AddInstruction(write_entry2); - entry->AddInstruction(write_entry1); - } - entry->AddInstruction(if_inst); - ManuallyBuildEnvFor(cls1, {}); - cls2->CopyEnvironmentFrom(cls1->GetEnvironment()); - new_inst1->CopyEnvironmentFrom(cls1->GetEnvironment()); - new_inst2->CopyEnvironmentFrom(cls1->GetEnvironment()); - - early_return->AddInstruction(new (GetAllocator()) HReturnVoid()); - - HInstruction* escape_1_set = MakeIFieldSet(new_inst2, new_inst1, MemberOffset(32)); - HInstruction* escape_1_if = new (GetAllocator()) HIf(param_2); - escape_1->AddInstruction(escape_1_set); - escape_1->AddInstruction(escape_1_if); - - escape_1_crit_break->AddInstruction(new (GetAllocator()) HReturnVoid()); - - HInstruction* escape_2_call = MakeInvoke(DataType::Type::kVoid, {new_inst2}); - HInstruction* escape_2_return = new (GetAllocator()) HReturnVoid(); - escape_2->AddInstruction(escape_2_call); - escape_2->AddInstruction(escape_2_return); - escape_2_call->CopyEnvironmentFrom(cls1->GetEnvironment()); - - SetupExit(exit); - - PerformLSEWithPartial(blks); - - EXPECT_INS_REMOVED(new_inst1); - EXPECT_INS_REMOVED(new_inst2); - EXPECT_INS_REMOVED(write_entry1); - EXPECT_INS_REMOVED(write_entry2); - EXPECT_INS_REMOVED(escape_1_set); - EXPECT_INS_RETAINED(escape_2_call); - - HInstruction* obj1_mat = - FindSingleInstruction<HNewInstance>(graph_, escape_1->GetSinglePredecessor()); - HInstruction* obj1_set = - FindSingleInstruction<HInstanceFieldSet>(graph_, escape_1->GetSinglePredecessor()); - HInstruction* obj2_mat = - FindSingleInstruction<HNewInstance>(graph_, escape_2->GetSinglePredecessor()); - HInstruction* obj2_set = - FindSingleInstruction<HInstanceFieldSet>(graph_, escape_2->GetSinglePredecessor()); - ASSERT_TRUE(obj1_mat != nullptr); - ASSERT_TRUE(obj2_mat != nullptr); - ASSERT_TRUE(obj1_set != nullptr); - ASSERT_TRUE(obj2_set != nullptr); - EXPECT_INS_EQ(obj1_set->InputAt(0), obj1_mat); - EXPECT_INS_EQ(obj1_set->InputAt(1), param_obj); - EXPECT_INS_EQ(obj2_set->InputAt(0), obj2_mat); - EXPECT_INS_EQ(obj2_set->InputAt(1), obj1_mat); -} - -INSTANTIATE_TEST_SUITE_P(LoadStoreEliminationTest, - OrderDependentTestGroup, - testing::Values(TestOrder::kSameAsAlloc, TestOrder::kReverseOfAlloc)); - -// // ENTRY -// // To be moved -// obj = new Obj(); -// obj.foo = 12; -// if (parameter_value) { -// // LEFT -// escape(obj); -// } else {} -// EXIT -TEST_F(LoadStoreEliminationTest, MovePredicatedAlloc) { - ScopedObjectAccess soa(Thread::Current()); - VariableSizedHandleScope vshs(soa.Self()); - CreateGraph(&vshs); - AdjacencyListGraph blks(SetupFromAdjacencyList("entry", - "exit", - {{"entry", "left"}, - {"entry", "right"}, - {"right", "breturn"}, - {"left", "breturn"}, - {"breturn", "exit"}})); -#define GET_BLOCK(name) HBasicBlock* name = blks.Get(#name) - GET_BLOCK(entry); - GET_BLOCK(exit); - GET_BLOCK(breturn); - GET_BLOCK(left); - GET_BLOCK(right); -#undef GET_BLOCK - EnsurePredecessorOrder(breturn, {left, right}); - HInstruction* bool_value = MakeParam(DataType::Type::kBool); - HInstruction* c12 = graph_->GetIntConstant(12); - - HInstruction* cls = MakeClassLoad(); - HInstruction* new_inst = MakeNewInstance(cls); - HInstruction* store = MakeIFieldSet(new_inst, c12, MemberOffset(32)); - HInstruction* if_inst = new (GetAllocator()) HIf(bool_value); - entry->AddInstruction(cls); - 
entry->AddInstruction(new_inst); - entry->AddInstruction(store); - entry->AddInstruction(if_inst); - ManuallyBuildEnvFor(cls, {}); - new_inst->CopyEnvironmentFrom(cls->GetEnvironment()); - - HInstruction* call_left = MakeInvoke(DataType::Type::kVoid, { new_inst }); - HInstruction* goto_left = new (GetAllocator()) HGoto(); - left->AddInstruction(call_left); - left->AddInstruction(goto_left); - call_left->CopyEnvironmentFrom(cls->GetEnvironment()); - - right->AddInstruction(new (GetAllocator()) HGoto()); - - HInstruction* return_exit = new (GetAllocator()) HReturnVoid(); - breturn->AddInstruction(return_exit); - - SetupExit(exit); - - PerformLSEWithPartial(blks); - - HNewInstance* moved_new_inst = nullptr; - HInstanceFieldSet* moved_set = nullptr; - std::tie(moved_new_inst, moved_set) = - FindSingleInstructions<HNewInstance, HInstanceFieldSet>(graph_); - EXPECT_NE(moved_new_inst, nullptr); - EXPECT_NE(moved_set, nullptr); - EXPECT_INS_RETAINED(call_left); - // store removed or moved. - EXPECT_NE(store->GetBlock(), entry); - // New-inst removed or moved. - EXPECT_NE(new_inst->GetBlock(), entry); - EXPECT_INS_EQ(moved_set->InputAt(0), moved_new_inst); - EXPECT_INS_EQ(moved_set->InputAt(1), c12); -} - -// // ENTRY -// // To be moved -// obj = new Obj(); -// obj.foo = 12; -// if (parameter_value) { -// // LEFT -// escape(obj); -// } -// EXIT -// int a = obj.foo; -// obj.foo = 13; -// noescape(); -// int b = obj.foo; -// obj.foo = 14; -// noescape(); -// int c = obj.foo; -// obj.foo = 15; -// noescape(); -// return a + b + c -TEST_F(LoadStoreEliminationTest, MutiPartialLoadStore) { - ScopedObjectAccess soa(Thread::Current()); - VariableSizedHandleScope vshs(soa.Self()); - CreateGraph(&vshs); - AdjacencyListGraph blks(SetupFromAdjacencyList("entry", - "exit", - {{"entry", "left"}, - {"entry", "right"}, - {"right", "breturn"}, - {"left", "breturn"}, - {"breturn", "exit"}})); -#define GET_BLOCK(name) HBasicBlock* name = blks.Get(#name) - GET_BLOCK(entry); - GET_BLOCK(exit); - GET_BLOCK(breturn); - GET_BLOCK(left); - GET_BLOCK(right); -#undef GET_BLOCK - EnsurePredecessorOrder(breturn, {left, right}); - HInstruction* bool_value = MakeParam(DataType::Type::kBool); - HInstruction* c12 = graph_->GetIntConstant(12); - HInstruction* c13 = graph_->GetIntConstant(13); - HInstruction* c14 = graph_->GetIntConstant(14); - HInstruction* c15 = graph_->GetIntConstant(15); - - HInstruction* cls = MakeClassLoad(); - HInstruction* new_inst = MakeNewInstance(cls); - HInstruction* store = MakeIFieldSet(new_inst, c12, MemberOffset(32)); - HInstruction* if_inst = new (GetAllocator()) HIf(bool_value); - entry->AddInstruction(cls); - entry->AddInstruction(new_inst); - entry->AddInstruction(store); - entry->AddInstruction(if_inst); - ManuallyBuildEnvFor(cls, {}); - new_inst->CopyEnvironmentFrom(cls->GetEnvironment()); - - HInstruction* call_left = MakeInvoke(DataType::Type::kVoid, { new_inst }); - HInstruction* goto_left = new (GetAllocator()) HGoto(); - left->AddInstruction(call_left); - left->AddInstruction(goto_left); - call_left->CopyEnvironmentFrom(cls->GetEnvironment()); - - HInstruction* goto_right = new (GetAllocator()) HGoto(); - right->AddInstruction(goto_right); - - HInstruction* a_val = MakeIFieldGet(new_inst, DataType::Type::kInt32, MemberOffset(32)); - HInstruction* a_reset = MakeIFieldSet(new_inst, c13, MemberOffset(32)); - HInstruction* a_noescape = MakeInvoke(DataType::Type::kVoid, {}); - HInstruction* b_val = MakeIFieldGet(new_inst, DataType::Type::kInt32, MemberOffset(32)); - HInstruction* b_reset = 
MakeIFieldSet(new_inst, c14, MemberOffset(32)); - HInstruction* b_noescape = MakeInvoke(DataType::Type::kVoid, {}); - HInstruction* c_val = MakeIFieldGet(new_inst, DataType::Type::kInt32, MemberOffset(32)); - HInstruction* c_reset = MakeIFieldSet(new_inst, c15, MemberOffset(32)); - HInstruction* c_noescape = MakeInvoke(DataType::Type::kVoid, {}); - HInstruction* add_1_exit = new (GetAllocator()) HAdd(DataType::Type::kInt32, a_val, b_val); - HInstruction* add_2_exit = new (GetAllocator()) HAdd(DataType::Type::kInt32, c_val, add_1_exit); - HInstruction* return_exit = new (GetAllocator()) HReturn(add_2_exit); - breturn->AddInstruction(a_val); - breturn->AddInstruction(a_reset); - breturn->AddInstruction(a_noescape); - breturn->AddInstruction(b_val); - breturn->AddInstruction(b_reset); - breturn->AddInstruction(b_noescape); - breturn->AddInstruction(c_val); - breturn->AddInstruction(c_reset); - breturn->AddInstruction(c_noescape); - breturn->AddInstruction(add_1_exit); - breturn->AddInstruction(add_2_exit); - breturn->AddInstruction(return_exit); - ManuallyBuildEnvFor(a_noescape, {new_inst, a_val}); - ManuallyBuildEnvFor(b_noescape, {new_inst, a_val, b_val}); - ManuallyBuildEnvFor(c_noescape, {new_inst, a_val, b_val, c_val}); - - SetupExit(exit); - - PerformLSEWithPartial(blks); - - HNewInstance* moved_new_inst = nullptr; - HInstanceFieldSet* moved_set = nullptr; - std::tie(moved_new_inst, moved_set) = - FindSingleInstructions<HNewInstance, HInstanceFieldSet>(graph_, left->GetSinglePredecessor()); - std::vector<HPredicatedInstanceFieldGet*> pred_gets; - std::vector<HInstanceFieldSet*> pred_sets; - std::vector<HPhi*> return_phis; - std::tie(return_phis, pred_gets, pred_sets) = - FindAllInstructions<HPhi, HPredicatedInstanceFieldGet, HInstanceFieldSet>(graph_, breturn); - ASSERT_EQ(return_phis.size(), 2u); - HPhi* inst_phi = return_phis[0]; - HPhi* val_phi = return_phis[1]; - if (inst_phi->GetType() != DataType::Type::kReference) { - std::swap(inst_phi, val_phi); - } - ASSERT_NE(moved_new_inst, nullptr); - EXPECT_INS_EQ(inst_phi->InputAt(0), moved_new_inst); - EXPECT_INS_EQ(inst_phi->InputAt(1), graph_->GetNullConstant()); - EXPECT_INS_EQ(val_phi->InputAt(0), graph_->GetIntConstant(0)); - EXPECT_EQ(val_phi->InputAt(1), c12); - ASSERT_EQ(pred_gets.size(), 3u); - ASSERT_EQ(pred_gets.size(), pred_sets.size()); - std::vector<HInstruction*> set_values{c13, c14, c15}; - std::vector<HInstruction*> get_values{val_phi, c13, c14}; - ASSERT_NE(moved_set, nullptr); - EXPECT_INS_EQ(moved_set->InputAt(0), moved_new_inst); - EXPECT_INS_EQ(moved_set->InputAt(1), c12); - EXPECT_INS_RETAINED(call_left); - // store removed or moved. - EXPECT_NE(store->GetBlock(), entry); - // New-inst removed or moved. 
- EXPECT_NE(new_inst->GetBlock(), entry); - for (auto [get, val] : ZipLeft(MakeIterationRange(pred_gets), MakeIterationRange(get_values))) { - EXPECT_INS_EQ(get->GetDefaultValue(), val); - } - for (auto [set, val] : ZipLeft(MakeIterationRange(pred_sets), MakeIterationRange(set_values))) { - EXPECT_INS_EQ(set->InputAt(1), val); - EXPECT_TRUE(set->GetIsPredicatedSet()) << *set; - } - EXPECT_INS_RETAINED(a_noescape); - EXPECT_INS_RETAINED(b_noescape); - EXPECT_INS_RETAINED(c_noescape); - EXPECT_INS_EQ(add_1_exit->InputAt(0), pred_gets[0]); - EXPECT_INS_EQ(add_1_exit->InputAt(1), pred_gets[1]); - EXPECT_INS_EQ(add_2_exit->InputAt(0), pred_gets[2]); - - EXPECT_EQ(a_noescape->GetEnvironment()->Size(), 2u); - EXPECT_INS_EQ(a_noescape->GetEnvironment()->GetInstructionAt(0), inst_phi); - EXPECT_INS_EQ(a_noescape->GetEnvironment()->GetInstructionAt(1), pred_gets[0]); - EXPECT_EQ(b_noescape->GetEnvironment()->Size(), 3u); - EXPECT_INS_EQ(b_noescape->GetEnvironment()->GetInstructionAt(0), inst_phi); - EXPECT_INS_EQ(b_noescape->GetEnvironment()->GetInstructionAt(1), pred_gets[0]); - EXPECT_INS_EQ(b_noescape->GetEnvironment()->GetInstructionAt(2), pred_gets[1]); - EXPECT_EQ(c_noescape->GetEnvironment()->Size(), 4u); - EXPECT_INS_EQ(c_noescape->GetEnvironment()->GetInstructionAt(0), inst_phi); - EXPECT_INS_EQ(c_noescape->GetEnvironment()->GetInstructionAt(1), pred_gets[0]); - EXPECT_INS_EQ(c_noescape->GetEnvironment()->GetInstructionAt(2), pred_gets[1]); - EXPECT_INS_EQ(c_noescape->GetEnvironment()->GetInstructionAt(3), pred_gets[2]); -} - -// // ENTRY -// // To be moved -// obj = new Obj(); -// obj.foo = 12; -// int a = obj.foo; -// obj.foo = 13; -// noescape(); -// int b = obj.foo; -// obj.foo = 14; -// noescape(); -// int c = obj.foo; -// obj.foo = 15; -// noescape(); -// if (parameter_value) { -// // LEFT -// escape(obj); -// } -// EXIT -// return a + b + c + obj.foo -TEST_F(LoadStoreEliminationTest, MutiPartialLoadStore2) { - ScopedObjectAccess soa(Thread::Current()); - VariableSizedHandleScope vshs(soa.Self()); - CreateGraph(&vshs); - // Need to have an actual entry block since we check env-layout and the way we - // add constants would screw this up otherwise. 
- AdjacencyListGraph blks(SetupFromAdjacencyList("start", - "exit", - {{"start", "entry"}, - {"entry", "left"}, - {"entry", "right"}, - {"right", "breturn"}, - {"left", "breturn"}, - {"breturn", "exit"}})); -#define GET_BLOCK(name) HBasicBlock* name = blks.Get(#name) - GET_BLOCK(start); - GET_BLOCK(entry); - GET_BLOCK(exit); - GET_BLOCK(breturn); - GET_BLOCK(left); - GET_BLOCK(right); -#undef GET_BLOCK - EnsurePredecessorOrder(breturn, {left, right}); - HInstruction* bool_value = MakeParam(DataType::Type::kBool); - HInstruction* c12 = graph_->GetIntConstant(12); - HInstruction* c13 = graph_->GetIntConstant(13); - HInstruction* c14 = graph_->GetIntConstant(14); - HInstruction* c15 = graph_->GetIntConstant(15); - - HInstruction* start_suspend = new (GetAllocator()) HSuspendCheck(); - HInstruction* start_goto = new (GetAllocator()) HGoto(); - - start->AddInstruction(start_suspend); - start->AddInstruction(start_goto); - ManuallyBuildEnvFor(start_suspend, {}); - - HInstruction* cls = MakeClassLoad(); - HInstruction* new_inst = MakeNewInstance(cls); - HInstruction* store = MakeIFieldSet(new_inst, c12, MemberOffset(32)); - - HInstruction* a_val = MakeIFieldGet(new_inst, DataType::Type::kInt32, MemberOffset(32)); - HInstruction* a_reset = MakeIFieldSet(new_inst, c13, MemberOffset(32)); - HInstruction* a_noescape = MakeInvoke(DataType::Type::kVoid, {}); - HInstruction* b_val = MakeIFieldGet(new_inst, DataType::Type::kInt32, MemberOffset(32)); - HInstruction* b_reset = MakeIFieldSet(new_inst, c14, MemberOffset(32)); - HInstruction* b_noescape = MakeInvoke(DataType::Type::kVoid, {}); - HInstruction* c_val = MakeIFieldGet(new_inst, DataType::Type::kInt32, MemberOffset(32)); - HInstruction* c_reset = MakeIFieldSet(new_inst, c15, MemberOffset(32)); - HInstruction* c_noescape = MakeInvoke(DataType::Type::kVoid, {}); - HInstruction* if_inst = new (GetAllocator()) HIf(bool_value); - entry->AddInstruction(cls); - entry->AddInstruction(new_inst); - entry->AddInstruction(store); - entry->AddInstruction(a_val); - entry->AddInstruction(a_reset); - entry->AddInstruction(a_noescape); - entry->AddInstruction(b_val); - entry->AddInstruction(b_reset); - entry->AddInstruction(b_noescape); - entry->AddInstruction(c_val); - entry->AddInstruction(c_reset); - entry->AddInstruction(c_noescape); - entry->AddInstruction(if_inst); - ManuallyBuildEnvFor(cls, {}); - new_inst->CopyEnvironmentFrom(cls->GetEnvironment()); - ManuallyBuildEnvFor(a_noescape, {new_inst, a_val}); - ManuallyBuildEnvFor(b_noescape, {new_inst, a_val, b_val}); - ManuallyBuildEnvFor(c_noescape, {new_inst, a_val, b_val, c_val}); - - HInstruction* call_left = MakeInvoke(DataType::Type::kVoid, { new_inst }); - HInstruction* goto_left = new (GetAllocator()) HGoto(); - left->AddInstruction(call_left); - left->AddInstruction(goto_left); - call_left->CopyEnvironmentFrom(c_noescape->GetEnvironment()); - - HInstruction* goto_right = new (GetAllocator()) HGoto(); - right->AddInstruction(goto_right); - - HInstruction* val_exit = MakeIFieldGet(new_inst, DataType::Type::kInt32, MemberOffset(32)); - HInstruction* add_1_exit = new (GetAllocator()) HAdd(DataType::Type::kInt32, a_val, b_val); - HInstruction* add_2_exit = new (GetAllocator()) HAdd(DataType::Type::kInt32, c_val, add_1_exit); - HInstruction* add_3_exit = - new (GetAllocator()) HAdd(DataType::Type::kInt32, val_exit, add_2_exit); - HInstruction* return_exit = new (GetAllocator()) HReturn(add_3_exit); - breturn->AddInstruction(val_exit); - breturn->AddInstruction(add_1_exit); - 
breturn->AddInstruction(add_2_exit); - breturn->AddInstruction(add_3_exit); - breturn->AddInstruction(return_exit); - - SetupExit(exit); - - PerformLSEWithPartial(blks); - - HNewInstance* moved_new_inst = nullptr; - HInstanceFieldSet* moved_set = nullptr; - std::tie(moved_new_inst, moved_set) = - FindSingleInstructions<HNewInstance, HInstanceFieldSet>(graph_, left->GetSinglePredecessor()); - std::vector<HPredicatedInstanceFieldGet*> pred_gets; - std::vector<HInstanceFieldSet*> pred_sets; - std::vector<HPhi*> return_phis; - std::tie(return_phis, pred_gets, pred_sets) = - FindAllInstructions<HPhi, HPredicatedInstanceFieldGet, HInstanceFieldSet>(graph_, breturn); - ASSERT_EQ(return_phis.size(), 2u); - HPhi* inst_phi = return_phis[0]; - HPhi* val_phi = return_phis[1]; - if (inst_phi->GetType() != DataType::Type::kReference) { - std::swap(inst_phi, val_phi); - } - ASSERT_NE(moved_new_inst, nullptr); - EXPECT_INS_EQ(inst_phi->InputAt(0), moved_new_inst); - EXPECT_INS_EQ(inst_phi->InputAt(1), graph_->GetNullConstant()); - EXPECT_INS_EQ(val_phi->InputAt(0), graph_->GetIntConstant(0)); - EXPECT_INS_EQ(val_phi->InputAt(1), c15); - ASSERT_EQ(pred_gets.size(), 1u); - ASSERT_EQ(pred_sets.size(), 0u); - ASSERT_NE(moved_set, nullptr); - EXPECT_INS_EQ(moved_set->InputAt(0), moved_new_inst); - EXPECT_INS_EQ(moved_set->InputAt(1), c15); - EXPECT_INS_RETAINED(call_left); - // store removed or moved. - EXPECT_NE(store->GetBlock(), entry); - // New-inst removed or moved. - EXPECT_NE(new_inst->GetBlock(), entry); - EXPECT_INS_REMOVED(a_val); - EXPECT_INS_REMOVED(b_val); - EXPECT_INS_REMOVED(c_val); - EXPECT_INS_RETAINED(a_noescape); - EXPECT_INS_RETAINED(b_noescape); - EXPECT_INS_RETAINED(c_noescape); - EXPECT_INS_EQ(add_1_exit->InputAt(0), c12); - EXPECT_INS_EQ(add_1_exit->InputAt(1), c13); - EXPECT_INS_EQ(add_2_exit->InputAt(0), c14); - EXPECT_INS_EQ(add_2_exit->InputAt(1), add_1_exit); - EXPECT_INS_EQ(add_3_exit->InputAt(0), pred_gets[0]); - EXPECT_INS_EQ(pred_gets[0]->GetDefaultValue(), val_phi); - EXPECT_INS_EQ(add_3_exit->InputAt(1), add_2_exit); - EXPECT_EQ(a_noescape->GetEnvironment()->Size(), 2u); - EXPECT_INS_EQ(a_noescape->GetEnvironment()->GetInstructionAt(0), graph_->GetNullConstant()); - EXPECT_INS_EQ(a_noescape->GetEnvironment()->GetInstructionAt(1), c12); - EXPECT_EQ(b_noescape->GetEnvironment()->Size(), 3u); - EXPECT_INS_EQ(b_noescape->GetEnvironment()->GetInstructionAt(0), graph_->GetNullConstant()); - EXPECT_INS_EQ(b_noescape->GetEnvironment()->GetInstructionAt(1), c12); - EXPECT_INS_EQ(b_noescape->GetEnvironment()->GetInstructionAt(2), c13); - EXPECT_EQ(c_noescape->GetEnvironment()->Size(), 4u); - EXPECT_INS_EQ(c_noescape->GetEnvironment()->GetInstructionAt(0), graph_->GetNullConstant()); - EXPECT_INS_EQ(c_noescape->GetEnvironment()->GetInstructionAt(1), c12); - EXPECT_INS_EQ(c_noescape->GetEnvironment()->GetInstructionAt(2), c13); - EXPECT_INS_EQ(c_noescape->GetEnvironment()->GetInstructionAt(3), c14); -} - -// // ENTRY -// // To be moved -// obj = new Obj(); -// // Transforms required for creation non-trivial and unimportant -// if (parameter_value) { -// obj.foo = 10 -// } else { -// obj.foo = 12; -// } -// if (parameter_value_2) { -// escape(obj); -// } -// EXIT -TEST_F(LoadStoreEliminationTest, MovePredicatedAlloc2) { - ScopedObjectAccess soa(Thread::Current()); - VariableSizedHandleScope vshs(soa.Self()); - CreateGraph(&vshs); - AdjacencyListGraph blks(SetupFromAdjacencyList("entry", - "exit", - {{"entry", "left_set"}, - {"entry", "right_set"}, - {"left_set", "merge_crit_break"}, - 
{"right_set", "merge_crit_break"}, - {"merge_crit_break", "merge"}, - {"merge", "escape"}, - {"escape", "breturn"}, - {"merge", "breturn"}, - {"breturn", "exit"}})); -#define GET_BLOCK(name) HBasicBlock* name = blks.Get(#name) - GET_BLOCK(entry); - GET_BLOCK(exit); - GET_BLOCK(breturn); - GET_BLOCK(left_set); - GET_BLOCK(right_set); - GET_BLOCK(merge); - GET_BLOCK(merge_crit_break); - GET_BLOCK(escape); -#undef GET_BLOCK - EnsurePredecessorOrder(breturn, {merge, escape}); - EnsurePredecessorOrder(merge_crit_break, {left_set, right_set}); - HInstruction* bool_value = MakeParam(DataType::Type::kBool); - HInstruction* bool_value_2 = MakeParam(DataType::Type::kBool); - HInstruction* c10 = graph_->GetIntConstant(10); - HInstruction* c12 = graph_->GetIntConstant(12); - - HInstruction* cls = MakeClassLoad(); - HInstruction* new_inst = MakeNewInstance(cls); - HInstruction* if_inst = new (GetAllocator()) HIf(bool_value); - entry->AddInstruction(cls); - entry->AddInstruction(new_inst); - entry->AddInstruction(if_inst); - ManuallyBuildEnvFor(cls, {}); - new_inst->CopyEnvironmentFrom(cls->GetEnvironment()); - - HInstruction* store_left = MakeIFieldSet(new_inst, c10, MemberOffset(32)); - HInstruction* goto_left = new (GetAllocator()) HGoto(); - left_set->AddInstruction(store_left); - left_set->AddInstruction(goto_left); - - HInstruction* store_right = MakeIFieldSet(new_inst, c12, MemberOffset(32)); - HInstruction* goto_right = new (GetAllocator()) HGoto(); - right_set->AddInstruction(store_right); - right_set->AddInstruction(goto_right); - - merge_crit_break->AddInstruction(new (GetAllocator()) HGoto()); - HInstruction* if_merge = new (GetAllocator()) HIf(bool_value_2); - merge->AddInstruction(if_merge); - - HInstruction* escape_instruction = MakeInvoke(DataType::Type::kVoid, { new_inst }); - HInstruction* escape_goto = new (GetAllocator()) HGoto(); - escape->AddInstruction(escape_instruction); - escape->AddInstruction(escape_goto); - escape_instruction->CopyEnvironmentFrom(cls->GetEnvironment()); - - HInstruction* return_exit = new (GetAllocator()) HReturnVoid(); - breturn->AddInstruction(return_exit); - - SetupExit(exit); - - PerformLSEWithPartial(blks); - - HNewInstance* moved_new_inst; - HInstanceFieldSet* moved_set; - std::tie(moved_new_inst, moved_set) = - FindSingleInstructions<HNewInstance, HInstanceFieldSet>(graph_); - HPhi* merge_phi = FindSingleInstruction<HPhi>(graph_, merge_crit_break); - HPhi* alloc_phi = FindSingleInstruction<HPhi>(graph_, breturn); - EXPECT_INS_EQ(moved_new_inst, moved_set->InputAt(0)); - ASSERT_NE(alloc_phi, nullptr); - EXPECT_EQ(alloc_phi->InputAt(0), graph_->GetNullConstant()) - << alloc_phi->GetBlock()->GetPredecessors()[0]->GetBlockId() << " " << *alloc_phi; - EXPECT_TRUE(alloc_phi->InputAt(1)->IsNewInstance()) << *alloc_phi; - ASSERT_NE(merge_phi, nullptr); - EXPECT_EQ(merge_phi->InputCount(), 2u); - EXPECT_INS_EQ(merge_phi->InputAt(0), c10); - EXPECT_INS_EQ(merge_phi->InputAt(1), c12); - EXPECT_TRUE(merge_phi->GetUses().HasExactlyOneElement()); - EXPECT_INS_EQ(merge_phi->GetUses().front().GetUser(), moved_set); - EXPECT_INS_RETAINED(escape_instruction); - EXPECT_INS_EQ(escape_instruction->InputAt(0), moved_new_inst); - // store removed or moved. - EXPECT_NE(store_left->GetBlock(), left_set); - EXPECT_NE(store_right->GetBlock(), left_set); - // New-inst removed or moved. 
- EXPECT_NE(new_inst->GetBlock(), entry); -} - -// // ENTRY -// // To be moved -// obj = new Obj(); -// switch(args) { -// default: -// return obj.a; -// case b: -// obj.a = 5; break; -// case c: -// obj.b = 4; break; -// } -// escape(obj); -// return obj.a; -// EXIT -TEST_F(LoadStoreEliminationTest, MovePredicatedAlloc3) { - ScopedObjectAccess soa(Thread::Current()); - VariableSizedHandleScope vshs(soa.Self()); - CreateGraph(&vshs); - AdjacencyListGraph blks(SetupFromAdjacencyList("entry", - "exit", - {{"entry", "early_return"}, - {"entry", "set_one"}, - {"entry", "set_two"}, - {"early_return", "exit"}, - {"set_one", "escape"}, - {"set_two", "escape"}, - {"escape", "exit"}})); -#define GET_BLOCK(name) HBasicBlock* name = blks.Get(#name) - GET_BLOCK(entry); - GET_BLOCK(exit); - GET_BLOCK(escape); - GET_BLOCK(early_return); - GET_BLOCK(set_one); - GET_BLOCK(set_two); -#undef GET_BLOCK - EnsurePredecessorOrder(escape, {set_one, set_two}); - HInstruction* int_val = MakeParam(DataType::Type::kInt32); - HInstruction* c0 = graph_->GetIntConstant(0); - HInstruction* c4 = graph_->GetIntConstant(4); - HInstruction* c5 = graph_->GetIntConstant(5); - - HInstruction* cls = MakeClassLoad(); - HInstruction* new_inst = MakeNewInstance(cls); - HInstruction* switch_inst = new (GetAllocator()) HPackedSwitch(0, 2, int_val); - entry->AddInstruction(cls); - entry->AddInstruction(new_inst); - entry->AddInstruction(switch_inst); - ManuallyBuildEnvFor(cls, {}); - new_inst->CopyEnvironmentFrom(cls->GetEnvironment()); - - HInstruction* store_one = MakeIFieldSet(new_inst, c4, MemberOffset(32)); - HInstruction* goto_one = new (GetAllocator()) HGoto(); - set_one->AddInstruction(store_one); - set_one->AddInstruction(goto_one); - - HInstruction* store_two = MakeIFieldSet(new_inst, c5, MemberOffset(32)); - HInstruction* goto_two = new (GetAllocator()) HGoto(); - set_two->AddInstruction(store_two); - set_two->AddInstruction(goto_two); - - HInstruction* read_early = MakeIFieldGet(new_inst, DataType::Type::kInt32, MemberOffset(32)); - HInstruction* return_early = new (GetAllocator()) HReturn(read_early); - early_return->AddInstruction(read_early); - early_return->AddInstruction(return_early); - - HInstruction* escape_instruction = MakeInvoke(DataType::Type::kVoid, { new_inst }); - HInstruction* read_escape = MakeIFieldGet(new_inst, DataType::Type::kInt32, MemberOffset(32)); - HInstruction* return_escape = new (GetAllocator()) HReturn(read_escape); - escape->AddInstruction(escape_instruction); - escape->AddInstruction(read_escape); - escape->AddInstruction(return_escape); - escape_instruction->CopyEnvironmentFrom(cls->GetEnvironment()); - - SetupExit(exit); - - PerformLSEWithPartial(blks); - - // Each escaping switch path gets its own materialization block. 
- // Blocks: - // early_return(5) -> [exit(4)] - // entry(3) -> [early_return(5), <Unnamed>(9), <Unnamed>(10)] - // escape(8) -> [exit(4)] - // exit(4) -> [] - // set_one(6) -> [escape(8)] - // set_two(7) -> [escape(8)] - // <Unnamed>(10) -> [set_two(7)] - // <Unnamed>(9) -> [set_one(6)] - HBasicBlock* materialize_one = set_one->GetSinglePredecessor(); - HBasicBlock* materialize_two = set_two->GetSinglePredecessor(); - HNewInstance* materialization_ins_one = - FindSingleInstruction<HNewInstance>(graph_, materialize_one); - HNewInstance* materialization_ins_two = - FindSingleInstruction<HNewInstance>(graph_, materialize_two); - HPhi* new_phi = FindSingleInstruction<HPhi>(graph_, escape); - EXPECT_NE(materialization_ins_one, nullptr); - EXPECT_NE(materialization_ins_two, nullptr); - EXPECT_EQ(materialization_ins_one, new_phi->InputAt(0)) - << *materialization_ins_one << " vs " << *new_phi; - EXPECT_EQ(materialization_ins_two, new_phi->InputAt(1)) - << *materialization_ins_two << " vs " << *new_phi; - - EXPECT_INS_RETAINED(escape_instruction); - EXPECT_INS_RETAINED(read_escape); - EXPECT_EQ(read_escape->InputAt(0), new_phi) << *new_phi << " vs " << *read_escape->InputAt(0); - EXPECT_EQ(store_one->InputAt(0), materialization_ins_one); - EXPECT_EQ(store_two->InputAt(0), materialization_ins_two); - EXPECT_EQ(escape_instruction->InputAt(0), new_phi); - EXPECT_INS_REMOVED(read_early); - EXPECT_EQ(return_early->InputAt(0), c0); -} - -// // ENTRY -// // To be moved -// obj = new Obj(); -// switch(args) { -// case a: -// // set_one_and_escape -// obj.a = 5; -// escape(obj); -// // FALLTHROUGH -// case c: -// // set_two -// obj.a = 4; break; -// default: -// return obj.a; -// } -// escape(obj); -// return obj.a; -// EXIT -TEST_F(LoadStoreEliminationTest, MovePredicatedAlloc4) { - ScopedObjectAccess soa(Thread::Current()); - VariableSizedHandleScope vshs(soa.Self()); - CreateGraph(&vshs); - // Break the critical edge between entry and set_two with the - // set_two_critical_break node. Graph simplification would do this for us if - // we didn't do it manually. This way we have a nice-name for debugging and - // testing. 
- AdjacencyListGraph blks(SetupFromAdjacencyList("entry", - "exit", - {{"entry", "early_return"}, - {"entry", "set_one_and_escape"}, - {"entry", "set_two_critical_break"}, - {"set_two_critical_break", "set_two"}, - {"early_return", "exit"}, - {"set_one_and_escape", "set_two"}, - {"set_two", "escape"}, - {"escape", "exit"}})); -#define GET_BLOCK(name) HBasicBlock* name = blks.Get(#name) - GET_BLOCK(entry); - GET_BLOCK(exit); - GET_BLOCK(escape); - GET_BLOCK(early_return); - GET_BLOCK(set_one_and_escape); - GET_BLOCK(set_two); - GET_BLOCK(set_two_critical_break); -#undef GET_BLOCK - EnsurePredecessorOrder(set_two, {set_one_and_escape, set_two_critical_break}); - HInstruction* int_val = MakeParam(DataType::Type::kInt32); - HInstruction* c0 = graph_->GetIntConstant(0); - HInstruction* c4 = graph_->GetIntConstant(4); - HInstruction* c5 = graph_->GetIntConstant(5); - - HInstruction* cls = MakeClassLoad(); - HInstruction* new_inst = MakeNewInstance(cls); - HInstruction* switch_inst = new (GetAllocator()) HPackedSwitch(0, 2, int_val); - entry->AddInstruction(cls); - entry->AddInstruction(new_inst); - entry->AddInstruction(switch_inst); - ManuallyBuildEnvFor(cls, {}); - new_inst->CopyEnvironmentFrom(cls->GetEnvironment()); - - HInstruction* store_one = MakeIFieldSet(new_inst, c4, MemberOffset(32)); - HInstruction* escape_one = MakeInvoke(DataType::Type::kVoid, { new_inst }); - HInstruction* goto_one = new (GetAllocator()) HGoto(); - set_one_and_escape->AddInstruction(store_one); - set_one_and_escape->AddInstruction(escape_one); - set_one_and_escape->AddInstruction(goto_one); - escape_one->CopyEnvironmentFrom(cls->GetEnvironment()); - - HInstruction* goto_crit_break = new (GetAllocator()) HGoto(); - set_two_critical_break->AddInstruction(goto_crit_break); - - HInstruction* store_two = MakeIFieldSet(new_inst, c5, MemberOffset(32)); - HInstruction* goto_two = new (GetAllocator()) HGoto(); - set_two->AddInstruction(store_two); - set_two->AddInstruction(goto_two); - - HInstruction* read_early = MakeIFieldGet(new_inst, DataType::Type::kInt32, MemberOffset(32)); - HInstruction* return_early = new (GetAllocator()) HReturn(read_early); - early_return->AddInstruction(read_early); - early_return->AddInstruction(return_early); - - HInstruction* escape_instruction = MakeInvoke(DataType::Type::kVoid, { new_inst }); - HInstruction* read_escape = MakeIFieldGet(new_inst, DataType::Type::kInt32, MemberOffset(32)); - HInstruction* return_escape = new (GetAllocator()) HReturn(read_escape); - escape->AddInstruction(escape_instruction); - escape->AddInstruction(read_escape); - escape->AddInstruction(return_escape); - escape_instruction->CopyEnvironmentFrom(cls->GetEnvironment()); - - SetupExit(exit); - - PerformLSEWithPartial(blks); - - EXPECT_INS_REMOVED(read_early); - EXPECT_EQ(return_early->InputAt(0), c0); - // Each escaping switch path gets its own materialization block. 
- // Blocks: - // early_return(5) -> [exit(4)] - // entry(3) -> [early_return(5), <Unnamed>(10), <Unnamed>(11)] - // escape(9) -> [exit(4)] - // exit(4) -> [] - // set_one_and_escape(6) -> [set_two(8)] - // set_two(8) -> [escape(9)] - // set_two_critical_break(7) -> [set_two(8)] - // <Unnamed>(11) -> [set_two_critical_break(7)] - // <Unnamed>(10) -> [set_one_and_escape(6)] - HBasicBlock* materialize_one = set_one_and_escape->GetSinglePredecessor(); - HBasicBlock* materialize_two = set_two_critical_break->GetSinglePredecessor(); - HNewInstance* materialization_ins_one = - FindSingleInstruction<HNewInstance>(graph_, materialize_one); - HNewInstance* materialization_ins_two = - FindSingleInstruction<HNewInstance>(graph_, materialize_two); - HPhi* new_phi = FindSingleInstruction<HPhi>(graph_, set_two); - ASSERT_NE(new_phi, nullptr); - ASSERT_NE(materialization_ins_one, nullptr); - ASSERT_NE(materialization_ins_two, nullptr); - EXPECT_INS_EQ(materialization_ins_one, new_phi->InputAt(0)); - EXPECT_INS_EQ(materialization_ins_two, new_phi->InputAt(1)); - - EXPECT_INS_EQ(store_one->InputAt(0), materialization_ins_one); - EXPECT_INS_EQ(store_two->InputAt(0), new_phi) << *store_two << " vs " << *new_phi; - EXPECT_INS_EQ(escape_instruction->InputAt(0), new_phi); - EXPECT_INS_RETAINED(escape_one); - EXPECT_INS_EQ(escape_one->InputAt(0), materialization_ins_one); - EXPECT_INS_RETAINED(escape_instruction); - EXPECT_INS_RETAINED(read_escape); - EXPECT_EQ(read_escape->InputAt(0), new_phi) << *new_phi << " vs " << *read_escape->InputAt(0); -} - -// // ENTRY -// // To be moved -// obj = new Obj(); -// switch(args) { -// case a: -// // set_one -// obj.a = 5; -// // nb passthrough -// case c: -// // set_two_and_escape -// obj.a += 4; -// escape(obj); -// break; -// default: -// obj.a = 10; -// } -// return obj.a; -// EXIT -TEST_F(LoadStoreEliminationTest, MovePredicatedAlloc5) { - ScopedObjectAccess soa(Thread::Current()); - VariableSizedHandleScope vshs(soa.Self()); - CreateGraph(&vshs); - // Break the critical edge between entry and set_two with the - // set_two_critical_break node. Graph simplification would do this for us if - // we didn't do it manually. This way we have a nice-name for debugging and - // testing. 
- AdjacencyListGraph blks(SetupFromAdjacencyList("entry", - "exit", - {{"entry", "set_noescape"}, - {"entry", "set_one"}, - {"entry", "set_two_critical_break"}, - {"set_two_critical_break", "set_two_and_escape"}, - {"set_noescape", "breturn"}, - {"set_one", "set_two_and_escape"}, - {"set_two_and_escape", "breturn"}, - {"breturn", "exit"}})); -#define GET_BLOCK(name) HBasicBlock* name = blks.Get(#name) - GET_BLOCK(entry); - GET_BLOCK(exit); - GET_BLOCK(breturn); - GET_BLOCK(set_noescape); - GET_BLOCK(set_one); - GET_BLOCK(set_two_and_escape); - GET_BLOCK(set_two_critical_break); -#undef GET_BLOCK - EnsurePredecessorOrder(set_two_and_escape, {set_one, set_two_critical_break}); - EnsurePredecessorOrder(breturn, {set_two_and_escape, set_noescape}); - HInstruction* int_val = MakeParam(DataType::Type::kInt32); - HInstruction* c0 = graph_->GetIntConstant(0); - HInstruction* c4 = graph_->GetIntConstant(4); - HInstruction* c5 = graph_->GetIntConstant(5); - HInstruction* c10 = graph_->GetIntConstant(10); - - HInstruction* cls = MakeClassLoad(); - HInstruction* new_inst = MakeNewInstance(cls); - HInstruction* switch_inst = new (GetAllocator()) HPackedSwitch(0, 2, int_val); - entry->AddInstruction(cls); - entry->AddInstruction(new_inst); - entry->AddInstruction(switch_inst); - ManuallyBuildEnvFor(cls, {}); - new_inst->CopyEnvironmentFrom(cls->GetEnvironment()); - - HInstruction* store_one = MakeIFieldSet(new_inst, c5, MemberOffset(32)); - HInstruction* goto_one = new (GetAllocator()) HGoto(); - set_one->AddInstruction(store_one); - set_one->AddInstruction(goto_one); - - HInstruction* goto_crit_break = new (GetAllocator()) HGoto(); - set_two_critical_break->AddInstruction(goto_crit_break); - - HInstruction* get_two = MakeIFieldGet(new_inst, DataType::Type::kInt32, MemberOffset(32)); - HInstruction* add_two = new (GetAllocator()) HAdd(DataType::Type::kInt32, get_two, c4); - HInstruction* store_two = MakeIFieldSet(new_inst, add_two, MemberOffset(32)); - HInstruction* escape_two = MakeInvoke(DataType::Type::kVoid, {new_inst}); - HInstruction* goto_two = new (GetAllocator()) HGoto(); - set_two_and_escape->AddInstruction(get_two); - set_two_and_escape->AddInstruction(add_two); - set_two_and_escape->AddInstruction(store_two); - set_two_and_escape->AddInstruction(escape_two); - set_two_and_escape->AddInstruction(goto_two); - escape_two->CopyEnvironmentFrom(cls->GetEnvironment()); - - HInstruction* store_noescape = MakeIFieldSet(new_inst, c10, MemberOffset(32)); - HInstruction* goto_noescape = new (GetAllocator()) HGoto(); - set_noescape->AddInstruction(store_noescape); - set_noescape->AddInstruction(goto_noescape); - - HInstruction* read_breturn = MakeIFieldGet(new_inst, DataType::Type::kInt32, MemberOffset(32)); - HInstruction* return_breturn = new (GetAllocator()) HReturn(read_breturn); - breturn->AddInstruction(read_breturn); - breturn->AddInstruction(return_breturn); - - SetupExit(exit); - - PerformLSEWithPartial(blks); - - // Normal LSE can get rid of these two. 
- EXPECT_INS_REMOVED(store_one);
- EXPECT_INS_REMOVED(get_two);
- EXPECT_INS_RETAINED(add_two);
- EXPECT_TRUE(add_two->InputAt(0)->IsPhi());
- EXPECT_INS_EQ(add_two->InputAt(0)->InputAt(0), c5);
- EXPECT_INS_EQ(add_two->InputAt(0)->InputAt(1), c0);
- EXPECT_INS_EQ(add_two->InputAt(1), c4);
-
- HBasicBlock* materialize_one = set_one->GetSinglePredecessor();
- HBasicBlock* materialize_two = set_two_critical_break->GetSinglePredecessor();
- HNewInstance* materialization_ins_one =
- FindSingleInstruction<HNewInstance>(graph_, materialize_one);
- HNewInstance* materialization_ins_two =
- FindSingleInstruction<HNewInstance>(graph_, materialize_two);
- std::vector<HPhi*> phis;
- std::tie(phis) = FindAllInstructions<HPhi>(graph_, set_two_and_escape);
- HPhi* new_phi = FindOrNull(
- phis.begin(), phis.end(), [&](auto p) { return p->GetType() == DataType::Type::kReference; });
- ASSERT_NE(new_phi, nullptr);
- ASSERT_NE(materialization_ins_one, nullptr);
- ASSERT_NE(materialization_ins_two, nullptr);
- EXPECT_INS_EQ(materialization_ins_one, new_phi->InputAt(0));
- EXPECT_INS_EQ(materialization_ins_two, new_phi->InputAt(1));
-
- HPredicatedInstanceFieldGet* pred_get =
- FindSingleInstruction<HPredicatedInstanceFieldGet>(graph_, breturn);
- EXPECT_TRUE(pred_get->GetTarget()->IsPhi());
- EXPECT_INS_EQ(pred_get->GetTarget()->InputAt(0), new_phi);
- EXPECT_INS_EQ(pred_get->GetTarget()->InputAt(1), graph_->GetNullConstant());
-
- EXPECT_INS_EQ(pred_get->GetDefaultValue()->InputAt(0), c0);
- EXPECT_INS_EQ(pred_get->GetDefaultValue()->InputAt(1), c10);
-}
-
-// // ENTRY
-// obj = new Obj();
-// if (parameter_value) {
-// // LEFT
-// // DO NOT ELIMINATE
-// obj.field = 1;
-// escape(obj);
-// return obj.field;
-// } else {
-// // RIGHT
-// // ELIMINATE
-// obj.field = 2;
-// return obj.field;
-// }
-// EXIT
-TEST_F(LoadStoreEliminationTest, PartialLoadElimination3) {
- ScopedObjectAccess soa(Thread::Current());
- VariableSizedHandleScope vshs(soa.Self());
- CreateGraph(&vshs);
- AdjacencyListGraph blks(SetupFromAdjacencyList(
- "entry",
- "exit",
- {{"entry", "left"}, {"entry", "right"}, {"left", "exit"}, {"right", "exit"}}));
-#define GET_BLOCK(name) HBasicBlock* name = blks.Get(#name)
- GET_BLOCK(entry);
- GET_BLOCK(exit);
- GET_BLOCK(left);
- GET_BLOCK(right);
-#undef GET_BLOCK
- HInstruction* bool_value = MakeParam(DataType::Type::kBool);
- HInstruction* c1 = graph_->GetIntConstant(1);
- HInstruction* c2 = graph_->GetIntConstant(2);
-
- HInstruction* cls = MakeClassLoad();
- HInstruction* new_inst = MakeNewInstance(cls);
- HInstruction* if_inst = new (GetAllocator()) HIf(bool_value);
- entry->AddInstruction(cls);
- entry->AddInstruction(new_inst);
- entry->AddInstruction(if_inst);
- ManuallyBuildEnvFor(cls, {});
- new_inst->CopyEnvironmentFrom(cls->GetEnvironment());
-
- HInstruction* write_left = MakeIFieldSet(new_inst, c1, MemberOffset(32));
- HInstruction* call_left = MakeInvoke(DataType::Type::kVoid, { new_inst });
- HInstruction* read_left = MakeIFieldGet(new_inst, DataType::Type::kInt32, MemberOffset(32));
- HInstruction* return_left = new (GetAllocator()) HReturn(read_left);
- left->AddInstruction(write_left);
- left->AddInstruction(call_left);
- left->AddInstruction(read_left);
- left->AddInstruction(return_left);
- call_left->CopyEnvironmentFrom(cls->GetEnvironment());
-
- HInstruction* write_right = MakeIFieldSet(new_inst, c2, MemberOffset(32));
- HInstruction* read_right = MakeIFieldGet(new_inst, DataType::Type::kInt32, MemberOffset(32));
- HInstruction* return_right = new (GetAllocator())
HReturn(read_right); - right->AddInstruction(write_right); - right->AddInstruction(read_right); - right->AddInstruction(return_right); - - SetupExit(exit); - - // PerformLSE expects this to be empty. - graph_->ClearDominanceInformation(); - PerformLSE(); - - EXPECT_INS_REMOVED(read_right); - EXPECT_INS_REMOVED(write_right); - EXPECT_INS_RETAINED(write_left); - EXPECT_INS_RETAINED(call_left); - EXPECT_INS_RETAINED(read_left); -} - -// // ENTRY -// obj = new Obj(); -// if (parameter_value) { -// // LEFT -// // DO NOT ELIMINATE -// obj.field = 1; -// while (true) { -// bool esc = escape(obj); -// // DO NOT ELIMINATE -// obj.field = 3; -// if (esc) break; -// } -// // ELIMINATE. -// return obj.field; -// } else { -// // RIGHT -// // ELIMINATE -// obj.field = 2; -// return obj.field; -// } -// EXIT -TEST_F(LoadStoreEliminationTest, PartialLoadElimination4) { - ScopedObjectAccess soa(Thread::Current()); - VariableSizedHandleScope vshs(soa.Self()); - CreateGraph(&vshs); - AdjacencyListGraph blks(SetupFromAdjacencyList("entry", - "exit", - {{"entry", "entry_post"}, - {"entry_post", "right"}, - {"right", "exit"}, - {"entry_post", "left_pre"}, - {"left_pre", "left_loop"}, - {"left_loop", "left_loop"}, - {"left_loop", "left_finish"}, - {"left_finish", "exit"}})); -#define GET_BLOCK(name) HBasicBlock* name = blks.Get(#name) - GET_BLOCK(entry); - GET_BLOCK(entry_post); - GET_BLOCK(exit); - GET_BLOCK(left_pre); - GET_BLOCK(left_loop); - GET_BLOCK(left_finish); - GET_BLOCK(right); -#undef GET_BLOCK - // Left-loops first successor is the break. - if (left_loop->GetSuccessors()[0] != left_finish) { - left_loop->SwapSuccessors(); - } - HInstruction* bool_value = MakeParam(DataType::Type::kBool); - HInstruction* c1 = graph_->GetIntConstant(1); - HInstruction* c2 = graph_->GetIntConstant(2); - HInstruction* c3 = graph_->GetIntConstant(3); - - HInstruction* cls = MakeClassLoad(); - HInstruction* new_inst = MakeNewInstance(cls); - HInstruction* goto_entry = new (GetAllocator()) HGoto(); - entry->AddInstruction(cls); - entry->AddInstruction(new_inst); - entry->AddInstruction(goto_entry); - ManuallyBuildEnvFor(cls, {}); - new_inst->CopyEnvironmentFrom(cls->GetEnvironment()); - - HInstruction* if_inst = new (GetAllocator()) HIf(bool_value); - entry_post->AddInstruction(if_inst); - - HInstruction* write_left_pre = MakeIFieldSet(new_inst, c1, MemberOffset(32)); - HInstruction* goto_left_pre = new (GetAllocator()) HGoto(); - left_pre->AddInstruction(write_left_pre); - left_pre->AddInstruction(goto_left_pre); - - HInstruction* suspend_left_loop = new (GetAllocator()) HSuspendCheck(); - HInstruction* call_left_loop = MakeInvoke(DataType::Type::kBool, { new_inst }); - HInstruction* write_left_loop = MakeIFieldSet(new_inst, c3, MemberOffset(32)); - HInstruction* if_left_loop = new (GetAllocator()) HIf(call_left_loop); - left_loop->AddInstruction(suspend_left_loop); - left_loop->AddInstruction(call_left_loop); - left_loop->AddInstruction(write_left_loop); - left_loop->AddInstruction(if_left_loop); - suspend_left_loop->CopyEnvironmentFrom(cls->GetEnvironment()); - call_left_loop->CopyEnvironmentFrom(cls->GetEnvironment()); - - HInstruction* read_left_end = MakeIFieldGet(new_inst, DataType::Type::kInt32, MemberOffset(32)); - HInstruction* return_left_end = new (GetAllocator()) HReturn(read_left_end); - left_finish->AddInstruction(read_left_end); - left_finish->AddInstruction(return_left_end); - - HInstruction* write_right = MakeIFieldSet(new_inst, c2, MemberOffset(32)); - HInstruction* read_right = MakeIFieldGet(new_inst, 
DataType::Type::kInt32, MemberOffset(32)); - HInstruction* return_right = new (GetAllocator()) HReturn(read_right); - right->AddInstruction(write_right); - right->AddInstruction(read_right); - right->AddInstruction(return_right); - - SetupExit(exit); - - // PerformLSE expects this to be empty. - graph_->ClearDominanceInformation(); - PerformLSE(); - - EXPECT_INS_RETAINED(write_left_pre); - EXPECT_INS_REMOVED(read_right); - EXPECT_INS_REMOVED(write_right); - EXPECT_INS_RETAINED(write_left_loop); - EXPECT_INS_RETAINED(call_left_loop); - EXPECT_INS_REMOVED(read_left_end); -} - -// // ENTRY -// obj = new Obj(); -// if (parameter_value) { -// // LEFT -// // DO NOT ELIMINATE -// escape(obj); -// obj.field = 1; -// } else { -// // RIGHT -// // obj hasn't escaped so it's invisible. -// // ELIMINATE -// obj.field = 2; -// noescape(); -// } -// EXIT -// ELIMINATE -// return obj.field -TEST_F(LoadStoreEliminationTest, PartialLoadElimination5) { - ScopedObjectAccess soa(Thread::Current()); - VariableSizedHandleScope vshs(soa.Self()); - CreateGraph(&vshs); - AdjacencyListGraph blks(SetupFromAdjacencyList("entry", - "exit", - {{"entry", "left"}, - {"entry", "right"}, - {"left", "breturn"}, - {"right", "breturn"}, - {"breturn", "exit"}})); -#define GET_BLOCK(name) HBasicBlock* name = blks.Get(#name) - GET_BLOCK(entry); - GET_BLOCK(exit); - GET_BLOCK(breturn); - GET_BLOCK(left); - GET_BLOCK(right); -#undef GET_BLOCK - HInstruction* bool_value = MakeParam(DataType::Type::kBool); - HInstruction* c1 = graph_->GetIntConstant(1); - HInstruction* c2 = graph_->GetIntConstant(2); - - HInstruction* cls = MakeClassLoad(); - HInstruction* new_inst = MakeNewInstance(cls); - HInstruction* if_inst = new (GetAllocator()) HIf(bool_value); - entry->AddInstruction(cls); - entry->AddInstruction(new_inst); - entry->AddInstruction(if_inst); - ManuallyBuildEnvFor(cls, {}); - new_inst->CopyEnvironmentFrom(cls->GetEnvironment()); - - HInstruction* call_left = MakeInvoke(DataType::Type::kVoid, { new_inst }); - HInstruction* write_left = MakeIFieldSet(new_inst, c1, MemberOffset(32)); - HInstruction* goto_left = new (GetAllocator()) HGoto(); - left->AddInstruction(call_left); - left->AddInstruction(write_left); - left->AddInstruction(goto_left); - call_left->CopyEnvironmentFrom(cls->GetEnvironment()); - - HInstruction* write_right = MakeIFieldSet(new_inst, c2, MemberOffset(32)); - HInstruction* call_right = MakeInvoke(DataType::Type::kVoid, {}); - HInstruction* goto_right = new (GetAllocator()) HGoto(); - right->AddInstruction(write_right); - right->AddInstruction(call_right); - right->AddInstruction(goto_right); - call_right->CopyEnvironmentFrom(cls->GetEnvironment()); - - HInstruction* read_bottom = MakeIFieldGet(new_inst, DataType::Type::kInt32, MemberOffset(32)); - HInstruction* return_exit = new (GetAllocator()) HReturn(read_bottom); - breturn->AddInstruction(read_bottom); - breturn->AddInstruction(return_exit); - - SetupExit(exit); - - // PerformLSE expects this to be empty. - graph_->ClearDominanceInformation(); - PerformLSE(); - - EXPECT_INS_REMOVED(read_bottom); - EXPECT_INS_REMOVED(write_right); - EXPECT_INS_RETAINED(write_left); - EXPECT_INS_RETAINED(call_left); - EXPECT_INS_RETAINED(call_right); -} - -// // ENTRY -// obj = new Obj(); -// // Eliminate this one. Object hasn't escaped yet so it's safe. 
-// obj.field = 3; -// noescape(); -// if (parameter_value) { -// // LEFT -// // DO NOT ELIMINATE -// obj.field = 5; -// escape(obj); -// obj.field = 1; -// } else { -// // RIGHT -// // ELIMINATE -// obj.field = 2; -// } -// EXIT -// ELIMINATE -// return obj.fid -TEST_F(LoadStoreEliminationTest, PartialLoadElimination6) { - ScopedObjectAccess soa(Thread::Current()); - VariableSizedHandleScope vshs(soa.Self()); - CreateGraph(&vshs); - AdjacencyListGraph blks(SetupFromAdjacencyList("entry", - "exit", - {{"entry", "left"}, - {"entry", "right"}, - {"left", "breturn"}, - {"right", "breturn"}, - {"breturn", "exit"}})); -#define GET_BLOCK(name) HBasicBlock* name = blks.Get(#name) - GET_BLOCK(entry); - GET_BLOCK(exit); - GET_BLOCK(breturn); - GET_BLOCK(left); - GET_BLOCK(right); -#undef GET_BLOCK - HInstruction* bool_value = MakeParam(DataType::Type::kBool); - HInstruction* c1 = graph_->GetIntConstant(1); - HInstruction* c2 = graph_->GetIntConstant(2); - HInstruction* c3 = graph_->GetIntConstant(3); - HInstruction* c5 = graph_->GetIntConstant(5); - - HInstruction* cls = MakeClassLoad(); - HInstruction* new_inst = MakeNewInstance(cls); - HInstruction* write_entry = MakeIFieldSet(new_inst, c3, MemberOffset(32)); - HInstruction* call_entry = MakeInvoke(DataType::Type::kVoid, {}); - HInstruction* if_inst = new (GetAllocator()) HIf(bool_value); - entry->AddInstruction(cls); - entry->AddInstruction(new_inst); - entry->AddInstruction(write_entry); - entry->AddInstruction(call_entry); - entry->AddInstruction(if_inst); - ManuallyBuildEnvFor(cls, {}); - new_inst->CopyEnvironmentFrom(cls->GetEnvironment()); - call_entry->CopyEnvironmentFrom(cls->GetEnvironment()); - - HInstruction* write_left_start = MakeIFieldSet(new_inst, c5, MemberOffset(32)); - HInstruction* call_left = MakeInvoke(DataType::Type::kVoid, { new_inst }); - HInstruction* write_left = MakeIFieldSet(new_inst, c1, MemberOffset(32)); - HInstruction* goto_left = new (GetAllocator()) HGoto(); - left->AddInstruction(write_left_start); - left->AddInstruction(call_left); - left->AddInstruction(write_left); - left->AddInstruction(goto_left); - call_left->CopyEnvironmentFrom(cls->GetEnvironment()); - - HInstruction* write_right = MakeIFieldSet(new_inst, c2, MemberOffset(32)); - HInstruction* goto_right = new (GetAllocator()) HGoto(); - right->AddInstruction(write_right); - right->AddInstruction(goto_right); - - HInstruction* read_bottom = MakeIFieldGet(new_inst, DataType::Type::kInt32, MemberOffset(32)); - HInstruction* return_exit = new (GetAllocator()) HReturn(read_bottom); - breturn->AddInstruction(read_bottom); - breturn->AddInstruction(return_exit); - - SetupExit(exit); - - // PerformLSE expects this to be empty. 
- graph_->ClearDominanceInformation(); - PerformLSE(); - - EXPECT_INS_REMOVED(read_bottom); - EXPECT_INS_REMOVED(write_right); - EXPECT_INS_REMOVED(write_entry); - EXPECT_INS_RETAINED(write_left_start); - EXPECT_INS_RETAINED(write_left); - EXPECT_INS_RETAINED(call_left); - EXPECT_INS_RETAINED(call_entry); -} - -// // ENTRY -// obj = new Obj(); -// if (parameter_value) { -// // LEFT -// // DO NOT ELIMINATE // obj.field = 1; // while (true) { // bool esc = escape(obj); @@ -4471,7 +2119,7 @@ TEST_F(LoadStoreEliminationTest, PartialLoadPreserved3) { left_pre->AddInstruction(goto_left_pre); HInstruction* suspend_left_loop = new (GetAllocator()) HSuspendCheck(); - HInstruction* call_left_loop = MakeInvoke(DataType::Type::kBool, { new_inst }); + HInstruction* call_left_loop = MakeInvoke(DataType::Type::kBool, {new_inst}); HInstruction* if_left_loop = new (GetAllocator()) HIf(call_left_loop); left_loop->AddInstruction(suspend_left_loop); left_loop->AddInstruction(call_left_loop); @@ -4496,7 +2144,7 @@ TEST_F(LoadStoreEliminationTest, PartialLoadPreserved3) { SetupExit(exit); - PerformLSENoPartial(blks); + PerformLSE(blks); EXPECT_INS_RETAINED(write_left_pre) << *write_left_pre; EXPECT_INS_RETAINED(read_return) << *read_return; @@ -4588,7 +2236,7 @@ TEST_F(LoadStoreEliminationTest, DISABLED_PartialLoadPreserved4) { call_left_loop->CopyEnvironmentFrom(cls->GetEnvironment()); HInstruction* write_right = MakeIFieldSet(new_inst, c2, MemberOffset(32)); - HInstruction* call_right = MakeInvoke(DataType::Type::kBool, { new_inst }); + HInstruction* call_right = MakeInvoke(DataType::Type::kBool, {new_inst}); HInstruction* goto_right = new (GetAllocator()) HGoto(); right->AddInstruction(write_right); right->AddInstruction(call_right); @@ -4602,7 +2250,7 @@ TEST_F(LoadStoreEliminationTest, DISABLED_PartialLoadPreserved4) { SetupExit(exit); - PerformLSENoPartial(blks); + PerformLSE(blks); EXPECT_INS_RETAINED(read_return); EXPECT_INS_RETAINED(write_right); @@ -4688,7 +2336,7 @@ TEST_F(LoadStoreEliminationTest, PartialLoadPreserved5) { SetupExit(exit); - PerformLSENoPartial(blks); + PerformLSE(blks); EXPECT_INS_RETAINED(read_bottom); EXPECT_INS_RETAINED(write_right); @@ -4771,7 +2419,7 @@ TEST_F(LoadStoreEliminationTest, DISABLED_PartialLoadPreserved6) { SetupExit(exit); - PerformLSENoPartial(blks); + PerformLSE(blks); EXPECT_INS_REMOVED(read_bottom); EXPECT_INS_REMOVED(write_right); @@ -4780,3894 +2428,4 @@ TEST_F(LoadStoreEliminationTest, DISABLED_PartialLoadPreserved6) { EXPECT_INS_RETAINED(call_left); EXPECT_INS_RETAINED(call_entry); } - -// // ENTRY -// // MOVED TO MATERIALIZATION BLOCK -// obj = new Obj(); -// ELIMINATE, moved to materialization block. Kept by escape. 
-// obj.field = 3; -// // Make sure this graph isn't broken -// if (obj ==/!= (STATIC.VALUE|obj|null)) { -// // partial_BLOCK -// // REMOVE (either from unreachable or normal PHI creation) -// obj.field = 4; -// } -// if (parameter_value) { -// // LEFT -// // DO NOT ELIMINATE -// escape(obj); -// } else { -// // RIGHT -// // ELIMINATE -// obj.field = 2; -// } -// EXIT -// PREDICATED GET -// return obj.field -TEST_P(PartialComparisonTestGroup, PartialComparisonBeforeCohort) { - ScopedObjectAccess soa(Thread::Current()); - VariableSizedHandleScope vshs(soa.Self()); - CreateGraph(/*handles=*/&vshs); - AdjacencyListGraph blks(SetupFromAdjacencyList("entry", - "exit", - {{"entry", "first_block"}, - {"first_block", "critical_break"}, - {"first_block", "partial"}, - {"partial", "merge"}, - {"critical_break", "merge"}, - {"merge", "left"}, - {"merge", "right"}, - {"left", "breturn"}, - {"right", "breturn"}, - {"breturn", "exit"}})); -#define GET_BLOCK(name) HBasicBlock* name = blks.Get(#name) - GET_BLOCK(first_block); - GET_BLOCK(merge); - GET_BLOCK(partial); - GET_BLOCK(critical_break); - GET_BLOCK(exit); - GET_BLOCK(breturn); - GET_BLOCK(left); - GET_BLOCK(right); -#undef GET_BLOCK - HInstruction* bool_value = MakeParam(DataType::Type::kBool); - HInstruction* c2 = graph_->GetIntConstant(2); - HInstruction* c3 = graph_->GetIntConstant(3); - HInstruction* c4 = graph_->GetIntConstant(4); - - HInstruction* cls = MakeClassLoad(); - HInstruction* new_inst = MakeNewInstance(cls); - HInstruction* write_entry = MakeIFieldSet(new_inst, c3, MemberOffset(32)); - ComparisonInstructions cmp_instructions = GetComparisonInstructions(new_inst); - HInstruction* if_inst = new (GetAllocator()) HIf(cmp_instructions.cmp_); - first_block->AddInstruction(cls); - first_block->AddInstruction(new_inst); - first_block->AddInstruction(write_entry); - cmp_instructions.AddSetup(first_block); - first_block->AddInstruction(cmp_instructions.cmp_); - first_block->AddInstruction(if_inst); - ManuallyBuildEnvFor(cls, {}); - cmp_instructions.AddEnvironment(cls->GetEnvironment()); - new_inst->CopyEnvironmentFrom(cls->GetEnvironment()); - - HInstruction* write_partial = MakeIFieldSet(new_inst, c4, MemberOffset(32)); - HInstruction* goto_partial = new (GetAllocator()) HGoto(); - partial->AddInstruction(write_partial); - partial->AddInstruction(goto_partial); - - HInstruction* goto_crit_break = new (GetAllocator()) HGoto(); - critical_break->AddInstruction(goto_crit_break); - - HInstruction* if_merge = new (GetAllocator()) HIf(bool_value); - merge->AddInstruction(if_merge); - - HInstruction* call_left = MakeInvoke(DataType::Type::kVoid, { new_inst }); - HInstruction* goto_left = new (GetAllocator()) HGoto(); - left->AddInstruction(call_left); - left->AddInstruction(goto_left); - call_left->CopyEnvironmentFrom(cls->GetEnvironment()); - - HInstruction* write_right = MakeIFieldSet(new_inst, c2, MemberOffset(32)); - HInstruction* goto_right = new (GetAllocator()) HGoto(); - right->AddInstruction(write_right); - right->AddInstruction(goto_right); - - HInstruction* read_bottom = MakeIFieldGet(new_inst, DataType::Type::kInt32, MemberOffset(32)); - HInstruction* return_exit = new (GetAllocator()) HReturn(read_bottom); - breturn->AddInstruction(read_bottom); - breturn->AddInstruction(return_exit); - - SetupExit(exit); - - PerformLSEWithPartial(blks); - - std::vector<HPhi*> merges; - HPredicatedInstanceFieldGet* pred_get; - HInstanceFieldSet* init_set; - std::tie(pred_get, init_set) = - FindSingleInstructions<HPredicatedInstanceFieldGet, 
HInstanceFieldSet>(graph_); - std::tie(merges) = FindAllInstructions<HPhi>(graph_); - ASSERT_EQ(merges.size(), 3u); - HPhi* merge_value_return = FindOrNull(merges.begin(), merges.end(), [&](HPhi* p) { - return p->GetType() == DataType::Type::kInt32 && p->GetBlock() == breturn; - }); - HPhi* merge_value_top = FindOrNull(merges.begin(), merges.end(), [&](HPhi* p) { - return p->GetType() == DataType::Type::kInt32 && p->GetBlock() != breturn; - }); - HPhi* merge_alloc = FindOrNull(merges.begin(), merges.end(), [](HPhi* p) { - return p->GetType() == DataType::Type::kReference; - }); - EXPECT_INS_REMOVED(read_bottom); - EXPECT_INS_REMOVED(write_entry); - EXPECT_INS_REMOVED(write_partial); - EXPECT_INS_RETAINED(call_left); - CheckFinalInstruction(if_inst->InputAt(0), ComparisonPlacement::kBeforeEscape); - EXPECT_INS_EQ(init_set->InputAt(1), merge_value_top); - EXPECT_INS_EQ(pred_get->GetTarget(), merge_alloc); - EXPECT_INS_EQ(pred_get->GetDefaultValue(), merge_value_return); -} - -// // ENTRY -// // MOVED TO MATERIALIZATION BLOCK -// obj = new Obj(); -// ELIMINATE, moved to materialization block. Kept by escape. -// obj.field = 3; -// // Make sure this graph isn't broken -// if (parameter_value) { -// if (obj ==/!= (STATIC.VALUE|obj|null)) { -// // partial_BLOCK -// obj.field = 4; -// } -// // LEFT -// // DO NOT ELIMINATE -// escape(obj); -// } else { -// // RIGHT -// // ELIMINATE -// obj.field = 2; -// } -// EXIT -// PREDICATED GET -// return obj.field -TEST_P(PartialComparisonTestGroup, PartialComparisonInCohortBeforeEscape) { - ScopedObjectAccess soa(Thread::Current()); - VariableSizedHandleScope vshs(soa.Self()); - CreateGraph(/*handles=*/&vshs); - AdjacencyListGraph blks(SetupFromAdjacencyList("entry", - "exit", - {{"entry", "left_begin"}, - {"left_begin", "partial"}, - {"left_begin", "left_crit_break"}, - {"left_crit_break", "left"}, - {"partial", "left"}, - {"entry", "right"}, - {"left", "breturn"}, - {"right", "breturn"}, - {"breturn", "exit"}})); -#define GET_BLOCK(name) HBasicBlock* name = blks.Get(#name) - GET_BLOCK(entry); - GET_BLOCK(partial); - GET_BLOCK(left_begin); - GET_BLOCK(exit); - GET_BLOCK(breturn); - GET_BLOCK(left); - GET_BLOCK(left_crit_break); - GET_BLOCK(right); -#undef GET_BLOCK - EnsurePredecessorOrder(left, {left_crit_break, partial}); - HInstruction* bool_value = MakeParam(DataType::Type::kBool); - HInstruction* c2 = graph_->GetIntConstant(2); - HInstruction* c3 = graph_->GetIntConstant(3); - HInstruction* c4 = graph_->GetIntConstant(4); - - HInstruction* cls = MakeClassLoad(); - HInstruction* new_inst = MakeNewInstance(cls); - HInstruction* write_entry = MakeIFieldSet(new_inst, c3, MemberOffset(32)); - HInstruction* if_inst = new (GetAllocator()) HIf(bool_value); - entry->AddInstruction(cls); - entry->AddInstruction(new_inst); - entry->AddInstruction(write_entry); - entry->AddInstruction(if_inst); - ManuallyBuildEnvFor(cls, {}); - new_inst->CopyEnvironmentFrom(cls->GetEnvironment()); - - ComparisonInstructions cmp_instructions = GetComparisonInstructions(new_inst); - HInstruction* if_left_begin = new (GetAllocator()) HIf(cmp_instructions.cmp_); - cmp_instructions.AddSetup(left_begin); - left_begin->AddInstruction(cmp_instructions.cmp_); - left_begin->AddInstruction(if_left_begin); - cmp_instructions.AddEnvironment(cls->GetEnvironment()); - - left_crit_break->AddInstruction(new (GetAllocator()) HGoto()); - - HInstruction* write_partial = MakeIFieldSet(new_inst, c4, MemberOffset(32)); - HInstruction* goto_partial = new (GetAllocator()) HGoto(); - 
partial->AddInstruction(write_partial); - partial->AddInstruction(goto_partial); - - HInstruction* call_left = MakeInvoke(DataType::Type::kVoid, { new_inst }); - HInstruction* goto_left = new (GetAllocator()) HGoto(); - left->AddInstruction(call_left); - left->AddInstruction(goto_left); - call_left->CopyEnvironmentFrom(cls->GetEnvironment()); - - HInstruction* write_right = MakeIFieldSet(new_inst, c2, MemberOffset(32)); - HInstruction* goto_right = new (GetAllocator()) HGoto(); - right->AddInstruction(write_right); - right->AddInstruction(goto_right); - - HInstruction* read_bottom = MakeIFieldGet(new_inst, DataType::Type::kInt32, MemberOffset(32)); - HInstruction* return_exit = new (GetAllocator()) HReturn(read_bottom); - breturn->AddInstruction(read_bottom); - breturn->AddInstruction(return_exit); - - SetupExit(exit); - - PerformLSEWithPartial(blks); - - std::vector<HPhi*> merges; - HInstanceFieldSet* init_set = - FindSingleInstruction<HInstanceFieldSet>(graph_, left_begin->GetSinglePredecessor()); - HInstanceFieldSet* partial_set = FindSingleInstruction<HInstanceFieldSet>(graph_, partial); - HPredicatedInstanceFieldGet* pred_get = - FindSingleInstruction<HPredicatedInstanceFieldGet>(graph_); - std::tie(merges) = FindAllInstructions<HPhi>(graph_); - ASSERT_EQ(merges.size(), 2u); - HPhi* merge_value_return = FindOrNull(merges.begin(), merges.end(), [&](HPhi* p) { - return p->GetType() == DataType::Type::kInt32; - }); - HPhi* merge_alloc = FindOrNull(merges.begin(), merges.end(), [](HPhi* p) { - return p->GetType() == DataType::Type::kReference; - }); - EXPECT_EQ(merge_value_return->GetBlock(), breturn) - << blks.GetName(merge_value_return->GetBlock()); - EXPECT_INS_REMOVED(read_bottom); - EXPECT_INS_REMOVED(write_entry); - EXPECT_INS_RETAINED(write_partial); - EXPECT_INS_RETAINED(call_left); - CheckFinalInstruction(if_left_begin->InputAt(0), ComparisonPlacement::kInEscape); - EXPECT_INS_EQ(init_set->InputAt(1), c3); - EXPECT_INS_EQ(partial_set->InputAt(0), init_set->InputAt(0)); - EXPECT_INS_EQ(partial_set->InputAt(1), c4); - EXPECT_INS_EQ(pred_get->GetTarget(), merge_alloc); - EXPECT_INS_EQ(pred_get->GetDefaultValue(), merge_value_return); -} - -// // ENTRY -// // MOVED TO MATERIALIZATION BLOCK -// obj = new Obj(); -// ELIMINATE, moved to materialization block. Kept by escape. 
-// obj.field = 3; -// // Make sure this graph isn't broken -// if (parameter_value) { -// // LEFT -// // DO NOT ELIMINATE -// escape(obj); -// } else { -// // RIGHT -// // ELIMINATE -// obj.field = 2; -// } -// if (obj ==/!= (STATIC.VALUE|obj|null)) { -// // partial_BLOCK -// obj.field = 4; -// } -// EXIT -// PREDICATED GET -// return obj.field -TEST_P(PartialComparisonTestGroup, PartialComparisonAfterCohort) { - ScopedObjectAccess soa(Thread::Current()); - VariableSizedHandleScope vshs(soa.Self()); - CreateGraph(/*handles=*/&vshs); - AdjacencyListGraph blks(SetupFromAdjacencyList("entry", - "exit", - {{"entry", "left"}, - {"entry", "right"}, - {"left", "merge"}, - {"right", "merge"}, - {"merge", "critical_break"}, - {"critical_break", "breturn"}, - {"merge", "partial"}, - {"partial", "breturn"}, - {"breturn", "exit"}})); -#define GET_BLOCK(name) HBasicBlock* name = blks.Get(#name) - GET_BLOCK(entry); - GET_BLOCK(partial); - GET_BLOCK(critical_break); - GET_BLOCK(merge); - GET_BLOCK(exit); - GET_BLOCK(breturn); - GET_BLOCK(left); - GET_BLOCK(right); -#undef GET_BLOCK - EnsurePredecessorOrder(breturn, {critical_break, partial}); - HInstruction* bool_value = MakeParam(DataType::Type::kBool); - HInstruction* c2 = graph_->GetIntConstant(2); - HInstruction* c3 = graph_->GetIntConstant(3); - HInstruction* c4 = graph_->GetIntConstant(4); - - HInstruction* cls = MakeClassLoad(); - HInstruction* new_inst = MakeNewInstance(cls); - HInstruction* write_entry = MakeIFieldSet(new_inst, c3, MemberOffset(32)); - HInstruction* if_inst = new (GetAllocator()) HIf(bool_value); - entry->AddInstruction(cls); - entry->AddInstruction(new_inst); - entry->AddInstruction(write_entry); - entry->AddInstruction(if_inst); - ManuallyBuildEnvFor(cls, {}); - new_inst->CopyEnvironmentFrom(cls->GetEnvironment()); - - HInstruction* call_left = MakeInvoke(DataType::Type::kVoid, { new_inst }); - HInstruction* goto_left = new (GetAllocator()) HGoto(); - left->AddInstruction(call_left); - left->AddInstruction(goto_left); - call_left->CopyEnvironmentFrom(cls->GetEnvironment()); - - HInstruction* write_right = MakeIFieldSet(new_inst, c2, MemberOffset(32)); - HInstruction* goto_right = new (GetAllocator()) HGoto(); - right->AddInstruction(write_right); - right->AddInstruction(goto_right); - - ComparisonInstructions cmp_instructions = GetComparisonInstructions(new_inst); - HInstruction* if_merge = new (GetAllocator()) HIf(cmp_instructions.cmp_); - cmp_instructions.AddSetup(merge); - merge->AddInstruction(cmp_instructions.cmp_); - merge->AddInstruction(if_merge); - cmp_instructions.AddEnvironment(cls->GetEnvironment()); - - HInstanceFieldSet* write_partial = MakeIFieldSet(new_inst, c4, MemberOffset(32)); - HInstruction* goto_partial = new (GetAllocator()) HGoto(); - partial->AddInstruction(write_partial); - partial->AddInstruction(goto_partial); - - HInstruction* goto_crit_break = new (GetAllocator()) HGoto(); - critical_break->AddInstruction(goto_crit_break); - - HInstruction* read_bottom = MakeIFieldGet(new_inst, DataType::Type::kInt32, MemberOffset(32)); - HInstruction* return_exit = new (GetAllocator()) HReturn(read_bottom); - breturn->AddInstruction(read_bottom); - breturn->AddInstruction(return_exit); - - SetupExit(exit); - - PerformLSEWithPartial(blks); - - std::vector<HPhi*> merges; - HInstanceFieldSet* init_set = - FindSingleInstruction<HInstanceFieldSet>(graph_, left->GetSinglePredecessor()); - HPredicatedInstanceFieldGet* pred_get = - FindSingleInstruction<HPredicatedInstanceFieldGet>(graph_); - std::tie(merges) = 
FindAllInstructions<HPhi>(graph_); - ASSERT_EQ(merges.size(), 3u); - HPhi* merge_value_return = FindOrNull(merges.begin(), merges.end(), [&](HPhi* p) { - return p->GetType() == DataType::Type::kInt32 && p->GetBlock() == breturn; - }); - HPhi* merge_alloc = FindOrNull(merges.begin(), merges.end(), [](HPhi* p) { - return p->GetType() == DataType::Type::kReference; - }); - EXPECT_INS_REMOVED(read_bottom); - EXPECT_INS_REMOVED(write_entry); - EXPECT_INS_RETAINED(write_partial); - EXPECT_TRUE(write_partial->GetIsPredicatedSet()); - EXPECT_INS_RETAINED(call_left); - CheckFinalInstruction(if_merge->InputAt(0), ComparisonPlacement::kAfterEscape); - EXPECT_INS_EQ(init_set->InputAt(1), c3); - ASSERT_TRUE(write_partial->InputAt(0)->IsPhi()); - EXPECT_INS_EQ(write_partial->InputAt(0)->AsPhi()->InputAt(0), init_set->InputAt(0)); - EXPECT_INS_EQ(write_partial->InputAt(1), c4); - EXPECT_INS_EQ(pred_get->GetTarget(), merge_alloc); - EXPECT_INS_EQ(pred_get->GetDefaultValue(), merge_value_return); -} - -// // ENTRY -// // MOVED TO MATERIALIZATION BLOCK -// obj = new Obj(); -// ELIMINATE, moved to materialization block. Kept by escape. -// obj.field = 3; -// // Make sure this graph isn't broken -// if (parameter_value) { -// // LEFT -// // DO NOT ELIMINATE -// escape(obj); -// if (obj ==/!= (STATIC.VALUE|obj|null)) { -// // partial_BLOCK -// obj.field = 4; -// } -// } else { -// // RIGHT -// // ELIMINATE -// obj.field = 2; -// } -// EXIT -// PREDICATED GET -// return obj.field -TEST_P(PartialComparisonTestGroup, PartialComparisonInCohortAfterEscape) { - PartialComparisonKind kind = GetParam(); - ScopedObjectAccess soa(Thread::Current()); - VariableSizedHandleScope vshs(soa.Self()); - CreateGraph(/*handles=*/&vshs); - AdjacencyListGraph blks(SetupFromAdjacencyList("entry", - "exit", - {{"entry", "left"}, - {"left", "partial"}, - {"partial", "left_end"}, - {"left", "left_crit_break"}, - {"left_crit_break", "left_end"}, - {"left_end", "breturn"}, - {"entry", "right"}, - {"right", "breturn"}, - {"breturn", "exit"}})); -#define GET_BLOCK(name) HBasicBlock* name = blks.Get(#name) - GET_BLOCK(entry); - GET_BLOCK(partial); - GET_BLOCK(left_end); - GET_BLOCK(exit); - GET_BLOCK(breturn); - GET_BLOCK(left); - GET_BLOCK(left_crit_break); - GET_BLOCK(right); -#undef GET_BLOCK - HInstruction* bool_value = MakeParam(DataType::Type::kBool); - HInstruction* c2 = graph_->GetIntConstant(2); - HInstruction* c3 = graph_->GetIntConstant(3); - HInstruction* c4 = graph_->GetIntConstant(4); - - HInstruction* cls = MakeClassLoad(); - HInstruction* new_inst = MakeNewInstance(cls); - HInstruction* write_entry = MakeIFieldSet(new_inst, c3, MemberOffset(32)); - HInstruction* if_inst = new (GetAllocator()) HIf(bool_value); - entry->AddInstruction(cls); - entry->AddInstruction(new_inst); - entry->AddInstruction(write_entry); - entry->AddInstruction(if_inst); - ManuallyBuildEnvFor(cls, {}); - new_inst->CopyEnvironmentFrom(cls->GetEnvironment()); - - HInstruction* call_left = MakeInvoke(DataType::Type::kVoid, { new_inst }); - ComparisonInstructions cmp_instructions = GetComparisonInstructions(new_inst); - HInstruction* if_left = new (GetAllocator()) HIf(cmp_instructions.cmp_); - left->AddInstruction(call_left); - cmp_instructions.AddSetup(left); - left->AddInstruction(cmp_instructions.cmp_); - left->AddInstruction(if_left); - call_left->CopyEnvironmentFrom(cls->GetEnvironment()); - cmp_instructions.AddEnvironment(cls->GetEnvironment()); - if (if_left->AsIf()->IfTrueSuccessor() != partial) { - left->SwapSuccessors(); - } - - HInstruction* 
write_partial = MakeIFieldSet(new_inst, c4, MemberOffset(32)); - HInstruction* goto_partial = new (GetAllocator()) HGoto(); - partial->AddInstruction(write_partial); - partial->AddInstruction(goto_partial); - - HInstruction* goto_left_crit_break = new (GetAllocator()) HGoto(); - left_crit_break->AddInstruction(goto_left_crit_break); - - HInstruction* goto_left_end = new (GetAllocator()) HGoto(); - left_end->AddInstruction(goto_left_end); - - HInstruction* write_right = MakeIFieldSet(new_inst, c2, MemberOffset(32)); - HInstruction* goto_right = new (GetAllocator()) HGoto(); - right->AddInstruction(write_right); - right->AddInstruction(goto_right); - - HInstruction* read_bottom = MakeIFieldGet(new_inst, DataType::Type::kInt32, MemberOffset(32)); - HInstruction* return_exit = new (GetAllocator()) HReturn(read_bottom); - breturn->AddInstruction(read_bottom); - breturn->AddInstruction(return_exit); - - SetupExit(exit); - - PerformLSEWithPartial(blks); - - std::vector<HPhi*> merges; - std::vector<HInstanceFieldSet*> sets; - HPredicatedInstanceFieldGet* pred_get = - FindSingleInstruction<HPredicatedInstanceFieldGet>(graph_); - std::tie(merges, sets) = FindAllInstructions<HPhi, HInstanceFieldSet>(graph_); - ASSERT_EQ(merges.size(), 2u); - ASSERT_EQ(sets.size(), 2u); - HInstanceFieldSet* init_set = FindOrNull(sets.begin(), sets.end(), [&](HInstanceFieldSet* s) { - return s->GetBlock()->GetSingleSuccessor() == left; - }); - EXPECT_INS_EQ(init_set->InputAt(1), c3); - HPhi* merge_value_return = FindOrNull(merges.begin(), merges.end(), [&](HPhi* p) { - return p->GetType() == DataType::Type::kInt32 && p->GetBlock() == breturn; - }); - HPhi* merge_alloc = FindOrNull(merges.begin(), merges.end(), [](HPhi* p) { - return p->GetType() == DataType::Type::kReference; - }); - EXPECT_INS_REMOVED(read_bottom); - EXPECT_INS_REMOVED(write_entry); - if (kind.IsPossiblyTrue()) { - EXPECT_INS_RETAINED(write_partial); - EXPECT_TRUE(std::find(sets.begin(), sets.end(), write_partial) != sets.end()); - } - EXPECT_INS_RETAINED(call_left); - CheckFinalInstruction(if_left->InputAt(0), ComparisonPlacement::kInEscape); - EXPECT_INS_EQ(pred_get->GetTarget(), merge_alloc); - EXPECT_INS_EQ(pred_get->GetDefaultValue(), merge_value_return); -} - -INSTANTIATE_TEST_SUITE_P( - LoadStoreEliminationTest, - PartialComparisonTestGroup, - testing::Values(PartialComparisonKind{PartialComparisonKind::Type::kEquals, - PartialComparisonKind::Target::kNull, - PartialComparisonKind::Position::kLeft}, - PartialComparisonKind{PartialComparisonKind::Type::kEquals, - PartialComparisonKind::Target::kNull, - PartialComparisonKind::Position::kRight}, - PartialComparisonKind{PartialComparisonKind::Type::kEquals, - PartialComparisonKind::Target::kValue, - PartialComparisonKind::Position::kLeft}, - PartialComparisonKind{PartialComparisonKind::Type::kEquals, - PartialComparisonKind::Target::kValue, - PartialComparisonKind::Position::kRight}, - PartialComparisonKind{PartialComparisonKind::Type::kEquals, - PartialComparisonKind::Target::kSelf, - PartialComparisonKind::Position::kLeft}, - PartialComparisonKind{PartialComparisonKind::Type::kNotEquals, - PartialComparisonKind::Target::kNull, - PartialComparisonKind::Position::kLeft}, - PartialComparisonKind{PartialComparisonKind::Type::kNotEquals, - PartialComparisonKind::Target::kNull, - PartialComparisonKind::Position::kRight}, - PartialComparisonKind{PartialComparisonKind::Type::kNotEquals, - PartialComparisonKind::Target::kSelf, - PartialComparisonKind::Position::kLeft}, - 
PartialComparisonKind{PartialComparisonKind::Type::kNotEquals, - PartialComparisonKind::Target::kValue, - PartialComparisonKind::Position::kLeft}, - PartialComparisonKind{PartialComparisonKind::Type::kNotEquals, - PartialComparisonKind::Target::kValue, - PartialComparisonKind::Position::kRight})); - -// // ENTRY -// obj = new Obj(); -// if (parameter_value) { -// // LEFT -// escape(obj); -// } else { -// // RIGHT -// // ELIMINATE -// obj.field = 2; -// } -// EXIT -// predicated-ELIMINATE -// obj.field = 3; -TEST_F(LoadStoreEliminationTest, PredicatedStore1) { - ScopedObjectAccess soa(Thread::Current()); - VariableSizedHandleScope vshs(soa.Self()); - InitGraph(/*handles=*/&vshs); - AdjacencyListGraph blks(SetupFromAdjacencyList("entry", - "exit", - {{"entry", "left"}, - {"entry", "right"}, - {"left", "breturn"}, - {"right", "breturn"}, - {"breturn", "exit"}})); -#define GET_BLOCK(name) HBasicBlock* name = blks.Get(#name) - GET_BLOCK(entry); - GET_BLOCK(exit); - GET_BLOCK(breturn); - GET_BLOCK(left); - GET_BLOCK(right); -#undef GET_BLOCK - EnsurePredecessorOrder(breturn, {left, right}); - HInstruction* bool_value = MakeParam(DataType::Type::kBool); - HInstruction* null_const = graph_->GetNullConstant(); - HInstruction* c2 = graph_->GetIntConstant(2); - HInstruction* c3 = graph_->GetIntConstant(3); - - HInstruction* cls = MakeClassLoad(); - HInstruction* new_inst = MakeNewInstance(cls); - HInstruction* if_inst = new (GetAllocator()) HIf(bool_value); - entry->AddInstruction(cls); - entry->AddInstruction(new_inst); - entry->AddInstruction(if_inst); - ManuallyBuildEnvFor(cls, {}); - new_inst->CopyEnvironmentFrom(cls->GetEnvironment()); - - HInstruction* call_left = MakeInvoke(DataType::Type::kVoid, { new_inst }); - HInstruction* goto_left = new (GetAllocator()) HGoto(); - left->AddInstruction(call_left); - left->AddInstruction(goto_left); - call_left->CopyEnvironmentFrom(cls->GetEnvironment()); - - HInstruction* write_right = MakeIFieldSet(new_inst, c2, MemberOffset(32)); - HInstruction* goto_right = new (GetAllocator()) HGoto(); - right->AddInstruction(write_right); - right->AddInstruction(goto_right); - - HInstruction* write_bottom = MakeIFieldSet(new_inst, c3, MemberOffset(32)); - HInstruction* return_exit = new (GetAllocator()) HReturnVoid(); - breturn->AddInstruction(write_bottom); - breturn->AddInstruction(return_exit); - - SetupExit(exit); - - PerformLSEWithPartial(blks); - - EXPECT_INS_RETAINED(write_bottom); - EXPECT_TRUE(write_bottom->AsInstanceFieldSet()->GetIsPredicatedSet()); - EXPECT_INS_REMOVED(write_right); - EXPECT_INS_RETAINED(call_left); - HPhi* merge_alloc = FindSingleInstruction<HPhi>(graph_, breturn); - ASSERT_NE(merge_alloc, nullptr); - EXPECT_TRUE(merge_alloc->InputAt(0)->IsNewInstance()) << *merge_alloc; - EXPECT_EQ(merge_alloc->InputAt(0)->InputAt(0), cls) << *merge_alloc << " cls? 
" << *cls; - EXPECT_EQ(merge_alloc->InputAt(1), null_const); -} - -// // ENTRY -// obj = new Obj(); -// obj.field = 3; -// if (parameter_value) { -// // LEFT -// escape(obj); -// } else { -// // RIGHT -// // ELIMINATE -// obj.field = 2; -// } -// // MERGE -// if (second_param) { -// // NON_ESCAPE -// obj.field = 1; -// noescape(); -// } -// EXIT -// predicated-ELIMINATE -// obj.field = 4; -TEST_F(LoadStoreEliminationTest, PredicatedStore2) { - ScopedObjectAccess soa(Thread::Current()); - VariableSizedHandleScope vshs(soa.Self()); - CreateGraph(/*handles=*/&vshs); - AdjacencyListGraph blks(SetupFromAdjacencyList("entry", - "exit", - {{"entry", "left"}, - {"entry", "right"}, - {"left", "merge"}, - {"right", "merge"}, - {"merge", "non_escape"}, - {"non_escape", "breturn"}, - {"merge", "merge_crit_break"}, - {"merge_crit_break", "breturn"}, - {"breturn", "exit"}})); -#define GET_BLOCK(name) HBasicBlock* name = blks.Get(#name) - GET_BLOCK(entry); - GET_BLOCK(exit); - GET_BLOCK(breturn); - GET_BLOCK(left); - GET_BLOCK(right); - GET_BLOCK(merge); - GET_BLOCK(merge_crit_break); - GET_BLOCK(non_escape); -#undef GET_BLOCK - EnsurePredecessorOrder(merge, {left, right}); - EnsurePredecessorOrder(breturn, {merge_crit_break, non_escape}); - HInstruction* bool_value = MakeParam(DataType::Type::kBool); - HInstruction* bool_value2 = MakeParam(DataType::Type::kBool); - HInstruction* null_const = graph_->GetNullConstant(); - HInstruction* c1 = graph_->GetIntConstant(3); - HInstruction* c2 = graph_->GetIntConstant(2); - HInstruction* c3 = graph_->GetIntConstant(3); - HInstruction* c4 = graph_->GetIntConstant(4); - - HInstruction* cls = MakeClassLoad(); - HInstruction* new_inst = MakeNewInstance(cls); - HInstruction* write_entry = MakeIFieldSet(new_inst, c3, MemberOffset(32)); - HInstruction* if_inst = new (GetAllocator()) HIf(bool_value); - entry->AddInstruction(cls); - entry->AddInstruction(new_inst); - entry->AddInstruction(write_entry); - entry->AddInstruction(if_inst); - ManuallyBuildEnvFor(cls, {}); - new_inst->CopyEnvironmentFrom(cls->GetEnvironment()); - - HInstruction* call_left = MakeInvoke(DataType::Type::kVoid, { new_inst }); - HInstruction* goto_left = new (GetAllocator()) HGoto(); - left->AddInstruction(call_left); - left->AddInstruction(goto_left); - call_left->CopyEnvironmentFrom(cls->GetEnvironment()); - - HInstruction* write_right = MakeIFieldSet(new_inst, c2, MemberOffset(32)); - HInstruction* goto_right = new (GetAllocator()) HGoto(); - right->AddInstruction(write_right); - right->AddInstruction(goto_right); - - HInstruction* merge_if = new (GetAllocator()) HIf(bool_value2); - merge->AddInstruction(merge_if); - - merge_crit_break->AddInstruction(new (GetAllocator()) HGoto()); - - HInstruction* write_non_escape = MakeIFieldSet(new_inst, c1, MemberOffset(32)); - HInstruction* non_escape_call = MakeInvoke(DataType::Type::kVoid, {}); - HInstruction* non_escape_goto = new (GetAllocator()) HGoto(); - non_escape->AddInstruction(write_non_escape); - non_escape->AddInstruction(non_escape_call); - non_escape->AddInstruction(non_escape_goto); - non_escape_call->CopyEnvironmentFrom(cls->GetEnvironment()); - - HInstruction* write_bottom = MakeIFieldSet(new_inst, c4, MemberOffset(32)); - HInstruction* return_exit = new (GetAllocator()) HReturnVoid(); - breturn->AddInstruction(write_bottom); - breturn->AddInstruction(return_exit); - - SetupExit(exit); - - PerformLSEWithPartial(blks); - - EXPECT_INS_RETAINED(write_bottom); - EXPECT_TRUE(write_bottom->AsInstanceFieldSet()->GetIsPredicatedSet()) << 
*write_bottom; - EXPECT_INS_REMOVED(write_right); - EXPECT_INS_RETAINED(call_left); - HInstanceFieldSet* pred_set = FindSingleInstruction<HInstanceFieldSet>(graph_, breturn); - HPhi* merge_alloc = FindSingleInstruction<HPhi>(graph_); - ASSERT_NE(merge_alloc, nullptr); - EXPECT_TRUE(merge_alloc->InputAt(0)->IsNewInstance()) << *merge_alloc; - EXPECT_INS_EQ(merge_alloc->InputAt(0)->InputAt(0), cls) << " phi is: " << *merge_alloc; - EXPECT_INS_EQ(merge_alloc->InputAt(1), null_const); - ASSERT_NE(pred_set, nullptr); - EXPECT_TRUE(pred_set->GetIsPredicatedSet()) << *pred_set; - EXPECT_INS_EQ(pred_set->InputAt(0), merge_alloc); -} - -// // ENTRY -// obj = new Obj(); -// obj.field = 3; -// if (parameter_value) { -// // LEFT -// escape(obj); -// } else { -// // RIGHT -// // ELIMINATE -// obj.field = 2; -// } -// EXIT -// predicated-ELIMINATE -// return obj.field -TEST_F(LoadStoreEliminationTest, PredicatedLoad1) { - ScopedObjectAccess soa(Thread::Current()); - VariableSizedHandleScope vshs(soa.Self()); - CreateGraph(/*handles=*/&vshs); - AdjacencyListGraph blks(SetupFromAdjacencyList("entry", - "exit", - {{"entry", "left"}, - {"entry", "right"}, - {"left", "breturn"}, - {"right", "breturn"}, - {"breturn", "exit"}})); -#define GET_BLOCK(name) HBasicBlock* name = blks.Get(#name) - GET_BLOCK(entry); - GET_BLOCK(exit); - GET_BLOCK(breturn); - GET_BLOCK(left); - GET_BLOCK(right); -#undef GET_BLOCK - EnsurePredecessorOrder(breturn, {left, right}); - HInstruction* bool_value = MakeParam(DataType::Type::kBool); - HInstruction* null_const = graph_->GetNullConstant(); - HInstruction* c2 = graph_->GetIntConstant(2); - HInstruction* c3 = graph_->GetIntConstant(3); - - HInstruction* cls = MakeClassLoad(); - HInstruction* new_inst = MakeNewInstance(cls); - HInstruction* write_entry = MakeIFieldSet(new_inst, c3, MemberOffset(32)); - HInstruction* if_inst = new (GetAllocator()) HIf(bool_value); - entry->AddInstruction(cls); - entry->AddInstruction(new_inst); - entry->AddInstruction(write_entry); - entry->AddInstruction(if_inst); - ManuallyBuildEnvFor(cls, {}); - new_inst->CopyEnvironmentFrom(cls->GetEnvironment()); - - HInstruction* call_left = MakeInvoke(DataType::Type::kVoid, { new_inst }); - HInstruction* goto_left = new (GetAllocator()) HGoto(); - left->AddInstruction(call_left); - left->AddInstruction(goto_left); - call_left->CopyEnvironmentFrom(cls->GetEnvironment()); - - HInstruction* write_right = MakeIFieldSet(new_inst, c2, MemberOffset(32)); - HInstruction* goto_right = new (GetAllocator()) HGoto(); - right->AddInstruction(write_right); - right->AddInstruction(goto_right); - - HInstruction* read_bottom = MakeIFieldGet(new_inst, DataType::Type::kInt32, MemberOffset(32)); - HInstruction* return_exit = new (GetAllocator()) HReturn(read_bottom); - breturn->AddInstruction(read_bottom); - breturn->AddInstruction(return_exit); - - SetupExit(exit); - - PerformLSEWithPartial(blks); - - EXPECT_INS_REMOVED(read_bottom); - EXPECT_INS_REMOVED(write_right); - EXPECT_INS_RETAINED(call_left); - std::vector<HPhi*> merges; - HPredicatedInstanceFieldGet* pred_get = - FindSingleInstruction<HPredicatedInstanceFieldGet>(graph_, breturn); - std::tie(merges) = FindAllInstructions<HPhi>(graph_, breturn); - ASSERT_EQ(merges.size(), 2u); - HPhi* merge_value_return = FindOrNull( - merges.begin(), merges.end(), [](HPhi* p) { return p->GetType() == DataType::Type::kInt32; }); - HPhi* merge_alloc = FindOrNull(merges.begin(), merges.end(), [](HPhi* p) { - return p->GetType() == DataType::Type::kReference; - }); - 
ASSERT_NE(merge_alloc, nullptr); - EXPECT_TRUE(merge_alloc->InputAt(0)->IsNewInstance()) << *merge_alloc; - EXPECT_EQ(merge_alloc->InputAt(0)->InputAt(0), cls) << *merge_alloc << " cls? " << *cls; - EXPECT_EQ(merge_alloc->InputAt(1), null_const); - ASSERT_NE(pred_get, nullptr); - EXPECT_INS_EQ(pred_get->GetTarget(), merge_alloc); - EXPECT_INS_EQ(pred_get->GetDefaultValue(), merge_value_return) << " pred-get is: " << *pred_get; - EXPECT_INS_EQ(merge_value_return->InputAt(0), graph_->GetIntConstant(0)) - << " merge val is: " << *merge_value_return; - EXPECT_INS_EQ(merge_value_return->InputAt(1), c2) << " merge val is: " << *merge_value_return; -} - -// // ENTRY -// obj1 = new Obj1(); -// obj2 = new Obj2(); -// obj1.field = 3; -// obj2.field = 13; -// if (parameter_value) { -// // LEFT -// escape(obj1); -// escape(obj2); -// } else { -// // RIGHT -// // ELIMINATE -// obj1.field = 2; -// obj2.field = 12; -// } -// EXIT -// predicated-ELIMINATE -// return obj1.field + obj2.field -TEST_F(LoadStoreEliminationTest, MultiPredicatedLoad1) { - ScopedObjectAccess soa(Thread::Current()); - VariableSizedHandleScope vshs(soa.Self()); - CreateGraph(/*handles=*/&vshs); - AdjacencyListGraph blks(SetupFromAdjacencyList("entry", - "exit", - {{"entry", "left"}, - {"entry", "right"}, - {"left", "breturn"}, - {"right", "breturn"}, - {"breturn", "exit"}})); -#define GET_BLOCK(name) HBasicBlock* name = blks.Get(#name) - GET_BLOCK(entry); - GET_BLOCK(exit); - GET_BLOCK(breturn); - GET_BLOCK(left); - GET_BLOCK(right); -#undef GET_BLOCK - EnsurePredecessorOrder(breturn, {left, right}); - HInstruction* bool_value = MakeParam(DataType::Type::kBool); - HInstruction* c2 = graph_->GetIntConstant(2); - HInstruction* c3 = graph_->GetIntConstant(3); - HInstruction* c12 = graph_->GetIntConstant(12); - HInstruction* c13 = graph_->GetIntConstant(13); - - HInstruction* cls1 = MakeClassLoad(); - HInstruction* cls2 = MakeClassLoad(); - HInstruction* new_inst1 = MakeNewInstance(cls1); - HInstruction* new_inst2 = MakeNewInstance(cls2); - HInstruction* write_entry1 = MakeIFieldSet(new_inst1, c3, MemberOffset(32)); - HInstruction* write_entry2 = MakeIFieldSet(new_inst2, c13, MemberOffset(32)); - HInstruction* if_inst = new (GetAllocator()) HIf(bool_value); - entry->AddInstruction(cls1); - entry->AddInstruction(cls2); - entry->AddInstruction(new_inst1); - entry->AddInstruction(new_inst2); - entry->AddInstruction(write_entry1); - entry->AddInstruction(write_entry2); - entry->AddInstruction(if_inst); - ManuallyBuildEnvFor(cls1, {}); - cls2->CopyEnvironmentFrom(cls1->GetEnvironment()); - new_inst1->CopyEnvironmentFrom(cls1->GetEnvironment()); - new_inst2->CopyEnvironmentFrom(cls1->GetEnvironment()); - - HInstruction* call_left1 = MakeInvoke(DataType::Type::kVoid, { new_inst1 }); - HInstruction* call_left2 = MakeInvoke(DataType::Type::kVoid, { new_inst2 }); - HInstruction* goto_left = new (GetAllocator()) HGoto(); - left->AddInstruction(call_left1); - left->AddInstruction(call_left2); - left->AddInstruction(goto_left); - call_left1->CopyEnvironmentFrom(cls1->GetEnvironment()); - call_left2->CopyEnvironmentFrom(cls1->GetEnvironment()); - - HInstruction* write_right1 = MakeIFieldSet(new_inst1, c2, MemberOffset(32)); - HInstruction* write_right2 = MakeIFieldSet(new_inst2, c12, MemberOffset(32)); - HInstruction* goto_right = new (GetAllocator()) HGoto(); - right->AddInstruction(write_right1); - right->AddInstruction(write_right2); - right->AddInstruction(goto_right); - - HInstruction* read_bottom1 = MakeIFieldGet(new_inst1, 
DataType::Type::kInt32, MemberOffset(32)); - HInstruction* read_bottom2 = MakeIFieldGet(new_inst2, DataType::Type::kInt32, MemberOffset(32)); - HInstruction* combine = - new (GetAllocator()) HAdd(DataType::Type::kInt32, read_bottom1, read_bottom2); - HInstruction* return_exit = new (GetAllocator()) HReturn(combine); - breturn->AddInstruction(read_bottom1); - breturn->AddInstruction(read_bottom2); - breturn->AddInstruction(combine); - breturn->AddInstruction(return_exit); - - SetupExit(exit); - - PerformLSEWithPartial(blks); - - EXPECT_INS_REMOVED(read_bottom1); - EXPECT_INS_REMOVED(read_bottom2); - EXPECT_INS_REMOVED(write_right1); - EXPECT_INS_REMOVED(write_right2); - EXPECT_INS_RETAINED(call_left1); - EXPECT_INS_RETAINED(call_left2); - std::vector<HPhi*> merges; - std::vector<HPredicatedInstanceFieldGet*> pred_gets; - std::tie(merges, pred_gets) = - FindAllInstructions<HPhi, HPredicatedInstanceFieldGet>(graph_, breturn); - ASSERT_EQ(merges.size(), 4u); - ASSERT_EQ(pred_gets.size(), 2u); - HPhi* merge_value_return1 = FindOrNull(merges.begin(), merges.end(), [&](HPhi* p) { - return p->GetType() == DataType::Type::kInt32 && p->InputAt(1) == c2; - }); - HPhi* merge_value_return2 = FindOrNull(merges.begin(), merges.end(), [&](HPhi* p) { - return p->GetType() == DataType::Type::kInt32 && p->InputAt(1) == c12; - }); - HPhi* merge_alloc1 = FindOrNull(merges.begin(), merges.end(), [&](HPhi* p) { - return p->GetType() == DataType::Type::kReference && - p->InputAt(0)->IsNewInstance() && - p->InputAt(0)->InputAt(0) == cls1; - }); - HPhi* merge_alloc2 = FindOrNull(merges.begin(), merges.end(), [&](HPhi* p) { - return p->GetType() == DataType::Type::kReference && - p->InputAt(0)->IsNewInstance() && - p->InputAt(0)->InputAt(0) == cls2; - }); - ASSERT_NE(merge_alloc1, nullptr); - ASSERT_NE(merge_alloc2, nullptr); - EXPECT_EQ(merge_alloc1->InputAt(1), graph_->GetNullConstant()); - EXPECT_EQ(merge_alloc2->InputAt(1), graph_->GetNullConstant()); - HPredicatedInstanceFieldGet* pred_get1 = - FindOrNull(pred_gets.begin(), pred_gets.end(), [&](HPredicatedInstanceFieldGet* pg) { - return pg->GetTarget() == merge_alloc1; - }); - HPredicatedInstanceFieldGet* pred_get2 = - FindOrNull(pred_gets.begin(), pred_gets.end(), [&](HPredicatedInstanceFieldGet* pg) { - return pg->GetTarget() == merge_alloc2; - }); - ASSERT_NE(pred_get1, nullptr); - EXPECT_INS_EQ(pred_get1->GetTarget(), merge_alloc1); - EXPECT_INS_EQ(pred_get1->GetDefaultValue(), merge_value_return1) - << " pred-get is: " << *pred_get1; - EXPECT_INS_EQ(merge_value_return1->InputAt(0), graph_->GetIntConstant(0)) - << " merge val is: " << *merge_value_return1; - EXPECT_INS_EQ(merge_value_return1->InputAt(1), c2) << " merge val is: " << *merge_value_return1; - ASSERT_NE(pred_get2, nullptr); - EXPECT_INS_EQ(pred_get2->GetTarget(), merge_alloc2); - EXPECT_INS_EQ(pred_get2->GetDefaultValue(), merge_value_return2) - << " pred-get is: " << *pred_get2; - EXPECT_INS_EQ(merge_value_return2->InputAt(0), graph_->GetIntConstant(0)) - << " merge val is: " << *merge_value_return1; - EXPECT_INS_EQ(merge_value_return2->InputAt(1), c12) << " merge val is: " << *merge_value_return1; -} - -// // ENTRY -// obj1 = new Obj1(); -// obj2 = new Obj2(); -// obj1.field = 3; -// obj2.field = 13; -// if (parameter_value) { -// // LEFT -// escape(obj1); -// // ELIMINATE -// obj2.field = 12; -// } else { -// // RIGHT -// // ELIMINATE -// obj1.field = 2; -// escape(obj2); -// } -// EXIT -// predicated-ELIMINATE -// return obj1.field + obj2.field -TEST_F(LoadStoreEliminationTest, 
MultiPredicatedLoad2) { - ScopedObjectAccess soa(Thread::Current()); - VariableSizedHandleScope vshs(soa.Self()); - CreateGraph(/*handles=*/&vshs); - AdjacencyListGraph blks(SetupFromAdjacencyList("entry", - "exit", - {{"entry", "left"}, - {"entry", "right"}, - {"left", "breturn"}, - {"right", "breturn"}, - {"breturn", "exit"}})); -#define GET_BLOCK(name) HBasicBlock* name = blks.Get(#name) - GET_BLOCK(entry); - GET_BLOCK(exit); - GET_BLOCK(breturn); - GET_BLOCK(left); - GET_BLOCK(right); -#undef GET_BLOCK - EnsurePredecessorOrder(breturn, {left, right}); - HInstruction* bool_value = MakeParam(DataType::Type::kBool); - HInstruction* c2 = graph_->GetIntConstant(2); - HInstruction* c3 = graph_->GetIntConstant(3); - HInstruction* c12 = graph_->GetIntConstant(12); - HInstruction* c13 = graph_->GetIntConstant(13); - - HInstruction* cls1 = MakeClassLoad(); - HInstruction* cls2 = MakeClassLoad(); - HInstruction* new_inst1 = MakeNewInstance(cls1); - HInstruction* new_inst2 = MakeNewInstance(cls2); - HInstruction* write_entry1 = MakeIFieldSet(new_inst1, c3, MemberOffset(32)); - HInstruction* write_entry2 = MakeIFieldSet(new_inst2, c13, MemberOffset(32)); - HInstruction* if_inst = new (GetAllocator()) HIf(bool_value); - entry->AddInstruction(cls1); - entry->AddInstruction(cls2); - entry->AddInstruction(new_inst1); - entry->AddInstruction(new_inst2); - entry->AddInstruction(write_entry1); - entry->AddInstruction(write_entry2); - entry->AddInstruction(if_inst); - ManuallyBuildEnvFor(cls1, {}); - cls2->CopyEnvironmentFrom(cls1->GetEnvironment()); - new_inst1->CopyEnvironmentFrom(cls1->GetEnvironment()); - new_inst2->CopyEnvironmentFrom(cls1->GetEnvironment()); - - HInstruction* call_left1 = MakeInvoke(DataType::Type::kVoid, { new_inst1 }); - HInstruction* write_left2 = MakeIFieldSet(new_inst2, c12, MemberOffset(32)); - HInstruction* goto_left = new (GetAllocator()) HGoto(); - left->AddInstruction(call_left1); - left->AddInstruction(write_left2); - left->AddInstruction(goto_left); - call_left1->CopyEnvironmentFrom(cls1->GetEnvironment()); - - HInstruction* write_right1 = MakeIFieldSet(new_inst1, c2, MemberOffset(32)); - HInstruction* call_right2 = MakeInvoke(DataType::Type::kVoid, { new_inst2 }); - HInstruction* goto_right = new (GetAllocator()) HGoto(); - right->AddInstruction(write_right1); - right->AddInstruction(call_right2); - right->AddInstruction(goto_right); - call_right2->CopyEnvironmentFrom(cls1->GetEnvironment()); - - HInstruction* read_bottom1 = MakeIFieldGet(new_inst1, DataType::Type::kInt32, MemberOffset(32)); - HInstruction* read_bottom2 = MakeIFieldGet(new_inst2, DataType::Type::kInt32, MemberOffset(32)); - HInstruction* combine = - new (GetAllocator()) HAdd(DataType::Type::kInt32, read_bottom1, read_bottom2); - HInstruction* return_exit = new (GetAllocator()) HReturn(combine); - breturn->AddInstruction(read_bottom1); - breturn->AddInstruction(read_bottom2); - breturn->AddInstruction(combine); - breturn->AddInstruction(return_exit); - - SetupExit(exit); - - PerformLSEWithPartial(blks); - - EXPECT_INS_REMOVED(read_bottom1); - EXPECT_INS_REMOVED(read_bottom2); - EXPECT_INS_REMOVED(write_right1); - EXPECT_INS_REMOVED(write_left2); - EXPECT_INS_RETAINED(call_left1); - EXPECT_INS_RETAINED(call_right2); - std::vector<HPhi*> merges; - std::vector<HPredicatedInstanceFieldGet*> pred_gets; - std::tie(merges, pred_gets) = - FindAllInstructions<HPhi, HPredicatedInstanceFieldGet>(graph_, breturn); - ASSERT_EQ(merges.size(), 4u); - ASSERT_EQ(pred_gets.size(), 2u); - HPhi* merge_value_return1 = 
FindOrNull(merges.begin(), merges.end(), [&](HPhi* p) { - return p->GetType() == DataType::Type::kInt32 && p->InputAt(1) == c2; - }); - HPhi* merge_value_return2 = FindOrNull(merges.begin(), merges.end(), [&](HPhi* p) { - return p->GetType() == DataType::Type::kInt32 && p->InputAt(0) == c12; - }); - HPhi* merge_alloc1 = FindOrNull(merges.begin(), merges.end(), [&](HPhi* p) { - return p->GetType() == DataType::Type::kReference && p->InputAt(1)->IsNullConstant(); - }); - HPhi* merge_alloc2 = FindOrNull(merges.begin(), merges.end(), [&](HPhi* p) { - return p->GetType() == DataType::Type::kReference && p->InputAt(0)->IsNullConstant(); - }); - ASSERT_NE(merge_alloc1, nullptr); - ASSERT_NE(merge_alloc2, nullptr); - EXPECT_TRUE(merge_alloc1->InputAt(0)->IsNewInstance()) << *merge_alloc1; - EXPECT_INS_EQ(merge_alloc1->InputAt(0)->InputAt(0), cls1) << *merge_alloc1; - EXPECT_INS_EQ(merge_alloc1->InputAt(1), graph_->GetNullConstant()); - EXPECT_TRUE(merge_alloc2->InputAt(1)->IsNewInstance()) << *merge_alloc2; - EXPECT_INS_EQ(merge_alloc2->InputAt(1)->InputAt(0), cls2) << *merge_alloc2; - EXPECT_INS_EQ(merge_alloc2->InputAt(0), graph_->GetNullConstant()); - HPredicatedInstanceFieldGet* pred_get1 = - FindOrNull(pred_gets.begin(), pred_gets.end(), [&](HPredicatedInstanceFieldGet* pg) { - return pg->GetTarget() == merge_alloc1; - }); - HPredicatedInstanceFieldGet* pred_get2 = - FindOrNull(pred_gets.begin(), pred_gets.end(), [&](HPredicatedInstanceFieldGet* pg) { - return pg->GetTarget() == merge_alloc2; - }); - ASSERT_NE(pred_get1, nullptr); - EXPECT_INS_EQ(pred_get1->GetTarget(), merge_alloc1); - EXPECT_INS_EQ(pred_get1->GetDefaultValue(), merge_value_return1) - << " pred-get is: " << *pred_get1; - EXPECT_INS_EQ(merge_value_return1->InputAt(0), graph_->GetIntConstant(0)) - << " merge val is: " << *merge_value_return1; - EXPECT_INS_EQ(merge_value_return1->InputAt(1), c2) << " merge val is: " << *merge_value_return1; - ASSERT_NE(pred_get2, nullptr); - EXPECT_INS_EQ(pred_get2->GetTarget(), merge_alloc2); - EXPECT_INS_EQ(pred_get2->GetDefaultValue(), merge_value_return2) - << " pred-get is: " << *pred_get2; - EXPECT_INS_EQ(merge_value_return2->InputAt(1), graph_->GetIntConstant(0)) - << " merge val is: " << *merge_value_return1; - EXPECT_INS_EQ(merge_value_return2->InputAt(0), c12) << " merge val is: " << *merge_value_return1; -} - -// Based on structure seen in `java.util.List -// java.util.Collections.checkedList(java.util.List, java.lang.Class)` -// Incorrect accounting would cause attempts to materialize both obj1 and obj2 -// in each of the materialization blocks. -// // ENTRY -// Obj obj; -// if (param1) { -// // needs to be moved after param2 check -// obj1 = new Obj1(); -// obj1.foo = 33; -// if (param2) { -// return obj1.foo; -// } -// obj = obj1; -// } else { -// obj2 = new Obj2(); -// obj2.foo = 44; -// if (param2) { -// return obj2.foo; -// } -// obj = obj2; -// } -// EXIT -// // obj = PHI[obj1, obj2] -// // NB The phi acts as an escape for both obj1 and obj2 meaning as far as the -// // LSA is concerned the escape frontier is left_crit_break->breturn and -// // right_crit_break->breturn for both even though only one of the objects is -// // actually live at each edge. -// // TODO In the future we really should track liveness through PHIs which would -// // allow us to entirely remove the allocation in this test. 
-// return obj.foo;
-TEST_F(LoadStoreEliminationTest, MultiPredicatedLoad3) {
-  ScopedObjectAccess soa(Thread::Current());
-  VariableSizedHandleScope vshs(soa.Self());
-  CreateGraph(/*handles=*/&vshs);
-  AdjacencyListGraph blks(SetupFromAdjacencyList("entry",
-                                                 "exit",
-                                                 {{"entry", "left"},
-                                                  {"left", "left_end"},
-                                                  {"left_end", "breturn"},
-                                                  {"left", "left_exit_early"},
-                                                  {"left_exit_early", "exit"},
-                                                  {"entry", "right"},
-                                                  {"right", "right_end"},
-                                                  {"right_end", "breturn"},
-                                                  {"right", "right_exit_early"},
-                                                  {"right_exit_early", "exit"},
-                                                  {"breturn", "exit"}}));
-#define GET_BLOCK(name) HBasicBlock* name = blks.Get(#name)
-  GET_BLOCK(entry);
-  GET_BLOCK(exit);
-  GET_BLOCK(breturn);
-  GET_BLOCK(left);
-  GET_BLOCK(left_end);
-  GET_BLOCK(left_exit_early);
-  GET_BLOCK(right);
-  GET_BLOCK(right_end);
-  GET_BLOCK(right_exit_early);
-#undef GET_BLOCK
-  EnsurePredecessorOrder(breturn, {left_end, right_end});
-  HInstruction* param1 = MakeParam(DataType::Type::kBool);
-  HInstruction* param2 = MakeParam(DataType::Type::kBool);
-  HInstruction* c33 = graph_->GetIntConstant(33);
-  HInstruction* c44 = graph_->GetIntConstant(44);
-
-  HInstruction* if_inst = new (GetAllocator()) HIf(param1);
-  entry->AddInstruction(if_inst);
-
-  HInstruction* cls1 = MakeClassLoad();
-  HInstruction* new_inst1 = MakeNewInstance(cls1);
-  HInstruction* write1 = MakeIFieldSet(new_inst1, c33, MemberOffset(32));
-  HInstruction* if_left = new (GetAllocator()) HIf(param2);
-  left->AddInstruction(cls1);
-  left->AddInstruction(new_inst1);
-  left->AddInstruction(write1);
-  left->AddInstruction(if_left);
-  ManuallyBuildEnvFor(cls1, {});
-  new_inst1->CopyEnvironmentFrom(cls1->GetEnvironment());
-
-  left_end->AddInstruction(new (GetAllocator()) HGoto());
-
-  HInstruction* early_exit_left_read =
-      MakeIFieldGet(new_inst1, DataType::Type::kInt32, MemberOffset(32));
-  HInstruction* early_exit_left_return = new (GetAllocator()) HReturn(early_exit_left_read);
-  left_exit_early->AddInstruction(early_exit_left_read);
-  left_exit_early->AddInstruction(early_exit_left_return);
-
-  HInstruction* cls2 = MakeClassLoad();
-  HInstruction* new_inst2 = MakeNewInstance(cls2);
-  HInstruction* write2 = MakeIFieldSet(new_inst2, c44, MemberOffset(32));
-  HInstruction* if_right = new (GetAllocator()) HIf(param2);
-  right->AddInstruction(cls2);
-  right->AddInstruction(new_inst2);
-  right->AddInstruction(write2);
-  right->AddInstruction(if_right);
-  cls2->CopyEnvironmentFrom(cls1->GetEnvironment());
-  new_inst2->CopyEnvironmentFrom(cls2->GetEnvironment());
-
-  right_end->AddInstruction(new (GetAllocator()) HGoto());
-
-  HInstruction* early_exit_right_read =
-      MakeIFieldGet(new_inst2, DataType::Type::kInt32, MemberOffset(32));
-  HInstruction* early_exit_right_return = new (GetAllocator()) HReturn(early_exit_right_read);
-  right_exit_early->AddInstruction(early_exit_right_read);
-  right_exit_early->AddInstruction(early_exit_right_return);
-
-  HPhi* bottom_phi = MakePhi({new_inst1, new_inst2});
-  HInstruction* read_bottom = MakeIFieldGet(bottom_phi, DataType::Type::kInt32, MemberOffset(32));
-  HInstruction* return_exit = new (GetAllocator()) HReturn(read_bottom);
-  breturn->AddPhi(bottom_phi);
-  breturn->AddInstruction(read_bottom);
-  breturn->AddInstruction(return_exit);
-
-  SetupExit(exit);
-
-  PerformLSEWithPartial(blks);
-
-  EXPECT_INS_REMOVED(early_exit_left_read);
-  EXPECT_INS_REMOVED(early_exit_right_read);
-  EXPECT_INS_RETAINED(bottom_phi);
-  EXPECT_INS_RETAINED(read_bottom);
-  EXPECT_INS_EQ(early_exit_left_return->InputAt(0), c33);
-  EXPECT_INS_EQ(early_exit_right_return->InputAt(0), c44);
-  // These assert there is only 1 HNewInstance in the given blocks.
-  HNewInstance* moved_ni1 =
-      FindSingleInstruction<HNewInstance>(graph_, left_end->GetSinglePredecessor());
-  HNewInstance* moved_ni2 =
-      FindSingleInstruction<HNewInstance>(graph_, right_end->GetSinglePredecessor());
-  ASSERT_NE(moved_ni1, nullptr);
-  ASSERT_NE(moved_ni2, nullptr);
-  EXPECT_INS_EQ(bottom_phi->InputAt(0), moved_ni1);
-  EXPECT_INS_EQ(bottom_phi->InputAt(1), moved_ni2);
-}
-
-// // ENTRY
-// obj = new Obj();
-// if (param1) {
-//   obj.field = 3;
-//   noescape();
-// } else {
-//   obj.field = 2;
-//   noescape();
-// }
-// int abc;
-// if (parameter_value) {
-//   // LEFT
-//   abc = 4;
-//   escape(obj);
-// } else {
-//   // RIGHT
-//   // ELIMINATE
-//   noescape();
-//   abc = obj.field + 4;
-// }
-// abc = phi
-// EXIT
-// predicated-ELIMINATE
-// return obj.field + abc
-TEST_F(LoadStoreEliminationTest, PredicatedLoad4) {
-  ScopedObjectAccess soa(Thread::Current());
-  VariableSizedHandleScope vshs(soa.Self());
-  CreateGraph(/*handles=*/&vshs);
-  AdjacencyListGraph blks(SetupFromAdjacencyList("entry",
-                                                 "exit",
-                                                 {{"entry", "start_left"},
-                                                  {"entry", "start_right"},
-                                                  {"start_left", "mid"},
-                                                  {"start_right", "mid"},
-                                                  {"mid", "left"},
-                                                  {"mid", "right"},
-                                                  {"left", "breturn"},
-                                                  {"right", "breturn"},
-                                                  {"breturn", "exit"}}));
-#define GET_BLOCK(name) HBasicBlock* name = blks.Get(#name)
-  GET_BLOCK(entry);
-  GET_BLOCK(exit);
-  GET_BLOCK(breturn);
-  GET_BLOCK(left);
-  GET_BLOCK(right);
-  GET_BLOCK(mid);
-  GET_BLOCK(start_left);
-  GET_BLOCK(start_right);
-#undef GET_BLOCK
-  EnsurePredecessorOrder(breturn, {left, right});
-  EnsurePredecessorOrder(mid, {start_left, start_right});
-  HInstruction* bool_value = MakeParam(DataType::Type::kBool);
-  HInstruction* bool_value2 = MakeParam(DataType::Type::kBool);
-  HInstruction* null_const = graph_->GetNullConstant();
-  HInstruction* c2 = graph_->GetIntConstant(2);
-  HInstruction* c3 = graph_->GetIntConstant(3);
-  HInstruction* c4 = graph_->GetIntConstant(4);
-
-  HInstruction* cls = MakeClassLoad();
-  HInstruction* new_inst = MakeNewInstance(cls);
-  HInstruction* if_inst = new (GetAllocator()) HIf(bool_value);
-  entry->AddInstruction(cls);
-  entry->AddInstruction(new_inst);
-  entry->AddInstruction(if_inst);
-  ManuallyBuildEnvFor(cls, {});
-  new_inst->CopyEnvironmentFrom(cls->GetEnvironment());
-
-  HInstruction* write_start_left = MakeIFieldSet(new_inst, c3, MemberOffset(32));
-  HInstruction* call_start_left = MakeInvoke(DataType::Type::kVoid, { });
-  start_left->AddInstruction(write_start_left);
-  start_left->AddInstruction(call_start_left);
-  start_left->AddInstruction(new (GetAllocator()) HGoto());
-  call_start_left->CopyEnvironmentFrom(cls->GetEnvironment());
-
-  HInstruction* write_start_right = MakeIFieldSet(new_inst, c2, MemberOffset(32));
-  HInstruction* call_start_right = MakeInvoke(DataType::Type::kVoid, { });
-  start_right->AddInstruction(write_start_right);
-  start_right->AddInstruction(call_start_right);
-  start_right->AddInstruction(new (GetAllocator()) HGoto());
-  call_start_right->CopyEnvironmentFrom(cls->GetEnvironment());
-
-  mid->AddInstruction(new (GetAllocator()) HIf(bool_value2));
-
-  HInstruction* call_left = MakeInvoke(DataType::Type::kVoid, { new_inst });
-  HInstruction* goto_left = new (GetAllocator()) HGoto();
-  left->AddInstruction(call_left);
-  left->AddInstruction(goto_left);
-  call_left->CopyEnvironmentFrom(cls->GetEnvironment());
-
-  HInstruction* call_right = MakeInvoke(DataType::Type::kVoid, { });
- HInstruction* read_right = MakeIFieldGet(new_inst, DataType::Type::kInt32, MemberOffset(32)); - HInstruction* add_right = new (GetAllocator()) HAdd(DataType::Type::kInt32, read_right, c4); - HInstruction* goto_right = new (GetAllocator()) HGoto(); - right->AddInstruction(call_right); - right->AddInstruction(read_right); - right->AddInstruction(add_right); - right->AddInstruction(goto_right); - call_right->CopyEnvironmentFrom(cls->GetEnvironment()); - - HPhi* phi_bottom = MakePhi({c4, add_right}); - HInstruction* read_bottom = MakeIFieldGet(new_inst, DataType::Type::kInt32, MemberOffset(32)); - HInstruction* add_bottom = - new (GetAllocator()) HAdd(DataType::Type::kInt32, read_bottom, phi_bottom); - HInstruction* return_exit = new (GetAllocator()) HReturn(add_bottom); - breturn->AddPhi(phi_bottom); - breturn->AddInstruction(read_bottom); - breturn->AddInstruction(add_bottom); - breturn->AddInstruction(return_exit); - - SetupExit(exit); - - PerformLSEWithPartial(blks); - - EXPECT_INS_REMOVED(read_bottom); - EXPECT_INS_REMOVED(read_right); - EXPECT_INS_RETAINED(call_left); - EXPECT_INS_RETAINED(call_right); - EXPECT_INS_RETAINED(call_start_left); - EXPECT_INS_RETAINED(call_start_right); - std::vector<HPhi*> merges; - HPredicatedInstanceFieldGet* pred_get = - FindSingleInstruction<HPredicatedInstanceFieldGet>(graph_, breturn); - std::tie(merges) = FindAllInstructions<HPhi>(graph_, breturn); - ASSERT_EQ(merges.size(), 3u); - HPhi* merge_value_return = FindOrNull(merges.begin(), merges.end(), [&](HPhi* p) { - return p != phi_bottom && p->GetType() == DataType::Type::kInt32; - }); - HPhi* merge_alloc = FindOrNull(merges.begin(), merges.end(), [](HPhi* p) { - return p->GetType() == DataType::Type::kReference; - }); - ASSERT_NE(merge_alloc, nullptr); - EXPECT_TRUE(merge_alloc->InputAt(0)->IsNewInstance()) << *merge_alloc; - EXPECT_EQ(merge_alloc->InputAt(0)->InputAt(0), cls) << *merge_alloc << " cls? " << *cls; - EXPECT_EQ(merge_alloc->InputAt(1), null_const); - ASSERT_NE(pred_get, nullptr); - EXPECT_INS_EQ(pred_get->GetTarget(), merge_alloc); - EXPECT_INS_EQ(pred_get->GetDefaultValue(), merge_value_return) << " pred-get is: " << *pred_get; - EXPECT_INS_EQ(merge_value_return->InputAt(0), graph_->GetIntConstant(0)) - << " merge val is: " << *merge_value_return; - EXPECT_INS_EQ(merge_value_return->InputAt(1), FindSingleInstruction<HPhi>(graph_, mid)) - << " merge val is: " << *merge_value_return; -} - -// Based on structure seen in `java.util.Set java.util.Collections$UnmodifiableMap.entrySet()` -// We end up having to update a PHI generated by normal LSE. -// // ENTRY -// Obj obj_init = param_obj.BAR; -// if (param1) { -// Obj other = new Obj(); -// other.foo = 42; -// if (param2) { -// return other.foo; -// } else { -// param_obj.BAR = other; -// } -// } else { } -// EXIT -// LSE Turns this into PHI[obj_init, other] -// read_bottom = param_obj.BAR; -// // won't be changed. The escape happens with .BAR set so this is in escaping cohort. 
-// return read_bottom.foo; -TEST_F(LoadStoreEliminationTest, MultiPredicatedLoad4) { - ScopedObjectAccess soa(Thread::Current()); - VariableSizedHandleScope vshs(soa.Self()); - CreateGraph(/*handles=*/&vshs); - AdjacencyListGraph blks(SetupFromAdjacencyList("entry", - "exit", - {{"entry", "left"}, - {"left", "left_early_return"}, - {"left_early_return", "exit"}, - {"left", "left_write_escape"}, - {"left_write_escape", "breturn"}, - {"entry", "right"}, - {"right", "breturn"}, - {"breturn", "exit"}})); -#define GET_BLOCK(name) HBasicBlock* name = blks.Get(#name) - GET_BLOCK(entry); - GET_BLOCK(exit); - GET_BLOCK(breturn); - GET_BLOCK(left); - GET_BLOCK(left_early_return); - GET_BLOCK(left_write_escape); - GET_BLOCK(right); -#undef GET_BLOCK - MemberOffset foo_offset = MemberOffset(32); - MemberOffset bar_offset = MemberOffset(20); - EnsurePredecessorOrder(breturn, {left_write_escape, right}); - HInstruction* c42 = graph_->GetIntConstant(42); - HInstruction* param1 = MakeParam(DataType::Type::kBool); - HInstruction* param2 = MakeParam(DataType::Type::kBool); - HInstruction* param_obj = MakeParam(DataType::Type::kReference); - - HInstruction* get_initial = MakeIFieldGet(param_obj, DataType::Type::kReference, bar_offset); - HInstruction* if_inst = new (GetAllocator()) HIf(param1); - entry->AddInstruction(get_initial); - entry->AddInstruction(if_inst); - - HInstruction* cls1 = MakeClassLoad(); - HInstruction* new_inst1 = MakeNewInstance(cls1); - HInstruction* write1 = MakeIFieldSet(new_inst1, c42, foo_offset); - HInstruction* if_left = new (GetAllocator()) HIf(param2); - left->AddInstruction(cls1); - left->AddInstruction(new_inst1); - left->AddInstruction(write1); - left->AddInstruction(if_left); - ManuallyBuildEnvFor(cls1, {}); - new_inst1->CopyEnvironmentFrom(cls1->GetEnvironment()); - - HInstruction* read_early_return = MakeIFieldGet(new_inst1, DataType::Type::kInt32, foo_offset); - HInstruction* return_early = new (GetAllocator()) HReturn(read_early_return); - left_early_return->AddInstruction(read_early_return); - left_early_return->AddInstruction(return_early); - - HInstruction* write_escape = MakeIFieldSet(param_obj, new_inst1, bar_offset); - HInstruction* write_goto = new (GetAllocator()) HGoto(); - left_write_escape->AddInstruction(write_escape); - left_write_escape->AddInstruction(write_goto); - - right->AddInstruction(new (GetAllocator()) HGoto()); - - HInstruction* read_bottom = MakeIFieldGet(param_obj, DataType::Type::kReference, bar_offset); - HInstruction* final_read = MakeIFieldGet(read_bottom, DataType::Type::kInt32, foo_offset); - HInstruction* return_exit = new (GetAllocator()) HReturn(final_read); - breturn->AddInstruction(read_bottom); - breturn->AddInstruction(final_read); - breturn->AddInstruction(return_exit); - - SetupExit(exit); - - PerformLSEWithPartial(blks); - - EXPECT_INS_REMOVED(read_bottom); - EXPECT_INS_REMOVED(read_early_return); - EXPECT_INS_EQ(return_early->InputAt(0), c42); - EXPECT_INS_RETAINED(final_read); - HNewInstance* moved_ni = - FindSingleInstruction<HNewInstance>(graph_, left_write_escape->GetSinglePredecessor()); - EXPECT_TRUE(final_read->InputAt(0)->IsPhi()); - EXPECT_INS_EQ(final_read->InputAt(0)->InputAt(0), moved_ni); - EXPECT_INS_EQ(final_read->InputAt(0)->InputAt(1), get_initial); -} - -// // ENTRY -// obj = new Obj(); -// obj.field = 3; -// if (parameter_value) { -// // LEFT -// escape(obj); -// } else { -// // RIGHT -// // ELIMINATE -// obj.field = 2; -// } -// // MERGE -// if (second_param) { -// // NON_ESCAPE -// obj.field = 1; -// 
noescape(); -// } -// EXIT -// predicated-ELIMINATE -// return obj.field -TEST_F(LoadStoreEliminationTest, PredicatedLoad2) { - ScopedObjectAccess soa(Thread::Current()); - VariableSizedHandleScope vshs(soa.Self()); - CreateGraph(/*handles=*/&vshs); - AdjacencyListGraph blks(SetupFromAdjacencyList("entry", - "exit", - {{"entry", "left"}, - {"entry", "right"}, - {"left", "merge"}, - {"right", "merge"}, - {"merge", "non_escape"}, - {"non_escape", "breturn"}, - {"merge", "crit_break"}, - {"crit_break", "breturn"}, - {"breturn", "exit"}})); -#define GET_BLOCK(name) HBasicBlock* name = blks.Get(#name) - GET_BLOCK(entry); - GET_BLOCK(exit); - GET_BLOCK(breturn); - GET_BLOCK(left); - GET_BLOCK(right); - GET_BLOCK(merge); - GET_BLOCK(non_escape); - GET_BLOCK(crit_break); -#undef GET_BLOCK - EnsurePredecessorOrder(merge, {left, right}); - EnsurePredecessorOrder(breturn, {crit_break, non_escape}); - HInstruction* bool_value = MakeParam(DataType::Type::kBool); - HInstruction* bool_value2 = MakeParam(DataType::Type::kBool); - HInstruction* null_const = graph_->GetNullConstant(); - HInstruction* c1 = graph_->GetIntConstant(1); - HInstruction* c2 = graph_->GetIntConstant(2); - HInstruction* c3 = graph_->GetIntConstant(3); - - HInstruction* cls = MakeClassLoad(); - HInstruction* new_inst = MakeNewInstance(cls); - HInstruction* write_entry = MakeIFieldSet(new_inst, c3, MemberOffset(32)); - HInstruction* if_inst = new (GetAllocator()) HIf(bool_value); - entry->AddInstruction(cls); - entry->AddInstruction(new_inst); - entry->AddInstruction(write_entry); - entry->AddInstruction(if_inst); - ManuallyBuildEnvFor(cls, {}); - new_inst->CopyEnvironmentFrom(cls->GetEnvironment()); - - HInstruction* call_left = MakeInvoke(DataType::Type::kVoid, { new_inst }); - HInstruction* goto_left = new (GetAllocator()) HGoto(); - left->AddInstruction(call_left); - left->AddInstruction(goto_left); - call_left->CopyEnvironmentFrom(cls->GetEnvironment()); - - HInstruction* write_right = MakeIFieldSet(new_inst, c2, MemberOffset(32)); - HInstruction* goto_right = new (GetAllocator()) HGoto(); - right->AddInstruction(write_right); - right->AddInstruction(goto_right); - - HInstruction* merge_if = new (GetAllocator()) HIf(bool_value2); - merge->AddInstruction(merge_if); - - crit_break->AddInstruction(new (GetAllocator()) HGoto()); - - HInstruction* write_non_escape = MakeIFieldSet(new_inst, c1, MemberOffset(32)); - HInstruction* non_escape_call = MakeInvoke(DataType::Type::kVoid, {}); - HInstruction* non_escape_goto = new (GetAllocator()) HGoto(); - non_escape->AddInstruction(write_non_escape); - non_escape->AddInstruction(non_escape_call); - non_escape->AddInstruction(non_escape_goto); - non_escape_call->CopyEnvironmentFrom(cls->GetEnvironment()); - - HInstruction* read_bottom = MakeIFieldGet(new_inst, DataType::Type::kInt32, MemberOffset(32)); - HInstruction* return_exit = new (GetAllocator()) HReturn(read_bottom); - breturn->AddInstruction(read_bottom); - breturn->AddInstruction(return_exit); - - SetupExit(exit); - - PerformLSEWithPartial(blks); - - EXPECT_INS_REMOVED(read_bottom); - EXPECT_INS_REMOVED(write_right); - EXPECT_INS_RETAINED(call_left); - std::vector<HPhi*> merges; - HPredicatedInstanceFieldGet* pred_get = - FindSingleInstruction<HPredicatedInstanceFieldGet>(graph_, breturn); - std::tie(merges) = FindAllInstructions<HPhi>(graph_); - ASSERT_EQ(merges.size(), 3u); - HPhi* merge_value_return = FindOrNull(merges.begin(), merges.end(), [&](HPhi* p) { - return p->GetType() == DataType::Type::kInt32 && p->GetBlock() == 
breturn; - }); - HPhi* merge_value_merge = FindOrNull(merges.begin(), merges.end(), [&](HPhi* p) { - return p->GetType() == DataType::Type::kInt32 && p->GetBlock() != breturn; - }); - HPhi* merge_alloc = FindOrNull(merges.begin(), merges.end(), [](HPhi* p) { - return p->GetType() == DataType::Type::kReference; - }); - ASSERT_NE(merge_alloc, nullptr); - EXPECT_TRUE(merge_alloc->InputAt(0)->IsNewInstance()) << *merge_alloc; - EXPECT_INS_EQ(merge_alloc->InputAt(0)->InputAt(0), cls) - << " phi is: " << merge_alloc->DumpWithArgs(); - EXPECT_INS_EQ(merge_alloc->InputAt(1), null_const); - ASSERT_NE(pred_get, nullptr); - EXPECT_INS_EQ(pred_get->GetTarget(), merge_alloc); - EXPECT_INS_EQ(pred_get->GetDefaultValue(), merge_value_return) - << "get is " << pred_get->DumpWithArgs(); - EXPECT_INS_EQ(merge_value_return->InputAt(0), merge_value_merge) - << " phi is: " << *merge_value_return; - EXPECT_INS_EQ(merge_value_return->InputAt(1), c1) - << " phi is: " << merge_value_return->DumpWithArgs(); - EXPECT_INS_EQ(merge_value_merge->InputAt(0), graph_->GetIntConstant(0)) - << " phi is: " << *merge_value_merge; - EXPECT_INS_EQ(merge_value_merge->InputAt(1), c2) - << " phi is: " << merge_value_merge->DumpWithArgs(); -} - -// // ENTRY -// obj = new Obj(); -// obj.field = 3; -// if (parameter_value) { -// // LEFT -// escape(obj); -// } else { -// // RIGHT -// // ELIMINATE -// obj.field = 2; -// } -// // MERGE -// if (second_param) { -// // NON_ESCAPE -// obj.field = 1; -// } -// noescape(); -// EXIT -// predicated-ELIMINATE -// return obj.field -TEST_F(LoadStoreEliminationTest, PredicatedLoad3) { - ScopedObjectAccess soa(Thread::Current()); - VariableSizedHandleScope vshs(soa.Self()); - CreateGraph(/*handles=*/&vshs); - AdjacencyListGraph blks(SetupFromAdjacencyList("entry", - "exit", - {{"entry", "left"}, - {"entry", "right"}, - {"left", "merge"}, - {"right", "merge"}, - {"merge", "non_escape"}, - {"non_escape", "breturn"}, - {"merge", "crit_break"}, - {"crit_break", "breturn"}, - {"breturn", "exit"}})); -#define GET_BLOCK(name) HBasicBlock* name = blks.Get(#name) - GET_BLOCK(entry); - GET_BLOCK(exit); - GET_BLOCK(breturn); - GET_BLOCK(left); - GET_BLOCK(right); - GET_BLOCK(merge); - GET_BLOCK(crit_break); - GET_BLOCK(non_escape); -#undef GET_BLOCK - EnsurePredecessorOrder(merge, {left, right}); - EnsurePredecessorOrder(breturn, {crit_break, non_escape}); - HInstruction* bool_value = MakeParam(DataType::Type::kBool); - HInstruction* bool_value2 = MakeParam(DataType::Type::kBool); - HInstruction* null_const = graph_->GetNullConstant(); - HInstruction* c1 = graph_->GetIntConstant(1); - HInstruction* c2 = graph_->GetIntConstant(2); - HInstruction* c3 = graph_->GetIntConstant(3); - - HInstruction* cls = MakeClassLoad(); - HInstruction* new_inst = MakeNewInstance(cls); - HInstruction* write_entry = MakeIFieldSet(new_inst, c3, MemberOffset(32)); - HInstruction* if_inst = new (GetAllocator()) HIf(bool_value); - entry->AddInstruction(cls); - entry->AddInstruction(new_inst); - entry->AddInstruction(write_entry); - entry->AddInstruction(if_inst); - ManuallyBuildEnvFor(cls, {}); - new_inst->CopyEnvironmentFrom(cls->GetEnvironment()); - - HInstruction* call_left = MakeInvoke(DataType::Type::kVoid, { new_inst }); - HInstruction* goto_left = new (GetAllocator()) HGoto(); - left->AddInstruction(call_left); - left->AddInstruction(goto_left); - call_left->CopyEnvironmentFrom(cls->GetEnvironment()); - - HInstruction* write_right = MakeIFieldSet(new_inst, c2, MemberOffset(32)); - HInstruction* goto_right = new (GetAllocator()) 
HGoto(); - right->AddInstruction(write_right); - right->AddInstruction(goto_right); - - HInstruction* merge_if = new (GetAllocator()) HIf(bool_value2); - merge->AddInstruction(merge_if); - - HInstruction* write_non_escape = MakeIFieldSet(new_inst, c1, MemberOffset(32)); - HInstruction* non_escape_goto = new (GetAllocator()) HGoto(); - non_escape->AddInstruction(write_non_escape); - non_escape->AddInstruction(non_escape_goto); - - crit_break->AddInstruction(new (GetAllocator()) HGoto()); - - HInstruction* bottom_call = MakeInvoke(DataType::Type::kVoid, {}); - HInstruction* read_bottom = MakeIFieldGet(new_inst, DataType::Type::kInt32, MemberOffset(32)); - HInstruction* return_exit = new (GetAllocator()) HReturn(read_bottom); - breturn->AddInstruction(bottom_call); - breturn->AddInstruction(read_bottom); - breturn->AddInstruction(return_exit); - bottom_call->CopyEnvironmentFrom(cls->GetEnvironment()); - - SetupExit(exit); - - PerformLSEWithPartial(blks); - - EXPECT_INS_REMOVED(read_bottom); - EXPECT_INS_REMOVED(write_right); - EXPECT_INS_RETAINED(call_left); - std::vector<HPhi*> merges; - HPredicatedInstanceFieldGet* pred_get = - FindSingleInstruction<HPredicatedInstanceFieldGet>(graph_, breturn); - std::tie(merges) = FindAllInstructions<HPhi>(graph_); - ASSERT_EQ(merges.size(), 3u); - HPhi* merge_value_return = FindOrNull(merges.begin(), merges.end(), [&](HPhi* p) { - return p->GetType() == DataType::Type::kInt32 && p->GetBlock() == breturn; - }); - HPhi* merge_value_merge = FindOrNull(merges.begin(), merges.end(), [&](HPhi* p) { - return p->GetType() == DataType::Type::kInt32 && p->GetBlock() != breturn; - }); - HPhi* merge_alloc = FindOrNull(merges.begin(), merges.end(), [](HPhi* p) { - return p->GetType() == DataType::Type::kReference; - }); - ASSERT_NE(merge_alloc, nullptr); - EXPECT_TRUE(merge_alloc->InputAt(0)->IsNewInstance()) << merge_alloc->DumpWithArgs(); - EXPECT_INS_EQ(merge_alloc->InputAt(0)->InputAt(0), cls) - << " phi is: " << merge_alloc->DumpWithArgs(); - EXPECT_INS_EQ(merge_alloc->InputAt(1), null_const); - ASSERT_NE(pred_get, nullptr); - EXPECT_INS_EQ(pred_get->GetTarget(), merge_alloc); - EXPECT_INS_EQ(pred_get->GetDefaultValue(), merge_value_return) - << "get is " << pred_get->DumpWithArgs(); - EXPECT_INS_EQ(merge_value_return->InputAt(0), merge_value_merge) - << " phi is: " << *merge_value_return; - EXPECT_INS_EQ(merge_value_return->InputAt(1), c1) << " phi is: " << *merge_value_return; - EXPECT_INS_EQ(merge_value_merge->InputAt(0), graph_->GetIntConstant(0)) - << " phi is: " << *merge_value_merge; - EXPECT_INS_EQ(merge_value_merge->InputAt(1), c2) << " phi is: " << *merge_value_merge; -} - -// // ENTRY -// obj = new Obj(); -// if (parameter_value) { -// // LEFT -// obj.field = 3; -// escape(obj); -// } else { -// // RIGHT - Leave it as default value -// } -// EXIT -// predicated-ELIMINATE -// return obj.field -TEST_F(LoadStoreEliminationTest, PredicatedLoadDefaultValue) { - ScopedObjectAccess soa(Thread::Current()); - VariableSizedHandleScope vshs(soa.Self()); - CreateGraph(/*handles=*/&vshs); - AdjacencyListGraph blks(SetupFromAdjacencyList("entry", - "exit", - {{"entry", "left"}, - {"entry", "right"}, - {"left", "breturn"}, - {"right", "breturn"}, - {"breturn", "exit"}})); -#define GET_BLOCK(name) HBasicBlock* name = blks.Get(#name) - GET_BLOCK(entry); - GET_BLOCK(exit); - GET_BLOCK(breturn); - GET_BLOCK(left); - GET_BLOCK(right); -#undef GET_BLOCK - EnsurePredecessorOrder(breturn, {left, right}); - HInstruction* bool_value = MakeParam(DataType::Type::kBool); - 
HInstruction* null_const = graph_->GetNullConstant(); - HInstruction* c0 = graph_->GetIntConstant(0); - HInstruction* c3 = graph_->GetIntConstant(3); - - HInstruction* cls = MakeClassLoad(); - HInstruction* new_inst = MakeNewInstance(cls); - HInstruction* if_inst = new (GetAllocator()) HIf(bool_value); - entry->AddInstruction(cls); - entry->AddInstruction(new_inst); - entry->AddInstruction(if_inst); - ManuallyBuildEnvFor(cls, {}); - new_inst->CopyEnvironmentFrom(cls->GetEnvironment()); - - HInstruction* write_left = MakeIFieldSet(new_inst, c3, MemberOffset(32)); - HInstruction* call_left = MakeInvoke(DataType::Type::kVoid, { new_inst }); - HInstruction* goto_left = new (GetAllocator()) HGoto(); - left->AddInstruction(write_left); - left->AddInstruction(call_left); - left->AddInstruction(goto_left); - call_left->CopyEnvironmentFrom(cls->GetEnvironment()); - - HInstruction* goto_right = new (GetAllocator()) HGoto(); - right->AddInstruction(goto_right); - - HInstruction* read_bottom = MakeIFieldGet(new_inst, DataType::Type::kInt32, MemberOffset(32)); - HInstruction* return_exit = new (GetAllocator()) HReturn(read_bottom); - breturn->AddInstruction(read_bottom); - breturn->AddInstruction(return_exit); - - SetupExit(exit); - - PerformLSEWithPartial(blks); - - EXPECT_INS_REMOVED(read_bottom); - EXPECT_INS_RETAINED(write_left); - EXPECT_INS_RETAINED(call_left); - HPredicatedInstanceFieldGet* pred_get = - FindSingleInstruction<HPredicatedInstanceFieldGet>(graph_, breturn); - HPhi* merge_alloc = FindSingleInstruction<HPhi>(graph_, breturn); - ASSERT_NE(merge_alloc, nullptr); - EXPECT_TRUE(merge_alloc->InputAt(0)->IsNewInstance()) << *merge_alloc; - EXPECT_EQ(merge_alloc->InputAt(0)->InputAt(0), cls) << *merge_alloc << " cls? " << *cls; - EXPECT_EQ(merge_alloc->InputAt(1), null_const); - ASSERT_NE(pred_get, nullptr); - EXPECT_INS_EQ(pred_get->GetTarget(), merge_alloc); - EXPECT_INS_EQ(pred_get->GetDefaultValue(), c0) << " pred-get is: " << *pred_get; -} - -// // ENTRY -// obj = new Obj(); -// // ALL should be kept -// switch (parameter_value) { -// case 1: -// // Case1 -// obj.field = 1; -// call_func(obj); -// break; -// case 2: -// // Case2 -// obj.field = 2; -// call_func(obj); -// break; -// default: -// // Case3 -// obj.field = 3; -// do { -// if (test2()) { } else { obj.field = 5; } -// } while (test()); -// break; -// } -// EXIT -// // predicated-ELIMINATE -// return obj.field -TEST_F(LoadStoreEliminationTest, PartialLoopPhis1) { - ScopedObjectAccess soa(Thread::Current()); - VariableSizedHandleScope vshs(soa.Self()); - CreateGraph(/*handles=*/&vshs); - AdjacencyListGraph blks(SetupFromAdjacencyList("entry", - "exit", - {{"entry", "bswitch"}, - {"bswitch", "case1"}, - {"bswitch", "case2"}, - {"bswitch", "case3"}, - {"case1", "breturn"}, - {"case2", "breturn"}, - {"case3", "loop_pre_header"}, - {"loop_pre_header", "loop_header"}, - {"loop_header", "loop_body"}, - {"loop_body", "loop_if_left"}, - {"loop_body", "loop_if_right"}, - {"loop_if_left", "loop_merge"}, - {"loop_if_right", "loop_merge"}, - {"loop_merge", "loop_end"}, - {"loop_end", "loop_header"}, - {"loop_end", "critical_break"}, - {"critical_break", "breturn"}, - {"breturn", "exit"}})); -#define GET_BLOCK(name) HBasicBlock* name = blks.Get(#name) - GET_BLOCK(entry); - GET_BLOCK(bswitch); - GET_BLOCK(exit); - GET_BLOCK(breturn); - GET_BLOCK(case1); - GET_BLOCK(case2); - GET_BLOCK(case3); - - GET_BLOCK(loop_pre_header); - GET_BLOCK(loop_header); - GET_BLOCK(loop_body); - GET_BLOCK(loop_if_left); - GET_BLOCK(loop_if_right); - 
GET_BLOCK(loop_merge); - GET_BLOCK(loop_end); - GET_BLOCK(critical_break); -#undef GET_BLOCK - EnsurePredecessorOrder(breturn, {case1, case2, critical_break}); - EnsurePredecessorOrder(loop_header, {loop_pre_header, loop_end}); - EnsurePredecessorOrder(loop_merge, {loop_if_left, loop_if_right}); - CHECK_SUBROUTINE_FAILURE(); - HInstruction* switch_val = MakeParam(DataType::Type::kInt32); - HInstruction* c1 = graph_->GetIntConstant(1); - HInstruction* c2 = graph_->GetIntConstant(2); - HInstruction* c3 = graph_->GetIntConstant(3); - HInstruction* c5 = graph_->GetIntConstant(5); - - HInstruction* cls = MakeClassLoad(); - HInstruction* new_inst = MakeNewInstance(cls); - HInstruction* entry_goto = new (GetAllocator()) HGoto(); - entry->AddInstruction(cls); - entry->AddInstruction(new_inst); - entry->AddInstruction(entry_goto); - ManuallyBuildEnvFor(cls, {}); - new_inst->CopyEnvironmentFrom(cls->GetEnvironment()); - - HInstruction* switch_inst = new (GetAllocator()) HPackedSwitch(0, 2, switch_val); - bswitch->AddInstruction(switch_inst); - - HInstruction* write_c1 = MakeIFieldSet(new_inst, c1, MemberOffset(32)); - HInstruction* call_c1 = MakeInvoke(DataType::Type::kVoid, { new_inst }); - HInstruction* goto_c1 = new (GetAllocator()) HGoto(); - case1->AddInstruction(write_c1); - case1->AddInstruction(call_c1); - case1->AddInstruction(goto_c1); - call_c1->CopyEnvironmentFrom(cls->GetEnvironment()); - - HInstruction* write_c2 = MakeIFieldSet(new_inst, c2, MemberOffset(32)); - HInstruction* call_c2 = MakeInvoke(DataType::Type::kVoid, { new_inst }); - HInstruction* goto_c2 = new (GetAllocator()) HGoto(); - case2->AddInstruction(write_c2); - case2->AddInstruction(call_c2); - case2->AddInstruction(goto_c2); - call_c2->CopyEnvironmentFrom(cls->GetEnvironment()); - - HInstruction* write_c3 = MakeIFieldSet(new_inst, c3, MemberOffset(32)); - HInstruction* goto_c3 = new (GetAllocator()) HGoto(); - case3->AddInstruction(write_c3); - case3->AddInstruction(goto_c3); - - HInstruction* goto_preheader = new (GetAllocator()) HGoto(); - loop_pre_header->AddInstruction(goto_preheader); - - HInstruction* suspend_check_header = new (GetAllocator()) HSuspendCheck(); - HInstruction* goto_header = new (GetAllocator()) HGoto(); - loop_header->AddInstruction(suspend_check_header); - loop_header->AddInstruction(goto_header); - suspend_check_header->CopyEnvironmentFrom(cls->GetEnvironment()); - - HInstruction* call_loop_body = MakeInvoke(DataType::Type::kBool, {}); - HInstruction* if_loop_body = new (GetAllocator()) HIf(call_loop_body); - loop_body->AddInstruction(call_loop_body); - loop_body->AddInstruction(if_loop_body); - call_loop_body->CopyEnvironmentFrom(cls->GetEnvironment()); - - HInstruction* goto_loop_left = new (GetAllocator()) HGoto(); - loop_if_left->AddInstruction(goto_loop_left); - - HInstruction* write_loop_right = MakeIFieldSet(new_inst, c5, MemberOffset(32)); - HInstruction* goto_loop_right = new (GetAllocator()) HGoto(); - loop_if_right->AddInstruction(write_loop_right); - loop_if_right->AddInstruction(goto_loop_right); - - HInstruction* goto_loop_merge = new (GetAllocator()) HGoto(); - loop_merge->AddInstruction(goto_loop_merge); - - HInstruction* call_end = MakeInvoke(DataType::Type::kBool, {}); - HInstruction* if_end = new (GetAllocator()) HIf(call_end); - loop_end->AddInstruction(call_end); - loop_end->AddInstruction(if_end); - call_end->CopyEnvironmentFrom(cls->GetEnvironment()); - - HInstruction* goto_critical_break = new (GetAllocator()) HGoto(); - critical_break->AddInstruction(goto_critical_break); 
- - HInstruction* read_bottom = MakeIFieldGet(new_inst, DataType::Type::kInt32, MemberOffset(32)); - HInstruction* return_exit = new (GetAllocator()) HReturn(read_bottom); - breturn->AddInstruction(read_bottom); - breturn->AddInstruction(return_exit); - - SetupExit(exit); - - PerformLSEWithPartial(blks); - - HPredicatedInstanceFieldGet* pred_get = - FindSingleInstruction<HPredicatedInstanceFieldGet>(graph_, breturn); - EXPECT_INS_REMOVED(read_bottom) << *read_bottom; - ASSERT_TRUE(pred_get != nullptr); - HPhi* inst_return_phi = pred_get->GetTarget()->AsPhi(); - ASSERT_TRUE(inst_return_phi != nullptr) << pred_get->GetTarget()->DumpWithArgs(); - EXPECT_INS_EQ(inst_return_phi->InputAt(0), - FindSingleInstruction<HNewInstance>(graph_, case1->GetSinglePredecessor())); - EXPECT_INS_EQ(inst_return_phi->InputAt(1), - FindSingleInstruction<HNewInstance>(graph_, case2->GetSinglePredecessor())); - EXPECT_INS_EQ(inst_return_phi->InputAt(2), graph_->GetNullConstant()); - HPhi* inst_value_phi = pred_get->GetDefaultValue()->AsPhi(); - ASSERT_TRUE(inst_value_phi != nullptr) << pred_get->GetDefaultValue()->DumpWithArgs(); - EXPECT_INS_EQ(inst_value_phi->InputAt(0), graph_->GetIntConstant(0)); - EXPECT_INS_EQ(inst_value_phi->InputAt(1), graph_->GetIntConstant(0)); - HPhi* loop_merge_phi = FindSingleInstruction<HPhi>(graph_, loop_merge); - ASSERT_TRUE(loop_merge_phi != nullptr); - HPhi* loop_header_phi = FindSingleInstruction<HPhi>(graph_, loop_header); - ASSERT_TRUE(loop_header_phi != nullptr); - EXPECT_INS_EQ(loop_header_phi->InputAt(0), c3); - EXPECT_INS_EQ(loop_header_phi->InputAt(1), loop_merge_phi); - EXPECT_INS_EQ(loop_merge_phi->InputAt(0), loop_header_phi); - EXPECT_INS_EQ(loop_merge_phi->InputAt(1), c5); - EXPECT_INS_EQ(inst_value_phi->InputAt(2), loop_merge_phi); - EXPECT_INS_RETAINED(write_c1) << *write_c1; - EXPECT_INS_RETAINED(write_c2) << *write_c2; - EXPECT_INS_REMOVED(write_c3) << *write_c3; - EXPECT_INS_REMOVED(write_loop_right) << *write_loop_right; -} - -// // ENTRY -// obj = new Obj(); -// switch (parameter_value) { -// case 1: -// // Case1 -// obj.field = 1; -// call_func(obj); -// break; -// case 2: -// // Case2 -// obj.field = 2; -// call_func(obj); -// break; -// default: -// // Case3 -// obj.field = 3; -// while (!test()) { -// if (test2()) { } else { obj.field = 5; } -// } -// break; -// } -// EXIT -// // predicated-ELIMINATE -// return obj.field -TEST_F(LoadStoreEliminationTest, PartialLoopPhis2) { - ScopedObjectAccess soa(Thread::Current()); - VariableSizedHandleScope vshs(soa.Self()); - CreateGraph(/*handles=*/&vshs); - AdjacencyListGraph blks(SetupFromAdjacencyList("entry", - "exit", - {{"entry", "bswitch"}, - {"bswitch", "case1"}, - {"bswitch", "case2"}, - {"bswitch", "case3"}, - {"case1", "breturn"}, - {"case2", "breturn"}, - {"case3", "loop_pre_header"}, - - {"loop_pre_header", "loop_header"}, - {"loop_header", "critical_break"}, - {"loop_header", "loop_body"}, - {"loop_body", "loop_if_left"}, - {"loop_body", "loop_if_right"}, - {"loop_if_left", "loop_merge"}, - {"loop_if_right", "loop_merge"}, - {"loop_merge", "loop_header"}, - - {"critical_break", "breturn"}, - {"breturn", "exit"}})); -#define GET_BLOCK(name) HBasicBlock* name = blks.Get(#name) - GET_BLOCK(entry); - GET_BLOCK(bswitch); - GET_BLOCK(exit); - GET_BLOCK(breturn); - GET_BLOCK(case1); - GET_BLOCK(case2); - GET_BLOCK(case3); - - GET_BLOCK(loop_pre_header); - GET_BLOCK(loop_header); - GET_BLOCK(loop_body); - GET_BLOCK(loop_if_left); - GET_BLOCK(loop_if_right); - GET_BLOCK(loop_merge); - GET_BLOCK(critical_break); 
-#undef GET_BLOCK - EnsurePredecessorOrder(breturn, {case1, case2, critical_break}); - EnsurePredecessorOrder(loop_header, {loop_pre_header, loop_merge}); - EnsurePredecessorOrder(loop_merge, {loop_if_left, loop_if_right}); - CHECK_SUBROUTINE_FAILURE(); - HInstruction* switch_val = MakeParam(DataType::Type::kInt32); - HInstruction* c1 = graph_->GetIntConstant(1); - HInstruction* c2 = graph_->GetIntConstant(2); - HInstruction* c3 = graph_->GetIntConstant(3); - HInstruction* c5 = graph_->GetIntConstant(5); - - HInstruction* cls = MakeClassLoad(); - HInstruction* new_inst = MakeNewInstance(cls); - HInstruction* entry_goto = new (GetAllocator()) HGoto(); - entry->AddInstruction(cls); - entry->AddInstruction(new_inst); - entry->AddInstruction(entry_goto); - ManuallyBuildEnvFor(cls, {}); - new_inst->CopyEnvironmentFrom(cls->GetEnvironment()); - - HInstruction* switch_inst = new (GetAllocator()) HPackedSwitch(0, 2, switch_val); - bswitch->AddInstruction(switch_inst); - - HInstruction* write_c1 = MakeIFieldSet(new_inst, c1, MemberOffset(32)); - HInstruction* call_c1 = MakeInvoke(DataType::Type::kVoid, { new_inst }); - HInstruction* goto_c1 = new (GetAllocator()) HGoto(); - case1->AddInstruction(write_c1); - case1->AddInstruction(call_c1); - case1->AddInstruction(goto_c1); - call_c1->CopyEnvironmentFrom(cls->GetEnvironment()); - - HInstruction* write_c2 = MakeIFieldSet(new_inst, c2, MemberOffset(32)); - HInstruction* call_c2 = MakeInvoke(DataType::Type::kVoid, { new_inst }); - HInstruction* goto_c2 = new (GetAllocator()) HGoto(); - case2->AddInstruction(write_c2); - case2->AddInstruction(call_c2); - case2->AddInstruction(goto_c2); - call_c2->CopyEnvironmentFrom(cls->GetEnvironment()); - - HInstruction* write_c3 = MakeIFieldSet(new_inst, c3, MemberOffset(32)); - HInstruction* goto_c3 = new (GetAllocator()) HGoto(); - case3->AddInstruction(write_c3); - case3->AddInstruction(goto_c3); - - HInstruction* goto_preheader = new (GetAllocator()) HGoto(); - loop_pre_header->AddInstruction(goto_preheader); - - HInstruction* suspend_check_header = new (GetAllocator()) HSuspendCheck(); - HInstruction* call_header = MakeInvoke(DataType::Type::kBool, {}); - HInstruction* if_header = new (GetAllocator()) HIf(call_header); - loop_header->AddInstruction(suspend_check_header); - loop_header->AddInstruction(call_header); - loop_header->AddInstruction(if_header); - call_header->CopyEnvironmentFrom(cls->GetEnvironment()); - suspend_check_header->CopyEnvironmentFrom(cls->GetEnvironment()); - - HInstruction* call_loop_body = MakeInvoke(DataType::Type::kBool, {}); - HInstruction* if_loop_body = new (GetAllocator()) HIf(call_loop_body); - loop_body->AddInstruction(call_loop_body); - loop_body->AddInstruction(if_loop_body); - call_loop_body->CopyEnvironmentFrom(cls->GetEnvironment()); - - HInstruction* goto_loop_left = new (GetAllocator()) HGoto(); - loop_if_left->AddInstruction(goto_loop_left); - - HInstruction* write_loop_right = MakeIFieldSet(new_inst, c5, MemberOffset(32)); - HInstruction* goto_loop_right = new (GetAllocator()) HGoto(); - loop_if_right->AddInstruction(write_loop_right); - loop_if_right->AddInstruction(goto_loop_right); - - HInstruction* goto_loop_merge = new (GetAllocator()) HGoto(); - loop_merge->AddInstruction(goto_loop_merge); - - HInstruction* goto_critical_break = new (GetAllocator()) HGoto(); - critical_break->AddInstruction(goto_critical_break); - - HInstruction* read_bottom = MakeIFieldGet(new_inst, DataType::Type::kInt32, MemberOffset(32)); - HInstruction* return_exit = new (GetAllocator()) 
HReturn(read_bottom); - breturn->AddInstruction(read_bottom); - breturn->AddInstruction(return_exit); - - SetupExit(exit); - - PerformLSEWithPartial(blks); - - HPredicatedInstanceFieldGet* pred_get = - FindSingleInstruction<HPredicatedInstanceFieldGet>(graph_, breturn); - EXPECT_INS_REMOVED(read_bottom) << *read_bottom; - ASSERT_TRUE(pred_get != nullptr); - HPhi* inst_return_phi = pred_get->GetTarget()->AsPhi(); - ASSERT_TRUE(inst_return_phi != nullptr) << pred_get->GetTarget()->DumpWithArgs(); - EXPECT_INS_EQ(inst_return_phi->InputAt(0), - FindSingleInstruction<HNewInstance>(graph_, case1->GetSinglePredecessor())); - EXPECT_INS_EQ(inst_return_phi->InputAt(1), - FindSingleInstruction<HNewInstance>(graph_, case2->GetSinglePredecessor())); - EXPECT_INS_EQ(inst_return_phi->InputAt(2), graph_->GetNullConstant()); - HPhi* inst_value_phi = pred_get->GetDefaultValue()->AsPhi(); - ASSERT_TRUE(inst_value_phi != nullptr) << pred_get->GetDefaultValue()->DumpWithArgs(); - EXPECT_INS_EQ(inst_value_phi->InputAt(0), graph_->GetIntConstant(0)); - EXPECT_INS_EQ(inst_value_phi->InputAt(1), graph_->GetIntConstant(0)); - HPhi* loop_merge_phi = FindSingleInstruction<HPhi>(graph_, loop_merge); - ASSERT_TRUE(loop_merge_phi != nullptr); - HPhi* loop_header_phi = FindSingleInstruction<HPhi>(graph_, loop_header); - ASSERT_TRUE(loop_header_phi != nullptr); - EXPECT_INS_EQ(loop_header_phi->InputAt(0), c3); - EXPECT_INS_EQ(loop_header_phi->InputAt(1), loop_merge_phi); - EXPECT_INS_EQ(loop_merge_phi->InputAt(0), loop_header_phi); - EXPECT_INS_EQ(loop_merge_phi->InputAt(1), c5); - EXPECT_INS_EQ(inst_value_phi->InputAt(2), loop_header_phi); - EXPECT_INS_RETAINED(write_c1) << *write_c1; - EXPECT_INS_RETAINED(write_c2) << *write_c2; - EXPECT_INS_REMOVED(write_c3) << *write_c3; - EXPECT_INS_REMOVED(write_loop_right) << *write_loop_right; -} - -// // ENTRY -// obj = new Obj(); -// obj.field = 3; -// while (!test()) { -// if (test2()) { } else { obj.field = 5; } -// } -// if (parameter_value) { -// escape(obj); -// } -// EXIT -// return obj.field -TEST_F(LoadStoreEliminationTest, PartialLoopPhis3) { - ScopedObjectAccess soa(Thread::Current()); - VariableSizedHandleScope vshs(soa.Self()); - CreateGraph(/*handles=*/&vshs); - AdjacencyListGraph blks(SetupFromAdjacencyList("entry", - "exit", - {{"entry", "loop_pre_header"}, - - {"loop_pre_header", "loop_header"}, - {"loop_header", "escape_check"}, - {"loop_header", "loop_body"}, - {"loop_body", "loop_if_left"}, - {"loop_body", "loop_if_right"}, - {"loop_if_left", "loop_merge"}, - {"loop_if_right", "loop_merge"}, - {"loop_merge", "loop_header"}, - - {"escape_check", "escape"}, - {"escape_check", "no_escape"}, - {"no_escape", "breturn"}, - {"escape", "breturn"}, - {"breturn", "exit"}})); -#define GET_BLOCK(name) HBasicBlock* name = blks.Get(#name) - GET_BLOCK(entry); - GET_BLOCK(exit); - GET_BLOCK(breturn); - GET_BLOCK(no_escape); - GET_BLOCK(escape); - GET_BLOCK(escape_check); - - GET_BLOCK(loop_pre_header); - GET_BLOCK(loop_header); - GET_BLOCK(loop_body); - GET_BLOCK(loop_if_left); - GET_BLOCK(loop_if_right); - GET_BLOCK(loop_merge); -#undef GET_BLOCK - EnsurePredecessorOrder(breturn, {no_escape, escape}); - EnsurePredecessorOrder(loop_header, {loop_pre_header, loop_merge}); - EnsurePredecessorOrder(loop_merge, {loop_if_left, loop_if_right}); - CHECK_SUBROUTINE_FAILURE(); - HInstruction* bool_val = MakeParam(DataType::Type::kBool); - HInstruction* c3 = graph_->GetIntConstant(3); - HInstruction* c5 = graph_->GetIntConstant(5); - - HInstruction* cls = MakeClassLoad(); - 
HInstruction* new_inst = MakeNewInstance(cls); - HInstruction* entry_goto = new (GetAllocator()) HGoto(); - entry->AddInstruction(cls); - entry->AddInstruction(new_inst); - entry->AddInstruction(entry_goto); - ManuallyBuildEnvFor(cls, {}); - new_inst->CopyEnvironmentFrom(cls->GetEnvironment()); - - HInstruction* write_pre_header = MakeIFieldSet(new_inst, c3, MemberOffset(32)); - HInstruction* goto_preheader = new (GetAllocator()) HGoto(); - loop_pre_header->AddInstruction(write_pre_header); - loop_pre_header->AddInstruction(goto_preheader); - - HInstruction* suspend_check_header = new (GetAllocator()) HSuspendCheck(); - HInstruction* call_header = MakeInvoke(DataType::Type::kBool, {}); - HInstruction* if_header = new (GetAllocator()) HIf(call_header); - loop_header->AddInstruction(suspend_check_header); - loop_header->AddInstruction(call_header); - loop_header->AddInstruction(if_header); - call_header->CopyEnvironmentFrom(cls->GetEnvironment()); - suspend_check_header->CopyEnvironmentFrom(cls->GetEnvironment()); - - HInstruction* call_loop_body = MakeInvoke(DataType::Type::kBool, {}); - HInstruction* if_loop_body = new (GetAllocator()) HIf(call_loop_body); - loop_body->AddInstruction(call_loop_body); - loop_body->AddInstruction(if_loop_body); - call_loop_body->CopyEnvironmentFrom(cls->GetEnvironment()); - - HInstruction* goto_loop_left = new (GetAllocator()) HGoto(); - loop_if_left->AddInstruction(goto_loop_left); - - HInstruction* write_loop_right = MakeIFieldSet(new_inst, c5, MemberOffset(32)); - HInstruction* goto_loop_right = new (GetAllocator()) HGoto(); - loop_if_right->AddInstruction(write_loop_right); - loop_if_right->AddInstruction(goto_loop_right); - - HInstruction* goto_loop_merge = new (GetAllocator()) HGoto(); - loop_merge->AddInstruction(goto_loop_merge); - - HInstruction* if_esc_check = new (GetAllocator()) HIf(bool_val); - escape_check->AddInstruction(if_esc_check); - - HInstruction* call_escape = MakeInvoke(DataType::Type::kVoid, { new_inst }); - HInstruction* goto_escape = new (GetAllocator()) HGoto(); - escape->AddInstruction(call_escape); - escape->AddInstruction(goto_escape); - call_escape->CopyEnvironmentFrom(cls->GetEnvironment()); - - HInstruction* goto_no_escape = new (GetAllocator()) HGoto(); - no_escape->AddInstruction(goto_no_escape); - - HInstruction* read_bottom = MakeIFieldGet(new_inst, DataType::Type::kInt32, MemberOffset(32)); - HInstruction* return_exit = new (GetAllocator()) HReturn(read_bottom); - breturn->AddInstruction(read_bottom); - breturn->AddInstruction(return_exit); - - SetupExit(exit); - - PerformLSEWithPartial(blks); - - HPredicatedInstanceFieldGet* pred_get = - FindSingleInstruction<HPredicatedInstanceFieldGet>(graph_, breturn); - EXPECT_INS_REMOVED(read_bottom) << *read_bottom; - ASSERT_TRUE(pred_get != nullptr); - HPhi* inst_return_phi = pred_get->GetTarget()->AsPhi(); - ASSERT_TRUE(inst_return_phi != nullptr) << pred_get->GetTarget()->DumpWithArgs(); - EXPECT_INS_EQ(inst_return_phi->InputAt(0), graph_->GetNullConstant()); - EXPECT_INS_EQ(inst_return_phi->InputAt(1), - FindSingleInstruction<HNewInstance>(graph_, escape->GetSinglePredecessor())); - HPhi* inst_value_phi = pred_get->GetDefaultValue()->AsPhi(); - ASSERT_TRUE(inst_value_phi != nullptr) << pred_get->GetDefaultValue()->DumpWithArgs(); - HPhi* loop_header_phi = FindSingleInstruction<HPhi>(graph_, loop_header); - HPhi* loop_merge_phi = FindSingleInstruction<HPhi>(graph_, loop_merge); - EXPECT_INS_EQ(inst_value_phi->InputAt(0), loop_header_phi); - 
EXPECT_INS_EQ(inst_value_phi->InputAt(1), graph_->GetIntConstant(0)); - EXPECT_INS_EQ(loop_header_phi->InputAt(0), c3); - EXPECT_INS_EQ(loop_header_phi->InputAt(1), loop_merge_phi); - EXPECT_INS_EQ(loop_merge_phi->InputAt(0), loop_header_phi); - EXPECT_INS_EQ(loop_merge_phi->InputAt(1), c5); - HInstanceFieldSet* mat_set = - FindSingleInstruction<HInstanceFieldSet>(graph_, escape->GetSinglePredecessor()); - ASSERT_NE(mat_set, nullptr); - EXPECT_INS_EQ(mat_set->InputAt(1), loop_header_phi); - EXPECT_INS_REMOVED(write_loop_right) << *write_loop_right; - EXPECT_INS_REMOVED(write_pre_header) << *write_pre_header; -} - -// // ENTRY -// obj = new Obj(); -// if (parameter_value) { -// escape(obj); -// } -// obj.field = 3; -// while (!test()) { -// if (test2()) { } else { obj.field = 5; } -// } -// EXIT -// return obj.field -TEST_F(LoadStoreEliminationTest, PartialLoopPhis4) { - ScopedObjectAccess soa(Thread::Current()); - VariableSizedHandleScope vshs(soa.Self()); - CreateGraph(/*handles=*/&vshs); - AdjacencyListGraph blks(SetupFromAdjacencyList("entry", - "exit", - {{"entry", "escape_check"}, - {"escape_check", "escape"}, - {"escape_check", "no_escape"}, - {"no_escape", "loop_pre_header"}, - {"escape", "loop_pre_header"}, - - {"loop_pre_header", "loop_header"}, - {"loop_header", "breturn"}, - {"loop_header", "loop_body"}, - {"loop_body", "loop_if_left"}, - {"loop_body", "loop_if_right"}, - {"loop_if_left", "loop_merge"}, - {"loop_if_right", "loop_merge"}, - {"loop_merge", "loop_header"}, - - {"breturn", "exit"}})); -#define GET_BLOCK(name) HBasicBlock* name = blks.Get(#name) - GET_BLOCK(entry); - GET_BLOCK(exit); - GET_BLOCK(breturn); - GET_BLOCK(no_escape); - GET_BLOCK(escape); - GET_BLOCK(escape_check); - - GET_BLOCK(loop_pre_header); - GET_BLOCK(loop_header); - GET_BLOCK(loop_body); - GET_BLOCK(loop_if_left); - GET_BLOCK(loop_if_right); - GET_BLOCK(loop_merge); -#undef GET_BLOCK - EnsurePredecessorOrder(loop_pre_header, {no_escape, escape}); - EnsurePredecessorOrder(loop_header, {loop_pre_header, loop_merge}); - EnsurePredecessorOrder(loop_merge, {loop_if_left, loop_if_right}); - CHECK_SUBROUTINE_FAILURE(); - HInstruction* bool_val = MakeParam(DataType::Type::kBool); - HInstruction* c3 = graph_->GetIntConstant(3); - HInstruction* c5 = graph_->GetIntConstant(5); - - HInstruction* cls = MakeClassLoad(); - HInstruction* new_inst = MakeNewInstance(cls); - HInstruction* entry_goto = new (GetAllocator()) HGoto(); - entry->AddInstruction(cls); - entry->AddInstruction(new_inst); - entry->AddInstruction(entry_goto); - ManuallyBuildEnvFor(cls, {}); - new_inst->CopyEnvironmentFrom(cls->GetEnvironment()); - - HInstruction* if_esc_check = new (GetAllocator()) HIf(bool_val); - escape_check->AddInstruction(if_esc_check); - - HInstruction* call_escape = MakeInvoke(DataType::Type::kVoid, { new_inst }); - HInstruction* goto_escape = new (GetAllocator()) HGoto(); - escape->AddInstruction(call_escape); - escape->AddInstruction(goto_escape); - call_escape->CopyEnvironmentFrom(cls->GetEnvironment()); - - HInstruction* goto_no_escape = new (GetAllocator()) HGoto(); - no_escape->AddInstruction(goto_no_escape); - - HInstruction* write_pre_header = MakeIFieldSet(new_inst, c3, MemberOffset(32)); - HInstruction* goto_preheader = new (GetAllocator()) HGoto(); - loop_pre_header->AddInstruction(write_pre_header); - loop_pre_header->AddInstruction(goto_preheader); - - HInstruction* suspend_check_header = new (GetAllocator()) HSuspendCheck(); - HInstruction* call_header = MakeInvoke(DataType::Type::kBool, {}); - HInstruction* 
if_header = new (GetAllocator()) HIf(call_header); - loop_header->AddInstruction(suspend_check_header); - loop_header->AddInstruction(call_header); - loop_header->AddInstruction(if_header); - call_header->CopyEnvironmentFrom(cls->GetEnvironment()); - suspend_check_header->CopyEnvironmentFrom(cls->GetEnvironment()); - - HInstruction* call_loop_body = MakeInvoke(DataType::Type::kBool, {}); - HInstruction* if_loop_body = new (GetAllocator()) HIf(call_loop_body); - loop_body->AddInstruction(call_loop_body); - loop_body->AddInstruction(if_loop_body); - call_loop_body->CopyEnvironmentFrom(cls->GetEnvironment()); - - HInstruction* goto_loop_left = new (GetAllocator()) HGoto(); - loop_if_left->AddInstruction(goto_loop_left); - - HInstruction* write_loop_right = MakeIFieldSet(new_inst, c5, MemberOffset(32)); - HInstruction* goto_loop_right = new (GetAllocator()) HGoto(); - loop_if_right->AddInstruction(write_loop_right); - loop_if_right->AddInstruction(goto_loop_right); - - HInstruction* goto_loop_merge = new (GetAllocator()) HGoto(); - loop_merge->AddInstruction(goto_loop_merge); - - HInstruction* read_bottom = MakeIFieldGet(new_inst, DataType::Type::kInt32, MemberOffset(32)); - HInstruction* return_exit = new (GetAllocator()) HReturn(read_bottom); - breturn->AddInstruction(read_bottom); - breturn->AddInstruction(return_exit); - - SetupExit(exit); - - PerformLSEWithPartial(blks); - - HPredicatedInstanceFieldGet* pred_get = - FindSingleInstruction<HPredicatedInstanceFieldGet>(graph_, breturn); - EXPECT_INS_REMOVED(read_bottom) << *read_bottom; - ASSERT_TRUE(pred_get != nullptr); - HPhi* inst_return_phi = pred_get->GetTarget()->AsPhi(); - ASSERT_TRUE(inst_return_phi != nullptr) << pred_get->GetTarget()->DumpWithArgs(); - EXPECT_INS_EQ(inst_return_phi->InputAt(0), graph_->GetNullConstant()); - EXPECT_INS_EQ(inst_return_phi->InputAt(1), - FindSingleInstruction<HNewInstance>(graph_, escape->GetSinglePredecessor())); - HPhi* inst_value_phi = pred_get->GetDefaultValue()->AsPhi(); - ASSERT_TRUE(inst_value_phi != nullptr) << pred_get->GetDefaultValue()->DumpWithArgs(); - HPhi* loop_header_phi = FindSingleInstruction<HPhi>(graph_, loop_header); - HPhi* loop_merge_phi = FindSingleInstruction<HPhi>(graph_, loop_merge); - EXPECT_INS_EQ(inst_value_phi, loop_header_phi); - EXPECT_INS_EQ(loop_header_phi->InputAt(0), c3); - EXPECT_INS_EQ(loop_header_phi->InputAt(1), loop_merge_phi); - EXPECT_INS_EQ(loop_merge_phi->InputAt(0), loop_header_phi); - EXPECT_INS_EQ(loop_merge_phi->InputAt(1), c5); - EXPECT_INS_RETAINED(write_loop_right) << *write_loop_right; - EXPECT_TRUE(write_loop_right->AsInstanceFieldSet()->GetIsPredicatedSet()) << *write_loop_right; - EXPECT_INS_RETAINED(write_pre_header) << *write_pre_header; - EXPECT_TRUE(write_pre_header->AsInstanceFieldSet()->GetIsPredicatedSet()) << *write_pre_header; -} - -// // ENTRY -// obj = new Obj(); -// obj.field = 3; -// while (!test()) { -// if (test2()) { } else { obj.field += 5; } -// } -// if (parameter_value) { -// escape(obj); -// } -// EXIT -// return obj.field -TEST_F(LoadStoreEliminationTest, PartialLoopPhis5) { - ScopedObjectAccess soa(Thread::Current()); - VariableSizedHandleScope vshs(soa.Self()); - CreateGraph(/*handles=*/&vshs); - AdjacencyListGraph blks(SetupFromAdjacencyList("entry", - "exit", - {{"entry", "loop_pre_header"}, - {"loop_pre_header", "loop_header"}, - {"loop_header", "escape_check"}, - {"loop_header", "loop_body"}, - {"loop_body", "loop_if_left"}, - {"loop_body", "loop_if_right"}, - {"loop_if_left", "loop_merge"}, - {"loop_if_right", 
"loop_merge"}, - {"loop_merge", "loop_header"}, - {"escape_check", "escape"}, - {"escape_check", "no_escape"}, - {"no_escape", "breturn"}, - {"escape", "breturn"}, - {"breturn", "exit"}})); -#define GET_BLOCK(name) HBasicBlock* name = blks.Get(#name) - GET_BLOCK(entry); - GET_BLOCK(exit); - GET_BLOCK(breturn); - GET_BLOCK(no_escape); - GET_BLOCK(escape); - GET_BLOCK(escape_check); - - GET_BLOCK(loop_pre_header); - GET_BLOCK(loop_header); - GET_BLOCK(loop_body); - GET_BLOCK(loop_if_left); - GET_BLOCK(loop_if_right); - GET_BLOCK(loop_merge); -#undef GET_BLOCK - EnsurePredecessorOrder(breturn, {no_escape, escape}); - EnsurePredecessorOrder(loop_header, {loop_pre_header, loop_merge}); - EnsurePredecessorOrder(loop_merge, {loop_if_left, loop_if_right}); - CHECK_SUBROUTINE_FAILURE(); - HInstruction* bool_val = MakeParam(DataType::Type::kBool); - HInstruction* c3 = graph_->GetIntConstant(3); - HInstruction* c5 = graph_->GetIntConstant(5); - - HInstruction* cls = MakeClassLoad(); - HInstruction* new_inst = MakeNewInstance(cls); - HInstruction* entry_goto = new (GetAllocator()) HGoto(); - entry->AddInstruction(cls); - entry->AddInstruction(new_inst); - entry->AddInstruction(entry_goto); - ManuallyBuildEnvFor(cls, {}); - new_inst->CopyEnvironmentFrom(cls->GetEnvironment()); - - HInstruction* write_pre_header = MakeIFieldSet(new_inst, c3, MemberOffset(32)); - HInstruction* goto_preheader = new (GetAllocator()) HGoto(); - loop_pre_header->AddInstruction(write_pre_header); - loop_pre_header->AddInstruction(goto_preheader); - - HInstruction* suspend_check_header = new (GetAllocator()) HSuspendCheck(); - HInstruction* call_header = MakeInvoke(DataType::Type::kBool, {}); - HInstruction* if_header = new (GetAllocator()) HIf(call_header); - loop_header->AddInstruction(suspend_check_header); - loop_header->AddInstruction(call_header); - loop_header->AddInstruction(if_header); - call_header->CopyEnvironmentFrom(cls->GetEnvironment()); - suspend_check_header->CopyEnvironmentFrom(cls->GetEnvironment()); - - HInstruction* call_loop_body = MakeInvoke(DataType::Type::kBool, {}); - HInstruction* if_loop_body = new (GetAllocator()) HIf(call_loop_body); - loop_body->AddInstruction(call_loop_body); - loop_body->AddInstruction(if_loop_body); - call_loop_body->CopyEnvironmentFrom(cls->GetEnvironment()); - - HInstruction* goto_loop_left = new (GetAllocator()) HGoto(); - loop_if_left->AddInstruction(goto_loop_left); - - HInstruction* read_loop_right = MakeIFieldGet(new_inst, DataType::Type::kInt32, MemberOffset(32)); - HInstruction* add_loop_right = - new (GetAllocator()) HAdd(DataType::Type::kInt32, read_loop_right, c5); - HInstruction* write_loop_right = MakeIFieldSet(new_inst, add_loop_right, MemberOffset(32)); - HInstruction* goto_loop_right = new (GetAllocator()) HGoto(); - loop_if_right->AddInstruction(read_loop_right); - loop_if_right->AddInstruction(add_loop_right); - loop_if_right->AddInstruction(write_loop_right); - loop_if_right->AddInstruction(goto_loop_right); - - HInstruction* goto_loop_merge = new (GetAllocator()) HGoto(); - loop_merge->AddInstruction(goto_loop_merge); - - HInstruction* if_esc_check = new (GetAllocator()) HIf(bool_val); - escape_check->AddInstruction(if_esc_check); - - HInstruction* call_escape = MakeInvoke(DataType::Type::kVoid, { new_inst }); - HInstruction* goto_escape = new (GetAllocator()) HGoto(); - escape->AddInstruction(call_escape); - escape->AddInstruction(goto_escape); - call_escape->CopyEnvironmentFrom(cls->GetEnvironment()); - - HInstruction* goto_no_escape = new 
(GetAllocator()) HGoto(); - no_escape->AddInstruction(goto_no_escape); - - HInstruction* read_bottom = MakeIFieldGet(new_inst, DataType::Type::kInt32, MemberOffset(32)); - HInstruction* return_exit = new (GetAllocator()) HReturn(read_bottom); - breturn->AddInstruction(read_bottom); - breturn->AddInstruction(return_exit); - - SetupExit(exit); - - PerformLSEWithPartial(blks); - - HPredicatedInstanceFieldGet* pred_get = - FindSingleInstruction<HPredicatedInstanceFieldGet>(graph_, breturn); - EXPECT_INS_REMOVED(read_bottom) << *read_bottom; - ASSERT_TRUE(pred_get != nullptr); - HPhi* inst_return_phi = pred_get->GetTarget()->AsPhi(); - ASSERT_TRUE(inst_return_phi != nullptr) << pred_get->GetTarget()->DumpWithArgs(); - EXPECT_INS_EQ(inst_return_phi->InputAt(0), graph_->GetNullConstant()); - EXPECT_INS_EQ(inst_return_phi->InputAt(1), - FindSingleInstruction<HNewInstance>(graph_, escape->GetSinglePredecessor())); - HPhi* inst_value_phi = pred_get->GetDefaultValue()->AsPhi(); - ASSERT_TRUE(inst_value_phi != nullptr) << pred_get->GetDefaultValue()->DumpWithArgs(); - HPhi* loop_header_phi = FindSingleInstruction<HPhi>(graph_, loop_header); - HPhi* loop_merge_phi = FindSingleInstruction<HPhi>(graph_, loop_merge); - EXPECT_INS_EQ(inst_value_phi->InputAt(0), loop_header_phi); - EXPECT_INS_EQ(inst_value_phi->InputAt(1), graph_->GetIntConstant(0)); - EXPECT_INS_EQ(loop_header_phi->InputAt(0), c3); - EXPECT_INS_EQ(loop_header_phi->InputAt(1), loop_merge_phi); - EXPECT_INS_EQ(loop_merge_phi->InputAt(0), loop_header_phi); - EXPECT_INS_EQ(loop_merge_phi->InputAt(1), add_loop_right); - EXPECT_INS_EQ(add_loop_right->InputAt(0), loop_header_phi); - EXPECT_INS_EQ(add_loop_right->InputAt(1), c5); - HInstanceFieldSet* mat_set = - FindSingleInstruction<HInstanceFieldSet>(graph_, escape->GetSinglePredecessor()); - ASSERT_NE(mat_set, nullptr); - EXPECT_INS_EQ(mat_set->InputAt(1), loop_header_phi); - EXPECT_INS_REMOVED(write_loop_right) << *write_loop_right; - EXPECT_INS_REMOVED(write_pre_header) << *write_pre_header; -} - -// // ENTRY -// obj = new Obj(); -// obj.field = 3; -// if (param) { -// while (!test()) { -// if (test2()) { -// noescape(); -// } else { -// abc = obj.field; -// obj.field = abc + 5; -// noescape(); -// } -// } -// escape(obj); -// } else { -// } -// return obj.field -TEST_F(LoadStoreEliminationTest, PartialLoopPhis6) { - ScopedObjectAccess soa(Thread::Current()); - VariableSizedHandleScope vshs(soa.Self()); - CreateGraph(/*handles=*/&vshs); - AdjacencyListGraph blks(SetupFromAdjacencyList("entry", - "exit", - {{"entry", "start"}, - {"start", "left"}, - {"start", "right"}, - {"left", "loop_pre_header"}, - - {"loop_pre_header", "loop_header"}, - {"loop_header", "escape"}, - {"loop_header", "loop_body"}, - {"loop_body", "loop_if_left"}, - {"loop_body", "loop_if_right"}, - {"loop_if_left", "loop_header"}, - {"loop_if_right", "loop_header"}, - - {"escape", "breturn"}, - {"right", "breturn"}, - {"breturn", "exit"}})); -#define GET_BLOCK(name) HBasicBlock* name = blks.Get(#name) - GET_BLOCK(entry); - GET_BLOCK(exit); - GET_BLOCK(breturn); - GET_BLOCK(left); - GET_BLOCK(right); - GET_BLOCK(start); - GET_BLOCK(escape); - - GET_BLOCK(loop_pre_header); - GET_BLOCK(loop_header); - GET_BLOCK(loop_body); - GET_BLOCK(loop_if_left); - GET_BLOCK(loop_if_right); -#undef GET_BLOCK - EnsurePredecessorOrder(breturn, {escape, right}); - EnsurePredecessorOrder(loop_header, {loop_pre_header, loop_if_left, loop_if_right}); - CHECK_SUBROUTINE_FAILURE(); - HInstruction* bool_val = MakeParam(DataType::Type::kBool); - 
HInstruction* c3 = graph_->GetIntConstant(3); - HInstruction* c5 = graph_->GetIntConstant(5); - - HInstruction* cls = MakeClassLoad(); - HInstruction* new_inst = MakeNewInstance(cls); - HInstruction* write_entry = MakeIFieldSet(new_inst, c3, MemberOffset(32)); - HInstruction* entry_goto = new (GetAllocator()) HGoto(); - entry->AddInstruction(cls); - entry->AddInstruction(new_inst); - entry->AddInstruction(write_entry); - entry->AddInstruction(entry_goto); - ManuallyBuildEnvFor(cls, {}); - new_inst->CopyEnvironmentFrom(cls->GetEnvironment()); - - start->AddInstruction(new (GetAllocator()) HIf(bool_val)); - - HInstruction* left_goto = new (GetAllocator()) HGoto(); - left->AddInstruction(left_goto); - - HInstruction* goto_preheader = new (GetAllocator()) HGoto(); - loop_pre_header->AddInstruction(goto_preheader); - - HInstruction* suspend_check_header = new (GetAllocator()) HSuspendCheck(); - HInstruction* call_header = MakeInvoke(DataType::Type::kBool, {}); - HInstruction* if_header = new (GetAllocator()) HIf(call_header); - loop_header->AddInstruction(suspend_check_header); - loop_header->AddInstruction(call_header); - loop_header->AddInstruction(if_header); - call_header->CopyEnvironmentFrom(cls->GetEnvironment()); - suspend_check_header->CopyEnvironmentFrom(cls->GetEnvironment()); - - HInstruction* call_loop_body = MakeInvoke(DataType::Type::kBool, {}); - HInstruction* if_loop_body = new (GetAllocator()) HIf(call_loop_body); - loop_body->AddInstruction(call_loop_body); - loop_body->AddInstruction(if_loop_body); - call_loop_body->CopyEnvironmentFrom(cls->GetEnvironment()); - - HInstruction* call_loop_left = MakeInvoke(DataType::Type::kVoid, {}); - HInstruction* goto_loop_left = new (GetAllocator()) HGoto(); - loop_if_left->AddInstruction(call_loop_left); - loop_if_left->AddInstruction(goto_loop_left); - call_loop_left->CopyEnvironmentFrom(cls->GetEnvironment()); - - HInstruction* read_loop_right = MakeIFieldGet(new_inst, DataType::Type::kInt32, MemberOffset(32)); - HInstruction* add_loop_right = - new (GetAllocator()) HAdd(DataType::Type::kInt32, c5, read_loop_right); - HInstruction* write_loop_right = MakeIFieldSet(new_inst, add_loop_right, MemberOffset(32)); - HInstruction* call_loop_right = MakeInvoke(DataType::Type::kVoid, {}); - HInstruction* goto_loop_right = new (GetAllocator()) HGoto(); - loop_if_right->AddInstruction(read_loop_right); - loop_if_right->AddInstruction(add_loop_right); - loop_if_right->AddInstruction(write_loop_right); - loop_if_right->AddInstruction(call_loop_right); - loop_if_right->AddInstruction(goto_loop_right); - call_loop_right->CopyEnvironmentFrom(cls->GetEnvironment()); - - HInstruction* call_escape = MakeInvoke(DataType::Type::kVoid, { new_inst }); - HInstruction* goto_escape = new (GetAllocator()) HGoto(); - escape->AddInstruction(call_escape); - escape->AddInstruction(goto_escape); - call_escape->CopyEnvironmentFrom(cls->GetEnvironment()); - - HInstruction* goto_right = new (GetAllocator()) HGoto(); - right->AddInstruction(goto_right); - - HInstruction* read_bottom = MakeIFieldGet(new_inst, DataType::Type::kInt32, MemberOffset(32)); - HInstruction* return_exit = new (GetAllocator()) HReturn(read_bottom); - breturn->AddInstruction(read_bottom); - breturn->AddInstruction(return_exit); - - SetupExit(exit); - - PerformLSEWithPartial(blks); - - HPredicatedInstanceFieldGet* pred_get = - FindSingleInstruction<HPredicatedInstanceFieldGet>(graph_, breturn); - EXPECT_INS_REMOVED(read_bottom) << *read_bottom; - ASSERT_TRUE(pred_get != nullptr); - HPhi* 
inst_return_phi = pred_get->GetTarget()->AsPhi(); - ASSERT_TRUE(inst_return_phi != nullptr) << pred_get->GetTarget()->DumpWithArgs(); - EXPECT_INS_EQ(inst_return_phi->InputAt(0), - FindSingleInstruction<HNewInstance>(graph_, escape->GetSinglePredecessor())); - EXPECT_INS_EQ(inst_return_phi->InputAt(1), graph_->GetNullConstant()); - EXPECT_INS_EQ(pred_get->GetDefaultValue()->InputAt(0), graph_->GetIntConstant(0)); - EXPECT_INS_EQ(pred_get->GetDefaultValue()->InputAt(1), c3); - HPhi* loop_header_phi = FindSingleInstruction<HPhi>(graph_, loop_header); - ASSERT_NE(loop_header_phi, nullptr); - EXPECT_INS_EQ(loop_header_phi->InputAt(0), c3); - EXPECT_INS_EQ(loop_header_phi->InputAt(1), loop_header_phi); - EXPECT_INS_EQ(loop_header_phi->InputAt(2), add_loop_right); - EXPECT_INS_EQ(add_loop_right->InputAt(0), c5); - EXPECT_INS_EQ(add_loop_right->InputAt(1), loop_header_phi); - HInstanceFieldSet* mat_set = - FindSingleInstruction<HInstanceFieldSet>(graph_, escape->GetSinglePredecessor()); - ASSERT_NE(mat_set, nullptr); - EXPECT_INS_EQ(mat_set->InputAt(1), loop_header_phi); - EXPECT_INS_REMOVED(write_loop_right); - EXPECT_INS_REMOVED(write_entry); - EXPECT_INS_RETAINED(call_header); - EXPECT_INS_RETAINED(call_loop_left); - EXPECT_INS_RETAINED(call_loop_right); -} - -// TODO This should really be in an Instruction simplifier Gtest but (1) that -// doesn't exist and (2) we should move this simplification to directly in the -// LSE pass since there is more information then. -// // ENTRY -// obj = new Obj(); -// obj.field = 3; -// if (param) { -// escape(obj); -// } else { -// obj.field = 10; -// } -// return obj.field; -TEST_F(LoadStoreEliminationTest, SimplifyTest) { - ScopedObjectAccess soa(Thread::Current()); - VariableSizedHandleScope vshs(soa.Self()); - CreateGraph(&vshs); - AdjacencyListGraph blks(SetupFromAdjacencyList("entry", - "exit", - {{"entry", "left"}, - {"entry", "right"}, - {"left", "breturn"}, - {"right", "breturn"}, - {"breturn", "exit"}})); -#define GET_BLOCK(name) HBasicBlock* name = blks.Get(#name) - GET_BLOCK(entry); - GET_BLOCK(exit); - GET_BLOCK(breturn); - GET_BLOCK(left); - GET_BLOCK(right); -#undef GET_BLOCK - EnsurePredecessorOrder(breturn, {left, right}); - - HInstruction* bool_value = MakeParam(DataType::Type::kBool); - HInstruction* c3 = graph_->GetIntConstant(3); - HInstruction* c10 = graph_->GetIntConstant(10); - - HInstruction* cls = MakeClassLoad(); - HInstruction* new_inst = MakeNewInstance(cls); - HInstruction* write_start = MakeIFieldSet(new_inst, c3, MemberOffset(32)); - HInstruction* if_inst = new (GetAllocator()) HIf(bool_value); - entry->AddInstruction(cls); - entry->AddInstruction(new_inst); - entry->AddInstruction(write_start); - entry->AddInstruction(if_inst); - ManuallyBuildEnvFor(cls, {}); - new_inst->CopyEnvironmentFrom(cls->GetEnvironment()); - - HInstruction* call_left = MakeInvoke(DataType::Type::kVoid, { new_inst }); - HInstruction* goto_left = new (GetAllocator()) HGoto(); - left->AddInstruction(call_left); - left->AddInstruction(goto_left); - call_left->CopyEnvironmentFrom(cls->GetEnvironment()); - - HInstruction* write_right = MakeIFieldSet(new_inst, c10, MemberOffset(32)); - HInstruction* goto_right = new (GetAllocator()) HGoto(); - right->AddInstruction(write_right); - right->AddInstruction(goto_right); - - HInstruction* read_end = MakeIFieldGet(new_inst, DataType::Type::kInt32, MemberOffset(32)); - HInstruction* return_exit = new (GetAllocator()) HReturn(read_end); - breturn->AddInstruction(read_end); - breturn->AddInstruction(return_exit); - - 
SetupExit(exit); - - PerformLSEWithPartial(blks); - - // Run the code-simplifier too - PerformSimplifications(blks); - - EXPECT_INS_REMOVED(write_right); - EXPECT_INS_REMOVED(write_start); - EXPECT_INS_REMOVED(read_end); - EXPECT_INS_RETAINED(call_left); - - HPredicatedInstanceFieldGet* pred_get = - FindSingleInstruction<HPredicatedInstanceFieldGet>(graph_, breturn); - ASSERT_NE(pred_get, nullptr); - EXPECT_INS_EQ(pred_get->GetDefaultValue(), c10); -} - - -// TODO This should really be in an Instruction simplifier Gtest but (1) that -// doesn't exist and (2) we should move this simplification to directly in the -// LSE pass since there is more information then. -// -// This checks that we don't replace phis when the replacement isn't valid at -// that point (i.e. it doesn't dominate) -// // ENTRY -// obj = new Obj(); -// obj.field = 3; -// if (param) { -// escape(obj); -// } else { -// obj.field = noescape(); -// } -// return obj.field; -TEST_F(LoadStoreEliminationTest, SimplifyTest2) { - ScopedObjectAccess soa(Thread::Current()); - VariableSizedHandleScope vshs(soa.Self()); - CreateGraph(&vshs); - AdjacencyListGraph blks(SetupFromAdjacencyList("entry", - "exit", - {{"entry", "left"}, - {"entry", "right"}, - {"left", "breturn"}, - {"right", "breturn"}, - {"breturn", "exit"}})); -#define GET_BLOCK(name) HBasicBlock* name = blks.Get(#name) - GET_BLOCK(entry); - GET_BLOCK(exit); - GET_BLOCK(breturn); - GET_BLOCK(left); - GET_BLOCK(right); -#undef GET_BLOCK - EnsurePredecessorOrder(breturn, {left, right}); - - HInstruction* bool_value = MakeParam(DataType::Type::kBool); - HInstruction* c3 = graph_->GetIntConstant(3); - - HInstruction* cls = MakeClassLoad(); - HInstruction* new_inst = MakeNewInstance(cls); - HInstruction* write_start = MakeIFieldSet(new_inst, c3, MemberOffset(32)); - HInstruction* if_inst = new (GetAllocator()) HIf(bool_value); - entry->AddInstruction(cls); - entry->AddInstruction(new_inst); - entry->AddInstruction(write_start); - entry->AddInstruction(if_inst); - ManuallyBuildEnvFor(cls, {}); - new_inst->CopyEnvironmentFrom(cls->GetEnvironment()); - - HInstruction* call_left = MakeInvoke(DataType::Type::kVoid, {new_inst}); - HInstruction* goto_left = new (GetAllocator()) HGoto(); - left->AddInstruction(call_left); - left->AddInstruction(goto_left); - call_left->CopyEnvironmentFrom(cls->GetEnvironment()); - - HInstruction* call_right = MakeInvoke(DataType::Type::kInt32, {}); - HInstruction* write_right = MakeIFieldSet(new_inst, call_right, MemberOffset(32)); - HInstruction* goto_right = new (GetAllocator()) HGoto(); - right->AddInstruction(call_right); - right->AddInstruction(write_right); - right->AddInstruction(goto_right); - call_right->CopyEnvironmentFrom(cls->GetEnvironment()); - - HInstruction* read_end = MakeIFieldGet(new_inst, DataType::Type::kInt32, MemberOffset(32)); - HInstruction* return_exit = new (GetAllocator()) HReturn(read_end); - breturn->AddInstruction(read_end); - breturn->AddInstruction(return_exit); - - SetupExit(exit); - - PerformLSEWithPartial(blks); - - // Run the code-simplifier too - PerformSimplifications(blks); - - EXPECT_INS_REMOVED(write_right); - EXPECT_INS_REMOVED(write_start); - EXPECT_INS_REMOVED(read_end); - EXPECT_INS_RETAINED(call_left); - EXPECT_INS_RETAINED(call_right); - EXPECT_EQ(call_right->GetBlock(), right); - - HPredicatedInstanceFieldGet* pred_get = - FindSingleInstruction<HPredicatedInstanceFieldGet>(graph_, breturn); - ASSERT_NE(pred_get, nullptr); - EXPECT_TRUE(pred_get->GetDefaultValue()->IsPhi()) << pred_get->DumpWithArgs(); - 
EXPECT_INS_EQ(pred_get->GetDefaultValue()->InputAt(0), graph_->GetIntConstant(0)) - << pred_get->DumpWithArgs(); - EXPECT_INS_EQ(pred_get->GetDefaultValue()->InputAt(1), call_right) << pred_get->DumpWithArgs(); -} - -// TODO This should really be in an Instruction simplifier Gtest but (1) that -// doesn't exist and (2) we should move this simplification to directly in the -// LSE pass since there is more information then. -// -// This checks that we replace phis even when there are multiple replacements as -// long as they are equal -// // ENTRY -// obj = new Obj(); -// obj.field = 3; -// switch (param) { -// case 1: -// escape(obj); -// break; -// case 2: -// obj.field = 10; -// break; -// case 3: -// obj.field = 10; -// break; -// } -// return obj.field; -TEST_F(LoadStoreEliminationTest, SimplifyTest3) { - ScopedObjectAccess soa(Thread::Current()); - VariableSizedHandleScope vshs(soa.Self()); - CreateGraph(&vshs); - AdjacencyListGraph blks(SetupFromAdjacencyList("entry", - "exit", - {{"entry", "case1"}, - {"entry", "case2"}, - {"entry", "case3"}, - {"case1", "breturn"}, - {"case2", "breturn"}, - {"case3", "breturn"}, - {"breturn", "exit"}})); -#define GET_BLOCK(name) HBasicBlock* name = blks.Get(#name) - GET_BLOCK(entry); - GET_BLOCK(exit); - GET_BLOCK(breturn); - GET_BLOCK(case1); - GET_BLOCK(case2); - GET_BLOCK(case3); -#undef GET_BLOCK - EnsurePredecessorOrder(breturn, {case1, case2, case3}); - - HInstruction* int_val = MakeParam(DataType::Type::kInt32); - HInstruction* c3 = graph_->GetIntConstant(3); - HInstruction* c10 = graph_->GetIntConstant(10); - - HInstruction* cls = MakeClassLoad(); - HInstruction* new_inst = MakeNewInstance(cls); - HInstruction* write_start = MakeIFieldSet(new_inst, c3, MemberOffset(32)); - HInstruction* switch_inst = new (GetAllocator()) HPackedSwitch(0, 2, int_val); - entry->AddInstruction(cls); - entry->AddInstruction(new_inst); - entry->AddInstruction(write_start); - entry->AddInstruction(switch_inst); - ManuallyBuildEnvFor(cls, {}); - new_inst->CopyEnvironmentFrom(cls->GetEnvironment()); - - HInstruction* call_case1 = MakeInvoke(DataType::Type::kVoid, {new_inst}); - HInstruction* goto_case1 = new (GetAllocator()) HGoto(); - case1->AddInstruction(call_case1); - case1->AddInstruction(goto_case1); - call_case1->CopyEnvironmentFrom(cls->GetEnvironment()); - - HInstruction* write_case2 = MakeIFieldSet(new_inst, c10, MemberOffset(32)); - HInstruction* goto_case2 = new (GetAllocator()) HGoto(); - case2->AddInstruction(write_case2); - case2->AddInstruction(goto_case2); - - HInstruction* write_case3 = MakeIFieldSet(new_inst, c10, MemberOffset(32)); - HInstruction* goto_case3 = new (GetAllocator()) HGoto(); - case3->AddInstruction(write_case3); - case3->AddInstruction(goto_case3); - - HInstruction* read_end = MakeIFieldGet(new_inst, DataType::Type::kInt32, MemberOffset(32)); - HInstruction* return_exit = new (GetAllocator()) HReturn(read_end); - breturn->AddInstruction(read_end); - breturn->AddInstruction(return_exit); - - SetupExit(exit); - - PerformLSEWithPartial(blks); - - // Run the code-simplifier too - PerformSimplifications(blks); - - EXPECT_INS_REMOVED(write_case2); - EXPECT_INS_REMOVED(write_case3); - EXPECT_INS_REMOVED(write_start); - EXPECT_INS_REMOVED(read_end); - EXPECT_INS_RETAINED(call_case1); - - HPredicatedInstanceFieldGet* pred_get = - FindSingleInstruction<HPredicatedInstanceFieldGet>(graph_, breturn); - ASSERT_NE(pred_get, nullptr); - EXPECT_INS_EQ(pred_get->GetDefaultValue(), c10) - << pred_get->DumpWithArgs(); -} - -// TODO This should really 
be in an Instruction simplifier Gtest but (1) that -// doesn't exist and (2) we should move this simplification to directly in the -// LSE pass since there is more information then. -// -// This checks that we don't replace phis even when there are multiple -// replacements if they are not equal -// // ENTRY -// obj = new Obj(); -// obj.field = 3; -// switch (param) { -// case 1: -// escape(obj); -// break; -// case 2: -// obj.field = 10; -// break; -// case 3: -// obj.field = 20; -// break; -// } -// return obj.field; -TEST_F(LoadStoreEliminationTest, SimplifyTest4) { - ScopedObjectAccess soa(Thread::Current()); - VariableSizedHandleScope vshs(soa.Self()); - CreateGraph(&vshs); - AdjacencyListGraph blks(SetupFromAdjacencyList("entry", - "exit", - {{"entry", "case1"}, - {"entry", "case2"}, - {"entry", "case3"}, - {"case1", "breturn"}, - {"case2", "breturn"}, - {"case3", "breturn"}, - {"breturn", "exit"}})); -#define GET_BLOCK(name) HBasicBlock* name = blks.Get(#name) - GET_BLOCK(entry); - GET_BLOCK(exit); - GET_BLOCK(breturn); - GET_BLOCK(case1); - GET_BLOCK(case2); - GET_BLOCK(case3); -#undef GET_BLOCK - EnsurePredecessorOrder(breturn, {case1, case2, case3}); - - HInstruction* int_val = MakeParam(DataType::Type::kInt32); - HInstruction* c3 = graph_->GetIntConstant(3); - HInstruction* c10 = graph_->GetIntConstant(10); - HInstruction* c20 = graph_->GetIntConstant(20); - - HInstruction* cls = MakeClassLoad(); - HInstruction* new_inst = MakeNewInstance(cls); - HInstruction* write_start = MakeIFieldSet(new_inst, c3, MemberOffset(32)); - HInstruction* switch_inst = new (GetAllocator()) HPackedSwitch(0, 2, int_val); - entry->AddInstruction(cls); - entry->AddInstruction(new_inst); - entry->AddInstruction(write_start); - entry->AddInstruction(switch_inst); - ManuallyBuildEnvFor(cls, {}); - new_inst->CopyEnvironmentFrom(cls->GetEnvironment()); - - HInstruction* call_case1 = MakeInvoke(DataType::Type::kVoid, {new_inst}); - HInstruction* goto_case1 = new (GetAllocator()) HGoto(); - case1->AddInstruction(call_case1); - case1->AddInstruction(goto_case1); - call_case1->CopyEnvironmentFrom(cls->GetEnvironment()); - - HInstruction* write_case2 = MakeIFieldSet(new_inst, c10, MemberOffset(32)); - HInstruction* goto_case2 = new (GetAllocator()) HGoto(); - case2->AddInstruction(write_case2); - case2->AddInstruction(goto_case2); - - HInstruction* write_case3 = MakeIFieldSet(new_inst, c20, MemberOffset(32)); - HInstruction* goto_case3 = new (GetAllocator()) HGoto(); - case3->AddInstruction(write_case3); - case3->AddInstruction(goto_case3); - - HInstruction* read_end = MakeIFieldGet(new_inst, DataType::Type::kInt32, MemberOffset(32)); - HInstruction* return_exit = new (GetAllocator()) HReturn(read_end); - breturn->AddInstruction(read_end); - breturn->AddInstruction(return_exit); - - SetupExit(exit); - - PerformLSEWithPartial(blks); - - // Run the code-simplifier too - PerformSimplifications(blks); - - EXPECT_INS_REMOVED(write_case2); - EXPECT_INS_REMOVED(write_case3); - EXPECT_INS_REMOVED(write_start); - EXPECT_INS_REMOVED(read_end); - EXPECT_INS_RETAINED(call_case1); - - HPredicatedInstanceFieldGet* pred_get = - FindSingleInstruction<HPredicatedInstanceFieldGet>(graph_, breturn); - ASSERT_NE(pred_get, nullptr); - EXPECT_TRUE(pred_get->GetDefaultValue()->IsPhi()) - << pred_get->DumpWithArgs(); - EXPECT_INS_EQ(pred_get->GetDefaultValue()->InputAt(0), graph_->GetIntConstant(0)); - EXPECT_INS_EQ(pred_get->GetDefaultValue()->InputAt(1), c10); - EXPECT_INS_EQ(pred_get->GetDefaultValue()->InputAt(2), c20); -} - -// 
Make sure that irreducible loops don't screw up Partial LSE. We can't pull -// phis through them so we need to treat them as escapes. -// TODO We should be able to do better than this? Need to do some research. -// // ENTRY -// obj = new Obj(); -// obj.foo = 11; -// if (param1) { -// } else { -// // irreducible loop here. NB the objdoesn't actually escape -// obj.foo = 33; -// if (param2) { -// goto inner; -// } else { -// while (test()) { -// if (test()) { -// obj.foo = 66; -// } else { -// } -// inner: -// } -// } -// } -// return obj.foo; -TEST_F(LoadStoreEliminationTest, PartialIrreducibleLoop) { - ScopedObjectAccess soa(Thread::Current()); - VariableSizedHandleScope vshs(soa.Self()); - CreateGraph(&vshs); - AdjacencyListGraph blks(SetupFromAdjacencyList("start", - "exit", - {{"start", "entry"}, - {"entry", "left"}, - {"entry", "right"}, - {"left", "breturn"}, - - {"right", "right_crit_break_loop"}, - {"right_crit_break_loop", "loop_header"}, - {"right", "right_crit_break_end"}, - {"right_crit_break_end", "loop_end"}, - - {"loop_header", "loop_body"}, - {"loop_body", "loop_left"}, - {"loop_body", "loop_right"}, - {"loop_left", "loop_end"}, - {"loop_right", "loop_end"}, - {"loop_end", "loop_header"}, - {"loop_header", "loop_header_crit_break"}, - {"loop_header_crit_break", "breturn"}, - - {"breturn", "exit"}})); -#define GET_BLOCK(name) HBasicBlock* name = blks.Get(#name) - GET_BLOCK(start); - GET_BLOCK(entry); - GET_BLOCK(exit); - GET_BLOCK(breturn); - GET_BLOCK(left); - GET_BLOCK(right); - GET_BLOCK(right_crit_break_end); - GET_BLOCK(right_crit_break_loop); - GET_BLOCK(loop_header); - GET_BLOCK(loop_header_crit_break); - GET_BLOCK(loop_body); - GET_BLOCK(loop_left); - GET_BLOCK(loop_right); - GET_BLOCK(loop_end); -#undef GET_BLOCK - EnsurePredecessorOrder(breturn, {left, loop_header_crit_break}); - HInstruction* c11 = graph_->GetIntConstant(11); - HInstruction* c33 = graph_->GetIntConstant(33); - HInstruction* c66 = graph_->GetIntConstant(66); - HInstruction* param1 = MakeParam(DataType::Type::kBool); - HInstruction* param2 = MakeParam(DataType::Type::kBool); - - HInstruction* suspend = new (GetAllocator()) HSuspendCheck(); - HInstruction* start_goto = new (GetAllocator()) HGoto(); - start->AddInstruction(suspend); - start->AddInstruction(start_goto); - ManuallyBuildEnvFor(suspend, {}); - - HInstruction* cls = MakeClassLoad(); - HInstruction* new_inst = MakeNewInstance(cls); - HInstruction* write_start = MakeIFieldSet(new_inst, c11, MemberOffset(32)); - HInstruction* if_inst = new (GetAllocator()) HIf(param1); - entry->AddInstruction(cls); - entry->AddInstruction(new_inst); - entry->AddInstruction(write_start); - entry->AddInstruction(if_inst); - ManuallyBuildEnvFor(cls, {}); - new_inst->CopyEnvironmentFrom(cls->GetEnvironment()); - - left->AddInstruction(new (GetAllocator()) HGoto()); - - right->AddInstruction(MakeIFieldSet(new_inst, c33, MemberOffset(32))); - right->AddInstruction(new (GetAllocator()) HIf(param2)); - - right_crit_break_end->AddInstruction(new (GetAllocator()) HGoto()); - right_crit_break_loop->AddInstruction(new (GetAllocator()) HGoto()); - - HInstruction* header_suspend = new (GetAllocator()) HSuspendCheck(); - HInstruction* header_invoke = MakeInvoke(DataType::Type::kBool, {}); - HInstruction* header_if = new (GetAllocator()) HIf(header_invoke); - loop_header->AddInstruction(header_suspend); - loop_header->AddInstruction(header_invoke); - loop_header->AddInstruction(header_if); - header_suspend->CopyEnvironmentFrom(cls->GetEnvironment()); - 
header_invoke->CopyEnvironmentFrom(cls->GetEnvironment()); - - HInstruction* body_invoke = MakeInvoke(DataType::Type::kBool, {}); - HInstruction* body_if = new (GetAllocator()) HIf(body_invoke); - loop_body->AddInstruction(body_invoke); - loop_body->AddInstruction(body_if); - body_invoke->CopyEnvironmentFrom(cls->GetEnvironment()); - - HInstruction* left_set = MakeIFieldSet(new_inst, c66, MemberOffset(32)); - HInstruction* left_goto = MakeIFieldSet(new_inst, c66, MemberOffset(32)); - loop_left->AddInstruction(left_set); - loop_left->AddInstruction(left_goto); - - loop_right->AddInstruction(new (GetAllocator()) HGoto()); - - loop_end->AddInstruction(new (GetAllocator()) HGoto()); - - HInstruction* read_end = MakeIFieldGet(new_inst, DataType::Type::kInt32, MemberOffset(32)); - HInstruction* return_exit = new (GetAllocator()) HReturn(read_end); - breturn->AddInstruction(read_end); - breturn->AddInstruction(return_exit); - - SetupExit(exit); - - PerformLSEWithPartial(blks); - - EXPECT_TRUE(loop_header->IsLoopHeader()); - EXPECT_TRUE(loop_header->GetLoopInformation()->IsIrreducible()); - - EXPECT_INS_RETAINED(left_set); - EXPECT_INS_REMOVED(write_start); - EXPECT_INS_REMOVED(read_end); - - HPredicatedInstanceFieldGet* pred_get = - FindSingleInstruction<HPredicatedInstanceFieldGet>(graph_, breturn); - ASSERT_NE(pred_get, nullptr); - ASSERT_TRUE(pred_get->GetDefaultValue()->IsPhi()) << pred_get->DumpWithArgs(); - EXPECT_INS_EQ(pred_get->GetDefaultValue()->InputAt(0), c11); - EXPECT_INS_EQ(pred_get->GetDefaultValue()->InputAt(1), graph_->GetIntConstant(0)); - ASSERT_TRUE(pred_get->GetTarget()->IsPhi()) << pred_get->DumpWithArgs(); - EXPECT_INS_EQ(pred_get->GetTarget()->InputAt(0), graph_->GetNullConstant()); - HNewInstance* mat = FindSingleInstruction<HNewInstance>(graph_, right->GetSinglePredecessor()); - ASSERT_NE(mat, nullptr); - EXPECT_INS_EQ(pred_get->GetTarget()->InputAt(1), mat); -} - -enum class UsesOrder { kDefaultOrder, kReverseOrder }; -std::ostream& operator<<(std::ostream& os, const UsesOrder& ord) { - switch (ord) { - case UsesOrder::kDefaultOrder: - return os << "DefaultOrder"; - case UsesOrder::kReverseOrder: - return os << "ReverseOrder"; - } -} - -class UsesOrderDependentTestGroup - : public LoadStoreEliminationTestBase<CommonCompilerTestWithParam<UsesOrder>> {}; - -// Make sure that we record replacements by predicated loads and use them -// instead of constructing Phis with inputs removed from the graph. Bug: 183897743 -// Note that the bug was hit only for a certain ordering of the NewInstance -// uses, so we test both orderings. -// // ENTRY -// obj = new Obj(); -// obj.foo = 11; -// if (param1) { -// // LEFT1 -// escape(obj); -// } else { -// // RIGHT1 -// } -// // MIDDLE -// a = obj.foo; -// if (param2) { -// // LEFT2 -// obj.foo = 33; -// } else { -// // RIGHT2 -// } -// // BRETURN -// no_escape() // If `obj` escaped, the field value can change. (Avoid non-partial LSE.) 
-// b = obj.foo; -// return a + b; -TEST_P(UsesOrderDependentTestGroup, RecordPredicatedReplacements1) { - ScopedObjectAccess soa(Thread::Current()); - VariableSizedHandleScope vshs(soa.Self()); - CreateGraph(&vshs); - AdjacencyListGraph blks(SetupFromAdjacencyList("entry", - "exit", - {{"entry", "left1"}, - {"entry", "right1"}, - {"left1", "middle"}, - {"right1", "middle"}, - {"middle", "left2"}, - {"middle", "right2"}, - {"left2", "breturn"}, - {"right2", "breturn"}, - {"breturn", "exit"}})); -#define GET_BLOCK(name) HBasicBlock* name = blks.Get(#name) - GET_BLOCK(entry); - GET_BLOCK(left1); - GET_BLOCK(right1); - GET_BLOCK(middle); - GET_BLOCK(left2); - GET_BLOCK(right2); - GET_BLOCK(breturn); - GET_BLOCK(exit); -#undef GET_BLOCK - EnsurePredecessorOrder(middle, {left1, right1}); - EnsurePredecessorOrder(breturn, {left2, right2}); - HInstruction* c0 = graph_->GetIntConstant(0); - HInstruction* cnull = graph_->GetNullConstant(); - HInstruction* c11 = graph_->GetIntConstant(11); - HInstruction* c33 = graph_->GetIntConstant(33); - HInstruction* param1 = MakeParam(DataType::Type::kBool); - HInstruction* param2 = MakeParam(DataType::Type::kBool); - - HInstruction* suspend = new (GetAllocator()) HSuspendCheck(); - HInstruction* cls = MakeClassLoad(); - HInstruction* new_inst = MakeNewInstance(cls); - HInstruction* entry_write = MakeIFieldSet(new_inst, c11, MemberOffset(32)); - HInstruction* entry_if = new (GetAllocator()) HIf(param1); - entry->AddInstruction(suspend); - entry->AddInstruction(cls); - entry->AddInstruction(new_inst); - entry->AddInstruction(entry_write); - entry->AddInstruction(entry_if); - ManuallyBuildEnvFor(suspend, {}); - ManuallyBuildEnvFor(cls, {}); - ManuallyBuildEnvFor(new_inst, {}); - - HInstruction* left1_call = MakeInvoke(DataType::Type::kVoid, { new_inst }); - HInstruction* left1_goto = new (GetAllocator()) HGoto(); - left1->AddInstruction(left1_call); - left1->AddInstruction(left1_goto); - ManuallyBuildEnvFor(left1_call, {}); - - HInstruction* right1_goto = new (GetAllocator()) HGoto(); - right1->AddInstruction(right1_goto); - - HInstruction* middle_read = MakeIFieldGet(new_inst, DataType::Type::kInt32, MemberOffset(32)); - HInstruction* middle_if = new (GetAllocator()) HIf(param2); - if (GetParam() == UsesOrder::kDefaultOrder) { - middle->AddInstruction(middle_read); - } - middle->AddInstruction(middle_if); - - HInstanceFieldSet* left2_write = MakeIFieldSet(new_inst, c33, MemberOffset(32)); - HInstruction* left2_goto = new (GetAllocator()) HGoto(); - left2->AddInstruction(left2_write); - left2->AddInstruction(left2_goto); - - HInstruction* right2_goto = new (GetAllocator()) HGoto(); - right2->AddInstruction(right2_goto); - - HInstruction* breturn_call = MakeInvoke(DataType::Type::kVoid, {}); - HInstruction* breturn_read = MakeIFieldGet(new_inst, DataType::Type::kInt32, MemberOffset(32)); - HInstruction* breturn_add = - new (GetAllocator()) HAdd(DataType::Type::kInt32, middle_read, breturn_read); - HInstruction* breturn_return = new (GetAllocator()) HReturn(breturn_add); - breturn->AddInstruction(breturn_call); - breturn->AddInstruction(breturn_read); - breturn->AddInstruction(breturn_add); - breturn->AddInstruction(breturn_return); - ManuallyBuildEnvFor(breturn_call, {}); - - if (GetParam() == UsesOrder::kReverseOrder) { - // Insert `middle_read` in the same position as for the `kDefaultOrder` case. 
- // The only difference is the order of entries in `new_inst->GetUses()` which - // is used by `HeapReferenceData::CollectReplacements()` and defines the order - // of instructions to process for `HeapReferenceData::PredicateInstructions()`. - middle->InsertInstructionBefore(middle_read, middle_if); - } - - SetupExit(exit); - - PerformLSEWithPartial(blks); - - EXPECT_INS_RETAINED(cls); - EXPECT_INS_REMOVED(new_inst); - HNewInstance* replacement_new_inst = FindSingleInstruction<HNewInstance>(graph_); - ASSERT_NE(replacement_new_inst, nullptr); - EXPECT_INS_REMOVED(entry_write); - std::vector<HInstanceFieldSet*> all_writes; - std::tie(all_writes) = FindAllInstructions<HInstanceFieldSet>(graph_); - ASSERT_EQ(2u, all_writes.size()); - ASSERT_NE(all_writes[0] == left2_write, all_writes[1] == left2_write); - HInstanceFieldSet* replacement_write = all_writes[(all_writes[0] == left2_write) ? 1u : 0u]; - ASSERT_FALSE(replacement_write->GetIsPredicatedSet()); - ASSERT_INS_EQ(replacement_write->InputAt(0), replacement_new_inst); - ASSERT_INS_EQ(replacement_write->InputAt(1), c11); - - EXPECT_INS_RETAINED(left1_call); - - EXPECT_INS_REMOVED(middle_read); - HPredicatedInstanceFieldGet* replacement_middle_read = - FindSingleInstruction<HPredicatedInstanceFieldGet>(graph_, middle); - ASSERT_NE(replacement_middle_read, nullptr); - ASSERT_TRUE(replacement_middle_read->GetTarget()->IsPhi()); - ASSERT_EQ(2u, replacement_middle_read->GetTarget()->AsPhi()->InputCount()); - ASSERT_INS_EQ(replacement_middle_read->GetTarget()->AsPhi()->InputAt(0), replacement_new_inst); - ASSERT_INS_EQ(replacement_middle_read->GetTarget()->AsPhi()->InputAt(1), cnull); - ASSERT_TRUE(replacement_middle_read->GetDefaultValue()->IsPhi()); - ASSERT_EQ(2u, replacement_middle_read->GetDefaultValue()->AsPhi()->InputCount()); - ASSERT_INS_EQ(replacement_middle_read->GetDefaultValue()->AsPhi()->InputAt(0), c0); - ASSERT_INS_EQ(replacement_middle_read->GetDefaultValue()->AsPhi()->InputAt(1), c11); - - EXPECT_INS_RETAINED(left2_write); - ASSERT_TRUE(left2_write->GetIsPredicatedSet()); - - EXPECT_INS_REMOVED(breturn_read); - HPredicatedInstanceFieldGet* replacement_breturn_read = - FindSingleInstruction<HPredicatedInstanceFieldGet>(graph_, breturn); - ASSERT_NE(replacement_breturn_read, nullptr); - ASSERT_INS_EQ(replacement_breturn_read->GetTarget(), replacement_middle_read->GetTarget()); - ASSERT_TRUE(replacement_breturn_read->GetDefaultValue()->IsPhi()); - ASSERT_EQ(2u, replacement_breturn_read->GetDefaultValue()->AsPhi()->InputCount()); - ASSERT_INS_EQ(replacement_breturn_read->GetDefaultValue()->AsPhi()->InputAt(0), c33); - HInstruction* other_input = replacement_breturn_read->GetDefaultValue()->AsPhi()->InputAt(1); - ASSERT_NE(other_input->GetBlock(), nullptr) << GetParam(); - ASSERT_INS_EQ(other_input, replacement_middle_read); -} - -// Regression test for a bad DCHECK() found while trying to write a test for b/188188275. 
-// // ENTRY -// obj = new Obj(); -// obj.foo = 11; -// if (param1) { -// // LEFT1 -// escape(obj); -// } else { -// // RIGHT1 -// } -// // MIDDLE -// a = obj.foo; -// if (param2) { -// // LEFT2 -// no_escape(); -// } else { -// // RIGHT2 -// } -// // BRETURN -// b = obj.foo; -// return a + b; -TEST_P(UsesOrderDependentTestGroup, RecordPredicatedReplacements2) { - ScopedObjectAccess soa(Thread::Current()); - VariableSizedHandleScope vshs(soa.Self()); - CreateGraph(&vshs); - AdjacencyListGraph blks(SetupFromAdjacencyList("entry", - "exit", - {{"entry", "left1"}, - {"entry", "right1"}, - {"left1", "middle"}, - {"right1", "middle"}, - {"middle", "left2"}, - {"middle", "right2"}, - {"left2", "breturn"}, - {"right2", "breturn"}, - {"breturn", "exit"}})); -#define GET_BLOCK(name) HBasicBlock* name = blks.Get(#name) - GET_BLOCK(entry); - GET_BLOCK(left1); - GET_BLOCK(right1); - GET_BLOCK(middle); - GET_BLOCK(left2); - GET_BLOCK(right2); - GET_BLOCK(breturn); - GET_BLOCK(exit); -#undef GET_BLOCK - EnsurePredecessorOrder(middle, {left1, right1}); - EnsurePredecessorOrder(breturn, {left2, right2}); - HInstruction* c0 = graph_->GetIntConstant(0); - HInstruction* cnull = graph_->GetNullConstant(); - HInstruction* c11 = graph_->GetIntConstant(11); - HInstruction* param1 = MakeParam(DataType::Type::kBool); - HInstruction* param2 = MakeParam(DataType::Type::kBool); - - HInstruction* suspend = new (GetAllocator()) HSuspendCheck(); - HInstruction* cls = MakeClassLoad(); - HInstruction* new_inst = MakeNewInstance(cls); - HInstruction* entry_write = MakeIFieldSet(new_inst, c11, MemberOffset(32)); - HInstruction* entry_if = new (GetAllocator()) HIf(param1); - entry->AddInstruction(suspend); - entry->AddInstruction(cls); - entry->AddInstruction(new_inst); - entry->AddInstruction(entry_write); - entry->AddInstruction(entry_if); - ManuallyBuildEnvFor(suspend, {}); - ManuallyBuildEnvFor(cls, {}); - ManuallyBuildEnvFor(new_inst, {}); - - HInstruction* left1_call = MakeInvoke(DataType::Type::kVoid, { new_inst }); - HInstruction* left1_goto = new (GetAllocator()) HGoto(); - left1->AddInstruction(left1_call); - left1->AddInstruction(left1_goto); - ManuallyBuildEnvFor(left1_call, {}); - - HInstruction* right1_goto = new (GetAllocator()) HGoto(); - right1->AddInstruction(right1_goto); - - HInstruction* middle_read = MakeIFieldGet(new_inst, DataType::Type::kInt32, MemberOffset(32)); - HInstruction* middle_if = new (GetAllocator()) HIf(param2); - if (GetParam() == UsesOrder::kDefaultOrder) { - middle->AddInstruction(middle_read); - } - middle->AddInstruction(middle_if); - - HInstruction* left2_call = MakeInvoke(DataType::Type::kVoid, {}); - HInstruction* left2_goto = new (GetAllocator()) HGoto(); - left2->AddInstruction(left2_call); - left2->AddInstruction(left2_goto); - ManuallyBuildEnvFor(left2_call, {}); - - HInstruction* right2_goto = new (GetAllocator()) HGoto(); - right2->AddInstruction(right2_goto); - - HInstruction* breturn_read = MakeIFieldGet(new_inst, DataType::Type::kInt32, MemberOffset(32)); - HInstruction* breturn_add = - new (GetAllocator()) HAdd(DataType::Type::kInt32, middle_read, breturn_read); - HInstruction* breturn_return = new (GetAllocator()) HReturn(breturn_add); - breturn->AddInstruction(breturn_read); - breturn->AddInstruction(breturn_add); - breturn->AddInstruction(breturn_return); - - if (GetParam() == UsesOrder::kReverseOrder) { - // Insert `middle_read` in the same position as for the `kDefaultOrder` case. 
- // The only difference is the order of entries in `new_inst->GetUses()` which - // is used by `HeapReferenceData::CollectReplacements()` and defines the order - // of instructions to process for `HeapReferenceData::PredicateInstructions()`. - middle->InsertInstructionBefore(middle_read, middle_if); - } - - SetupExit(exit); - - PerformLSEWithPartial(blks); - - EXPECT_INS_RETAINED(cls); - EXPECT_INS_REMOVED(new_inst); - HNewInstance* replacement_new_inst = FindSingleInstruction<HNewInstance>(graph_); - ASSERT_NE(replacement_new_inst, nullptr); - EXPECT_INS_REMOVED(entry_write); - HInstanceFieldSet* replacement_write = FindSingleInstruction<HInstanceFieldSet>(graph_); - ASSERT_NE(replacement_write, nullptr); - ASSERT_FALSE(replacement_write->GetIsPredicatedSet()); - ASSERT_INS_EQ(replacement_write->InputAt(0), replacement_new_inst); - ASSERT_INS_EQ(replacement_write->InputAt(1), c11); - - EXPECT_INS_RETAINED(left1_call); - - EXPECT_INS_REMOVED(middle_read); - HPredicatedInstanceFieldGet* replacement_middle_read = - FindSingleInstruction<HPredicatedInstanceFieldGet>(graph_, middle); - ASSERT_NE(replacement_middle_read, nullptr); - ASSERT_TRUE(replacement_middle_read->GetTarget()->IsPhi()); - ASSERT_EQ(2u, replacement_middle_read->GetTarget()->AsPhi()->InputCount()); - ASSERT_INS_EQ(replacement_middle_read->GetTarget()->AsPhi()->InputAt(0), replacement_new_inst); - ASSERT_INS_EQ(replacement_middle_read->GetTarget()->AsPhi()->InputAt(1), cnull); - ASSERT_TRUE(replacement_middle_read->GetDefaultValue()->IsPhi()); - ASSERT_EQ(2u, replacement_middle_read->GetDefaultValue()->AsPhi()->InputCount()); - ASSERT_INS_EQ(replacement_middle_read->GetDefaultValue()->AsPhi()->InputAt(0), c0); - ASSERT_INS_EQ(replacement_middle_read->GetDefaultValue()->AsPhi()->InputAt(1), c11); - - EXPECT_INS_RETAINED(left2_call); - - EXPECT_INS_REMOVED(breturn_read); - HPredicatedInstanceFieldGet* replacement_breturn_read = - FindSingleInstruction<HPredicatedInstanceFieldGet>(graph_, breturn); - ASSERT_NE(replacement_breturn_read, nullptr); - ASSERT_INS_EQ(replacement_breturn_read->GetTarget(), replacement_middle_read->GetTarget()); - ASSERT_INS_EQ(replacement_breturn_read->GetDefaultValue(), replacement_middle_read); -} - -INSTANTIATE_TEST_SUITE_P(LoadStoreEliminationTest, - UsesOrderDependentTestGroup, - testing::Values(UsesOrder::kDefaultOrder, UsesOrder::kReverseOrder)); - -// The parameter is the number of times we call `std::next_permutation` (from 0 to 5) -// so that we test all 6 permutation of three items. -class UsesOrderDependentTestGroupForThreeItems - : public LoadStoreEliminationTestBase<CommonCompilerTestWithParam<size_t>> {}; - -// Make sure that after we record replacements by predicated loads, we correctly -// use that predicated load for Phi placeholders that were previously marked as -// replaced by the now removed unpredicated load. (The fix for bug 183897743 was -// not good enough.) 
Bug: 188188275 -// // ENTRY -// obj = new Obj(); -// obj.foo = 11; -// if (param1) { -// // LEFT1 -// escape(obj); -// } else { -// // RIGHT1 -// } -// // MIDDLE1 -// a = obj.foo; -// if (param2) { -// // LEFT2 -// no_escape1(); -// } else { -// // RIGHT2 -// } -// // MIDDLE2 -// if (param3) { -// // LEFT3 -// x = obj.foo; -// no_escape2(); -// } else { -// // RIGHT3 -// x = 0; -// } -// // BRETURN -// b = obj.foo; -// return a + b + x; -TEST_P(UsesOrderDependentTestGroupForThreeItems, RecordPredicatedReplacements3) { - ScopedObjectAccess soa(Thread::Current()); - VariableSizedHandleScope vshs(soa.Self()); - CreateGraph(&vshs); - AdjacencyListGraph blks(SetupFromAdjacencyList("entry", - "exit", - {{"entry", "left1"}, - {"entry", "right1"}, - {"left1", "middle1"}, - {"right1", "middle1"}, - {"middle1", "left2"}, - {"middle1", "right2"}, - {"left2", "middle2"}, - {"right2", "middle2"}, - {"middle2", "left3"}, - {"middle2", "right3"}, - {"left3", "breturn"}, - {"right3", "breturn"}, - {"breturn", "exit"}})); -#define GET_BLOCK(name) HBasicBlock* name = blks.Get(#name) - GET_BLOCK(entry); - GET_BLOCK(left1); - GET_BLOCK(right1); - GET_BLOCK(middle1); - GET_BLOCK(left2); - GET_BLOCK(right2); - GET_BLOCK(middle2); - GET_BLOCK(left3); - GET_BLOCK(right3); - GET_BLOCK(breturn); - GET_BLOCK(exit); -#undef GET_BLOCK - EnsurePredecessorOrder(middle1, {left1, right1}); - EnsurePredecessorOrder(middle2, {left2, right2}); - EnsurePredecessorOrder(breturn, {left3, right3}); - HInstruction* c0 = graph_->GetIntConstant(0); - HInstruction* cnull = graph_->GetNullConstant(); - HInstruction* c11 = graph_->GetIntConstant(11); - HInstruction* param1 = MakeParam(DataType::Type::kBool); - HInstruction* param2 = MakeParam(DataType::Type::kBool); - HInstruction* param3 = MakeParam(DataType::Type::kBool); - - HInstruction* suspend = new (GetAllocator()) HSuspendCheck(); - HInstruction* cls = MakeClassLoad(); - HInstruction* new_inst = MakeNewInstance(cls); - HInstruction* entry_write = MakeIFieldSet(new_inst, c11, MemberOffset(32)); - HInstruction* entry_if = new (GetAllocator()) HIf(param1); - entry->AddInstruction(suspend); - entry->AddInstruction(cls); - entry->AddInstruction(new_inst); - entry->AddInstruction(entry_write); - entry->AddInstruction(entry_if); - ManuallyBuildEnvFor(suspend, {}); - ManuallyBuildEnvFor(cls, {}); - ManuallyBuildEnvFor(new_inst, {}); - - HInstruction* left1_call = MakeInvoke(DataType::Type::kVoid, { new_inst }); - HInstruction* left1_goto = new (GetAllocator()) HGoto(); - left1->AddInstruction(left1_call); - left1->AddInstruction(left1_goto); - ManuallyBuildEnvFor(left1_call, {}); - - HInstruction* right1_goto = new (GetAllocator()) HGoto(); - right1->AddInstruction(right1_goto); - - HInstruction* middle1_read = MakeIFieldGet(new_inst, DataType::Type::kInt32, MemberOffset(32)); - HInstruction* middle1_if = new (GetAllocator()) HIf(param2); - // Delay inserting `middle1_read`, do that later with ordering based on `GetParam()`. 
- middle1->AddInstruction(middle1_if); - - HInstruction* left2_call = MakeInvoke(DataType::Type::kVoid, {}); - HInstruction* left2_goto = new (GetAllocator()) HGoto(); - left2->AddInstruction(left2_call); - left2->AddInstruction(left2_goto); - ManuallyBuildEnvFor(left2_call, {}); - - HInstruction* right2_goto = new (GetAllocator()) HGoto(); - right2->AddInstruction(right2_goto); - - HInstruction* middle2_if = new (GetAllocator()) HIf(param3); - middle2->AddInstruction(middle2_if); - - HInstruction* left3_read = MakeIFieldGet(new_inst, DataType::Type::kInt32, MemberOffset(32)); - HInstruction* left3_call = MakeInvoke(DataType::Type::kVoid, {}); - HInstruction* left3_goto = new (GetAllocator()) HGoto(); - // Delay inserting `left3_read`, do that later with ordering based on `GetParam()`. - left3->AddInstruction(left3_call); - left3->AddInstruction(left3_goto); - ManuallyBuildEnvFor(left3_call, {}); - - HInstruction* right3_goto = new (GetAllocator()) HGoto(); - right3->AddInstruction(right3_goto); - - HPhi* breturn_phi = MakePhi({left3_read, c0}); - HInstruction* breturn_read = MakeIFieldGet(new_inst, DataType::Type::kInt32, MemberOffset(32)); - HInstruction* breturn_add1 = - new (GetAllocator()) HAdd(DataType::Type::kInt32, middle1_read, breturn_read); - HInstruction* breturn_add2 = - new (GetAllocator()) HAdd(DataType::Type::kInt32, breturn_add1, breturn_phi); - HInstruction* breturn_return = new (GetAllocator()) HReturn(breturn_add2); - breturn->AddPhi(breturn_phi); - // Delay inserting `breturn_read`, do that later with ordering based on `GetParam()`. - breturn->AddInstruction(breturn_add1); - breturn->AddInstruction(breturn_add2); - breturn->AddInstruction(breturn_return); - - // Insert reads in the same positions but in different insertion orders. - // The only difference is the order of entries in `new_inst->GetUses()` which - // is used by `HeapReferenceData::CollectReplacements()` and defines the order - // of instructions to process for `HeapReferenceData::PredicateInstructions()`. 
- std::tuple<size_t, HInstruction*, HInstruction*> read_insertions[] = { - { 0u, middle1_read, middle1_if }, - { 1u, left3_read, left3_call }, - { 2u, breturn_read, breturn_add1 }, - }; - for (size_t i = 0, num = GetParam(); i != num; ++i) { - std::next_permutation(read_insertions, read_insertions + std::size(read_insertions)); - } - for (auto [order, read, cursor] : read_insertions) { - cursor->GetBlock()->InsertInstructionBefore(read, cursor); - } - - SetupExit(exit); - - PerformLSEWithPartial(blks); - - EXPECT_INS_RETAINED(cls); - EXPECT_INS_REMOVED(new_inst); - HNewInstance* replacement_new_inst = FindSingleInstruction<HNewInstance>(graph_); - ASSERT_NE(replacement_new_inst, nullptr); - EXPECT_INS_REMOVED(entry_write); - HInstanceFieldSet* replacement_write = FindSingleInstruction<HInstanceFieldSet>(graph_); - ASSERT_NE(replacement_write, nullptr); - ASSERT_FALSE(replacement_write->GetIsPredicatedSet()); - ASSERT_INS_EQ(replacement_write->InputAt(0), replacement_new_inst); - ASSERT_INS_EQ(replacement_write->InputAt(1), c11); - - EXPECT_INS_RETAINED(left1_call); - - EXPECT_INS_REMOVED(middle1_read); - HPredicatedInstanceFieldGet* replacement_middle1_read = - FindSingleInstruction<HPredicatedInstanceFieldGet>(graph_, middle1); - ASSERT_NE(replacement_middle1_read, nullptr); - ASSERT_TRUE(replacement_middle1_read->GetTarget()->IsPhi()); - ASSERT_EQ(2u, replacement_middle1_read->GetTarget()->AsPhi()->InputCount()); - ASSERT_INS_EQ(replacement_middle1_read->GetTarget()->AsPhi()->InputAt(0), replacement_new_inst); - ASSERT_INS_EQ(replacement_middle1_read->GetTarget()->AsPhi()->InputAt(1), cnull); - ASSERT_TRUE(replacement_middle1_read->GetDefaultValue()->IsPhi()); - ASSERT_EQ(2u, replacement_middle1_read->GetDefaultValue()->AsPhi()->InputCount()); - ASSERT_INS_EQ(replacement_middle1_read->GetDefaultValue()->AsPhi()->InputAt(0), c0); - ASSERT_INS_EQ(replacement_middle1_read->GetDefaultValue()->AsPhi()->InputAt(1), c11); - - EXPECT_INS_RETAINED(left2_call); - - EXPECT_INS_REMOVED(left3_read); - HPredicatedInstanceFieldGet* replacement_left3_read = - FindSingleInstruction<HPredicatedInstanceFieldGet>(graph_, left3); - ASSERT_NE(replacement_left3_read, nullptr); - ASSERT_TRUE(replacement_left3_read->GetTarget()->IsPhi()); - ASSERT_INS_EQ(replacement_left3_read->GetTarget(), replacement_middle1_read->GetTarget()); - ASSERT_INS_EQ(replacement_left3_read->GetDefaultValue(), replacement_middle1_read); - EXPECT_INS_RETAINED(left3_call); - - EXPECT_INS_RETAINED(breturn_phi); - EXPECT_INS_REMOVED(breturn_read); - HPredicatedInstanceFieldGet* replacement_breturn_read = - FindSingleInstruction<HPredicatedInstanceFieldGet>(graph_, breturn); - ASSERT_NE(replacement_breturn_read, nullptr); - ASSERT_INS_EQ(replacement_breturn_read->GetTarget(), replacement_middle1_read->GetTarget()); - ASSERT_EQ(2u, replacement_breturn_read->GetDefaultValue()->AsPhi()->InputCount()); - ASSERT_INS_EQ(replacement_breturn_read->GetDefaultValue()->AsPhi()->InputAt(0), - replacement_left3_read); - ASSERT_INS_EQ(replacement_breturn_read->GetDefaultValue()->AsPhi()->InputAt(1), - replacement_middle1_read); - EXPECT_INS_RETAINED(breturn_add1); - ASSERT_INS_EQ(breturn_add1->InputAt(0), replacement_middle1_read); - ASSERT_INS_EQ(breturn_add1->InputAt(1), replacement_breturn_read); - EXPECT_INS_RETAINED(breturn_add2); - ASSERT_INS_EQ(breturn_add2->InputAt(0), breturn_add1); - ASSERT_INS_EQ(breturn_add2->InputAt(1), breturn_phi); - EXPECT_INS_RETAINED(breturn_return); -} - -INSTANTIATE_TEST_SUITE_P(LoadStoreEliminationTest, - 
UsesOrderDependentTestGroupForThreeItems, - testing::Values(0u, 1u, 2u, 3u, 4u, 5u)); - } // namespace art diff --git a/compiler/optimizing/locations.cc b/compiler/optimizing/locations.cc index f40b7f4f0c..4189bc4053 100644 --- a/compiler/optimizing/locations.cc +++ b/compiler/optimizing/locations.cc @@ -62,7 +62,7 @@ Location Location::RegisterOrConstant(HInstruction* instruction) { } Location Location::RegisterOrInt32Constant(HInstruction* instruction) { - HConstant* constant = instruction->AsConstant(); + HConstant* constant = instruction->AsConstantOrNull(); if (constant != nullptr) { int64_t value = CodeGenerator::GetInt64ValueOf(constant); if (IsInt<32>(value)) { @@ -73,7 +73,7 @@ Location Location::RegisterOrInt32Constant(HInstruction* instruction) { } Location Location::FpuRegisterOrInt32Constant(HInstruction* instruction) { - HConstant* constant = instruction->AsConstant(); + HConstant* constant = instruction->AsConstantOrNull(); if (constant != nullptr) { int64_t value = CodeGenerator::GetInt64ValueOf(constant); if (IsInt<32>(value)) { diff --git a/compiler/optimizing/locations.h b/compiler/optimizing/locations.h index 7ee076f442..20099ebbc2 100644 --- a/compiler/optimizing/locations.h +++ b/compiler/optimizing/locations.h @@ -79,7 +79,7 @@ class Location : public ValueObject { kUnallocated = 11, }; - Location() : ValueObject(), value_(kInvalid) { + constexpr Location() : ValueObject(), value_(kInvalid) { // Verify that non-constant location kinds do not interfere with kConstant. static_assert((kInvalid & kLocationConstantMask) != kConstant, "TagError"); static_assert((kUnallocated & kLocationConstantMask) != kConstant, "TagError"); @@ -95,7 +95,7 @@ class Location : public ValueObject { DCHECK(!IsValid()); } - Location(const Location& other) = default; + constexpr Location(const Location& other) = default; Location& operator=(const Location& other) = default; @@ -126,24 +126,24 @@ class Location : public ValueObject { } // Empty location. Used if there the location should be ignored. - static Location NoLocation() { + static constexpr Location NoLocation() { return Location(); } // Register locations. 
- static Location RegisterLocation(int reg) { + static constexpr Location RegisterLocation(int reg) { return Location(kRegister, reg); } - static Location FpuRegisterLocation(int reg) { + static constexpr Location FpuRegisterLocation(int reg) { return Location(kFpuRegister, reg); } - static Location RegisterPairLocation(int low, int high) { + static constexpr Location RegisterPairLocation(int low, int high) { return Location(kRegisterPair, low << 16 | high); } - static Location FpuRegisterPairLocation(int low, int high) { + static constexpr Location FpuRegisterPairLocation(int low, int high) { return Location(kFpuRegisterPair, low << 16 | high); } @@ -423,7 +423,7 @@ class Location : public ValueObject { explicit Location(uintptr_t value) : value_(value) {} - Location(Kind kind, uintptr_t payload) + constexpr Location(Kind kind, uintptr_t payload) : value_(KindField::Encode(kind) | PayloadField::Encode(payload)) {} uintptr_t GetPayload() const { diff --git a/compiler/optimizing/loop_analysis.cc b/compiler/optimizing/loop_analysis.cc index 95e81533da..b3f9e835de 100644 --- a/compiler/optimizing/loop_analysis.cc +++ b/compiler/optimizing/loop_analysis.cc @@ -42,7 +42,7 @@ void LoopAnalysis::CalculateLoopBasicProperties(HLoopInformation* loop_info, // not cause loop peeling to happen as they either cannot be inside a loop, or by // definition cannot be loop exits (unconditional instructions), or are not beneficial for // the optimization. - HIf* hif = block->GetLastInstruction()->AsIf(); + HIf* hif = block->GetLastInstruction()->AsIfOrNull(); if (hif != nullptr && !loop_info->Contains(*hif->InputAt(0)->GetBlock())) { analysis_results->invariant_exits_num_++; } @@ -221,9 +221,6 @@ class X86_64LoopHelper : public ArchDefaultLoopHelper { return 3; case HInstruction::InstructionKind::kIf: return 2; - case HInstruction::InstructionKind::kPredicatedInstanceFieldGet: - // test + cond-jump + IFieldGet - return 4; case HInstruction::InstructionKind::kInstanceFieldGet: return 2; case HInstruction::InstructionKind::kInstanceFieldSet: @@ -259,7 +256,7 @@ class X86_64LoopHelper : public ArchDefaultLoopHelper { case HInstruction::InstructionKind::kVecReplicateScalar: return 2; case HInstruction::InstructionKind::kVecExtractScalar: - return 1; + return 1; case HInstruction::InstructionKind::kVecReduce: return 4; case HInstruction::InstructionKind::kVecNeg: diff --git a/compiler/optimizing/loop_analysis.h b/compiler/optimizing/loop_analysis.h index cec00fecf4..cd8f00588d 100644 --- a/compiler/optimizing/loop_analysis.h +++ b/compiler/optimizing/loop_analysis.h @@ -148,13 +148,15 @@ class ArchNoOptsLoopHelper : public ArenaObject<kArenaAllocOptimization> { // // Returns 'true' by default, should be overridden by particular target loop helper. virtual bool IsLoopNonBeneficialForScalarOpts( - LoopAnalysisInfo* loop_analysis_info ATTRIBUTE_UNUSED) const { return true; } + [[maybe_unused]] LoopAnalysisInfo* loop_analysis_info) const { + return true; + } // Returns optimal scalar unrolling factor for the loop. // // Returns kNoUnrollingFactor by default, should be overridden by particular target loop helper. virtual uint32_t GetScalarUnrollingFactor( - const LoopAnalysisInfo* analysis_info ATTRIBUTE_UNUSED) const { + [[maybe_unused]] const LoopAnalysisInfo* analysis_info) const { return LoopAnalysisInfo::kNoUnrollingFactor; } @@ -166,17 +168,17 @@ class ArchNoOptsLoopHelper : public ArenaObject<kArenaAllocOptimization> { // Returns whether it is beneficial to fully unroll the loop. 
// // Returns 'false' by default, should be overridden by particular target loop helper. - virtual bool IsFullUnrollingBeneficial(LoopAnalysisInfo* analysis_info ATTRIBUTE_UNUSED) const { + virtual bool IsFullUnrollingBeneficial([[maybe_unused]] LoopAnalysisInfo* analysis_info) const { return false; } // Returns optimal SIMD unrolling factor for the loop. // // Returns kNoUnrollingFactor by default, should be overridden by particular target loop helper. - virtual uint32_t GetSIMDUnrollingFactor(HBasicBlock* block ATTRIBUTE_UNUSED, - int64_t trip_count ATTRIBUTE_UNUSED, - uint32_t max_peel ATTRIBUTE_UNUSED, - uint32_t vector_length ATTRIBUTE_UNUSED) const { + virtual uint32_t GetSIMDUnrollingFactor([[maybe_unused]] HBasicBlock* block, + [[maybe_unused]] int64_t trip_count, + [[maybe_unused]] uint32_t max_peel, + [[maybe_unused]] uint32_t vector_length) const { return LoopAnalysisInfo::kNoUnrollingFactor; } diff --git a/compiler/optimizing/loop_optimization.cc b/compiler/optimizing/loop_optimization.cc index 7a52502562..f6d69ca789 100644 --- a/compiler/optimizing/loop_optimization.cc +++ b/compiler/optimizing/loop_optimization.cc @@ -366,8 +366,8 @@ static bool HasVectorRestrictions(uint64_t restrictions, uint64_t tested) { return (restrictions & tested) != 0; } -// Insert an instruction. -static HInstruction* Insert(HBasicBlock* block, HInstruction* instruction) { +// Insert an instruction at the end of the block, with safe checks. +inline HInstruction* Insert(HBasicBlock* block, HInstruction* instruction) { DCHECK(block != nullptr); DCHECK(instruction != nullptr); block->InsertInstructionBefore(instruction, block->GetLastInstruction()); @@ -418,7 +418,7 @@ static void TryToEvaluateIfCondition(HIf* instruction, HGraph* graph) { ++it; if (true_succ->Dominates(user_block)) { user->ReplaceInput(graph->GetIntConstant(1), index); - } else if (false_succ->Dominates(user_block)) { + } else if (false_succ->Dominates(user_block)) { user->ReplaceInput(graph->GetIntConstant(0), index); } } @@ -453,6 +453,54 @@ static DataType::Type GetNarrowerType(HInstruction* a, HInstruction* b) { return type; } +// Returns whether the loop is of a diamond structure: +// +// header <----------------+ +// | | +// diamond_hif | +// / \ | +// diamond_true diamond_false | +// \ / | +// back_edge | +// | | +// +---------------------+ +static bool HasLoopDiamondStructure(HLoopInformation* loop_info) { + HBasicBlock* header = loop_info->GetHeader(); + if (loop_info->NumberOfBackEdges() != 1 || header->GetSuccessors().size() != 2) { + return false; + } + HBasicBlock* header_succ_0 = header->GetSuccessors()[0]; + HBasicBlock* header_succ_1 = header->GetSuccessors()[1]; + HBasicBlock* diamond_top = loop_info->Contains(*header_succ_0) ? 
+ header_succ_0 : + header_succ_1; + if (!diamond_top->GetLastInstruction()->IsIf()) { + return false; + } + + HIf* diamond_hif = diamond_top->GetLastInstruction()->AsIf(); + HBasicBlock* diamond_true = diamond_hif->IfTrueSuccessor(); + HBasicBlock* diamond_false = diamond_hif->IfFalseSuccessor(); + + if (diamond_true->GetSuccessors().size() != 1 || diamond_false->GetSuccessors().size() != 1) { + return false; + } + + HBasicBlock* back_edge = diamond_true->GetSingleSuccessor(); + if (back_edge != diamond_false->GetSingleSuccessor() || + back_edge != loop_info->GetBackEdges()[0]) { + return false; + } + + DCHECK_EQ(loop_info->GetBlocks().NumSetBits(), 5u); + return true; +} + +static bool IsPredicatedLoopControlFlowSupported(HLoopInformation* loop_info) { + size_t num_of_blocks = loop_info->GetBlocks().NumSetBits(); + return num_of_blocks == 2 || HasLoopDiamondStructure(loop_info); +} + // // Public methods. // @@ -482,6 +530,8 @@ HLoopOptimization::HLoopOptimization(HGraph* graph, vector_runtime_test_b_(nullptr), vector_map_(nullptr), vector_permanent_map_(nullptr), + vector_external_set_(nullptr), + predicate_info_map_(nullptr), vector_mode_(kSequential), vector_preheader_(nullptr), vector_header_(nullptr), @@ -542,12 +592,17 @@ bool HLoopOptimization::LocalRun() { std::less<HInstruction*>(), loop_allocator_->Adapter(kArenaAllocLoopOptimization)); ScopedArenaSafeMap<HInstruction*, HInstruction*> perm( std::less<HInstruction*>(), loop_allocator_->Adapter(kArenaAllocLoopOptimization)); + ScopedArenaSet<HInstruction*> ext_set(loop_allocator_->Adapter(kArenaAllocLoopOptimization)); + ScopedArenaSafeMap<HBasicBlock*, BlockPredicateInfo*> pred( + std::less<HBasicBlock*>(), loop_allocator_->Adapter(kArenaAllocLoopOptimization)); // Attach. iset_ = &iset; reductions_ = &reds; vector_refs_ = &refs; vector_map_ = ↦ vector_permanent_map_ = &perm; + vector_external_set_ = &ext_set; + predicate_info_map_ = &pred; // Traverse. const bool did_loop_opt = TraverseLoopsInnerToOuter(top_loop_); // Detach. @@ -556,6 +611,9 @@ bool HLoopOptimization::LocalRun() { vector_refs_ = nullptr; vector_map_ = nullptr; vector_permanent_map_ = nullptr; + vector_external_set_ = nullptr; + predicate_info_map_ = nullptr; + return did_loop_opt; } @@ -787,6 +845,37 @@ void HLoopOptimization::SimplifyBlocks(LoopNode* node) { } } +// Checks whether the loop has exit structure suitable for InnerLoopFinite optimization: +// - has single loop exit. +// - the exit block has only single predecessor - a block inside the loop. +// +// In that case returns single exit basic block (outside the loop); otherwise nullptr. +static HBasicBlock* GetInnerLoopFiniteSingleExit(HLoopInformation* loop_info) { + HBasicBlock* exit = nullptr; + for (HBlocksInLoopIterator block_it(*loop_info); + !block_it.Done(); + block_it.Advance()) { + HBasicBlock* block = block_it.Current(); + + // Check whether one of the successor is loop exit. + for (HBasicBlock* successor : block->GetSuccessors()) { + if (!loop_info->Contains(*successor)) { + if (exit != nullptr) { + // The loop has more than one exit. + return nullptr; + } + exit = successor; + + // Ensure exit can only be reached by exiting loop. 
+ if (successor->GetPredecessors().size() != 1) { + return nullptr; + } + } + } + } + return exit; +} + bool HLoopOptimization::TryOptimizeInnerLoopFinite(LoopNode* node) { HBasicBlock* header = node->loop_info->GetHeader(); HBasicBlock* preheader = node->loop_info->GetPreHeader(); @@ -795,33 +884,22 @@ bool HLoopOptimization::TryOptimizeInnerLoopFinite(LoopNode* node) { if (!induction_range_.IsFinite(node->loop_info, &trip_count)) { return false; } - // Ensure there is only a single loop-body (besides the header). - HBasicBlock* body = nullptr; - for (HBlocksInLoopIterator it(*node->loop_info); !it.Done(); it.Advance()) { - if (it.Current() != header) { - if (body != nullptr) { - return false; - } - body = it.Current(); - } - } - CHECK(body != nullptr); - // Ensure there is only a single exit point. - if (header->GetSuccessors().size() != 2) { - return false; - } - HBasicBlock* exit = (header->GetSuccessors()[0] == body) - ? header->GetSuccessors()[1] - : header->GetSuccessors()[0]; - // Ensure exit can only be reached by exiting loop. - if (exit->GetPredecessors().size() != 1) { + // Check loop exits. + HBasicBlock* exit = GetInnerLoopFiniteSingleExit(node->loop_info); + if (exit == nullptr) { return false; } + + HBasicBlock* body = (header->GetSuccessors()[0] == exit) + ? header->GetSuccessors()[1] + : header->GetSuccessors()[0]; // Detect either an empty loop (no side effects other than plain iteration) or // a trivial loop (just iterating once). Replace subsequent index uses, if any, // with the last value and remove the loop, possibly after unrolling its body. HPhi* main_phi = nullptr; - if (TrySetSimpleLoopHeader(header, &main_phi)) { + size_t num_of_blocks = header->GetLoopInformation()->GetBlocks().NumSetBits(); + + if (num_of_blocks == 2 && TrySetSimpleLoopHeader(header, &main_phi)) { bool is_empty = IsEmptyBody(body); if (reductions_->empty() && // TODO: possible with some effort (is_empty || trip_count == 1) && @@ -845,21 +923,61 @@ bool HLoopOptimization::TryOptimizeInnerLoopFinite(LoopNode* node) { } } // Vectorize loop, if possible and valid. - if (kEnableVectorization && + if (!kEnableVectorization || // Disable vectorization for debuggable graphs: this is a workaround for the bug // in 'GenerateNewLoop' which caused the SuspendCheck environment to be invalid. // TODO: b/138601207, investigate other possible cases with wrong environment values and // possibly switch back vectorization on for debuggable graphs. - !graph_->IsDebuggable() && - TrySetSimpleLoopHeader(header, &main_phi) && - ShouldVectorize(node, body, trip_count) && - TryAssignLastValue(node->loop_info, main_phi, preheader, /*collect_loop_uses*/ true)) { - Vectorize(node, body, exit, trip_count); - graph_->SetHasSIMD(true); // flag SIMD usage - MaybeRecordStat(stats_, MethodCompilationStat::kLoopVectorized); - return true; + graph_->IsDebuggable()) { + return false; + } + + if (IsInPredicatedVectorizationMode()) { + return TryVectorizePredicated(node, body, exit, main_phi, trip_count); + } else { + return TryVectorizedTraditional(node, body, exit, main_phi, trip_count); } - return false; +} + +bool HLoopOptimization::TryVectorizePredicated(LoopNode* node, + HBasicBlock* body, + HBasicBlock* exit, + HPhi* main_phi, + int64_t trip_count) { + if (!IsPredicatedLoopControlFlowSupported(node->loop_info) || + !ShouldVectorizeCommon(node, main_phi, trip_count)) { + return false; + } + + // Currently we can only generate cleanup loops for loops with 2 basic block. 
+ // + // TODO: Support array disambiguation tests for CF loops. + if (NeedsArrayRefsDisambiguationTest() && + node->loop_info->GetBlocks().NumSetBits() != 2) { + return false; + } + + VectorizePredicated(node, body, exit); + MaybeRecordStat(stats_, MethodCompilationStat::kLoopVectorized); + graph_->SetHasPredicatedSIMD(true); // flag SIMD usage + return true; +} + +bool HLoopOptimization::TryVectorizedTraditional(LoopNode* node, + HBasicBlock* body, + HBasicBlock* exit, + HPhi* main_phi, + int64_t trip_count) { + HBasicBlock* header = node->loop_info->GetHeader(); + size_t num_of_blocks = header->GetLoopInformation()->GetBlocks().NumSetBits(); + + if (num_of_blocks != 2 || !ShouldVectorizeCommon(node, main_phi, trip_count)) { + return false; + } + VectorizeTraditional(node, body, exit, trip_count); + MaybeRecordStat(stats_, MethodCompilationStat::kLoopVectorized); + graph_->SetHasTraditionalSIMD(true); // flag SIMD usage + return true; } bool HLoopOptimization::OptimizeInnerLoop(LoopNode* node) { @@ -1006,7 +1124,10 @@ bool HLoopOptimization::TryLoopScalarOpts(LoopNode* node) { // Intel Press, June, 2004 (http://www.aartbik.com/). // -bool HLoopOptimization::ShouldVectorize(LoopNode* node, HBasicBlock* block, int64_t trip_count) { + +bool HLoopOptimization::CanVectorizeDataFlow(LoopNode* node, + HBasicBlock* header, + bool collect_alignment_info) { // Reset vector bookkeeping. vector_length_ = 0; vector_refs_->clear(); @@ -1015,16 +1136,30 @@ bool HLoopOptimization::ShouldVectorize(LoopNode* node, HBasicBlock* block, int6 vector_runtime_test_a_ = vector_runtime_test_b_ = nullptr; - // Phis in the loop-body prevent vectorization. - if (!block->GetPhis().IsEmpty()) { - return false; - } + // Traverse the data flow of the loop, in the original program order. + for (HBlocksInLoopReversePostOrderIterator block_it(*header->GetLoopInformation()); + !block_it.Done(); + block_it.Advance()) { + HBasicBlock* block = block_it.Current(); - // Scan the loop-body, starting a right-hand-side tree traversal at each left-hand-side - // occurrence, which allows passing down attributes down the use tree. - for (HInstructionIterator it(block->GetInstructions()); !it.Done(); it.Advance()) { - if (!VectorizeDef(node, it.Current(), /*generate_code*/ false)) { - return false; // failure to vectorize a left-hand-side + if (block == header) { + // The header is of a certain structure (TrySetSimpleLoopHeader) and doesn't need to be + // processed here. + continue; + } + + // Phis in the loop-body prevent vectorization. + // TODO: Enable vectorization of CF loops with Phis. + if (!block->GetPhis().IsEmpty()) { + return false; + } + + // Scan the loop-body instructions, starting a right-hand-side tree traversal at each + // left-hand-side occurrence, which allows passing down attributes down the use tree. + for (HInstructionIterator it(block->GetInstructions()); !it.Done(); it.Advance()) { + if (!VectorizeDef(node, it.Current(), /*generate_code*/ false)) { + return false; // failure to vectorize a left-hand-side + } } } @@ -1111,24 +1246,123 @@ bool HLoopOptimization::ShouldVectorize(LoopNode* node, HBasicBlock* block, int6 } } // for i - if (!IsInPredicatedVectorizationMode()) { - // Find a suitable alignment strategy. + if (collect_alignment_info) { + // Update the info on alignment strategy. SetAlignmentStrategy(peeling_votes, peeling_candidate); } - // Does vectorization seem profitable? - if (!IsVectorizationProfitable(trip_count)) { + // Success! 
+ return true; +} + +bool HLoopOptimization::ShouldVectorizeCommon(LoopNode* node, + HPhi* main_phi, + int64_t trip_count) { + HBasicBlock* header = node->loop_info->GetHeader(); + HBasicBlock* preheader = node->loop_info->GetPreHeader(); + + bool enable_alignment_strategies = !IsInPredicatedVectorizationMode(); + if (!TrySetSimpleLoopHeader(header, &main_phi) || + !CanVectorizeDataFlow(node, header, enable_alignment_strategies) || + !IsVectorizationProfitable(trip_count) || + !TryAssignLastValue(node->loop_info, main_phi, preheader, /*collect_loop_uses*/ true)) { return false; } - // Success! return true; } -void HLoopOptimization::Vectorize(LoopNode* node, - HBasicBlock* block, - HBasicBlock* exit, - int64_t trip_count) { +void HLoopOptimization::VectorizePredicated(LoopNode* node, + HBasicBlock* block, + HBasicBlock* exit) { + DCHECK(IsInPredicatedVectorizationMode()); + + HBasicBlock* header = node->loop_info->GetHeader(); + HBasicBlock* preheader = node->loop_info->GetPreHeader(); + + // Adjust vector bookkeeping. + HPhi* main_phi = nullptr; + bool is_simple_loop_header = TrySetSimpleLoopHeader(header, &main_phi); // refills sets + DCHECK(is_simple_loop_header); + vector_header_ = header; + vector_body_ = block; + + // Loop induction type. + DataType::Type induc_type = main_phi->GetType(); + DCHECK(induc_type == DataType::Type::kInt32 || induc_type == DataType::Type::kInt64) + << induc_type; + + // Generate loop control: + // stc = <trip-count>; + // vtc = <vector trip-count> + HInstruction* stc = induction_range_.GenerateTripCount(node->loop_info, graph_, preheader); + HInstruction* vtc = stc; + vector_index_ = graph_->GetConstant(induc_type, 0); + bool needs_disambiguation_test = false; + // Generate runtime disambiguation test: + // vtc = a != b ? vtc : 0; + if (NeedsArrayRefsDisambiguationTest()) { + HInstruction* rt = Insert( + preheader, + new (global_allocator_) HNotEqual(vector_runtime_test_a_, vector_runtime_test_b_)); + vtc = Insert(preheader, + new (global_allocator_) + HSelect(rt, vtc, graph_->GetConstant(induc_type, 0), kNoDexPc)); + needs_disambiguation_test = true; + } + + // Generate vector loop: + // for ( ; i < vtc; i += vector_length) + // <vectorized-loop-body> + HBasicBlock* preheader_for_vector_loop = + graph_->TransformLoopForVectorization(vector_header_, vector_body_, exit); + vector_mode_ = kVector; + GenerateNewLoopPredicated(node, + preheader_for_vector_loop, + vector_index_, + vtc, + graph_->GetConstant(induc_type, vector_length_)); + + // Generate scalar loop, if needed: + // for ( ; i < stc; i += 1) + // <loop-body> + if (needs_disambiguation_test) { + vector_mode_ = kSequential; + HBasicBlock* preheader_for_cleanup_loop = + graph_->TransformLoopForVectorization(vector_header_, vector_body_, exit); + // Use "Traditional" version for the sequential loop. + GenerateNewLoopScalarOrTraditional(node, + preheader_for_cleanup_loop, + vector_index_, + stc, + graph_->GetConstant(induc_type, 1), + LoopAnalysisInfo::kNoUnrollingFactor); + } + + FinalizeVectorization(node); + + // Assign governing predicates for the predicated instructions inserted during vectorization + // outside the loop. 
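
// A minimal sketch (not from this patch), assuming a simple a[i] = b[i] + 1 loop: the scalar
// meaning of the structure VectorizePredicated() emits, namely a predicated main loop whose
// governing predicate masks the final partial iteration, plus a scalar cleanup loop that is
// only generated when the array-reference disambiguation test is needed (vtc collapses to 0
// when the references may alias).
static void PredicatedShapeSketch(int* a, const int* b, int n, int vl) {
  int vtc = (a != b) ? n : 0;          // vtc = a != b ? stc : 0
  int i = 0;
  for (; i < vtc; i += vl) {           // predicated vector loop (whilelo-style control)
    for (int lane = 0; lane < vl && i + lane < vtc; ++lane) {
      a[i + lane] = b[i + lane] + 1;   // lanes past vtc are inactive, not executed
    }
  }
  for (; i < n; ++i) {                 // scalar cleanup; runs the whole loop when vtc == 0
    a[i] = b[i] + 1;
  }
}
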
+ for (auto it : *vector_external_set_) { + DCHECK(it->IsVecOperation()); + HVecOperation* vec_op = it->AsVecOperation(); + + HVecPredSetAll* set_pred = new (global_allocator_) HVecPredSetAll(global_allocator_, + graph_->GetIntConstant(1), + vec_op->GetPackedType(), + vec_op->GetVectorLength(), + 0u); + vec_op->GetBlock()->InsertInstructionBefore(set_pred, vec_op); + vec_op->SetMergingGoverningPredicate(set_pred); + } +} + +void HLoopOptimization::VectorizeTraditional(LoopNode* node, + HBasicBlock* block, + HBasicBlock* exit, + int64_t trip_count) { + DCHECK(!IsInPredicatedVectorizationMode()); + HBasicBlock* header = node->loop_info->GetHeader(); HBasicBlock* preheader = node->loop_info->GetPreHeader(); @@ -1141,7 +1375,7 @@ void HLoopOptimization::Vectorize(LoopNode* node, // A cleanup loop is needed, at least, for any unknown trip count or // for a known trip count with remainder iterations after vectorization. - bool needs_cleanup = !IsInPredicatedVectorizationMode() && + bool needs_cleanup = (trip_count == 0 || ((trip_count - vector_static_peeling_factor_) % chunk) != 0); // Adjust vector bookkeeping. @@ -1160,13 +1394,11 @@ void HLoopOptimization::Vectorize(LoopNode* node, // ptc = <peeling factor>; HInstruction* ptc = nullptr; if (vector_static_peeling_factor_ != 0) { - DCHECK(!IsInPredicatedVectorizationMode()); // Static loop peeling for SIMD alignment (using the most suitable // fixed peeling factor found during prior alignment analysis). DCHECK(vector_dynamic_peeling_candidate_ == nullptr); ptc = graph_->GetConstant(induc_type, vector_static_peeling_factor_); } else if (vector_dynamic_peeling_candidate_ != nullptr) { - DCHECK(!IsInPredicatedVectorizationMode()); // Dynamic loop peeling for SIMD alignment (using the most suitable // candidate found during prior alignment analysis): // rem = offset % ALIGN; // adjusted as #elements @@ -1197,7 +1429,6 @@ void HLoopOptimization::Vectorize(LoopNode* node, HInstruction* stc = induction_range_.GenerateTripCount(node->loop_info, graph_, preheader); HInstruction* vtc = stc; if (needs_cleanup) { - DCHECK(!IsInPredicatedVectorizationMode()); DCHECK(IsPowerOfTwo(chunk)); HInstruction* diff = stc; if (ptc != nullptr) { @@ -1217,7 +1448,7 @@ void HLoopOptimization::Vectorize(LoopNode* node, // Generate runtime disambiguation test: // vtc = a != b ? vtc : 0; - if (vector_runtime_test_a_ != nullptr) { + if (NeedsArrayRefsDisambiguationTest()) { HInstruction* rt = Insert( preheader, new (global_allocator_) HNotEqual(vector_runtime_test_a_, vector_runtime_test_b_)); @@ -1235,45 +1466,52 @@ void HLoopOptimization::Vectorize(LoopNode* node, // moved around during suspend checks, since all analysis was based on // nothing more than the Android runtime alignment conventions. 
if (ptc != nullptr) { - DCHECK(!IsInPredicatedVectorizationMode()); vector_mode_ = kSequential; - GenerateNewLoop(node, - block, - graph_->TransformLoopForVectorization(vector_header_, vector_body_, exit), - vector_index_, - ptc, - graph_->GetConstant(induc_type, 1), - LoopAnalysisInfo::kNoUnrollingFactor); + HBasicBlock* preheader_for_peeling_loop = + graph_->TransformLoopForVectorization(vector_header_, vector_body_, exit); + GenerateNewLoopScalarOrTraditional(node, + preheader_for_peeling_loop, + vector_index_, + ptc, + graph_->GetConstant(induc_type, 1), + LoopAnalysisInfo::kNoUnrollingFactor); } // Generate vector loop, possibly further unrolled: // for ( ; i < vtc; i += chunk) // <vectorized-loop-body> vector_mode_ = kVector; - GenerateNewLoop(node, - block, - graph_->TransformLoopForVectorization(vector_header_, vector_body_, exit), - vector_index_, - vtc, - graph_->GetConstant(induc_type, vector_length_), // increment per unroll - unroll); - HLoopInformation* vloop = vector_header_->GetLoopInformation(); + HBasicBlock* preheader_for_vector_loop = + graph_->TransformLoopForVectorization(vector_header_, vector_body_, exit); + GenerateNewLoopScalarOrTraditional(node, + preheader_for_vector_loop, + vector_index_, + vtc, + graph_->GetConstant(induc_type, vector_length_), // per unroll + unroll); // Generate cleanup loop, if needed: // for ( ; i < stc; i += 1) // <loop-body> if (needs_cleanup) { - DCHECK_IMPLIES(IsInPredicatedVectorizationMode(), vector_runtime_test_a_ != nullptr); vector_mode_ = kSequential; - GenerateNewLoop(node, - block, - graph_->TransformLoopForVectorization(vector_header_, vector_body_, exit), - vector_index_, - stc, - graph_->GetConstant(induc_type, 1), - LoopAnalysisInfo::kNoUnrollingFactor); + HBasicBlock* preheader_for_cleanup_loop = + graph_->TransformLoopForVectorization(vector_header_, vector_body_, exit); + GenerateNewLoopScalarOrTraditional(node, + preheader_for_cleanup_loop, + vector_index_, + stc, + graph_->GetConstant(induc_type, 1), + LoopAnalysisInfo::kNoUnrollingFactor); } + FinalizeVectorization(node); +} + +void HLoopOptimization::FinalizeVectorization(LoopNode* node) { + HBasicBlock* header = node->loop_info->GetHeader(); + HBasicBlock* preheader = node->loop_info->GetPreHeader(); + HLoopInformation* vloop = vector_header_->GetLoopInformation(); // Link reductions to their final uses. for (auto i = reductions_->begin(); i != reductions_->end(); ++i) { if (i->first->IsPhi()) { @@ -1287,9 +1525,17 @@ void HLoopOptimization::Vectorize(LoopNode* node, } } - // Remove the original loop by disconnecting the body block - // and removing all instructions from the header. - block->DisconnectAndDelete(); + // Remove the original loop. 
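
// A minimal sketch (not from this patch), assuming a simple a[i] = b[i] + 1 loop and
// 0 <= ptc <= n: the scalar meaning of the three-loop structure VectorizeTraditional()
// builds above, namely an optional scalar peeling loop for alignment, the main vector loop
// stepping by the (possibly unrolled) chunk, and a scalar cleanup loop for the remainder.
static void TraditionalShapeSketch(int* a, const int* b, int n, int chunk, int ptc) {
  int i = 0;
  for (; i < ptc; ++i) {                          // scalar peeling for SIMD alignment
    a[i] = b[i] + 1;
  }
  int vtc = ptc + ((n - ptc) / chunk) * chunk;    // vector trip count, rounded down to chunk
  for (; i < vtc; i += chunk) {                   // main vector loop
    for (int lane = 0; lane < chunk; ++lane) {
      a[i + lane] = b[i + lane] + 1;              // stands in for the vectorized body
    }
  }
  for (; i < n; ++i) {                            // cleanup for remaining iterations
    a[i] = b[i] + 1;
  }
}
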
+ for (HBlocksInLoopPostOrderIterator it_loop(*node->loop_info); + !it_loop.Done(); + it_loop.Advance()) { + HBasicBlock* cur_block = it_loop.Current(); + if (cur_block == node->loop_info->GetHeader()) { + continue; + } + cur_block->DisconnectAndDelete(); + } + while (!header->GetFirstInstruction()->IsGoto()) { header->RemoveInstruction(header->GetFirstInstruction()); } @@ -1301,14 +1547,7 @@ void HLoopOptimization::Vectorize(LoopNode* node, node->loop_info = vloop; } -void HLoopOptimization::GenerateNewLoop(LoopNode* node, - HBasicBlock* block, - HBasicBlock* new_preheader, - HInstruction* lo, - HInstruction* hi, - HInstruction* step, - uint32_t unroll) { - DCHECK(unroll == 1 || vector_mode_ == kVector); +HPhi* HLoopOptimization::InitializeForNewLoop(HBasicBlock* new_preheader, HInstruction* lo) { DataType::Type induc_type = lo->GetType(); // Prepare new loop. vector_preheader_ = new_preheader, @@ -1318,68 +1557,160 @@ void HLoopOptimization::GenerateNewLoop(LoopNode* node, kNoRegNumber, 0, HPhi::ToPhiType(induc_type)); - // Generate header and prepare body. - // for (i = lo; i < hi; i += step) - // <loop-body> - HInstruction* cond = nullptr; - HInstruction* set_pred = nullptr; - if (IsInPredicatedVectorizationMode()) { - HVecPredWhile* pred_while = - new (global_allocator_) HVecPredWhile(global_allocator_, - phi, - hi, - HVecPredWhile::CondKind::kLO, - DataType::Type::kInt32, - vector_length_, - 0u); - - cond = new (global_allocator_) HVecPredCondition(global_allocator_, - pred_while, - HVecPredCondition::PCondKind::kNFirst, - DataType::Type::kInt32, - vector_length_, - 0u); - - vector_header_->AddPhi(phi); - vector_header_->AddInstruction(pred_while); - vector_header_->AddInstruction(cond); - set_pred = pred_while; - } else { - cond = new (global_allocator_) HAboveOrEqual(phi, hi); - vector_header_->AddPhi(phi); - vector_header_->AddInstruction(cond); - } + vector_header_->AddPhi(phi); + vector_index_ = phi; + vector_permanent_map_->clear(); + vector_external_set_->clear(); + predicate_info_map_->clear(); + + return phi; +} +void HLoopOptimization::GenerateNewLoopScalarOrTraditional(LoopNode* node, + HBasicBlock* new_preheader, + HInstruction* lo, + HInstruction* hi, + HInstruction* step, + uint32_t unroll) { + DCHECK(unroll == 1 || vector_mode_ == kVector); + DataType::Type induc_type = lo->GetType(); + HPhi* phi = InitializeForNewLoop(new_preheader, lo); + + // Generate loop exit check. + HInstruction* cond = new (global_allocator_) HAboveOrEqual(phi, hi); + vector_header_->AddInstruction(cond); vector_header_->AddInstruction(new (global_allocator_) HIf(cond)); - vector_index_ = phi; - vector_permanent_map_->clear(); // preserved over unrolling + for (uint32_t u = 0; u < unroll; u++) { - // Generate instruction map. - vector_map_->clear(); - for (HInstructionIterator it(block->GetInstructions()); !it.Done(); it.Advance()) { + GenerateNewLoopBodyOnce(node, induc_type, step); + } + + FinalizePhisForNewLoop(phi, lo); +} + +void HLoopOptimization::GenerateNewLoopPredicated(LoopNode* node, + HBasicBlock* new_preheader, + HInstruction* lo, + HInstruction* hi, + HInstruction* step) { + DCHECK(IsInPredicatedVectorizationMode()); + DCHECK_EQ(vector_mode_, kVector); + DataType::Type induc_type = lo->GetType(); + HPhi* phi = InitializeForNewLoop(new_preheader, lo); + + // Generate loop exit check. 
+ HVecPredWhile* pred_while = + new (global_allocator_) HVecPredWhile(global_allocator_, + phi, + hi, + HVecPredWhile::CondKind::kLO, + DataType::Type::kInt32, + vector_length_, + 0u); + + HInstruction* cond = + new (global_allocator_) HVecPredToBoolean(global_allocator_, + pred_while, + HVecPredToBoolean::PCondKind::kNFirst, + DataType::Type::kInt32, + vector_length_, + 0u); + + vector_header_->AddInstruction(pred_while); + vector_header_->AddInstruction(cond); + vector_header_->AddInstruction(new (global_allocator_) HIf(cond)); + + PreparePredicateInfoMap(node); + GenerateNewLoopBodyOnce(node, induc_type, step); + InitPredicateInfoMap(node, pred_while); + + // Assign governing predicates for instructions in the loop; the traversal order doesn't matter. + for (HBlocksInLoopIterator block_it(*node->loop_info); + !block_it.Done(); + block_it.Advance()) { + HBasicBlock* cur_block = block_it.Current(); + + for (HInstructionIterator it(cur_block->GetInstructions()); !it.Done(); it.Advance()) { + auto i = vector_map_->find(it.Current()); + if (i != vector_map_->end()) { + HInstruction* instr = i->second; + + if (!instr->IsVecOperation()) { + continue; + } + // There are cases when a vector instruction, which corresponds to some instruction in the + // original scalar loop, is located not in the newly created vector loop but + // in the vector loop preheader (and hence recorded in vector_external_set_). + // + // Governing predicates will be set for such instructions separately. + bool in_vector_loop = vector_header_->GetLoopInformation()->Contains(*instr->GetBlock()); + DCHECK_IMPLIES(!in_vector_loop, + vector_external_set_->find(instr) != vector_external_set_->end()); + + if (in_vector_loop && + !instr->AsVecOperation()->IsPredicated()) { + HVecOperation* op = instr->AsVecOperation(); + HVecPredSetOperation* pred = predicate_info_map_->Get(cur_block)->GetControlPredicate(); + op->SetMergingGoverningPredicate(pred); + } + } + } + } + + FinalizePhisForNewLoop(phi, lo); +} + +void HLoopOptimization::GenerateNewLoopBodyOnce(LoopNode* node, + DataType::Type induc_type, + HInstruction* step) { + // Generate instruction map. + vector_map_->clear(); + HLoopInformation* loop_info = node->loop_info; + + // Traverse the data flow of the loop, in the original program order. + for (HBlocksInLoopReversePostOrderIterator block_it(*loop_info); + !block_it.Done(); + block_it.Advance()) { + HBasicBlock* cur_block = block_it.Current(); + + if (cur_block == loop_info->GetHeader()) { + continue; + } + + for (HInstructionIterator it(cur_block->GetInstructions()); !it.Done(); it.Advance()) { bool vectorized_def = VectorizeDef(node, it.Current(), /*generate_code*/ true); DCHECK(vectorized_def); } - // Generate body from the instruction map, but in original program order. - HEnvironment* env = vector_header_->GetFirstInstruction()->GetEnvironment(); - for (HInstructionIterator it(block->GetInstructions()); !it.Done(); it.Advance()) { + } + + // Generate body from the instruction map, in the original program order. 
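
// A minimal sketch (not from this patch) of the exit condition generated above:
// HVecPredWhile (kLO) marks lane k active iff i + k < vtc, and HVecPredToBoolean (kNFirst)
// becomes true once lane 0 is inactive, i.e. i >= vtc, which is when the HIf leaves the loop.
static bool PredicatedExitCheckSketch(int64_t i, int64_t vtc) {
  return !(i < vtc);
}
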
+ HEnvironment* env = vector_header_->GetFirstInstruction()->GetEnvironment(); + for (HBlocksInLoopReversePostOrderIterator block_it(*loop_info); + !block_it.Done(); + block_it.Advance()) { + HBasicBlock* cur_block = block_it.Current(); + + if (cur_block == loop_info->GetHeader()) { + continue; + } + + for (HInstructionIterator it(cur_block->GetInstructions()); !it.Done(); it.Advance()) { auto i = vector_map_->find(it.Current()); if (i != vector_map_->end() && !i->second->IsInBlock()) { Insert(vector_body_, i->second); - if (IsInPredicatedVectorizationMode() && i->second->IsVecOperation()) { - HVecOperation* op = i->second->AsVecOperation(); - op->SetMergingGoverningPredicate(set_pred); - } // Deal with instructions that need an environment, such as the scalar intrinsics. if (i->second->NeedsEnvironment()) { i->second->CopyEnvironmentFromWithLoopPhiAdjustment(env, vector_header_); } } } - // Generate the induction. - vector_index_ = new (global_allocator_) HAdd(induc_type, vector_index_, step); - Insert(vector_body_, vector_index_); } + // Generate the induction. + vector_index_ = new (global_allocator_) HAdd(induc_type, vector_index_, step); + Insert(vector_body_, vector_index_); +} + +void HLoopOptimization::FinalizePhisForNewLoop(HPhi* phi, HInstruction* lo) { // Finalize phi inputs for the reductions (if any). for (auto i = reductions_->begin(); i != reductions_->end(); ++i) { if (!i->first->IsPhi()) { @@ -1442,10 +1773,13 @@ bool HLoopOptimization::VectorizeDef(LoopNode* node, VectorizeDotProdIdiom(node, instruction, generate_code, type, restrictions) || (TrySetVectorType(type, &restrictions) && VectorizeUse(node, instruction, generate_code, type, restrictions))) { + DCHECK(!instruction->IsPhi()); if (generate_code) { - HInstruction* new_red = vector_map_->Get(instruction); - vector_permanent_map_->Put(new_red, vector_map_->Get(redit->second)); - vector_permanent_map_->Overwrite(redit->second, new_red); + HInstruction* new_red_vec_op = vector_map_->Get(instruction); + HInstruction* original_phi = redit->second; + DCHECK(original_phi->IsPhi()); + vector_permanent_map_->Put(new_red_vec_op, vector_map_->Get(original_phi)); + vector_permanent_map_->Overwrite(original_phi, new_red_vec_op); } return true; } @@ -1455,6 +1789,10 @@ bool HLoopOptimization::VectorizeDef(LoopNode* node, if (instruction->IsGoto()) { return true; } + + if (instruction->IsIf()) { + return VectorizeIfCondition(node, instruction, generate_code, restrictions); + } // Otherwise accept only expressions with no effects outside the immediate loop-body. // Note that actual uses are inspected during right-hand-side tree traversal. return !IsUsedOutsideLoop(node->loop_info, instruction) @@ -1485,9 +1823,7 @@ bool HLoopOptimization::VectorizeUse(LoopNode* node, // Deal with vector restrictions. bool is_string_char_at = instruction->AsArrayGet()->IsStringCharAt(); - if (is_string_char_at && (HasVectorRestrictions(restrictions, kNoStringCharAt) || - IsInPredicatedVectorizationMode())) { - // TODO: Support CharAt for predicated mode. + if (is_string_char_at && (HasVectorRestrictions(restrictions, kNoStringCharAt))) { return false; } // Accept a right-hand-side array base[index] for @@ -1676,6 +2012,7 @@ bool HLoopOptimization::TrySetVectorType(DataType::Type type, uint64_t* restrict case InstructionSet::kThumb2: // Allow vectorization for all ARM devices, because Android assumes that // ARM 32-bit always supports advanced SIMD (64-bit SIMD). 
+ *restrictions |= kNoIfCond; switch (type) { case DataType::Type::kBool: case DataType::Type::kUint8: @@ -1701,6 +2038,13 @@ bool HLoopOptimization::TrySetVectorType(DataType::Type type, uint64_t* restrict DCHECK_EQ(simd_register_size_ % DataType::Size(type), 0u); switch (type) { case DataType::Type::kBool: + *restrictions |= kNoDiv | + kNoSignedHAdd | + kNoUnsignedHAdd | + kNoUnroundedHAdd | + kNoSAD | + kNoIfCond; + return TrySetVectorLength(type, vector_length); case DataType::Type::kUint8: case DataType::Type::kInt8: *restrictions |= kNoDiv | @@ -1712,6 +2056,7 @@ bool HLoopOptimization::TrySetVectorType(DataType::Type type, uint64_t* restrict case DataType::Type::kUint16: case DataType::Type::kInt16: *restrictions |= kNoDiv | + kNoStringCharAt | // TODO: support in predicated mode. kNoSignedHAdd | kNoUnsignedHAdd | kNoUnroundedHAdd | @@ -1722,13 +2067,13 @@ bool HLoopOptimization::TrySetVectorType(DataType::Type type, uint64_t* restrict *restrictions |= kNoDiv | kNoSAD; return TrySetVectorLength(type, vector_length); case DataType::Type::kInt64: - *restrictions |= kNoDiv | kNoSAD; + *restrictions |= kNoDiv | kNoSAD | kNoIfCond; return TrySetVectorLength(type, vector_length); case DataType::Type::kFloat32: - *restrictions |= kNoReduction; + *restrictions |= kNoReduction | kNoIfCond; return TrySetVectorLength(type, vector_length); case DataType::Type::kFloat64: - *restrictions |= kNoReduction; + *restrictions |= kNoReduction | kNoIfCond; return TrySetVectorLength(type, vector_length); default: break; @@ -1737,6 +2082,7 @@ bool HLoopOptimization::TrySetVectorType(DataType::Type type, uint64_t* restrict } else { // Allow vectorization for all ARM devices, because Android assumes that // ARMv8 AArch64 always supports advanced SIMD (128-bit SIMD). + *restrictions |= kNoIfCond; switch (type) { case DataType::Type::kBool: case DataType::Type::kUint8: @@ -1767,6 +2113,7 @@ bool HLoopOptimization::TrySetVectorType(DataType::Type type, uint64_t* restrict case InstructionSet::kX86: case InstructionSet::kX86_64: // Allow vectorization for SSE4.1-enabled X86 devices only (128-bit SIMD). 
+ *restrictions |= kNoIfCond; if (features->AsX86InstructionSetFeatures()->HasSSE4_1()) { switch (type) { case DataType::Type::kBool: @@ -1855,15 +2202,7 @@ void HLoopOptimization::GenerateVecInv(HInstruction* org, DataType::Type type) { vector = new (global_allocator_) HVecReplicateScalar(global_allocator_, input, type, vector_length_, kNoDexPc); vector_permanent_map_->Put(org, Insert(vector_preheader_, vector)); - if (IsInPredicatedVectorizationMode()) { - HVecPredSetAll* set_pred = new (global_allocator_) HVecPredSetAll(global_allocator_, - graph_->GetIntConstant(1), - type, - vector_length_, - 0u); - vector_preheader_->InsertInstructionBefore(set_pred, vector); - vector->AsVecOperation()->SetMergingGoverningPredicate(set_pred); - } + vector_external_set_->insert(vector); } vector_map_->Put(org, vector); } @@ -1936,18 +2275,18 @@ void HLoopOptimization::GenerateVecMem(HInstruction* org, vector_map_->Put(org, vector); } -void HLoopOptimization::GenerateVecReductionPhi(HPhi* phi) { - DCHECK(reductions_->find(phi) != reductions_->end()); - DCHECK(reductions_->Get(phi->InputAt(1)) == phi); +void HLoopOptimization::GenerateVecReductionPhi(HPhi* orig_phi) { + DCHECK(reductions_->find(orig_phi) != reductions_->end()); + DCHECK(reductions_->Get(orig_phi->InputAt(1)) == orig_phi); HInstruction* vector = nullptr; if (vector_mode_ == kSequential) { HPhi* new_phi = new (global_allocator_) HPhi( - global_allocator_, kNoRegNumber, 0, phi->GetType()); + global_allocator_, kNoRegNumber, 0, orig_phi->GetType()); vector_header_->AddPhi(new_phi); vector = new_phi; } else { // Link vector reduction back to prior unrolled update, or a first phi. - auto it = vector_permanent_map_->find(phi); + auto it = vector_permanent_map_->find(orig_phi); if (it != vector_permanent_map_->end()) { vector = it->second; } else { @@ -1957,7 +2296,7 @@ void HLoopOptimization::GenerateVecReductionPhi(HPhi* phi) { vector = new_phi; } } - vector_map_->Put(phi, vector); + vector_map_->Put(orig_phi, vector); } void HLoopOptimization::GenerateVecReductionPhiInputs(HPhi* phi, HInstruction* reduction) { @@ -1992,15 +2331,7 @@ void HLoopOptimization::GenerateVecReductionPhiInputs(HPhi* phi, HInstruction* r vector_length, kNoDexPc)); } - if (IsInPredicatedVectorizationMode()) { - HVecPredSetAll* set_pred = new (global_allocator_) HVecPredSetAll(global_allocator_, - graph_->GetIntConstant(1), - type, - vector_length, - 0u); - vector_preheader_->InsertInstructionBefore(set_pred, new_init); - new_init->AsVecOperation()->SetMergingGoverningPredicate(set_pred); - } + vector_external_set_->insert(new_init); } else { new_init = ReduceAndExtractIfNeeded(new_init); } @@ -2026,23 +2357,15 @@ HInstruction* HLoopOptimization::ReduceAndExtractIfNeeded(HInstruction* instruct // x = REDUCE( [x_1, .., x_n] ) // y = x_1 // along the exit of the defining loop. 
- HInstruction* reduce = new (global_allocator_) HVecReduce( + HVecReduce* reduce = new (global_allocator_) HVecReduce( global_allocator_, instruction, type, vector_length, kind, kNoDexPc); exit->InsertInstructionBefore(reduce, exit->GetFirstInstruction()); + vector_external_set_->insert(reduce); instruction = new (global_allocator_) HVecExtractScalar( global_allocator_, reduce, type, vector_length, 0, kNoDexPc); exit->InsertInstructionAfter(instruction, reduce); - if (IsInPredicatedVectorizationMode()) { - HVecPredSetAll* set_pred = new (global_allocator_) HVecPredSetAll(global_allocator_, - graph_->GetIntConstant(1), - type, - vector_length, - 0u); - exit->InsertInstructionBefore(set_pred, reduce); - reduce->AsVecOperation()->SetMergingGoverningPredicate(set_pred); - instruction->AsVecOperation()->SetMergingGoverningPredicate(set_pred); - } + vector_external_set_->insert(instruction); } } return instruction; @@ -2057,10 +2380,10 @@ HInstruction* HLoopOptimization::ReduceAndExtractIfNeeded(HInstruction* instruct } \ break; -void HLoopOptimization::GenerateVecOp(HInstruction* org, - HInstruction* opa, - HInstruction* opb, - DataType::Type type) { +HInstruction* HLoopOptimization::GenerateVecOp(HInstruction* org, + HInstruction* opa, + HInstruction* opb, + DataType::Type type) { uint32_t dex_pc = org->GetDexPc(); HInstruction* vector = nullptr; DataType::Type org_type = org->GetType(); @@ -2130,11 +2453,23 @@ void HLoopOptimization::GenerateVecOp(HInstruction* org, GENERATE_VEC( new (global_allocator_) HVecAbs(global_allocator_, opa, type, vector_length_, dex_pc), new (global_allocator_) HAbs(org_type, opa, dex_pc)); + case HInstruction::kEqual: { + // Special case. + if (vector_mode_ == kVector) { + vector = new (global_allocator_) HVecCondition( + global_allocator_, opa, opb, type, vector_length_, dex_pc); + } else { + DCHECK(vector_mode_ == kSequential); + UNREACHABLE(); + } + } + break; default: break; } // switch CHECK(vector != nullptr) << "Unsupported SIMD operator"; vector_map_->Put(org, vector); + return vector; } #undef GENERATE_VEC @@ -2374,6 +2709,89 @@ bool HLoopOptimization::VectorizeDotProdIdiom(LoopNode* node, return false; } +bool HLoopOptimization::VectorizeIfCondition(LoopNode* node, + HInstruction* hif, + bool generate_code, + uint64_t restrictions) { + DCHECK(hif->IsIf()); + HInstruction* if_input = hif->InputAt(0); + + if (!if_input->HasOnlyOneNonEnvironmentUse()) { + // Avoid the complications of the condition used as materialized boolean. + return false; + } + + if (!if_input->IsEqual()) { + // TODO: Support other condition types. + return false; + } + + HCondition* cond = if_input->AsCondition(); + HInstruction* opa = cond->InputAt(0); + HInstruction* opb = cond->InputAt(1); + DataType::Type type = GetNarrowerType(opa, opb); + + if (!DataType::IsIntegralType(type)) { + return false; + } + + bool is_unsigned = false; + HInstruction* opa_promoted = opa; + HInstruction* opb_promoted = opb; + bool is_int_case = DataType::Type::kInt32 == opa->GetType() && + DataType::Type::kInt32 == opb->GetType(); + + // Condition arguments should be either both int32 or consistently extended signed/unsigned + // narrower operands. + if (!is_int_case && + !IsNarrowerOperands(opa, opb, type, &opa_promoted, &opb_promoted, &is_unsigned)) { + return false; + } + type = HVecOperation::ToProperType(type, is_unsigned); + + // For narrow types, explicit type conversion may have been + // optimized way, so set the no hi bits restriction here. 
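
// A minimal sketch (not from this patch) of the lane-wise meaning of the HVecCondition /
// HVecPredNot pair used when vectorizing an 'if (a[i] == b[i])' inside the loop body:
// both arms become predicated vector code, and each arm's effects are kept only in the
// lanes selected by its governing predicate.
static void IfConversionSketch(int* a, const int* b, int n) {
  for (int i = 0; i < n; ++i) {
    bool p_true = (a[i] == b[i]);   // HVecCondition: per-lane condition of the HIf
    bool p_false = !p_true;         // HVecPredNot: governing predicate of the false arm
    if (p_true) { a[i] = 1; }       // diamond_true operations run under p_true
    if (p_false) { a[i] = 2; }      // diamond_false operations run under p_false
  }
}
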
+ if (DataType::Size(type) <= 2) { + restrictions |= kNoHiBits; + } + + if (!TrySetVectorType(type, &restrictions) || + HasVectorRestrictions(restrictions, kNoIfCond)) { + return false; + } + + if (generate_code && vector_mode_ != kVector) { // de-idiom + opa_promoted = opa; + opb_promoted = opb; + } + + if (VectorizeUse(node, opa_promoted, generate_code, type, restrictions) && + VectorizeUse(node, opb_promoted, generate_code, type, restrictions)) { + if (generate_code) { + HInstruction* vec_cond = GenerateVecOp(cond, + vector_map_->Get(opa_promoted), + vector_map_->Get(opb_promoted), + type); + + if (vector_mode_ == kVector) { + HInstruction* vec_pred_not = new (global_allocator_) HVecPredNot( + global_allocator_, vec_cond, type, vector_length_, hif->GetDexPc()); + + vector_map_->Put(hif, vec_pred_not); + BlockPredicateInfo* pred_info = predicate_info_map_->Get(hif->GetBlock()); + pred_info->SetControlFlowInfo(vec_cond->AsVecPredSetOperation(), + vec_pred_not->AsVecPredSetOperation()); + } else { + DCHECK(vector_mode_ == kSequential); + UNREACHABLE(); + } + } + return true; + } + + return false; +} + // // Vectorization heuristics. // @@ -2423,6 +2841,8 @@ bool HLoopOptimization::IsVectorizationProfitable(int64_t trip_count) { // TODO: trip count is really unsigned entity, provided the guarding test // is satisfied; deal with this more carefully later uint32_t max_peel = MaxNumberPeeled(); + // Peeling is not supported in predicated mode. + DCHECK_IMPLIES(IsInPredicatedVectorizationMode(), max_peel == 0u); if (vector_length_ == 0) { return false; // nothing found } else if (trip_count < 0) { @@ -2686,4 +3106,67 @@ bool HLoopOptimization::CanRemoveCycle() { return true; } +void HLoopOptimization::PreparePredicateInfoMap(LoopNode* node) { + HLoopInformation* loop_info = node->loop_info; + + DCHECK(IsPredicatedLoopControlFlowSupported(loop_info)); + + for (HBlocksInLoopIterator block_it(*loop_info); + !block_it.Done(); + block_it.Advance()) { + HBasicBlock* cur_block = block_it.Current(); + BlockPredicateInfo* pred_info = new (loop_allocator_) BlockPredicateInfo(); + + predicate_info_map_->Put(cur_block, pred_info); + } +} + +void HLoopOptimization::InitPredicateInfoMap(LoopNode* node, + HVecPredSetOperation* loop_main_pred) { + HLoopInformation* loop_info = node->loop_info; + HBasicBlock* header = loop_info->GetHeader(); + BlockPredicateInfo* header_info = predicate_info_map_->Get(header); + // Loop header is a special case; it doesn't have a false predicate because we + // would just exit the loop then. + header_info->SetControlFlowInfo(loop_main_pred, loop_main_pred); + + size_t blocks_in_loop = header->GetLoopInformation()->GetBlocks().NumSetBits(); + if (blocks_in_loop == 2) { + for (HBasicBlock* successor : header->GetSuccessors()) { + if (loop_info->Contains(*successor)) { + // This is loop second block - body. + BlockPredicateInfo* body_info = predicate_info_map_->Get(successor); + body_info->SetControlPredicate(loop_main_pred); + return; + } + } + UNREACHABLE(); + } + + // TODO: support predicated vectorization of CF loop of more complex structure. + DCHECK(HasLoopDiamondStructure(loop_info)); + HBasicBlock* header_succ_0 = header->GetSuccessors()[0]; + HBasicBlock* header_succ_1 = header->GetSuccessors()[1]; + HBasicBlock* diamond_top = loop_info->Contains(*header_succ_0) ? 
+ header_succ_0 : + header_succ_1; + + HIf* diamond_hif = diamond_top->GetLastInstruction()->AsIf(); + HBasicBlock* diamond_true = diamond_hif->IfTrueSuccessor(); + HBasicBlock* diamond_false = diamond_hif->IfFalseSuccessor(); + HBasicBlock* back_edge = diamond_true->GetSingleSuccessor(); + + BlockPredicateInfo* diamond_top_info = predicate_info_map_->Get(diamond_top); + BlockPredicateInfo* diamond_true_info = predicate_info_map_->Get(diamond_true); + BlockPredicateInfo* diamond_false_info = predicate_info_map_->Get(diamond_false); + BlockPredicateInfo* back_edge_info = predicate_info_map_->Get(back_edge); + + diamond_top_info->SetControlPredicate(header_info->GetTruePredicate()); + + diamond_true_info->SetControlPredicate(diamond_top_info->GetTruePredicate()); + diamond_false_info->SetControlPredicate(diamond_top_info->GetFalsePredicate()); + + back_edge_info->SetControlPredicate(header_info->GetTruePredicate()); +} + } // namespace art diff --git a/compiler/optimizing/loop_optimization.h b/compiler/optimizing/loop_optimization.h index 6dd778ba74..86a9f0fcb8 100644 --- a/compiler/optimizing/loop_optimization.h +++ b/compiler/optimizing/loop_optimization.h @@ -101,6 +101,7 @@ class HLoopOptimization : public HOptimization { kNoSAD = 1 << 11, // no sum of absolute differences (SAD) kNoWideSAD = 1 << 12, // no sum of absolute differences (SAD) with operand widening kNoDotProd = 1 << 13, // no dot product + kNoIfCond = 1 << 14, // no if condition conversion }; /* @@ -136,6 +137,95 @@ class HLoopOptimization : public HOptimization { bool is_string_char_at; // compressed string read }; + // This structure describes the control flow (CF) -> data flow (DF) conversion of the loop + // with control flow (see below) for the purpose of predicated autovectorization. + // + // Lets define "loops without control-flow" (or non-CF loops) as loops with two consecutive + // blocks and without the branching structure except for the loop exit. And + // "loop with control-flow" (or CF-loops) - all other loops. + // + // In the execution of the original CF-loop on each iteration some basic block Y will be + // either executed or not executed, depending on the control flow of the loop. More + // specifically, a block will be executed if all the conditional branches of the nodes in + // the control dependency graph for that block Y are taken according to the path from the loop + // header to that basic block. + // + // This is the key idea of CF->DF conversion: a boolean value + // 'ctrl_pred == cond1 && cond2 && ...' will determine whether the basic block Y will be + // executed, where cond_K is whether the branch of the node K in the control dependency + // graph upward traversal was taken in the 'right' direction. + // + // Def.: BB Y is control dependent on BB X iff + // (1) there exists a directed path P from X to Y with any basic block Z in P (excluding X + // and Y) post-dominated by Y and + // (2) X is not post-dominated by Y. + // ... + // X + // false / \ true + // / \ + // ... + // | + // Y + // ... + // + // When doing predicated autovectorization of a CF loop, we use the CF->DF conversion approach: + // 1) do the data analysis and vector operation creation as if it was a non-CF loop. + // 2) for each HIf block create two vector predicate setting instructions - for True and False + // edges/paths. 
+  //    3) assign a governing vector predicate (see comments near HVecPredSetOperation) +  //       to each vector operation Alpha in the loop (including to those vector predicate setting +  //       instructions created in #2); do this by: +  //        - finding the immediate control dependent block of the instruction Alpha's block. +  //        - choosing the True or False predicate setting instruction (created in #2) depending +  //          on the path to the instruction. +  // +  // For more information, check the papers: +  // +  //  - Allen, John R and Kennedy, Ken and Porterfield, Carrie and Warren, Joe, +  //    “Conversion of Control Dependence to Data Dependence,” in Proceedings of the 10th ACM +  //    SIGACT-SIGPLAN Symposium on Principles of Programming Languages, 1983, pp. 177–189. +  //  - JEANNE FERRANTE, KARL J. OTTENSTEIN, JOE D. WARREN, +  //    "The Program Dependence Graph and Its Use in Optimization" +  // +  class BlockPredicateInfo : public ArenaObject<kArenaAllocLoopOptimization> { +   public: +    BlockPredicateInfo() : +        control_predicate_(nullptr), +        true_predicate_(nullptr), +        false_predicate_(nullptr) {} + +    void SetControlFlowInfo(HVecPredSetOperation* true_predicate, +                            HVecPredSetOperation* false_predicate) { +      DCHECK(!HasControlFlowOps()); +      true_predicate_ = true_predicate; +      false_predicate_ = false_predicate; +    } + +    bool HasControlFlowOps() const { +      // Note: a block must have both T/F predicates set or none of them. +      DCHECK_EQ(true_predicate_ == nullptr, false_predicate_ == nullptr); +      return true_predicate_ != nullptr; +    } + +    HVecPredSetOperation* GetControlPredicate() const { return control_predicate_; } +    void SetControlPredicate(HVecPredSetOperation* control_predicate) { +      control_predicate_ = control_predicate; +    } + +    HVecPredSetOperation* GetTruePredicate() const { return true_predicate_; } +    HVecPredSetOperation* GetFalsePredicate() const { return false_predicate_; } + +   private: +    // Vector control predicate operation associated with the block; it determines +    // the active lanes for all vector operations originating from this block. +    HVecPredSetOperation* control_predicate_; + +    // Vector predicate instruction, associated with the true successor of the block. +    HVecPredSetOperation* true_predicate_; +    // Vector predicate instruction, associated with the false successor of the block. +    HVecPredSetOperation* false_predicate_; +  }; + // // Loop setup and traversal. // @@ -203,15 +293,95 @@ class HLoopOptimization : public HOptimization { // Vectorization analysis and synthesis. // -  bool ShouldVectorize(LoopNode* node, HBasicBlock* block, int64_t trip_count); -  void Vectorize(LoopNode* node, HBasicBlock* block, HBasicBlock* exit, int64_t trip_count); -  void GenerateNewLoop(LoopNode* node, -                       HBasicBlock* block, -                       HBasicBlock* new_preheader, -                       HInstruction* lo, -                       HInstruction* hi, -                       HInstruction* step, -                       uint32_t unroll); +  // Returns whether the data flow requirements are met for vectorization. +  // +  //   - checks whether instructions are vectorizable for the target. +  //   - conducts data dependence analysis for array references. +  //   - additionally, collects info on peeling and alignment strategy. +  bool CanVectorizeDataFlow(LoopNode* node, HBasicBlock* header, bool collect_alignment_info); + +  // Performs the checks (common to predicated and traditional modes) for the loop. +  bool ShouldVectorizeCommon(LoopNode* node, HPhi* main_phi, int64_t trip_count); + +  // Tries to vectorize the loop; returns whether it was successful. 
+  // +  // There are two versions/algorithms: +  //  - Predicated: all the vector operations have governing predicates which control +  //    which individual vector lanes will be active (see HVecPredSetOperation for more details). +  //    Example: vectorization using AArch64 SVE. +  //  - Traditional: a regular mode in which all vector operation lanes are unconditionally +  //    active. +  //    Example: vectorization using AArch64 NEON. +  bool TryVectorizePredicated(LoopNode* node, +                              HBasicBlock* body, +                              HBasicBlock* exit, +                              HPhi* main_phi, +                              int64_t trip_count); + +  bool TryVectorizedTraditional(LoopNode* node, +                                HBasicBlock* body, +                                HBasicBlock* exit, +                                HPhi* main_phi, +                                int64_t trip_count); + +  // Vectorizes the loop for which all checks have already been done. +  void VectorizePredicated(LoopNode* node, +                           HBasicBlock* block, +                           HBasicBlock* exit); +  void VectorizeTraditional(LoopNode* node, +                            HBasicBlock* block, +                            HBasicBlock* exit, +                            int64_t trip_count); + +  // Performs the final steps of the whole vectorization process: links reductions, removes the +  // original scalar loop, updates loop info. +  void FinalizeVectorization(LoopNode* node); + +  // Helpers that do the vector instruction synthesis for the previously created loop; create +  // and fill the loop body with instructions. +  // +  // A version to generate a vector loop in predicated mode. +  void GenerateNewLoopPredicated(LoopNode* node, +                                 HBasicBlock* new_preheader, +                                 HInstruction* lo, +                                 HInstruction* hi, +                                 HInstruction* step); + +  // A version to generate a vector loop in traditional mode or to generate +  // a scalar loop for both modes. +  void GenerateNewLoopScalarOrTraditional(LoopNode* node, +                                          HBasicBlock* new_preheader, +                                          HInstruction* lo, +                                          HInstruction* hi, +                                          HInstruction* step, +                                          uint32_t unroll); + +  // +  // Helpers for GenerateNewLoop*. +  // + +  // Updates vectorization bookkeeping data for the new loop, creates and returns +  // its main induction Phi. +  HPhi* InitializeForNewLoop(HBasicBlock* new_preheader, HInstruction* lo); + +  // Finalizes reduction and induction phis' inputs for the newly created loop. +  void FinalizePhisForNewLoop(HPhi* phi, HInstruction* lo); + +  // Creates an empty predicate info object for each basic block and puts it into the map. +  void PreparePredicateInfoMap(LoopNode* node); + +  // Sets up block true/false predicates using info collected through data flow and control +  // dependency analysis. +  void InitPredicateInfoMap(LoopNode* node, HVecPredSetOperation* loop_main_pred); + +  // Performs instruction synthesis for the loop body. +  void GenerateNewLoopBodyOnce(LoopNode* node, +                               DataType::Type induc_type, +                               HInstruction* step); + +  // Returns whether the vector loop needs a runtime disambiguation test for array refs. +  bool NeedsArrayRefsDisambiguationTest() const { return vector_runtime_test_a_ != nullptr; } +  bool VectorizeDef(LoopNode* node, HInstruction* instruction, bool generate_code); bool VectorizeUse(LoopNode* node, HInstruction* instruction, @@ -239,10 +409,10 @@ class HLoopOptimization : public HOptimization { void GenerateVecReductionPhi(HPhi* phi); void GenerateVecReductionPhiInputs(HPhi* phi, HInstruction* reduction); HInstruction* ReduceAndExtractIfNeeded(HInstruction* instruction); -  void GenerateVecOp(HInstruction* org, -                     HInstruction* opa, -                     HInstruction* opb, -                     DataType::Type type); +  HInstruction* GenerateVecOp(HInstruction* org, +                              HInstruction* opa, +                              HInstruction* opb, +                              DataType::Type type); // Vectorization idioms. 
bool VectorizeSaturationIdiom(LoopNode* node, @@ -265,6 +435,10 @@ class HLoopOptimization : public HOptimization { bool generate_code, DataType::Type type, uint64_t restrictions); + bool VectorizeIfCondition(LoopNode* node, + HInstruction* instruction, + bool generate_code, + uint64_t restrictions); // Vectorization heuristics. Alignment ComputeAlignment(HInstruction* offset, @@ -369,6 +543,16 @@ class HLoopOptimization : public HOptimization { // Contents reside in phase-local heap memory. ScopedArenaSafeMap<HInstruction*, HInstruction*>* vector_permanent_map_; + // Tracks vector operations that are inserted outside of the loop (preheader, exit) + // as part of vectorization (e.g. replicate scalar for loop invariants and reduce ops + // for loop reductions). + ScopedArenaSet<HInstruction*>* vector_external_set_; + + // A mapping between a basic block of the original loop and its associated PredicateInfo. + // + // Only used in predicated loop vectorization mode. + ScopedArenaSafeMap<HBasicBlock*, BlockPredicateInfo*>* predicate_info_map_; + // Temporary vectorization bookkeeping. VectorMode vector_mode_; // synthesis mode HBasicBlock* vector_preheader_; // preheader of the new loop diff --git a/compiler/optimizing/loop_optimization_test.cc b/compiler/optimizing/loop_optimization_test.cc index 7f694fb655..49e3c0418f 100644 --- a/compiler/optimizing/loop_optimization_test.cc +++ b/compiler/optimizing/loop_optimization_test.cc @@ -30,6 +30,7 @@ namespace art HIDDEN { class LoopOptimizationTest : public OptimizingUnitTest { protected: void SetUp() override { + TEST_SETUP_DISABLED_FOR_RISCV64(); OptimizingUnitTest::SetUp(); graph_ = CreateGraph(); @@ -44,6 +45,7 @@ class LoopOptimizationTest : public OptimizingUnitTest { } void TearDown() override { + TEST_TEARDOWN_DISABLED_FOR_RISCV64(); codegen_.reset(); compiler_options_.reset(); graph_ = nullptr; @@ -134,17 +136,20 @@ class LoopOptimizationTest : public OptimizingUnitTest { // TEST_F(LoopOptimizationTest, NoLoops) { + TEST_DISABLED_FOR_RISCV64(); PerformAnalysis(); EXPECT_EQ("", LoopStructure()); } TEST_F(LoopOptimizationTest, SingleLoop) { + TEST_DISABLED_FOR_RISCV64(); AddLoop(entry_block_, return_block_); PerformAnalysis(); EXPECT_EQ("[]", LoopStructure()); } TEST_F(LoopOptimizationTest, LoopNest10) { + TEST_DISABLED_FOR_RISCV64(); HBasicBlock* b = entry_block_; HBasicBlock* s = return_block_; for (int i = 0; i < 10; i++) { @@ -156,6 +161,7 @@ TEST_F(LoopOptimizationTest, LoopNest10) { } TEST_F(LoopOptimizationTest, LoopSequence10) { + TEST_DISABLED_FOR_RISCV64(); HBasicBlock* b = entry_block_; HBasicBlock* s = return_block_; for (int i = 0; i < 10; i++) { @@ -167,6 +173,7 @@ TEST_F(LoopOptimizationTest, LoopSequence10) { } TEST_F(LoopOptimizationTest, LoopSequenceOfNests) { + TEST_DISABLED_FOR_RISCV64(); HBasicBlock* b = entry_block_; HBasicBlock* s = return_block_; for (int i = 0; i < 10; i++) { @@ -194,6 +201,7 @@ TEST_F(LoopOptimizationTest, LoopSequenceOfNests) { } TEST_F(LoopOptimizationTest, LoopNestWithSequence) { + TEST_DISABLED_FOR_RISCV64(); HBasicBlock* b = entry_block_; HBasicBlock* s = return_block_; for (int i = 0; i < 10; i++) { @@ -215,6 +223,7 @@ TEST_F(LoopOptimizationTest, LoopNestWithSequence) { // // This is a test for nodes.cc functionality - HGraph::SimplifyLoop. TEST_F(LoopOptimizationTest, SimplifyLoopReoderPredecessors) { + TEST_DISABLED_FOR_RISCV64(); // Can't use AddLoop as we want special order for blocks predecessors. 
HBasicBlock* header = new (GetAllocator()) HBasicBlock(graph_); HBasicBlock* body = new (GetAllocator()) HBasicBlock(graph_); @@ -260,6 +269,7 @@ TEST_F(LoopOptimizationTest, SimplifyLoopReoderPredecessors) { // // This is a test for nodes.cc functionality - HGraph::SimplifyLoop. TEST_F(LoopOptimizationTest, SimplifyLoopSinglePreheader) { + TEST_DISABLED_FOR_RISCV64(); HBasicBlock* header = AddLoop(entry_block_, return_block_); header->InsertInstructionBefore( diff --git a/compiler/optimizing/nodes.cc b/compiler/optimizing/nodes.cc index 3790058879..2cfe5b3ae2 100644 --- a/compiler/optimizing/nodes.cc +++ b/compiler/optimizing/nodes.cc @@ -35,7 +35,9 @@ #include "class_root-inl.h" #include "code_generator.h" #include "common_dominator.h" +#include "intrinsic_objects.h" #include "intrinsics.h" +#include "intrinsics_list.h" #include "mirror/class-inl.h" #include "scoped_thread_state_change-inl.h" #include "ssa_builder.h" @@ -254,6 +256,14 @@ GraphAnalysisResult HGraph::BuildDominatorTree() { return kAnalysisSuccess; } +GraphAnalysisResult HGraph::RecomputeDominatorTree() { + DCHECK(!HasIrreducibleLoops()) << "Recomputing loop information in graphs with irreducible loops " + << "is unsupported, as it could lead to loop header changes"; + ClearLoopInformation(); + ClearDominanceInformation(); + return BuildDominatorTree(); +} + void HGraph::ClearDominanceInformation() { for (HBasicBlock* block : GetActiveBlocks()) { block->ClearDominanceInformation(); @@ -1488,12 +1498,12 @@ bool HInstructionList::FoundBefore(const HInstruction* instruction1, const HInstruction* instruction2) const { DCHECK_EQ(instruction1->GetBlock(), instruction2->GetBlock()); for (HInstructionIterator it(*this); !it.Done(); it.Advance()) { - if (it.Current() == instruction1) { - return true; - } if (it.Current() == instruction2) { return false; } + if (it.Current() == instruction1) { + return true; + } } LOG(FATAL) << "Did not find an order between two instructions of the same block."; UNREACHABLE(); @@ -1815,10 +1825,12 @@ void HGraphVisitor::VisitBasicBlock(HBasicBlock* block) { } } -HConstant* HTypeConversion::TryStaticEvaluation() const { - HGraph* graph = GetBlock()->GetGraph(); - if (GetInput()->IsIntConstant()) { - int32_t value = GetInput()->AsIntConstant()->GetValue(); +HConstant* HTypeConversion::TryStaticEvaluation() const { return TryStaticEvaluation(GetInput()); } + +HConstant* HTypeConversion::TryStaticEvaluation(HInstruction* input) const { + HGraph* graph = input->GetBlock()->GetGraph(); + if (input->IsIntConstant()) { + int32_t value = input->AsIntConstant()->GetValue(); switch (GetResultType()) { case DataType::Type::kInt8: return graph->GetIntConstant(static_cast<int8_t>(value), GetDexPc()); @@ -1837,8 +1849,8 @@ HConstant* HTypeConversion::TryStaticEvaluation() const { default: return nullptr; } - } else if (GetInput()->IsLongConstant()) { - int64_t value = GetInput()->AsLongConstant()->GetValue(); + } else if (input->IsLongConstant()) { + int64_t value = input->AsLongConstant()->GetValue(); switch (GetResultType()) { case DataType::Type::kInt8: return graph->GetIntConstant(static_cast<int8_t>(value), GetDexPc()); @@ -1857,8 +1869,8 @@ HConstant* HTypeConversion::TryStaticEvaluation() const { default: return nullptr; } - } else if (GetInput()->IsFloatConstant()) { - float value = GetInput()->AsFloatConstant()->GetValue(); + } else if (input->IsFloatConstant()) { + float value = input->AsFloatConstant()->GetValue(); switch (GetResultType()) { case DataType::Type::kInt32: if (std::isnan(value)) @@ -1881,8 
+1893,8 @@ HConstant* HTypeConversion::TryStaticEvaluation() const { default: return nullptr; } - } else if (GetInput()->IsDoubleConstant()) { - double value = GetInput()->AsDoubleConstant()->GetValue(); + } else if (input->IsDoubleConstant()) { + double value = input->AsDoubleConstant()->GetValue(); switch (GetResultType()) { case DataType::Type::kInt32: if (std::isnan(value)) @@ -1909,41 +1921,47 @@ HConstant* HTypeConversion::TryStaticEvaluation() const { return nullptr; } -HConstant* HUnaryOperation::TryStaticEvaluation() const { - if (GetInput()->IsIntConstant()) { - return Evaluate(GetInput()->AsIntConstant()); - } else if (GetInput()->IsLongConstant()) { - return Evaluate(GetInput()->AsLongConstant()); +HConstant* HUnaryOperation::TryStaticEvaluation() const { return TryStaticEvaluation(GetInput()); } + +HConstant* HUnaryOperation::TryStaticEvaluation(HInstruction* input) const { + if (input->IsIntConstant()) { + return Evaluate(input->AsIntConstant()); + } else if (input->IsLongConstant()) { + return Evaluate(input->AsLongConstant()); } else if (kEnableFloatingPointStaticEvaluation) { - if (GetInput()->IsFloatConstant()) { - return Evaluate(GetInput()->AsFloatConstant()); - } else if (GetInput()->IsDoubleConstant()) { - return Evaluate(GetInput()->AsDoubleConstant()); + if (input->IsFloatConstant()) { + return Evaluate(input->AsFloatConstant()); + } else if (input->IsDoubleConstant()) { + return Evaluate(input->AsDoubleConstant()); } } return nullptr; } HConstant* HBinaryOperation::TryStaticEvaluation() const { - if (GetLeft()->IsIntConstant() && GetRight()->IsIntConstant()) { - return Evaluate(GetLeft()->AsIntConstant(), GetRight()->AsIntConstant()); - } else if (GetLeft()->IsLongConstant()) { - if (GetRight()->IsIntConstant()) { + return TryStaticEvaluation(GetLeft(), GetRight()); +} + +HConstant* HBinaryOperation::TryStaticEvaluation(HInstruction* left, HInstruction* right) const { + if (left->IsIntConstant() && right->IsIntConstant()) { + return Evaluate(left->AsIntConstant(), right->AsIntConstant()); + } else if (left->IsLongConstant()) { + if (right->IsIntConstant()) { // The binop(long, int) case is only valid for shifts and rotations. DCHECK(IsShl() || IsShr() || IsUShr() || IsRor()) << DebugName(); - return Evaluate(GetLeft()->AsLongConstant(), GetRight()->AsIntConstant()); - } else if (GetRight()->IsLongConstant()) { - return Evaluate(GetLeft()->AsLongConstant(), GetRight()->AsLongConstant()); + return Evaluate(left->AsLongConstant(), right->AsIntConstant()); + } else if (right->IsLongConstant()) { + return Evaluate(left->AsLongConstant(), right->AsLongConstant()); } - } else if (GetLeft()->IsNullConstant() && GetRight()->IsNullConstant()) { + } else if (left->IsNullConstant() && right->IsNullConstant()) { // The binop(null, null) case is only valid for equal and not-equal conditions. 
DCHECK(IsEqual() || IsNotEqual()) << DebugName(); - return Evaluate(GetLeft()->AsNullConstant(), GetRight()->AsNullConstant()); + return Evaluate(left->AsNullConstant(), right->AsNullConstant()); } else if (kEnableFloatingPointStaticEvaluation) { - if (GetLeft()->IsFloatConstant() && GetRight()->IsFloatConstant()) { - return Evaluate(GetLeft()->AsFloatConstant(), GetRight()->AsFloatConstant()); - } else if (GetLeft()->IsDoubleConstant() && GetRight()->IsDoubleConstant()) { - return Evaluate(GetLeft()->AsDoubleConstant(), GetRight()->AsDoubleConstant()); + if (left->IsFloatConstant() && right->IsFloatConstant()) { + return Evaluate(left->AsFloatConstant(), right->AsFloatConstant()); + } else if (left->IsDoubleConstant() && right->IsDoubleConstant()) { + return Evaluate(left->AsDoubleConstant(), right->AsDoubleConstant()); } } return nullptr; @@ -2797,8 +2815,11 @@ HInstruction* HGraph::InlineInto(HGraph* outer_graph, HInvoke* invoke) { if (HasMonitorOperations()) { outer_graph->SetHasMonitorOperations(true); } - if (HasSIMD()) { - outer_graph->SetHasSIMD(true); + if (HasTraditionalSIMD()) { + outer_graph->SetHasTraditionalSIMD(true); + } + if (HasPredicatedSIMD()) { + outer_graph->SetHasPredicatedSIMD(true); } if (HasAlwaysThrowingInvokes()) { outer_graph->SetHasAlwaysThrowingInvokes(true); @@ -2989,12 +3010,7 @@ HInstruction* HGraph::InlineInto(HGraph* outer_graph, HInvoke* invoke) { } } if (rerun_loop_analysis) { - DCHECK(!outer_graph->HasIrreducibleLoops()) - << "Recomputing loop information in graphs with irreducible loops " - << "is unsupported, as it could lead to loop header changes"; - outer_graph->ClearLoopInformation(); - outer_graph->ClearDominanceInformation(); - outer_graph->BuildDominatorTree(); + outer_graph->RecomputeDominatorTree(); } else if (rerun_dominance) { outer_graph->ClearDominanceInformation(); outer_graph->ComputeDominanceInformation(); @@ -3026,9 +3042,9 @@ HInstruction* HGraph::InlineInto(HGraph* outer_graph, HInvoke* invoke) { replacement = outer_graph->GetDoubleConstant( current->AsDoubleConstant()->GetValue(), current->GetDexPc()); } else if (current->IsParameterValue()) { - if (kIsDebugBuild - && invoke->IsInvokeStaticOrDirect() - && invoke->AsInvokeStaticOrDirect()->IsStaticWithExplicitClinitCheck()) { + if (kIsDebugBuild && + invoke->IsInvokeStaticOrDirect() && + invoke->AsInvokeStaticOrDirect()->IsStaticWithExplicitClinitCheck()) { // Ensure we do not use the last input of `invoke`, as it // contains a clinit check which is not an actual argument. size_t last_input_index = invoke->InputCount() - 1; @@ -3125,6 +3141,8 @@ void HGraph::TransformLoopHeaderForBCE(HBasicBlock* header) { new_pre_header, old_pre_header, /* replace_if_back_edge= */ false); } +// Creates a new two-basic-block loop and inserts it between original loop header and +// original loop exit; also adjusts dominators, post order and new LoopInformation. 
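InlineInto above now delegates to the new HGraph::RecomputeDominatorTree() helper instead of clearing loop and dominance information by hand. A hedged sketch of how any CFG-modifying pass could rely on it; the wrapper function is hypothetical, only the HGraph calls come from this change:

#include "nodes.h"

namespace art HIDDEN {

// Illustrative only: after structural CFG edits, rebuild the analyses in one call.
// RecomputeDominatorTree() DCHECKs that the graph has no irreducible loops, clears
// loop and dominance information, and re-runs BuildDominatorTree().
void RebuildAnalysesAfterCfgEdit(HGraph* graph) {
  if (graph->RecomputeDominatorTree() != kAnalysisSuccess) {
    LOG(FATAL) << "Unexpected failure rebuilding the dominator tree";
  }
}

}  // namespace art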
HBasicBlock* HGraph::TransformLoopForVectorization(HBasicBlock* header, HBasicBlock* body, HBasicBlock* exit) { @@ -3346,6 +3364,21 @@ std::ostream& operator<<(std::ostream& os, HInvokeStaticOrDirect::ClinitCheckReq } } +bool HInvokeStaticOrDirect::CanBeNull() const { + if (GetType() != DataType::Type::kReference || IsStringInit()) { + return false; + } + switch (GetIntrinsic()) { +#define DEFINE_BOXED_CASE(name, unused1, unused2, unused3, unused4) \ + case Intrinsics::k##name##ValueOf: \ + return false; + BOXED_TYPES(DEFINE_BOXED_CASE) +#undef DEFINE_BOXED_CASE + default: + return true; + } +} + bool HInvokeVirtual::CanDoImplicitNullCheckOn(HInstruction* obj) const { if (obj != InputAt(0)) { return false; @@ -3518,9 +3551,7 @@ std::ostream& operator<<(std::ostream& os, TypeCheckKind rhs) { static_assert( \ static_cast<uint32_t>(Intrinsics::k ## Name) <= (kAccIntrinsicBits >> CTZ(kAccIntrinsicBits)), \ "Instrinsics enumeration space overflow."); -#include "intrinsics_list.h" - INTRINSICS_LIST(CHECK_INTRINSICS_ENUM_VALUES) -#undef INTRINSICS_LIST + ART_INTRINSICS_LIST(CHECK_INTRINSICS_ENUM_VALUES) #undef CHECK_INTRINSICS_ENUM_VALUES // Function that returns whether an intrinsic needs an environment or not. @@ -3531,9 +3562,7 @@ static inline IntrinsicNeedsEnvironment NeedsEnvironmentIntrinsic(Intrinsics i) #define OPTIMIZING_INTRINSICS(Name, InvokeType, NeedsEnv, SideEffects, Exceptions, ...) \ case Intrinsics::k ## Name: \ return NeedsEnv; -#include "intrinsics_list.h" - INTRINSICS_LIST(OPTIMIZING_INTRINSICS) -#undef INTRINSICS_LIST + ART_INTRINSICS_LIST(OPTIMIZING_INTRINSICS) #undef OPTIMIZING_INTRINSICS } return kNeedsEnvironment; @@ -3547,9 +3576,7 @@ static inline IntrinsicSideEffects GetSideEffectsIntrinsic(Intrinsics i) { #define OPTIMIZING_INTRINSICS(Name, InvokeType, NeedsEnv, SideEffects, Exceptions, ...) \ case Intrinsics::k ## Name: \ return SideEffects; -#include "intrinsics_list.h" - INTRINSICS_LIST(OPTIMIZING_INTRINSICS) -#undef INTRINSICS_LIST + ART_INTRINSICS_LIST(OPTIMIZING_INTRINSICS) #undef OPTIMIZING_INTRINSICS } return kAllSideEffects; @@ -3563,9 +3590,7 @@ static inline IntrinsicExceptions GetExceptionsIntrinsic(Intrinsics i) { #define OPTIMIZING_INTRINSICS(Name, InvokeType, NeedsEnv, SideEffects, Exceptions, ...) 
\ case Intrinsics::k ## Name: \ return Exceptions; -#include "intrinsics_list.h" - INTRINSICS_LIST(OPTIMIZING_INTRINSICS) -#undef INTRINSICS_LIST + ART_INTRINSICS_LIST(OPTIMIZING_INTRINSICS) #undef OPTIMIZING_INTRINSICS } return kCanThrow; diff --git a/compiler/optimizing/nodes.h b/compiler/optimizing/nodes.h index 28112d176a..0efe8f4335 100644 --- a/compiler/optimizing/nodes.h +++ b/compiler/optimizing/nodes.h @@ -253,7 +253,7 @@ class ReferenceTypeInfo : ValueObject { bool IsNonPrimitiveArrayClass() const REQUIRES_SHARED(Locks::mutator_lock_) { DCHECK(IsValid()); - return GetTypeHandle()->IsArrayClass() && !GetTypeHandle()->IsPrimitiveArray(); + return IsArrayClass() && !GetTypeHandle()->IsPrimitiveArray(); } bool CanArrayHold(ReferenceTypeInfo rti) const REQUIRES_SHARED(Locks::mutator_lock_) { @@ -403,7 +403,8 @@ class HGraph : public ArenaObject<kArenaAllocGraph> { has_bounds_checks_(false), has_try_catch_(false), has_monitor_operations_(false), - has_simd_(false), + has_traditional_simd_(false), + has_predicated_simd_(false), has_loops_(false), has_irreducible_loops_(false), has_direct_critical_native_call_(false), @@ -466,6 +467,7 @@ class HGraph : public ArenaObject<kArenaAllocGraph> { void ClearLoopInformation(); void FindBackEdges(ArenaBitVector* visited); GraphAnalysisResult BuildDominatorTree(); + GraphAnalysisResult RecomputeDominatorTree(); void SimplifyCFG(); void SimplifyCatchBlocks(); @@ -708,8 +710,13 @@ class HGraph : public ArenaObject<kArenaAllocGraph> { bool HasMonitorOperations() const { return has_monitor_operations_; } void SetHasMonitorOperations(bool value) { has_monitor_operations_ = value; } - bool HasSIMD() const { return has_simd_; } - void SetHasSIMD(bool value) { has_simd_ = value; } + bool HasTraditionalSIMD() { return has_traditional_simd_; } + void SetHasTraditionalSIMD(bool value) { has_traditional_simd_ = value; } + + bool HasPredicatedSIMD() { return has_predicated_simd_; } + void SetHasPredicatedSIMD(bool value) { has_predicated_simd_ = value; } + + bool HasSIMD() const { return has_traditional_simd_ || has_predicated_simd_; } bool HasLoops() const { return has_loops_; } void SetHasLoops(bool value) { has_loops_ = value; } @@ -822,10 +829,11 @@ class HGraph : public ArenaObject<kArenaAllocGraph> { // DexRegisterMap to be present to allow deadlock analysis for non-debuggable code. bool has_monitor_operations_; - // Flag whether SIMD instructions appear in the graph. If true, the - // code generators may have to be more careful spilling the wider + // Flags whether SIMD (traditional or predicated) instructions appear in the graph. + // If either is true, the code generators may have to be more careful spilling the wider // contents of SIMD registers. - bool has_simd_; + bool has_traditional_simd_; + bool has_predicated_simd_; // Flag whether there are any loops in the graph. We can skip loop // optimization if it's false. 
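With has_simd_ split into traditional and predicated flags, HasSIMD() is now simply their disjunction, and producers of vector code set the flag matching the vectorization mode they emit. A sketch of that contract using only the accessors added above; the helper name and its boolean parameter are illustrative:

#include "nodes.h"

namespace art HIDDEN {

// Illustrative helper: record which flavour of SIMD a vectorizer generated.
void MarkGraphSimd(HGraph* graph, bool used_predication) {
  if (used_predication) {
    graph->SetHasPredicatedSIMD(true);   // e.g. predicated (SVE-style) vector loops.
  } else {
    graph->SetHasTraditionalSIMD(true);  // fixed-width (NEON/SSE-style) vector loops.
  }
  // Spilling and register allocation only need the combined view.
  DCHECK(graph->HasSIMD());
}

}  // namespace art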
@@ -1544,7 +1552,6 @@ class HLoopInformationOutwardIterator : public ValueObject { M(If, Instruction) \ M(InstanceFieldGet, Instruction) \ M(InstanceFieldSet, Instruction) \ - M(PredicatedInstanceFieldGet, Instruction) \ M(InstanceOf, Instruction) \ M(IntConstant, Constant) \ M(IntermediateAddress, Instruction) \ @@ -1636,7 +1643,9 @@ class HLoopInformationOutwardIterator : public ValueObject { M(VecStore, VecMemoryOperation) \ M(VecPredSetAll, VecPredSetOperation) \ M(VecPredWhile, VecPredSetOperation) \ - M(VecPredCondition, VecOperation) \ + M(VecPredToBoolean, VecOperation) \ + M(VecCondition, VecPredSetOperation) \ + M(VecPredNot, VecPredSetOperation) \ #define FOR_EACH_CONCRETE_INSTRUCTION_COMMON(M) \ FOR_EACH_CONCRETE_INSTRUCTION_SCALAR_COMMON(M) \ @@ -1659,6 +1668,8 @@ class HLoopInformationOutwardIterator : public ValueObject { #define FOR_EACH_CONCRETE_INSTRUCTION_ARM64(M) +#define FOR_EACH_CONCRETE_INSTRUCTION_RISCV64(M) + #ifndef ART_ENABLE_CODEGEN_x86 #define FOR_EACH_CONCRETE_INSTRUCTION_X86(M) #else @@ -1715,7 +1726,7 @@ FOR_EACH_INSTRUCTION(FORWARD_DECLARATION) const char* DebugName() const override { return #type; } \ HInstruction* Clone(ArenaAllocator* arena) const override { \ DCHECK(IsClonable()); \ - return new (arena) H##type(*this->As##type()); \ + return new (arena) H##type(*this); \ } \ void Accept(HGraphVisitor* visitor) override @@ -2062,12 +2073,12 @@ class HEnvironment : public ArenaObject<kArenaAllocEnvironment> { ArtMethod* method, uint32_t dex_pc, HInstruction* holder) - : vregs_(number_of_vregs, allocator->Adapter(kArenaAllocEnvironmentVRegs)), - locations_(allocator->Adapter(kArenaAllocEnvironmentLocations)), - parent_(nullptr), - method_(method), - dex_pc_(dex_pc), - holder_(holder) { + : vregs_(number_of_vregs, allocator->Adapter(kArenaAllocEnvironmentVRegs)), + locations_(allocator->Adapter(kArenaAllocEnvironmentLocations)), + parent_(nullptr), + method_(method), + dex_pc_(dex_pc), + holder_(holder) { } ALWAYS_INLINE HEnvironment(ArenaAllocator* allocator, @@ -2183,9 +2194,14 @@ class HEnvironment : public ArenaObject<kArenaAllocEnvironment> { std::ostream& operator<<(std::ostream& os, const HInstruction& rhs); // Iterates over the Environments -class HEnvironmentIterator : public ValueObject, - public std::iterator<std::forward_iterator_tag, HEnvironment*> { +class HEnvironmentIterator : public ValueObject { public: + using iterator_category = std::forward_iterator_tag; + using value_type = HEnvironment*; + using difference_type = ptrdiff_t; + using pointer = void; + using reference = void; + explicit HEnvironmentIterator(HEnvironment* cur) : cur_(cur) {} HEnvironment* operator*() const { @@ -2355,9 +2371,7 @@ class HInstruction : public ArenaObject<kArenaAllocInstruction> { return true; } - virtual bool CanDoImplicitNullCheckOn(HInstruction* obj ATTRIBUTE_UNUSED) const { - return false; - } + virtual bool CanDoImplicitNullCheckOn([[maybe_unused]] HInstruction* obj) const { return false; } // If this instruction will do an implicit null check, return the `HNullCheck` associated // with it. Otherwise return null. 
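The iterator changes above drop inheritance from std::iterator, which is deprecated since C++17, and spell out the five trait type aliases as members instead. A standalone illustration of the same pattern on a toy iterator, unrelated to any ART type:

#include <cstddef>
#include <iterator>
#include <type_traits>

// A minimal forward iterator over integers that declares its traits as member
// aliases rather than deriving from the deprecated std::iterator template.
class CountingIterator {
 public:
  using iterator_category = std::forward_iterator_tag;
  using value_type = int;
  using difference_type = std::ptrdiff_t;
  using pointer = void;
  using reference = void;

  explicit CountingIterator(int value) : value_(value) {}

  int operator*() const { return value_; }
  CountingIterator& operator++() { ++value_; return *this; }
  bool operator==(const CountingIterator& other) const { return value_ == other.value_; }
  bool operator!=(const CountingIterator& other) const { return value_ != other.value_; }

 private:
  int value_;
};

// std::iterator_traits picks the aliases up without the base class:
static_assert(std::is_same_v<std::iterator_traits<CountingIterator>::value_type, int>);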
@@ -2553,7 +2567,9 @@ class HInstruction : public ArenaObject<kArenaAllocInstruction> { #define INSTRUCTION_TYPE_CAST(type, super) \ const H##type* As##type() const; \ - H##type* As##type(); + H##type* As##type(); \ + const H##type* As##type##OrNull() const; \ + H##type* As##type##OrNull(); FOR_EACH_INSTRUCTION(INSTRUCTION_TYPE_CAST) #undef INSTRUCTION_TYPE_CAST @@ -2568,7 +2584,7 @@ class HInstruction : public ArenaObject<kArenaAllocInstruction> { // // Note: HEnvironment and some other fields are not copied and are set to default values, see // 'explicit HInstruction(const HInstruction& other)' for details. - virtual HInstruction* Clone(ArenaAllocator* arena ATTRIBUTE_UNUSED) const { + virtual HInstruction* Clone([[maybe_unused]] ArenaAllocator* arena) const { LOG(FATAL) << "Cloning is not implemented for the instruction " << DebugName() << " " << GetId(); UNREACHABLE(); @@ -2596,7 +2612,7 @@ class HInstruction : public ArenaObject<kArenaAllocInstruction> { // Returns whether any data encoded in the two instructions is equal. // This method does not look at the inputs. Both instructions must be // of the same type, otherwise the method has undefined behavior. - virtual bool InstructionDataEquals(const HInstruction* other ATTRIBUTE_UNUSED) const { + virtual bool InstructionDataEquals([[maybe_unused]] const HInstruction* other) const { return false; } @@ -2729,7 +2745,7 @@ class HInstruction : public ArenaObject<kArenaAllocInstruction> { private: using InstructionKindField = - BitField<InstructionKind, kFieldInstructionKind, kFieldInstructionKindSize>; + BitField<InstructionKind, kFieldInstructionKind, kFieldInstructionKindSize>; void FixUpUserRecordsAfterUseInsertion(HUseList<HInstruction*>::iterator fixup_end) { auto before_use_node = uses_.before_begin(); @@ -2904,9 +2920,14 @@ class HBackwardInstructionIterator : public ValueObject { }; template <typename InnerIter> -struct HSTLInstructionIterator : public ValueObject, - public std::iterator<std::forward_iterator_tag, HInstruction*> { +struct HSTLInstructionIterator : public ValueObject { public: + using iterator_category = std::forward_iterator_tag; + using value_type = HInstruction*; + using difference_type = ptrdiff_t; + using pointer = void; + using reference = void; + static_assert(std::is_same_v<InnerIter, HBackwardInstructionIterator> || std::is_same_v<InnerIter, HInstructionIterator> || std::is_same_v<InnerIter, HInstructionIteratorHandleChanges>, @@ -3164,7 +3185,7 @@ class HPhi final : public HVariableInputSizeInstruction { bool IsVRegEquivalentOf(const HInstruction* other) const { return other != nullptr && other->IsPhi() - && other->AsPhi()->GetBlock() == GetBlock() + && other->GetBlock() == GetBlock() && other->AsPhi()->GetRegNumber() == GetRegNumber(); } @@ -3270,7 +3291,7 @@ class HConstant : public HExpression<0> { class HNullConstant final : public HConstant { public: - bool InstructionDataEquals(const HInstruction* other ATTRIBUTE_UNUSED) const override { + bool InstructionDataEquals([[maybe_unused]] const HInstruction* other) const override { return true; } @@ -3497,7 +3518,9 @@ class HDoubleConstant final : public HConstant { class HIf final : public HExpression<1> { public: explicit HIf(HInstruction* input, uint32_t dex_pc = kNoDexPc) - : HExpression(kIf, SideEffects::None(), dex_pc) { + : HExpression(kIf, SideEffects::None(), dex_pc), + true_count_(std::numeric_limits<uint16_t>::max()), + false_count_(std::numeric_limits<uint16_t>::max()) { SetRawInputAt(0, input); } @@ -3512,10 +3535,20 @@ class HIf final : 
public HExpression<1> { return GetBlock()->GetSuccessors()[1]; } + void SetTrueCount(uint16_t count) { true_count_ = count; } + uint16_t GetTrueCount() const { return true_count_; } + + void SetFalseCount(uint16_t count) { false_count_ = count; } + uint16_t GetFalseCount() const { return false_count_; } + DECLARE_INSTRUCTION(If); protected: DEFAULT_COPY_CONSTRUCTOR(If); + + private: + uint16_t true_count_; + uint16_t false_count_; }; @@ -3639,7 +3672,8 @@ class HDeoptimize final : public HVariableInputSizeInstruction { bool CanBeMoved() const override { return GetPackedFlag<kFieldCanBeMoved>(); } bool InstructionDataEquals(const HInstruction* other) const override { - return (other->CanBeMoved() == CanBeMoved()) && (other->AsDeoptimize()->GetKind() == GetKind()); + return (other->CanBeMoved() == CanBeMoved()) && + (other->AsDeoptimize()->GetDeoptimizationKind() == GetDeoptimizationKind()); } bool NeedsEnvironment() const override { return true; } @@ -3827,7 +3861,7 @@ class HUnaryOperation : public HExpression<1> { DataType::Type GetResultType() const { return GetType(); } bool CanBeMoved() const override { return true; } - bool InstructionDataEquals(const HInstruction* other ATTRIBUTE_UNUSED) const override { + bool InstructionDataEquals([[maybe_unused]] const HInstruction* other) const override { return true; } @@ -3836,6 +3870,9 @@ class HUnaryOperation : public HExpression<1> { // be evaluated as a constant, return null. HConstant* TryStaticEvaluation() const; + // Same but for `input` instead of GetInput(). + HConstant* TryStaticEvaluation(HInstruction* input) const; + // Apply this operation to `x`. virtual HConstant* Evaluate(HIntConstant* x) const = 0; virtual HConstant* Evaluate(HLongConstant* x) const = 0; @@ -3903,7 +3940,7 @@ class HBinaryOperation : public HExpression<2> { } bool CanBeMoved() const override { return true; } - bool InstructionDataEquals(const HInstruction* other ATTRIBUTE_UNUSED) const override { + bool InstructionDataEquals([[maybe_unused]] const HInstruction* other) const override { return true; } @@ -3912,16 +3949,19 @@ class HBinaryOperation : public HExpression<2> { // be evaluated as a constant, return null. HConstant* TryStaticEvaluation() const; + // Same but for `left` and `right` instead of GetLeft() and GetRight(). + HConstant* TryStaticEvaluation(HInstruction* left, HInstruction* right) const; + // Apply this operation to `x` and `y`. 
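HIf now carries 16-bit taken/not-taken counts, initialized to the uint16_t maximum to mean "no profile recorded". A hedged sketch of reading them back, using only the accessors added above; how the counts are populated is outside this hunk, and the helper below is illustrative:

#include <limits>
#include "nodes.h"

namespace art HIDDEN {

// Illustrative only: estimate the probability that `ifs` takes its true edge.
// Returns a negative value when no counts were recorded.
double EstimateTrueProbability(HIf* ifs) {
  constexpr uint16_t kNoCount = std::numeric_limits<uint16_t>::max();
  uint16_t taken = ifs->GetTrueCount();
  uint16_t not_taken = ifs->GetFalseCount();
  if (taken == kNoCount || not_taken == kNoCount || taken + not_taken == 0) {
    return -1.0;
  }
  return static_cast<double>(taken) / (taken + not_taken);
}

}  // namespace art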
- virtual HConstant* Evaluate(HNullConstant* x ATTRIBUTE_UNUSED, - HNullConstant* y ATTRIBUTE_UNUSED) const { + virtual HConstant* Evaluate([[maybe_unused]] HNullConstant* x, + [[maybe_unused]] HNullConstant* y) const { LOG(FATAL) << DebugName() << " is not defined for the (null, null) case."; UNREACHABLE(); } virtual HConstant* Evaluate(HIntConstant* x, HIntConstant* y) const = 0; virtual HConstant* Evaluate(HLongConstant* x, HLongConstant* y) const = 0; - virtual HConstant* Evaluate(HLongConstant* x ATTRIBUTE_UNUSED, - HIntConstant* y ATTRIBUTE_UNUSED) const { + virtual HConstant* Evaluate([[maybe_unused]] HLongConstant* x, + [[maybe_unused]] HIntConstant* y) const { LOG(FATAL) << DebugName() << " is not defined for the (long, int) case."; UNREACHABLE(); } @@ -4049,8 +4089,8 @@ class HEqual final : public HCondition { bool IsCommutative() const override { return true; } - HConstant* Evaluate(HNullConstant* x ATTRIBUTE_UNUSED, - HNullConstant* y ATTRIBUTE_UNUSED) const override { + HConstant* Evaluate([[maybe_unused]] HNullConstant* x, + [[maybe_unused]] HNullConstant* y) const override { return MakeConstantCondition(true, GetDexPc()); } HConstant* Evaluate(HIntConstant* x, HIntConstant* y) const override { @@ -4096,8 +4136,8 @@ class HNotEqual final : public HCondition { bool IsCommutative() const override { return true; } - HConstant* Evaluate(HNullConstant* x ATTRIBUTE_UNUSED, - HNullConstant* y ATTRIBUTE_UNUSED) const override { + HConstant* Evaluate([[maybe_unused]] HNullConstant* x, + [[maybe_unused]] HNullConstant* y) const override { return MakeConstantCondition(false, GetDexPc()); } HConstant* Evaluate(HIntConstant* x, HIntConstant* y) const override { @@ -4303,13 +4343,13 @@ class HBelow final : public HCondition { HConstant* Evaluate(HLongConstant* x, HLongConstant* y) const override { return MakeConstantCondition(Compute(x->GetValue(), y->GetValue()), GetDexPc()); } - HConstant* Evaluate(HFloatConstant* x ATTRIBUTE_UNUSED, - HFloatConstant* y ATTRIBUTE_UNUSED) const override { + HConstant* Evaluate([[maybe_unused]] HFloatConstant* x, + [[maybe_unused]] HFloatConstant* y) const override { LOG(FATAL) << DebugName() << " is not defined for float values"; UNREACHABLE(); } - HConstant* Evaluate(HDoubleConstant* x ATTRIBUTE_UNUSED, - HDoubleConstant* y ATTRIBUTE_UNUSED) const override { + HConstant* Evaluate([[maybe_unused]] HDoubleConstant* x, + [[maybe_unused]] HDoubleConstant* y) const override { LOG(FATAL) << DebugName() << " is not defined for double values"; UNREACHABLE(); } @@ -4345,13 +4385,13 @@ class HBelowOrEqual final : public HCondition { HConstant* Evaluate(HLongConstant* x, HLongConstant* y) const override { return MakeConstantCondition(Compute(x->GetValue(), y->GetValue()), GetDexPc()); } - HConstant* Evaluate(HFloatConstant* x ATTRIBUTE_UNUSED, - HFloatConstant* y ATTRIBUTE_UNUSED) const override { + HConstant* Evaluate([[maybe_unused]] HFloatConstant* x, + [[maybe_unused]] HFloatConstant* y) const override { LOG(FATAL) << DebugName() << " is not defined for float values"; UNREACHABLE(); } - HConstant* Evaluate(HDoubleConstant* x ATTRIBUTE_UNUSED, - HDoubleConstant* y ATTRIBUTE_UNUSED) const override { + HConstant* Evaluate([[maybe_unused]] HDoubleConstant* x, + [[maybe_unused]] HDoubleConstant* y) const override { LOG(FATAL) << DebugName() << " is not defined for double values"; UNREACHABLE(); } @@ -4387,13 +4427,13 @@ class HAbove final : public HCondition { HConstant* Evaluate(HLongConstant* x, HLongConstant* y) const override { return 
MakeConstantCondition(Compute(x->GetValue(), y->GetValue()), GetDexPc()); } - HConstant* Evaluate(HFloatConstant* x ATTRIBUTE_UNUSED, - HFloatConstant* y ATTRIBUTE_UNUSED) const override { + HConstant* Evaluate([[maybe_unused]] HFloatConstant* x, + [[maybe_unused]] HFloatConstant* y) const override { LOG(FATAL) << DebugName() << " is not defined for float values"; UNREACHABLE(); } - HConstant* Evaluate(HDoubleConstant* x ATTRIBUTE_UNUSED, - HDoubleConstant* y ATTRIBUTE_UNUSED) const override { + HConstant* Evaluate([[maybe_unused]] HDoubleConstant* x, + [[maybe_unused]] HDoubleConstant* y) const override { LOG(FATAL) << DebugName() << " is not defined for double values"; UNREACHABLE(); } @@ -4429,13 +4469,13 @@ class HAboveOrEqual final : public HCondition { HConstant* Evaluate(HLongConstant* x, HLongConstant* y) const override { return MakeConstantCondition(Compute(x->GetValue(), y->GetValue()), GetDexPc()); } - HConstant* Evaluate(HFloatConstant* x ATTRIBUTE_UNUSED, - HFloatConstant* y ATTRIBUTE_UNUSED) const override { + HConstant* Evaluate([[maybe_unused]] HFloatConstant* x, + [[maybe_unused]] HFloatConstant* y) const override { LOG(FATAL) << DebugName() << " is not defined for float values"; UNREACHABLE(); } - HConstant* Evaluate(HDoubleConstant* x ATTRIBUTE_UNUSED, - HDoubleConstant* y ATTRIBUTE_UNUSED) const override { + HConstant* Evaluate([[maybe_unused]] HDoubleConstant* x, + [[maybe_unused]] HDoubleConstant* y) const override { LOG(FATAL) << DebugName() << " is not defined for double values"; UNREACHABLE(); } @@ -4522,7 +4562,7 @@ class HCompare final : public HBinaryOperation { return GetBias() == ComparisonBias::kGtBias; } - static SideEffects SideEffectsForArchRuntimeCalls(DataType::Type type ATTRIBUTE_UNUSED) { + static SideEffects SideEffectsForArchRuntimeCalls([[maybe_unused]] DataType::Type type) { // Comparisons do not require a runtime call in any back end. return SideEffects::None(); } @@ -4859,8 +4899,7 @@ class HInvokePolymorphic final : public HInvoke { // to pass intrinsic information to the HInvokePolymorphic node. ArtMethod* resolved_method, MethodReference resolved_method_reference, - dex::ProtoIndex proto_idx, - bool enable_intrinsic_opt) + dex::ProtoIndex proto_idx) : HInvoke(kInvokePolymorphic, allocator, number_of_arguments, @@ -4871,9 +4910,8 @@ class HInvokePolymorphic final : public HInvoke { resolved_method, resolved_method_reference, kPolymorphic, - enable_intrinsic_opt), - proto_idx_(proto_idx) { - } + /* enable_intrinsic_opt= */ true), + proto_idx_(proto_idx) {} bool IsClonable() const override { return true; } @@ -5015,15 +5053,13 @@ class HInvokeStaticOrDirect final : public HInvoke { return input_records; } - bool CanDoImplicitNullCheckOn(HInstruction* obj ATTRIBUTE_UNUSED) const override { + bool CanDoImplicitNullCheckOn([[maybe_unused]] HInstruction* obj) const override { // We do not access the method via object reference, so we cannot do an implicit null check. // TODO: for intrinsics we can generate implicit null checks. return false; } - bool CanBeNull() const override { - return GetType() == DataType::Type::kReference && !IsStringInit(); - } + bool CanBeNull() const override; MethodLoadKind GetMethodLoadKind() const { return dispatch_info_.method_load_kind; } CodePtrLocation GetCodePtrLocation() const { @@ -5599,10 +5635,14 @@ class HMin final : public HBinaryOperation { ComputeIntegral(x->GetValue(), y->GetValue()), GetDexPc()); } // TODO: Evaluation for floating-point values. 
- HConstant* Evaluate(HFloatConstant* x ATTRIBUTE_UNUSED, - HFloatConstant* y ATTRIBUTE_UNUSED) const override { return nullptr; } - HConstant* Evaluate(HDoubleConstant* x ATTRIBUTE_UNUSED, - HDoubleConstant* y ATTRIBUTE_UNUSED) const override { return nullptr; } + HConstant* Evaluate([[maybe_unused]] HFloatConstant* x, + [[maybe_unused]] HFloatConstant* y) const override { + return nullptr; + } + HConstant* Evaluate([[maybe_unused]] HDoubleConstant* x, + [[maybe_unused]] HDoubleConstant* y) const override { + return nullptr; + } DECLARE_INSTRUCTION(Min); @@ -5634,10 +5674,14 @@ class HMax final : public HBinaryOperation { ComputeIntegral(x->GetValue(), y->GetValue()), GetDexPc()); } // TODO: Evaluation for floating-point values. - HConstant* Evaluate(HFloatConstant* x ATTRIBUTE_UNUSED, - HFloatConstant* y ATTRIBUTE_UNUSED) const override { return nullptr; } - HConstant* Evaluate(HDoubleConstant* x ATTRIBUTE_UNUSED, - HDoubleConstant* y ATTRIBUTE_UNUSED) const override { return nullptr; } + HConstant* Evaluate([[maybe_unused]] HFloatConstant* x, + [[maybe_unused]] HFloatConstant* y) const override { + return nullptr; + } + HConstant* Evaluate([[maybe_unused]] HDoubleConstant* x, + [[maybe_unused]] HDoubleConstant* y) const override { + return nullptr; + } DECLARE_INSTRUCTION(Max); @@ -5699,7 +5743,7 @@ class HDivZeroCheck final : public HExpression<1> { bool IsClonable() const override { return true; } bool CanBeMoved() const override { return true; } - bool InstructionDataEquals(const HInstruction* other ATTRIBUTE_UNUSED) const override { + bool InstructionDataEquals([[maybe_unused]] const HInstruction* other) const override { return true; } @@ -5736,18 +5780,18 @@ class HShl final : public HBinaryOperation { return GetBlock()->GetGraph()->GetLongConstant( Compute(value->GetValue(), distance->GetValue(), kMaxLongShiftDistance), GetDexPc()); } - HConstant* Evaluate(HLongConstant* value ATTRIBUTE_UNUSED, - HLongConstant* distance ATTRIBUTE_UNUSED) const override { + HConstant* Evaluate([[maybe_unused]] HLongConstant* value, + [[maybe_unused]] HLongConstant* distance) const override { LOG(FATAL) << DebugName() << " is not defined for the (long, long) case."; UNREACHABLE(); } - HConstant* Evaluate(HFloatConstant* value ATTRIBUTE_UNUSED, - HFloatConstant* distance ATTRIBUTE_UNUSED) const override { + HConstant* Evaluate([[maybe_unused]] HFloatConstant* value, + [[maybe_unused]] HFloatConstant* distance) const override { LOG(FATAL) << DebugName() << " is not defined for float values"; UNREACHABLE(); } - HConstant* Evaluate(HDoubleConstant* value ATTRIBUTE_UNUSED, - HDoubleConstant* distance ATTRIBUTE_UNUSED) const override { + HConstant* Evaluate([[maybe_unused]] HDoubleConstant* value, + [[maybe_unused]] HDoubleConstant* distance) const override { LOG(FATAL) << DebugName() << " is not defined for double values"; UNREACHABLE(); } @@ -5782,18 +5826,18 @@ class HShr final : public HBinaryOperation { return GetBlock()->GetGraph()->GetLongConstant( Compute(value->GetValue(), distance->GetValue(), kMaxLongShiftDistance), GetDexPc()); } - HConstant* Evaluate(HLongConstant* value ATTRIBUTE_UNUSED, - HLongConstant* distance ATTRIBUTE_UNUSED) const override { + HConstant* Evaluate([[maybe_unused]] HLongConstant* value, + [[maybe_unused]] HLongConstant* distance) const override { LOG(FATAL) << DebugName() << " is not defined for the (long, long) case."; UNREACHABLE(); } - HConstant* Evaluate(HFloatConstant* value ATTRIBUTE_UNUSED, - HFloatConstant* distance ATTRIBUTE_UNUSED) const override { + HConstant* 
Evaluate([[maybe_unused]] HFloatConstant* value, + [[maybe_unused]] HFloatConstant* distance) const override { LOG(FATAL) << DebugName() << " is not defined for float values"; UNREACHABLE(); } - HConstant* Evaluate(HDoubleConstant* value ATTRIBUTE_UNUSED, - HDoubleConstant* distance ATTRIBUTE_UNUSED) const override { + HConstant* Evaluate([[maybe_unused]] HDoubleConstant* value, + [[maybe_unused]] HDoubleConstant* distance) const override { LOG(FATAL) << DebugName() << " is not defined for double values"; UNREACHABLE(); } @@ -5830,18 +5874,18 @@ class HUShr final : public HBinaryOperation { return GetBlock()->GetGraph()->GetLongConstant( Compute(value->GetValue(), distance->GetValue(), kMaxLongShiftDistance), GetDexPc()); } - HConstant* Evaluate(HLongConstant* value ATTRIBUTE_UNUSED, - HLongConstant* distance ATTRIBUTE_UNUSED) const override { + HConstant* Evaluate([[maybe_unused]] HLongConstant* value, + [[maybe_unused]] HLongConstant* distance) const override { LOG(FATAL) << DebugName() << " is not defined for the (long, long) case."; UNREACHABLE(); } - HConstant* Evaluate(HFloatConstant* value ATTRIBUTE_UNUSED, - HFloatConstant* distance ATTRIBUTE_UNUSED) const override { + HConstant* Evaluate([[maybe_unused]] HFloatConstant* value, + [[maybe_unused]] HFloatConstant* distance) const override { LOG(FATAL) << DebugName() << " is not defined for float values"; UNREACHABLE(); } - HConstant* Evaluate(HDoubleConstant* value ATTRIBUTE_UNUSED, - HDoubleConstant* distance ATTRIBUTE_UNUSED) const override { + HConstant* Evaluate([[maybe_unused]] HDoubleConstant* value, + [[maybe_unused]] HDoubleConstant* distance) const override { LOG(FATAL) << DebugName() << " is not defined for double values"; UNREACHABLE(); } @@ -5873,13 +5917,13 @@ class HAnd final : public HBinaryOperation { return GetBlock()->GetGraph()->GetLongConstant( Compute(x->GetValue(), y->GetValue()), GetDexPc()); } - HConstant* Evaluate(HFloatConstant* x ATTRIBUTE_UNUSED, - HFloatConstant* y ATTRIBUTE_UNUSED) const override { + HConstant* Evaluate([[maybe_unused]] HFloatConstant* x, + [[maybe_unused]] HFloatConstant* y) const override { LOG(FATAL) << DebugName() << " is not defined for float values"; UNREACHABLE(); } - HConstant* Evaluate(HDoubleConstant* x ATTRIBUTE_UNUSED, - HDoubleConstant* y ATTRIBUTE_UNUSED) const override { + HConstant* Evaluate([[maybe_unused]] HDoubleConstant* x, + [[maybe_unused]] HDoubleConstant* y) const override { LOG(FATAL) << DebugName() << " is not defined for double values"; UNREACHABLE(); } @@ -5911,13 +5955,13 @@ class HOr final : public HBinaryOperation { return GetBlock()->GetGraph()->GetLongConstant( Compute(x->GetValue(), y->GetValue()), GetDexPc()); } - HConstant* Evaluate(HFloatConstant* x ATTRIBUTE_UNUSED, - HFloatConstant* y ATTRIBUTE_UNUSED) const override { + HConstant* Evaluate([[maybe_unused]] HFloatConstant* x, + [[maybe_unused]] HFloatConstant* y) const override { LOG(FATAL) << DebugName() << " is not defined for float values"; UNREACHABLE(); } - HConstant* Evaluate(HDoubleConstant* x ATTRIBUTE_UNUSED, - HDoubleConstant* y ATTRIBUTE_UNUSED) const override { + HConstant* Evaluate([[maybe_unused]] HDoubleConstant* x, + [[maybe_unused]] HDoubleConstant* y) const override { LOG(FATAL) << DebugName() << " is not defined for double values"; UNREACHABLE(); } @@ -5949,13 +5993,13 @@ class HXor final : public HBinaryOperation { return GetBlock()->GetGraph()->GetLongConstant( Compute(x->GetValue(), y->GetValue()), GetDexPc()); } - HConstant* Evaluate(HFloatConstant* x ATTRIBUTE_UNUSED, - 
HFloatConstant* y ATTRIBUTE_UNUSED) const override { + HConstant* Evaluate([[maybe_unused]] HFloatConstant* x, + [[maybe_unused]] HFloatConstant* y) const override { LOG(FATAL) << DebugName() << " is not defined for float values"; UNREACHABLE(); } - HConstant* Evaluate(HDoubleConstant* x ATTRIBUTE_UNUSED, - HDoubleConstant* y ATTRIBUTE_UNUSED) const override { + HConstant* Evaluate([[maybe_unused]] HDoubleConstant* x, + [[maybe_unused]] HDoubleConstant* y) const override { LOG(FATAL) << DebugName() << " is not defined for double values"; UNREACHABLE(); } @@ -5993,18 +6037,18 @@ class HRor final : public HBinaryOperation { return GetBlock()->GetGraph()->GetLongConstant( Compute(value->GetValue(), distance->GetValue(), kMaxLongShiftDistance), GetDexPc()); } - HConstant* Evaluate(HLongConstant* value ATTRIBUTE_UNUSED, - HLongConstant* distance ATTRIBUTE_UNUSED) const override { + HConstant* Evaluate([[maybe_unused]] HLongConstant* value, + [[maybe_unused]] HLongConstant* distance) const override { LOG(FATAL) << DebugName() << " is not defined for the (long, long) case."; UNREACHABLE(); } - HConstant* Evaluate(HFloatConstant* value ATTRIBUTE_UNUSED, - HFloatConstant* distance ATTRIBUTE_UNUSED) const override { + HConstant* Evaluate([[maybe_unused]] HFloatConstant* value, + [[maybe_unused]] HFloatConstant* distance) const override { LOG(FATAL) << DebugName() << " is not defined for float values"; UNREACHABLE(); } - HConstant* Evaluate(HDoubleConstant* value ATTRIBUTE_UNUSED, - HDoubleConstant* distance ATTRIBUTE_UNUSED) const override { + HConstant* Evaluate([[maybe_unused]] HDoubleConstant* value, + [[maybe_unused]] HDoubleConstant* distance) const override { LOG(FATAL) << DebugName() << " is not defined for double values"; UNREACHABLE(); } @@ -6067,7 +6111,7 @@ class HNot final : public HUnaryOperation { } bool CanBeMoved() const override { return true; } - bool InstructionDataEquals(const HInstruction* other ATTRIBUTE_UNUSED) const override { + bool InstructionDataEquals([[maybe_unused]] const HInstruction* other) const override { return true; } @@ -6079,11 +6123,11 @@ class HNot final : public HUnaryOperation { HConstant* Evaluate(HLongConstant* x) const override { return GetBlock()->GetGraph()->GetLongConstant(Compute(x->GetValue()), GetDexPc()); } - HConstant* Evaluate(HFloatConstant* x ATTRIBUTE_UNUSED) const override { + HConstant* Evaluate([[maybe_unused]] HFloatConstant* x) const override { LOG(FATAL) << DebugName() << " is not defined for float values"; UNREACHABLE(); } - HConstant* Evaluate(HDoubleConstant* x ATTRIBUTE_UNUSED) const override { + HConstant* Evaluate([[maybe_unused]] HDoubleConstant* x) const override { LOG(FATAL) << DebugName() << " is not defined for double values"; UNREACHABLE(); } @@ -6101,7 +6145,7 @@ class HBooleanNot final : public HUnaryOperation { } bool CanBeMoved() const override { return true; } - bool InstructionDataEquals(const HInstruction* other ATTRIBUTE_UNUSED) const override { + bool InstructionDataEquals([[maybe_unused]] const HInstruction* other) const override { return true; } @@ -6113,15 +6157,15 @@ class HBooleanNot final : public HUnaryOperation { HConstant* Evaluate(HIntConstant* x) const override { return GetBlock()->GetGraph()->GetIntConstant(Compute(x->GetValue()), GetDexPc()); } - HConstant* Evaluate(HLongConstant* x ATTRIBUTE_UNUSED) const override { + HConstant* Evaluate([[maybe_unused]] HLongConstant* x) const override { LOG(FATAL) << DebugName() << " is not defined for long values"; UNREACHABLE(); } - HConstant* 
Evaluate(HFloatConstant* x ATTRIBUTE_UNUSED) const override { + HConstant* Evaluate([[maybe_unused]] HFloatConstant* x) const override { LOG(FATAL) << DebugName() << " is not defined for float values"; UNREACHABLE(); } - HConstant* Evaluate(HDoubleConstant* x ATTRIBUTE_UNUSED) const override { + HConstant* Evaluate([[maybe_unused]] HDoubleConstant* x) const override { LOG(FATAL) << DebugName() << " is not defined for double values"; UNREACHABLE(); } @@ -6148,7 +6192,7 @@ class HTypeConversion final : public HExpression<1> { bool IsClonable() const override { return true; } bool CanBeMoved() const override { return true; } - bool InstructionDataEquals(const HInstruction* other ATTRIBUTE_UNUSED) const override { + bool InstructionDataEquals([[maybe_unused]] const HInstruction* other) const override { return true; } // Return whether the conversion is implicit. This includes conversion to the same type. @@ -6160,6 +6204,9 @@ class HTypeConversion final : public HExpression<1> { // containing the result. If the input cannot be converted, return nullptr. HConstant* TryStaticEvaluation() const; + // Same but for `input` instead of GetInput(). + HConstant* TryStaticEvaluation(HInstruction* input) const; + DECLARE_INSTRUCTION(TypeConversion); protected: @@ -6180,7 +6227,7 @@ class HNullCheck final : public HExpression<1> { bool IsClonable() const override { return true; } bool CanBeMoved() const override { return true; } - bool InstructionDataEquals(const HInstruction* other ATTRIBUTE_UNUSED) const override { + bool InstructionDataEquals([[maybe_unused]] const HInstruction* other) const override { return true; } @@ -6321,96 +6368,6 @@ class HInstanceFieldGet final : public HExpression<1> { const FieldInfo field_info_; }; -class HPredicatedInstanceFieldGet final : public HExpression<2> { - public: - HPredicatedInstanceFieldGet(HInstanceFieldGet* orig, - HInstruction* target, - HInstruction* default_val) - : HExpression(kPredicatedInstanceFieldGet, - orig->GetFieldType(), - orig->GetSideEffects(), - orig->GetDexPc()), - field_info_(orig->GetFieldInfo()) { - // NB Default-val is at 0 so we can avoid doing a move. 
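The Evaluate overloads throughout these hunks swap ART's ATTRIBUTE_UNUSED macro for the standard C++17 [[maybe_unused]] attribute; the behavior is unchanged, the parameter is simply documented as intentionally unused. A standalone illustration of the attribute, unrelated to ART types:

#include <iostream>

struct Visitor {
  // The attribute silences unused-parameter warnings without a compiler-specific macro.
  virtual void Visit([[maybe_unused]] int node_id) {}
  virtual ~Visitor() = default;
};

struct PrintingVisitor : Visitor {
  void Visit(int node_id) override { std::cout << "visiting " << node_id << '\n'; }
};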
- SetRawInputAt(1, target); - SetRawInputAt(0, default_val); - } - - HPredicatedInstanceFieldGet(HInstruction* value, - ArtField* field, - HInstruction* default_value, - DataType::Type field_type, - MemberOffset field_offset, - bool is_volatile, - uint32_t field_idx, - uint16_t declaring_class_def_index, - const DexFile& dex_file, - uint32_t dex_pc) - : HExpression(kPredicatedInstanceFieldGet, - field_type, - SideEffects::FieldReadOfType(field_type, is_volatile), - dex_pc), - field_info_(field, - field_offset, - field_type, - is_volatile, - field_idx, - declaring_class_def_index, - dex_file) { - SetRawInputAt(1, value); - SetRawInputAt(0, default_value); - } - - bool IsClonable() const override { - return true; - } - bool CanBeMoved() const override { - return !IsVolatile(); - } - - HInstruction* GetDefaultValue() const { - return InputAt(0); - } - HInstruction* GetTarget() const { - return InputAt(1); - } - - bool InstructionDataEquals(const HInstruction* other) const override { - const HPredicatedInstanceFieldGet* other_get = other->AsPredicatedInstanceFieldGet(); - return GetFieldOffset().SizeValue() == other_get->GetFieldOffset().SizeValue() && - GetDefaultValue() == other_get->GetDefaultValue(); - } - - bool CanDoImplicitNullCheckOn(HInstruction* obj) const override { - return (obj == InputAt(0)) && art::CanDoImplicitNullCheckOn(GetFieldOffset().Uint32Value()); - } - - size_t ComputeHashCode() const override { - return (HInstruction::ComputeHashCode() << 7) | GetFieldOffset().SizeValue(); - } - - bool IsFieldAccess() const override { return true; } - const FieldInfo& GetFieldInfo() const override { return field_info_; } - MemberOffset GetFieldOffset() const { return field_info_.GetFieldOffset(); } - DataType::Type GetFieldType() const { return field_info_.GetFieldType(); } - bool IsVolatile() const { return field_info_.IsVolatile(); } - - void SetType(DataType::Type new_type) { - DCHECK(DataType::IsIntegralType(GetType())); - DCHECK(DataType::IsIntegralType(new_type)); - DCHECK_EQ(DataType::Size(GetType()), DataType::Size(new_type)); - SetPackedField<TypeField>(new_type); - } - - DECLARE_INSTRUCTION(PredicatedInstanceFieldGet); - - protected: - DEFAULT_COPY_CONSTRUCTOR(PredicatedInstanceFieldGet); - - private: - const FieldInfo field_info_; -}; - enum class WriteBarrierKind { // Emit the write barrier, with a runtime optimization which checks if the value that it is being // set is null. 
@@ -6455,7 +6412,6 @@ class HInstanceFieldSet final : public HExpression<2> { declaring_class_def_index, dex_file) { SetPackedFlag<kFlagValueCanBeNull>(true); - SetPackedFlag<kFlagIsPredicatedSet>(false); SetPackedField<WriteBarrierKindField>(WriteBarrierKind::kEmitWithNullCheck); SetRawInputAt(0, object); SetRawInputAt(1, value); @@ -6475,8 +6431,6 @@ class HInstanceFieldSet final : public HExpression<2> { HInstruction* GetValue() const { return InputAt(1); } bool GetValueCanBeNull() const { return GetPackedFlag<kFlagValueCanBeNull>(); } void ClearValueCanBeNull() { SetPackedFlag<kFlagValueCanBeNull>(false); } - bool GetIsPredicatedSet() const { return GetPackedFlag<kFlagIsPredicatedSet>(); } - void SetIsPredicatedSet(bool value = true) { SetPackedFlag<kFlagIsPredicatedSet>(value); } WriteBarrierKind GetWriteBarrierKind() { return GetPackedField<WriteBarrierKindField>(); } void SetWriteBarrierKind(WriteBarrierKind kind) { DCHECK(kind != WriteBarrierKind::kEmitWithNullCheck) @@ -6491,8 +6445,7 @@ class HInstanceFieldSet final : public HExpression<2> { private: static constexpr size_t kFlagValueCanBeNull = kNumberOfGenericPackedBits; - static constexpr size_t kFlagIsPredicatedSet = kFlagValueCanBeNull + 1; - static constexpr size_t kWriteBarrierKind = kFlagIsPredicatedSet + 1; + static constexpr size_t kWriteBarrierKind = kFlagValueCanBeNull + 1; static constexpr size_t kWriteBarrierKindSize = MinimumBitsToStore(static_cast<size_t>(WriteBarrierKind::kLast)); static constexpr size_t kNumberOfInstanceFieldSetPackedBits = @@ -6511,12 +6464,12 @@ class HArrayGet final : public HExpression<2> { HInstruction* index, DataType::Type type, uint32_t dex_pc) - : HArrayGet(array, - index, - type, - SideEffects::ArrayReadOfType(type), - dex_pc, - /* is_string_char_at= */ false) { + : HArrayGet(array, + index, + type, + SideEffects::ArrayReadOfType(type), + dex_pc, + /* is_string_char_at= */ false) { } HArrayGet(HInstruction* array, @@ -6533,10 +6486,10 @@ class HArrayGet final : public HExpression<2> { bool IsClonable() const override { return true; } bool CanBeMoved() const override { return true; } - bool InstructionDataEquals(const HInstruction* other ATTRIBUTE_UNUSED) const override { + bool InstructionDataEquals([[maybe_unused]] const HInstruction* other) const override { return true; } - bool CanDoImplicitNullCheckOn(HInstruction* obj ATTRIBUTE_UNUSED) const override { + bool CanDoImplicitNullCheckOn([[maybe_unused]] HInstruction* obj) const override { // TODO: We can be smarter here. // Currently, unless the array is the result of NewArray, the array access is always // preceded by some form of null NullCheck necessary for the bounds check, usually @@ -6640,7 +6593,7 @@ class HArraySet final : public HExpression<3> { // Can throw ArrayStoreException. bool CanThrow() const override { return NeedsTypeCheck(); } - bool CanDoImplicitNullCheckOn(HInstruction* obj ATTRIBUTE_UNUSED) const override { + bool CanDoImplicitNullCheckOn([[maybe_unused]] HInstruction* obj) const override { // TODO: Same as for ArrayGet. 
return false; } @@ -6746,7 +6699,7 @@ class HArrayLength final : public HExpression<1> { bool IsClonable() const override { return true; } bool CanBeMoved() const override { return true; } - bool InstructionDataEquals(const HInstruction* other ATTRIBUTE_UNUSED) const override { + bool InstructionDataEquals([[maybe_unused]] const HInstruction* other) const override { return true; } bool CanDoImplicitNullCheckOn(HInstruction* obj) const override { @@ -6790,7 +6743,7 @@ class HBoundsCheck final : public HExpression<2> { bool IsClonable() const override { return true; } bool CanBeMoved() const override { return true; } - bool InstructionDataEquals(const HInstruction* other ATTRIBUTE_UNUSED) const override { + bool InstructionDataEquals([[maybe_unused]] const HInstruction* other) const override { return true; } @@ -7000,17 +6953,15 @@ class HLoadClass final : public HInstruction { bool CanCallRuntime() const { return NeedsAccessCheck() || MustGenerateClinitCheck() || - GetLoadKind() == LoadKind::kRuntimeCall || - GetLoadKind() == LoadKind::kBssEntry; + NeedsBss() || + GetLoadKind() == LoadKind::kRuntimeCall; } bool CanThrow() const override { return NeedsAccessCheck() || MustGenerateClinitCheck() || // If the class is in the boot image, the lookup in the runtime call cannot throw. - ((GetLoadKind() == LoadKind::kRuntimeCall || - GetLoadKind() == LoadKind::kBssEntry) && - !IsInBootImage()); + ((GetLoadKind() == LoadKind::kRuntimeCall || NeedsBss()) && !IsInBootImage()); } ReferenceTypeInfo GetLoadedClassRTI() { @@ -7362,6 +7313,16 @@ class HLoadMethodHandle final : public HInstruction { class HLoadMethodType final : public HInstruction { public: + // Determines how to load the MethodType. + enum class LoadKind { + // Load from an entry in the .bss section using a PC-relative load. + kBssEntry, + // Load using a single runtime call. + kRuntimeCall, + + kLast = kRuntimeCall, + }; + HLoadMethodType(HCurrentMethod* current_method, dex::ProtoIndex proto_index, const DexFile& dex_file, @@ -7373,6 +7334,7 @@ class HLoadMethodType final : public HInstruction { special_input_(HUserRecord<HInstruction*>(current_method)), proto_index_(proto_index), dex_file_(dex_file) { + SetPackedField<LoadKindField>(LoadKind::kRuntimeCall); } using HInstruction::GetInputRecords; // Keep the const version visible. @@ -7383,6 +7345,12 @@ class HLoadMethodType final : public HInstruction { bool IsClonable() const override { return true; } + void SetLoadKind(LoadKind load_kind); + + LoadKind GetLoadKind() const { + return GetPackedField<LoadKindField>(); + } + dex::ProtoIndex GetProtoIndex() const { return proto_index_; } const DexFile& GetDexFile() const { return dex_file_; } @@ -7401,6 +7369,14 @@ class HLoadMethodType final : public HInstruction { DEFAULT_COPY_CONSTRUCTOR(LoadMethodType); private: + static constexpr size_t kFieldLoadKind = kNumberOfGenericPackedBits; + static constexpr size_t kFieldLoadKindSize = + MinimumBitsToStore(static_cast<size_t>(LoadKind::kLast)); + static constexpr size_t kNumberOfLoadMethodTypePackedBits = kFieldLoadKind + kFieldLoadKindSize; + static_assert(kNumberOfLoadMethodTypePackedBits <= kMaxNumberOfPackedBits, + "Too many packed fields."); + using LoadKindField = BitField<LoadKind, kFieldLoadKind, kFieldLoadKindSize>; + // The special input is the HCurrentMethod for kRuntimeCall. 
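HLoadMethodType gains a packed LoadKind field that defaults to kRuntimeCall and may be switched to kBssEntry exactly once, before the instruction is added to a block. A hedged sketch of selecting the kind; whether a given MethodType qualifies for a .bss entry is a policy decision not shown in this change, so the boolean parameter below is a stand-in:

#include "nodes.h"

namespace art HIDDEN {

// Illustrative only: pick the load kind for a freshly built HLoadMethodType.
void ChooseMethodTypeLoadKind(HLoadMethodType* load, bool can_use_bss_entry) {
  DCHECK(load->GetBlock() == nullptr);  // SetLoadKind() requires a detached instruction.
  if (can_use_bss_entry) {
    load->SetLoadKind(HLoadMethodType::LoadKind::kBssEntry);
  }  // Otherwise keep the default LoadKind::kRuntimeCall.
}

}  // namespace art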
HUserRecord<HInstruction*> special_input_; @@ -7408,6 +7384,17 @@ class HLoadMethodType final : public HInstruction { const DexFile& dex_file_; }; +std::ostream& operator<<(std::ostream& os, HLoadMethodType::LoadKind rhs); + +// Note: defined outside class to see operator<<(., HLoadMethodType::LoadKind). +inline void HLoadMethodType::SetLoadKind(LoadKind load_kind) { + // The load kind should be determined before inserting the instruction to the graph. + DCHECK(GetBlock() == nullptr); + DCHECK(GetEnvironment() == nullptr); + DCHECK_EQ(GetLoadKind(), LoadKind::kRuntimeCall); + SetPackedField<LoadKindField>(load_kind); +} + /** * Performs an initialization check on its Class object input. */ @@ -7423,7 +7410,7 @@ class HClinitCheck final : public HExpression<1> { } // TODO: Make ClinitCheck clonable. bool CanBeMoved() const override { return true; } - bool InstructionDataEquals(const HInstruction* other ATTRIBUTE_UNUSED) const override { + bool InstructionDataEquals([[maybe_unused]] const HInstruction* other) const override { return true; } @@ -8343,7 +8330,7 @@ class HSelect final : public HExpression<3> { HInstruction* GetCondition() const { return InputAt(2); } bool CanBeMoved() const override { return true; } - bool InstructionDataEquals(const HInstruction* other ATTRIBUTE_UNUSED) const override { + bool InstructionDataEquals([[maybe_unused]] const HInstruction* other) const override { return true; } @@ -8351,6 +8338,12 @@ class HSelect final : public HExpression<3> { return GetTrueValue()->CanBeNull() || GetFalseValue()->CanBeNull(); } + void UpdateType() { + DCHECK_EQ(HPhi::ToPhiType(GetTrueValue()->GetType()), + HPhi::ToPhiType(GetFalseValue()->GetType())); + SetPackedField<TypeField>(HPhi::ToPhiType(GetTrueValue()->GetType())); + } + DECLARE_INSTRUCTION(Select); protected: @@ -8513,7 +8506,7 @@ class HIntermediateAddress final : public HExpression<2> { bool IsClonable() const override { return true; } bool CanBeMoved() const override { return true; } - bool InstructionDataEquals(const HInstruction* other ATTRIBUTE_UNUSED) const override { + bool InstructionDataEquals([[maybe_unused]] const HInstruction* other) const override { return true; } bool IsActualObject() const override { return false; } @@ -8550,7 +8543,7 @@ class HGraphVisitor : public ValueObject { graph_(graph) {} virtual ~HGraphVisitor() {} - virtual void VisitInstruction(HInstruction* instruction ATTRIBUTE_UNUSED) {} + virtual void VisitInstruction([[maybe_unused]] HInstruction* instruction) {} virtual void VisitBasicBlock(HBasicBlock* block); // Visit the graph following basic block insertion order. @@ -8623,7 +8616,7 @@ class CloneAndReplaceInstructionVisitor final : public HGraphDelegateVisitor { DISALLOW_COPY_AND_ASSIGN(CloneAndReplaceInstructionVisitor); }; -// Iterator over the blocks that art part of the loop. Includes blocks part +// Iterator over the blocks that are part of the loop; includes blocks which are part // of an inner loop. The order in which the blocks are iterated is on their // block id. class HBlocksInLoopIterator : public ValueObject { @@ -8656,7 +8649,7 @@ class HBlocksInLoopIterator : public ValueObject { DISALLOW_COPY_AND_ASSIGN(HBlocksInLoopIterator); }; -// Iterator over the blocks that art part of the loop. Includes blocks part +// Iterator over the blocks that are part of the loop; includes blocks which are part // of an inner loop. The order in which the blocks are iterated is reverse // post order. 
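The iterator family just below gains HBlocksInLoopPostOrderIterator, which walks a loop's blocks in post order by scanning the graph's reverse post order backwards. A hedged usage sketch following the Done()/Current()/Advance() convention of the existing iterators; the visiting function is illustrative:

#include "nodes.h"

namespace art HIDDEN {

// Illustrative only: visit every block of `loop` in post order, inner-loop
// blocks included, mirroring how the other HBlocksInLoop* iterators are used.
void VisitLoopBlocksPostOrder(const HLoopInformation& loop,
                              void (*visit)(HBasicBlock*)) {
  for (HBlocksInLoopPostOrderIterator it(loop); !it.Done(); it.Advance()) {
    visit(it.Current());
  }
}

}  // namespace art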
class HBlocksInLoopReversePostOrderIterator : public ValueObject { @@ -8689,6 +8682,39 @@ class HBlocksInLoopReversePostOrderIterator : public ValueObject { DISALLOW_COPY_AND_ASSIGN(HBlocksInLoopReversePostOrderIterator); }; +// Iterator over the blocks that are part of the loop; includes blocks which are part +// of an inner loop. The order in which the blocks are iterated is post order. +class HBlocksInLoopPostOrderIterator : public ValueObject { + public: + explicit HBlocksInLoopPostOrderIterator(const HLoopInformation& info) + : blocks_in_loop_(info.GetBlocks()), + blocks_(info.GetHeader()->GetGraph()->GetReversePostOrder()), + index_(blocks_.size() - 1) { + if (!blocks_in_loop_.IsBitSet(blocks_[index_]->GetBlockId())) { + Advance(); + } + } + + bool Done() const { return index_ < 0; } + HBasicBlock* Current() const { return blocks_[index_]; } + void Advance() { + --index_; + for (; index_ >= 0; --index_) { + if (blocks_in_loop_.IsBitSet(blocks_[index_]->GetBlockId())) { + break; + } + } + } + + private: + const BitVector& blocks_in_loop_; + const ArenaVector<HBasicBlock*>& blocks_; + + int32_t index_; + + DISALLOW_COPY_AND_ASSIGN(HBlocksInLoopPostOrderIterator); +}; + // Returns int64_t value of a properly typed constant. inline int64_t Int64FromConstant(HConstant* constant) { if (constant->IsIntConstant()) { @@ -8752,10 +8778,18 @@ inline bool IsZeroBitPattern(HInstruction* instruction) { #define INSTRUCTION_TYPE_CAST(type, super) \ inline const H##type* HInstruction::As##type() const { \ - return Is##type() ? down_cast<const H##type*>(this) : nullptr; \ + DCHECK(Is##type()); \ + return down_cast<const H##type*>(this); \ } \ inline H##type* HInstruction::As##type() { \ - return Is##type() ? static_cast<H##type*>(this) : nullptr; \ + DCHECK(Is##type()); \ + return down_cast<H##type*>(this); \ + } \ + inline const H##type* HInstruction::As##type##OrNull() const { \ + return Is##type() ? down_cast<const H##type*>(this) : nullptr; \ + } \ + inline H##type* HInstruction::As##type##OrNull() { \ + return Is##type() ? 
down_cast<H##type*>(this) : nullptr; \ } FOR_EACH_INSTRUCTION(INSTRUCTION_TYPE_CAST) diff --git a/compiler/optimizing/nodes_shared.h b/compiler/optimizing/nodes_shared.h index 27e610328f..4b0187d536 100644 --- a/compiler/optimizing/nodes_shared.h +++ b/compiler/optimizing/nodes_shared.h @@ -105,13 +105,13 @@ class HBitwiseNegatedRight final : public HBinaryOperation { return GetBlock()->GetGraph()->GetLongConstant( Compute(x->GetValue(), y->GetValue()), GetDexPc()); } - HConstant* Evaluate(HFloatConstant* x ATTRIBUTE_UNUSED, - HFloatConstant* y ATTRIBUTE_UNUSED) const override { + HConstant* Evaluate([[maybe_unused]] HFloatConstant* x, + [[maybe_unused]] HFloatConstant* y) const override { LOG(FATAL) << DebugName() << " is not defined for float values"; UNREACHABLE(); } - HConstant* Evaluate(HDoubleConstant* x ATTRIBUTE_UNUSED, - HDoubleConstant* y ATTRIBUTE_UNUSED) const override { + HConstant* Evaluate([[maybe_unused]] HDoubleConstant* x, + [[maybe_unused]] HDoubleConstant* y) const override { LOG(FATAL) << DebugName() << " is not defined for double values"; UNREACHABLE(); } @@ -160,7 +160,7 @@ class HIntermediateAddressIndex final : public HExpression<3> { bool IsClonable() const override { return true; } bool CanBeMoved() const override { return true; } - bool InstructionDataEquals(const HInstruction* other ATTRIBUTE_UNUSED) const override { + bool InstructionDataEquals([[maybe_unused]] const HInstruction* other) const override { return true; } bool IsActualObject() const override { return false; } diff --git a/compiler/optimizing/nodes_vector.h b/compiler/optimizing/nodes_vector.h index 73f6c40a0d..6a60d6be01 100644 --- a/compiler/optimizing/nodes_vector.h +++ b/compiler/optimizing/nodes_vector.h @@ -1384,8 +1384,8 @@ class HVecPredWhile final : public HVecPredSetOperation { static constexpr size_t kCondKind = HVecOperation::kNumberOfVectorOpPackedBits; static constexpr size_t kCondKindSize = MinimumBitsToStore(static_cast<size_t>(CondKind::kLast)); - static constexpr size_t kNumberOfVecPredConditionPackedBits = kCondKind + kCondKindSize; - static_assert(kNumberOfVecPredConditionPackedBits <= kMaxNumberOfPackedBits, + static constexpr size_t kNumberOfVecPredWhilePackedBits = kCondKind + kCondKindSize; + static_assert(kNumberOfVecPredWhilePackedBits <= kMaxNumberOfPackedBits, "Too many packed fields."); using CondKindField = BitField<CondKind, kCondKind, kCondKindSize>; @@ -1395,13 +1395,13 @@ class HVecPredWhile final : public HVecPredSetOperation { // Evaluates the predicate condition (PCondKind) for a vector predicate; outputs // a scalar boolean value result. // -// Note: as VecPredCondition can be also predicated, only active elements (determined by the +// Note: as VecPredToBoolean can be also predicated, only active elements (determined by the // instruction's governing predicate) of the input vector predicate are used for condition // evaluation. // // Note: this instruction is currently used as a workaround for the fact that IR instructions // can't have more than one output. -class HVecPredCondition final : public HVecOperation { +class HVecPredToBoolean final : public HVecOperation { public: // To get more info on the condition kinds please see "2.2 Process state, PSTATE" section of // "ARM Architecture Reference Manual Supplement. 
The Scalable Vector Extension (SVE), @@ -1418,13 +1418,13 @@ class HVecPredCondition final : public HVecOperation { kEnumLast = kPLast }; - HVecPredCondition(ArenaAllocator* allocator, + HVecPredToBoolean(ArenaAllocator* allocator, HInstruction* input, PCondKind pred_cond, DataType::Type packed_type, size_t vector_length, uint32_t dex_pc) - : HVecOperation(kVecPredCondition, + : HVecOperation(kVecPredToBoolean, allocator, packed_type, SideEffects::None(), @@ -1447,19 +1447,86 @@ class HVecPredCondition final : public HVecOperation { return GetPackedField<CondKindField>(); } - DECLARE_INSTRUCTION(VecPredCondition); + DECLARE_INSTRUCTION(VecPredToBoolean); protected: // Additional packed bits. static constexpr size_t kCondKind = HVecOperation::kNumberOfVectorOpPackedBits; static constexpr size_t kCondKindSize = MinimumBitsToStore(static_cast<size_t>(PCondKind::kEnumLast)); - static constexpr size_t kNumberOfVecPredConditionPackedBits = kCondKind + kCondKindSize; - static_assert(kNumberOfVecPredConditionPackedBits <= kMaxNumberOfPackedBits, + static constexpr size_t kNumberOfVecPredToBooleanPackedBits = kCondKind + kCondKindSize; + static_assert(kNumberOfVecPredToBooleanPackedBits <= kMaxNumberOfPackedBits, "Too many packed fields."); using CondKindField = BitField<PCondKind, kCondKind, kCondKindSize>; - DEFAULT_COPY_CONSTRUCTOR(VecPredCondition); + DEFAULT_COPY_CONSTRUCTOR(VecPredToBoolean); +}; + +// Evaluates condition for pairwise elements in two input vectors and sets the result +// as an output predicate vector. +// +// viz. [ p1, .. , pn ] = [ x1 OP y1 , x2 OP y2, .. , xn OP yn] where OP is CondKind +// condition. +// +// Currently only kEqual is supported by this vector instruction - we don't even define +// the kCondType here. +// TODO: support other condition ops. +class HVecCondition final : public HVecPredSetOperation { + public: + HVecCondition(ArenaAllocator* allocator, + HInstruction* left, + HInstruction* right, + DataType::Type packed_type, + size_t vector_length, + uint32_t dex_pc) : + HVecPredSetOperation(kVecCondition, + allocator, + packed_type, + SideEffects::None(), + /* number_of_inputs= */ 2, + vector_length, + dex_pc) { + DCHECK(left->IsVecOperation()); + DCHECK(!left->IsVecPredSetOperation()); + DCHECK(right->IsVecOperation()); + DCHECK(!right->IsVecPredSetOperation()); + SetRawInputAt(0, left); + SetRawInputAt(1, right); + } + + DECLARE_INSTRUCTION(VecCondition); + + protected: + DEFAULT_COPY_CONSTRUCTOR(VecCondition); +}; + +// Inverts every component in the predicate vector. +// +// viz. [ p1, .. , pn ] = [ !px1 , !px2 , .. , !pxn ]. 
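The renamed HVecPredToBoolean plus the new HVecCondition and HVecPredNot nodes (the latter's class body follows) model governing predicates: an element-wise compare produces a predicate vector, which can then be inverted. A standalone scalar model of those two operations, using 4-lane arrays purely for illustration; this is not ART code, only the semantics the comments above describe:

#include <array>
#include <cstddef>
#include <cstdint>

constexpr std::size_t kLanes = 4;

// HVecCondition (kEqual): p[i] = (x[i] == y[i]) for each lane.
std::array<bool, kLanes> VecConditionEqual(const std::array<int32_t, kLanes>& x,
                                           const std::array<int32_t, kLanes>& y) {
  std::array<bool, kLanes> predicate{};
  for (std::size_t i = 0; i < kLanes; ++i) {
    predicate[i] = (x[i] == y[i]);
  }
  return predicate;
}

// HVecPredNot: p[i] = !q[i], i.e. invert every component of the predicate.
std::array<bool, kLanes> VecPredNot(const std::array<bool, kLanes>& q) {
  std::array<bool, kLanes> inverted{};
  for (std::size_t i = 0; i < kLanes; ++i) {
    inverted[i] = !q[i];
  }
  return inverted;
}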
+class HVecPredNot final : public HVecPredSetOperation { + public: + HVecPredNot(ArenaAllocator* allocator, + HInstruction* input, + DataType::Type packed_type, + size_t vector_length, + uint32_t dex_pc) : + HVecPredSetOperation(kVecPredNot, + allocator, + packed_type, + SideEffects::None(), + /* number_of_inputs= */ 1, + vector_length, + dex_pc) { + DCHECK(input->IsVecOperation()); + DCHECK(input->IsVecPredSetOperation()); + + SetRawInputAt(0, input); + } + + DECLARE_INSTRUCTION(VecPredNot); + + protected: + DEFAULT_COPY_CONSTRUCTOR(VecPredNot); }; } // namespace art diff --git a/compiler/optimizing/nodes_x86.h b/compiler/optimizing/nodes_x86.h index e246390aa5..14d9823355 100644 --- a/compiler/optimizing/nodes_x86.h +++ b/compiler/optimizing/nodes_x86.h @@ -149,13 +149,13 @@ class HX86AndNot final : public HBinaryOperation { return GetBlock()->GetGraph()->GetLongConstant( Compute(x->GetValue(), y->GetValue()), GetDexPc()); } - HConstant* Evaluate(HFloatConstant* x ATTRIBUTE_UNUSED, - HFloatConstant* y ATTRIBUTE_UNUSED) const override { + HConstant* Evaluate([[maybe_unused]] HFloatConstant* x, + [[maybe_unused]] HFloatConstant* y) const override { LOG(FATAL) << DebugName() << " is not defined for float values"; UNREACHABLE(); } - HConstant* Evaluate(HDoubleConstant* x ATTRIBUTE_UNUSED, - HDoubleConstant* y ATTRIBUTE_UNUSED) const override { + HConstant* Evaluate([[maybe_unused]] HDoubleConstant* x, + [[maybe_unused]] HDoubleConstant* y) const override { LOG(FATAL) << DebugName() << " is not defined for double values"; UNREACHABLE(); } @@ -196,11 +196,11 @@ class HX86MaskOrResetLeastSetBit final : public HUnaryOperation { HConstant* Evaluate(HLongConstant* x) const override { return GetBlock()->GetGraph()->GetLongConstant(Compute(x->GetValue()), GetDexPc()); } - HConstant* Evaluate(HFloatConstant* x ATTRIBUTE_UNUSED) const override { + HConstant* Evaluate([[maybe_unused]] HFloatConstant* x) const override { LOG(FATAL) << DebugName() << "is not defined for float values"; UNREACHABLE(); } - HConstant* Evaluate(HDoubleConstant* x ATTRIBUTE_UNUSED) const override { + HConstant* Evaluate([[maybe_unused]] HDoubleConstant* x) const override { LOG(FATAL) << DebugName() << "is not defined for double values"; UNREACHABLE(); } diff --git a/compiler/optimizing/optimization.cc b/compiler/optimizing/optimization.cc index 12e9a1046d..16045d447c 100644 --- a/compiler/optimizing/optimization.cc +++ b/compiler/optimizing/optimization.cc @@ -23,6 +23,9 @@ #ifdef ART_ENABLE_CODEGEN_arm64 #include "instruction_simplifier_arm64.h" #endif +#ifdef ART_ENABLE_CODEGEN_riscv64 +#include "critical_native_abi_fixup_riscv64.h" +#endif #ifdef ART_ENABLE_CODEGEN_x86 #include "pc_relative_fixups_x86.h" #include "instruction_simplifier_x86.h" @@ -109,6 +112,10 @@ const char* OptimizationPassName(OptimizationPass pass) { case OptimizationPass::kInstructionSimplifierArm64: return arm64::InstructionSimplifierArm64::kInstructionSimplifierArm64PassName; #endif +#ifdef ART_ENABLE_CODEGEN_riscv64 + case OptimizationPass::kCriticalNativeAbiFixupRiscv64: + return riscv64::CriticalNativeAbiFixupRiscv64::kCriticalNativeAbiFixupRiscv64PassName; +#endif #ifdef ART_ENABLE_CODEGEN_x86 case OptimizationPass::kPcRelativeFixupsX86: return x86::PcRelativeFixups::kPcRelativeFixupsX86PassName; @@ -155,6 +162,9 @@ OptimizationPass OptimizationPassByName(const std::string& pass_name) { #ifdef ART_ENABLE_CODEGEN_arm64 X(OptimizationPass::kInstructionSimplifierArm64); #endif +#ifdef ART_ENABLE_CODEGEN_riscv64 + 
X(OptimizationPass::kCriticalNativeAbiFixupRiscv64); +#endif #ifdef ART_ENABLE_CODEGEN_x86 X(OptimizationPass::kPcRelativeFixupsX86); X(OptimizationPass::kX86MemoryOperandGeneration); @@ -290,7 +300,7 @@ ArenaVector<HOptimization*> ConstructOptimizations( #ifdef ART_ENABLE_CODEGEN_arm case OptimizationPass::kInstructionSimplifierArm: DCHECK(alt_name == nullptr) << "arch-specific pass does not support alternative name"; - opt = new (allocator) arm::InstructionSimplifierArm(graph, stats); + opt = new (allocator) arm::InstructionSimplifierArm(graph, codegen, stats); break; case OptimizationPass::kCriticalNativeAbiFixupArm: DCHECK(alt_name == nullptr) << "arch-specific pass does not support alternative name"; @@ -300,7 +310,13 @@ ArenaVector<HOptimization*> ConstructOptimizations( #ifdef ART_ENABLE_CODEGEN_arm64 case OptimizationPass::kInstructionSimplifierArm64: DCHECK(alt_name == nullptr) << "arch-specific pass does not support alternative name"; - opt = new (allocator) arm64::InstructionSimplifierArm64(graph, stats); + opt = new (allocator) arm64::InstructionSimplifierArm64(graph, codegen, stats); + break; +#endif +#ifdef ART_ENABLE_CODEGEN_riscv64 + case OptimizationPass::kCriticalNativeAbiFixupRiscv64: + DCHECK(alt_name == nullptr) << "arch-specific pass does not support alternative name"; + opt = new (allocator) riscv64::CriticalNativeAbiFixupRiscv64(graph, stats); break; #endif #ifdef ART_ENABLE_CODEGEN_x86 @@ -313,8 +329,8 @@ ArenaVector<HOptimization*> ConstructOptimizations( opt = new (allocator) x86::X86MemoryOperandGeneration(graph, codegen, stats); break; case OptimizationPass::kInstructionSimplifierX86: - opt = new (allocator) x86::InstructionSimplifierX86(graph, codegen, stats); - break; + opt = new (allocator) x86::InstructionSimplifierX86(graph, codegen, stats); + break; #endif #ifdef ART_ENABLE_CODEGEN_x86_64 case OptimizationPass::kInstructionSimplifierX86_64: diff --git a/compiler/optimizing/optimization.h b/compiler/optimizing/optimization.h index 134e3cdc7a..57c5f4639c 100644 --- a/compiler/optimizing/optimization.h +++ b/compiler/optimizing/optimization.h @@ -93,6 +93,9 @@ enum class OptimizationPass { #ifdef ART_ENABLE_CODEGEN_arm64 kInstructionSimplifierArm64, #endif +#ifdef ART_ENABLE_CODEGEN_riscv64 + kCriticalNativeAbiFixupRiscv64, +#endif #ifdef ART_ENABLE_CODEGEN_x86 kPcRelativeFixupsX86, kInstructionSimplifierX86, diff --git a/compiler/optimizing/optimizing_cfi_test.cc b/compiler/optimizing/optimizing_cfi_test.cc index f12e748941..9df4932f3c 100644 --- a/compiler/optimizing/optimizing_cfi_test.cc +++ b/compiler/optimizing/optimizing_cfi_test.cc @@ -35,9 +35,6 @@ namespace vixl32 = vixl::aarch32; namespace art HIDDEN { -// Run the tests only on host. -#ifndef ART_TARGET_ANDROID - class OptimizingCFITest : public CFITest, public OptimizingUnitTestHelper { public: // Enable this flag to generate the expected outputs. @@ -89,7 +86,7 @@ class OptimizingCFITest : public CFITest, public OptimizingUnitTestHelper { void Finish() { code_gen_->GenerateFrameExit(); - code_gen_->Finalize(&code_allocator_); + code_gen_->Finalize(); } void Check(InstructionSet isa, @@ -97,7 +94,7 @@ class OptimizingCFITest : public CFITest, public OptimizingUnitTestHelper { const std::vector<uint8_t>& expected_asm, const std::vector<uint8_t>& expected_cfi) { // Get the outputs. 
- ArrayRef<const uint8_t> actual_asm = code_allocator_.GetMemory(); + ArrayRef<const uint8_t> actual_asm = code_gen_->GetCode(); Assembler* opt_asm = code_gen_->GetAssembler(); ArrayRef<const uint8_t> actual_cfi(*(opt_asm->cfi().data())); @@ -123,27 +120,9 @@ class OptimizingCFITest : public CFITest, public OptimizingUnitTestHelper { } private: - class InternalCodeAllocator : public CodeAllocator { - public: - InternalCodeAllocator() {} - - uint8_t* Allocate(size_t size) override { - memory_.resize(size); - return memory_.data(); - } - - ArrayRef<const uint8_t> GetMemory() const override { return ArrayRef<const uint8_t>(memory_); } - - private: - std::vector<uint8_t> memory_; - - DISALLOW_COPY_AND_ASSIGN(InternalCodeAllocator); - }; - HGraph* graph_; std::unique_ptr<CodeGenerator> code_gen_; ArenaVector<HBasicBlock*> blocks_; - InternalCodeAllocator code_allocator_; }; #define TEST_ISA(isa) \ @@ -162,26 +141,15 @@ TEST_ISA(kThumb2) #endif #ifdef ART_ENABLE_CODEGEN_arm64 -// Run the tests for ARM64 only with Baker read barriers, as the +// Run the tests for ARM64 only if the Marking Register is reserved as the // expected generated code saves and restore X21 and X22 (instead of // X20 and X21), as X20 is used as Marking Register in the Baker read // barrier configuration, and as such is removed from the set of // callee-save registers in the ARM64 code generator of the Optimizing // compiler. -// -// We can't use compile-time macros for read-barrier as the introduction -// of userfaultfd-GC has made it a runtime choice. -TEST_F(OptimizingCFITest, kArm64) { - if (kUseBakerReadBarrier && gUseReadBarrier) { - std::vector<uint8_t> expected_asm( - expected_asm_kArm64, - expected_asm_kArm64 + arraysize(expected_asm_kArm64)); - std::vector<uint8_t> expected_cfi( - expected_cfi_kArm64, - expected_cfi_kArm64 + arraysize(expected_cfi_kArm64)); - TestImpl(InstructionSet::kArm64, "kArm64", expected_asm, expected_cfi); - } -} +#if defined(RESERVE_MARKING_REGISTER) +TEST_ISA(kArm64) +#endif #endif #ifdef ART_ENABLE_CODEGEN_x86 @@ -217,6 +185,4 @@ TEST_F(OptimizingCFITest, kThumb2Adjust) { } #endif -#endif // ART_TARGET_ANDROID - } // namespace art diff --git a/compiler/optimizing/optimizing_compiler.cc b/compiler/optimizing/optimizing_compiler.cc index 00eb6e5c42..d458462226 100644 --- a/compiler/optimizing/optimizing_compiler.cc +++ b/compiler/optimizing/optimizing_compiler.cc @@ -53,6 +53,7 @@ #include "oat_quick_method_header.h" #include "optimizing/write_barrier_elimination.h" #include "prepare_for_register_allocation.h" +#include "profiling_info_builder.h" #include "reference_type_propagation.h" #include "register_allocator_linear_scan.h" #include "select_generator.h" @@ -69,28 +70,6 @@ static constexpr size_t kArenaAllocatorMemoryReportThreshold = 8 * MB; static constexpr const char* kPassNameSeparator = "$"; /** - * Used by the code generator, to allocate the code in a vector. - */ -class CodeVectorAllocator final : public CodeAllocator { - public: - explicit CodeVectorAllocator(ArenaAllocator* allocator) - : memory_(allocator->Adapter(kArenaAllocCodeBuffer)) {} - - uint8_t* Allocate(size_t size) override { - memory_.resize(size); - return &memory_[0]; - } - - ArrayRef<const uint8_t> GetMemory() const override { return ArrayRef<const uint8_t>(memory_); } - uint8_t* GetData() { return memory_.data(); } - - private: - ArenaVector<uint8_t> memory_; - - DISALLOW_COPY_AND_ASSIGN(CodeVectorAllocator); -}; - -/** * Filter to apply to the visualizer. 
Methods whose name contain that filter will * be dumped. */ @@ -361,7 +340,6 @@ class OptimizingCompiler final : public Compiler { // Create a 'CompiledMethod' for an optimized graph. CompiledMethod* Emit(ArenaAllocator* allocator, - CodeVectorAllocator* code_allocator, CodeGenerator* codegen, bool is_intrinsic, const dex::CodeItem* item) const; @@ -372,10 +350,8 @@ class OptimizingCompiler final : public Compiler { // 1) Builds the graph. Returns null if it failed to build it. // 2) Transforms the graph to SSA. Returns null if it failed. // 3) Runs optimizations on the graph, including register allocator. - // 4) Generates code with the `code_allocator` provided. CodeGenerator* TryCompile(ArenaAllocator* allocator, ArenaStack* arena_stack, - CodeVectorAllocator* code_allocator, const DexCompilationUnit& dex_compilation_unit, ArtMethod* method, CompilationKind compilation_kind, @@ -383,7 +359,6 @@ class OptimizingCompiler final : public Compiler { CodeGenerator* TryCompileIntrinsic(ArenaAllocator* allocator, ArenaStack* arena_stack, - CodeVectorAllocator* code_allocator, const DexCompilationUnit& dex_compilation_unit, ArtMethod* method, VariableSizedHandleScope* handles) const; @@ -440,24 +415,33 @@ void OptimizingCompiler::DumpInstructionSetFeaturesToCfg() const { std::string isa_string = std::string("isa:") + GetInstructionSetString(features->GetInstructionSet()); std::string features_string = "isa_features:" + features->GetFeatureString(); + std::string read_barrier_type = "none"; + if (compiler_options.EmitReadBarrier()) { + if (art::kUseBakerReadBarrier) + read_barrier_type = "baker"; + else if (art::kUseTableLookupReadBarrier) + read_barrier_type = "tablelookup"; + } + std::string read_barrier_string = ART_FORMAT("read_barrier_type:{}", read_barrier_type); // It is assumed that visualizer_output_ is empty when calling this function, hence the fake // compilation block containing the ISA features will be printed at the beginning of the .cfg // file. 
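For reference, a standalone sketch of the metadata string assembled by the read-barrier hunk above and emitted just below; the ISA and feature values are placeholders, and ART_FORMAT is approximated with plain string concatenation:

// Standalone model of the .cfg metadata line; placeholder ISA/feature values.
#include <iostream>
#include <string>

int main() {
  std::string isa_string = "isa:arm64";                  // placeholder
  std::string features_string = "isa_features:crc,lse";  // placeholder
  bool emit_read_barrier = true;   // stands in for CompilerOptions::EmitReadBarrier()
  bool use_baker = true;           // stands in for art::kUseBakerReadBarrier
  bool use_table_lookup = false;   // stands in for art::kUseTableLookupReadBarrier

  std::string read_barrier_type = "none";
  if (emit_read_barrier) {
    if (use_baker) {
      read_barrier_type = "baker";
    } else if (use_table_lookup) {
      read_barrier_type = "tablelookup";
    }
  }
  std::string read_barrier_string = "read_barrier_type:" + read_barrier_type;

  // Prints: isa:arm64 isa_features:crc,lse read_barrier_type:baker
  std::cout << isa_string + ' ' + features_string + ' ' + read_barrier_string << '\n';
  return 0;
}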
- *visualizer_output_ - << HGraphVisualizer::InsertMetaDataAsCompilationBlock(isa_string + ' ' + features_string); + *visualizer_output_ << HGraphVisualizer::InsertMetaDataAsCompilationBlock( + isa_string + ' ' + features_string + ' ' + read_barrier_string); } -bool OptimizingCompiler::CanCompileMethod(uint32_t method_idx ATTRIBUTE_UNUSED, - const DexFile& dex_file ATTRIBUTE_UNUSED) const { +bool OptimizingCompiler::CanCompileMethod([[maybe_unused]] uint32_t method_idx, + [[maybe_unused]] const DexFile& dex_file) const { return true; } static bool IsInstructionSetSupported(InstructionSet instruction_set) { - return instruction_set == InstructionSet::kArm - || instruction_set == InstructionSet::kArm64 - || instruction_set == InstructionSet::kThumb2 - || instruction_set == InstructionSet::kX86 - || instruction_set == InstructionSet::kX86_64; + return instruction_set == InstructionSet::kArm || + instruction_set == InstructionSet::kArm64 || + instruction_set == InstructionSet::kThumb2 || + instruction_set == InstructionSet::kRiscv64 || + instruction_set == InstructionSet::kX86 || + instruction_set == InstructionSet::kX86_64; } bool OptimizingCompiler::RunBaselineOptimizations(HGraph* graph, @@ -469,7 +453,7 @@ bool OptimizingCompiler::RunBaselineOptimizations(HGraph* graph, case InstructionSet::kThumb2: case InstructionSet::kArm: { OptimizationDef arm_optimizations[] = { - OptDef(OptimizationPass::kCriticalNativeAbiFixupArm), + OptDef(OptimizationPass::kCriticalNativeAbiFixupArm), }; return RunOptimizations(graph, codegen, @@ -478,10 +462,22 @@ bool OptimizingCompiler::RunBaselineOptimizations(HGraph* graph, arm_optimizations); } #endif +#if defined(ART_ENABLE_CODEGEN_riscv64) + case InstructionSet::kRiscv64: { + OptimizationDef riscv64_optimizations[] = { + OptDef(OptimizationPass::kCriticalNativeAbiFixupRiscv64), + }; + return RunOptimizations(graph, + codegen, + dex_compilation_unit, + pass_observer, + riscv64_optimizations); + } +#endif #ifdef ART_ENABLE_CODEGEN_x86 case InstructionSet::kX86: { OptimizationDef x86_optimizations[] = { - OptDef(OptimizationPass::kPcRelativeFixupsX86), + OptDef(OptimizationPass::kPcRelativeFixupsX86), }; return RunOptimizations(graph, codegen, @@ -508,11 +504,11 @@ bool OptimizingCompiler::RunArchOptimizations(HGraph* graph, case InstructionSet::kThumb2: case InstructionSet::kArm: { OptimizationDef arm_optimizations[] = { - OptDef(OptimizationPass::kInstructionSimplifierArm), - OptDef(OptimizationPass::kSideEffectsAnalysis), - OptDef(OptimizationPass::kGlobalValueNumbering, "GVN$after_arch"), - OptDef(OptimizationPass::kCriticalNativeAbiFixupArm), - OptDef(OptimizationPass::kScheduling) + OptDef(OptimizationPass::kInstructionSimplifierArm), + OptDef(OptimizationPass::kSideEffectsAnalysis), + OptDef(OptimizationPass::kGlobalValueNumbering, "GVN$after_arch"), + OptDef(OptimizationPass::kCriticalNativeAbiFixupArm), + OptDef(OptimizationPass::kScheduling) }; return RunOptimizations(graph, codegen, @@ -524,10 +520,10 @@ bool OptimizingCompiler::RunArchOptimizations(HGraph* graph, #ifdef ART_ENABLE_CODEGEN_arm64 case InstructionSet::kArm64: { OptimizationDef arm64_optimizations[] = { - OptDef(OptimizationPass::kInstructionSimplifierArm64), - OptDef(OptimizationPass::kSideEffectsAnalysis), - OptDef(OptimizationPass::kGlobalValueNumbering, "GVN$after_arch"), - OptDef(OptimizationPass::kScheduling) + OptDef(OptimizationPass::kInstructionSimplifierArm64), + OptDef(OptimizationPass::kSideEffectsAnalysis), + OptDef(OptimizationPass::kGlobalValueNumbering, 
"GVN$after_arch"), + OptDef(OptimizationPass::kScheduling) }; return RunOptimizations(graph, codegen, @@ -536,14 +532,28 @@ bool OptimizingCompiler::RunArchOptimizations(HGraph* graph, arm64_optimizations); } #endif +#if defined(ART_ENABLE_CODEGEN_riscv64) + case InstructionSet::kRiscv64: { + OptimizationDef riscv64_optimizations[] = { + OptDef(OptimizationPass::kSideEffectsAnalysis), + OptDef(OptimizationPass::kGlobalValueNumbering, "GVN$after_arch"), + OptDef(OptimizationPass::kCriticalNativeAbiFixupRiscv64) + }; + return RunOptimizations(graph, + codegen, + dex_compilation_unit, + pass_observer, + riscv64_optimizations); + } +#endif #ifdef ART_ENABLE_CODEGEN_x86 case InstructionSet::kX86: { OptimizationDef x86_optimizations[] = { - OptDef(OptimizationPass::kInstructionSimplifierX86), - OptDef(OptimizationPass::kSideEffectsAnalysis), - OptDef(OptimizationPass::kGlobalValueNumbering, "GVN$after_arch"), - OptDef(OptimizationPass::kPcRelativeFixupsX86), - OptDef(OptimizationPass::kX86MemoryOperandGeneration) + OptDef(OptimizationPass::kInstructionSimplifierX86), + OptDef(OptimizationPass::kSideEffectsAnalysis), + OptDef(OptimizationPass::kGlobalValueNumbering, "GVN$after_arch"), + OptDef(OptimizationPass::kPcRelativeFixupsX86), + OptDef(OptimizationPass::kX86MemoryOperandGeneration) }; return RunOptimizations(graph, codegen, @@ -555,10 +565,10 @@ bool OptimizingCompiler::RunArchOptimizations(HGraph* graph, #ifdef ART_ENABLE_CODEGEN_x86_64 case InstructionSet::kX86_64: { OptimizationDef x86_64_optimizations[] = { - OptDef(OptimizationPass::kInstructionSimplifierX86_64), - OptDef(OptimizationPass::kSideEffectsAnalysis), - OptDef(OptimizationPass::kGlobalValueNumbering, "GVN$after_arch"), - OptDef(OptimizationPass::kX86MemoryOperandGeneration) + OptDef(OptimizationPass::kInstructionSimplifierX86_64), + OptDef(OptimizationPass::kSideEffectsAnalysis), + OptDef(OptimizationPass::kGlobalValueNumbering, "GVN$after_arch"), + OptDef(OptimizationPass::kX86MemoryOperandGeneration) }; return RunOptimizations(graph, codegen, @@ -633,68 +643,68 @@ void OptimizingCompiler::RunOptimizations(HGraph* graph, } OptimizationDef optimizations[] = { - // Initial optimizations. - OptDef(OptimizationPass::kConstantFolding), - OptDef(OptimizationPass::kInstructionSimplifier), - OptDef(OptimizationPass::kDeadCodeElimination, - "dead_code_elimination$initial"), - // Inlining. - OptDef(OptimizationPass::kInliner), - // Simplification (if inlining occurred, or if we analyzed the invoke as "always throwing"). - OptDef(OptimizationPass::kConstantFolding, - "constant_folding$after_inlining", - OptimizationPass::kInliner), - OptDef(OptimizationPass::kInstructionSimplifier, - "instruction_simplifier$after_inlining", - OptimizationPass::kInliner), - OptDef(OptimizationPass::kDeadCodeElimination, - "dead_code_elimination$after_inlining", - OptimizationPass::kInliner), - // GVN. - OptDef(OptimizationPass::kSideEffectsAnalysis, - "side_effects$before_gvn"), - OptDef(OptimizationPass::kGlobalValueNumbering), - // Simplification (TODO: only if GVN occurred). - OptDef(OptimizationPass::kSelectGenerator), - OptDef(OptimizationPass::kAggressiveConstantFolding, - "constant_folding$after_gvn"), - OptDef(OptimizationPass::kInstructionSimplifier, - "instruction_simplifier$after_gvn"), - OptDef(OptimizationPass::kDeadCodeElimination, - "dead_code_elimination$after_gvn"), - // High-level optimizations. 
- OptDef(OptimizationPass::kSideEffectsAnalysis, - "side_effects$before_licm"), - OptDef(OptimizationPass::kInvariantCodeMotion), - OptDef(OptimizationPass::kInductionVarAnalysis), - OptDef(OptimizationPass::kBoundsCheckElimination), - OptDef(OptimizationPass::kLoopOptimization), - // Simplification. - OptDef(OptimizationPass::kConstantFolding, - "constant_folding$after_loop_opt"), - OptDef(OptimizationPass::kAggressiveInstructionSimplifier, - "instruction_simplifier$after_loop_opt"), - OptDef(OptimizationPass::kDeadCodeElimination, - "dead_code_elimination$after_loop_opt"), - // Other high-level optimizations. - OptDef(OptimizationPass::kLoadStoreElimination), - OptDef(OptimizationPass::kCHAGuardOptimization), - OptDef(OptimizationPass::kCodeSinking), - // Simplification. - OptDef(OptimizationPass::kConstantFolding, - "constant_folding$before_codegen"), - // The codegen has a few assumptions that only the instruction simplifier - // can satisfy. For example, the code generator does not expect to see a - // HTypeConversion from a type to the same type. - OptDef(OptimizationPass::kAggressiveInstructionSimplifier, - "instruction_simplifier$before_codegen"), - // Simplification may result in dead code that should be removed prior to - // code generation. - OptDef(OptimizationPass::kDeadCodeElimination, - "dead_code_elimination$before_codegen"), - // Eliminate constructor fences after code sinking to avoid - // complicated sinking logic to split a fence with many inputs. - OptDef(OptimizationPass::kConstructorFenceRedundancyElimination) + // Initial optimizations. + OptDef(OptimizationPass::kConstantFolding), + OptDef(OptimizationPass::kInstructionSimplifier), + OptDef(OptimizationPass::kDeadCodeElimination, + "dead_code_elimination$initial"), + // Inlining. + OptDef(OptimizationPass::kInliner), + // Simplification (if inlining occurred, or if we analyzed the invoke as "always throwing"). + OptDef(OptimizationPass::kConstantFolding, + "constant_folding$after_inlining", + OptimizationPass::kInliner), + OptDef(OptimizationPass::kInstructionSimplifier, + "instruction_simplifier$after_inlining", + OptimizationPass::kInliner), + OptDef(OptimizationPass::kDeadCodeElimination, + "dead_code_elimination$after_inlining", + OptimizationPass::kInliner), + // GVN. + OptDef(OptimizationPass::kSideEffectsAnalysis, + "side_effects$before_gvn"), + OptDef(OptimizationPass::kGlobalValueNumbering), + // Simplification (TODO: only if GVN occurred). + OptDef(OptimizationPass::kSelectGenerator), + OptDef(OptimizationPass::kAggressiveConstantFolding, + "constant_folding$after_gvn"), + OptDef(OptimizationPass::kInstructionSimplifier, + "instruction_simplifier$after_gvn"), + OptDef(OptimizationPass::kDeadCodeElimination, + "dead_code_elimination$after_gvn"), + // High-level optimizations. + OptDef(OptimizationPass::kSideEffectsAnalysis, + "side_effects$before_licm"), + OptDef(OptimizationPass::kInvariantCodeMotion), + OptDef(OptimizationPass::kInductionVarAnalysis), + OptDef(OptimizationPass::kBoundsCheckElimination), + OptDef(OptimizationPass::kLoopOptimization), + // Simplification. + OptDef(OptimizationPass::kConstantFolding, + "constant_folding$after_loop_opt"), + OptDef(OptimizationPass::kAggressiveInstructionSimplifier, + "instruction_simplifier$after_loop_opt"), + OptDef(OptimizationPass::kDeadCodeElimination, + "dead_code_elimination$after_loop_opt"), + // Other high-level optimizations. 
+ OptDef(OptimizationPass::kLoadStoreElimination), + OptDef(OptimizationPass::kCHAGuardOptimization), + OptDef(OptimizationPass::kCodeSinking), + // Simplification. + OptDef(OptimizationPass::kConstantFolding, + "constant_folding$before_codegen"), + // The codegen has a few assumptions that only the instruction simplifier + // can satisfy. For example, the code generator does not expect to see a + // HTypeConversion from a type to the same type. + OptDef(OptimizationPass::kAggressiveInstructionSimplifier, + "instruction_simplifier$before_codegen"), + // Simplification may result in dead code that should be removed prior to + // code generation. + OptDef(OptimizationPass::kDeadCodeElimination, + "dead_code_elimination$before_codegen"), + // Eliminate constructor fences after code sinking to avoid + // complicated sinking logic to split a fence with many inputs. + OptDef(OptimizationPass::kConstructorFenceRedundancyElimination) }; RunOptimizations(graph, codegen, @@ -719,7 +729,6 @@ static ArenaVector<linker::LinkerPatch> EmitAndSortLinkerPatches(CodeGenerator* } CompiledMethod* OptimizingCompiler::Emit(ArenaAllocator* allocator, - CodeVectorAllocator* code_allocator, CodeGenerator* codegen, bool is_intrinsic, const dex::CodeItem* code_item_for_osr_check) const { @@ -729,7 +738,7 @@ CompiledMethod* OptimizingCompiler::Emit(ArenaAllocator* allocator, CompiledCodeStorage* storage = GetCompiledCodeStorage(); CompiledMethod* compiled_method = storage->CreateCompiledMethod( codegen->GetInstructionSet(), - code_allocator->GetMemory(), + codegen->GetCode(), ArrayRef<const uint8_t>(stack_map), ArrayRef<const uint8_t>(*codegen->GetAssembler()->cfi().data()), ArrayRef<const linker::LinkerPatch>(linker_patches), @@ -749,7 +758,6 @@ CompiledMethod* OptimizingCompiler::Emit(ArenaAllocator* allocator, CodeGenerator* OptimizingCompiler::TryCompile(ArenaAllocator* allocator, ArenaStack* arena_stack, - CodeVectorAllocator* code_allocator, const DexCompilationUnit& dex_compilation_unit, ArtMethod* method, CompilationKind compilation_kind, @@ -828,8 +836,6 @@ CodeGenerator* OptimizingCompiler::TryCompile(ArenaAllocator* allocator, jit::Jit* jit = Runtime::Current()->GetJit(); if (jit != nullptr) { ProfilingInfo* info = jit->GetCodeCache()->GetProfilingInfo(method, Thread::Current()); - DCHECK_IMPLIES(compilation_kind == CompilationKind::kBaseline, info != nullptr) - << "Compiling a method baseline should always have a ProfilingInfo"; graph->SetProfilingInfo(info); } @@ -913,8 +919,23 @@ CodeGenerator* OptimizingCompiler::TryCompile(ArenaAllocator* allocator, &pass_observer, regalloc_strategy, compilation_stats_.get()); + // If we are compiling baseline and we haven't created a profiling info for + // this method already, do it now. + if (jit != nullptr && + compilation_kind == CompilationKind::kBaseline && + graph->GetProfilingInfo() == nullptr) { + ProfilingInfoBuilder( + graph, codegen->GetCompilerOptions(), codegen.get(), compilation_stats_.get()).Run(); + // We expect a profiling info to be created and attached to the graph. + // However, we may have run out of memory trying to create it, so in this + // case just abort the compilation. 
+ if (graph->GetProfilingInfo() == nullptr) { + MaybeRecordStat(compilation_stats_.get(), MethodCompilationStat::kJitOutOfMemoryForCommit); + return nullptr; + } + } - codegen->Compile(code_allocator); + codegen->Compile(); pass_observer.DumpDisassembly(); MaybeRecordStat(compilation_stats_.get(), MethodCompilationStat::kCompiledBytecode); @@ -924,7 +945,6 @@ CodeGenerator* OptimizingCompiler::TryCompile(ArenaAllocator* allocator, CodeGenerator* OptimizingCompiler::TryCompileIntrinsic( ArenaAllocator* allocator, ArenaStack* arena_stack, - CodeVectorAllocator* code_allocator, const DexCompilationUnit& dex_compilation_unit, ArtMethod* method, VariableSizedHandleScope* handles) const { @@ -986,9 +1006,9 @@ CodeGenerator* OptimizingCompiler::TryCompileIntrinsic( } OptimizationDef optimizations[] = { - // The codegen has a few assumptions that only the instruction simplifier - // can satisfy. - OptDef(OptimizationPass::kInstructionSimplifier), + // The codegen has a few assumptions that only the instruction simplifier + // can satisfy. + OptDef(OptimizationPass::kInstructionSimplifier), }; RunOptimizations(graph, codegen.get(), @@ -1013,7 +1033,7 @@ CodeGenerator* OptimizingCompiler::TryCompileIntrinsic( return nullptr; } - codegen->Compile(code_allocator); + codegen->Compile(); pass_observer.DumpDisassembly(); VLOG(compiler) << "Compiled intrinsic: " << method->GetIntrinsic() @@ -1037,7 +1057,6 @@ CompiledMethod* OptimizingCompiler::Compile(const dex::CodeItem* code_item, DCHECK(runtime->IsAotCompiler()); ArenaAllocator allocator(runtime->GetArenaPool()); ArenaStack arena_stack(runtime->GetArenaPool()); - CodeVectorAllocator code_allocator(&allocator); std::unique_ptr<CodeGenerator> codegen; bool compiled_intrinsic = false; { @@ -1071,7 +1090,6 @@ CompiledMethod* OptimizingCompiler::Compile(const dex::CodeItem* code_item, codegen.reset( TryCompileIntrinsic(&allocator, &arena_stack, - &code_allocator, dex_compilation_unit, method, &handles)); @@ -1083,7 +1101,6 @@ CompiledMethod* OptimizingCompiler::Compile(const dex::CodeItem* code_item, codegen.reset( TryCompile(&allocator, &arena_stack, - &code_allocator, dex_compilation_unit, method, compiler_options.IsBaseline() @@ -1094,7 +1111,6 @@ CompiledMethod* OptimizingCompiler::Compile(const dex::CodeItem* code_item, } if (codegen.get() != nullptr) { compiled_method = Emit(&allocator, - &code_allocator, codegen.get(), compiled_intrinsic, compiled_intrinsic ? nullptr : code_item); @@ -1177,19 +1193,16 @@ CompiledMethod* OptimizingCompiler::JniCompile(uint32_t access_flags, /*verified_method=*/ nullptr, dex_cache, compiling_class); - CodeVectorAllocator code_allocator(&allocator); // Go to native so that we don't block GC during compilation. ScopedThreadSuspension sts(soa.Self(), ThreadState::kNative); std::unique_ptr<CodeGenerator> codegen( TryCompileIntrinsic(&allocator, &arena_stack, - &code_allocator, dex_compilation_unit, method, &handles)); if (codegen != nullptr) { return Emit(&allocator, - &code_allocator, codegen.get(), /*is_intrinsic=*/ true, /*item=*/ nullptr); @@ -1221,7 +1234,7 @@ Compiler* CreateOptimizingCompiler(const CompilerOptions& compiler_options, return new OptimizingCompiler(compiler_options, storage); } -bool EncodeArtMethodInInlineInfo(ArtMethod* method ATTRIBUTE_UNUSED) { +bool EncodeArtMethodInInlineInfo([[maybe_unused]] ArtMethod* method) { // Note: the runtime is null only for unit testing. 
return Runtime::Current() == nullptr || !Runtime::Current()->IsAotCompiler(); } @@ -1328,7 +1341,6 @@ bool OptimizingCompiler::JitCompile(Thread* self, debug_info, /* is_full_debug_info= */ compiler_options.GetGenerateDebugInfo(), compilation_kind, - /* has_should_deoptimize_flag= */ false, cha_single_implementation_list)) { code_cache->Free(self, region, reserved_code.data(), reserved_data.data()); return false; } @@ -1342,7 +1354,6 @@ bool OptimizingCompiler::JitCompile(Thread* self, } ArenaStack arena_stack(runtime->GetJitArenaPool()); - CodeVectorAllocator code_allocator(&allocator); VariableSizedHandleScope handles(self); std::unique_ptr<CodeGenerator> codegen; @@ -1365,7 +1376,6 @@ bool OptimizingCompiler::JitCompile(Thread* self, codegen.reset( TryCompile(&allocator, &arena_stack, - &code_allocator, dex_compilation_unit, method, compilation_kind, @@ -1381,7 +1391,7 @@ bool OptimizingCompiler::JitCompile(Thread* self, ArrayRef<const uint8_t> reserved_data; if (!code_cache->Reserve(self, region, - code_allocator.GetMemory().size(), + codegen->GetAssembler()->CodeSize(), stack_map.size(), /*number_of_roots=*/codegen->GetNumberOfJitRoots(), method, @@ -1394,7 +1404,9 @@ bool OptimizingCompiler::JitCompile(Thread* self, const uint8_t* roots_data = reserved_data.data(); std::vector<Handle<mirror::Object>> roots; - codegen->EmitJitRoots(code_allocator.GetData(), roots_data, &roots); + codegen->EmitJitRoots(const_cast<uint8_t*>(codegen->GetAssembler()->CodeBufferBaseAddress()), + roots_data, + &roots); // The root Handle<>s filled by the codegen reference entries in the VariableSizedHandleScope. DCHECK(std::all_of(roots.begin(), roots.end(), @@ -1418,7 +1430,7 @@ bool OptimizingCompiler::JitCompile(Thread* self, info.is_optimized = true; info.is_code_address_text_relative = false; info.code_address = reinterpret_cast<uintptr_t>(code); - info.code_size = code_allocator.GetMemory().size(); + info.code_size = codegen->GetAssembler()->CodeSize(); info.frame_size_in_bytes = codegen->GetFrameSize(); info.code_info = stack_map.size() == 0 ?
nullptr : stack_map.data(); info.cfi = ArrayRef<const uint8_t>(*codegen->GetAssembler()->cfi().data()); @@ -1429,22 +1441,23 @@ bool OptimizingCompiler::JitCompile(Thread* self, region, method, reserved_code, - code_allocator.GetMemory(), + codegen->GetCode(), reserved_data, roots, ArrayRef<const uint8_t>(stack_map), debug_info, /* is_full_debug_info= */ compiler_options.GetGenerateDebugInfo(), compilation_kind, - codegen->GetGraph()->HasShouldDeoptimizeFlag(), codegen->GetGraph()->GetCHASingleImplementationList())) { + CHECK_EQ(CodeInfo::HasShouldDeoptimizeFlag(stack_map.data()), + codegen->GetGraph()->HasShouldDeoptimizeFlag()); code_cache->Free(self, region, reserved_code.data(), reserved_data.data()); return false; } Runtime::Current()->GetJit()->AddMemoryUsage(method, allocator.BytesUsed()); if (jit_logger != nullptr) { - jit_logger->WriteLog(code, code_allocator.GetMemory().size(), method); + jit_logger->WriteLog(code, codegen->GetAssembler()->CodeSize(), method); } if (kArenaAllocatorCountAllocations) { diff --git a/compiler/optimizing/optimizing_compiler_stats.h b/compiler/optimizing/optimizing_compiler_stats.h index a1d0a5a845..4549af3cbf 100644 --- a/compiler/optimizing/optimizing_compiler_stats.h +++ b/compiler/optimizing/optimizing_compiler_stats.h @@ -47,6 +47,7 @@ enum class MethodCompilationStat { kUnresolvedFieldNotAFastAccess, kRemovedCheckedCast, kRemovedDeadInstruction, + kRemovedDeadPhi, kRemovedTry, kRemovedNullCheck, kNotCompiledSkipped, @@ -130,8 +131,6 @@ enum class MethodCompilationStat { kPartialLSEPossible, kPartialStoreRemoved, kPartialAllocationMoved, - kPredicatedLoadAdded, - kPredicatedStoreAdded, kDevirtualized, kLastStat }; diff --git a/compiler/optimizing/optimizing_unit_test.h b/compiler/optimizing/optimizing_unit_test.h index 2e05c41f01..77e6420df8 100644 --- a/compiler/optimizing/optimizing_unit_test.h +++ b/compiler/optimizing/optimizing_unit_test.h @@ -244,7 +244,6 @@ class OptimizingUnitTestHelper { auto container = std::make_shared<MemoryDexFileContainer>(dex_data, sizeof(StandardDexFile::Header)); dex_files_.emplace_back(new StandardDexFile(dex_data, - sizeof(StandardDexFile::Header), "no_location", /*location_checksum*/ 0, /*oat_dex_file*/ nullptr, diff --git a/compiler/optimizing/parallel_move_test.cc b/compiler/optimizing/parallel_move_test.cc index a1c05e9cad..d2b993280d 100644 --- a/compiler/optimizing/parallel_move_test.cc +++ b/compiler/optimizing/parallel_move_test.cc @@ -81,8 +81,8 @@ class TestParallelMoveResolverWithSwap : public ParallelMoveResolverWithSwap { message_ << ")"; } - void SpillScratch(int reg ATTRIBUTE_UNUSED) override {} - void RestoreScratch(int reg ATTRIBUTE_UNUSED) override {} + void SpillScratch([[maybe_unused]] int reg) override {} + void RestoreScratch([[maybe_unused]] int reg) override {} std::string GetMessage() const { return message_.str(); @@ -126,7 +126,7 @@ class TestParallelMoveResolverNoSwap : public ParallelMoveResolverNoSwap { return scratch; } - void FreeScratchLocation(Location loc ATTRIBUTE_UNUSED) override {} + void FreeScratchLocation([[maybe_unused]] Location loc) override {} void EmitMove(size_t index) override { MoveOperands* move = moves_[index]; diff --git a/compiler/optimizing/pc_relative_fixups_x86.cc b/compiler/optimizing/pc_relative_fixups_x86.cc index d3da3d3ce1..c2d5ec7b60 100644 --- a/compiler/optimizing/pc_relative_fixups_x86.cc +++ b/compiler/optimizing/pc_relative_fixups_x86.cc @@ -62,7 +62,7 @@ class PCRelativeHandlerVisitor final : public HGraphVisitor { } void VisitReturn(HReturn* 
ret) override { - HConstant* value = ret->InputAt(0)->AsConstant(); + HConstant* value = ret->InputAt(0)->AsConstantOrNull(); if ((value != nullptr && DataType::IsFloatingPointType(value->GetType()))) { ReplaceInput(ret, value, 0, true); } @@ -95,7 +95,7 @@ class PCRelativeHandlerVisitor final : public HGraphVisitor { } void BinaryFP(HBinaryOperation* bin) { - HConstant* rhs = bin->InputAt(1)->AsConstant(); + HConstant* rhs = bin->InputAt(1)->AsConstantOrNull(); if (rhs != nullptr && DataType::IsFloatingPointType(rhs->GetType())) { ReplaceInput(bin, rhs, 1, false); } @@ -193,7 +193,7 @@ class PCRelativeHandlerVisitor final : public HGraphVisitor { } void HandleInvoke(HInvoke* invoke) { - HInvokeStaticOrDirect* invoke_static_or_direct = invoke->AsInvokeStaticOrDirect(); + HInvokeStaticOrDirect* invoke_static_or_direct = invoke->AsInvokeStaticOrDirectOrNull(); // If this is an invoke-static/-direct with PC-relative addressing (within boot image // or using .bss or .data.bimg.rel.ro), we need the PC-relative address base. @@ -207,7 +207,7 @@ class PCRelativeHandlerVisitor final : public HGraphVisitor { base_added = true; } - HInvokeInterface* invoke_interface = invoke->AsInvokeInterface(); + HInvokeInterface* invoke_interface = invoke->AsInvokeInterfaceOrNull(); if (invoke_interface != nullptr && IsPcRelativeMethodLoadKind(invoke_interface->GetHiddenArgumentLoadKind())) { HX86ComputeBaseMethodAddress* method_address = GetPCRelativeBasePointer(invoke); @@ -219,7 +219,7 @@ class PCRelativeHandlerVisitor final : public HGraphVisitor { // Ensure that we can load FP arguments from the constant area. HInputsRef inputs = invoke->GetInputs(); for (size_t i = 0; i < inputs.size(); i++) { - HConstant* input = inputs[i]->AsConstant(); + HConstant* input = inputs[i]->AsConstantOrNull(); if (input != nullptr && DataType::IsFloatingPointType(input->GetType())) { ReplaceInput(invoke, input, i, true); } @@ -235,6 +235,9 @@ class PCRelativeHandlerVisitor final : public HGraphVisitor { LOG(FATAL) << "Unreachable min/max/abs: intrinsics should have been lowered " "to IR nodes by instruction simplifier"; UNREACHABLE(); + case Intrinsics::kByteValueOf: + case Intrinsics::kShortValueOf: + case Intrinsics::kCharacterValueOf: case Intrinsics::kIntegerValueOf: // This intrinsic can be call free if it loads the address of the boot image object. // If we're compiling PIC, we need the address base for loading from .data.bimg.rel.ro. diff --git a/compiler/optimizing/prepare_for_register_allocation.cc b/compiler/optimizing/prepare_for_register_allocation.cc index 398b10abf3..1e99732d03 100644 --- a/compiler/optimizing/prepare_for_register_allocation.cc +++ b/compiler/optimizing/prepare_for_register_allocation.cc @@ -180,6 +180,11 @@ bool PrepareForRegisterAllocation::CanEmitConditionAt(HCondition* condition, return false; } + if (GetGraph()->IsCompilingBaseline() && compiler_options_.ProfileBranches()) { + // To do branch profiling, we cannot emit conditions at use site. + return false; + } + if (user->IsIf() || user->IsDeoptimize()) { return true; } diff --git a/compiler/optimizing/profiling_info_builder.cc b/compiler/optimizing/profiling_info_builder.cc new file mode 100644 index 0000000000..7888753830 --- /dev/null +++ b/compiler/optimizing/profiling_info_builder.cc @@ -0,0 +1,90 @@ +/* + * Copyright (C) 2023 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "profiling_info_builder.h" + +#include "art_method-inl.h" +#include "code_generator.h" +#include "driver/compiler_options.h" +#include "dex/code_item_accessors-inl.h" +#include "jit/profiling_info.h" +#include "optimizing_compiler_stats.h" +#include "scoped_thread_state_change-inl.h" + +namespace art HIDDEN { + +void ProfilingInfoBuilder::Run() { + DCHECK_EQ(GetGraph()->GetProfilingInfo(), nullptr); + // Order does not matter. + for (HBasicBlock* block : GetGraph()->GetReversePostOrder()) { + // No need to visit the phis. + for (HInstructionIteratorHandleChanges inst_it(block->GetInstructions()); !inst_it.Done(); + inst_it.Advance()) { + inst_it.Current()->Accept(this); + } + } + + ScopedObjectAccess soa(Thread::Current()); + GetGraph()->SetProfilingInfo( + ProfilingInfo::Create(soa.Self(), GetGraph()->GetArtMethod(), inline_caches_)); +} + +void ProfilingInfoBuilder::HandleInvoke(HInvoke* invoke) { + DCHECK(!invoke->GetEnvironment()->IsFromInlinedInvoke()); + if (IsInlineCacheUseful(invoke, codegen_)) { + inline_caches_.push_back(invoke->GetDexPc()); + } +} + +void ProfilingInfoBuilder::VisitInvokeInterface(HInvokeInterface* invoke) { + HandleInvoke(invoke); +} + +void ProfilingInfoBuilder::VisitInvokeVirtual(HInvokeVirtual* invoke) { + HandleInvoke(invoke); +} + +bool ProfilingInfoBuilder::IsInlineCacheUseful(HInvoke* invoke, CodeGenerator* codegen) { + DCHECK(invoke->IsInvokeVirtual() || invoke->IsInvokeInterface()); + if (codegen->IsImplementedIntrinsic(invoke)) { + return false; + } + if (!invoke->GetBlock()->GetGraph()->IsCompilingBaseline()) { + return false; + } + if (Runtime::Current()->IsAotCompiler()) { + return false; + } + if (invoke->InputAt(0)->GetReferenceTypeInfo().IsExact()) { + return false; + } + if (invoke->GetResolvedMethod() != nullptr) { + ScopedObjectAccess soa(Thread::Current()); + if (invoke->GetResolvedMethod()->IsFinal() || + invoke->GetResolvedMethod()->GetDeclaringClass()->IsFinal()) { + return false; + } + } + return true; +} + +InlineCache* ProfilingInfoBuilder::GetInlineCache(ProfilingInfo* info, HInvoke* instruction) { + DCHECK(!instruction->GetEnvironment()->IsFromInlinedInvoke()); + ScopedObjectAccess soa(Thread::Current()); + return info->GetInlineCache(instruction->GetDexPc()); +} + +} // namespace art diff --git a/compiler/optimizing/profiling_info_builder.h b/compiler/optimizing/profiling_info_builder.h new file mode 100644 index 0000000000..2185b0eed3 --- /dev/null +++ b/compiler/optimizing/profiling_info_builder.h @@ -0,0 +1,64 @@ +/* + * Copyright (C) 2023 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef ART_COMPILER_OPTIMIZING_PROFILING_INFO_BUILDER_H_ +#define ART_COMPILER_OPTIMIZING_PROFILING_INFO_BUILDER_H_ + +#include "base/macros.h" +#include "nodes.h" + +namespace art HIDDEN { + +class CodeGenerator; +class CompilerOptions; +class InlineCache; +class ProfilingInfo; + +class ProfilingInfoBuilder : public HGraphDelegateVisitor { + public: + ProfilingInfoBuilder(HGraph* graph, + const CompilerOptions& compiler_options, + CodeGenerator* codegen, + OptimizingCompilerStats* stats = nullptr) + : HGraphDelegateVisitor(graph, stats), + codegen_(codegen), + compiler_options_(compiler_options) {} + + void Run(); + + static constexpr const char* kProfilingInfoBuilderPassName = + "profiling_info_builder"; + + static InlineCache* GetInlineCache(ProfilingInfo* info, HInvoke* invoke); + static bool IsInlineCacheUseful(HInvoke* invoke, CodeGenerator* codegen); + + private: + void VisitInvokeVirtual(HInvokeVirtual* invoke) override; + void VisitInvokeInterface(HInvokeInterface* invoke) override; + + void HandleInvoke(HInvoke* invoke); + + CodeGenerator* codegen_; + [[maybe_unused]] const CompilerOptions& compiler_options_; + std::vector<uint32_t> inline_caches_; + + DISALLOW_COPY_AND_ASSIGN(ProfilingInfoBuilder); +}; + +} // namespace art + + +#endif // ART_COMPILER_OPTIMIZING_PROFILING_INFO_BUILDER_H_ diff --git a/compiler/optimizing/reference_type_propagation.cc b/compiler/optimizing/reference_type_propagation.cc index 91bae5f49b..6f44d45ed4 100644 --- a/compiler/optimizing/reference_type_propagation.cc +++ b/compiler/optimizing/reference_type_propagation.cc @@ -63,7 +63,6 @@ class ReferenceTypePropagation::RTPVisitor final : public HGraphDelegateVisitor void VisitLoadException(HLoadException* instr) override; void VisitNewArray(HNewArray* instr) override; void VisitParameterValue(HParameterValue* instr) override; - void VisitPredicatedInstanceFieldGet(HPredicatedInstanceFieldGet* instr) override; void VisitInstanceFieldGet(HInstanceFieldGet* instr) override; void VisitStaticFieldGet(HStaticFieldGet* instr) override; void VisitUnresolvedInstanceFieldGet(HUnresolvedInstanceFieldGet* instr) override; @@ -254,7 +253,7 @@ static void BoundTypeForClassCheck(HInstruction* check) { HInstruction* input_two = compare->InputAt(1); HLoadClass* load_class = input_one->IsLoadClass() ? input_one->AsLoadClass() - : input_two->AsLoadClass(); + : input_two->AsLoadClassOrNull(); if (load_class == nullptr) { return; } @@ -266,7 +265,7 @@ static void BoundTypeForClassCheck(HInstruction* check) { } HInstruction* field_get = (load_class == input_one) ? input_two : input_one; - if (!field_get->IsInstanceFieldGet() && !field_get->IsPredicatedInstanceFieldGet()) { + if (!field_get->IsInstanceFieldGet()) { return; } HInstruction* receiver = field_get->InputAt(0); @@ -335,7 +334,7 @@ void ReferenceTypePropagation::RTPVisitor::VisitBasicBlock(HBasicBlock* block) { } void ReferenceTypePropagation::RTPVisitor::BoundTypeForIfNotNull(HBasicBlock* block) { - HIf* ifInstruction = block->GetLastInstruction()->AsIf(); + HIf* ifInstruction = block->GetLastInstruction()->AsIfOrNull(); if (ifInstruction == nullptr) { return; } @@ -453,7 +452,7 @@ static bool MatchIfInstanceOf(HIf* ifInstruction, // If that's the case insert an HBoundType instruction to bound the type of `x` // to `ClassX` in the scope of the dominated blocks. 
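Several call sites in this file and the hunk that follows switch from AsFoo() to AsFooOrNull() wherever the result is null-checked, which suggests the plain accessor now asserts the cast while the OrNull flavor may return nullptr. A minimal sketch of that assumed contract, using made-up Shape/Circle classes rather than ART IR nodes:

// Hypothetical As<T>() vs. As<T>OrNull() contract; Shape/Circle are not ART types.
#include <cassert>
#include <iostream>

class Circle;

class Shape {
 public:
  virtual ~Shape() = default;
  virtual bool IsCircle() const { return false; }

  Circle* AsCircle();        // caller guarantees IsCircle(); asserted below
  Circle* AsCircleOrNull();  // returns nullptr when this is not a Circle
};

class Circle : public Shape {
 public:
  bool IsCircle() const override { return true; }
  double radius = 1.0;
};

Circle* Shape::AsCircle() {
  assert(IsCircle());
  return static_cast<Circle*>(this);
}

Circle* Shape::AsCircleOrNull() {
  return IsCircle() ? static_cast<Circle*>(this) : nullptr;
}

int main() {
  Shape plain;
  Circle circle;
  // Call sites that test for nullptr must use the OrNull variant, mirroring
  // the AsLoadClassOrNull()/AsIfOrNull() updates in this file.
  if (plain.AsCircleOrNull() == nullptr) {
    std::cout << "not a circle\n";
  }
  std::cout << circle.AsCircle()->radius << '\n';  // statically known to be a Circle
  return 0;
}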
void ReferenceTypePropagation::RTPVisitor::BoundTypeForIfInstanceOf(HBasicBlock* block) { - HIf* ifInstruction = block->GetLastInstruction()->AsIf(); + HIf* ifInstruction = block->GetLastInstruction()->AsIfOrNull(); if (ifInstruction == nullptr) { return; } @@ -539,9 +538,14 @@ void ReferenceTypePropagation::RTPVisitor::UpdateReferenceTypeInfo(HInstruction* DCHECK_EQ(instr->GetType(), DataType::Type::kReference); ScopedObjectAccess soa(Thread::Current()); - ObjPtr<mirror::DexCache> dex_cache = FindDexCacheWithHint(soa.Self(), dex_file, hint_dex_cache_); - ObjPtr<mirror::Class> klass = Runtime::Current()->GetClassLinker()->LookupResolvedType( - type_idx, dex_cache, dex_cache->GetClassLoader()); + StackHandleScope<2> hs(soa.Self()); + Handle<mirror::DexCache> dex_cache = + hs.NewHandle(FindDexCacheWithHint(soa.Self(), dex_file, hint_dex_cache_)); + Handle<mirror::ClassLoader> loader = hs.NewHandle(dex_cache->GetClassLoader()); + ObjPtr<mirror::Class> klass = Runtime::Current()->GetClassLinker()->ResolveType( + type_idx, dex_cache, loader); + DCHECK_EQ(klass == nullptr, soa.Self()->IsExceptionPending()); + soa.Self()->ClearException(); // Clean up the exception left by type resolution if any. SetClassAsTypeInfo(instr, klass, is_exact); } @@ -582,11 +586,6 @@ void ReferenceTypePropagation::RTPVisitor::UpdateFieldAccessTypeInfo(HInstructio SetClassAsTypeInfo(instr, klass, /* is_exact= */ false); } -void ReferenceTypePropagation::RTPVisitor::VisitPredicatedInstanceFieldGet( - HPredicatedInstanceFieldGet* instr) { - UpdateFieldAccessTypeInfo(instr, instr->GetFieldInfo()); -} - void ReferenceTypePropagation::RTPVisitor::VisitInstanceFieldGet(HInstanceFieldGet* instr) { UpdateFieldAccessTypeInfo(instr, instr->GetFieldInfo()); } @@ -704,7 +703,7 @@ void ReferenceTypePropagation::RTPVisitor::VisitBoundType(HBoundType* instr) { } void ReferenceTypePropagation::RTPVisitor::VisitCheckCast(HCheckCast* check_cast) { - HBoundType* bound_type = check_cast->GetNext()->AsBoundType(); + HBoundType* bound_type = check_cast->GetNext()->AsBoundTypeOrNull(); if (bound_type == nullptr || bound_type->GetUpperBound().IsValid()) { // The next instruction is not an uninitialized BoundType. This must be // an RTP pass after SsaBuilder and we do not need to do anything. diff --git a/compiler/optimizing/reference_type_propagation_test.cc b/compiler/optimizing/reference_type_propagation_test.cc index 2b012fcd67..ffd94e56b5 100644 --- a/compiler/optimizing/reference_type_propagation_test.cc +++ b/compiler/optimizing/reference_type_propagation_test.cc @@ -468,7 +468,7 @@ TEST_P(LoopReferenceTypePropagationTestGroup, RunVisitTest) { LoopOptions lo(GetParam()); std::default_random_engine g( lo.initial_null_state_ != InitialNullState::kTrueRandom ? 42 : std::rand()); - std::uniform_int_distribution<bool> uid(false, true); + std::uniform_int_distribution<int> uid(0, 1); RunVisitListTest([&](std::vector<HInstruction*>& lst, HInstruction* null_input) { auto pred_null = false; auto next_null = [&]() { @@ -482,7 +482,7 @@ TEST_P(LoopReferenceTypePropagationTestGroup, RunVisitTest) { return pred_null; case InitialNullState::kRandomSetSeed: case InitialNullState::kTrueRandom: - return uid(g); + return uid(g) > 0; } }; HPhi* nulled_phi = lo.null_insertion_ >= 0 ? 
lst[lo.null_insertion_]->AsPhi() : nullptr; diff --git a/compiler/optimizing/register_allocation_resolver.cc b/compiler/optimizing/register_allocation_resolver.cc index 53e11f2c3d..a4b1698b8d 100644 --- a/compiler/optimizing/register_allocation_resolver.cc +++ b/compiler/optimizing/register_allocation_resolver.cc @@ -531,9 +531,9 @@ void RegisterAllocationResolver::AddInputMoveFor(HInstruction* input, HInstruction* previous = user->GetPrevious(); HParallelMove* move = nullptr; - if (previous == nullptr - || !previous->IsParallelMove() - || previous->GetLifetimePosition() < user->GetLifetimePosition()) { + if (previous == nullptr || + !previous->IsParallelMove() || + previous->GetLifetimePosition() < user->GetLifetimePosition()) { move = new (allocator_) HParallelMove(allocator_); move->SetLifetimePosition(user->GetLifetimePosition()); user->GetBlock()->InsertInstructionBefore(move, user); @@ -593,7 +593,7 @@ void RegisterAllocationResolver::InsertParallelMoveAt(size_t position, } else if (IsInstructionEnd(position)) { // Move must happen after the instruction. DCHECK(!at->IsControlFlow()); - move = at->GetNext()->AsParallelMove(); + move = at->GetNext()->AsParallelMoveOrNull(); // This is a parallel move for connecting siblings in a same block. We need to // differentiate it with moves for connecting blocks, and input moves. if (move == nullptr || move->GetLifetimePosition() > position) { @@ -604,15 +604,15 @@ void RegisterAllocationResolver::InsertParallelMoveAt(size_t position, } else { // Move must happen before the instruction. HInstruction* previous = at->GetPrevious(); - if (previous == nullptr - || !previous->IsParallelMove() - || previous->GetLifetimePosition() != position) { + if (previous == nullptr || + !previous->IsParallelMove() || + previous->GetLifetimePosition() != position) { // If the previous is a parallel move, then its position must be lower // than the given `position`: it was added just after the non-parallel // move instruction that precedes `instruction`. - DCHECK(previous == nullptr - || !previous->IsParallelMove() - || previous->GetLifetimePosition() < position); + DCHECK(previous == nullptr || + !previous->IsParallelMove() || + previous->GetLifetimePosition() < position); move = new (allocator_) HParallelMove(allocator_); move->SetLifetimePosition(position); at->GetBlock()->InsertInstructionBefore(move, at); @@ -643,8 +643,9 @@ void RegisterAllocationResolver::InsertParallelMoveAtExitOf(HBasicBlock* block, // This is a parallel move for connecting blocks. We need to differentiate // it with moves for connecting siblings in a same block, and output moves. size_t position = last->GetLifetimePosition(); - if (previous == nullptr || !previous->IsParallelMove() - || previous->AsParallelMove()->GetLifetimePosition() != position) { + if (previous == nullptr || + !previous->IsParallelMove() || + previous->AsParallelMove()->GetLifetimePosition() != position) { move = new (allocator_) HParallelMove(allocator_); move->SetLifetimePosition(position); block->InsertInstructionBefore(move, last); @@ -662,7 +663,7 @@ void RegisterAllocationResolver::InsertParallelMoveAtEntryOf(HBasicBlock* block, if (source.Equals(destination)) return; HInstruction* first = block->GetFirstInstruction(); - HParallelMove* move = first->AsParallelMove(); + HParallelMove* move = first->AsParallelMoveOrNull(); size_t position = block->GetLifetimeStart(); // This is a parallel move for connecting blocks. 
We need to differentiate // it with moves for connecting siblings in a same block, and input moves. @@ -686,7 +687,7 @@ void RegisterAllocationResolver::InsertMoveAfter(HInstruction* instruction, } size_t position = instruction->GetLifetimePosition() + 1; - HParallelMove* move = instruction->GetNext()->AsParallelMove(); + HParallelMove* move = instruction->GetNext()->AsParallelMoveOrNull(); // This is a parallel move for moving the output of an instruction. We need // to differentiate with input moves, moves for connecting siblings in a // and moves for connecting blocks. diff --git a/compiler/optimizing/register_allocator.cc b/compiler/optimizing/register_allocator.cc index e4c2d74908..f8b057d4a8 100644 --- a/compiler/optimizing/register_allocator.cc +++ b/compiler/optimizing/register_allocator.cc @@ -23,7 +23,6 @@ #include "base/scoped_arena_containers.h" #include "base/bit_vector-inl.h" #include "code_generator.h" -#include "register_allocator_graph_color.h" #include "register_allocator_linear_scan.h" #include "ssa_liveness_analysis.h" @@ -45,8 +44,8 @@ std::unique_ptr<RegisterAllocator> RegisterAllocator::Create(ScopedArenaAllocato return std::unique_ptr<RegisterAllocator>( new (allocator) RegisterAllocatorLinearScan(allocator, codegen, analysis)); case kRegisterAllocatorGraphColor: - return std::unique_ptr<RegisterAllocator>( - new (allocator) RegisterAllocatorGraphColor(allocator, codegen, analysis)); + LOG(FATAL) << "Graph coloring register allocator has been removed."; + UNREACHABLE(); default: LOG(FATAL) << "Invalid register allocation strategy: " << strategy; UNREACHABLE(); diff --git a/compiler/optimizing/register_allocator_graph_color.cc b/compiler/optimizing/register_allocator_graph_color.cc deleted file mode 100644 index a7c891d4e7..0000000000 --- a/compiler/optimizing/register_allocator_graph_color.cc +++ /dev/null @@ -1,2086 +0,0 @@ -/* - * Copyright (C) 2016 The Android Open Source Project - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "register_allocator_graph_color.h" - -#include "code_generator.h" -#include "linear_order.h" -#include "register_allocation_resolver.h" -#include "ssa_liveness_analysis.h" -#include "thread-current-inl.h" - -namespace art HIDDEN { - -// Highest number of registers that we support for any platform. This can be used for std::bitset, -// for example, which needs to know its size at compile time. -static constexpr size_t kMaxNumRegs = 32; - -// The maximum number of graph coloring attempts before triggering a DCHECK. -// This is meant to catch changes to the graph coloring algorithm that undermine its forward -// progress guarantees. Forward progress for the algorithm means splitting live intervals on -// every graph coloring attempt so that eventually the interference graph will be sparse enough -// to color. The main threat to forward progress is trying to split short intervals which cannot be -// split further; this could cause infinite looping because the interference graph would never -// change. 
This is avoided by prioritizing short intervals before long ones, so that long -// intervals are split when coloring fails. -static constexpr size_t kMaxGraphColoringAttemptsDebug = 100; - -// We always want to avoid spilling inside loops. -static constexpr size_t kLoopSpillWeightMultiplier = 10; - -// If we avoid moves in single jump blocks, we can avoid jumps to jumps. -static constexpr size_t kSingleJumpBlockWeightMultiplier = 2; - -// We avoid moves in blocks that dominate the exit block, since these blocks will -// be executed on every path through the method. -static constexpr size_t kDominatesExitBlockWeightMultiplier = 2; - -enum class CoalesceKind { - kAdjacentSibling, // Prevents moves at interval split points. - kFixedOutputSibling, // Prevents moves from a fixed output location. - kFixedInput, // Prevents moves into a fixed input location. - kNonlinearControlFlow, // Prevents moves between blocks. - kPhi, // Prevents phi resolution moves. - kFirstInput, // Prevents a single input move. - kAnyInput, // May lead to better instruction selection / smaller encodings. -}; - -std::ostream& operator<<(std::ostream& os, const CoalesceKind& kind) { - return os << static_cast<typename std::underlying_type<CoalesceKind>::type>(kind); -} - -static size_t LoopDepthAt(HBasicBlock* block) { - HLoopInformation* loop_info = block->GetLoopInformation(); - size_t depth = 0; - while (loop_info != nullptr) { - ++depth; - loop_info = loop_info->GetPreHeader()->GetLoopInformation(); - } - return depth; -} - -// Return the runtime cost of inserting a move instruction at the specified location. -static size_t CostForMoveAt(size_t position, const SsaLivenessAnalysis& liveness) { - HBasicBlock* block = liveness.GetBlockFromPosition(position / 2); - DCHECK(block != nullptr); - size_t cost = 1; - if (block->IsSingleJump()) { - cost *= kSingleJumpBlockWeightMultiplier; - } - if (block->Dominates(block->GetGraph()->GetExitBlock())) { - cost *= kDominatesExitBlockWeightMultiplier; - } - for (size_t loop_depth = LoopDepthAt(block); loop_depth > 0; --loop_depth) { - cost *= kLoopSpillWeightMultiplier; - } - return cost; -} - -// In general, we estimate coalesce priority by whether it will definitely avoid a move, -// and by how likely it is to create an interference graph that's harder to color. -static size_t ComputeCoalescePriority(CoalesceKind kind, - size_t position, - const SsaLivenessAnalysis& liveness) { - if (kind == CoalesceKind::kAnyInput) { - // This type of coalescing can affect instruction selection, but not moves, so we - // give it the lowest priority. - return 0; - } else { - return CostForMoveAt(position, liveness); - } -} - -enum class CoalesceStage { - kWorklist, // Currently in the iterative coalescing worklist. - kActive, // Not in a worklist, but could be considered again during iterative coalescing. - kInactive, // No longer considered until last-chance coalescing. - kDefunct, // Either the two nodes interfere, or have already been coalesced. -}; - -std::ostream& operator<<(std::ostream& os, const CoalesceStage& stage) { - return os << static_cast<typename std::underlying_type<CoalesceStage>::type>(stage); -} - -// Represents a coalesce opportunity between two nodes. 
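A worked example of the move-cost heuristic removed above: the multipliers are the constants from the deleted file, while the block properties (a single-jump block that dominates the exit, nested two loops deep) are hypothetical, giving 1 * 2 * 2 * 10 * 10 = 400:

// Recomputes the deleted CostForMoveAt() weighting for a hypothetical block.
#include <cstddef>
#include <iostream>

constexpr size_t kLoopSpillWeightMultiplier = 10;
constexpr size_t kSingleJumpBlockWeightMultiplier = 2;
constexpr size_t kDominatesExitBlockWeightMultiplier = 2;

size_t CostForMove(bool is_single_jump, bool dominates_exit, size_t loop_depth) {
  size_t cost = 1;
  if (is_single_jump) cost *= kSingleJumpBlockWeightMultiplier;
  if (dominates_exit) cost *= kDominatesExitBlockWeightMultiplier;
  for (size_t d = loop_depth; d > 0; --d) cost *= kLoopSpillWeightMultiplier;
  return cost;
}

int main() {
  // 1 * 2 * 2 * 10 * 10 = 400 for a block two loops deep.
  std::cout << CostForMove(/*is_single_jump=*/true,
                           /*dominates_exit=*/true,
                           /*loop_depth=*/2) << '\n';
  return 0;
}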
-struct CoalesceOpportunity : public ArenaObject<kArenaAllocRegisterAllocator> { - CoalesceOpportunity(InterferenceNode* a, - InterferenceNode* b, - CoalesceKind kind, - size_t position, - const SsaLivenessAnalysis& liveness) - : node_a(a), - node_b(b), - stage(CoalesceStage::kWorklist), - priority(ComputeCoalescePriority(kind, position, liveness)) {} - - // Compare two coalesce opportunities based on their priority. - // Return true if lhs has a lower priority than that of rhs. - static bool CmpPriority(const CoalesceOpportunity* lhs, - const CoalesceOpportunity* rhs) { - return lhs->priority < rhs->priority; - } - - InterferenceNode* const node_a; - InterferenceNode* const node_b; - - // The current stage of this coalesce opportunity, indicating whether it is in a worklist, - // and whether it should still be considered. - CoalesceStage stage; - - // The priority of this coalesce opportunity, based on heuristics. - const size_t priority; -}; - -enum class NodeStage { - kInitial, // Uninitialized. - kPrecolored, // Marks fixed nodes. - kSafepoint, // Marks safepoint nodes. - kPrunable, // Marks uncolored nodes in the interference graph. - kSimplifyWorklist, // Marks non-move-related nodes with degree less than the number of registers. - kFreezeWorklist, // Marks move-related nodes with degree less than the number of registers. - kSpillWorklist, // Marks nodes with degree greater or equal to the number of registers. - kPruned // Marks nodes already pruned from the interference graph. -}; - -std::ostream& operator<<(std::ostream& os, const NodeStage& stage) { - return os << static_cast<typename std::underlying_type<NodeStage>::type>(stage); -} - -// Returns the estimated cost of spilling a particular live interval. -static float ComputeSpillWeight(LiveInterval* interval, const SsaLivenessAnalysis& liveness) { - if (interval->HasRegister()) { - // Intervals with a fixed register cannot be spilled. - return std::numeric_limits<float>::min(); - } - - size_t length = interval->GetLength(); - if (length == 1) { - // Tiny intervals should have maximum priority, since they cannot be split any further. - return std::numeric_limits<float>::max(); - } - - size_t use_weight = 0; - if (interval->GetDefinedBy() != nullptr && interval->DefinitionRequiresRegister()) { - // Cost for spilling at a register definition point. - use_weight += CostForMoveAt(interval->GetStart() + 1, liveness); - } - - // Process uses in the range (interval->GetStart(), interval->GetEnd()], i.e. - // [interval->GetStart() + 1, interval->GetEnd() + 1) - auto matching_use_range = FindMatchingUseRange(interval->GetUses().begin(), - interval->GetUses().end(), - interval->GetStart() + 1u, - interval->GetEnd() + 1u); - for (const UsePosition& use : matching_use_range) { - if (use.GetUser() != nullptr && use.RequiresRegister()) { - // Cost for spilling at a register use point. - use_weight += CostForMoveAt(use.GetUser()->GetLifetimePosition() - 1, liveness); - } - } - - // We divide by the length of the interval because we want to prioritize - // short intervals; we do not benefit much if we split them further. - return static_cast<float>(use_weight) / static_cast<float>(length); -} - -// Interference nodes make up the interference graph, which is the primary data structure in -// graph coloring register allocation. Each node represents a single live interval, and contains -// a set of adjacent nodes corresponding to intervals overlapping with its own. 
To save memory, -// pre-colored nodes never contain outgoing edges (only incoming ones). -// -// As nodes are pruned from the interference graph, incoming edges of the pruned node are removed, -// but outgoing edges remain in order to later color the node based on the colors of its neighbors. -// -// Note that a pair interval is represented by a single node in the interference graph, which -// essentially requires two colors. One consequence of this is that the degree of a node is not -// necessarily equal to the number of adjacent nodes--instead, the degree reflects the maximum -// number of colors with which a node could interfere. We model this by giving edges different -// weights (1 or 2) to control how much it increases the degree of adjacent nodes. -// For example, the edge between two single nodes will have weight 1. On the other hand, -// the edge between a single node and a pair node will have weight 2. This is because the pair -// node could block up to two colors for the single node, and because the single node could -// block an entire two-register aligned slot for the pair node. -// The degree is defined this way because we use it to decide whether a node is guaranteed a color, -// and thus whether it is safe to prune it from the interference graph early on. -class InterferenceNode : public ArenaObject<kArenaAllocRegisterAllocator> { - public: - InterferenceNode(LiveInterval* interval, - const SsaLivenessAnalysis& liveness) - : stage(NodeStage::kInitial), - interval_(interval), - adjacent_nodes_(nullptr), - coalesce_opportunities_(nullptr), - out_degree_(interval->HasRegister() ? std::numeric_limits<size_t>::max() : 0), - alias_(this), - spill_weight_(ComputeSpillWeight(interval, liveness)), - requires_color_(interval->RequiresRegister()), - needs_spill_slot_(false) { - DCHECK(!interval->IsHighInterval()) << "Pair nodes should be represented by the low interval"; - } - - void AddInterference(InterferenceNode* other, - bool guaranteed_not_interfering_yet, - ScopedArenaDeque<ScopedArenaVector<InterferenceNode*>>* storage) { - DCHECK(!IsPrecolored()) << "To save memory, fixed nodes should not have outgoing interferences"; - DCHECK_NE(this, other) << "Should not create self loops in the interference graph"; - DCHECK_EQ(this, alias_) << "Should not add interferences to a node that aliases another"; - DCHECK_NE(stage, NodeStage::kPruned); - DCHECK_NE(other->stage, NodeStage::kPruned); - if (adjacent_nodes_ == nullptr) { - ScopedArenaVector<InterferenceNode*>::allocator_type adapter(storage->get_allocator()); - storage->emplace_back(adapter); - adjacent_nodes_ = &storage->back(); - } - if (guaranteed_not_interfering_yet) { - DCHECK(!ContainsElement(GetAdjacentNodes(), other)); - adjacent_nodes_->push_back(other); - out_degree_ += EdgeWeightWith(other); - } else { - if (!ContainsElement(GetAdjacentNodes(), other)) { - adjacent_nodes_->push_back(other); - out_degree_ += EdgeWeightWith(other); - } - } - } - - void RemoveInterference(InterferenceNode* other) { - DCHECK_EQ(this, alias_) << "Should not remove interferences from a coalesced node"; - DCHECK_EQ(other->stage, NodeStage::kPruned) << "Should only remove interferences when pruning"; - if (adjacent_nodes_ != nullptr) { - auto it = std::find(adjacent_nodes_->begin(), adjacent_nodes_->end(), other); - if (it != adjacent_nodes_->end()) { - adjacent_nodes_->erase(it); - out_degree_ -= EdgeWeightWith(other); - } - } - } - - bool ContainsInterference(InterferenceNode* other) const { - DCHECK(!IsPrecolored()) << "Should not query fixed 
nodes for interferences"; - DCHECK_EQ(this, alias_) << "Should not query a coalesced node for interferences"; - return ContainsElement(GetAdjacentNodes(), other); - } - - LiveInterval* GetInterval() const { - return interval_; - } - - ArrayRef<InterferenceNode*> GetAdjacentNodes() const { - return adjacent_nodes_ != nullptr - ? ArrayRef<InterferenceNode*>(*adjacent_nodes_) - : ArrayRef<InterferenceNode*>(); - } - - size_t GetOutDegree() const { - // Pre-colored nodes have infinite degree. - DCHECK_IMPLIES(IsPrecolored(), out_degree_ == std::numeric_limits<size_t>::max()); - return out_degree_; - } - - void AddCoalesceOpportunity(CoalesceOpportunity* opportunity, - ScopedArenaDeque<ScopedArenaVector<CoalesceOpportunity*>>* storage) { - if (coalesce_opportunities_ == nullptr) { - ScopedArenaVector<CoalesceOpportunity*>::allocator_type adapter(storage->get_allocator()); - storage->emplace_back(adapter); - coalesce_opportunities_ = &storage->back(); - } - coalesce_opportunities_->push_back(opportunity); - } - - void ClearCoalesceOpportunities() { - coalesce_opportunities_ = nullptr; - } - - bool IsMoveRelated() const { - for (CoalesceOpportunity* opportunity : GetCoalesceOpportunities()) { - if (opportunity->stage == CoalesceStage::kWorklist || - opportunity->stage == CoalesceStage::kActive) { - return true; - } - } - return false; - } - - // Return whether this node already has a color. - // Used to find fixed nodes in the interference graph before coloring. - bool IsPrecolored() const { - return interval_->HasRegister(); - } - - bool IsPair() const { - return interval_->HasHighInterval(); - } - - void SetAlias(InterferenceNode* rep) { - DCHECK_NE(rep->stage, NodeStage::kPruned); - DCHECK_EQ(this, alias_) << "Should only set a node's alias once"; - alias_ = rep; - } - - InterferenceNode* GetAlias() { - if (alias_ != this) { - // Recurse in order to flatten tree of alias pointers. - alias_ = alias_->GetAlias(); - } - return alias_; - } - - ArrayRef<CoalesceOpportunity*> GetCoalesceOpportunities() const { - return coalesce_opportunities_ != nullptr - ? ArrayRef<CoalesceOpportunity*>(*coalesce_opportunities_) - : ArrayRef<CoalesceOpportunity*>(); - } - - float GetSpillWeight() const { - return spill_weight_; - } - - bool RequiresColor() const { - return requires_color_; - } - - // We give extra weight to edges adjacent to pair nodes. See the general comment on the - // interference graph above. - size_t EdgeWeightWith(const InterferenceNode* other) const { - return (IsPair() || other->IsPair()) ? 2 : 1; - } - - bool NeedsSpillSlot() const { - return needs_spill_slot_; - } - - void SetNeedsSpillSlot() { - needs_spill_slot_ = true; - } - - // The current stage of this node, indicating which worklist it belongs to. - NodeStage stage; - - private: - // The live interval that this node represents. - LiveInterval* const interval_; - - // All nodes interfering with this one. - // We use an unsorted vector as a set, since a tree or hash set is too heavy for the - // set sizes that we encounter. Using a vector leads to much better performance. - ScopedArenaVector<InterferenceNode*>* adjacent_nodes_; // Owned by ColoringIteration. - - // Interference nodes that this node should be coalesced with to reduce moves. - ScopedArenaVector<CoalesceOpportunity*>* coalesce_opportunities_; // Owned by ColoringIteration. - - // The maximum number of colors with which this node could interfere. This could be more than - // the number of adjacent nodes if this is a pair node, or if some adjacent nodes are pair nodes. 
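The weighted-degree bookkeeping is easier to see with plain data: an edge touching a pair node adds 2 to both endpoints, all other edges add 1. A small sketch with hypothetical Node/AddEdge names (not the ART classes, which also skip outgoing edges for pre-colored nodes):

#include <cstddef>
#include <vector>

struct Node {
  bool is_pair = false;         // needs two adjacent registers
  std::size_t degree = 0;       // weighted degree, as described above
  std::vector<Node*> adjacent;
};

std::size_t EdgeWeight(const Node& a, const Node& b) {
  return (a.is_pair || b.is_pair) ? 2 : 1;
}

void AddEdge(Node* a, Node* b) {
  std::size_t weight = EdgeWeight(*a, *b);
  a->adjacent.push_back(b);
  b->adjacent.push_back(a);
  a->degree += weight;
  b->degree += weight;
}

// A pair node with three single-register neighbours ends up with degree 6,
// and each of those neighbours counts the pair node as 2. A node whose
// weighted degree is below the number of registers is guaranteed a color,
// which is the test the pruning phase relies on.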
- // We use "out" degree because incoming edges come from nodes already pruned from the graph, - // and do not affect the coloring of this node. - // Pre-colored nodes are treated as having infinite degree. - size_t out_degree_; - - // The node representing this node in the interference graph. - // Initially set to `this`, and only changed if this node is coalesced into another. - InterferenceNode* alias_; - - // The cost of splitting and spilling this interval to the stack. - // Nodes with a higher spill weight should be prioritized when assigning registers. - // This is essentially based on use density and location; short intervals with many uses inside - // deeply nested loops have a high spill weight. - const float spill_weight_; - - const bool requires_color_; - - bool needs_spill_slot_; - - DISALLOW_COPY_AND_ASSIGN(InterferenceNode); -}; - -// The order in which we color nodes is important. To guarantee forward progress, -// we prioritize intervals that require registers, and after that we prioritize -// short intervals. That way, if we fail to color a node, it either won't require a -// register, or it will be a long interval that can be split in order to make the -// interference graph sparser. -// To improve code quality, we prioritize intervals used frequently in deeply nested loops. -// (This metric is secondary to the forward progress requirements above.) -// TODO: May also want to consider: -// - Constants (since they can be rematerialized) -// - Allocated spill slots -static bool HasGreaterNodePriority(const InterferenceNode* lhs, - const InterferenceNode* rhs) { - // (1) Prioritize the node that requires a color. - if (lhs->RequiresColor() != rhs->RequiresColor()) { - return lhs->RequiresColor(); - } - - // (2) Prioritize the interval that has a higher spill weight. - return lhs->GetSpillWeight() > rhs->GetSpillWeight(); -} - -// A ColoringIteration holds the many data structures needed for a single graph coloring attempt, -// and provides methods for each phase of the attempt. -class ColoringIteration { - public: - ColoringIteration(RegisterAllocatorGraphColor* register_allocator, - ScopedArenaAllocator* allocator, - bool processing_core_regs, - size_t num_regs) - : register_allocator_(register_allocator), - allocator_(allocator), - processing_core_regs_(processing_core_regs), - num_regs_(num_regs), - interval_node_map_(allocator->Adapter(kArenaAllocRegisterAllocator)), - prunable_nodes_(allocator->Adapter(kArenaAllocRegisterAllocator)), - pruned_nodes_(allocator->Adapter(kArenaAllocRegisterAllocator)), - simplify_worklist_(allocator->Adapter(kArenaAllocRegisterAllocator)), - freeze_worklist_(allocator->Adapter(kArenaAllocRegisterAllocator)), - spill_worklist_(HasGreaterNodePriority, allocator->Adapter(kArenaAllocRegisterAllocator)), - coalesce_worklist_(CoalesceOpportunity::CmpPriority, - allocator->Adapter(kArenaAllocRegisterAllocator)), - adjacent_nodes_links_(allocator->Adapter(kArenaAllocRegisterAllocator)), - coalesce_opportunities_links_(allocator->Adapter(kArenaAllocRegisterAllocator)) {} - - // Use the intervals collected from instructions to construct an - // interference graph mapping intervals to adjacency lists. - // Also, collect synthesized safepoint nodes, used to keep - // track of live intervals across safepoints. - // TODO: Should build safepoints elsewhere. - void BuildInterferenceGraph(const ScopedArenaVector<LiveInterval*>& intervals, - const ScopedArenaVector<InterferenceNode*>& physical_nodes); - - // Add coalesce opportunities to interference nodes. 
- void FindCoalesceOpportunities(); - - // Prune nodes from the interference graph to be colored later. Build - // a stack (pruned_nodes) containing these intervals in an order determined - // by various heuristics. - void PruneInterferenceGraph(); - - // Process pruned_intervals_ to color the interference graph, spilling when - // necessary. Returns true if successful. Else, some intervals have been - // split, and the interference graph should be rebuilt for another attempt. - bool ColorInterferenceGraph(); - - // Return prunable nodes. - // The register allocator will need to access prunable nodes after coloring - // in order to tell the code generator which registers have been assigned. - ArrayRef<InterferenceNode* const> GetPrunableNodes() const { - return ArrayRef<InterferenceNode* const>(prunable_nodes_); - } - - private: - // Create a coalesce opportunity between two nodes. - void CreateCoalesceOpportunity(InterferenceNode* a, - InterferenceNode* b, - CoalesceKind kind, - size_t position); - - // Add an edge in the interference graph, if valid. - // Note that `guaranteed_not_interfering_yet` is used to optimize adjacency set insertion - // when possible. - void AddPotentialInterference(InterferenceNode* from, - InterferenceNode* to, - bool guaranteed_not_interfering_yet, - bool both_directions = true); - - // Invalidate all coalesce opportunities this node has, so that it (and possibly its neighbors) - // may be pruned from the interference graph. - void FreezeMoves(InterferenceNode* node); - - // Prune a node from the interference graph, updating worklists if necessary. - void PruneNode(InterferenceNode* node); - - // Add coalesce opportunities associated with this node to the coalesce worklist. - void EnableCoalesceOpportunities(InterferenceNode* node); - - // If needed, from `node` from the freeze worklist to the simplify worklist. - void CheckTransitionFromFreezeWorklist(InterferenceNode* node); - - // Return true if `into` is colored, and `from` can be coalesced with `into` conservatively. - bool PrecoloredHeuristic(InterferenceNode* from, InterferenceNode* into); - - // Return true if `from` and `into` are uncolored, and can be coalesced conservatively. - bool UncoloredHeuristic(InterferenceNode* from, InterferenceNode* into); - - void Coalesce(CoalesceOpportunity* opportunity); - - // Merge `from` into `into` in the interference graph. - void Combine(InterferenceNode* from, InterferenceNode* into); - - // A reference to the register allocator instance, - // needed to split intervals and assign spill slots. - RegisterAllocatorGraphColor* register_allocator_; - - // A scoped arena allocator used for a single graph coloring attempt. - ScopedArenaAllocator* allocator_; - - const bool processing_core_regs_; - - const size_t num_regs_; - - // A map from live intervals to interference nodes. - ScopedArenaHashMap<LiveInterval*, InterferenceNode*> interval_node_map_; - - // Uncolored nodes that should be pruned from the interference graph. - ScopedArenaVector<InterferenceNode*> prunable_nodes_; - - // A stack of nodes pruned from the interference graph, waiting to be pruned. - ScopedArenaStdStack<InterferenceNode*> pruned_nodes_; - - // A queue containing low degree, non-move-related nodes that can pruned immediately. - ScopedArenaDeque<InterferenceNode*> simplify_worklist_; - - // A queue containing low degree, move-related nodes. - ScopedArenaDeque<InterferenceNode*> freeze_worklist_; - - // A queue containing high degree nodes. 
- // If we have to prune from the spill worklist, we cannot guarantee - // the pruned node a color, so we order the worklist by priority. - ScopedArenaPriorityQueue<InterferenceNode*, decltype(&HasGreaterNodePriority)> spill_worklist_; - - // A queue containing coalesce opportunities. - // We order the coalesce worklist by priority, since some coalesce opportunities (e.g., those - // inside of loops) are more important than others. - ScopedArenaPriorityQueue<CoalesceOpportunity*, - decltype(&CoalesceOpportunity::CmpPriority)> coalesce_worklist_; - - // Storage for links to adjacent nodes for interference nodes. - // Using std::deque so that elements do not move when adding new ones. - ScopedArenaDeque<ScopedArenaVector<InterferenceNode*>> adjacent_nodes_links_; - - // Storage for links to coalesce opportunities for interference nodes. - // Using std::deque so that elements do not move when adding new ones. - ScopedArenaDeque<ScopedArenaVector<CoalesceOpportunity*>> coalesce_opportunities_links_; - - DISALLOW_COPY_AND_ASSIGN(ColoringIteration); -}; - -static bool IsCoreInterval(LiveInterval* interval) { - return !DataType::IsFloatingPointType(interval->GetType()); -} - -static size_t ComputeReservedArtMethodSlots(const CodeGenerator& codegen) { - return static_cast<size_t>(InstructionSetPointerSize(codegen.GetInstructionSet())) / kVRegSize; -} - -RegisterAllocatorGraphColor::RegisterAllocatorGraphColor(ScopedArenaAllocator* allocator, - CodeGenerator* codegen, - const SsaLivenessAnalysis& liveness, - bool iterative_move_coalescing) - : RegisterAllocator(allocator, codegen, liveness), - iterative_move_coalescing_(iterative_move_coalescing), - core_intervals_(allocator->Adapter(kArenaAllocRegisterAllocator)), - fp_intervals_(allocator->Adapter(kArenaAllocRegisterAllocator)), - temp_intervals_(allocator->Adapter(kArenaAllocRegisterAllocator)), - safepoints_(allocator->Adapter(kArenaAllocRegisterAllocator)), - physical_core_nodes_(allocator->Adapter(kArenaAllocRegisterAllocator)), - physical_fp_nodes_(allocator->Adapter(kArenaAllocRegisterAllocator)), - num_int_spill_slots_(0), - num_double_spill_slots_(0), - num_float_spill_slots_(0), - num_long_spill_slots_(0), - catch_phi_spill_slot_counter_(0), - reserved_art_method_slots_(ComputeReservedArtMethodSlots(*codegen)), - reserved_out_slots_(codegen->GetGraph()->GetMaximumNumberOfOutVRegs()) { - // Before we ask for blocked registers, set them up in the code generator. - codegen->SetupBlockedRegisters(); - - // Initialize physical core register live intervals and blocked registers. - // This includes globally blocked registers, such as the stack pointer. - physical_core_nodes_.resize(codegen_->GetNumberOfCoreRegisters(), nullptr); - for (size_t i = 0; i < codegen_->GetNumberOfCoreRegisters(); ++i) { - LiveInterval* interval = LiveInterval::MakeFixedInterval(allocator_, i, DataType::Type::kInt32); - physical_core_nodes_[i] = new (allocator_) InterferenceNode(interval, liveness); - physical_core_nodes_[i]->stage = NodeStage::kPrecolored; - core_intervals_.push_back(interval); - if (codegen_->IsBlockedCoreRegister(i)) { - interval->AddRange(0, liveness.GetMaxLifetimePosition()); - } - } - // Initialize physical floating point register live intervals and blocked registers. 
- physical_fp_nodes_.resize(codegen_->GetNumberOfFloatingPointRegisters(), nullptr); - for (size_t i = 0; i < codegen_->GetNumberOfFloatingPointRegisters(); ++i) { - LiveInterval* interval = - LiveInterval::MakeFixedInterval(allocator_, i, DataType::Type::kFloat32); - physical_fp_nodes_[i] = new (allocator_) InterferenceNode(interval, liveness); - physical_fp_nodes_[i]->stage = NodeStage::kPrecolored; - fp_intervals_.push_back(interval); - if (codegen_->IsBlockedFloatingPointRegister(i)) { - interval->AddRange(0, liveness.GetMaxLifetimePosition()); - } - } -} - -RegisterAllocatorGraphColor::~RegisterAllocatorGraphColor() {} - -void RegisterAllocatorGraphColor::AllocateRegisters() { - // (1) Collect and prepare live intervals. - ProcessInstructions(); - - for (bool processing_core_regs : {true, false}) { - ScopedArenaVector<LiveInterval*>& intervals = processing_core_regs - ? core_intervals_ - : fp_intervals_; - size_t num_registers = processing_core_regs - ? codegen_->GetNumberOfCoreRegisters() - : codegen_->GetNumberOfFloatingPointRegisters(); - - size_t attempt = 0; - while (true) { - ++attempt; - DCHECK(attempt <= kMaxGraphColoringAttemptsDebug) - << "Exceeded debug max graph coloring register allocation attempts. " - << "This could indicate that the register allocator is not making forward progress, " - << "which could be caused by prioritizing the wrong live intervals. (Short intervals " - << "should be prioritized over long ones, because they cannot be split further.)"; - - // Many data structures are cleared between graph coloring attempts, so we reduce - // total memory usage by using a new scoped arena allocator for each attempt. - ScopedArenaAllocator coloring_attempt_allocator(allocator_->GetArenaStack()); - ColoringIteration iteration(this, - &coloring_attempt_allocator, - processing_core_regs, - num_registers); - - // (2) Build the interference graph. - ScopedArenaVector<InterferenceNode*>& physical_nodes = processing_core_regs - ? physical_core_nodes_ - : physical_fp_nodes_; - iteration.BuildInterferenceGraph(intervals, physical_nodes); - - // (3) Add coalesce opportunities. - // If we have tried coloring the graph a suspiciously high number of times, give - // up on move coalescing, just in case the coalescing heuristics are not conservative. - // (This situation will be caught if DCHECKs are turned on.) - if (iterative_move_coalescing_ && attempt <= kMaxGraphColoringAttemptsDebug) { - iteration.FindCoalesceOpportunities(); - } - - // (4) Prune all uncolored nodes from interference graph. - iteration.PruneInterferenceGraph(); - - // (5) Color pruned nodes based on interferences. - bool successful = iteration.ColorInterferenceGraph(); - - // We manually clear coalesce opportunities for physical nodes, - // since they persist across coloring attempts. - for (InterferenceNode* node : physical_core_nodes_) { - node->ClearCoalesceOpportunities(); - } - for (InterferenceNode* node : physical_fp_nodes_) { - node->ClearCoalesceOpportunities(); - } - - if (successful) { - // Assign spill slots. - AllocateSpillSlots(iteration.GetPrunableNodes()); - - // Tell the code generator which registers were allocated. - // We only look at prunable_nodes because we already told the code generator about - // fixed intervals while processing instructions. We also ignore the fixed intervals - // placed at the top of catch blocks. 
- for (InterferenceNode* node : iteration.GetPrunableNodes()) { - LiveInterval* interval = node->GetInterval(); - if (interval->HasRegister()) { - Location low_reg = processing_core_regs - ? Location::RegisterLocation(interval->GetRegister()) - : Location::FpuRegisterLocation(interval->GetRegister()); - codegen_->AddAllocatedRegister(low_reg); - if (interval->HasHighInterval()) { - LiveInterval* high = interval->GetHighInterval(); - DCHECK(high->HasRegister()); - Location high_reg = processing_core_regs - ? Location::RegisterLocation(high->GetRegister()) - : Location::FpuRegisterLocation(high->GetRegister()); - codegen_->AddAllocatedRegister(high_reg); - } - } else { - DCHECK_IMPLIES(interval->HasHighInterval(), - !interval->GetHighInterval()->HasRegister()); - } - } - - break; - } - } // while unsuccessful - } // for processing_core_instructions - - // (6) Resolve locations and deconstruct SSA form. - RegisterAllocationResolver(codegen_, liveness_) - .Resolve(ArrayRef<HInstruction* const>(safepoints_), - reserved_art_method_slots_ + reserved_out_slots_, - num_int_spill_slots_, - num_long_spill_slots_, - num_float_spill_slots_, - num_double_spill_slots_, - catch_phi_spill_slot_counter_, - ArrayRef<LiveInterval* const>(temp_intervals_)); - - if (kIsDebugBuild) { - Validate(/*log_fatal_on_failure*/ true); - } -} - -bool RegisterAllocatorGraphColor::Validate(bool log_fatal_on_failure) { - for (bool processing_core_regs : {true, false}) { - ScopedArenaAllocator allocator(allocator_->GetArenaStack()); - ScopedArenaVector<LiveInterval*> intervals( - allocator.Adapter(kArenaAllocRegisterAllocatorValidate)); - for (size_t i = 0; i < liveness_.GetNumberOfSsaValues(); ++i) { - HInstruction* instruction = liveness_.GetInstructionFromSsaIndex(i); - LiveInterval* interval = instruction->GetLiveInterval(); - if (interval != nullptr && IsCoreInterval(interval) == processing_core_regs) { - intervals.push_back(instruction->GetLiveInterval()); - } - } - - ScopedArenaVector<InterferenceNode*>& physical_nodes = processing_core_regs - ? physical_core_nodes_ - : physical_fp_nodes_; - for (InterferenceNode* fixed : physical_nodes) { - LiveInterval* interval = fixed->GetInterval(); - if (interval->GetFirstRange() != nullptr) { - // Ideally we would check fixed ranges as well, but currently there are times when - // two fixed intervals for the same register will overlap. For example, a fixed input - // and a fixed output may sometimes share the same register, in which there will be two - // fixed intervals for the same place. - } - } - - for (LiveInterval* temp : temp_intervals_) { - if (IsCoreInterval(temp) == processing_core_regs) { - intervals.push_back(temp); - } - } - - size_t spill_slots = num_int_spill_slots_ - + num_long_spill_slots_ - + num_float_spill_slots_ - + num_double_spill_slots_ - + catch_phi_spill_slot_counter_; - bool ok = ValidateIntervals(ArrayRef<LiveInterval* const>(intervals), - spill_slots, - reserved_art_method_slots_ + reserved_out_slots_, - *codegen_, - processing_core_regs, - log_fatal_on_failure); - if (!ok) { - return false; - } - } // for processing_core_regs - - return true; -} - -void RegisterAllocatorGraphColor::ProcessInstructions() { - for (HBasicBlock* block : codegen_->GetGraph()->GetLinearPostOrder()) { - // Note that we currently depend on this ordering, since some helper - // code is designed for linear scan register allocation. 
- for (HBackwardInstructionIterator instr_it(block->GetInstructions()); - !instr_it.Done(); - instr_it.Advance()) { - ProcessInstruction(instr_it.Current()); - } - - for (HInstructionIterator phi_it(block->GetPhis()); !phi_it.Done(); phi_it.Advance()) { - ProcessInstruction(phi_it.Current()); - } - - if (block->IsCatchBlock() - || (block->IsLoopHeader() && block->GetLoopInformation()->IsIrreducible())) { - // By blocking all registers at the top of each catch block or irreducible loop, we force - // intervals belonging to the live-in set of the catch/header block to be spilled. - // TODO(ngeoffray): Phis in this block could be allocated in register. - size_t position = block->GetLifetimeStart(); - BlockRegisters(position, position + 1); - } - } -} - -bool RegisterAllocatorGraphColor::TryRemoveSuspendCheckEntry(HInstruction* instruction) { - LocationSummary* locations = instruction->GetLocations(); - if (instruction->IsSuspendCheckEntry() && !codegen_->NeedsSuspendCheckEntry()) { - // TODO: We do this here because we do not want the suspend check to artificially - // create live registers. We should find another place, but this is currently the - // simplest. - DCHECK_EQ(locations->GetTempCount(), 0u); - instruction->GetBlock()->RemoveInstruction(instruction); - return true; - } - return false; -} - -void RegisterAllocatorGraphColor::ProcessInstruction(HInstruction* instruction) { - LocationSummary* locations = instruction->GetLocations(); - if (locations == nullptr) { - return; - } - if (TryRemoveSuspendCheckEntry(instruction)) { - return; - } - - CheckForTempLiveIntervals(instruction); - CheckForSafepoint(instruction); - if (locations->WillCall()) { - // If a call will happen, create fixed intervals for caller-save registers. - // TODO: Note that it may be beneficial to later split intervals at this point, - // so that we allow last-minute moves from a caller-save register - // to a callee-save register. - BlockRegisters(instruction->GetLifetimePosition(), - instruction->GetLifetimePosition() + 1, - /*caller_save_only*/ true); - } - CheckForFixedInputs(instruction); - - LiveInterval* interval = instruction->GetLiveInterval(); - if (interval == nullptr) { - // Instructions lacking a valid output location do not have a live interval. - DCHECK(!locations->Out().IsValid()); - return; - } - - // Low intervals act as representatives for their corresponding high interval. - DCHECK(!interval->IsHighInterval()); - if (codegen_->NeedsTwoRegisters(interval->GetType())) { - interval->AddHighInterval(); - } - AddSafepointsFor(instruction); - CheckForFixedOutput(instruction); - AllocateSpillSlotForCatchPhi(instruction); - - ScopedArenaVector<LiveInterval*>& intervals = IsCoreInterval(interval) - ? core_intervals_ - : fp_intervals_; - if (interval->HasSpillSlot() || instruction->IsConstant()) { - // Note that if an interval already has a spill slot, then its value currently resides - // in the stack (e.g., parameters). Thus we do not have to allocate a register until its first - // register use. This is also true for constants, which can be materialized at any point. - size_t first_register_use = interval->FirstRegisterUse(); - if (first_register_use != kNoLifetime) { - LiveInterval* split = SplitBetween(interval, interval->GetStart(), first_register_use - 1); - intervals.push_back(split); - } else { - // We won't allocate a register for this value. 
- } - } else { - intervals.push_back(interval); - } -} - -void RegisterAllocatorGraphColor::CheckForFixedInputs(HInstruction* instruction) { - // We simply block physical registers where necessary. - // TODO: Ideally we would coalesce the physical register with the register - // allocated to the input value, but this can be tricky if, e.g., there - // could be multiple physical register uses of the same value at the - // same instruction. Furthermore, there's currently no distinction between - // fixed inputs to a call (which will be clobbered) and other fixed inputs (which - // may not be clobbered). - LocationSummary* locations = instruction->GetLocations(); - size_t position = instruction->GetLifetimePosition(); - for (size_t i = 0; i < locations->GetInputCount(); ++i) { - Location input = locations->InAt(i); - if (input.IsRegister() || input.IsFpuRegister()) { - BlockRegister(input, position, position + 1); - codegen_->AddAllocatedRegister(input); - } else if (input.IsPair()) { - BlockRegister(input.ToLow(), position, position + 1); - BlockRegister(input.ToHigh(), position, position + 1); - codegen_->AddAllocatedRegister(input.ToLow()); - codegen_->AddAllocatedRegister(input.ToHigh()); - } - } -} - -void RegisterAllocatorGraphColor::CheckForFixedOutput(HInstruction* instruction) { - // If an instruction has a fixed output location, we give the live interval a register and then - // proactively split it just after the definition point to avoid creating too many interferences - // with a fixed node. - LiveInterval* interval = instruction->GetLiveInterval(); - Location out = interval->GetDefinedBy()->GetLocations()->Out(); - size_t position = instruction->GetLifetimePosition(); - DCHECK_GE(interval->GetEnd() - position, 2u); - - if (out.IsUnallocated() && out.GetPolicy() == Location::kSameAsFirstInput) { - out = instruction->GetLocations()->InAt(0); - } - - if (out.IsRegister() || out.IsFpuRegister()) { - interval->SetRegister(out.reg()); - codegen_->AddAllocatedRegister(out); - Split(interval, position + 1); - } else if (out.IsPair()) { - interval->SetRegister(out.low()); - interval->GetHighInterval()->SetRegister(out.high()); - codegen_->AddAllocatedRegister(out.ToLow()); - codegen_->AddAllocatedRegister(out.ToHigh()); - Split(interval, position + 1); - } else if (out.IsStackSlot() || out.IsDoubleStackSlot()) { - interval->SetSpillSlot(out.GetStackIndex()); - } else { - DCHECK(out.IsUnallocated() || out.IsConstant()); - } -} - -void RegisterAllocatorGraphColor::AddSafepointsFor(HInstruction* instruction) { - LiveInterval* interval = instruction->GetLiveInterval(); - for (size_t safepoint_index = safepoints_.size(); safepoint_index > 0; --safepoint_index) { - HInstruction* safepoint = safepoints_[safepoint_index - 1u]; - size_t safepoint_position = safepoint->GetLifetimePosition(); - - // Test that safepoints_ are ordered in the optimal way. - DCHECK(safepoint_index == safepoints_.size() || - safepoints_[safepoint_index]->GetLifetimePosition() < safepoint_position); - - if (safepoint_position == interval->GetStart()) { - // The safepoint is for this instruction, so the location of the instruction - // does not need to be saved. - DCHECK_EQ(safepoint_index, safepoints_.size()); - DCHECK_EQ(safepoint, instruction); - continue; - } else if (interval->IsDeadAt(safepoint_position)) { - break; - } else if (!interval->Covers(safepoint_position)) { - // Hole in the interval. 
- continue; - } - interval->AddSafepoint(safepoint); - } -} - -void RegisterAllocatorGraphColor::CheckForTempLiveIntervals(HInstruction* instruction) { - LocationSummary* locations = instruction->GetLocations(); - size_t position = instruction->GetLifetimePosition(); - for (size_t i = 0; i < locations->GetTempCount(); ++i) { - Location temp = locations->GetTemp(i); - if (temp.IsRegister() || temp.IsFpuRegister()) { - BlockRegister(temp, position, position + 1); - codegen_->AddAllocatedRegister(temp); - } else { - DCHECK(temp.IsUnallocated()); - switch (temp.GetPolicy()) { - case Location::kRequiresRegister: { - LiveInterval* interval = - LiveInterval::MakeTempInterval(allocator_, DataType::Type::kInt32); - interval->AddTempUse(instruction, i); - core_intervals_.push_back(interval); - temp_intervals_.push_back(interval); - break; - } - - case Location::kRequiresFpuRegister: { - LiveInterval* interval = - LiveInterval::MakeTempInterval(allocator_, DataType::Type::kFloat64); - interval->AddTempUse(instruction, i); - fp_intervals_.push_back(interval); - temp_intervals_.push_back(interval); - if (codegen_->NeedsTwoRegisters(DataType::Type::kFloat64)) { - interval->AddHighInterval(/*is_temp*/ true); - temp_intervals_.push_back(interval->GetHighInterval()); - } - break; - } - - default: - LOG(FATAL) << "Unexpected policy for temporary location " - << temp.GetPolicy(); - } - } - } -} - -void RegisterAllocatorGraphColor::CheckForSafepoint(HInstruction* instruction) { - LocationSummary* locations = instruction->GetLocations(); - - if (locations->NeedsSafepoint()) { - safepoints_.push_back(instruction); - } -} - -LiveInterval* RegisterAllocatorGraphColor::TrySplit(LiveInterval* interval, size_t position) { - if (interval->GetStart() < position && position < interval->GetEnd()) { - return Split(interval, position); - } else { - return interval; - } -} - -void RegisterAllocatorGraphColor::SplitAtRegisterUses(LiveInterval* interval) { - DCHECK(!interval->IsHighInterval()); - - // Split just after a register definition. - if (interval->IsParent() && interval->DefinitionRequiresRegister()) { - interval = TrySplit(interval, interval->GetStart() + 1); - } - - // Process uses in the range [interval->GetStart(), interval->GetEnd()], i.e. - // [interval->GetStart(), interval->GetEnd() + 1) - auto matching_use_range = FindMatchingUseRange(interval->GetUses().begin(), - interval->GetUses().end(), - interval->GetStart(), - interval->GetEnd() + 1u); - // Split around register uses. - for (const UsePosition& use : matching_use_range) { - if (use.RequiresRegister()) { - size_t position = use.GetPosition(); - interval = TrySplit(interval, position - 1); - if (liveness_.GetInstructionFromPosition(position / 2)->IsControlFlow()) { - // If we are at the very end of a basic block, we cannot split right - // at the use. Split just after instead. 
- interval = TrySplit(interval, position + 1); - } else { - interval = TrySplit(interval, position); - } - } - } -} - -void RegisterAllocatorGraphColor::AllocateSpillSlotForCatchPhi(HInstruction* instruction) { - if (instruction->IsPhi() && instruction->AsPhi()->IsCatchPhi()) { - HPhi* phi = instruction->AsPhi(); - LiveInterval* interval = phi->GetLiveInterval(); - - HInstruction* previous_phi = phi->GetPrevious(); - DCHECK(previous_phi == nullptr || - previous_phi->AsPhi()->GetRegNumber() <= phi->GetRegNumber()) - << "Phis expected to be sorted by vreg number, " - << "so that equivalent phis are adjacent."; - - if (phi->IsVRegEquivalentOf(previous_phi)) { - // Assign the same spill slot. - DCHECK(previous_phi->GetLiveInterval()->HasSpillSlot()); - interval->SetSpillSlot(previous_phi->GetLiveInterval()->GetSpillSlot()); - } else { - interval->SetSpillSlot(catch_phi_spill_slot_counter_); - catch_phi_spill_slot_counter_ += interval->NumberOfSpillSlotsNeeded(); - } - } -} - -void RegisterAllocatorGraphColor::BlockRegister(Location location, - size_t start, - size_t end) { - DCHECK(location.IsRegister() || location.IsFpuRegister()); - int reg = location.reg(); - LiveInterval* interval = location.IsRegister() - ? physical_core_nodes_[reg]->GetInterval() - : physical_fp_nodes_[reg]->GetInterval(); - DCHECK(interval->GetRegister() == reg); - bool blocked_by_codegen = location.IsRegister() - ? codegen_->IsBlockedCoreRegister(reg) - : codegen_->IsBlockedFloatingPointRegister(reg); - if (blocked_by_codegen) { - // We've already blocked this register for the entire method. (And adding a - // range inside another range violates the preconditions of AddRange). - } else { - interval->AddRange(start, end); - } -} - -void RegisterAllocatorGraphColor::BlockRegisters(size_t start, size_t end, bool caller_save_only) { - for (size_t i = 0; i < codegen_->GetNumberOfCoreRegisters(); ++i) { - if (!caller_save_only || !codegen_->IsCoreCalleeSaveRegister(i)) { - BlockRegister(Location::RegisterLocation(i), start, end); - } - } - for (size_t i = 0; i < codegen_->GetNumberOfFloatingPointRegisters(); ++i) { - if (!caller_save_only || !codegen_->IsFloatingPointCalleeSaveRegister(i)) { - BlockRegister(Location::FpuRegisterLocation(i), start, end); - } - } -} - -void ColoringIteration::AddPotentialInterference(InterferenceNode* from, - InterferenceNode* to, - bool guaranteed_not_interfering_yet, - bool both_directions) { - if (from->IsPrecolored()) { - // We save space by ignoring outgoing edges from fixed nodes. - } else if (to->IsPrecolored()) { - // It is important that only a single node represents a given fixed register in the - // interference graph. We retrieve that node here. - const ScopedArenaVector<InterferenceNode*>& physical_nodes = - to->GetInterval()->IsFloatingPoint() ? register_allocator_->physical_fp_nodes_ - : register_allocator_->physical_core_nodes_; - InterferenceNode* physical_node = physical_nodes[to->GetInterval()->GetRegister()]; - from->AddInterference( - physical_node, /*guaranteed_not_interfering_yet*/ false, &adjacent_nodes_links_); - DCHECK_EQ(to->GetInterval()->GetRegister(), physical_node->GetInterval()->GetRegister()); - DCHECK_EQ(to->GetAlias(), physical_node) << "Fixed nodes should alias the canonical fixed node"; - - // If a node interferes with a fixed pair node, the weight of the edge may - // be inaccurate after using the alias of the pair node, because the alias of the pair node - // is a singular node. 
- // We could make special pair fixed nodes, but that ends up being too conservative because - // a node could then interfere with both {r1} and {r1,r2}, leading to a degree of - // three rather than two. - // Instead, we explicitly add an interference with the high node of the fixed pair node. - // TODO: This is too conservative at time for pair nodes, but the fact that fixed pair intervals - // can be unaligned on x86 complicates things. - if (to->IsPair()) { - InterferenceNode* high_node = - physical_nodes[to->GetInterval()->GetHighInterval()->GetRegister()]; - DCHECK_EQ(to->GetInterval()->GetHighInterval()->GetRegister(), - high_node->GetInterval()->GetRegister()); - from->AddInterference( - high_node, /*guaranteed_not_interfering_yet*/ false, &adjacent_nodes_links_); - } - } else { - // Standard interference between two uncolored nodes. - from->AddInterference(to, guaranteed_not_interfering_yet, &adjacent_nodes_links_); - } - - if (both_directions) { - AddPotentialInterference(to, from, guaranteed_not_interfering_yet, /*both_directions*/ false); - } -} - -// Returns true if `in_node` represents an input interval of `out_node`, and the output interval -// is allowed to have the same register as the input interval. -// TODO: Ideally we should just produce correct intervals in liveness analysis. -// We would need to refactor the current live interval layout to do so, which is -// no small task. -static bool CheckInputOutputCanOverlap(InterferenceNode* in_node, InterferenceNode* out_node) { - LiveInterval* output_interval = out_node->GetInterval(); - HInstruction* defined_by = output_interval->GetDefinedBy(); - if (defined_by == nullptr) { - // This must not be a definition point. - return false; - } - - LocationSummary* locations = defined_by->GetLocations(); - if (locations->OutputCanOverlapWithInputs()) { - // This instruction does not allow the output to reuse a register from an input. - return false; - } - - LiveInterval* input_interval = in_node->GetInterval(); - LiveInterval* next_sibling = input_interval->GetNextSibling(); - size_t def_position = defined_by->GetLifetimePosition(); - size_t use_position = def_position + 1; - if (next_sibling != nullptr && next_sibling->GetStart() == use_position) { - // The next sibling starts at the use position, so reusing the input register in the output - // would clobber the input before it's moved into the sibling interval location. - return false; - } - - if (!input_interval->IsDeadAt(use_position) && input_interval->CoversSlow(use_position)) { - // The input interval is live after the use position. - return false; - } - - HInputsRef inputs = defined_by->GetInputs(); - for (size_t i = 0; i < inputs.size(); ++i) { - if (inputs[i]->GetLiveInterval()->GetSiblingAt(def_position) == input_interval) { - DCHECK(input_interval->SameRegisterKind(*output_interval)); - return true; - } - } - - // The input interval was not an input for this instruction. - return false; -} - -void ColoringIteration::BuildInterferenceGraph( - const ScopedArenaVector<LiveInterval*>& intervals, - const ScopedArenaVector<InterferenceNode*>& physical_nodes) { - DCHECK(interval_node_map_.empty() && prunable_nodes_.empty()); - // Build the interference graph efficiently by ordering range endpoints - // by position and doing a linear sweep to find interferences. (That is, we - // jump from endpoint to endpoint, maintaining a set of intervals live at each - // point. If two nodes are ever in the live set at the same time, then they - // interfere with each other.) 
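A condensed, standalone version of that sweep, with plain int ids standing in for interference nodes and one live range per id (Range/SweepInterferences are illustrative names, not the ART types):

#include <algorithm>
#include <cstddef>
#include <tuple>
#include <utility>
#include <vector>

struct Range { std::size_t start; std::size_t end; int node; };

// Sort endpoints, keep the set of currently live nodes, and record an
// interference whenever a range begins while other nodes are live. Ends sort
// before begins at equal positions (false < true), so abutting ranges do not
// interfere.
std::vector<std::pair<int, int>> SweepInterferences(const std::vector<Range>& ranges) {
  std::vector<std::tuple<std::size_t, bool, int>> endpoints;
  for (const Range& r : ranges) {
    endpoints.emplace_back(r.start, true, r.node);
    endpoints.emplace_back(r.end, false, r.node);
  }
  std::sort(endpoints.begin(), endpoints.end());

  std::vector<int> live;
  std::vector<std::pair<int, int>> interferences;
  for (const auto& [position, is_begin, node] : endpoints) {
    if (is_begin) {
      for (int other : live) {
        interferences.emplace_back(other, node);
      }
      live.push_back(node);
    } else {
      live.erase(std::find(live.begin(), live.end(), node));
    }
  }
  return interferences;
}

// For ranges {0,4,#0}, {2,6,#1}, {4,8,#2}: #0/#1 and #1/#2 are recorded as
// interfering, but #0 and #2 only abut at position 4 and are not.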
- // - // We order by both position and (secondarily) by whether the endpoint - // begins or ends a range; we want to process range endings before range - // beginnings at the same position because they should not conflict. - // - // For simplicity, we create a tuple for each endpoint, and then sort the tuples. - // Tuple contents: (position, is_range_beginning, node). - ScopedArenaVector<std::tuple<size_t, bool, InterferenceNode*>> range_endpoints( - allocator_->Adapter(kArenaAllocRegisterAllocator)); - - // We reserve plenty of space to avoid excessive copying. - range_endpoints.reserve(4 * prunable_nodes_.size()); - - for (LiveInterval* parent : intervals) { - for (LiveInterval* sibling = parent; sibling != nullptr; sibling = sibling->GetNextSibling()) { - LiveRange* range = sibling->GetFirstRange(); - if (range != nullptr) { - InterferenceNode* node = - new (allocator_) InterferenceNode(sibling, register_allocator_->liveness_); - interval_node_map_.insert(std::make_pair(sibling, node)); - - if (sibling->HasRegister()) { - // Fixed nodes should alias the canonical node for the corresponding register. - node->stage = NodeStage::kPrecolored; - InterferenceNode* physical_node = physical_nodes[sibling->GetRegister()]; - node->SetAlias(physical_node); - DCHECK_EQ(node->GetInterval()->GetRegister(), - physical_node->GetInterval()->GetRegister()); - } else { - node->stage = NodeStage::kPrunable; - prunable_nodes_.push_back(node); - } - - while (range != nullptr) { - range_endpoints.push_back(std::make_tuple(range->GetStart(), true, node)); - range_endpoints.push_back(std::make_tuple(range->GetEnd(), false, node)); - range = range->GetNext(); - } - } - } - } - - // Sort the endpoints. - // We explicitly ignore the third entry of each tuple (the node pointer) in order - // to maintain determinism. - std::sort(range_endpoints.begin(), range_endpoints.end(), - [] (const std::tuple<size_t, bool, InterferenceNode*>& lhs, - const std::tuple<size_t, bool, InterferenceNode*>& rhs) { - return std::tie(std::get<0>(lhs), std::get<1>(lhs)) - < std::tie(std::get<0>(rhs), std::get<1>(rhs)); - }); - - // Nodes live at the current position in the linear sweep. - ScopedArenaVector<InterferenceNode*> live(allocator_->Adapter(kArenaAllocRegisterAllocator)); - - // Linear sweep. When we encounter the beginning of a range, we add the corresponding node to the - // live set. When we encounter the end of a range, we remove the corresponding node - // from the live set. Nodes interfere if they are in the live set at the same time. - for (auto it = range_endpoints.begin(); it != range_endpoints.end(); ++it) { - bool is_range_beginning; - InterferenceNode* node; - size_t position; - // Extract information from the tuple, including the node this tuple represents. - std::tie(position, is_range_beginning, node) = *it; - - if (is_range_beginning) { - bool guaranteed_not_interfering_yet = position == node->GetInterval()->GetStart(); - for (InterferenceNode* conflicting : live) { - DCHECK_NE(node, conflicting); - if (CheckInputOutputCanOverlap(conflicting, node)) { - // We do not add an interference, because the instruction represented by `node` allows - // its output to share a register with an input, represented here by `conflicting`. - } else { - AddPotentialInterference(node, conflicting, guaranteed_not_interfering_yet); - } - } - DCHECK(std::find(live.begin(), live.end(), node) == live.end()); - live.push_back(node); - } else { - // End of range. 
- auto live_it = std::find(live.begin(), live.end(), node); - DCHECK(live_it != live.end()); - live.erase(live_it); - } - } - DCHECK(live.empty()); -} - -void ColoringIteration::CreateCoalesceOpportunity(InterferenceNode* a, - InterferenceNode* b, - CoalesceKind kind, - size_t position) { - DCHECK_EQ(a->IsPair(), b->IsPair()) - << "Nodes of different memory widths should never be coalesced"; - CoalesceOpportunity* opportunity = - new (allocator_) CoalesceOpportunity(a, b, kind, position, register_allocator_->liveness_); - a->AddCoalesceOpportunity(opportunity, &coalesce_opportunities_links_); - b->AddCoalesceOpportunity(opportunity, &coalesce_opportunities_links_); - coalesce_worklist_.push(opportunity); -} - -// When looking for coalesce opportunities, we use the interval_node_map_ to find the node -// corresponding to an interval. Note that not all intervals are in this map, notably the parents -// of constants and stack arguments. (However, these interval should not be involved in coalesce -// opportunities anyway, because they're not going to be in registers.) -void ColoringIteration::FindCoalesceOpportunities() { - DCHECK(coalesce_worklist_.empty()); - - for (InterferenceNode* node : prunable_nodes_) { - LiveInterval* interval = node->GetInterval(); - - // Coalesce siblings. - LiveInterval* next_sibling = interval->GetNextSibling(); - if (next_sibling != nullptr && interval->GetEnd() == next_sibling->GetStart()) { - auto it = interval_node_map_.find(next_sibling); - if (it != interval_node_map_.end()) { - InterferenceNode* sibling_node = it->second; - CreateCoalesceOpportunity(node, - sibling_node, - CoalesceKind::kAdjacentSibling, - interval->GetEnd()); - } - } - - // Coalesce fixed outputs with this interval if this interval is an adjacent sibling. - LiveInterval* parent = interval->GetParent(); - if (parent->HasRegister() - && parent->GetNextSibling() == interval - && parent->GetEnd() == interval->GetStart()) { - auto it = interval_node_map_.find(parent); - if (it != interval_node_map_.end()) { - InterferenceNode* parent_node = it->second; - CreateCoalesceOpportunity(node, - parent_node, - CoalesceKind::kFixedOutputSibling, - parent->GetEnd()); - } - } - - // Try to prevent moves across blocks. - // Note that this does not lead to many succeeding coalesce attempts, so could be removed - // if found to add to compile time. - const SsaLivenessAnalysis& liveness = register_allocator_->liveness_; - if (interval->IsSplit() && liveness.IsAtBlockBoundary(interval->GetStart() / 2)) { - // If the start of this interval is at a block boundary, we look at the - // location of the interval in blocks preceding the block this interval - // starts at. This can avoid a move between the two blocks. - HBasicBlock* block = liveness.GetBlockFromPosition(interval->GetStart() / 2); - for (HBasicBlock* predecessor : block->GetPredecessors()) { - size_t position = predecessor->GetLifetimeEnd() - 1; - LiveInterval* existing = interval->GetParent()->GetSiblingAt(position); - if (existing != nullptr) { - auto it = interval_node_map_.find(existing); - if (it != interval_node_map_.end()) { - InterferenceNode* existing_node = it->second; - CreateCoalesceOpportunity(node, - existing_node, - CoalesceKind::kNonlinearControlFlow, - position); - } - } - } - } - - // Coalesce phi inputs with the corresponding output. 
- HInstruction* defined_by = interval->GetDefinedBy(); - if (defined_by != nullptr && defined_by->IsPhi()) { - ArrayRef<HBasicBlock* const> predecessors(defined_by->GetBlock()->GetPredecessors()); - HInputsRef inputs = defined_by->GetInputs(); - - for (size_t i = 0, e = inputs.size(); i < e; ++i) { - // We want the sibling at the end of the appropriate predecessor block. - size_t position = predecessors[i]->GetLifetimeEnd() - 1; - LiveInterval* input_interval = inputs[i]->GetLiveInterval()->GetSiblingAt(position); - - auto it = interval_node_map_.find(input_interval); - if (it != interval_node_map_.end()) { - InterferenceNode* input_node = it->second; - CreateCoalesceOpportunity(node, input_node, CoalesceKind::kPhi, position); - } - } - } - - // Coalesce output with first input when policy is kSameAsFirstInput. - if (defined_by != nullptr) { - Location out = defined_by->GetLocations()->Out(); - if (out.IsUnallocated() && out.GetPolicy() == Location::kSameAsFirstInput) { - LiveInterval* input_interval - = defined_by->InputAt(0)->GetLiveInterval()->GetSiblingAt(interval->GetStart() - 1); - // TODO: Could we consider lifetime holes here? - if (input_interval->GetEnd() == interval->GetStart()) { - auto it = interval_node_map_.find(input_interval); - if (it != interval_node_map_.end()) { - InterferenceNode* input_node = it->second; - CreateCoalesceOpportunity(node, - input_node, - CoalesceKind::kFirstInput, - interval->GetStart()); - } - } - } - } - - // An interval that starts an instruction (that is, it is not split), may - // re-use the registers used by the inputs of that instruction, based on the - // location summary. - if (defined_by != nullptr) { - DCHECK(!interval->IsSplit()); - LocationSummary* locations = defined_by->GetLocations(); - if (!locations->OutputCanOverlapWithInputs()) { - HInputsRef inputs = defined_by->GetInputs(); - for (size_t i = 0; i < inputs.size(); ++i) { - size_t def_point = defined_by->GetLifetimePosition(); - // TODO: Getting the sibling at the def_point might not be quite what we want - // for fixed inputs, since the use will be *at* the def_point rather than after. - LiveInterval* input_interval = inputs[i]->GetLiveInterval()->GetSiblingAt(def_point); - if (input_interval != nullptr && - input_interval->HasHighInterval() == interval->HasHighInterval()) { - auto it = interval_node_map_.find(input_interval); - if (it != interval_node_map_.end()) { - InterferenceNode* input_node = it->second; - CreateCoalesceOpportunity(node, - input_node, - CoalesceKind::kAnyInput, - interval->GetStart()); - } - } - } - } - } - - // Try to prevent moves into fixed input locations. - // Process uses in the range (interval->GetStart(), interval->GetEnd()], i.e. - // [interval->GetStart() + 1, interval->GetEnd() + 1) - auto matching_use_range = FindMatchingUseRange(interval->GetUses().begin(), - interval->GetUses().end(), - interval->GetStart() + 1u, - interval->GetEnd() + 1u); - for (const UsePosition& use : matching_use_range) { - HInstruction* user = use.GetUser(); - if (user == nullptr) { - // User may be null for certain intervals, such as temp intervals. - continue; - } - LocationSummary* locations = user->GetLocations(); - Location input = locations->InAt(use.GetInputIndex()); - if (input.IsRegister() || input.IsFpuRegister()) { - // TODO: Could try to handle pair interval too, but coalescing with fixed pair nodes - // is currently not supported. - InterferenceNode* fixed_node = input.IsRegister() - ? 
register_allocator_->physical_core_nodes_[input.reg()] - : register_allocator_->physical_fp_nodes_[input.reg()]; - CreateCoalesceOpportunity(node, - fixed_node, - CoalesceKind::kFixedInput, - user->GetLifetimePosition()); - } - } - } // for node in prunable_nodes -} - -static bool IsLowDegreeNode(InterferenceNode* node, size_t num_regs) { - return node->GetOutDegree() < num_regs; -} - -static bool IsHighDegreeNode(InterferenceNode* node, size_t num_regs) { - return !IsLowDegreeNode(node, num_regs); -} - -void ColoringIteration::PruneInterferenceGraph() { - DCHECK(pruned_nodes_.empty() - && simplify_worklist_.empty() - && freeze_worklist_.empty() - && spill_worklist_.empty()); - // When pruning the graph, we refer to nodes with degree less than num_regs as low degree nodes, - // and all others as high degree nodes. The distinction is important: low degree nodes are - // guaranteed a color, while high degree nodes are not. - - // Build worklists. Note that the coalesce worklist has already been - // filled by FindCoalesceOpportunities(). - for (InterferenceNode* node : prunable_nodes_) { - DCHECK(!node->IsPrecolored()) << "Fixed nodes should never be pruned"; - if (IsLowDegreeNode(node, num_regs_)) { - if (node->GetCoalesceOpportunities().empty()) { - // Simplify Worklist. - node->stage = NodeStage::kSimplifyWorklist; - simplify_worklist_.push_back(node); - } else { - // Freeze Worklist. - node->stage = NodeStage::kFreezeWorklist; - freeze_worklist_.push_back(node); - } - } else { - // Spill worklist. - node->stage = NodeStage::kSpillWorklist; - spill_worklist_.push(node); - } - } - - // Prune graph. - // Note that we do not remove a node from its current worklist if it moves to another, so it may - // be in multiple worklists at once; the node's `phase` says which worklist it is really in. - while (true) { - if (!simplify_worklist_.empty()) { - // Prune low-degree nodes. - // TODO: pop_back() should work as well, but it didn't; we get a - // failed check while pruning. We should look into this. - InterferenceNode* node = simplify_worklist_.front(); - simplify_worklist_.pop_front(); - DCHECK_EQ(node->stage, NodeStage::kSimplifyWorklist) << "Cannot move from simplify list"; - DCHECK_LT(node->GetOutDegree(), num_regs_) << "Nodes in simplify list should be low degree"; - DCHECK(!node->IsMoveRelated()) << "Nodes in simplify list should not be move related"; - PruneNode(node); - } else if (!coalesce_worklist_.empty()) { - // Coalesce. - CoalesceOpportunity* opportunity = coalesce_worklist_.top(); - coalesce_worklist_.pop(); - if (opportunity->stage == CoalesceStage::kWorklist) { - Coalesce(opportunity); - } - } else if (!freeze_worklist_.empty()) { - // Freeze moves and prune a low-degree move-related node. - InterferenceNode* node = freeze_worklist_.front(); - freeze_worklist_.pop_front(); - if (node->stage == NodeStage::kFreezeWorklist) { - DCHECK_LT(node->GetOutDegree(), num_regs_) << "Nodes in freeze list should be low degree"; - DCHECK(node->IsMoveRelated()) << "Nodes in freeze list should be move related"; - FreezeMoves(node); - PruneNode(node); - } - } else if (!spill_worklist_.empty()) { - // We spill the lowest-priority node, because pruning a node earlier - // gives it a higher chance of being spilled. 
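The spill worklist ordering hinges on a std::priority_queue detail: the queue treats its comparator as "ordered before", so handing it a has-greater-priority predicate leaves the lowest-priority element at top(), which is what popping the cheapest node to spill requires. A tiny self-contained illustration with plain ints in place of interference nodes:

#include <cstdio>
#include <queue>
#include <vector>

int main() {
  // cmp(a, b) == true means "a has greater priority"; with std::priority_queue
  // this places the lowest-priority element at top().
  auto has_greater_priority = [](int lhs, int rhs) { return lhs > rhs; };
  std::priority_queue<int, std::vector<int>, decltype(has_greater_priority)>
      worklist(has_greater_priority);
  worklist.push(7);
  worklist.push(2);
  worklist.push(5);
  std::printf("%d\n", worklist.top());  // Prints 2: lowest priority pops first.
  return 0;
}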
- InterferenceNode* node = spill_worklist_.top(); - spill_worklist_.pop(); - if (node->stage == NodeStage::kSpillWorklist) { - DCHECK_GE(node->GetOutDegree(), num_regs_) << "Nodes in spill list should be high degree"; - FreezeMoves(node); - PruneNode(node); - } - } else { - // Pruning complete. - break; - } - } - DCHECK_EQ(prunable_nodes_.size(), pruned_nodes_.size()); -} - -void ColoringIteration::EnableCoalesceOpportunities(InterferenceNode* node) { - for (CoalesceOpportunity* opportunity : node->GetCoalesceOpportunities()) { - if (opportunity->stage == CoalesceStage::kActive) { - opportunity->stage = CoalesceStage::kWorklist; - coalesce_worklist_.push(opportunity); - } - } -} - -void ColoringIteration::PruneNode(InterferenceNode* node) { - DCHECK_NE(node->stage, NodeStage::kPruned); - DCHECK(!node->IsPrecolored()); - node->stage = NodeStage::kPruned; - pruned_nodes_.push(node); - - for (InterferenceNode* adj : node->GetAdjacentNodes()) { - DCHECK_NE(adj->stage, NodeStage::kPruned) << "Should be no interferences with pruned nodes"; - - if (adj->IsPrecolored()) { - // No effect on pre-colored nodes; they're never pruned. - } else { - // Remove the interference. - bool was_high_degree = IsHighDegreeNode(adj, num_regs_); - DCHECK(adj->ContainsInterference(node)) - << "Missing reflexive interference from non-fixed node"; - adj->RemoveInterference(node); - - // Handle transitions from high degree to low degree. - if (was_high_degree && IsLowDegreeNode(adj, num_regs_)) { - EnableCoalesceOpportunities(adj); - for (InterferenceNode* adj_adj : adj->GetAdjacentNodes()) { - EnableCoalesceOpportunities(adj_adj); - } - - DCHECK_EQ(adj->stage, NodeStage::kSpillWorklist); - if (adj->IsMoveRelated()) { - adj->stage = NodeStage::kFreezeWorklist; - freeze_worklist_.push_back(adj); - } else { - adj->stage = NodeStage::kSimplifyWorklist; - simplify_worklist_.push_back(adj); - } - } - } - } -} - -void ColoringIteration::CheckTransitionFromFreezeWorklist(InterferenceNode* node) { - if (IsLowDegreeNode(node, num_regs_) && !node->IsMoveRelated()) { - DCHECK_EQ(node->stage, NodeStage::kFreezeWorklist); - node->stage = NodeStage::kSimplifyWorklist; - simplify_worklist_.push_back(node); - } -} - -void ColoringIteration::FreezeMoves(InterferenceNode* node) { - for (CoalesceOpportunity* opportunity : node->GetCoalesceOpportunities()) { - if (opportunity->stage == CoalesceStage::kDefunct) { - // Constrained moves should remain constrained, since they will not be considered - // during last-chance coalescing. - } else { - opportunity->stage = CoalesceStage::kInactive; - } - InterferenceNode* other = opportunity->node_a->GetAlias() == node - ? opportunity->node_b->GetAlias() - : opportunity->node_a->GetAlias(); - if (other != node && other->stage == NodeStage::kFreezeWorklist) { - DCHECK(IsLowDegreeNode(node, num_regs_)); - CheckTransitionFromFreezeWorklist(other); - } - } -} - -bool ColoringIteration::PrecoloredHeuristic(InterferenceNode* from, - InterferenceNode* into) { - if (!into->IsPrecolored()) { - // The uncolored heuristic will cover this case. - return false; - } - if (from->IsPair() || into->IsPair()) { - // TODO: Merging from a pair node is currently not supported, since fixed pair nodes - // are currently represented as two single fixed nodes in the graph, and `into` is - // only one of them. (We may lose the implicit connections to the second one in a merge.) - return false; - } - - // If all adjacent nodes of `from` are "ok", then we can conservatively merge with `into`. 
- // Reasons an adjacent node `adj` can be "ok": - // (1) If `adj` is low degree, interference with `into` will not affect its existing - // colorable guarantee. (Notice that coalescing cannot increase its degree.) - // (2) If `adj` is pre-colored, it already interferes with `into`. See (3). - // (3) If there's already an interference with `into`, coalescing will not add interferences. - for (InterferenceNode* adj : from->GetAdjacentNodes()) { - if (IsLowDegreeNode(adj, num_regs_) || adj->IsPrecolored() || adj->ContainsInterference(into)) { - // Ok. - } else { - return false; - } - } - return true; -} - -bool ColoringIteration::UncoloredHeuristic(InterferenceNode* from, - InterferenceNode* into) { - if (into->IsPrecolored()) { - // The pre-colored heuristic will handle this case. - return false; - } - - // Arbitrary cap to improve compile time. Tests show that this has negligible affect - // on generated code. - if (from->GetOutDegree() + into->GetOutDegree() > 2 * num_regs_) { - return false; - } - - // It's safe to coalesce two nodes if the resulting node has fewer than `num_regs` neighbors - // of high degree. (Low degree neighbors can be ignored, because they will eventually be - // pruned from the interference graph in the simplify stage.) - size_t high_degree_interferences = 0; - for (InterferenceNode* adj : from->GetAdjacentNodes()) { - if (IsHighDegreeNode(adj, num_regs_)) { - high_degree_interferences += from->EdgeWeightWith(adj); - } - } - for (InterferenceNode* adj : into->GetAdjacentNodes()) { - if (IsHighDegreeNode(adj, num_regs_)) { - if (from->ContainsInterference(adj)) { - // We've already counted this adjacent node. - // Furthermore, its degree will decrease if coalescing succeeds. Thus, it's possible that - // we should not have counted it at all. (This extends the textbook Briggs coalescing test, - // but remains conservative.) - if (adj->GetOutDegree() - into->EdgeWeightWith(adj) < num_regs_) { - high_degree_interferences -= from->EdgeWeightWith(adj); - } - } else { - high_degree_interferences += into->EdgeWeightWith(adj); - } - } - } - - return high_degree_interferences < num_regs_; -} - -void ColoringIteration::Combine(InterferenceNode* from, - InterferenceNode* into) { - from->SetAlias(into); - - // Add interferences. - for (InterferenceNode* adj : from->GetAdjacentNodes()) { - bool was_low_degree = IsLowDegreeNode(adj, num_regs_); - AddPotentialInterference(adj, into, /*guaranteed_not_interfering_yet*/ false); - if (was_low_degree && IsHighDegreeNode(adj, num_regs_)) { - // This is a (temporary) transition to a high degree node. Its degree will decrease again - // when we prune `from`, but it's best to be consistent about the current worklist. - adj->stage = NodeStage::kSpillWorklist; - spill_worklist_.push(adj); - } - } - - // Add coalesce opportunities. - for (CoalesceOpportunity* opportunity : from->GetCoalesceOpportunities()) { - if (opportunity->stage != CoalesceStage::kDefunct) { - into->AddCoalesceOpportunity(opportunity, &coalesce_opportunities_links_); - } - } - EnableCoalesceOpportunities(from); - - // Prune and update worklists. - PruneNode(from); - if (IsLowDegreeNode(into, num_regs_)) { - // Coalesce(...) takes care of checking for a transition to the simplify worklist. - DCHECK_EQ(into->stage, NodeStage::kFreezeWorklist); - } else if (into->stage == NodeStage::kFreezeWorklist) { - // This is a transition to a high degree node. 
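[Editorial aside] The UncoloredHeuristic above is a variant of the Briggs conservative coalescing test. A minimal, self-contained sketch of the textbook form of that test follows; ToyNode and BriggsSafe are hypothetical stand-ins for ART's InterferenceNode machinery, and edge weights, pair intervals, and the degree cap are ignored.

#include <cstddef>
#include <set>

struct ToyNode {
  std::set<ToyNode*> adjacent;  // interference edges
  size_t Degree() const { return adjacent.size(); }
};

// Coalescing `a` and `b` is conservatively safe with `k` registers if the
// merged node would have fewer than `k` neighbors of significant (>= k)
// degree; every other neighbor is guaranteed to be pruned before it.
bool BriggsSafe(const ToyNode& a, const ToyNode& b, size_t k) {
  std::set<ToyNode*> combined = a.adjacent;  // assumes a and b do not interfere
  combined.insert(b.adjacent.begin(), b.adjacent.end());
  size_t significant = 0;
  for (ToyNode* neighbor : combined) {
    if (neighbor->Degree() >= k) {
      ++significant;
    }
  }
  return significant < k;
}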
- into->stage = NodeStage::kSpillWorklist; - spill_worklist_.push(into); - } else { - DCHECK(into->stage == NodeStage::kSpillWorklist || into->stage == NodeStage::kPrecolored); - } -} - -void ColoringIteration::Coalesce(CoalesceOpportunity* opportunity) { - InterferenceNode* from = opportunity->node_a->GetAlias(); - InterferenceNode* into = opportunity->node_b->GetAlias(); - DCHECK_NE(from->stage, NodeStage::kPruned); - DCHECK_NE(into->stage, NodeStage::kPruned); - - if (from->IsPrecolored()) { - // If we have one pre-colored node, make sure it's the `into` node. - std::swap(from, into); - } - - if (from == into) { - // These nodes have already been coalesced. - opportunity->stage = CoalesceStage::kDefunct; - CheckTransitionFromFreezeWorklist(from); - } else if (from->IsPrecolored() || from->ContainsInterference(into)) { - // These nodes interfere. - opportunity->stage = CoalesceStage::kDefunct; - CheckTransitionFromFreezeWorklist(from); - CheckTransitionFromFreezeWorklist(into); - } else if (PrecoloredHeuristic(from, into) - || UncoloredHeuristic(from, into)) { - // We can coalesce these nodes. - opportunity->stage = CoalesceStage::kDefunct; - Combine(from, into); - CheckTransitionFromFreezeWorklist(into); - } else { - // We cannot coalesce, but we may be able to later. - opportunity->stage = CoalesceStage::kActive; - } -} - -// Build a mask with a bit set for each register assigned to some -// interval in `intervals`. -template <typename Container> -static std::bitset<kMaxNumRegs> BuildConflictMask(const Container& intervals) { - std::bitset<kMaxNumRegs> conflict_mask; - for (InterferenceNode* adjacent : intervals) { - LiveInterval* conflicting = adjacent->GetInterval(); - if (conflicting->HasRegister()) { - conflict_mask.set(conflicting->GetRegister()); - if (conflicting->HasHighInterval()) { - DCHECK(conflicting->GetHighInterval()->HasRegister()); - conflict_mask.set(conflicting->GetHighInterval()->GetRegister()); - } - } else { - DCHECK(!conflicting->HasHighInterval() - || !conflicting->GetHighInterval()->HasRegister()); - } - } - return conflict_mask; -} - -bool RegisterAllocatorGraphColor::IsCallerSave(size_t reg, bool processing_core_regs) { - return processing_core_regs - ? !codegen_->IsCoreCalleeSaveRegister(reg) - : !codegen_->IsFloatingPointCalleeSaveRegister(reg); -} - -static bool RegisterIsAligned(size_t reg) { - return reg % 2 == 0; -} - -static size_t FindFirstZeroInConflictMask(std::bitset<kMaxNumRegs> conflict_mask) { - // We use CTZ (count trailing zeros) to quickly find the lowest 0 bit. - // Note that CTZ is undefined if all bits are 0, so we special-case it. - return conflict_mask.all() ? conflict_mask.size() : CTZ(~conflict_mask.to_ulong()); -} - -bool ColoringIteration::ColorInterferenceGraph() { - DCHECK_LE(num_regs_, kMaxNumRegs) << "kMaxNumRegs is too small"; - ScopedArenaVector<LiveInterval*> colored_intervals( - allocator_->Adapter(kArenaAllocRegisterAllocator)); - bool successful = true; - - while (!pruned_nodes_.empty()) { - InterferenceNode* node = pruned_nodes_.top(); - pruned_nodes_.pop(); - LiveInterval* interval = node->GetInterval(); - size_t reg = 0; - - InterferenceNode* alias = node->GetAlias(); - if (alias != node) { - // This node was coalesced with another. 
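[Editorial aside] BuildConflictMask and FindFirstZeroInConflictMask above combine a bitset of occupied registers with a count-trailing-zeros search. A small standalone sketch, assuming a toy 8-register file and the GCC/Clang __builtin_ctzl intrinsic (ART wraps such intrinsics as CTZ):

#include <bitset>
#include <cstddef>
#include <iostream>

constexpr size_t kToyNumRegs = 8;

// Returns the lowest register whose bit is clear in `conflict_mask`, or
// kToyNumRegs if every register is taken.
size_t FirstFreeRegister(const std::bitset<kToyNumRegs>& conflict_mask) {
  if (conflict_mask.all()) {
    return kToyNumRegs;  // no free register (the real code also special-cases this before CTZ)
  }
  return static_cast<size_t>(__builtin_ctzl(~conflict_mask.to_ulong()));
}

int main() {
  std::bitset<kToyNumRegs> taken;
  taken.set(0);
  taken.set(1);
  taken.set(3);
  std::cout << FirstFreeRegister(taken) << '\n';  // registers 0, 1 and 3 taken, so this prints 2
  return 0;
}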
- LiveInterval* alias_interval = alias->GetInterval(); - if (alias_interval->HasRegister()) { - reg = alias_interval->GetRegister(); - DCHECK(!BuildConflictMask(node->GetAdjacentNodes())[reg]) - << "This node conflicts with the register it was coalesced with"; - } else { - DCHECK(false) << node->GetOutDegree() << " " << alias->GetOutDegree() << " " - << "Move coalescing was not conservative, causing a node to be coalesced " - << "with another node that could not be colored"; - if (interval->RequiresRegister()) { - successful = false; - } - } - } else { - // Search for free register(s). - std::bitset<kMaxNumRegs> conflict_mask = BuildConflictMask(node->GetAdjacentNodes()); - if (interval->HasHighInterval()) { - // Note that the graph coloring allocator assumes that pair intervals are aligned here, - // excluding pre-colored pair intervals (which can currently be unaligned on x86). If we - // change the alignment requirements here, we will have to update the algorithm (e.g., - // be more conservative about the weight of edges adjacent to pair nodes.) - while (reg < num_regs_ - 1 && (conflict_mask[reg] || conflict_mask[reg + 1])) { - reg += 2; - } - - // Try to use a caller-save register first. - for (size_t i = 0; i < num_regs_ - 1; i += 2) { - bool low_caller_save = register_allocator_->IsCallerSave(i, processing_core_regs_); - bool high_caller_save = register_allocator_->IsCallerSave(i + 1, processing_core_regs_); - if (!conflict_mask[i] && !conflict_mask[i + 1]) { - if (low_caller_save && high_caller_save) { - reg = i; - break; - } else if (low_caller_save || high_caller_save) { - reg = i; - // Keep looking to try to get both parts in caller-save registers. - } - } - } - } else { - // Not a pair interval. - reg = FindFirstZeroInConflictMask(conflict_mask); - - // Try to use caller-save registers first. - for (size_t i = 0; i < num_regs_; ++i) { - if (!conflict_mask[i] && register_allocator_->IsCallerSave(i, processing_core_regs_)) { - reg = i; - break; - } - } - } - - // Last-chance coalescing. - for (CoalesceOpportunity* opportunity : node->GetCoalesceOpportunities()) { - if (opportunity->stage == CoalesceStage::kDefunct) { - continue; - } - LiveInterval* other_interval = opportunity->node_a->GetAlias() == node - ? opportunity->node_b->GetAlias()->GetInterval() - : opportunity->node_a->GetAlias()->GetInterval(); - if (other_interval->HasRegister()) { - size_t coalesce_register = other_interval->GetRegister(); - if (interval->HasHighInterval()) { - if (!conflict_mask[coalesce_register] && - !conflict_mask[coalesce_register + 1] && - RegisterIsAligned(coalesce_register)) { - reg = coalesce_register; - break; - } - } else if (!conflict_mask[coalesce_register]) { - reg = coalesce_register; - break; - } - } - } - } - - if (reg < (interval->HasHighInterval() ? num_regs_ - 1 : num_regs_)) { - // Assign register. - DCHECK(!interval->HasRegister()); - interval->SetRegister(reg); - colored_intervals.push_back(interval); - if (interval->HasHighInterval()) { - DCHECK(!interval->GetHighInterval()->HasRegister()); - interval->GetHighInterval()->SetRegister(reg + 1); - colored_intervals.push_back(interval->GetHighInterval()); - } - } else if (interval->RequiresRegister()) { - // The interference graph is too dense to color. Make it sparser by - // splitting this live interval. - successful = false; - register_allocator_->SplitAtRegisterUses(interval); - // We continue coloring, because there may be additional intervals that cannot - // be colored, and that we should split. - } else { - // Spill. 
- node->SetNeedsSpillSlot(); - } - } - - // If unsuccessful, reset all register assignments. - if (!successful) { - for (LiveInterval* interval : colored_intervals) { - interval->ClearRegister(); - } - } - - return successful; -} - -void RegisterAllocatorGraphColor::AllocateSpillSlots(ArrayRef<InterferenceNode* const> nodes) { - // The register allocation resolver will organize the stack based on value type, - // so we assign stack slots for each value type separately. - ScopedArenaAllocator allocator(allocator_->GetArenaStack()); - ScopedArenaAllocatorAdapter<void> adapter = allocator.Adapter(kArenaAllocRegisterAllocator); - ScopedArenaVector<LiveInterval*> double_intervals(adapter); - ScopedArenaVector<LiveInterval*> long_intervals(adapter); - ScopedArenaVector<LiveInterval*> float_intervals(adapter); - ScopedArenaVector<LiveInterval*> int_intervals(adapter); - - // The set of parent intervals already handled. - ScopedArenaSet<LiveInterval*> seen(adapter); - - // Find nodes that need spill slots. - for (InterferenceNode* node : nodes) { - if (!node->NeedsSpillSlot()) { - continue; - } - - LiveInterval* parent = node->GetInterval()->GetParent(); - if (seen.find(parent) != seen.end()) { - // We've already handled this interval. - // This can happen if multiple siblings of the same interval request a stack slot. - continue; - } - seen.insert(parent); - - HInstruction* defined_by = parent->GetDefinedBy(); - if (parent->HasSpillSlot()) { - // We already have a spill slot for this value that we can reuse. - } else if (defined_by->IsParameterValue()) { - // Parameters already have a stack slot. - parent->SetSpillSlot(codegen_->GetStackSlotOfParameter(defined_by->AsParameterValue())); - } else if (defined_by->IsCurrentMethod()) { - // The current method is always at stack slot 0. - parent->SetSpillSlot(0); - } else if (defined_by->IsConstant()) { - // Constants don't need a spill slot. - } else { - // We need to find a spill slot for this interval. Place it in the correct - // worklist to be processed later. - switch (node->GetInterval()->GetType()) { - case DataType::Type::kFloat64: - double_intervals.push_back(parent); - break; - case DataType::Type::kInt64: - long_intervals.push_back(parent); - break; - case DataType::Type::kFloat32: - float_intervals.push_back(parent); - break; - case DataType::Type::kReference: - case DataType::Type::kInt32: - case DataType::Type::kUint16: - case DataType::Type::kUint8: - case DataType::Type::kInt8: - case DataType::Type::kBool: - case DataType::Type::kInt16: - int_intervals.push_back(parent); - break; - case DataType::Type::kUint32: - case DataType::Type::kUint64: - case DataType::Type::kVoid: - LOG(FATAL) << "Unexpected type for interval " << node->GetInterval()->GetType(); - UNREACHABLE(); - } - } - } - - // Color spill slots for each value type. - ColorSpillSlots(ArrayRef<LiveInterval* const>(double_intervals), &num_double_spill_slots_); - ColorSpillSlots(ArrayRef<LiveInterval* const>(long_intervals), &num_long_spill_slots_); - ColorSpillSlots(ArrayRef<LiveInterval* const>(float_intervals), &num_float_spill_slots_); - ColorSpillSlots(ArrayRef<LiveInterval* const>(int_intervals), &num_int_spill_slots_); -} - -void RegisterAllocatorGraphColor::ColorSpillSlots(ArrayRef<LiveInterval* const> intervals, - /* out */ size_t* num_stack_slots_used) { - // We cannot use the original interference graph here because spill slots are assigned to - // all of the siblings of an interval, whereas an interference node represents only a single - // sibling. 
So, we assign spill slots linear-scan-style by sorting all the interval endpoints - // by position, and assigning the lowest spill slot available when we encounter an interval - // beginning. We ignore lifetime holes for simplicity. - ScopedArenaAllocator allocator(allocator_->GetArenaStack()); - ScopedArenaVector<std::tuple<size_t, bool, LiveInterval*>> interval_endpoints( - allocator.Adapter(kArenaAllocRegisterAllocator)); - - for (LiveInterval* parent_interval : intervals) { - DCHECK(parent_interval->IsParent()); - DCHECK(!parent_interval->HasSpillSlot()); - size_t start = parent_interval->GetStart(); - size_t end = parent_interval->GetLastSibling()->GetEnd(); - DCHECK_LT(start, end); - interval_endpoints.push_back(std::make_tuple(start, true, parent_interval)); - interval_endpoints.push_back(std::make_tuple(end, false, parent_interval)); - } - - // Sort by position. - // We explicitly ignore the third entry of each tuple (the interval pointer) in order - // to maintain determinism. - std::sort(interval_endpoints.begin(), interval_endpoints.end(), - [] (const std::tuple<size_t, bool, LiveInterval*>& lhs, - const std::tuple<size_t, bool, LiveInterval*>& rhs) { - return std::tie(std::get<0>(lhs), std::get<1>(lhs)) - < std::tie(std::get<0>(rhs), std::get<1>(rhs)); - }); - - ArenaBitVector taken(&allocator, 0, true, kArenaAllocRegisterAllocator); - for (auto it = interval_endpoints.begin(), end = interval_endpoints.end(); it != end; ++it) { - // Extract information from the current tuple. - LiveInterval* parent_interval; - bool is_interval_beginning; - size_t position; - std::tie(position, is_interval_beginning, parent_interval) = *it; - size_t number_of_spill_slots_needed = parent_interval->NumberOfSpillSlotsNeeded(); - - if (is_interval_beginning) { - DCHECK(!parent_interval->HasSpillSlot()); - DCHECK_EQ(position, parent_interval->GetStart()); - - // Find first available free stack slot(s). - size_t slot = 0; - for (; ; ++slot) { - bool found = true; - for (size_t s = slot, u = slot + number_of_spill_slots_needed; s < u; s++) { - if (taken.IsBitSet(s)) { - found = false; - break; // failure - } - } - if (found) { - break; // success - } - } - - parent_interval->SetSpillSlot(slot); - - *num_stack_slots_used = std::max(*num_stack_slots_used, slot + number_of_spill_slots_needed); - if (number_of_spill_slots_needed > 1 && *num_stack_slots_used % 2 != 0) { - // The parallel move resolver requires that there be an even number of spill slots - // allocated for pair value types. - ++(*num_stack_slots_used); - } - - for (size_t s = slot, u = slot + number_of_spill_slots_needed; s < u; s++) { - taken.SetBit(s); - } - } else { - DCHECK_EQ(position, parent_interval->GetLastSibling()->GetEnd()); - DCHECK(parent_interval->HasSpillSlot()); - - // Free up the stack slot(s) used by this interval. - size_t slot = parent_interval->GetSpillSlot(); - for (size_t s = slot, u = slot + number_of_spill_slots_needed; s < u; s++) { - DCHECK(taken.IsBitSet(s)); - taken.ClearBit(s); - } - } - } - DCHECK_EQ(taken.NumSetBits(), 0u); -} - -} // namespace art diff --git a/compiler/optimizing/register_allocator_graph_color.h b/compiler/optimizing/register_allocator_graph_color.h deleted file mode 100644 index 0e10152049..0000000000 --- a/compiler/optimizing/register_allocator_graph_color.h +++ /dev/null @@ -1,195 +0,0 @@ -/* - * Copyright (C) 2016 The Android Open Source Project - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
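[Editorial aside] ColorSpillSlots above assigns spill slots by sorting interval endpoints and greedily reusing the lowest free slot at each interval start. A simplified model of that scheme follows; ToyInterval and AssignSlots are hypothetical, and multi-slot (64-bit) values, sibling intervals, and parallel-move alignment are left out.

#include <algorithm>
#include <cstddef>
#include <tuple>
#include <vector>

struct ToyInterval {
  size_t start;
  size_t end;    // exclusive
  size_t slot = 0;
};

// Assigns each interval the lowest stack slot that is free over its range and
// returns the total number of slots used.
size_t AssignSlots(std::vector<ToyInterval*>& intervals) {
  std::vector<std::tuple<size_t, bool, ToyInterval*>> events;
  for (ToyInterval* interval : intervals) {
    events.emplace_back(interval->start, true, interval);   // interval begins
    events.emplace_back(interval->end, false, interval);    // interval ends
  }
  // Sort by position; at equal positions an end (false) sorts before a start,
  // so a slot freed at position p can be reused by an interval starting at p.
  std::sort(events.begin(), events.end(),
            [](const auto& lhs, const auto& rhs) {
              return std::tie(std::get<0>(lhs), std::get<1>(lhs)) <
                     std::tie(std::get<0>(rhs), std::get<1>(rhs));
            });
  std::vector<bool> taken;
  size_t num_slots_used = 0;
  for (const auto& [position, is_start, interval] : events) {
    if (is_start) {
      size_t slot = 0;
      while (slot < taken.size() && taken[slot]) {
        ++slot;
      }
      if (slot == taken.size()) {
        taken.push_back(false);
      }
      taken[slot] = true;
      interval->slot = slot;
      num_slots_used = std::max(num_slots_used, slot + 1);
    } else {
      taken[interval->slot] = false;  // free the slot at the interval's end
    }
  }
  return num_slots_used;
}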
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef ART_COMPILER_OPTIMIZING_REGISTER_ALLOCATOR_GRAPH_COLOR_H_ -#define ART_COMPILER_OPTIMIZING_REGISTER_ALLOCATOR_GRAPH_COLOR_H_ - -#include "arch/instruction_set.h" -#include "base/arena_object.h" -#include "base/array_ref.h" -#include "base/macros.h" -#include "base/scoped_arena_containers.h" -#include "register_allocator.h" - -namespace art HIDDEN { - -class CodeGenerator; -class HBasicBlock; -class HGraph; -class HInstruction; -class HParallelMove; -class Location; -class SsaLivenessAnalysis; -class InterferenceNode; -struct CoalesceOpportunity; -enum class CoalesceKind; - -/** - * A graph coloring register allocator. - * - * The algorithm proceeds as follows: - * (1) Build an interference graph, where nodes represent live intervals, and edges represent - * interferences between two intervals. Coloring this graph with k colors is isomorphic to - * finding a valid register assignment with k registers. - * (2) To color the graph, first prune all nodes with degree less than k, since these nodes are - * guaranteed a color. (No matter how we color their adjacent nodes, we can give them a - * different color.) As we prune nodes from the graph, more nodes may drop below degree k, - * enabling further pruning. The key is to maintain the pruning order in a stack, so that we - * can color the nodes in the reverse order. - * When there are no more nodes with degree less than k, we start pruning alternate nodes based - * on heuristics. Since these nodes are not guaranteed a color, we are careful to - * prioritize nodes that require a register. We also prioritize short intervals, because - * short intervals cannot be split very much if coloring fails (see below). "Prioritizing" - * a node amounts to pruning it later, since it will have fewer interferences if we prune other - * nodes first. - * (3) We color nodes in the reverse order in which we pruned them. If we cannot assign - * a node a color, we do one of two things: - * - If the node requires a register, we consider the current coloring attempt a failure. - * However, we split the node's live interval in order to make the interference graph - * sparser, so that future coloring attempts may succeed. - * - If the node does not require a register, we simply assign it a location on the stack. - * - * If iterative move coalescing is enabled, the algorithm also attempts to conservatively - * combine nodes in the graph that would prefer to have the same color. (For example, the output - * of a phi instruction would prefer to have the same register as at least one of its inputs.) - * There are several additional steps involved with this: - * - We look for coalesce opportunities by examining each live interval, a step similar to that - * used by linear scan when looking for register hints. - * - When pruning the graph, we maintain a worklist of coalesce opportunities, as well as a worklist - * of low degree nodes that have associated coalesce opportunities. Only when we run out of - * coalesce opportunities do we start pruning coalesce-associated nodes. 
- * - When pruning a node, if any nodes transition from high degree to low degree, we add - * associated coalesce opportunities to the worklist, since these opportunities may now succeed. - * - Whether two nodes can be combined is decided by two different heuristics--one used when - * coalescing uncolored nodes, and one used for coalescing an uncolored node with a colored node. - * It is vital that we only combine two nodes if the node that remains is guaranteed to receive - * a color. This is because additionally spilling is more costly than failing to coalesce. - * - Even if nodes are not coalesced while pruning, we keep the coalesce opportunities around - * to be used as last-chance register hints when coloring. If nothing else, we try to use - * caller-save registers before callee-save registers. - * - * A good reference for graph coloring register allocation is - * "Modern Compiler Implementation in Java" (Andrew W. Appel, 2nd Edition). - */ -class RegisterAllocatorGraphColor : public RegisterAllocator { - public: - RegisterAllocatorGraphColor(ScopedArenaAllocator* allocator, - CodeGenerator* codegen, - const SsaLivenessAnalysis& analysis, - bool iterative_move_coalescing = true); - ~RegisterAllocatorGraphColor() override; - - void AllocateRegisters() override; - - bool Validate(bool log_fatal_on_failure) override; - - private: - // Collect all intervals and prepare for register allocation. - void ProcessInstructions(); - void ProcessInstruction(HInstruction* instruction); - - // If any inputs require specific registers, block those registers - // at the position of this instruction. - void CheckForFixedInputs(HInstruction* instruction); - - // If the output of an instruction requires a specific register, split - // the interval and assign the register to the first part. - void CheckForFixedOutput(HInstruction* instruction); - - // Add all applicable safepoints to a live interval. - // Currently depends on instruction processing order. - void AddSafepointsFor(HInstruction* instruction); - - // Collect all live intervals associated with the temporary locations - // needed by an instruction. - void CheckForTempLiveIntervals(HInstruction* instruction); - - // If a safe point is needed, add a synthesized interval to later record - // the number of live registers at this point. - void CheckForSafepoint(HInstruction* instruction); - - // Try to remove the SuspendCheck at function entry. Returns true if it was successful. - bool TryRemoveSuspendCheckEntry(HInstruction* instruction); - - // Split an interval, but only if `position` is inside of `interval`. - // Return either the new interval, or the original interval if not split. - static LiveInterval* TrySplit(LiveInterval* interval, size_t position); - - // To ensure every graph can be colored, split live intervals - // at their register defs and uses. This creates short intervals with low - // degree in the interference graph, which are prioritized during graph - // coloring. - void SplitAtRegisterUses(LiveInterval* interval); - - // If the given instruction is a catch phi, give it a spill slot. - void AllocateSpillSlotForCatchPhi(HInstruction* instruction); - - // Ensure that the given register cannot be allocated for a given range. 
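[Editorial aside] Steps (2) and (3) of the algorithm description above are the classic Kempe simplify/select scheme. The sketch below is a generic textbook version with toy data structures rather than ART's interference graph; it omits move coalescing, optimistic spilling, live-interval splitting, and register pairs.

#include <cstddef>
#include <set>
#include <stack>
#include <vector>

// Returns one color (0 .. k-1) per node, or an empty vector if simplification
// gets stuck; the real allocator would spill or split a live interval instead.
std::vector<int> KempeColor(const std::vector<std::set<size_t>>& graph, size_t k) {
  const size_t n = graph.size();
  std::vector<std::set<size_t>> work = graph;   // mutable copy used for pruning
  std::vector<bool> pruned(n, false);
  std::stack<size_t> order;                     // prune order, colored in reverse
  size_t removed = 0;
  while (removed < n) {
    bool progress = false;
    for (size_t v = 0; v < n; ++v) {
      if (!pruned[v] && work[v].size() < k) {
        for (size_t w : work[v]) {
          work[w].erase(v);                     // drop the pruned node's edges
        }
        work[v].clear();
        pruned[v] = true;
        order.push(v);
        ++removed;
        progress = true;
      }
    }
    if (!progress) {
      return {};                                // only high-degree nodes remain
    }
  }
  std::vector<int> color(n, -1);
  while (!order.empty()) {
    size_t v = order.top();
    order.pop();
    std::vector<bool> used(k, false);
    for (size_t w : graph[v]) {                 // original edges, not the pruned copy
      if (color[w] >= 0) {
        used[static_cast<size_t>(color[w])] = true;
      }
    }
    for (size_t c = 0; c < k; ++c) {
      if (!used[c]) {
        color[v] = static_cast<int>(c);
        break;
      }
    }
  }
  return color;
}

Every node pruned here had degree below k at the time it was pruned, so at most k - 1 of its neighbors are already colored when it is popped, which is why the select loop always finds a color whenever simplification succeeds.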
- void BlockRegister(Location location, size_t start, size_t end); - void BlockRegisters(size_t start, size_t end, bool caller_save_only = false); - - bool IsCallerSave(size_t reg, bool processing_core_regs); - - // Assigns stack slots to a list of intervals, ensuring that interfering intervals are not - // assigned the same stack slot. - void ColorSpillSlots(ArrayRef<LiveInterval* const> nodes, /* out */ size_t* num_stack_slots_used); - - // Provide stack slots to nodes that need them. - void AllocateSpillSlots(ArrayRef<InterferenceNode* const> nodes); - - // Whether iterative move coalescing should be performed. Iterative move coalescing - // improves code quality, but increases compile time. - const bool iterative_move_coalescing_; - - // Live intervals, split by kind (core and floating point). - // These should not contain high intervals, as those are represented by - // the corresponding low interval throughout register allocation. - ScopedArenaVector<LiveInterval*> core_intervals_; - ScopedArenaVector<LiveInterval*> fp_intervals_; - - // Intervals for temporaries, saved for special handling in the resolution phase. - ScopedArenaVector<LiveInterval*> temp_intervals_; - - // Safepoints, saved for special handling while processing instructions. - ScopedArenaVector<HInstruction*> safepoints_; - - // Interference nodes representing specific registers. These are "pre-colored" nodes - // in the interference graph. - ScopedArenaVector<InterferenceNode*> physical_core_nodes_; - ScopedArenaVector<InterferenceNode*> physical_fp_nodes_; - - // Allocated stack slot counters. - size_t num_int_spill_slots_; - size_t num_double_spill_slots_; - size_t num_float_spill_slots_; - size_t num_long_spill_slots_; - size_t catch_phi_spill_slot_counter_; - - // Number of stack slots needed for the pointer to the current method. - // This is 1 for 32-bit architectures, and 2 for 64-bit architectures. - const size_t reserved_art_method_slots_; - - // Number of stack slots needed for outgoing arguments. - const size_t reserved_out_slots_; - - friend class ColoringIteration; - - DISALLOW_COPY_AND_ASSIGN(RegisterAllocatorGraphColor); -}; - -} // namespace art - -#endif // ART_COMPILER_OPTIMIZING_REGISTER_ALLOCATOR_GRAPH_COLOR_H_ diff --git a/compiler/optimizing/register_allocator_linear_scan.cc b/compiler/optimizing/register_allocator_linear_scan.cc index fcdaa2d34f..a3029f56c6 100644 --- a/compiler/optimizing/register_allocator_linear_scan.cc +++ b/compiler/optimizing/register_allocator_linear_scan.cc @@ -355,9 +355,14 @@ void RegisterAllocatorLinearScan::CheckForFixedInputs(HInstruction* instruction) Location input = locations->InAt(i); if (input.IsRegister() || input.IsFpuRegister()) { BlockRegister(input, position, position + 1); + // Ensure that an explicit input register is marked as being allocated. + codegen_->AddAllocatedRegister(input); } else if (input.IsPair()) { BlockRegister(input.ToLow(), position, position + 1); BlockRegister(input.ToHigh(), position, position + 1); + // Ensure that an explicit input register pair is marked as being allocated. + codegen_->AddAllocatedRegister(input.ToLow()); + codegen_->AddAllocatedRegister(input.ToHigh()); } } } @@ -417,6 +422,8 @@ void RegisterAllocatorLinearScan::CheckForFixedOutput(HInstruction* instruction) current->SetFrom(position + 1); current->SetRegister(output.reg()); BlockRegister(output, position, position + 1); + // Ensure that an explicit output register is marked as being allocated. 
+ codegen_->AddAllocatedRegister(output); } else if (output.IsPair()) { current->SetFrom(position + 1); current->SetRegister(output.low()); @@ -425,6 +432,9 @@ void RegisterAllocatorLinearScan::CheckForFixedOutput(HInstruction* instruction) high->SetFrom(position + 1); BlockRegister(output.ToLow(), position, position + 1); BlockRegister(output.ToHigh(), position, position + 1); + // Ensure that an explicit output register pair is marked as being allocated. + codegen_->AddAllocatedRegister(output.ToLow()); + codegen_->AddAllocatedRegister(output.ToHigh()); } else if (output.IsStackSlot() || output.IsDoubleStackSlot()) { current->SetSpillSlot(output.GetStackIndex()); } else { @@ -1208,8 +1218,7 @@ void RegisterAllocatorLinearScan::AllocateSpillSlotForCatchPhi(HPhi* phi) { LiveInterval* interval = phi->GetLiveInterval(); HInstruction* previous_phi = phi->GetPrevious(); - DCHECK(previous_phi == nullptr || - previous_phi->AsPhi()->GetRegNumber() <= phi->GetRegNumber()) + DCHECK(previous_phi == nullptr || previous_phi->AsPhi()->GetRegNumber() <= phi->GetRegNumber()) << "Phis expected to be sorted by vreg number, so that equivalent phis are adjacent."; if (phi->IsVRegEquivalentOf(previous_phi)) { diff --git a/compiler/optimizing/register_allocator_test.cc b/compiler/optimizing/register_allocator_test.cc index d316aa5dc2..0d2d20682d 100644 --- a/compiler/optimizing/register_allocator_test.cc +++ b/compiler/optimizing/register_allocator_test.cc @@ -84,7 +84,8 @@ class RegisterAllocatorTest : public CommonCompilerTest, public OptimizingUnitTe TEST_F(RegisterAllocatorTest, test_name##_LinearScan) {\ test_name(Strategy::kRegisterAllocatorLinearScan);\ }\ -TEST_F(RegisterAllocatorTest, test_name##_GraphColor) {\ +/* Note: Graph coloring register allocator has been removed, so the test is DISABLED. */ \ +TEST_F(RegisterAllocatorTest, DISABLED_##test_name##_GraphColor) {\ test_name(Strategy::kRegisterAllocatorGraphColor);\ } diff --git a/compiler/optimizing/scheduler.cc b/compiler/optimizing/scheduler.cc index 116f52605e..4c68844dbb 100644 --- a/compiler/optimizing/scheduler.cc +++ b/compiler/optimizing/scheduler.cc @@ -108,7 +108,6 @@ static bool IsArrayAccess(const HInstruction* instruction) { static bool IsInstanceFieldAccess(const HInstruction* instruction) { return instruction->IsInstanceFieldGet() || instruction->IsInstanceFieldSet() || - instruction->IsPredicatedInstanceFieldGet() || instruction->IsUnresolvedInstanceFieldGet() || instruction->IsUnresolvedInstanceFieldSet(); } @@ -123,7 +122,6 @@ static bool IsStaticFieldAccess(const HInstruction* instruction) { static bool IsResolvedFieldAccess(const HInstruction* instruction) { return instruction->IsInstanceFieldGet() || instruction->IsInstanceFieldSet() || - instruction->IsPredicatedInstanceFieldGet() || instruction->IsStaticFieldGet() || instruction->IsStaticFieldSet(); } @@ -149,9 +147,7 @@ size_t SideEffectDependencyAnalysis::MemoryDependencyAnalysis::FieldAccessHeapLo DCHECK(GetFieldInfo(instr) != nullptr); DCHECK(heap_location_collector_ != nullptr); - HInstruction* ref = instr->IsPredicatedInstanceFieldGet() - ? instr->AsPredicatedInstanceFieldGet()->GetTarget() - : instr->InputAt(0); + HInstruction* ref = instr->InputAt(0); size_t heap_loc = heap_location_collector_->GetFieldHeapLocation(ref, GetFieldInfo(instr)); // This field access should be analyzed and added to HeapLocationCollector before. 
DCHECK(heap_loc != HeapLocationCollector::kHeapLocationNotFound); @@ -490,9 +486,9 @@ SchedulingNode* CriticalPathSchedulingNodeSelector::SelectMaterializedCondition( DCHECK(instruction != nullptr); if (instruction->IsIf()) { - condition = instruction->AsIf()->InputAt(0)->AsCondition(); + condition = instruction->AsIf()->InputAt(0)->AsConditionOrNull(); } else if (instruction->IsSelect()) { - condition = instruction->AsSelect()->GetCondition()->AsCondition(); + condition = instruction->AsSelect()->GetCondition()->AsConditionOrNull(); } SchedulingNode* condition_node = (condition != nullptr) ? graph.GetNode(condition) : nullptr; @@ -554,7 +550,7 @@ void HScheduler::Schedule(HGraph* graph) { // should run the analysis or not. const HeapLocationCollector* heap_location_collector = nullptr; ScopedArenaAllocator allocator(graph->GetArenaStack()); - LoadStoreAnalysis lsa(graph, /*stats=*/nullptr, &allocator, LoadStoreAnalysisType::kBasic); + LoadStoreAnalysis lsa(graph, /*stats=*/nullptr, &allocator); if (!only_optimize_loop_blocks_ || graph->HasLoops()) { lsa.Run(); heap_location_collector = &lsa.GetHeapLocationCollector(); @@ -734,8 +730,6 @@ bool HScheduler::IsSchedulable(const HInstruction* instruction) const { instruction->IsCurrentMethod() || instruction->IsDivZeroCheck() || (instruction->IsInstanceFieldGet() && !instruction->AsInstanceFieldGet()->IsVolatile()) || - (instruction->IsPredicatedInstanceFieldGet() && - !instruction->AsPredicatedInstanceFieldGet()->IsVolatile()) || (instruction->IsInstanceFieldSet() && !instruction->AsInstanceFieldSet()->IsVolatile()) || instruction->IsInstanceOf() || instruction->IsInvokeInterface() || diff --git a/compiler/optimizing/scheduler_arm.cc b/compiler/optimizing/scheduler_arm.cc index 3f931c4c49..cafb0f5da6 100644 --- a/compiler/optimizing/scheduler_arm.cc +++ b/compiler/optimizing/scheduler_arm.cc @@ -610,7 +610,7 @@ void SchedulingLatencyVisitorARM::VisitDataProcWithShifterOp(HDataProcWithShifte } } -void SchedulingLatencyVisitorARM::VisitIntermediateAddress(HIntermediateAddress* ATTRIBUTE_UNUSED) { +void SchedulingLatencyVisitorARM::VisitIntermediateAddress([[maybe_unused]] HIntermediateAddress*) { // Although the code generated is a simple `add` instruction, we found through empirical results // that spacing it from its use in memory accesses was beneficial. 
last_visited_internal_latency_ = kArmNopLatency; @@ -618,11 +618,11 @@ void SchedulingLatencyVisitorARM::VisitIntermediateAddress(HIntermediateAddress* } void SchedulingLatencyVisitorARM::VisitIntermediateAddressIndex( - HIntermediateAddressIndex* ATTRIBUTE_UNUSED) { + [[maybe_unused]] HIntermediateAddressIndex*) { UNIMPLEMENTED(FATAL) << "IntermediateAddressIndex is not implemented for ARM"; } -void SchedulingLatencyVisitorARM::VisitMultiplyAccumulate(HMultiplyAccumulate* ATTRIBUTE_UNUSED) { +void SchedulingLatencyVisitorARM::VisitMultiplyAccumulate([[maybe_unused]] HMultiplyAccumulate*) { last_visited_latency_ = kArmMulIntegerLatency; } @@ -669,7 +669,7 @@ void SchedulingLatencyVisitorARM::VisitArrayGet(HArrayGet* instruction) { } case DataType::Type::kReference: { - if (gUseReadBarrier && kUseBakerReadBarrier) { + if (codegen_->EmitBakerReadBarrier()) { last_visited_latency_ = kArmLoadWithBakerReadBarrierLatency; } else { if (index->IsConstant()) { @@ -806,7 +806,7 @@ void SchedulingLatencyVisitorARM::VisitArraySet(HArraySet* instruction) { } } -void SchedulingLatencyVisitorARM::VisitBoundsCheck(HBoundsCheck* ATTRIBUTE_UNUSED) { +void SchedulingLatencyVisitorARM::VisitBoundsCheck([[maybe_unused]] HBoundsCheck*) { last_visited_internal_latency_ = kArmIntegerOpLatency; // Users do not use any data results. last_visited_latency_ = 0; @@ -853,11 +853,6 @@ void SchedulingLatencyVisitorARM::VisitDiv(HDiv* instruction) { } } -void SchedulingLatencyVisitorARM::VisitPredicatedInstanceFieldGet( - HPredicatedInstanceFieldGet* instruction) { - HandleFieldGetLatencies(instruction, instruction->GetFieldInfo()); -} - void SchedulingLatencyVisitorARM::VisitInstanceFieldGet(HInstanceFieldGet* instruction) { HandleFieldGetLatencies(instruction, instruction->GetFieldInfo()); } @@ -866,22 +861,22 @@ void SchedulingLatencyVisitorARM::VisitInstanceFieldSet(HInstanceFieldSet* instr HandleFieldSetLatencies(instruction, instruction->GetFieldInfo()); } -void SchedulingLatencyVisitorARM::VisitInstanceOf(HInstanceOf* ATTRIBUTE_UNUSED) { +void SchedulingLatencyVisitorARM::VisitInstanceOf([[maybe_unused]] HInstanceOf*) { last_visited_internal_latency_ = kArmCallInternalLatency; last_visited_latency_ = kArmIntegerOpLatency; } -void SchedulingLatencyVisitorARM::VisitInvoke(HInvoke* ATTRIBUTE_UNUSED) { +void SchedulingLatencyVisitorARM::VisitInvoke([[maybe_unused]] HInvoke*) { last_visited_internal_latency_ = kArmCallInternalLatency; last_visited_latency_ = kArmCallLatency; } -void SchedulingLatencyVisitorARM::VisitLoadString(HLoadString* ATTRIBUTE_UNUSED) { +void SchedulingLatencyVisitorARM::VisitLoadString([[maybe_unused]] HLoadString*) { last_visited_internal_latency_ = kArmLoadStringInternalLatency; last_visited_latency_ = kArmMemoryLoadLatency; } -void SchedulingLatencyVisitorARM::VisitNewArray(HNewArray* ATTRIBUTE_UNUSED) { +void SchedulingLatencyVisitorARM::VisitNewArray([[maybe_unused]] HNewArray*) { last_visited_internal_latency_ = kArmIntegerOpLatency + kArmCallInternalLatency; last_visited_latency_ = kArmCallLatency; } @@ -918,9 +913,7 @@ void SchedulingLatencyVisitorARM::VisitRem(HRem* instruction) { void SchedulingLatencyVisitorARM::HandleFieldGetLatencies(HInstruction* instruction, const FieldInfo& field_info) { - DCHECK(instruction->IsInstanceFieldGet() || - instruction->IsStaticFieldGet() || - instruction->IsPredicatedInstanceFieldGet()); + DCHECK(instruction->IsInstanceFieldGet() || instruction->IsStaticFieldGet()); DCHECK(codegen_ != nullptr); bool is_volatile = field_info.IsVolatile(); DataType::Type 
field_type = field_info.GetFieldType(); @@ -937,7 +930,7 @@ void SchedulingLatencyVisitorARM::HandleFieldGetLatencies(HInstruction* instruct break; case DataType::Type::kReference: - if (gUseReadBarrier && kUseBakerReadBarrier) { + if (codegen_->EmitBakerReadBarrier()) { last_visited_internal_latency_ = kArmMemoryLoadLatency + kArmIntegerOpLatency; last_visited_latency_ = kArmMemoryLoadLatency; } else { diff --git a/compiler/optimizing/scheduler_arm.h b/compiler/optimizing/scheduler_arm.h index 0da21c187f..cf00fa12a3 100644 --- a/compiler/optimizing/scheduler_arm.h +++ b/compiler/optimizing/scheduler_arm.h @@ -53,7 +53,7 @@ class SchedulingLatencyVisitorARM final : public SchedulingLatencyVisitor { : codegen_(down_cast<CodeGeneratorARMVIXL*>(codegen)) {} // Default visitor for instructions not handled specifically below. - void VisitInstruction(HInstruction* ATTRIBUTE_UNUSED) override { + void VisitInstruction([[maybe_unused]] HInstruction*) override { last_visited_latency_ = kArmIntegerOpLatency; } @@ -77,7 +77,6 @@ class SchedulingLatencyVisitorARM final : public SchedulingLatencyVisitor { M(Condition, unused) \ M(Compare, unused) \ M(BoundsCheck, unused) \ - M(PredicatedInstanceFieldGet, unused) \ M(InstanceFieldGet, unused) \ M(InstanceFieldSet, unused) \ M(InstanceOf, unused) \ diff --git a/compiler/optimizing/scheduler_arm64.cc b/compiler/optimizing/scheduler_arm64.cc index 3071afd951..5113cf446d 100644 --- a/compiler/optimizing/scheduler_arm64.cc +++ b/compiler/optimizing/scheduler_arm64.cc @@ -30,30 +30,30 @@ void SchedulingLatencyVisitorARM64::VisitBinaryOperation(HBinaryOperation* instr } void SchedulingLatencyVisitorARM64::VisitBitwiseNegatedRight( - HBitwiseNegatedRight* ATTRIBUTE_UNUSED) { + [[maybe_unused]] HBitwiseNegatedRight*) { last_visited_latency_ = kArm64IntegerOpLatency; } void SchedulingLatencyVisitorARM64::VisitDataProcWithShifterOp( - HDataProcWithShifterOp* ATTRIBUTE_UNUSED) { + [[maybe_unused]] HDataProcWithShifterOp*) { last_visited_latency_ = kArm64DataProcWithShifterOpLatency; } void SchedulingLatencyVisitorARM64::VisitIntermediateAddress( - HIntermediateAddress* ATTRIBUTE_UNUSED) { + [[maybe_unused]] HIntermediateAddress*) { // Although the code generated is a simple `add` instruction, we found through empirical results // that spacing it from its use in memory accesses was beneficial. last_visited_latency_ = kArm64IntegerOpLatency + 2; } void SchedulingLatencyVisitorARM64::VisitIntermediateAddressIndex( - HIntermediateAddressIndex* instr ATTRIBUTE_UNUSED) { + [[maybe_unused]] HIntermediateAddressIndex* instr) { // Although the code generated is a simple `add` instruction, we found through empirical results // that spacing it from its use in memory accesses was beneficial. 
last_visited_latency_ = kArm64DataProcWithShifterOpLatency + 2; } -void SchedulingLatencyVisitorARM64::VisitMultiplyAccumulate(HMultiplyAccumulate* ATTRIBUTE_UNUSED) { +void SchedulingLatencyVisitorARM64::VisitMultiplyAccumulate([[maybe_unused]] HMultiplyAccumulate*) { last_visited_latency_ = kArm64MulIntegerLatency; } @@ -65,15 +65,15 @@ void SchedulingLatencyVisitorARM64::VisitArrayGet(HArrayGet* instruction) { last_visited_latency_ = kArm64MemoryLoadLatency; } -void SchedulingLatencyVisitorARM64::VisitArrayLength(HArrayLength* ATTRIBUTE_UNUSED) { +void SchedulingLatencyVisitorARM64::VisitArrayLength([[maybe_unused]] HArrayLength*) { last_visited_latency_ = kArm64MemoryLoadLatency; } -void SchedulingLatencyVisitorARM64::VisitArraySet(HArraySet* ATTRIBUTE_UNUSED) { +void SchedulingLatencyVisitorARM64::VisitArraySet([[maybe_unused]] HArraySet*) { last_visited_latency_ = kArm64MemoryStoreLatency; } -void SchedulingLatencyVisitorARM64::VisitBoundsCheck(HBoundsCheck* ATTRIBUTE_UNUSED) { +void SchedulingLatencyVisitorARM64::VisitBoundsCheck([[maybe_unused]] HBoundsCheck*) { last_visited_internal_latency_ = kArm64IntegerOpLatency; // Users do not use any data results. last_visited_latency_ = 0; @@ -113,21 +113,21 @@ void SchedulingLatencyVisitorARM64::VisitDiv(HDiv* instr) { } } -void SchedulingLatencyVisitorARM64::VisitInstanceFieldGet(HInstanceFieldGet* ATTRIBUTE_UNUSED) { +void SchedulingLatencyVisitorARM64::VisitInstanceFieldGet([[maybe_unused]] HInstanceFieldGet*) { last_visited_latency_ = kArm64MemoryLoadLatency; } -void SchedulingLatencyVisitorARM64::VisitInstanceOf(HInstanceOf* ATTRIBUTE_UNUSED) { +void SchedulingLatencyVisitorARM64::VisitInstanceOf([[maybe_unused]] HInstanceOf*) { last_visited_internal_latency_ = kArm64CallInternalLatency; last_visited_latency_ = kArm64IntegerOpLatency; } -void SchedulingLatencyVisitorARM64::VisitInvoke(HInvoke* ATTRIBUTE_UNUSED) { +void SchedulingLatencyVisitorARM64::VisitInvoke([[maybe_unused]] HInvoke*) { last_visited_internal_latency_ = kArm64CallInternalLatency; last_visited_latency_ = kArm64CallLatency; } -void SchedulingLatencyVisitorARM64::VisitLoadString(HLoadString* ATTRIBUTE_UNUSED) { +void SchedulingLatencyVisitorARM64::VisitLoadString([[maybe_unused]] HLoadString*) { last_visited_internal_latency_ = kArm64LoadStringInternalLatency; last_visited_latency_ = kArm64MemoryLoadLatency; } @@ -138,7 +138,7 @@ void SchedulingLatencyVisitorARM64::VisitMul(HMul* instr) { : kArm64MulIntegerLatency; } -void SchedulingLatencyVisitorARM64::VisitNewArray(HNewArray* ATTRIBUTE_UNUSED) { +void SchedulingLatencyVisitorARM64::VisitNewArray([[maybe_unused]] HNewArray*) { last_visited_internal_latency_ = kArm64IntegerOpLatency + kArm64CallInternalLatency; last_visited_latency_ = kArm64CallLatency; } @@ -181,7 +181,7 @@ void SchedulingLatencyVisitorARM64::VisitRem(HRem* instruction) { } } -void SchedulingLatencyVisitorARM64::VisitStaticFieldGet(HStaticFieldGet* ATTRIBUTE_UNUSED) { +void SchedulingLatencyVisitorARM64::VisitStaticFieldGet([[maybe_unused]] HStaticFieldGet*) { last_visited_latency_ = kArm64MemoryLoadLatency; } @@ -211,7 +211,7 @@ void SchedulingLatencyVisitorARM64::HandleSimpleArithmeticSIMD(HVecOperation *in } void SchedulingLatencyVisitorARM64::VisitVecReplicateScalar( - HVecReplicateScalar* instr ATTRIBUTE_UNUSED) { + [[maybe_unused]] HVecReplicateScalar* instr) { last_visited_latency_ = kArm64SIMDReplicateOpLatency; } @@ -223,7 +223,7 @@ void SchedulingLatencyVisitorARM64::VisitVecReduce(HVecReduce* instr) { HandleSimpleArithmeticSIMD(instr); } 
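[Editorial aside] The repeated one-line edits in the scheduler files above and below replace ART's ATTRIBUTE_UNUSED macro with the standard C++17 attribute. A minimal illustration with a hypothetical visitor:

struct ToyLatencyVisitor {
  // Before: void Visit(int* instruction ATTRIBUTE_UNUSED) {}
  // (a macro expanding to a compiler-specific attribute).
  // After: the standard attribute, which also allows dropping the parameter name.
  void Visit([[maybe_unused]] int* instruction) {
    // The instruction is deliberately ignored; a default latency would be recorded here.
  }
};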
-void SchedulingLatencyVisitorARM64::VisitVecCnv(HVecCnv* instr ATTRIBUTE_UNUSED) { +void SchedulingLatencyVisitorARM64::VisitVecCnv([[maybe_unused]] HVecCnv* instr) { last_visited_latency_ = kArm64SIMDTypeConversionInt2FPLatency; } @@ -279,19 +279,19 @@ void SchedulingLatencyVisitorARM64::VisitVecMax(HVecMax* instr) { HandleSimpleArithmeticSIMD(instr); } -void SchedulingLatencyVisitorARM64::VisitVecAnd(HVecAnd* instr ATTRIBUTE_UNUSED) { +void SchedulingLatencyVisitorARM64::VisitVecAnd([[maybe_unused]] HVecAnd* instr) { last_visited_latency_ = kArm64SIMDIntegerOpLatency; } -void SchedulingLatencyVisitorARM64::VisitVecAndNot(HVecAndNot* instr ATTRIBUTE_UNUSED) { +void SchedulingLatencyVisitorARM64::VisitVecAndNot([[maybe_unused]] HVecAndNot* instr) { last_visited_latency_ = kArm64SIMDIntegerOpLatency; } -void SchedulingLatencyVisitorARM64::VisitVecOr(HVecOr* instr ATTRIBUTE_UNUSED) { +void SchedulingLatencyVisitorARM64::VisitVecOr([[maybe_unused]] HVecOr* instr) { last_visited_latency_ = kArm64SIMDIntegerOpLatency; } -void SchedulingLatencyVisitorARM64::VisitVecXor(HVecXor* instr ATTRIBUTE_UNUSED) { +void SchedulingLatencyVisitorARM64::VisitVecXor([[maybe_unused]] HVecXor* instr) { last_visited_latency_ = kArm64SIMDIntegerOpLatency; } @@ -312,13 +312,12 @@ void SchedulingLatencyVisitorARM64::VisitVecSetScalars(HVecSetScalars* instr) { } void SchedulingLatencyVisitorARM64::VisitVecMultiplyAccumulate( - HVecMultiplyAccumulate* instr ATTRIBUTE_UNUSED) { + [[maybe_unused]] HVecMultiplyAccumulate* instr) { last_visited_latency_ = kArm64SIMDMulIntegerLatency; } -void SchedulingLatencyVisitorARM64::HandleVecAddress( - HVecMemoryOperation* instruction, - size_t size ATTRIBUTE_UNUSED) { +void SchedulingLatencyVisitorARM64::HandleVecAddress(HVecMemoryOperation* instruction, + [[maybe_unused]] size_t size) { HInstruction* index = instruction->InputAt(1); if (!index->IsConstant()) { last_visited_internal_latency_ += kArm64DataProcWithShifterOpLatency; diff --git a/compiler/optimizing/scheduler_arm64.h b/compiler/optimizing/scheduler_arm64.h index ec41577e9d..7ce00e00ab 100644 --- a/compiler/optimizing/scheduler_arm64.h +++ b/compiler/optimizing/scheduler_arm64.h @@ -59,7 +59,7 @@ static constexpr uint32_t kArm64SIMDTypeConversionInt2FPLatency = 10; class SchedulingLatencyVisitorARM64 final : public SchedulingLatencyVisitor { public: // Default visitor for instructions not handled specifically below. 
- void VisitInstruction(HInstruction* ATTRIBUTE_UNUSED) override { + void VisitInstruction([[maybe_unused]] HInstruction*) override { last_visited_latency_ = kArm64IntegerOpLatency; } diff --git a/compiler/optimizing/scheduler_test.cc b/compiler/optimizing/scheduler_test.cc index 165bfe3d94..c2b1fd6f7c 100644 --- a/compiler/optimizing/scheduler_test.cc +++ b/compiler/optimizing/scheduler_test.cc @@ -274,8 +274,7 @@ class SchedulerTest : public CommonCompilerTest, public OptimizingUnitTestHelper entry->AddInstruction(instr); } - HeapLocationCollector heap_location_collector( - graph_, GetScopedAllocator(), LoadStoreAnalysisType::kBasic); + HeapLocationCollector heap_location_collector(graph_, GetScopedAllocator()); heap_location_collector.VisitBasicBlock(entry); heap_location_collector.BuildAliasingMatrix(); TestSchedulingGraph scheduling_graph(GetScopedAllocator(), &heap_location_collector); diff --git a/compiler/optimizing/select_generator.cc b/compiler/optimizing/select_generator.cc index 6a10440d11..07065efbb7 100644 --- a/compiler/optimizing/select_generator.cc +++ b/compiler/optimizing/select_generator.cc @@ -46,8 +46,7 @@ static bool IsSimpleBlock(HBasicBlock* block) { } else if (instruction->CanBeMoved() && !instruction->HasSideEffects() && !instruction->CanThrow()) { - if (instruction->IsSelect() && - instruction->AsSelect()->GetCondition()->GetBlock() == block) { + if (instruction->IsSelect() && instruction->AsSelect()->GetCondition()->GetBlock() == block) { // Count one HCondition and HSelect in the same block as a single instruction. // This enables finding nested selects. continue; diff --git a/compiler/optimizing/ssa_builder.cc b/compiler/optimizing/ssa_builder.cc index a658252e69..2179bf50b5 100644 --- a/compiler/optimizing/ssa_builder.cc +++ b/compiler/optimizing/ssa_builder.cc @@ -604,7 +604,7 @@ GraphAnalysisResult SsaBuilder::BuildSsa() { */ HFloatConstant* SsaBuilder::GetFloatEquivalent(HIntConstant* constant) { // We place the floating point constant next to this constant. - HFloatConstant* result = constant->GetNext()->AsFloatConstant(); + HFloatConstant* result = constant->GetNext()->AsFloatConstantOrNull(); if (result == nullptr) { float value = bit_cast<float, int32_t>(constant->GetValue()); result = new (graph_->GetAllocator()) HFloatConstant(value); @@ -626,7 +626,7 @@ HFloatConstant* SsaBuilder::GetFloatEquivalent(HIntConstant* constant) { */ HDoubleConstant* SsaBuilder::GetDoubleEquivalent(HLongConstant* constant) { // We place the floating point constant next to this constant. - HDoubleConstant* result = constant->GetNext()->AsDoubleConstant(); + HDoubleConstant* result = constant->GetNext()->AsDoubleConstantOrNull(); if (result == nullptr) { double value = bit_cast<double, int64_t>(constant->GetValue()); result = new (graph_->GetAllocator()) HDoubleConstant(value); @@ -652,16 +652,16 @@ HPhi* SsaBuilder::GetFloatDoubleOrReferenceEquivalentOfPhi(HPhi* phi, DataType:: // We place the floating point /reference phi next to this phi. HInstruction* next = phi->GetNext(); - if (next != nullptr - && next->AsPhi()->GetRegNumber() == phi->GetRegNumber() - && next->GetType() != type) { + if (next != nullptr && + next->AsPhi()->GetRegNumber() == phi->GetRegNumber() && + next->GetType() != type) { // Move to the next phi to see if it is the one we are looking for. 
next = next->GetNext(); } - if (next == nullptr - || (next->AsPhi()->GetRegNumber() != phi->GetRegNumber()) - || (next->GetType() != type)) { + if (next == nullptr || + (next->AsPhi()->GetRegNumber() != phi->GetRegNumber()) || + (next->GetType() != type)) { ArenaAllocator* allocator = graph_->GetAllocator(); HInputsRef inputs = phi->GetInputs(); HPhi* new_phi = new (allocator) HPhi(allocator, phi->GetRegNumber(), inputs.size(), type); diff --git a/compiler/optimizing/ssa_liveness_analysis_test.cc b/compiler/optimizing/ssa_liveness_analysis_test.cc index 2df0f34c7d..18c945381d 100644 --- a/compiler/optimizing/ssa_liveness_analysis_test.cc +++ b/compiler/optimizing/ssa_liveness_analysis_test.cc @@ -31,6 +31,7 @@ namespace art HIDDEN { class SsaLivenessAnalysisTest : public OptimizingUnitTest { protected: void SetUp() override { + TEST_SETUP_DISABLED_FOR_RISCV64(); OptimizingUnitTest::SetUp(); graph_ = CreateGraph(); compiler_options_ = CommonCompilerTest::CreateCompilerOptions(kRuntimeISA, "default"); @@ -42,6 +43,11 @@ class SsaLivenessAnalysisTest : public OptimizingUnitTest { graph_->SetEntryBlock(entry_); } + void TearDown() override { + TEST_TEARDOWN_DISABLED_FOR_RISCV64(); + OptimizingUnitTest::TearDown(); + } + protected: HBasicBlock* CreateSuccessor(HBasicBlock* block) { HGraph* graph = block->GetGraph(); @@ -58,6 +64,7 @@ class SsaLivenessAnalysisTest : public OptimizingUnitTest { }; TEST_F(SsaLivenessAnalysisTest, TestReturnArg) { + TEST_DISABLED_FOR_RISCV64(); HInstruction* arg = new (GetAllocator()) HParameterValue( graph_->GetDexFile(), dex::TypeIndex(0), 0, DataType::Type::kInt32); entry_->AddInstruction(arg); @@ -78,6 +85,7 @@ TEST_F(SsaLivenessAnalysisTest, TestReturnArg) { } TEST_F(SsaLivenessAnalysisTest, TestAput) { + TEST_DISABLED_FOR_RISCV64(); HInstruction* array = new (GetAllocator()) HParameterValue( graph_->GetDexFile(), dex::TypeIndex(0), 0, DataType::Type::kReference); HInstruction* index = new (GetAllocator()) HParameterValue( @@ -147,6 +155,7 @@ TEST_F(SsaLivenessAnalysisTest, TestAput) { } TEST_F(SsaLivenessAnalysisTest, TestDeoptimize) { + TEST_DISABLED_FOR_RISCV64(); HInstruction* array = new (GetAllocator()) HParameterValue( graph_->GetDexFile(), dex::TypeIndex(0), 0, DataType::Type::kReference); HInstruction* index = new (GetAllocator()) HParameterValue( diff --git a/compiler/optimizing/ssa_phi_elimination.cc b/compiler/optimizing/ssa_phi_elimination.cc index ce343dffec..1d9be3956a 100644 --- a/compiler/optimizing/ssa_phi_elimination.cc +++ b/compiler/optimizing/ssa_phi_elimination.cc @@ -76,7 +76,7 @@ void SsaDeadPhiElimination::MarkDeadPhis() { HPhi* phi = worklist.back(); worklist.pop_back(); for (HInstruction* raw_input : phi->GetInputs()) { - HPhi* input = raw_input->AsPhi(); + HPhi* input = raw_input->AsPhiOrNull(); if (input != nullptr && input->IsDead()) { // Input is a dead phi. Revive it and add to the worklist. We make sure // that the phi was not dead initially (see definition of `initially_live`). 
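[Editorial aside] Several hunks above replace As<T>() with As<T>OrNull() at call sites that must tolerate a different instruction kind. The toy hierarchy below (hypothetical names, not ART's HInstruction API) illustrates the split: the OrNull form returns nullptr on a kind mismatch, while the plain form asserts the kind.

#include <cassert>

class ToyPhi;

class ToyInstruction {
 public:
  virtual ~ToyInstruction() {}
  virtual bool IsPhi() const { return false; }

  ToyPhi* AsPhiOrNull();  // nullptr if this instruction is not a phi
  ToyPhi* AsPhi();        // asserts that this instruction is a phi
};

class ToyPhi : public ToyInstruction {
 public:
  bool IsPhi() const override { return true; }
};

ToyPhi* ToyInstruction::AsPhiOrNull() {
  return IsPhi() ? static_cast<ToyPhi*>(this) : nullptr;
}

ToyPhi* ToyInstruction::AsPhi() {
  assert(IsPhi());
  return static_cast<ToyPhi*>(this);
}

A caller that previously wrote raw_input->AsPhi() and then checked for nullptr, as in the ssa_phi_elimination.cc hunk, would use AsPhiOrNull() for that nullable lookup under this split.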
diff --git a/compiler/optimizing/stack_map_stream.cc b/compiler/optimizing/stack_map_stream.cc index 1a368ed347..2ecda7610e 100644 --- a/compiler/optimizing/stack_map_stream.cc +++ b/compiler/optimizing/stack_map_stream.cc @@ -51,7 +51,8 @@ void StackMapStream::BeginMethod(size_t frame_size_in_bytes, size_t fp_spill_mask, uint32_t num_dex_registers, bool baseline, - bool debuggable) { + bool debuggable, + bool has_should_deoptimize_flag) { DCHECK(!in_method_) << "Mismatched Begin/End calls"; in_method_ = true; DCHECK_EQ(packed_frame_size_, 0u) << "BeginMethod was already called"; @@ -63,6 +64,7 @@ void StackMapStream::BeginMethod(size_t frame_size_in_bytes, num_dex_registers_ = num_dex_registers; baseline_ = baseline; debuggable_ = debuggable; + has_should_deoptimize_flag_ = has_should_deoptimize_flag; if (kVerifyStackMaps) { dchecks_.emplace_back([=](const CodeInfo& code_info) { @@ -152,8 +154,10 @@ void StackMapStream::BeginStackMapEntry( // Create lambda method, which will be executed at the very end to verify data. // Parameters and local variables will be captured(stored) by the lambda "[=]". dchecks_.emplace_back([=](const CodeInfo& code_info) { + // The `native_pc_offset` may have been overridden using `SetStackMapNativePcOffset(.)`. + uint32_t final_native_pc_offset = GetStackMapNativePcOffset(stack_map_index); if (kind == StackMap::Kind::Default || kind == StackMap::Kind::OSR) { - StackMap stack_map = code_info.GetStackMapForNativePcOffset(native_pc_offset, + StackMap stack_map = code_info.GetStackMapForNativePcOffset(final_native_pc_offset, instruction_set_); CHECK_EQ(stack_map.Row(), stack_map_index); } else if (kind == StackMap::Kind::Catch) { @@ -162,7 +166,7 @@ void StackMapStream::BeginStackMapEntry( CHECK_EQ(stack_map.Row(), stack_map_index); } StackMap stack_map = code_info.GetStackMapAt(stack_map_index); - CHECK_EQ(stack_map.GetNativePcOffset(instruction_set_), native_pc_offset); + CHECK_EQ(stack_map.GetNativePcOffset(instruction_set_), final_native_pc_offset); CHECK_EQ(stack_map.GetKind(), static_cast<uint32_t>(kind)); CHECK_EQ(stack_map.GetDexPc(), dex_pc); CHECK_EQ(code_info.GetRegisterMaskOf(stack_map), register_mask); @@ -374,10 +378,12 @@ ScopedArenaVector<uint8_t> StackMapStream::Encode() { DCHECK(in_stack_map_ == false) << "Mismatched Begin/End calls"; DCHECK(in_inline_info_ == false) << "Mismatched Begin/End calls"; - uint32_t flags = (inline_infos_.size() > 0) ? CodeInfo::kHasInlineInfo : 0; + uint32_t flags = 0; + flags |= (inline_infos_.size() > 0) ? CodeInfo::kHasInlineInfo : 0; flags |= baseline_ ? CodeInfo::kIsBaseline : 0; flags |= debuggable_ ? CodeInfo::kIsDebuggable : 0; - DCHECK_LE(flags, kVarintMax); // Ensure flags can be read directly as byte. + flags |= has_should_deoptimize_flag_ ? CodeInfo::kHasShouldDeoptimizeFlag : 0; + uint32_t bit_table_flags = 0; ForEachBitTable([&bit_table_flags](size_t i, auto bit_table) { if (bit_table->size() != 0) { // Record which bit-tables are stored. @@ -409,6 +415,8 @@ ScopedArenaVector<uint8_t> StackMapStream::Encode() { CHECK_EQ(code_info.GetNumberOfStackMaps(), stack_maps_.size()); CHECK_EQ(CodeInfo::HasInlineInfo(buffer.data()), inline_infos_.size() > 0); CHECK_EQ(CodeInfo::IsBaseline(buffer.data()), baseline_); + CHECK_EQ(CodeInfo::IsDebuggable(buffer.data()), debuggable_); + CHECK_EQ(CodeInfo::HasShouldDeoptimizeFlag(buffer.data()), has_should_deoptimize_flag_); // Verify all written data (usually only in debug builds). 
if (kVerifyStackMaps) { diff --git a/compiler/optimizing/stack_map_stream.h b/compiler/optimizing/stack_map_stream.h index 643af2da94..f027850ce6 100644 --- a/compiler/optimizing/stack_map_stream.h +++ b/compiler/optimizing/stack_map_stream.h @@ -66,7 +66,8 @@ class StackMapStream : public DeletableArenaObject<kArenaAllocStackMapStream> { size_t fp_spill_mask, uint32_t num_dex_registers, bool baseline, - bool debuggable); + bool debuggable, + bool has_should_deoptimize_flag = false); void EndMethod(size_t code_size); void BeginStackMapEntry( @@ -129,8 +130,9 @@ class StackMapStream : public DeletableArenaObject<kArenaAllocStackMapStream> { uint32_t core_spill_mask_ = 0; uint32_t fp_spill_mask_ = 0; uint32_t num_dex_registers_ = 0; - bool baseline_; - bool debuggable_; + bool baseline_ = false; + bool debuggable_ = false; + bool has_should_deoptimize_flag_ = false; BitTableBuilder<StackMap> stack_maps_; BitTableBuilder<RegisterMask> register_masks_; BitmapTableBuilder stack_masks_; diff --git a/compiler/optimizing/write_barrier_elimination.cc b/compiler/optimizing/write_barrier_elimination.cc index eb70b670fe..6182125b74 100644 --- a/compiler/optimizing/write_barrier_elimination.cc +++ b/compiler/optimizing/write_barrier_elimination.cc @@ -21,6 +21,9 @@ #include "base/scoped_arena_containers.h" #include "optimizing/nodes.h" +// TODO(b/310755375, solanes): Disable WBE while we investigate crashes. +constexpr bool kWBEEnabled = false; + namespace art HIDDEN { class WBEVisitor final : public HGraphVisitor { @@ -153,8 +156,10 @@ class WBEVisitor final : public HGraphVisitor { }; bool WriteBarrierElimination::Run() { - WBEVisitor wbe_visitor(graph_, stats_); - wbe_visitor.VisitReversePostOrder(); + if (kWBEEnabled) { + WBEVisitor wbe_visitor(graph_, stats_); + wbe_visitor.VisitReversePostOrder(); + } return true; } diff --git a/compiler/optimizing/x86_memory_gen.cc b/compiler/optimizing/x86_memory_gen.cc index e266618980..d86869ce0f 100644 --- a/compiler/optimizing/x86_memory_gen.cc +++ b/compiler/optimizing/x86_memory_gen.cc @@ -33,7 +33,7 @@ class MemoryOperandVisitor final : public HGraphVisitor { private: void VisitBoundsCheck(HBoundsCheck* check) override { // Replace the length by the array itself, so that we can do compares to memory. - HArrayLength* array_len = check->InputAt(1)->AsArrayLength(); + HArrayLength* array_len = check->InputAt(1)->AsArrayLengthOrNull(); // We only want to replace an ArrayLength. if (array_len == nullptr) { |