Diffstat (limited to 'compiler/optimizing')
166 files changed, 17037 insertions, 12575 deletions
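Several passes in this change convert their entry point from void Run() to bool Run(), returning whether the pass actually modified the graph (see the BoundsCheckElimination and CHAGuardOptimization hunks below). The following is a minimal standalone sketch of that pattern only; the class and member names are illustrative and are not the actual ART HOptimization/HGraph types.

// Sketch of the void Run() -> bool Run() pattern, with illustrative names.
#include <iostream>

struct GraphSketch {
  bool has_bounds_checks = true;
};

class PassSketch {
 public:
  explicit PassSketch(GraphSketch* graph) : graph_(graph) {}
  virtual ~PassSketch() = default;
  // Returns true if the pass changed the graph, so a caller can decide
  // whether follow-up cleanup passes are worth running.
  virtual bool Run() = 0;

 protected:
  GraphSketch* const graph_;
};

class BceSketch : public PassSketch {
 public:
  using PassSketch::PassSketch;
  bool Run() override {
    if (!graph_->has_bounds_checks) {
      return false;  // Nothing to do; report "no change", mirroring the early return in the BCE hunk below.
    }
    // ... a real pass would visit the blocks and remove redundant checks here ...
    return true;
  }
};

int main() {
  GraphSketch graph;
  BceSketch bce(&graph);
  std::cout << "graph changed: " << std::boolalpha << bce.Run() << '\n';
  return 0;
}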
diff --git a/compiler/optimizing/block_builder.cc b/compiler/optimizing/block_builder.cc index d9df23fd47..a5f78cafe0 100644 --- a/compiler/optimizing/block_builder.cc +++ b/compiler/optimizing/block_builder.cc @@ -68,7 +68,7 @@ bool HBasicBlockBuilder::CreateBranchTargets() { // places where the program might fall through into/out of the a block and // where TryBoundary instructions will be inserted later. Other edges which // enter/exit the try blocks are a result of branches/switches. - for (const DexFile::TryItem& try_item : code_item_accessor_.TryItems()) { + for (const dex::TryItem& try_item : code_item_accessor_.TryItems()) { uint32_t dex_pc_start = try_item.start_addr_; uint32_t dex_pc_end = dex_pc_start + try_item.insn_count_; MaybeCreateBlockAt(dex_pc_start); @@ -222,9 +222,9 @@ void HBasicBlockBuilder::ConnectBasicBlocks() { } // Returns the TryItem stored for `block` or nullptr if there is no info for it. -static const DexFile::TryItem* GetTryItem( +static const dex::TryItem* GetTryItem( HBasicBlock* block, - const ScopedArenaSafeMap<uint32_t, const DexFile::TryItem*>& try_block_info) { + const ScopedArenaSafeMap<uint32_t, const dex::TryItem*>& try_block_info) { auto iterator = try_block_info.find(block->GetBlockId()); return (iterator == try_block_info.end()) ? nullptr : iterator->second; } @@ -235,7 +235,7 @@ static const DexFile::TryItem* GetTryItem( // for a handler. static void LinkToCatchBlocks(HTryBoundary* try_boundary, const CodeItemDataAccessor& accessor, - const DexFile::TryItem* try_item, + const dex::TryItem* try_item, const ScopedArenaSafeMap<uint32_t, HBasicBlock*>& catch_blocks) { for (CatchHandlerIterator it(accessor.GetCatchHandlerData(try_item->handler_off_)); it.HasNext(); @@ -279,7 +279,7 @@ void HBasicBlockBuilder::InsertTryBoundaryBlocks() { // Keep a map of all try blocks and their respective TryItems. We do not use // the block's pointer but rather its id to ensure deterministic iteration. - ScopedArenaSafeMap<uint32_t, const DexFile::TryItem*> try_block_info( + ScopedArenaSafeMap<uint32_t, const dex::TryItem*> try_block_info( std::less<uint32_t>(), local_allocator_->Adapter(kArenaAllocGraphBuilder)); // Obtain TryItem information for blocks with throwing instructions, and split @@ -295,7 +295,7 @@ void HBasicBlockBuilder::InsertTryBoundaryBlocks() { // loop for synchronized blocks. if (ContainsElement(throwing_blocks_, block)) { // Try to find a TryItem covering the block. - const DexFile::TryItem* try_item = code_item_accessor_.FindTryItem(block->GetDexPc()); + const dex::TryItem* try_item = code_item_accessor_.FindTryItem(block->GetDexPc()); if (try_item != nullptr) { // Block throwing and in a TryItem. Store the try block information. try_block_info.Put(block->GetBlockId(), try_item); @@ -315,8 +315,16 @@ void HBasicBlockBuilder::InsertTryBoundaryBlocks() { CatchHandlerIterator iterator(handlers_ptr); for (; iterator.HasNext(); iterator.Next()) { uint32_t address = iterator.GetHandlerAddress(); - if (catch_blocks.find(address) != catch_blocks.end()) { + auto existing = catch_blocks.find(address); + if (existing != catch_blocks.end()) { // Catch block already processed. + TryCatchInformation* info = existing->second->GetTryCatchInformation(); + if (iterator.GetHandlerTypeIndex() != info->GetCatchTypeIndex()) { + // The handler is for multiple types. We could record all the types, but + // doing class resolution here isn't ideal, and it's unclear whether wasting + // the space in TryCatchInformation is worth it. 
+ info->SetInvalidTypeIndex(); + } continue; } @@ -337,7 +345,7 @@ void HBasicBlockBuilder::InsertTryBoundaryBlocks() { catch_blocks.Put(address, catch_block); catch_block->SetTryCatchInformation( - new (allocator_) TryCatchInformation(iterator.GetHandlerTypeIndex(), *dex_file_)); + new (allocator_) TryCatchInformation(iterator.GetHandlerTypeIndex(), *dex_file_)); } handlers_ptr = iterator.EndDataPointer(); } @@ -348,7 +356,7 @@ void HBasicBlockBuilder::InsertTryBoundaryBlocks() { // that all predecessors are relinked to. This preserves loop headers (b/23895756). for (const auto& entry : try_block_info) { uint32_t block_id = entry.first; - const DexFile::TryItem* try_item = entry.second; + const dex::TryItem* try_item = entry.second; HBasicBlock* try_block = graph_->GetBlocks()[block_id]; for (HBasicBlock* predecessor : try_block->GetPredecessors()) { if (GetTryItem(predecessor, try_block_info) != try_item) { @@ -367,7 +375,7 @@ void HBasicBlockBuilder::InsertTryBoundaryBlocks() { // the successor is not in the same TryItem. for (const auto& entry : try_block_info) { uint32_t block_id = entry.first; - const DexFile::TryItem* try_item = entry.second; + const dex::TryItem* try_item = entry.second; HBasicBlock* try_block = graph_->GetBlocks()[block_id]; // NOTE: Do not use iterators because SplitEdge would invalidate them. for (size_t i = 0, e = try_block->GetSuccessors().size(); i < e; ++i) { @@ -415,7 +423,7 @@ void HBasicBlockBuilder::BuildIntrinsic() { // Create blocks. HBasicBlock* entry_block = new (allocator_) HBasicBlock(graph_, kNoDexPc); HBasicBlock* exit_block = new (allocator_) HBasicBlock(graph_, kNoDexPc); - HBasicBlock* body = MaybeCreateBlockAt(/* semantic_dex_pc */ kNoDexPc, /* store_dex_pc */ 0u); + HBasicBlock* body = MaybeCreateBlockAt(/* semantic_dex_pc= */ kNoDexPc, /* store_dex_pc= */ 0u); // Add blocks to the graph. graph_->AddBlock(entry_block); diff --git a/compiler/optimizing/bounds_check_elimination.cc b/compiler/optimizing/bounds_check_elimination.cc index d893cc88c4..e35d50220e 100644 --- a/compiler/optimizing/bounds_check_elimination.cc +++ b/compiler/optimizing/bounds_check_elimination.cc @@ -388,10 +388,10 @@ class MonotonicValueRange : public ValueRange { return induction_variable_->GetBlock(); } - MonotonicValueRange* AsMonotonicValueRange() OVERRIDE { return this; } + MonotonicValueRange* AsMonotonicValueRange() override { return this; } // If it's certain that this value range fits in other_range. - bool FitsIn(ValueRange* other_range) const OVERRIDE { + bool FitsIn(ValueRange* other_range) const override { if (other_range == nullptr) { return true; } @@ -402,7 +402,7 @@ class MonotonicValueRange : public ValueRange { // Try to narrow this MonotonicValueRange given another range. // Ideally it will return a normal ValueRange. But due to // possible overflow/underflow, that may not be possible. - ValueRange* Narrow(ValueRange* range) OVERRIDE { + ValueRange* Narrow(ValueRange* range) override { if (range == nullptr) { return this; } @@ -530,7 +530,7 @@ class BCEVisitor : public HGraphVisitor { induction_range_(induction_analysis), next_(nullptr) {} - void VisitBasicBlock(HBasicBlock* block) OVERRIDE { + void VisitBasicBlock(HBasicBlock* block) override { DCHECK(!IsAddedBlock(block)); first_index_bounds_check_map_.clear(); // Visit phis and instructions using a safe iterator. 
The iteration protects @@ -820,7 +820,7 @@ class BCEVisitor : public HGraphVisitor { } } - void VisitBoundsCheck(HBoundsCheck* bounds_check) OVERRIDE { + void VisitBoundsCheck(HBoundsCheck* bounds_check) override { HBasicBlock* block = bounds_check->GetBlock(); HInstruction* index = bounds_check->InputAt(0); HInstruction* array_length = bounds_check->InputAt(1); @@ -845,8 +845,10 @@ class BCEVisitor : public HGraphVisitor { // make one more attempt to get a constant in the array range. ValueRange* existing_range = LookupValueRange(array_length, block); if (existing_range != nullptr && - existing_range->IsConstantValueRange()) { - ValueRange constant_array_range(&allocator_, lower, existing_range->GetLower()); + existing_range->IsConstantValueRange() && + existing_range->GetLower().GetConstant() > 0) { + ValueBound constant_upper(nullptr, existing_range->GetLower().GetConstant() - 1); + ValueRange constant_array_range(&allocator_, lower, constant_upper); if (index_range->FitsIn(&constant_array_range)) { ReplaceInstruction(bounds_check, index); return; @@ -945,7 +947,7 @@ class BCEVisitor : public HGraphVisitor { return true; } - void VisitPhi(HPhi* phi) OVERRIDE { + void VisitPhi(HPhi* phi) override { if (phi->IsLoopHeaderPhi() && (phi->GetType() == DataType::Type::kInt32) && HasSameInputAtBackEdges(phi)) { @@ -992,14 +994,14 @@ class BCEVisitor : public HGraphVisitor { } } - void VisitIf(HIf* instruction) OVERRIDE { + void VisitIf(HIf* instruction) override { if (instruction->InputAt(0)->IsCondition()) { HCondition* cond = instruction->InputAt(0)->AsCondition(); HandleIf(instruction, cond->GetLeft(), cond->GetRight(), cond->GetCondition()); } } - void VisitAdd(HAdd* add) OVERRIDE { + void VisitAdd(HAdd* add) override { HInstruction* right = add->GetRight(); if (right->IsIntConstant()) { ValueRange* left_range = LookupValueRange(add->GetLeft(), add->GetBlock()); @@ -1013,7 +1015,7 @@ class BCEVisitor : public HGraphVisitor { } } - void VisitSub(HSub* sub) OVERRIDE { + void VisitSub(HSub* sub) override { HInstruction* left = sub->GetLeft(); HInstruction* right = sub->GetRight(); if (right->IsIntConstant()) { @@ -1115,19 +1117,19 @@ class BCEVisitor : public HGraphVisitor { } } - void VisitDiv(HDiv* div) OVERRIDE { + void VisitDiv(HDiv* div) override { FindAndHandlePartialArrayLength(div); } - void VisitShr(HShr* shr) OVERRIDE { + void VisitShr(HShr* shr) override { FindAndHandlePartialArrayLength(shr); } - void VisitUShr(HUShr* ushr) OVERRIDE { + void VisitUShr(HUShr* ushr) override { FindAndHandlePartialArrayLength(ushr); } - void VisitAnd(HAnd* instruction) OVERRIDE { + void VisitAnd(HAnd* instruction) override { if (instruction->GetRight()->IsIntConstant()) { int32_t constant = instruction->GetRight()->AsIntConstant()->GetValue(); if (constant > 0) { @@ -1142,7 +1144,7 @@ class BCEVisitor : public HGraphVisitor { } } - void VisitRem(HRem* instruction) OVERRIDE { + void VisitRem(HRem* instruction) override { HInstruction* left = instruction->GetLeft(); HInstruction* right = instruction->GetRight(); @@ -1202,7 +1204,7 @@ class BCEVisitor : public HGraphVisitor { } } - void VisitNewArray(HNewArray* new_array) OVERRIDE { + void VisitNewArray(HNewArray* new_array) override { HInstruction* len = new_array->GetLength(); if (!len->IsIntConstant()) { HInstruction *left; @@ -1240,7 +1242,7 @@ class BCEVisitor : public HGraphVisitor { * has occurred (see AddCompareWithDeoptimization()), since in those cases it would be * unsafe to hoist array references across their deoptimization instruction inside 
a loop. */ - void VisitArrayGet(HArrayGet* array_get) OVERRIDE { + void VisitArrayGet(HArrayGet* array_get) override { if (!has_dom_based_dynamic_bce_ && array_get->IsInLoop()) { HLoopInformation* loop = array_get->GetBlock()->GetLoopInformation(); if (loop->IsDefinedOutOfTheLoop(array_get->InputAt(0)) && @@ -1634,7 +1636,7 @@ class BCEVisitor : public HGraphVisitor { HBasicBlock* block = GetPreHeader(loop, check); HInstruction* cond = new (GetGraph()->GetAllocator()) HEqual(array, GetGraph()->GetNullConstant()); - InsertDeoptInLoop(loop, block, cond, /* is_null_check */ true); + InsertDeoptInLoop(loop, block, cond, /* is_null_check= */ true); ReplaceInstruction(check, array); return true; } @@ -1938,9 +1940,9 @@ class BCEVisitor : public HGraphVisitor { DISALLOW_COPY_AND_ASSIGN(BCEVisitor); }; -void BoundsCheckElimination::Run() { +bool BoundsCheckElimination::Run() { if (!graph_->HasBoundsChecks()) { - return; + return false; } // Reverse post order guarantees a node's dominators are visited first. @@ -1968,6 +1970,8 @@ void BoundsCheckElimination::Run() { // Perform cleanup. visitor.Finish(); + + return true; } } // namespace art diff --git a/compiler/optimizing/bounds_check_elimination.h b/compiler/optimizing/bounds_check_elimination.h index 79c67a8c7a..ef08877daa 100644 --- a/compiler/optimizing/bounds_check_elimination.h +++ b/compiler/optimizing/bounds_check_elimination.h @@ -34,7 +34,7 @@ class BoundsCheckElimination : public HOptimization { side_effects_(side_effects), induction_analysis_(induction_analysis) {} - void Run() OVERRIDE; + bool Run() override; static constexpr const char* kBoundsCheckEliminationPassName = "BCE"; diff --git a/compiler/optimizing/bounds_check_elimination_test.cc b/compiler/optimizing/bounds_check_elimination_test.cc index 1523478613..5927d681b2 100644 --- a/compiler/optimizing/bounds_check_elimination_test.cc +++ b/compiler/optimizing/bounds_check_elimination_test.cc @@ -43,7 +43,7 @@ class BoundsCheckEliminationTest : public OptimizingUnitTest { void RunBCE() { graph_->BuildDominatorTree(); - InstructionSimplifier(graph_, /* codegen */ nullptr, /* driver */ nullptr).Run(); + InstructionSimplifier(graph_, /* codegen= */ nullptr).Run(); SideEffectsAnalysis side_effects(graph_); side_effects.Run(); @@ -598,9 +598,10 @@ static HInstruction* BuildSSAGraph3(HGraph* graph, entry->AddSuccessor(block); // We pass a bogus constant for the class to avoid mocking one. HInstruction* new_array = new (allocator) HNewArray( - constant_10, - constant_10, - 0); + /* cls= */ constant_10, + /* length= */ constant_10, + /* dex_pc= */ 0, + /* component_size_shift= */ 0); block->AddInstruction(new_array); block->AddInstruction(new (allocator) HGoto()); @@ -977,7 +978,11 @@ TEST_F(BoundsCheckEliminationTest, ModArrayBoundsElimination) { graph_->AddBlock(block); entry->AddSuccessor(block); // We pass a bogus constant for the class to avoid mocking one. 
- HInstruction* new_array = new (GetAllocator()) HNewArray(constant_10, constant_10, 0); + HInstruction* new_array = new (GetAllocator()) HNewArray( + /* cls= */ constant_10, + /* length= */ constant_10, + /* dex_pc= */ 0, + /* component_size_shift= */ 0); block->AddInstruction(new_array); block->AddInstruction(new (GetAllocator()) HGoto()); diff --git a/compiler/optimizing/builder.cc b/compiler/optimizing/builder.cc index a1a5692ef6..64aa1b9358 100644 --- a/compiler/optimizing/builder.cc +++ b/compiler/optimizing/builder.cc @@ -21,6 +21,7 @@ #include "base/bit_vector-inl.h" #include "base/logging.h" #include "block_builder.h" +#include "code_generator.h" #include "data_type-inl.h" #include "dex/verified_method.h" #include "driver/compiler_options.h" @@ -40,7 +41,6 @@ HGraphBuilder::HGraphBuilder(HGraph* graph, const CodeItemDebugInfoAccessor& accessor, const DexCompilationUnit* dex_compilation_unit, const DexCompilationUnit* outer_compilation_unit, - CompilerDriver* driver, CodeGenerator* code_generator, OptimizingCompilerStats* compiler_stats, ArrayRef<const uint8_t> interpreter_metadata, @@ -50,7 +50,6 @@ HGraphBuilder::HGraphBuilder(HGraph* graph, code_item_accessor_(accessor), dex_compilation_unit_(dex_compilation_unit), outer_compilation_unit_(outer_compilation_unit), - compiler_driver_(driver), code_generator_(code_generator), compilation_stats_(compiler_stats), interpreter_metadata_(interpreter_metadata), @@ -67,19 +66,18 @@ HGraphBuilder::HGraphBuilder(HGraph* graph, code_item_accessor_(accessor), dex_compilation_unit_(dex_compilation_unit), outer_compilation_unit_(nullptr), - compiler_driver_(nullptr), code_generator_(nullptr), compilation_stats_(nullptr), handles_(handles), return_type_(return_type) {} bool HGraphBuilder::SkipCompilation(size_t number_of_branches) { - if (compiler_driver_ == nullptr) { - // Note that the compiler driver is null when unit testing. + if (code_generator_ == nullptr) { + // Note that the codegen is null when unit testing. 
return false; } - const CompilerOptions& compiler_options = compiler_driver_->GetCompilerOptions(); + const CompilerOptions& compiler_options = code_generator_->GetCompilerOptions(); CompilerFilter::Filter compiler_filter = compiler_options.GetCompilerFilter(); if (compiler_filter == CompilerFilter::kEverything) { return false; @@ -131,7 +129,6 @@ GraphAnalysisResult HGraphBuilder::BuildGraph() { return_type_, dex_compilation_unit_, outer_compilation_unit_, - compiler_driver_, code_generator_, interpreter_metadata_, compilation_stats_, @@ -203,7 +200,6 @@ void HGraphBuilder::BuildIntrinsicGraph(ArtMethod* method) { return_type_, dex_compilation_unit_, outer_compilation_unit_, - compiler_driver_, code_generator_, interpreter_metadata_, compilation_stats_, diff --git a/compiler/optimizing/builder.h b/compiler/optimizing/builder.h index 5a1914ce08..6152740324 100644 --- a/compiler/optimizing/builder.h +++ b/compiler/optimizing/builder.h @@ -22,7 +22,6 @@ #include "dex/code_item_accessors.h" #include "dex/dex_file-inl.h" #include "dex/dex_file.h" -#include "driver/compiler_driver.h" #include "nodes.h" namespace art { @@ -38,7 +37,6 @@ class HGraphBuilder : public ValueObject { const CodeItemDebugInfoAccessor& accessor, const DexCompilationUnit* dex_compilation_unit, const DexCompilationUnit* outer_compilation_unit, - CompilerDriver* driver, CodeGenerator* code_generator, OptimizingCompilerStats* compiler_stats, ArrayRef<const uint8_t> interpreter_metadata, @@ -70,7 +68,6 @@ class HGraphBuilder : public ValueObject { // The compilation unit of the enclosing method being compiled. const DexCompilationUnit* const outer_compilation_unit_; - CompilerDriver* const compiler_driver_; CodeGenerator* const code_generator_; OptimizingCompilerStats* const compilation_stats_; diff --git a/compiler/optimizing/cha_guard_optimization.cc b/compiler/optimizing/cha_guard_optimization.cc index 3addaeecd9..c6232ef661 100644 --- a/compiler/optimizing/cha_guard_optimization.cc +++ b/compiler/optimizing/cha_guard_optimization.cc @@ -44,9 +44,9 @@ class CHAGuardVisitor : HGraphVisitor { GetGraph()->SetNumberOfCHAGuards(0); } - void VisitShouldDeoptimizeFlag(HShouldDeoptimizeFlag* flag) OVERRIDE; + void VisitShouldDeoptimizeFlag(HShouldDeoptimizeFlag* flag) override; - void VisitBasicBlock(HBasicBlock* block) OVERRIDE; + void VisitBasicBlock(HBasicBlock* block) override; private: void RemoveGuard(HShouldDeoptimizeFlag* flag); @@ -241,14 +241,15 @@ void CHAGuardVisitor::VisitShouldDeoptimizeFlag(HShouldDeoptimizeFlag* flag) { GetGraph()->IncrementNumberOfCHAGuards(); } -void CHAGuardOptimization::Run() { +bool CHAGuardOptimization::Run() { if (graph_->GetNumberOfCHAGuards() == 0) { - return; + return false; } CHAGuardVisitor visitor(graph_); for (HBasicBlock* block : graph_->GetReversePostOrder()) { visitor.VisitBasicBlock(block); } + return true; } } // namespace art diff --git a/compiler/optimizing/cha_guard_optimization.h b/compiler/optimizing/cha_guard_optimization.h index f14e07bd6c..440d51a969 100644 --- a/compiler/optimizing/cha_guard_optimization.h +++ b/compiler/optimizing/cha_guard_optimization.h @@ -30,7 +30,7 @@ class CHAGuardOptimization : public HOptimization { const char* name = kCHAGuardOptimizationPassName) : HOptimization(graph, name) {} - void Run() OVERRIDE; + bool Run() override; static constexpr const char* kCHAGuardOptimizationPassName = "cha_guard_optimization"; diff --git a/compiler/optimizing/code_generator.cc b/compiler/optimizing/code_generator.cc index 6abda9b302..2bbb570c8d 100644 --- 
a/compiler/optimizing/code_generator.cc +++ b/compiler/optimizing/code_generator.cc @@ -49,8 +49,9 @@ #include "dex/bytecode_utils.h" #include "dex/code_item_accessors-inl.h" #include "dex/verified_method.h" -#include "driver/compiler_driver.h" #include "graph_visualizer.h" +#include "image.h" +#include "gc/space/image_space.h" #include "intern_table.h" #include "intrinsics.h" #include "mirror/array-inl.h" @@ -61,15 +62,13 @@ #include "parallel_move_resolver.h" #include "scoped_thread_state_change-inl.h" #include "ssa_liveness_analysis.h" +#include "stack_map.h" #include "stack_map_stream.h" #include "thread-current-inl.h" #include "utils/assembler.h" namespace art { -// If true, we record the static and direct invokes in the invoke infos. -static constexpr bool kEnableDexLayoutOptimizations = false; - // Return whether a location is consistent with a type. static bool CheckType(DataType::Type type, Location location) { if (location.IsFpuRegister() @@ -197,7 +196,7 @@ class CodeGenerator::CodeGenerationData : public DeletableArenaObject<kArenaAllo return GetNumberOfJitStringRoots() + GetNumberOfJitClassRoots(); } - void EmitJitRoots(Handle<mirror::ObjectArray<mirror::Object>> roots) + void EmitJitRoots(/*out*/std::vector<Handle<mirror::Object>>* roots) REQUIRES_SHARED(Locks::mutator_lock_); private: @@ -230,29 +229,31 @@ class CodeGenerator::CodeGenerationData : public DeletableArenaObject<kArenaAllo }; void CodeGenerator::CodeGenerationData::EmitJitRoots( - Handle<mirror::ObjectArray<mirror::Object>> roots) { - DCHECK_EQ(static_cast<size_t>(roots->GetLength()), GetNumberOfJitRoots()); + /*out*/std::vector<Handle<mirror::Object>>* roots) { + DCHECK(roots->empty()); + roots->reserve(GetNumberOfJitRoots()); ClassLinker* class_linker = Runtime::Current()->GetClassLinker(); size_t index = 0; for (auto& entry : jit_string_roots_) { // Update the `roots` with the string, and replace the address temporarily // stored to the index in the table. uint64_t address = entry.second; - roots->Set(index, reinterpret_cast<StackReference<mirror::String>*>(address)->AsMirrorPtr()); - DCHECK(roots->Get(index) != nullptr); + roots->emplace_back(reinterpret_cast<StackReference<mirror::Object>*>(address)); + DCHECK(roots->back() != nullptr); + DCHECK(roots->back()->IsString()); entry.second = index; // Ensure the string is strongly interned. This is a requirement on how the JIT // handles strings. b/32995596 - class_linker->GetInternTable()->InternStrong( - reinterpret_cast<mirror::String*>(roots->Get(index))); + class_linker->GetInternTable()->InternStrong(roots->back()->AsString()); ++index; } for (auto& entry : jit_class_roots_) { // Update the `roots` with the class, and replace the address temporarily // stored to the index in the table. uint64_t address = entry.second; - roots->Set(index, reinterpret_cast<StackReference<mirror::Class>*>(address)->AsMirrorPtr()); - DCHECK(roots->Get(index) != nullptr); + roots->emplace_back(reinterpret_cast<StackReference<mirror::Object>*>(address)); + DCHECK(roots->back() != nullptr); + DCHECK(roots->back()->IsClass()); entry.second = index; ++index; } @@ -390,6 +391,11 @@ void CodeGenerator::Compile(CodeAllocator* allocator) { HGraphVisitor* instruction_visitor = GetInstructionVisitor(); DCHECK_EQ(current_block_index_, 0u); + GetStackMapStream()->BeginMethod(HasEmptyFrame() ? 
0 : frame_size_, + core_spill_mask_, + fpu_spill_mask_, + GetGraph()->GetNumberOfVRegs()); + size_t frame_start = GetAssembler()->CodeSize(); GenerateFrameEntry(); DCHECK_EQ(GetAssembler()->cfi().GetCurrentCFAOffset(), static_cast<int>(frame_size_)); @@ -407,7 +413,7 @@ void CodeGenerator::Compile(CodeAllocator* allocator) { // This ensures that we have correct native line mapping for all native instructions. // It is necessary to make stepping over a statement work. Otherwise, any initial // instructions (e.g. moves) would be assumed to be the start of next statement. - MaybeRecordNativeDebugInfo(nullptr /* instruction */, block->GetDexPc()); + MaybeRecordNativeDebugInfo(/* instruction= */ nullptr, block->GetDexPc()); for (HInstructionIterator it(block->GetInstructions()); !it.Done(); it.Advance()) { HInstruction* current = it.Current(); if (current->HasEnvironment()) { @@ -432,6 +438,8 @@ void CodeGenerator::Compile(CodeAllocator* allocator) { // Finalize instructions in assember; Finalize(allocator); + + GetStackMapStream()->EndMethod(); } void CodeGenerator::Finalize(CodeAllocator* allocator) { @@ -447,6 +455,18 @@ void CodeGenerator::EmitLinkerPatches( // No linker patches by default. } +bool CodeGenerator::NeedsThunkCode(const linker::LinkerPatch& patch ATTRIBUTE_UNUSED) const { + // Code generators that create patches requiring thunk compilation should override this function. + return false; +} + +void CodeGenerator::EmitThunkCode(const linker::LinkerPatch& patch ATTRIBUTE_UNUSED, + /*out*/ ArenaVector<uint8_t>* code ATTRIBUTE_UNUSED, + /*out*/ std::string* debug_name ATTRIBUTE_UNUSED) { + // Code generators that create patches requiring thunk compilation should override this function. + LOG(FATAL) << "Unexpected call to EmitThunkCode()."; +} + void CodeGenerator::InitializeCodeGeneration(size_t number_of_spill_slots, size_t maximum_safepoint_spill_size, size_t number_of_out_slots, @@ -501,7 +521,7 @@ void CodeGenerator::CreateCommonInvokeLocationSummary( locations->AddTemp(visitor->GetMethodLocation()); break; } - } else { + } else if (!invoke->IsInvokePolymorphic()) { locations->AddTemp(visitor->GetMethodLocation()); } } @@ -529,6 +549,7 @@ void CodeGenerator::GenerateInvokeStaticOrDirectRuntimeCall( case kVirtual: case kInterface: case kPolymorphic: + case kCustom: LOG(FATAL) << "Unexpected invoke type: " << invoke->GetInvokeType(); UNREACHABLE(); } @@ -557,6 +578,7 @@ void CodeGenerator::GenerateInvokeUnresolvedRuntimeCall(HInvokeUnresolved* invok entrypoint = kQuickInvokeInterfaceTrampolineWithAccessCheck; break; case kPolymorphic: + case kCustom: LOG(FATAL) << "Unexpected invoke type: " << invoke->GetInvokeType(); UNREACHABLE(); } @@ -564,11 +586,19 @@ void CodeGenerator::GenerateInvokeUnresolvedRuntimeCall(HInvokeUnresolved* invok } void CodeGenerator::GenerateInvokePolymorphicCall(HInvokePolymorphic* invoke) { - MoveConstant(invoke->GetLocations()->GetTemp(0), static_cast<int32_t>(invoke->GetType())); + // invoke-polymorphic does not use a temporary to convey any additional information (e.g. a + // method index) since it requires multiple info from the instruction (registers A, B, H). Not + // using the reservation has no effect on the registers used in the runtime call. 
QuickEntrypointEnum entrypoint = kQuickInvokePolymorphic; InvokeRuntime(entrypoint, invoke, invoke->GetDexPc(), nullptr); } +void CodeGenerator::GenerateInvokeCustomCall(HInvokeCustom* invoke) { + MoveConstant(invoke->GetLocations()->GetTemp(0), invoke->GetCallSiteIndex()); + QuickEntrypointEnum entrypoint = kQuickInvokeCustom; + InvokeRuntime(entrypoint, invoke, invoke->GetDexPc(), nullptr); +} + void CodeGenerator::CreateUnresolvedFieldLocationSummary( HInstruction* field_access, DataType::Type field_type, @@ -708,20 +738,99 @@ void CodeGenerator::CreateLoadClassRuntimeCallLocationSummary(HLoadClass* cls, void CodeGenerator::GenerateLoadClassRuntimeCall(HLoadClass* cls) { DCHECK_EQ(cls->GetLoadKind(), HLoadClass::LoadKind::kRuntimeCall); + DCHECK(!cls->MustGenerateClinitCheck()); LocationSummary* locations = cls->GetLocations(); MoveConstant(locations->GetTemp(0), cls->GetTypeIndex().index_); if (cls->NeedsAccessCheck()) { - CheckEntrypointTypes<kQuickInitializeTypeAndVerifyAccess, void*, uint32_t>(); - InvokeRuntime(kQuickInitializeTypeAndVerifyAccess, cls, cls->GetDexPc()); - } else if (cls->MustGenerateClinitCheck()) { - CheckEntrypointTypes<kQuickInitializeStaticStorage, void*, uint32_t>(); - InvokeRuntime(kQuickInitializeStaticStorage, cls, cls->GetDexPc()); + CheckEntrypointTypes<kQuickResolveTypeAndVerifyAccess, void*, uint32_t>(); + InvokeRuntime(kQuickResolveTypeAndVerifyAccess, cls, cls->GetDexPc()); } else { - CheckEntrypointTypes<kQuickInitializeType, void*, uint32_t>(); - InvokeRuntime(kQuickInitializeType, cls, cls->GetDexPc()); + CheckEntrypointTypes<kQuickResolveType, void*, uint32_t>(); + InvokeRuntime(kQuickResolveType, cls, cls->GetDexPc()); } } +void CodeGenerator::CreateLoadMethodHandleRuntimeCallLocationSummary( + HLoadMethodHandle* method_handle, + Location runtime_proto_index_location, + Location runtime_return_location) { + DCHECK_EQ(method_handle->InputCount(), 1u); + LocationSummary* locations = + new (method_handle->GetBlock()->GetGraph()->GetAllocator()) LocationSummary( + method_handle, LocationSummary::kCallOnMainOnly); + locations->SetInAt(0, Location::NoLocation()); + locations->AddTemp(runtime_proto_index_location); + locations->SetOut(runtime_return_location); +} + +void CodeGenerator::GenerateLoadMethodHandleRuntimeCall(HLoadMethodHandle* method_handle) { + LocationSummary* locations = method_handle->GetLocations(); + MoveConstant(locations->GetTemp(0), method_handle->GetMethodHandleIndex()); + CheckEntrypointTypes<kQuickResolveMethodHandle, void*, uint32_t>(); + InvokeRuntime(kQuickResolveMethodHandle, method_handle, method_handle->GetDexPc()); +} + +void CodeGenerator::CreateLoadMethodTypeRuntimeCallLocationSummary( + HLoadMethodType* method_type, + Location runtime_proto_index_location, + Location runtime_return_location) { + DCHECK_EQ(method_type->InputCount(), 1u); + LocationSummary* locations = + new (method_type->GetBlock()->GetGraph()->GetAllocator()) LocationSummary( + method_type, LocationSummary::kCallOnMainOnly); + locations->SetInAt(0, Location::NoLocation()); + locations->AddTemp(runtime_proto_index_location); + locations->SetOut(runtime_return_location); +} + +void CodeGenerator::GenerateLoadMethodTypeRuntimeCall(HLoadMethodType* method_type) { + LocationSummary* locations = method_type->GetLocations(); + MoveConstant(locations->GetTemp(0), method_type->GetProtoIndex().index_); + CheckEntrypointTypes<kQuickResolveMethodType, void*, uint32_t>(); + InvokeRuntime(kQuickResolveMethodType, method_type, method_type->GetDexPc()); +} + +static 
uint32_t GetBootImageOffsetImpl(const void* object, ImageHeader::ImageSections section) { + Runtime* runtime = Runtime::Current(); + DCHECK(runtime->IsAotCompiler()); + const std::vector<gc::space::ImageSpace*>& boot_image_spaces = + runtime->GetHeap()->GetBootImageSpaces(); + // Check that the `object` is in the expected section of one of the boot image files. + DCHECK(std::any_of(boot_image_spaces.begin(), + boot_image_spaces.end(), + [object, section](gc::space::ImageSpace* space) { + uintptr_t begin = reinterpret_cast<uintptr_t>(space->Begin()); + uintptr_t offset = reinterpret_cast<uintptr_t>(object) - begin; + return space->GetImageHeader().GetImageSection(section).Contains(offset); + })); + uintptr_t begin = reinterpret_cast<uintptr_t>(boot_image_spaces.front()->Begin()); + uintptr_t offset = reinterpret_cast<uintptr_t>(object) - begin; + return dchecked_integral_cast<uint32_t>(offset); +} + +// NO_THREAD_SAFETY_ANALYSIS: Avoid taking the mutator lock, boot image classes are non-moveable. +uint32_t CodeGenerator::GetBootImageOffset(HLoadClass* load_class) NO_THREAD_SAFETY_ANALYSIS { + DCHECK_EQ(load_class->GetLoadKind(), HLoadClass::LoadKind::kBootImageRelRo); + ObjPtr<mirror::Class> klass = load_class->GetClass().Get(); + DCHECK(klass != nullptr); + return GetBootImageOffsetImpl(klass.Ptr(), ImageHeader::kSectionObjects); +} + +// NO_THREAD_SAFETY_ANALYSIS: Avoid taking the mutator lock, boot image strings are non-moveable. +uint32_t CodeGenerator::GetBootImageOffset(HLoadString* load_string) NO_THREAD_SAFETY_ANALYSIS { + DCHECK_EQ(load_string->GetLoadKind(), HLoadString::LoadKind::kBootImageRelRo); + ObjPtr<mirror::String> string = load_string->GetString().Get(); + DCHECK(string != nullptr); + return GetBootImageOffsetImpl(string.Ptr(), ImageHeader::kSectionObjects); +} + +uint32_t CodeGenerator::GetBootImageOffset(HInvokeStaticOrDirect* invoke) { + DCHECK_EQ(invoke->GetMethodLoadKind(), HInvokeStaticOrDirect::MethodLoadKind::kBootImageRelRo); + ArtMethod* method = invoke->GetResolvedMethod(); + DCHECK(method != nullptr); + return GetBootImageOffsetImpl(method, ImageHeader::kSectionArtMethods); +} + void CodeGenerator::BlockIfInRegister(Location location, bool is_out) const { // The DCHECKS below check that a register is not specified twice in // the summary. 
The out location can overlap with an input, so we need @@ -771,53 +880,45 @@ void CodeGenerator::AllocateLocations(HInstruction* instruction) { } std::unique_ptr<CodeGenerator> CodeGenerator::Create(HGraph* graph, - InstructionSet instruction_set, - const InstructionSetFeatures& isa_features, const CompilerOptions& compiler_options, OptimizingCompilerStats* stats) { ArenaAllocator* allocator = graph->GetAllocator(); - switch (instruction_set) { + switch (compiler_options.GetInstructionSet()) { #ifdef ART_ENABLE_CODEGEN_arm case InstructionSet::kArm: case InstructionSet::kThumb2: { return std::unique_ptr<CodeGenerator>( - new (allocator) arm::CodeGeneratorARMVIXL( - graph, *isa_features.AsArmInstructionSetFeatures(), compiler_options, stats)); + new (allocator) arm::CodeGeneratorARMVIXL(graph, compiler_options, stats)); } #endif #ifdef ART_ENABLE_CODEGEN_arm64 case InstructionSet::kArm64: { return std::unique_ptr<CodeGenerator>( - new (allocator) arm64::CodeGeneratorARM64( - graph, *isa_features.AsArm64InstructionSetFeatures(), compiler_options, stats)); + new (allocator) arm64::CodeGeneratorARM64(graph, compiler_options, stats)); } #endif #ifdef ART_ENABLE_CODEGEN_mips case InstructionSet::kMips: { return std::unique_ptr<CodeGenerator>( - new (allocator) mips::CodeGeneratorMIPS( - graph, *isa_features.AsMipsInstructionSetFeatures(), compiler_options, stats)); + new (allocator) mips::CodeGeneratorMIPS(graph, compiler_options, stats)); } #endif #ifdef ART_ENABLE_CODEGEN_mips64 case InstructionSet::kMips64: { return std::unique_ptr<CodeGenerator>( - new (allocator) mips64::CodeGeneratorMIPS64( - graph, *isa_features.AsMips64InstructionSetFeatures(), compiler_options, stats)); + new (allocator) mips64::CodeGeneratorMIPS64(graph, compiler_options, stats)); } #endif #ifdef ART_ENABLE_CODEGEN_x86 case InstructionSet::kX86: { return std::unique_ptr<CodeGenerator>( - new (allocator) x86::CodeGeneratorX86( - graph, *isa_features.AsX86InstructionSetFeatures(), compiler_options, stats)); + new (allocator) x86::CodeGeneratorX86(graph, compiler_options, stats)); } #endif #ifdef ART_ENABLE_CODEGEN_x86_64 case InstructionSet::kX86_64: { return std::unique_ptr<CodeGenerator>( - new (allocator) x86_64::CodeGeneratorX86_64( - graph, *isa_features.AsX86_64InstructionSetFeatures(), compiler_options, stats)); + new (allocator) x86_64::CodeGeneratorX86_64(graph, compiler_options, stats)); } #endif default: @@ -861,15 +962,6 @@ CodeGenerator::CodeGenerator(HGraph* graph, CodeGenerator::~CodeGenerator() {} -void CodeGenerator::ComputeStackMapAndMethodInfoSize(size_t* stack_map_size, - size_t* method_info_size) { - DCHECK(stack_map_size != nullptr); - DCHECK(method_info_size != nullptr); - StackMapStream* stack_map_stream = GetStackMapStream(); - *stack_map_size = stack_map_stream->PrepareForFillIn(); - *method_info_size = stack_map_stream->ComputeMethodInfoSize(); -} - size_t CodeGenerator::GetNumberOfJitRoots() const { DCHECK(code_generation_data_ != nullptr); return code_generation_data_->GetNumberOfJitRoots(); @@ -880,11 +972,10 @@ static void CheckCovers(uint32_t dex_pc, const CodeInfo& code_info, const ArenaVector<HSuspendCheck*>& loop_headers, ArenaVector<size_t>* covered) { - CodeInfoEncoding encoding = code_info.ExtractEncoding(); for (size_t i = 0; i < loop_headers.size(); ++i) { if (loop_headers[i]->GetDexPc() == dex_pc) { if (graph.IsCompilingOsr()) { - DCHECK(code_info.GetOsrStackMapForDexPc(dex_pc, encoding).IsValid()); + DCHECK(code_info.GetOsrStackMapForDexPc(dex_pc).IsValid()); } ++(*covered)[i]; } 
@@ -895,7 +986,7 @@ static void CheckCovers(uint32_t dex_pc, // dex branch instructions. static void CheckLoopEntriesCanBeUsedForOsr(const HGraph& graph, const CodeInfo& code_info, - const DexFile::CodeItem& code_item) { + const dex::CodeItem& code_item) { if (graph.HasTryCatch()) { // One can write loops through try/catch, which we do not support for OSR anyway. return; @@ -937,20 +1028,18 @@ static void CheckLoopEntriesCanBeUsedForOsr(const HGraph& graph, } } -void CodeGenerator::BuildStackMaps(MemoryRegion stack_map_region, - MemoryRegion method_info_region, - const DexFile::CodeItem* code_item_for_osr_check) { - StackMapStream* stack_map_stream = GetStackMapStream(); - stack_map_stream->FillInCodeInfo(stack_map_region); - stack_map_stream->FillInMethodInfo(method_info_region); - if (kIsDebugBuild && code_item_for_osr_check != nullptr) { - CheckLoopEntriesCanBeUsedForOsr(*graph_, CodeInfo(stack_map_region), *code_item_for_osr_check); +ScopedArenaVector<uint8_t> CodeGenerator::BuildStackMaps(const dex::CodeItem* code_item) { + ScopedArenaVector<uint8_t> stack_map = GetStackMapStream()->Encode(); + if (kIsDebugBuild && code_item != nullptr) { + CheckLoopEntriesCanBeUsedForOsr(*graph_, CodeInfo(stack_map.data()), *code_item); } + return stack_map; } void CodeGenerator::RecordPcInfo(HInstruction* instruction, uint32_t dex_pc, - SlowPathCode* slow_path) { + SlowPathCode* slow_path, + bool native_debug_info) { if (instruction != nullptr) { // The code generated for some type conversions // may call the runtime, thus normally requiring a subsequent @@ -981,7 +1070,7 @@ void CodeGenerator::RecordPcInfo(HInstruction* instruction, if (instruction == nullptr) { // For stack overflow checks and native-debug-info entries without dex register // mapping (i.e. start of basic block or start of slow path). - stack_map_stream->BeginStackMapEntry(dex_pc, native_pc, 0, 0, 0, 0); + stack_map_stream->BeginStackMapEntry(dex_pc, native_pc); stack_map_stream->EndStackMapEntry(); return; } @@ -995,7 +1084,7 @@ void CodeGenerator::RecordPcInfo(HInstruction* instruction, // call). Therefore register_mask contains both callee-save and caller-save // registers that hold objects. We must remove the spilled caller-save from the // mask, since they will be overwritten by the callee. - uint32_t spills = GetSlowPathSpills(locations, /* core_registers */ true); + uint32_t spills = GetSlowPathSpills(locations, /* core_registers= */ true); register_mask &= ~spills; } else { // The register mask must be a subset of callee-save registers. @@ -1015,37 +1104,28 @@ void CodeGenerator::RecordPcInfo(HInstruction* instruction, outer_dex_pc = outer_environment->GetDexPc(); outer_environment_size = outer_environment->Size(); } + + HLoopInformation* info = instruction->GetBlock()->GetLoopInformation(); + bool osr = + instruction->IsSuspendCheck() && + (info != nullptr) && + graph_->IsCompilingOsr() && + (inlining_depth == 0); + StackMap::Kind kind = native_debug_info + ? StackMap::Kind::Debug + : (osr ? StackMap::Kind::OSR : StackMap::Kind::Default); stack_map_stream->BeginStackMapEntry(outer_dex_pc, native_pc, register_mask, locations->GetStackMask(), - outer_environment_size, - inlining_depth); + kind); EmitEnvironment(environment, slow_path); - // Record invoke info, the common case for the trampoline is super and static invokes. Only - // record these to reduce oat file size. 
- if (kEnableDexLayoutOptimizations) { - if (instruction->IsInvokeStaticOrDirect()) { - HInvoke* const invoke = instruction->AsInvokeStaticOrDirect(); - DCHECK(environment != nullptr); - stack_map_stream->AddInvoke(invoke->GetInvokeType(), invoke->GetDexMethodIndex()); - } - } stack_map_stream->EndStackMapEntry(); - HLoopInformation* info = instruction->GetBlock()->GetLoopInformation(); - if (instruction->IsSuspendCheck() && - (info != nullptr) && - graph_->IsCompilingOsr() && - (inlining_depth == 0)) { + if (osr) { DCHECK_EQ(info->GetSuspendCheck(), instruction); - // We duplicate the stack map as a marker that this stack map can be an OSR entry. - // Duplicating it avoids having the runtime recognize and skip an OSR stack map. DCHECK(info->IsIrreducible()); - stack_map_stream->BeginStackMapEntry( - dex_pc, native_pc, register_mask, locations->GetStackMask(), outer_environment_size, 0); - EmitEnvironment(instruction->GetEnvironment(), slow_path); - stack_map_stream->EndStackMapEntry(); + DCHECK(environment != nullptr); if (kIsDebugBuild) { for (size_t i = 0, environment_size = environment->Size(); i < environment_size; ++i) { HInstruction* in_environment = environment->GetInstructionAt(i); @@ -1062,14 +1142,6 @@ void CodeGenerator::RecordPcInfo(HInstruction* instruction, } } } - } else if (kIsDebugBuild) { - // Ensure stack maps are unique, by checking that the native pc in the stack map - // last emitted is different than the native pc of the stack map just emitted. - size_t number_of_stack_maps = stack_map_stream->GetNumberOfStackMaps(); - if (number_of_stack_maps > 1) { - DCHECK_NE(stack_map_stream->GetStackMap(number_of_stack_maps - 1).native_pc_code_offset, - stack_map_stream->GetStackMap(number_of_stack_maps - 2).native_pc_code_offset); - } } } @@ -1080,8 +1152,7 @@ bool CodeGenerator::HasStackMapAtCurrentPc() { if (count == 0) { return false; } - CodeOffset native_pc_offset = stack_map_stream->GetStackMap(count - 1).native_pc_code_offset; - return (native_pc_offset.Uint32Value(GetInstructionSet()) == pc); + return stack_map_stream->GetStackMapNativePcOffset(count - 1) == pc; } void CodeGenerator::MaybeRecordNativeDebugInfo(HInstruction* instruction, @@ -1092,12 +1163,11 @@ void CodeGenerator::MaybeRecordNativeDebugInfo(HInstruction* instruction, // Ensure that we do not collide with the stack map of the previous instruction. GenerateNop(); } - RecordPcInfo(instruction, dex_pc, slow_path); + RecordPcInfo(instruction, dex_pc, slow_path, /* native_debug_info= */ true); } } void CodeGenerator::RecordCatchBlockInfo() { - ArenaAllocator* allocator = graph_->GetAllocator(); StackMapStream* stack_map_stream = GetStackMapStream(); for (HBasicBlock* block : *block_order_) { @@ -1107,30 +1177,23 @@ void CodeGenerator::RecordCatchBlockInfo() { uint32_t dex_pc = block->GetDexPc(); uint32_t num_vregs = graph_->GetNumberOfVRegs(); - uint32_t inlining_depth = 0; // Inlining of catch blocks is not supported at the moment. uint32_t native_pc = GetAddressOf(block); - uint32_t register_mask = 0; // Not used. - - // The stack mask is not used, so we leave it empty. 
- ArenaBitVector* stack_mask = - ArenaBitVector::Create(allocator, 0, /* expandable */ true, kArenaAllocCodeGenerator); stack_map_stream->BeginStackMapEntry(dex_pc, native_pc, - register_mask, - stack_mask, - num_vregs, - inlining_depth); + /* register_mask= */ 0, + /* sp_mask= */ nullptr, + StackMap::Kind::Catch); HInstruction* current_phi = block->GetFirstPhi(); for (size_t vreg = 0; vreg < num_vregs; ++vreg) { - while (current_phi != nullptr && current_phi->AsPhi()->GetRegNumber() < vreg) { - HInstruction* next_phi = current_phi->GetNext(); - DCHECK(next_phi == nullptr || - current_phi->AsPhi()->GetRegNumber() <= next_phi->AsPhi()->GetRegNumber()) - << "Phis need to be sorted by vreg number to keep this a linear-time loop."; - current_phi = next_phi; - } + while (current_phi != nullptr && current_phi->AsPhi()->GetRegNumber() < vreg) { + HInstruction* next_phi = current_phi->GetNext(); + DCHECK(next_phi == nullptr || + current_phi->AsPhi()->GetRegNumber() <= next_phi->AsPhi()->GetRegNumber()) + << "Phis need to be sorted by vreg number to keep this a linear-time loop."; + current_phi = next_phi; + } if (current_phi == nullptr || current_phi->AsPhi()->GetRegNumber() != vreg) { stack_map_stream->AddDexRegisterEntry(DexRegisterLocation::Kind::kNone, 0); @@ -1190,50 +1253,45 @@ void CodeGenerator::EmitEnvironment(HEnvironment* environment, SlowPathCode* slo continue; } + using Kind = DexRegisterLocation::Kind; Location location = environment->GetLocationAt(i); switch (location.GetKind()) { case Location::kConstant: { DCHECK_EQ(current, location.GetConstant()); if (current->IsLongConstant()) { int64_t value = current->AsLongConstant()->GetValue(); - stack_map_stream->AddDexRegisterEntry( - DexRegisterLocation::Kind::kConstant, Low32Bits(value)); - stack_map_stream->AddDexRegisterEntry( - DexRegisterLocation::Kind::kConstant, High32Bits(value)); + stack_map_stream->AddDexRegisterEntry(Kind::kConstant, Low32Bits(value)); + stack_map_stream->AddDexRegisterEntry(Kind::kConstant, High32Bits(value)); ++i; DCHECK_LT(i, environment_size); } else if (current->IsDoubleConstant()) { int64_t value = bit_cast<int64_t, double>(current->AsDoubleConstant()->GetValue()); - stack_map_stream->AddDexRegisterEntry( - DexRegisterLocation::Kind::kConstant, Low32Bits(value)); - stack_map_stream->AddDexRegisterEntry( - DexRegisterLocation::Kind::kConstant, High32Bits(value)); + stack_map_stream->AddDexRegisterEntry(Kind::kConstant, Low32Bits(value)); + stack_map_stream->AddDexRegisterEntry(Kind::kConstant, High32Bits(value)); ++i; DCHECK_LT(i, environment_size); } else if (current->IsIntConstant()) { int32_t value = current->AsIntConstant()->GetValue(); - stack_map_stream->AddDexRegisterEntry(DexRegisterLocation::Kind::kConstant, value); + stack_map_stream->AddDexRegisterEntry(Kind::kConstant, value); } else if (current->IsNullConstant()) { - stack_map_stream->AddDexRegisterEntry(DexRegisterLocation::Kind::kConstant, 0); + stack_map_stream->AddDexRegisterEntry(Kind::kConstant, 0); } else { DCHECK(current->IsFloatConstant()) << current->DebugName(); int32_t value = bit_cast<int32_t, float>(current->AsFloatConstant()->GetValue()); - stack_map_stream->AddDexRegisterEntry(DexRegisterLocation::Kind::kConstant, value); + stack_map_stream->AddDexRegisterEntry(Kind::kConstant, value); } break; } case Location::kStackSlot: { - stack_map_stream->AddDexRegisterEntry( - DexRegisterLocation::Kind::kInStack, location.GetStackIndex()); + stack_map_stream->AddDexRegisterEntry(Kind::kInStack, location.GetStackIndex()); break; } case 
Location::kDoubleStackSlot: { + stack_map_stream->AddDexRegisterEntry(Kind::kInStack, location.GetStackIndex()); stack_map_stream->AddDexRegisterEntry( - DexRegisterLocation::Kind::kInStack, location.GetStackIndex()); - stack_map_stream->AddDexRegisterEntry( - DexRegisterLocation::Kind::kInStack, location.GetHighStackIndex(kVRegSize)); + Kind::kInStack, location.GetHighStackIndex(kVRegSize)); ++i; DCHECK_LT(i, environment_size); break; @@ -1243,17 +1301,16 @@ void CodeGenerator::EmitEnvironment(HEnvironment* environment, SlowPathCode* slo int id = location.reg(); if (slow_path != nullptr && slow_path->IsCoreRegisterSaved(id)) { uint32_t offset = slow_path->GetStackOffsetOfCoreRegister(id); - stack_map_stream->AddDexRegisterEntry(DexRegisterLocation::Kind::kInStack, offset); + stack_map_stream->AddDexRegisterEntry(Kind::kInStack, offset); if (current->GetType() == DataType::Type::kInt64) { - stack_map_stream->AddDexRegisterEntry( - DexRegisterLocation::Kind::kInStack, offset + kVRegSize); + stack_map_stream->AddDexRegisterEntry(Kind::kInStack, offset + kVRegSize); ++i; DCHECK_LT(i, environment_size); } } else { - stack_map_stream->AddDexRegisterEntry(DexRegisterLocation::Kind::kInRegister, id); + stack_map_stream->AddDexRegisterEntry(Kind::kInRegister, id); if (current->GetType() == DataType::Type::kInt64) { - stack_map_stream->AddDexRegisterEntry(DexRegisterLocation::Kind::kInRegisterHigh, id); + stack_map_stream->AddDexRegisterEntry(Kind::kInRegisterHigh, id); ++i; DCHECK_LT(i, environment_size); } @@ -1265,18 +1322,16 @@ void CodeGenerator::EmitEnvironment(HEnvironment* environment, SlowPathCode* slo int id = location.reg(); if (slow_path != nullptr && slow_path->IsFpuRegisterSaved(id)) { uint32_t offset = slow_path->GetStackOffsetOfFpuRegister(id); - stack_map_stream->AddDexRegisterEntry(DexRegisterLocation::Kind::kInStack, offset); + stack_map_stream->AddDexRegisterEntry(Kind::kInStack, offset); if (current->GetType() == DataType::Type::kFloat64) { - stack_map_stream->AddDexRegisterEntry( - DexRegisterLocation::Kind::kInStack, offset + kVRegSize); + stack_map_stream->AddDexRegisterEntry(Kind::kInStack, offset + kVRegSize); ++i; DCHECK_LT(i, environment_size); } } else { - stack_map_stream->AddDexRegisterEntry(DexRegisterLocation::Kind::kInFpuRegister, id); + stack_map_stream->AddDexRegisterEntry(Kind::kInFpuRegister, id); if (current->GetType() == DataType::Type::kFloat64) { - stack_map_stream->AddDexRegisterEntry( - DexRegisterLocation::Kind::kInFpuRegisterHigh, id); + stack_map_stream->AddDexRegisterEntry(Kind::kInFpuRegisterHigh, id); ++i; DCHECK_LT(i, environment_size); } @@ -1289,16 +1344,16 @@ void CodeGenerator::EmitEnvironment(HEnvironment* environment, SlowPathCode* slo int high = location.high(); if (slow_path != nullptr && slow_path->IsFpuRegisterSaved(low)) { uint32_t offset = slow_path->GetStackOffsetOfFpuRegister(low); - stack_map_stream->AddDexRegisterEntry(DexRegisterLocation::Kind::kInStack, offset); + stack_map_stream->AddDexRegisterEntry(Kind::kInStack, offset); } else { - stack_map_stream->AddDexRegisterEntry(DexRegisterLocation::Kind::kInFpuRegister, low); + stack_map_stream->AddDexRegisterEntry(Kind::kInFpuRegister, low); } if (slow_path != nullptr && slow_path->IsFpuRegisterSaved(high)) { uint32_t offset = slow_path->GetStackOffsetOfFpuRegister(high); - stack_map_stream->AddDexRegisterEntry(DexRegisterLocation::Kind::kInStack, offset); + stack_map_stream->AddDexRegisterEntry(Kind::kInStack, offset); ++i; } else { - 
stack_map_stream->AddDexRegisterEntry(DexRegisterLocation::Kind::kInFpuRegister, high); + stack_map_stream->AddDexRegisterEntry(Kind::kInFpuRegister, high); ++i; } DCHECK_LT(i, environment_size); @@ -1310,15 +1365,15 @@ void CodeGenerator::EmitEnvironment(HEnvironment* environment, SlowPathCode* slo int high = location.high(); if (slow_path != nullptr && slow_path->IsCoreRegisterSaved(low)) { uint32_t offset = slow_path->GetStackOffsetOfCoreRegister(low); - stack_map_stream->AddDexRegisterEntry(DexRegisterLocation::Kind::kInStack, offset); + stack_map_stream->AddDexRegisterEntry(Kind::kInStack, offset); } else { - stack_map_stream->AddDexRegisterEntry(DexRegisterLocation::Kind::kInRegister, low); + stack_map_stream->AddDexRegisterEntry(Kind::kInRegister, low); } if (slow_path != nullptr && slow_path->IsCoreRegisterSaved(high)) { uint32_t offset = slow_path->GetStackOffsetOfCoreRegister(high); - stack_map_stream->AddDexRegisterEntry(DexRegisterLocation::Kind::kInStack, offset); + stack_map_stream->AddDexRegisterEntry(Kind::kInStack, offset); } else { - stack_map_stream->AddDexRegisterEntry(DexRegisterLocation::Kind::kInRegister, high); + stack_map_stream->AddDexRegisterEntry(Kind::kInRegister, high); } ++i; DCHECK_LT(i, environment_size); @@ -1326,7 +1381,7 @@ void CodeGenerator::EmitEnvironment(HEnvironment* environment, SlowPathCode* slo } case Location::kInvalid: { - stack_map_stream->AddDexRegisterEntry(DexRegisterLocation::Kind::kNone, 0); + stack_map_stream->AddDexRegisterEntry(Kind::kNone, 0); break; } @@ -1341,37 +1396,12 @@ void CodeGenerator::EmitEnvironment(HEnvironment* environment, SlowPathCode* slo } bool CodeGenerator::CanMoveNullCheckToUser(HNullCheck* null_check) { - HInstruction* first_next_not_move = null_check->GetNextDisregardingMoves(); - - return (first_next_not_move != nullptr) - && first_next_not_move->CanDoImplicitNullCheckOn(null_check->InputAt(0)); + return null_check->IsEmittedAtUseSite(); } void CodeGenerator::MaybeRecordImplicitNullCheck(HInstruction* instr) { - if (!compiler_options_.GetImplicitNullChecks()) { - return; - } - - // If we are from a static path don't record the pc as we can't throw NPE. - // NB: having the checks here makes the code much less verbose in the arch - // specific code generators. - if (instr->IsStaticFieldSet() || instr->IsStaticFieldGet()) { - return; - } - - if (!instr->CanDoImplicitNullCheckOn(instr->InputAt(0))) { - return; - } - - // Find the first previous instruction which is not a move. - HInstruction* first_prev_not_move = instr->GetPreviousDisregardingMoves(); - - // If the instruction is a null check it means that `instr` is the first user - // and needs to record the pc. - if (first_prev_not_move != nullptr && first_prev_not_move->IsNullCheck()) { - HNullCheck* null_check = first_prev_not_move->AsNullCheck(); - // TODO: The parallel moves modify the environment. Their changes need to be - // reverted otherwise the stack maps at the throw point will not be correct. + HNullCheck* null_check = instr->GetImplicitNullCheck(); + if (null_check != nullptr) { RecordPcInfo(null_check, null_check->GetDexPc()); } } @@ -1461,7 +1491,12 @@ void CodeGenerator::ValidateInvokeRuntime(QuickEntrypointEnum entrypoint, << " instruction->GetSideEffects().ToString()=" << instruction->GetSideEffects().ToString(); } else { - DCHECK(instruction->GetSideEffects().Includes(SideEffects::CanTriggerGC()) || + // 'CanTriggerGC' side effect is used to restrict optimization of instructions which depend + // on GC (e.g. 
IntermediateAddress) - to ensure they are not alive across GC points. However + // if execution never returns to the compiled code from a GC point this restriction is + // unnecessary - in particular for fatal slow paths which might trigger GC. + DCHECK((slow_path->IsFatal() && !instruction->GetLocations()->WillCall()) || + instruction->GetSideEffects().Includes(SideEffects::CanTriggerGC()) || // When (non-Baker) read barriers are enabled, some instructions // use a slow path to emit a read barrier, which does not trigger // GC. @@ -1519,7 +1554,7 @@ void CodeGenerator::ValidateInvokeRuntimeWithoutRecordingPcInfo(HInstruction* in void SlowPathCode::SaveLiveRegisters(CodeGenerator* codegen, LocationSummary* locations) { size_t stack_offset = codegen->GetFirstRegisterSlotInSlowPath(); - const uint32_t core_spills = codegen->GetSlowPathSpills(locations, /* core_registers */ true); + const uint32_t core_spills = codegen->GetSlowPathSpills(locations, /* core_registers= */ true); for (uint32_t i : LowToHighBits(core_spills)) { // If the register holds an object, update the stack mask. if (locations->RegisterContainsObject(i)) { @@ -1531,7 +1566,7 @@ void SlowPathCode::SaveLiveRegisters(CodeGenerator* codegen, LocationSummary* lo stack_offset += codegen->SaveCoreRegister(stack_offset, i); } - const uint32_t fp_spills = codegen->GetSlowPathSpills(locations, /* core_registers */ false); + const uint32_t fp_spills = codegen->GetSlowPathSpills(locations, /* core_registers= */ false); for (uint32_t i : LowToHighBits(fp_spills)) { DCHECK_LT(stack_offset, codegen->GetFrameSize() - codegen->FrameEntrySpillSize()); DCHECK_LT(i, kMaximumNumberOfExpectedRegisters); @@ -1543,14 +1578,14 @@ void SlowPathCode::SaveLiveRegisters(CodeGenerator* codegen, LocationSummary* lo void SlowPathCode::RestoreLiveRegisters(CodeGenerator* codegen, LocationSummary* locations) { size_t stack_offset = codegen->GetFirstRegisterSlotInSlowPath(); - const uint32_t core_spills = codegen->GetSlowPathSpills(locations, /* core_registers */ true); + const uint32_t core_spills = codegen->GetSlowPathSpills(locations, /* core_registers= */ true); for (uint32_t i : LowToHighBits(core_spills)) { DCHECK_LT(stack_offset, codegen->GetFrameSize() - codegen->FrameEntrySpillSize()); DCHECK_LT(i, kMaximumNumberOfExpectedRegisters); stack_offset += codegen->RestoreCoreRegister(stack_offset, i); } - const uint32_t fp_spills = codegen->GetSlowPathSpills(locations, /* core_registers */ false); + const uint32_t fp_spills = codegen->GetSlowPathSpills(locations, /* core_registers= */ false); for (uint32_t i : LowToHighBits(fp_spills)) { DCHECK_LT(stack_offset, codegen->GetFrameSize() - codegen->FrameEntrySpillSize()); DCHECK_LT(i, kMaximumNumberOfExpectedRegisters); @@ -1612,28 +1647,21 @@ void CodeGenerator::CreateSystemArrayCopyLocationSummary(HInvoke* invoke) { } void CodeGenerator::EmitJitRoots(uint8_t* code, - Handle<mirror::ObjectArray<mirror::Object>> roots, - const uint8_t* roots_data) { + const uint8_t* roots_data, + /*out*/std::vector<Handle<mirror::Object>>* roots) { code_generation_data_->EmitJitRoots(roots); EmitJitRootPatches(code, roots_data); } -QuickEntrypointEnum CodeGenerator::GetArrayAllocationEntrypoint(Handle<mirror::Class> array_klass) { - ScopedObjectAccess soa(Thread::Current()); - if (array_klass == nullptr) { - // This can only happen for non-primitive arrays, as primitive arrays can always - // be resolved. 
- return kQuickAllocArrayResolved32; - } - - switch (array_klass->GetComponentSize()) { - case 1: return kQuickAllocArrayResolved8; - case 2: return kQuickAllocArrayResolved16; - case 4: return kQuickAllocArrayResolved32; - case 8: return kQuickAllocArrayResolved64; +QuickEntrypointEnum CodeGenerator::GetArrayAllocationEntrypoint(HNewArray* new_array) { + switch (new_array->GetComponentSizeShift()) { + case 0: return kQuickAllocArrayResolved8; + case 1: return kQuickAllocArrayResolved16; + case 2: return kQuickAllocArrayResolved32; + case 3: return kQuickAllocArrayResolved64; } LOG(FATAL) << "Unreachable"; - return kQuickAllocArrayResolved; + UNREACHABLE(); } } // namespace art diff --git a/compiler/optimizing/code_generator.h b/compiler/optimizing/code_generator.h index f784a1a857..f70ecb612d 100644 --- a/compiler/optimizing/code_generator.h +++ b/compiler/optimizing/code_generator.h @@ -21,20 +21,20 @@ #include "arch/instruction_set_features.h" #include "base/arena_containers.h" #include "base/arena_object.h" +#include "base/array_ref.h" #include "base/bit_field.h" #include "base/bit_utils.h" #include "base/enums.h" +#include "base/globals.h" +#include "base/memory_region.h" #include "dex/string_reference.h" #include "dex/type_reference.h" -#include "globals.h" #include "graph_visualizer.h" #include "locations.h" -#include "memory_region.h" #include "nodes.h" #include "optimizing_compiler_stats.h" #include "read_barrier_option.h" #include "stack.h" -#include "stack_map.h" #include "utils/label.h" namespace art { @@ -59,7 +59,6 @@ static constexpr ReadBarrierOption kCompilerReadBarrierOption = class Assembler; class CodeGenerator; -class CompilerDriver; class CompilerOptions; class StackMapStream; class ParallelMoveResolver; @@ -74,6 +73,7 @@ class CodeAllocator { virtual ~CodeAllocator() {} virtual uint8_t* Allocate(size_t size) = 0; + virtual ArrayRef<const uint8_t> GetMemory() const = 0; private: DISALLOW_COPY_AND_ASSIGN(CodeAllocator); @@ -187,8 +187,6 @@ class CodeGenerator : public DeletableArenaObject<kArenaAllocCodeGenerator> { // Compiles the graph to executable instructions. void Compile(CodeAllocator* allocator); static std::unique_ptr<CodeGenerator> Create(HGraph* graph, - InstructionSet instruction_set, - const InstructionSetFeatures& isa_features, const CompilerOptions& compiler_options, OptimizingCompilerStats* stats = nullptr); virtual ~CodeGenerator(); @@ -210,6 +208,10 @@ class CodeGenerator : public DeletableArenaObject<kArenaAllocCodeGenerator> { virtual void Initialize() = 0; virtual void Finalize(CodeAllocator* allocator); virtual void EmitLinkerPatches(ArenaVector<linker::LinkerPatch>* linker_patches); + virtual bool NeedsThunkCode(const linker::LinkerPatch& patch) const; + virtual void EmitThunkCode(const linker::LinkerPatch& patch, + /*out*/ ArenaVector<uint8_t>* code, + /*out*/ std::string* debug_name); virtual void GenerateFrameEntry() = 0; virtual void GenerateFrameExit() = 0; virtual void Bind(HBasicBlock* block) = 0; @@ -318,7 +320,10 @@ class CodeGenerator : public DeletableArenaObject<kArenaAllocCodeGenerator> { } // Record native to dex mapping for a suspend point. Required by runtime. - void RecordPcInfo(HInstruction* instruction, uint32_t dex_pc, SlowPathCode* slow_path = nullptr); + void RecordPcInfo(HInstruction* instruction, + uint32_t dex_pc, + SlowPathCode* slow_path = nullptr, + bool native_debug_info = false); // Check whether we have already recorded mapping at this PC. 
bool HasStackMapAtCurrentPc(); // Record extra stack maps if we support native debugging. @@ -344,17 +349,14 @@ class CodeGenerator : public DeletableArenaObject<kArenaAllocCodeGenerator> { void AddSlowPath(SlowPathCode* slow_path); - void BuildStackMaps(MemoryRegion stack_map_region, - MemoryRegion method_info_region, - const DexFile::CodeItem* code_item_for_osr_check); - void ComputeStackMapAndMethodInfoSize(size_t* stack_map_size, size_t* method_info_size); + ScopedArenaVector<uint8_t> BuildStackMaps(const dex::CodeItem* code_item_for_osr_check); size_t GetNumberOfJitRoots() const; // Fills the `literals` array with literals collected during code generation. // Also emits literal patches. void EmitJitRoots(uint8_t* code, - Handle<mirror::ObjectArray<mirror::Object>> roots, - const uint8_t* roots_data) + const uint8_t* roots_data, + /*out*/std::vector<Handle<mirror::Object>>* roots) REQUIRES_SHARED(Locks::mutator_lock_); bool IsLeafMethod() const { @@ -438,6 +440,8 @@ class CodeGenerator : public DeletableArenaObject<kArenaAllocCodeGenerator> { case TypeCheckKind::kArrayCheck: case TypeCheckKind::kUnresolvedCheck: return false; + case TypeCheckKind::kBitstringCheck: + return true; } LOG(FATAL) << "Unreachable"; UNREACHABLE(); @@ -535,10 +539,13 @@ class CodeGenerator : public DeletableArenaObject<kArenaAllocCodeGenerator> { void GenerateInvokeStaticOrDirectRuntimeCall( HInvokeStaticOrDirect* invoke, Location temp, SlowPathCode* slow_path); + void GenerateInvokeUnresolvedRuntimeCall(HInvokeUnresolved* invoke); void GenerateInvokePolymorphicCall(HInvokePolymorphic* invoke); + void GenerateInvokeCustomCall(HInvokeCustom* invoke); + void CreateUnresolvedFieldLocationSummary( HInstruction* field_access, DataType::Type field_type, @@ -556,6 +563,20 @@ class CodeGenerator : public DeletableArenaObject<kArenaAllocCodeGenerator> { Location runtime_return_location); void GenerateLoadClassRuntimeCall(HLoadClass* cls); + static void CreateLoadMethodHandleRuntimeCallLocationSummary(HLoadMethodHandle* method_handle, + Location runtime_handle_index_location, + Location runtime_return_location); + void GenerateLoadMethodHandleRuntimeCall(HLoadMethodHandle* method_handle); + + static void CreateLoadMethodTypeRuntimeCallLocationSummary(HLoadMethodType* method_type, + Location runtime_type_index_location, + Location runtime_return_location); + void GenerateLoadMethodTypeRuntimeCall(HLoadMethodType* method_type); + + uint32_t GetBootImageOffset(HLoadClass* load_class); + uint32_t GetBootImageOffset(HLoadString* load_string); + uint32_t GetBootImageOffset(HInvokeStaticOrDirect* invoke); + static void CreateSystemArrayCopyLocationSummary(HInvoke* invoke); void SetDisassemblyInformation(DisassemblyInformation* info) { disasm_info_ = info; } @@ -600,7 +621,7 @@ class CodeGenerator : public DeletableArenaObject<kArenaAllocCodeGenerator> { // otherwise return a fall-back info that should be used instead. virtual HInvokeStaticOrDirect::DispatchInfo GetSupportedInvokeStaticOrDirectDispatch( const HInvokeStaticOrDirect::DispatchInfo& desired_dispatch_info, - HInvokeStaticOrDirect* invoke) = 0; + ArtMethod* method) = 0; // Generate a call to a static or direct method. 
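The header hunk above also reshapes the stack-map API: the removed ComputeStackMapAndMethodInfoSize / BuildStackMaps(MemoryRegion, MemoryRegion, ...) pair made the caller size and allocate the output up front, while the new BuildStackMaps(const dex::CodeItem*) returns an owning buffer. A rough standalone sketch of that pattern, using std::vector in place of ART's ScopedArenaVector; the function body is purely illustrative.

#include <cstdint>
#include <vector>

// New shape: one call, the callee owns the sizing and returns the encoded bytes.
std::vector<uint8_t> BuildStackMaps(/* const dex::CodeItem* code_item_for_osr_check */) {
  std::vector<uint8_t> encoded;
  encoded.push_back(0x42);  // Stand-in for the encoded stack map stream.
  return encoded;           // Moved out; no size query, no caller-provided MemoryRegion.
}

int main() {
  std::vector<uint8_t> stack_maps = BuildStackMaps();
  return stack_maps.empty() ? 1 : 0;
}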
virtual void GenerateStaticOrDirectCall( @@ -614,7 +635,7 @@ class CodeGenerator : public DeletableArenaObject<kArenaAllocCodeGenerator> { virtual void GenerateNop() = 0; - static QuickEntrypointEnum GetArrayAllocationEntrypoint(Handle<mirror::Class> array_klass); + static QuickEntrypointEnum GetArrayAllocationEntrypoint(HNewArray* new_array); protected: // Patch info used for recording locations of required linker patches and their targets, diff --git a/compiler/optimizing/code_generator_arm64.cc b/compiler/optimizing/code_generator_arm64.cc index 60f8f98757..3086882678 100644 --- a/compiler/optimizing/code_generator_arm64.cc +++ b/compiler/optimizing/code_generator_arm64.cc @@ -27,10 +27,10 @@ #include "entrypoints/quick/quick_entrypoints.h" #include "entrypoints/quick/quick_entrypoints_enum.h" #include "gc/accounting/card_table.h" +#include "gc/space/image_space.h" #include "heap_poisoning.h" #include "intrinsics.h" #include "intrinsics_arm64.h" -#include "linker/arm64/relative_patcher_arm64.h" #include "linker/linker_patch.h" #include "lock_word.h" #include "mirror/array-inl.h" @@ -64,12 +64,11 @@ using helpers::DRegisterFrom; using helpers::FPRegisterFrom; using helpers::HeapOperand; using helpers::HeapOperandFrom; -using helpers::InputCPURegisterAt; using helpers::InputCPURegisterOrZeroRegAt; using helpers::InputFPRegisterAt; using helpers::InputOperandAt; using helpers::InputRegisterAt; -using helpers::Int64ConstantFrom; +using helpers::Int64FromLocation; using helpers::IsConstantZeroBitPattern; using helpers::LocationFrom; using helpers::OperandFromMemOperand; @@ -90,25 +89,10 @@ static constexpr uint32_t kPackedSwitchCompareJumpThreshold = 7; // Reference load (except object array loads) is using LDR Wt, [Xn, #offset] which can handle // offset < 16KiB. For offsets >= 16KiB, the load shall be emitted as two or more instructions. -// For the Baker read barrier implementation using link-generated thunks we need to split +// For the Baker read barrier implementation using link-time generated thunks we need to split // the offset explicitly. constexpr uint32_t kReferenceLoadMinFarOffset = 16 * KB; -// Flags controlling the use of link-time generated thunks for Baker read barriers. -constexpr bool kBakerReadBarrierLinkTimeThunksEnableForFields = true; -constexpr bool kBakerReadBarrierLinkTimeThunksEnableForArrays = true; -constexpr bool kBakerReadBarrierLinkTimeThunksEnableForGcRoots = true; - -// Some instructions have special requirements for a temporary, for example -// LoadClass/kBssEntry and LoadString/kBssEntry for Baker read barrier require -// temp that's not an R0 (to avoid an extra move) and Baker read barrier field -// loads with large offsets need a fixed register to limit the number of link-time -// thunks we generate. For these and similar cases, we want to reserve a specific -// register that's neither callee-save nor an argument register. We choose x15. 
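The removed comment above explains why a fixed, non-argument temporary (x15) was reserved for some Baker read barrier loads, and the kReferenceLoadMinFarOffset constant earlier in this hunk records that a 32-bit LDR with an unsigned immediate only reaches offsets below 16 KiB. A minimal standalone sketch of the resulting condition, mirroring the HandleFieldGet locations logic later in this patch; the helper name is invented for illustration.

#include <cstdint>
#include <cstdio>

constexpr uint32_t kReferenceLoadMinFarOffset = 16 * 1024;

// With Baker read barriers, a temporary is only reserved when a single near LDR
// cannot be used directly: volatile loads and far offsets.
bool NeedsFixedTemp(bool is_volatile, uint32_t field_offset) {
  return is_volatile || field_offset >= kReferenceLoadMinFarOffset;
}

int main() {
  std::printf("%d %d %d\n",
              NeedsFixedTemp(false, 64),         // 0: near, non-volatile load.
              NeedsFixedTemp(false, 20 * 1024),  // 1: offset too big for one LDR.
              NeedsFixedTemp(true, 64));         // 1: volatile load.
  return 0;
}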
-inline Location FixedTempLocation() { - return Location::RegisterLocation(x15.GetCode()); -} - inline Condition ARM64Condition(IfCondition cond) { switch (cond) { case kCondEQ: return eq; @@ -165,6 +149,16 @@ Location InvokeRuntimeCallingConvention::GetReturnLocation(DataType::Type return return ARM64ReturnLocation(return_type); } +static RegisterSet OneRegInReferenceOutSaveEverythingCallerSaves() { + InvokeRuntimeCallingConvention calling_convention; + RegisterSet caller_saves = RegisterSet::Empty(); + caller_saves.Add(Location::RegisterLocation(calling_convention.GetRegisterAt(0).GetCode())); + DCHECK_EQ(calling_convention.GetRegisterAt(0).GetCode(), + RegisterFrom(calling_convention.GetReturnLocation(DataType::Type::kReference), + DataType::Type::kReference).GetCode()); + return caller_saves; +} + // NOLINT on __ macro to suppress wrong warning/fix (misc-macro-parentheses) from clang-tidy. #define __ down_cast<CodeGeneratorARM64*>(codegen)->GetVIXLAssembler()-> // NOLINT #define QUICK_ENTRY_POINT(x) QUICK_ENTRYPOINT_OFFSET(kArm64PointerSize, x).Int32Value() @@ -174,8 +168,8 @@ static void SaveRestoreLiveRegistersHelper(CodeGenerator* codegen, LocationSummary* locations, int64_t spill_offset, bool is_save) { - const uint32_t core_spills = codegen->GetSlowPathSpills(locations, /* core_registers */ true); - const uint32_t fp_spills = codegen->GetSlowPathSpills(locations, /* core_registers */ false); + const uint32_t core_spills = codegen->GetSlowPathSpills(locations, /* core_registers= */ true); + const uint32_t fp_spills = codegen->GetSlowPathSpills(locations, /* core_registers= */ false); DCHECK(ArtVixlRegCodeCoherentForRegSet(core_spills, codegen->GetNumberOfCoreRegisters(), fp_spills, @@ -218,7 +212,7 @@ static void SaveRestoreLiveRegistersHelper(CodeGenerator* codegen, void SlowPathCodeARM64::SaveLiveRegisters(CodeGenerator* codegen, LocationSummary* locations) { size_t stack_offset = codegen->GetFirstRegisterSlotInSlowPath(); - const uint32_t core_spills = codegen->GetSlowPathSpills(locations, /* core_registers */ true); + const uint32_t core_spills = codegen->GetSlowPathSpills(locations, /* core_registers= */ true); for (uint32_t i : LowToHighBits(core_spills)) { // If the register holds an object, update the stack mask. 
if (locations->RegisterContainsObject(i)) { @@ -230,7 +224,7 @@ void SlowPathCodeARM64::SaveLiveRegisters(CodeGenerator* codegen, LocationSummar stack_offset += kXRegSizeInBytes; } - const uint32_t fp_spills = codegen->GetSlowPathSpills(locations, /* core_registers */ false); + const uint32_t fp_spills = codegen->GetSlowPathSpills(locations, /* core_registers= */ false); for (uint32_t i : LowToHighBits(fp_spills)) { DCHECK_LT(stack_offset, codegen->GetFrameSize() - codegen->FrameEntrySpillSize()); DCHECK_LT(i, kMaximumNumberOfExpectedRegisters); @@ -240,20 +234,20 @@ void SlowPathCodeARM64::SaveLiveRegisters(CodeGenerator* codegen, LocationSummar SaveRestoreLiveRegistersHelper(codegen, locations, - codegen->GetFirstRegisterSlotInSlowPath(), true /* is_save */); + codegen->GetFirstRegisterSlotInSlowPath(), /* is_save= */ true); } void SlowPathCodeARM64::RestoreLiveRegisters(CodeGenerator* codegen, LocationSummary* locations) { SaveRestoreLiveRegistersHelper(codegen, locations, - codegen->GetFirstRegisterSlotInSlowPath(), false /* is_save */); + codegen->GetFirstRegisterSlotInSlowPath(), /* is_save= */ false); } class BoundsCheckSlowPathARM64 : public SlowPathCodeARM64 { public: explicit BoundsCheckSlowPathARM64(HBoundsCheck* instruction) : SlowPathCodeARM64(instruction) {} - void EmitNativeCode(CodeGenerator* codegen) OVERRIDE { + void EmitNativeCode(CodeGenerator* codegen) override { LocationSummary* locations = instruction_->GetLocations(); CodeGeneratorARM64* arm64_codegen = down_cast<CodeGeneratorARM64*>(codegen); @@ -279,9 +273,9 @@ class BoundsCheckSlowPathARM64 : public SlowPathCodeARM64 { CheckEntrypointTypes<kQuickThrowArrayBounds, void, int32_t, int32_t>(); } - bool IsFatal() const OVERRIDE { return true; } + bool IsFatal() const override { return true; } - const char* GetDescription() const OVERRIDE { return "BoundsCheckSlowPathARM64"; } + const char* GetDescription() const override { return "BoundsCheckSlowPathARM64"; } private: DISALLOW_COPY_AND_ASSIGN(BoundsCheckSlowPathARM64); @@ -291,16 +285,16 @@ class DivZeroCheckSlowPathARM64 : public SlowPathCodeARM64 { public: explicit DivZeroCheckSlowPathARM64(HDivZeroCheck* instruction) : SlowPathCodeARM64(instruction) {} - void EmitNativeCode(CodeGenerator* codegen) OVERRIDE { + void EmitNativeCode(CodeGenerator* codegen) override { CodeGeneratorARM64* arm64_codegen = down_cast<CodeGeneratorARM64*>(codegen); __ Bind(GetEntryLabel()); arm64_codegen->InvokeRuntime(kQuickThrowDivZero, instruction_, instruction_->GetDexPc(), this); CheckEntrypointTypes<kQuickThrowDivZero, void, void>(); } - bool IsFatal() const OVERRIDE { return true; } + bool IsFatal() const override { return true; } - const char* GetDescription() const OVERRIDE { return "DivZeroCheckSlowPathARM64"; } + const char* GetDescription() const override { return "DivZeroCheckSlowPathARM64"; } private: DISALLOW_COPY_AND_ASSIGN(DivZeroCheckSlowPathARM64); @@ -308,35 +302,41 @@ class DivZeroCheckSlowPathARM64 : public SlowPathCodeARM64 { class LoadClassSlowPathARM64 : public SlowPathCodeARM64 { public: - LoadClassSlowPathARM64(HLoadClass* cls, - HInstruction* at, - uint32_t dex_pc, - bool do_clinit) - : SlowPathCodeARM64(at), - cls_(cls), - dex_pc_(dex_pc), - do_clinit_(do_clinit) { + LoadClassSlowPathARM64(HLoadClass* cls, HInstruction* at) + : SlowPathCodeARM64(at), cls_(cls) { DCHECK(at->IsLoadClass() || at->IsClinitCheck()); + DCHECK_EQ(instruction_->IsLoadClass(), cls_ == instruction_); } - void EmitNativeCode(CodeGenerator* codegen) OVERRIDE { + void 
EmitNativeCode(CodeGenerator* codegen) override { LocationSummary* locations = instruction_->GetLocations(); Location out = locations->Out(); - CodeGeneratorARM64* arm64_codegen = down_cast<CodeGeneratorARM64*>(codegen); + const uint32_t dex_pc = instruction_->GetDexPc(); + bool must_resolve_type = instruction_->IsLoadClass() && cls_->MustResolveTypeOnSlowPath(); + bool must_do_clinit = instruction_->IsClinitCheck() || cls_->MustGenerateClinitCheck(); + CodeGeneratorARM64* arm64_codegen = down_cast<CodeGeneratorARM64*>(codegen); __ Bind(GetEntryLabel()); SaveLiveRegisters(codegen, locations); InvokeRuntimeCallingConvention calling_convention; - dex::TypeIndex type_index = cls_->GetTypeIndex(); - __ Mov(calling_convention.GetRegisterAt(0).W(), type_index.index_); - QuickEntrypointEnum entrypoint = do_clinit_ ? kQuickInitializeStaticStorage - : kQuickInitializeType; - arm64_codegen->InvokeRuntime(entrypoint, instruction_, dex_pc_, this); - if (do_clinit_) { - CheckEntrypointTypes<kQuickInitializeStaticStorage, void*, uint32_t>(); + if (must_resolve_type) { + DCHECK(IsSameDexFile(cls_->GetDexFile(), arm64_codegen->GetGraph()->GetDexFile())); + dex::TypeIndex type_index = cls_->GetTypeIndex(); + __ Mov(calling_convention.GetRegisterAt(0).W(), type_index.index_); + arm64_codegen->InvokeRuntime(kQuickResolveType, instruction_, dex_pc, this); + CheckEntrypointTypes<kQuickResolveType, void*, uint32_t>(); + // If we also must_do_clinit, the resolved type is now in the correct register. } else { - CheckEntrypointTypes<kQuickInitializeType, void*, uint32_t>(); + DCHECK(must_do_clinit); + Location source = instruction_->IsLoadClass() ? out : locations->InAt(0); + arm64_codegen->MoveLocation(LocationFrom(calling_convention.GetRegisterAt(0)), + source, + cls_->GetType()); + } + if (must_do_clinit) { + arm64_codegen->InvokeRuntime(kQuickInitializeStaticStorage, instruction_, dex_pc, this); + CheckEntrypointTypes<kQuickInitializeStaticStorage, void*, mirror::Class*>(); } // Move the class to the desired location. @@ -349,18 +349,12 @@ class LoadClassSlowPathARM64 : public SlowPathCodeARM64 { __ B(GetExitLabel()); } - const char* GetDescription() const OVERRIDE { return "LoadClassSlowPathARM64"; } + const char* GetDescription() const override { return "LoadClassSlowPathARM64"; } private: // The class this slow path will load. HLoadClass* const cls_; - // The dex PC of `at_`. - const uint32_t dex_pc_; - - // Whether to initialize the class. 
- const bool do_clinit_; - DISALLOW_COPY_AND_ASSIGN(LoadClassSlowPathARM64); }; @@ -369,7 +363,7 @@ class LoadStringSlowPathARM64 : public SlowPathCodeARM64 { explicit LoadStringSlowPathARM64(HLoadString* instruction) : SlowPathCodeARM64(instruction) {} - void EmitNativeCode(CodeGenerator* codegen) OVERRIDE { + void EmitNativeCode(CodeGenerator* codegen) override { LocationSummary* locations = instruction_->GetLocations(); DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(locations->Out().reg())); CodeGeneratorARM64* arm64_codegen = down_cast<CodeGeneratorARM64*>(codegen); @@ -390,7 +384,7 @@ class LoadStringSlowPathARM64 : public SlowPathCodeARM64 { __ B(GetExitLabel()); } - const char* GetDescription() const OVERRIDE { return "LoadStringSlowPathARM64"; } + const char* GetDescription() const override { return "LoadStringSlowPathARM64"; } private: DISALLOW_COPY_AND_ASSIGN(LoadStringSlowPathARM64); @@ -400,7 +394,7 @@ class NullCheckSlowPathARM64 : public SlowPathCodeARM64 { public: explicit NullCheckSlowPathARM64(HNullCheck* instr) : SlowPathCodeARM64(instr) {} - void EmitNativeCode(CodeGenerator* codegen) OVERRIDE { + void EmitNativeCode(CodeGenerator* codegen) override { CodeGeneratorARM64* arm64_codegen = down_cast<CodeGeneratorARM64*>(codegen); __ Bind(GetEntryLabel()); if (instruction_->CanThrowIntoCatchBlock()) { @@ -414,9 +408,9 @@ class NullCheckSlowPathARM64 : public SlowPathCodeARM64 { CheckEntrypointTypes<kQuickThrowNullPointer, void, void>(); } - bool IsFatal() const OVERRIDE { return true; } + bool IsFatal() const override { return true; } - const char* GetDescription() const OVERRIDE { return "NullCheckSlowPathARM64"; } + const char* GetDescription() const override { return "NullCheckSlowPathARM64"; } private: DISALLOW_COPY_AND_ASSIGN(NullCheckSlowPathARM64); @@ -427,7 +421,7 @@ class SuspendCheckSlowPathARM64 : public SlowPathCodeARM64 { SuspendCheckSlowPathARM64(HSuspendCheck* instruction, HBasicBlock* successor) : SlowPathCodeARM64(instruction), successor_(successor) {} - void EmitNativeCode(CodeGenerator* codegen) OVERRIDE { + void EmitNativeCode(CodeGenerator* codegen) override { LocationSummary* locations = instruction_->GetLocations(); CodeGeneratorARM64* arm64_codegen = down_cast<CodeGeneratorARM64*>(codegen); __ Bind(GetEntryLabel()); @@ -451,7 +445,7 @@ class SuspendCheckSlowPathARM64 : public SlowPathCodeARM64 { return successor_; } - const char* GetDescription() const OVERRIDE { return "SuspendCheckSlowPathARM64"; } + const char* GetDescription() const override { return "SuspendCheckSlowPathARM64"; } private: // If not null, the block to branch to after the suspend check. 
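The repeated OVERRIDE -> override churn across these slow-path classes is mechanical: the C++11 keyword replaces ART's OVERRIDE macro, and both make the compiler verify that a method really overrides a base-class virtual. A minimal standalone illustration; the class names are made up.

#include <iostream>

class SlowPathBase {
 public:
  virtual ~SlowPathBase() = default;
  virtual const char* GetDescription() const { return "SlowPathBase"; }
};

class MySlowPath : public SlowPathBase {
 public:
  // With `override`, signature drift (e.g. dropping the trailing const) becomes
  // a compile error instead of a silently introduced new virtual.
  const char* GetDescription() const override { return "MySlowPath"; }
};

int main() {
  MySlowPath path;
  SlowPathBase* base = &path;
  std::cout << base->GetDescription() << '\n';  // Prints "MySlowPath".
  return 0;
}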
@@ -468,7 +462,7 @@ class TypeCheckSlowPathARM64 : public SlowPathCodeARM64 { TypeCheckSlowPathARM64(HInstruction* instruction, bool is_fatal) : SlowPathCodeARM64(instruction), is_fatal_(is_fatal) {} - void EmitNativeCode(CodeGenerator* codegen) OVERRIDE { + void EmitNativeCode(CodeGenerator* codegen) override { LocationSummary* locations = instruction_->GetLocations(); DCHECK(instruction_->IsCheckCast() @@ -509,8 +503,8 @@ class TypeCheckSlowPathARM64 : public SlowPathCodeARM64 { } } - const char* GetDescription() const OVERRIDE { return "TypeCheckSlowPathARM64"; } - bool IsFatal() const OVERRIDE { return is_fatal_; } + const char* GetDescription() const override { return "TypeCheckSlowPathARM64"; } + bool IsFatal() const override { return is_fatal_; } private: const bool is_fatal_; @@ -523,7 +517,7 @@ class DeoptimizationSlowPathARM64 : public SlowPathCodeARM64 { explicit DeoptimizationSlowPathARM64(HDeoptimize* instruction) : SlowPathCodeARM64(instruction) {} - void EmitNativeCode(CodeGenerator* codegen) OVERRIDE { + void EmitNativeCode(CodeGenerator* codegen) override { CodeGeneratorARM64* arm64_codegen = down_cast<CodeGeneratorARM64*>(codegen); __ Bind(GetEntryLabel()); LocationSummary* locations = instruction_->GetLocations(); @@ -535,7 +529,7 @@ class DeoptimizationSlowPathARM64 : public SlowPathCodeARM64 { CheckEntrypointTypes<kQuickDeoptimize, void, DeoptimizationKind>(); } - const char* GetDescription() const OVERRIDE { return "DeoptimizationSlowPathARM64"; } + const char* GetDescription() const override { return "DeoptimizationSlowPathARM64"; } private: DISALLOW_COPY_AND_ASSIGN(DeoptimizationSlowPathARM64); @@ -545,7 +539,7 @@ class ArraySetSlowPathARM64 : public SlowPathCodeARM64 { public: explicit ArraySetSlowPathARM64(HInstruction* instruction) : SlowPathCodeARM64(instruction) {} - void EmitNativeCode(CodeGenerator* codegen) OVERRIDE { + void EmitNativeCode(CodeGenerator* codegen) override { LocationSummary* locations = instruction_->GetLocations(); __ Bind(GetEntryLabel()); SaveLiveRegisters(codegen, locations); @@ -576,7 +570,7 @@ class ArraySetSlowPathARM64 : public SlowPathCodeARM64 { __ B(GetExitLabel()); } - const char* GetDescription() const OVERRIDE { return "ArraySetSlowPathARM64"; } + const char* GetDescription() const override { return "ArraySetSlowPathARM64"; } private: DISALLOW_COPY_AND_ASSIGN(ArraySetSlowPathARM64); @@ -605,503 +599,6 @@ void JumpTableARM64::EmitTable(CodeGeneratorARM64* codegen) { } } -// Abstract base class for read barrier slow paths marking a reference -// `ref`. -// -// Argument `entrypoint` must be a register location holding the read -// barrier marking runtime entry point to be invoked or an empty -// location; in the latter case, the read barrier marking runtime -// entry point will be loaded by the slow path code itself. -class ReadBarrierMarkSlowPathBaseARM64 : public SlowPathCodeARM64 { - protected: - ReadBarrierMarkSlowPathBaseARM64(HInstruction* instruction, Location ref, Location entrypoint) - : SlowPathCodeARM64(instruction), ref_(ref), entrypoint_(entrypoint) { - DCHECK(kEmitCompilerReadBarrier); - } - - const char* GetDescription() const OVERRIDE { return "ReadBarrierMarkSlowPathBaseARM64"; } - - // Generate assembly code calling the read barrier marking runtime - // entry point (ReadBarrierMarkRegX). - void GenerateReadBarrierMarkRuntimeCall(CodeGenerator* codegen) { - // No need to save live registers; it's taken care of by the - // entrypoint. 
Also, there is no need to update the stack mask, - // as this runtime call will not trigger a garbage collection. - CodeGeneratorARM64* arm64_codegen = down_cast<CodeGeneratorARM64*>(codegen); - DCHECK_NE(ref_.reg(), LR); - DCHECK_NE(ref_.reg(), WSP); - DCHECK_NE(ref_.reg(), WZR); - // IP0 is used internally by the ReadBarrierMarkRegX entry point - // as a temporary, it cannot be the entry point's input/output. - DCHECK_NE(ref_.reg(), IP0); - DCHECK(0 <= ref_.reg() && ref_.reg() < kNumberOfWRegisters) << ref_.reg(); - // "Compact" slow path, saving two moves. - // - // Instead of using the standard runtime calling convention (input - // and output in W0): - // - // W0 <- ref - // W0 <- ReadBarrierMark(W0) - // ref <- W0 - // - // we just use rX (the register containing `ref`) as input and output - // of a dedicated entrypoint: - // - // rX <- ReadBarrierMarkRegX(rX) - // - if (entrypoint_.IsValid()) { - arm64_codegen->ValidateInvokeRuntimeWithoutRecordingPcInfo(instruction_, this); - __ Blr(XRegisterFrom(entrypoint_)); - } else { - // Entrypoint is not already loaded, load from the thread. - int32_t entry_point_offset = - Thread::ReadBarrierMarkEntryPointsOffset<kArm64PointerSize>(ref_.reg()); - // This runtime call does not require a stack map. - arm64_codegen->InvokeRuntimeWithoutRecordingPcInfo(entry_point_offset, instruction_, this); - } - } - - // The location (register) of the marked object reference. - const Location ref_; - - // The location of the entrypoint if it is already loaded. - const Location entrypoint_; - - private: - DISALLOW_COPY_AND_ASSIGN(ReadBarrierMarkSlowPathBaseARM64); -}; - -// Slow path marking an object reference `ref` during a read -// barrier. The field `obj.field` in the object `obj` holding this -// reference does not get updated by this slow path after marking. -// -// This means that after the execution of this slow path, `ref` will -// always be up-to-date, but `obj.field` may not; i.e., after the -// flip, `ref` will be a to-space reference, but `obj.field` will -// probably still be a from-space reference (unless it gets updated by -// another thread, or if another thread installed another object -// reference (different from `ref`) in `obj.field`). -// -// Argument `entrypoint` must be a register location holding the read -// barrier marking runtime entry point to be invoked or an empty -// location; in the latter case, the read barrier marking runtime -// entry point will be loaded by the slow path code itself. 
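The comment block above describes the "compact" calling convention of the removed marking slow paths: each core register rX has its own ReadBarrierMarkRegX entrypoint that takes and returns the reference in rX, and when no entrypoint register was pre-loaded the slow path fetches it from a per-register slot on the current Thread via Thread::ReadBarrierMarkEntryPointsOffset<kArm64PointerSize>(ref_.reg()). A rough standalone model of that per-register dispatch; the FakeThread layout and names are invented for illustration.

#include <array>
#include <cstdint>
#include <cstdio>

using MarkFn = uintptr_t (*)(uintptr_t ref);  // rX in, rX out: rX <- ReadBarrierMarkRegX(rX).

struct FakeThread {
  // One entrypoint per usable core register, so compiled code never has to move
  // the reference into a fixed argument register before the call.
  std::array<MarkFn, 30> read_barrier_mark_entrypoints{};
};

uintptr_t MarkReg5(uintptr_t ref) {
  std::printf("marking reference held in w5: 0x%08lx\n", static_cast<unsigned long>(ref));
  return ref;  // A real entrypoint would return the to-space address.
}

int main() {
  FakeThread self;
  self.read_barrier_mark_entrypoints[5] = MarkReg5;
  uintptr_t ref_in_w5 = 0x7000beef;
  // Slow path: load the entrypoint matching the register holding `ref` and call it.
  ref_in_w5 = self.read_barrier_mark_entrypoints[5](ref_in_w5);
  return ref_in_w5 == 0 ? 1 : 0;
}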
-class ReadBarrierMarkSlowPathARM64 : public ReadBarrierMarkSlowPathBaseARM64 { - public: - ReadBarrierMarkSlowPathARM64(HInstruction* instruction, - Location ref, - Location entrypoint = Location::NoLocation()) - : ReadBarrierMarkSlowPathBaseARM64(instruction, ref, entrypoint) { - DCHECK(kEmitCompilerReadBarrier); - } - - const char* GetDescription() const OVERRIDE { return "ReadBarrierMarkSlowPathARM64"; } - - void EmitNativeCode(CodeGenerator* codegen) OVERRIDE { - LocationSummary* locations = instruction_->GetLocations(); - DCHECK(locations->CanCall()); - DCHECK(ref_.IsRegister()) << ref_; - DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(ref_.reg())) << ref_.reg(); - DCHECK(instruction_->IsLoadClass() || instruction_->IsLoadString()) - << "Unexpected instruction in read barrier marking slow path: " - << instruction_->DebugName(); - - __ Bind(GetEntryLabel()); - GenerateReadBarrierMarkRuntimeCall(codegen); - __ B(GetExitLabel()); - } - - private: - DISALLOW_COPY_AND_ASSIGN(ReadBarrierMarkSlowPathARM64); -}; - -// Slow path loading `obj`'s lock word, loading a reference from -// object `*(obj + offset + (index << scale_factor))` into `ref`, and -// marking `ref` if `obj` is gray according to the lock word (Baker -// read barrier). The field `obj.field` in the object `obj` holding -// this reference does not get updated by this slow path after marking -// (see LoadReferenceWithBakerReadBarrierAndUpdateFieldSlowPathARM64 -// below for that). -// -// This means that after the execution of this slow path, `ref` will -// always be up-to-date, but `obj.field` may not; i.e., after the -// flip, `ref` will be a to-space reference, but `obj.field` will -// probably still be a from-space reference (unless it gets updated by -// another thread, or if another thread installed another object -// reference (different from `ref`) in `obj.field`). -// -// Argument `entrypoint` must be a register location holding the read -// barrier marking runtime entry point to be invoked or an empty -// location; in the latter case, the read barrier marking runtime -// entry point will be loaded by the slow path code itself. 
-class LoadReferenceWithBakerReadBarrierSlowPathARM64 : public ReadBarrierMarkSlowPathBaseARM64 { - public: - LoadReferenceWithBakerReadBarrierSlowPathARM64(HInstruction* instruction, - Location ref, - Register obj, - uint32_t offset, - Location index, - size_t scale_factor, - bool needs_null_check, - bool use_load_acquire, - Register temp, - Location entrypoint = Location::NoLocation()) - : ReadBarrierMarkSlowPathBaseARM64(instruction, ref, entrypoint), - obj_(obj), - offset_(offset), - index_(index), - scale_factor_(scale_factor), - needs_null_check_(needs_null_check), - use_load_acquire_(use_load_acquire), - temp_(temp) { - DCHECK(kEmitCompilerReadBarrier); - DCHECK(kUseBakerReadBarrier); - } - - const char* GetDescription() const OVERRIDE { - return "LoadReferenceWithBakerReadBarrierSlowPathARM64"; - } - - void EmitNativeCode(CodeGenerator* codegen) OVERRIDE { - LocationSummary* locations = instruction_->GetLocations(); - DCHECK(locations->CanCall()); - DCHECK(ref_.IsRegister()) << ref_; - DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(ref_.reg())) << ref_.reg(); - DCHECK(obj_.IsW()); - DCHECK_NE(ref_.reg(), LocationFrom(temp_).reg()); - DCHECK(instruction_->IsInstanceFieldGet() || - instruction_->IsStaticFieldGet() || - instruction_->IsArrayGet() || - instruction_->IsArraySet() || - instruction_->IsInstanceOf() || - instruction_->IsCheckCast() || - (instruction_->IsInvokeVirtual() && instruction_->GetLocations()->Intrinsified()) || - (instruction_->IsInvokeStaticOrDirect() && instruction_->GetLocations()->Intrinsified())) - << "Unexpected instruction in read barrier marking slow path: " - << instruction_->DebugName(); - // The read barrier instrumentation of object ArrayGet - // instructions does not support the HIntermediateAddress - // instruction. - DCHECK(!(instruction_->IsArrayGet() && - instruction_->AsArrayGet()->GetArray()->IsIntermediateAddress())); - - // Temporary register `temp_`, used to store the lock word, must - // not be IP0 nor IP1, as we may use them to emit the reference - // load (in the call to GenerateRawReferenceLoad below), and we - // need the lock word to still be in `temp_` after the reference - // load. - DCHECK_NE(LocationFrom(temp_).reg(), IP0); - DCHECK_NE(LocationFrom(temp_).reg(), IP1); - - __ Bind(GetEntryLabel()); - - // When using MaybeGenerateReadBarrierSlow, the read barrier call is - // inserted after the original load. However, in fast path based - // Baker's read barriers, we need to perform the load of - // mirror::Object::monitor_ *before* the original reference load. - // This load-load ordering is required by the read barrier. - // The slow path (for Baker's algorithm) should look like: - // - // uint32_t rb_state = Lockword(obj->monitor_).ReadBarrierState(); - // lfence; // Load fence or artificial data dependency to prevent load-load reordering - // HeapReference<mirror::Object> ref = *src; // Original reference load. - // bool is_gray = (rb_state == ReadBarrier::GrayState()); - // if (is_gray) { - // ref = entrypoint(ref); // ref = ReadBarrier::Mark(ref); // Runtime entry point call. - // } - // - // Note: the original implementation in ReadBarrier::Barrier is - // slightly more complex as it performs additional checks that we do - // not do here for performance reasons. 
- - // /* int32_t */ monitor = obj->monitor_ - uint32_t monitor_offset = mirror::Object::MonitorOffset().Int32Value(); - __ Ldr(temp_, HeapOperand(obj_, monitor_offset)); - if (needs_null_check_) { - codegen->MaybeRecordImplicitNullCheck(instruction_); - } - // /* LockWord */ lock_word = LockWord(monitor) - static_assert(sizeof(LockWord) == sizeof(int32_t), - "art::LockWord and int32_t have different sizes."); - - // Introduce a dependency on the lock_word including rb_state, - // to prevent load-load reordering, and without using - // a memory barrier (which would be more expensive). - // `obj` is unchanged by this operation, but its value now depends - // on `temp`. - __ Add(obj_.X(), obj_.X(), Operand(temp_.X(), LSR, 32)); - - // The actual reference load. - // A possible implicit null check has already been handled above. - CodeGeneratorARM64* arm64_codegen = down_cast<CodeGeneratorARM64*>(codegen); - arm64_codegen->GenerateRawReferenceLoad(instruction_, - ref_, - obj_, - offset_, - index_, - scale_factor_, - /* needs_null_check */ false, - use_load_acquire_); - - // Mark the object `ref` when `obj` is gray. - // - // if (rb_state == ReadBarrier::GrayState()) - // ref = ReadBarrier::Mark(ref); - // - // Given the numeric representation, it's enough to check the low bit of the rb_state. - static_assert(ReadBarrier::WhiteState() == 0, "Expecting white to have value 0"); - static_assert(ReadBarrier::GrayState() == 1, "Expecting gray to have value 1"); - __ Tbz(temp_, LockWord::kReadBarrierStateShift, GetExitLabel()); - GenerateReadBarrierMarkRuntimeCall(codegen); - - __ B(GetExitLabel()); - } - - private: - // The register containing the object holding the marked object reference field. - Register obj_; - // The offset, index and scale factor to access the reference in `obj_`. - uint32_t offset_; - Location index_; - size_t scale_factor_; - // Is a null check required? - bool needs_null_check_; - // Should this reference load use Load-Acquire semantics? - bool use_load_acquire_; - // A temporary register used to hold the lock word of `obj_`. - Register temp_; - - DISALLOW_COPY_AND_ASSIGN(LoadReferenceWithBakerReadBarrierSlowPathARM64); -}; - -// Slow path loading `obj`'s lock word, loading a reference from -// object `*(obj + offset + (index << scale_factor))` into `ref`, and -// marking `ref` if `obj` is gray according to the lock word (Baker -// read barrier). If needed, this slow path also atomically updates -// the field `obj.field` in the object `obj` holding this reference -// after marking (contrary to -// LoadReferenceWithBakerReadBarrierSlowPathARM64 above, which never -// tries to update `obj.field`). -// -// This means that after the execution of this slow path, both `ref` -// and `obj.field` will be up-to-date; i.e., after the flip, both will -// hold the same to-space reference (unless another thread installed -// another object reference (different from `ref`) in `obj.field`). -// -// Argument `entrypoint` must be a register location holding the read -// barrier marking runtime entry point to be invoked or an empty -// location; in the latter case, the read barrier marking runtime -// entry point will be loaded by the slow path code itself. 
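The class comment ending above distinguishes this removed slow path from the plain marking one: after marking, it also CASes the holder's field from the old reference to the marked one, tolerating a lost race with another mutator. A condensed standalone sketch of that contract using std::atomic; the Mark placeholder stands in for the ReadBarrierMarkRegX call.

#include <atomic>
#include <cstdint>

using Ref = uintptr_t;

Ref Mark(Ref old_ref) {
  return old_ref;  // Placeholder: a real entrypoint returns the to-space address.
}

void MarkAndUpdateField(std::atomic<Ref>* field, bool is_gray) {
  Ref old_ref = field->load(std::memory_order_relaxed);  // Original reference load.
  if (!is_gray) {
    return;  // Not gray: nothing to mark, nothing to update.
  }
  Ref marked = Mark(old_ref);
  if (marked != old_ref) {
    // Strong CAS with relaxed ordering, like the LDXR/CMP/STXR loop the removed
    // slow path emits. If another thread already replaced the field, the CAS
    // fails and the field is left as-is.
    field->compare_exchange_strong(old_ref, marked, std::memory_order_relaxed);
  }
}

int main() {
  std::atomic<Ref> field{0x1000};
  MarkAndUpdateField(&field, /* is_gray= */ true);
  return 0;
}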
-class LoadReferenceWithBakerReadBarrierAndUpdateFieldSlowPathARM64 - : public ReadBarrierMarkSlowPathBaseARM64 { - public: - LoadReferenceWithBakerReadBarrierAndUpdateFieldSlowPathARM64( - HInstruction* instruction, - Location ref, - Register obj, - uint32_t offset, - Location index, - size_t scale_factor, - bool needs_null_check, - bool use_load_acquire, - Register temp, - Location entrypoint = Location::NoLocation()) - : ReadBarrierMarkSlowPathBaseARM64(instruction, ref, entrypoint), - obj_(obj), - offset_(offset), - index_(index), - scale_factor_(scale_factor), - needs_null_check_(needs_null_check), - use_load_acquire_(use_load_acquire), - temp_(temp) { - DCHECK(kEmitCompilerReadBarrier); - DCHECK(kUseBakerReadBarrier); - } - - const char* GetDescription() const OVERRIDE { - return "LoadReferenceWithBakerReadBarrierAndUpdateFieldSlowPathARM64"; - } - - void EmitNativeCode(CodeGenerator* codegen) OVERRIDE { - LocationSummary* locations = instruction_->GetLocations(); - Register ref_reg = WRegisterFrom(ref_); - DCHECK(locations->CanCall()); - DCHECK(ref_.IsRegister()) << ref_; - DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(ref_.reg())) << ref_.reg(); - DCHECK(obj_.IsW()); - DCHECK_NE(ref_.reg(), LocationFrom(temp_).reg()); - - // This slow path is only used by the UnsafeCASObject intrinsic at the moment. - DCHECK((instruction_->IsInvokeVirtual() && instruction_->GetLocations()->Intrinsified())) - << "Unexpected instruction in read barrier marking and field updating slow path: " - << instruction_->DebugName(); - DCHECK(instruction_->GetLocations()->Intrinsified()); - DCHECK_EQ(instruction_->AsInvoke()->GetIntrinsic(), Intrinsics::kUnsafeCASObject); - DCHECK_EQ(offset_, 0u); - DCHECK_EQ(scale_factor_, 0u); - DCHECK_EQ(use_load_acquire_, false); - // The location of the offset of the marked reference field within `obj_`. - Location field_offset = index_; - DCHECK(field_offset.IsRegister()) << field_offset; - - // Temporary register `temp_`, used to store the lock word, must - // not be IP0 nor IP1, as we may use them to emit the reference - // load (in the call to GenerateRawReferenceLoad below), and we - // need the lock word to still be in `temp_` after the reference - // load. - DCHECK_NE(LocationFrom(temp_).reg(), IP0); - DCHECK_NE(LocationFrom(temp_).reg(), IP1); - - __ Bind(GetEntryLabel()); - - // The implementation is similar to LoadReferenceWithBakerReadBarrierSlowPathARM64's: - // - // uint32_t rb_state = Lockword(obj->monitor_).ReadBarrierState(); - // lfence; // Load fence or artificial data dependency to prevent load-load reordering - // HeapReference<mirror::Object> ref = *src; // Original reference load. - // bool is_gray = (rb_state == ReadBarrier::GrayState()); - // if (is_gray) { - // old_ref = ref; - // ref = entrypoint(ref); // ref = ReadBarrier::Mark(ref); // Runtime entry point call. - // compareAndSwapObject(obj, field_offset, old_ref, ref); - // } - - // /* int32_t */ monitor = obj->monitor_ - uint32_t monitor_offset = mirror::Object::MonitorOffset().Int32Value(); - __ Ldr(temp_, HeapOperand(obj_, monitor_offset)); - if (needs_null_check_) { - codegen->MaybeRecordImplicitNullCheck(instruction_); - } - // /* LockWord */ lock_word = LockWord(monitor) - static_assert(sizeof(LockWord) == sizeof(int32_t), - "art::LockWord and int32_t have different sizes."); - - // Introduce a dependency on the lock_word including rb_state, - // to prevent load-load reordering, and without using - // a memory barrier (which would be more expensive). 
- // `obj` is unchanged by this operation, but its value now depends - // on `temp`. - __ Add(obj_.X(), obj_.X(), Operand(temp_.X(), LSR, 32)); - - // The actual reference load. - // A possible implicit null check has already been handled above. - CodeGeneratorARM64* arm64_codegen = down_cast<CodeGeneratorARM64*>(codegen); - arm64_codegen->GenerateRawReferenceLoad(instruction_, - ref_, - obj_, - offset_, - index_, - scale_factor_, - /* needs_null_check */ false, - use_load_acquire_); - - // Mark the object `ref` when `obj` is gray. - // - // if (rb_state == ReadBarrier::GrayState()) - // ref = ReadBarrier::Mark(ref); - // - // Given the numeric representation, it's enough to check the low bit of the rb_state. - static_assert(ReadBarrier::WhiteState() == 0, "Expecting white to have value 0"); - static_assert(ReadBarrier::GrayState() == 1, "Expecting gray to have value 1"); - __ Tbz(temp_, LockWord::kReadBarrierStateShift, GetExitLabel()); - - // Save the old value of the reference before marking it. - // Note that we cannot use IP to save the old reference, as IP is - // used internally by the ReadBarrierMarkRegX entry point, and we - // need the old reference after the call to that entry point. - DCHECK_NE(LocationFrom(temp_).reg(), IP0); - __ Mov(temp_.W(), ref_reg); - - GenerateReadBarrierMarkRuntimeCall(codegen); - - // If the new reference is different from the old reference, - // update the field in the holder (`*(obj_ + field_offset)`). - // - // Note that this field could also hold a different object, if - // another thread had concurrently changed it. In that case, the - // LDXR/CMP/BNE sequence of instructions in the compare-and-set - // (CAS) operation below would abort the CAS, leaving the field - // as-is. - __ Cmp(temp_.W(), ref_reg); - __ B(eq, GetExitLabel()); - - // Update the the holder's field atomically. This may fail if - // mutator updates before us, but it's OK. This is achieved - // using a strong compare-and-set (CAS) operation with relaxed - // memory synchronization ordering, where the expected value is - // the old reference and the desired value is the new reference. - - MacroAssembler* masm = arm64_codegen->GetVIXLAssembler(); - UseScratchRegisterScope temps(masm); - - // Convenience aliases. - Register base = obj_.W(); - Register offset = XRegisterFrom(field_offset); - Register expected = temp_.W(); - Register value = ref_reg; - Register tmp_ptr = temps.AcquireX(); // Pointer to actual memory. - Register tmp_value = temps.AcquireW(); // Value in memory. - - __ Add(tmp_ptr, base.X(), Operand(offset)); - - if (kPoisonHeapReferences) { - arm64_codegen->GetAssembler()->PoisonHeapReference(expected); - if (value.Is(expected)) { - // Do not poison `value`, as it is the same register as - // `expected`, which has just been poisoned. 
- } else { - arm64_codegen->GetAssembler()->PoisonHeapReference(value); - } - } - - // do { - // tmp_value = [tmp_ptr] - expected; - // } while (tmp_value == 0 && failure([tmp_ptr] <- r_new_value)); - - vixl::aarch64::Label loop_head, comparison_failed, exit_loop; - __ Bind(&loop_head); - __ Ldxr(tmp_value, MemOperand(tmp_ptr)); - __ Cmp(tmp_value, expected); - __ B(&comparison_failed, ne); - __ Stxr(tmp_value, value, MemOperand(tmp_ptr)); - __ Cbnz(tmp_value, &loop_head); - __ B(&exit_loop); - __ Bind(&comparison_failed); - __ Clrex(); - __ Bind(&exit_loop); - - if (kPoisonHeapReferences) { - arm64_codegen->GetAssembler()->UnpoisonHeapReference(expected); - if (value.Is(expected)) { - // Do not unpoison `value`, as it is the same register as - // `expected`, which has just been unpoisoned. - } else { - arm64_codegen->GetAssembler()->UnpoisonHeapReference(value); - } - } - - __ B(GetExitLabel()); - } - - private: - // The register containing the object holding the marked object reference field. - const Register obj_; - // The offset, index and scale factor to access the reference in `obj_`. - uint32_t offset_; - Location index_; - size_t scale_factor_; - // Is a null check required? - bool needs_null_check_; - // Should this reference load use Load-Acquire semantics? - bool use_load_acquire_; - // A temporary register used to hold the lock word of `obj_`; and - // also to hold the original reference value, when the reference is - // marked. - const Register temp_; - - DISALLOW_COPY_AND_ASSIGN(LoadReferenceWithBakerReadBarrierAndUpdateFieldSlowPathARM64); -}; - // Slow path generating a read barrier for a heap reference. class ReadBarrierForHeapReferenceSlowPathARM64 : public SlowPathCodeARM64 { public: @@ -1131,7 +628,7 @@ class ReadBarrierForHeapReferenceSlowPathARM64 : public SlowPathCodeARM64 { DCHECK(!obj.Equals(ref)) << "obj=" << obj << " ref=" << ref; } - void EmitNativeCode(CodeGenerator* codegen) OVERRIDE { + void EmitNativeCode(CodeGenerator* codegen) override { CodeGeneratorARM64* arm64_codegen = down_cast<CodeGeneratorARM64*>(codegen); LocationSummary* locations = instruction_->GetLocations(); DataType::Type type = DataType::Type::kReference; @@ -1257,7 +754,7 @@ class ReadBarrierForHeapReferenceSlowPathARM64 : public SlowPathCodeARM64 { __ B(GetExitLabel()); } - const char* GetDescription() const OVERRIDE { return "ReadBarrierForHeapReferenceSlowPathARM64"; } + const char* GetDescription() const override { return "ReadBarrierForHeapReferenceSlowPathARM64"; } private: Register FindAvailableCallerSaveRegister(CodeGenerator* codegen) { @@ -1297,7 +794,7 @@ class ReadBarrierForRootSlowPathARM64 : public SlowPathCodeARM64 { DCHECK(kEmitCompilerReadBarrier); } - void EmitNativeCode(CodeGenerator* codegen) OVERRIDE { + void EmitNativeCode(CodeGenerator* codegen) override { LocationSummary* locations = instruction_->GetLocations(); DataType::Type type = DataType::Type::kReference; DCHECK(locations->CanCall()); @@ -1334,7 +831,7 @@ class ReadBarrierForRootSlowPathARM64 : public SlowPathCodeARM64 { __ B(GetExitLabel()); } - const char* GetDescription() const OVERRIDE { return "ReadBarrierForRootSlowPathARM64"; } + const char* GetDescription() const override { return "ReadBarrierForRootSlowPathARM64"; } private: const Location out_; @@ -1373,7 +870,6 @@ Location InvokeDexCallingConventionVisitorARM64::GetMethodLocation() const { } CodeGeneratorARM64::CodeGeneratorARM64(HGraph* graph, - const Arm64InstructionSetFeatures& isa_features, const CompilerOptions& compiler_options, 
OptimizingCompilerStats* stats) : CodeGenerator(graph, @@ -1389,8 +885,8 @@ CodeGeneratorARM64::CodeGeneratorARM64(HGraph* graph, location_builder_(graph, this), instruction_visitor_(graph, this), move_resolver_(graph->GetAllocator(), this), - assembler_(graph->GetAllocator()), - isa_features_(isa_features), + assembler_(graph->GetAllocator(), + compiler_options.GetInstructionSetFeatures()->AsArm64InstructionSetFeatures()), uint32_literals_(std::less<uint32_t>(), graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)), uint64_literals_(std::less<uint64_t>(), @@ -1401,11 +897,14 @@ CodeGeneratorARM64::CodeGeneratorARM64(HGraph* graph, type_bss_entry_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)), boot_image_string_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)), string_bss_entry_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)), + boot_image_intrinsic_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)), baker_read_barrier_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)), jit_string_patches_(StringReferenceValueComparator(), graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)), jit_class_patches_(TypeReferenceValueComparator(), - graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)) { + graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)), + jit_baker_read_barrier_slow_paths_(std::less<uint32_t>(), + graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)) { // Save the link register (containing the return address) to mimic Quick. AddAllocatedRegister(LocationFrom(lr)); } @@ -1420,10 +919,86 @@ void CodeGeneratorARM64::EmitJumpTables() { void CodeGeneratorARM64::Finalize(CodeAllocator* allocator) { EmitJumpTables(); + + // Emit JIT baker read barrier slow paths. + DCHECK(Runtime::Current()->UseJitCompilation() || jit_baker_read_barrier_slow_paths_.empty()); + for (auto& entry : jit_baker_read_barrier_slow_paths_) { + uint32_t encoded_data = entry.first; + vixl::aarch64::Label* slow_path_entry = &entry.second.label; + __ Bind(slow_path_entry); + CompileBakerReadBarrierThunk(*GetAssembler(), encoded_data, /* debug_name= */ nullptr); + } + // Ensure we emit the literal pool. __ FinalizeCode(); CodeGenerator::Finalize(allocator); + + // Verify Baker read barrier linker patches. + if (kIsDebugBuild) { + ArrayRef<const uint8_t> code = allocator->GetMemory(); + for (const BakerReadBarrierPatchInfo& info : baker_read_barrier_patches_) { + DCHECK(info.label.IsBound()); + uint32_t literal_offset = info.label.GetLocation(); + DCHECK_ALIGNED(literal_offset, 4u); + + auto GetInsn = [&code](uint32_t offset) { + DCHECK_ALIGNED(offset, 4u); + return + (static_cast<uint32_t>(code[offset + 0]) << 0) + + (static_cast<uint32_t>(code[offset + 1]) << 8) + + (static_cast<uint32_t>(code[offset + 2]) << 16)+ + (static_cast<uint32_t>(code[offset + 3]) << 24); + }; + + const uint32_t encoded_data = info.custom_data; + BakerReadBarrierKind kind = BakerReadBarrierKindField::Decode(encoded_data); + // Check that the next instruction matches the expected LDR. + switch (kind) { + case BakerReadBarrierKind::kField: + case BakerReadBarrierKind::kAcquire: { + DCHECK_GE(code.size() - literal_offset, 8u); + uint32_t next_insn = GetInsn(literal_offset + 4u); + CheckValidReg(next_insn & 0x1fu); // Check destination register. + const uint32_t base_reg = BakerReadBarrierFirstRegField::Decode(encoded_data); + if (kind == BakerReadBarrierKind::kField) { + // LDR (immediate) with correct base_reg. 
+ CHECK_EQ(next_insn & 0xffc003e0u, 0xb9400000u | (base_reg << 5)); + } else { + DCHECK(kind == BakerReadBarrierKind::kAcquire); + // LDAR with correct base_reg. + CHECK_EQ(next_insn & 0xffffffe0u, 0x88dffc00u | (base_reg << 5)); + } + break; + } + case BakerReadBarrierKind::kArray: { + DCHECK_GE(code.size() - literal_offset, 8u); + uint32_t next_insn = GetInsn(literal_offset + 4u); + // LDR (register) with the correct base_reg, size=10 (32-bit), option=011 (extend = LSL), + // and S=1 (shift amount = 2 for 32-bit version), i.e. LDR Wt, [Xn, Xm, LSL #2]. + CheckValidReg(next_insn & 0x1fu); // Check destination register. + const uint32_t base_reg = BakerReadBarrierFirstRegField::Decode(encoded_data); + CHECK_EQ(next_insn & 0xffe0ffe0u, 0xb8607800u | (base_reg << 5)); + CheckValidReg((next_insn >> 16) & 0x1f); // Check index register + break; + } + case BakerReadBarrierKind::kGcRoot: { + DCHECK_GE(literal_offset, 4u); + uint32_t prev_insn = GetInsn(literal_offset - 4u); + const uint32_t root_reg = BakerReadBarrierFirstRegField::Decode(encoded_data); + // Usually LDR (immediate) with correct root_reg but + // we may have a "MOV marked, old_value" for UnsafeCASObject. + if ((prev_insn & 0xffe0ffff) != (0x2a0003e0 | root_reg)) { // MOV? + CHECK_EQ(prev_insn & 0xffc0001fu, 0xb9400000u | root_reg); // LDR? + } + break; + } + default: + LOG(FATAL) << "Unexpected kind: " << static_cast<uint32_t>(kind); + UNREACHABLE(); + } + } + } } void ParallelMoveResolverARM64::PrepareForEmitNativeCode() { @@ -1543,7 +1118,7 @@ void CodeGeneratorARM64::GenerateFrameEntry() { } } - MaybeGenerateMarkingRegisterCheck(/* code */ __LINE__); + MaybeGenerateMarkingRegisterCheck(/* code= */ __LINE__); } void CodeGeneratorARM64::GenerateFrameExit() { @@ -1600,8 +1175,24 @@ void CodeGeneratorARM64::MarkGCCard(Register object, Register value, bool value_ if (value_can_be_null) { __ Cbz(value, &done); } + // Load the address of the card table into `card`. __ Ldr(card, MemOperand(tr, Thread::CardTableOffset<kArm64PointerSize>().Int32Value())); + // Calculate the offset (in the card table) of the card corresponding to + // `object`. __ Lsr(temp, object, gc::accounting::CardTable::kCardShift); + // Write the `art::gc::accounting::CardTable::kCardDirty` value into the + // `object`'s card. + // + // Register `card` contains the address of the card table. Note that the card + // table's base is biased during its creation so that it always starts at an + // address whose least-significant byte is equal to `kCardDirty` (see + // art::gc::accounting::CardTable::Create). Therefore the STRB instruction + // below writes the `kCardDirty` (byte) value into the `object`'s card + // (located at `card + object >> kCardShift`). + // + // This dual use of the value in register `card` (1. to calculate the location + // of the card to mark; and 2. to load the `kCardDirty` value) saves a load + // (no need to explicitly load `kCardDirty` as an immediate value). __ Strb(card, MemOperand(card, temp.X())); if (value_can_be_null) { __ Bind(&done); @@ -1615,6 +1206,7 @@ void CodeGeneratorARM64::SetupBlockedRegisters() const { // mr : Runtime reserved. // ip1 : VIXL core temp. // ip0 : VIXL core temp. + // x18 : Platform register. // // Blocked fp registers: // d31 : VIXL fp temp. 
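The comments added to MarkGCCard above describe the dual use of the `card` register: the card table base is biased at creation so that its least-significant byte equals kCardDirty, letting the single STRB both address the object's card and supply the dirty value. A standalone model of the arithmetic; kCardShift and kCardDirty here are illustrative stand-ins for the constants in gc/accounting/card_table.h.

#include <cstddef>
#include <cstdint>
#include <cstdio>
#include <vector>

constexpr unsigned kCardShift = 10;   // One card byte per 1 KiB of heap in this sketch.
constexpr uint8_t kCardDirty = 0x70;  // Stand-in for the dirty marker value.

int main() {
  // Fake 64 KiB heap covered by 64 card bytes.
  const uintptr_t heap_begin = 0x100000;
  std::vector<uint8_t> card_table(64, 0);

  uintptr_t object = heap_begin + 5 * 1024 + 16;  // Some object in the fake heap.

  // LSR temp, object, #kCardShift: index of the card covering `object`. The real
  // table uses a biased base (biased_begin = table - (heap_begin >> kCardShift)),
  // so the generated code shifts the raw address without subtracting heap_begin.
  size_t card_index = (object - heap_begin) >> kCardShift;

  // STRB card, [card, temp]: dirty the card. Because the biased base ends in the
  // kCardDirty byte, the generated code stores the low byte of the base register
  // itself; this sketch simply stores the constant.
  card_table[card_index] = kCardDirty;

  std::printf("card %zu -> 0x%02x\n",
              card_index, static_cast<unsigned>(card_table[card_index]));
  return 0;
}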
@@ -1623,6 +1215,7 @@ void CodeGeneratorARM64::SetupBlockedRegisters() const { while (!reserved_core_registers.IsEmpty()) { blocked_core_registers_[reserved_core_registers.PopLowestIndex().GetCode()] = true; } + blocked_core_registers_[X18] = true; CPURegList reserved_fp_registers = vixl_reserved_fp_registers; while (!reserved_fp_registers.IsEmpty()) { @@ -1672,6 +1265,10 @@ void CodeGeneratorARM64::DumpFloatingPointRegister(std::ostream& stream, int reg stream << DRegister(reg); } +const Arm64InstructionSetFeatures& CodeGeneratorARM64::GetInstructionSetFeatures() const { + return *GetCompilerOptions().GetInstructionSetFeatures()->AsArm64InstructionSetFeatures(); +} + void CodeGeneratorARM64::MoveConstant(CPURegister destination, HConstant* constant) { if (constant->IsIntConstant()) { __ Mov(Register(destination), constant->AsIntConstant()->GetValue()); @@ -2128,6 +1725,26 @@ void InstructionCodeGeneratorARM64::GenerateClassInitializationCheck(SlowPathCod __ Bind(slow_path->GetExitLabel()); } +void InstructionCodeGeneratorARM64::GenerateBitstringTypeCheckCompare( + HTypeCheckInstruction* check, vixl::aarch64::Register temp) { + uint32_t path_to_root = check->GetBitstringPathToRoot(); + uint32_t mask = check->GetBitstringMask(); + DCHECK(IsPowerOfTwo(mask + 1)); + size_t mask_bits = WhichPowerOf2(mask + 1); + + if (mask_bits == 16u) { + // Load only the bitstring part of the status word. + __ Ldrh(temp, HeapOperand(temp, mirror::Class::StatusOffset())); + } else { + // /* uint32_t */ temp = temp->status_ + __ Ldr(temp, HeapOperand(temp, mirror::Class::StatusOffset())); + // Extract the bitstring bits. + __ Ubfx(temp, temp, 0, mask_bits); + } + // Compare the bitstring bits to `path_to_root`. + __ Cmp(temp, path_to_root); +} + void CodeGeneratorARM64::GenerateMemoryBarrier(MemBarrierKind kind) { BarrierType type = BarrierAll; @@ -2224,18 +1841,12 @@ void LocationsBuilderARM64::HandleFieldGet(HInstruction* instruction, : LocationSummary::kNoCall); if (object_field_get_with_read_barrier && kUseBakerReadBarrier) { locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty()); // No caller-save registers. - // We need a temporary register for the read barrier marking slow - // path in CodeGeneratorARM64::GenerateFieldLoadWithBakerReadBarrier. - if (kBakerReadBarrierLinkTimeThunksEnableForFields && - !Runtime::Current()->UseJitCompilation() && - !field_info.IsVolatile()) { - // If link-time thunks for the Baker read barrier are enabled, for AOT - // non-volatile loads we need a temporary only if the offset is too big. - if (field_info.GetFieldOffset().Uint32Value() >= kReferenceLoadMinFarOffset) { - locations->AddTemp(FixedTempLocation()); - } - } else { - locations->AddTemp(Location::RequiresRegister()); + // We need a temporary register for the read barrier load in + // CodeGeneratorARM64::GenerateFieldLoadWithBakerReadBarrier() + // only if the field is volatile or the offset is too big. + if (field_info.IsVolatile() || + field_info.GetFieldOffset().Uint32Value() >= kReferenceLoadMinFarOffset) { + locations->AddTemp(FixedTempLocation()); } } locations->SetInAt(0, Location::RequiresRegister()); @@ -2277,7 +1888,7 @@ void InstructionCodeGeneratorARM64::HandleFieldGet(HInstruction* instruction, base, offset, maybe_temp, - /* needs_null_check */ true, + /* needs_null_check= */ true, field_info.IsVolatile()); } else { // General case. @@ -2286,7 +1897,7 @@ void InstructionCodeGeneratorARM64::HandleFieldGet(HInstruction* instruction, // CodeGeneratorARM64::LoadAcquire call. 
// NB: LoadAcquire will record the pc info if needed. codegen_->LoadAcquire( - instruction, OutputCPURegister(instruction), field, /* needs_null_check */ true); + instruction, OutputCPURegister(instruction), field, /* needs_null_check= */ true); } else { // Ensure that between load and MaybeRecordImplicitNullCheck there are no pools emitted. EmissionCheckScope guard(GetVIXLAssembler(), kMaxMacroInstructionSizeInBytes); @@ -2341,7 +1952,7 @@ void InstructionCodeGeneratorARM64::HandleFieldSet(HInstruction* instruction, if (field_info.IsVolatile()) { codegen_->StoreRelease( - instruction, field_type, source, HeapOperand(obj, offset), /* needs_null_check */ true); + instruction, field_type, source, HeapOperand(obj, offset), /* needs_null_check= */ true); } else { // Ensure that between store and MaybeRecordImplicitNullCheck there are no pools emitted. EmissionCheckScope guard(GetVIXLAssembler(), kMaxMacroInstructionSizeInBytes); @@ -2383,6 +1994,9 @@ void InstructionCodeGeneratorARM64::HandleBinaryOp(HBinaryOperation* instr) { // all & reg_bits - 1. __ Ror(dst, lhs, RegisterFrom(instr->GetLocations()->InAt(1), type)); } + } else if (instr->IsMin() || instr->IsMax()) { + __ Cmp(lhs, rhs); + __ Csel(dst, lhs, rhs, instr->IsMin() ? lt : gt); } else { DCHECK(instr->IsXor()); __ Eor(dst, lhs, rhs); @@ -2398,6 +2012,10 @@ void InstructionCodeGeneratorARM64::HandleBinaryOp(HBinaryOperation* instr) { __ Fadd(dst, lhs, rhs); } else if (instr->IsSub()) { __ Fsub(dst, lhs, rhs); + } else if (instr->IsMin()) { + __ Fmin(dst, lhs, rhs); + } else if (instr->IsMax()) { + __ Fmax(dst, lhs, rhs); } else { LOG(FATAL) << "Unexpected floating-point binary operation"; } @@ -2618,7 +2236,7 @@ void LocationsBuilderARM64::VisitIntermediateAddressIndex(HIntermediateAddressIn void InstructionCodeGeneratorARM64::VisitIntermediateAddressIndex( HIntermediateAddressIndex* instruction) { Register index_reg = InputRegisterAt(instruction, 0); - uint32_t shift = Int64ConstantFrom(instruction->GetLocations()->InAt(2)); + uint32_t shift = Int64FromLocation(instruction->GetLocations()->InAt(2)); uint32_t offset = instruction->GetOffset()->AsIntConstant()->GetValue(); if (shift == 0) { @@ -2691,21 +2309,21 @@ void LocationsBuilderARM64::VisitArrayGet(HArrayGet* instruction) { : LocationSummary::kNoCall); if (object_array_get_with_read_barrier && kUseBakerReadBarrier) { locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty()); // No caller-save registers. - // We need a temporary register for the read barrier marking slow - // path in CodeGeneratorARM64::GenerateArrayLoadWithBakerReadBarrier. - if (kBakerReadBarrierLinkTimeThunksEnableForFields && - !Runtime::Current()->UseJitCompilation() && - instruction->GetIndex()->IsConstant()) { + if (instruction->GetIndex()->IsConstant()) { // Array loads with constant index are treated as field loads. - // If link-time thunks for the Baker read barrier are enabled, for AOT - // constant index loads we need a temporary only if the offset is too big. + // We need a temporary register for the read barrier load in + // CodeGeneratorARM64::GenerateFieldLoadWithBakerReadBarrier() + // only if the offset is too big. 
uint32_t offset = CodeGenerator::GetArrayDataOffset(instruction); uint32_t index = instruction->GetIndex()->AsIntConstant()->GetValue(); offset += index << DataType::SizeShift(DataType::Type::kReference); if (offset >= kReferenceLoadMinFarOffset) { locations->AddTemp(FixedTempLocation()); } - } else { + } else if (!instruction->GetArray()->IsIntermediateAddress()) { + // We need a non-scratch temporary for the array data pointer in + // CodeGeneratorARM64::GenerateArrayLoadWithBakerReadBarrier() for the case with no + // intermediate address. locations->AddTemp(Location::RequiresRegister()); } } @@ -2735,11 +2353,12 @@ void InstructionCodeGeneratorARM64::VisitArrayGet(HArrayGet* instruction) { MacroAssembler* masm = GetVIXLAssembler(); UseScratchRegisterScope temps(masm); - // The read barrier instrumentation of object ArrayGet instructions + // The non-Baker read barrier instrumentation of object ArrayGet instructions // does not support the HIntermediateAddress instruction. DCHECK(!((type == DataType::Type::kReference) && instruction->GetArray()->IsIntermediateAddress() && - kEmitCompilerReadBarrier)); + kEmitCompilerReadBarrier && + !kUseBakerReadBarrier)); if (type == DataType::Type::kReference && kEmitCompilerReadBarrier && kUseBakerReadBarrier) { // Object ArrayGet with Baker's read barrier case. @@ -2747,8 +2366,9 @@ void InstructionCodeGeneratorARM64::VisitArrayGet(HArrayGet* instruction) { // CodeGeneratorARM64::GenerateArrayLoadWithBakerReadBarrier call. DCHECK(!instruction->CanDoImplicitNullCheckOn(instruction->InputAt(0))); if (index.IsConstant()) { + DCHECK(!instruction->GetArray()->IsIntermediateAddress()); // Array load with a constant index can be treated as a field load. - offset += Int64ConstantFrom(index) << DataType::SizeShift(type); + offset += Int64FromLocation(index) << DataType::SizeShift(type); Location maybe_temp = (locations->GetTempCount() != 0) ? locations->GetTemp(0) : Location::NoLocation(); codegen_->GenerateFieldLoadWithBakerReadBarrier(instruction, @@ -2756,12 +2376,11 @@ void InstructionCodeGeneratorARM64::VisitArrayGet(HArrayGet* instruction) { obj.W(), offset, maybe_temp, - /* needs_null_check */ false, - /* use_load_acquire */ false); + /* needs_null_check= */ false, + /* use_load_acquire= */ false); } else { - Register temp = WRegisterFrom(locations->GetTemp(0)); codegen_->GenerateArrayLoadWithBakerReadBarrier( - instruction, out, obj.W(), offset, index, temp, /* needs_null_check */ false); + instruction, out, obj.W(), offset, index, /* needs_null_check= */ false); } } else { // General case. @@ -2793,14 +2412,14 @@ void InstructionCodeGeneratorARM64::VisitArrayGet(HArrayGet* instruction) { "Expecting 0=compressed, 1=uncompressed"); __ Tbnz(length.W(), 0, &uncompressed_load); __ Ldrb(Register(OutputCPURegister(instruction)), - HeapOperand(obj, offset + Int64ConstantFrom(index))); + HeapOperand(obj, offset + Int64FromLocation(index))); __ B(&done); __ Bind(&uncompressed_load); __ Ldrh(Register(OutputCPURegister(instruction)), - HeapOperand(obj, offset + (Int64ConstantFrom(index) << 1))); + HeapOperand(obj, offset + (Int64FromLocation(index) << 1))); __ Bind(&done); } else { - offset += Int64ConstantFrom(index) << DataType::SizeShift(type); + offset += Int64FromLocation(index) << DataType::SizeShift(type); source = HeapOperand(obj, offset); } } else { @@ -2810,8 +2429,8 @@ void InstructionCodeGeneratorARM64::VisitArrayGet(HArrayGet* instruction) { // input instruction has done it already. See the comment in // `TryExtractArrayAccessAddress()`. 
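The TBNZ / LDRB / LDRH sequence in the compressed-string branch above selects the element width from bit 0 of the string's count word (0 = compressed 8-bit data, 1 = uncompressed 16-bit data, per the static_assert). A tiny standalone sketch of that selection; the layout beyond the flag bit is not modelled.

#include <cstdint>
#include <cstdio>

// `flag_bit` models bit 0 of the count word tested by the TBNZ above.
uint16_t CharAt(uint32_t flag_bit, const void* data, uint32_t index) {
  if ((flag_bit & 1u) == 0u) {
    return static_cast<const uint8_t*>(data)[index];   // LDRB: offset + index.
  }
  return static_cast<const uint16_t*>(data)[index];    // LDRH: offset + (index << 1).
}

int main() {
  const uint8_t ascii[] = {'a', 'b', 'c'};
  const uint16_t utf16[] = {0x3042, 0x3044};
  std::printf("%c %04x\n", CharAt(0u, ascii, 1), CharAt(1u, utf16, 0));
  return 0;
}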
if (kIsDebugBuild) { - HIntermediateAddress* tmp = instruction->GetArray()->AsIntermediateAddress(); - DCHECK_EQ(tmp->GetOffset()->AsIntConstant()->GetValueAsUint64(), offset); + HIntermediateAddress* interm_addr = instruction->GetArray()->AsIntermediateAddress(); + DCHECK_EQ(interm_addr->GetOffset()->AsIntConstant()->GetValueAsUint64(), offset); } temp = obj; } else { @@ -2913,7 +2532,7 @@ void InstructionCodeGeneratorARM64::VisitArraySet(HArraySet* instruction) { if (!needs_write_barrier) { DCHECK(!may_need_runtime_call_for_type_check); if (index.IsConstant()) { - offset += Int64ConstantFrom(index) << DataType::SizeShift(value_type); + offset += Int64FromLocation(index) << DataType::SizeShift(value_type); destination = HeapOperand(array, offset); } else { UseScratchRegisterScope temps(masm); @@ -2923,8 +2542,8 @@ void InstructionCodeGeneratorARM64::VisitArraySet(HArraySet* instruction) { // input instruction has done it already. See the comment in // `TryExtractArrayAccessAddress()`. if (kIsDebugBuild) { - HIntermediateAddress* tmp = instruction->GetArray()->AsIntermediateAddress(); - DCHECK(tmp->GetOffset()->AsIntConstant()->GetValueAsUint64() == offset); + HIntermediateAddress* interm_addr = instruction->GetArray()->AsIntermediateAddress(); + DCHECK(interm_addr->GetOffset()->AsIntConstant()->GetValueAsUint64() == offset); } temp = array; } else { @@ -2951,7 +2570,7 @@ void InstructionCodeGeneratorARM64::VisitArraySet(HArraySet* instruction) { UseScratchRegisterScope temps(masm); Register temp = temps.AcquireSameSizeAs(array); if (index.IsConstant()) { - offset += Int64ConstantFrom(index) << DataType::SizeShift(value_type); + offset += Int64FromLocation(index) << DataType::SizeShift(value_type); destination = HeapOperand(array, offset); } else { destination = HeapOperand(temp, @@ -3093,12 +2712,14 @@ void LocationsBuilderARM64::VisitClinitCheck(HClinitCheck* check) { if (check->HasUses()) { locations->SetOut(Location::SameAsFirstInput()); } + // Rely on the type initialization to save everything we need. + locations->SetCustomSlowPathCallerSaves(OneRegInReferenceOutSaveEverythingCallerSaves()); } void InstructionCodeGeneratorARM64::VisitClinitCheck(HClinitCheck* check) { // We assume the class is not null. 
- SlowPathCodeARM64* slow_path = new (codegen_->GetScopedAllocator()) LoadClassSlowPathARM64( - check->GetLoadClass(), check, check->GetDexPc(), true); + SlowPathCodeARM64* slow_path = + new (codegen_->GetScopedAllocator()) LoadClassSlowPathARM64(check->GetLoadClass(), check); codegen_->AddSlowPath(slow_path); GenerateClassInitializationCheck(slow_path, InputRegisterAt(check, 0)); } @@ -3260,61 +2881,30 @@ FOR_EACH_CONDITION_INSTRUCTION(DEFINE_CONDITION_VISITORS) #undef DEFINE_CONDITION_VISITORS #undef FOR_EACH_CONDITION_INSTRUCTION -void InstructionCodeGeneratorARM64::DivRemOneOrMinusOne(HBinaryOperation* instruction) { - DCHECK(instruction->IsDiv() || instruction->IsRem()); - - LocationSummary* locations = instruction->GetLocations(); - Location second = locations->InAt(1); - DCHECK(second.IsConstant()); +void InstructionCodeGeneratorARM64::GenerateIntDivForPower2Denom(HDiv* instruction) { + int64_t imm = Int64FromLocation(instruction->GetLocations()->InAt(1)); + uint64_t abs_imm = static_cast<uint64_t>(AbsOrMin(imm)); + DCHECK(IsPowerOfTwo(abs_imm)) << abs_imm; Register out = OutputRegister(instruction); Register dividend = InputRegisterAt(instruction, 0); - int64_t imm = Int64FromConstant(second.GetConstant()); - DCHECK(imm == 1 || imm == -1); - if (instruction->IsRem()) { - __ Mov(out, 0); + if (abs_imm == 2) { + int bits = DataType::Size(instruction->GetResultType()) * kBitsPerByte; + __ Add(out, dividend, Operand(dividend, LSR, bits - 1)); } else { - if (imm == 1) { - __ Mov(out, dividend); - } else { - __ Neg(out, dividend); - } - } -} - -void InstructionCodeGeneratorARM64::DivRemByPowerOfTwo(HBinaryOperation* instruction) { - DCHECK(instruction->IsDiv() || instruction->IsRem()); - - LocationSummary* locations = instruction->GetLocations(); - Location second = locations->InAt(1); - DCHECK(second.IsConstant()); - - Register out = OutputRegister(instruction); - Register dividend = InputRegisterAt(instruction, 0); - int64_t imm = Int64FromConstant(second.GetConstant()); - uint64_t abs_imm = static_cast<uint64_t>(AbsOrMin(imm)); - int ctz_imm = CTZ(abs_imm); - - UseScratchRegisterScope temps(GetVIXLAssembler()); - Register temp = temps.AcquireSameSizeAs(out); - - if (instruction->IsDiv()) { + UseScratchRegisterScope temps(GetVIXLAssembler()); + Register temp = temps.AcquireSameSizeAs(out); __ Add(temp, dividend, abs_imm - 1); __ Cmp(dividend, 0); __ Csel(out, temp, dividend, lt); - if (imm > 0) { - __ Asr(out, out, ctz_imm); - } else { - __ Neg(out, Operand(out, ASR, ctz_imm)); - } + } + + int ctz_imm = CTZ(abs_imm); + if (imm > 0) { + __ Asr(out, out, ctz_imm); } else { - int bits = instruction->GetResultType() == DataType::Type::kInt32 ? 
32 : 64; - __ Asr(temp, dividend, bits - 1); - __ Lsr(temp, temp, bits - ctz_imm); - __ Add(out, dividend, temp); - __ And(out, out, abs_imm - 1); - __ Sub(out, out, temp); + __ Neg(out, Operand(out, ASR, ctz_imm)); } } @@ -3335,7 +2925,7 @@ void InstructionCodeGeneratorARM64::GenerateDivRemWithAnyConstant(HBinaryOperati int64_t magic; int shift; CalculateMagicAndShiftForDivRem( - imm, type == DataType::Type::kInt64 /* is_long */, &magic, &shift); + imm, /* is_long= */ type == DataType::Type::kInt64, &magic, &shift); UseScratchRegisterScope temps(GetVIXLAssembler()); Register temp = temps.AcquireSameSizeAs(out); @@ -3370,39 +2960,34 @@ void InstructionCodeGeneratorARM64::GenerateDivRemWithAnyConstant(HBinaryOperati } } -void InstructionCodeGeneratorARM64::GenerateDivRemIntegral(HBinaryOperation* instruction) { - DCHECK(instruction->IsDiv() || instruction->IsRem()); - DataType::Type type = instruction->GetResultType(); - DCHECK(type == DataType::Type::kInt32 || type == DataType::Type::kInt64); +void InstructionCodeGeneratorARM64::GenerateIntDivForConstDenom(HDiv *instruction) { + int64_t imm = Int64FromLocation(instruction->GetLocations()->InAt(1)); - LocationSummary* locations = instruction->GetLocations(); - Register out = OutputRegister(instruction); - Location second = locations->InAt(1); + if (imm == 0) { + // Do not generate anything. DivZeroCheck would prevent any code to be executed. + return; + } - if (second.IsConstant()) { - int64_t imm = Int64FromConstant(second.GetConstant()); + if (IsPowerOfTwo(AbsOrMin(imm))) { + GenerateIntDivForPower2Denom(instruction); + } else { + // Cases imm == -1 or imm == 1 are handled by InstructionSimplifier. + DCHECK(imm < -2 || imm > 2) << imm; + GenerateDivRemWithAnyConstant(instruction); + } +} - if (imm == 0) { - // Do not generate anything. DivZeroCheck would prevent any code to be executed. 
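For the new GenerateIntDivForPower2Denom above, a scalar sketch of what the Add/Cmp/Csel + Asr/Neg sequence computes (assumes an arithmetic right shift for signed types and a GCC/Clang builtin for CTZ; |imm| is a power of two other than 1):

    #include <cstdint>

    int64_t DivByPowerOfTwoSketch(int64_t dividend, int64_t imm) {
      uint64_t abs_imm = imm < 0 ? 0u - static_cast<uint64_t>(imm) : static_cast<uint64_t>(imm);
      int ctz_imm = __builtin_ctzll(abs_imm);
      // Bias negative dividends by |imm| - 1 so the arithmetic shift truncates toward
      // zero (the Add/Cmp/Csel part; for |imm| == 2 the bias is just the sign bit).
      int64_t biased = dividend < 0 ? dividend + static_cast<int64_t>(abs_imm - 1) : dividend;
      int64_t quotient = biased >> ctz_imm;   // Asr
      return imm > 0 ? quotient : -quotient;  // Neg for negative divisors
    }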
- } else if (imm == 1 || imm == -1) { - DivRemOneOrMinusOne(instruction); - } else if (IsPowerOfTwo(AbsOrMin(imm))) { - DivRemByPowerOfTwo(instruction); - } else { - DCHECK(imm <= -2 || imm >= 2); - GenerateDivRemWithAnyConstant(instruction); - } +void InstructionCodeGeneratorARM64::GenerateIntDiv(HDiv *instruction) { + DCHECK(DataType::IsIntOrLongType(instruction->GetResultType())) + << instruction->GetResultType(); + + if (instruction->GetLocations()->InAt(1).IsConstant()) { + GenerateIntDivForConstDenom(instruction); } else { + Register out = OutputRegister(instruction); Register dividend = InputRegisterAt(instruction, 0); Register divisor = InputRegisterAt(instruction, 1); - if (instruction->IsDiv()) { - __ Sdiv(out, dividend, divisor); - } else { - UseScratchRegisterScope temps(GetVIXLAssembler()); - Register temp = temps.AcquireSameSizeAs(out); - __ Sdiv(temp, dividend, divisor); - __ Msub(out, temp, divisor, dividend); - } + __ Sdiv(out, dividend, divisor); } } @@ -3434,7 +3019,7 @@ void InstructionCodeGeneratorARM64::VisitDiv(HDiv* div) { switch (type) { case DataType::Type::kInt32: case DataType::Type::kInt64: - GenerateDivRemIntegral(div); + GenerateIntDiv(div); break; case DataType::Type::kFloat32: @@ -3462,11 +3047,11 @@ void InstructionCodeGeneratorARM64::VisitDivZeroCheck(HDivZeroCheck* instruction if (!DataType::IsIntegralType(type)) { LOG(FATAL) << "Unexpected type " << type << " for DivZeroCheck."; - return; + UNREACHABLE(); } if (value.IsConstant()) { - int64_t divisor = Int64ConstantFrom(value); + int64_t divisor = Int64FromLocation(value); if (divisor == 0) { __ B(slow_path->GetEntryLabel()); } else { @@ -3531,7 +3116,7 @@ void InstructionCodeGeneratorARM64::HandleGoto(HInstruction* got, HBasicBlock* s } if (block->IsEntryBlock() && (previous != nullptr) && previous->IsSuspendCheck()) { GenerateSuspendCheck(previous->AsSuspendCheck(), nullptr); - codegen_->MaybeGenerateMarkingRegisterCheck(/* code */ __LINE__); + codegen_->MaybeGenerateMarkingRegisterCheck(/* code= */ __LINE__); } if (!codegen_->GoesToNextBlock(block, successor)) { __ B(codegen_->GetLabelOf(successor)); @@ -3681,7 +3266,7 @@ void InstructionCodeGeneratorARM64::VisitIf(HIf* if_instr) { if (codegen_->GoesToNextBlock(if_instr->GetBlock(), false_successor)) { false_target = nullptr; } - GenerateTestAndBranch(if_instr, /* condition_input_index */ 0, true_target, false_target); + GenerateTestAndBranch(if_instr, /* condition_input_index= */ 0, true_target, false_target); } void LocationsBuilderARM64::VisitDeoptimize(HDeoptimize* deoptimize) { @@ -3700,9 +3285,9 @@ void InstructionCodeGeneratorARM64::VisitDeoptimize(HDeoptimize* deoptimize) { SlowPathCodeARM64* slow_path = deopt_slow_paths_.NewSlowPath<DeoptimizationSlowPathARM64>(deoptimize); GenerateTestAndBranch(deoptimize, - /* condition_input_index */ 0, + /* condition_input_index= */ 0, slow_path->GetEntryLabel(), - /* false_target */ nullptr); + /* false_target= */ nullptr); } void LocationsBuilderARM64::VisitShouldDeoptimizeFlag(HShouldDeoptimizeFlag* flag) { @@ -3865,6 +3450,8 @@ void LocationsBuilderARM64::VisitInstanceOf(HInstanceOf* instruction) { case TypeCheckKind::kInterfaceCheck: call_kind = LocationSummary::kCallOnSlowPath; break; + case TypeCheckKind::kBitstringCheck: + break; } LocationSummary* locations = @@ -3873,7 +3460,13 @@ void LocationsBuilderARM64::VisitInstanceOf(HInstanceOf* instruction) { locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty()); // No caller-save registers. 
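Looking back at the GenerateDivRemWithAnyConstant path above: CalculateMagicAndShiftForDivRem supplies a multiplier/shift pair so the division becomes a high multiply plus corrections. Purely as an illustration (not the compiler's derivation), the same technique hard-coded for the single 32-bit divisor 7 with its well-known magic constant:

    #include <cstdint>

    int32_t DivBy7Sketch(int32_t n) {
      const int32_t magic = static_cast<int32_t>(0x92492493);  // precomputed for d = 7
      const int shift = 2;
      int64_t product = static_cast<int64_t>(n) * magic;
      int32_t hi = static_cast<int32_t>(product >> 32);        // high half of the product
      hi += n;                                                 // correction: d > 0, magic < 0
      int32_t q = hi >> shift;
      q += static_cast<int32_t>(static_cast<uint32_t>(n) >> 31);  // round toward zero for n < 0
      return q;
    }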
} locations->SetInAt(0, Location::RequiresRegister()); - locations->SetInAt(1, Location::RequiresRegister()); + if (type_check_kind == TypeCheckKind::kBitstringCheck) { + locations->SetInAt(1, Location::ConstantLocation(instruction->InputAt(1)->AsConstant())); + locations->SetInAt(2, Location::ConstantLocation(instruction->InputAt(2)->AsConstant())); + locations->SetInAt(3, Location::ConstantLocation(instruction->InputAt(3)->AsConstant())); + } else { + locations->SetInAt(1, Location::RequiresRegister()); + } // The "out" register is used as a temporary, so it overlaps with the inputs. // Note that TypeCheckSlowPathARM64 uses this register too. locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap); @@ -3886,7 +3479,9 @@ void InstructionCodeGeneratorARM64::VisitInstanceOf(HInstanceOf* instruction) { LocationSummary* locations = instruction->GetLocations(); Location obj_loc = locations->InAt(0); Register obj = InputRegisterAt(instruction, 0); - Register cls = InputRegisterAt(instruction, 1); + Register cls = (type_check_kind == TypeCheckKind::kBitstringCheck) + ? Register() + : InputRegisterAt(instruction, 1); Location out_loc = locations->Out(); Register out = OutputRegister(instruction); const size_t num_temps = NumberOfInstanceOfTemps(type_check_kind); @@ -4032,7 +3627,7 @@ void InstructionCodeGeneratorARM64::VisitInstanceOf(HInstanceOf* instruction) { __ Cmp(out, cls); DCHECK(locations->OnlyCallsOnSlowPath()); slow_path = new (codegen_->GetScopedAllocator()) TypeCheckSlowPathARM64( - instruction, /* is_fatal */ false); + instruction, /* is_fatal= */ false); codegen_->AddSlowPath(slow_path); __ B(ne, slow_path->GetEntryLabel()); __ Mov(out, 1); @@ -4064,7 +3659,7 @@ void InstructionCodeGeneratorARM64::VisitInstanceOf(HInstanceOf* instruction) { // This should also be beneficial for the other cases above. DCHECK(locations->OnlyCallsOnSlowPath()); slow_path = new (codegen_->GetScopedAllocator()) TypeCheckSlowPathARM64( - instruction, /* is_fatal */ false); + instruction, /* is_fatal= */ false); codegen_->AddSlowPath(slow_path); __ B(slow_path->GetEntryLabel()); if (zero.IsLinked()) { @@ -4072,6 +3667,23 @@ void InstructionCodeGeneratorARM64::VisitInstanceOf(HInstanceOf* instruction) { } break; } + + case TypeCheckKind::kBitstringCheck: { + // /* HeapReference<Class> */ temp = obj->klass_ + GenerateReferenceLoadTwoRegisters(instruction, + out_loc, + obj_loc, + class_offset, + maybe_temp_loc, + kWithoutReadBarrier); + + GenerateBitstringTypeCheckCompare(instruction, out); + __ Cset(out, eq); + if (zero.IsLinked()) { + __ B(&done); + } + break; + } } if (zero.IsLinked()) { @@ -4094,7 +3706,13 @@ void LocationsBuilderARM64::VisitCheckCast(HCheckCast* instruction) { LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(instruction, call_kind); locations->SetInAt(0, Location::RequiresRegister()); - locations->SetInAt(1, Location::RequiresRegister()); + if (type_check_kind == TypeCheckKind::kBitstringCheck) { + locations->SetInAt(1, Location::ConstantLocation(instruction->InputAt(1)->AsConstant())); + locations->SetInAt(2, Location::ConstantLocation(instruction->InputAt(2)->AsConstant())); + locations->SetInAt(3, Location::ConstantLocation(instruction->InputAt(3)->AsConstant())); + } else { + locations->SetInAt(1, Location::RequiresRegister()); + } // Add temps for read barriers and other uses. One is used by TypeCheckSlowPathARM64. 
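For the kBitstringCheck cases added in these hunks: the target's bits are compile-time constants, so conceptually the test is one load-and-compare with no class-hierarchy walk (the exact encoding lives in GenerateBitstringTypeCheckCompare, which is outside this hunk); a sketch under that assumption:

    #include <cstdint>

    bool BitstringTypeCheckSketch(uint32_t klass_type_check_bits,
                                  uint32_t path_to_root,
                                  uint32_t mask) {
      return (klass_type_check_bits & mask) == path_to_root;
    }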
locations->AddRegisterTemps(NumberOfCheckCastTemps(type_check_kind)); } @@ -4104,7 +3722,9 @@ void InstructionCodeGeneratorARM64::VisitCheckCast(HCheckCast* instruction) { LocationSummary* locations = instruction->GetLocations(); Location obj_loc = locations->InAt(0); Register obj = InputRegisterAt(instruction, 0); - Register cls = InputRegisterAt(instruction, 1); + Register cls = (type_check_kind == TypeCheckKind::kBitstringCheck) + ? Register() + : InputRegisterAt(instruction, 1); const size_t num_temps = NumberOfCheckCastTemps(type_check_kind); DCHECK_GE(num_temps, 1u); DCHECK_LE(num_temps, 3u); @@ -4285,6 +3905,20 @@ void InstructionCodeGeneratorARM64::VisitCheckCast(HCheckCast* instruction) { __ B(ne, &start_loop); break; } + + case TypeCheckKind::kBitstringCheck: { + // /* HeapReference<Class> */ temp = obj->klass_ + GenerateReferenceLoadTwoRegisters(instruction, + temp_loc, + obj_loc, + class_offset, + maybe_temp2_loc, + kWithoutReadBarrier); + + GenerateBitstringTypeCheckCompare(instruction, temp); + __ B(ne, type_check_slow_path->GetEntryLabel()); + break; + } } __ Bind(&done); @@ -4318,7 +3952,7 @@ void LocationsBuilderARM64::VisitInvokeUnresolved(HInvokeUnresolved* invoke) { void InstructionCodeGeneratorARM64::VisitInvokeUnresolved(HInvokeUnresolved* invoke) { codegen_->GenerateInvokeUnresolvedRuntimeCall(invoke); - codegen_->MaybeGenerateMarkingRegisterCheck(/* code */ __LINE__); + codegen_->MaybeGenerateMarkingRegisterCheck(/* code= */ __LINE__); } void LocationsBuilderARM64::HandleInvoke(HInvoke* invoke) { @@ -4388,7 +4022,7 @@ void InstructionCodeGeneratorARM64::VisitInvokeInterface(HInvokeInterface* invok codegen_->RecordPcInfo(invoke, invoke->GetDexPc()); } - codegen_->MaybeGenerateMarkingRegisterCheck(/* code */ __LINE__); + codegen_->MaybeGenerateMarkingRegisterCheck(/* code= */ __LINE__); } void LocationsBuilderARM64::VisitInvokeVirtual(HInvokeVirtual* invoke) { @@ -4424,7 +4058,7 @@ static bool TryGenerateIntrinsicCode(HInvoke* invoke, CodeGeneratorARM64* codege HInvokeStaticOrDirect::DispatchInfo CodeGeneratorARM64::GetSupportedInvokeStaticOrDirectDispatch( const HInvokeStaticOrDirect::DispatchInfo& desired_dispatch_info, - HInvokeStaticOrDirect* invoke ATTRIBUTE_UNUSED) { + ArtMethod* method ATTRIBUTE_UNUSED) { // On ARM64 we support all dispatch types. return desired_dispatch_info; } @@ -4455,21 +4089,32 @@ void CodeGeneratorARM64::GenerateStaticOrDirectCall( EmitAddPlaceholder(add_label, XRegisterFrom(temp), XRegisterFrom(temp)); break; } - case HInvokeStaticOrDirect::MethodLoadKind::kDirectAddress: - // Load method address from literal pool. - __ Ldr(XRegisterFrom(temp), DeduplicateUint64Literal(invoke->GetMethodAddress())); + case HInvokeStaticOrDirect::MethodLoadKind::kBootImageRelRo: { + // Add ADRP with its PC-relative .data.bimg.rel.ro patch. + uint32_t boot_image_offset = GetBootImageOffset(invoke); + vixl::aarch64::Label* adrp_label = NewBootImageRelRoPatch(boot_image_offset); + EmitAdrpPlaceholder(adrp_label, XRegisterFrom(temp)); + // Add LDR with its PC-relative .data.bimg.rel.ro patch. + vixl::aarch64::Label* ldr_label = NewBootImageRelRoPatch(boot_image_offset, adrp_label); + // Note: Boot image is in the low 4GiB and the entry is 32-bit, so emit a 32-bit load. + EmitLdrOffsetPlaceholder(ldr_label, WRegisterFrom(temp), XRegisterFrom(temp)); break; + } case HInvokeStaticOrDirect::MethodLoadKind::kBssEntry: { - // Add ADRP with its PC-relative DexCache access patch. + // Add ADRP with its PC-relative .bss entry patch. 
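All of the ADRP+ADD / ADRP+LDR placeholder pairs in this file follow standard AArch64 ADRP arithmetic; a sketch of what the later patching resolves them to:

    #include <cstdint>

    // ADRP produces the 4 KiB page of the target relative to the current PC's page;
    // the paired ADD, or the LDR's immediate, contributes the low 12 bits.
    uint64_t AdrpResultSketch(uint64_t pc, int64_t page_delta) {
      return (pc & ~UINT64_C(0xfff)) + page_delta * 4096;
    }
    uint64_t PatchedAddressSketch(uint64_t pc, int64_t page_delta, uint32_t lo12) {
      return AdrpResultSketch(pc, page_delta) + lo12;
    }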
MethodReference target_method(&GetGraph()->GetDexFile(), invoke->GetDexMethodIndex()); vixl::aarch64::Label* adrp_label = NewMethodBssEntryPatch(target_method); EmitAdrpPlaceholder(adrp_label, XRegisterFrom(temp)); - // Add LDR with its PC-relative DexCache access patch. + // Add LDR with its PC-relative .bss entry patch. vixl::aarch64::Label* ldr_label = NewMethodBssEntryPatch(target_method, adrp_label); EmitLdrOffsetPlaceholder(ldr_label, XRegisterFrom(temp), XRegisterFrom(temp)); break; } + case HInvokeStaticOrDirect::MethodLoadKind::kJitDirectAddress: + // Load method address from literal pool. + __ Ldr(XRegisterFrom(temp), DeduplicateUint64Literal(invoke->GetMethodAddress())); + break; case HInvokeStaticOrDirect::MethodLoadKind::kRuntimeCall: { GenerateInvokeStaticOrDirectRuntimeCall(invoke, temp, slow_path); return; // No code pointer retrieval; the runtime performs the call directly. @@ -4556,7 +4201,30 @@ void LocationsBuilderARM64::VisitInvokePolymorphic(HInvokePolymorphic* invoke) { void InstructionCodeGeneratorARM64::VisitInvokePolymorphic(HInvokePolymorphic* invoke) { codegen_->GenerateInvokePolymorphicCall(invoke); - codegen_->MaybeGenerateMarkingRegisterCheck(/* code */ __LINE__); + codegen_->MaybeGenerateMarkingRegisterCheck(/* code= */ __LINE__); +} + +void LocationsBuilderARM64::VisitInvokeCustom(HInvokeCustom* invoke) { + HandleInvoke(invoke); +} + +void InstructionCodeGeneratorARM64::VisitInvokeCustom(HInvokeCustom* invoke) { + codegen_->GenerateInvokeCustomCall(invoke); + codegen_->MaybeGenerateMarkingRegisterCheck(/* code= */ __LINE__); +} + +vixl::aarch64::Label* CodeGeneratorARM64::NewBootImageIntrinsicPatch( + uint32_t intrinsic_data, + vixl::aarch64::Label* adrp_label) { + return NewPcRelativePatch( + /* dex_file= */ nullptr, intrinsic_data, adrp_label, &boot_image_intrinsic_patches_); +} + +vixl::aarch64::Label* CodeGeneratorARM64::NewBootImageRelRoPatch( + uint32_t boot_image_offset, + vixl::aarch64::Label* adrp_label) { + return NewPcRelativePatch( + /* dex_file= */ nullptr, boot_image_offset, adrp_label, &boot_image_method_patches_); } vixl::aarch64::Label* CodeGeneratorARM64::NewBootImageMethodPatch( @@ -4602,9 +4270,18 @@ vixl::aarch64::Label* CodeGeneratorARM64::NewStringBssEntryPatch( return NewPcRelativePatch(&dex_file, string_index.index_, adrp_label, &string_bss_entry_patches_); } -vixl::aarch64::Label* CodeGeneratorARM64::NewBakerReadBarrierPatch(uint32_t custom_data) { - baker_read_barrier_patches_.emplace_back(custom_data); - return &baker_read_barrier_patches_.back().label; +void CodeGeneratorARM64::EmitBakerReadBarrierCbnz(uint32_t custom_data) { + DCHECK(!__ AllowMacroInstructions()); // In ExactAssemblyScope. + if (Runtime::Current()->UseJitCompilation()) { + auto it = jit_baker_read_barrier_slow_paths_.FindOrAdd(custom_data); + vixl::aarch64::Label* slow_path_entry = &it->second.label; + __ cbnz(mr, slow_path_entry); + } else { + baker_read_barrier_patches_.emplace_back(custom_data); + vixl::aarch64::Label* cbnz_label = &baker_read_barrier_patches_.back().label; + __ bind(cbnz_label); + __ cbnz(mr, static_cast<int64_t>(0)); // Placeholder, patched at link-time. 
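The JIT branch of EmitBakerReadBarrierCbnz above keys slow-path entries by custom_data so each distinct thunk is emitted once per method; a container-level sketch of that FindOrAdd behaviour (Label stands in for vixl::aarch64::Label):

    #include <cstdint>
    #include <map>

    struct Label {};  // stand-in for vixl::aarch64::Label

    Label* SlowPathEntryForSketch(std::map<uint32_t, Label>& table, uint32_t custom_data) {
      return &table[custom_data];  // inserts on first use, reuses afterwards
    }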
+ } } vixl::aarch64::Label* CodeGeneratorARM64::NewPcRelativePatch( @@ -4631,7 +4308,7 @@ vixl::aarch64::Literal<uint32_t>* CodeGeneratorARM64::DeduplicateJitStringLitera ReserveJitStringRoot(StringReference(&dex_file, string_index), handle); return jit_string_patches_.GetOrCreate( StringReference(&dex_file, string_index), - [this]() { return __ CreateLiteralDestroyedWithPool<uint32_t>(/* placeholder */ 0u); }); + [this]() { return __ CreateLiteralDestroyedWithPool<uint32_t>(/* value= */ 0u); }); } vixl::aarch64::Literal<uint32_t>* CodeGeneratorARM64::DeduplicateJitClassLiteral( @@ -4639,7 +4316,7 @@ vixl::aarch64::Literal<uint32_t>* CodeGeneratorARM64::DeduplicateJitClassLiteral ReserveJitClassRoot(TypeReference(&dex_file, type_index), handle); return jit_class_patches_.GetOrCreate( TypeReference(&dex_file, type_index), - [this]() { return __ CreateLiteralDestroyedWithPool<uint32_t>(/* placeholder */ 0u); }); + [this]() { return __ CreateLiteralDestroyedWithPool<uint32_t>(/* value= */ 0u); }); } void CodeGeneratorARM64::EmitAdrpPlaceholder(vixl::aarch64::Label* fixup_label, @@ -4669,6 +4346,55 @@ void CodeGeneratorARM64::EmitLdrOffsetPlaceholder(vixl::aarch64::Label* fixup_la __ ldr(out, MemOperand(base, /* offset placeholder */ 0)); } +void CodeGeneratorARM64::LoadBootImageAddress(vixl::aarch64::Register reg, + uint32_t boot_image_reference) { + if (GetCompilerOptions().IsBootImage()) { + // Add ADRP with its PC-relative type patch. + vixl::aarch64::Label* adrp_label = NewBootImageIntrinsicPatch(boot_image_reference); + EmitAdrpPlaceholder(adrp_label, reg.X()); + // Add ADD with its PC-relative type patch. + vixl::aarch64::Label* add_label = NewBootImageIntrinsicPatch(boot_image_reference, adrp_label); + EmitAddPlaceholder(add_label, reg.X(), reg.X()); + } else if (GetCompilerOptions().GetCompilePic()) { + // Add ADRP with its PC-relative .data.bimg.rel.ro patch. + vixl::aarch64::Label* adrp_label = NewBootImageRelRoPatch(boot_image_reference); + EmitAdrpPlaceholder(adrp_label, reg.X()); + // Add LDR with its PC-relative .data.bimg.rel.ro patch. + vixl::aarch64::Label* ldr_label = NewBootImageRelRoPatch(boot_image_reference, adrp_label); + EmitLdrOffsetPlaceholder(ldr_label, reg.W(), reg.X()); + } else { + DCHECK(Runtime::Current()->UseJitCompilation()); + gc::Heap* heap = Runtime::Current()->GetHeap(); + DCHECK(!heap->GetBootImageSpaces().empty()); + const uint8_t* address = heap->GetBootImageSpaces()[0]->Begin() + boot_image_reference; + __ Ldr(reg.W(), DeduplicateBootImageAddressLiteral(reinterpret_cast<uintptr_t>(address))); + } +} + +void CodeGeneratorARM64::AllocateInstanceForIntrinsic(HInvokeStaticOrDirect* invoke, + uint32_t boot_image_offset) { + DCHECK(invoke->IsStatic()); + InvokeRuntimeCallingConvention calling_convention; + Register argument = calling_convention.GetRegisterAt(0); + if (GetCompilerOptions().IsBootImage()) { + DCHECK_EQ(boot_image_offset, IntrinsicVisitor::IntegerValueOfInfo::kInvalidReference); + // Load the class the same way as for HLoadClass::LoadKind::kBootImageLinkTimePcRelative. + MethodReference target_method = invoke->GetTargetMethod(); + dex::TypeIndex type_idx = target_method.dex_file->GetMethodId(target_method.index).class_idx_; + // Add ADRP with its PC-relative type patch. + vixl::aarch64::Label* adrp_label = NewBootImageTypePatch(*target_method.dex_file, type_idx); + EmitAdrpPlaceholder(adrp_label, argument.X()); + // Add ADD with its PC-relative type patch. 
+ vixl::aarch64::Label* add_label = + NewBootImageTypePatch(*target_method.dex_file, type_idx, adrp_label); + EmitAddPlaceholder(add_label, argument.X(), argument.X()); + } else { + LoadBootImageAddress(argument, boot_image_offset); + } + InvokeRuntime(kQuickAllocObjectInitialized, invoke, invoke->GetDexPc()); + CheckEntrypointTypes<kQuickAllocObjectWithChecks, void*, mirror::Class*>(); +} + template <linker::LinkerPatch (*Factory)(size_t, const DexFile*, uint32_t, uint32_t)> inline void CodeGeneratorARM64::EmitPcRelativeLinkerPatches( const ArenaDeque<PcRelativePatchInfo>& infos, @@ -4681,6 +4407,15 @@ inline void CodeGeneratorARM64::EmitPcRelativeLinkerPatches( } } +template <linker::LinkerPatch (*Factory)(size_t, uint32_t, uint32_t)> +linker::LinkerPatch NoDexFileAdapter(size_t literal_offset, + const DexFile* target_dex_file, + uint32_t pc_insn_offset, + uint32_t boot_image_offset) { + DCHECK(target_dex_file == nullptr); // Unused for these patches, should be null. + return Factory(literal_offset, pc_insn_offset, boot_image_offset); +} + void CodeGeneratorARM64::EmitLinkerPatches(ArenaVector<linker::LinkerPatch>* linker_patches) { DCHECK(linker_patches->empty()); size_t size = @@ -4690,6 +4425,7 @@ void CodeGeneratorARM64::EmitLinkerPatches(ArenaVector<linker::LinkerPatch>* lin type_bss_entry_patches_.size() + boot_image_string_patches_.size() + string_bss_entry_patches_.size() + + boot_image_intrinsic_patches_.size() + baker_read_barrier_patches_.size(); linker_patches->reserve(size); if (GetCompilerOptions().IsBootImage()) { @@ -4699,12 +4435,14 @@ void CodeGeneratorARM64::EmitLinkerPatches(ArenaVector<linker::LinkerPatch>* lin boot_image_type_patches_, linker_patches); EmitPcRelativeLinkerPatches<linker::LinkerPatch::RelativeStringPatch>( boot_image_string_patches_, linker_patches); + EmitPcRelativeLinkerPatches<NoDexFileAdapter<linker::LinkerPatch::IntrinsicReferencePatch>>( + boot_image_intrinsic_patches_, linker_patches); } else { - DCHECK(boot_image_method_patches_.empty()); - EmitPcRelativeLinkerPatches<linker::LinkerPatch::TypeClassTablePatch>( - boot_image_type_patches_, linker_patches); - EmitPcRelativeLinkerPatches<linker::LinkerPatch::StringInternTablePatch>( - boot_image_string_patches_, linker_patches); + EmitPcRelativeLinkerPatches<NoDexFileAdapter<linker::LinkerPatch::DataBimgRelRoPatch>>( + boot_image_method_patches_, linker_patches); + DCHECK(boot_image_type_patches_.empty()); + DCHECK(boot_image_string_patches_.empty()); + DCHECK(boot_image_intrinsic_patches_.empty()); } EmitPcRelativeLinkerPatches<linker::LinkerPatch::MethodBssEntryPatch>( method_bss_entry_patches_, linker_patches); @@ -4719,6 +4457,44 @@ void CodeGeneratorARM64::EmitLinkerPatches(ArenaVector<linker::LinkerPatch>* lin DCHECK_EQ(size, linker_patches->size()); } +bool CodeGeneratorARM64::NeedsThunkCode(const linker::LinkerPatch& patch) const { + return patch.GetType() == linker::LinkerPatch::Type::kBakerReadBarrierBranch || + patch.GetType() == linker::LinkerPatch::Type::kCallRelative; +} + +void CodeGeneratorARM64::EmitThunkCode(const linker::LinkerPatch& patch, + /*out*/ ArenaVector<uint8_t>* code, + /*out*/ std::string* debug_name) { + Arm64Assembler assembler(GetGraph()->GetAllocator()); + switch (patch.GetType()) { + case linker::LinkerPatch::Type::kCallRelative: { + // The thunk just uses the entry point in the ArtMethod. This works even for calls + // to the generic JNI and interpreter trampolines. 
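A sketch of what the method-call thunk boils down to (struct and member names are illustrative): it forwards through the quick entry point stored in the callee ArtMethod held in x0, which is why the bridges mentioned above work unchanged:

    // Roughly: ldr ip0, [x0, #entry_point_from_quick_compiled_code]; br ip0
    using QuickEntrySketch = void (*)();
    struct ArtMethodSketch {
      QuickEntrySketch entry_point_from_quick_compiled_code_;
    };
    QuickEntrySketch MethodCallThunkTargetSketch(const ArtMethodSketch* callee /* x0 */) {
      return callee->entry_point_from_quick_compiled_code_;
    }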
+ Offset offset(ArtMethod::EntryPointFromQuickCompiledCodeOffset( + kArm64PointerSize).Int32Value()); + assembler.JumpTo(ManagedRegister(arm64::X0), offset, ManagedRegister(arm64::IP0)); + if (GetCompilerOptions().GenerateAnyDebugInfo()) { + *debug_name = "MethodCallThunk"; + } + break; + } + case linker::LinkerPatch::Type::kBakerReadBarrierBranch: { + DCHECK_EQ(patch.GetBakerCustomValue2(), 0u); + CompileBakerReadBarrierThunk(assembler, patch.GetBakerCustomValue1(), debug_name); + break; + } + default: + LOG(FATAL) << "Unexpected patch type " << patch.GetType(); + UNREACHABLE(); + } + + // Ensure we emit the literal pool if any. + assembler.FinalizeCode(); + code->resize(assembler.CodeSize()); + MemoryRegion code_region(code->data(), code->size()); + assembler.FinalizeInstructions(code_region); +} + vixl::aarch64::Literal<uint32_t>* CodeGeneratorARM64::DeduplicateUint32Literal(uint32_t value) { return uint32_literals_.GetOrCreate( value, @@ -4737,7 +4513,7 @@ void InstructionCodeGeneratorARM64::VisitInvokeStaticOrDirect(HInvokeStaticOrDir DCHECK(!invoke->IsStaticWithExplicitClinitCheck()); if (TryGenerateIntrinsicCode(invoke, codegen_)) { - codegen_->MaybeGenerateMarkingRegisterCheck(/* code */ __LINE__); + codegen_->MaybeGenerateMarkingRegisterCheck(/* code= */ __LINE__); return; } @@ -4750,12 +4526,12 @@ void InstructionCodeGeneratorARM64::VisitInvokeStaticOrDirect(HInvokeStaticOrDir invoke, locations->HasTemps() ? locations->GetTemp(0) : Location::NoLocation()); } - codegen_->MaybeGenerateMarkingRegisterCheck(/* code */ __LINE__); + codegen_->MaybeGenerateMarkingRegisterCheck(/* code= */ __LINE__); } void InstructionCodeGeneratorARM64::VisitInvokeVirtual(HInvokeVirtual* invoke) { if (TryGenerateIntrinsicCode(invoke, codegen_)) { - codegen_->MaybeGenerateMarkingRegisterCheck(/* code */ __LINE__); + codegen_->MaybeGenerateMarkingRegisterCheck(/* code= */ __LINE__); return; } @@ -4767,7 +4543,7 @@ void InstructionCodeGeneratorARM64::VisitInvokeVirtual(HInvokeVirtual* invoke) { DCHECK(!codegen_->IsLeafMethod()); } - codegen_->MaybeGenerateMarkingRegisterCheck(/* code */ __LINE__); + codegen_->MaybeGenerateMarkingRegisterCheck(/* code= */ __LINE__); } HLoadClass::LoadKind CodeGeneratorARM64::GetSupportedLoadClassKind( @@ -4779,14 +4555,14 @@ HLoadClass::LoadKind CodeGeneratorARM64::GetSupportedLoadClassKind( case HLoadClass::LoadKind::kReferrersClass: break; case HLoadClass::LoadKind::kBootImageLinkTimePcRelative: - case HLoadClass::LoadKind::kBootImageClassTable: + case HLoadClass::LoadKind::kBootImageRelRo: case HLoadClass::LoadKind::kBssEntry: DCHECK(!Runtime::Current()->UseJitCompilation()); break; + case HLoadClass::LoadKind::kJitBootImageAddress: case HLoadClass::LoadKind::kJitTableAddress: DCHECK(Runtime::Current()->UseJitCompilation()); break; - case HLoadClass::LoadKind::kBootImageAddress: case HLoadClass::LoadKind::kRuntimeCall: break; } @@ -4822,13 +4598,7 @@ void LocationsBuilderARM64::VisitLoadClass(HLoadClass* cls) { if (cls->GetLoadKind() == HLoadClass::LoadKind::kBssEntry) { if (!kUseReadBarrier || kUseBakerReadBarrier) { // Rely on the type resolution or initialization and marking to save everything we need. 
- RegisterSet caller_saves = RegisterSet::Empty(); - InvokeRuntimeCallingConvention calling_convention; - caller_saves.Add(Location::RegisterLocation(calling_convention.GetRegisterAt(0).GetCode())); - DCHECK_EQ(calling_convention.GetRegisterAt(0).GetCode(), - RegisterFrom(calling_convention.GetReturnLocation(DataType::Type::kReference), - DataType::Type::kReference).GetCode()); - locations->SetCustomSlowPathCallerSaves(caller_saves); + locations->SetCustomSlowPathCallerSaves(OneRegInReferenceOutSaveEverythingCallerSaves()); } else { // For non-Baker read barrier we have a temp-clobbering call. } @@ -4841,7 +4611,7 @@ void InstructionCodeGeneratorARM64::VisitLoadClass(HLoadClass* cls) NO_THREAD_SA HLoadClass::LoadKind load_kind = cls->GetLoadKind(); if (load_kind == HLoadClass::LoadKind::kRuntimeCall) { codegen_->GenerateLoadClassRuntimeCall(cls); - codegen_->MaybeGenerateMarkingRegisterCheck(/* code */ __LINE__); + codegen_->MaybeGenerateMarkingRegisterCheck(/* code= */ __LINE__); return; } DCHECK(!cls->NeedsAccessCheck()); @@ -4859,12 +4629,12 @@ void InstructionCodeGeneratorARM64::VisitLoadClass(HLoadClass* cls) NO_THREAD_SA DCHECK(!cls->MustGenerateClinitCheck()); // /* GcRoot<mirror::Class> */ out = current_method->declaring_class_ Register current_method = InputRegisterAt(cls, 0); - GenerateGcRootFieldLoad(cls, - out_loc, - current_method, - ArtMethod::DeclaringClassOffset().Int32Value(), - /* fixup_label */ nullptr, - read_barrier_option); + codegen_->GenerateGcRootFieldLoad(cls, + out_loc, + current_method, + ArtMethod::DeclaringClassOffset().Int32Value(), + /* fixup_label= */ nullptr, + read_barrier_option); break; } case HLoadClass::LoadKind::kBootImageLinkTimePcRelative: { @@ -4880,31 +4650,16 @@ void InstructionCodeGeneratorARM64::VisitLoadClass(HLoadClass* cls) NO_THREAD_SA codegen_->EmitAddPlaceholder(add_label, out.X(), out.X()); break; } - case HLoadClass::LoadKind::kBootImageAddress: { - DCHECK_EQ(read_barrier_option, kWithoutReadBarrier); - uint32_t address = dchecked_integral_cast<uint32_t>( - reinterpret_cast<uintptr_t>(cls->GetClass().Get())); - DCHECK_NE(address, 0u); - __ Ldr(out.W(), codegen_->DeduplicateBootImageAddressLiteral(address)); - break; - } - case HLoadClass::LoadKind::kBootImageClassTable: { + case HLoadClass::LoadKind::kBootImageRelRo: { DCHECK(!codegen_->GetCompilerOptions().IsBootImage()); - // Add ADRP with its PC-relative type patch. - const DexFile& dex_file = cls->GetDexFile(); - dex::TypeIndex type_index = cls->GetTypeIndex(); - vixl::aarch64::Label* adrp_label = codegen_->NewBootImageTypePatch(dex_file, type_index); + uint32_t boot_image_offset = codegen_->GetBootImageOffset(cls); + // Add ADRP with its PC-relative .data.bimg.rel.ro patch. + vixl::aarch64::Label* adrp_label = codegen_->NewBootImageRelRoPatch(boot_image_offset); codegen_->EmitAdrpPlaceholder(adrp_label, out.X()); - // Add LDR with its PC-relative type patch. + // Add LDR with its PC-relative .data.bimg.rel.ro patch. vixl::aarch64::Label* ldr_label = - codegen_->NewBootImageTypePatch(dex_file, type_index, adrp_label); + codegen_->NewBootImageRelRoPatch(boot_image_offset, adrp_label); codegen_->EmitLdrOffsetPlaceholder(ldr_label, out.W(), out.X()); - // Extract the reference from the slot data, i.e. clear the hash bits. 
- int32_t masked_hash = ClassTable::TableSlot::MaskHash( - ComputeModifiedUtf8Hash(dex_file.StringByTypeIdx(type_index))); - if (masked_hash != 0) { - __ Sub(out.W(), out.W(), Operand(masked_hash)); - } break; } case HLoadClass::LoadKind::kBssEntry: { @@ -4914,29 +4669,36 @@ void InstructionCodeGeneratorARM64::VisitLoadClass(HLoadClass* cls) NO_THREAD_SA vixl::aarch64::Register temp = XRegisterFrom(out_loc); vixl::aarch64::Label* adrp_label = codegen_->NewBssEntryTypePatch(dex_file, type_index); codegen_->EmitAdrpPlaceholder(adrp_label, temp); - // Add LDR with its PC-relative Class patch. + // Add LDR with its PC-relative Class .bss entry patch. vixl::aarch64::Label* ldr_label = codegen_->NewBssEntryTypePatch(dex_file, type_index, adrp_label); // /* GcRoot<mirror::Class> */ out = *(base_address + offset) /* PC-relative */ - GenerateGcRootFieldLoad(cls, - out_loc, - temp, - /* offset placeholder */ 0u, - ldr_label, - read_barrier_option); + codegen_->GenerateGcRootFieldLoad(cls, + out_loc, + temp, + /* offset placeholder */ 0u, + ldr_label, + read_barrier_option); generate_null_check = true; break; } + case HLoadClass::LoadKind::kJitBootImageAddress: { + DCHECK_EQ(read_barrier_option, kWithoutReadBarrier); + uint32_t address = reinterpret_cast32<uint32_t>(cls->GetClass().Get()); + DCHECK_NE(address, 0u); + __ Ldr(out.W(), codegen_->DeduplicateBootImageAddressLiteral(address)); + break; + } case HLoadClass::LoadKind::kJitTableAddress: { __ Ldr(out, codegen_->DeduplicateJitClassLiteral(cls->GetDexFile(), cls->GetTypeIndex(), cls->GetClass())); - GenerateGcRootFieldLoad(cls, - out_loc, - out.X(), - /* offset */ 0, - /* fixup_label */ nullptr, - read_barrier_option); + codegen_->GenerateGcRootFieldLoad(cls, + out_loc, + out.X(), + /* offset= */ 0, + /* fixup_label= */ nullptr, + read_barrier_option); break; } case HLoadClass::LoadKind::kRuntimeCall: @@ -4948,8 +4710,8 @@ void InstructionCodeGeneratorARM64::VisitLoadClass(HLoadClass* cls) NO_THREAD_SA bool do_clinit = cls->MustGenerateClinitCheck(); if (generate_null_check || do_clinit) { DCHECK(cls->CanCallRuntime()); - SlowPathCodeARM64* slow_path = new (codegen_->GetScopedAllocator()) LoadClassSlowPathARM64( - cls, cls, cls->GetDexPc(), do_clinit); + SlowPathCodeARM64* slow_path = + new (codegen_->GetScopedAllocator()) LoadClassSlowPathARM64(cls, cls); codegen_->AddSlowPath(slow_path); if (generate_null_check) { __ Cbz(out, slow_path->GetEntryLabel()); @@ -4959,10 +4721,30 @@ void InstructionCodeGeneratorARM64::VisitLoadClass(HLoadClass* cls) NO_THREAD_SA } else { __ Bind(slow_path->GetExitLabel()); } - codegen_->MaybeGenerateMarkingRegisterCheck(/* code */ __LINE__); + codegen_->MaybeGenerateMarkingRegisterCheck(/* code= */ __LINE__); } } +void LocationsBuilderARM64::VisitLoadMethodHandle(HLoadMethodHandle* load) { + InvokeRuntimeCallingConvention calling_convention; + Location location = LocationFrom(calling_convention.GetRegisterAt(0)); + CodeGenerator::CreateLoadMethodHandleRuntimeCallLocationSummary(load, location, location); +} + +void InstructionCodeGeneratorARM64::VisitLoadMethodHandle(HLoadMethodHandle* load) { + codegen_->GenerateLoadMethodHandleRuntimeCall(load); +} + +void LocationsBuilderARM64::VisitLoadMethodType(HLoadMethodType* load) { + InvokeRuntimeCallingConvention calling_convention; + Location location = LocationFrom(calling_convention.GetRegisterAt(0)); + CodeGenerator::CreateLoadMethodTypeRuntimeCallLocationSummary(load, location, location); +} + +void InstructionCodeGeneratorARM64::VisitLoadMethodType(HLoadMethodType* 
load) { + codegen_->GenerateLoadMethodTypeRuntimeCall(load); +} + static MemOperand GetExceptionTlsAddress() { return MemOperand(tr, Thread::ExceptionOffset<kArm64PointerSize>().Int32Value()); } @@ -4989,14 +4771,14 @@ HLoadString::LoadKind CodeGeneratorARM64::GetSupportedLoadStringKind( HLoadString::LoadKind desired_string_load_kind) { switch (desired_string_load_kind) { case HLoadString::LoadKind::kBootImageLinkTimePcRelative: - case HLoadString::LoadKind::kBootImageInternTable: + case HLoadString::LoadKind::kBootImageRelRo: case HLoadString::LoadKind::kBssEntry: DCHECK(!Runtime::Current()->UseJitCompilation()); break; + case HLoadString::LoadKind::kJitBootImageAddress: case HLoadString::LoadKind::kJitTableAddress: DCHECK(Runtime::Current()->UseJitCompilation()); break; - case HLoadString::LoadKind::kBootImageAddress: case HLoadString::LoadKind::kRuntimeCall: break; } @@ -5014,13 +4796,7 @@ void LocationsBuilderARM64::VisitLoadString(HLoadString* load) { if (load->GetLoadKind() == HLoadString::LoadKind::kBssEntry) { if (!kUseReadBarrier || kUseBakerReadBarrier) { // Rely on the pResolveString and marking to save everything we need. - RegisterSet caller_saves = RegisterSet::Empty(); - InvokeRuntimeCallingConvention calling_convention; - caller_saves.Add(Location::RegisterLocation(calling_convention.GetRegisterAt(0).GetCode())); - DCHECK_EQ(calling_convention.GetRegisterAt(0).GetCode(), - RegisterFrom(calling_convention.GetReturnLocation(DataType::Type::kReference), - DataType::Type::kReference).GetCode()); - locations->SetCustomSlowPathCallerSaves(caller_saves); + locations->SetCustomSlowPathCallerSaves(OneRegInReferenceOutSaveEverythingCallerSaves()); } else { // For non-Baker read barrier we have a temp-clobbering call. } @@ -5048,23 +4824,15 @@ void InstructionCodeGeneratorARM64::VisitLoadString(HLoadString* load) NO_THREAD codegen_->EmitAddPlaceholder(add_label, out.X(), out.X()); return; } - case HLoadString::LoadKind::kBootImageAddress: { - uint32_t address = dchecked_integral_cast<uint32_t>( - reinterpret_cast<uintptr_t>(load->GetString().Get())); - DCHECK_NE(address, 0u); - __ Ldr(out.W(), codegen_->DeduplicateBootImageAddressLiteral(address)); - return; - } - case HLoadString::LoadKind::kBootImageInternTable: { + case HLoadString::LoadKind::kBootImageRelRo: { DCHECK(!codegen_->GetCompilerOptions().IsBootImage()); - // Add ADRP with its PC-relative String patch. - const DexFile& dex_file = load->GetDexFile(); - const dex::StringIndex string_index = load->GetStringIndex(); - vixl::aarch64::Label* adrp_label = codegen_->NewBootImageStringPatch(dex_file, string_index); + // Add ADRP with its PC-relative .data.bimg.rel.ro patch. + uint32_t boot_image_offset = codegen_->GetBootImageOffset(load); + vixl::aarch64::Label* adrp_label = codegen_->NewBootImageRelRoPatch(boot_image_offset); codegen_->EmitAdrpPlaceholder(adrp_label, out.X()); - // Add LDR with its PC-relative String patch. + // Add LDR with its PC-relative .data.bimg.rel.ro patch. vixl::aarch64::Label* ldr_label = - codegen_->NewBootImageStringPatch(dex_file, string_index, adrp_label); + codegen_->NewBootImageRelRoPatch(boot_image_offset, adrp_label); codegen_->EmitLdrOffsetPlaceholder(ldr_label, out.W(), out.X()); return; } @@ -5072,38 +4840,43 @@ void InstructionCodeGeneratorARM64::VisitLoadString(HLoadString* load) NO_THREAD // Add ADRP with its PC-relative String .bss entry patch. 
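A sketch of the kBssEntry fast path being emitted here (slot and stub names assumed): the linker-assigned .bss slot starts out null, so only a null load takes the resolution slow path, which also fills the slot for subsequent executions:

    #include <cstdint>

    using ResolveStringSketch = uint32_t (*)();  // stands in for LoadStringSlowPathARM64

    uint32_t LoadStringBssEntrySketch(const uint32_t* bss_slot, ResolveStringSketch slow_path) {
      uint32_t root = *bss_slot;                // ADRP + LDR through the two patches below
      return root != 0u ? root : slow_path();   // Cbz -> slow path
    }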
const DexFile& dex_file = load->GetDexFile(); const dex::StringIndex string_index = load->GetStringIndex(); - DCHECK(!codegen_->GetCompilerOptions().IsBootImage()); Register temp = XRegisterFrom(out_loc); vixl::aarch64::Label* adrp_label = codegen_->NewStringBssEntryPatch(dex_file, string_index); codegen_->EmitAdrpPlaceholder(adrp_label, temp); - // Add LDR with its .bss entry String patch. + // Add LDR with its PC-relative String .bss entry patch. vixl::aarch64::Label* ldr_label = codegen_->NewStringBssEntryPatch(dex_file, string_index, adrp_label); // /* GcRoot<mirror::String> */ out = *(base_address + offset) /* PC-relative */ - GenerateGcRootFieldLoad(load, - out_loc, - temp, - /* offset placeholder */ 0u, - ldr_label, - kCompilerReadBarrierOption); + codegen_->GenerateGcRootFieldLoad(load, + out_loc, + temp, + /* offset placeholder */ 0u, + ldr_label, + kCompilerReadBarrierOption); SlowPathCodeARM64* slow_path = new (codegen_->GetScopedAllocator()) LoadStringSlowPathARM64(load); codegen_->AddSlowPath(slow_path); __ Cbz(out.X(), slow_path->GetEntryLabel()); __ Bind(slow_path->GetExitLabel()); - codegen_->MaybeGenerateMarkingRegisterCheck(/* code */ __LINE__); + codegen_->MaybeGenerateMarkingRegisterCheck(/* code= */ __LINE__); + return; + } + case HLoadString::LoadKind::kJitBootImageAddress: { + uint32_t address = reinterpret_cast32<uint32_t>(load->GetString().Get()); + DCHECK_NE(address, 0u); + __ Ldr(out.W(), codegen_->DeduplicateBootImageAddressLiteral(address)); return; } case HLoadString::LoadKind::kJitTableAddress: { __ Ldr(out, codegen_->DeduplicateJitStringLiteral(load->GetDexFile(), load->GetStringIndex(), load->GetString())); - GenerateGcRootFieldLoad(load, - out_loc, - out.X(), - /* offset */ 0, - /* fixup_label */ nullptr, - kCompilerReadBarrierOption); + codegen_->GenerateGcRootFieldLoad(load, + out_loc, + out.X(), + /* offset= */ 0, + /* fixup_label= */ nullptr, + kCompilerReadBarrierOption); return; } default: @@ -5116,7 +4889,7 @@ void InstructionCodeGeneratorARM64::VisitLoadString(HLoadString* load) NO_THREAD __ Mov(calling_convention.GetRegisterAt(0).W(), load->GetStringIndex().index_); codegen_->InvokeRuntime(kQuickResolveString, load, load->GetDexPc()); CheckEntrypointTypes<kQuickResolveString, void*, uint32_t>(); - codegen_->MaybeGenerateMarkingRegisterCheck(/* code */ __LINE__); + codegen_->MaybeGenerateMarkingRegisterCheck(/* code= */ __LINE__); } void LocationsBuilderARM64::VisitLongConstant(HLongConstant* constant) { @@ -5144,7 +4917,7 @@ void InstructionCodeGeneratorARM64::VisitMonitorOperation(HMonitorOperation* ins } else { CheckEntrypointTypes<kQuickUnlockObject, void, mirror::Object*>(); } - codegen_->MaybeGenerateMarkingRegisterCheck(/* code */ __LINE__); + codegen_->MaybeGenerateMarkingRegisterCheck(/* code= */ __LINE__); } void LocationsBuilderARM64::VisitMul(HMul* mul) { @@ -5235,50 +5008,25 @@ void LocationsBuilderARM64::VisitNewArray(HNewArray* instruction) { } void InstructionCodeGeneratorARM64::VisitNewArray(HNewArray* instruction) { - // Note: if heap poisoning is enabled, the entry point takes cares - // of poisoning the reference. - QuickEntrypointEnum entrypoint = - CodeGenerator::GetArrayAllocationEntrypoint(instruction->GetLoadClass()->GetClass()); + // Note: if heap poisoning is enabled, the entry point takes care of poisoning the reference. 
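For context on that note: with kPoisonHeapReferences a stored 32-bit reference is negated, which is also why the Baker read barrier code later in this file can unpoison with a single `neg`; a sketch, assuming negation is indeed the poisoning scheme on this target:

    #include <cstdint>

    uint32_t PoisonReferenceSketch(uint32_t ref)   { return 0u - ref; }
    uint32_t UnpoisonReferenceSketch(uint32_t ref) { return 0u - ref; }  // matches the `neg` used below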
+ QuickEntrypointEnum entrypoint = CodeGenerator::GetArrayAllocationEntrypoint(instruction); codegen_->InvokeRuntime(entrypoint, instruction, instruction->GetDexPc()); CheckEntrypointTypes<kQuickAllocArrayResolved, void*, mirror::Class*, int32_t>(); - codegen_->MaybeGenerateMarkingRegisterCheck(/* code */ __LINE__); + codegen_->MaybeGenerateMarkingRegisterCheck(/* code= */ __LINE__); } void LocationsBuilderARM64::VisitNewInstance(HNewInstance* instruction) { LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary( instruction, LocationSummary::kCallOnMainOnly); InvokeRuntimeCallingConvention calling_convention; - if (instruction->IsStringAlloc()) { - locations->AddTemp(LocationFrom(kArtMethodRegister)); - } else { - locations->SetInAt(0, LocationFrom(calling_convention.GetRegisterAt(0))); - } + locations->SetInAt(0, LocationFrom(calling_convention.GetRegisterAt(0))); locations->SetOut(calling_convention.GetReturnLocation(DataType::Type::kReference)); } void InstructionCodeGeneratorARM64::VisitNewInstance(HNewInstance* instruction) { - // Note: if heap poisoning is enabled, the entry point takes cares - // of poisoning the reference. - if (instruction->IsStringAlloc()) { - // String is allocated through StringFactory. Call NewEmptyString entry point. - Location temp = instruction->GetLocations()->GetTemp(0); - MemberOffset code_offset = ArtMethod::EntryPointFromQuickCompiledCodeOffset(kArm64PointerSize); - __ Ldr(XRegisterFrom(temp), MemOperand(tr, QUICK_ENTRY_POINT(pNewEmptyString))); - __ Ldr(lr, MemOperand(XRegisterFrom(temp), code_offset.Int32Value())); - - { - // Ensure the pc position is recorded immediately after the `blr` instruction. - ExactAssemblyScope eas(GetVIXLAssembler(), - kInstructionSize, - CodeBufferCheckScope::kExactSize); - __ blr(lr); - codegen_->RecordPcInfo(instruction, instruction->GetDexPc()); - } - } else { - codegen_->InvokeRuntime(instruction->GetEntrypoint(), instruction, instruction->GetDexPc()); - CheckEntrypointTypes<kQuickAllocObjectWithChecks, void*, mirror::Class*>(); - } - codegen_->MaybeGenerateMarkingRegisterCheck(/* code */ __LINE__); + codegen_->InvokeRuntime(instruction->GetEntrypoint(), instruction, instruction->GetDexPc()); + CheckEntrypointTypes<kQuickAllocObjectWithChecks, void*, mirror::Class*>(); + codegen_->MaybeGenerateMarkingRegisterCheck(/* code= */ __LINE__); } void LocationsBuilderARM64::VisitNot(HNot* instruction) { @@ -5319,7 +5067,7 @@ void CodeGeneratorARM64::GenerateImplicitNullCheck(HNullCheck* instruction) { return; } { - // Ensure that between load and MaybeRecordImplicitNullCheck there are no pools emitted. + // Ensure that between load and RecordPcInfo there are no pools emitted. 
EmissionCheckScope guard(GetVIXLAssembler(), kMaxMacroInstructionSizeInBytes); Location obj = instruction->GetLocations()->InAt(0); __ Ldr(wzr, HeapOperandFrom(obj, Offset(0))); } @@ -5433,13 +5181,75 @@ void LocationsBuilderARM64::VisitRem(HRem* rem) { } } +void InstructionCodeGeneratorARM64::GenerateIntRemForPower2Denom(HRem *instruction) { + int64_t imm = Int64FromLocation(instruction->GetLocations()->InAt(1)); + uint64_t abs_imm = static_cast<uint64_t>(AbsOrMin(imm)); + DCHECK(IsPowerOfTwo(abs_imm)) << abs_imm; + + Register out = OutputRegister(instruction); + Register dividend = InputRegisterAt(instruction, 0); + + if (abs_imm == 2) { + __ Cmp(dividend, 0); + __ And(out, dividend, 1); + __ Csneg(out, out, out, ge); + } else { + UseScratchRegisterScope temps(GetVIXLAssembler()); + Register temp = temps.AcquireSameSizeAs(out); + + __ Negs(temp, dividend); + __ And(out, dividend, abs_imm - 1); + __ And(temp, temp, abs_imm - 1); + __ Csneg(out, out, temp, mi); + } +} + +void InstructionCodeGeneratorARM64::GenerateIntRemForConstDenom(HRem *instruction) { + int64_t imm = Int64FromLocation(instruction->GetLocations()->InAt(1)); + + if (imm == 0) { + // Do not generate anything. + // DivZeroCheck would prevent any code to be executed. + return; + } + + if (IsPowerOfTwo(AbsOrMin(imm))) { + // Cases imm == -1 or imm == 1 are handled in constant folding by + // InstructionWithAbsorbingInputSimplifier. + // If the cases have survived till code generation, they are handled in + // GenerateIntRemForPower2Denom because -1 and 1 are powers of 2 (2^0). + // The correct code is generated for them, just with more instructions. + GenerateIntRemForPower2Denom(instruction); + } else { + DCHECK(imm < -2 || imm > 2) << imm; + GenerateDivRemWithAnyConstant(instruction); + } +} + +void InstructionCodeGeneratorARM64::GenerateIntRem(HRem* instruction) { + DCHECK(DataType::IsIntOrLongType(instruction->GetResultType())) + << instruction->GetResultType(); + + if (instruction->GetLocations()->InAt(1).IsConstant()) { + GenerateIntRemForConstDenom(instruction); + } else { + Register out = OutputRegister(instruction); + Register dividend = InputRegisterAt(instruction, 0); + Register divisor = InputRegisterAt(instruction, 1); + UseScratchRegisterScope temps(GetVIXLAssembler()); + Register temp = temps.AcquireSameSizeAs(out); + __ Sdiv(temp, dividend, divisor); + __ Msub(out, temp, divisor, dividend); + } +} + void InstructionCodeGeneratorARM64::VisitRem(HRem* rem) { DataType::Type type = rem->GetResultType(); switch (type) { case DataType::Type::kInt32: case DataType::Type::kInt64: { - GenerateDivRemIntegral(rem); + GenerateIntRem(rem); break; } @@ -5462,6 +5272,62 @@ void InstructionCodeGeneratorARM64::VisitRem(HRem* rem) { } } +void LocationsBuilderARM64::VisitMin(HMin* min) { + HandleBinaryOp(min); +} + +void InstructionCodeGeneratorARM64::VisitMin(HMin* min) { + HandleBinaryOp(min); +} + +void LocationsBuilderARM64::VisitMax(HMax* max) { + HandleBinaryOp(max); +} + +void InstructionCodeGeneratorARM64::VisitMax(HMax* max) { + HandleBinaryOp(max); +} + +void LocationsBuilderARM64::VisitAbs(HAbs* abs) { + LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(abs); + switch (abs->GetResultType()) { + case DataType::Type::kInt32: + case DataType::Type::kInt64: + locations->SetInAt(0, Location::RequiresRegister()); + locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); + break; + case DataType::Type::kFloat32: + case DataType::Type::kFloat64: + locations->SetInAt(0,
Location::RequiresFpuRegister()); + locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap); + break; + default: + LOG(FATAL) << "Unexpected type for abs operation " << abs->GetResultType(); + } +} + +void InstructionCodeGeneratorARM64::VisitAbs(HAbs* abs) { + switch (abs->GetResultType()) { + case DataType::Type::kInt32: + case DataType::Type::kInt64: { + Register in_reg = InputRegisterAt(abs, 0); + Register out_reg = OutputRegister(abs); + __ Cmp(in_reg, Operand(0)); + __ Cneg(out_reg, in_reg, lt); + break; + } + case DataType::Type::kFloat32: + case DataType::Type::kFloat64: { + FPRegister in_reg = InputFPRegisterAt(abs, 0); + FPRegister out_reg = OutputFPRegister(abs); + __ Fabs(out_reg, in_reg); + break; + } + default: + LOG(FATAL) << "Unexpected type for abs operation " << abs->GetResultType(); + } +} + void LocationsBuilderARM64::VisitConstructorFence(HConstructorFence* constructor_fence) { constructor_fence->SetLocations(nullptr); } @@ -5635,7 +5501,7 @@ void InstructionCodeGeneratorARM64::VisitSuspendCheck(HSuspendCheck* instruction return; } GenerateSuspendCheck(instruction, nullptr); - codegen_->MaybeGenerateMarkingRegisterCheck(/* code */ __LINE__); + codegen_->MaybeGenerateMarkingRegisterCheck(/* code= */ __LINE__); } void LocationsBuilderARM64::VisitThrow(HThrow* instruction) { @@ -5848,8 +5714,8 @@ void InstructionCodeGeneratorARM64::GenerateReferenceLoadOneRegister( out_reg, offset, maybe_temp, - /* needs_null_check */ false, - /* use_load_acquire */ false); + /* needs_null_check= */ false, + /* use_load_acquire= */ false); } else { // Load with slow path based read barrier. // Save the value of `out` into `maybe_temp` before overwriting it @@ -5889,8 +5755,8 @@ void InstructionCodeGeneratorARM64::GenerateReferenceLoadTwoRegisters( obj_reg, offset, maybe_temp, - /* needs_null_check */ false, - /* use_load_acquire */ false); + /* needs_null_check= */ false, + /* use_load_acquire= */ false); } else { // Load with slow path based read barrier. // /* HeapReference<Object> */ out = *(obj + offset) @@ -5905,7 +5771,7 @@ void InstructionCodeGeneratorARM64::GenerateReferenceLoadTwoRegisters( } } -void InstructionCodeGeneratorARM64::GenerateGcRootFieldLoad( +void CodeGeneratorARM64::GenerateGcRootFieldLoad( HInstruction* instruction, Location root, Register obj, @@ -5919,77 +5785,39 @@ void InstructionCodeGeneratorARM64::GenerateGcRootFieldLoad( if (kUseBakerReadBarrier) { // Fast path implementation of art::ReadBarrier::BarrierForRoot when // Baker's read barrier are used. - if (kBakerReadBarrierLinkTimeThunksEnableForGcRoots && - !Runtime::Current()->UseJitCompilation()) { - // Query `art::Thread::Current()->GetIsGcMarking()` (stored in - // the Marking Register) to decide whether we need to enter - // the slow path to mark the GC root. - // - // We use link-time generated thunks for the slow path. That thunk - // checks the reference and jumps to the entrypoint if needed. - // - // lr = &return_address; - // GcRoot<mirror::Object> root = *(obj+offset); // Original reference load. 
- // if (mr) { // Thread::Current()->GetIsGcMarking() - // goto gc_root_thunk<root_reg>(lr) - // } - // return_address: - - UseScratchRegisterScope temps(GetVIXLAssembler()); - DCHECK(temps.IsAvailable(ip0)); - DCHECK(temps.IsAvailable(ip1)); - temps.Exclude(ip0, ip1); - uint32_t custom_data = - linker::Arm64RelativePatcher::EncodeBakerReadBarrierGcRootData(root_reg.GetCode()); - vixl::aarch64::Label* cbnz_label = codegen_->NewBakerReadBarrierPatch(custom_data); - - EmissionCheckScope guard(GetVIXLAssembler(), 3 * vixl::aarch64::kInstructionSize); - vixl::aarch64::Label return_address; - __ adr(lr, &return_address); - if (fixup_label != nullptr) { - __ Bind(fixup_label); - } - static_assert(BAKER_MARK_INTROSPECTION_GC_ROOT_LDR_OFFSET == -8, - "GC root LDR must be 2 instruction (8B) before the return address label."); - __ ldr(root_reg, MemOperand(obj.X(), offset)); - __ Bind(cbnz_label); - __ cbnz(mr, static_cast<int64_t>(0)); // Placeholder, patched at link-time. - __ Bind(&return_address); - } else { - // Query `art::Thread::Current()->GetIsGcMarking()` (stored in - // the Marking Register) to decide whether we need to enter - // the slow path to mark the GC root. - // - // GcRoot<mirror::Object> root = *(obj+offset); // Original reference load. - // if (mr) { // Thread::Current()->GetIsGcMarking() - // // Slow path. - // entrypoint = Thread::Current()->pReadBarrierMarkReg ## root.reg() - // root = entrypoint(root); // root = ReadBarrier::Mark(root); // Entry point call. - // } - - // Slow path marking the GC root `root`. The entrypoint will - // be loaded by the slow path code. - SlowPathCodeARM64* slow_path = - new (codegen_->GetScopedAllocator()) ReadBarrierMarkSlowPathARM64(instruction, root); - codegen_->AddSlowPath(slow_path); - // /* GcRoot<mirror::Object> */ root = *(obj + offset) - if (fixup_label == nullptr) { - __ Ldr(root_reg, MemOperand(obj, offset)); - } else { - codegen_->EmitLdrOffsetPlaceholder(fixup_label, root_reg, obj); - } - static_assert( - sizeof(mirror::CompressedReference<mirror::Object>) == sizeof(GcRoot<mirror::Object>), - "art::mirror::CompressedReference<mirror::Object> and art::GcRoot<mirror::Object> " - "have different sizes."); - static_assert(sizeof(mirror::CompressedReference<mirror::Object>) == sizeof(int32_t), - "art::mirror::CompressedReference<mirror::Object> and int32_t " - "have different sizes."); - - __ Cbnz(mr, slow_path->GetEntryLabel()); - __ Bind(slow_path->GetExitLabel()); + // Query `art::Thread::Current()->GetIsGcMarking()` (stored in + // the Marking Register) to decide whether we need to enter + // the slow path to mark the GC root. + // + // We use shared thunks for the slow path; shared within the method + // for JIT, across methods for AOT. That thunk checks the reference + // and jumps to the entrypoint if needed. + // + // lr = &return_address; + // GcRoot<mirror::Object> root = *(obj+offset); // Original reference load. 
+ // if (mr) { // Thread::Current()->GetIsGcMarking() + // goto gc_root_thunk<root_reg>(lr) + // } + // return_address: + + UseScratchRegisterScope temps(GetVIXLAssembler()); + DCHECK(temps.IsAvailable(ip0)); + DCHECK(temps.IsAvailable(ip1)); + temps.Exclude(ip0, ip1); + uint32_t custom_data = EncodeBakerReadBarrierGcRootData(root_reg.GetCode()); + + ExactAssemblyScope guard(GetVIXLAssembler(), 3 * vixl::aarch64::kInstructionSize); + vixl::aarch64::Label return_address; + __ adr(lr, &return_address); + if (fixup_label != nullptr) { + __ bind(fixup_label); } + static_assert(BAKER_MARK_INTROSPECTION_GC_ROOT_LDR_OFFSET == -8, + "GC root LDR must be 2 instructions (8B) before the return address label."); + __ ldr(root_reg, MemOperand(obj.X(), offset)); + EmitBakerReadBarrierCbnz(custom_data); + __ bind(&return_address); } else { // GC root loaded through a slow path for read barriers other // than Baker's. @@ -5997,10 +5825,10 @@ void InstructionCodeGeneratorARM64::GenerateGcRootFieldLoad( if (fixup_label == nullptr) { __ Add(root_reg.X(), obj.X(), offset); } else { - codegen_->EmitAddPlaceholder(fixup_label, root_reg.X(), obj.X()); + EmitAddPlaceholder(fixup_label, root_reg.X(), obj.X()); } // /* mirror::Object* */ root = root->Read() - codegen_->GenerateReadBarrierForRootSlow(instruction, root, root); + GenerateReadBarrierForRootSlow(instruction, root, root); } } else { // Plain GC root load with no read barrier. @@ -6008,108 +5836,134 @@ void InstructionCodeGeneratorARM64::GenerateGcRootFieldLoad( if (fixup_label == nullptr) { __ Ldr(root_reg, MemOperand(obj, offset)); } else { - codegen_->EmitLdrOffsetPlaceholder(fixup_label, root_reg, obj.X()); + EmitLdrOffsetPlaceholder(fixup_label, root_reg, obj.X()); } // Note that GC roots are not affected by heap poisoning, thus we // do not have to unpoison `root_reg` here. } - codegen_->MaybeGenerateMarkingRegisterCheck(/* code */ __LINE__); + MaybeGenerateMarkingRegisterCheck(/* code= */ __LINE__); +} + +void CodeGeneratorARM64::GenerateUnsafeCasOldValueMovWithBakerReadBarrier( + vixl::aarch64::Register marked, + vixl::aarch64::Register old_value) { + DCHECK(kEmitCompilerReadBarrier); + DCHECK(kUseBakerReadBarrier); + + // Similar to the Baker RB path in GenerateGcRootFieldLoad(), with a MOV instead of LDR. + uint32_t custom_data = EncodeBakerReadBarrierGcRootData(marked.GetCode()); + + ExactAssemblyScope guard(GetVIXLAssembler(), 3 * vixl::aarch64::kInstructionSize); + vixl::aarch64::Label return_address; + __ adr(lr, &return_address); + static_assert(BAKER_MARK_INTROSPECTION_GC_ROOT_LDR_OFFSET == -8, + "GC root LDR must be 2 instructions (8B) before the return address label."); + __ mov(marked, old_value); + EmitBakerReadBarrierCbnz(custom_data); + __ bind(&return_address); } void CodeGeneratorARM64::GenerateFieldLoadWithBakerReadBarrier(HInstruction* instruction, Location ref, - Register obj, - uint32_t offset, - Location maybe_temp, + vixl::aarch64::Register obj, + const vixl::aarch64::MemOperand& src, bool needs_null_check, bool use_load_acquire) { DCHECK(kEmitCompilerReadBarrier); DCHECK(kUseBakerReadBarrier); - if (kBakerReadBarrierLinkTimeThunksEnableForFields && - !use_load_acquire && - !Runtime::Current()->UseJitCompilation()) { - // Query `art::Thread::Current()->GetIsGcMarking()` (stored in the - // Marking Register) to decide whether we need to enter the slow - // path to mark the reference. Then, in the slow path, check the - // gray bit in the lock word of the reference's holder (`obj`) to - // decide whether to mark `ref` or not. 
- // - // We use link-time generated thunks for the slow path. That thunk checks - // the holder and jumps to the entrypoint if needed. If the holder is not - // gray, it creates a fake dependency and returns to the LDR instruction. - // - // lr = &gray_return_address; - // if (mr) { // Thread::Current()->GetIsGcMarking() - // goto field_thunk<holder_reg, base_reg>(lr) - // } - // not_gray_return_address: - // // Original reference load. If the offset is too large to fit - // // into LDR, we use an adjusted base register here. - // HeapReference<mirror::Object> reference = *(obj+offset); - // gray_return_address: - - DCHECK_ALIGNED(offset, sizeof(mirror::HeapReference<mirror::Object>)); - Register base = obj; - if (offset >= kReferenceLoadMinFarOffset) { - DCHECK(maybe_temp.IsRegister()); - base = WRegisterFrom(maybe_temp); - static_assert(IsPowerOfTwo(kReferenceLoadMinFarOffset), "Expecting a power of 2."); - __ Add(base, obj, Operand(offset & ~(kReferenceLoadMinFarOffset - 1u))); - offset &= (kReferenceLoadMinFarOffset - 1u); - } - UseScratchRegisterScope temps(GetVIXLAssembler()); - DCHECK(temps.IsAvailable(ip0)); - DCHECK(temps.IsAvailable(ip1)); - temps.Exclude(ip0, ip1); - uint32_t custom_data = linker::Arm64RelativePatcher::EncodeBakerReadBarrierFieldData( - base.GetCode(), - obj.GetCode()); - vixl::aarch64::Label* cbnz_label = NewBakerReadBarrierPatch(custom_data); + // Query `art::Thread::Current()->GetIsGcMarking()` (stored in the + // Marking Register) to decide whether we need to enter the slow + // path to mark the reference. Then, in the slow path, check the + // gray bit in the lock word of the reference's holder (`obj`) to + // decide whether to mark `ref` or not. + // + // We use shared thunks for the slow path; shared within the method + // for JIT, across methods for AOT. That thunk checks the holder + // and jumps to the entrypoint if needed. If the holder is not gray, + // it creates a fake dependency and returns to the LDR instruction. + // + // lr = &gray_return_address; + // if (mr) { // Thread::Current()->GetIsGcMarking() + // goto field_thunk<holder_reg, base_reg, use_load_acquire>(lr) + // } + // not_gray_return_address: + // // Original reference load. If the offset is too large to fit + // // into LDR, we use an adjusted base register here. + // HeapReference<mirror::Object> reference = *(obj+offset); + // gray_return_address: - { - EmissionCheckScope guard(GetVIXLAssembler(), - (kPoisonHeapReferences ? 4u : 3u) * vixl::aarch64::kInstructionSize); - vixl::aarch64::Label return_address; - __ adr(lr, &return_address); - __ Bind(cbnz_label); - __ cbnz(mr, static_cast<int64_t>(0)); // Placeholder, patched at link-time. - static_assert(BAKER_MARK_INTROSPECTION_FIELD_LDR_OFFSET == (kPoisonHeapReferences ? -8 : -4), - "Field LDR must be 1 instruction (4B) before the return address label; " - " 2 instructions (8B) for heap poisoning."); - Register ref_reg = RegisterFrom(ref, DataType::Type::kReference); - __ ldr(ref_reg, MemOperand(base.X(), offset)); - if (needs_null_check) { - MaybeRecordImplicitNullCheck(instruction); - } - GetAssembler()->MaybeUnpoisonHeapReference(ref_reg); - __ Bind(&return_address); + DCHECK(src.GetAddrMode() == vixl::aarch64::Offset); + DCHECK_ALIGNED(src.GetOffset(), sizeof(mirror::HeapReference<mirror::Object>)); + + UseScratchRegisterScope temps(GetVIXLAssembler()); + DCHECK(temps.IsAvailable(ip0)); + DCHECK(temps.IsAvailable(ip1)); + temps.Exclude(ip0, ip1); + uint32_t custom_data = use_load_acquire + ? 
EncodeBakerReadBarrierAcquireData(src.GetBaseRegister().GetCode(), obj.GetCode()) + : EncodeBakerReadBarrierFieldData(src.GetBaseRegister().GetCode(), obj.GetCode()); + + { + ExactAssemblyScope guard(GetVIXLAssembler(), + (kPoisonHeapReferences ? 4u : 3u) * vixl::aarch64::kInstructionSize); + vixl::aarch64::Label return_address; + __ adr(lr, &return_address); + EmitBakerReadBarrierCbnz(custom_data); + static_assert(BAKER_MARK_INTROSPECTION_FIELD_LDR_OFFSET == (kPoisonHeapReferences ? -8 : -4), + "Field LDR must be 1 instruction (4B) before the return address label; " + " 2 instructions (8B) for heap poisoning."); + Register ref_reg = RegisterFrom(ref, DataType::Type::kReference); + if (use_load_acquire) { + DCHECK_EQ(src.GetOffset(), 0); + __ ldar(ref_reg, src); + } else { + __ ldr(ref_reg, src); } - MaybeGenerateMarkingRegisterCheck(/* code */ __LINE__, /* temp_loc */ LocationFrom(ip1)); - return; + if (needs_null_check) { + MaybeRecordImplicitNullCheck(instruction); + } + // Unpoison the reference explicitly if needed. MaybeUnpoisonHeapReference() uses + // macro instructions disallowed in ExactAssemblyScope. + if (kPoisonHeapReferences) { + __ neg(ref_reg, Operand(ref_reg)); + } + __ bind(&return_address); } - - // /* HeapReference<Object> */ ref = *(obj + offset) - Register temp = WRegisterFrom(maybe_temp); - Location no_index = Location::NoLocation(); - size_t no_scale_factor = 0u; - GenerateReferenceLoadWithBakerReadBarrier(instruction, - ref, - obj, - offset, - no_index, - no_scale_factor, - temp, - needs_null_check, - use_load_acquire); + MaybeGenerateMarkingRegisterCheck(/* code= */ __LINE__, /* temp_loc= */ LocationFrom(ip1)); } -void CodeGeneratorARM64::GenerateArrayLoadWithBakerReadBarrier(HInstruction* instruction, +void CodeGeneratorARM64::GenerateFieldLoadWithBakerReadBarrier(HInstruction* instruction, + Location ref, + Register obj, + uint32_t offset, + Location maybe_temp, + bool needs_null_check, + bool use_load_acquire) { + DCHECK_ALIGNED(offset, sizeof(mirror::HeapReference<mirror::Object>)); + Register base = obj; + if (use_load_acquire) { + DCHECK(maybe_temp.IsRegister()); + base = WRegisterFrom(maybe_temp); + __ Add(base, obj, offset); + offset = 0u; + } else if (offset >= kReferenceLoadMinFarOffset) { + DCHECK(maybe_temp.IsRegister()); + base = WRegisterFrom(maybe_temp); + static_assert(IsPowerOfTwo(kReferenceLoadMinFarOffset), "Expecting a power of 2."); + __ Add(base, obj, Operand(offset & ~(kReferenceLoadMinFarOffset - 1u))); + offset &= (kReferenceLoadMinFarOffset - 1u); + } + MemOperand src(base.X(), offset); + GenerateFieldLoadWithBakerReadBarrier( + instruction, ref, obj, src, needs_null_check, use_load_acquire); +} + +void CodeGeneratorARM64::GenerateArrayLoadWithBakerReadBarrier(HArrayGet* instruction, Location ref, Register obj, uint32_t data_offset, Location index, - Register temp, bool needs_null_check) { DCHECK(kEmitCompilerReadBarrier); DCHECK(kUseBakerReadBarrier); @@ -6119,267 +5973,72 @@ void CodeGeneratorARM64::GenerateArrayLoadWithBakerReadBarrier(HInstruction* ins "art::mirror::HeapReference<art::mirror::Object> and int32_t have different sizes."); size_t scale_factor = DataType::SizeShift(DataType::Type::kReference); - if (kBakerReadBarrierLinkTimeThunksEnableForArrays && - !Runtime::Current()->UseJitCompilation()) { - // Query `art::Thread::Current()->GetIsGcMarking()` (stored in the - // Marking Register) to decide whether we need to enter the slow - // path to mark the reference. 
Then, in the slow path, check the - // gray bit in the lock word of the reference's holder (`obj`) to - // decide whether to mark `ref` or not. - // - // We use link-time generated thunks for the slow path. That thunk checks - // the holder and jumps to the entrypoint if needed. If the holder is not - // gray, it creates a fake dependency and returns to the LDR instruction. - // - // lr = &gray_return_address; - // if (mr) { // Thread::Current()->GetIsGcMarking() - // goto array_thunk<base_reg>(lr) - // } - // not_gray_return_address: - // // Original reference load. If the offset is too large to fit - // // into LDR, we use an adjusted base register here. - // HeapReference<mirror::Object> reference = data[index]; - // gray_return_address: - - DCHECK(index.IsValid()); - Register index_reg = RegisterFrom(index, DataType::Type::kInt32); - Register ref_reg = RegisterFrom(ref, DataType::Type::kReference); - - UseScratchRegisterScope temps(GetVIXLAssembler()); - DCHECK(temps.IsAvailable(ip0)); - DCHECK(temps.IsAvailable(ip1)); - temps.Exclude(ip0, ip1); - uint32_t custom_data = - linker::Arm64RelativePatcher::EncodeBakerReadBarrierArrayData(temp.GetCode()); - vixl::aarch64::Label* cbnz_label = NewBakerReadBarrierPatch(custom_data); - - __ Add(temp.X(), obj.X(), Operand(data_offset)); - { - EmissionCheckScope guard(GetVIXLAssembler(), - (kPoisonHeapReferences ? 4u : 3u) * vixl::aarch64::kInstructionSize); - vixl::aarch64::Label return_address; - __ adr(lr, &return_address); - __ Bind(cbnz_label); - __ cbnz(mr, static_cast<int64_t>(0)); // Placeholder, patched at link-time. - static_assert(BAKER_MARK_INTROSPECTION_ARRAY_LDR_OFFSET == (kPoisonHeapReferences ? -8 : -4), - "Array LDR must be 1 instruction (4B) before the return address label; " - " 2 instructions (8B) for heap poisoning."); - __ ldr(ref_reg, MemOperand(temp.X(), index_reg.X(), LSL, scale_factor)); - DCHECK(!needs_null_check); // The thunk cannot handle the null check. - GetAssembler()->MaybeUnpoisonHeapReference(ref_reg); - __ Bind(&return_address); - } - MaybeGenerateMarkingRegisterCheck(/* code */ __LINE__, /* temp_loc */ LocationFrom(ip1)); - return; - } - - // Array cells are never volatile variables, therefore array loads - // never use Load-Acquire instructions on ARM64. - const bool use_load_acquire = false; - - // /* HeapReference<Object> */ ref = - // *(obj + data_offset + index * sizeof(HeapReference<Object>)) - GenerateReferenceLoadWithBakerReadBarrier(instruction, - ref, - obj, - data_offset, - index, - scale_factor, - temp, - needs_null_check, - use_load_acquire); -} - -void CodeGeneratorARM64::GenerateReferenceLoadWithBakerReadBarrier(HInstruction* instruction, - Location ref, - Register obj, - uint32_t offset, - Location index, - size_t scale_factor, - Register temp, - bool needs_null_check, - bool use_load_acquire) { - DCHECK(kEmitCompilerReadBarrier); - DCHECK(kUseBakerReadBarrier); - // If we are emitting an array load, we should not be using a - // Load Acquire instruction. In other words: - // `instruction->IsArrayGet()` => `!use_load_acquire`. - DCHECK(!instruction->IsArrayGet() || !use_load_acquire); - // Query `art::Thread::Current()->GetIsGcMarking()` (stored in the // Marking Register) to decide whether we need to enter the slow // path to mark the reference. Then, in the slow path, check the // gray bit in the lock word of the reference's holder (`obj`) to // decide whether to mark `ref` or not. // - // if (mr) { // Thread::Current()->GetIsGcMarking() - // // Slow path. 
- // uint32_t rb_state = Lockword(obj->monitor_).ReadBarrierState(); - // lfence; // Load fence or artificial data dependency to prevent load-load reordering - // HeapReference<mirror::Object> ref = *src; // Original reference load. - // bool is_gray = (rb_state == ReadBarrier::GrayState()); - // if (is_gray) { - // entrypoint = Thread::Current()->pReadBarrierMarkReg ## root.reg() - // ref = entrypoint(ref); // ref = ReadBarrier::Mark(ref); // Runtime entry point call. - // } - // } else { - // HeapReference<mirror::Object> ref = *src; // Original reference load. - // } - - // Slow path marking the object `ref` when the GC is marking. The - // entrypoint will be loaded by the slow path code. - SlowPathCodeARM64* slow_path = - new (GetScopedAllocator()) LoadReferenceWithBakerReadBarrierSlowPathARM64( - instruction, - ref, - obj, - offset, - index, - scale_factor, - needs_null_check, - use_load_acquire, - temp); - AddSlowPath(slow_path); - - __ Cbnz(mr, slow_path->GetEntryLabel()); - // Fast path: the GC is not marking: just load the reference. - GenerateRawReferenceLoad( - instruction, ref, obj, offset, index, scale_factor, needs_null_check, use_load_acquire); - __ Bind(slow_path->GetExitLabel()); - MaybeGenerateMarkingRegisterCheck(/* code */ __LINE__); -} - -void CodeGeneratorARM64::UpdateReferenceFieldWithBakerReadBarrier(HInstruction* instruction, - Location ref, - Register obj, - Location field_offset, - Register temp, - bool needs_null_check, - bool use_load_acquire) { - DCHECK(kEmitCompilerReadBarrier); - DCHECK(kUseBakerReadBarrier); - // If we are emitting an array load, we should not be using a - // Load Acquire instruction. In other words: - // `instruction->IsArrayGet()` => `!use_load_acquire`. - DCHECK(!instruction->IsArrayGet() || !use_load_acquire); - - // Query `art::Thread::Current()->GetIsGcMarking()` (stored in the - // Marking Register) to decide whether we need to enter the slow - // path to update the reference field within `obj`. Then, in the - // slow path, check the gray bit in the lock word of the reference's - // holder (`obj`) to decide whether to mark `ref` and update the - // field or not. + // We use shared thunks for the slow path; shared within the method + // for JIT, across methods for AOT. That thunk checks the holder + // and jumps to the entrypoint if needed. If the holder is not gray, + // it creates a fake dependency and returns to the LDR instruction. // - // if (mr) { // Thread::Current()->GetIsGcMarking() - // // Slow path. - // uint32_t rb_state = Lockword(obj->monitor_).ReadBarrierState(); - // lfence; // Load fence or artificial data dependency to prevent load-load reordering - // HeapReference<mirror::Object> ref = *(obj + field_offset); // Reference load. - // bool is_gray = (rb_state == ReadBarrier::GrayState()); - // if (is_gray) { - // old_ref = ref; - // entrypoint = Thread::Current()->pReadBarrierMarkReg ## root.reg() - // ref = entrypoint(ref); // ref = ReadBarrier::Mark(ref); // Runtime entry point call. - // compareAndSwapObject(obj, field_offset, old_ref, ref); + // lr = &gray_return_address; + // if (mr) { // Thread::Current()->GetIsGcMarking() + // goto array_thunk<base_reg>(lr) // } - // } - - // Slow path updating the object reference at address `obj + field_offset` - // when the GC is marking. The entrypoint will be loaded by the slow path code. 
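  // Note on the `is_gray` test in the pseudocode above: given the numeric
  // representation asserted further down in this file (ReadBarrier::NonGrayState() == 0,
  // ReadBarrier::GrayState() == 1), the comparison reduces to a single bit test
  // on the lock word, roughly (a sketch, not the emitted code):
  //
  //   uint32_t lock_word = obj->monitor_;
  //   bool is_gray = ((lock_word >> LockWord::kReadBarrierStateShift) & 1u) != 0u;
  //
  // which is why the shared thunks can use a single TBNZ on
  // LockWord::kReadBarrierStateShift instead of a full compare.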
- SlowPathCodeARM64* slow_path = - new (GetScopedAllocator()) LoadReferenceWithBakerReadBarrierAndUpdateFieldSlowPathARM64( - instruction, - ref, - obj, - /* offset */ 0u, - /* index */ field_offset, - /* scale_factor */ 0u /* "times 1" */, - needs_null_check, - use_load_acquire, - temp); - AddSlowPath(slow_path); - - __ Cbnz(mr, slow_path->GetEntryLabel()); - // Fast path: the GC is not marking: nothing to do (the field is - // up-to-date, and we don't need to load the reference). - __ Bind(slow_path->GetExitLabel()); - MaybeGenerateMarkingRegisterCheck(/* code */ __LINE__); -} - -void CodeGeneratorARM64::GenerateRawReferenceLoad(HInstruction* instruction, - Location ref, - Register obj, - uint32_t offset, - Location index, - size_t scale_factor, - bool needs_null_check, - bool use_load_acquire) { - DCHECK(obj.IsW()); - DataType::Type type = DataType::Type::kReference; - Register ref_reg = RegisterFrom(ref, type); + // not_gray_return_address: + // // Original reference load. If the offset is too large to fit + // // into LDR, we use an adjusted base register here. + // HeapReference<mirror::Object> reference = data[index]; + // gray_return_address: - // If needed, vixl::EmissionCheckScope guards are used to ensure - // that no pools are emitted between the load (macro) instruction - // and MaybeRecordImplicitNullCheck. + DCHECK(index.IsValid()); + Register index_reg = RegisterFrom(index, DataType::Type::kInt32); + Register ref_reg = RegisterFrom(ref, DataType::Type::kReference); - if (index.IsValid()) { - // Load types involving an "index": ArrayGet, - // UnsafeGetObject/UnsafeGetObjectVolatile and UnsafeCASObject - // intrinsics. - if (use_load_acquire) { - // UnsafeGetObjectVolatile intrinsic case. - // Register `index` is not an index in an object array, but an - // offset to an object reference field within object `obj`. - DCHECK(instruction->IsInvoke()) << instruction->DebugName(); - DCHECK(instruction->GetLocations()->Intrinsified()); - DCHECK(instruction->AsInvoke()->GetIntrinsic() == Intrinsics::kUnsafeGetObjectVolatile) - << instruction->AsInvoke()->GetIntrinsic(); - DCHECK_EQ(offset, 0u); - DCHECK_EQ(scale_factor, 0u); - DCHECK_EQ(needs_null_check, false); - // /* HeapReference<mirror::Object> */ ref = *(obj + index) - MemOperand field = HeapOperand(obj, XRegisterFrom(index)); - LoadAcquire(instruction, ref_reg, field, /* needs_null_check */ false); - } else { - // ArrayGet and UnsafeGetObject and UnsafeCASObject intrinsics cases. 
- // /* HeapReference<mirror::Object> */ ref = *(obj + offset + (index << scale_factor)) - if (index.IsConstant()) { - uint32_t computed_offset = offset + (Int64ConstantFrom(index) << scale_factor); - EmissionCheckScope guard(GetVIXLAssembler(), kMaxMacroInstructionSizeInBytes); - Load(type, ref_reg, HeapOperand(obj, computed_offset)); - if (needs_null_check) { - MaybeRecordImplicitNullCheck(instruction); - } - } else { - UseScratchRegisterScope temps(GetVIXLAssembler()); - Register temp = temps.AcquireW(); - __ Add(temp, obj, offset); - { - EmissionCheckScope guard(GetVIXLAssembler(), kMaxMacroInstructionSizeInBytes); - Load(type, ref_reg, HeapOperand(temp, XRegisterFrom(index), LSL, scale_factor)); - if (needs_null_check) { - MaybeRecordImplicitNullCheck(instruction); - } - } - } + UseScratchRegisterScope temps(GetVIXLAssembler()); + DCHECK(temps.IsAvailable(ip0)); + DCHECK(temps.IsAvailable(ip1)); + temps.Exclude(ip0, ip1); + + Register temp; + if (instruction->GetArray()->IsIntermediateAddress()) { + // We do not need to compute the intermediate address from the array: the + // input instruction has done it already. See the comment in + // `TryExtractArrayAccessAddress()`. + if (kIsDebugBuild) { + HIntermediateAddress* interm_addr = instruction->GetArray()->AsIntermediateAddress(); + DCHECK_EQ(interm_addr->GetOffset()->AsIntConstant()->GetValueAsUint64(), data_offset); } + temp = obj; } else { - // /* HeapReference<mirror::Object> */ ref = *(obj + offset) - MemOperand field = HeapOperand(obj, offset); - if (use_load_acquire) { - // Implicit null checks are handled by CodeGeneratorARM64::LoadAcquire. - LoadAcquire(instruction, ref_reg, field, needs_null_check); - } else { - EmissionCheckScope guard(GetVIXLAssembler(), kMaxMacroInstructionSizeInBytes); - Load(type, ref_reg, field); - if (needs_null_check) { - MaybeRecordImplicitNullCheck(instruction); - } - } + temp = WRegisterFrom(instruction->GetLocations()->GetTemp(0)); + __ Add(temp.X(), obj.X(), Operand(data_offset)); } - // Object* ref = ref_addr->AsMirrorPtr() - GetAssembler()->MaybeUnpoisonHeapReference(ref_reg); + uint32_t custom_data = EncodeBakerReadBarrierArrayData(temp.GetCode()); + + { + ExactAssemblyScope guard(GetVIXLAssembler(), + (kPoisonHeapReferences ? 4u : 3u) * vixl::aarch64::kInstructionSize); + vixl::aarch64::Label return_address; + __ adr(lr, &return_address); + EmitBakerReadBarrierCbnz(custom_data); + static_assert(BAKER_MARK_INTROSPECTION_ARRAY_LDR_OFFSET == (kPoisonHeapReferences ? -8 : -4), + "Array LDR must be 1 instruction (4B) before the return address label; " + " 2 instructions (8B) for heap poisoning."); + __ ldr(ref_reg, MemOperand(temp.X(), index_reg.X(), LSL, scale_factor)); + DCHECK(!needs_null_check); // The thunk cannot handle the null check. + // Unpoison the reference explicitly if needed. MaybeUnpoisonHeapReference() uses + // macro instructions disallowed in ExactAssemblyScope. 
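  // (Poisoning a heap reference stores its two's-complement negation, so a
  // poisoned value is recovered with a single NEG: ref = -(-ref). The macro
  // helper MaybeUnpoisonHeapReference() would emit the same NEG, but macro
  // instructions are not allowed inside an ExactAssemblyScope, hence the raw
  // `neg` below. This note assumes the negation-based poisoning scheme.)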
+ if (kPoisonHeapReferences) { + __ neg(ref_reg, Operand(ref_reg)); + } + __ bind(&return_address); + } + MaybeGenerateMarkingRegisterCheck(/* code= */ __LINE__, /* temp_loc= */ LocationFrom(ip1)); } void CodeGeneratorARM64::MaybeGenerateMarkingRegisterCheck(int code, Location temp_loc) { @@ -6510,5 +6169,193 @@ void CodeGeneratorARM64::EmitJitRootPatches(uint8_t* code, const uint8_t* roots_ #undef __ #undef QUICK_ENTRY_POINT +#define __ assembler.GetVIXLAssembler()-> + +static void EmitGrayCheckAndFastPath(arm64::Arm64Assembler& assembler, + vixl::aarch64::Register base_reg, + vixl::aarch64::MemOperand& lock_word, + vixl::aarch64::Label* slow_path, + vixl::aarch64::Label* throw_npe = nullptr) { + // Load the lock word containing the rb_state. + __ Ldr(ip0.W(), lock_word); + // Given the numeric representation, it's enough to check the low bit of the rb_state. + static_assert(ReadBarrier::NonGrayState() == 0, "Expecting non-gray to have value 0"); + static_assert(ReadBarrier::GrayState() == 1, "Expecting gray to have value 1"); + __ Tbnz(ip0.W(), LockWord::kReadBarrierStateShift, slow_path); + static_assert( + BAKER_MARK_INTROSPECTION_ARRAY_LDR_OFFSET == BAKER_MARK_INTROSPECTION_FIELD_LDR_OFFSET, + "Field and array LDR offsets must be the same to reuse the same code."); + // To throw NPE, we return to the fast path; the artificial dependence below does not matter. + if (throw_npe != nullptr) { + __ Bind(throw_npe); + } + // Adjust the return address back to the LDR (1 instruction; 2 for heap poisoning). + static_assert(BAKER_MARK_INTROSPECTION_FIELD_LDR_OFFSET == (kPoisonHeapReferences ? -8 : -4), + "Field LDR must be 1 instruction (4B) before the return address label; " + " 2 instructions (8B) for heap poisoning."); + __ Add(lr, lr, BAKER_MARK_INTROSPECTION_FIELD_LDR_OFFSET); + // Introduce a dependency on the lock_word including rb_state, + // to prevent load-load reordering, and without using + // a memory barrier (which would be more expensive). + __ Add(base_reg, base_reg, Operand(ip0, LSR, 32)); + __ Br(lr); // And return back to the function. + // Note: The fake dependency is unnecessary for the slow path. +} + +// Load the read barrier introspection entrypoint in register `entrypoint`. +static void LoadReadBarrierMarkIntrospectionEntrypoint(arm64::Arm64Assembler& assembler, + vixl::aarch64::Register entrypoint) { + // entrypoint = Thread::Current()->pReadBarrierMarkReg16, i.e. pReadBarrierMarkIntrospection. + DCHECK_EQ(ip0.GetCode(), 16u); + const int32_t entry_point_offset = + Thread::ReadBarrierMarkEntryPointsOffset<kArm64PointerSize>(ip0.GetCode()); + __ Ldr(entrypoint, MemOperand(tr, entry_point_offset)); +} + +void CodeGeneratorARM64::CompileBakerReadBarrierThunk(Arm64Assembler& assembler, + uint32_t encoded_data, + /*out*/ std::string* debug_name) { + BakerReadBarrierKind kind = BakerReadBarrierKindField::Decode(encoded_data); + switch (kind) { + case BakerReadBarrierKind::kField: + case BakerReadBarrierKind::kAcquire: { + auto base_reg = + Register::GetXRegFromCode(BakerReadBarrierFirstRegField::Decode(encoded_data)); + CheckValidReg(base_reg.GetCode()); + auto holder_reg = + Register::GetXRegFromCode(BakerReadBarrierSecondRegField::Decode(encoded_data)); + CheckValidReg(holder_reg.GetCode()); + UseScratchRegisterScope temps(assembler.GetVIXLAssembler()); + temps.Exclude(ip0, ip1); + // If base_reg differs from holder_reg, the offset was too large and we must have emitted + // an explicit null check before the load. 
Otherwise, for implicit null checks, we need to + // null-check the holder as we do not necessarily do that check before going to the thunk. + vixl::aarch64::Label throw_npe_label; + vixl::aarch64::Label* throw_npe = nullptr; + if (GetCompilerOptions().GetImplicitNullChecks() && holder_reg.Is(base_reg)) { + throw_npe = &throw_npe_label; + __ Cbz(holder_reg.W(), throw_npe); + } + // Check if the holder is gray and, if not, add fake dependency to the base register + // and return to the LDR instruction to load the reference. Otherwise, use introspection + // to load the reference and call the entrypoint that performs further checks on the + // reference and marks it if needed. + vixl::aarch64::Label slow_path; + MemOperand lock_word(holder_reg, mirror::Object::MonitorOffset().Int32Value()); + EmitGrayCheckAndFastPath(assembler, base_reg, lock_word, &slow_path, throw_npe); + __ Bind(&slow_path); + if (kind == BakerReadBarrierKind::kField) { + MemOperand ldr_address(lr, BAKER_MARK_INTROSPECTION_FIELD_LDR_OFFSET); + __ Ldr(ip0.W(), ldr_address); // Load the LDR (immediate) unsigned offset. + LoadReadBarrierMarkIntrospectionEntrypoint(assembler, ip1); + __ Ubfx(ip0.W(), ip0.W(), 10, 12); // Extract the offset. + __ Ldr(ip0.W(), MemOperand(base_reg, ip0, LSL, 2)); // Load the reference. + } else { + DCHECK(kind == BakerReadBarrierKind::kAcquire); + DCHECK(!base_reg.Is(holder_reg)); + LoadReadBarrierMarkIntrospectionEntrypoint(assembler, ip1); + __ Ldar(ip0.W(), MemOperand(base_reg)); + } + // Do not unpoison. With heap poisoning enabled, the entrypoint expects a poisoned reference. + __ Br(ip1); // Jump to the entrypoint. + break; + } + case BakerReadBarrierKind::kArray: { + auto base_reg = + Register::GetXRegFromCode(BakerReadBarrierFirstRegField::Decode(encoded_data)); + CheckValidReg(base_reg.GetCode()); + DCHECK_EQ(kBakerReadBarrierInvalidEncodedReg, + BakerReadBarrierSecondRegField::Decode(encoded_data)); + UseScratchRegisterScope temps(assembler.GetVIXLAssembler()); + temps.Exclude(ip0, ip1); + vixl::aarch64::Label slow_path; + int32_t data_offset = + mirror::Array::DataOffset(Primitive::ComponentSize(Primitive::kPrimNot)).Int32Value(); + MemOperand lock_word(base_reg, mirror::Object::MonitorOffset().Int32Value() - data_offset); + DCHECK_LT(lock_word.GetOffset(), 0); + EmitGrayCheckAndFastPath(assembler, base_reg, lock_word, &slow_path); + __ Bind(&slow_path); + MemOperand ldr_address(lr, BAKER_MARK_INTROSPECTION_ARRAY_LDR_OFFSET); + __ Ldr(ip0.W(), ldr_address); // Load the LDR (register) unsigned offset. + LoadReadBarrierMarkIntrospectionEntrypoint(assembler, ip1); + __ Ubfx(ip0, ip0, 16, 6); // Extract the index register, plus 32 (bit 21 is set). + __ Bfi(ip1, ip0, 3, 6); // Insert ip0 to the entrypoint address to create + // a switch case target based on the index register. + __ Mov(ip0, base_reg); // Move the base register to ip0. + __ Br(ip1); // Jump to the entrypoint's array switch case. + break; + } + case BakerReadBarrierKind::kGcRoot: { + // Check if the reference needs to be marked and if so (i.e. not null, not marked yet + // and it does not have a forwarding address), call the correct introspection entrypoint; + // otherwise return the reference (or the extracted forwarding address). + // There is no gray bit check for GC roots. 
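      // In pseudocode, the GC root thunk emitted below behaves roughly like
      // (a sketch; it assumes the lock word keeps its 2-bit state in the two
      // most significant bits, with both bits set meaning "forwarding address"):
      //
      //   if (root == null) return root;
      //   uint32_t lock_word = root->monitor_;
      //   if (lock_word & (1u << LockWord::kMarkBitStateShift)) return root;  // Already marked.
      //   if ((lock_word & (lock_word << 1)) & 0x80000000u) {                 // Both state bits set.
      //     return lock_word << LockWord::kForwardingAddressShift;            // Forwarding address.
      //   }
      //   return art_quick_read_barrier_mark_introspection_gc_roots(root);
      //
      // The `Tst(ip0, Operand(ip0, LSL, 1))` + `B(mi)` pair below is the
      // branch-free form of the "both top bits set" test.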
+ auto root_reg = + Register::GetWRegFromCode(BakerReadBarrierFirstRegField::Decode(encoded_data)); + CheckValidReg(root_reg.GetCode()); + DCHECK_EQ(kBakerReadBarrierInvalidEncodedReg, + BakerReadBarrierSecondRegField::Decode(encoded_data)); + UseScratchRegisterScope temps(assembler.GetVIXLAssembler()); + temps.Exclude(ip0, ip1); + vixl::aarch64::Label return_label, not_marked, forwarding_address; + __ Cbz(root_reg, &return_label); + MemOperand lock_word(root_reg.X(), mirror::Object::MonitorOffset().Int32Value()); + __ Ldr(ip0.W(), lock_word); + __ Tbz(ip0.W(), LockWord::kMarkBitStateShift, ¬_marked); + __ Bind(&return_label); + __ Br(lr); + __ Bind(¬_marked); + __ Tst(ip0.W(), Operand(ip0.W(), LSL, 1)); + __ B(&forwarding_address, mi); + LoadReadBarrierMarkIntrospectionEntrypoint(assembler, ip1); + // Adjust the art_quick_read_barrier_mark_introspection address in IP1 to + // art_quick_read_barrier_mark_introspection_gc_roots. + __ Add(ip1, ip1, Operand(BAKER_MARK_INTROSPECTION_GC_ROOT_ENTRYPOINT_OFFSET)); + __ Mov(ip0.W(), root_reg); + __ Br(ip1); + __ Bind(&forwarding_address); + __ Lsl(root_reg, ip0.W(), LockWord::kForwardingAddressShift); + __ Br(lr); + break; + } + default: + LOG(FATAL) << "Unexpected kind: " << static_cast<uint32_t>(kind); + UNREACHABLE(); + } + + // For JIT, the slow path is considered part of the compiled method, + // so JIT should pass null as `debug_name`. Tests may not have a runtime. + DCHECK(Runtime::Current() == nullptr || + !Runtime::Current()->UseJitCompilation() || + debug_name == nullptr); + if (debug_name != nullptr && GetCompilerOptions().GenerateAnyDebugInfo()) { + std::ostringstream oss; + oss << "BakerReadBarrierThunk"; + switch (kind) { + case BakerReadBarrierKind::kField: + oss << "Field_r" << BakerReadBarrierFirstRegField::Decode(encoded_data) + << "_r" << BakerReadBarrierSecondRegField::Decode(encoded_data); + break; + case BakerReadBarrierKind::kAcquire: + oss << "Acquire_r" << BakerReadBarrierFirstRegField::Decode(encoded_data) + << "_r" << BakerReadBarrierSecondRegField::Decode(encoded_data); + break; + case BakerReadBarrierKind::kArray: + oss << "Array_r" << BakerReadBarrierFirstRegField::Decode(encoded_data); + DCHECK_EQ(kBakerReadBarrierInvalidEncodedReg, + BakerReadBarrierSecondRegField::Decode(encoded_data)); + break; + case BakerReadBarrierKind::kGcRoot: + oss << "GcRoot_r" << BakerReadBarrierFirstRegField::Decode(encoded_data); + DCHECK_EQ(kBakerReadBarrierInvalidEncodedReg, + BakerReadBarrierSecondRegField::Decode(encoded_data)); + break; + } + *debug_name = oss.str(); + } +} + +#undef __ + } // namespace arm64 } // namespace art diff --git a/compiler/optimizing/code_generator_arm64.h b/compiler/optimizing/code_generator_arm64.h index 0654046de5..ada5742fc0 100644 --- a/compiler/optimizing/code_generator_arm64.h +++ b/compiler/optimizing/code_generator_arm64.h @@ -17,7 +17,7 @@ #ifndef ART_COMPILER_OPTIMIZING_CODE_GENERATOR_ARM64_H_ #define ART_COMPILER_OPTIMIZING_CODE_GENERATOR_ARM64_H_ -#include "arch/arm64/quick_method_frame_info_arm64.h" +#include "base/bit_field.h" #include "code_generator.h" #include "common_arm64.h" #include "dex/dex_file_types.h" @@ -36,6 +36,11 @@ #pragma GCC diagnostic pop namespace art { + +namespace linker { +class Arm64RelativePatcherTest; +} // namespace linker + namespace arm64 { class CodeGeneratorARM64; @@ -87,6 +92,16 @@ const vixl::aarch64::CPURegList runtime_reserved_core_registers = ((kEmitCompilerReadBarrier && kUseBakerReadBarrier) ? 
mr : vixl::aarch64::NoCPUReg), vixl::aarch64::lr); +// Some instructions have special requirements for a temporary, for example +// LoadClass/kBssEntry and LoadString/kBssEntry for Baker read barrier require +// temp that's not an R0 (to avoid an extra move) and Baker read barrier field +// loads with large offsets need a fixed register to limit the number of link-time +// thunks we generate. For these and similar cases, we want to reserve a specific +// register that's neither callee-save nor an argument register. We choose x15. +inline Location FixedTempLocation() { + return Location::RegisterLocation(vixl::aarch64::x15.GetCode()); +} + // Callee-save registers AAPCS64, without x19 (Thread Register) (nor // x20 (Marking Register) when emitting Baker read barriers). const vixl::aarch64::CPURegList callee_saved_core_registers( @@ -110,8 +125,8 @@ class SlowPathCodeARM64 : public SlowPathCode { vixl::aarch64::Label* GetEntryLabel() { return &entry_label_; } vixl::aarch64::Label* GetExitLabel() { return &exit_label_; } - void SaveLiveRegisters(CodeGenerator* codegen, LocationSummary* locations) OVERRIDE; - void RestoreLiveRegisters(CodeGenerator* codegen, LocationSummary* locations) OVERRIDE; + void SaveLiveRegisters(CodeGenerator* codegen, LocationSummary* locations) override; + void RestoreLiveRegisters(CodeGenerator* codegen, LocationSummary* locations) override; private: vixl::aarch64::Label entry_label_; @@ -201,11 +216,11 @@ class InvokeDexCallingConventionVisitorARM64 : public InvokeDexCallingConvention InvokeDexCallingConventionVisitorARM64() {} virtual ~InvokeDexCallingConventionVisitorARM64() {} - Location GetNextLocation(DataType::Type type) OVERRIDE; - Location GetReturnLocation(DataType::Type return_type) const OVERRIDE { + Location GetNextLocation(DataType::Type type) override; + Location GetReturnLocation(DataType::Type return_type) const override { return calling_convention.GetReturnLocation(return_type); } - Location GetMethodLocation() const OVERRIDE; + Location GetMethodLocation() const override; private: InvokeDexCallingConvention calling_convention; @@ -217,22 +232,22 @@ class FieldAccessCallingConventionARM64 : public FieldAccessCallingConvention { public: FieldAccessCallingConventionARM64() {} - Location GetObjectLocation() const OVERRIDE { + Location GetObjectLocation() const override { return helpers::LocationFrom(vixl::aarch64::x1); } - Location GetFieldIndexLocation() const OVERRIDE { + Location GetFieldIndexLocation() const override { return helpers::LocationFrom(vixl::aarch64::x0); } - Location GetReturnLocation(DataType::Type type ATTRIBUTE_UNUSED) const OVERRIDE { + Location GetReturnLocation(DataType::Type type ATTRIBUTE_UNUSED) const override { return helpers::LocationFrom(vixl::aarch64::x0); } Location GetSetValueLocation(DataType::Type type ATTRIBUTE_UNUSED, - bool is_instance) const OVERRIDE { + bool is_instance) const override { return is_instance ? 
helpers::LocationFrom(vixl::aarch64::x2) : helpers::LocationFrom(vixl::aarch64::x1); } - Location GetFpuLocation(DataType::Type type ATTRIBUTE_UNUSED) const OVERRIDE { + Location GetFpuLocation(DataType::Type type ATTRIBUTE_UNUSED) const override { return helpers::LocationFrom(vixl::aarch64::d0); } @@ -245,7 +260,7 @@ class InstructionCodeGeneratorARM64 : public InstructionCodeGenerator { InstructionCodeGeneratorARM64(HGraph* graph, CodeGeneratorARM64* codegen); #define DECLARE_VISIT_INSTRUCTION(name, super) \ - void Visit##name(H##name* instr) OVERRIDE; + void Visit##name(H##name* instr) override; FOR_EACH_CONCRETE_INSTRUCTION_COMMON(DECLARE_VISIT_INSTRUCTION) FOR_EACH_CONCRETE_INSTRUCTION_ARM64(DECLARE_VISIT_INSTRUCTION) @@ -253,7 +268,7 @@ class InstructionCodeGeneratorARM64 : public InstructionCodeGenerator { #undef DECLARE_VISIT_INSTRUCTION - void VisitInstruction(HInstruction* instruction) OVERRIDE { + void VisitInstruction(HInstruction* instruction) override { LOG(FATAL) << "Unreachable instruction " << instruction->DebugName() << " (id " << instruction->GetId() << ")"; } @@ -264,6 +279,8 @@ class InstructionCodeGeneratorARM64 : public InstructionCodeGenerator { private: void GenerateClassInitializationCheck(SlowPathCodeARM64* slow_path, vixl::aarch64::Register class_reg); + void GenerateBitstringTypeCheckCompare(HTypeCheckInstruction* check, + vixl::aarch64::Register temp); void GenerateSuspendCheck(HSuspendCheck* instruction, HBasicBlock* successor); void HandleBinaryOp(HBinaryOperation* instr); @@ -303,17 +320,6 @@ class InstructionCodeGeneratorARM64 : public InstructionCodeGenerator { uint32_t offset, Location maybe_temp, ReadBarrierOption read_barrier_option); - // Generate a GC root reference load: - // - // root <- *(obj + offset) - // - // while honoring read barriers based on read_barrier_option. - void GenerateGcRootFieldLoad(HInstruction* instruction, - Location root, - vixl::aarch64::Register obj, - uint32_t offset, - vixl::aarch64::Label* fixup_label, - ReadBarrierOption read_barrier_option); // Generate a floating-point comparison. 
void GenerateFcmp(HInstruction* instruction); @@ -326,7 +332,12 @@ class InstructionCodeGeneratorARM64 : public InstructionCodeGenerator { void DivRemOneOrMinusOne(HBinaryOperation* instruction); void DivRemByPowerOfTwo(HBinaryOperation* instruction); void GenerateDivRemWithAnyConstant(HBinaryOperation* instruction); - void GenerateDivRemIntegral(HBinaryOperation* instruction); + void GenerateIntDiv(HDiv* instruction); + void GenerateIntDivForConstDenom(HDiv *instruction); + void GenerateIntDivForPower2Denom(HDiv *instruction); + void GenerateIntRem(HRem* instruction); + void GenerateIntRemForConstDenom(HRem *instruction); + void GenerateIntRemForPower2Denom(HRem *instruction); void HandleGoto(HInstruction* got, HBasicBlock* successor); vixl::aarch64::MemOperand VecAddress( @@ -349,7 +360,7 @@ class LocationsBuilderARM64 : public HGraphVisitor { : HGraphVisitor(graph), codegen_(codegen) {} #define DECLARE_VISIT_INSTRUCTION(name, super) \ - void Visit##name(H##name* instr) OVERRIDE; + void Visit##name(H##name* instr) override; FOR_EACH_CONCRETE_INSTRUCTION_COMMON(DECLARE_VISIT_INSTRUCTION) FOR_EACH_CONCRETE_INSTRUCTION_ARM64(DECLARE_VISIT_INSTRUCTION) @@ -357,7 +368,7 @@ class LocationsBuilderARM64 : public HGraphVisitor { #undef DECLARE_VISIT_INSTRUCTION - void VisitInstruction(HInstruction* instruction) OVERRIDE { + void VisitInstruction(HInstruction* instruction) override { LOG(FATAL) << "Unreachable instruction " << instruction->DebugName() << " (id " << instruction->GetId() << ")"; } @@ -382,11 +393,11 @@ class ParallelMoveResolverARM64 : public ParallelMoveResolverNoSwap { : ParallelMoveResolverNoSwap(allocator), codegen_(codegen), vixl_temps_() {} protected: - void PrepareForEmitNativeCode() OVERRIDE; - void FinishEmitNativeCode() OVERRIDE; - Location AllocateScratchLocationFor(Location::Kind kind) OVERRIDE; - void FreeScratchLocation(Location loc) OVERRIDE; - void EmitMove(size_t index) OVERRIDE; + void PrepareForEmitNativeCode() override; + void FinishEmitNativeCode() override; + Location AllocateScratchLocationFor(Location::Kind kind) override; + void FreeScratchLocation(Location loc) override; + void EmitMove(size_t index) override; private: Arm64Assembler* GetAssembler() const; @@ -403,44 +414,43 @@ class ParallelMoveResolverARM64 : public ParallelMoveResolverNoSwap { class CodeGeneratorARM64 : public CodeGenerator { public: CodeGeneratorARM64(HGraph* graph, - const Arm64InstructionSetFeatures& isa_features, const CompilerOptions& compiler_options, OptimizingCompilerStats* stats = nullptr); virtual ~CodeGeneratorARM64() {} - void GenerateFrameEntry() OVERRIDE; - void GenerateFrameExit() OVERRIDE; + void GenerateFrameEntry() override; + void GenerateFrameExit() override; vixl::aarch64::CPURegList GetFramePreservedCoreRegisters() const; vixl::aarch64::CPURegList GetFramePreservedFPRegisters() const; - void Bind(HBasicBlock* block) OVERRIDE; + void Bind(HBasicBlock* block) override; vixl::aarch64::Label* GetLabelOf(HBasicBlock* block) { block = FirstNonEmptyBlock(block); return &(block_labels_[block->GetBlockId()]); } - size_t GetWordSize() const OVERRIDE { + size_t GetWordSize() const override { return kArm64WordSize; } - size_t GetFloatingPointSpillSlotSize() const OVERRIDE { + size_t GetFloatingPointSpillSlotSize() const override { return GetGraph()->HasSIMD() ? 
2 * kArm64WordSize // 16 bytes == 2 arm64 words for each spill : 1 * kArm64WordSize; // 8 bytes == 1 arm64 words for each spill } - uintptr_t GetAddressOf(HBasicBlock* block) OVERRIDE { + uintptr_t GetAddressOf(HBasicBlock* block) override { vixl::aarch64::Label* block_entry_label = GetLabelOf(block); DCHECK(block_entry_label->IsBound()); return block_entry_label->GetLocation(); } - HGraphVisitor* GetLocationBuilder() OVERRIDE { return &location_builder_; } - HGraphVisitor* GetInstructionVisitor() OVERRIDE { return &instruction_visitor_; } - Arm64Assembler* GetAssembler() OVERRIDE { return &assembler_; } - const Arm64Assembler& GetAssembler() const OVERRIDE { return assembler_; } + HGraphVisitor* GetLocationBuilder() override { return &location_builder_; } + HGraphVisitor* GetInstructionVisitor() override { return &instruction_visitor_; } + Arm64Assembler* GetAssembler() override { return &assembler_; } + const Arm64Assembler& GetAssembler() const override { return assembler_; } vixl::aarch64::MacroAssembler* GetVIXLAssembler() { return GetAssembler()->GetVIXLAssembler(); } // Emit a write barrier. @@ -452,12 +462,12 @@ class CodeGeneratorARM64 : public CodeGenerator { // Register allocation. - void SetupBlockedRegisters() const OVERRIDE; + void SetupBlockedRegisters() const override; - size_t SaveCoreRegister(size_t stack_index, uint32_t reg_id) OVERRIDE; - size_t RestoreCoreRegister(size_t stack_index, uint32_t reg_id) OVERRIDE; - size_t SaveFloatingPointRegister(size_t stack_index, uint32_t reg_id) OVERRIDE; - size_t RestoreFloatingPointRegister(size_t stack_index, uint32_t reg_id) OVERRIDE; + size_t SaveCoreRegister(size_t stack_index, uint32_t reg_id) override; + size_t RestoreCoreRegister(size_t stack_index, uint32_t reg_id) override; + size_t SaveFloatingPointRegister(size_t stack_index, uint32_t reg_id) override; + size_t RestoreFloatingPointRegister(size_t stack_index, uint32_t reg_id) override; // The number of registers that can be allocated. The register allocator may // decide to reserve and not use a few of them. @@ -469,37 +479,35 @@ class CodeGeneratorARM64 : public CodeGenerator { static const int kNumberOfAllocatableFPRegisters = vixl::aarch64::kNumberOfFPRegisters; static constexpr int kNumberOfAllocatableRegisterPairs = 0; - void DumpCoreRegister(std::ostream& stream, int reg) const OVERRIDE; - void DumpFloatingPointRegister(std::ostream& stream, int reg) const OVERRIDE; + void DumpCoreRegister(std::ostream& stream, int reg) const override; + void DumpFloatingPointRegister(std::ostream& stream, int reg) const override; - InstructionSet GetInstructionSet() const OVERRIDE { + InstructionSet GetInstructionSet() const override { return InstructionSet::kArm64; } - const Arm64InstructionSetFeatures& GetInstructionSetFeatures() const { - return isa_features_; - } + const Arm64InstructionSetFeatures& GetInstructionSetFeatures() const; - void Initialize() OVERRIDE { + void Initialize() override { block_labels_.resize(GetGraph()->GetBlocks().size()); } // We want to use the STP and LDP instructions to spill and restore registers for slow paths. // These instructions can only encode offsets that are multiples of the register size accessed. 
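  // For example, for X registers the signed-offset forms encode a 7-bit
  // immediate scaled by 8, so roughly:
  //
  //   stp x0, x1, [sp, #16]   // Encodable: multiple of 8 within [-512, 504].
  //   stp x0, x1, [sp, #12]   // Not encodable: not a multiple of 8.
  //
  // Aligning spill slots to kXRegSizeInBytes keeps the offsets encodable
  // without extra address arithmetic. (Illustration of the AArch64 STP/LDP
  // signed-offset encoding constraint.)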
- uint32_t GetPreferredSlotsAlignment() const OVERRIDE { return vixl::aarch64::kXRegSizeInBytes; } + uint32_t GetPreferredSlotsAlignment() const override { return vixl::aarch64::kXRegSizeInBytes; } JumpTableARM64* CreateJumpTable(HPackedSwitch* switch_instr) { jump_tables_.emplace_back(new (GetGraph()->GetAllocator()) JumpTableARM64(switch_instr)); return jump_tables_.back().get(); } - void Finalize(CodeAllocator* allocator) OVERRIDE; + void Finalize(CodeAllocator* allocator) override; // Code generation helpers. void MoveConstant(vixl::aarch64::CPURegister destination, HConstant* constant); - void MoveConstant(Location destination, int32_t value) OVERRIDE; - void MoveLocation(Location dst, Location src, DataType::Type dst_type) OVERRIDE; - void AddLocationAsTemp(Location location, LocationSummary* locations) OVERRIDE; + void MoveConstant(Location destination, int32_t value) override; + void MoveLocation(Location dst, Location src, DataType::Type dst_type) override; + void AddLocationAsTemp(Location location, LocationSummary* locations) override; void Load(DataType::Type type, vixl::aarch64::CPURegister dst, @@ -521,7 +529,7 @@ class CodeGeneratorARM64 : public CodeGenerator { void InvokeRuntime(QuickEntrypointEnum entrypoint, HInstruction* instruction, uint32_t dex_pc, - SlowPathCode* slow_path = nullptr) OVERRIDE; + SlowPathCode* slow_path = nullptr) override; // Generate code to invoke a runtime entry point, but do not record // PC-related information in a stack map. @@ -529,39 +537,53 @@ class CodeGeneratorARM64 : public CodeGenerator { HInstruction* instruction, SlowPathCode* slow_path); - ParallelMoveResolverARM64* GetMoveResolver() OVERRIDE { return &move_resolver_; } + ParallelMoveResolverARM64* GetMoveResolver() override { return &move_resolver_; } - bool NeedsTwoRegisters(DataType::Type type ATTRIBUTE_UNUSED) const OVERRIDE { + bool NeedsTwoRegisters(DataType::Type type ATTRIBUTE_UNUSED) const override { return false; } // Check if the desired_string_load_kind is supported. If it is, return it, // otherwise return a fall-back kind that should be used instead. HLoadString::LoadKind GetSupportedLoadStringKind( - HLoadString::LoadKind desired_string_load_kind) OVERRIDE; + HLoadString::LoadKind desired_string_load_kind) override; // Check if the desired_class_load_kind is supported. If it is, return it, // otherwise return a fall-back kind that should be used instead. HLoadClass::LoadKind GetSupportedLoadClassKind( - HLoadClass::LoadKind desired_class_load_kind) OVERRIDE; + HLoadClass::LoadKind desired_class_load_kind) override; // Check if the desired_dispatch_info is supported. If it is, return it, // otherwise return a fall-back info that should be used instead. 
HInvokeStaticOrDirect::DispatchInfo GetSupportedInvokeStaticOrDirectDispatch( const HInvokeStaticOrDirect::DispatchInfo& desired_dispatch_info, - HInvokeStaticOrDirect* invoke) OVERRIDE; + ArtMethod* method) override; void GenerateStaticOrDirectCall( - HInvokeStaticOrDirect* invoke, Location temp, SlowPathCode* slow_path = nullptr) OVERRIDE; + HInvokeStaticOrDirect* invoke, Location temp, SlowPathCode* slow_path = nullptr) override; void GenerateVirtualCall( - HInvokeVirtual* invoke, Location temp, SlowPathCode* slow_path = nullptr) OVERRIDE; + HInvokeVirtual* invoke, Location temp, SlowPathCode* slow_path = nullptr) override; void MoveFromReturnRegister(Location trg ATTRIBUTE_UNUSED, - DataType::Type type ATTRIBUTE_UNUSED) OVERRIDE { + DataType::Type type ATTRIBUTE_UNUSED) override { UNIMPLEMENTED(FATAL); } - // Add a new PC-relative method patch for an instruction and return the label + // Add a new boot image intrinsic patch for an instruction and return the label + // to be bound before the instruction. The instruction will be either the + // ADRP (pass `adrp_label = null`) or the ADD (pass `adrp_label` pointing + // to the associated ADRP patch label). + vixl::aarch64::Label* NewBootImageIntrinsicPatch(uint32_t intrinsic_data, + vixl::aarch64::Label* adrp_label = nullptr); + + // Add a new boot image relocation patch for an instruction and return the label + // to be bound before the instruction. The instruction will be either the + // ADRP (pass `adrp_label = null`) or the LDR (pass `adrp_label` pointing + // to the associated ADRP patch label). + vixl::aarch64::Label* NewBootImageRelRoPatch(uint32_t boot_image_offset, + vixl::aarch64::Label* adrp_label = nullptr); + + // Add a new boot image method patch for an instruction and return the label // to be bound before the instruction. The instruction will be either the // ADRP (pass `adrp_label = null`) or the ADD (pass `adrp_label` pointing // to the associated ADRP patch label). @@ -575,7 +597,7 @@ class CodeGeneratorARM64 : public CodeGenerator { vixl::aarch64::Label* NewMethodBssEntryPatch(MethodReference target_method, vixl::aarch64::Label* adrp_label = nullptr); - // Add a new PC-relative type patch for an instruction and return the label + // Add a new boot image type patch for an instruction and return the label // to be bound before the instruction. The instruction will be either the // ADRP (pass `adrp_label = null`) or the ADD (pass `adrp_label` pointing // to the associated ADRP patch label). @@ -591,7 +613,7 @@ class CodeGeneratorARM64 : public CodeGenerator { dex::TypeIndex type_index, vixl::aarch64::Label* adrp_label = nullptr); - // Add a new PC-relative string patch for an instruction and return the label + // Add a new boot image string patch for an instruction and return the label // to be bound before the instruction. The instruction will be either the // ADRP (pass `adrp_label = null`) or the ADD (pass `adrp_label` pointing // to the associated ADRP patch label). @@ -607,9 +629,9 @@ class CodeGeneratorARM64 : public CodeGenerator { dex::StringIndex string_index, vixl::aarch64::Label* adrp_label = nullptr); - // Add a new baker read barrier patch and return the label to be bound - // before the CBNZ instruction. - vixl::aarch64::Label* NewBakerReadBarrierPatch(uint32_t custom_data); + // Emit the CBNZ instruction for baker read barrier and record + // the associated patch for AOT or slow path for JIT. 
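  // Conceptually (a sketch, not the exact implementation), the emitted
  // sequence is a single conditional branch on the Marking Register:
  //
  //   cbnz mr, <target>
  //
  // where <target> is a placeholder patched to a shared thunk at link time
  // for AOT, or a label bound to a slow path shared within the method and
  // recorded in jit_baker_read_barrier_slow_paths_ for JIT; `custom_data`
  // selects the thunk variant (field/acquire/array/GC root) and the
  // registers involved.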
+ void EmitBakerReadBarrierCbnz(uint32_t custom_data); vixl::aarch64::Literal<uint32_t>* DeduplicateBootImageAddressLiteral(uint64_t address); vixl::aarch64::Literal<uint32_t>* DeduplicateJitStringLiteral(const DexFile& dex_file, @@ -627,10 +649,40 @@ class CodeGeneratorARM64 : public CodeGenerator { vixl::aarch64::Register out, vixl::aarch64::Register base); - void EmitLinkerPatches(ArenaVector<linker::LinkerPatch>* linker_patches) OVERRIDE; + void LoadBootImageAddress(vixl::aarch64::Register reg, uint32_t boot_image_reference); + void AllocateInstanceForIntrinsic(HInvokeStaticOrDirect* invoke, uint32_t boot_image_offset); - void EmitJitRootPatches(uint8_t* code, const uint8_t* roots_data) OVERRIDE; + void EmitLinkerPatches(ArenaVector<linker::LinkerPatch>* linker_patches) override; + bool NeedsThunkCode(const linker::LinkerPatch& patch) const override; + void EmitThunkCode(const linker::LinkerPatch& patch, + /*out*/ ArenaVector<uint8_t>* code, + /*out*/ std::string* debug_name) override; + void EmitJitRootPatches(uint8_t* code, const uint8_t* roots_data) override; + + // Generate a GC root reference load: + // + // root <- *(obj + offset) + // + // while honoring read barriers based on read_barrier_option. + void GenerateGcRootFieldLoad(HInstruction* instruction, + Location root, + vixl::aarch64::Register obj, + uint32_t offset, + vixl::aarch64::Label* fixup_label, + ReadBarrierOption read_barrier_option); + // Generate MOV for the `old_value` in UnsafeCASObject and mark it with Baker read barrier. + void GenerateUnsafeCasOldValueMovWithBakerReadBarrier(vixl::aarch64::Register marked, + vixl::aarch64::Register old_value); + // Fast path implementation of ReadBarrier::Barrier for a heap + // reference field load when Baker's read barriers are used. + // Overload suitable for Unsafe.getObject/-Volatile() intrinsic. + void GenerateFieldLoadWithBakerReadBarrier(HInstruction* instruction, + Location ref, + vixl::aarch64::Register obj, + const vixl::aarch64::MemOperand& src, + bool needs_null_check, + bool use_load_acquire); // Fast path implementation of ReadBarrier::Barrier for a heap // reference field load when Baker's read barriers are used. void GenerateFieldLoadWithBakerReadBarrier(HInstruction* instruction, @@ -642,58 +694,12 @@ class CodeGeneratorARM64 : public CodeGenerator { bool use_load_acquire); // Fast path implementation of ReadBarrier::Barrier for a heap // reference array load when Baker's read barriers are used. - void GenerateArrayLoadWithBakerReadBarrier(HInstruction* instruction, + void GenerateArrayLoadWithBakerReadBarrier(HArrayGet* instruction, Location ref, vixl::aarch64::Register obj, uint32_t data_offset, Location index, - vixl::aarch64::Register temp, bool needs_null_check); - // Factored implementation, used by GenerateFieldLoadWithBakerReadBarrier, - // GenerateArrayLoadWithBakerReadBarrier and some intrinsics. - // - // Load the object reference located at the address - // `obj + offset + (index << scale_factor)`, held by object `obj`, into - // `ref`, and mark it if needed. 
- void GenerateReferenceLoadWithBakerReadBarrier(HInstruction* instruction, - Location ref, - vixl::aarch64::Register obj, - uint32_t offset, - Location index, - size_t scale_factor, - vixl::aarch64::Register temp, - bool needs_null_check, - bool use_load_acquire); - - // Generate code checking whether the the reference field at the - // address `obj + field_offset`, held by object `obj`, needs to be - // marked, and if so, marking it and updating the field within `obj` - // with the marked value. - // - // This routine is used for the implementation of the - // UnsafeCASObject intrinsic with Baker read barriers. - // - // This method has a structure similar to - // GenerateReferenceLoadWithBakerReadBarrier, but note that argument - // `ref` is only as a temporary here, and thus its value should not - // be used afterwards. - void UpdateReferenceFieldWithBakerReadBarrier(HInstruction* instruction, - Location ref, - vixl::aarch64::Register obj, - Location field_offset, - vixl::aarch64::Register temp, - bool needs_null_check, - bool use_load_acquire); - - // Generate a heap reference load (with no read barrier). - void GenerateRawReferenceLoad(HInstruction* instruction, - Location ref, - vixl::aarch64::Register obj, - uint32_t offset, - Location index, - size_t scale_factor, - bool needs_null_check, - bool use_load_acquire); // Emit code checking the status of the Marking Register, and // aborting the program if MR does not match the value stored in the @@ -759,12 +765,78 @@ class CodeGeneratorARM64 : public CodeGenerator { // artReadBarrierForRootSlow. void GenerateReadBarrierForRootSlow(HInstruction* instruction, Location out, Location root); - void GenerateNop() OVERRIDE; + void GenerateNop() override; - void GenerateImplicitNullCheck(HNullCheck* instruction) OVERRIDE; - void GenerateExplicitNullCheck(HNullCheck* instruction) OVERRIDE; + void GenerateImplicitNullCheck(HNullCheck* instruction) override; + void GenerateExplicitNullCheck(HNullCheck* instruction) override; private: + // Encoding of thunk type and data for link-time generated thunks for Baker read barriers. + + enum class BakerReadBarrierKind : uint8_t { + kField, // Field get or array get with constant offset (i.e. constant index). + kAcquire, // Volatile field get. + kArray, // Array get with index in register. + kGcRoot, // GC root load. 
+ kLast = kGcRoot + }; + + static constexpr uint32_t kBakerReadBarrierInvalidEncodedReg = /* sp/zr is invalid */ 31u; + + static constexpr size_t kBitsForBakerReadBarrierKind = + MinimumBitsToStore(static_cast<size_t>(BakerReadBarrierKind::kLast)); + static constexpr size_t kBakerReadBarrierBitsForRegister = + MinimumBitsToStore(kBakerReadBarrierInvalidEncodedReg); + using BakerReadBarrierKindField = + BitField<BakerReadBarrierKind, 0, kBitsForBakerReadBarrierKind>; + using BakerReadBarrierFirstRegField = + BitField<uint32_t, kBitsForBakerReadBarrierKind, kBakerReadBarrierBitsForRegister>; + using BakerReadBarrierSecondRegField = + BitField<uint32_t, + kBitsForBakerReadBarrierKind + kBakerReadBarrierBitsForRegister, + kBakerReadBarrierBitsForRegister>; + + static void CheckValidReg(uint32_t reg) { + DCHECK(reg < vixl::aarch64::lr.GetCode() && + reg != vixl::aarch64::ip0.GetCode() && + reg != vixl::aarch64::ip1.GetCode()) << reg; + } + + static inline uint32_t EncodeBakerReadBarrierFieldData(uint32_t base_reg, uint32_t holder_reg) { + CheckValidReg(base_reg); + CheckValidReg(holder_reg); + return BakerReadBarrierKindField::Encode(BakerReadBarrierKind::kField) | + BakerReadBarrierFirstRegField::Encode(base_reg) | + BakerReadBarrierSecondRegField::Encode(holder_reg); + } + + static inline uint32_t EncodeBakerReadBarrierAcquireData(uint32_t base_reg, uint32_t holder_reg) { + CheckValidReg(base_reg); + CheckValidReg(holder_reg); + DCHECK_NE(base_reg, holder_reg); + return BakerReadBarrierKindField::Encode(BakerReadBarrierKind::kAcquire) | + BakerReadBarrierFirstRegField::Encode(base_reg) | + BakerReadBarrierSecondRegField::Encode(holder_reg); + } + + static inline uint32_t EncodeBakerReadBarrierArrayData(uint32_t base_reg) { + CheckValidReg(base_reg); + return BakerReadBarrierKindField::Encode(BakerReadBarrierKind::kArray) | + BakerReadBarrierFirstRegField::Encode(base_reg) | + BakerReadBarrierSecondRegField::Encode(kBakerReadBarrierInvalidEncodedReg); + } + + static inline uint32_t EncodeBakerReadBarrierGcRootData(uint32_t root_reg) { + CheckValidReg(root_reg); + return BakerReadBarrierKindField::Encode(BakerReadBarrierKind::kGcRoot) | + BakerReadBarrierFirstRegField::Encode(root_reg) | + BakerReadBarrierSecondRegField::Encode(kBakerReadBarrierInvalidEncodedReg); + } + + void CompileBakerReadBarrierThunk(Arm64Assembler& assembler, + uint32_t encoded_data, + /*out*/ std::string* debug_name); + using Uint64ToLiteralMap = ArenaSafeMap<uint64_t, vixl::aarch64::Literal<uint64_t>*>; using Uint32ToLiteralMap = ArenaSafeMap<uint32_t, vixl::aarch64::Literal<uint32_t>*>; using StringToLiteralMap = ArenaSafeMap<StringReference, @@ -814,13 +886,13 @@ class CodeGeneratorARM64 : public CodeGenerator { InstructionCodeGeneratorARM64 instruction_visitor_; ParallelMoveResolverARM64 move_resolver_; Arm64Assembler assembler_; - const Arm64InstructionSetFeatures& isa_features_; // Deduplication map for 32-bit literals, used for non-patchable boot image addresses. Uint32ToLiteralMap uint32_literals_; // Deduplication map for 64-bit literals, used for non-patchable method address or method code. Uint64ToLiteralMap uint64_literals_; - // PC-relative method patch info for kBootImageLinkTimePcRelative. + // PC-relative method patch info for kBootImageLinkTimePcRelative/BootImageRelRo. + // Also used for type/string patches for kBootImageRelRo (same linker patch as for methods). ArenaDeque<PcRelativePatchInfo> boot_image_method_patches_; // PC-relative method patch info for kBssEntry. 
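  // (A .bss entry is a slot in the oat file's .bss section that the runtime
  // fills in on first resolution; the code references it with the same
  // PC-relative ADRP + LDR pattern as the other patches above, so only the
  // patch target differs. Sketch of the mechanism, assuming the usual
  // oat-file layout.)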
ArenaDeque<PcRelativePatchInfo> method_bss_entry_patches_; @@ -828,10 +900,12 @@ class CodeGeneratorARM64 : public CodeGenerator { ArenaDeque<PcRelativePatchInfo> boot_image_type_patches_; // PC-relative type patch info for kBssEntry. ArenaDeque<PcRelativePatchInfo> type_bss_entry_patches_; - // PC-relative String patch info; type depends on configuration (intern table or boot image PIC). + // PC-relative String patch info for kBootImageLinkTimePcRelative. ArenaDeque<PcRelativePatchInfo> boot_image_string_patches_; // PC-relative String patch info for kBssEntry. ArenaDeque<PcRelativePatchInfo> string_bss_entry_patches_; + // PC-relative patch info for IntrinsicObjects. + ArenaDeque<PcRelativePatchInfo> boot_image_intrinsic_patches_; // Baker read barrier patch info. ArenaDeque<BakerReadBarrierPatchInfo> baker_read_barrier_patches_; @@ -840,6 +914,20 @@ class CodeGeneratorARM64 : public CodeGenerator { // Patches for class literals in JIT compiled code. TypeToLiteralMap jit_class_patches_; + // Baker read barrier slow paths, mapping custom data (uint32_t) to label. + // Wrap the label to work around vixl::aarch64::Label being non-copyable + // and non-moveable and as such unusable in ArenaSafeMap<>. + struct LabelWrapper { + LabelWrapper(const LabelWrapper& src) + : label() { + DCHECK(!src.label.IsLinked() && !src.label.IsBound()); + } + LabelWrapper() = default; + vixl::aarch64::Label label; + }; + ArenaSafeMap<uint32_t, LabelWrapper> jit_baker_read_barrier_slow_paths_; + + friend class linker::Arm64RelativePatcherTest; DISALLOW_COPY_AND_ASSIGN(CodeGeneratorARM64); }; diff --git a/compiler/optimizing/code_generator_arm_vixl.cc b/compiler/optimizing/code_generator_arm_vixl.cc index 2452139d42..6469c6964a 100644 --- a/compiler/optimizing/code_generator_arm_vixl.cc +++ b/compiler/optimizing/code_generator_arm_vixl.cc @@ -27,9 +27,10 @@ #include "compiled_method.h" #include "entrypoints/quick/quick_entrypoints.h" #include "gc/accounting/card_table.h" +#include "gc/space/image_space.h" #include "heap_poisoning.h" +#include "intrinsics.h" #include "intrinsics_arm_vixl.h" -#include "linker/arm/relative_patcher_thumb2.h" #include "linker/linker_patch.h" #include "mirror/array-inl.h" #include "mirror/class-inl.h" @@ -47,7 +48,6 @@ using namespace vixl32; // NOLINT(build/namespaces) using helpers::DRegisterFrom; using helpers::DWARFReg; -using helpers::HighDRegisterFrom; using helpers::HighRegisterFrom; using helpers::InputDRegisterAt; using helpers::InputOperandAt; @@ -85,18 +85,10 @@ static constexpr uint32_t kPackedSwitchCompareJumpThreshold = 7; // Reference load (except object array loads) is using LDR Rt, [Rn, #offset] which can handle // offset < 4KiB. For offsets >= 4KiB, the load shall be emitted as two or more instructions. -// For the Baker read barrier implementation using link-generated thunks we need to split +// For the Baker read barrier implementation using link-time generated thunks we need to split // the offset explicitly. constexpr uint32_t kReferenceLoadMinFarOffset = 4 * KB; -// Flags controlling the use of link-time generated thunks for Baker read barriers. -constexpr bool kBakerReadBarrierLinkTimeThunksEnableForFields = true; -constexpr bool kBakerReadBarrierLinkTimeThunksEnableForArrays = true; -constexpr bool kBakerReadBarrierLinkTimeThunksEnableForGcRoots = true; - -// The reserved entrypoint register for link-time generated thunks. -const vixl32::Register kBakerCcEntrypointRegister = r4; - // Using a base helps identify when we hit Marking Register check breakpoints. 
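// The checks emitted by MaybeGenerateMarkingRegisterCheck() pass a per-site
// code (the __LINE__ values seen at the call sites), and the breakpoint
// immediate presumably encodes `base + code`, so a debugger hitting the
// breakpoint can tell which Marking Register check failed and where it was
// emitted.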
constexpr int kMarkingRegisterCheckBreakCodeBaseCode = 0x10; @@ -111,26 +103,6 @@ constexpr int kMarkingRegisterCheckBreakCodeBaseCode = 0x10; // Marker that code is yet to be, and must, be implemented. #define TODO_VIXL32(level) LOG(level) << __PRETTY_FUNCTION__ << " unimplemented " -static inline void ExcludeIPAndBakerCcEntrypointRegister(UseScratchRegisterScope* temps, - HInstruction* instruction) { - DCHECK(temps->IsAvailable(ip)); - temps->Exclude(ip); - DCHECK(!temps->IsAvailable(kBakerCcEntrypointRegister)); - DCHECK_EQ(kBakerCcEntrypointRegister.GetCode(), - linker::Thumb2RelativePatcher::kBakerCcEntrypointRegister); - DCHECK_NE(instruction->GetLocations()->GetTempCount(), 0u); - DCHECK(RegisterFrom(instruction->GetLocations()->GetTemp( - instruction->GetLocations()->GetTempCount() - 1u)).Is(kBakerCcEntrypointRegister)); -} - -static inline void EmitPlaceholderBne(CodeGeneratorARMVIXL* codegen, vixl32::Label* patch_label) { - ExactAssemblyScope eas(codegen->GetVIXLAssembler(), kMaxInstructionSizeInBytes); - __ bind(patch_label); - vixl32::Label placeholder_label; - __ b(ne, EncodingSize(Wide), &placeholder_label); // Placeholder, patched at link-time. - __ bind(&placeholder_label); -} - static inline bool CanEmitNarrowLdr(vixl32::Register rt, vixl32::Register rn, uint32_t offset) { return rt.IsLow() && rn.IsLow() && offset < 32u; } @@ -139,7 +111,7 @@ class EmitAdrCode { public: EmitAdrCode(ArmVIXLMacroAssembler* assembler, vixl32::Register rd, vixl32::Label* label) : assembler_(assembler), rd_(rd), label_(label) { - ExactAssemblyScope aas(assembler, kMaxInstructionSizeInBytes); + DCHECK(!assembler->AllowMacroInstructions()); // In ExactAssemblyScope. adr_location_ = assembler->GetCursorOffset(); assembler->adr(EncodingSize(Wide), rd, label); } @@ -165,6 +137,15 @@ class EmitAdrCode { int32_t adr_location_; }; +static RegisterSet OneRegInReferenceOutSaveEverythingCallerSaves() { + InvokeRuntimeCallingConventionARMVIXL calling_convention; + RegisterSet caller_saves = RegisterSet::Empty(); + caller_saves.Add(LocationFrom(calling_convention.GetRegisterAt(0))); + // TODO: Add GetReturnLocation() to the calling convention so that we can DCHECK() + // that the the kPrimNot result register is the same as the first argument register. + return caller_saves; +} + // SaveLiveRegisters and RestoreLiveRegisters from SlowPathCodeARM operate on sets of S registers, // for each live D registers they treat two corresponding S registers as live ones. // @@ -338,7 +319,7 @@ void SlowPathCodeARMVIXL::SaveLiveRegisters(CodeGenerator* codegen, LocationSumm size_t stack_offset = codegen->GetFirstRegisterSlotInSlowPath(); size_t orig_offset = stack_offset; - const uint32_t core_spills = codegen->GetSlowPathSpills(locations, /* core_registers */ true); + const uint32_t core_spills = codegen->GetSlowPathSpills(locations, /* core_registers= */ true); for (uint32_t i : LowToHighBits(core_spills)) { // If the register holds an object, update the stack mask. 
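Annotation: the spill save/restore code above walks a register bitmask from the lowest to the highest set bit (GetSlowPathSpills() produces such a mask and LowToHighBits iterates it). A small sketch of the same count-trailing-zeros walk over a plain uint32_t mask; the helper name and visitor are illustrative, not ART's:

    #include <cstdint>
    #include <cstdio>

    // Visit each set bit of `mask` from lowest to highest, like LowToHighBits().
    template <typename Visitor>
    void ForEachSetBitLowToHigh(uint32_t mask, Visitor&& visit) {
      while (mask != 0u) {
        uint32_t reg = static_cast<uint32_t>(__builtin_ctz(mask));  // index of lowest set bit
        visit(reg);
        mask &= mask - 1u;  // clear that bit
      }
    }

    int main() {
      uint32_t core_spills = (1u << 0) | (1u << 2) | (1u << 5);  // e.g. r0, r2, r5 live in slow path
      uint32_t stack_offset = 0u;
      ForEachSetBitLowToHigh(core_spills, [&](uint32_t reg) {
        std::printf("spill r%u at offset %u\n", reg, stack_offset);
        stack_offset += 4u;  // one ARM word per core register
      });
      return 0;
    }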
if (locations->RegisterContainsObject(i)) { @@ -353,7 +334,7 @@ void SlowPathCodeARMVIXL::SaveLiveRegisters(CodeGenerator* codegen, LocationSumm CodeGeneratorARMVIXL* arm_codegen = down_cast<CodeGeneratorARMVIXL*>(codegen); arm_codegen->GetAssembler()->StoreRegisterList(core_spills, orig_offset); - uint32_t fp_spills = codegen->GetSlowPathSpills(locations, /* core_registers */ false); + uint32_t fp_spills = codegen->GetSlowPathSpills(locations, /* core_registers= */ false); orig_offset = stack_offset; for (uint32_t i : LowToHighBits(fp_spills)) { DCHECK_LT(i, kMaximumNumberOfExpectedRegisters); @@ -376,7 +357,7 @@ void SlowPathCodeARMVIXL::RestoreLiveRegisters(CodeGenerator* codegen, LocationS size_t stack_offset = codegen->GetFirstRegisterSlotInSlowPath(); size_t orig_offset = stack_offset; - const uint32_t core_spills = codegen->GetSlowPathSpills(locations, /* core_registers */ true); + const uint32_t core_spills = codegen->GetSlowPathSpills(locations, /* core_registers= */ true); for (uint32_t i : LowToHighBits(core_spills)) { DCHECK_LT(stack_offset, codegen->GetFrameSize() - codegen->FrameEntrySpillSize()); DCHECK_LT(i, kMaximumNumberOfExpectedRegisters); @@ -387,7 +368,7 @@ void SlowPathCodeARMVIXL::RestoreLiveRegisters(CodeGenerator* codegen, LocationS CodeGeneratorARMVIXL* arm_codegen = down_cast<CodeGeneratorARMVIXL*>(codegen); arm_codegen->GetAssembler()->LoadRegisterList(core_spills, orig_offset); - uint32_t fp_spills = codegen->GetSlowPathSpills(locations, /* core_registers */ false); + uint32_t fp_spills = codegen->GetSlowPathSpills(locations, /* core_registers= */ false); while (fp_spills != 0u) { uint32_t begin = CTZ(fp_spills); uint32_t tmp = fp_spills + (1u << begin); @@ -402,7 +383,7 @@ class NullCheckSlowPathARMVIXL : public SlowPathCodeARMVIXL { public: explicit NullCheckSlowPathARMVIXL(HNullCheck* instruction) : SlowPathCodeARMVIXL(instruction) {} - void EmitNativeCode(CodeGenerator* codegen) OVERRIDE { + void EmitNativeCode(CodeGenerator* codegen) override { CodeGeneratorARMVIXL* arm_codegen = down_cast<CodeGeneratorARMVIXL*>(codegen); __ Bind(GetEntryLabel()); if (instruction_->CanThrowIntoCatchBlock()) { @@ -416,9 +397,9 @@ class NullCheckSlowPathARMVIXL : public SlowPathCodeARMVIXL { CheckEntrypointTypes<kQuickThrowNullPointer, void, void>(); } - bool IsFatal() const OVERRIDE { return true; } + bool IsFatal() const override { return true; } - const char* GetDescription() const OVERRIDE { return "NullCheckSlowPathARMVIXL"; } + const char* GetDescription() const override { return "NullCheckSlowPathARMVIXL"; } private: DISALLOW_COPY_AND_ASSIGN(NullCheckSlowPathARMVIXL); @@ -429,16 +410,16 @@ class DivZeroCheckSlowPathARMVIXL : public SlowPathCodeARMVIXL { explicit DivZeroCheckSlowPathARMVIXL(HDivZeroCheck* instruction) : SlowPathCodeARMVIXL(instruction) {} - void EmitNativeCode(CodeGenerator* codegen) OVERRIDE { + void EmitNativeCode(CodeGenerator* codegen) override { CodeGeneratorARMVIXL* arm_codegen = down_cast<CodeGeneratorARMVIXL*>(codegen); __ Bind(GetEntryLabel()); arm_codegen->InvokeRuntime(kQuickThrowDivZero, instruction_, instruction_->GetDexPc(), this); CheckEntrypointTypes<kQuickThrowDivZero, void, void>(); } - bool IsFatal() const OVERRIDE { return true; } + bool IsFatal() const override { return true; } - const char* GetDescription() const OVERRIDE { return "DivZeroCheckSlowPathARMVIXL"; } + const char* GetDescription() const override { return "DivZeroCheckSlowPathARMVIXL"; } private: DISALLOW_COPY_AND_ASSIGN(DivZeroCheckSlowPathARMVIXL); @@ -449,7 +430,7 
@@ class SuspendCheckSlowPathARMVIXL : public SlowPathCodeARMVIXL { SuspendCheckSlowPathARMVIXL(HSuspendCheck* instruction, HBasicBlock* successor) : SlowPathCodeARMVIXL(instruction), successor_(successor) {} - void EmitNativeCode(CodeGenerator* codegen) OVERRIDE { + void EmitNativeCode(CodeGenerator* codegen) override { CodeGeneratorARMVIXL* arm_codegen = down_cast<CodeGeneratorARMVIXL*>(codegen); __ Bind(GetEntryLabel()); arm_codegen->InvokeRuntime(kQuickTestSuspend, instruction_, instruction_->GetDexPc(), this); @@ -470,7 +451,7 @@ class SuspendCheckSlowPathARMVIXL : public SlowPathCodeARMVIXL { return successor_; } - const char* GetDescription() const OVERRIDE { return "SuspendCheckSlowPathARMVIXL"; } + const char* GetDescription() const override { return "SuspendCheckSlowPathARMVIXL"; } private: // If not null, the block to branch to after the suspend check. @@ -487,7 +468,7 @@ class BoundsCheckSlowPathARMVIXL : public SlowPathCodeARMVIXL { explicit BoundsCheckSlowPathARMVIXL(HBoundsCheck* instruction) : SlowPathCodeARMVIXL(instruction) {} - void EmitNativeCode(CodeGenerator* codegen) OVERRIDE { + void EmitNativeCode(CodeGenerator* codegen) override { CodeGeneratorARMVIXL* arm_codegen = down_cast<CodeGeneratorARMVIXL*>(codegen); LocationSummary* locations = instruction_->GetLocations(); @@ -514,9 +495,9 @@ class BoundsCheckSlowPathARMVIXL : public SlowPathCodeARMVIXL { CheckEntrypointTypes<kQuickThrowArrayBounds, void, int32_t, int32_t>(); } - bool IsFatal() const OVERRIDE { return true; } + bool IsFatal() const override { return true; } - const char* GetDescription() const OVERRIDE { return "BoundsCheckSlowPathARMVIXL"; } + const char* GetDescription() const override { return "BoundsCheckSlowPathARMVIXL"; } private: DISALLOW_COPY_AND_ASSIGN(BoundsCheckSlowPathARMVIXL); @@ -524,29 +505,39 @@ class BoundsCheckSlowPathARMVIXL : public SlowPathCodeARMVIXL { class LoadClassSlowPathARMVIXL : public SlowPathCodeARMVIXL { public: - LoadClassSlowPathARMVIXL(HLoadClass* cls, HInstruction* at, uint32_t dex_pc, bool do_clinit) - : SlowPathCodeARMVIXL(at), cls_(cls), dex_pc_(dex_pc), do_clinit_(do_clinit) { + LoadClassSlowPathARMVIXL(HLoadClass* cls, HInstruction* at) + : SlowPathCodeARMVIXL(at), cls_(cls) { DCHECK(at->IsLoadClass() || at->IsClinitCheck()); + DCHECK_EQ(instruction_->IsLoadClass(), cls_ == instruction_); } - void EmitNativeCode(CodeGenerator* codegen) OVERRIDE { + void EmitNativeCode(CodeGenerator* codegen) override { LocationSummary* locations = instruction_->GetLocations(); Location out = locations->Out(); + const uint32_t dex_pc = instruction_->GetDexPc(); + bool must_resolve_type = instruction_->IsLoadClass() && cls_->MustResolveTypeOnSlowPath(); + bool must_do_clinit = instruction_->IsClinitCheck() || cls_->MustGenerateClinitCheck(); CodeGeneratorARMVIXL* arm_codegen = down_cast<CodeGeneratorARMVIXL*>(codegen); __ Bind(GetEntryLabel()); SaveLiveRegisters(codegen, locations); InvokeRuntimeCallingConventionARMVIXL calling_convention; - dex::TypeIndex type_index = cls_->GetTypeIndex(); - __ Mov(calling_convention.GetRegisterAt(0), type_index.index_); - QuickEntrypointEnum entrypoint = do_clinit_ ? 
kQuickInitializeStaticStorage - : kQuickInitializeType; - arm_codegen->InvokeRuntime(entrypoint, instruction_, dex_pc_, this); - if (do_clinit_) { - CheckEntrypointTypes<kQuickInitializeStaticStorage, void*, uint32_t>(); + if (must_resolve_type) { + DCHECK(IsSameDexFile(cls_->GetDexFile(), arm_codegen->GetGraph()->GetDexFile())); + dex::TypeIndex type_index = cls_->GetTypeIndex(); + __ Mov(calling_convention.GetRegisterAt(0), type_index.index_); + arm_codegen->InvokeRuntime(kQuickResolveType, instruction_, dex_pc, this); + CheckEntrypointTypes<kQuickResolveType, void*, uint32_t>(); + // If we also must_do_clinit, the resolved type is now in the correct register. } else { - CheckEntrypointTypes<kQuickInitializeType, void*, uint32_t>(); + DCHECK(must_do_clinit); + Location source = instruction_->IsLoadClass() ? out : locations->InAt(0); + arm_codegen->Move32(LocationFrom(calling_convention.GetRegisterAt(0)), source); + } + if (must_do_clinit) { + arm_codegen->InvokeRuntime(kQuickInitializeStaticStorage, instruction_, dex_pc, this); + CheckEntrypointTypes<kQuickInitializeStaticStorage, void*, mirror::Class*>(); } // Move the class to the desired location. @@ -558,18 +549,12 @@ class LoadClassSlowPathARMVIXL : public SlowPathCodeARMVIXL { __ B(GetExitLabel()); } - const char* GetDescription() const OVERRIDE { return "LoadClassSlowPathARMVIXL"; } + const char* GetDescription() const override { return "LoadClassSlowPathARMVIXL"; } private: // The class this slow path will load. HLoadClass* const cls_; - // The dex PC of `at_`. - const uint32_t dex_pc_; - - // Whether to initialize the class. - const bool do_clinit_; - DISALLOW_COPY_AND_ASSIGN(LoadClassSlowPathARMVIXL); }; @@ -578,7 +563,7 @@ class LoadStringSlowPathARMVIXL : public SlowPathCodeARMVIXL { explicit LoadStringSlowPathARMVIXL(HLoadString* instruction) : SlowPathCodeARMVIXL(instruction) {} - void EmitNativeCode(CodeGenerator* codegen) OVERRIDE { + void EmitNativeCode(CodeGenerator* codegen) override { DCHECK(instruction_->IsLoadString()); DCHECK_EQ(instruction_->AsLoadString()->GetLoadKind(), HLoadString::LoadKind::kBssEntry); LocationSummary* locations = instruction_->GetLocations(); @@ -600,7 +585,7 @@ class LoadStringSlowPathARMVIXL : public SlowPathCodeARMVIXL { __ B(GetExitLabel()); } - const char* GetDescription() const OVERRIDE { return "LoadStringSlowPathARMVIXL"; } + const char* GetDescription() const override { return "LoadStringSlowPathARMVIXL"; } private: DISALLOW_COPY_AND_ASSIGN(LoadStringSlowPathARMVIXL); @@ -611,7 +596,7 @@ class TypeCheckSlowPathARMVIXL : public SlowPathCodeARMVIXL { TypeCheckSlowPathARMVIXL(HInstruction* instruction, bool is_fatal) : SlowPathCodeARMVIXL(instruction), is_fatal_(is_fatal) {} - void EmitNativeCode(CodeGenerator* codegen) OVERRIDE { + void EmitNativeCode(CodeGenerator* codegen) override { LocationSummary* locations = instruction_->GetLocations(); DCHECK(instruction_->IsCheckCast() || !locations->GetLiveRegisters()->ContainsCoreRegister(locations->Out().reg())); @@ -655,9 +640,9 @@ class TypeCheckSlowPathARMVIXL : public SlowPathCodeARMVIXL { } } - const char* GetDescription() const OVERRIDE { return "TypeCheckSlowPathARMVIXL"; } + const char* GetDescription() const override { return "TypeCheckSlowPathARMVIXL"; } - bool IsFatal() const OVERRIDE { return is_fatal_; } + bool IsFatal() const override { return is_fatal_; } private: const bool is_fatal_; @@ -670,7 +655,7 @@ class DeoptimizationSlowPathARMVIXL : public SlowPathCodeARMVIXL { explicit DeoptimizationSlowPathARMVIXL(HDeoptimize* 
instruction) : SlowPathCodeARMVIXL(instruction) {} - void EmitNativeCode(CodeGenerator* codegen) OVERRIDE { + void EmitNativeCode(CodeGenerator* codegen) override { CodeGeneratorARMVIXL* arm_codegen = down_cast<CodeGeneratorARMVIXL*>(codegen); __ Bind(GetEntryLabel()); LocationSummary* locations = instruction_->GetLocations(); @@ -683,7 +668,7 @@ class DeoptimizationSlowPathARMVIXL : public SlowPathCodeARMVIXL { CheckEntrypointTypes<kQuickDeoptimize, void, DeoptimizationKind>(); } - const char* GetDescription() const OVERRIDE { return "DeoptimizationSlowPathARMVIXL"; } + const char* GetDescription() const override { return "DeoptimizationSlowPathARMVIXL"; } private: DISALLOW_COPY_AND_ASSIGN(DeoptimizationSlowPathARMVIXL); @@ -693,7 +678,7 @@ class ArraySetSlowPathARMVIXL : public SlowPathCodeARMVIXL { public: explicit ArraySetSlowPathARMVIXL(HInstruction* instruction) : SlowPathCodeARMVIXL(instruction) {} - void EmitNativeCode(CodeGenerator* codegen) OVERRIDE { + void EmitNativeCode(CodeGenerator* codegen) override { LocationSummary* locations = instruction_->GetLocations(); __ Bind(GetEntryLabel()); SaveLiveRegisters(codegen, locations); @@ -724,495 +709,12 @@ class ArraySetSlowPathARMVIXL : public SlowPathCodeARMVIXL { __ B(GetExitLabel()); } - const char* GetDescription() const OVERRIDE { return "ArraySetSlowPathARMVIXL"; } + const char* GetDescription() const override { return "ArraySetSlowPathARMVIXL"; } private: DISALLOW_COPY_AND_ASSIGN(ArraySetSlowPathARMVIXL); }; -// Abstract base class for read barrier slow paths marking a reference -// `ref`. -// -// Argument `entrypoint` must be a register location holding the read -// barrier marking runtime entry point to be invoked or an empty -// location; in the latter case, the read barrier marking runtime -// entry point will be loaded by the slow path code itself. -class ReadBarrierMarkSlowPathBaseARMVIXL : public SlowPathCodeARMVIXL { - protected: - ReadBarrierMarkSlowPathBaseARMVIXL(HInstruction* instruction, Location ref, Location entrypoint) - : SlowPathCodeARMVIXL(instruction), ref_(ref), entrypoint_(entrypoint) { - DCHECK(kEmitCompilerReadBarrier); - } - - const char* GetDescription() const OVERRIDE { return "ReadBarrierMarkSlowPathBaseARMVIXL"; } - - // Generate assembly code calling the read barrier marking runtime - // entry point (ReadBarrierMarkRegX). - void GenerateReadBarrierMarkRuntimeCall(CodeGenerator* codegen) { - vixl32::Register ref_reg = RegisterFrom(ref_); - - // No need to save live registers; it's taken care of by the - // entrypoint. Also, there is no need to update the stack mask, - // as this runtime call will not trigger a garbage collection. - CodeGeneratorARMVIXL* arm_codegen = down_cast<CodeGeneratorARMVIXL*>(codegen); - DCHECK(!ref_reg.Is(sp)); - DCHECK(!ref_reg.Is(lr)); - DCHECK(!ref_reg.Is(pc)); - // IP is used internally by the ReadBarrierMarkRegX entry point - // as a temporary, it cannot be the entry point's input/output. - DCHECK(!ref_reg.Is(ip)); - DCHECK(ref_reg.IsRegister()) << ref_reg; - // "Compact" slow path, saving two moves. 
- // - // Instead of using the standard runtime calling convention (input - // and output in R0): - // - // R0 <- ref - // R0 <- ReadBarrierMark(R0) - // ref <- R0 - // - // we just use rX (the register containing `ref`) as input and output - // of a dedicated entrypoint: - // - // rX <- ReadBarrierMarkRegX(rX) - // - if (entrypoint_.IsValid()) { - arm_codegen->ValidateInvokeRuntimeWithoutRecordingPcInfo(instruction_, this); - __ Blx(RegisterFrom(entrypoint_)); - } else { - // Entrypoint is not already loaded, load from the thread. - int32_t entry_point_offset = - Thread::ReadBarrierMarkEntryPointsOffset<kArmPointerSize>(ref_reg.GetCode()); - // This runtime call does not require a stack map. - arm_codegen->InvokeRuntimeWithoutRecordingPcInfo(entry_point_offset, instruction_, this); - } - } - - // The location (register) of the marked object reference. - const Location ref_; - - // The location of the entrypoint if already loaded. - const Location entrypoint_; - - private: - DISALLOW_COPY_AND_ASSIGN(ReadBarrierMarkSlowPathBaseARMVIXL); -}; - -// Slow path marking an object reference `ref` during a read -// barrier. The field `obj.field` in the object `obj` holding this -// reference does not get updated by this slow path after marking. -// -// This means that after the execution of this slow path, `ref` will -// always be up-to-date, but `obj.field` may not; i.e., after the -// flip, `ref` will be a to-space reference, but `obj.field` will -// probably still be a from-space reference (unless it gets updated by -// another thread, or if another thread installed another object -// reference (different from `ref`) in `obj.field`). -// -// Argument `entrypoint` must be a register location holding the read -// barrier marking runtime entry point to be invoked or an empty -// location; in the latter case, the read barrier marking runtime -// entry point will be loaded by the slow path code itself. -class ReadBarrierMarkSlowPathARMVIXL : public ReadBarrierMarkSlowPathBaseARMVIXL { - public: - ReadBarrierMarkSlowPathARMVIXL(HInstruction* instruction, - Location ref, - Location entrypoint = Location::NoLocation()) - : ReadBarrierMarkSlowPathBaseARMVIXL(instruction, ref, entrypoint) { - DCHECK(kEmitCompilerReadBarrier); - } - - const char* GetDescription() const OVERRIDE { return "ReadBarrierMarkSlowPathARMVIXL"; } - - void EmitNativeCode(CodeGenerator* codegen) OVERRIDE { - LocationSummary* locations = instruction_->GetLocations(); - DCHECK(locations->CanCall()); - DCHECK(ref_.IsRegister()) << ref_; - DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(ref_.reg())) << ref_.reg(); - DCHECK(instruction_->IsLoadClass() || instruction_->IsLoadString()) - << "Unexpected instruction in read barrier marking slow path: " - << instruction_->DebugName(); - - __ Bind(GetEntryLabel()); - GenerateReadBarrierMarkRuntimeCall(codegen); - __ B(GetExitLabel()); - } - - private: - DISALLOW_COPY_AND_ASSIGN(ReadBarrierMarkSlowPathARMVIXL); -}; - -// Slow path loading `obj`'s lock word, loading a reference from -// object `*(obj + offset + (index << scale_factor))` into `ref`, and -// marking `ref` if `obj` is gray according to the lock word (Baker -// read barrier). The field `obj.field` in the object `obj` holding -// this reference does not get updated by this slow path after marking -// (see LoadReferenceWithBakerReadBarrierAndUpdateFieldSlowPathARMVIXL -// below for that). 
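Annotation: the "compact" convention described in the removed comments avoids the usual R0-in/R0-out runtime call by having one marking entrypoint per register, so the register that already holds `ref` is both input and output and no moves are needed. The sketch below only illustrates the idea of selecting a per-register entrypoint by register code; the table layout, offsets, and names are assumptions for illustration, not ART's actual Thread layout or stub names:

    #include <cassert>
    #include <cstdint>

    // Hypothetical mark routines; the real ones are per-register assembly stubs.
    using MarkFn = uintptr_t (*)(uintptr_t ref);

    static uintptr_t MarkIdentity(uintptr_t ref) { return ref; }  // placeholder behavior

    struct FakeThread {
      // One entrypoint slot per core register, indexed by the register's code.
      MarkFn read_barrier_mark_entrypoints[16];
    };

    // Mirrors the idea behind Thread::ReadBarrierMarkEntryPointsOffset(reg): pick the slot
    // whose stub expects and returns the reference in register `reg`.
    MarkFn SelectMarkEntrypoint(const FakeThread& self, uint32_t reg_code) {
      return self.read_barrier_mark_entrypoints[reg_code];
    }

    int main() {
      FakeThread self{};
      for (MarkFn& fn : self.read_barrier_mark_entrypoints) fn = MarkIdentity;
      uintptr_t ref_in_r5 = 0x1000u;
      ref_in_r5 = SelectMarkEntrypoint(self, /*reg_code=*/5u)(ref_in_r5);  // rX <- MarkRegX(rX)
      assert(ref_in_r5 == 0x1000u);
      return 0;
    }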
-// -// This means that after the execution of this slow path, `ref` will -// always be up-to-date, but `obj.field` may not; i.e., after the -// flip, `ref` will be a to-space reference, but `obj.field` will -// probably still be a from-space reference (unless it gets updated by -// another thread, or if another thread installed another object -// reference (different from `ref`) in `obj.field`). -// -// Argument `entrypoint` must be a register location holding the read -// barrier marking runtime entry point to be invoked or an empty -// location; in the latter case, the read barrier marking runtime -// entry point will be loaded by the slow path code itself. -class LoadReferenceWithBakerReadBarrierSlowPathARMVIXL : public ReadBarrierMarkSlowPathBaseARMVIXL { - public: - LoadReferenceWithBakerReadBarrierSlowPathARMVIXL(HInstruction* instruction, - Location ref, - vixl32::Register obj, - uint32_t offset, - Location index, - ScaleFactor scale_factor, - bool needs_null_check, - vixl32::Register temp, - Location entrypoint = Location::NoLocation()) - : ReadBarrierMarkSlowPathBaseARMVIXL(instruction, ref, entrypoint), - obj_(obj), - offset_(offset), - index_(index), - scale_factor_(scale_factor), - needs_null_check_(needs_null_check), - temp_(temp) { - DCHECK(kEmitCompilerReadBarrier); - DCHECK(kUseBakerReadBarrier); - } - - const char* GetDescription() const OVERRIDE { - return "LoadReferenceWithBakerReadBarrierSlowPathARMVIXL"; - } - - void EmitNativeCode(CodeGenerator* codegen) OVERRIDE { - LocationSummary* locations = instruction_->GetLocations(); - vixl32::Register ref_reg = RegisterFrom(ref_); - DCHECK(locations->CanCall()); - DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(ref_reg.GetCode())) << ref_reg; - DCHECK(instruction_->IsInstanceFieldGet() || - instruction_->IsStaticFieldGet() || - instruction_->IsArrayGet() || - instruction_->IsArraySet() || - instruction_->IsInstanceOf() || - instruction_->IsCheckCast() || - (instruction_->IsInvokeVirtual() && instruction_->GetLocations()->Intrinsified()) || - (instruction_->IsInvokeStaticOrDirect() && instruction_->GetLocations()->Intrinsified())) - << "Unexpected instruction in read barrier marking slow path: " - << instruction_->DebugName(); - // The read barrier instrumentation of object ArrayGet - // instructions does not support the HIntermediateAddress - // instruction. - DCHECK(!(instruction_->IsArrayGet() && - instruction_->AsArrayGet()->GetArray()->IsIntermediateAddress())); - - // Temporary register `temp_`, used to store the lock word, must - // not be IP, as we may use it to emit the reference load (in the - // call to GenerateRawReferenceLoad below), and we need the lock - // word to still be in `temp_` after the reference load. - DCHECK(!temp_.Is(ip)); - - __ Bind(GetEntryLabel()); - - // When using MaybeGenerateReadBarrierSlow, the read barrier call is - // inserted after the original load. However, in fast path based - // Baker's read barriers, we need to perform the load of - // mirror::Object::monitor_ *before* the original reference load. - // This load-load ordering is required by the read barrier. - // The slow path (for Baker's algorithm) should look like: - // - // uint32_t rb_state = Lockword(obj->monitor_).ReadBarrierState(); - // lfence; // Load fence or artificial data dependency to prevent load-load reordering - // HeapReference<mirror::Object> ref = *src; // Original reference load. 
- // bool is_gray = (rb_state == ReadBarrier::GrayState()); - // if (is_gray) { - // ref = entrypoint(ref); // ref = ReadBarrier::Mark(ref); // Runtime entry point call. - // } - // - // Note: the original implementation in ReadBarrier::Barrier is - // slightly more complex as it performs additional checks that we do - // not do here for performance reasons. - - CodeGeneratorARMVIXL* arm_codegen = down_cast<CodeGeneratorARMVIXL*>(codegen); - - // /* int32_t */ monitor = obj->monitor_ - uint32_t monitor_offset = mirror::Object::MonitorOffset().Int32Value(); - arm_codegen->GetAssembler()->LoadFromOffset(kLoadWord, temp_, obj_, monitor_offset); - if (needs_null_check_) { - codegen->MaybeRecordImplicitNullCheck(instruction_); - } - // /* LockWord */ lock_word = LockWord(monitor) - static_assert(sizeof(LockWord) == sizeof(int32_t), - "art::LockWord and int32_t have different sizes."); - - // Introduce a dependency on the lock_word including the rb_state, - // which shall prevent load-load reordering without using - // a memory barrier (which would be more expensive). - // `obj` is unchanged by this operation, but its value now depends - // on `temp`. - __ Add(obj_, obj_, Operand(temp_, ShiftType::LSR, 32)); - - // The actual reference load. - // A possible implicit null check has already been handled above. - arm_codegen->GenerateRawReferenceLoad( - instruction_, ref_, obj_, offset_, index_, scale_factor_, /* needs_null_check */ false); - - // Mark the object `ref` when `obj` is gray. - // - // if (rb_state == ReadBarrier::GrayState()) - // ref = ReadBarrier::Mark(ref); - // - // Given the numeric representation, it's enough to check the low bit of the - // rb_state. We do that by shifting the bit out of the lock word with LSRS - // which can be a 16-bit instruction unlike the TST immediate. - static_assert(ReadBarrier::WhiteState() == 0, "Expecting white to have value 0"); - static_assert(ReadBarrier::GrayState() == 1, "Expecting gray to have value 1"); - __ Lsrs(temp_, temp_, LockWord::kReadBarrierStateShift + 1); - __ B(cc, GetExitLabel()); // Carry flag is the last bit shifted out by LSRS. - GenerateReadBarrierMarkRuntimeCall(codegen); - - __ B(GetExitLabel()); - } - - private: - // The register containing the object holding the marked object reference field. - vixl32::Register obj_; - // The offset, index and scale factor to access the reference in `obj_`. - uint32_t offset_; - Location index_; - ScaleFactor scale_factor_; - // Is a null check required? - bool needs_null_check_; - // A temporary register used to hold the lock word of `obj_`. - vixl32::Register temp_; - - DISALLOW_COPY_AND_ASSIGN(LoadReferenceWithBakerReadBarrierSlowPathARMVIXL); -}; - -// Slow path loading `obj`'s lock word, loading a reference from -// object `*(obj + offset + (index << scale_factor))` into `ref`, and -// marking `ref` if `obj` is gray according to the lock word (Baker -// read barrier). If needed, this slow path also atomically updates -// the field `obj.field` in the object `obj` holding this reference -// after marking (contrary to -// LoadReferenceWithBakerReadBarrierSlowPathARMVIXL above, which never -// tries to update `obj.field`). -// -// This means that after the execution of this slow path, both `ref` -// and `obj.field` will be up-to-date; i.e., after the flip, both will -// hold the same to-space reference (unless another thread installed -// another object reference (different from `ref`) in `obj.field`). 
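Annotation: the removed slow path above follows the pseudocode in its comments: load the lock word before the reference (using an artificial address dependency instead of a full memory barrier), then call the mark entrypoint only when the read-barrier state bit says the object is gray. A plain C++ sketch of that control flow; GrayState() == 1 comes from the static_asserts above, while the bit position, field types, and Mark() stand-in are illustrative assumptions:

    #include <cstdint>

    constexpr uint32_t kReadBarrierStateShift = 28;  // illustrative bit position, not ART's value
    constexpr uint32_t kGrayState = 1u;              // matches the static_assert: gray == 1

    struct Obj { uint32_t monitor; uint32_t field; };

    uint32_t Mark(uint32_t ref) { return ref; }      // stands in for the runtime mark entrypoint

    uint32_t BakerLoad(const Obj* obj) {
      // 1. Load the lock word first; the generated code adds (lock_word >> 32) == 0 to the base
      //    register to create an address dependency instead of issuing a memory barrier.
      uint32_t lock_word = obj->monitor;
      // 2. The original reference load, which must be ordered after the lock word load.
      uint32_t ref = obj->field;
      // 3. Only gray objects take the slow path and call the mark entrypoint.
      uint32_t rb_state = (lock_word >> kReadBarrierStateShift) & 1u;
      if (rb_state == kGrayState) {
        ref = Mark(ref);
      }
      return ref;
    }

    int main() {
      Obj o{/*monitor=*/kGrayState << kReadBarrierStateShift, /*field=*/42u};
      return BakerLoad(&o) == 42u ? 0 : 1;
    }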
-// -// Argument `entrypoint` must be a register location holding the read -// barrier marking runtime entry point to be invoked or an empty -// location; in the latter case, the read barrier marking runtime -// entry point will be loaded by the slow path code itself. -class LoadReferenceWithBakerReadBarrierAndUpdateFieldSlowPathARMVIXL - : public ReadBarrierMarkSlowPathBaseARMVIXL { - public: - LoadReferenceWithBakerReadBarrierAndUpdateFieldSlowPathARMVIXL( - HInstruction* instruction, - Location ref, - vixl32::Register obj, - uint32_t offset, - Location index, - ScaleFactor scale_factor, - bool needs_null_check, - vixl32::Register temp1, - vixl32::Register temp2, - Location entrypoint = Location::NoLocation()) - : ReadBarrierMarkSlowPathBaseARMVIXL(instruction, ref, entrypoint), - obj_(obj), - offset_(offset), - index_(index), - scale_factor_(scale_factor), - needs_null_check_(needs_null_check), - temp1_(temp1), - temp2_(temp2) { - DCHECK(kEmitCompilerReadBarrier); - DCHECK(kUseBakerReadBarrier); - } - - const char* GetDescription() const OVERRIDE { - return "LoadReferenceWithBakerReadBarrierAndUpdateFieldSlowPathARMVIXL"; - } - - void EmitNativeCode(CodeGenerator* codegen) OVERRIDE { - LocationSummary* locations = instruction_->GetLocations(); - vixl32::Register ref_reg = RegisterFrom(ref_); - DCHECK(locations->CanCall()); - DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(ref_reg.GetCode())) << ref_reg; - DCHECK_NE(ref_.reg(), LocationFrom(temp1_).reg()); - - // This slow path is only used by the UnsafeCASObject intrinsic at the moment. - DCHECK((instruction_->IsInvokeVirtual() && instruction_->GetLocations()->Intrinsified())) - << "Unexpected instruction in read barrier marking and field updating slow path: " - << instruction_->DebugName(); - DCHECK(instruction_->GetLocations()->Intrinsified()); - DCHECK_EQ(instruction_->AsInvoke()->GetIntrinsic(), Intrinsics::kUnsafeCASObject); - DCHECK_EQ(offset_, 0u); - DCHECK_EQ(scale_factor_, ScaleFactor::TIMES_1); - Location field_offset = index_; - DCHECK(field_offset.IsRegisterPair()) << field_offset; - - // Temporary register `temp1_`, used to store the lock word, must - // not be IP, as we may use it to emit the reference load (in the - // call to GenerateRawReferenceLoad below), and we need the lock - // word to still be in `temp1_` after the reference load. - DCHECK(!temp1_.Is(ip)); - - __ Bind(GetEntryLabel()); - - // The implementation is similar to LoadReferenceWithBakerReadBarrierSlowPathARMVIXL's: - // - // uint32_t rb_state = Lockword(obj->monitor_).ReadBarrierState(); - // lfence; // Load fence or artificial data dependency to prevent load-load reordering - // HeapReference<mirror::Object> ref = *src; // Original reference load. - // bool is_gray = (rb_state == ReadBarrier::GrayState()); - // if (is_gray) { - // old_ref = ref; - // ref = entrypoint(ref); // ref = ReadBarrier::Mark(ref); // Runtime entry point call. 
- // compareAndSwapObject(obj, field_offset, old_ref, ref); - // } - - CodeGeneratorARMVIXL* arm_codegen = down_cast<CodeGeneratorARMVIXL*>(codegen); - - // /* int32_t */ monitor = obj->monitor_ - uint32_t monitor_offset = mirror::Object::MonitorOffset().Int32Value(); - arm_codegen->GetAssembler()->LoadFromOffset(kLoadWord, temp1_, obj_, monitor_offset); - if (needs_null_check_) { - codegen->MaybeRecordImplicitNullCheck(instruction_); - } - // /* LockWord */ lock_word = LockWord(monitor) - static_assert(sizeof(LockWord) == sizeof(int32_t), - "art::LockWord and int32_t have different sizes."); - - // Introduce a dependency on the lock_word including the rb_state, - // which shall prevent load-load reordering without using - // a memory barrier (which would be more expensive). - // `obj` is unchanged by this operation, but its value now depends - // on `temp`. - __ Add(obj_, obj_, Operand(temp1_, ShiftType::LSR, 32)); - - // The actual reference load. - // A possible implicit null check has already been handled above. - arm_codegen->GenerateRawReferenceLoad( - instruction_, ref_, obj_, offset_, index_, scale_factor_, /* needs_null_check */ false); - - // Mark the object `ref` when `obj` is gray. - // - // if (rb_state == ReadBarrier::GrayState()) - // ref = ReadBarrier::Mark(ref); - // - // Given the numeric representation, it's enough to check the low bit of the - // rb_state. We do that by shifting the bit out of the lock word with LSRS - // which can be a 16-bit instruction unlike the TST immediate. - static_assert(ReadBarrier::WhiteState() == 0, "Expecting white to have value 0"); - static_assert(ReadBarrier::GrayState() == 1, "Expecting gray to have value 1"); - __ Lsrs(temp1_, temp1_, LockWord::kReadBarrierStateShift + 1); - __ B(cc, GetExitLabel()); // Carry flag is the last bit shifted out by LSRS. - - // Save the old value of the reference before marking it. - // Note that we cannot use IP to save the old reference, as IP is - // used internally by the ReadBarrierMarkRegX entry point, and we - // need the old reference after the call to that entry point. - DCHECK(!temp1_.Is(ip)); - __ Mov(temp1_, ref_reg); - - GenerateReadBarrierMarkRuntimeCall(codegen); - - // If the new reference is different from the old reference, - // update the field in the holder (`*(obj_ + field_offset)`). - // - // Note that this field could also hold a different object, if - // another thread had concurrently changed it. In that case, the - // LDREX/CMP/BNE sequence of instructions in the compare-and-set - // (CAS) operation below would abort the CAS, leaving the field - // as-is. - __ Cmp(temp1_, ref_reg); - __ B(eq, GetExitLabel()); - - // Update the the holder's field atomically. This may fail if - // mutator updates before us, but it's OK. This is achieved - // using a strong compare-and-set (CAS) operation with relaxed - // memory synchronization ordering, where the expected value is - // the old reference and the desired value is the new reference. - - UseScratchRegisterScope temps(arm_codegen->GetVIXLAssembler()); - // Convenience aliases. - vixl32::Register base = obj_; - // The UnsafeCASObject intrinsic uses a register pair as field - // offset ("long offset"), of which only the low part contains - // data. - vixl32::Register offset = LowRegisterFrom(field_offset); - vixl32::Register expected = temp1_; - vixl32::Register value = ref_reg; - vixl32::Register tmp_ptr = temps.Acquire(); // Pointer to actual memory. - vixl32::Register tmp = temp2_; // Value in memory. 
- - __ Add(tmp_ptr, base, offset); - - if (kPoisonHeapReferences) { - arm_codegen->GetAssembler()->PoisonHeapReference(expected); - if (value.Is(expected)) { - // Do not poison `value`, as it is the same register as - // `expected`, which has just been poisoned. - } else { - arm_codegen->GetAssembler()->PoisonHeapReference(value); - } - } - - // do { - // tmp = [r_ptr] - expected; - // } while (tmp == 0 && failure([r_ptr] <- r_new_value)); - - vixl32::Label loop_head, comparison_failed, exit_loop; - __ Bind(&loop_head); - __ Ldrex(tmp, MemOperand(tmp_ptr)); - __ Cmp(tmp, expected); - __ B(ne, &comparison_failed, /* far_target */ false); - __ Strex(tmp, value, MemOperand(tmp_ptr)); - __ CompareAndBranchIfZero(tmp, &exit_loop, /* far_target */ false); - __ B(&loop_head); - __ Bind(&comparison_failed); - __ Clrex(); - __ Bind(&exit_loop); - - if (kPoisonHeapReferences) { - arm_codegen->GetAssembler()->UnpoisonHeapReference(expected); - if (value.Is(expected)) { - // Do not unpoison `value`, as it is the same register as - // `expected`, which has just been unpoisoned. - } else { - arm_codegen->GetAssembler()->UnpoisonHeapReference(value); - } - } - - __ B(GetExitLabel()); - } - - private: - // The register containing the object holding the marked object reference field. - const vixl32::Register obj_; - // The offset, index and scale factor to access the reference in `obj_`. - uint32_t offset_; - Location index_; - ScaleFactor scale_factor_; - // Is a null check required? - bool needs_null_check_; - // A temporary register used to hold the lock word of `obj_`; and - // also to hold the original reference value, when the reference is - // marked. - const vixl32::Register temp1_; - // A temporary register used in the implementation of the CAS, to - // update the object's reference field. - const vixl32::Register temp2_; - - DISALLOW_COPY_AND_ASSIGN(LoadReferenceWithBakerReadBarrierAndUpdateFieldSlowPathARMVIXL); -}; - // Slow path generating a read barrier for a heap reference. 
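Annotation: the field-updating slow path removed above ends with a strong compare-and-set from the old reference to the marked reference using LDREX/STREX with relaxed ordering; a failed CAS is acceptable because it means another thread already stored a different value. The same semantics expressed with std::atomic rather than raw exclusives (a sketch of the behavior, not the generated code; names are illustrative):

    #include <atomic>
    #include <cassert>
    #include <cstdint>

    // Update *field from `old_ref` to `new_ref` only if no other thread changed it meanwhile.
    // Relaxed ordering suffices here: correctness does not depend on ordering of this store.
    void UpdateFieldAfterMark(std::atomic<uint32_t>* field, uint32_t old_ref, uint32_t new_ref) {
      if (old_ref == new_ref) {
        return;  // marking did not move the reference; nothing to write back
      }
      uint32_t expected = old_ref;
      // Strong CAS: if it fails, the field already holds some other (acceptable) value.
      field->compare_exchange_strong(expected, new_ref, std::memory_order_relaxed);
    }

    int main() {
      std::atomic<uint32_t> field{0x1000u};
      UpdateFieldAfterMark(&field, /*old_ref=*/0x1000u, /*new_ref=*/0x2000u);
      assert(field.load(std::memory_order_relaxed) == 0x2000u);
      // A concurrent update wins: the CAS silently fails and the field keeps the other value.
      field.store(0x3000u, std::memory_order_relaxed);
      UpdateFieldAfterMark(&field, /*old_ref=*/0x1000u, /*new_ref=*/0x2000u);
      assert(field.load(std::memory_order_relaxed) == 0x3000u);
      return 0;
    }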
class ReadBarrierForHeapReferenceSlowPathARMVIXL : public SlowPathCodeARMVIXL { public: @@ -1242,7 +744,7 @@ class ReadBarrierForHeapReferenceSlowPathARMVIXL : public SlowPathCodeARMVIXL { DCHECK(!obj.Equals(ref)) << "obj=" << obj << " ref=" << ref; } - void EmitNativeCode(CodeGenerator* codegen) OVERRIDE { + void EmitNativeCode(CodeGenerator* codegen) override { CodeGeneratorARMVIXL* arm_codegen = down_cast<CodeGeneratorARMVIXL*>(codegen); LocationSummary* locations = instruction_->GetLocations(); vixl32::Register reg_out = RegisterFrom(out_); @@ -1366,7 +868,7 @@ class ReadBarrierForHeapReferenceSlowPathARMVIXL : public SlowPathCodeARMVIXL { __ B(GetExitLabel()); } - const char* GetDescription() const OVERRIDE { + const char* GetDescription() const override { return "ReadBarrierForHeapReferenceSlowPathARMVIXL"; } @@ -1408,7 +910,7 @@ class ReadBarrierForRootSlowPathARMVIXL : public SlowPathCodeARMVIXL { DCHECK(kEmitCompilerReadBarrier); } - void EmitNativeCode(CodeGenerator* codegen) OVERRIDE { + void EmitNativeCode(CodeGenerator* codegen) override { LocationSummary* locations = instruction_->GetLocations(); vixl32::Register reg_out = RegisterFrom(out_); DCHECK(locations->CanCall()); @@ -1434,7 +936,7 @@ class ReadBarrierForRootSlowPathARMVIXL : public SlowPathCodeARMVIXL { __ B(GetExitLabel()); } - const char* GetDescription() const OVERRIDE { return "ReadBarrierForRootSlowPathARMVIXL"; } + const char* GetDescription() const override { return "ReadBarrierForRootSlowPathARMVIXL"; } private: const Location out_; @@ -1517,6 +1019,10 @@ void CodeGeneratorARMVIXL::DumpFloatingPointRegister(std::ostream& stream, int r stream << vixl32::SRegister(reg); } +const ArmInstructionSetFeatures& CodeGeneratorARMVIXL::GetInstructionSetFeatures() const { + return *GetCompilerOptions().GetInstructionSetFeatures()->AsArmInstructionSetFeatures(); +} + static uint32_t ComputeSRegisterListMask(const SRegisterList& regs) { uint32_t mask = 0; for (uint32_t i = regs.GetFirstSRegister().GetCode(); @@ -1531,26 +1037,26 @@ static uint32_t ComputeSRegisterListMask(const SRegisterList& regs) { size_t CodeGeneratorARMVIXL::SaveCoreRegister(size_t stack_index ATTRIBUTE_UNUSED, uint32_t reg_id ATTRIBUTE_UNUSED) { TODO_VIXL32(FATAL); - return 0; + UNREACHABLE(); } // Restores the register from the stack. Returns the size taken on stack. 
size_t CodeGeneratorARMVIXL::RestoreCoreRegister(size_t stack_index ATTRIBUTE_UNUSED, uint32_t reg_id ATTRIBUTE_UNUSED) { TODO_VIXL32(FATAL); - return 0; + UNREACHABLE(); } size_t CodeGeneratorARMVIXL::SaveFloatingPointRegister(size_t stack_index ATTRIBUTE_UNUSED, uint32_t reg_id ATTRIBUTE_UNUSED) { TODO_VIXL32(FATAL); - return 0; + UNREACHABLE(); } size_t CodeGeneratorARMVIXL::RestoreFloatingPointRegister(size_t stack_index ATTRIBUTE_UNUSED, uint32_t reg_id ATTRIBUTE_UNUSED) { TODO_VIXL32(FATAL); - return 0; + UNREACHABLE(); } static void GenerateDataProcInstruction(HInstruction::InstructionKind kind, @@ -2033,7 +1539,7 @@ static void GenerateConditionGeneric(HCondition* cond, CodeGeneratorARMVIXL* cod vixl32::Label done_label; vixl32::Label* const final_label = codegen->GetFinalLabel(cond, &done_label); - __ B(condition.second, final_label, /* far_target */ false); + __ B(condition.second, final_label, /* is_far_target= */ false); __ Mov(out, 1); if (done_label.IsReferenced()) { @@ -2334,7 +1840,6 @@ vixl32::Label* CodeGeneratorARMVIXL::GetFinalLabel(HInstruction* instruction, } CodeGeneratorARMVIXL::CodeGeneratorARMVIXL(HGraph* graph, - const ArmInstructionSetFeatures& isa_features, const CompilerOptions& compiler_options, OptimizingCompilerStats* stats) : CodeGenerator(graph, @@ -2351,7 +1856,6 @@ CodeGeneratorARMVIXL::CodeGeneratorARMVIXL(HGraph* graph, instruction_visitor_(graph, this), move_resolver_(graph->GetAllocator(), this), assembler_(graph->GetAllocator()), - isa_features_(isa_features), uint32_literals_(std::less<uint32_t>(), graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)), boot_image_method_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)), @@ -2360,11 +1864,14 @@ CodeGeneratorARMVIXL::CodeGeneratorARMVIXL(HGraph* graph, type_bss_entry_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)), boot_image_string_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)), string_bss_entry_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)), + boot_image_intrinsic_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)), baker_read_barrier_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)), jit_string_patches_(StringReferenceValueComparator(), graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)), jit_class_patches_(TypeReferenceValueComparator(), - graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)) { + graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)), + jit_baker_read_barrier_slow_paths_(std::less<uint32_t>(), + graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)) { // Always save the LR register to mimic Quick. AddAllocatedRegister(Location::RegisterLocation(LR)); // Give D30 and D31 as scratch register to VIXL. The register allocator only works on @@ -2420,8 +1927,100 @@ void CodeGeneratorARMVIXL::FixJumpTables() { void CodeGeneratorARMVIXL::Finalize(CodeAllocator* allocator) { FixJumpTables(); + + // Emit JIT baker read barrier slow paths. + DCHECK(Runtime::Current()->UseJitCompilation() || jit_baker_read_barrier_slow_paths_.empty()); + for (auto& entry : jit_baker_read_barrier_slow_paths_) { + uint32_t encoded_data = entry.first; + vixl::aarch32::Label* slow_path_entry = &entry.second.label; + __ Bind(slow_path_entry); + CompileBakerReadBarrierThunk(*GetAssembler(), encoded_data, /* debug_name= */ nullptr); + } + GetAssembler()->FinalizeCode(); CodeGenerator::Finalize(allocator); + + // Verify Baker read barrier linker patches. 
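Annotation: the debug-build verification that follows reads the finalized buffer back as Thumb2 instructions: a 16-bit encoding is one little-endian halfword, and a 32-bit encoding is the first halfword in the high 16 bits with the second halfword in the low 16 bits. A small sketch of that reassembly, assuming a plain byte vector in place of the code allocator's memory:

    #include <cassert>
    #include <cstdint>
    #include <vector>

    // Thumb2 code is a stream of little-endian 16-bit units.
    uint32_t GetInsn16(const std::vector<uint8_t>& code, uint32_t offset) {
      assert(offset % 2u == 0u && offset + 2u <= code.size());
      return static_cast<uint32_t>(code[offset]) | (static_cast<uint32_t>(code[offset + 1]) << 8);
    }

    // A 32-bit Thumb2 instruction is (first halfword << 16) | second halfword.
    uint32_t GetInsn32(const std::vector<uint8_t>& code, uint32_t offset) {
      return (GetInsn16(code, offset) << 16) | GetInsn16(code, offset + 2u);
    }

    int main() {
      // Example: LDR.W r0, [r5] encodes as halfwords 0xf8d5 0x0000, i.e. bytes d5 f8 00 00.
      std::vector<uint8_t> code = {0xd5, 0xf8, 0x00, 0x00};
      uint32_t insn = GetInsn32(code, 0u);
      assert(insn == 0xf8d50000u);
      // The checks mask out the immediate/destination and compare the base register field:
      assert((insn & 0xffff0000u) == (0xf8d00000u | (5u << 16)));
      return 0;
    }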
+ if (kIsDebugBuild) { + ArrayRef<const uint8_t> code = allocator->GetMemory(); + for (const BakerReadBarrierPatchInfo& info : baker_read_barrier_patches_) { + DCHECK(info.label.IsBound()); + uint32_t literal_offset = info.label.GetLocation(); + DCHECK_ALIGNED(literal_offset, 2u); + + auto GetInsn16 = [&code](uint32_t offset) { + DCHECK_ALIGNED(offset, 2u); + return (static_cast<uint32_t>(code[offset + 0]) << 0) + + (static_cast<uint32_t>(code[offset + 1]) << 8); + }; + auto GetInsn32 = [=](uint32_t offset) { + return (GetInsn16(offset) << 16) + (GetInsn16(offset + 2u) << 0); + }; + + uint32_t encoded_data = info.custom_data; + BakerReadBarrierKind kind = BakerReadBarrierKindField::Decode(encoded_data); + // Check that the next instruction matches the expected LDR. + switch (kind) { + case BakerReadBarrierKind::kField: { + BakerReadBarrierWidth width = BakerReadBarrierWidthField::Decode(encoded_data); + if (width == BakerReadBarrierWidth::kWide) { + DCHECK_GE(code.size() - literal_offset, 8u); + uint32_t next_insn = GetInsn32(literal_offset + 4u); + // LDR (immediate), encoding T3, with correct base_reg. + CheckValidReg((next_insn >> 12) & 0xfu); // Check destination register. + const uint32_t base_reg = BakerReadBarrierFirstRegField::Decode(encoded_data); + CHECK_EQ(next_insn & 0xffff0000u, 0xf8d00000u | (base_reg << 16)); + } else { + DCHECK_GE(code.size() - literal_offset, 6u); + uint32_t next_insn = GetInsn16(literal_offset + 4u); + // LDR (immediate), encoding T1, with correct base_reg. + CheckValidReg(next_insn & 0x7u); // Check destination register. + const uint32_t base_reg = BakerReadBarrierFirstRegField::Decode(encoded_data); + CHECK_EQ(next_insn & 0xf838u, 0x6800u | (base_reg << 3)); + } + break; + } + case BakerReadBarrierKind::kArray: { + DCHECK_GE(code.size() - literal_offset, 8u); + uint32_t next_insn = GetInsn32(literal_offset + 4u); + // LDR (register) with correct base_reg, S=1 and option=011 (LDR Wt, [Xn, Xm, LSL #2]). + CheckValidReg((next_insn >> 12) & 0xfu); // Check destination register. + const uint32_t base_reg = BakerReadBarrierFirstRegField::Decode(encoded_data); + CHECK_EQ(next_insn & 0xffff0ff0u, 0xf8500020u | (base_reg << 16)); + CheckValidReg(next_insn & 0xf); // Check index register + break; + } + case BakerReadBarrierKind::kGcRoot: { + BakerReadBarrierWidth width = BakerReadBarrierWidthField::Decode(encoded_data); + if (width == BakerReadBarrierWidth::kWide) { + DCHECK_GE(literal_offset, 4u); + uint32_t prev_insn = GetInsn32(literal_offset - 4u); + // LDR (immediate), encoding T3, with correct root_reg. + const uint32_t root_reg = BakerReadBarrierFirstRegField::Decode(encoded_data); + CHECK_EQ(prev_insn & 0xfff0f000u, 0xf8d00000u | (root_reg << 12)); + } else { + DCHECK_GE(literal_offset, 2u); + uint32_t prev_insn = GetInsn16(literal_offset - 2u); + // LDR (immediate), encoding T1, with correct root_reg. + const uint32_t root_reg = BakerReadBarrierFirstRegField::Decode(encoded_data); + CHECK_EQ(prev_insn & 0xf807u, 0x6800u | root_reg); + } + break; + } + case BakerReadBarrierKind::kUnsafeCas: { + DCHECK_GE(literal_offset, 4u); + uint32_t prev_insn = GetInsn32(literal_offset - 4u); + // ADD (register), encoding T3, with correct root_reg. 
+ const uint32_t root_reg = BakerReadBarrierFirstRegField::Decode(encoded_data); + CHECK_EQ(prev_insn & 0xfff0fff0u, 0xeb000000u | (root_reg << 8)); + break; + } + default: + LOG(FATAL) << "Unexpected kind: " << static_cast<uint32_t>(kind); + UNREACHABLE(); + } + } + } } void CodeGeneratorARMVIXL::SetupBlockedRegisters() const { @@ -2494,6 +2093,8 @@ void CodeGeneratorARMVIXL::GenerateFrameEntry() { } if (HasEmptyFrame()) { + // Ensure that the CFI opcode list is not empty. + GetAssembler()->cfi().Nop(); return; } @@ -2560,7 +2161,7 @@ void CodeGeneratorARMVIXL::GenerateFrameEntry() { GetAssembler()->StoreToOffset(kStoreWord, temp, sp, GetStackOffsetOfShouldDeoptimizeFlag()); } - MaybeGenerateMarkingRegisterCheck(/* code */ 1); + MaybeGenerateMarkingRegisterCheck(/* code= */ 1); } void CodeGeneratorARMVIXL::GenerateFrameExit() { @@ -2669,7 +2270,7 @@ Location InvokeDexCallingConventionVisitorARMVIXL::GetNextLocation(DataType::Typ case DataType::Type::kUint64: case DataType::Type::kVoid: LOG(FATAL) << "Unexpected parameter type " << type; - break; + UNREACHABLE(); } return Location::NoLocation(); } @@ -2828,7 +2429,7 @@ void InstructionCodeGeneratorARMVIXL::HandleGoto(HInstruction* got, HBasicBlock* } if (block->IsEntryBlock() && (previous != nullptr) && previous->IsSuspendCheck()) { GenerateSuspendCheck(previous->AsSuspendCheck(), nullptr); - codegen_->MaybeGenerateMarkingRegisterCheck(/* code */ 2); + codegen_->MaybeGenerateMarkingRegisterCheck(/* code= */ 2); } if (!codegen_->GoesToNextBlock(block, successor)) { __ B(codegen_->GetLabelOf(successor)); @@ -3007,7 +2608,7 @@ void InstructionCodeGeneratorARMVIXL::VisitIf(HIf* if_instr) { nullptr : codegen_->GetLabelOf(true_successor); vixl32::Label* false_target = codegen_->GoesToNextBlock(if_instr->GetBlock(), false_successor) ? 
nullptr : codegen_->GetLabelOf(false_successor); - GenerateTestAndBranch(if_instr, /* condition_input_index */ 0, true_target, false_target); + GenerateTestAndBranch(if_instr, /* condition_input_index= */ 0, true_target, false_target); } void LocationsBuilderARMVIXL::VisitDeoptimize(HDeoptimize* deoptimize) { @@ -3026,9 +2627,9 @@ void InstructionCodeGeneratorARMVIXL::VisitDeoptimize(HDeoptimize* deoptimize) { SlowPathCodeARMVIXL* slow_path = deopt_slow_paths_.NewSlowPath<DeoptimizationSlowPathARMVIXL>(deoptimize); GenerateTestAndBranch(deoptimize, - /* condition_input_index */ 0, + /* condition_input_index= */ 0, slow_path->GetEntryLabel(), - /* false_target */ nullptr); + /* false_target= */ nullptr); } void LocationsBuilderARMVIXL::VisitShouldDeoptimizeFlag(HShouldDeoptimizeFlag* flag) { @@ -3194,7 +2795,7 @@ void InstructionCodeGeneratorARMVIXL::VisitSelect(HSelect* select) { } } - GenerateTestAndBranch(select, 2, true_target, false_target, /* far_target */ false); + GenerateTestAndBranch(select, 2, true_target, false_target, /* far_target= */ false); codegen_->MoveLocation(out, src, type); if (output_overlaps_with_condition_inputs) { __ B(target); @@ -3536,7 +3137,7 @@ void LocationsBuilderARMVIXL::VisitInvokeUnresolved(HInvokeUnresolved* invoke) { void InstructionCodeGeneratorARMVIXL::VisitInvokeUnresolved(HInvokeUnresolved* invoke) { codegen_->GenerateInvokeUnresolvedRuntimeCall(invoke); - codegen_->MaybeGenerateMarkingRegisterCheck(/* code */ 3); + codegen_->MaybeGenerateMarkingRegisterCheck(/* code= */ 3); } void LocationsBuilderARMVIXL::VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) { @@ -3567,7 +3168,7 @@ void InstructionCodeGeneratorARMVIXL::VisitInvokeStaticOrDirect(HInvokeStaticOrD DCHECK(!invoke->IsStaticWithExplicitClinitCheck()); if (TryGenerateIntrinsicCode(invoke, codegen_)) { - codegen_->MaybeGenerateMarkingRegisterCheck(/* code */ 4); + codegen_->MaybeGenerateMarkingRegisterCheck(/* code= */ 4); return; } @@ -3575,7 +3176,7 @@ void InstructionCodeGeneratorARMVIXL::VisitInvokeStaticOrDirect(HInvokeStaticOrD codegen_->GenerateStaticOrDirectCall( invoke, locations->HasTemps() ? 
locations->GetTemp(0) : Location::NoLocation()); - codegen_->MaybeGenerateMarkingRegisterCheck(/* code */ 5); + codegen_->MaybeGenerateMarkingRegisterCheck(/* code= */ 5); } void LocationsBuilderARMVIXL::HandleInvoke(HInvoke* invoke) { @@ -3594,14 +3195,14 @@ void LocationsBuilderARMVIXL::VisitInvokeVirtual(HInvokeVirtual* invoke) { void InstructionCodeGeneratorARMVIXL::VisitInvokeVirtual(HInvokeVirtual* invoke) { if (TryGenerateIntrinsicCode(invoke, codegen_)) { - codegen_->MaybeGenerateMarkingRegisterCheck(/* code */ 6); + codegen_->MaybeGenerateMarkingRegisterCheck(/* code= */ 6); return; } codegen_->GenerateVirtualCall(invoke, invoke->GetLocations()->GetTemp(0)); DCHECK(!codegen_->IsLeafMethod()); - codegen_->MaybeGenerateMarkingRegisterCheck(/* code */ 7); + codegen_->MaybeGenerateMarkingRegisterCheck(/* code= */ 7); } void LocationsBuilderARMVIXL::VisitInvokeInterface(HInvokeInterface* invoke) { @@ -3679,7 +3280,7 @@ void InstructionCodeGeneratorARMVIXL::VisitInvokeInterface(HInvokeInterface* inv DCHECK(!codegen_->IsLeafMethod()); } - codegen_->MaybeGenerateMarkingRegisterCheck(/* code */ 8); + codegen_->MaybeGenerateMarkingRegisterCheck(/* code= */ 8); } void LocationsBuilderARMVIXL::VisitInvokePolymorphic(HInvokePolymorphic* invoke) { @@ -3688,7 +3289,16 @@ void LocationsBuilderARMVIXL::VisitInvokePolymorphic(HInvokePolymorphic* invoke) void InstructionCodeGeneratorARMVIXL::VisitInvokePolymorphic(HInvokePolymorphic* invoke) { codegen_->GenerateInvokePolymorphicCall(invoke); - codegen_->MaybeGenerateMarkingRegisterCheck(/* code */ 9); + codegen_->MaybeGenerateMarkingRegisterCheck(/* code= */ 9); +} + +void LocationsBuilderARMVIXL::VisitInvokeCustom(HInvokeCustom* invoke) { + HandleInvoke(invoke); +} + +void InstructionCodeGeneratorARMVIXL::VisitInvokeCustom(HInvokeCustom* invoke) { + codegen_->GenerateInvokeCustomCall(invoke); + codegen_->MaybeGenerateMarkingRegisterCheck(/* code= */ 10); } void LocationsBuilderARMVIXL::VisitNeg(HNeg* neg) { @@ -4405,7 +4015,7 @@ void InstructionCodeGeneratorARMVIXL::GenerateDivRemWithAnyConstant(HBinaryOpera int64_t magic; int shift; - CalculateMagicAndShiftForDivRem(imm, false /* is_long */, &magic, &shift); + CalculateMagicAndShiftForDivRem(imm, /* is_long= */ false, &magic, &shift); // TODO(VIXL): Change the static cast to Operand::From() after VIXL is fixed. 
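Annotation: GenerateDivRemWithAnyConstant above asks CalculateMagicAndShiftForDivRem for a `magic` constant and a `shift`, then emits a multiply-high sequence in place of a divide. As a worked illustration with the standard Hacker's Delight constants for divisor 7 (magic 0x92492493, shift 2; these specific values are an assumption for the example and are not taken from this diff):

    #include <cassert>
    #include <cstdint>

    // Signed 32-bit division by 7 without a divide instruction:
    //   q = hi32(magic * n) + n    (the "+ n" compensates for the magic constant being negative)
    //   q = q >> shift
    //   q = q + ((unsigned)n >> 31) (round toward zero for negative dividends)
    int32_t DivBy7(int32_t n) {
      const int32_t magic = static_cast<int32_t>(0x92492493u);
      const int shift = 2;
      int32_t q = static_cast<int32_t>((static_cast<int64_t>(magic) * n) >> 32);  // high half
      q += n;
      q >>= shift;
      q += static_cast<int32_t>(static_cast<uint32_t>(n) >> 31);
      return q;
    }

    int main() {
      for (int32_t n = -1000; n <= 1000; ++n) {
        assert(DivBy7(n) == n / 7);
      }
      return 0;
    }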
__ Mov(temp1, static_cast<int32_t>(magic)); @@ -4697,6 +4307,299 @@ void InstructionCodeGeneratorARMVIXL::VisitRem(HRem* rem) { } } +static void CreateMinMaxLocations(ArenaAllocator* allocator, HBinaryOperation* minmax) { + LocationSummary* locations = new (allocator) LocationSummary(minmax); + switch (minmax->GetResultType()) { + case DataType::Type::kInt32: + locations->SetInAt(0, Location::RequiresRegister()); + locations->SetInAt(1, Location::RequiresRegister()); + locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); + break; + case DataType::Type::kInt64: + locations->SetInAt(0, Location::RequiresRegister()); + locations->SetInAt(1, Location::RequiresRegister()); + locations->SetOut(Location::SameAsFirstInput()); + break; + case DataType::Type::kFloat32: + locations->SetInAt(0, Location::RequiresFpuRegister()); + locations->SetInAt(1, Location::RequiresFpuRegister()); + locations->SetOut(Location::SameAsFirstInput()); + locations->AddTemp(Location::RequiresRegister()); + break; + case DataType::Type::kFloat64: + locations->SetInAt(0, Location::RequiresFpuRegister()); + locations->SetInAt(1, Location::RequiresFpuRegister()); + locations->SetOut(Location::SameAsFirstInput()); + break; + default: + LOG(FATAL) << "Unexpected type for HMinMax " << minmax->GetResultType(); + } +} + +void InstructionCodeGeneratorARMVIXL::GenerateMinMaxInt(LocationSummary* locations, bool is_min) { + Location op1_loc = locations->InAt(0); + Location op2_loc = locations->InAt(1); + Location out_loc = locations->Out(); + + vixl32::Register op1 = RegisterFrom(op1_loc); + vixl32::Register op2 = RegisterFrom(op2_loc); + vixl32::Register out = RegisterFrom(out_loc); + + __ Cmp(op1, op2); + + { + ExactAssemblyScope aas(GetVIXLAssembler(), + 3 * kMaxInstructionSizeInBytes, + CodeBufferCheckScope::kMaximumSize); + + __ ite(is_min ? lt : gt); + __ mov(is_min ? lt : gt, out, op1); + __ mov(is_min ? ge : le, out, op2); + } +} + +void InstructionCodeGeneratorARMVIXL::GenerateMinMaxLong(LocationSummary* locations, bool is_min) { + Location op1_loc = locations->InAt(0); + Location op2_loc = locations->InAt(1); + Location out_loc = locations->Out(); + + // Optimization: don't generate any code if inputs are the same. + if (op1_loc.Equals(op2_loc)) { + DCHECK(out_loc.Equals(op1_loc)); // out_loc is set as SameAsFirstInput() in location builder. + return; + } + + vixl32::Register op1_lo = LowRegisterFrom(op1_loc); + vixl32::Register op1_hi = HighRegisterFrom(op1_loc); + vixl32::Register op2_lo = LowRegisterFrom(op2_loc); + vixl32::Register op2_hi = HighRegisterFrom(op2_loc); + vixl32::Register out_lo = LowRegisterFrom(out_loc); + vixl32::Register out_hi = HighRegisterFrom(out_loc); + UseScratchRegisterScope temps(GetVIXLAssembler()); + const vixl32::Register temp = temps.Acquire(); + + DCHECK(op1_lo.Is(out_lo)); + DCHECK(op1_hi.Is(out_hi)); + + // Compare op1 >= op2, or op1 < op2. + __ Cmp(out_lo, op2_lo); + __ Sbcs(temp, out_hi, op2_hi); + + // Now GE/LT condition code is correct for the long comparison. + { + vixl32::ConditionType cond = is_min ? 
ge : lt; + ExactAssemblyScope it_scope(GetVIXLAssembler(), + 3 * kMaxInstructionSizeInBytes, + CodeBufferCheckScope::kMaximumSize); + __ itt(cond); + __ mov(cond, out_lo, op2_lo); + __ mov(cond, out_hi, op2_hi); + } +} + +void InstructionCodeGeneratorARMVIXL::GenerateMinMaxFloat(HInstruction* minmax, bool is_min) { + LocationSummary* locations = minmax->GetLocations(); + Location op1_loc = locations->InAt(0); + Location op2_loc = locations->InAt(1); + Location out_loc = locations->Out(); + + // Optimization: don't generate any code if inputs are the same. + if (op1_loc.Equals(op2_loc)) { + DCHECK(out_loc.Equals(op1_loc)); // out_loc is set as SameAsFirstInput() in location builder. + return; + } + + vixl32::SRegister op1 = SRegisterFrom(op1_loc); + vixl32::SRegister op2 = SRegisterFrom(op2_loc); + vixl32::SRegister out = SRegisterFrom(out_loc); + + UseScratchRegisterScope temps(GetVIXLAssembler()); + const vixl32::Register temp1 = temps.Acquire(); + vixl32::Register temp2 = RegisterFrom(locations->GetTemp(0)); + vixl32::Label nan, done; + vixl32::Label* final_label = codegen_->GetFinalLabel(minmax, &done); + + DCHECK(op1.Is(out)); + + __ Vcmp(op1, op2); + __ Vmrs(RegisterOrAPSR_nzcv(kPcCode), FPSCR); + __ B(vs, &nan, /* is_far_target= */ false); // if un-ordered, go to NaN handling. + + // op1 <> op2 + vixl32::ConditionType cond = is_min ? gt : lt; + { + ExactAssemblyScope it_scope(GetVIXLAssembler(), + 2 * kMaxInstructionSizeInBytes, + CodeBufferCheckScope::kMaximumSize); + __ it(cond); + __ vmov(cond, F32, out, op2); + } + // for <>(not equal), we've done min/max calculation. + __ B(ne, final_label, /* is_far_target= */ false); + + // handle op1 == op2, max(+0.0,-0.0), min(+0.0,-0.0). + __ Vmov(temp1, op1); + __ Vmov(temp2, op2); + if (is_min) { + __ Orr(temp1, temp1, temp2); + } else { + __ And(temp1, temp1, temp2); + } + __ Vmov(out, temp1); + __ B(final_label); + + // handle NaN input. + __ Bind(&nan); + __ Movt(temp1, High16Bits(kNanFloat)); // 0x7FC0xxxx is a NaN. + __ Vmov(out, temp1); + + if (done.IsReferenced()) { + __ Bind(&done); + } +} + +void InstructionCodeGeneratorARMVIXL::GenerateMinMaxDouble(HInstruction* minmax, bool is_min) { + LocationSummary* locations = minmax->GetLocations(); + Location op1_loc = locations->InAt(0); + Location op2_loc = locations->InAt(1); + Location out_loc = locations->Out(); + + // Optimization: don't generate any code if inputs are the same. + if (op1_loc.Equals(op2_loc)) { + DCHECK(out_loc.Equals(op1_loc)); // out_loc is set as SameAsFirstInput() in. + return; + } + + vixl32::DRegister op1 = DRegisterFrom(op1_loc); + vixl32::DRegister op2 = DRegisterFrom(op2_loc); + vixl32::DRegister out = DRegisterFrom(out_loc); + vixl32::Label handle_nan_eq, done; + vixl32::Label* final_label = codegen_->GetFinalLabel(minmax, &done); + + DCHECK(op1.Is(out)); + + __ Vcmp(op1, op2); + __ Vmrs(RegisterOrAPSR_nzcv(kPcCode), FPSCR); + __ B(vs, &handle_nan_eq, /* is_far_target= */ false); // if un-ordered, go to NaN handling. + + // op1 <> op2 + vixl32::ConditionType cond = is_min ? gt : lt; + { + ExactAssemblyScope it_scope(GetVIXLAssembler(), + 2 * kMaxInstructionSizeInBytes, + CodeBufferCheckScope::kMaximumSize); + __ it(cond); + __ vmov(cond, F64, out, op2); + } + // for <>(not equal), we've done min/max calculation. + __ B(ne, final_label, /* is_far_target= */ false); + + // handle op1 == op2, max(+0.0,-0.0). + if (!is_min) { + __ Vand(F64, out, op1, op2); + __ B(final_label); + } + + // handle op1 == op2, min(+0.0,-0.0), NaN input. 
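Annotation: the float/double min/max paths above need special handling when the operands compare equal, because an FP compare cannot tell +0.0 from -0.0: the bit patterns are OR'ed for min (so min(+0.0, -0.0) keeps the sign bit and yields -0.0) and AND'ed for max. A quick check of that bit-pattern trick in plain C++, independent of the VFP registers used above:

    #include <cassert>
    #include <cmath>
    #include <cstdint>
    #include <cstring>

    static uint32_t Bits(float f) { uint32_t u; std::memcpy(&u, &f, sizeof(u)); return u; }
    static float FromBits(uint32_t u) { float f; std::memcpy(&f, &u, sizeof(f)); return f; }

    int main() {
      float pos_zero = 0.0f;
      float neg_zero = -0.0f;
      // min(+0.0, -0.0): OR of the bit patterns keeps the sign bit -> -0.0.
      float min_zero = FromBits(Bits(pos_zero) | Bits(neg_zero));
      // max(+0.0, -0.0): AND of the bit patterns clears the sign bit -> +0.0.
      float max_zero = FromBits(Bits(pos_zero) & Bits(neg_zero));
      assert(std::signbit(min_zero) && min_zero == 0.0f);
      assert(!std::signbit(max_zero) && max_zero == 0.0f);
      return 0;
    }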
+ __ Bind(&handle_nan_eq); + __ Vorr(F64, out, op1, op2); // assemble op1/-0.0/NaN. + + if (done.IsReferenced()) { + __ Bind(&done); + } +} + +void InstructionCodeGeneratorARMVIXL::GenerateMinMax(HBinaryOperation* minmax, bool is_min) { + DataType::Type type = minmax->GetResultType(); + switch (type) { + case DataType::Type::kInt32: + GenerateMinMaxInt(minmax->GetLocations(), is_min); + break; + case DataType::Type::kInt64: + GenerateMinMaxLong(minmax->GetLocations(), is_min); + break; + case DataType::Type::kFloat32: + GenerateMinMaxFloat(minmax, is_min); + break; + case DataType::Type::kFloat64: + GenerateMinMaxDouble(minmax, is_min); + break; + default: + LOG(FATAL) << "Unexpected type for HMinMax " << type; + } +} + +void LocationsBuilderARMVIXL::VisitMin(HMin* min) { + CreateMinMaxLocations(GetGraph()->GetAllocator(), min); +} + +void InstructionCodeGeneratorARMVIXL::VisitMin(HMin* min) { + GenerateMinMax(min, /*is_min*/ true); +} + +void LocationsBuilderARMVIXL::VisitMax(HMax* max) { + CreateMinMaxLocations(GetGraph()->GetAllocator(), max); +} + +void InstructionCodeGeneratorARMVIXL::VisitMax(HMax* max) { + GenerateMinMax(max, /*is_min*/ false); +} + +void LocationsBuilderARMVIXL::VisitAbs(HAbs* abs) { + LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(abs); + switch (abs->GetResultType()) { + case DataType::Type::kInt32: + case DataType::Type::kInt64: + locations->SetInAt(0, Location::RequiresRegister()); + locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); + locations->AddTemp(Location::RequiresRegister()); + break; + case DataType::Type::kFloat32: + case DataType::Type::kFloat64: + locations->SetInAt(0, Location::RequiresFpuRegister()); + locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap); + break; + default: + LOG(FATAL) << "Unexpected type for abs operation " << abs->GetResultType(); + } +} + +void InstructionCodeGeneratorARMVIXL::VisitAbs(HAbs* abs) { + LocationSummary* locations = abs->GetLocations(); + switch (abs->GetResultType()) { + case DataType::Type::kInt32: { + vixl32::Register in_reg = RegisterFrom(locations->InAt(0)); + vixl32::Register out_reg = RegisterFrom(locations->Out()); + vixl32::Register mask = RegisterFrom(locations->GetTemp(0)); + __ Asr(mask, in_reg, 31); + __ Add(out_reg, in_reg, mask); + __ Eor(out_reg, out_reg, mask); + break; + } + case DataType::Type::kInt64: { + Location in = locations->InAt(0); + vixl32::Register in_reg_lo = LowRegisterFrom(in); + vixl32::Register in_reg_hi = HighRegisterFrom(in); + Location output = locations->Out(); + vixl32::Register out_reg_lo = LowRegisterFrom(output); + vixl32::Register out_reg_hi = HighRegisterFrom(output); + DCHECK(!out_reg_lo.Is(in_reg_hi)) << "Diagonal overlap unexpected."; + vixl32::Register mask = RegisterFrom(locations->GetTemp(0)); + __ Asr(mask, in_reg_hi, 31); + __ Adds(out_reg_lo, in_reg_lo, mask); + __ Adc(out_reg_hi, in_reg_hi, mask); + __ Eor(out_reg_lo, out_reg_lo, mask); + __ Eor(out_reg_hi, out_reg_hi, mask); + break; + } + case DataType::Type::kFloat32: + case DataType::Type::kFloat64: + __ Vabs(OutputVRegister(abs), InputVRegisterAt(abs, 0)); + break; + default: + LOG(FATAL) << "Unexpected type for abs operation " << abs->GetResultType(); + } +} void LocationsBuilderARMVIXL::VisitDivZeroCheck(HDivZeroCheck* instruction) { LocationSummary* locations = codegen_->CreateThrowingSlowPathLocations(instruction); @@ -4813,7 +4716,7 @@ void InstructionCodeGeneratorARMVIXL::HandleLongRotate(HRor* ror) { __ 
And(shift_right, RegisterFrom(rhs), 0x1F); __ Lsrs(shift_left, RegisterFrom(rhs), 6); __ Rsb(LeaveFlags, shift_left, shift_right, Operand::From(kArmBitsPerWord)); - __ B(cc, &shift_by_32_plus_shift_right, /* far_target */ false); + __ B(cc, &shift_by_32_plus_shift_right, /* is_far_target= */ false); // out_reg_hi = (reg_hi << shift_left) | (reg_lo >> shift_right). // out_reg_lo = (reg_lo << shift_left) | (reg_hi >> shift_right). @@ -5069,8 +4972,11 @@ void InstructionCodeGeneratorARMVIXL::HandleShift(HBinaryOperation* op) { __ Lsrs(o_h, high, 1); __ Rrx(o_l, low); } + } else if (shift_value == 0) { + __ Mov(o_l, low); + __ Mov(o_h, high); } else { - DCHECK(2 <= shift_value && shift_value < 32) << shift_value; + DCHECK(0 < shift_value && shift_value < 32) << shift_value; if (op->IsShl()) { __ Lsl(o_h, high, shift_value); __ Orr(o_h, o_h, Operand(low, ShiftType::LSR, 32 - shift_value)); @@ -5121,35 +5027,15 @@ void InstructionCodeGeneratorARMVIXL::VisitUShr(HUShr* ushr) { void LocationsBuilderARMVIXL::VisitNewInstance(HNewInstance* instruction) { LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary( instruction, LocationSummary::kCallOnMainOnly); - if (instruction->IsStringAlloc()) { - locations->AddTemp(LocationFrom(kMethodRegister)); - } else { - InvokeRuntimeCallingConventionARMVIXL calling_convention; - locations->SetInAt(0, LocationFrom(calling_convention.GetRegisterAt(0))); - } + InvokeRuntimeCallingConventionARMVIXL calling_convention; + locations->SetInAt(0, LocationFrom(calling_convention.GetRegisterAt(0))); locations->SetOut(LocationFrom(r0)); } void InstructionCodeGeneratorARMVIXL::VisitNewInstance(HNewInstance* instruction) { - // Note: if heap poisoning is enabled, the entry point takes cares - // of poisoning the reference. - if (instruction->IsStringAlloc()) { - // String is allocated through StringFactory. Call NewEmptyString entry point. - vixl32::Register temp = RegisterFrom(instruction->GetLocations()->GetTemp(0)); - MemberOffset code_offset = ArtMethod::EntryPointFromQuickCompiledCodeOffset(kArmPointerSize); - GetAssembler()->LoadFromOffset(kLoadWord, temp, tr, QUICK_ENTRY_POINT(pNewEmptyString)); - GetAssembler()->LoadFromOffset(kLoadWord, lr, temp, code_offset.Int32Value()); - // blx in T32 has only 16bit encoding that's why a stricter check for the scope is used. - ExactAssemblyScope aas(GetVIXLAssembler(), - vixl32::k16BitT32InstructionSizeInBytes, - CodeBufferCheckScope::kExactSize); - __ blx(lr); - codegen_->RecordPcInfo(instruction, instruction->GetDexPc()); - } else { - codegen_->InvokeRuntime(instruction->GetEntrypoint(), instruction, instruction->GetDexPc()); - CheckEntrypointTypes<kQuickAllocObjectWithChecks, void*, mirror::Class*>(); - } - codegen_->MaybeGenerateMarkingRegisterCheck(/* code */ 10); + codegen_->InvokeRuntime(instruction->GetEntrypoint(), instruction, instruction->GetDexPc()); + CheckEntrypointTypes<kQuickAllocObjectWithChecks, void*, mirror::Class*>(); + codegen_->MaybeGenerateMarkingRegisterCheck(/* code= */ 11); } void LocationsBuilderARMVIXL::VisitNewArray(HNewArray* instruction) { @@ -5162,14 +5048,12 @@ void LocationsBuilderARMVIXL::VisitNewArray(HNewArray* instruction) { } void InstructionCodeGeneratorARMVIXL::VisitNewArray(HNewArray* instruction) { - // Note: if heap poisoning is enabled, the entry point takes cares - // of poisoning the reference. 
- QuickEntrypointEnum entrypoint = - CodeGenerator::GetArrayAllocationEntrypoint(instruction->GetLoadClass()->GetClass()); + // Note: if heap poisoning is enabled, the entry point takes care of poisoning the reference. + QuickEntrypointEnum entrypoint = CodeGenerator::GetArrayAllocationEntrypoint(instruction); codegen_->InvokeRuntime(entrypoint, instruction, instruction->GetDexPc()); CheckEntrypointTypes<kQuickAllocArrayResolved, void*, mirror::Class*, int32_t>(); DCHECK(!codegen_->IsLeafMethod()); - codegen_->MaybeGenerateMarkingRegisterCheck(/* code */ 11); + codegen_->MaybeGenerateMarkingRegisterCheck(/* code= */ 12); } void LocationsBuilderARMVIXL::VisitParameterValue(HParameterValue* instruction) { @@ -5291,8 +5175,8 @@ void InstructionCodeGeneratorARMVIXL::VisitCompare(HCompare* compare) { } case DataType::Type::kInt64: { __ Cmp(HighRegisterFrom(left), HighRegisterFrom(right)); // Signed compare. - __ B(lt, &less, /* far_target */ false); - __ B(gt, &greater, /* far_target */ false); + __ B(lt, &less, /* is_far_target= */ false); + __ B(gt, &greater, /* is_far_target= */ false); // Emit move to `out` before the last `Cmp`, as `Mov` might affect the status flags. __ Mov(out, 0); __ Cmp(LowRegisterFrom(left), LowRegisterFrom(right)); // Unsigned compare. @@ -5313,8 +5197,8 @@ void InstructionCodeGeneratorARMVIXL::VisitCompare(HCompare* compare) { UNREACHABLE(); } - __ B(eq, final_label, /* far_target */ false); - __ B(less_cond, &less, /* far_target */ false); + __ B(eq, final_label, /* is_far_target= */ false); + __ B(less_cond, &less, /* is_far_target= */ false); __ Bind(&greater); __ Mov(out, 1); @@ -5610,18 +5494,10 @@ void LocationsBuilderARMVIXL::HandleFieldGet(HInstruction* instruction, locations->AddTemp(Location::RequiresRegister()); locations->AddTemp(Location::RequiresRegister()); } else if (object_field_get_with_read_barrier && kUseBakerReadBarrier) { - // We need a temporary register for the read barrier marking slow - // path in CodeGeneratorARMVIXL::GenerateFieldLoadWithBakerReadBarrier. - if (kBakerReadBarrierLinkTimeThunksEnableForFields && - !Runtime::Current()->UseJitCompilation()) { - // If link-time thunks for the Baker read barrier are enabled, for AOT - // loads we need a temporary only if the offset is too big. - if (field_info.GetFieldOffset().Uint32Value() >= kReferenceLoadMinFarOffset) { - locations->AddTemp(Location::RequiresRegister()); - } - // And we always need the reserved entrypoint register. - locations->AddTemp(Location::RegisterLocation(kBakerCcEntrypointRegister.GetCode())); - } else { + // We need a temporary register for the read barrier load in + // CodeGeneratorARMVIXL::GenerateFieldLoadWithBakerReadBarrier() + // only if the offset is too big. + if (field_info.GetFieldOffset().Uint32Value() >= kReferenceLoadMinFarOffset) { locations->AddTemp(Location::RequiresRegister()); } } @@ -5733,11 +5609,11 @@ void InstructionCodeGeneratorARMVIXL::HandleFieldGet(HInstruction* instruction, case DataType::Type::kReference: { // /* HeapReference<Object> */ out = *(base + offset) if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) { - Location temp_loc = locations->GetTemp(0); + Location maybe_temp = (locations->GetTempCount() != 0) ? locations->GetTemp(0) : Location(); // Note that a potential implicit null check is handled in this // CodeGeneratorARMVIXL::GenerateFieldLoadWithBakerReadBarrier call. 
codegen_->GenerateFieldLoadWithBakerReadBarrier( - instruction, out, base, offset, temp_loc, /* needs_null_check */ true); + instruction, out, base, offset, maybe_temp, /* needs_null_check= */ true); if (is_volatile) { codegen_->GenerateMemoryBarrier(MemBarrierKind::kLoadAny); } @@ -6036,30 +5912,20 @@ void LocationsBuilderARMVIXL::VisitArrayGet(HArrayGet* instruction) { object_array_get_with_read_barrier ? Location::kOutputOverlap : Location::kNoOutputOverlap); } if (object_array_get_with_read_barrier && kUseBakerReadBarrier) { - // We need a temporary register for the read barrier marking slow - // path in CodeGeneratorARMVIXL::GenerateArrayLoadWithBakerReadBarrier. - if (kBakerReadBarrierLinkTimeThunksEnableForFields && - !Runtime::Current()->UseJitCompilation() && - instruction->GetIndex()->IsConstant()) { + if (instruction->GetIndex()->IsConstant()) { // Array loads with constant index are treated as field loads. - // If link-time thunks for the Baker read barrier are enabled, for AOT - // constant index loads we need a temporary only if the offset is too big. + // We need a temporary register for the read barrier load in + // CodeGeneratorARMVIXL::GenerateFieldLoadWithBakerReadBarrier() + // only if the offset is too big. uint32_t offset = CodeGenerator::GetArrayDataOffset(instruction); uint32_t index = instruction->GetIndex()->AsIntConstant()->GetValue(); offset += index << DataType::SizeShift(DataType::Type::kReference); if (offset >= kReferenceLoadMinFarOffset) { locations->AddTemp(Location::RequiresRegister()); } - // And we always need the reserved entrypoint register. - locations->AddTemp(Location::RegisterLocation(kBakerCcEntrypointRegister.GetCode())); - } else if (kBakerReadBarrierLinkTimeThunksEnableForArrays && - !Runtime::Current()->UseJitCompilation() && - !instruction->GetIndex()->IsConstant()) { - // We need a non-scratch temporary for the array data pointer. - locations->AddTemp(Location::RequiresRegister()); - // And we always need the reserved entrypoint register. - locations->AddTemp(Location::RegisterLocation(kBakerCcEntrypointRegister.GetCode())); } else { + // We need a non-scratch temporary for the array data pointer in + // CodeGeneratorARMVIXL::GenerateArrayLoadWithBakerReadBarrier(). locations->AddTemp(Location::RequiresRegister()); } } else if (mirror::kUseStringCompression && instruction->IsStringCharAt()) { @@ -6103,7 +5969,7 @@ void InstructionCodeGeneratorARMVIXL::VisitArrayGet(HArrayGet* instruction) { __ Lsrs(length, length, 1u); // LSRS has a 16-bit encoding, TST (immediate) does not. static_assert(static_cast<uint32_t>(mirror::StringCompressionFlag::kCompressed) == 0u, "Expecting 0=compressed, 1=uncompressed"); - __ B(cs, &uncompressed_load, /* far_target */ false); + __ B(cs, &uncompressed_load, /* is_far_target= */ false); GetAssembler()->LoadFromOffset(kLoadUnsignedByte, RegisterFrom(out_loc), obj, @@ -6145,7 +6011,7 @@ void InstructionCodeGeneratorARMVIXL::VisitArrayGet(HArrayGet* instruction) { __ Lsrs(length, length, 1u); // LSRS has a 16-bit encoding, TST (immediate) does not. 
static_assert(static_cast<uint32_t>(mirror::StringCompressionFlag::kCompressed) == 0u, "Expecting 0=compressed, 1=uncompressed"); - __ B(cs, &uncompressed_load, /* far_target */ false); + __ B(cs, &uncompressed_load, /* is_far_target= */ false); __ Ldrb(RegisterFrom(out_loc), MemOperand(temp, RegisterFrom(index), vixl32::LSL, 0)); __ B(final_label); __ Bind(&uncompressed_load); @@ -6172,22 +6038,24 @@ void InstructionCodeGeneratorARMVIXL::VisitArrayGet(HArrayGet* instruction) { // /* HeapReference<Object> */ out = // *(obj + data_offset + index * sizeof(HeapReference<Object>)) if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) { - Location temp = locations->GetTemp(0); // Note that a potential implicit null check is handled in this // CodeGeneratorARMVIXL::GenerateArrayLoadWithBakerReadBarrier call. DCHECK(!instruction->CanDoImplicitNullCheckOn(instruction->InputAt(0))); if (index.IsConstant()) { // Array load with a constant index can be treated as a field load. + Location maybe_temp = + (locations->GetTempCount() != 0) ? locations->GetTemp(0) : Location(); data_offset += Int32ConstantFrom(index) << DataType::SizeShift(type); codegen_->GenerateFieldLoadWithBakerReadBarrier(instruction, out_loc, obj, data_offset, - locations->GetTemp(0), - /* needs_null_check */ false); + maybe_temp, + /* needs_null_check= */ false); } else { + Location temp = locations->GetTemp(0); codegen_->GenerateArrayLoadWithBakerReadBarrier( - instruction, out_loc, obj, data_offset, index, temp, /* needs_null_check */ false); + out_loc, obj, data_offset, index, temp, /* needs_null_check= */ false); } } else { vixl32::Register out = OutputRegister(instruction); @@ -6462,7 +6330,7 @@ void InstructionCodeGeneratorARMVIXL::VisitArraySet(HArraySet* instruction) { if (instruction->StaticTypeOfArrayIsObjectArray()) { vixl32::Label do_put; - __ B(eq, &do_put, /* far_target */ false); + __ B(eq, &do_put, /* is_far_target= */ false); // If heap poisoning is enabled, the `temp1` reference has // not been unpoisoned yet; unpoison it now. GetAssembler()->MaybeUnpoisonHeapReference(temp1); @@ -6706,9 +6574,25 @@ void CodeGeneratorARMVIXL::MarkGCCard(vixl32::Register temp, if (can_be_null) { __ CompareAndBranchIfZero(value, &is_null); } + // Load the address of the card table into `card`. GetAssembler()->LoadFromOffset( kLoadWord, card, tr, Thread::CardTableOffset<kArmPointerSize>().Int32Value()); + // Calculate the offset (in the card table) of the card corresponding to + // `object`. __ Lsr(temp, object, Operand::From(gc::accounting::CardTable::kCardShift)); + // Write the `art::gc::accounting::CardTable::kCardDirty` value into the + // `object`'s card. + // + // Register `card` contains the address of the card table. Note that the card + // table's base is biased during its creation so that it always starts at an + // address whose least-significant byte is equal to `kCardDirty` (see + // art::gc::accounting::CardTable::Create). Therefore the STRB instruction + // below writes the `kCardDirty` (byte) value into the `object`'s card + // (located at `card + object >> kCardShift`). + // + // This dual use of the value in register `card` (1. to calculate the location + // of the card to mark; and 2. to load the `kCardDirty` value) saves a load + // (no need to explicitly load `kCardDirty` as an immediate value). 
__ Strb(card, MemOperand(card, temp)); if (can_be_null) { __ Bind(&is_null); @@ -6748,7 +6632,7 @@ void InstructionCodeGeneratorARMVIXL::VisitSuspendCheck(HSuspendCheck* instructi return; } GenerateSuspendCheck(instruction, nullptr); - codegen_->MaybeGenerateMarkingRegisterCheck(/* code */ 12); + codegen_->MaybeGenerateMarkingRegisterCheck(/* code= */ 13); } void InstructionCodeGeneratorARMVIXL::GenerateSuspendCheck(HSuspendCheck* instruction, @@ -7040,14 +6924,14 @@ HLoadClass::LoadKind CodeGeneratorARMVIXL::GetSupportedLoadClassKind( case HLoadClass::LoadKind::kReferrersClass: break; case HLoadClass::LoadKind::kBootImageLinkTimePcRelative: - case HLoadClass::LoadKind::kBootImageClassTable: + case HLoadClass::LoadKind::kBootImageRelRo: case HLoadClass::LoadKind::kBssEntry: DCHECK(!Runtime::Current()->UseJitCompilation()); break; + case HLoadClass::LoadKind::kJitBootImageAddress: case HLoadClass::LoadKind::kJitTableAddress: DCHECK(Runtime::Current()->UseJitCompilation()); break; - case HLoadClass::LoadKind::kBootImageAddress: case HLoadClass::LoadKind::kRuntimeCall: break; } @@ -7083,23 +6967,11 @@ void LocationsBuilderARMVIXL::VisitLoadClass(HLoadClass* cls) { if (load_kind == HLoadClass::LoadKind::kBssEntry) { if (!kUseReadBarrier || kUseBakerReadBarrier) { // Rely on the type resolution or initialization and marking to save everything we need. - RegisterSet caller_saves = RegisterSet::Empty(); - InvokeRuntimeCallingConventionARMVIXL calling_convention; - caller_saves.Add(LocationFrom(calling_convention.GetRegisterAt(0))); - // TODO: Add GetReturnLocation() to the calling convention so that we can DCHECK() - // that the the kPrimNot result register is the same as the first argument register. - locations->SetCustomSlowPathCallerSaves(caller_saves); + locations->SetCustomSlowPathCallerSaves(OneRegInReferenceOutSaveEverythingCallerSaves()); } else { // For non-Baker read barrier we have a temp-clobbering call. 
} } - if (kUseBakerReadBarrier && kBakerReadBarrierLinkTimeThunksEnableForGcRoots) { - if (load_kind == HLoadClass::LoadKind::kBssEntry || - (load_kind == HLoadClass::LoadKind::kReferrersClass && - !Runtime::Current()->UseJitCompilation())) { - locations->AddTemp(Location::RegisterLocation(kBakerCcEntrypointRegister.GetCode())); - } - } } // NO_THREAD_SAFETY_ANALYSIS as we manipulate handles whose internal object we know does not @@ -7108,7 +6980,7 @@ void InstructionCodeGeneratorARMVIXL::VisitLoadClass(HLoadClass* cls) NO_THREAD_ HLoadClass::LoadKind load_kind = cls->GetLoadKind(); if (load_kind == HLoadClass::LoadKind::kRuntimeCall) { codegen_->GenerateLoadClassRuntimeCall(cls); - codegen_->MaybeGenerateMarkingRegisterCheck(/* code */ 13); + codegen_->MaybeGenerateMarkingRegisterCheck(/* code= */ 14); return; } DCHECK(!cls->NeedsAccessCheck()); @@ -7127,11 +6999,11 @@ void InstructionCodeGeneratorARMVIXL::VisitLoadClass(HLoadClass* cls) NO_THREAD_ DCHECK(!cls->MustGenerateClinitCheck()); // /* GcRoot<mirror::Class> */ out = current_method->declaring_class_ vixl32::Register current_method = InputRegisterAt(cls, 0); - GenerateGcRootFieldLoad(cls, - out_loc, - current_method, - ArtMethod::DeclaringClassOffset().Int32Value(), - read_barrier_option); + codegen_->GenerateGcRootFieldLoad(cls, + out_loc, + current_method, + ArtMethod::DeclaringClassOffset().Int32Value(), + read_barrier_option); break; } case HLoadClass::LoadKind::kBootImageLinkTimePcRelative: { @@ -7142,42 +7014,35 @@ void InstructionCodeGeneratorARMVIXL::VisitLoadClass(HLoadClass* cls) NO_THREAD_ codegen_->EmitMovwMovtPlaceholder(labels, out); break; } - case HLoadClass::LoadKind::kBootImageAddress: { - DCHECK_EQ(read_barrier_option, kWithoutReadBarrier); - uint32_t address = dchecked_integral_cast<uint32_t>( - reinterpret_cast<uintptr_t>(cls->GetClass().Get())); - DCHECK_NE(address, 0u); - __ Ldr(out, codegen_->DeduplicateBootImageAddressLiteral(address)); - break; - } - case HLoadClass::LoadKind::kBootImageClassTable: { + case HLoadClass::LoadKind::kBootImageRelRo: { DCHECK(!codegen_->GetCompilerOptions().IsBootImage()); CodeGeneratorARMVIXL::PcRelativePatchInfo* labels = - codegen_->NewBootImageTypePatch(cls->GetDexFile(), cls->GetTypeIndex()); + codegen_->NewBootImageRelRoPatch(codegen_->GetBootImageOffset(cls)); codegen_->EmitMovwMovtPlaceholder(labels, out); - __ Ldr(out, MemOperand(out, /* offset */ 0)); - // Extract the reference from the slot data, i.e. clear the hash bits. 
- int32_t masked_hash = ClassTable::TableSlot::MaskHash( - ComputeModifiedUtf8Hash(cls->GetDexFile().StringByTypeIdx(cls->GetTypeIndex()))); - if (masked_hash != 0) { - __ Sub(out, out, Operand(masked_hash)); - } + __ Ldr(out, MemOperand(out, /* offset= */ 0)); break; } case HLoadClass::LoadKind::kBssEntry: { CodeGeneratorARMVIXL::PcRelativePatchInfo* labels = codegen_->NewTypeBssEntryPatch(cls->GetDexFile(), cls->GetTypeIndex()); codegen_->EmitMovwMovtPlaceholder(labels, out); - GenerateGcRootFieldLoad(cls, out_loc, out, /* offset */ 0, read_barrier_option); + codegen_->GenerateGcRootFieldLoad(cls, out_loc, out, /* offset= */ 0, read_barrier_option); generate_null_check = true; break; } + case HLoadClass::LoadKind::kJitBootImageAddress: { + DCHECK_EQ(read_barrier_option, kWithoutReadBarrier); + uint32_t address = reinterpret_cast32<uint32_t>(cls->GetClass().Get()); + DCHECK_NE(address, 0u); + __ Ldr(out, codegen_->DeduplicateBootImageAddressLiteral(address)); + break; + } case HLoadClass::LoadKind::kJitTableAddress: { __ Ldr(out, codegen_->DeduplicateJitClassLiteral(cls->GetDexFile(), cls->GetTypeIndex(), cls->GetClass())); // /* GcRoot<mirror::Class> */ out = *out - GenerateGcRootFieldLoad(cls, out_loc, out, /* offset */ 0, read_barrier_option); + codegen_->GenerateGcRootFieldLoad(cls, out_loc, out, /* offset= */ 0, read_barrier_option); break; } case HLoadClass::LoadKind::kRuntimeCall: @@ -7189,8 +7054,7 @@ void InstructionCodeGeneratorARMVIXL::VisitLoadClass(HLoadClass* cls) NO_THREAD_ if (generate_null_check || cls->MustGenerateClinitCheck()) { DCHECK(cls->CanCallRuntime()); LoadClassSlowPathARMVIXL* slow_path = - new (codegen_->GetScopedAllocator()) LoadClassSlowPathARMVIXL( - cls, cls, cls->GetDexPc(), cls->MustGenerateClinitCheck()); + new (codegen_->GetScopedAllocator()) LoadClassSlowPathARMVIXL(cls, cls); codegen_->AddSlowPath(slow_path); if (generate_null_check) { __ CompareAndBranchIfZero(out, slow_path->GetEntryLabel()); @@ -7200,10 +7064,30 @@ void InstructionCodeGeneratorARMVIXL::VisitLoadClass(HLoadClass* cls) NO_THREAD_ } else { __ Bind(slow_path->GetExitLabel()); } - codegen_->MaybeGenerateMarkingRegisterCheck(/* code */ 14); + codegen_->MaybeGenerateMarkingRegisterCheck(/* code= */ 15); } } +void LocationsBuilderARMVIXL::VisitLoadMethodHandle(HLoadMethodHandle* load) { + InvokeRuntimeCallingConventionARMVIXL calling_convention; + Location location = LocationFrom(calling_convention.GetRegisterAt(0)); + CodeGenerator::CreateLoadMethodHandleRuntimeCallLocationSummary(load, location, location); +} + +void InstructionCodeGeneratorARMVIXL::VisitLoadMethodHandle(HLoadMethodHandle* load) { + codegen_->GenerateLoadMethodHandleRuntimeCall(load); +} + +void LocationsBuilderARMVIXL::VisitLoadMethodType(HLoadMethodType* load) { + InvokeRuntimeCallingConventionARMVIXL calling_convention; + Location location = LocationFrom(calling_convention.GetRegisterAt(0)); + CodeGenerator::CreateLoadMethodTypeRuntimeCallLocationSummary(load, location, location); +} + +void InstructionCodeGeneratorARMVIXL::VisitLoadMethodType(HLoadMethodType* load) { + codegen_->GenerateLoadMethodTypeRuntimeCall(load); +} + void LocationsBuilderARMVIXL::VisitClinitCheck(HClinitCheck* check) { LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(check, LocationSummary::kCallOnSlowPath); @@ -7211,15 +7095,14 @@ void LocationsBuilderARMVIXL::VisitClinitCheck(HClinitCheck* check) { if (check->HasUses()) { locations->SetOut(Location::SameAsFirstInput()); } + // Rely on the type initialization to 
save everything we need. + locations->SetCustomSlowPathCallerSaves(OneRegInReferenceOutSaveEverythingCallerSaves()); } void InstructionCodeGeneratorARMVIXL::VisitClinitCheck(HClinitCheck* check) { // We assume the class is not null. LoadClassSlowPathARMVIXL* slow_path = - new (codegen_->GetScopedAllocator()) LoadClassSlowPathARMVIXL(check->GetLoadClass(), - check, - check->GetDexPc(), - /* do_clinit */ true); + new (codegen_->GetScopedAllocator()) LoadClassSlowPathARMVIXL(check->GetLoadClass(), check); codegen_->AddSlowPath(slow_path); GenerateClassInitializationCheck(slow_path, InputRegisterAt(check, 0)); } @@ -7243,18 +7126,79 @@ void InstructionCodeGeneratorARMVIXL::GenerateClassInitializationCheck( __ Bind(slow_path->GetExitLabel()); } +void InstructionCodeGeneratorARMVIXL::GenerateBitstringTypeCheckCompare( + HTypeCheckInstruction* check, + vixl32::Register temp, + vixl32::FlagsUpdate flags_update) { + uint32_t path_to_root = check->GetBitstringPathToRoot(); + uint32_t mask = check->GetBitstringMask(); + DCHECK(IsPowerOfTwo(mask + 1)); + size_t mask_bits = WhichPowerOf2(mask + 1); + + // Note that HInstanceOf shall check for zero value in `temp` but HCheckCast needs + // the Z flag for BNE. This is indicated by the `flags_update` parameter. + if (mask_bits == 16u) { + // Load only the bitstring part of the status word. + __ Ldrh(temp, MemOperand(temp, mirror::Class::StatusOffset().Int32Value())); + // Check if the bitstring bits are equal to `path_to_root`. + if (flags_update == SetFlags) { + __ Cmp(temp, path_to_root); + } else { + __ Sub(temp, temp, path_to_root); + } + } else { + // /* uint32_t */ temp = temp->status_ + __ Ldr(temp, MemOperand(temp, mirror::Class::StatusOffset().Int32Value())); + if (GetAssembler()->ShifterOperandCanHold(SUB, path_to_root)) { + // Compare the bitstring bits using SUB. + __ Sub(temp, temp, path_to_root); + // Shift out bits that do not contribute to the comparison. + __ Lsl(flags_update, temp, temp, dchecked_integral_cast<uint32_t>(32u - mask_bits)); + } else if (IsUint<16>(path_to_root)) { + if (temp.IsLow()) { + // Note: Optimized for size but contains one more dependent instruction than necessary. + // MOVW+SUB(register) would be 8 bytes unless we find a low-reg temporary but the + // macro assembler would use the high reg IP for the constant by default. + // Compare the bitstring bits using SUB. + __ Sub(temp, temp, path_to_root & 0x00ffu); // 16-bit SUB (immediate) T2 + __ Sub(temp, temp, path_to_root & 0xff00u); // 32-bit SUB (immediate) T3 + // Shift out bits that do not contribute to the comparison. + __ Lsl(flags_update, temp, temp, dchecked_integral_cast<uint32_t>(32u - mask_bits)); + } else { + // Extract the bitstring bits. + __ Ubfx(temp, temp, 0, mask_bits); + // Check if the bitstring bits are equal to `path_to_root`. + if (flags_update == SetFlags) { + __ Cmp(temp, path_to_root); + } else { + __ Sub(temp, temp, path_to_root); + } + } + } else { + // Shift out bits that do not contribute to the comparison. + __ Lsl(temp, temp, dchecked_integral_cast<uint32_t>(32u - mask_bits)); + // Check if the shifted bitstring bits are equal to `path_to_root << (32u - mask_bits)`. 
+ if (flags_update == SetFlags) { + __ Cmp(temp, path_to_root << (32u - mask_bits)); + } else { + __ Sub(temp, temp, path_to_root << (32u - mask_bits)); + } + } + } +} + HLoadString::LoadKind CodeGeneratorARMVIXL::GetSupportedLoadStringKind( HLoadString::LoadKind desired_string_load_kind) { switch (desired_string_load_kind) { case HLoadString::LoadKind::kBootImageLinkTimePcRelative: - case HLoadString::LoadKind::kBootImageInternTable: + case HLoadString::LoadKind::kBootImageRelRo: case HLoadString::LoadKind::kBssEntry: DCHECK(!Runtime::Current()->UseJitCompilation()); break; + case HLoadString::LoadKind::kJitBootImageAddress: case HLoadString::LoadKind::kJitTableAddress: DCHECK(Runtime::Current()->UseJitCompilation()); break; - case HLoadString::LoadKind::kBootImageAddress: case HLoadString::LoadKind::kRuntimeCall: break; } @@ -7272,15 +7216,7 @@ void LocationsBuilderARMVIXL::VisitLoadString(HLoadString* load) { if (load_kind == HLoadString::LoadKind::kBssEntry) { if (!kUseReadBarrier || kUseBakerReadBarrier) { // Rely on the pResolveString and marking to save everything we need, including temps. - RegisterSet caller_saves = RegisterSet::Empty(); - InvokeRuntimeCallingConventionARMVIXL calling_convention; - caller_saves.Add(LocationFrom(calling_convention.GetRegisterAt(0))); - // TODO: Add GetReturnLocation() to the calling convention so that we can DCHECK() - // that the the kPrimNot result register is the same as the first argument register. - locations->SetCustomSlowPathCallerSaves(caller_saves); - if (kUseBakerReadBarrier && kBakerReadBarrierLinkTimeThunksEnableForGcRoots) { - locations->AddTemp(Location::RegisterLocation(kBakerCcEntrypointRegister.GetCode())); - } + locations->SetCustomSlowPathCallerSaves(OneRegInReferenceOutSaveEverythingCallerSaves()); } else { // For non-Baker read barrier we have a temp-clobbering call. 
} @@ -7304,33 +7240,32 @@ void InstructionCodeGeneratorARMVIXL::VisitLoadString(HLoadString* load) NO_THRE codegen_->EmitMovwMovtPlaceholder(labels, out); return; } - case HLoadString::LoadKind::kBootImageAddress: { - uint32_t address = dchecked_integral_cast<uint32_t>( - reinterpret_cast<uintptr_t>(load->GetString().Get())); - DCHECK_NE(address, 0u); - __ Ldr(out, codegen_->DeduplicateBootImageAddressLiteral(address)); - return; - } - case HLoadString::LoadKind::kBootImageInternTable: { + case HLoadString::LoadKind::kBootImageRelRo: { DCHECK(!codegen_->GetCompilerOptions().IsBootImage()); CodeGeneratorARMVIXL::PcRelativePatchInfo* labels = - codegen_->NewBootImageStringPatch(load->GetDexFile(), load->GetStringIndex()); + codegen_->NewBootImageRelRoPatch(codegen_->GetBootImageOffset(load)); codegen_->EmitMovwMovtPlaceholder(labels, out); - __ Ldr(out, MemOperand(out, /* offset */ 0)); + __ Ldr(out, MemOperand(out, /* offset= */ 0)); return; } case HLoadString::LoadKind::kBssEntry: { - DCHECK(!codegen_->GetCompilerOptions().IsBootImage()); CodeGeneratorARMVIXL::PcRelativePatchInfo* labels = codegen_->NewStringBssEntryPatch(load->GetDexFile(), load->GetStringIndex()); codegen_->EmitMovwMovtPlaceholder(labels, out); - GenerateGcRootFieldLoad(load, out_loc, out, /* offset */ 0, kCompilerReadBarrierOption); + codegen_->GenerateGcRootFieldLoad( + load, out_loc, out, /* offset= */ 0, kCompilerReadBarrierOption); LoadStringSlowPathARMVIXL* slow_path = new (codegen_->GetScopedAllocator()) LoadStringSlowPathARMVIXL(load); codegen_->AddSlowPath(slow_path); __ CompareAndBranchIfZero(out, slow_path->GetEntryLabel()); __ Bind(slow_path->GetExitLabel()); - codegen_->MaybeGenerateMarkingRegisterCheck(/* code */ 15); + codegen_->MaybeGenerateMarkingRegisterCheck(/* code= */ 16); + return; + } + case HLoadString::LoadKind::kJitBootImageAddress: { + uint32_t address = reinterpret_cast32<uint32_t>(load->GetString().Get()); + DCHECK_NE(address, 0u); + __ Ldr(out, codegen_->DeduplicateBootImageAddressLiteral(address)); return; } case HLoadString::LoadKind::kJitTableAddress: { @@ -7338,7 +7273,8 @@ void InstructionCodeGeneratorARMVIXL::VisitLoadString(HLoadString* load) NO_THRE load->GetStringIndex(), load->GetString())); // /* GcRoot<mirror::String> */ out = *out - GenerateGcRootFieldLoad(load, out_loc, out, /* offset */ 0, kCompilerReadBarrierOption); + codegen_->GenerateGcRootFieldLoad( + load, out_loc, out, /* offset= */ 0, kCompilerReadBarrierOption); return; } default: @@ -7351,7 +7287,7 @@ void InstructionCodeGeneratorARMVIXL::VisitLoadString(HLoadString* load) NO_THRE __ Mov(calling_convention.GetRegisterAt(0), load->GetStringIndex().index_); codegen_->InvokeRuntime(kQuickResolveString, load, load->GetDexPc()); CheckEntrypointTypes<kQuickResolveString, void*, uint32_t>(); - codegen_->MaybeGenerateMarkingRegisterCheck(/* code */ 16); + codegen_->MaybeGenerateMarkingRegisterCheck(/* code= */ 17); } static int32_t GetExceptionTlsOffset() { @@ -7434,6 +7370,8 @@ void LocationsBuilderARMVIXL::VisitInstanceOf(HInstanceOf* instruction) { case TypeCheckKind::kInterfaceCheck: call_kind = LocationSummary::kCallOnSlowPath; break; + case TypeCheckKind::kBitstringCheck: + break; } LocationSummary* locations = @@ -7442,14 +7380,17 @@ void LocationsBuilderARMVIXL::VisitInstanceOf(HInstanceOf* instruction) { locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty()); // No caller-save registers. 
} locations->SetInAt(0, Location::RequiresRegister()); - locations->SetInAt(1, Location::RequiresRegister()); + if (type_check_kind == TypeCheckKind::kBitstringCheck) { + locations->SetInAt(1, Location::ConstantLocation(instruction->InputAt(1)->AsConstant())); + locations->SetInAt(2, Location::ConstantLocation(instruction->InputAt(2)->AsConstant())); + locations->SetInAt(3, Location::ConstantLocation(instruction->InputAt(3)->AsConstant())); + } else { + locations->SetInAt(1, Location::RequiresRegister()); + } // The "out" register is used as a temporary, so it overlaps with the inputs. // Note that TypeCheckSlowPathARM uses this register too. locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap); locations->AddRegisterTemps(NumberOfInstanceOfTemps(type_check_kind)); - if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) { - codegen_->MaybeAddBakerCcEntrypointTempForFields(locations); - } } void InstructionCodeGeneratorARMVIXL::VisitInstanceOf(HInstanceOf* instruction) { @@ -7457,7 +7398,9 @@ void InstructionCodeGeneratorARMVIXL::VisitInstanceOf(HInstanceOf* instruction) LocationSummary* locations = instruction->GetLocations(); Location obj_loc = locations->InAt(0); vixl32::Register obj = InputRegisterAt(instruction, 0); - vixl32::Register cls = InputRegisterAt(instruction, 1); + vixl32::Register cls = (type_check_kind == TypeCheckKind::kBitstringCheck) + ? vixl32::Register() + : InputRegisterAt(instruction, 1); Location out_loc = locations->Out(); vixl32::Register out = OutputRegister(instruction); const size_t num_temps = NumberOfInstanceOfTemps(type_check_kind); @@ -7476,7 +7419,7 @@ void InstructionCodeGeneratorARMVIXL::VisitInstanceOf(HInstanceOf* instruction) if (instruction->MustDoNullCheck()) { DCHECK(!out.Is(obj)); __ Mov(out, 0); - __ CompareAndBranchIfZero(obj, final_label, /* far_target */ false); + __ CompareAndBranchIfZero(obj, final_label, /* is_far_target= */ false); } switch (type_check_kind) { @@ -7508,7 +7451,7 @@ void InstructionCodeGeneratorARMVIXL::VisitInstanceOf(HInstanceOf* instruction) __ it(eq); __ mov(eq, out, 1); } else { - __ B(ne, final_label, /* far_target */ false); + __ B(ne, final_label, /* is_far_target= */ false); __ Mov(out, 1); } @@ -7536,9 +7479,9 @@ void InstructionCodeGeneratorARMVIXL::VisitInstanceOf(HInstanceOf* instruction) maybe_temp_loc, read_barrier_option); // If `out` is null, we use it for the result, and jump to the final label. - __ CompareAndBranchIfZero(out, final_label, /* far_target */ false); + __ CompareAndBranchIfZero(out, final_label, /* is_far_target= */ false); __ Cmp(out, cls); - __ B(ne, &loop, /* far_target */ false); + __ B(ne, &loop, /* is_far_target= */ false); __ Mov(out, 1); break; } @@ -7557,7 +7500,7 @@ void InstructionCodeGeneratorARMVIXL::VisitInstanceOf(HInstanceOf* instruction) vixl32::Label loop, success; __ Bind(&loop); __ Cmp(out, cls); - __ B(eq, &success, /* far_target */ false); + __ B(eq, &success, /* is_far_target= */ false); // /* HeapReference<Class> */ out = out->super_class_ GenerateReferenceLoadOneRegister(instruction, out_loc, @@ -7567,7 +7510,7 @@ void InstructionCodeGeneratorARMVIXL::VisitInstanceOf(HInstanceOf* instruction) // This is essentially a null check, but it sets the condition flags to the // proper value for the code that follows the loop, i.e. not `eq`. 
__ Cmp(out, 1); - __ B(hs, &loop, /* far_target */ false); + __ B(hs, &loop, /* is_far_target= */ false); // Since IT blocks longer than a 16-bit instruction are deprecated by ARMv8, // we check that the output is in a low register, so that a 16-bit MOV @@ -7612,7 +7555,7 @@ void InstructionCodeGeneratorARMVIXL::VisitInstanceOf(HInstanceOf* instruction) // Do an exact check. vixl32::Label exact_check; __ Cmp(out, cls); - __ B(eq, &exact_check, /* far_target */ false); + __ B(eq, &exact_check, /* is_far_target= */ false); // Otherwise, we need to check that the object's class is a non-primitive array. // /* HeapReference<Class> */ out = out->component_type_ GenerateReferenceLoadOneRegister(instruction, @@ -7621,7 +7564,7 @@ void InstructionCodeGeneratorARMVIXL::VisitInstanceOf(HInstanceOf* instruction) maybe_temp_loc, read_barrier_option); // If `out` is null, we use it for the result, and jump to the final label. - __ CompareAndBranchIfZero(out, final_label, /* far_target */ false); + __ CompareAndBranchIfZero(out, final_label, /* is_far_target= */ false); GetAssembler()->LoadFromOffset(kLoadUnsignedHalfword, out, out, primitive_offset); static_assert(Primitive::kPrimNot == 0, "Expected 0 for kPrimNot"); __ Cmp(out, 0); @@ -7643,7 +7586,7 @@ void InstructionCodeGeneratorARMVIXL::VisitInstanceOf(HInstanceOf* instruction) __ it(eq); __ mov(eq, out, 1); } else { - __ B(ne, final_label, /* far_target */ false); + __ B(ne, final_label, /* is_far_target= */ false); __ Bind(&exact_check); __ Mov(out, 1); } @@ -7663,7 +7606,7 @@ void InstructionCodeGeneratorARMVIXL::VisitInstanceOf(HInstanceOf* instruction) __ Cmp(out, cls); DCHECK(locations->OnlyCallsOnSlowPath()); slow_path = new (codegen_->GetScopedAllocator()) TypeCheckSlowPathARMVIXL( - instruction, /* is_fatal */ false); + instruction, /* is_fatal= */ false); codegen_->AddSlowPath(slow_path); __ B(ne, slow_path->GetEntryLabel()); __ Mov(out, 1); @@ -7692,11 +7635,31 @@ void InstructionCodeGeneratorARMVIXL::VisitInstanceOf(HInstanceOf* instruction) // This should also be beneficial for the other cases above. DCHECK(locations->OnlyCallsOnSlowPath()); slow_path = new (codegen_->GetScopedAllocator()) TypeCheckSlowPathARMVIXL( - instruction, /* is_fatal */ false); + instruction, /* is_fatal= */ false); codegen_->AddSlowPath(slow_path); __ B(slow_path->GetEntryLabel()); break; } + + case TypeCheckKind::kBitstringCheck: { + // /* HeapReference<Class> */ temp = obj->klass_ + GenerateReferenceLoadTwoRegisters(instruction, + out_loc, + obj_loc, + class_offset, + maybe_temp_loc, + kWithoutReadBarrier); + + GenerateBitstringTypeCheckCompare(instruction, out, DontCare); + // If `out` is a low reg and we would have another low reg temp, we could + // optimize this as RSBS+ADC, see GenerateConditionWithZero(). + // + // Also, in some cases when `out` is a low reg and we're loading a constant to IP + // it would make sense to use CMP+MOV+IT+MOV instead of SUB+CLZ+LSR as the code size + // would be the same and we would have fewer direct data dependencies. 
+ codegen_->GenerateConditionWithZero(kCondEQ, out, out); // CLZ+LSR + break; + } } if (done.IsReferenced()) { @@ -7714,7 +7677,13 @@ void LocationsBuilderARMVIXL::VisitCheckCast(HCheckCast* instruction) { LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(instruction, call_kind); locations->SetInAt(0, Location::RequiresRegister()); - locations->SetInAt(1, Location::RequiresRegister()); + if (type_check_kind == TypeCheckKind::kBitstringCheck) { + locations->SetInAt(1, Location::ConstantLocation(instruction->InputAt(1)->AsConstant())); + locations->SetInAt(2, Location::ConstantLocation(instruction->InputAt(2)->AsConstant())); + locations->SetInAt(3, Location::ConstantLocation(instruction->InputAt(3)->AsConstant())); + } else { + locations->SetInAt(1, Location::RequiresRegister()); + } locations->AddRegisterTemps(NumberOfCheckCastTemps(type_check_kind)); } @@ -7723,7 +7692,9 @@ void InstructionCodeGeneratorARMVIXL::VisitCheckCast(HCheckCast* instruction) { LocationSummary* locations = instruction->GetLocations(); Location obj_loc = locations->InAt(0); vixl32::Register obj = InputRegisterAt(instruction, 0); - vixl32::Register cls = InputRegisterAt(instruction, 1); + vixl32::Register cls = (type_check_kind == TypeCheckKind::kBitstringCheck) + ? vixl32::Register() + : InputRegisterAt(instruction, 1); Location temp_loc = locations->GetTemp(0); vixl32::Register temp = RegisterFrom(temp_loc); const size_t num_temps = NumberOfCheckCastTemps(type_check_kind); @@ -7749,7 +7720,7 @@ void InstructionCodeGeneratorARMVIXL::VisitCheckCast(HCheckCast* instruction) { vixl32::Label* final_label = codegen_->GetFinalLabel(instruction, &done); // Avoid null check if we know obj is not null. if (instruction->MustDoNullCheck()) { - __ CompareAndBranchIfZero(obj, final_label, /* far_target */ false); + __ CompareAndBranchIfZero(obj, final_label, /* is_far_target= */ false); } switch (type_check_kind) { @@ -7796,7 +7767,7 @@ void InstructionCodeGeneratorARMVIXL::VisitCheckCast(HCheckCast* instruction) { // Otherwise, compare the classes. __ Cmp(temp, cls); - __ B(ne, &loop, /* far_target */ false); + __ B(ne, &loop, /* is_far_target= */ false); break; } @@ -7813,7 +7784,7 @@ void InstructionCodeGeneratorARMVIXL::VisitCheckCast(HCheckCast* instruction) { vixl32::Label loop; __ Bind(&loop); __ Cmp(temp, cls); - __ B(eq, final_label, /* far_target */ false); + __ B(eq, final_label, /* is_far_target= */ false); // /* HeapReference<Class> */ temp = temp->super_class_ GenerateReferenceLoadOneRegister(instruction, @@ -7841,7 +7812,7 @@ void InstructionCodeGeneratorARMVIXL::VisitCheckCast(HCheckCast* instruction) { // Do an exact check. __ Cmp(temp, cls); - __ B(eq, final_label, /* far_target */ false); + __ B(eq, final_label, /* is_far_target= */ false); // Otherwise, we need to check that the object's class is a non-primitive array. // /* HeapReference<Class> */ temp = temp->component_type_ @@ -7905,7 +7876,21 @@ void InstructionCodeGeneratorARMVIXL::VisitCheckCast(HCheckCast* instruction) { __ Sub(RegisterFrom(maybe_temp2_loc), RegisterFrom(maybe_temp2_loc), 2); // Compare the classes and continue the loop if they do not match. 
__ Cmp(cls, RegisterFrom(maybe_temp3_loc)); - __ B(ne, &start_loop, /* far_target */ false); + __ B(ne, &start_loop, /* is_far_target= */ false); + break; + } + + case TypeCheckKind::kBitstringCheck: { + // /* HeapReference<Class> */ temp = obj->klass_ + GenerateReferenceLoadTwoRegisters(instruction, + temp_loc, + obj_loc, + class_offset, + maybe_temp2_loc, + kWithoutReadBarrier); + + GenerateBitstringTypeCheckCompare(instruction, temp, SetFlags); + __ B(ne, type_check_slow_path->GetEntryLabel()); break; } } @@ -7932,7 +7917,7 @@ void InstructionCodeGeneratorARMVIXL::VisitMonitorOperation(HMonitorOperation* i } else { CheckEntrypointTypes<kQuickUnlockObject, void, mirror::Object*>(); } - codegen_->MaybeGenerateMarkingRegisterCheck(/* code */ 17); + codegen_->MaybeGenerateMarkingRegisterCheck(/* code= */ 18); } void LocationsBuilderARMVIXL::VisitAnd(HAnd* instruction) { @@ -8287,7 +8272,7 @@ void InstructionCodeGeneratorARMVIXL::GenerateReferenceLoadOneRegister( // Load with fast path based Baker's read barrier. // /* HeapReference<Object> */ out = *(out + offset) codegen_->GenerateFieldLoadWithBakerReadBarrier( - instruction, out, out_reg, offset, maybe_temp, /* needs_null_check */ false); + instruction, out, out_reg, offset, maybe_temp, /* needs_null_check= */ false); } else { // Load with slow path based read barrier. // Save the value of `out` into `maybe_temp` before overwriting it @@ -8322,7 +8307,7 @@ void InstructionCodeGeneratorARMVIXL::GenerateReferenceLoadTwoRegisters( // Load with fast path based Baker's read barrier. // /* HeapReference<Object> */ out = *(obj + offset) codegen_->GenerateFieldLoadWithBakerReadBarrier( - instruction, out, obj_reg, offset, maybe_temp, /* needs_null_check */ false); + instruction, out, obj_reg, offset, maybe_temp, /* needs_null_check= */ false); } else { // Load with slow path based read barrier. // /* HeapReference<Object> */ out = *(obj + offset) @@ -8337,7 +8322,7 @@ void InstructionCodeGeneratorARMVIXL::GenerateReferenceLoadTwoRegisters( } } -void InstructionCodeGeneratorARMVIXL::GenerateGcRootFieldLoad( +void CodeGeneratorARMVIXL::GenerateGcRootFieldLoad( HInstruction* instruction, Location root, vixl32::Register obj, @@ -8349,81 +8334,52 @@ void InstructionCodeGeneratorARMVIXL::GenerateGcRootFieldLoad( if (kUseBakerReadBarrier) { // Fast path implementation of art::ReadBarrier::BarrierForRoot when // Baker's read barrier are used. - if (kBakerReadBarrierLinkTimeThunksEnableForGcRoots && - !Runtime::Current()->UseJitCompilation()) { - // Query `art::Thread::Current()->GetIsGcMarking()` (stored in - // the Marking Register) to decide whether we need to enter - // the slow path to mark the GC root. - // - // We use link-time generated thunks for the slow path. That thunk - // checks the reference and jumps to the entrypoint if needed. - // - // lr = &return_address; - // GcRoot<mirror::Object> root = *(obj+offset); // Original reference load. 
- // if (mr) { // Thread::Current()->GetIsGcMarking() - // goto gc_root_thunk<root_reg>(lr) - // } - // return_address: - UseScratchRegisterScope temps(GetVIXLAssembler()); - ExcludeIPAndBakerCcEntrypointRegister(&temps, instruction); - bool narrow = CanEmitNarrowLdr(root_reg, obj, offset); - uint32_t custom_data = linker::Thumb2RelativePatcher::EncodeBakerReadBarrierGcRootData( - root_reg.GetCode(), narrow); - vixl32::Label* bne_label = codegen_->NewBakerReadBarrierPatch(custom_data); - - vixl::EmissionCheckScope guard(GetVIXLAssembler(), 4 * vixl32::kMaxInstructionSizeInBytes); - vixl32::Label return_address; - EmitAdrCode adr(GetVIXLAssembler(), lr, &return_address); - __ cmp(mr, Operand(0)); - // Currently the offset is always within range. If that changes, - // we shall have to split the load the same way as for fields. - DCHECK_LT(offset, kReferenceLoadMinFarOffset); - ptrdiff_t old_offset = GetVIXLAssembler()->GetBuffer()->GetCursorOffset(); - __ ldr(EncodingSize(narrow ? Narrow : Wide), root_reg, MemOperand(obj, offset)); - EmitPlaceholderBne(codegen_, bne_label); - __ Bind(&return_address); - DCHECK_EQ(old_offset - GetVIXLAssembler()->GetBuffer()->GetCursorOffset(), - narrow ? BAKER_MARK_INTROSPECTION_GC_ROOT_LDR_NARROW_OFFSET - : BAKER_MARK_INTROSPECTION_GC_ROOT_LDR_WIDE_OFFSET); - } else { - // Query `art::Thread::Current()->GetIsGcMarking()` (stored in - // the Marking Register) to decide whether we need to enter - // the slow path to mark the GC root. - // - // GcRoot<mirror::Object> root = *(obj+offset); // Original reference load. - // if (mr) { // Thread::Current()->GetIsGcMarking() - // // Slow path. - // entrypoint = Thread::Current()->pReadBarrierMarkReg ## root.reg() - // root = entrypoint(root); // root = ReadBarrier::Mark(root); // Entry point call. - // } - - // Slow path marking the GC root `root`. The entrypoint will - // be loaded by the slow path code. - SlowPathCodeARMVIXL* slow_path = - new (codegen_->GetScopedAllocator()) ReadBarrierMarkSlowPathARMVIXL(instruction, root); - codegen_->AddSlowPath(slow_path); + // Query `art::Thread::Current()->GetIsGcMarking()` (stored in + // the Marking Register) to decide whether we need to enter + // the slow path to mark the GC root. + // + // We use shared thunks for the slow path; shared within the method + // for JIT, across methods for AOT. That thunk checks the reference + // and jumps to the entrypoint if needed. + // + // lr = &return_address; + // GcRoot<mirror::Object> root = *(obj+offset); // Original reference load. 
+ // if (mr) { // Thread::Current()->GetIsGcMarking() + // goto gc_root_thunk<root_reg>(lr) + // } + // return_address: - // /* GcRoot<mirror::Object> */ root = *(obj + offset) - GetAssembler()->LoadFromOffset(kLoadWord, root_reg, obj, offset); - static_assert( - sizeof(mirror::CompressedReference<mirror::Object>) == sizeof(GcRoot<mirror::Object>), - "art::mirror::CompressedReference<mirror::Object> and art::GcRoot<mirror::Object> " - "have different sizes."); - static_assert(sizeof(mirror::CompressedReference<mirror::Object>) == sizeof(int32_t), - "art::mirror::CompressedReference<mirror::Object> and int32_t " - "have different sizes."); - - __ CompareAndBranchIfNonZero(mr, slow_path->GetEntryLabel()); - __ Bind(slow_path->GetExitLabel()); - } + UseScratchRegisterScope temps(GetVIXLAssembler()); + temps.Exclude(ip); + bool narrow = CanEmitNarrowLdr(root_reg, obj, offset); + uint32_t custom_data = EncodeBakerReadBarrierGcRootData(root_reg.GetCode(), narrow); + + size_t narrow_instructions = /* CMP */ (mr.IsLow() ? 1u : 0u) + /* LDR */ (narrow ? 1u : 0u); + size_t wide_instructions = /* ADR+CMP+LDR+BNE */ 4u - narrow_instructions; + size_t exact_size = wide_instructions * vixl32::k32BitT32InstructionSizeInBytes + + narrow_instructions * vixl32::k16BitT32InstructionSizeInBytes; + ExactAssemblyScope guard(GetVIXLAssembler(), exact_size); + vixl32::Label return_address; + EmitAdrCode adr(GetVIXLAssembler(), lr, &return_address); + __ cmp(mr, Operand(0)); + // Currently the offset is always within range. If that changes, + // we shall have to split the load the same way as for fields. + DCHECK_LT(offset, kReferenceLoadMinFarOffset); + ptrdiff_t old_offset = GetVIXLAssembler()->GetBuffer()->GetCursorOffset(); + __ ldr(EncodingSize(narrow ? Narrow : Wide), root_reg, MemOperand(obj, offset)); + EmitBakerReadBarrierBne(custom_data); + __ bind(&return_address); + DCHECK_EQ(old_offset - GetVIXLAssembler()->GetBuffer()->GetCursorOffset(), + narrow ? BAKER_MARK_INTROSPECTION_GC_ROOT_LDR_NARROW_OFFSET + : BAKER_MARK_INTROSPECTION_GC_ROOT_LDR_WIDE_OFFSET); } else { // GC root loaded through a slow path for read barriers other // than Baker's. // /* GcRoot<mirror::Object>* */ root = obj + offset __ Add(root_reg, obj, offset); // /* mirror::Object* */ root = root->Read() - codegen_->GenerateReadBarrierForRootSlow(instruction, root, root); + GenerateReadBarrierForRootSlow(instruction, root, root); } } else { // Plain GC root load with no read barrier. @@ -8432,112 +8388,129 @@ void InstructionCodeGeneratorARMVIXL::GenerateGcRootFieldLoad( // Note that GC roots are not affected by heap poisoning, thus we // do not have to unpoison `root_reg` here. } - codegen_->MaybeGenerateMarkingRegisterCheck(/* code */ 18); + MaybeGenerateMarkingRegisterCheck(/* code= */ 19); } -void CodeGeneratorARMVIXL::MaybeAddBakerCcEntrypointTempForFields(LocationSummary* locations) { +void CodeGeneratorARMVIXL::GenerateUnsafeCasOldValueAddWithBakerReadBarrier( + vixl::aarch32::Register old_value, + vixl::aarch32::Register adjusted_old_value, + vixl::aarch32::Register expected) { DCHECK(kEmitCompilerReadBarrier); DCHECK(kUseBakerReadBarrier); - if (kBakerReadBarrierLinkTimeThunksEnableForFields) { - if (!Runtime::Current()->UseJitCompilation()) { - locations->AddTemp(Location::RegisterLocation(kBakerCcEntrypointRegister.GetCode())); - } - } + + // Similar to the Baker RB path in GenerateGcRootFieldLoad(), with an ADD instead of LDR. 
+ uint32_t custom_data = EncodeBakerReadBarrierUnsafeCasData(old_value.GetCode()); + + size_t narrow_instructions = /* CMP */ (mr.IsLow() ? 1u : 0u); + size_t wide_instructions = /* ADR+CMP+ADD+BNE */ 4u - narrow_instructions; + size_t exact_size = wide_instructions * vixl32::k32BitT32InstructionSizeInBytes + + narrow_instructions * vixl32::k16BitT32InstructionSizeInBytes; + ExactAssemblyScope guard(GetVIXLAssembler(), exact_size); + vixl32::Label return_address; + EmitAdrCode adr(GetVIXLAssembler(), lr, &return_address); + __ cmp(mr, Operand(0)); + ptrdiff_t old_offset = GetVIXLAssembler()->GetBuffer()->GetCursorOffset(); + __ add(EncodingSize(Wide), old_value, adjusted_old_value, Operand(expected)); // Preserves flags. + EmitBakerReadBarrierBne(custom_data); + __ bind(&return_address); + DCHECK_EQ(old_offset - GetVIXLAssembler()->GetBuffer()->GetCursorOffset(), + BAKER_MARK_INTROSPECTION_UNSAFE_CAS_ADD_OFFSET); } void CodeGeneratorARMVIXL::GenerateFieldLoadWithBakerReadBarrier(HInstruction* instruction, Location ref, vixl32::Register obj, - uint32_t offset, - Location temp, + const vixl32::MemOperand& src, bool needs_null_check) { DCHECK(kEmitCompilerReadBarrier); DCHECK(kUseBakerReadBarrier); - if (kBakerReadBarrierLinkTimeThunksEnableForFields && - !Runtime::Current()->UseJitCompilation()) { - // Query `art::Thread::Current()->GetIsGcMarking()` (stored in the - // Marking Register) to decide whether we need to enter the slow - // path to mark the reference. Then, in the slow path, check the - // gray bit in the lock word of the reference's holder (`obj`) to - // decide whether to mark `ref` or not. - // - // We use link-time generated thunks for the slow path. That thunk checks - // the holder and jumps to the entrypoint if needed. If the holder is not - // gray, it creates a fake dependency and returns to the LDR instruction. - // - // lr = &gray_return_address; - // if (mr) { // Thread::Current()->GetIsGcMarking() - // goto field_thunk<holder_reg, base_reg>(lr) - // } - // not_gray_return_address: - // // Original reference load. If the offset is too large to fit - // // into LDR, we use an adjusted base register here. - // HeapReference<mirror::Object> reference = *(obj+offset); - // gray_return_address: - - DCHECK_ALIGNED(offset, sizeof(mirror::HeapReference<mirror::Object>)); - vixl32::Register ref_reg = RegisterFrom(ref, DataType::Type::kReference); - bool narrow = CanEmitNarrowLdr(ref_reg, obj, offset); - vixl32::Register base = obj; - if (offset >= kReferenceLoadMinFarOffset) { - base = RegisterFrom(temp); - DCHECK(!base.Is(kBakerCcEntrypointRegister)); - static_assert(IsPowerOfTwo(kReferenceLoadMinFarOffset), "Expecting a power of 2."); - __ Add(base, obj, Operand(offset & ~(kReferenceLoadMinFarOffset - 1u))); - offset &= (kReferenceLoadMinFarOffset - 1u); - // Use narrow LDR only for small offsets. Generating narrow encoding LDR for the large - // offsets with `(offset & (kReferenceLoadMinFarOffset - 1u)) < 32u` would most likely - // increase the overall code size when taking the generated thunks into account. 
- DCHECK(!narrow); - } - UseScratchRegisterScope temps(GetVIXLAssembler()); - ExcludeIPAndBakerCcEntrypointRegister(&temps, instruction); - uint32_t custom_data = linker::Thumb2RelativePatcher::EncodeBakerReadBarrierFieldData( - base.GetCode(), obj.GetCode(), narrow); - vixl32::Label* bne_label = NewBakerReadBarrierPatch(custom_data); + // Query `art::Thread::Current()->GetIsGcMarking()` (stored in the + // Marking Register) to decide whether we need to enter the slow + // path to mark the reference. Then, in the slow path, check the + // gray bit in the lock word of the reference's holder (`obj`) to + // decide whether to mark `ref` or not. + // + // We use shared thunks for the slow path; shared within the method + // for JIT, across methods for AOT. That thunk checks the holder + // and jumps to the entrypoint if needed. If the holder is not gray, + // it creates a fake dependency and returns to the LDR instruction. + // + // lr = &gray_return_address; + // if (mr) { // Thread::Current()->GetIsGcMarking() + // goto field_thunk<holder_reg, base_reg>(lr) + // } + // not_gray_return_address: + // // Original reference load. If the offset is too large to fit + // // into LDR, we use an adjusted base register here. + // HeapReference<mirror::Object> reference = *(obj+offset); + // gray_return_address: - { - vixl::EmissionCheckScope guard( - GetVIXLAssembler(), - (kPoisonHeapReferences ? 5u : 4u) * vixl32::kMaxInstructionSizeInBytes); - vixl32::Label return_address; - EmitAdrCode adr(GetVIXLAssembler(), lr, &return_address); - __ cmp(mr, Operand(0)); - EmitPlaceholderBne(this, bne_label); - ptrdiff_t old_offset = GetVIXLAssembler()->GetBuffer()->GetCursorOffset(); - __ ldr(EncodingSize(narrow ? Narrow : Wide), ref_reg, MemOperand(base, offset)); - if (needs_null_check) { - MaybeRecordImplicitNullCheck(instruction); - } - // Note: We need a specific width for the unpoisoning NEG. - if (kPoisonHeapReferences) { - if (narrow) { - // The only 16-bit encoding is T1 which sets flags outside IT block (i.e. RSBS, not RSB). - __ rsbs(EncodingSize(Narrow), ref_reg, ref_reg, Operand(0)); - } else { - __ rsb(EncodingSize(Wide), ref_reg, ref_reg, Operand(0)); - } + DCHECK(src.GetAddrMode() == vixl32::Offset); + DCHECK_ALIGNED(src.GetOffsetImmediate(), sizeof(mirror::HeapReference<mirror::Object>)); + vixl32::Register ref_reg = RegisterFrom(ref, DataType::Type::kReference); + bool narrow = CanEmitNarrowLdr(ref_reg, src.GetBaseRegister(), src.GetOffsetImmediate()); + + UseScratchRegisterScope temps(GetVIXLAssembler()); + temps.Exclude(ip); + uint32_t custom_data = + EncodeBakerReadBarrierFieldData(src.GetBaseRegister().GetCode(), obj.GetCode(), narrow); + + { + size_t narrow_instructions = + /* CMP */ (mr.IsLow() ? 1u : 0u) + + /* LDR+unpoison? */ (narrow ? (kPoisonHeapReferences ? 2u : 1u) : 0u); + size_t wide_instructions = + /* ADR+CMP+LDR+BNE+unpoison? */ (kPoisonHeapReferences ? 5u : 4u) - narrow_instructions; + size_t exact_size = wide_instructions * vixl32::k32BitT32InstructionSizeInBytes + + narrow_instructions * vixl32::k16BitT32InstructionSizeInBytes; + ExactAssemblyScope guard(GetVIXLAssembler(), exact_size); + vixl32::Label return_address; + EmitAdrCode adr(GetVIXLAssembler(), lr, &return_address); + __ cmp(mr, Operand(0)); + EmitBakerReadBarrierBne(custom_data); + ptrdiff_t old_offset = GetVIXLAssembler()->GetBuffer()->GetCursorOffset(); + __ ldr(EncodingSize(narrow ? 
Narrow : Wide), ref_reg, src); + if (needs_null_check) { + MaybeRecordImplicitNullCheck(instruction); + } + // Note: We need a specific width for the unpoisoning NEG. + if (kPoisonHeapReferences) { + if (narrow) { + // The only 16-bit encoding is T1 which sets flags outside IT block (i.e. RSBS, not RSB). + __ rsbs(EncodingSize(Narrow), ref_reg, ref_reg, Operand(0)); + } else { + __ rsb(EncodingSize(Wide), ref_reg, ref_reg, Operand(0)); } - __ Bind(&return_address); - DCHECK_EQ(old_offset - GetVIXLAssembler()->GetBuffer()->GetCursorOffset(), - narrow ? BAKER_MARK_INTROSPECTION_FIELD_LDR_NARROW_OFFSET - : BAKER_MARK_INTROSPECTION_FIELD_LDR_WIDE_OFFSET); } - MaybeGenerateMarkingRegisterCheck(/* code */ 19, /* temp_loc */ LocationFrom(ip)); - return; + __ bind(&return_address); + DCHECK_EQ(old_offset - GetVIXLAssembler()->GetBuffer()->GetCursorOffset(), + narrow ? BAKER_MARK_INTROSPECTION_FIELD_LDR_NARROW_OFFSET + : BAKER_MARK_INTROSPECTION_FIELD_LDR_WIDE_OFFSET); } - - // /* HeapReference<Object> */ ref = *(obj + offset) - Location no_index = Location::NoLocation(); - ScaleFactor no_scale_factor = TIMES_1; - GenerateReferenceLoadWithBakerReadBarrier( - instruction, ref, obj, offset, no_index, no_scale_factor, temp, needs_null_check); + MaybeGenerateMarkingRegisterCheck(/* code= */ 20, /* temp_loc= */ LocationFrom(ip)); } -void CodeGeneratorARMVIXL::GenerateArrayLoadWithBakerReadBarrier(HInstruction* instruction, +void CodeGeneratorARMVIXL::GenerateFieldLoadWithBakerReadBarrier(HInstruction* instruction, Location ref, vixl32::Register obj, + uint32_t offset, + Location temp, + bool needs_null_check) { + DCHECK_ALIGNED(offset, sizeof(mirror::HeapReference<mirror::Object>)); + vixl32::Register base = obj; + if (offset >= kReferenceLoadMinFarOffset) { + base = RegisterFrom(temp); + static_assert(IsPowerOfTwo(kReferenceLoadMinFarOffset), "Expecting a power of 2."); + __ Add(base, obj, Operand(offset & ~(kReferenceLoadMinFarOffset - 1u))); + offset &= (kReferenceLoadMinFarOffset - 1u); + } + GenerateFieldLoadWithBakerReadBarrier( + instruction, ref, obj, MemOperand(base, offset), needs_null_check); +} + +void CodeGeneratorARMVIXL::GenerateArrayLoadWithBakerReadBarrier(Location ref, + vixl32::Register obj, uint32_t data_offset, Location index, Location temp, @@ -8550,229 +8523,60 @@ void CodeGeneratorARMVIXL::GenerateArrayLoadWithBakerReadBarrier(HInstruction* i "art::mirror::HeapReference<art::mirror::Object> and int32_t have different sizes."); ScaleFactor scale_factor = TIMES_4; - if (kBakerReadBarrierLinkTimeThunksEnableForArrays && - !Runtime::Current()->UseJitCompilation()) { - // Query `art::Thread::Current()->GetIsGcMarking()` (stored in the - // Marking Register) to decide whether we need to enter the slow - // path to mark the reference. Then, in the slow path, check the - // gray bit in the lock word of the reference's holder (`obj`) to - // decide whether to mark `ref` or not. - // - // We use link-time generated thunks for the slow path. That thunk checks - // the holder and jumps to the entrypoint if needed. If the holder is not - // gray, it creates a fake dependency and returns to the LDR instruction. - // - // lr = &gray_return_address; - // if (mr) { // Thread::Current()->GetIsGcMarking() - // goto array_thunk<base_reg>(lr) - // } - // not_gray_return_address: - // // Original reference load. If the offset is too large to fit - // // into LDR, we use an adjusted base register here. 
- // HeapReference<mirror::Object> reference = data[index]; - // gray_return_address: - - DCHECK(index.IsValid()); - vixl32::Register index_reg = RegisterFrom(index, DataType::Type::kInt32); - vixl32::Register ref_reg = RegisterFrom(ref, DataType::Type::kReference); - vixl32::Register data_reg = RegisterFrom(temp, DataType::Type::kInt32); // Raw pointer. - DCHECK(!data_reg.Is(kBakerCcEntrypointRegister)); - - UseScratchRegisterScope temps(GetVIXLAssembler()); - ExcludeIPAndBakerCcEntrypointRegister(&temps, instruction); - uint32_t custom_data = - linker::Thumb2RelativePatcher::EncodeBakerReadBarrierArrayData(data_reg.GetCode()); - vixl32::Label* bne_label = NewBakerReadBarrierPatch(custom_data); - - __ Add(data_reg, obj, Operand(data_offset)); - { - vixl::EmissionCheckScope guard( - GetVIXLAssembler(), - (kPoisonHeapReferences ? 5u : 4u) * vixl32::kMaxInstructionSizeInBytes); - vixl32::Label return_address; - EmitAdrCode adr(GetVIXLAssembler(), lr, &return_address); - __ cmp(mr, Operand(0)); - EmitPlaceholderBne(this, bne_label); - ptrdiff_t old_offset = GetVIXLAssembler()->GetBuffer()->GetCursorOffset(); - __ ldr(ref_reg, MemOperand(data_reg, index_reg, vixl32::LSL, scale_factor)); - DCHECK(!needs_null_check); // The thunk cannot handle the null check. - // Note: We need a Wide NEG for the unpoisoning. - if (kPoisonHeapReferences) { - __ rsb(EncodingSize(Wide), ref_reg, ref_reg, Operand(0)); - } - __ Bind(&return_address); - DCHECK_EQ(old_offset - GetVIXLAssembler()->GetBuffer()->GetCursorOffset(), - BAKER_MARK_INTROSPECTION_ARRAY_LDR_OFFSET); - } - MaybeGenerateMarkingRegisterCheck(/* code */ 20, /* temp_loc */ LocationFrom(ip)); - return; - } - - // /* HeapReference<Object> */ ref = - // *(obj + data_offset + index * sizeof(HeapReference<Object>)) - GenerateReferenceLoadWithBakerReadBarrier( - instruction, ref, obj, data_offset, index, scale_factor, temp, needs_null_check); -} - -void CodeGeneratorARMVIXL::GenerateReferenceLoadWithBakerReadBarrier(HInstruction* instruction, - Location ref, - vixl32::Register obj, - uint32_t offset, - Location index, - ScaleFactor scale_factor, - Location temp, - bool needs_null_check) { - DCHECK(kEmitCompilerReadBarrier); - DCHECK(kUseBakerReadBarrier); - // Query `art::Thread::Current()->GetIsGcMarking()` (stored in the // Marking Register) to decide whether we need to enter the slow // path to mark the reference. Then, in the slow path, check the // gray bit in the lock word of the reference's holder (`obj`) to // decide whether to mark `ref` or not. // - // if (mr) { // Thread::Current()->GetIsGcMarking() - // // Slow path. - // uint32_t rb_state = Lockword(obj->monitor_).ReadBarrierState(); - // lfence; // Load fence or artificial data dependency to prevent load-load reordering - // HeapReference<mirror::Object> ref = *src; // Original reference load. - // bool is_gray = (rb_state == ReadBarrier::GrayState()); - // if (is_gray) { - // entrypoint = Thread::Current()->pReadBarrierMarkReg ## root.reg() - // ref = entrypoint(ref); // ref = ReadBarrier::Mark(ref); // Runtime entry point call. - // } - // } else { - // HeapReference<mirror::Object> ref = *src; // Original reference load. - // } - - vixl32::Register temp_reg = RegisterFrom(temp); - - // Slow path marking the object `ref` when the GC is marking. The - // entrypoint will be loaded by the slow path code. 
- SlowPathCodeARMVIXL* slow_path = - new (GetScopedAllocator()) LoadReferenceWithBakerReadBarrierSlowPathARMVIXL( - instruction, ref, obj, offset, index, scale_factor, needs_null_check, temp_reg); - AddSlowPath(slow_path); - - __ CompareAndBranchIfNonZero(mr, slow_path->GetEntryLabel()); - // Fast path: the GC is not marking: just load the reference. - GenerateRawReferenceLoad(instruction, ref, obj, offset, index, scale_factor, needs_null_check); - __ Bind(slow_path->GetExitLabel()); - MaybeGenerateMarkingRegisterCheck(/* code */ 21); -} - -void CodeGeneratorARMVIXL::UpdateReferenceFieldWithBakerReadBarrier(HInstruction* instruction, - Location ref, - vixl32::Register obj, - Location field_offset, - Location temp, - bool needs_null_check, - vixl32::Register temp2) { - DCHECK(kEmitCompilerReadBarrier); - DCHECK(kUseBakerReadBarrier); - - // Query `art::Thread::Current()->GetIsGcMarking()` (stored in the - // Marking Register) to decide whether we need to enter the slow - // path to update the reference field within `obj`. Then, in the - // slow path, check the gray bit in the lock word of the reference's - // holder (`obj`) to decide whether to mark `ref` and update the - // field or not. + // We use shared thunks for the slow path; shared within the method + // for JIT, across methods for AOT. That thunk checks the holder + // and jumps to the entrypoint if needed. If the holder is not gray, + // it creates a fake dependency and returns to the LDR instruction. // - // if (mr) { // Thread::Current()->GetIsGcMarking() - // // Slow path. - // uint32_t rb_state = Lockword(obj->monitor_).ReadBarrierState(); - // lfence; // Load fence or artificial data dependency to prevent load-load reordering - // HeapReference<mirror::Object> ref = *(obj + field_offset); // Reference load. - // bool is_gray = (rb_state == ReadBarrier::GrayState()); - // if (is_gray) { - // old_ref = ref; - // entrypoint = Thread::Current()->pReadBarrierMarkReg ## root.reg() - // ref = entrypoint(ref); // ref = ReadBarrier::Mark(ref); // Runtime entry point call. - // compareAndSwapObject(obj, field_offset, old_ref, ref); + // lr = &gray_return_address; + // if (mr) { // Thread::Current()->GetIsGcMarking() + // goto array_thunk<base_reg>(lr) // } - // } + // not_gray_return_address: + // // Original reference load. If the offset is too large to fit + // // into LDR, we use an adjusted base register here. + // HeapReference<mirror::Object> reference = data[index]; + // gray_return_address: - vixl32::Register temp_reg = RegisterFrom(temp); + DCHECK(index.IsValid()); + vixl32::Register index_reg = RegisterFrom(index, DataType::Type::kInt32); + vixl32::Register ref_reg = RegisterFrom(ref, DataType::Type::kReference); + vixl32::Register data_reg = RegisterFrom(temp, DataType::Type::kInt32); // Raw pointer. - // Slow path updating the object reference at address `obj + field_offset` - // when the GC is marking. The entrypoint will be loaded by the slow path code. 
- SlowPathCodeARMVIXL* slow_path = - new (GetScopedAllocator()) LoadReferenceWithBakerReadBarrierAndUpdateFieldSlowPathARMVIXL( - instruction, - ref, - obj, - /* offset */ 0u, - /* index */ field_offset, - /* scale_factor */ ScaleFactor::TIMES_1, - needs_null_check, - temp_reg, - temp2); - AddSlowPath(slow_path); + UseScratchRegisterScope temps(GetVIXLAssembler()); + temps.Exclude(ip); + uint32_t custom_data = EncodeBakerReadBarrierArrayData(data_reg.GetCode()); - __ CompareAndBranchIfNonZero(mr, slow_path->GetEntryLabel()); - // Fast path: the GC is not marking: nothing to do (the field is - // up-to-date, and we don't need to load the reference). - __ Bind(slow_path->GetExitLabel()); - MaybeGenerateMarkingRegisterCheck(/* code */ 22); -} - -void CodeGeneratorARMVIXL::GenerateRawReferenceLoad(HInstruction* instruction, - Location ref, - vixl::aarch32::Register obj, - uint32_t offset, - Location index, - ScaleFactor scale_factor, - bool needs_null_check) { - DataType::Type type = DataType::Type::kReference; - vixl32::Register ref_reg = RegisterFrom(ref, type); - - // If needed, vixl::EmissionCheckScope guards are used to ensure - // that no pools are emitted between the load (macro) instruction - // and MaybeRecordImplicitNullCheck. - - if (index.IsValid()) { - // Load types involving an "index": ArrayGet, - // UnsafeGetObject/UnsafeGetObjectVolatile and UnsafeCASObject - // intrinsics. - // /* HeapReference<mirror::Object> */ ref = *(obj + offset + (index << scale_factor)) - if (index.IsConstant()) { - size_t computed_offset = - (Int32ConstantFrom(index) << scale_factor) + offset; - vixl::EmissionCheckScope guard(GetVIXLAssembler(), kMaxMacroInstructionSizeInBytes); - GetAssembler()->LoadFromOffset(kLoadWord, ref_reg, obj, computed_offset); - if (needs_null_check) { - MaybeRecordImplicitNullCheck(instruction); - } - } else { - // Handle the special case of the - // UnsafeGetObject/UnsafeGetObjectVolatile and UnsafeCASObject - // intrinsics, which use a register pair as index ("long - // offset"), of which only the low part contains data. - vixl32::Register index_reg = index.IsRegisterPair() - ? LowRegisterFrom(index) - : RegisterFrom(index); - UseScratchRegisterScope temps(GetVIXLAssembler()); - vixl32::Register temp = temps.Acquire(); - __ Add(temp, obj, Operand(index_reg, ShiftType::LSL, scale_factor)); - { - vixl::EmissionCheckScope guard(GetVIXLAssembler(), kMaxMacroInstructionSizeInBytes); - GetAssembler()->LoadFromOffset(kLoadWord, ref_reg, temp, offset); - if (needs_null_check) { - MaybeRecordImplicitNullCheck(instruction); - } - } - } - } else { - // /* HeapReference<mirror::Object> */ ref = *(obj + offset) - vixl::EmissionCheckScope guard(GetVIXLAssembler(), kMaxMacroInstructionSizeInBytes); - GetAssembler()->LoadFromOffset(kLoadWord, ref_reg, obj, offset); - if (needs_null_check) { - MaybeRecordImplicitNullCheck(instruction); + __ Add(data_reg, obj, Operand(data_offset)); + { + size_t narrow_instructions = /* CMP */ (mr.IsLow() ? 1u : 0u); + size_t wide_instructions = + /* ADR+CMP+BNE+LDR+unpoison? */ (kPoisonHeapReferences ? 
5u : 4u) - narrow_instructions; + size_t exact_size = wide_instructions * vixl32::k32BitT32InstructionSizeInBytes + + narrow_instructions * vixl32::k16BitT32InstructionSizeInBytes; + ExactAssemblyScope guard(GetVIXLAssembler(), exact_size); + vixl32::Label return_address; + EmitAdrCode adr(GetVIXLAssembler(), lr, &return_address); + __ cmp(mr, Operand(0)); + EmitBakerReadBarrierBne(custom_data); + ptrdiff_t old_offset = GetVIXLAssembler()->GetBuffer()->GetCursorOffset(); + __ ldr(ref_reg, MemOperand(data_reg, index_reg, vixl32::LSL, scale_factor)); + DCHECK(!needs_null_check); // The thunk cannot handle the null check. + // Note: We need a Wide NEG for the unpoisoning. + if (kPoisonHeapReferences) { + __ rsb(EncodingSize(Wide), ref_reg, ref_reg, Operand(0)); } + __ bind(&return_address); + DCHECK_EQ(old_offset - GetVIXLAssembler()->GetBuffer()->GetCursorOffset(), + BAKER_MARK_INTROSPECTION_ARRAY_LDR_OFFSET); } - - // Object* ref = ref_addr->AsMirrorPtr() - GetAssembler()->MaybeUnpoisonHeapReference(ref_reg); + MaybeGenerateMarkingRegisterCheck(/* code= */ 21, /* temp_loc= */ LocationFrom(ip)); } void CodeGeneratorARMVIXL::MaybeGenerateMarkingRegisterCheck(int code, Location temp_loc) { @@ -8855,7 +8659,7 @@ void CodeGeneratorARMVIXL::GenerateReadBarrierForRootSlow(HInstruction* instruct // otherwise return a fall-back info that should be used instead. HInvokeStaticOrDirect::DispatchInfo CodeGeneratorARMVIXL::GetSupportedInvokeStaticOrDirectDispatch( const HInvokeStaticOrDirect::DispatchInfo& desired_dispatch_info, - HInvokeStaticOrDirect* invoke ATTRIBUTE_UNUSED) { + ArtMethod* method ATTRIBUTE_UNUSED) { return desired_dispatch_info; } @@ -8905,9 +8709,14 @@ void CodeGeneratorARMVIXL::GenerateStaticOrDirectCall( EmitMovwMovtPlaceholder(labels, temp_reg); break; } - case HInvokeStaticOrDirect::MethodLoadKind::kDirectAddress: - __ Mov(RegisterFrom(temp), Operand::From(invoke->GetMethodAddress())); + case HInvokeStaticOrDirect::MethodLoadKind::kBootImageRelRo: { + uint32_t boot_image_offset = GetBootImageOffset(invoke); + PcRelativePatchInfo* labels = NewBootImageRelRoPatch(boot_image_offset); + vixl32::Register temp_reg = RegisterFrom(temp); + EmitMovwMovtPlaceholder(labels, temp_reg); + GetAssembler()->LoadFromOffset(kLoadWord, temp_reg, temp_reg, /* offset*/ 0); break; + } case HInvokeStaticOrDirect::MethodLoadKind::kBssEntry: { PcRelativePatchInfo* labels = NewMethodBssEntryPatch( MethodReference(&GetGraph()->GetDexFile(), invoke->GetDexMethodIndex())); @@ -8916,6 +8725,9 @@ void CodeGeneratorARMVIXL::GenerateStaticOrDirectCall( GetAssembler()->LoadFromOffset(kLoadWord, temp_reg, temp_reg, /* offset*/ 0); break; } + case HInvokeStaticOrDirect::MethodLoadKind::kJitDirectAddress: + __ Mov(RegisterFrom(temp), Operand::From(invoke->GetMethodAddress())); + break; case HInvokeStaticOrDirect::MethodLoadKind::kRuntimeCall: { GenerateInvokeStaticOrDirectRuntimeCall(invoke, temp, slow_path); return; // No code pointer retrieval; the runtime performs the call directly. 
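Note (illustrative sketch, not from the patch): the exact_size bookkeeping guarded by ExactAssemblyScope in the field- and array-load paths above is just a byte count over the Thumb2 encodings about to be emitted, where a wide T32 instruction is vixl32::k32BitT32InstructionSizeInBytes (4 bytes) and a narrow one is vixl32::k16BitT32InstructionSizeInBytes (2 bytes). A free-standing sketch of the array-load case, with an assumed helper name:

// Mirrors the computation in GenerateArrayLoadWithBakerReadBarrier above;
// the function name and standalone form are illustrative only.
size_t ExactSizeForArrayLoad(bool mr_is_low, bool poison_heap_references) {
  size_t narrow_instructions = mr_is_low ? 1u : 0u;  // CMP mr, #0 has a 16-bit encoding.
  size_t wide_instructions =
      (poison_heap_references ? 5u : 4u) - narrow_instructions;  // ADR+CMP+BNE+LDR(+NEG)
  return wide_instructions * 4u + narrow_instructions * 2u;
}
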
@@ -9005,6 +8817,18 @@ void CodeGeneratorARMVIXL::GenerateVirtualCall( } } +CodeGeneratorARMVIXL::PcRelativePatchInfo* CodeGeneratorARMVIXL::NewBootImageIntrinsicPatch( + uint32_t intrinsic_data) { + return NewPcRelativePatch(/* dex_file= */ nullptr, intrinsic_data, &boot_image_intrinsic_patches_); +} + +CodeGeneratorARMVIXL::PcRelativePatchInfo* CodeGeneratorARMVIXL::NewBootImageRelRoPatch( + uint32_t boot_image_offset) { + return NewPcRelativePatch(/* dex_file= */ nullptr, + boot_image_offset, + &boot_image_method_patches_); +} + CodeGeneratorARMVIXL::PcRelativePatchInfo* CodeGeneratorARMVIXL::NewBootImageMethodPatch( MethodReference target_method) { return NewPcRelativePatch( @@ -9043,13 +8867,24 @@ CodeGeneratorARMVIXL::PcRelativePatchInfo* CodeGeneratorARMVIXL::NewPcRelativePa return &patches->back(); } -vixl::aarch32::Label* CodeGeneratorARMVIXL::NewBakerReadBarrierPatch(uint32_t custom_data) { - baker_read_barrier_patches_.emplace_back(custom_data); - return &baker_read_barrier_patches_.back().label; +void CodeGeneratorARMVIXL::EmitBakerReadBarrierBne(uint32_t custom_data) { + DCHECK(!__ AllowMacroInstructions()); // In ExactAssemblyScope. + if (Runtime::Current()->UseJitCompilation()) { + auto it = jit_baker_read_barrier_slow_paths_.FindOrAdd(custom_data); + vixl::aarch32::Label* slow_path_entry = &it->second.label; + __ b(ne, EncodingSize(Wide), slow_path_entry); + } else { + baker_read_barrier_patches_.emplace_back(custom_data); + vixl::aarch32::Label* patch_label = &baker_read_barrier_patches_.back().label; + __ bind(patch_label); + vixl32::Label placeholder_label; + __ b(ne, EncodingSize(Wide), &placeholder_label); // Placeholder, patched at link-time. + __ bind(&placeholder_label); + } } VIXLUInt32Literal* CodeGeneratorARMVIXL::DeduplicateBootImageAddressLiteral(uint32_t address) { - return DeduplicateUint32Literal(dchecked_integral_cast<uint32_t>(address), &uint32_literals_); + return DeduplicateUint32Literal(address, &uint32_literals_); } VIXLUInt32Literal* CodeGeneratorARMVIXL::DeduplicateJitStringLiteral( @@ -9060,7 +8895,7 @@ VIXLUInt32Literal* CodeGeneratorARMVIXL::DeduplicateJitStringLiteral( return jit_string_patches_.GetOrCreate( StringReference(&dex_file, string_index), [this]() { - return GetAssembler()->CreateLiteralDestroyedWithPool<uint32_t>(/* placeholder */ 0u); + return GetAssembler()->CreateLiteralDestroyedWithPool<uint32_t>(/* value= */ 0u); }); } @@ -9071,10 +8906,50 @@ VIXLUInt32Literal* CodeGeneratorARMVIXL::DeduplicateJitClassLiteral(const DexFil return jit_class_patches_.GetOrCreate( TypeReference(&dex_file, type_index), [this]() { - return GetAssembler()->CreateLiteralDestroyedWithPool<uint32_t>(/* placeholder */ 0u); + return GetAssembler()->CreateLiteralDestroyedWithPool<uint32_t>(/* value= */ 0u); }); } +void CodeGeneratorARMVIXL::LoadBootImageAddress(vixl32::Register reg, + uint32_t boot_image_reference) { + if (GetCompilerOptions().IsBootImage()) { + CodeGeneratorARMVIXL::PcRelativePatchInfo* labels = + NewBootImageIntrinsicPatch(boot_image_reference); + EmitMovwMovtPlaceholder(labels, reg); + } else if (GetCompilerOptions().GetCompilePic()) { + CodeGeneratorARMVIXL::PcRelativePatchInfo* labels = + NewBootImageRelRoPatch(boot_image_reference); + EmitMovwMovtPlaceholder(labels, reg); + __ Ldr(reg, MemOperand(reg, /* offset= */ 0)); + } else { + DCHECK(Runtime::Current()->UseJitCompilation()); + gc::Heap* heap = Runtime::Current()->GetHeap(); + DCHECK(!heap->GetBootImageSpaces().empty()); + uintptr_t address = + 
reinterpret_cast<uintptr_t>(heap->GetBootImageSpaces()[0]->Begin() + boot_image_reference); + __ Ldr(reg, DeduplicateBootImageAddressLiteral(dchecked_integral_cast<uint32_t>(address))); + } +} + +void CodeGeneratorARMVIXL::AllocateInstanceForIntrinsic(HInvokeStaticOrDirect* invoke, + uint32_t boot_image_offset) { + DCHECK(invoke->IsStatic()); + InvokeRuntimeCallingConventionARMVIXL calling_convention; + vixl32::Register argument = calling_convention.GetRegisterAt(0); + if (GetCompilerOptions().IsBootImage()) { + DCHECK_EQ(boot_image_offset, IntrinsicVisitor::IntegerValueOfInfo::kInvalidReference); + // Load the class the same way as for HLoadClass::LoadKind::kBootImageLinkTimePcRelative. + MethodReference target_method = invoke->GetTargetMethod(); + dex::TypeIndex type_idx = target_method.dex_file->GetMethodId(target_method.index).class_idx_; + PcRelativePatchInfo* labels = NewBootImageTypePatch(*target_method.dex_file, type_idx); + EmitMovwMovtPlaceholder(labels, argument); + } else { + LoadBootImageAddress(argument, boot_image_offset); + } + InvokeRuntime(kQuickAllocObjectInitialized, invoke, invoke->GetDexPc()); + CheckEntrypointTypes<kQuickAllocObjectWithChecks, void*, mirror::Class*>(); +} + template <linker::LinkerPatch (*Factory)(size_t, const DexFile*, uint32_t, uint32_t)> inline void CodeGeneratorARMVIXL::EmitPcRelativeLinkerPatches( const ArenaDeque<PcRelativePatchInfo>& infos, @@ -9095,6 +8970,15 @@ inline void CodeGeneratorARMVIXL::EmitPcRelativeLinkerPatches( } } +template <linker::LinkerPatch (*Factory)(size_t, uint32_t, uint32_t)> +linker::LinkerPatch NoDexFileAdapter(size_t literal_offset, + const DexFile* target_dex_file, + uint32_t pc_insn_offset, + uint32_t boot_image_offset) { + DCHECK(target_dex_file == nullptr); // Unused for these patches, should be null. 
+ return Factory(literal_offset, pc_insn_offset, boot_image_offset); +} + void CodeGeneratorARMVIXL::EmitLinkerPatches(ArenaVector<linker::LinkerPatch>* linker_patches) { DCHECK(linker_patches->empty()); size_t size = @@ -9104,6 +8988,7 @@ void CodeGeneratorARMVIXL::EmitLinkerPatches(ArenaVector<linker::LinkerPatch>* l /* MOVW+MOVT for each entry */ 2u * type_bss_entry_patches_.size() + /* MOVW+MOVT for each entry */ 2u * boot_image_string_patches_.size() + /* MOVW+MOVT for each entry */ 2u * string_bss_entry_patches_.size() + + /* MOVW+MOVT for each entry */ 2u * boot_image_intrinsic_patches_.size() + baker_read_barrier_patches_.size(); linker_patches->reserve(size); if (GetCompilerOptions().IsBootImage()) { @@ -9113,12 +8998,14 @@ void CodeGeneratorARMVIXL::EmitLinkerPatches(ArenaVector<linker::LinkerPatch>* l boot_image_type_patches_, linker_patches); EmitPcRelativeLinkerPatches<linker::LinkerPatch::RelativeStringPatch>( boot_image_string_patches_, linker_patches); + EmitPcRelativeLinkerPatches<NoDexFileAdapter<linker::LinkerPatch::IntrinsicReferencePatch>>( + boot_image_intrinsic_patches_, linker_patches); } else { - DCHECK(boot_image_method_patches_.empty()); - EmitPcRelativeLinkerPatches<linker::LinkerPatch::TypeClassTablePatch>( - boot_image_type_patches_, linker_patches); - EmitPcRelativeLinkerPatches<linker::LinkerPatch::StringInternTablePatch>( - boot_image_string_patches_, linker_patches); + EmitPcRelativeLinkerPatches<NoDexFileAdapter<linker::LinkerPatch::DataBimgRelRoPatch>>( + boot_image_method_patches_, linker_patches); + DCHECK(boot_image_type_patches_.empty()); + DCHECK(boot_image_string_patches_.empty()); + DCHECK(boot_image_intrinsic_patches_.empty()); } EmitPcRelativeLinkerPatches<linker::LinkerPatch::MethodBssEntryPatch>( method_bss_entry_patches_, linker_patches); @@ -9133,13 +9020,52 @@ void CodeGeneratorARMVIXL::EmitLinkerPatches(ArenaVector<linker::LinkerPatch>* l DCHECK_EQ(size, linker_patches->size()); } +bool CodeGeneratorARMVIXL::NeedsThunkCode(const linker::LinkerPatch& patch) const { + return patch.GetType() == linker::LinkerPatch::Type::kBakerReadBarrierBranch || + patch.GetType() == linker::LinkerPatch::Type::kCallRelative; +} + +void CodeGeneratorARMVIXL::EmitThunkCode(const linker::LinkerPatch& patch, + /*out*/ ArenaVector<uint8_t>* code, + /*out*/ std::string* debug_name) { + arm::ArmVIXLAssembler assembler(GetGraph()->GetAllocator()); + switch (patch.GetType()) { + case linker::LinkerPatch::Type::kCallRelative: + // The thunk just uses the entry point in the ArtMethod. This works even for calls + // to the generic JNI and interpreter trampolines. + assembler.LoadFromOffset( + arm::kLoadWord, + vixl32::pc, + vixl32::r0, + ArtMethod::EntryPointFromQuickCompiledCodeOffset(kArmPointerSize).Int32Value()); + assembler.GetVIXLAssembler()->Bkpt(0); + if (GetCompilerOptions().GenerateAnyDebugInfo()) { + *debug_name = "MethodCallThunk"; + } + break; + case linker::LinkerPatch::Type::kBakerReadBarrierBranch: + DCHECK_EQ(patch.GetBakerCustomValue2(), 0u); + CompileBakerReadBarrierThunk(assembler, patch.GetBakerCustomValue1(), debug_name); + break; + default: + LOG(FATAL) << "Unexpected patch type " << patch.GetType(); + UNREACHABLE(); + } + + // Ensure we emit the literal pool if any. 
+ assembler.FinalizeCode(); + code->resize(assembler.CodeSize()); + MemoryRegion code_region(code->data(), code->size()); + assembler.FinalizeInstructions(code_region); +} + VIXLUInt32Literal* CodeGeneratorARMVIXL::DeduplicateUint32Literal( uint32_t value, Uint32ToLiteralMap* map) { return map->GetOrCreate( value, [this, value]() { - return GetAssembler()->CreateLiteralDestroyedWithPool<uint32_t>(/* placeholder */ value); + return GetAssembler()->CreateLiteralDestroyedWithPool<uint32_t>(/* value= */ value); }); } @@ -9366,9 +9292,9 @@ void CodeGeneratorARMVIXL::EmitMovwMovtPlaceholder( CodeBufferCheckScope::kMaximumSize); // TODO(VIXL): Think about using mov instead of movw. __ bind(&labels->movw_label); - __ movw(out, /* placeholder */ 0u); + __ movw(out, /* operand= */ 0u); __ bind(&labels->movt_label); - __ movt(out, /* placeholder */ 0u); + __ movt(out, /* operand= */ 0u); __ bind(&labels->add_pc_label); __ add(out, out, pc); } @@ -9377,5 +9303,224 @@ void CodeGeneratorARMVIXL::EmitMovwMovtPlaceholder( #undef QUICK_ENTRY_POINT #undef TODO_VIXL32 +#define __ assembler.GetVIXLAssembler()-> + +static void EmitGrayCheckAndFastPath(ArmVIXLAssembler& assembler, + vixl32::Register base_reg, + vixl32::MemOperand& lock_word, + vixl32::Label* slow_path, + int32_t raw_ldr_offset, + vixl32::Label* throw_npe = nullptr) { + // Load the lock word containing the rb_state. + __ Ldr(ip, lock_word); + // Given the numeric representation, it's enough to check the low bit of the rb_state. + static_assert(ReadBarrier::NonGrayState() == 0, "Expecting non-gray to have value 0"); + static_assert(ReadBarrier::GrayState() == 1, "Expecting gray to have value 1"); + __ Tst(ip, Operand(LockWord::kReadBarrierStateMaskShifted)); + __ B(ne, slow_path, /* is_far_target= */ false); + // To throw NPE, we return to the fast path; the artificial dependence below does not matter. + if (throw_npe != nullptr) { + __ Bind(throw_npe); + } + __ Add(lr, lr, raw_ldr_offset); + // Introduce a dependency on the lock_word including rb_state, + // to prevent load-load reordering, and without using + // a memory barrier (which would be more expensive). + __ Add(base_reg, base_reg, Operand(ip, LSR, 32)); + __ Bx(lr); // And return back to the function. + // Note: The fake dependency is unnecessary for the slow path. +} + +// Load the read barrier introspection entrypoint in register `entrypoint` +static vixl32::Register LoadReadBarrierMarkIntrospectionEntrypoint(ArmVIXLAssembler& assembler) { + // The register where the read barrier introspection entrypoint is loaded + // is the marking register. We clobber it here and the entrypoint restores it to 1. + vixl32::Register entrypoint = mr; + // entrypoint = Thread::Current()->pReadBarrierMarkReg12, i.e. pReadBarrierMarkIntrospection. 
+ DCHECK_EQ(ip.GetCode(), 12u); + const int32_t entry_point_offset = + Thread::ReadBarrierMarkEntryPointsOffset<kArmPointerSize>(ip.GetCode()); + __ Ldr(entrypoint, MemOperand(tr, entry_point_offset)); + return entrypoint; +} + +void CodeGeneratorARMVIXL::CompileBakerReadBarrierThunk(ArmVIXLAssembler& assembler, + uint32_t encoded_data, + /*out*/ std::string* debug_name) { + BakerReadBarrierKind kind = BakerReadBarrierKindField::Decode(encoded_data); + switch (kind) { + case BakerReadBarrierKind::kField: { + vixl32::Register base_reg(BakerReadBarrierFirstRegField::Decode(encoded_data)); + CheckValidReg(base_reg.GetCode()); + vixl32::Register holder_reg(BakerReadBarrierSecondRegField::Decode(encoded_data)); + CheckValidReg(holder_reg.GetCode()); + BakerReadBarrierWidth width = BakerReadBarrierWidthField::Decode(encoded_data); + UseScratchRegisterScope temps(assembler.GetVIXLAssembler()); + temps.Exclude(ip); + // If base_reg differs from holder_reg, the offset was too large and we must have emitted + // an explicit null check before the load. Otherwise, for implicit null checks, we need to + // null-check the holder as we do not necessarily do that check before going to the thunk. + vixl32::Label throw_npe_label; + vixl32::Label* throw_npe = nullptr; + if (GetCompilerOptions().GetImplicitNullChecks() && holder_reg.Is(base_reg)) { + throw_npe = &throw_npe_label; + __ CompareAndBranchIfZero(holder_reg, throw_npe, /* is_far_target= */ false); + } + // Check if the holder is gray and, if not, add fake dependency to the base register + // and return to the LDR instruction to load the reference. Otherwise, use introspection + // to load the reference and call the entrypoint that performs further checks on the + // reference and marks it if needed. + vixl32::Label slow_path; + MemOperand lock_word(holder_reg, mirror::Object::MonitorOffset().Int32Value()); + const int32_t raw_ldr_offset = (width == BakerReadBarrierWidth::kWide) + ? BAKER_MARK_INTROSPECTION_FIELD_LDR_WIDE_OFFSET + : BAKER_MARK_INTROSPECTION_FIELD_LDR_NARROW_OFFSET; + EmitGrayCheckAndFastPath( + assembler, base_reg, lock_word, &slow_path, raw_ldr_offset, throw_npe); + __ Bind(&slow_path); + const int32_t ldr_offset = /* Thumb state adjustment (LR contains Thumb state). */ -1 + + raw_ldr_offset; + vixl32::Register ep_reg = LoadReadBarrierMarkIntrospectionEntrypoint(assembler); + if (width == BakerReadBarrierWidth::kWide) { + MemOperand ldr_half_address(lr, ldr_offset + 2); + __ Ldrh(ip, ldr_half_address); // Load the LDR immediate half-word with "Rt | imm12". + __ Ubfx(ip, ip, 0, 12); // Extract the offset imm12. + __ Ldr(ip, MemOperand(base_reg, ip)); // Load the reference. + } else { + MemOperand ldr_address(lr, ldr_offset); + __ Ldrh(ip, ldr_address); // Load the LDR immediate, encoding T1. + __ Add(ep_reg, // Adjust the entrypoint address to the entrypoint + ep_reg, // for narrow LDR. + Operand(BAKER_MARK_INTROSPECTION_FIELD_LDR_NARROW_ENTRYPOINT_OFFSET)); + __ Ubfx(ip, ip, 6, 5); // Extract the imm5, i.e. offset / 4. + __ Ldr(ip, MemOperand(base_reg, ip, LSL, 2)); // Load the reference. + } + // Do not unpoison. With heap poisoning enabled, the entrypoint expects a poisoned reference. + __ Bx(ep_reg); // Jump to the entrypoint. 
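Note (illustrative sketch, not from the patch): in the kField slow path above, the thunk inspects the LDR it will return to in order to recover the field offset. For the wide encoding it loads the second halfword and extracts imm12 (UBFX #0, #12); for the narrow T1 encoding it extracts imm5 (UBFX #6, #5), which is the offset divided by 4. A plain C++ sketch of that decoding, with a hypothetical helper name:

// Decodes the offset from the halfword the thunk loads with LDRH; the bit
// ranges match the Ubfx operations emitted above.
uint32_t DecodeFieldOffsetFromLdr(uint16_t halfword, bool wide) {
  if (wide) {
    return halfword & 0xfffu;             // imm12 of the 32-bit LDR (immediate).
  }
  return ((halfword >> 6) & 0x1fu) * 4u;  // imm5 of the 16-bit T1 LDR, scaled by 4.
}
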
+ break; + } + case BakerReadBarrierKind::kArray: { + vixl32::Register base_reg(BakerReadBarrierFirstRegField::Decode(encoded_data)); + CheckValidReg(base_reg.GetCode()); + DCHECK_EQ(kBakerReadBarrierInvalidEncodedReg, + BakerReadBarrierSecondRegField::Decode(encoded_data)); + DCHECK(BakerReadBarrierWidthField::Decode(encoded_data) == BakerReadBarrierWidth::kWide); + UseScratchRegisterScope temps(assembler.GetVIXLAssembler()); + temps.Exclude(ip); + vixl32::Label slow_path; + int32_t data_offset = + mirror::Array::DataOffset(Primitive::ComponentSize(Primitive::kPrimNot)).Int32Value(); + MemOperand lock_word(base_reg, mirror::Object::MonitorOffset().Int32Value() - data_offset); + DCHECK_LT(lock_word.GetOffsetImmediate(), 0); + const int32_t raw_ldr_offset = BAKER_MARK_INTROSPECTION_ARRAY_LDR_OFFSET; + EmitGrayCheckAndFastPath(assembler, base_reg, lock_word, &slow_path, raw_ldr_offset); + __ Bind(&slow_path); + const int32_t ldr_offset = /* Thumb state adjustment (LR contains Thumb state). */ -1 + + raw_ldr_offset; + MemOperand ldr_address(lr, ldr_offset + 2); + __ Ldrb(ip, ldr_address); // Load the LDR (register) byte with "00 | imm2 | Rm", + // i.e. Rm+32 because the scale in imm2 is 2. + vixl32::Register ep_reg = LoadReadBarrierMarkIntrospectionEntrypoint(assembler); + __ Bfi(ep_reg, ip, 3, 6); // Insert ip to the entrypoint address to create + // a switch case target based on the index register. + __ Mov(ip, base_reg); // Move the base register to ip0. + __ Bx(ep_reg); // Jump to the entrypoint's array switch case. + break; + } + case BakerReadBarrierKind::kGcRoot: + case BakerReadBarrierKind::kUnsafeCas: { + // Check if the reference needs to be marked and if so (i.e. not null, not marked yet + // and it does not have a forwarding address), call the correct introspection entrypoint; + // otherwise return the reference (or the extracted forwarding address). + // There is no gray bit check for GC roots. + vixl32::Register root_reg(BakerReadBarrierFirstRegField::Decode(encoded_data)); + CheckValidReg(root_reg.GetCode()); + DCHECK_EQ(kBakerReadBarrierInvalidEncodedReg, + BakerReadBarrierSecondRegField::Decode(encoded_data)); + BakerReadBarrierWidth width = BakerReadBarrierWidthField::Decode(encoded_data); + UseScratchRegisterScope temps(assembler.GetVIXLAssembler()); + temps.Exclude(ip); + vixl32::Label return_label, not_marked, forwarding_address; + __ CompareAndBranchIfZero(root_reg, &return_label, /* is_far_target= */ false); + MemOperand lock_word(root_reg, mirror::Object::MonitorOffset().Int32Value()); + __ Ldr(ip, lock_word); + __ Tst(ip, LockWord::kMarkBitStateMaskShifted); + __ B(eq, ¬_marked); + __ Bind(&return_label); + __ Bx(lr); + __ Bind(¬_marked); + static_assert(LockWord::kStateShift == 30 && LockWord::kStateForwardingAddress == 3, + "To use 'CMP ip, #modified-immediate; BHS', we need the lock word state in " + " the highest bits and the 'forwarding address' state to have all bits set"); + __ Cmp(ip, Operand(0xc0000000)); + __ B(hs, &forwarding_address); + vixl32::Register ep_reg = LoadReadBarrierMarkIntrospectionEntrypoint(assembler); + // Adjust the art_quick_read_barrier_mark_introspection address in kBakerCcEntrypointRegister + // to one of art_quick_read_barrier_mark_introspection_{gc_roots_{wide,narrow},unsafe_cas}. + DCHECK(kind != BakerReadBarrierKind::kUnsafeCas || width == BakerReadBarrierWidth::kWide); + int32_t entrypoint_offset = + (kind == BakerReadBarrierKind::kGcRoot) + ? (width == BakerReadBarrierWidth::kWide) + ? 
BAKER_MARK_INTROSPECTION_GC_ROOT_LDR_WIDE_ENTRYPOINT_OFFSET + : BAKER_MARK_INTROSPECTION_GC_ROOT_LDR_NARROW_ENTRYPOINT_OFFSET + : BAKER_MARK_INTROSPECTION_UNSAFE_CAS_ENTRYPOINT_OFFSET; + __ Add(ep_reg, ep_reg, Operand(entrypoint_offset)); + __ Mov(ip, root_reg); + __ Bx(ep_reg); + __ Bind(&forwarding_address); + __ Lsl(root_reg, ip, LockWord::kForwardingAddressShift); + __ Bx(lr); + break; + } + default: + LOG(FATAL) << "Unexpected kind: " << static_cast<uint32_t>(kind); + UNREACHABLE(); + } + + // For JIT, the slow path is considered part of the compiled method, + // so JIT should pass null as `debug_name`. Tests may not have a runtime. + DCHECK(Runtime::Current() == nullptr || + !Runtime::Current()->UseJitCompilation() || + debug_name == nullptr); + if (debug_name != nullptr && GetCompilerOptions().GenerateAnyDebugInfo()) { + std::ostringstream oss; + oss << "BakerReadBarrierThunk"; + switch (kind) { + case BakerReadBarrierKind::kField: + oss << "Field"; + if (BakerReadBarrierWidthField::Decode(encoded_data) == BakerReadBarrierWidth::kWide) { + oss << "Wide"; + } + oss << "_r" << BakerReadBarrierFirstRegField::Decode(encoded_data) + << "_r" << BakerReadBarrierSecondRegField::Decode(encoded_data); + break; + case BakerReadBarrierKind::kArray: + oss << "Array_r" << BakerReadBarrierFirstRegField::Decode(encoded_data); + DCHECK_EQ(kBakerReadBarrierInvalidEncodedReg, + BakerReadBarrierSecondRegField::Decode(encoded_data)); + DCHECK(BakerReadBarrierWidthField::Decode(encoded_data) == BakerReadBarrierWidth::kWide); + break; + case BakerReadBarrierKind::kGcRoot: + oss << "GcRoot"; + if (BakerReadBarrierWidthField::Decode(encoded_data) == BakerReadBarrierWidth::kWide) { + oss << "Wide"; + } + oss << "_r" << BakerReadBarrierFirstRegField::Decode(encoded_data); + DCHECK_EQ(kBakerReadBarrierInvalidEncodedReg, + BakerReadBarrierSecondRegField::Decode(encoded_data)); + break; + case BakerReadBarrierKind::kUnsafeCas: + oss << "UnsafeCas_r" << BakerReadBarrierFirstRegField::Decode(encoded_data); + DCHECK_EQ(kBakerReadBarrierInvalidEncodedReg, + BakerReadBarrierSecondRegField::Decode(encoded_data)); + DCHECK(BakerReadBarrierWidthField::Decode(encoded_data) == BakerReadBarrierWidth::kWide); + break; + } + *debug_name = oss.str(); + } +} + +#undef __ + } // namespace arm } // namespace art diff --git a/compiler/optimizing/code_generator_arm_vixl.h b/compiler/optimizing/code_generator_arm_vixl.h index 536da41d07..5edca87147 100644 --- a/compiler/optimizing/code_generator_arm_vixl.h +++ b/compiler/optimizing/code_generator_arm_vixl.h @@ -36,6 +36,11 @@ #pragma GCC diagnostic pop namespace art { + +namespace linker { +class Thumb2RelativePatcherTest; +} // namespace linker + namespace arm { // This constant is used as an approximate margin when emission of veneer and literal pools @@ -173,9 +178,9 @@ class InvokeDexCallingConventionVisitorARMVIXL : public InvokeDexCallingConventi InvokeDexCallingConventionVisitorARMVIXL() {} virtual ~InvokeDexCallingConventionVisitorARMVIXL() {} - Location GetNextLocation(DataType::Type type) OVERRIDE; - Location GetReturnLocation(DataType::Type type) const OVERRIDE; - Location GetMethodLocation() const OVERRIDE; + Location GetNextLocation(DataType::Type type) override; + Location GetReturnLocation(DataType::Type type) const override; + Location GetMethodLocation() const override; private: InvokeDexCallingConventionARMVIXL calling_convention; @@ -188,25 +193,25 @@ class FieldAccessCallingConventionARMVIXL : public FieldAccessCallingConvention public: 
FieldAccessCallingConventionARMVIXL() {} - Location GetObjectLocation() const OVERRIDE { + Location GetObjectLocation() const override { return helpers::LocationFrom(vixl::aarch32::r1); } - Location GetFieldIndexLocation() const OVERRIDE { + Location GetFieldIndexLocation() const override { return helpers::LocationFrom(vixl::aarch32::r0); } - Location GetReturnLocation(DataType::Type type) const OVERRIDE { + Location GetReturnLocation(DataType::Type type) const override { return DataType::Is64BitType(type) ? helpers::LocationFrom(vixl::aarch32::r0, vixl::aarch32::r1) : helpers::LocationFrom(vixl::aarch32::r0); } - Location GetSetValueLocation(DataType::Type type, bool is_instance) const OVERRIDE { + Location GetSetValueLocation(DataType::Type type, bool is_instance) const override { return DataType::Is64BitType(type) ? helpers::LocationFrom(vixl::aarch32::r2, vixl::aarch32::r3) : (is_instance ? helpers::LocationFrom(vixl::aarch32::r2) : helpers::LocationFrom(vixl::aarch32::r1)); } - Location GetFpuLocation(DataType::Type type) const OVERRIDE { + Location GetFpuLocation(DataType::Type type) const override { return DataType::Is64BitType(type) ? helpers::LocationFrom(vixl::aarch32::s0, vixl::aarch32::s1) : helpers::LocationFrom(vixl::aarch32::s0); @@ -224,8 +229,8 @@ class SlowPathCodeARMVIXL : public SlowPathCode { vixl::aarch32::Label* GetEntryLabel() { return &entry_label_; } vixl::aarch32::Label* GetExitLabel() { return &exit_label_; } - void SaveLiveRegisters(CodeGenerator* codegen, LocationSummary* locations) OVERRIDE; - void RestoreLiveRegisters(CodeGenerator* codegen, LocationSummary* locations) OVERRIDE; + void SaveLiveRegisters(CodeGenerator* codegen, LocationSummary* locations) override; + void RestoreLiveRegisters(CodeGenerator* codegen, LocationSummary* locations) override; private: vixl::aarch32::Label entry_label_; @@ -239,10 +244,10 @@ class ParallelMoveResolverARMVIXL : public ParallelMoveResolverWithSwap { ParallelMoveResolverARMVIXL(ArenaAllocator* allocator, CodeGeneratorARMVIXL* codegen) : ParallelMoveResolverWithSwap(allocator), codegen_(codegen) {} - void EmitMove(size_t index) OVERRIDE; - void EmitSwap(size_t index) OVERRIDE; - void SpillScratch(int reg) OVERRIDE; - void RestoreScratch(int reg) OVERRIDE; + void EmitMove(size_t index) override; + void EmitSwap(size_t index) override; + void SpillScratch(int reg) override; + void RestoreScratch(int reg) override; ArmVIXLAssembler* GetAssembler() const; @@ -261,7 +266,7 @@ class LocationsBuilderARMVIXL : public HGraphVisitor { : HGraphVisitor(graph), codegen_(codegen) {} #define DECLARE_VISIT_INSTRUCTION(name, super) \ - void Visit##name(H##name* instr) OVERRIDE; + void Visit##name(H##name* instr) override; FOR_EACH_CONCRETE_INSTRUCTION_COMMON(DECLARE_VISIT_INSTRUCTION) FOR_EACH_CONCRETE_INSTRUCTION_ARM(DECLARE_VISIT_INSTRUCTION) @@ -269,7 +274,7 @@ class LocationsBuilderARMVIXL : public HGraphVisitor { #undef DECLARE_VISIT_INSTRUCTION - void VisitInstruction(HInstruction* instruction) OVERRIDE { + void VisitInstruction(HInstruction* instruction) override { LOG(FATAL) << "Unreachable instruction " << instruction->DebugName() << " (id " << instruction->GetId() << ")"; } @@ -299,7 +304,7 @@ class InstructionCodeGeneratorARMVIXL : public InstructionCodeGenerator { InstructionCodeGeneratorARMVIXL(HGraph* graph, CodeGeneratorARMVIXL* codegen); #define DECLARE_VISIT_INSTRUCTION(name, super) \ - void Visit##name(H##name* instr) OVERRIDE; + void Visit##name(H##name* instr) override; 
FOR_EACH_CONCRETE_INSTRUCTION_COMMON(DECLARE_VISIT_INSTRUCTION) FOR_EACH_CONCRETE_INSTRUCTION_ARM(DECLARE_VISIT_INSTRUCTION) @@ -307,7 +312,7 @@ class InstructionCodeGeneratorARMVIXL : public InstructionCodeGenerator { #undef DECLARE_VISIT_INSTRUCTION - void VisitInstruction(HInstruction* instruction) OVERRIDE { + void VisitInstruction(HInstruction* instruction) override { LOG(FATAL) << "Unreachable instruction " << instruction->DebugName() << " (id " << instruction->GetId() << ")"; } @@ -322,6 +327,9 @@ class InstructionCodeGeneratorARMVIXL : public InstructionCodeGenerator { void GenerateSuspendCheck(HSuspendCheck* instruction, HBasicBlock* successor); void GenerateClassInitializationCheck(LoadClassSlowPathARMVIXL* slow_path, vixl32::Register class_reg); + void GenerateBitstringTypeCheckCompare(HTypeCheckInstruction* check, + vixl::aarch32::Register temp, + vixl::aarch32::FlagsUpdate flags_update); void GenerateAndConst(vixl::aarch32::Register out, vixl::aarch32::Register first, uint32_t value); void GenerateOrrConst(vixl::aarch32::Register out, vixl::aarch32::Register first, uint32_t value); void GenerateEorConst(vixl::aarch32::Register out, vixl::aarch32::Register first, uint32_t value); @@ -349,6 +357,12 @@ class InstructionCodeGeneratorARMVIXL : public InstructionCodeGenerator { bool value_can_be_null); void HandleFieldGet(HInstruction* instruction, const FieldInfo& field_info); + void GenerateMinMaxInt(LocationSummary* locations, bool is_min); + void GenerateMinMaxLong(LocationSummary* locations, bool is_min); + void GenerateMinMaxFloat(HInstruction* minmax, bool is_min); + void GenerateMinMaxDouble(HInstruction* minmax, bool is_min); + void GenerateMinMax(HBinaryOperation* minmax, bool is_min); + // Generate a heap reference load using one register `out`: // // out <- *(out + offset) @@ -379,16 +393,6 @@ class InstructionCodeGeneratorARMVIXL : public InstructionCodeGenerator { uint32_t offset, Location maybe_temp, ReadBarrierOption read_barrier_option); - // Generate a GC root reference load: - // - // root <- *(obj + offset) - // - // while honoring read barriers based on read_barrier_option. 
- void GenerateGcRootFieldLoad(HInstruction* instruction, - Location root, - vixl::aarch32::Register obj, - uint32_t offset, - ReadBarrierOption read_barrier_option); void GenerateTestAndBranch(HInstruction* instruction, size_t condition_input_index, vixl::aarch32::Label* true_target, @@ -424,53 +428,55 @@ class InstructionCodeGeneratorARMVIXL : public InstructionCodeGenerator { class CodeGeneratorARMVIXL : public CodeGenerator { public: CodeGeneratorARMVIXL(HGraph* graph, - const ArmInstructionSetFeatures& isa_features, const CompilerOptions& compiler_options, OptimizingCompilerStats* stats = nullptr); virtual ~CodeGeneratorARMVIXL() {} - void GenerateFrameEntry() OVERRIDE; - void GenerateFrameExit() OVERRIDE; - void Bind(HBasicBlock* block) OVERRIDE; - void MoveConstant(Location destination, int32_t value) OVERRIDE; - void MoveLocation(Location dst, Location src, DataType::Type dst_type) OVERRIDE; - void AddLocationAsTemp(Location location, LocationSummary* locations) OVERRIDE; + void GenerateFrameEntry() override; + void GenerateFrameExit() override; + void Bind(HBasicBlock* block) override; + void MoveConstant(Location destination, int32_t value) override; + void MoveLocation(Location dst, Location src, DataType::Type dst_type) override; + void AddLocationAsTemp(Location location, LocationSummary* locations) override; - size_t SaveCoreRegister(size_t stack_index, uint32_t reg_id) OVERRIDE; - size_t RestoreCoreRegister(size_t stack_index, uint32_t reg_id) OVERRIDE; - size_t SaveFloatingPointRegister(size_t stack_index, uint32_t reg_id) OVERRIDE; - size_t RestoreFloatingPointRegister(size_t stack_index, uint32_t reg_id) OVERRIDE; + size_t SaveCoreRegister(size_t stack_index, uint32_t reg_id) override; + size_t RestoreCoreRegister(size_t stack_index, uint32_t reg_id) override; + size_t SaveFloatingPointRegister(size_t stack_index, uint32_t reg_id) override; + size_t RestoreFloatingPointRegister(size_t stack_index, uint32_t reg_id) override; - size_t GetWordSize() const OVERRIDE { + size_t GetWordSize() const override { return static_cast<size_t>(kArmPointerSize); } - size_t GetFloatingPointSpillSlotSize() const OVERRIDE { return vixl::aarch32::kRegSizeInBytes; } + size_t GetFloatingPointSpillSlotSize() const override { return vixl::aarch32::kRegSizeInBytes; } - HGraphVisitor* GetLocationBuilder() OVERRIDE { return &location_builder_; } + HGraphVisitor* GetLocationBuilder() override { return &location_builder_; } - HGraphVisitor* GetInstructionVisitor() OVERRIDE { return &instruction_visitor_; } + HGraphVisitor* GetInstructionVisitor() override { return &instruction_visitor_; } - ArmVIXLAssembler* GetAssembler() OVERRIDE { return &assembler_; } + ArmVIXLAssembler* GetAssembler() override { return &assembler_; } - const ArmVIXLAssembler& GetAssembler() const OVERRIDE { return assembler_; } + const ArmVIXLAssembler& GetAssembler() const override { return assembler_; } ArmVIXLMacroAssembler* GetVIXLAssembler() { return GetAssembler()->GetVIXLAssembler(); } - uintptr_t GetAddressOf(HBasicBlock* block) OVERRIDE { + uintptr_t GetAddressOf(HBasicBlock* block) override { vixl::aarch32::Label* block_entry_label = GetLabelOf(block); DCHECK(block_entry_label->IsBound()); return block_entry_label->GetLocation(); } void FixJumpTables(); - void SetupBlockedRegisters() const OVERRIDE; + void SetupBlockedRegisters() const override; + + void DumpCoreRegister(std::ostream& stream, int reg) const override; + void DumpFloatingPointRegister(std::ostream& stream, int reg) const override; - void 
DumpCoreRegister(std::ostream& stream, int reg) const OVERRIDE; - void DumpFloatingPointRegister(std::ostream& stream, int reg) const OVERRIDE; + ParallelMoveResolver* GetMoveResolver() override { return &move_resolver_; } + InstructionSet GetInstructionSet() const override { return InstructionSet::kThumb2; } + + const ArmInstructionSetFeatures& GetInstructionSetFeatures() const; - ParallelMoveResolver* GetMoveResolver() OVERRIDE { return &move_resolver_; } - InstructionSet GetInstructionSet() const OVERRIDE { return InstructionSet::kThumb2; } // Helper method to move a 32-bit value between two locations. void Move32(Location destination, Location source); @@ -489,7 +495,7 @@ class CodeGeneratorARMVIXL : public CodeGenerator { void InvokeRuntime(QuickEntrypointEnum entrypoint, HInstruction* instruction, uint32_t dex_pc, - SlowPathCode* slow_path = nullptr) OVERRIDE; + SlowPathCode* slow_path = nullptr) override; // Generate code to invoke a runtime entry point, but do not record // PC-related information in a stack map. @@ -513,44 +519,42 @@ class CodeGeneratorARMVIXL : public CodeGenerator { vixl32::Label* GetFinalLabel(HInstruction* instruction, vixl32::Label* final_label); - void Initialize() OVERRIDE { + void Initialize() override { block_labels_.resize(GetGraph()->GetBlocks().size()); } - void Finalize(CodeAllocator* allocator) OVERRIDE; - - const ArmInstructionSetFeatures& GetInstructionSetFeatures() const { return isa_features_; } + void Finalize(CodeAllocator* allocator) override; - bool NeedsTwoRegisters(DataType::Type type) const OVERRIDE { + bool NeedsTwoRegisters(DataType::Type type) const override { return type == DataType::Type::kFloat64 || type == DataType::Type::kInt64; } - void ComputeSpillMask() OVERRIDE; + void ComputeSpillMask() override; vixl::aarch32::Label* GetFrameEntryLabel() { return &frame_entry_label_; } // Check if the desired_string_load_kind is supported. If it is, return it, // otherwise return a fall-back kind that should be used instead. HLoadString::LoadKind GetSupportedLoadStringKind( - HLoadString::LoadKind desired_string_load_kind) OVERRIDE; + HLoadString::LoadKind desired_string_load_kind) override; // Check if the desired_class_load_kind is supported. If it is, return it, // otherwise return a fall-back kind that should be used instead. HLoadClass::LoadKind GetSupportedLoadClassKind( - HLoadClass::LoadKind desired_class_load_kind) OVERRIDE; + HLoadClass::LoadKind desired_class_load_kind) override; // Check if the desired_dispatch_info is supported. If it is, return it, // otherwise return a fall-back info that should be used instead. 
HInvokeStaticOrDirect::DispatchInfo GetSupportedInvokeStaticOrDirectDispatch( const HInvokeStaticOrDirect::DispatchInfo& desired_dispatch_info, - HInvokeStaticOrDirect* invoke) OVERRIDE; + ArtMethod* method) override; void GenerateStaticOrDirectCall( - HInvokeStaticOrDirect* invoke, Location temp, SlowPathCode* slow_path = nullptr) OVERRIDE; + HInvokeStaticOrDirect* invoke, Location temp, SlowPathCode* slow_path = nullptr) override; void GenerateVirtualCall( - HInvokeVirtual* invoke, Location temp, SlowPathCode* slow_path = nullptr) OVERRIDE; + HInvokeVirtual* invoke, Location temp, SlowPathCode* slow_path = nullptr) override; - void MoveFromReturnRegister(Location trg, DataType::Type type) OVERRIDE; + void MoveFromReturnRegister(Location trg, DataType::Type type) override; // The PcRelativePatchInfo is used for PC-relative addressing of methods/strings/types, // whether through .data.bimg.rel.ro, .bss, or directly in the boot image. @@ -574,6 +578,8 @@ class CodeGeneratorARMVIXL : public CodeGenerator { vixl::aarch32::Label add_pc_label; }; + PcRelativePatchInfo* NewBootImageIntrinsicPatch(uint32_t intrinsic_data); + PcRelativePatchInfo* NewBootImageRelRoPatch(uint32_t boot_image_offset); PcRelativePatchInfo* NewBootImageMethodPatch(MethodReference target_method); PcRelativePatchInfo* NewMethodBssEntryPatch(MethodReference target_method); PcRelativePatchInfo* NewBootImageTypePatch(const DexFile& dex_file, dex::TypeIndex type_index); @@ -583,9 +589,9 @@ class CodeGeneratorARMVIXL : public CodeGenerator { PcRelativePatchInfo* NewStringBssEntryPatch(const DexFile& dex_file, dex::StringIndex string_index); - // Add a new baker read barrier patch and return the label to be bound - // before the BNE instruction. - vixl::aarch32::Label* NewBakerReadBarrierPatch(uint32_t custom_data); + // Emit the BNE instruction for baker read barrier and record + // the associated patch for AOT or slow path for JIT. + void EmitBakerReadBarrierBne(uint32_t custom_data); VIXLUInt32Literal* DeduplicateBootImageAddressLiteral(uint32_t address); VIXLUInt32Literal* DeduplicateJitStringLiteral(const DexFile& dex_file, @@ -595,14 +601,40 @@ class CodeGeneratorARMVIXL : public CodeGenerator { dex::TypeIndex type_index, Handle<mirror::Class> handle); - void EmitLinkerPatches(ArenaVector<linker::LinkerPatch>* linker_patches) OVERRIDE; + void LoadBootImageAddress(vixl::aarch32::Register reg, uint32_t boot_image_reference); + void AllocateInstanceForIntrinsic(HInvokeStaticOrDirect* invoke, uint32_t boot_image_offset); - void EmitJitRootPatches(uint8_t* code, const uint8_t* roots_data) OVERRIDE; + void EmitLinkerPatches(ArenaVector<linker::LinkerPatch>* linker_patches) override; + bool NeedsThunkCode(const linker::LinkerPatch& patch) const override; + void EmitThunkCode(const linker::LinkerPatch& patch, + /*out*/ ArenaVector<uint8_t>* code, + /*out*/ std::string* debug_name) override; - // Maybe add the reserved entrypoint register as a temporary for field load. This temp - // is added only for AOT compilation if link-time generated thunks for fields are enabled. - void MaybeAddBakerCcEntrypointTempForFields(LocationSummary* locations); + void EmitJitRootPatches(uint8_t* code, const uint8_t* roots_data) override; + // Generate a GC root reference load: + // + // root <- *(obj + offset) + // + // while honoring read barriers based on read_barrier_option. 
+ void GenerateGcRootFieldLoad(HInstruction* instruction, + Location root, + vixl::aarch32::Register obj, + uint32_t offset, + ReadBarrierOption read_barrier_option); + // Generate ADD for UnsafeCASObject to reconstruct the old value from + // `old_value - expected` and mark it with Baker read barrier. + void GenerateUnsafeCasOldValueAddWithBakerReadBarrier(vixl::aarch32::Register old_value, + vixl::aarch32::Register adjusted_old_value, + vixl::aarch32::Register expected); + // Fast path implementation of ReadBarrier::Barrier for a heap + // reference field load when Baker's read barriers are used. + // Overload suitable for Unsafe.getObject/-Volatile() intrinsic. + void GenerateFieldLoadWithBakerReadBarrier(HInstruction* instruction, + Location ref, + vixl::aarch32::Register obj, + const vixl::aarch32::MemOperand& src, + bool needs_null_check); // Fast path implementation of ReadBarrier::Barrier for a heap // reference field load when Baker's read barriers are used. void GenerateFieldLoadWithBakerReadBarrier(HInstruction* instruction, @@ -613,56 +645,12 @@ class CodeGeneratorARMVIXL : public CodeGenerator { bool needs_null_check); // Fast path implementation of ReadBarrier::Barrier for a heap // reference array load when Baker's read barriers are used. - void GenerateArrayLoadWithBakerReadBarrier(HInstruction* instruction, - Location ref, + void GenerateArrayLoadWithBakerReadBarrier(Location ref, vixl::aarch32::Register obj, uint32_t data_offset, Location index, Location temp, bool needs_null_check); - // Factored implementation, used by GenerateFieldLoadWithBakerReadBarrier, - // GenerateArrayLoadWithBakerReadBarrier and some intrinsics. - // - // Load the object reference located at the address - // `obj + offset + (index << scale_factor)`, held by object `obj`, into - // `ref`, and mark it if needed. - void GenerateReferenceLoadWithBakerReadBarrier(HInstruction* instruction, - Location ref, - vixl::aarch32::Register obj, - uint32_t offset, - Location index, - ScaleFactor scale_factor, - Location temp, - bool needs_null_check); - - // Generate code checking whether the the reference field at the - // address `obj + field_offset`, held by object `obj`, needs to be - // marked, and if so, marking it and updating the field within `obj` - // with the marked value. - // - // This routine is used for the implementation of the - // UnsafeCASObject intrinsic with Baker read barriers. - // - // This method has a structure similar to - // GenerateReferenceLoadWithBakerReadBarrier, but note that argument - // `ref` is only as a temporary here, and thus its value should not - // be used afterwards. - void UpdateReferenceFieldWithBakerReadBarrier(HInstruction* instruction, - Location ref, - vixl::aarch32::Register obj, - Location field_offset, - Location temp, - bool needs_null_check, - vixl::aarch32::Register temp2); - - // Generate a heap reference load (with no read barrier). - void GenerateRawReferenceLoad(HInstruction* instruction, - Location ref, - vixl::aarch32::Register obj, - uint32_t offset, - Location index, - ScaleFactor scale_factor, - bool needs_null_check); // Emit code checking the status of the Marking Register, and // aborting the program if MR does not match the value stored in the @@ -734,10 +722,10 @@ class CodeGeneratorARMVIXL : public CodeGenerator { // artReadBarrierForRootSlow. 
void GenerateReadBarrierForRootSlow(HInstruction* instruction, Location out, Location root); - void GenerateNop() OVERRIDE; + void GenerateNop() override; - void GenerateImplicitNullCheck(HNullCheck* instruction) OVERRIDE; - void GenerateExplicitNullCheck(HNullCheck* instruction) OVERRIDE; + void GenerateImplicitNullCheck(HNullCheck* instruction) override; + void GenerateExplicitNullCheck(HNullCheck* instruction) override; JumpTableARMVIXL* CreateJumpTable(HPackedSwitch* switch_instr) { jump_tables_.emplace_back(new (GetGraph()->GetAllocator()) JumpTableARMVIXL(switch_instr)); @@ -757,6 +745,92 @@ class CodeGeneratorARMVIXL : public CodeGenerator { vixl::aarch32::Register temp = vixl32::Register()); private: + // Encoding of thunk type and data for link-time generated thunks for Baker read barriers. + + enum class BakerReadBarrierKind : uint8_t { + kField, // Field get or array get with constant offset (i.e. constant index). + kArray, // Array get with index in register. + kGcRoot, // GC root load. + kUnsafeCas, // UnsafeCASObject intrinsic. + kLast = kUnsafeCas + }; + + enum class BakerReadBarrierWidth : uint8_t { + kWide, // 32-bit LDR (and 32-bit NEG if heap poisoning is enabled). + kNarrow, // 16-bit LDR (and 16-bit NEG if heap poisoning is enabled). + kLast = kNarrow + }; + + static constexpr uint32_t kBakerReadBarrierInvalidEncodedReg = /* pc is invalid */ 15u; + + static constexpr size_t kBitsForBakerReadBarrierKind = + MinimumBitsToStore(static_cast<size_t>(BakerReadBarrierKind::kLast)); + static constexpr size_t kBakerReadBarrierBitsForRegister = + MinimumBitsToStore(kBakerReadBarrierInvalidEncodedReg); + using BakerReadBarrierKindField = + BitField<BakerReadBarrierKind, 0, kBitsForBakerReadBarrierKind>; + using BakerReadBarrierFirstRegField = + BitField<uint32_t, kBitsForBakerReadBarrierKind, kBakerReadBarrierBitsForRegister>; + using BakerReadBarrierSecondRegField = + BitField<uint32_t, + kBitsForBakerReadBarrierKind + kBakerReadBarrierBitsForRegister, + kBakerReadBarrierBitsForRegister>; + static constexpr size_t kBitsForBakerReadBarrierWidth = + MinimumBitsToStore(static_cast<size_t>(BakerReadBarrierWidth::kLast)); + using BakerReadBarrierWidthField = + BitField<BakerReadBarrierWidth, + kBitsForBakerReadBarrierKind + 2 * kBakerReadBarrierBitsForRegister, + kBitsForBakerReadBarrierWidth>; + + static void CheckValidReg(uint32_t reg) { + DCHECK(reg < vixl::aarch32::ip.GetCode() && reg != mr.GetCode()) << reg; + } + + static uint32_t EncodeBakerReadBarrierFieldData(uint32_t base_reg, + uint32_t holder_reg, + bool narrow) { + CheckValidReg(base_reg); + CheckValidReg(holder_reg); + DCHECK(!narrow || base_reg < 8u) << base_reg; + BakerReadBarrierWidth width = + narrow ? 
BakerReadBarrierWidth::kNarrow : BakerReadBarrierWidth::kWide; + return BakerReadBarrierKindField::Encode(BakerReadBarrierKind::kField) | + BakerReadBarrierFirstRegField::Encode(base_reg) | + BakerReadBarrierSecondRegField::Encode(holder_reg) | + BakerReadBarrierWidthField::Encode(width); + } + + static uint32_t EncodeBakerReadBarrierArrayData(uint32_t base_reg) { + CheckValidReg(base_reg); + return BakerReadBarrierKindField::Encode(BakerReadBarrierKind::kArray) | + BakerReadBarrierFirstRegField::Encode(base_reg) | + BakerReadBarrierSecondRegField::Encode(kBakerReadBarrierInvalidEncodedReg) | + BakerReadBarrierWidthField::Encode(BakerReadBarrierWidth::kWide); + } + + static uint32_t EncodeBakerReadBarrierGcRootData(uint32_t root_reg, bool narrow) { + CheckValidReg(root_reg); + DCHECK(!narrow || root_reg < 8u) << root_reg; + BakerReadBarrierWidth width = + narrow ? BakerReadBarrierWidth::kNarrow : BakerReadBarrierWidth::kWide; + return BakerReadBarrierKindField::Encode(BakerReadBarrierKind::kGcRoot) | + BakerReadBarrierFirstRegField::Encode(root_reg) | + BakerReadBarrierSecondRegField::Encode(kBakerReadBarrierInvalidEncodedReg) | + BakerReadBarrierWidthField::Encode(width); + } + + static uint32_t EncodeBakerReadBarrierUnsafeCasData(uint32_t root_reg) { + CheckValidReg(root_reg); + return BakerReadBarrierKindField::Encode(BakerReadBarrierKind::kUnsafeCas) | + BakerReadBarrierFirstRegField::Encode(root_reg) | + BakerReadBarrierSecondRegField::Encode(kBakerReadBarrierInvalidEncodedReg) | + BakerReadBarrierWidthField::Encode(BakerReadBarrierWidth::kWide); + } + + void CompileBakerReadBarrierThunk(ArmVIXLAssembler& assembler, + uint32_t encoded_data, + /*out*/ std::string* debug_name); + vixl::aarch32::Register GetInvokeStaticOrDirectExtraParameter(HInvokeStaticOrDirect* invoke, vixl::aarch32::Register temp); @@ -794,11 +868,11 @@ class CodeGeneratorARMVIXL : public CodeGenerator { ParallelMoveResolverARMVIXL move_resolver_; ArmVIXLAssembler assembler_; - const ArmInstructionSetFeatures& isa_features_; // Deduplication map for 32-bit literals, used for non-patchable boot image addresses. Uint32ToLiteralMap uint32_literals_; - // PC-relative method patch info for kBootImageLinkTimePcRelative. + // PC-relative method patch info for kBootImageLinkTimePcRelative/kBootImageRelRo. + // Also used for type/string patches for kBootImageRelRo (same linker patch as for methods). ArenaDeque<PcRelativePatchInfo> boot_image_method_patches_; // PC-relative method patch info for kBssEntry. ArenaDeque<PcRelativePatchInfo> method_bss_entry_patches_; @@ -806,10 +880,12 @@ class CodeGeneratorARMVIXL : public CodeGenerator { ArenaDeque<PcRelativePatchInfo> boot_image_type_patches_; // PC-relative type patch info for kBssEntry. ArenaDeque<PcRelativePatchInfo> type_bss_entry_patches_; - // PC-relative String patch info; type depends on configuration (intern table or boot image PIC). + // PC-relative String patch info for kBootImageLinkTimePcRelative. ArenaDeque<PcRelativePatchInfo> boot_image_string_patches_; // PC-relative String patch info for kBssEntry. ArenaDeque<PcRelativePatchInfo> string_bss_entry_patches_; + // PC-relative patch info for IntrinsicObjects. + ArenaDeque<PcRelativePatchInfo> boot_image_intrinsic_patches_; // Baker read barrier patch info. ArenaDeque<BakerReadBarrierPatchInfo> baker_read_barrier_patches_; @@ -818,6 +894,20 @@ class CodeGeneratorARMVIXL : public CodeGenerator { // Patches for class literals in JIT compiled code. 
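A note on the custom-data encoding defined above: the barrier kind, up to two register numbers, and the load width are packed into a single 32-bit value that later identifies (and deduplicates) the link-time thunk. The standalone sketch below reproduces that layout with plain shifts and masks instead of ART's BitField helper; the 2/4/4/1-bit field widths follow from the MinimumBitsToStore() expressions above, and the register values are example inputs only.

#include <cassert>
#include <cstdint>

// Layout (low to high bits): 2-bit kind | 4-bit first reg | 4-bit second reg | 1-bit width.
enum class Kind : uint32_t { kField = 0, kArray = 1, kGcRoot = 2, kUnsafeCas = 3 };
enum class Width : uint32_t { kWide = 0, kNarrow = 1 };
constexpr uint32_t kInvalidReg = 15u;  // pc is never a valid base/holder register.

constexpr uint32_t Encode(Kind kind, uint32_t reg1, uint32_t reg2, Width width) {
  return static_cast<uint32_t>(kind) |
         (reg1 << 2) |
         (reg2 << 6) |
         (static_cast<uint32_t>(width) << 10);
}

int main() {
  // Field load: base register r3, holder register r5, narrow (16-bit) LDR.
  uint32_t field_data = Encode(Kind::kField, 3u, 5u, Width::kNarrow);
  assert((field_data & 0x3u) == static_cast<uint32_t>(Kind::kField));
  assert(((field_data >> 2) & 0xFu) == 3u);   // first register field
  assert(((field_data >> 6) & 0xFu) == 5u);   // second register field
  assert(((field_data >> 10) & 0x1u) == 1u);  // width bit
  // Array load: only one register is meaningful, the second slot is marked invalid.
  uint32_t array_data = Encode(Kind::kArray, 4u, kInvalidReg, Width::kWide);
  assert(((array_data >> 6) & 0xFu) == kInvalidReg);
  return 0;
}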
TypeToLiteralMap jit_class_patches_; + // Baker read barrier slow paths, mapping custom data (uint32_t) to label. + // Wrap the label to work around vixl::aarch32::Label being non-copyable + // and non-moveable and as such unusable in ArenaSafeMap<>. + struct LabelWrapper { + LabelWrapper(const LabelWrapper& src) + : label() { + DCHECK(!src.label.IsReferenced() && !src.label.IsBound()); + } + LabelWrapper() = default; + vixl::aarch32::Label label; + }; + ArenaSafeMap<uint32_t, LabelWrapper> jit_baker_read_barrier_slow_paths_; + + friend class linker::Thumb2RelativePatcherTest; DISALLOW_COPY_AND_ASSIGN(CodeGeneratorARMVIXL); }; diff --git a/compiler/optimizing/code_generator_mips.cc b/compiler/optimizing/code_generator_mips.cc index 87e6d6834b..72334afa40 100644 --- a/compiler/optimizing/code_generator_mips.cc +++ b/compiler/optimizing/code_generator_mips.cc @@ -26,6 +26,7 @@ #include "entrypoints/quick/quick_entrypoints.h" #include "entrypoints/quick/quick_entrypoints_enum.h" #include "gc/accounting/card_table.h" +#include "gc/space/image_space.h" #include "heap_poisoning.h" #include "intrinsics.h" #include "intrinsics_mips.h" @@ -146,7 +147,7 @@ Location InvokeDexCallingConventionVisitorMIPS::GetNextLocation(DataType::Type t case DataType::Type::kUint64: case DataType::Type::kVoid: LOG(FATAL) << "Unexpected parameter type " << type; - break; + UNREACHABLE(); } // Space on the stack is reserved for all arguments. @@ -159,6 +160,14 @@ Location InvokeRuntimeCallingConvention::GetReturnLocation(DataType::Type type) return MipsReturnLocation(type); } +static RegisterSet OneRegInReferenceOutSaveEverythingCallerSaves() { + InvokeRuntimeCallingConvention calling_convention; + RegisterSet caller_saves = RegisterSet::Empty(); + caller_saves.Add(Location::RegisterLocation(calling_convention.GetRegisterAt(0))); + // The reference is returned in the same register. This differs from the standard return location. + return caller_saves; +} + // NOLINT on __ macro to suppress wrong warning/fix (misc-macro-parentheses) from clang-tidy. 
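Still on the ARM side, the LabelWrapper introduced above exists only because vixl::aarch32::Label is neither copyable nor movable, which the map container needs. A minimal sketch of the same workaround, using a stand-in NonCopyableLabel type (an assumption for illustration, not the VIXL API), shows why the copy constructor may simply default-construct its member as long as the source label has not been bound or referenced yet.

#include <cassert>

// Stand-in for a non-copyable, non-movable label type.
struct NonCopyableLabel {
  NonCopyableLabel() = default;
  NonCopyableLabel(const NonCopyableLabel&) = delete;
  NonCopyableLabel& operator=(const NonCopyableLabel&) = delete;
  bool IsBoundOrReferenced() const { return bound_or_referenced_; }
  bool bound_or_referenced_ = false;
};

// Copyable wrapper: copying is only legal while the label is still pristine,
// so "copying" can just create a fresh label.
struct LabelWrapper {
  LabelWrapper() = default;
  LabelWrapper(const LabelWrapper& src) : label() {
    assert(!src.label.IsBoundOrReferenced());
  }
  NonCopyableLabel label;
};

int main() {
  LabelWrapper original;
  LabelWrapper copy = original;  // OK: the wrapped label has not been used yet.
  (void)copy;
  return 0;
}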
#define __ down_cast<CodeGeneratorMIPS*>(codegen)->GetAssembler()-> // NOLINT #define QUICK_ENTRY_POINT(x) QUICK_ENTRYPOINT_OFFSET(kMipsPointerSize, x).Int32Value() @@ -167,7 +176,7 @@ class BoundsCheckSlowPathMIPS : public SlowPathCodeMIPS { public: explicit BoundsCheckSlowPathMIPS(HBoundsCheck* instruction) : SlowPathCodeMIPS(instruction) {} - void EmitNativeCode(CodeGenerator* codegen) OVERRIDE { + void EmitNativeCode(CodeGenerator* codegen) override { LocationSummary* locations = instruction_->GetLocations(); CodeGeneratorMIPS* mips_codegen = down_cast<CodeGeneratorMIPS*>(codegen); __ Bind(GetEntryLabel()); @@ -192,9 +201,9 @@ class BoundsCheckSlowPathMIPS : public SlowPathCodeMIPS { CheckEntrypointTypes<kQuickThrowArrayBounds, void, int32_t, int32_t>(); } - bool IsFatal() const OVERRIDE { return true; } + bool IsFatal() const override { return true; } - const char* GetDescription() const OVERRIDE { return "BoundsCheckSlowPathMIPS"; } + const char* GetDescription() const override { return "BoundsCheckSlowPathMIPS"; } private: DISALLOW_COPY_AND_ASSIGN(BoundsCheckSlowPathMIPS); @@ -204,16 +213,16 @@ class DivZeroCheckSlowPathMIPS : public SlowPathCodeMIPS { public: explicit DivZeroCheckSlowPathMIPS(HDivZeroCheck* instruction) : SlowPathCodeMIPS(instruction) {} - void EmitNativeCode(CodeGenerator* codegen) OVERRIDE { + void EmitNativeCode(CodeGenerator* codegen) override { CodeGeneratorMIPS* mips_codegen = down_cast<CodeGeneratorMIPS*>(codegen); __ Bind(GetEntryLabel()); mips_codegen->InvokeRuntime(kQuickThrowDivZero, instruction_, instruction_->GetDexPc(), this); CheckEntrypointTypes<kQuickThrowDivZero, void, void>(); } - bool IsFatal() const OVERRIDE { return true; } + bool IsFatal() const override { return true; } - const char* GetDescription() const OVERRIDE { return "DivZeroCheckSlowPathMIPS"; } + const char* GetDescription() const override { return "DivZeroCheckSlowPathMIPS"; } private: DISALLOW_COPY_AND_ASSIGN(DivZeroCheckSlowPathMIPS); @@ -221,35 +230,41 @@ class DivZeroCheckSlowPathMIPS : public SlowPathCodeMIPS { class LoadClassSlowPathMIPS : public SlowPathCodeMIPS { public: - LoadClassSlowPathMIPS(HLoadClass* cls, - HInstruction* at, - uint32_t dex_pc, - bool do_clinit) - : SlowPathCodeMIPS(at), - cls_(cls), - dex_pc_(dex_pc), - do_clinit_(do_clinit) { + LoadClassSlowPathMIPS(HLoadClass* cls, HInstruction* at) + : SlowPathCodeMIPS(at), cls_(cls) { DCHECK(at->IsLoadClass() || at->IsClinitCheck()); + DCHECK_EQ(instruction_->IsLoadClass(), cls_ == instruction_); } - void EmitNativeCode(CodeGenerator* codegen) OVERRIDE { + void EmitNativeCode(CodeGenerator* codegen) override { LocationSummary* locations = instruction_->GetLocations(); Location out = locations->Out(); + const uint32_t dex_pc = instruction_->GetDexPc(); + bool must_resolve_type = instruction_->IsLoadClass() && cls_->MustResolveTypeOnSlowPath(); + bool must_do_clinit = instruction_->IsClinitCheck() || cls_->MustGenerateClinitCheck(); + CodeGeneratorMIPS* mips_codegen = down_cast<CodeGeneratorMIPS*>(codegen); - InvokeRuntimeCallingConvention calling_convention; - DCHECK_EQ(instruction_->IsLoadClass(), cls_ == instruction_); __ Bind(GetEntryLabel()); SaveLiveRegisters(codegen, locations); - dex::TypeIndex type_index = cls_->GetTypeIndex(); - __ LoadConst32(calling_convention.GetRegisterAt(0), type_index.index_); - QuickEntrypointEnum entrypoint = do_clinit_ ? 
kQuickInitializeStaticStorage - : kQuickInitializeType; - mips_codegen->InvokeRuntime(entrypoint, instruction_, dex_pc_, this); - if (do_clinit_) { - CheckEntrypointTypes<kQuickInitializeStaticStorage, void*, uint32_t>(); + InvokeRuntimeCallingConvention calling_convention; + if (must_resolve_type) { + DCHECK(IsSameDexFile(cls_->GetDexFile(), mips_codegen->GetGraph()->GetDexFile())); + dex::TypeIndex type_index = cls_->GetTypeIndex(); + __ LoadConst32(calling_convention.GetRegisterAt(0), type_index.index_); + mips_codegen->InvokeRuntime(kQuickResolveType, instruction_, dex_pc, this); + CheckEntrypointTypes<kQuickResolveType, void*, uint32_t>(); + // If we also must_do_clinit, the resolved type is now in the correct register. } else { - CheckEntrypointTypes<kQuickInitializeType, void*, uint32_t>(); + DCHECK(must_do_clinit); + Location source = instruction_->IsLoadClass() ? out : locations->InAt(0); + mips_codegen->MoveLocation(Location::RegisterLocation(calling_convention.GetRegisterAt(0)), + source, + cls_->GetType()); + } + if (must_do_clinit) { + mips_codegen->InvokeRuntime(kQuickInitializeStaticStorage, instruction_, dex_pc, this); + CheckEntrypointTypes<kQuickInitializeStaticStorage, void*, mirror::Class*>(); } // Move the class to the desired location. @@ -265,18 +280,12 @@ class LoadClassSlowPathMIPS : public SlowPathCodeMIPS { __ B(GetExitLabel()); } - const char* GetDescription() const OVERRIDE { return "LoadClassSlowPathMIPS"; } + const char* GetDescription() const override { return "LoadClassSlowPathMIPS"; } private: // The class this slow path will load. HLoadClass* const cls_; - // The dex PC of `at_`. - const uint32_t dex_pc_; - - // Whether to initialize the class. - const bool do_clinit_; - DISALLOW_COPY_AND_ASSIGN(LoadClassSlowPathMIPS); }; @@ -285,7 +294,7 @@ class LoadStringSlowPathMIPS : public SlowPathCodeMIPS { explicit LoadStringSlowPathMIPS(HLoadString* instruction) : SlowPathCodeMIPS(instruction) {} - void EmitNativeCode(CodeGenerator* codegen) OVERRIDE { + void EmitNativeCode(CodeGenerator* codegen) override { DCHECK(instruction_->IsLoadString()); DCHECK_EQ(instruction_->AsLoadString()->GetLoadKind(), HLoadString::LoadKind::kBssEntry); LocationSummary* locations = instruction_->GetLocations(); @@ -309,7 +318,7 @@ class LoadStringSlowPathMIPS : public SlowPathCodeMIPS { __ B(GetExitLabel()); } - const char* GetDescription() const OVERRIDE { return "LoadStringSlowPathMIPS"; } + const char* GetDescription() const override { return "LoadStringSlowPathMIPS"; } private: DISALLOW_COPY_AND_ASSIGN(LoadStringSlowPathMIPS); @@ -319,7 +328,7 @@ class NullCheckSlowPathMIPS : public SlowPathCodeMIPS { public: explicit NullCheckSlowPathMIPS(HNullCheck* instr) : SlowPathCodeMIPS(instr) {} - void EmitNativeCode(CodeGenerator* codegen) OVERRIDE { + void EmitNativeCode(CodeGenerator* codegen) override { CodeGeneratorMIPS* mips_codegen = down_cast<CodeGeneratorMIPS*>(codegen); __ Bind(GetEntryLabel()); if (instruction_->CanThrowIntoCatchBlock()) { @@ -333,9 +342,9 @@ class NullCheckSlowPathMIPS : public SlowPathCodeMIPS { CheckEntrypointTypes<kQuickThrowNullPointer, void, void>(); } - bool IsFatal() const OVERRIDE { return true; } + bool IsFatal() const override { return true; } - const char* GetDescription() const OVERRIDE { return "NullCheckSlowPathMIPS"; } + const char* GetDescription() const override { return "NullCheckSlowPathMIPS"; } private: DISALLOW_COPY_AND_ASSIGN(NullCheckSlowPathMIPS); @@ -346,7 +355,7 @@ class SuspendCheckSlowPathMIPS : public SlowPathCodeMIPS { 
SuspendCheckSlowPathMIPS(HSuspendCheck* instruction, HBasicBlock* successor) : SlowPathCodeMIPS(instruction), successor_(successor) {} - void EmitNativeCode(CodeGenerator* codegen) OVERRIDE { + void EmitNativeCode(CodeGenerator* codegen) override { LocationSummary* locations = instruction_->GetLocations(); CodeGeneratorMIPS* mips_codegen = down_cast<CodeGeneratorMIPS*>(codegen); __ Bind(GetEntryLabel()); @@ -366,7 +375,7 @@ class SuspendCheckSlowPathMIPS : public SlowPathCodeMIPS { return &return_label_; } - const char* GetDescription() const OVERRIDE { return "SuspendCheckSlowPathMIPS"; } + const char* GetDescription() const override { return "SuspendCheckSlowPathMIPS"; } HBasicBlock* GetSuccessor() const { return successor_; @@ -387,7 +396,7 @@ class TypeCheckSlowPathMIPS : public SlowPathCodeMIPS { explicit TypeCheckSlowPathMIPS(HInstruction* instruction, bool is_fatal) : SlowPathCodeMIPS(instruction), is_fatal_(is_fatal) {} - void EmitNativeCode(CodeGenerator* codegen) OVERRIDE { + void EmitNativeCode(CodeGenerator* codegen) override { LocationSummary* locations = instruction_->GetLocations(); uint32_t dex_pc = instruction_->GetDexPc(); DCHECK(instruction_->IsCheckCast() @@ -426,9 +435,9 @@ class TypeCheckSlowPathMIPS : public SlowPathCodeMIPS { } } - const char* GetDescription() const OVERRIDE { return "TypeCheckSlowPathMIPS"; } + const char* GetDescription() const override { return "TypeCheckSlowPathMIPS"; } - bool IsFatal() const OVERRIDE { return is_fatal_; } + bool IsFatal() const override { return is_fatal_; } private: const bool is_fatal_; @@ -441,7 +450,7 @@ class DeoptimizationSlowPathMIPS : public SlowPathCodeMIPS { explicit DeoptimizationSlowPathMIPS(HDeoptimize* instruction) : SlowPathCodeMIPS(instruction) {} - void EmitNativeCode(CodeGenerator* codegen) OVERRIDE { + void EmitNativeCode(CodeGenerator* codegen) override { CodeGeneratorMIPS* mips_codegen = down_cast<CodeGeneratorMIPS*>(codegen); __ Bind(GetEntryLabel()); LocationSummary* locations = instruction_->GetLocations(); @@ -453,7 +462,7 @@ class DeoptimizationSlowPathMIPS : public SlowPathCodeMIPS { CheckEntrypointTypes<kQuickDeoptimize, void, DeoptimizationKind>(); } - const char* GetDescription() const OVERRIDE { return "DeoptimizationSlowPathMIPS"; } + const char* GetDescription() const override { return "DeoptimizationSlowPathMIPS"; } private: DISALLOW_COPY_AND_ASSIGN(DeoptimizationSlowPathMIPS); @@ -463,7 +472,7 @@ class ArraySetSlowPathMIPS : public SlowPathCodeMIPS { public: explicit ArraySetSlowPathMIPS(HInstruction* instruction) : SlowPathCodeMIPS(instruction) {} - void EmitNativeCode(CodeGenerator* codegen) OVERRIDE { + void EmitNativeCode(CodeGenerator* codegen) override { LocationSummary* locations = instruction_->GetLocations(); __ Bind(GetEntryLabel()); SaveLiveRegisters(codegen, locations); @@ -494,7 +503,7 @@ class ArraySetSlowPathMIPS : public SlowPathCodeMIPS { __ B(GetExitLabel()); } - const char* GetDescription() const OVERRIDE { return "ArraySetSlowPathMIPS"; } + const char* GetDescription() const override { return "ArraySetSlowPathMIPS"; } private: DISALLOW_COPY_AND_ASSIGN(ArraySetSlowPathMIPS); @@ -524,9 +533,9 @@ class ReadBarrierMarkSlowPathMIPS : public SlowPathCodeMIPS { DCHECK(kEmitCompilerReadBarrier); } - const char* GetDescription() const OVERRIDE { return "ReadBarrierMarkSlowPathMIPS"; } + const char* GetDescription() const override { return "ReadBarrierMarkSlowPathMIPS"; } - void EmitNativeCode(CodeGenerator* codegen) OVERRIDE { + void EmitNativeCode(CodeGenerator* codegen) override { 
LocationSummary* locations = instruction_->GetLocations(); Register ref_reg = ref_.AsRegister<Register>(); DCHECK(locations->CanCall()); @@ -578,7 +587,7 @@ class ReadBarrierMarkSlowPathMIPS : public SlowPathCodeMIPS { mips_codegen->InvokeRuntimeWithoutRecordingPcInfo(entry_point_offset, instruction_, this, - /* direct */ false); + /* direct= */ false); } __ B(GetExitLabel()); } @@ -618,11 +627,11 @@ class ReadBarrierMarkAndUpdateFieldSlowPathMIPS : public SlowPathCodeMIPS { DCHECK(kEmitCompilerReadBarrier); } - const char* GetDescription() const OVERRIDE { + const char* GetDescription() const override { return "ReadBarrierMarkAndUpdateFieldSlowPathMIPS"; } - void EmitNativeCode(CodeGenerator* codegen) OVERRIDE { + void EmitNativeCode(CodeGenerator* codegen) override { LocationSummary* locations = instruction_->GetLocations(); Register ref_reg = ref_.AsRegister<Register>(); DCHECK(locations->CanCall()); @@ -672,7 +681,7 @@ class ReadBarrierMarkAndUpdateFieldSlowPathMIPS : public SlowPathCodeMIPS { mips_codegen->InvokeRuntimeWithoutRecordingPcInfo(entry_point_offset, instruction_, this, - /* direct */ false); + /* direct= */ false); // If the new reference is different from the old reference, // update the field in the holder (`*(obj_ + field_offset_)`). @@ -789,7 +798,7 @@ class ReadBarrierForHeapReferenceSlowPathMIPS : public SlowPathCodeMIPS { DCHECK(!obj.Equals(ref)) << "obj=" << obj << " ref=" << ref; } - void EmitNativeCode(CodeGenerator* codegen) OVERRIDE { + void EmitNativeCode(CodeGenerator* codegen) override { CodeGeneratorMIPS* mips_codegen = down_cast<CodeGeneratorMIPS*>(codegen); LocationSummary* locations = instruction_->GetLocations(); Register reg_out = out_.AsRegister<Register>(); @@ -913,7 +922,7 @@ class ReadBarrierForHeapReferenceSlowPathMIPS : public SlowPathCodeMIPS { __ B(GetExitLabel()); } - const char* GetDescription() const OVERRIDE { return "ReadBarrierForHeapReferenceSlowPathMIPS"; } + const char* GetDescription() const override { return "ReadBarrierForHeapReferenceSlowPathMIPS"; } private: Register FindAvailableCallerSaveRegister(CodeGenerator* codegen) { @@ -956,7 +965,7 @@ class ReadBarrierForRootSlowPathMIPS : public SlowPathCodeMIPS { DCHECK(kEmitCompilerReadBarrier); } - void EmitNativeCode(CodeGenerator* codegen) OVERRIDE { + void EmitNativeCode(CodeGenerator* codegen) override { LocationSummary* locations = instruction_->GetLocations(); Register reg_out = out_.AsRegister<Register>(); DCHECK(locations->CanCall()); @@ -986,7 +995,7 @@ class ReadBarrierForRootSlowPathMIPS : public SlowPathCodeMIPS { __ B(GetExitLabel()); } - const char* GetDescription() const OVERRIDE { return "ReadBarrierForRootSlowPathMIPS"; } + const char* GetDescription() const override { return "ReadBarrierForRootSlowPathMIPS"; } private: const Location out_; @@ -996,7 +1005,6 @@ class ReadBarrierForRootSlowPathMIPS : public SlowPathCodeMIPS { }; CodeGeneratorMIPS::CodeGeneratorMIPS(HGraph* graph, - const MipsInstructionSetFeatures& isa_features, const CompilerOptions& compiler_options, OptimizingCompilerStats* stats) : CodeGenerator(graph, @@ -1013,8 +1021,8 @@ CodeGeneratorMIPS::CodeGeneratorMIPS(HGraph* graph, location_builder_(graph, this), instruction_visitor_(graph, this), move_resolver_(graph->GetAllocator(), this), - assembler_(graph->GetAllocator(), &isa_features), - isa_features_(isa_features), + assembler_(graph->GetAllocator(), + compiler_options.GetInstructionSetFeatures()->AsMipsInstructionSetFeatures()), uint32_literals_(std::less<uint32_t>(), 
graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)), boot_image_method_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)), @@ -1023,6 +1031,7 @@ CodeGeneratorMIPS::CodeGeneratorMIPS(HGraph* graph, type_bss_entry_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)), boot_image_string_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)), string_bss_entry_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)), + boot_image_intrinsic_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)), jit_string_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)), jit_class_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)), clobbered_ra_(false) { @@ -1042,8 +1051,7 @@ void CodeGeneratorMIPS::Finalize(CodeAllocator* allocator) { // Adjust native pc offsets in stack maps. StackMapStream* stack_map_stream = GetStackMapStream(); for (size_t i = 0, num = stack_map_stream->GetNumberOfStackMaps(); i != num; ++i) { - uint32_t old_position = - stack_map_stream->GetStackMap(i).native_pc_code_offset.Uint32Value(InstructionSet::kMips); + uint32_t old_position = stack_map_stream->GetStackMapNativePcOffset(i); uint32_t new_position = __ GetAdjustedPosition(old_position); DCHECK_GE(new_position, old_position); stack_map_stream->SetStackMapNativePcOffset(i, new_position); @@ -1159,9 +1167,9 @@ void ParallelMoveResolverMIPS::EmitSwap(size_t index) { __ Move(r2_l, TMP); __ Move(r2_h, AT); } else if (loc1.IsStackSlot() && loc2.IsStackSlot()) { - Exchange(loc1.GetStackIndex(), loc2.GetStackIndex(), /* double_slot */ false); + Exchange(loc1.GetStackIndex(), loc2.GetStackIndex(), /* double_slot= */ false); } else if (loc1.IsDoubleStackSlot() && loc2.IsDoubleStackSlot()) { - Exchange(loc1.GetStackIndex(), loc2.GetStackIndex(), /* double_slot */ true); + Exchange(loc1.GetStackIndex(), loc2.GetStackIndex(), /* double_slot= */ true); } else if (loc1.IsSIMDStackSlot() && loc2.IsSIMDStackSlot()) { ExchangeQuadSlots(loc1.GetStackIndex(), loc2.GetStackIndex()); } else if ((loc1.IsRegister() && loc2.IsStackSlot()) || @@ -1597,6 +1605,15 @@ inline void CodeGeneratorMIPS::EmitPcRelativeLinkerPatches( } } +template <linker::LinkerPatch (*Factory)(size_t, uint32_t, uint32_t)> +linker::LinkerPatch NoDexFileAdapter(size_t literal_offset, + const DexFile* target_dex_file, + uint32_t pc_insn_offset, + uint32_t boot_image_offset) { + DCHECK(target_dex_file == nullptr); // Unused for these patches, should be null. 
+ return Factory(literal_offset, pc_insn_offset, boot_image_offset); +} + void CodeGeneratorMIPS::EmitLinkerPatches(ArenaVector<linker::LinkerPatch>* linker_patches) { DCHECK(linker_patches->empty()); size_t size = @@ -1605,7 +1622,8 @@ void CodeGeneratorMIPS::EmitLinkerPatches(ArenaVector<linker::LinkerPatch>* link boot_image_type_patches_.size() + type_bss_entry_patches_.size() + boot_image_string_patches_.size() + - string_bss_entry_patches_.size(); + string_bss_entry_patches_.size() + + boot_image_intrinsic_patches_.size(); linker_patches->reserve(size); if (GetCompilerOptions().IsBootImage()) { EmitPcRelativeLinkerPatches<linker::LinkerPatch::RelativeMethodPatch>( @@ -1614,12 +1632,14 @@ void CodeGeneratorMIPS::EmitLinkerPatches(ArenaVector<linker::LinkerPatch>* link boot_image_type_patches_, linker_patches); EmitPcRelativeLinkerPatches<linker::LinkerPatch::RelativeStringPatch>( boot_image_string_patches_, linker_patches); + EmitPcRelativeLinkerPatches<NoDexFileAdapter<linker::LinkerPatch::IntrinsicReferencePatch>>( + boot_image_intrinsic_patches_, linker_patches); } else { - DCHECK(boot_image_method_patches_.empty()); - EmitPcRelativeLinkerPatches<linker::LinkerPatch::TypeClassTablePatch>( - boot_image_type_patches_, linker_patches); - EmitPcRelativeLinkerPatches<linker::LinkerPatch::StringInternTablePatch>( - boot_image_string_patches_, linker_patches); + EmitPcRelativeLinkerPatches<NoDexFileAdapter<linker::LinkerPatch::DataBimgRelRoPatch>>( + boot_image_method_patches_, linker_patches); + DCHECK(boot_image_type_patches_.empty()); + DCHECK(boot_image_string_patches_.empty()); + DCHECK(boot_image_intrinsic_patches_.empty()); } EmitPcRelativeLinkerPatches<linker::LinkerPatch::MethodBssEntryPatch>( method_bss_entry_patches_, linker_patches); @@ -1630,6 +1650,20 @@ void CodeGeneratorMIPS::EmitLinkerPatches(ArenaVector<linker::LinkerPatch>* link DCHECK_EQ(size, linker_patches->size()); } +CodeGeneratorMIPS::PcRelativePatchInfo* CodeGeneratorMIPS::NewBootImageIntrinsicPatch( + uint32_t intrinsic_data, + const PcRelativePatchInfo* info_high) { + return NewPcRelativePatch( + /* dex_file= */ nullptr, intrinsic_data, info_high, &boot_image_intrinsic_patches_); +} + +CodeGeneratorMIPS::PcRelativePatchInfo* CodeGeneratorMIPS::NewBootImageRelRoPatch( + uint32_t boot_image_offset, + const PcRelativePatchInfo* info_high) { + return NewPcRelativePatch( + /* dex_file= */ nullptr, boot_image_offset, info_high, &boot_image_method_patches_); +} + CodeGeneratorMIPS::PcRelativePatchInfo* CodeGeneratorMIPS::NewBootImageMethodPatch( MethodReference target_method, const PcRelativePatchInfo* info_high) { @@ -1703,7 +1737,7 @@ void CodeGeneratorMIPS::EmitPcRelativeAddressPlaceholderHigh(PcRelativePatchInfo __ Bind(&info_high->label); __ Bind(&info_high->pc_rel_label); // Add the high half of a 32-bit offset to PC. - __ Auipc(out, /* placeholder */ 0x1234); + __ Auipc(out, /* imm16= */ 0x1234); __ SetReorder(reordering); } else { // If base is ZERO, emit NAL to obtain the actual base. @@ -1712,7 +1746,7 @@ void CodeGeneratorMIPS::EmitPcRelativeAddressPlaceholderHigh(PcRelativePatchInfo __ Nal(); } __ Bind(&info_high->label); - __ Lui(out, /* placeholder */ 0x1234); + __ Lui(out, /* imm16= */ 0x1234); // If we emitted the NAL, bind the pc_rel_label, otherwise base is a register holding // the HMipsComputeBaseMethodAddress which has its own label stored in MipsAssembler. 
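The 0x1234/0x5678 immediates emitted around here are placeholders: the Lui/Auipc instruction later receives the high half of a 32-bit (PC-relative) offset and the Addiu/Lw the sign-extended low half, once the linker knows the final value. Below is a small sketch of how such a value is conventionally split; the bias in the high half compensates for the sign extension of the low half, and SplitOffset is an illustrative helper name, not ART code.

#include <cassert>
#include <cstdint>

struct HiLo {
  uint16_t hi;  // goes where the lui/auipc placeholder (0x1234) is
  int16_t lo;   // goes where the addiu/lw placeholder (0x5678) is
};

inline HiLo SplitOffset(int32_t offset) {
  int16_t lo = static_cast<int16_t>(offset & 0xFFFF);        // sign-extended by addiu/lw
  uint16_t hi = static_cast<uint16_t>((offset - lo) >> 16);  // biased up when bit 15 is set
  return {hi, lo};
}

int main() {
  int32_t offset = 0x12348000;
  HiLo parts = SplitOffset(offset);
  // Reassemble the way the instruction pair does: (hi << 16) + sign_extend(lo).
  int32_t reassembled = (static_cast<int32_t>(parts.hi) << 16) + parts.lo;
  assert(reassembled == offset);
  return 0;
}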
if (base == ZERO) { @@ -1726,6 +1760,48 @@ void CodeGeneratorMIPS::EmitPcRelativeAddressPlaceholderHigh(PcRelativePatchInfo // offset to `out` (e.g. lw, jialc, addiu). } +void CodeGeneratorMIPS::LoadBootImageAddress(Register reg, uint32_t boot_image_reference) { + if (GetCompilerOptions().IsBootImage()) { + PcRelativePatchInfo* info_high = NewBootImageIntrinsicPatch(boot_image_reference); + PcRelativePatchInfo* info_low = NewBootImageIntrinsicPatch(boot_image_reference, info_high); + EmitPcRelativeAddressPlaceholderHigh(info_high, TMP, /* base= */ ZERO); + __ Addiu(reg, TMP, /* imm16= */ 0x5678, &info_low->label); + } else if (GetCompilerOptions().GetCompilePic()) { + PcRelativePatchInfo* info_high = NewBootImageRelRoPatch(boot_image_reference); + PcRelativePatchInfo* info_low = NewBootImageRelRoPatch(boot_image_reference, info_high); + EmitPcRelativeAddressPlaceholderHigh(info_high, reg, /* base= */ ZERO); + __ Lw(reg, reg, /* imm16= */ 0x5678, &info_low->label); + } else { + DCHECK(Runtime::Current()->UseJitCompilation()); + gc::Heap* heap = Runtime::Current()->GetHeap(); + DCHECK(!heap->GetBootImageSpaces().empty()); + const uint8_t* address = heap->GetBootImageSpaces()[0]->Begin() + boot_image_reference; + __ LoadConst32(reg, dchecked_integral_cast<uint32_t>(reinterpret_cast<uintptr_t>(address))); + } +} + +void CodeGeneratorMIPS::AllocateInstanceForIntrinsic(HInvokeStaticOrDirect* invoke, + uint32_t boot_image_offset) { + DCHECK(invoke->IsStatic()); + InvokeRuntimeCallingConvention calling_convention; + Register argument = calling_convention.GetRegisterAt(0); + if (GetCompilerOptions().IsBootImage()) { + DCHECK_EQ(boot_image_offset, IntrinsicVisitor::IntegerValueOfInfo::kInvalidReference); + // Load the class the same way as for HLoadClass::LoadKind::kBootImageLinkTimePcRelative. + MethodReference target_method = invoke->GetTargetMethod(); + dex::TypeIndex type_idx = target_method.dex_file->GetMethodId(target_method.index).class_idx_; + PcRelativePatchInfo* info_high = NewBootImageTypePatch(*target_method.dex_file, type_idx); + PcRelativePatchInfo* info_low = + NewBootImageTypePatch(*target_method.dex_file, type_idx, info_high); + EmitPcRelativeAddressPlaceholderHigh(info_high, argument, /* base= */ ZERO); + __ Addiu(argument, argument, /* imm16= */ 0x5678, &info_low->label); + } else { + LoadBootImageAddress(argument, boot_image_offset); + } + InvokeRuntime(kQuickAllocObjectInitialized, invoke, invoke->GetDexPc()); + CheckEntrypointTypes<kQuickAllocObjectWithChecks, void*, mirror::Class*>(); +} + CodeGeneratorMIPS::JitPatchInfo* CodeGeneratorMIPS::NewJitRootStringPatch( const DexFile& dex_file, dex::StringIndex string_index, @@ -1792,12 +1868,27 @@ void CodeGeneratorMIPS::MarkGCCard(Register object, if (value_can_be_null) { __ Beqz(value, &done); } + // Load the address of the card table into `card`. __ LoadFromOffset(kLoadWord, card, TR, Thread::CardTableOffset<kMipsPointerSize>().Int32Value()); + // Calculate the address of the card corresponding to `object`. __ Srl(temp, object, gc::accounting::CardTable::kCardShift); __ Addu(temp, card, temp); + // Write the `art::gc::accounting::CardTable::kCardDirty` value into the + // `object`'s card. + // + // Register `card` contains the address of the card table. Note that the card + // table's base is biased during its creation so that it always starts at an + // address whose least-significant byte is equal to `kCardDirty` (see + // art::gc::accounting::CardTable::Create). 
Therefore the SB instruction + // below writes the `kCardDirty` (byte) value into the `object`'s card + // (located at `card + object >> kCardShift`). + // + // This dual use of the value in register `card` (1. to calculate the location + // of the card to mark; and 2. to load the `kCardDirty` value) saves a load + // (no need to explicitly load `kCardDirty` as an immediate value). __ Sb(card, temp, 0); if (value_can_be_null) { __ Bind(&done); @@ -1882,6 +1973,10 @@ void CodeGeneratorMIPS::DumpFloatingPointRegister(std::ostream& stream, int reg) stream << FRegister(reg); } +const MipsInstructionSetFeatures& CodeGeneratorMIPS::GetInstructionSetFeatures() const { + return *GetCompilerOptions().GetInstructionSetFeatures()->AsMipsInstructionSetFeatures(); +} + constexpr size_t kMipsDirectEntrypointRuntimeOffset = 16; void CodeGeneratorMIPS::InvokeRuntime(QuickEntrypointEnum entrypoint, @@ -1936,6 +2031,34 @@ void InstructionCodeGeneratorMIPS::GenerateClassInitializationCheck(SlowPathCode __ Bind(slow_path->GetExitLabel()); } +void InstructionCodeGeneratorMIPS::GenerateBitstringTypeCheckCompare(HTypeCheckInstruction* check, + Register temp) { + uint32_t path_to_root = check->GetBitstringPathToRoot(); + uint32_t mask = check->GetBitstringMask(); + DCHECK(IsPowerOfTwo(mask + 1)); + size_t mask_bits = WhichPowerOf2(mask + 1); + + if (mask_bits == 16u) { + // Load only the bitstring part of the status word. + __ LoadFromOffset( + kLoadUnsignedHalfword, temp, temp, mirror::Class::StatusOffset().Int32Value()); + // Compare the bitstring bits using XOR. + __ Xori(temp, temp, dchecked_integral_cast<uint16_t>(path_to_root)); + } else { + // /* uint32_t */ temp = temp->status_ + __ LoadFromOffset(kLoadWord, temp, temp, mirror::Class::StatusOffset().Int32Value()); + // Compare the bitstring bits using XOR. + if (IsUint<16>(path_to_root)) { + __ Xori(temp, temp, dchecked_integral_cast<uint16_t>(path_to_root)); + } else { + __ LoadConst32(TMP, path_to_root); + __ Xor(temp, temp, TMP); + } + // Shift out bits that do not contribute to the comparison. + __ Sll(temp, temp, 32 - mask_bits); + } +} + void InstructionCodeGeneratorMIPS::GenerateMemoryBarrier(MemBarrierKind kind ATTRIBUTE_UNUSED) { __ Sync(0); // Only stype 0 is supported. 
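For the bitstring check added above, the idea is that a class stores a "path to root" bitstring in its status word, arranged so that an object is an instance of the target class exactly when its bits match the target's path under the target's mask. The Xori/Xor followed by the shift computes that without a branch; a compact sketch of the equivalence is below, with example bitstring values chosen purely for illustration.

#include <cassert>
#include <cstdint>

// Returns 0 iff (status & mask) == path_to_root, mirroring the Xor/Sll sequence:
// XOR zeroes the matching bits, and the left shift discards everything outside the mask.
inline uint32_t BitstringCompare(uint32_t status, uint32_t path_to_root, uint32_t mask_bits) {
  uint32_t x = status ^ path_to_root;  // Xori / Xor
  return x << (32u - mask_bits);       // Sll(temp, temp, 32 - mask_bits)
}

int main() {
  constexpr uint32_t mask_bits = 12u;
  constexpr uint32_t mask = (1u << mask_bits) - 1u;
  uint32_t path_to_root = 0x2A5u;                          // target class's bitstring (example)
  uint32_t status_subclass = 0xDEAD0000u | path_to_root;   // matches under the mask
  uint32_t status_other    = 0xDEAD0000u | 0x111u;         // does not match
  assert(BitstringCompare(status_subclass, path_to_root, mask_bits) == 0u);
  assert((status_subclass & mask) == path_to_root);        // same condition, stated directly
  assert(BitstringCompare(status_other, path_to_root, mask_bits) != 0u);
  return 0;
}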
} @@ -2456,7 +2579,7 @@ void InstructionCodeGeneratorMIPS::HandleShift(HBinaryOperation* instr) { __ Or(dst_high, dst_high, TMP); __ Andi(TMP, rhs_reg, kMipsBitsPerWord); if (isR6) { - __ Beqzc(TMP, &done, /* is_bare */ true); + __ Beqzc(TMP, &done, /* is_bare= */ true); __ Move(dst_high, dst_low); __ Move(dst_low, ZERO); } else { @@ -2472,7 +2595,7 @@ void InstructionCodeGeneratorMIPS::HandleShift(HBinaryOperation* instr) { __ Or(dst_low, dst_low, TMP); __ Andi(TMP, rhs_reg, kMipsBitsPerWord); if (isR6) { - __ Beqzc(TMP, &done, /* is_bare */ true); + __ Beqzc(TMP, &done, /* is_bare= */ true); __ Move(dst_low, dst_high); __ Sra(dst_high, dst_high, 31); } else { @@ -2489,7 +2612,7 @@ void InstructionCodeGeneratorMIPS::HandleShift(HBinaryOperation* instr) { __ Or(dst_low, dst_low, TMP); __ Andi(TMP, rhs_reg, kMipsBitsPerWord); if (isR6) { - __ Beqzc(TMP, &done, /* is_bare */ true); + __ Beqzc(TMP, &done, /* is_bare= */ true); __ Move(dst_low, dst_high); __ Move(dst_high, ZERO); } else { @@ -2508,7 +2631,7 @@ void InstructionCodeGeneratorMIPS::HandleShift(HBinaryOperation* instr) { __ Or(dst_high, dst_high, TMP); __ Andi(TMP, rhs_reg, kMipsBitsPerWord); if (isR6) { - __ Beqzc(TMP, &done, /* is_bare */ true); + __ Beqzc(TMP, &done, /* is_bare= */ true); __ Move(TMP, dst_high); __ Move(dst_high, dst_low); __ Move(dst_low, TMP); @@ -2739,7 +2862,7 @@ void InstructionCodeGeneratorMIPS::VisitArrayGet(HArrayGet* instruction) { obj, offset, temp, - /* needs_null_check */ false); + /* needs_null_check= */ false); } else { codegen_->GenerateArrayLoadWithBakerReadBarrier(instruction, out_loc, @@ -2747,7 +2870,7 @@ void InstructionCodeGeneratorMIPS::VisitArrayGet(HArrayGet* instruction) { data_offset, index, temp, - /* needs_null_check */ false); + /* needs_null_check= */ false); } } else { Register out = out_loc.AsRegister<Register>(); @@ -3287,7 +3410,13 @@ void LocationsBuilderMIPS::VisitCheckCast(HCheckCast* instruction) { LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(instruction, call_kind); locations->SetInAt(0, Location::RequiresRegister()); - locations->SetInAt(1, Location::RequiresRegister()); + if (type_check_kind == TypeCheckKind::kBitstringCheck) { + locations->SetInAt(1, Location::ConstantLocation(instruction->InputAt(1)->AsConstant())); + locations->SetInAt(2, Location::ConstantLocation(instruction->InputAt(2)->AsConstant())); + locations->SetInAt(3, Location::ConstantLocation(instruction->InputAt(3)->AsConstant())); + } else { + locations->SetInAt(1, Location::RequiresRegister()); + } locations->AddRegisterTemps(NumberOfCheckCastTemps(type_check_kind)); } @@ -3296,7 +3425,7 @@ void InstructionCodeGeneratorMIPS::VisitCheckCast(HCheckCast* instruction) { LocationSummary* locations = instruction->GetLocations(); Location obj_loc = locations->InAt(0); Register obj = obj_loc.AsRegister<Register>(); - Register cls = locations->InAt(1).AsRegister<Register>(); + Location cls = locations->InAt(1); Location temp_loc = locations->GetTemp(0); Register temp = temp_loc.AsRegister<Register>(); const size_t num_temps = NumberOfCheckCastTemps(type_check_kind); @@ -3335,7 +3464,7 @@ void InstructionCodeGeneratorMIPS::VisitCheckCast(HCheckCast* instruction) { kWithoutReadBarrier); // Jump to slow path for throwing the exception or doing a // more involved array check. 
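The long-shift lowering in the hunk above splits a 64-bit shift into two 32-bit halves and then, if bit 5 of the shift amount is set (the Andi with kMipsBitsPerWord), swaps or clears the halves. A C-level sketch of the same idea for the shift-left case follows; the zero-amount edge case is handled with a plain branch here, whereas the generated code avoids it, and the helper name is illustrative.

#include <cassert>
#include <cstdint>

// 64-bit logical shift left built from 32-bit halves, shift amount taken mod 64.
inline void ShlLong(uint32_t* hi, uint32_t* lo, uint32_t shamt) {
  shamt &= 63u;
  uint32_t s = shamt & 31u;
  if (s != 0u) {
    *hi = (*hi << s) | (*lo >> (32u - s));
    *lo <<= s;
  }
  if ((shamt & 32u) != 0u) {  // Andi(TMP, rhs_reg, kMipsBitsPerWord)
    *hi = *lo;                // Move(dst_high, dst_low)
    *lo = 0u;                 // Move(dst_low, ZERO)
  }
}

int main() {
  uint64_t value = 0x0000000180000001ull;
  uint32_t hi = static_cast<uint32_t>(value >> 32);
  uint32_t lo = static_cast<uint32_t>(value);
  ShlLong(&hi, &lo, 33u);
  assert(((static_cast<uint64_t>(hi) << 32) | lo) == (value << 33));
  return 0;
}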
- __ Bne(temp, cls, slow_path->GetEntryLabel()); + __ Bne(temp, cls.AsRegister<Register>(), slow_path->GetEntryLabel()); break; } @@ -3361,7 +3490,7 @@ void InstructionCodeGeneratorMIPS::VisitCheckCast(HCheckCast* instruction) { // exception. __ Beqz(temp, slow_path->GetEntryLabel()); // Otherwise, compare the classes. - __ Bne(temp, cls, &loop); + __ Bne(temp, cls.AsRegister<Register>(), &loop); break; } @@ -3376,7 +3505,7 @@ void InstructionCodeGeneratorMIPS::VisitCheckCast(HCheckCast* instruction) { // Walk over the class hierarchy to find a match. MipsLabel loop; __ Bind(&loop); - __ Beq(temp, cls, &done); + __ Beq(temp, cls.AsRegister<Register>(), &done); // /* HeapReference<Class> */ temp = temp->super_class_ GenerateReferenceLoadOneRegister(instruction, temp_loc, @@ -3399,7 +3528,7 @@ void InstructionCodeGeneratorMIPS::VisitCheckCast(HCheckCast* instruction) { maybe_temp2_loc, kWithoutReadBarrier); // Do an exact check. - __ Beq(temp, cls, &done); + __ Beq(temp, cls.AsRegister<Register>(), &done); // Otherwise, we need to check that the object's class is a non-primitive array. // /* HeapReference<Class> */ temp = temp->component_type_ GenerateReferenceLoadOneRegister(instruction, @@ -3458,7 +3587,21 @@ void InstructionCodeGeneratorMIPS::VisitCheckCast(HCheckCast* instruction) { // Go to next interface. __ Addiu(TMP, TMP, -2); // Compare the classes and continue the loop if they do not match. - __ Bne(AT, cls, &loop); + __ Bne(AT, cls.AsRegister<Register>(), &loop); + break; + } + + case TypeCheckKind::kBitstringCheck: { + // /* HeapReference<Class> */ temp = obj->klass_ + GenerateReferenceLoadTwoRegisters(instruction, + temp_loc, + obj_loc, + class_offset, + maybe_temp2_loc, + kWithoutReadBarrier); + + GenerateBitstringTypeCheckCompare(instruction, temp); + __ Bnez(temp, slow_path->GetEntryLabel()); break; } } @@ -3474,15 +3617,14 @@ void LocationsBuilderMIPS::VisitClinitCheck(HClinitCheck* check) { if (check->HasUses()) { locations->SetOut(Location::SameAsFirstInput()); } + // Rely on the type initialization to save everything we need. + locations->SetCustomSlowPathCallerSaves(OneRegInReferenceOutSaveEverythingCallerSaves()); } void InstructionCodeGeneratorMIPS::VisitClinitCheck(HClinitCheck* check) { // We assume the class is not null. - SlowPathCodeMIPS* slow_path = new (codegen_->GetScopedAllocator()) LoadClassSlowPathMIPS( - check->GetLoadClass(), - check, - check->GetDexPc(), - true); + SlowPathCodeMIPS* slow_path = + new (codegen_->GetScopedAllocator()) LoadClassSlowPathMIPS(check->GetLoadClass(), check); codegen_->AddSlowPath(slow_path); GenerateClassInitializationCheck(slow_path, check->GetLocations()->InAt(0).AsRegister<Register>()); @@ -3962,7 +4104,7 @@ void InstructionCodeGeneratorMIPS::GenerateDivRemWithAnyConstant(HBinaryOperatio int64_t magic; int shift; - CalculateMagicAndShiftForDivRem(imm, false /* is_long */, &magic, &shift); + CalculateMagicAndShiftForDivRem(imm, false /* is_long= */, &magic, &shift); bool isR6 = codegen_->GetInstructionSetFeatures().IsR6(); @@ -5806,7 +5948,7 @@ void InstructionCodeGeneratorMIPS::VisitIf(HIf* if_instr) { nullptr : codegen_->GetLabelOf(true_successor); MipsLabel* false_target = codegen_->GoesToNextBlock(if_instr->GetBlock(), false_successor) ? 
nullptr : codegen_->GetLabelOf(false_successor); - GenerateTestAndBranch(if_instr, /* condition_input_index */ 0, true_target, false_target); + GenerateTestAndBranch(if_instr, /* condition_input_index= */ 0, true_target, false_target); } void LocationsBuilderMIPS::VisitDeoptimize(HDeoptimize* deoptimize) { @@ -5825,9 +5967,9 @@ void InstructionCodeGeneratorMIPS::VisitDeoptimize(HDeoptimize* deoptimize) { SlowPathCodeMIPS* slow_path = deopt_slow_paths_.NewSlowPath<DeoptimizationSlowPathMIPS>(deoptimize); GenerateTestAndBranch(deoptimize, - /* condition_input_index */ 0, + /* condition_input_index= */ 0, slow_path->GetEntryLabel(), - /* false_target */ nullptr); + /* false_target= */ nullptr); } // This function returns true if a conditional move can be generated for HSelect. @@ -5841,7 +5983,7 @@ void InstructionCodeGeneratorMIPS::VisitDeoptimize(HDeoptimize* deoptimize) { // of common logic. static bool CanMoveConditionally(HSelect* select, bool is_r6, LocationSummary* locations_to_set) { bool materialized = IsBooleanValueOrMaterializedCondition(select->GetCondition()); - HInstruction* cond = select->InputAt(/* condition_input_index */ 2); + HInstruction* cond = select->InputAt(/* i= */ 2); HCondition* condition = cond->AsCondition(); DataType::Type cond_type = @@ -6074,7 +6216,7 @@ void InstructionCodeGeneratorMIPS::GenConditionalMoveR2(HSelect* select) { Location src = locations->InAt(1); Register src_reg = ZERO; Register src_reg_high = ZERO; - HInstruction* cond = select->InputAt(/* condition_input_index */ 2); + HInstruction* cond = select->InputAt(/* i= */ 2); Register cond_reg = TMP; int cond_cc = 0; DataType::Type cond_type = DataType::Type::kInt32; @@ -6082,7 +6224,7 @@ void InstructionCodeGeneratorMIPS::GenConditionalMoveR2(HSelect* select) { DataType::Type dst_type = select->GetType(); if (IsBooleanValueOrMaterializedCondition(cond)) { - cond_reg = locations->InAt(/* condition_input_index */ 2).AsRegister<Register>(); + cond_reg = locations->InAt(/* at= */ 2).AsRegister<Register>(); } else { HCondition* condition = cond->AsCondition(); LocationSummary* cond_locations = cond->GetLocations(); @@ -6195,7 +6337,7 @@ void InstructionCodeGeneratorMIPS::GenConditionalMoveR6(HSelect* select) { Location dst = locations->Out(); Location false_src = locations->InAt(0); Location true_src = locations->InAt(1); - HInstruction* cond = select->InputAt(/* condition_input_index */ 2); + HInstruction* cond = select->InputAt(/* i= */ 2); Register cond_reg = TMP; FRegister fcond_reg = FTMP; DataType::Type cond_type = DataType::Type::kInt32; @@ -6203,7 +6345,7 @@ void InstructionCodeGeneratorMIPS::GenConditionalMoveR6(HSelect* select) { DataType::Type dst_type = select->GetType(); if (IsBooleanValueOrMaterializedCondition(cond)) { - cond_reg = locations->InAt(/* condition_input_index */ 2).AsRegister<Register>(); + cond_reg = locations->InAt(/* at= */ 2).AsRegister<Register>(); } else { HCondition* condition = cond->AsCondition(); LocationSummary* cond_locations = cond->GetLocations(); @@ -6384,7 +6526,7 @@ void LocationsBuilderMIPS::VisitSelect(HSelect* select) { void InstructionCodeGeneratorMIPS::VisitSelect(HSelect* select) { bool is_r6 = codegen_->GetInstructionSetFeatures().IsR6(); - if (CanMoveConditionally(select, is_r6, /* locations_to_set */ nullptr)) { + if (CanMoveConditionally(select, is_r6, /* locations_to_set= */ nullptr)) { if (is_r6) { GenConditionalMoveR6(select); } else { @@ -6394,8 +6536,8 @@ void InstructionCodeGeneratorMIPS::VisitSelect(HSelect* select) { LocationSummary* 
locations = select->GetLocations(); MipsLabel false_target; GenerateTestAndBranch(select, - /* condition_input_index */ 2, - /* true_target */ nullptr, + /* condition_input_index= */ 2, + /* true_target= */ nullptr, &false_target); codegen_->MoveLocation(locations->Out(), locations->InAt(1), select->GetType()); __ Bind(&false_target); @@ -6554,7 +6696,7 @@ void InstructionCodeGeneratorMIPS::HandleFieldGet(HInstruction* instruction, obj, offset, temp_loc, - /* needs_null_check */ true); + /* needs_null_check= */ true); if (is_volatile) { GenerateMemoryBarrier(MemBarrierKind::kLoadAny); } @@ -6787,7 +6929,7 @@ void InstructionCodeGeneratorMIPS::GenerateReferenceLoadOneRegister( out_reg, offset, maybe_temp, - /* needs_null_check */ false); + /* needs_null_check= */ false); } else { // Load with slow path based read barrier. // Save the value of `out` into `maybe_temp` before overwriting it @@ -6828,7 +6970,7 @@ void InstructionCodeGeneratorMIPS::GenerateReferenceLoadTwoRegisters( obj_reg, offset, maybe_temp, - /* needs_null_check */ false); + /* needs_null_check= */ false); } else { // Load with slow path based read barrier. // /* HeapReference<Object> */ out = *(obj + offset) @@ -6919,7 +7061,7 @@ void InstructionCodeGeneratorMIPS::GenerateGcRootFieldLoad(HInstruction* instruc __ AddUpper(base, obj, offset_high); } MipsLabel skip_call; - __ Beqz(T9, &skip_call, /* is_bare */ true); + __ Beqz(T9, &skip_call, /* is_bare= */ true); if (label_low != nullptr) { DCHECK(short_offset); __ Bind(label_low); @@ -7074,11 +7216,11 @@ void CodeGeneratorMIPS::GenerateFieldLoadWithBakerReadBarrier(HInstruction* inst MipsLabel skip_call; if (short_offset) { if (isR6) { - __ Beqzc(T9, &skip_call, /* is_bare */ true); + __ Beqzc(T9, &skip_call, /* is_bare= */ true); __ Nop(); // In forbidden slot. __ Jialc(T9, thunk_disp); } else { - __ Beqz(T9, &skip_call, /* is_bare */ true); + __ Beqz(T9, &skip_call, /* is_bare= */ true); __ Addiu(T9, T9, thunk_disp); // In delay slot. __ Jalr(T9); __ Nop(); // In delay slot. @@ -7086,13 +7228,13 @@ void CodeGeneratorMIPS::GenerateFieldLoadWithBakerReadBarrier(HInstruction* inst __ Bind(&skip_call); } else { if (isR6) { - __ Beqz(T9, &skip_call, /* is_bare */ true); + __ Beqz(T9, &skip_call, /* is_bare= */ true); __ Aui(base, obj, offset_high); // In delay slot. __ Jialc(T9, thunk_disp); __ Bind(&skip_call); } else { __ Lui(base, offset_high); - __ Beqz(T9, &skip_call, /* is_bare */ true); + __ Beqz(T9, &skip_call, /* is_bare= */ true); __ Addiu(T9, T9, thunk_disp); // In delay slot. __ Jalr(T9); __ Bind(&skip_call); @@ -7169,7 +7311,7 @@ void CodeGeneratorMIPS::GenerateArrayLoadWithBakerReadBarrier(HInstruction* inst // We will not do the explicit null check in the thunk as some form of a null check // must've been done earlier. DCHECK(!needs_null_check); - const int thunk_disp = GetBakerMarkFieldArrayThunkDisplacement(obj, /* short_offset */ false); + const int thunk_disp = GetBakerMarkFieldArrayThunkDisplacement(obj, /* short_offset= */ false); // Loading the entrypoint does not require a load acquire since it is only changed when // threads are suspended or running a checkpoint. 
__ LoadFromOffset(kLoadWord, T9, TR, entry_point_offset); @@ -7179,13 +7321,13 @@ void CodeGeneratorMIPS::GenerateArrayLoadWithBakerReadBarrier(HInstruction* inst : index.AsRegister<Register>(); MipsLabel skip_call; if (GetInstructionSetFeatures().IsR6()) { - __ Beqz(T9, &skip_call, /* is_bare */ true); + __ Beqz(T9, &skip_call, /* is_bare= */ true); __ Lsa(TMP, index_reg, obj, scale_factor); // In delay slot. __ Jialc(T9, thunk_disp); __ Bind(&skip_call); } else { __ Sll(TMP, index_reg, scale_factor); - __ Beqz(T9, &skip_call, /* is_bare */ true); + __ Beqz(T9, &skip_call, /* is_bare= */ true); __ Addiu(T9, T9, thunk_disp); // In delay slot. __ Jalr(T9); __ Bind(&skip_call); @@ -7300,7 +7442,7 @@ void CodeGeneratorMIPS::GenerateReferenceLoadWithBakerReadBarrier(HInstruction* ReadBarrierMarkAndUpdateFieldSlowPathMIPS(instruction, ref, obj, - /* field_offset */ index, + /* field_offset= */ index, temp_reg); } else { slow_path = new (GetScopedAllocator()) ReadBarrierMarkSlowPathMIPS(instruction, ref); @@ -7312,7 +7454,7 @@ void CodeGeneratorMIPS::GenerateReferenceLoadWithBakerReadBarrier(HInstruction* // Given the numeric representation, it's enough to check the low bit of the // rb_state. We do that by shifting the bit into the sign bit (31) and // performing a branch on less than zero. - static_assert(ReadBarrier::WhiteState() == 0, "Expecting white to have value 0"); + static_assert(ReadBarrier::NonGrayState() == 0, "Expecting non-gray to have value 0"); static_assert(ReadBarrier::GrayState() == 1, "Expecting gray to have value 1"); static_assert(LockWord::kReadBarrierStateSize == 1, "Expecting 1-bit read barrier state size"); __ Sll(temp_reg, temp_reg, 31 - LockWord::kReadBarrierStateShift); @@ -7401,6 +7543,8 @@ void LocationsBuilderMIPS::VisitInstanceOf(HInstanceOf* instruction) { case TypeCheckKind::kInterfaceCheck: call_kind = LocationSummary::kCallOnSlowPath; break; + case TypeCheckKind::kBitstringCheck: + break; } LocationSummary* locations = @@ -7409,7 +7553,13 @@ void LocationsBuilderMIPS::VisitInstanceOf(HInstanceOf* instruction) { locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty()); // No caller-save registers. } locations->SetInAt(0, Location::RequiresRegister()); - locations->SetInAt(1, Location::RequiresRegister()); + if (type_check_kind == TypeCheckKind::kBitstringCheck) { + locations->SetInAt(1, Location::ConstantLocation(instruction->InputAt(1)->AsConstant())); + locations->SetInAt(2, Location::ConstantLocation(instruction->InputAt(2)->AsConstant())); + locations->SetInAt(3, Location::ConstantLocation(instruction->InputAt(3)->AsConstant())); + } else { + locations->SetInAt(1, Location::RequiresRegister()); + } // The output does overlap inputs. // Note that TypeCheckSlowPathMIPS uses this register too. locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap); @@ -7421,7 +7571,7 @@ void InstructionCodeGeneratorMIPS::VisitInstanceOf(HInstanceOf* instruction) { LocationSummary* locations = instruction->GetLocations(); Location obj_loc = locations->InAt(0); Register obj = obj_loc.AsRegister<Register>(); - Register cls = locations->InAt(1).AsRegister<Register>(); + Location cls = locations->InAt(1); Location out_loc = locations->Out(); Register out = out_loc.AsRegister<Register>(); const size_t num_temps = NumberOfInstanceOfTemps(type_check_kind); @@ -7453,7 +7603,7 @@ void InstructionCodeGeneratorMIPS::VisitInstanceOf(HInstanceOf* instruction) { maybe_temp_loc, read_barrier_option); // Classes must be equal for the instanceof to succeed. 
- __ Xor(out, out, cls); + __ Xor(out, out, cls.AsRegister<Register>()); __ Sltiu(out, out, 1); break; } @@ -7480,7 +7630,7 @@ void InstructionCodeGeneratorMIPS::VisitInstanceOf(HInstanceOf* instruction) { read_barrier_option); // If `out` is null, we use it for the result, and jump to `done`. __ Beqz(out, &done); - __ Bne(out, cls, &loop); + __ Bne(out, cls.AsRegister<Register>(), &loop); __ LoadConst32(out, 1); break; } @@ -7498,7 +7648,7 @@ void InstructionCodeGeneratorMIPS::VisitInstanceOf(HInstanceOf* instruction) { // Walk over the class hierarchy to find a match. MipsLabel loop, success; __ Bind(&loop); - __ Beq(out, cls, &success); + __ Beq(out, cls.AsRegister<Register>(), &success); // /* HeapReference<Class> */ out = out->super_class_ GenerateReferenceLoadOneRegister(instruction, out_loc, @@ -7525,7 +7675,7 @@ void InstructionCodeGeneratorMIPS::VisitInstanceOf(HInstanceOf* instruction) { read_barrier_option); // Do an exact check. MipsLabel success; - __ Beq(out, cls, &success); + __ Beq(out, cls.AsRegister<Register>(), &success); // Otherwise, we need to check that the object's class is a non-primitive array. // /* HeapReference<Class> */ out = out->component_type_ GenerateReferenceLoadOneRegister(instruction, @@ -7555,9 +7705,9 @@ void InstructionCodeGeneratorMIPS::VisitInstanceOf(HInstanceOf* instruction) { kWithoutReadBarrier); DCHECK(locations->OnlyCallsOnSlowPath()); slow_path = new (codegen_->GetScopedAllocator()) TypeCheckSlowPathMIPS( - instruction, /* is_fatal */ false); + instruction, /* is_fatal= */ false); codegen_->AddSlowPath(slow_path); - __ Bne(out, cls, slow_path->GetEntryLabel()); + __ Bne(out, cls.AsRegister<Register>(), slow_path->GetEntryLabel()); __ LoadConst32(out, 1); break; } @@ -7584,11 +7734,25 @@ void InstructionCodeGeneratorMIPS::VisitInstanceOf(HInstanceOf* instruction) { // This should also be beneficial for the other cases above. 
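The Xor + Sltiu pair used for the exact instanceof check above is a standard MIPS branchless equality: XOR yields zero only when the operands match, and "set on less than immediate unsigned 1" converts zero/non-zero into 1/0. In C the same computation is simply:

#include <cassert>
#include <cstdint>

inline uint32_t EqualsBranchless(uint32_t a, uint32_t b) {
  uint32_t x = a ^ b;         // Xor(out, out, cls): zero iff a == b
  return (x < 1u) ? 1u : 0u;  // Sltiu(out, out, 1): unsigned "< 1" means "== 0"
}

int main() {
  assert(EqualsBranchless(0x1234u, 0x1234u) == 1u);
  assert(EqualsBranchless(0x1234u, 0x1235u) == 0u);
  return 0;
}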
DCHECK(locations->OnlyCallsOnSlowPath()); slow_path = new (codegen_->GetScopedAllocator()) TypeCheckSlowPathMIPS( - instruction, /* is_fatal */ false); + instruction, /* is_fatal= */ false); codegen_->AddSlowPath(slow_path); __ B(slow_path->GetEntryLabel()); break; } + + case TypeCheckKind::kBitstringCheck: { + // /* HeapReference<Class> */ temp = obj->klass_ + GenerateReferenceLoadTwoRegisters(instruction, + out_loc, + obj_loc, + class_offset, + maybe_temp_loc, + kWithoutReadBarrier); + + GenerateBitstringTypeCheckCompare(instruction, out); + __ Sltiu(out, out, 1); + break; + } } __ Bind(&done); @@ -7712,6 +7876,14 @@ void InstructionCodeGeneratorMIPS::VisitInvokePolymorphic(HInvokePolymorphic* in codegen_->GenerateInvokePolymorphicCall(invoke); } +void LocationsBuilderMIPS::VisitInvokeCustom(HInvokeCustom* invoke) { + HandleInvoke(invoke); +} + +void InstructionCodeGeneratorMIPS::VisitInvokeCustom(HInvokeCustom* invoke) { + codegen_->GenerateInvokeCustomCall(invoke); +} + static bool TryGenerateIntrinsicCode(HInvoke* invoke, CodeGeneratorMIPS* codegen) { if (invoke->GetLocations()->Intrinsified()) { IntrinsicCodeGeneratorMIPS intrinsic(codegen); @@ -7725,14 +7897,14 @@ HLoadString::LoadKind CodeGeneratorMIPS::GetSupportedLoadStringKind( HLoadString::LoadKind desired_string_load_kind) { switch (desired_string_load_kind) { case HLoadString::LoadKind::kBootImageLinkTimePcRelative: - case HLoadString::LoadKind::kBootImageInternTable: + case HLoadString::LoadKind::kBootImageRelRo: case HLoadString::LoadKind::kBssEntry: DCHECK(!Runtime::Current()->UseJitCompilation()); break; + case HLoadString::LoadKind::kJitBootImageAddress: case HLoadString::LoadKind::kJitTableAddress: DCHECK(Runtime::Current()->UseJitCompilation()); break; - case HLoadString::LoadKind::kBootImageAddress: case HLoadString::LoadKind::kRuntimeCall: break; } @@ -7748,14 +7920,14 @@ HLoadClass::LoadKind CodeGeneratorMIPS::GetSupportedLoadClassKind( case HLoadClass::LoadKind::kReferrersClass: break; case HLoadClass::LoadKind::kBootImageLinkTimePcRelative: - case HLoadClass::LoadKind::kBootImageClassTable: + case HLoadClass::LoadKind::kBootImageRelRo: case HLoadClass::LoadKind::kBssEntry: DCHECK(!Runtime::Current()->UseJitCompilation()); break; + case HLoadClass::LoadKind::kJitBootImageAddress: case HLoadClass::LoadKind::kJitTableAddress: DCHECK(Runtime::Current()->UseJitCompilation()); break; - case HLoadClass::LoadKind::kBootImageAddress: case HLoadClass::LoadKind::kRuntimeCall: break; } @@ -7792,7 +7964,7 @@ Register CodeGeneratorMIPS::GetInvokeStaticOrDirectExtraParameter(HInvokeStaticO HInvokeStaticOrDirect::DispatchInfo CodeGeneratorMIPS::GetSupportedInvokeStaticOrDirectDispatch( const HInvokeStaticOrDirect::DispatchInfo& desired_dispatch_info, - HInvokeStaticOrDirect* invoke ATTRIBUTE_UNUSED) { + ArtMethod* method ATTRIBUTE_UNUSED) { return desired_dispatch_info; } @@ -7829,12 +8001,18 @@ void CodeGeneratorMIPS::GenerateStaticOrDirectCall( NewBootImageMethodPatch(invoke->GetTargetMethod(), info_high); Register temp_reg = temp.AsRegister<Register>(); EmitPcRelativeAddressPlaceholderHigh(info_high, TMP, base_reg); - __ Addiu(temp_reg, TMP, /* placeholder */ 0x5678, &info_low->label); + __ Addiu(temp_reg, TMP, /* imm16= */ 0x5678, &info_low->label); break; } - case HInvokeStaticOrDirect::MethodLoadKind::kDirectAddress: - __ LoadConst32(temp.AsRegister<Register>(), invoke->GetMethodAddress()); + case HInvokeStaticOrDirect::MethodLoadKind::kBootImageRelRo: { + uint32_t boot_image_offset = GetBootImageOffset(invoke); + 
PcRelativePatchInfo* info_high = NewBootImageRelRoPatch(boot_image_offset); + PcRelativePatchInfo* info_low = NewBootImageRelRoPatch(boot_image_offset, info_high); + Register temp_reg = temp.AsRegister<Register>(); + EmitPcRelativeAddressPlaceholderHigh(info_high, TMP, base_reg); + __ Lw(temp_reg, TMP, /* imm16= */ 0x5678, &info_low->label); break; + } case HInvokeStaticOrDirect::MethodLoadKind::kBssEntry: { PcRelativePatchInfo* info_high = NewMethodBssEntryPatch( MethodReference(&GetGraph()->GetDexFile(), invoke->GetDexMethodIndex())); @@ -7842,9 +8020,12 @@ void CodeGeneratorMIPS::GenerateStaticOrDirectCall( MethodReference(&GetGraph()->GetDexFile(), invoke->GetDexMethodIndex()), info_high); Register temp_reg = temp.AsRegister<Register>(); EmitPcRelativeAddressPlaceholderHigh(info_high, TMP, base_reg); - __ Lw(temp_reg, TMP, /* placeholder */ 0x5678, &info_low->label); + __ Lw(temp_reg, TMP, /* imm16= */ 0x5678, &info_low->label); break; } + case HInvokeStaticOrDirect::MethodLoadKind::kJitDirectAddress: + __ LoadConst32(temp.AsRegister<Register>(), invoke->GetMethodAddress()); + break; case HInvokeStaticOrDirect::MethodLoadKind::kRuntimeCall: { GenerateInvokeStaticOrDirectRuntimeCall(invoke, temp, slow_path); return; // No code pointer retrieval; the runtime performs the call directly. @@ -7955,14 +8136,14 @@ void LocationsBuilderMIPS::VisitLoadClass(HLoadClass* cls) { switch (load_kind) { // We need an extra register for PC-relative literals on R2. case HLoadClass::LoadKind::kBootImageLinkTimePcRelative: - case HLoadClass::LoadKind::kBootImageAddress: - case HLoadClass::LoadKind::kBootImageClassTable: + case HLoadClass::LoadKind::kBootImageRelRo: case HLoadClass::LoadKind::kBssEntry: + case HLoadClass::LoadKind::kJitBootImageAddress: if (isR6) { break; } if (has_irreducible_loops) { - if (load_kind != HLoadClass::LoadKind::kBootImageAddress) { + if (load_kind != HLoadClass::LoadKind::kJitBootImageAddress) { codegen_->ClobberRA(); } break; @@ -7978,10 +8159,7 @@ void LocationsBuilderMIPS::VisitLoadClass(HLoadClass* cls) { if (load_kind == HLoadClass::LoadKind::kBssEntry) { if (!kUseReadBarrier || kUseBakerReadBarrier) { // Rely on the type resolution or initialization and marking to save everything we need. - RegisterSet caller_saves = RegisterSet::Empty(); - InvokeRuntimeCallingConvention calling_convention; - caller_saves.Add(Location::RegisterLocation(calling_convention.GetRegisterAt(0))); - locations->SetCustomSlowPathCallerSaves(caller_saves); + locations->SetCustomSlowPathCallerSaves(OneRegInReferenceOutSaveEverythingCallerSaves()); } else { // For non-Baker read barriers we have a temp-clobbering call. } @@ -8007,9 +8185,9 @@ void InstructionCodeGeneratorMIPS::VisitLoadClass(HLoadClass* cls) NO_THREAD_SAF switch (load_kind) { // We need an extra register for PC-relative literals on R2. case HLoadClass::LoadKind::kBootImageLinkTimePcRelative: - case HLoadClass::LoadKind::kBootImageAddress: - case HLoadClass::LoadKind::kBootImageClassTable: + case HLoadClass::LoadKind::kBootImageRelRo: case HLoadClass::LoadKind::kBssEntry: + case HLoadClass::LoadKind::kJitBootImageAddress: base_or_current_method_reg = (isR6 || has_irreducible_loops) ? 
ZERO : locations->InAt(0).AsRegister<Register>(); break; @@ -8048,39 +8226,20 @@ void InstructionCodeGeneratorMIPS::VisitLoadClass(HLoadClass* cls) NO_THREAD_SAF codegen_->EmitPcRelativeAddressPlaceholderHigh(info_high, out, base_or_current_method_reg); - __ Addiu(out, out, /* placeholder */ 0x5678, &info_low->label); + __ Addiu(out, out, /* imm16= */ 0x5678, &info_low->label); break; } - case HLoadClass::LoadKind::kBootImageAddress: { - DCHECK_EQ(read_barrier_option, kWithoutReadBarrier); - uint32_t address = dchecked_integral_cast<uint32_t>( - reinterpret_cast<uintptr_t>(cls->GetClass().Get())); - DCHECK_NE(address, 0u); - if (isR6 || !has_irreducible_loops) { - __ LoadLiteral(out, - base_or_current_method_reg, - codegen_->DeduplicateBootImageAddressLiteral(address)); - } else { - __ LoadConst32(out, address); - } - break; - } - case HLoadClass::LoadKind::kBootImageClassTable: { + case HLoadClass::LoadKind::kBootImageRelRo: { DCHECK(!codegen_->GetCompilerOptions().IsBootImage()); + uint32_t boot_image_offset = codegen_->GetBootImageOffset(cls); CodeGeneratorMIPS::PcRelativePatchInfo* info_high = - codegen_->NewBootImageTypePatch(cls->GetDexFile(), cls->GetTypeIndex()); + codegen_->NewBootImageRelRoPatch(boot_image_offset); CodeGeneratorMIPS::PcRelativePatchInfo* info_low = - codegen_->NewBootImageTypePatch(cls->GetDexFile(), cls->GetTypeIndex(), info_high); + codegen_->NewBootImageRelRoPatch(boot_image_offset, info_high); codegen_->EmitPcRelativeAddressPlaceholderHigh(info_high, out, base_or_current_method_reg); - __ Lw(out, out, /* placeholder */ 0x5678, &info_low->label); - // Extract the reference from the slot data, i.e. clear the hash bits. - int32_t masked_hash = ClassTable::TableSlot::MaskHash( - ComputeModifiedUtf8Hash(cls->GetDexFile().StringByTypeIdx(cls->GetTypeIndex()))); - if (masked_hash != 0) { - __ Addiu(out, out, -masked_hash); - } + __ Lw(out, out, /* imm16= */ 0x5678, &info_low->label); break; } case HLoadClass::LoadKind::kBssEntry: { @@ -8094,24 +8253,37 @@ void InstructionCodeGeneratorMIPS::VisitLoadClass(HLoadClass* cls) NO_THREAD_SAF GenerateGcRootFieldLoad(cls, out_loc, out, - /* placeholder */ 0x5678, + /* offset= */ 0x5678, read_barrier_option, &info_low->label); generate_null_check = true; break; } + case HLoadClass::LoadKind::kJitBootImageAddress: { + DCHECK_EQ(read_barrier_option, kWithoutReadBarrier); + uint32_t address = reinterpret_cast32<uint32_t>(cls->GetClass().Get()); + DCHECK_NE(address, 0u); + if (isR6 || !has_irreducible_loops) { + __ LoadLiteral(out, + base_or_current_method_reg, + codegen_->DeduplicateBootImageAddressLiteral(address)); + } else { + __ LoadConst32(out, address); + } + break; + } case HLoadClass::LoadKind::kJitTableAddress: { CodeGeneratorMIPS::JitPatchInfo* info = codegen_->NewJitRootClassPatch(cls->GetDexFile(), cls->GetTypeIndex(), cls->GetClass()); bool reordering = __ SetReorder(false); __ Bind(&info->high_label); - __ Lui(out, /* placeholder */ 0x1234); + __ Lui(out, /* imm16= */ 0x1234); __ SetReorder(reordering); GenerateGcRootFieldLoad(cls, out_loc, out, - /* placeholder */ 0x5678, + /* offset= */ 0x5678, read_barrier_option, &info->low_label); break; @@ -8124,8 +8296,8 @@ void InstructionCodeGeneratorMIPS::VisitLoadClass(HLoadClass* cls) NO_THREAD_SAF if (generate_null_check || cls->MustGenerateClinitCheck()) { DCHECK(cls->CanCallRuntime()); - SlowPathCodeMIPS* slow_path = new (codegen_->GetScopedAllocator()) LoadClassSlowPathMIPS( - cls, cls, cls->GetDexPc(), cls->MustGenerateClinitCheck()); + SlowPathCodeMIPS* slow_path 
= + new (codegen_->GetScopedAllocator()) LoadClassSlowPathMIPS(cls, cls); codegen_->AddSlowPath(slow_path); if (generate_null_check) { __ Beqz(out, slow_path->GetEntryLabel()); @@ -8138,6 +8310,26 @@ void InstructionCodeGeneratorMIPS::VisitLoadClass(HLoadClass* cls) NO_THREAD_SAF } } +void LocationsBuilderMIPS::VisitLoadMethodHandle(HLoadMethodHandle* load) { + InvokeRuntimeCallingConvention calling_convention; + Location loc = Location::RegisterLocation(calling_convention.GetRegisterAt(0)); + CodeGenerator::CreateLoadMethodHandleRuntimeCallLocationSummary(load, loc, loc); +} + +void InstructionCodeGeneratorMIPS::VisitLoadMethodHandle(HLoadMethodHandle* load) { + codegen_->GenerateLoadMethodHandleRuntimeCall(load); +} + +void LocationsBuilderMIPS::VisitLoadMethodType(HLoadMethodType* load) { + InvokeRuntimeCallingConvention calling_convention; + Location loc = Location::RegisterLocation(calling_convention.GetRegisterAt(0)); + CodeGenerator::CreateLoadMethodTypeRuntimeCallLocationSummary(load, loc, loc); +} + +void InstructionCodeGeneratorMIPS::VisitLoadMethodType(HLoadMethodType* load) { + codegen_->GenerateLoadMethodTypeRuntimeCall(load); +} + static int32_t GetExceptionTlsOffset() { return Thread::ExceptionOffset<kMipsPointerSize>().Int32Value(); } @@ -8169,15 +8361,15 @@ void LocationsBuilderMIPS::VisitLoadString(HLoadString* load) { const bool has_irreducible_loops = codegen_->GetGraph()->HasIrreducibleLoops(); switch (load_kind) { // We need an extra register for PC-relative literals on R2. - case HLoadString::LoadKind::kBootImageAddress: case HLoadString::LoadKind::kBootImageLinkTimePcRelative: - case HLoadString::LoadKind::kBootImageInternTable: + case HLoadString::LoadKind::kBootImageRelRo: case HLoadString::LoadKind::kBssEntry: + case HLoadString::LoadKind::kJitBootImageAddress: if (isR6) { break; } if (has_irreducible_loops) { - if (load_kind != HLoadString::LoadKind::kBootImageAddress) { + if (load_kind != HLoadString::LoadKind::kJitBootImageAddress) { codegen_->ClobberRA(); } break; @@ -8198,10 +8390,7 @@ void LocationsBuilderMIPS::VisitLoadString(HLoadString* load) { if (load_kind == HLoadString::LoadKind::kBssEntry) { if (!kUseReadBarrier || kUseBakerReadBarrier) { // Rely on the pResolveString and marking to save everything we need. - RegisterSet caller_saves = RegisterSet::Empty(); - InvokeRuntimeCallingConvention calling_convention; - caller_saves.Add(Location::RegisterLocation(calling_convention.GetRegisterAt(0))); - locations->SetCustomSlowPathCallerSaves(caller_saves); + locations->SetCustomSlowPathCallerSaves(OneRegInReferenceOutSaveEverythingCallerSaves()); } else { // For non-Baker read barriers we have a temp-clobbering call. } @@ -8221,10 +8410,10 @@ void InstructionCodeGeneratorMIPS::VisitLoadString(HLoadString* load) NO_THREAD_ bool has_irreducible_loops = GetGraph()->HasIrreducibleLoops(); switch (load_kind) { // We need an extra register for PC-relative literals on R2. - case HLoadString::LoadKind::kBootImageAddress: case HLoadString::LoadKind::kBootImageLinkTimePcRelative: - case HLoadString::LoadKind::kBootImageInternTable: + case HLoadString::LoadKind::kBootImageRelRo: case HLoadString::LoadKind::kBssEntry: + case HLoadString::LoadKind::kJitBootImageAddress: base_or_current_method_reg = (isR6 || has_irreducible_loops) ? 
ZERO : locations->InAt(0).AsRegister<Register>(); break; @@ -8243,36 +8432,23 @@ void InstructionCodeGeneratorMIPS::VisitLoadString(HLoadString* load) NO_THREAD_ codegen_->EmitPcRelativeAddressPlaceholderHigh(info_high, out, base_or_current_method_reg); - __ Addiu(out, out, /* placeholder */ 0x5678, &info_low->label); + __ Addiu(out, out, /* imm16= */ 0x5678, &info_low->label); return; } - case HLoadString::LoadKind::kBootImageAddress: { - uint32_t address = dchecked_integral_cast<uint32_t>( - reinterpret_cast<uintptr_t>(load->GetString().Get())); - DCHECK_NE(address, 0u); - if (isR6 || !has_irreducible_loops) { - __ LoadLiteral(out, - base_or_current_method_reg, - codegen_->DeduplicateBootImageAddressLiteral(address)); - } else { - __ LoadConst32(out, address); - } - return; - } - case HLoadString::LoadKind::kBootImageInternTable: { + case HLoadString::LoadKind::kBootImageRelRo: { DCHECK(!codegen_->GetCompilerOptions().IsBootImage()); + uint32_t boot_image_offset = codegen_->GetBootImageOffset(load); CodeGeneratorMIPS::PcRelativePatchInfo* info_high = - codegen_->NewBootImageStringPatch(load->GetDexFile(), load->GetStringIndex()); + codegen_->NewBootImageRelRoPatch(boot_image_offset); CodeGeneratorMIPS::PcRelativePatchInfo* info_low = - codegen_->NewBootImageStringPatch(load->GetDexFile(), load->GetStringIndex(), info_high); + codegen_->NewBootImageRelRoPatch(boot_image_offset, info_high); codegen_->EmitPcRelativeAddressPlaceholderHigh(info_high, out, base_or_current_method_reg); - __ Lw(out, out, /* placeholder */ 0x5678, &info_low->label); + __ Lw(out, out, /* imm16= */ 0x5678, &info_low->label); return; } case HLoadString::LoadKind::kBssEntry: { - DCHECK(!codegen_->GetCompilerOptions().IsBootImage()); CodeGeneratorMIPS::PcRelativePatchInfo* info_high = codegen_->NewStringBssEntryPatch(load->GetDexFile(), load->GetStringIndex()); CodeGeneratorMIPS::PcRelativePatchInfo* info_low = @@ -8283,7 +8459,7 @@ void InstructionCodeGeneratorMIPS::VisitLoadString(HLoadString* load) NO_THREAD_ GenerateGcRootFieldLoad(load, out_loc, out, - /* placeholder */ 0x5678, + /* offset= */ 0x5678, kCompilerReadBarrierOption, &info_low->label); SlowPathCodeMIPS* slow_path = @@ -8293,6 +8469,18 @@ void InstructionCodeGeneratorMIPS::VisitLoadString(HLoadString* load) NO_THREAD_ __ Bind(slow_path->GetExitLabel()); return; } + case HLoadString::LoadKind::kJitBootImageAddress: { + uint32_t address = reinterpret_cast32<uint32_t>(load->GetString().Get()); + DCHECK_NE(address, 0u); + if (isR6 || !has_irreducible_loops) { + __ LoadLiteral(out, + base_or_current_method_reg, + codegen_->DeduplicateBootImageAddressLiteral(address)); + } else { + __ LoadConst32(out, address); + } + return; + } case HLoadString::LoadKind::kJitTableAddress: { CodeGeneratorMIPS::JitPatchInfo* info = codegen_->NewJitRootStringPatch(load->GetDexFile(), @@ -8300,12 +8488,12 @@ void InstructionCodeGeneratorMIPS::VisitLoadString(HLoadString* load) NO_THREAD_ load->GetString()); bool reordering = __ SetReorder(false); __ Bind(&info->high_label); - __ Lui(out, /* placeholder */ 0x1234); + __ Lui(out, /* imm16= */ 0x1234); __ SetReorder(reordering); GenerateGcRootFieldLoad(load, out_loc, out, - /* placeholder */ 0x5678, + /* offset= */ 0x5678, kCompilerReadBarrierOption, &info->low_label); return; @@ -8513,10 +8701,8 @@ void LocationsBuilderMIPS::VisitNewArray(HNewArray* instruction) { } void InstructionCodeGeneratorMIPS::VisitNewArray(HNewArray* instruction) { - // Note: if heap poisoning is enabled, the entry point takes care - // of poisoning the 
reference. - QuickEntrypointEnum entrypoint = - CodeGenerator::GetArrayAllocationEntrypoint(instruction->GetLoadClass()->GetClass()); + // Note: if heap poisoning is enabled, the entry point takes care of poisoning the reference. + QuickEntrypointEnum entrypoint = CodeGenerator::GetArrayAllocationEntrypoint(instruction); codegen_->InvokeRuntime(entrypoint, instruction, instruction->GetDexPc()); CheckEntrypointTypes<kQuickAllocArrayResolved, void*, mirror::Class*, int32_t>(); DCHECK(!codegen_->IsLeafMethod()); @@ -8526,30 +8712,13 @@ void LocationsBuilderMIPS::VisitNewInstance(HNewInstance* instruction) { LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary( instruction, LocationSummary::kCallOnMainOnly); InvokeRuntimeCallingConvention calling_convention; - if (instruction->IsStringAlloc()) { - locations->AddTemp(Location::RegisterLocation(kMethodRegisterArgument)); - } else { - locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0))); - } + locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0))); locations->SetOut(calling_convention.GetReturnLocation(DataType::Type::kReference)); } void InstructionCodeGeneratorMIPS::VisitNewInstance(HNewInstance* instruction) { - // Note: if heap poisoning is enabled, the entry point takes care - // of poisoning the reference. - if (instruction->IsStringAlloc()) { - // String is allocated through StringFactory. Call NewEmptyString entry point. - Register temp = instruction->GetLocations()->GetTemp(0).AsRegister<Register>(); - MemberOffset code_offset = ArtMethod::EntryPointFromQuickCompiledCodeOffset(kMipsPointerSize); - __ LoadFromOffset(kLoadWord, temp, TR, QUICK_ENTRY_POINT(pNewEmptyString)); - __ LoadFromOffset(kLoadWord, T9, temp, code_offset.Int32Value()); - __ Jalr(T9); - __ NopIfNoReordering(); - codegen_->RecordPcInfo(instruction, instruction->GetDexPc()); - } else { - codegen_->InvokeRuntime(instruction->GetEntrypoint(), instruction, instruction->GetDexPc()); - CheckEntrypointTypes<kQuickAllocObjectWithChecks, void*, mirror::Class*>(); - } + codegen_->InvokeRuntime(instruction->GetEntrypoint(), instruction, instruction->GetDexPc()); + CheckEntrypointTypes<kQuickAllocObjectWithChecks, void*, mirror::Class*>(); } void LocationsBuilderMIPS::VisitNot(HNot* instruction) { @@ -8779,6 +8948,501 @@ void InstructionCodeGeneratorMIPS::VisitRem(HRem* instruction) { } } +static void CreateMinMaxLocations(ArenaAllocator* allocator, HBinaryOperation* minmax) { + LocationSummary* locations = new (allocator) LocationSummary(minmax); + switch (minmax->GetResultType()) { + case DataType::Type::kInt32: + case DataType::Type::kInt64: + locations->SetInAt(0, Location::RequiresRegister()); + locations->SetInAt(1, Location::RequiresRegister()); + locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); + break; + case DataType::Type::kFloat32: + case DataType::Type::kFloat64: + locations->SetInAt(0, Location::RequiresFpuRegister()); + locations->SetInAt(1, Location::RequiresFpuRegister()); + locations->SetOut(Location::RequiresFpuRegister(), Location::kOutputOverlap); + break; + default: + LOG(FATAL) << "Unexpected type for HMinMax " << minmax->GetResultType(); + } +} + +void InstructionCodeGeneratorMIPS::GenerateMinMaxInt(LocationSummary* locations, + bool is_min, + bool isR6, + DataType::Type type) { + if (isR6) { + // Some architectures, such as ARM and MIPS (prior to r6), have a + // conditional move instruction which only changes the target + // (output) 
register if the condition is true (MIPS prior to r6 had + // MOVF, MOVT, MOVN, and MOVZ). The SELEQZ and SELNEZ instructions + // always change the target (output) register. If the condition is + // true the output register gets the contents of the "rs" register; + // otherwise, the output register is set to zero. One consequence + // of this is that to implement something like "rd = c==0 ? rs : rt" + // MIPS64r6 needs to use a pair of SELEQZ/SELNEZ instructions. + // After executing this pair of instructions one of the output + // registers from the pair will necessarily contain zero. Then the + // code ORs the output registers from the SELEQZ/SELNEZ instructions + // to get the final result. + // + // The initial test to see if the output register is same as the + // first input register is needed to make sure that value in the + // first input register isn't clobbered before we've finished + // computing the output value. The logic in the corresponding else + // clause performs the same task but makes sure the second input + // register isn't clobbered in the event that it's the same register + // as the output register; the else clause also handles the case + // where the output register is distinct from both the first, and the + // second input registers. + if (type == DataType::Type::kInt64) { + Register a_lo = locations->InAt(0).AsRegisterPairLow<Register>(); + Register a_hi = locations->InAt(0).AsRegisterPairHigh<Register>(); + Register b_lo = locations->InAt(1).AsRegisterPairLow<Register>(); + Register b_hi = locations->InAt(1).AsRegisterPairHigh<Register>(); + Register out_lo = locations->Out().AsRegisterPairLow<Register>(); + Register out_hi = locations->Out().AsRegisterPairHigh<Register>(); + + MipsLabel compare_done; + + if (a_lo == b_lo) { + if (out_lo != a_lo) { + __ Move(out_lo, a_lo); + __ Move(out_hi, a_hi); + } + } else { + __ Slt(TMP, b_hi, a_hi); + __ Bne(b_hi, a_hi, &compare_done); + + __ Sltu(TMP, b_lo, a_lo); + + __ Bind(&compare_done); + + if (is_min) { + __ Seleqz(AT, a_lo, TMP); + __ Selnez(out_lo, b_lo, TMP); // Safe even if out_lo == a_lo/b_lo + // because at this point we're + // done using a_lo/b_lo. 
+ } else { + __ Selnez(AT, a_lo, TMP); + __ Seleqz(out_lo, b_lo, TMP); // ditto + } + __ Or(out_lo, out_lo, AT); + if (is_min) { + __ Seleqz(AT, a_hi, TMP); + __ Selnez(out_hi, b_hi, TMP); // ditto but for out_hi & a_hi/b_hi + } else { + __ Selnez(AT, a_hi, TMP); + __ Seleqz(out_hi, b_hi, TMP); // ditto but for out_hi & a_hi/b_hi + } + __ Or(out_hi, out_hi, AT); + } + } else { + DCHECK_EQ(type, DataType::Type::kInt32); + Register a = locations->InAt(0).AsRegister<Register>(); + Register b = locations->InAt(1).AsRegister<Register>(); + Register out = locations->Out().AsRegister<Register>(); + + if (a == b) { + if (out != a) { + __ Move(out, a); + } + } else { + __ Slt(AT, b, a); + if (is_min) { + __ Seleqz(TMP, a, AT); + __ Selnez(AT, b, AT); + } else { + __ Selnez(TMP, a, AT); + __ Seleqz(AT, b, AT); + } + __ Or(out, TMP, AT); + } + } + } else { // !isR6 + if (type == DataType::Type::kInt64) { + Register a_lo = locations->InAt(0).AsRegisterPairLow<Register>(); + Register a_hi = locations->InAt(0).AsRegisterPairHigh<Register>(); + Register b_lo = locations->InAt(1).AsRegisterPairLow<Register>(); + Register b_hi = locations->InAt(1).AsRegisterPairHigh<Register>(); + Register out_lo = locations->Out().AsRegisterPairLow<Register>(); + Register out_hi = locations->Out().AsRegisterPairHigh<Register>(); + + MipsLabel compare_done; + + if (a_lo == b_lo) { + if (out_lo != a_lo) { + __ Move(out_lo, a_lo); + __ Move(out_hi, a_hi); + } + } else { + __ Slt(TMP, a_hi, b_hi); + __ Bne(a_hi, b_hi, &compare_done); + + __ Sltu(TMP, a_lo, b_lo); + + __ Bind(&compare_done); + + if (is_min) { + if (out_lo != a_lo) { + __ Movn(out_hi, a_hi, TMP); + __ Movn(out_lo, a_lo, TMP); + } + if (out_lo != b_lo) { + __ Movz(out_hi, b_hi, TMP); + __ Movz(out_lo, b_lo, TMP); + } + } else { + if (out_lo != a_lo) { + __ Movz(out_hi, a_hi, TMP); + __ Movz(out_lo, a_lo, TMP); + } + if (out_lo != b_lo) { + __ Movn(out_hi, b_hi, TMP); + __ Movn(out_lo, b_lo, TMP); + } + } + } + } else { + DCHECK_EQ(type, DataType::Type::kInt32); + Register a = locations->InAt(0).AsRegister<Register>(); + Register b = locations->InAt(1).AsRegister<Register>(); + Register out = locations->Out().AsRegister<Register>(); + + if (a == b) { + if (out != a) { + __ Move(out, a); + } + } else { + __ Slt(AT, a, b); + if (is_min) { + if (out != a) { + __ Movn(out, a, AT); + } + if (out != b) { + __ Movz(out, b, AT); + } + } else { + if (out != a) { + __ Movz(out, a, AT); + } + if (out != b) { + __ Movn(out, b, AT); + } + } + } + } + } +} + +void InstructionCodeGeneratorMIPS::GenerateMinMaxFP(LocationSummary* locations, + bool is_min, + bool isR6, + DataType::Type type) { + FRegister out = locations->Out().AsFpuRegister<FRegister>(); + FRegister a = locations->InAt(0).AsFpuRegister<FRegister>(); + FRegister b = locations->InAt(1).AsFpuRegister<FRegister>(); + + if (isR6) { + MipsLabel noNaNs; + MipsLabel done; + FRegister ftmp = ((out != a) && (out != b)) ? out : FTMP; + + // When Java computes min/max it prefers a NaN to a number; the + // behavior of MIPSR6 is to prefer numbers to NaNs, i.e., if one of + // the inputs is a NaN and the other is a valid number, the MIPS + // instruction will return the number; Java wants the NaN value + // returned. This is why there is extra logic preceding the use of + // the MIPS min.fmt/max.fmt instructions. If either a, or b holds a + // NaN, return the NaN, otherwise return the min/max. 
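(Editor's aside, not part of the patch.) Two plain-C++ reference models for this hunk, under stated assumptions: the branchless R6 selection pattern described in the long comment above (SELNEZ keeps its input when the condition register is non-zero, SELEQZ keeps it when the condition is zero, and OR-ing the two partial results yields `cond ? a : b` with no branch), and the Java min semantics the FP paths below must reproduce (a NaN operand wins, and -0.0f is "smaller" than +0.0f even though they compare equal). Function names here are illustrative only and do not exist in the ART sources.

#include <cstdint>
#include <cstring>

// seleqz rd, rs, rt : rd = (rt == 0) ? rs : 0
// selnez rd, rs, rt : rd = (rt != 0) ? rs : 0
static uint32_t Seleqz(uint32_t rs, uint32_t rt) { return rt == 0 ? rs : 0; }
static uint32_t Selnez(uint32_t rs, uint32_t rt) { return rt != 0 ? rs : 0; }

// Mirrors the R6 int32 min sequence Slt/Seleqz/Selnez/Or emitted above.
int32_t MinInt32R6Style(int32_t a, int32_t b) {
  uint32_t cond = static_cast<uint32_t>(b < a);              // __ Slt(AT, b, a)
  uint32_t keep_a = Seleqz(static_cast<uint32_t>(a), cond);  // __ Seleqz(TMP, a, AT)
  uint32_t keep_b = Selnez(static_cast<uint32_t>(b), cond);  // __ Selnez(AT, b, AT)
  return static_cast<int32_t>(keep_a | keep_b);              // __ Or(out, TMP, AT)
}

// Java Math.min semantics for floats: NaN is preferred to any number, and when
// the operands compare equal the sign bits are OR-ed so that -0.0f prevails
// (the pre-R6 path below does exactly this on the raw bits; max uses AND).
float JavaMinFloat(float a, float b) {
  if (a != a) return a;                    // a is NaN.
  if (b != b) return b;                    // b is NaN.
  if (a == b) {                            // Covers -0.0f vs +0.0f as well.
    uint32_t a_bits, b_bits, r_bits;
    std::memcpy(&a_bits, &a, sizeof(a_bits));
    std::memcpy(&b_bits, &b, sizeof(b_bits));
    r_bits = a_bits | b_bits;              // OR of sign bits: -0.0f wins for min.
    float r;
    std::memcpy(&r, &r_bits, sizeof(r));
    return r;
  }
  return (a < b) ? a : b;
}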
+ if (type == DataType::Type::kFloat64) { + __ CmpUnD(FTMP, a, b); + __ Bc1eqz(FTMP, &noNaNs); + + // One of the inputs is a NaN + __ CmpEqD(ftmp, a, a); + // If a == a then b is the NaN, otherwise a is the NaN. + __ SelD(ftmp, a, b); + + if (ftmp != out) { + __ MovD(out, ftmp); + } + + __ B(&done); + + __ Bind(&noNaNs); + + if (is_min) { + __ MinD(out, a, b); + } else { + __ MaxD(out, a, b); + } + } else { + DCHECK_EQ(type, DataType::Type::kFloat32); + __ CmpUnS(FTMP, a, b); + __ Bc1eqz(FTMP, &noNaNs); + + // One of the inputs is a NaN + __ CmpEqS(ftmp, a, a); + // If a == a then b is the NaN, otherwise a is the NaN. + __ SelS(ftmp, a, b); + + if (ftmp != out) { + __ MovS(out, ftmp); + } + + __ B(&done); + + __ Bind(&noNaNs); + + if (is_min) { + __ MinS(out, a, b); + } else { + __ MaxS(out, a, b); + } + } + + __ Bind(&done); + + } else { // !isR6 + MipsLabel ordered; + MipsLabel compare; + MipsLabel select; + MipsLabel done; + + if (type == DataType::Type::kFloat64) { + __ CunD(a, b); + } else { + DCHECK_EQ(type, DataType::Type::kFloat32); + __ CunS(a, b); + } + __ Bc1f(&ordered); + + // a or b (or both) is a NaN. Return one, which is a NaN. + if (type == DataType::Type::kFloat64) { + __ CeqD(b, b); + } else { + __ CeqS(b, b); + } + __ B(&select); + + __ Bind(&ordered); + + // Neither is a NaN. + // a == b? (-0.0 compares equal with +0.0) + // If equal, handle zeroes, else compare further. + if (type == DataType::Type::kFloat64) { + __ CeqD(a, b); + } else { + __ CeqS(a, b); + } + __ Bc1f(&compare); + + // a == b either bit for bit or one is -0.0 and the other is +0.0. + if (type == DataType::Type::kFloat64) { + __ MoveFromFpuHigh(TMP, a); + __ MoveFromFpuHigh(AT, b); + } else { + __ Mfc1(TMP, a); + __ Mfc1(AT, b); + } + + if (is_min) { + // -0.0 prevails over +0.0. + __ Or(TMP, TMP, AT); + } else { + // +0.0 prevails over -0.0. + __ And(TMP, TMP, AT); + } + + if (type == DataType::Type::kFloat64) { + __ Mfc1(AT, a); + __ Mtc1(AT, out); + __ MoveToFpuHigh(TMP, out); + } else { + __ Mtc1(TMP, out); + } + __ B(&done); + + __ Bind(&compare); + + if (type == DataType::Type::kFloat64) { + if (is_min) { + // return (a <= b) ? a : b; + __ ColeD(a, b); + } else { + // return (a >= b) ? a : b; + __ ColeD(b, a); // b <= a + } + } else { + if (is_min) { + // return (a <= b) ? a : b; + __ ColeS(a, b); + } else { + // return (a >= b) ? 
a : b; + __ ColeS(b, a); // b <= a + } + } + + __ Bind(&select); + + if (type == DataType::Type::kFloat64) { + __ MovtD(out, a); + __ MovfD(out, b); + } else { + __ MovtS(out, a); + __ MovfS(out, b); + } + + __ Bind(&done); + } +} + +void InstructionCodeGeneratorMIPS::GenerateMinMax(HBinaryOperation* minmax, bool is_min) { + bool isR6 = codegen_->GetInstructionSetFeatures().IsR6(); + DataType::Type type = minmax->GetResultType(); + switch (type) { + case DataType::Type::kInt32: + case DataType::Type::kInt64: + GenerateMinMaxInt(minmax->GetLocations(), is_min, isR6, type); + break; + case DataType::Type::kFloat32: + case DataType::Type::kFloat64: + GenerateMinMaxFP(minmax->GetLocations(), is_min, isR6, type); + break; + default: + LOG(FATAL) << "Unexpected type for HMinMax " << type; + } +} + +void LocationsBuilderMIPS::VisitMin(HMin* min) { + CreateMinMaxLocations(GetGraph()->GetAllocator(), min); +} + +void InstructionCodeGeneratorMIPS::VisitMin(HMin* min) { + GenerateMinMax(min, /*is_min*/ true); +} + +void LocationsBuilderMIPS::VisitMax(HMax* max) { + CreateMinMaxLocations(GetGraph()->GetAllocator(), max); +} + +void InstructionCodeGeneratorMIPS::VisitMax(HMax* max) { + GenerateMinMax(max, /*is_min*/ false); +} + +void LocationsBuilderMIPS::VisitAbs(HAbs* abs) { + LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(abs); + switch (abs->GetResultType()) { + case DataType::Type::kInt32: + case DataType::Type::kInt64: + locations->SetInAt(0, Location::RequiresRegister()); + locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); + break; + case DataType::Type::kFloat32: + case DataType::Type::kFloat64: + locations->SetInAt(0, Location::RequiresFpuRegister()); + locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap); + break; + default: + LOG(FATAL) << "Unexpected abs type " << abs->GetResultType(); + } +} + +void InstructionCodeGeneratorMIPS::GenerateAbsFP(LocationSummary* locations, + DataType::Type type, + bool isR2OrNewer, + bool isR6) { + FRegister in = locations->InAt(0).AsFpuRegister<FRegister>(); + FRegister out = locations->Out().AsFpuRegister<FRegister>(); + + // Note, as a "quality of implementation", rather than pure "spec compliance", we require that + // Math.abs() clears the sign bit (but changes nothing else) for all numbers, including NaN + // (signaling NaN may become quiet though). + // + // The ABS.fmt instructions (abs.s and abs.d) do exactly that when NAN2008=1 (R6). For this case, + // both regular floating point numbers and NAN values are treated alike, only the sign bit is + // affected by this instruction. + // But when NAN2008=0 (R2 and before), the ABS.fmt instructions can't be used. For this case, any + // NaN operand signals invalid operation. This means that other bits (not just sign bit) might be + // changed when doing abs(NaN). Because of that, we clear sign bit in a different way. + if (isR6) { + if (type == DataType::Type::kFloat64) { + __ AbsD(out, in); + } else { + DCHECK_EQ(type, DataType::Type::kFloat32); + __ AbsS(out, in); + } + } else { + if (type == DataType::Type::kFloat64) { + if (in != out) { + __ MovD(out, in); + } + __ MoveFromFpuHigh(TMP, in); + // ins instruction is not available for R1. + if (isR2OrNewer) { + __ Ins(TMP, ZERO, 31, 1); + } else { + __ Sll(TMP, TMP, 1); + __ Srl(TMP, TMP, 1); + } + __ MoveToFpuHigh(TMP, out); + } else { + DCHECK_EQ(type, DataType::Type::kFloat32); + __ Mfc1(TMP, in); + // ins instruction is not available for R1. 
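(Editor's aside, not part of the patch.) A small C++ sketch of the bit manipulations the abs paths rely on: clearing only the sign bit for the floating-point case, which is what the Ins(TMP, ZERO, 31, 1) on R2+ or the Sll/Srl pair on R1 just below achieves, and the branch-free sign-propagation trick used by the integer VisitAbs cases a bit further down (Sra/Xor/Subu). Function names are illustrative; the integer version assumes arithmetic right shift, matching Sra.

#include <cstdint>
#include <cstring>

// Float abs: clear bit 31 (the sign) and nothing else, so NaN payload bits
// are preserved (the sign of a NaN may still change, which is allowed here).
float AbsViaSignClear(float in) {
  uint32_t bits;
  std::memcpy(&bits, &in, sizeof(bits));
  bits &= 0x7fffffffu;          // R2+: Ins(bits, ZERO, 31, 1); R1: (bits << 1) >> 1.
  float out;
  std::memcpy(&out, &bits, sizeof(out));
  return out;
}

// Int32 abs without a branch: mask = in >> 31 is 0 for non-negative inputs and
// -1 for negative ones; (in ^ mask) - mask negates `in` exactly when needed.
// Like Java's Math.abs, INT32_MIN maps to itself.
int32_t AbsViaSignPropagation(int32_t in) {
  int32_t mask = in >> 31;      // __ Sra(AT, in, 31)
  return (in ^ mask) - mask;    // __ Xor(out, in, AT); __ Subu(out, out, AT)
}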
+ if (isR2OrNewer) { + __ Ins(TMP, ZERO, 31, 1); + } else { + __ Sll(TMP, TMP, 1); + __ Srl(TMP, TMP, 1); + } + __ Mtc1(TMP, out); + } + } +} + +void InstructionCodeGeneratorMIPS::VisitAbs(HAbs* abs) { + LocationSummary* locations = abs->GetLocations(); + bool isR2OrNewer = codegen_->GetInstructionSetFeatures().IsMipsIsaRevGreaterThanEqual2(); + bool isR6 = codegen_->GetInstructionSetFeatures().IsR6(); + switch (abs->GetResultType()) { + case DataType::Type::kInt32: { + Register in = locations->InAt(0).AsRegister<Register>(); + Register out = locations->Out().AsRegister<Register>(); + __ Sra(AT, in, 31); + __ Xor(out, in, AT); + __ Subu(out, out, AT); + break; + } + case DataType::Type::kInt64: { + Register in_lo = locations->InAt(0).AsRegisterPairLow<Register>(); + Register in_hi = locations->InAt(0).AsRegisterPairHigh<Register>(); + Register out_lo = locations->Out().AsRegisterPairLow<Register>(); + Register out_hi = locations->Out().AsRegisterPairHigh<Register>(); + // The comments in this section show the analogous operations which would + // be performed if we had 64-bit registers "in", and "out". + // __ Dsra32(AT, in, 31); + __ Sra(AT, in_hi, 31); + // __ Xor(out, in, AT); + __ Xor(TMP, in_lo, AT); + __ Xor(out_hi, in_hi, AT); + // __ Dsubu(out, out, AT); + __ Subu(out_lo, TMP, AT); + __ Sltu(TMP, out_lo, TMP); + __ Addu(out_hi, out_hi, TMP); + break; + } + case DataType::Type::kFloat32: + case DataType::Type::kFloat64: + GenerateAbsFP(locations, abs->GetResultType(), isR2OrNewer, isR6); + break; + default: + LOG(FATAL) << "Unexpected abs type " << abs->GetResultType(); + } +} + void LocationsBuilderMIPS::VisitConstructorFence(HConstructorFence* constructor_fence) { constructor_fence->SetLocations(nullptr); } diff --git a/compiler/optimizing/code_generator_mips.h b/compiler/optimizing/code_generator_mips.h index c91cb62eda..50807310b6 100644 --- a/compiler/optimizing/code_generator_mips.h +++ b/compiler/optimizing/code_generator_mips.h @@ -81,9 +81,9 @@ class InvokeDexCallingConventionVisitorMIPS : public InvokeDexCallingConventionV InvokeDexCallingConventionVisitorMIPS() {} virtual ~InvokeDexCallingConventionVisitorMIPS() {} - Location GetNextLocation(DataType::Type type) OVERRIDE; - Location GetReturnLocation(DataType::Type type) const OVERRIDE; - Location GetMethodLocation() const OVERRIDE; + Location GetNextLocation(DataType::Type type) override; + Location GetReturnLocation(DataType::Type type) const override; + Location GetMethodLocation() const override; private: InvokeDexCallingConvention calling_convention; @@ -110,23 +110,23 @@ class FieldAccessCallingConventionMIPS : public FieldAccessCallingConvention { public: FieldAccessCallingConventionMIPS() {} - Location GetObjectLocation() const OVERRIDE { + Location GetObjectLocation() const override { return Location::RegisterLocation(A1); } - Location GetFieldIndexLocation() const OVERRIDE { + Location GetFieldIndexLocation() const override { return Location::RegisterLocation(A0); } - Location GetReturnLocation(DataType::Type type) const OVERRIDE { + Location GetReturnLocation(DataType::Type type) const override { return DataType::Is64BitType(type) ? Location::RegisterPairLocation(V0, V1) : Location::RegisterLocation(V0); } - Location GetSetValueLocation(DataType::Type type, bool is_instance) const OVERRIDE { + Location GetSetValueLocation(DataType::Type type, bool is_instance) const override { return DataType::Is64BitType(type) ? Location::RegisterPairLocation(A2, A3) : (is_instance ? 
Location::RegisterLocation(A2) : Location::RegisterLocation(A1)); } - Location GetFpuLocation(DataType::Type type ATTRIBUTE_UNUSED) const OVERRIDE { + Location GetFpuLocation(DataType::Type type ATTRIBUTE_UNUSED) const override { return Location::FpuRegisterLocation(F0); } @@ -139,10 +139,10 @@ class ParallelMoveResolverMIPS : public ParallelMoveResolverWithSwap { ParallelMoveResolverMIPS(ArenaAllocator* allocator, CodeGeneratorMIPS* codegen) : ParallelMoveResolverWithSwap(allocator), codegen_(codegen) {} - void EmitMove(size_t index) OVERRIDE; - void EmitSwap(size_t index) OVERRIDE; - void SpillScratch(int reg) OVERRIDE; - void RestoreScratch(int reg) OVERRIDE; + void EmitMove(size_t index) override; + void EmitSwap(size_t index) override; + void SpillScratch(int reg) override; + void RestoreScratch(int reg) override; void Exchange(int index1, int index2, bool double_slot); void ExchangeQuadSlots(int index1, int index2); @@ -176,14 +176,14 @@ class LocationsBuilderMIPS : public HGraphVisitor { : HGraphVisitor(graph), codegen_(codegen) {} #define DECLARE_VISIT_INSTRUCTION(name, super) \ - void Visit##name(H##name* instr) OVERRIDE; + void Visit##name(H##name* instr) override; FOR_EACH_CONCRETE_INSTRUCTION_COMMON(DECLARE_VISIT_INSTRUCTION) FOR_EACH_CONCRETE_INSTRUCTION_MIPS(DECLARE_VISIT_INSTRUCTION) #undef DECLARE_VISIT_INSTRUCTION - void VisitInstruction(HInstruction* instruction) OVERRIDE { + void VisitInstruction(HInstruction* instruction) override { LOG(FATAL) << "Unreachable instruction " << instruction->DebugName() << " (id " << instruction->GetId() << ")"; } @@ -210,14 +210,14 @@ class InstructionCodeGeneratorMIPS : public InstructionCodeGenerator { InstructionCodeGeneratorMIPS(HGraph* graph, CodeGeneratorMIPS* codegen); #define DECLARE_VISIT_INSTRUCTION(name, super) \ - void Visit##name(H##name* instr) OVERRIDE; + void Visit##name(H##name* instr) override; FOR_EACH_CONCRETE_INSTRUCTION_COMMON(DECLARE_VISIT_INSTRUCTION) FOR_EACH_CONCRETE_INSTRUCTION_MIPS(DECLARE_VISIT_INSTRUCTION) #undef DECLARE_VISIT_INSTRUCTION - void VisitInstruction(HInstruction* instruction) OVERRIDE { + void VisitInstruction(HInstruction* instruction) override { LOG(FATAL) << "Unreachable instruction " << instruction->DebugName() << " (id " << instruction->GetId() << ")"; } @@ -237,6 +237,7 @@ class InstructionCodeGeneratorMIPS : public InstructionCodeGenerator { private: void GenerateClassInitializationCheck(SlowPathCodeMIPS* slow_path, Register class_reg); void GenerateSuspendCheck(HSuspendCheck* check, HBasicBlock* successor); + void GenerateBitstringTypeCheckCompare(HTypeCheckInstruction* check, Register temp); void HandleBinaryOp(HBinaryOperation* operation); void HandleCondition(HCondition* instruction); void HandleShift(HBinaryOperation* operation); @@ -246,6 +247,11 @@ class InstructionCodeGeneratorMIPS : public InstructionCodeGenerator { bool value_can_be_null); void HandleFieldGet(HInstruction* instruction, const FieldInfo& field_info, uint32_t dex_pc); + void GenerateMinMaxInt(LocationSummary* locations, bool is_min, bool isR6, DataType::Type type); + void GenerateMinMaxFP(LocationSummary* locations, bool is_min, bool isR6, DataType::Type type); + void GenerateMinMax(HBinaryOperation*, bool is_min); + void GenerateAbsFP(LocationSummary* locations, DataType::Type type, bool isR2OrNewer, bool isR6); + // Generate a heap reference load using one register `out`: // // out <- *(out + offset) @@ -364,40 +370,39 @@ class InstructionCodeGeneratorMIPS : public InstructionCodeGenerator { class 
CodeGeneratorMIPS : public CodeGenerator { public: CodeGeneratorMIPS(HGraph* graph, - const MipsInstructionSetFeatures& isa_features, const CompilerOptions& compiler_options, OptimizingCompilerStats* stats = nullptr); virtual ~CodeGeneratorMIPS() {} - void ComputeSpillMask() OVERRIDE; - bool HasAllocatedCalleeSaveRegisters() const OVERRIDE; - void GenerateFrameEntry() OVERRIDE; - void GenerateFrameExit() OVERRIDE; + void ComputeSpillMask() override; + bool HasAllocatedCalleeSaveRegisters() const override; + void GenerateFrameEntry() override; + void GenerateFrameExit() override; - void Bind(HBasicBlock* block) OVERRIDE; + void Bind(HBasicBlock* block) override; void MoveConstant(Location location, HConstant* c); - size_t GetWordSize() const OVERRIDE { return kMipsWordSize; } + size_t GetWordSize() const override { return kMipsWordSize; } - size_t GetFloatingPointSpillSlotSize() const OVERRIDE { + size_t GetFloatingPointSpillSlotSize() const override { return GetGraph()->HasSIMD() ? 2 * kMipsDoublewordSize // 16 bytes for each spill. : 1 * kMipsDoublewordSize; // 8 bytes for each spill. } - uintptr_t GetAddressOf(HBasicBlock* block) OVERRIDE { + uintptr_t GetAddressOf(HBasicBlock* block) override { return assembler_.GetLabelLocation(GetLabelOf(block)); } - HGraphVisitor* GetLocationBuilder() OVERRIDE { return &location_builder_; } - HGraphVisitor* GetInstructionVisitor() OVERRIDE { return &instruction_visitor_; } - MipsAssembler* GetAssembler() OVERRIDE { return &assembler_; } - const MipsAssembler& GetAssembler() const OVERRIDE { return assembler_; } + HGraphVisitor* GetLocationBuilder() override { return &location_builder_; } + HGraphVisitor* GetInstructionVisitor() override { return &instruction_visitor_; } + MipsAssembler* GetAssembler() override { return &assembler_; } + const MipsAssembler& GetAssembler() const override { return assembler_; } // Emit linker patches. - void EmitLinkerPatches(ArenaVector<linker::LinkerPatch>* linker_patches) OVERRIDE; - void EmitJitRootPatches(uint8_t* code, const uint8_t* roots_data) OVERRIDE; + void EmitLinkerPatches(ArenaVector<linker::LinkerPatch>* linker_patches) override; + void EmitJitRootPatches(uint8_t* code, const uint8_t* roots_data) override; // Fast path implementation of ReadBarrier::Barrier for a heap // reference field load when Baker's read barriers are used. @@ -488,48 +493,46 @@ class CodeGeneratorMIPS : public CodeGenerator { // Register allocation. 
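(Editor's aside, not part of the patch.) On the OVERRIDE -> override churn throughout these headers: assuming OVERRIDE was a thin macro for the C++11 keyword, spelling `override` directly keeps the same compile-time check without the indirection. A minimal sketch of what that check buys, with illustrative type names:

struct CodeGeneratorBase {
  virtual ~CodeGeneratorBase() = default;
  virtual void Bind(int block_id) = 0;
};

struct CodeGeneratorExample : CodeGeneratorBase {
  void Bind(int /* block_id */) override {}     // OK: matches the base virtual.
  // void Bind(long block_id) override {}       // Error: marked override, overrides nothing.
};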
- void SetupBlockedRegisters() const OVERRIDE; + void SetupBlockedRegisters() const override; - size_t SaveCoreRegister(size_t stack_index, uint32_t reg_id) OVERRIDE; - size_t RestoreCoreRegister(size_t stack_index, uint32_t reg_id) OVERRIDE; - size_t SaveFloatingPointRegister(size_t stack_index, uint32_t reg_id) OVERRIDE; - size_t RestoreFloatingPointRegister(size_t stack_index, uint32_t reg_id) OVERRIDE; + size_t SaveCoreRegister(size_t stack_index, uint32_t reg_id) override; + size_t RestoreCoreRegister(size_t stack_index, uint32_t reg_id) override; + size_t SaveFloatingPointRegister(size_t stack_index, uint32_t reg_id) override; + size_t RestoreFloatingPointRegister(size_t stack_index, uint32_t reg_id) override; void ClobberRA() { clobbered_ra_ = true; } - void DumpCoreRegister(std::ostream& stream, int reg) const OVERRIDE; - void DumpFloatingPointRegister(std::ostream& stream, int reg) const OVERRIDE; + void DumpCoreRegister(std::ostream& stream, int reg) const override; + void DumpFloatingPointRegister(std::ostream& stream, int reg) const override; - InstructionSet GetInstructionSet() const OVERRIDE { return InstructionSet::kMips; } + InstructionSet GetInstructionSet() const override { return InstructionSet::kMips; } - const MipsInstructionSetFeatures& GetInstructionSetFeatures() const { - return isa_features_; - } + const MipsInstructionSetFeatures& GetInstructionSetFeatures() const; MipsLabel* GetLabelOf(HBasicBlock* block) const { return CommonGetLabelOf<MipsLabel>(block_labels_, block); } - void Initialize() OVERRIDE { + void Initialize() override { block_labels_ = CommonInitializeLabels<MipsLabel>(); } - void Finalize(CodeAllocator* allocator) OVERRIDE; + void Finalize(CodeAllocator* allocator) override; // Code generation helpers. - void MoveLocation(Location dst, Location src, DataType::Type dst_type) OVERRIDE; + void MoveLocation(Location dst, Location src, DataType::Type dst_type) override; - void MoveConstant(Location destination, int32_t value) OVERRIDE; + void MoveConstant(Location destination, int32_t value) override; - void AddLocationAsTemp(Location location, LocationSummary* locations) OVERRIDE; + void AddLocationAsTemp(Location location, LocationSummary* locations) override; // Generate code to invoke a runtime entry point. void InvokeRuntime(QuickEntrypointEnum entrypoint, HInstruction* instruction, uint32_t dex_pc, - SlowPathCode* slow_path = nullptr) OVERRIDE; + SlowPathCode* slow_path = nullptr) override; // Generate code to invoke a runtime entry point, but do not record // PC-related information in a stack map. @@ -540,41 +543,41 @@ class CodeGeneratorMIPS : public CodeGenerator { void GenerateInvokeRuntime(int32_t entry_point_offset, bool direct); - ParallelMoveResolver* GetMoveResolver() OVERRIDE { return &move_resolver_; } + ParallelMoveResolver* GetMoveResolver() override { return &move_resolver_; } - bool NeedsTwoRegisters(DataType::Type type) const OVERRIDE { + bool NeedsTwoRegisters(DataType::Type type) const override { return type == DataType::Type::kInt64; } // Check if the desired_string_load_kind is supported. If it is, return it, // otherwise return a fall-back kind that should be used instead. HLoadString::LoadKind GetSupportedLoadStringKind( - HLoadString::LoadKind desired_string_load_kind) OVERRIDE; + HLoadString::LoadKind desired_string_load_kind) override; // Check if the desired_class_load_kind is supported. If it is, return it, // otherwise return a fall-back kind that should be used instead. 
HLoadClass::LoadKind GetSupportedLoadClassKind( - HLoadClass::LoadKind desired_class_load_kind) OVERRIDE; + HLoadClass::LoadKind desired_class_load_kind) override; // Check if the desired_dispatch_info is supported. If it is, return it, // otherwise return a fall-back info that should be used instead. HInvokeStaticOrDirect::DispatchInfo GetSupportedInvokeStaticOrDirectDispatch( const HInvokeStaticOrDirect::DispatchInfo& desired_dispatch_info, - HInvokeStaticOrDirect* invoke) OVERRIDE; + ArtMethod* method) override; void GenerateStaticOrDirectCall( - HInvokeStaticOrDirect* invoke, Location temp, SlowPathCode* slow_path = nullptr) OVERRIDE; + HInvokeStaticOrDirect* invoke, Location temp, SlowPathCode* slow_path = nullptr) override; void GenerateVirtualCall( - HInvokeVirtual* invoke, Location temp, SlowPathCode* slow_path = nullptr) OVERRIDE; + HInvokeVirtual* invoke, Location temp, SlowPathCode* slow_path = nullptr) override; void MoveFromReturnRegister(Location trg ATTRIBUTE_UNUSED, - DataType::Type type ATTRIBUTE_UNUSED) OVERRIDE { + DataType::Type type ATTRIBUTE_UNUSED) override { UNIMPLEMENTED(FATAL) << "Not implemented on MIPS"; } - void GenerateNop() OVERRIDE; - void GenerateImplicitNullCheck(HNullCheck* instruction) OVERRIDE; - void GenerateExplicitNullCheck(HNullCheck* instruction) OVERRIDE; + void GenerateNop() override; + void GenerateImplicitNullCheck(HNullCheck* instruction) override; + void GenerateExplicitNullCheck(HNullCheck* instruction) override; // The PcRelativePatchInfo is used for PC-relative addressing of methods/strings/types, // whether through .data.bimg.rel.ro, .bss, or directly in the boot image. @@ -615,6 +618,10 @@ class CodeGeneratorMIPS : public CodeGenerator { DISALLOW_COPY_AND_ASSIGN(PcRelativePatchInfo); }; + PcRelativePatchInfo* NewBootImageIntrinsicPatch(uint32_t intrinsic_data, + const PcRelativePatchInfo* info_high = nullptr); + PcRelativePatchInfo* NewBootImageRelRoPatch(uint32_t boot_image_offset, + const PcRelativePatchInfo* info_high = nullptr); PcRelativePatchInfo* NewBootImageMethodPatch(MethodReference target_method, const PcRelativePatchInfo* info_high = nullptr); PcRelativePatchInfo* NewMethodBssEntryPatch(MethodReference target_method, @@ -637,6 +644,9 @@ class CodeGeneratorMIPS : public CodeGenerator { Register out, Register base); + void LoadBootImageAddress(Register reg, uint32_t boot_image_reference); + void AllocateInstanceForIntrinsic(HInvokeStaticOrDirect* invoke, uint32_t boot_image_offset); + // The JitPatchInfo is used for JIT string and class loads. struct JitPatchInfo { JitPatchInfo(const DexFile& dex_file, uint64_t idx) @@ -685,11 +695,11 @@ class CodeGeneratorMIPS : public CodeGenerator { InstructionCodeGeneratorMIPS instruction_visitor_; ParallelMoveResolverMIPS move_resolver_; MipsAssembler assembler_; - const MipsInstructionSetFeatures& isa_features_; // Deduplication map for 32-bit literals, used for non-patchable boot image addresses. Uint32ToLiteralMap uint32_literals_; - // PC-relative method patch info for kBootImageLinkTimePcRelative. + // PC-relative method patch info for kBootImageLinkTimePcRelative/kBootImageRelRo. + // Also used for type/string patches for kBootImageRelRo (same linker patch as for methods). ArenaDeque<PcRelativePatchInfo> boot_image_method_patches_; // PC-relative method patch info for kBssEntry. 
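(Editor's aside, not part of the patch.) A rough sketch of what the kBootImageRelRo load kinds mentioned in the patch-info comments above amount to at run time: instead of embedding a boot-image object address in the code, the compiled code loads a 32-bit slot from the .data.bimg.rel.ro section, which the runtime fills in when the image is relocated. `rel_ro_base` and `slot_offset` are illustrative names; the real offset is what the linker patches into the Auipc/Lw placeholder pair.

#include <cstdint>
#include <cstring>

uint32_t LoadBootImageRelRoSlot(const uint8_t* rel_ro_base, uint32_t slot_offset) {
  uint32_t object_address;
  // This read corresponds to the generated Lw/Lwu of the 0x5678 placeholder.
  std::memcpy(&object_address, rel_ro_base + slot_offset, sizeof(object_address));
  return object_address;
}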
ArenaDeque<PcRelativePatchInfo> method_bss_entry_patches_; @@ -697,10 +707,12 @@ class CodeGeneratorMIPS : public CodeGenerator { ArenaDeque<PcRelativePatchInfo> boot_image_type_patches_; // PC-relative type patch info for kBssEntry. ArenaDeque<PcRelativePatchInfo> type_bss_entry_patches_; - // PC-relative String patch info; type depends on configuration (intern table or boot image PIC). + // PC-relative String patch info for kBootImageLinkTimePcRelative. ArenaDeque<PcRelativePatchInfo> boot_image_string_patches_; // PC-relative String patch info for kBssEntry. ArenaDeque<PcRelativePatchInfo> string_bss_entry_patches_; + // PC-relative patch info for IntrinsicObjects. + ArenaDeque<PcRelativePatchInfo> boot_image_intrinsic_patches_; // Patches for string root accesses in JIT compiled code. ArenaDeque<JitPatchInfo> jit_string_patches_; diff --git a/compiler/optimizing/code_generator_mips64.cc b/compiler/optimizing/code_generator_mips64.cc index 985ac2ca55..0d3cb3b8ca 100644 --- a/compiler/optimizing/code_generator_mips64.cc +++ b/compiler/optimizing/code_generator_mips64.cc @@ -24,6 +24,7 @@ #include "entrypoints/quick/quick_entrypoints.h" #include "entrypoints/quick/quick_entrypoints_enum.h" #include "gc/accounting/card_table.h" +#include "gc/space/image_space.h" #include "heap_poisoning.h" #include "intrinsics.h" #include "intrinsics_mips64.h" @@ -111,6 +112,14 @@ Location InvokeRuntimeCallingConvention::GetReturnLocation(DataType::Type type) return Mips64ReturnLocation(type); } +static RegisterSet OneRegInReferenceOutSaveEverythingCallerSaves() { + InvokeRuntimeCallingConvention calling_convention; + RegisterSet caller_saves = RegisterSet::Empty(); + caller_saves.Add(Location::RegisterLocation(calling_convention.GetRegisterAt(0))); + // The reference is returned in the same register. This differs from the standard return location. + return caller_saves; +} + // NOLINT on __ macro to suppress wrong warning/fix (misc-macro-parentheses) from clang-tidy. 
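(Editor's aside, not part of the patch.) The `__` shorthand defined immediately after this note, which the NOLINT comment refers to, is only a textual abbreviation for "fetch the assembler from the codegen and call it". A self-contained toy version of the same idiom, with illustrative types:

#include <iostream>

struct Assembler {
  void Nop() { std::cout << "nop\n"; }
};

struct Codegen {
  Assembler assembler_;
  Assembler* GetAssembler() { return &assembler_; }
};

// Same idea as the codegen macro: `__ Nop();` expands to codegen->GetAssembler()->Nop();
#define __ codegen->GetAssembler()->

int main() {
  Codegen cg;
  Codegen* codegen = &cg;
  __ Nop();
  return 0;
}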
#define __ down_cast<CodeGeneratorMIPS64*>(codegen)->GetAssembler()-> // NOLINT #define QUICK_ENTRY_POINT(x) QUICK_ENTRYPOINT_OFFSET(kMips64PointerSize, x).Int32Value() @@ -119,7 +128,7 @@ class BoundsCheckSlowPathMIPS64 : public SlowPathCodeMIPS64 { public: explicit BoundsCheckSlowPathMIPS64(HBoundsCheck* instruction) : SlowPathCodeMIPS64(instruction) {} - void EmitNativeCode(CodeGenerator* codegen) OVERRIDE { + void EmitNativeCode(CodeGenerator* codegen) override { LocationSummary* locations = instruction_->GetLocations(); CodeGeneratorMIPS64* mips64_codegen = down_cast<CodeGeneratorMIPS64*>(codegen); __ Bind(GetEntryLabel()); @@ -144,9 +153,9 @@ class BoundsCheckSlowPathMIPS64 : public SlowPathCodeMIPS64 { CheckEntrypointTypes<kQuickThrowArrayBounds, void, int32_t, int32_t>(); } - bool IsFatal() const OVERRIDE { return true; } + bool IsFatal() const override { return true; } - const char* GetDescription() const OVERRIDE { return "BoundsCheckSlowPathMIPS64"; } + const char* GetDescription() const override { return "BoundsCheckSlowPathMIPS64"; } private: DISALLOW_COPY_AND_ASSIGN(BoundsCheckSlowPathMIPS64); @@ -157,16 +166,16 @@ class DivZeroCheckSlowPathMIPS64 : public SlowPathCodeMIPS64 { explicit DivZeroCheckSlowPathMIPS64(HDivZeroCheck* instruction) : SlowPathCodeMIPS64(instruction) {} - void EmitNativeCode(CodeGenerator* codegen) OVERRIDE { + void EmitNativeCode(CodeGenerator* codegen) override { CodeGeneratorMIPS64* mips64_codegen = down_cast<CodeGeneratorMIPS64*>(codegen); __ Bind(GetEntryLabel()); mips64_codegen->InvokeRuntime(kQuickThrowDivZero, instruction_, instruction_->GetDexPc(), this); CheckEntrypointTypes<kQuickThrowDivZero, void, void>(); } - bool IsFatal() const OVERRIDE { return true; } + bool IsFatal() const override { return true; } - const char* GetDescription() const OVERRIDE { return "DivZeroCheckSlowPathMIPS64"; } + const char* GetDescription() const override { return "DivZeroCheckSlowPathMIPS64"; } private: DISALLOW_COPY_AND_ASSIGN(DivZeroCheckSlowPathMIPS64); @@ -174,35 +183,41 @@ class DivZeroCheckSlowPathMIPS64 : public SlowPathCodeMIPS64 { class LoadClassSlowPathMIPS64 : public SlowPathCodeMIPS64 { public: - LoadClassSlowPathMIPS64(HLoadClass* cls, - HInstruction* at, - uint32_t dex_pc, - bool do_clinit) - : SlowPathCodeMIPS64(at), - cls_(cls), - dex_pc_(dex_pc), - do_clinit_(do_clinit) { + LoadClassSlowPathMIPS64(HLoadClass* cls, HInstruction* at) + : SlowPathCodeMIPS64(at), cls_(cls) { DCHECK(at->IsLoadClass() || at->IsClinitCheck()); + DCHECK_EQ(instruction_->IsLoadClass(), cls_ == instruction_); } - void EmitNativeCode(CodeGenerator* codegen) OVERRIDE { + void EmitNativeCode(CodeGenerator* codegen) override { LocationSummary* locations = instruction_->GetLocations(); Location out = locations->Out(); + const uint32_t dex_pc = instruction_->GetDexPc(); + bool must_resolve_type = instruction_->IsLoadClass() && cls_->MustResolveTypeOnSlowPath(); + bool must_do_clinit = instruction_->IsClinitCheck() || cls_->MustGenerateClinitCheck(); + CodeGeneratorMIPS64* mips64_codegen = down_cast<CodeGeneratorMIPS64*>(codegen); - InvokeRuntimeCallingConvention calling_convention; - DCHECK_EQ(instruction_->IsLoadClass(), cls_ == instruction_); __ Bind(GetEntryLabel()); SaveLiveRegisters(codegen, locations); - dex::TypeIndex type_index = cls_->GetTypeIndex(); - __ LoadConst32(calling_convention.GetRegisterAt(0), type_index.index_); - QuickEntrypointEnum entrypoint = do_clinit_ ? 
kQuickInitializeStaticStorage - : kQuickInitializeType; - mips64_codegen->InvokeRuntime(entrypoint, instruction_, dex_pc_, this); - if (do_clinit_) { - CheckEntrypointTypes<kQuickInitializeStaticStorage, void*, uint32_t>(); + InvokeRuntimeCallingConvention calling_convention; + if (must_resolve_type) { + DCHECK(IsSameDexFile(cls_->GetDexFile(), mips64_codegen->GetGraph()->GetDexFile())); + dex::TypeIndex type_index = cls_->GetTypeIndex(); + __ LoadConst32(calling_convention.GetRegisterAt(0), type_index.index_); + mips64_codegen->InvokeRuntime(kQuickResolveType, instruction_, dex_pc, this); + CheckEntrypointTypes<kQuickResolveType, void*, uint32_t>(); + // If we also must_do_clinit, the resolved type is now in the correct register. } else { - CheckEntrypointTypes<kQuickInitializeType, void*, uint32_t>(); + DCHECK(must_do_clinit); + Location source = instruction_->IsLoadClass() ? out : locations->InAt(0); + mips64_codegen->MoveLocation(Location::RegisterLocation(calling_convention.GetRegisterAt(0)), + source, + cls_->GetType()); + } + if (must_do_clinit) { + mips64_codegen->InvokeRuntime(kQuickInitializeStaticStorage, instruction_, dex_pc, this); + CheckEntrypointTypes<kQuickInitializeStaticStorage, void*, mirror::Class*>(); } // Move the class to the desired location. @@ -218,18 +233,12 @@ class LoadClassSlowPathMIPS64 : public SlowPathCodeMIPS64 { __ Bc(GetExitLabel()); } - const char* GetDescription() const OVERRIDE { return "LoadClassSlowPathMIPS64"; } + const char* GetDescription() const override { return "LoadClassSlowPathMIPS64"; } private: // The class this slow path will load. HLoadClass* const cls_; - // The dex PC of `at_`. - const uint32_t dex_pc_; - - // Whether to initialize the class. - const bool do_clinit_; - DISALLOW_COPY_AND_ASSIGN(LoadClassSlowPathMIPS64); }; @@ -238,7 +247,7 @@ class LoadStringSlowPathMIPS64 : public SlowPathCodeMIPS64 { explicit LoadStringSlowPathMIPS64(HLoadString* instruction) : SlowPathCodeMIPS64(instruction) {} - void EmitNativeCode(CodeGenerator* codegen) OVERRIDE { + void EmitNativeCode(CodeGenerator* codegen) override { DCHECK(instruction_->IsLoadString()); DCHECK_EQ(instruction_->AsLoadString()->GetLoadKind(), HLoadString::LoadKind::kBssEntry); LocationSummary* locations = instruction_->GetLocations(); @@ -265,7 +274,7 @@ class LoadStringSlowPathMIPS64 : public SlowPathCodeMIPS64 { __ Bc(GetExitLabel()); } - const char* GetDescription() const OVERRIDE { return "LoadStringSlowPathMIPS64"; } + const char* GetDescription() const override { return "LoadStringSlowPathMIPS64"; } private: DISALLOW_COPY_AND_ASSIGN(LoadStringSlowPathMIPS64); @@ -275,7 +284,7 @@ class NullCheckSlowPathMIPS64 : public SlowPathCodeMIPS64 { public: explicit NullCheckSlowPathMIPS64(HNullCheck* instr) : SlowPathCodeMIPS64(instr) {} - void EmitNativeCode(CodeGenerator* codegen) OVERRIDE { + void EmitNativeCode(CodeGenerator* codegen) override { CodeGeneratorMIPS64* mips64_codegen = down_cast<CodeGeneratorMIPS64*>(codegen); __ Bind(GetEntryLabel()); if (instruction_->CanThrowIntoCatchBlock()) { @@ -289,9 +298,9 @@ class NullCheckSlowPathMIPS64 : public SlowPathCodeMIPS64 { CheckEntrypointTypes<kQuickThrowNullPointer, void, void>(); } - bool IsFatal() const OVERRIDE { return true; } + bool IsFatal() const override { return true; } - const char* GetDescription() const OVERRIDE { return "NullCheckSlowPathMIPS64"; } + const char* GetDescription() const override { return "NullCheckSlowPathMIPS64"; } private: DISALLOW_COPY_AND_ASSIGN(NullCheckSlowPathMIPS64); @@ -302,7 +311,7 @@ class 
SuspendCheckSlowPathMIPS64 : public SlowPathCodeMIPS64 { SuspendCheckSlowPathMIPS64(HSuspendCheck* instruction, HBasicBlock* successor) : SlowPathCodeMIPS64(instruction), successor_(successor) {} - void EmitNativeCode(CodeGenerator* codegen) OVERRIDE { + void EmitNativeCode(CodeGenerator* codegen) override { LocationSummary* locations = instruction_->GetLocations(); CodeGeneratorMIPS64* mips64_codegen = down_cast<CodeGeneratorMIPS64*>(codegen); __ Bind(GetEntryLabel()); @@ -322,7 +331,7 @@ class SuspendCheckSlowPathMIPS64 : public SlowPathCodeMIPS64 { return &return_label_; } - const char* GetDescription() const OVERRIDE { return "SuspendCheckSlowPathMIPS64"; } + const char* GetDescription() const override { return "SuspendCheckSlowPathMIPS64"; } HBasicBlock* GetSuccessor() const { return successor_; @@ -343,7 +352,7 @@ class TypeCheckSlowPathMIPS64 : public SlowPathCodeMIPS64 { explicit TypeCheckSlowPathMIPS64(HInstruction* instruction, bool is_fatal) : SlowPathCodeMIPS64(instruction), is_fatal_(is_fatal) {} - void EmitNativeCode(CodeGenerator* codegen) OVERRIDE { + void EmitNativeCode(CodeGenerator* codegen) override { LocationSummary* locations = instruction_->GetLocations(); uint32_t dex_pc = instruction_->GetDexPc(); @@ -383,9 +392,9 @@ class TypeCheckSlowPathMIPS64 : public SlowPathCodeMIPS64 { } } - const char* GetDescription() const OVERRIDE { return "TypeCheckSlowPathMIPS64"; } + const char* GetDescription() const override { return "TypeCheckSlowPathMIPS64"; } - bool IsFatal() const OVERRIDE { return is_fatal_; } + bool IsFatal() const override { return is_fatal_; } private: const bool is_fatal_; @@ -398,7 +407,7 @@ class DeoptimizationSlowPathMIPS64 : public SlowPathCodeMIPS64 { explicit DeoptimizationSlowPathMIPS64(HDeoptimize* instruction) : SlowPathCodeMIPS64(instruction) {} - void EmitNativeCode(CodeGenerator* codegen) OVERRIDE { + void EmitNativeCode(CodeGenerator* codegen) override { CodeGeneratorMIPS64* mips64_codegen = down_cast<CodeGeneratorMIPS64*>(codegen); __ Bind(GetEntryLabel()); LocationSummary* locations = instruction_->GetLocations(); @@ -410,7 +419,7 @@ class DeoptimizationSlowPathMIPS64 : public SlowPathCodeMIPS64 { CheckEntrypointTypes<kQuickDeoptimize, void, DeoptimizationKind>(); } - const char* GetDescription() const OVERRIDE { return "DeoptimizationSlowPathMIPS64"; } + const char* GetDescription() const override { return "DeoptimizationSlowPathMIPS64"; } private: DISALLOW_COPY_AND_ASSIGN(DeoptimizationSlowPathMIPS64); @@ -420,7 +429,7 @@ class ArraySetSlowPathMIPS64 : public SlowPathCodeMIPS64 { public: explicit ArraySetSlowPathMIPS64(HInstruction* instruction) : SlowPathCodeMIPS64(instruction) {} - void EmitNativeCode(CodeGenerator* codegen) OVERRIDE { + void EmitNativeCode(CodeGenerator* codegen) override { LocationSummary* locations = instruction_->GetLocations(); __ Bind(GetEntryLabel()); SaveLiveRegisters(codegen, locations); @@ -451,7 +460,7 @@ class ArraySetSlowPathMIPS64 : public SlowPathCodeMIPS64 { __ Bc(GetExitLabel()); } - const char* GetDescription() const OVERRIDE { return "ArraySetSlowPathMIPS64"; } + const char* GetDescription() const override { return "ArraySetSlowPathMIPS64"; } private: DISALLOW_COPY_AND_ASSIGN(ArraySetSlowPathMIPS64); @@ -481,9 +490,9 @@ class ReadBarrierMarkSlowPathMIPS64 : public SlowPathCodeMIPS64 { DCHECK(kEmitCompilerReadBarrier); } - const char* GetDescription() const OVERRIDE { return "ReadBarrierMarkSlowPathMIPS"; } + const char* GetDescription() const override { return "ReadBarrierMarkSlowPathMIPS"; } - void 
EmitNativeCode(CodeGenerator* codegen) OVERRIDE { + void EmitNativeCode(CodeGenerator* codegen) override { LocationSummary* locations = instruction_->GetLocations(); GpuRegister ref_reg = ref_.AsRegister<GpuRegister>(); DCHECK(locations->CanCall()); @@ -574,11 +583,11 @@ class ReadBarrierMarkAndUpdateFieldSlowPathMIPS64 : public SlowPathCodeMIPS64 { DCHECK(kEmitCompilerReadBarrier); } - const char* GetDescription() const OVERRIDE { + const char* GetDescription() const override { return "ReadBarrierMarkAndUpdateFieldSlowPathMIPS64"; } - void EmitNativeCode(CodeGenerator* codegen) OVERRIDE { + void EmitNativeCode(CodeGenerator* codegen) override { LocationSummary* locations = instruction_->GetLocations(); GpuRegister ref_reg = ref_.AsRegister<GpuRegister>(); DCHECK(locations->CanCall()); @@ -735,7 +744,7 @@ class ReadBarrierForHeapReferenceSlowPathMIPS64 : public SlowPathCodeMIPS64 { DCHECK(!obj.Equals(ref)) << "obj=" << obj << " ref=" << ref; } - void EmitNativeCode(CodeGenerator* codegen) OVERRIDE { + void EmitNativeCode(CodeGenerator* codegen) override { CodeGeneratorMIPS64* mips64_codegen = down_cast<CodeGeneratorMIPS64*>(codegen); LocationSummary* locations = instruction_->GetLocations(); DataType::Type type = DataType::Type::kReference; @@ -855,7 +864,7 @@ class ReadBarrierForHeapReferenceSlowPathMIPS64 : public SlowPathCodeMIPS64 { __ Bc(GetExitLabel()); } - const char* GetDescription() const OVERRIDE { + const char* GetDescription() const override { return "ReadBarrierForHeapReferenceSlowPathMIPS64"; } @@ -900,7 +909,7 @@ class ReadBarrierForRootSlowPathMIPS64 : public SlowPathCodeMIPS64 { DCHECK(kEmitCompilerReadBarrier); } - void EmitNativeCode(CodeGenerator* codegen) OVERRIDE { + void EmitNativeCode(CodeGenerator* codegen) override { LocationSummary* locations = instruction_->GetLocations(); DataType::Type type = DataType::Type::kReference; GpuRegister reg_out = out_.AsRegister<GpuRegister>(); @@ -929,7 +938,7 @@ class ReadBarrierForRootSlowPathMIPS64 : public SlowPathCodeMIPS64 { __ Bc(GetExitLabel()); } - const char* GetDescription() const OVERRIDE { return "ReadBarrierForRootSlowPathMIPS64"; } + const char* GetDescription() const override { return "ReadBarrierForRootSlowPathMIPS64"; } private: const Location out_; @@ -939,13 +948,12 @@ class ReadBarrierForRootSlowPathMIPS64 : public SlowPathCodeMIPS64 { }; CodeGeneratorMIPS64::CodeGeneratorMIPS64(HGraph* graph, - const Mips64InstructionSetFeatures& isa_features, const CompilerOptions& compiler_options, OptimizingCompilerStats* stats) : CodeGenerator(graph, kNumberOfGpuRegisters, kNumberOfFpuRegisters, - /* number_of_register_pairs */ 0, + /* number_of_register_pairs= */ 0, ComputeRegisterMask(reinterpret_cast<const int*>(kCoreCalleeSaves), arraysize(kCoreCalleeSaves)), ComputeRegisterMask(reinterpret_cast<const int*>(kFpuCalleeSaves), @@ -956,8 +964,8 @@ CodeGeneratorMIPS64::CodeGeneratorMIPS64(HGraph* graph, location_builder_(graph, this), instruction_visitor_(graph, this), move_resolver_(graph->GetAllocator(), this), - assembler_(graph->GetAllocator(), &isa_features), - isa_features_(isa_features), + assembler_(graph->GetAllocator(), + compiler_options.GetInstructionSetFeatures()->AsMips64InstructionSetFeatures()), uint32_literals_(std::less<uint32_t>(), graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)), uint64_literals_(std::less<uint64_t>(), @@ -968,6 +976,7 @@ CodeGeneratorMIPS64::CodeGeneratorMIPS64(HGraph* graph, type_bss_entry_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)), 
boot_image_string_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)), string_bss_entry_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)), + boot_image_intrinsic_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)), jit_string_patches_(StringReferenceValueComparator(), graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)), jit_class_patches_(TypeReferenceValueComparator(), @@ -988,8 +997,7 @@ void CodeGeneratorMIPS64::Finalize(CodeAllocator* allocator) { // Adjust native pc offsets in stack maps. StackMapStream* stack_map_stream = GetStackMapStream(); for (size_t i = 0, num = stack_map_stream->GetNumberOfStackMaps(); i != num; ++i) { - uint32_t old_position = - stack_map_stream->GetStackMap(i).native_pc_code_offset.Uint32Value(InstructionSet::kMips64); + uint32_t old_position = stack_map_stream->GetStackMapNativePcOffset(i); uint32_t new_position = __ GetAdjustedPosition(old_position); DCHECK_GE(new_position, old_position); stack_map_stream->SetStackMapNativePcOffset(i, new_position); @@ -1482,12 +1490,27 @@ void CodeGeneratorMIPS64::MarkGCCard(GpuRegister object, if (value_can_be_null) { __ Beqzc(value, &done); } + // Load the address of the card table into `card`. __ LoadFromOffset(kLoadDoubleword, card, TR, Thread::CardTableOffset<kMips64PointerSize>().Int32Value()); + // Calculate the address of the card corresponding to `object`. __ Dsrl(temp, object, gc::accounting::CardTable::kCardShift); __ Daddu(temp, card, temp); + // Write the `art::gc::accounting::CardTable::kCardDirty` value into the + // `object`'s card. + // + // Register `card` contains the address of the card table. Note that the card + // table's base is biased during its creation so that it always starts at an + // address whose least-significant byte is equal to `kCardDirty` (see + // art::gc::accounting::CardTable::Create). Therefore the SB instruction + // below writes the `kCardDirty` (byte) value into the `object`'s card + // (located at `card + object >> kCardShift`). + // + // This dual use of the value in register `card` (1. to calculate the location + // of the card to mark; and 2. to load the `kCardDirty` value) saves a load + // (no need to explicitly load `kCardDirty` as an immediate value). __ Sb(card, temp, 0); if (value_can_be_null) { __ Bind(&done); @@ -1509,6 +1532,15 @@ inline void CodeGeneratorMIPS64::EmitPcRelativeLinkerPatches( } } +template <linker::LinkerPatch (*Factory)(size_t, uint32_t, uint32_t)> +linker::LinkerPatch NoDexFileAdapter(size_t literal_offset, + const DexFile* target_dex_file, + uint32_t pc_insn_offset, + uint32_t boot_image_offset) { + DCHECK(target_dex_file == nullptr); // Unused for these patches, should be null. 
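(Editor's aside, not part of the patch.) A plain-C++ sketch of the MarkGCCard comments added above: the card-table base register is biased so that its least-significant byte already equals kCardDirty, which lets the same register serve both as the base for the address computation and as the byte value stored by Sb. The constants below are illustrative placeholders; the real ones live in art::gc::accounting::CardTable.

#include <cstdint>

constexpr uint32_t kCardShift = 10;   // Assumed card granularity for illustration.

// `biased_begin` plays the role of the `card` register; it is chosen so that
// (reinterpret_cast<uintptr_t>(biased_begin) & 0xff) == kCardDirty.
void MarkCard(uint8_t* biased_begin, uintptr_t object_address) {
  uint8_t* card_address = biased_begin + (object_address >> kCardShift);        // Dsrl + Daddu
  *card_address = static_cast<uint8_t>(reinterpret_cast<uintptr_t>(biased_begin));  // Sb(card, temp, 0)
}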
+ return Factory(literal_offset, pc_insn_offset, boot_image_offset); +} + void CodeGeneratorMIPS64::EmitLinkerPatches(ArenaVector<linker::LinkerPatch>* linker_patches) { DCHECK(linker_patches->empty()); size_t size = @@ -1517,7 +1549,8 @@ void CodeGeneratorMIPS64::EmitLinkerPatches(ArenaVector<linker::LinkerPatch>* li boot_image_type_patches_.size() + type_bss_entry_patches_.size() + boot_image_string_patches_.size() + - string_bss_entry_patches_.size(); + string_bss_entry_patches_.size() + + boot_image_intrinsic_patches_.size(); linker_patches->reserve(size); if (GetCompilerOptions().IsBootImage()) { EmitPcRelativeLinkerPatches<linker::LinkerPatch::RelativeMethodPatch>( @@ -1526,12 +1559,14 @@ void CodeGeneratorMIPS64::EmitLinkerPatches(ArenaVector<linker::LinkerPatch>* li boot_image_type_patches_, linker_patches); EmitPcRelativeLinkerPatches<linker::LinkerPatch::RelativeStringPatch>( boot_image_string_patches_, linker_patches); + EmitPcRelativeLinkerPatches<NoDexFileAdapter<linker::LinkerPatch::IntrinsicReferencePatch>>( + boot_image_intrinsic_patches_, linker_patches); } else { - DCHECK(boot_image_method_patches_.empty()); - EmitPcRelativeLinkerPatches<linker::LinkerPatch::TypeClassTablePatch>( - boot_image_type_patches_, linker_patches); - EmitPcRelativeLinkerPatches<linker::LinkerPatch::StringInternTablePatch>( - boot_image_string_patches_, linker_patches); + EmitPcRelativeLinkerPatches<NoDexFileAdapter<linker::LinkerPatch::DataBimgRelRoPatch>>( + boot_image_method_patches_, linker_patches); + DCHECK(boot_image_type_patches_.empty()); + DCHECK(boot_image_string_patches_.empty()); + DCHECK(boot_image_intrinsic_patches_.empty()); } EmitPcRelativeLinkerPatches<linker::LinkerPatch::MethodBssEntryPatch>( method_bss_entry_patches_, linker_patches); @@ -1542,6 +1577,20 @@ void CodeGeneratorMIPS64::EmitLinkerPatches(ArenaVector<linker::LinkerPatch>* li DCHECK_EQ(size, linker_patches->size()); } +CodeGeneratorMIPS64::PcRelativePatchInfo* CodeGeneratorMIPS64::NewBootImageIntrinsicPatch( + uint32_t intrinsic_data, + const PcRelativePatchInfo* info_high) { + return NewPcRelativePatch( + /* dex_file= */ nullptr, intrinsic_data, info_high, &boot_image_intrinsic_patches_); +} + +CodeGeneratorMIPS64::PcRelativePatchInfo* CodeGeneratorMIPS64::NewBootImageRelRoPatch( + uint32_t boot_image_offset, + const PcRelativePatchInfo* info_high) { + return NewPcRelativePatch( + /* dex_file= */ nullptr, boot_image_offset, info_high, &boot_image_method_patches_); +} + CodeGeneratorMIPS64::PcRelativePatchInfo* CodeGeneratorMIPS64::NewBootImageMethodPatch( MethodReference target_method, const PcRelativePatchInfo* info_high) { @@ -1616,7 +1665,7 @@ void CodeGeneratorMIPS64::EmitPcRelativeAddressPlaceholderHigh(PcRelativePatchIn DCHECK(!info_high->patch_info_high); __ Bind(&info_high->label); // Add the high half of a 32-bit offset to PC. - __ Auipc(out, /* placeholder */ 0x1234); + __ Auipc(out, /* imm16= */ 0x1234); // A following instruction will add the sign-extended low half of the 32-bit // offset to `out` (e.g. ld, jialc, daddiu). 
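A sketch of how the Auipc placeholder and the following sign-extending low-half instruction compose a 32-bit PC-relative offset once the linker has patched the real halves (0x1234/0x5678 above are only placeholders; ComposePcRelative is an illustrative helper, not an ART function):

    #include <cstdint>

    uint64_t ComposePcRelative(uint64_t pc, int32_t offset) {
      int16_t lo = static_cast<int16_t>(offset);               // low 16 bits, sign-extended by ld/daddiu/lwu
      int64_t hi = (static_cast<int64_t>(offset) - lo) >> 16;  // high half, compensating for that sign extension
      uint64_t base = pc + (static_cast<uint64_t>(hi) << 16);  // Auipc
      return base + lo;                                        // Daddiu or the load's displacement
    }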
if (info_low != nullptr) { @@ -1625,13 +1674,57 @@ void CodeGeneratorMIPS64::EmitPcRelativeAddressPlaceholderHigh(PcRelativePatchIn } } +void CodeGeneratorMIPS64::LoadBootImageAddress(GpuRegister reg, uint32_t boot_image_reference) { + if (GetCompilerOptions().IsBootImage()) { + PcRelativePatchInfo* info_high = NewBootImageIntrinsicPatch(boot_image_reference); + PcRelativePatchInfo* info_low = NewBootImageIntrinsicPatch(boot_image_reference, info_high); + EmitPcRelativeAddressPlaceholderHigh(info_high, AT, info_low); + __ Daddiu(reg, AT, /* imm16= */ 0x5678); + } else if (GetCompilerOptions().GetCompilePic()) { + PcRelativePatchInfo* info_high = NewBootImageRelRoPatch(boot_image_reference); + PcRelativePatchInfo* info_low = NewBootImageRelRoPatch(boot_image_reference, info_high); + EmitPcRelativeAddressPlaceholderHigh(info_high, AT, info_low); + // Note: Boot image is in the low 4GiB and the entry is 32-bit, so emit a 32-bit load. + __ Lwu(reg, AT, /* imm16= */ 0x5678); + } else { + DCHECK(Runtime::Current()->UseJitCompilation()); + gc::Heap* heap = Runtime::Current()->GetHeap(); + DCHECK(!heap->GetBootImageSpaces().empty()); + uintptr_t address = + reinterpret_cast<uintptr_t>(heap->GetBootImageSpaces()[0]->Begin() + boot_image_reference); + __ LoadLiteral(reg, kLoadDoubleword, DeduplicateBootImageAddressLiteral(address)); + } +} + +void CodeGeneratorMIPS64::AllocateInstanceForIntrinsic(HInvokeStaticOrDirect* invoke, + uint32_t boot_image_offset) { + DCHECK(invoke->IsStatic()); + InvokeRuntimeCallingConvention calling_convention; + GpuRegister argument = calling_convention.GetRegisterAt(0); + if (GetCompilerOptions().IsBootImage()) { + DCHECK_EQ(boot_image_offset, IntrinsicVisitor::IntegerValueOfInfo::kInvalidReference); + // Load the class the same way as for HLoadClass::LoadKind::kBootImageLinkTimePcRelative. 
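The Lwu in LoadBootImageAddress relies on the note that boot-image addresses fit in 32 bits. A sketch of the .data.bimg.rel.ro indirection it performs, assuming relro_entry points at the patched 32-bit slot (illustrative name, not ART API):

    #include <cstdint>

    // The slot holds a 32-bit boot-image address (the boot image lives in the
    // low 4GiB), so a zero-extending 32-bit load is enough to recover the pointer.
    void* LoadRelRoReference(const uint32_t* relro_entry) {
      return reinterpret_cast<void*>(static_cast<uintptr_t>(*relro_entry));  // Lwu
    }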
+ MethodReference target_method = invoke->GetTargetMethod(); + dex::TypeIndex type_idx = target_method.dex_file->GetMethodId(target_method.index).class_idx_; + PcRelativePatchInfo* info_high = NewBootImageTypePatch(*target_method.dex_file, type_idx); + PcRelativePatchInfo* info_low = + NewBootImageTypePatch(*target_method.dex_file, type_idx, info_high); + EmitPcRelativeAddressPlaceholderHigh(info_high, AT, info_low); + __ Daddiu(argument, AT, /* imm16= */ 0x5678); + } else { + LoadBootImageAddress(argument, boot_image_offset); + } + InvokeRuntime(kQuickAllocObjectInitialized, invoke, invoke->GetDexPc()); + CheckEntrypointTypes<kQuickAllocObjectWithChecks, void*, mirror::Class*>(); +} + Literal* CodeGeneratorMIPS64::DeduplicateJitStringLiteral(const DexFile& dex_file, dex::StringIndex string_index, Handle<mirror::String> handle) { ReserveJitStringRoot(StringReference(&dex_file, string_index), handle); return jit_string_patches_.GetOrCreate( StringReference(&dex_file, string_index), - [this]() { return __ NewLiteral<uint32_t>(/* placeholder */ 0u); }); + [this]() { return __ NewLiteral<uint32_t>(/* value= */ 0u); }); } Literal* CodeGeneratorMIPS64::DeduplicateJitClassLiteral(const DexFile& dex_file, @@ -1640,7 +1733,7 @@ Literal* CodeGeneratorMIPS64::DeduplicateJitClassLiteral(const DexFile& dex_file ReserveJitClassRoot(TypeReference(&dex_file, type_index), handle); return jit_class_patches_.GetOrCreate( TypeReference(&dex_file, type_index), - [this]() { return __ NewLiteral<uint32_t>(/* placeholder */ 0u); }); + [this]() { return __ NewLiteral<uint32_t>(/* value= */ 0u); }); } void CodeGeneratorMIPS64::PatchJitRootUse(uint8_t* code, @@ -1740,6 +1833,10 @@ void CodeGeneratorMIPS64::DumpFloatingPointRegister(std::ostream& stream, int re stream << FpuRegister(reg); } +const Mips64InstructionSetFeatures& CodeGeneratorMIPS64::GetInstructionSetFeatures() const { + return *GetCompilerOptions().GetInstructionSetFeatures()->AsMips64InstructionSetFeatures(); +} + void CodeGeneratorMIPS64::InvokeRuntime(QuickEntrypointEnum entrypoint, HInstruction* instruction, uint32_t dex_pc, @@ -1780,6 +1877,34 @@ void InstructionCodeGeneratorMIPS64::GenerateClassInitializationCheck(SlowPathCo __ Bind(slow_path->GetExitLabel()); } +void InstructionCodeGeneratorMIPS64::GenerateBitstringTypeCheckCompare(HTypeCheckInstruction* check, + GpuRegister temp) { + uint32_t path_to_root = check->GetBitstringPathToRoot(); + uint32_t mask = check->GetBitstringMask(); + DCHECK(IsPowerOfTwo(mask + 1)); + size_t mask_bits = WhichPowerOf2(mask + 1); + + if (mask_bits == 16u) { + // Load only the bitstring part of the status word. + __ LoadFromOffset( + kLoadUnsignedHalfword, temp, temp, mirror::Class::StatusOffset().Int32Value()); + // Compare the bitstring bits using XOR. + __ Xori(temp, temp, dchecked_integral_cast<uint16_t>(path_to_root)); + } else { + // /* uint32_t */ temp = temp->status_ + __ LoadFromOffset(kLoadWord, temp, temp, mirror::Class::StatusOffset().Int32Value()); + // Compare the bitstring bits using XOR. + if (IsUint<16>(path_to_root)) { + __ Xori(temp, temp, dchecked_integral_cast<uint16_t>(path_to_root)); + } else { + __ LoadConst32(TMP, path_to_root); + __ Xor(temp, temp, TMP); + } + // Shift out bits that do not contribute to the comparison. 
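GenerateBitstringTypeCheckCompare (see the Xori/Xor and Sll sequence nearby) leaves the temp register equal to zero exactly when the check succeeds. A minimal sketch of the same test in plain C++, assuming status is the class status word and path_to_root/mask come from the HTypeCheckInstruction (the helper name is illustrative):

    #include <cstdint>

    // Illustrative only: what the Xor + Sll sequence computes.
    bool BitstringMatches(uint32_t status, uint32_t path_to_root, uint32_t mask) {
      // The emitted code evaluates ((status ^ path_to_root) << (32 - mask_bits)) == 0,
      // which, since mask == (1u << mask_bits) - 1, is the same as:
      return (status & mask) == path_to_root;
    }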
+ __ Sll(temp, temp, 32 - mask_bits); + } +} + void InstructionCodeGeneratorMIPS64::GenerateMemoryBarrier(MemBarrierKind kind ATTRIBUTE_UNUSED) { __ Sync(0); // only stype 0 is supported } @@ -2333,7 +2458,7 @@ void InstructionCodeGeneratorMIPS64::VisitArrayGet(HArrayGet* instruction) { obj, offset, temp, - /* needs_null_check */ false); + /* needs_null_check= */ false); } else { codegen_->GenerateArrayLoadWithBakerReadBarrier(instruction, out_loc, @@ -2341,7 +2466,7 @@ void InstructionCodeGeneratorMIPS64::VisitArrayGet(HArrayGet* instruction) { data_offset, index, temp, - /* needs_null_check */ false); + /* needs_null_check= */ false); } } else { GpuRegister out = out_loc.AsRegister<GpuRegister>(); @@ -2840,7 +2965,13 @@ void LocationsBuilderMIPS64::VisitCheckCast(HCheckCast* instruction) { LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(instruction, call_kind); locations->SetInAt(0, Location::RequiresRegister()); - locations->SetInAt(1, Location::RequiresRegister()); + if (type_check_kind == TypeCheckKind::kBitstringCheck) { + locations->SetInAt(1, Location::ConstantLocation(instruction->InputAt(1)->AsConstant())); + locations->SetInAt(2, Location::ConstantLocation(instruction->InputAt(2)->AsConstant())); + locations->SetInAt(3, Location::ConstantLocation(instruction->InputAt(3)->AsConstant())); + } else { + locations->SetInAt(1, Location::RequiresRegister()); + } locations->AddRegisterTemps(NumberOfCheckCastTemps(type_check_kind)); } @@ -2849,7 +2980,7 @@ void InstructionCodeGeneratorMIPS64::VisitCheckCast(HCheckCast* instruction) { LocationSummary* locations = instruction->GetLocations(); Location obj_loc = locations->InAt(0); GpuRegister obj = obj_loc.AsRegister<GpuRegister>(); - GpuRegister cls = locations->InAt(1).AsRegister<GpuRegister>(); + Location cls = locations->InAt(1); Location temp_loc = locations->GetTemp(0); GpuRegister temp = temp_loc.AsRegister<GpuRegister>(); const size_t num_temps = NumberOfCheckCastTemps(type_check_kind); @@ -2888,7 +3019,7 @@ void InstructionCodeGeneratorMIPS64::VisitCheckCast(HCheckCast* instruction) { kWithoutReadBarrier); // Jump to slow path for throwing the exception or doing a // more involved array check. - __ Bnec(temp, cls, slow_path->GetEntryLabel()); + __ Bnec(temp, cls.AsRegister<GpuRegister>(), slow_path->GetEntryLabel()); break; } @@ -2914,7 +3045,7 @@ void InstructionCodeGeneratorMIPS64::VisitCheckCast(HCheckCast* instruction) { // exception. __ Beqzc(temp, slow_path->GetEntryLabel()); // Otherwise, compare the classes. - __ Bnec(temp, cls, &loop); + __ Bnec(temp, cls.AsRegister<GpuRegister>(), &loop); break; } @@ -2929,7 +3060,7 @@ void InstructionCodeGeneratorMIPS64::VisitCheckCast(HCheckCast* instruction) { // Walk over the class hierarchy to find a match. Mips64Label loop; __ Bind(&loop); - __ Beqc(temp, cls, &done); + __ Beqc(temp, cls.AsRegister<GpuRegister>(), &done); // /* HeapReference<Class> */ temp = temp->super_class_ GenerateReferenceLoadOneRegister(instruction, temp_loc, @@ -2952,7 +3083,7 @@ void InstructionCodeGeneratorMIPS64::VisitCheckCast(HCheckCast* instruction) { maybe_temp2_loc, kWithoutReadBarrier); // Do an exact check. - __ Beqc(temp, cls, &done); + __ Beqc(temp, cls.AsRegister<GpuRegister>(), &done); // Otherwise, we need to check that the object's class is a non-primitive array. 
// /* HeapReference<Class> */ temp = temp->component_type_ GenerateReferenceLoadOneRegister(instruction, @@ -3011,7 +3142,21 @@ void InstructionCodeGeneratorMIPS64::VisitCheckCast(HCheckCast* instruction) { __ Daddiu(temp, temp, 2 * kHeapReferenceSize); __ Addiu(TMP, TMP, -2); // Compare the classes and continue the loop if they do not match. - __ Bnec(AT, cls, &loop); + __ Bnec(AT, cls.AsRegister<GpuRegister>(), &loop); + break; + } + + case TypeCheckKind::kBitstringCheck: { + // /* HeapReference<Class> */ temp = obj->klass_ + GenerateReferenceLoadTwoRegisters(instruction, + temp_loc, + obj_loc, + class_offset, + maybe_temp2_loc, + kWithoutReadBarrier); + + GenerateBitstringTypeCheckCompare(instruction, temp); + __ Bnezc(temp, slow_path->GetEntryLabel()); break; } } @@ -3027,15 +3172,14 @@ void LocationsBuilderMIPS64::VisitClinitCheck(HClinitCheck* check) { if (check->HasUses()) { locations->SetOut(Location::SameAsFirstInput()); } + // Rely on the type initialization to save everything we need. + locations->SetCustomSlowPathCallerSaves(OneRegInReferenceOutSaveEverythingCallerSaves()); } void InstructionCodeGeneratorMIPS64::VisitClinitCheck(HClinitCheck* check) { // We assume the class is not null. - SlowPathCodeMIPS64* slow_path = new (codegen_->GetScopedAllocator()) LoadClassSlowPathMIPS64( - check->GetLoadClass(), - check, - check->GetDexPc(), - true); + SlowPathCodeMIPS64* slow_path = + new (codegen_->GetScopedAllocator()) LoadClassSlowPathMIPS64(check->GetLoadClass(), check); codegen_->AddSlowPath(slow_path); GenerateClassInitializationCheck(slow_path, check->GetLocations()->InAt(0).AsRegister<GpuRegister>()); @@ -3193,10 +3337,10 @@ void InstructionCodeGeneratorMIPS64::HandleCondition(HCondition* instruction) { switch (type) { default: // Integer case. - GenerateIntLongCompare(instruction->GetCondition(), /* is64bit */ false, locations); + GenerateIntLongCompare(instruction->GetCondition(), /* is64bit= */ false, locations); return; case DataType::Type::kInt64: - GenerateIntLongCompare(instruction->GetCondition(), /* is64bit */ true, locations); + GenerateIntLongCompare(instruction->GetCondition(), /* is64bit= */ true, locations); return; case DataType::Type::kFloat32: case DataType::Type::kFloat64: @@ -3498,7 +3642,7 @@ void InstructionCodeGeneratorMIPS64::VisitDivZeroCheck(HDivZeroCheck* instructio if (!DataType::IsIntegralType(type)) { LOG(FATAL) << "Unexpected type " << type << " for DivZeroCheck."; - return; + UNREACHABLE(); } if (value.IsConstant()) { @@ -4305,10 +4449,10 @@ void InstructionCodeGeneratorMIPS64::GenerateTestAndBranch(HInstruction* instruc switch (type) { default: - GenerateIntLongCompareAndBranch(if_cond, /* is64bit */ false, locations, branch_target); + GenerateIntLongCompareAndBranch(if_cond, /* is64bit= */ false, locations, branch_target); break; case DataType::Type::kInt64: - GenerateIntLongCompareAndBranch(if_cond, /* is64bit */ true, locations, branch_target); + GenerateIntLongCompareAndBranch(if_cond, /* is64bit= */ true, locations, branch_target); break; case DataType::Type::kFloat32: case DataType::Type::kFloat64: @@ -4338,7 +4482,7 @@ void InstructionCodeGeneratorMIPS64::VisitIf(HIf* if_instr) { nullptr : codegen_->GetLabelOf(true_successor); Mips64Label* false_target = codegen_->GoesToNextBlock(if_instr->GetBlock(), false_successor) ? 
nullptr : codegen_->GetLabelOf(false_successor); - GenerateTestAndBranch(if_instr, /* condition_input_index */ 0, true_target, false_target); + GenerateTestAndBranch(if_instr, /* condition_input_index= */ 0, true_target, false_target); } void LocationsBuilderMIPS64::VisitDeoptimize(HDeoptimize* deoptimize) { @@ -4357,9 +4501,9 @@ void InstructionCodeGeneratorMIPS64::VisitDeoptimize(HDeoptimize* deoptimize) { SlowPathCodeMIPS64* slow_path = deopt_slow_paths_.NewSlowPath<DeoptimizationSlowPathMIPS64>(deoptimize); GenerateTestAndBranch(deoptimize, - /* condition_input_index */ 0, + /* condition_input_index= */ 0, slow_path->GetEntryLabel(), - /* false_target */ nullptr); + /* false_target= */ nullptr); } // This function returns true if a conditional move can be generated for HSelect. @@ -4373,7 +4517,7 @@ void InstructionCodeGeneratorMIPS64::VisitDeoptimize(HDeoptimize* deoptimize) { // of common logic. static bool CanMoveConditionally(HSelect* select, LocationSummary* locations_to_set) { bool materialized = IsBooleanValueOrMaterializedCondition(select->GetCondition()); - HInstruction* cond = select->InputAt(/* condition_input_index */ 2); + HInstruction* cond = select->InputAt(/* i= */ 2); HCondition* condition = cond->AsCondition(); DataType::Type cond_type = @@ -4516,7 +4660,7 @@ void InstructionCodeGeneratorMIPS64::GenConditionalMove(HSelect* select) { Location dst = locations->Out(); Location false_src = locations->InAt(0); Location true_src = locations->InAt(1); - HInstruction* cond = select->InputAt(/* condition_input_index */ 2); + HInstruction* cond = select->InputAt(/* i= */ 2); GpuRegister cond_reg = TMP; FpuRegister fcond_reg = FTMP; DataType::Type cond_type = DataType::Type::kInt32; @@ -4524,7 +4668,7 @@ void InstructionCodeGeneratorMIPS64::GenConditionalMove(HSelect* select) { DataType::Type dst_type = select->GetType(); if (IsBooleanValueOrMaterializedCondition(cond)) { - cond_reg = locations->InAt(/* condition_input_index */ 2).AsRegister<GpuRegister>(); + cond_reg = locations->InAt(/* at= */ 2).AsRegister<GpuRegister>(); } else { HCondition* condition = cond->AsCondition(); LocationSummary* cond_locations = cond->GetLocations(); @@ -4533,13 +4677,13 @@ void InstructionCodeGeneratorMIPS64::GenConditionalMove(HSelect* select) { switch (cond_type) { default: cond_inverted = MaterializeIntLongCompare(if_cond, - /* is64bit */ false, + /* is64bit= */ false, cond_locations, cond_reg); break; case DataType::Type::kInt64: cond_inverted = MaterializeIntLongCompare(if_cond, - /* is64bit */ true, + /* is64bit= */ true, cond_locations, cond_reg); break; @@ -4682,14 +4826,14 @@ void LocationsBuilderMIPS64::VisitSelect(HSelect* select) { } void InstructionCodeGeneratorMIPS64::VisitSelect(HSelect* select) { - if (CanMoveConditionally(select, /* locations_to_set */ nullptr)) { + if (CanMoveConditionally(select, /* locations_to_set= */ nullptr)) { GenConditionalMove(select); } else { LocationSummary* locations = select->GetLocations(); Mips64Label false_target; GenerateTestAndBranch(select, - /* condition_input_index */ 2, - /* true_target */ nullptr, + /* condition_input_index= */ 2, + /* true_target= */ nullptr, &false_target); codegen_->MoveLocation(locations->Out(), locations->InAt(1), select->GetType()); __ Bind(&false_target); @@ -4801,7 +4945,7 @@ void InstructionCodeGeneratorMIPS64::HandleFieldGet(HInstruction* instruction, obj, offset, temp_loc, - /* needs_null_check */ true); + /* needs_null_check= */ true); if (is_volatile) { GenerateMemoryBarrier(MemBarrierKind::kLoadAny); } @@ 
-4957,7 +5101,7 @@ void InstructionCodeGeneratorMIPS64::GenerateReferenceLoadOneRegister( out_reg, offset, maybe_temp, - /* needs_null_check */ false); + /* needs_null_check= */ false); } else { // Load with slow path based read barrier. // Save the value of `out` into `maybe_temp` before overwriting it @@ -4998,7 +5142,7 @@ void InstructionCodeGeneratorMIPS64::GenerateReferenceLoadTwoRegisters( obj_reg, offset, maybe_temp, - /* needs_null_check */ false); + /* needs_null_check= */ false); } else { // Load with slow path based read barrier. // /* HeapReference<Object> */ out = *(obj + offset) @@ -5086,7 +5230,7 @@ void InstructionCodeGeneratorMIPS64::GenerateGcRootFieldLoad(HInstruction* instr __ Daui(base, obj, offset_high); } Mips64Label skip_call; - __ Beqz(T9, &skip_call, /* is_bare */ true); + __ Beqz(T9, &skip_call, /* is_bare= */ true); if (label_low != nullptr) { DCHECK(short_offset); __ Bind(label_low); @@ -5216,7 +5360,7 @@ void CodeGeneratorMIPS64::GenerateFieldLoadWithBakerReadBarrier(HInstruction* in GpuRegister ref_reg = ref.AsRegister<GpuRegister>(); Mips64Label skip_call; if (short_offset) { - __ Beqzc(T9, &skip_call, /* is_bare */ true); + __ Beqzc(T9, &skip_call, /* is_bare= */ true); __ Nop(); // In forbidden slot. __ Jialc(T9, thunk_disp); __ Bind(&skip_call); @@ -5225,7 +5369,7 @@ void CodeGeneratorMIPS64::GenerateFieldLoadWithBakerReadBarrier(HInstruction* in } else { int16_t offset_low = Low16Bits(offset); int16_t offset_high = High16Bits(offset - offset_low); // Accounts for sign extension in lwu. - __ Beqz(T9, &skip_call, /* is_bare */ true); + __ Beqz(T9, &skip_call, /* is_bare= */ true); __ Daui(TMP, obj, offset_high); // In delay slot. __ Jialc(T9, thunk_disp); __ Bind(&skip_call); @@ -5298,12 +5442,12 @@ void CodeGeneratorMIPS64::GenerateArrayLoadWithBakerReadBarrier(HInstruction* in // We will not do the explicit null check in the thunk as some form of a null check // must've been done earlier. DCHECK(!needs_null_check); - const int thunk_disp = GetBakerMarkFieldArrayThunkDisplacement(obj, /* short_offset */ false); + const int thunk_disp = GetBakerMarkFieldArrayThunkDisplacement(obj, /* short_offset= */ false); // Loading the entrypoint does not require a load acquire since it is only changed when // threads are suspended or running a checkpoint. __ LoadFromOffset(kLoadDoubleword, T9, TR, entry_point_offset); Mips64Label skip_call; - __ Beqz(T9, &skip_call, /* is_bare */ true); + __ Beqz(T9, &skip_call, /* is_bare= */ true); GpuRegister ref_reg = ref.AsRegister<GpuRegister>(); GpuRegister index_reg = index.AsRegister<GpuRegister>(); __ Dlsa(TMP, index_reg, obj, scale_factor); // In delay slot. @@ -5414,7 +5558,7 @@ void CodeGeneratorMIPS64::GenerateReferenceLoadWithBakerReadBarrier(HInstruction ReadBarrierMarkAndUpdateFieldSlowPathMIPS64(instruction, ref, obj, - /* field_offset */ index, + /* field_offset= */ index, temp_reg); } else { slow_path = new (GetScopedAllocator()) ReadBarrierMarkSlowPathMIPS64(instruction, ref); @@ -5426,7 +5570,7 @@ void CodeGeneratorMIPS64::GenerateReferenceLoadWithBakerReadBarrier(HInstruction // Given the numeric representation, it's enough to check the low bit of the // rb_state. We do that by shifting the bit into the sign bit (31) and // performing a branch on less than zero. 
- static_assert(ReadBarrier::WhiteState() == 0, "Expecting white to have value 0"); + static_assert(ReadBarrier::NonGrayState() == 0, "Expecting non-gray to have value 0"); static_assert(ReadBarrier::GrayState() == 1, "Expecting gray to have value 1"); static_assert(LockWord::kReadBarrierStateSize == 1, "Expecting 1-bit read barrier state size"); __ Sll(temp_reg, temp_reg, 31 - LockWord::kReadBarrierStateShift); @@ -5515,6 +5659,8 @@ void LocationsBuilderMIPS64::VisitInstanceOf(HInstanceOf* instruction) { case TypeCheckKind::kInterfaceCheck: call_kind = LocationSummary::kCallOnSlowPath; break; + case TypeCheckKind::kBitstringCheck: + break; } LocationSummary* locations = @@ -5523,7 +5669,13 @@ void LocationsBuilderMIPS64::VisitInstanceOf(HInstanceOf* instruction) { locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty()); // No caller-save registers. } locations->SetInAt(0, Location::RequiresRegister()); - locations->SetInAt(1, Location::RequiresRegister()); + if (type_check_kind == TypeCheckKind::kBitstringCheck) { + locations->SetInAt(1, Location::ConstantLocation(instruction->InputAt(1)->AsConstant())); + locations->SetInAt(2, Location::ConstantLocation(instruction->InputAt(2)->AsConstant())); + locations->SetInAt(3, Location::ConstantLocation(instruction->InputAt(3)->AsConstant())); + } else { + locations->SetInAt(1, Location::RequiresRegister()); + } // The output does overlap inputs. // Note that TypeCheckSlowPathMIPS64 uses this register too. locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap); @@ -5535,7 +5687,7 @@ void InstructionCodeGeneratorMIPS64::VisitInstanceOf(HInstanceOf* instruction) { LocationSummary* locations = instruction->GetLocations(); Location obj_loc = locations->InAt(0); GpuRegister obj = obj_loc.AsRegister<GpuRegister>(); - GpuRegister cls = locations->InAt(1).AsRegister<GpuRegister>(); + Location cls = locations->InAt(1); Location out_loc = locations->Out(); GpuRegister out = out_loc.AsRegister<GpuRegister>(); const size_t num_temps = NumberOfInstanceOfTemps(type_check_kind); @@ -5567,7 +5719,7 @@ void InstructionCodeGeneratorMIPS64::VisitInstanceOf(HInstanceOf* instruction) { maybe_temp_loc, read_barrier_option); // Classes must be equal for the instanceof to succeed. - __ Xor(out, out, cls); + __ Xor(out, out, cls.AsRegister<GpuRegister>()); __ Sltiu(out, out, 1); break; } @@ -5594,7 +5746,7 @@ void InstructionCodeGeneratorMIPS64::VisitInstanceOf(HInstanceOf* instruction) { read_barrier_option); // If `out` is null, we use it for the result, and jump to `done`. __ Beqzc(out, &done); - __ Bnec(out, cls, &loop); + __ Bnec(out, cls.AsRegister<GpuRegister>(), &loop); __ LoadConst32(out, 1); break; } @@ -5612,7 +5764,7 @@ void InstructionCodeGeneratorMIPS64::VisitInstanceOf(HInstanceOf* instruction) { // Walk over the class hierarchy to find a match. Mips64Label loop, success; __ Bind(&loop); - __ Beqc(out, cls, &success); + __ Beqc(out, cls.AsRegister<GpuRegister>(), &success); // /* HeapReference<Class> */ out = out->super_class_ GenerateReferenceLoadOneRegister(instruction, out_loc, @@ -5639,7 +5791,7 @@ void InstructionCodeGeneratorMIPS64::VisitInstanceOf(HInstanceOf* instruction) { read_barrier_option); // Do an exact check. Mips64Label success; - __ Beqc(out, cls, &success); + __ Beqc(out, cls.AsRegister<GpuRegister>(), &success); // Otherwise, we need to check that the object's class is a non-primitive array. 
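The Sll followed by a branch-on-negative above tests the single read-barrier-state bit of the lock word. A sketch of the equivalent check (IsGray is an illustrative helper; read_barrier_state_shift stands in for the real LockWord::kReadBarrierStateShift):

    #include <cstdint>

    bool IsGray(uint32_t lock_word, unsigned read_barrier_state_shift) {
      // Move the 1-bit rb_state into bit 31 (Sll); a signed "less than zero"
      // test is then true exactly when rb_state == GrayState() == 1.
      int32_t shifted = static_cast<int32_t>(lock_word << (31u - read_barrier_state_shift));
      return shifted < 0;
    }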
// /* HeapReference<Class> */ out = out->component_type_ GenerateReferenceLoadOneRegister(instruction, @@ -5669,9 +5821,9 @@ void InstructionCodeGeneratorMIPS64::VisitInstanceOf(HInstanceOf* instruction) { kWithoutReadBarrier); DCHECK(locations->OnlyCallsOnSlowPath()); slow_path = new (codegen_->GetScopedAllocator()) TypeCheckSlowPathMIPS64( - instruction, /* is_fatal */ false); + instruction, /* is_fatal= */ false); codegen_->AddSlowPath(slow_path); - __ Bnec(out, cls, slow_path->GetEntryLabel()); + __ Bnec(out, cls.AsRegister<GpuRegister>(), slow_path->GetEntryLabel()); __ LoadConst32(out, 1); break; } @@ -5698,11 +5850,25 @@ void InstructionCodeGeneratorMIPS64::VisitInstanceOf(HInstanceOf* instruction) { // This should also be beneficial for the other cases above. DCHECK(locations->OnlyCallsOnSlowPath()); slow_path = new (codegen_->GetScopedAllocator()) TypeCheckSlowPathMIPS64( - instruction, /* is_fatal */ false); + instruction, /* is_fatal= */ false); codegen_->AddSlowPath(slow_path); __ Bc(slow_path->GetEntryLabel()); break; } + + case TypeCheckKind::kBitstringCheck: { + // /* HeapReference<Class> */ temp = obj->klass_ + GenerateReferenceLoadTwoRegisters(instruction, + out_loc, + obj_loc, + class_offset, + maybe_temp_loc, + kWithoutReadBarrier); + + GenerateBitstringTypeCheckCompare(instruction, out); + __ Sltiu(out, out, 1); + break; + } } __ Bind(&done); @@ -5825,6 +5991,14 @@ void InstructionCodeGeneratorMIPS64::VisitInvokePolymorphic(HInvokePolymorphic* codegen_->GenerateInvokePolymorphicCall(invoke); } +void LocationsBuilderMIPS64::VisitInvokeCustom(HInvokeCustom* invoke) { + HandleInvoke(invoke); +} + +void InstructionCodeGeneratorMIPS64::VisitInvokeCustom(HInvokeCustom* invoke) { + codegen_->GenerateInvokeCustomCall(invoke); +} + static bool TryGenerateIntrinsicCode(HInvoke* invoke, CodeGeneratorMIPS64* codegen) { if (invoke->GetLocations()->Intrinsified()) { IntrinsicCodeGeneratorMIPS64 intrinsic(codegen); @@ -5839,14 +6013,14 @@ HLoadString::LoadKind CodeGeneratorMIPS64::GetSupportedLoadStringKind( bool fallback_load = false; switch (desired_string_load_kind) { case HLoadString::LoadKind::kBootImageLinkTimePcRelative: - case HLoadString::LoadKind::kBootImageInternTable: + case HLoadString::LoadKind::kBootImageRelRo: case HLoadString::LoadKind::kBssEntry: DCHECK(!Runtime::Current()->UseJitCompilation()); break; + case HLoadString::LoadKind::kJitBootImageAddress: case HLoadString::LoadKind::kJitTableAddress: DCHECK(Runtime::Current()->UseJitCompilation()); break; - case HLoadString::LoadKind::kBootImageAddress: case HLoadString::LoadKind::kRuntimeCall: break; } @@ -5866,14 +6040,14 @@ HLoadClass::LoadKind CodeGeneratorMIPS64::GetSupportedLoadClassKind( case HLoadClass::LoadKind::kReferrersClass: break; case HLoadClass::LoadKind::kBootImageLinkTimePcRelative: - case HLoadClass::LoadKind::kBootImageClassTable: + case HLoadClass::LoadKind::kBootImageRelRo: case HLoadClass::LoadKind::kBssEntry: DCHECK(!Runtime::Current()->UseJitCompilation()); break; + case HLoadClass::LoadKind::kJitBootImageAddress: case HLoadClass::LoadKind::kJitTableAddress: DCHECK(Runtime::Current()->UseJitCompilation()); break; - case HLoadClass::LoadKind::kBootImageAddress: case HLoadClass::LoadKind::kRuntimeCall: break; } @@ -5885,7 +6059,7 @@ HLoadClass::LoadKind CodeGeneratorMIPS64::GetSupportedLoadClassKind( HInvokeStaticOrDirect::DispatchInfo CodeGeneratorMIPS64::GetSupportedInvokeStaticOrDirectDispatch( const HInvokeStaticOrDirect::DispatchInfo& desired_dispatch_info, - HInvokeStaticOrDirect* invoke 
ATTRIBUTE_UNUSED) { + ArtMethod* method ATTRIBUTE_UNUSED) { // On MIPS64 we support all dispatch types. return desired_dispatch_info; } @@ -5918,23 +6092,32 @@ void CodeGeneratorMIPS64::GenerateStaticOrDirectCall( CodeGeneratorMIPS64::PcRelativePatchInfo* info_low = NewBootImageMethodPatch(invoke->GetTargetMethod(), info_high); EmitPcRelativeAddressPlaceholderHigh(info_high, AT, info_low); - __ Daddiu(temp.AsRegister<GpuRegister>(), AT, /* placeholder */ 0x5678); + __ Daddiu(temp.AsRegister<GpuRegister>(), AT, /* imm16= */ 0x5678); break; } - case HInvokeStaticOrDirect::MethodLoadKind::kDirectAddress: - __ LoadLiteral(temp.AsRegister<GpuRegister>(), - kLoadDoubleword, - DeduplicateUint64Literal(invoke->GetMethodAddress())); + case HInvokeStaticOrDirect::MethodLoadKind::kBootImageRelRo: { + uint32_t boot_image_offset = GetBootImageOffset(invoke); + PcRelativePatchInfo* info_high = NewBootImageRelRoPatch(boot_image_offset); + PcRelativePatchInfo* info_low = NewBootImageRelRoPatch(boot_image_offset, info_high); + EmitPcRelativeAddressPlaceholderHigh(info_high, AT, info_low); + // Note: Boot image is in the low 4GiB and the entry is 32-bit, so emit a 32-bit load. + __ Lwu(temp.AsRegister<GpuRegister>(), AT, /* imm16= */ 0x5678); break; + } case HInvokeStaticOrDirect::MethodLoadKind::kBssEntry: { PcRelativePatchInfo* info_high = NewMethodBssEntryPatch( MethodReference(&GetGraph()->GetDexFile(), invoke->GetDexMethodIndex())); PcRelativePatchInfo* info_low = NewMethodBssEntryPatch( MethodReference(&GetGraph()->GetDexFile(), invoke->GetDexMethodIndex()), info_high); EmitPcRelativeAddressPlaceholderHigh(info_high, AT, info_low); - __ Ld(temp.AsRegister<GpuRegister>(), AT, /* placeholder */ 0x5678); + __ Ld(temp.AsRegister<GpuRegister>(), AT, /* imm16= */ 0x5678); break; } + case HInvokeStaticOrDirect::MethodLoadKind::kJitDirectAddress: + __ LoadLiteral(temp.AsRegister<GpuRegister>(), + kLoadDoubleword, + DeduplicateUint64Literal(invoke->GetMethodAddress())); + break; case HInvokeStaticOrDirect::MethodLoadKind::kRuntimeCall: { GenerateInvokeStaticOrDirectRuntimeCall(invoke, temp, slow_path); return; // No code pointer retrieval; the runtime performs the call directly. @@ -6048,10 +6231,7 @@ void LocationsBuilderMIPS64::VisitLoadClass(HLoadClass* cls) { if (load_kind == HLoadClass::LoadKind::kBssEntry) { if (!kUseReadBarrier || kUseBakerReadBarrier) { // Rely on the type resolution or initialization and marking to save everything we need. - RegisterSet caller_saves = RegisterSet::Empty(); - InvokeRuntimeCallingConvention calling_convention; - caller_saves.Add(Location::RegisterLocation(calling_convention.GetRegisterAt(0))); - locations->SetCustomSlowPathCallerSaves(caller_saves); + locations->SetCustomSlowPathCallerSaves(OneRegInReferenceOutSaveEverythingCallerSaves()); } else { // For non-Baker read barriers we have a temp-clobbering call. 
} @@ -6100,33 +6280,18 @@ void InstructionCodeGeneratorMIPS64::VisitLoadClass(HLoadClass* cls) NO_THREAD_S CodeGeneratorMIPS64::PcRelativePatchInfo* info_low = codegen_->NewBootImageTypePatch(cls->GetDexFile(), cls->GetTypeIndex(), info_high); codegen_->EmitPcRelativeAddressPlaceholderHigh(info_high, AT, info_low); - __ Daddiu(out, AT, /* placeholder */ 0x5678); - break; - } - case HLoadClass::LoadKind::kBootImageAddress: { - DCHECK_EQ(read_barrier_option, kWithoutReadBarrier); - uint32_t address = dchecked_integral_cast<uint32_t>( - reinterpret_cast<uintptr_t>(cls->GetClass().Get())); - DCHECK_NE(address, 0u); - __ LoadLiteral(out, - kLoadUnsignedWord, - codegen_->DeduplicateBootImageAddressLiteral(address)); + __ Daddiu(out, AT, /* imm16= */ 0x5678); break; } - case HLoadClass::LoadKind::kBootImageClassTable: { + case HLoadClass::LoadKind::kBootImageRelRo: { DCHECK(!codegen_->GetCompilerOptions().IsBootImage()); + uint32_t boot_image_offset = codegen_->GetBootImageOffset(cls); CodeGeneratorMIPS64::PcRelativePatchInfo* info_high = - codegen_->NewBootImageTypePatch(cls->GetDexFile(), cls->GetTypeIndex()); + codegen_->NewBootImageRelRoPatch(boot_image_offset); CodeGeneratorMIPS64::PcRelativePatchInfo* info_low = - codegen_->NewBootImageTypePatch(cls->GetDexFile(), cls->GetTypeIndex(), info_high); + codegen_->NewBootImageRelRoPatch(boot_image_offset, info_high); codegen_->EmitPcRelativeAddressPlaceholderHigh(info_high, AT, info_low); - __ Lwu(out, AT, /* placeholder */ 0x5678); - // Extract the reference from the slot data, i.e. clear the hash bits. - int32_t masked_hash = ClassTable::TableSlot::MaskHash( - ComputeModifiedUtf8Hash(cls->GetDexFile().StringByTypeIdx(cls->GetTypeIndex()))); - if (masked_hash != 0) { - __ Daddiu(out, out, -masked_hash); - } + __ Lwu(out, AT, /* imm16= */ 0x5678); break; } case HLoadClass::LoadKind::kBssEntry: { @@ -6138,12 +6303,21 @@ void InstructionCodeGeneratorMIPS64::VisitLoadClass(HLoadClass* cls) NO_THREAD_S GenerateGcRootFieldLoad(cls, out_loc, out, - /* placeholder */ 0x5678, + /* offset= */ 0x5678, read_barrier_option, &info_low->label); generate_null_check = true; break; } + case HLoadClass::LoadKind::kJitBootImageAddress: { + DCHECK_EQ(read_barrier_option, kWithoutReadBarrier); + uint32_t address = reinterpret_cast32<uint32_t>(cls->GetClass().Get()); + DCHECK_NE(address, 0u); + __ LoadLiteral(out, + kLoadUnsignedWord, + codegen_->DeduplicateBootImageAddressLiteral(address)); + break; + } case HLoadClass::LoadKind::kJitTableAddress: __ LoadLiteral(out, kLoadUnsignedWord, @@ -6160,8 +6334,8 @@ void InstructionCodeGeneratorMIPS64::VisitLoadClass(HLoadClass* cls) NO_THREAD_S if (generate_null_check || cls->MustGenerateClinitCheck()) { DCHECK(cls->CanCallRuntime()); - SlowPathCodeMIPS64* slow_path = new (codegen_->GetScopedAllocator()) LoadClassSlowPathMIPS64( - cls, cls, cls->GetDexPc(), cls->MustGenerateClinitCheck()); + SlowPathCodeMIPS64* slow_path = + new (codegen_->GetScopedAllocator()) LoadClassSlowPathMIPS64(cls, cls); codegen_->AddSlowPath(slow_path); if (generate_null_check) { __ Beqzc(out, slow_path->GetEntryLabel()); @@ -6174,6 +6348,26 @@ void InstructionCodeGeneratorMIPS64::VisitLoadClass(HLoadClass* cls) NO_THREAD_S } } +void LocationsBuilderMIPS64::VisitLoadMethodHandle(HLoadMethodHandle* load) { + InvokeRuntimeCallingConvention calling_convention; + Location loc = Location::RegisterLocation(calling_convention.GetRegisterAt(0)); + CodeGenerator::CreateLoadMethodHandleRuntimeCallLocationSummary(load, loc, loc); +} + +void 
InstructionCodeGeneratorMIPS64::VisitLoadMethodHandle(HLoadMethodHandle* load) { + codegen_->GenerateLoadMethodHandleRuntimeCall(load); +} + +void LocationsBuilderMIPS64::VisitLoadMethodType(HLoadMethodType* load) { + InvokeRuntimeCallingConvention calling_convention; + Location loc = Location::RegisterLocation(calling_convention.GetRegisterAt(0)); + CodeGenerator::CreateLoadMethodTypeRuntimeCallLocationSummary(load, loc, loc); +} + +void InstructionCodeGeneratorMIPS64::VisitLoadMethodType(HLoadMethodType* load) { + codegen_->GenerateLoadMethodTypeRuntimeCall(load); +} + static int32_t GetExceptionTlsOffset() { return Thread::ExceptionOffset<kMips64PointerSize>().Int32Value(); } @@ -6209,10 +6403,7 @@ void LocationsBuilderMIPS64::VisitLoadString(HLoadString* load) { if (load_kind == HLoadString::LoadKind::kBssEntry) { if (!kUseReadBarrier || kUseBakerReadBarrier) { // Rely on the pResolveString and marking to save everything we need. - RegisterSet caller_saves = RegisterSet::Empty(); - InvokeRuntimeCallingConvention calling_convention; - caller_saves.Add(Location::RegisterLocation(calling_convention.GetRegisterAt(0))); - locations->SetCustomSlowPathCallerSaves(caller_saves); + locations->SetCustomSlowPathCallerSaves(OneRegInReferenceOutSaveEverythingCallerSaves()); } else { // For non-Baker read barriers we have a temp-clobbering call. } @@ -6236,30 +6427,21 @@ void InstructionCodeGeneratorMIPS64::VisitLoadString(HLoadString* load) NO_THREA CodeGeneratorMIPS64::PcRelativePatchInfo* info_low = codegen_->NewBootImageStringPatch(load->GetDexFile(), load->GetStringIndex(), info_high); codegen_->EmitPcRelativeAddressPlaceholderHigh(info_high, AT, info_low); - __ Daddiu(out, AT, /* placeholder */ 0x5678); - return; - } - case HLoadString::LoadKind::kBootImageAddress: { - uint32_t address = dchecked_integral_cast<uint32_t>( - reinterpret_cast<uintptr_t>(load->GetString().Get())); - DCHECK_NE(address, 0u); - __ LoadLiteral(out, - kLoadUnsignedWord, - codegen_->DeduplicateBootImageAddressLiteral(address)); + __ Daddiu(out, AT, /* imm16= */ 0x5678); return; } - case HLoadString::LoadKind::kBootImageInternTable: { + case HLoadString::LoadKind::kBootImageRelRo: { DCHECK(!codegen_->GetCompilerOptions().IsBootImage()); + uint32_t boot_image_offset = codegen_->GetBootImageOffset(load); CodeGeneratorMIPS64::PcRelativePatchInfo* info_high = - codegen_->NewBootImageStringPatch(load->GetDexFile(), load->GetStringIndex()); + codegen_->NewBootImageRelRoPatch(boot_image_offset); CodeGeneratorMIPS64::PcRelativePatchInfo* info_low = - codegen_->NewBootImageStringPatch(load->GetDexFile(), load->GetStringIndex(), info_high); + codegen_->NewBootImageRelRoPatch(boot_image_offset, info_high); codegen_->EmitPcRelativeAddressPlaceholderHigh(info_high, AT, info_low); - __ Lwu(out, AT, /* placeholder */ 0x5678); + __ Lwu(out, AT, /* imm16= */ 0x5678); return; } case HLoadString::LoadKind::kBssEntry: { - DCHECK(!codegen_->GetCompilerOptions().IsBootImage()); CodeGeneratorMIPS64::PcRelativePatchInfo* info_high = codegen_->NewStringBssEntryPatch(load->GetDexFile(), load->GetStringIndex()); CodeGeneratorMIPS64::PcRelativePatchInfo* info_low = @@ -6268,7 +6450,7 @@ void InstructionCodeGeneratorMIPS64::VisitLoadString(HLoadString* load) NO_THREA GenerateGcRootFieldLoad(load, out_loc, out, - /* placeholder */ 0x5678, + /* offset= */ 0x5678, kCompilerReadBarrierOption, &info_low->label); SlowPathCodeMIPS64* slow_path = @@ -6278,6 +6460,14 @@ void InstructionCodeGeneratorMIPS64::VisitLoadString(HLoadString* load) NO_THREA __ 
Bind(slow_path->GetExitLabel()); return; } + case HLoadString::LoadKind::kJitBootImageAddress: { + uint32_t address = reinterpret_cast32<uint32_t>(load->GetString().Get()); + DCHECK_NE(address, 0u); + __ LoadLiteral(out, + kLoadUnsignedWord, + codegen_->DeduplicateBootImageAddressLiteral(address)); + return; + } case HLoadString::LoadKind::kJitTableAddress: __ LoadLiteral(out, kLoadUnsignedWord, @@ -6442,10 +6632,8 @@ void LocationsBuilderMIPS64::VisitNewArray(HNewArray* instruction) { } void InstructionCodeGeneratorMIPS64::VisitNewArray(HNewArray* instruction) { - // Note: if heap poisoning is enabled, the entry point takes care - // of poisoning the reference. - QuickEntrypointEnum entrypoint = - CodeGenerator::GetArrayAllocationEntrypoint(instruction->GetLoadClass()->GetClass()); + // Note: if heap poisoning is enabled, the entry point takes care of poisoning the reference. + QuickEntrypointEnum entrypoint = CodeGenerator::GetArrayAllocationEntrypoint(instruction); codegen_->InvokeRuntime(entrypoint, instruction, instruction->GetDexPc()); CheckEntrypointTypes<kQuickAllocArrayResolved, void*, mirror::Class*, int32_t>(); DCHECK(!codegen_->IsLeafMethod()); @@ -6455,31 +6643,13 @@ void LocationsBuilderMIPS64::VisitNewInstance(HNewInstance* instruction) { LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary( instruction, LocationSummary::kCallOnMainOnly); InvokeRuntimeCallingConvention calling_convention; - if (instruction->IsStringAlloc()) { - locations->AddTemp(Location::RegisterLocation(kMethodRegisterArgument)); - } else { - locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0))); - } + locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0))); locations->SetOut(calling_convention.GetReturnLocation(DataType::Type::kReference)); } void InstructionCodeGeneratorMIPS64::VisitNewInstance(HNewInstance* instruction) { - // Note: if heap poisoning is enabled, the entry point takes care - // of poisoning the reference. - if (instruction->IsStringAlloc()) { - // String is allocated through StringFactory. Call NewEmptyString entry point. 
- GpuRegister temp = instruction->GetLocations()->GetTemp(0).AsRegister<GpuRegister>(); - MemberOffset code_offset = - ArtMethod::EntryPointFromQuickCompiledCodeOffset(kMips64PointerSize); - __ LoadFromOffset(kLoadDoubleword, temp, TR, QUICK_ENTRY_POINT(pNewEmptyString)); - __ LoadFromOffset(kLoadDoubleword, T9, temp, code_offset.Int32Value()); - __ Jalr(T9); - __ Nop(); - codegen_->RecordPcInfo(instruction, instruction->GetDexPc()); - } else { - codegen_->InvokeRuntime(instruction->GetEntrypoint(), instruction, instruction->GetDexPc()); - CheckEntrypointTypes<kQuickAllocObjectWithChecks, void*, mirror::Class*>(); - } + codegen_->InvokeRuntime(instruction->GetEntrypoint(), instruction, instruction->GetDexPc()); + CheckEntrypointTypes<kQuickAllocObjectWithChecks, void*, mirror::Class*>(); } void LocationsBuilderMIPS64::VisitNot(HNot* instruction) { @@ -6665,6 +6835,236 @@ void InstructionCodeGeneratorMIPS64::VisitRem(HRem* instruction) { } } +static void CreateMinMaxLocations(ArenaAllocator* allocator, HBinaryOperation* minmax) { + LocationSummary* locations = new (allocator) LocationSummary(minmax); + switch (minmax->GetResultType()) { + case DataType::Type::kInt32: + case DataType::Type::kInt64: + locations->SetInAt(0, Location::RequiresRegister()); + locations->SetInAt(1, Location::RequiresRegister()); + locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); + break; + case DataType::Type::kFloat32: + case DataType::Type::kFloat64: + locations->SetInAt(0, Location::RequiresFpuRegister()); + locations->SetInAt(1, Location::RequiresFpuRegister()); + locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap); + break; + default: + LOG(FATAL) << "Unexpected type for HMinMax " << minmax->GetResultType(); + } +} + +void InstructionCodeGeneratorMIPS64::GenerateMinMaxInt(LocationSummary* locations, bool is_min) { + GpuRegister lhs = locations->InAt(0).AsRegister<GpuRegister>(); + GpuRegister rhs = locations->InAt(1).AsRegister<GpuRegister>(); + GpuRegister out = locations->Out().AsRegister<GpuRegister>(); + + if (lhs == rhs) { + if (out != lhs) { + __ Move(out, lhs); + } + } else { + // Some architectures, such as ARM and MIPS (prior to r6), have a + // conditional move instruction which only changes the target + // (output) register if the condition is true (MIPS prior to r6 had + // MOVF, MOVT, and MOVZ). The SELEQZ and SELNEZ instructions always + // change the target (output) register. If the condition is true the + // output register gets the contents of the "rs" register; otherwise, + // the output register is set to zero. One consequence of this is + // that to implement something like "rd = c==0 ? rs : rt" MIPS64r6 + // needs to use a pair of SELEQZ/SELNEZ instructions. After + // executing this pair of instructions one of the output registers + // from the pair will necessarily contain zero. Then the code ORs the + // output registers from the SELEQZ/SELNEZ instructions to get the + // final result. + // + // The initial test to see if the output register is same as the + // first input register is needed to make sure that value in the + // first input register isn't clobbered before we've finished + // computing the output value. 
The logic in the corresponding else + // clause performs the same task but makes sure the second input + // register isn't clobbered in the event that it's the same register + // as the output register; the else clause also handles the case + // where the output register is distinct from both the first, and the + // second input registers. + if (out == lhs) { + __ Slt(AT, rhs, lhs); + if (is_min) { + __ Seleqz(out, lhs, AT); + __ Selnez(AT, rhs, AT); + } else { + __ Selnez(out, lhs, AT); + __ Seleqz(AT, rhs, AT); + } + } else { + __ Slt(AT, lhs, rhs); + if (is_min) { + __ Seleqz(out, rhs, AT); + __ Selnez(AT, lhs, AT); + } else { + __ Selnez(out, rhs, AT); + __ Seleqz(AT, lhs, AT); + } + } + __ Or(out, out, AT); + } +} + +void InstructionCodeGeneratorMIPS64::GenerateMinMaxFP(LocationSummary* locations, + bool is_min, + DataType::Type type) { + FpuRegister a = locations->InAt(0).AsFpuRegister<FpuRegister>(); + FpuRegister b = locations->InAt(1).AsFpuRegister<FpuRegister>(); + FpuRegister out = locations->Out().AsFpuRegister<FpuRegister>(); + + Mips64Label noNaNs; + Mips64Label done; + FpuRegister ftmp = ((out != a) && (out != b)) ? out : FTMP; + + // When Java computes min/max it prefers a NaN to a number; the + // behavior of MIPSR6 is to prefer numbers to NaNs, i.e., if one of + // the inputs is a NaN and the other is a valid number, the MIPS + // instruction will return the number; Java wants the NaN value + // returned. This is why there is extra logic preceding the use of + // the MIPS min.fmt/max.fmt instructions. If either a, or b holds a + // NaN, return the NaN, otherwise return the min/max. + if (type == DataType::Type::kFloat64) { + __ CmpUnD(FTMP, a, b); + __ Bc1eqz(FTMP, &noNaNs); + + // One of the inputs is a NaN + __ CmpEqD(ftmp, a, a); + // If a == a then b is the NaN, otherwise a is the NaN. + __ SelD(ftmp, a, b); + + if (ftmp != out) { + __ MovD(out, ftmp); + } + + __ Bc(&done); + + __ Bind(&noNaNs); + + if (is_min) { + __ MinD(out, a, b); + } else { + __ MaxD(out, a, b); + } + } else { + DCHECK_EQ(type, DataType::Type::kFloat32); + __ CmpUnS(FTMP, a, b); + __ Bc1eqz(FTMP, &noNaNs); + + // One of the inputs is a NaN + __ CmpEqS(ftmp, a, a); + // If a == a then b is the NaN, otherwise a is the NaN. 
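A compact sketch of the SELEQZ/SELNEZ composition described in the long comment above, for the min case with distinct registers (SelEqz/SelNez merely model the instruction semantics; they are not ART helpers):

    #include <cstdint>

    static uint64_t SelEqz(uint64_t rs, uint64_t rt) { return rt == 0 ? rs : 0; }  // SELEQZ
    static uint64_t SelNez(uint64_t rs, uint64_t rt) { return rt != 0 ? rs : 0; }  // SELNEZ

    int64_t MinInt64(int64_t lhs, int64_t rhs) {
      uint64_t cond = (lhs < rhs) ? 1 : 0;                    // Slt(AT, lhs, rhs)
      uint64_t a = SelEqz(static_cast<uint64_t>(rhs), cond);  // rhs survives when lhs >= rhs
      uint64_t b = SelNez(static_cast<uint64_t>(lhs), cond);  // lhs survives when lhs <  rhs
      // One of the two is always zero, so OR-ing them yields the selected value.
      return static_cast<int64_t>(a | b);                     // Or(out, out, AT)
    }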
+ __ SelS(ftmp, a, b); + + if (ftmp != out) { + __ MovS(out, ftmp); + } + + __ Bc(&done); + + __ Bind(&noNaNs); + + if (is_min) { + __ MinS(out, a, b); + } else { + __ MaxS(out, a, b); + } + } + + __ Bind(&done); +} + +void InstructionCodeGeneratorMIPS64::GenerateMinMax(HBinaryOperation* minmax, bool is_min) { + DataType::Type type = minmax->GetResultType(); + switch (type) { + case DataType::Type::kInt32: + case DataType::Type::kInt64: + GenerateMinMaxInt(minmax->GetLocations(), is_min); + break; + case DataType::Type::kFloat32: + case DataType::Type::kFloat64: + GenerateMinMaxFP(minmax->GetLocations(), is_min, type); + break; + default: + LOG(FATAL) << "Unexpected type for HMinMax " << type; + } +} + +void LocationsBuilderMIPS64::VisitMin(HMin* min) { + CreateMinMaxLocations(GetGraph()->GetAllocator(), min); +} + +void InstructionCodeGeneratorMIPS64::VisitMin(HMin* min) { + GenerateMinMax(min, /*is_min*/ true); +} + +void LocationsBuilderMIPS64::VisitMax(HMax* max) { + CreateMinMaxLocations(GetGraph()->GetAllocator(), max); +} + +void InstructionCodeGeneratorMIPS64::VisitMax(HMax* max) { + GenerateMinMax(max, /*is_min*/ false); +} + +void LocationsBuilderMIPS64::VisitAbs(HAbs* abs) { + LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(abs); + switch (abs->GetResultType()) { + case DataType::Type::kInt32: + case DataType::Type::kInt64: + locations->SetInAt(0, Location::RequiresRegister()); + locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); + break; + case DataType::Type::kFloat32: + case DataType::Type::kFloat64: + locations->SetInAt(0, Location::RequiresFpuRegister()); + locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap); + break; + default: + LOG(FATAL) << "Unexpected abs type " << abs->GetResultType(); + } +} + +void InstructionCodeGeneratorMIPS64::VisitAbs(HAbs* abs) { + LocationSummary* locations = abs->GetLocations(); + switch (abs->GetResultType()) { + case DataType::Type::kInt32: { + GpuRegister in = locations->InAt(0).AsRegister<GpuRegister>(); + GpuRegister out = locations->Out().AsRegister<GpuRegister>(); + __ Sra(AT, in, 31); + __ Xor(out, in, AT); + __ Subu(out, out, AT); + break; + } + case DataType::Type::kInt64: { + GpuRegister in = locations->InAt(0).AsRegister<GpuRegister>(); + GpuRegister out = locations->Out().AsRegister<GpuRegister>(); + __ Dsra32(AT, in, 31); + __ Xor(out, in, AT); + __ Dsubu(out, out, AT); + break; + } + case DataType::Type::kFloat32: { + FpuRegister in = locations->InAt(0).AsFpuRegister<FpuRegister>(); + FpuRegister out = locations->Out().AsFpuRegister<FpuRegister>(); + __ AbsS(out, in); + break; + } + case DataType::Type::kFloat64: { + FpuRegister in = locations->InAt(0).AsFpuRegister<FpuRegister>(); + FpuRegister out = locations->Out().AsFpuRegister<FpuRegister>(); + __ AbsD(out, in); + break; + } + default: + LOG(FATAL) << "Unexpected abs type " << abs->GetResultType(); + } +} + void LocationsBuilderMIPS64::VisitConstructorFence(HConstructorFence* constructor_fence) { constructor_fence->SetLocations(nullptr); } diff --git a/compiler/optimizing/code_generator_mips64.h b/compiler/optimizing/code_generator_mips64.h index e6b69c469f..52f3a62f33 100644 --- a/compiler/optimizing/code_generator_mips64.h +++ b/compiler/optimizing/code_generator_mips64.h @@ -79,9 +79,9 @@ class InvokeDexCallingConventionVisitorMIPS64 : public InvokeDexCallingConventio InvokeDexCallingConventionVisitorMIPS64() {} virtual ~InvokeDexCallingConventionVisitorMIPS64() {} - Location 
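A sketch of the Java floating-point min semantics that the CmpUn/Bc1eqz preamble in GenerateMinMaxFP restores before falling back to the hardware instruction (illustrative only; std::fmin stands in for MinD here, and zero-sign handling is left to the hardware min, as in the emitted code):

    #include <cmath>

    double JavaStyleMin(double a, double b) {
      if (a != a || b != b) {          // CmpUnD: unordered, i.e. at least one input is a NaN
        return (a == a) ? b : a;       // CmpEqD + SelD: hand back whichever input is the NaN
      }
      return std::fmin(a, b);          // MinD: both inputs ordered, the hardware min suffices
    }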
GetNextLocation(DataType::Type type) OVERRIDE; - Location GetReturnLocation(DataType::Type type) const OVERRIDE; - Location GetMethodLocation() const OVERRIDE; + Location GetNextLocation(DataType::Type type) override; + Location GetReturnLocation(DataType::Type type) const override; + Location GetMethodLocation() const override; private: InvokeDexCallingConvention calling_convention; @@ -108,22 +108,22 @@ class FieldAccessCallingConventionMIPS64 : public FieldAccessCallingConvention { public: FieldAccessCallingConventionMIPS64() {} - Location GetObjectLocation() const OVERRIDE { + Location GetObjectLocation() const override { return Location::RegisterLocation(A1); } - Location GetFieldIndexLocation() const OVERRIDE { + Location GetFieldIndexLocation() const override { return Location::RegisterLocation(A0); } - Location GetReturnLocation(DataType::Type type ATTRIBUTE_UNUSED) const OVERRIDE { + Location GetReturnLocation(DataType::Type type ATTRIBUTE_UNUSED) const override { return Location::RegisterLocation(V0); } Location GetSetValueLocation(DataType::Type type ATTRIBUTE_UNUSED, - bool is_instance) const OVERRIDE { + bool is_instance) const override { return is_instance ? Location::RegisterLocation(A2) : Location::RegisterLocation(A1); } - Location GetFpuLocation(DataType::Type type ATTRIBUTE_UNUSED) const OVERRIDE { + Location GetFpuLocation(DataType::Type type ATTRIBUTE_UNUSED) const override { return Location::FpuRegisterLocation(F0); } @@ -136,10 +136,10 @@ class ParallelMoveResolverMIPS64 : public ParallelMoveResolverWithSwap { ParallelMoveResolverMIPS64(ArenaAllocator* allocator, CodeGeneratorMIPS64* codegen) : ParallelMoveResolverWithSwap(allocator), codegen_(codegen) {} - void EmitMove(size_t index) OVERRIDE; - void EmitSwap(size_t index) OVERRIDE; - void SpillScratch(int reg) OVERRIDE; - void RestoreScratch(int reg) OVERRIDE; + void EmitMove(size_t index) override; + void EmitSwap(size_t index) override; + void SpillScratch(int reg) override; + void RestoreScratch(int reg) override; void Exchange(int index1, int index2, bool double_slot); void ExchangeQuadSlots(int index1, int index2); @@ -173,14 +173,14 @@ class LocationsBuilderMIPS64 : public HGraphVisitor { : HGraphVisitor(graph), codegen_(codegen) {} #define DECLARE_VISIT_INSTRUCTION(name, super) \ - void Visit##name(H##name* instr) OVERRIDE; + void Visit##name(H##name* instr) override; FOR_EACH_CONCRETE_INSTRUCTION_COMMON(DECLARE_VISIT_INSTRUCTION) FOR_EACH_CONCRETE_INSTRUCTION_MIPS64(DECLARE_VISIT_INSTRUCTION) #undef DECLARE_VISIT_INSTRUCTION - void VisitInstruction(HInstruction* instruction) OVERRIDE { + void VisitInstruction(HInstruction* instruction) override { LOG(FATAL) << "Unreachable instruction " << instruction->DebugName() << " (id " << instruction->GetId() << ")"; } @@ -207,14 +207,14 @@ class InstructionCodeGeneratorMIPS64 : public InstructionCodeGenerator { InstructionCodeGeneratorMIPS64(HGraph* graph, CodeGeneratorMIPS64* codegen); #define DECLARE_VISIT_INSTRUCTION(name, super) \ - void Visit##name(H##name* instr) OVERRIDE; + void Visit##name(H##name* instr) override; FOR_EACH_CONCRETE_INSTRUCTION_COMMON(DECLARE_VISIT_INSTRUCTION) FOR_EACH_CONCRETE_INSTRUCTION_MIPS64(DECLARE_VISIT_INSTRUCTION) #undef DECLARE_VISIT_INSTRUCTION - void VisitInstruction(HInstruction* instruction) OVERRIDE { + void VisitInstruction(HInstruction* instruction) override { LOG(FATAL) << "Unreachable instruction " << instruction->DebugName() << " (id " << instruction->GetId() << ")"; } @@ -233,6 +233,7 @@ class 
InstructionCodeGeneratorMIPS64 : public InstructionCodeGenerator { private: void GenerateClassInitializationCheck(SlowPathCodeMIPS64* slow_path, GpuRegister class_reg); + void GenerateBitstringTypeCheckCompare(HTypeCheckInstruction* check, GpuRegister temp); void GenerateSuspendCheck(HSuspendCheck* check, HBasicBlock* successor); void HandleBinaryOp(HBinaryOperation* operation); void HandleCondition(HCondition* instruction); @@ -242,6 +243,10 @@ class InstructionCodeGeneratorMIPS64 : public InstructionCodeGenerator { bool value_can_be_null); void HandleFieldGet(HInstruction* instruction, const FieldInfo& field_info); + void GenerateMinMaxInt(LocationSummary* locations, bool is_min); + void GenerateMinMaxFP(LocationSummary* locations, bool is_min, DataType::Type type); + void GenerateMinMax(HBinaryOperation* minmax, bool is_min); + // Generate a heap reference load using one register `out`: // // out <- *(out + offset) @@ -347,36 +352,35 @@ class InstructionCodeGeneratorMIPS64 : public InstructionCodeGenerator { class CodeGeneratorMIPS64 : public CodeGenerator { public: CodeGeneratorMIPS64(HGraph* graph, - const Mips64InstructionSetFeatures& isa_features, const CompilerOptions& compiler_options, OptimizingCompilerStats* stats = nullptr); virtual ~CodeGeneratorMIPS64() {} - void GenerateFrameEntry() OVERRIDE; - void GenerateFrameExit() OVERRIDE; + void GenerateFrameEntry() override; + void GenerateFrameExit() override; - void Bind(HBasicBlock* block) OVERRIDE; + void Bind(HBasicBlock* block) override; - size_t GetWordSize() const OVERRIDE { return kMips64DoublewordSize; } + size_t GetWordSize() const override { return kMips64DoublewordSize; } - size_t GetFloatingPointSpillSlotSize() const OVERRIDE { + size_t GetFloatingPointSpillSlotSize() const override { return GetGraph()->HasSIMD() ? 2 * kMips64DoublewordSize // 16 bytes for each spill. : 1 * kMips64DoublewordSize; // 8 bytes for each spill. } - uintptr_t GetAddressOf(HBasicBlock* block) OVERRIDE { + uintptr_t GetAddressOf(HBasicBlock* block) override { return assembler_.GetLabelLocation(GetLabelOf(block)); } - HGraphVisitor* GetLocationBuilder() OVERRIDE { return &location_builder_; } - HGraphVisitor* GetInstructionVisitor() OVERRIDE { return &instruction_visitor_; } - Mips64Assembler* GetAssembler() OVERRIDE { return &assembler_; } - const Mips64Assembler& GetAssembler() const OVERRIDE { return assembler_; } + HGraphVisitor* GetLocationBuilder() override { return &location_builder_; } + HGraphVisitor* GetInstructionVisitor() override { return &instruction_visitor_; } + Mips64Assembler* GetAssembler() override { return &assembler_; } + const Mips64Assembler& GetAssembler() const override { return assembler_; } // Emit linker patches. - void EmitLinkerPatches(ArenaVector<linker::LinkerPatch>* linker_patches) OVERRIDE; - void EmitJitRootPatches(uint8_t* code, const uint8_t* roots_data) OVERRIDE; + void EmitLinkerPatches(ArenaVector<linker::LinkerPatch>* linker_patches) override; + void EmitJitRootPatches(uint8_t* code, const uint8_t* roots_data) override; // Fast path implementation of ReadBarrier::Barrier for a heap // reference field load when Baker's read barriers are used. @@ -467,42 +471,40 @@ class CodeGeneratorMIPS64 : public CodeGenerator { // Register allocation. 
- void SetupBlockedRegisters() const OVERRIDE; + void SetupBlockedRegisters() const override; - size_t SaveCoreRegister(size_t stack_index, uint32_t reg_id) OVERRIDE; - size_t RestoreCoreRegister(size_t stack_index, uint32_t reg_id) OVERRIDE; - size_t SaveFloatingPointRegister(size_t stack_index, uint32_t reg_id) OVERRIDE; - size_t RestoreFloatingPointRegister(size_t stack_index, uint32_t reg_id) OVERRIDE; + size_t SaveCoreRegister(size_t stack_index, uint32_t reg_id) override; + size_t RestoreCoreRegister(size_t stack_index, uint32_t reg_id) override; + size_t SaveFloatingPointRegister(size_t stack_index, uint32_t reg_id) override; + size_t RestoreFloatingPointRegister(size_t stack_index, uint32_t reg_id) override; - void DumpCoreRegister(std::ostream& stream, int reg) const OVERRIDE; - void DumpFloatingPointRegister(std::ostream& stream, int reg) const OVERRIDE; + void DumpCoreRegister(std::ostream& stream, int reg) const override; + void DumpFloatingPointRegister(std::ostream& stream, int reg) const override; - InstructionSet GetInstructionSet() const OVERRIDE { return InstructionSet::kMips64; } + InstructionSet GetInstructionSet() const override { return InstructionSet::kMips64; } - const Mips64InstructionSetFeatures& GetInstructionSetFeatures() const { - return isa_features_; - } + const Mips64InstructionSetFeatures& GetInstructionSetFeatures() const; Mips64Label* GetLabelOf(HBasicBlock* block) const { return CommonGetLabelOf<Mips64Label>(block_labels_, block); } - void Initialize() OVERRIDE { + void Initialize() override { block_labels_ = CommonInitializeLabels<Mips64Label>(); } // We prefer aligned loads and stores (less code), so spill and restore registers in slow paths // at aligned locations. - uint32_t GetPreferredSlotsAlignment() const OVERRIDE { return kMips64DoublewordSize; } + uint32_t GetPreferredSlotsAlignment() const override { return kMips64DoublewordSize; } - void Finalize(CodeAllocator* allocator) OVERRIDE; + void Finalize(CodeAllocator* allocator) override; // Code generation helpers. - void MoveLocation(Location dst, Location src, DataType::Type dst_type) OVERRIDE; + void MoveLocation(Location dst, Location src, DataType::Type dst_type) override; - void MoveConstant(Location destination, int32_t value) OVERRIDE; + void MoveConstant(Location destination, int32_t value) override; - void AddLocationAsTemp(Location location, LocationSummary* locations) OVERRIDE; + void AddLocationAsTemp(Location location, LocationSummary* locations) override; void SwapLocations(Location loc1, Location loc2, DataType::Type type); @@ -511,7 +513,7 @@ class CodeGeneratorMIPS64 : public CodeGenerator { void InvokeRuntime(QuickEntrypointEnum entrypoint, HInstruction* instruction, uint32_t dex_pc, - SlowPathCode* slow_path = nullptr) OVERRIDE; + SlowPathCode* slow_path = nullptr) override; // Generate code to invoke a runtime entry point, but do not record // PC-related information in a stack map. @@ -521,39 +523,39 @@ class CodeGeneratorMIPS64 : public CodeGenerator { void GenerateInvokeRuntime(int32_t entry_point_offset); - ParallelMoveResolver* GetMoveResolver() OVERRIDE { return &move_resolver_; } + ParallelMoveResolver* GetMoveResolver() override { return &move_resolver_; } - bool NeedsTwoRegisters(DataType::Type type ATTRIBUTE_UNUSED) const OVERRIDE { return false; } + bool NeedsTwoRegisters(DataType::Type type ATTRIBUTE_UNUSED) const override { return false; } // Check if the desired_string_load_kind is supported. 
If it is, return it, // otherwise return a fall-back kind that should be used instead. HLoadString::LoadKind GetSupportedLoadStringKind( - HLoadString::LoadKind desired_string_load_kind) OVERRIDE; + HLoadString::LoadKind desired_string_load_kind) override; // Check if the desired_class_load_kind is supported. If it is, return it, // otherwise return a fall-back kind that should be used instead. HLoadClass::LoadKind GetSupportedLoadClassKind( - HLoadClass::LoadKind desired_class_load_kind) OVERRIDE; + HLoadClass::LoadKind desired_class_load_kind) override; // Check if the desired_dispatch_info is supported. If it is, return it, // otherwise return a fall-back info that should be used instead. HInvokeStaticOrDirect::DispatchInfo GetSupportedInvokeStaticOrDirectDispatch( const HInvokeStaticOrDirect::DispatchInfo& desired_dispatch_info, - HInvokeStaticOrDirect* invoke) OVERRIDE; + ArtMethod* method) override; void GenerateStaticOrDirectCall( - HInvokeStaticOrDirect* invoke, Location temp, SlowPathCode* slow_path = nullptr) OVERRIDE; + HInvokeStaticOrDirect* invoke, Location temp, SlowPathCode* slow_path = nullptr) override; void GenerateVirtualCall( - HInvokeVirtual* invoke, Location temp, SlowPathCode* slow_path = nullptr) OVERRIDE; + HInvokeVirtual* invoke, Location temp, SlowPathCode* slow_path = nullptr) override; void MoveFromReturnRegister(Location trg ATTRIBUTE_UNUSED, - DataType::Type type ATTRIBUTE_UNUSED) OVERRIDE { + DataType::Type type ATTRIBUTE_UNUSED) override { UNIMPLEMENTED(FATAL) << "Not implemented on MIPS64"; } - void GenerateNop() OVERRIDE; - void GenerateImplicitNullCheck(HNullCheck* instruction) OVERRIDE; - void GenerateExplicitNullCheck(HNullCheck* instruction) OVERRIDE; + void GenerateNop() override; + void GenerateImplicitNullCheck(HNullCheck* instruction) override; + void GenerateExplicitNullCheck(HNullCheck* instruction) override; // The PcRelativePatchInfo is used for PC-relative addressing of methods/strings/types, // whether through .data.bimg.rel.ro, .bss, or directly in the boot image. @@ -586,6 +588,10 @@ class CodeGeneratorMIPS64 : public CodeGenerator { DISALLOW_COPY_AND_ASSIGN(PcRelativePatchInfo); }; + PcRelativePatchInfo* NewBootImageIntrinsicPatch(uint32_t intrinsic_data, + const PcRelativePatchInfo* info_high = nullptr); + PcRelativePatchInfo* NewBootImageRelRoPatch(uint32_t boot_image_offset, + const PcRelativePatchInfo* info_high = nullptr); PcRelativePatchInfo* NewBootImageMethodPatch(MethodReference target_method, const PcRelativePatchInfo* info_high = nullptr); PcRelativePatchInfo* NewMethodBssEntryPatch(MethodReference target_method, @@ -608,6 +614,9 @@ class CodeGeneratorMIPS64 : public CodeGenerator { GpuRegister out, PcRelativePatchInfo* info_low = nullptr); + void LoadBootImageAddress(GpuRegister reg, uint32_t boot_image_reference); + void AllocateInstanceForIntrinsic(HInvokeStaticOrDirect* invoke, uint32_t boot_image_offset); + void PatchJitRootUse(uint8_t* code, const uint8_t* roots_data, const Literal* literal, @@ -648,14 +657,14 @@ class CodeGeneratorMIPS64 : public CodeGenerator { InstructionCodeGeneratorMIPS64 instruction_visitor_; ParallelMoveResolverMIPS64 move_resolver_; Mips64Assembler assembler_; - const Mips64InstructionSetFeatures& isa_features_; // Deduplication map for 32-bit literals, used for non-patchable boot image addresses. Uint32ToLiteralMap uint32_literals_; // Deduplication map for 64-bit literals, used for non-patchable method address or method code // address. 
Uint64ToLiteralMap uint64_literals_; - // PC-relative method patch info for kBootImageLinkTimePcRelative. + // PC-relative method patch info for kBootImageLinkTimePcRelative/kBootImageRelRo. + // Also used for type/string patches for kBootImageRelRo (same linker patch as for methods). ArenaDeque<PcRelativePatchInfo> boot_image_method_patches_; // PC-relative method patch info for kBssEntry. ArenaDeque<PcRelativePatchInfo> method_bss_entry_patches_; @@ -663,10 +672,12 @@ class CodeGeneratorMIPS64 : public CodeGenerator { ArenaDeque<PcRelativePatchInfo> boot_image_type_patches_; // PC-relative type patch info for kBssEntry. ArenaDeque<PcRelativePatchInfo> type_bss_entry_patches_; - // PC-relative String patch info; type depends on configuration (intern table or boot image PIC). + // PC-relative String patch info for kBootImageLinkTimePcRelative. ArenaDeque<PcRelativePatchInfo> boot_image_string_patches_; // PC-relative type patch info for kBssEntry. ArenaDeque<PcRelativePatchInfo> string_bss_entry_patches_; + // PC-relative patch info for IntrinsicObjects. + ArenaDeque<PcRelativePatchInfo> boot_image_intrinsic_patches_; // Patches for string root accesses in JIT compiled code. StringToLiteralMap jit_string_patches_; diff --git a/compiler/optimizing/code_generator_vector_arm64.cc b/compiler/optimizing/code_generator_vector_arm64.cc index 1cfdf54816..df95c88c07 100644 --- a/compiler/optimizing/code_generator_vector_arm64.cc +++ b/compiler/optimizing/code_generator_vector_arm64.cc @@ -16,6 +16,7 @@ #include "code_generator_arm64.h" +#include "arch/arm64/instruction_set_features_arm64.h" #include "mirror/array-inl.h" #include "mirror/string.h" @@ -29,7 +30,7 @@ using helpers::Arm64CanEncodeConstantAsImmediate; using helpers::DRegisterFrom; using helpers::HeapOperand; using helpers::InputRegisterAt; -using helpers::Int64ConstantFrom; +using helpers::Int64FromLocation; using helpers::OutputRegister; using helpers::VRegisterFrom; using helpers::WRegisterFrom; @@ -37,6 +38,15 @@ using helpers::XRegisterFrom; #define __ GetVIXLAssembler()-> +// Build-time switch for Armv8.4-a dot product instructions. +// TODO: Enable dot product when there is a device to test it on. +static constexpr bool kArm64EmitDotProdInstructions = false; + +// Returns whether dot product instructions should be emitted. 
+static bool ShouldEmitDotProductInstructions(const CodeGeneratorARM64* codegen_) { + return kArm64EmitDotProdInstructions && codegen_->GetInstructionSetFeatures().HasDotProd(); +} + void LocationsBuilderARM64::VisitVecReplicateScalar(HVecReplicateScalar* instruction) { LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(instruction); HInstruction* input = instruction->InputAt(0); @@ -63,7 +73,7 @@ void LocationsBuilderARM64::VisitVecReplicateScalar(HVecReplicateScalar* instruc } break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -78,7 +88,7 @@ void InstructionCodeGeneratorARM64::VisitVecReplicateScalar(HVecReplicateScalar* case DataType::Type::kInt8: DCHECK_EQ(16u, instruction->GetVectorLength()); if (src_loc.IsConstant()) { - __ Movi(dst.V16B(), Int64ConstantFrom(src_loc)); + __ Movi(dst.V16B(), Int64FromLocation(src_loc)); } else { __ Dup(dst.V16B(), InputRegisterAt(instruction, 0)); } @@ -87,7 +97,7 @@ void InstructionCodeGeneratorARM64::VisitVecReplicateScalar(HVecReplicateScalar* case DataType::Type::kInt16: DCHECK_EQ(8u, instruction->GetVectorLength()); if (src_loc.IsConstant()) { - __ Movi(dst.V8H(), Int64ConstantFrom(src_loc)); + __ Movi(dst.V8H(), Int64FromLocation(src_loc)); } else { __ Dup(dst.V8H(), InputRegisterAt(instruction, 0)); } @@ -95,7 +105,7 @@ void InstructionCodeGeneratorARM64::VisitVecReplicateScalar(HVecReplicateScalar* case DataType::Type::kInt32: DCHECK_EQ(4u, instruction->GetVectorLength()); if (src_loc.IsConstant()) { - __ Movi(dst.V4S(), Int64ConstantFrom(src_loc)); + __ Movi(dst.V4S(), Int64FromLocation(src_loc)); } else { __ Dup(dst.V4S(), InputRegisterAt(instruction, 0)); } @@ -103,7 +113,7 @@ void InstructionCodeGeneratorARM64::VisitVecReplicateScalar(HVecReplicateScalar* case DataType::Type::kInt64: DCHECK_EQ(2u, instruction->GetVectorLength()); if (src_loc.IsConstant()) { - __ Movi(dst.V2D(), Int64ConstantFrom(src_loc)); + __ Movi(dst.V2D(), Int64FromLocation(src_loc)); } else { __ Dup(dst.V2D(), XRegisterFrom(src_loc)); } @@ -125,7 +135,7 @@ void InstructionCodeGeneratorARM64::VisitVecReplicateScalar(HVecReplicateScalar* } break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -149,7 +159,7 @@ void LocationsBuilderARM64::VisitVecExtractScalar(HVecExtractScalar* instruction locations->SetOut(Location::SameAsFirstInput()); break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -173,7 +183,7 @@ void InstructionCodeGeneratorARM64::VisitVecExtractScalar(HVecExtractScalar* ins DCHECK(locations->InAt(0).Equals(locations->Out())); // no code required break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -200,7 +210,7 @@ static void CreateVecUnOpLocations(ArenaAllocator* allocator, HVecUnaryOperation locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap); break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -216,7 +226,7 @@ void InstructionCodeGeneratorARM64::VisitVecReduce(HVecReduce* instruction) { switch (instruction->GetPackedType()) { case DataType::Type::kInt32: DCHECK_EQ(4u, instruction->GetVectorLength()); - 
switch (instruction->GetKind()) { + switch (instruction->GetReductionKind()) { case HVecReduce::kSum: __ Addv(dst.S(), src.V4S()); break; @@ -230,7 +240,7 @@ void InstructionCodeGeneratorARM64::VisitVecReduce(HVecReduce* instruction) { break; case DataType::Type::kInt64: DCHECK_EQ(2u, instruction->GetVectorLength()); - switch (instruction->GetKind()) { + switch (instruction->GetReductionKind()) { case HVecReduce::kSum: __ Addp(dst.D(), src.V2D()); break; @@ -240,7 +250,7 @@ void InstructionCodeGeneratorARM64::VisitVecReduce(HVecReduce* instruction) { } break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -259,7 +269,7 @@ void InstructionCodeGeneratorARM64::VisitVecCnv(HVecCnv* instruction) { DCHECK_EQ(4u, instruction->GetVectorLength()); __ Scvtf(dst.V4S(), src.V4S()); } else { - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); } } @@ -299,7 +309,7 @@ void InstructionCodeGeneratorARM64::VisitVecNeg(HVecNeg* instruction) { __ Fneg(dst.V2D(), src.V2D()); break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -338,7 +348,7 @@ void InstructionCodeGeneratorARM64::VisitVecAbs(HVecAbs* instruction) { __ Fabs(dst.V2D(), src.V2D()); break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -366,7 +376,7 @@ void InstructionCodeGeneratorARM64::VisitVecNot(HVecNot* instruction) { __ Not(dst.V16B(), src.V16B()); // lanes do not matter break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -389,7 +399,7 @@ static void CreateVecBinOpLocations(ArenaAllocator* allocator, HVecBinaryOperati locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap); break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -431,7 +441,39 @@ void InstructionCodeGeneratorARM64::VisitVecAdd(HVecAdd* instruction) { __ Fadd(dst.V2D(), lhs.V2D(), rhs.V2D()); break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); + UNREACHABLE(); + } +} + +void LocationsBuilderARM64::VisitVecSaturationAdd(HVecSaturationAdd* instruction) { + CreateVecBinOpLocations(GetGraph()->GetAllocator(), instruction); +} + +void InstructionCodeGeneratorARM64::VisitVecSaturationAdd(HVecSaturationAdd* instruction) { + LocationSummary* locations = instruction->GetLocations(); + VRegister lhs = VRegisterFrom(locations->InAt(0)); + VRegister rhs = VRegisterFrom(locations->InAt(1)); + VRegister dst = VRegisterFrom(locations->Out()); + switch (instruction->GetPackedType()) { + case DataType::Type::kUint8: + DCHECK_EQ(16u, instruction->GetVectorLength()); + __ Uqadd(dst.V16B(), lhs.V16B(), rhs.V16B()); + break; + case DataType::Type::kInt8: + DCHECK_EQ(16u, instruction->GetVectorLength()); + __ Sqadd(dst.V16B(), lhs.V16B(), rhs.V16B()); + break; + case DataType::Type::kUint16: + DCHECK_EQ(8u, instruction->GetVectorLength()); + __ Uqadd(dst.V8H(), lhs.V8H(), rhs.V8H()); + break; + case DataType::Type::kInt16: + DCHECK_EQ(8u, instruction->GetVectorLength()); + __ Sqadd(dst.V8H(), lhs.V8H(), rhs.V8H()); + break; + default: + 
LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -471,7 +513,7 @@ void InstructionCodeGeneratorARM64::VisitVecHalvingAdd(HVecHalvingAdd* instructi : __ Shadd(dst.V8H(), lhs.V8H(), rhs.V8H()); break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -513,7 +555,39 @@ void InstructionCodeGeneratorARM64::VisitVecSub(HVecSub* instruction) { __ Fsub(dst.V2D(), lhs.V2D(), rhs.V2D()); break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); + UNREACHABLE(); + } +} + +void LocationsBuilderARM64::VisitVecSaturationSub(HVecSaturationSub* instruction) { + CreateVecBinOpLocations(GetGraph()->GetAllocator(), instruction); +} + +void InstructionCodeGeneratorARM64::VisitVecSaturationSub(HVecSaturationSub* instruction) { + LocationSummary* locations = instruction->GetLocations(); + VRegister lhs = VRegisterFrom(locations->InAt(0)); + VRegister rhs = VRegisterFrom(locations->InAt(1)); + VRegister dst = VRegisterFrom(locations->Out()); + switch (instruction->GetPackedType()) { + case DataType::Type::kUint8: + DCHECK_EQ(16u, instruction->GetVectorLength()); + __ Uqsub(dst.V16B(), lhs.V16B(), rhs.V16B()); + break; + case DataType::Type::kInt8: + DCHECK_EQ(16u, instruction->GetVectorLength()); + __ Sqsub(dst.V16B(), lhs.V16B(), rhs.V16B()); + break; + case DataType::Type::kUint16: + DCHECK_EQ(8u, instruction->GetVectorLength()); + __ Uqsub(dst.V8H(), lhs.V8H(), rhs.V8H()); + break; + case DataType::Type::kInt16: + DCHECK_EQ(8u, instruction->GetVectorLength()); + __ Sqsub(dst.V8H(), lhs.V8H(), rhs.V8H()); + break; + default: + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -551,7 +625,7 @@ void InstructionCodeGeneratorARM64::VisitVecMul(HVecMul* instruction) { __ Fmul(dst.V2D(), lhs.V2D(), rhs.V2D()); break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -575,7 +649,7 @@ void InstructionCodeGeneratorARM64::VisitVecDiv(HVecDiv* instruction) { __ Fdiv(dst.V2D(), lhs.V2D(), rhs.V2D()); break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -623,7 +697,7 @@ void InstructionCodeGeneratorARM64::VisitVecMin(HVecMin* instruction) { __ Fmin(dst.V2D(), lhs.V2D(), rhs.V2D()); break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -671,7 +745,7 @@ void InstructionCodeGeneratorARM64::VisitVecMax(HVecMax* instruction) { __ Fmax(dst.V2D(), lhs.V2D(), rhs.V2D()); break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -699,7 +773,7 @@ void InstructionCodeGeneratorARM64::VisitVecAnd(HVecAnd* instruction) { __ And(dst.V16B(), lhs.V16B(), rhs.V16B()); // lanes do not matter break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -735,7 +809,7 @@ void InstructionCodeGeneratorARM64::VisitVecOr(HVecOr* instruction) { __ Orr(dst.V16B(), lhs.V16B(), rhs.V16B()); // lanes do not matter break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << 
instruction->GetPackedType(); UNREACHABLE(); } } @@ -762,7 +836,7 @@ void InstructionCodeGeneratorARM64::VisitVecXor(HVecXor* instruction) { __ Eor(dst.V16B(), lhs.V16B(), rhs.V16B()); // lanes do not matter break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -782,7 +856,7 @@ static void CreateVecShiftLocations(ArenaAllocator* allocator, HVecBinaryOperati locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap); break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -816,7 +890,7 @@ void InstructionCodeGeneratorARM64::VisitVecShl(HVecShl* instruction) { __ Shl(dst.V2D(), lhs.V2D(), value); break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -850,7 +924,7 @@ void InstructionCodeGeneratorARM64::VisitVecShr(HVecShr* instruction) { __ Sshr(dst.V2D(), lhs.V2D(), value); break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -884,7 +958,7 @@ void InstructionCodeGeneratorARM64::VisitVecUShr(HVecUShr* instruction) { __ Ushr(dst.V2D(), lhs.V2D(), value); break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -916,7 +990,7 @@ void LocationsBuilderARM64::VisitVecSetScalars(HVecSetScalars* instruction) { locations->SetOut(Location::RequiresFpuRegister()); break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -957,7 +1031,7 @@ void InstructionCodeGeneratorARM64::VisitVecSetScalars(HVecSetScalars* instructi __ Mov(dst.V2D(), 0, InputRegisterAt(instruction, 0)); break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -978,7 +1052,7 @@ static void CreateVecAccumLocations(ArenaAllocator* allocator, HVecOperation* in locations->SetOut(Location::SameAsFirstInput()); break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -1026,7 +1100,7 @@ void InstructionCodeGeneratorARM64::VisitVecMultiplyAccumulate(HVecMultiplyAccum } break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -1139,7 +1213,7 @@ void InstructionCodeGeneratorARM64::VisitVecSADAccumulate(HVecSADAccumulate* ins break; } default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } break; @@ -1167,7 +1241,7 @@ void InstructionCodeGeneratorARM64::VisitVecSADAccumulate(HVecSADAccumulate* ins break; } default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } break; @@ -1188,7 +1262,7 @@ void InstructionCodeGeneratorARM64::VisitVecSADAccumulate(HVecSADAccumulate* ins __ Sabal2(acc.V2D(), left.V4S(), right.V4S()); break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } break; @@ -1204,12 +1278,88 @@ 
void InstructionCodeGeneratorARM64::VisitVecSADAccumulate(HVecSADAccumulate* ins break; } default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); + } +} + +void LocationsBuilderARM64::VisitVecDotProd(HVecDotProd* instruction) { + LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(instruction); + DCHECK(instruction->GetPackedType() == DataType::Type::kInt32); + locations->SetInAt(0, Location::RequiresFpuRegister()); + locations->SetInAt(1, Location::RequiresFpuRegister()); + locations->SetInAt(2, Location::RequiresFpuRegister()); + locations->SetOut(Location::SameAsFirstInput()); + + // For Int8 and Uint8 general case we need a temp register. + if ((DataType::Size(instruction->InputAt(1)->AsVecOperation()->GetPackedType()) == 1) && + !ShouldEmitDotProductInstructions(codegen_)) { + locations->AddTemp(Location::RequiresFpuRegister()); + } +} + +void InstructionCodeGeneratorARM64::VisitVecDotProd(HVecDotProd* instruction) { + LocationSummary* locations = instruction->GetLocations(); + DCHECK(locations->InAt(0).Equals(locations->Out())); + VRegister acc = VRegisterFrom(locations->InAt(0)); + VRegister left = VRegisterFrom(locations->InAt(1)); + VRegister right = VRegisterFrom(locations->InAt(2)); + HVecOperation* a = instruction->InputAt(1)->AsVecOperation(); + HVecOperation* b = instruction->InputAt(2)->AsVecOperation(); + DCHECK_EQ(HVecOperation::ToSignedType(a->GetPackedType()), + HVecOperation::ToSignedType(b->GetPackedType())); + DCHECK_EQ(instruction->GetPackedType(), DataType::Type::kInt32); + DCHECK_EQ(4u, instruction->GetVectorLength()); + + size_t inputs_data_size = DataType::Size(a->GetPackedType()); + switch (inputs_data_size) { + case 1u: { + DCHECK_EQ(16u, a->GetVectorLength()); + if (instruction->IsZeroExtending()) { + if (ShouldEmitDotProductInstructions(codegen_)) { + __ Udot(acc.V4S(), left.V16B(), right.V16B()); + } else { + VRegister tmp = VRegisterFrom(locations->GetTemp(0)); + __ Umull(tmp.V8H(), left.V8B(), right.V8B()); + __ Uaddw(acc.V4S(), acc.V4S(), tmp.V4H()); + __ Uaddw2(acc.V4S(), acc.V4S(), tmp.V8H()); + + __ Umull2(tmp.V8H(), left.V16B(), right.V16B()); + __ Uaddw(acc.V4S(), acc.V4S(), tmp.V4H()); + __ Uaddw2(acc.V4S(), acc.V4S(), tmp.V8H()); + } + } else { + if (ShouldEmitDotProductInstructions(codegen_)) { + __ Sdot(acc.V4S(), left.V16B(), right.V16B()); + } else { + VRegister tmp = VRegisterFrom(locations->GetTemp(0)); + __ Smull(tmp.V8H(), left.V8B(), right.V8B()); + __ Saddw(acc.V4S(), acc.V4S(), tmp.V4H()); + __ Saddw2(acc.V4S(), acc.V4S(), tmp.V8H()); + + __ Smull2(tmp.V8H(), left.V16B(), right.V16B()); + __ Saddw(acc.V4S(), acc.V4S(), tmp.V4H()); + __ Saddw2(acc.V4S(), acc.V4S(), tmp.V8H()); + } + } + break; + } + case 2u: + DCHECK_EQ(8u, a->GetVectorLength()); + if (instruction->IsZeroExtending()) { + __ Umlal(acc.V4S(), left.V4H(), right.V4H()); + __ Umlal2(acc.V4S(), left.V8H(), right.V8H()); + } else { + __ Smlal(acc.V4S(), left.V4H(), right.V4H()); + __ Smlal2(acc.V4S(), left.V8H(), right.V8H()); + } + break; + default: + LOG(FATAL) << "Unsupported SIMD type size: " << inputs_data_size; } } @@ -1237,7 +1387,7 @@ static void CreateVecMemLocations(ArenaAllocator* allocator, } break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); 
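For reference, a scalar sketch of the contract behind the new ARM64 VisitVecDotProd lowering above: with 8-bit inputs and a 32-bit accumulator it folds the sum of all element-wise products into the accumulator lanes, using Sdot/Udot only when kArm64EmitDotProdInstructions and HasDotProd() allow it, and the widening Umull/Uaddw sequence otherwise (the product-to-lane grouping differs between the two paths, but the lane total, which the later reduction consumes, is the same). Illustrative C++ only, not ART code; the helper name is hypothetical.

#include <cstdint>

// Scalar model of one HVecDotProd step over 16 signed bytes: the accumulator
// gains the full dot product of the two input vectors. The Uint8 path is
// identical except that elements are zero-extended before multiplying.
int32_t DotProd16xS8(int32_t acc, const int8_t* a, const int8_t* b) {
  for (int i = 0; i < 16; ++i) {
    acc += static_cast<int32_t>(a[i]) * static_cast<int32_t>(b[i]);
  }
  return acc;
}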
UNREACHABLE(); } } @@ -1269,7 +1419,7 @@ MemOperand InstructionCodeGeneratorARM64::VecAddress( DCHECK(!instruction->InputAt(0)->IsIntermediateAddress()); if (index.IsConstant()) { - offset += Int64ConstantFrom(index) << shift; + offset += Int64FromLocation(index) << shift; return HeapOperand(base, offset); } else { *scratch = temps_scope->AcquireSameSizeAs(base); @@ -1331,7 +1481,7 @@ void InstructionCodeGeneratorARM64::VisitVecLoad(HVecLoad* instruction) { __ Ldr(reg, VecAddress(instruction, &temps, size, instruction->IsStringCharAt(), &scratch)); break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -1362,7 +1512,7 @@ void InstructionCodeGeneratorARM64::VisitVecStore(HVecStore* instruction) { __ Str(reg, VecAddress(instruction, &temps, size, /*is_string_char_at*/ false, &scratch)); break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } diff --git a/compiler/optimizing/code_generator_vector_arm_vixl.cc b/compiler/optimizing/code_generator_vector_arm_vixl.cc index 7c3155ab73..b092961a56 100644 --- a/compiler/optimizing/code_generator_vector_arm_vixl.cc +++ b/compiler/optimizing/code_generator_vector_arm_vixl.cc @@ -46,7 +46,7 @@ void LocationsBuilderARMVIXL::VisitVecReplicateScalar(HVecReplicateScalar* instr locations->SetOut(Location::RequiresFpuRegister()); break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -71,7 +71,7 @@ void InstructionCodeGeneratorARMVIXL::VisitVecReplicateScalar(HVecReplicateScala __ Vdup(Untyped32, dst, InputRegisterAt(instruction, 0)); break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -84,7 +84,7 @@ void LocationsBuilderARMVIXL::VisitVecExtractScalar(HVecExtractScalar* instructi locations->SetOut(Location::RequiresRegister()); break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -98,7 +98,7 @@ void InstructionCodeGeneratorARMVIXL::VisitVecExtractScalar(HVecExtractScalar* i __ Vmov(OutputRegister(instruction), DRegisterLane(src, 0)); break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -122,7 +122,7 @@ static void CreateVecUnOpLocations(ArenaAllocator* allocator, HVecUnaryOperation locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap); break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -138,7 +138,7 @@ void InstructionCodeGeneratorARMVIXL::VisitVecReduce(HVecReduce* instruction) { switch (instruction->GetPackedType()) { case DataType::Type::kInt32: DCHECK_EQ(2u, instruction->GetVectorLength()); - switch (instruction->GetKind()) { + switch (instruction->GetReductionKind()) { case HVecReduce::kSum: __ Vpadd(DataTypeValue::I32, dst, src, src); break; @@ -151,7 +151,7 @@ void InstructionCodeGeneratorARMVIXL::VisitVecReduce(HVecReduce* instruction) { } break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -188,7 +188,7 @@ void 
InstructionCodeGeneratorARMVIXL::VisitVecNeg(HVecNeg* instruction) { __ Vneg(DataTypeValue::S32, dst, src); break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -215,7 +215,7 @@ void InstructionCodeGeneratorARMVIXL::VisitVecAbs(HVecAbs* instruction) { __ Vabs(DataTypeValue::S32, dst, src); break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -242,7 +242,7 @@ void InstructionCodeGeneratorARMVIXL::VisitVecNot(HVecNot* instruction) { __ Vmvn(I8, dst, src); // lanes do not matter break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -262,7 +262,7 @@ static void CreateVecBinOpLocations(ArenaAllocator* allocator, HVecBinaryOperati locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap); break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -292,7 +292,39 @@ void InstructionCodeGeneratorARMVIXL::VisitVecAdd(HVecAdd* instruction) { __ Vadd(I32, dst, lhs, rhs); break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); + UNREACHABLE(); + } +} + +void LocationsBuilderARMVIXL::VisitVecSaturationAdd(HVecSaturationAdd* instruction) { + CreateVecBinOpLocations(GetGraph()->GetAllocator(), instruction); +} + +void InstructionCodeGeneratorARMVIXL::VisitVecSaturationAdd(HVecSaturationAdd* instruction) { + LocationSummary* locations = instruction->GetLocations(); + vixl32::DRegister lhs = DRegisterFrom(locations->InAt(0)); + vixl32::DRegister rhs = DRegisterFrom(locations->InAt(1)); + vixl32::DRegister dst = DRegisterFrom(locations->Out()); + switch (instruction->GetPackedType()) { + case DataType::Type::kUint8: + DCHECK_EQ(8u, instruction->GetVectorLength()); + __ Vqadd(DataTypeValue::U8, dst, lhs, rhs); + break; + case DataType::Type::kInt8: + DCHECK_EQ(8u, instruction->GetVectorLength()); + __ Vqadd(DataTypeValue::S8, dst, lhs, rhs); + break; + case DataType::Type::kUint16: + DCHECK_EQ(4u, instruction->GetVectorLength()); + __ Vqadd(DataTypeValue::U16, dst, lhs, rhs); + break; + case DataType::Type::kInt16: + DCHECK_EQ(4u, instruction->GetVectorLength()); + __ Vqadd(DataTypeValue::S16, dst, lhs, rhs); + break; + default: + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -332,7 +364,7 @@ void InstructionCodeGeneratorARMVIXL::VisitVecHalvingAdd(HVecHalvingAdd* instruc : __ Vhadd(DataTypeValue::S16, dst, lhs, rhs); break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -362,7 +394,39 @@ void InstructionCodeGeneratorARMVIXL::VisitVecSub(HVecSub* instruction) { __ Vsub(I32, dst, lhs, rhs); break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); + UNREACHABLE(); + } +} + +void LocationsBuilderARMVIXL::VisitVecSaturationSub(HVecSaturationSub* instruction) { + CreateVecBinOpLocations(GetGraph()->GetAllocator(), instruction); +} + +void InstructionCodeGeneratorARMVIXL::VisitVecSaturationSub(HVecSaturationSub* instruction) { + LocationSummary* locations = instruction->GetLocations(); + vixl32::DRegister lhs = 
DRegisterFrom(locations->InAt(0)); + vixl32::DRegister rhs = DRegisterFrom(locations->InAt(1)); + vixl32::DRegister dst = DRegisterFrom(locations->Out()); + switch (instruction->GetPackedType()) { + case DataType::Type::kUint8: + DCHECK_EQ(8u, instruction->GetVectorLength()); + __ Vqsub(DataTypeValue::U8, dst, lhs, rhs); + break; + case DataType::Type::kInt8: + DCHECK_EQ(8u, instruction->GetVectorLength()); + __ Vqsub(DataTypeValue::S8, dst, lhs, rhs); + break; + case DataType::Type::kUint16: + DCHECK_EQ(4u, instruction->GetVectorLength()); + __ Vqsub(DataTypeValue::U16, dst, lhs, rhs); + break; + case DataType::Type::kInt16: + DCHECK_EQ(4u, instruction->GetVectorLength()); + __ Vqsub(DataTypeValue::S16, dst, lhs, rhs); + break; + default: + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -392,7 +456,7 @@ void InstructionCodeGeneratorARMVIXL::VisitVecMul(HVecMul* instruction) { __ Vmul(I32, dst, lhs, rhs); break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -440,7 +504,7 @@ void InstructionCodeGeneratorARMVIXL::VisitVecMin(HVecMin* instruction) { __ Vmin(DataTypeValue::S32, dst, lhs, rhs); break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -480,7 +544,7 @@ void InstructionCodeGeneratorARMVIXL::VisitVecMax(HVecMax* instruction) { __ Vmax(DataTypeValue::S32, dst, lhs, rhs); break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -505,7 +569,7 @@ void InstructionCodeGeneratorARMVIXL::VisitVecAnd(HVecAnd* instruction) { __ Vand(I8, dst, lhs, rhs); break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -537,7 +601,7 @@ void InstructionCodeGeneratorARMVIXL::VisitVecOr(HVecOr* instruction) { __ Vorr(I8, dst, lhs, rhs); break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -561,7 +625,7 @@ void InstructionCodeGeneratorARMVIXL::VisitVecXor(HVecXor* instruction) { __ Veor(I8, dst, lhs, rhs); break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -580,7 +644,7 @@ static void CreateVecShiftLocations(ArenaAllocator* allocator, HVecBinaryOperati locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap); break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -610,7 +674,7 @@ void InstructionCodeGeneratorARMVIXL::VisitVecShl(HVecShl* instruction) { __ Vshl(I32, dst, lhs, value); break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -640,7 +704,7 @@ void InstructionCodeGeneratorARMVIXL::VisitVecShr(HVecShr* instruction) { __ Vshr(DataTypeValue::S32, dst, lhs, value); break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -670,7 +734,7 @@ void InstructionCodeGeneratorARMVIXL::VisitVecUShr(HVecUShr* instruction) { __ Vshr(DataTypeValue::U32, dst, lhs, value); 
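For reference, a scalar sketch of the saturating semantics behind the new VisitVecSaturationAdd/VisitVecSaturationSub lowerings above (Vqadd/Vqsub here, Uqadd/Sqadd/Uqsub/Sqsub on ARM64): lane results are clamped to the packed type's range instead of wrapping. Illustrative C++ only, not ART code; the helper names are hypothetical.

#include <algorithm>
#include <cstdint>

// One signed 16-bit lane: compute in a wider type, then clamp to the Int16 range.
int16_t SatAddS16(int16_t a, int16_t b) {
  int32_t sum = static_cast<int32_t>(a) + static_cast<int32_t>(b);
  return static_cast<int16_t>(std::clamp<int32_t>(sum, INT16_MIN, INT16_MAX));
}

int16_t SatSubS16(int16_t a, int16_t b) {
  int32_t diff = static_cast<int32_t>(a) - static_cast<int32_t>(b);
  return static_cast<int16_t>(std::clamp<int32_t>(diff, INT16_MIN, INT16_MAX));
}

// Unsigned lanes (Uint8/Uint16) clamp to [0, UINT8_MAX] or [0, UINT16_MAX] instead.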
break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -690,7 +754,7 @@ void LocationsBuilderARMVIXL::VisitVecSetScalars(HVecSetScalars* instruction) { locations->SetOut(Location::RequiresFpuRegister()); break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -716,7 +780,7 @@ void InstructionCodeGeneratorARMVIXL::VisitVecSetScalars(HVecSetScalars* instruc __ Vmov(Untyped32, DRegisterLane(dst, 0), InputRegisterAt(instruction, 0)); break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -737,7 +801,7 @@ static void CreateVecAccumLocations(ArenaAllocator* allocator, HVecOperation* in locations->SetOut(Location::SameAsFirstInput()); break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -780,16 +844,24 @@ void InstructionCodeGeneratorARMVIXL::VisitVecSADAccumulate(HVecSADAccumulate* i break; } default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } +void LocationsBuilderARMVIXL::VisitVecDotProd(HVecDotProd* instruction) { + LOG(FATAL) << "No SIMD for " << instruction->GetId(); +} + +void InstructionCodeGeneratorARMVIXL::VisitVecDotProd(HVecDotProd* instruction) { + LOG(FATAL) << "No SIMD for " << instruction->GetId(); +} + // Return whether the vector memory access operation is guaranteed to be word-aligned (ARM word // size equals to 4). 
static bool IsWordAligned(HVecMemoryOperation* instruction) { @@ -817,7 +889,7 @@ static void CreateVecMemLocations(ArenaAllocator* allocator, } break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -923,7 +995,7 @@ void InstructionCodeGeneratorARMVIXL::VisitVecLoad(HVecLoad* instruction) { } break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -971,7 +1043,7 @@ void InstructionCodeGeneratorARMVIXL::VisitVecStore(HVecStore* instruction) { } break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } diff --git a/compiler/optimizing/code_generator_vector_mips.cc b/compiler/optimizing/code_generator_vector_mips.cc index ed9de96496..4e9ba0d3d2 100644 --- a/compiler/optimizing/code_generator_vector_mips.cc +++ b/compiler/optimizing/code_generator_vector_mips.cc @@ -42,7 +42,7 @@ void LocationsBuilderMIPS::VisitVecReplicateScalar(HVecReplicateScalar* instruct locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap); break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -74,22 +74,22 @@ void InstructionCodeGeneratorMIPS::VisitVecReplicateScalar(HVecReplicateScalar* __ InsertW(static_cast<VectorRegister>(FTMP), locations->InAt(0).AsRegisterPairHigh<Register>(), 1); - __ ReplicateFPToVectorRegister(dst, FTMP, /* is_double */ true); + __ ReplicateFPToVectorRegister(dst, FTMP, /* is_double= */ true); break; case DataType::Type::kFloat32: DCHECK_EQ(4u, instruction->GetVectorLength()); __ ReplicateFPToVectorRegister(dst, locations->InAt(0).AsFpuRegister<FRegister>(), - /* is_double */ false); + /* is_double= */ false); break; case DataType::Type::kFloat64: DCHECK_EQ(2u, instruction->GetVectorLength()); __ ReplicateFPToVectorRegister(dst, locations->InAt(0).AsFpuRegister<FRegister>(), - /* is_double */ true); + /* is_double= */ true); break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -113,7 +113,7 @@ void LocationsBuilderMIPS::VisitVecExtractScalar(HVecExtractScalar* instruction) locations->SetOut(Location::SameAsFirstInput()); break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -138,7 +138,7 @@ void InstructionCodeGeneratorMIPS::VisitVecExtractScalar(HVecExtractScalar* inst DCHECK(locations->InAt(0).Equals(locations->Out())); // no code required break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -170,7 +170,7 @@ static void CreateVecUnOpLocations(ArenaAllocator* allocator, HVecUnaryOperation : Location::kNoOutputOverlap); break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -187,7 +187,7 @@ void InstructionCodeGeneratorMIPS::VisitVecReduce(HVecReduce* instruction) { switch (instruction->GetPackedType()) { case DataType::Type::kInt32: DCHECK_EQ(4u, instruction->GetVectorLength()); - switch (instruction->GetKind()) { + switch (instruction->GetReductionKind()) { case HVecReduce::kSum: __ 
Hadd_sD(tmp, src, src); __ IlvlD(dst, tmp, tmp); @@ -209,7 +209,7 @@ void InstructionCodeGeneratorMIPS::VisitVecReduce(HVecReduce* instruction) { break; case DataType::Type::kInt64: DCHECK_EQ(2u, instruction->GetVectorLength()); - switch (instruction->GetKind()) { + switch (instruction->GetReductionKind()) { case HVecReduce::kSum: __ IlvlD(dst, src, src); __ AddvD(dst, dst, src); @@ -225,7 +225,7 @@ void InstructionCodeGeneratorMIPS::VisitVecReduce(HVecReduce* instruction) { } break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -244,7 +244,7 @@ void InstructionCodeGeneratorMIPS::VisitVecCnv(HVecCnv* instruction) { DCHECK_EQ(4u, instruction->GetVectorLength()); __ Ffint_sW(dst, src); } else { - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); } } @@ -290,7 +290,7 @@ void InstructionCodeGeneratorMIPS::VisitVecNeg(HVecNeg* instruction) { __ FsubD(dst, dst, src); break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -337,7 +337,7 @@ void InstructionCodeGeneratorMIPS::VisitVecAbs(HVecAbs* instruction) { __ AndV(dst, dst, src); break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -369,7 +369,7 @@ void InstructionCodeGeneratorMIPS::VisitVecNot(HVecNot* instruction) { __ NorV(dst, src, src); // lanes do not matter break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -392,7 +392,7 @@ static void CreateVecBinOpLocations(ArenaAllocator* allocator, HVecBinaryOperati locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap); break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -434,11 +434,19 @@ void InstructionCodeGeneratorMIPS::VisitVecAdd(HVecAdd* instruction) { __ FaddD(dst, lhs, rhs); break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } +void LocationsBuilderMIPS::VisitVecSaturationAdd(HVecSaturationAdd* instruction) { + CreateVecBinOpLocations(GetGraph()->GetAllocator(), instruction); +} + +void InstructionCodeGeneratorMIPS::VisitVecSaturationAdd(HVecSaturationAdd* instruction) { + LOG(FATAL) << "Unsupported SIMD " << instruction->GetId(); +} + void LocationsBuilderMIPS::VisitVecHalvingAdd(HVecHalvingAdd* instruction) { CreateVecBinOpLocations(GetGraph()->GetAllocator(), instruction); } @@ -474,7 +482,7 @@ void InstructionCodeGeneratorMIPS::VisitVecHalvingAdd(HVecHalvingAdd* instructio : __ Ave_sH(dst, lhs, rhs); break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -516,11 +524,19 @@ void InstructionCodeGeneratorMIPS::VisitVecSub(HVecSub* instruction) { __ FsubD(dst, lhs, rhs); break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } +void LocationsBuilderMIPS::VisitVecSaturationSub(HVecSaturationSub* instruction) { + CreateVecBinOpLocations(GetGraph()->GetAllocator(), instruction); +} + +void 
InstructionCodeGeneratorMIPS::VisitVecSaturationSub(HVecSaturationSub* instruction) { + LOG(FATAL) << "Unsupported SIMD " << instruction->GetId(); +} + void LocationsBuilderMIPS::VisitVecMul(HVecMul* instruction) { CreateVecBinOpLocations(GetGraph()->GetAllocator(), instruction); } @@ -558,7 +574,7 @@ void InstructionCodeGeneratorMIPS::VisitVecMul(HVecMul* instruction) { __ FmulD(dst, lhs, rhs); break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -582,7 +598,7 @@ void InstructionCodeGeneratorMIPS::VisitVecDiv(HVecDiv* instruction) { __ FdivD(dst, lhs, rhs); break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -640,7 +656,7 @@ void InstructionCodeGeneratorMIPS::VisitVecMin(HVecMin* instruction) { __ FminD(dst, lhs, rhs); break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -698,7 +714,7 @@ void InstructionCodeGeneratorMIPS::VisitVecMax(HVecMax* instruction) { __ FmaxD(dst, lhs, rhs); break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -727,7 +743,7 @@ void InstructionCodeGeneratorMIPS::VisitVecAnd(HVecAnd* instruction) { __ AndV(dst, lhs, rhs); // lanes do not matter break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -764,7 +780,7 @@ void InstructionCodeGeneratorMIPS::VisitVecOr(HVecOr* instruction) { __ OrV(dst, lhs, rhs); // lanes do not matter break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -793,7 +809,7 @@ void InstructionCodeGeneratorMIPS::VisitVecXor(HVecXor* instruction) { __ XorV(dst, lhs, rhs); // lanes do not matter break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -813,7 +829,7 @@ static void CreateVecShiftLocations(ArenaAllocator* allocator, HVecBinaryOperati locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap); break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -847,7 +863,7 @@ void InstructionCodeGeneratorMIPS::VisitVecShl(HVecShl* instruction) { __ SlliD(dst, lhs, value); break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -881,7 +897,7 @@ void InstructionCodeGeneratorMIPS::VisitVecShr(HVecShr* instruction) { __ SraiD(dst, lhs, value); break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -915,7 +931,7 @@ void InstructionCodeGeneratorMIPS::VisitVecUShr(HVecUShr* instruction) { __ SrliD(dst, lhs, value); break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -947,7 +963,7 @@ void LocationsBuilderMIPS::VisitVecSetScalars(HVecSetScalars* instruction) { locations->SetOut(Location::RequiresFpuRegister()); break; default: - LOG(FATAL) << "Unsupported SIMD 
type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -989,7 +1005,7 @@ void InstructionCodeGeneratorMIPS::VisitVecSetScalars(HVecSetScalars* instructio __ InsertW(dst, locations->InAt(0).AsRegisterPairHigh<Register>(), 1); break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -1010,7 +1026,7 @@ static void CreateVecAccumLocations(ArenaAllocator* allocator, HVecOperation* in locations->SetOut(Location::SameAsFirstInput()); break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -1060,7 +1076,7 @@ void InstructionCodeGeneratorMIPS::VisitVecMultiplyAccumulate(HVecMultiplyAccumu } break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -1162,7 +1178,7 @@ void InstructionCodeGeneratorMIPS::VisitVecSADAccumulate(HVecSADAccumulate* inst break; } default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } break; @@ -1201,7 +1217,7 @@ void InstructionCodeGeneratorMIPS::VisitVecSADAccumulate(HVecSADAccumulate* inst break; } default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } break; @@ -1231,7 +1247,7 @@ void InstructionCodeGeneratorMIPS::VisitVecSADAccumulate(HVecSADAccumulate* inst break; } default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } break; @@ -1247,17 +1263,25 @@ void InstructionCodeGeneratorMIPS::VisitVecSADAccumulate(HVecSADAccumulate* inst break; } default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } break; } default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } +void LocationsBuilderMIPS::VisitVecDotProd(HVecDotProd* instruction) { + LOG(FATAL) << "No SIMD for " << instruction->GetId(); +} + +void InstructionCodeGeneratorMIPS::VisitVecDotProd(HVecDotProd* instruction) { + LOG(FATAL) << "No SIMD for " << instruction->GetId(); +} + // Helper to set up locations for vector memory operations. 
static void CreateVecMemLocations(ArenaAllocator* allocator, HVecMemoryOperation* instruction, @@ -1282,7 +1306,7 @@ static void CreateVecMemLocations(ArenaAllocator* allocator, } break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -1320,7 +1344,7 @@ int32_t InstructionCodeGeneratorMIPS::VecAddress(LocationSummary* locations, } void LocationsBuilderMIPS::VisitVecLoad(HVecLoad* instruction) { - CreateVecMemLocations(GetGraph()->GetAllocator(), instruction, /* is_load */ true); + CreateVecMemLocations(GetGraph()->GetAllocator(), instruction, /* is_load= */ true); } void InstructionCodeGeneratorMIPS::VisitVecLoad(HVecLoad* instruction) { @@ -1357,13 +1381,13 @@ void InstructionCodeGeneratorMIPS::VisitVecLoad(HVecLoad* instruction) { __ LdD(reg, base, offset); break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } void LocationsBuilderMIPS::VisitVecStore(HVecStore* instruction) { - CreateVecMemLocations(GetGraph()->GetAllocator(), instruction, /* is_load */ false); + CreateVecMemLocations(GetGraph()->GetAllocator(), instruction, /* is_load= */ false); } void InstructionCodeGeneratorMIPS::VisitVecStore(HVecStore* instruction) { @@ -1395,7 +1419,7 @@ void InstructionCodeGeneratorMIPS::VisitVecStore(HVecStore* instruction) { __ StD(reg, base, offset); break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } diff --git a/compiler/optimizing/code_generator_vector_mips64.cc b/compiler/optimizing/code_generator_vector_mips64.cc index 9ea55ec8d7..6467d3e27f 100644 --- a/compiler/optimizing/code_generator_vector_mips64.cc +++ b/compiler/optimizing/code_generator_vector_mips64.cc @@ -47,7 +47,7 @@ void LocationsBuilderMIPS64::VisitVecReplicateScalar(HVecReplicateScalar* instru locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap); break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -79,16 +79,16 @@ void InstructionCodeGeneratorMIPS64::VisitVecReplicateScalar(HVecReplicateScalar DCHECK_EQ(4u, instruction->GetVectorLength()); __ ReplicateFPToVectorRegister(dst, locations->InAt(0).AsFpuRegister<FpuRegister>(), - /* is_double */ false); + /* is_double= */ false); break; case DataType::Type::kFloat64: DCHECK_EQ(2u, instruction->GetVectorLength()); __ ReplicateFPToVectorRegister(dst, locations->InAt(0).AsFpuRegister<FpuRegister>(), - /* is_double */ true); + /* is_double= */ true); break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -112,7 +112,7 @@ void LocationsBuilderMIPS64::VisitVecExtractScalar(HVecExtractScalar* instructio locations->SetOut(Location::SameAsFirstInput()); break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -136,7 +136,7 @@ void InstructionCodeGeneratorMIPS64::VisitVecExtractScalar(HVecExtractScalar* in DCHECK(locations->InAt(0).Equals(locations->Out())); // no code required break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -168,7 +168,7 @@ static void 
CreateVecUnOpLocations(ArenaAllocator* allocator, HVecUnaryOperation : Location::kNoOutputOverlap); break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -185,7 +185,7 @@ void InstructionCodeGeneratorMIPS64::VisitVecReduce(HVecReduce* instruction) { switch (instruction->GetPackedType()) { case DataType::Type::kInt32: DCHECK_EQ(4u, instruction->GetVectorLength()); - switch (instruction->GetKind()) { + switch (instruction->GetReductionKind()) { case HVecReduce::kSum: __ Hadd_sD(tmp, src, src); __ IlvlD(dst, tmp, tmp); @@ -207,7 +207,7 @@ void InstructionCodeGeneratorMIPS64::VisitVecReduce(HVecReduce* instruction) { break; case DataType::Type::kInt64: DCHECK_EQ(2u, instruction->GetVectorLength()); - switch (instruction->GetKind()) { + switch (instruction->GetReductionKind()) { case HVecReduce::kSum: __ IlvlD(dst, src, src); __ AddvD(dst, dst, src); @@ -223,7 +223,7 @@ void InstructionCodeGeneratorMIPS64::VisitVecReduce(HVecReduce* instruction) { } break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -242,7 +242,7 @@ void InstructionCodeGeneratorMIPS64::VisitVecCnv(HVecCnv* instruction) { DCHECK_EQ(4u, instruction->GetVectorLength()); __ Ffint_sW(dst, src); } else { - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -289,7 +289,7 @@ void InstructionCodeGeneratorMIPS64::VisitVecNeg(HVecNeg* instruction) { __ FsubD(dst, dst, src); break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -336,7 +336,7 @@ void InstructionCodeGeneratorMIPS64::VisitVecAbs(HVecAbs* instruction) { __ AndV(dst, dst, src); break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -368,7 +368,7 @@ void InstructionCodeGeneratorMIPS64::VisitVecNot(HVecNot* instruction) { __ NorV(dst, src, src); // lanes do not matter break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -391,7 +391,7 @@ static void CreateVecBinOpLocations(ArenaAllocator* allocator, HVecBinaryOperati locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap); break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -433,11 +433,19 @@ void InstructionCodeGeneratorMIPS64::VisitVecAdd(HVecAdd* instruction) { __ FaddD(dst, lhs, rhs); break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } +void LocationsBuilderMIPS64::VisitVecSaturationAdd(HVecSaturationAdd* instruction) { + CreateVecBinOpLocations(GetGraph()->GetAllocator(), instruction); +} + +void InstructionCodeGeneratorMIPS64::VisitVecSaturationAdd(HVecSaturationAdd* instruction) { + LOG(FATAL) << "Unsupported SIMD " << instruction->GetId(); +} + void LocationsBuilderMIPS64::VisitVecHalvingAdd(HVecHalvingAdd* instruction) { CreateVecBinOpLocations(GetGraph()->GetAllocator(), instruction); } @@ -473,7 +481,7 @@ void InstructionCodeGeneratorMIPS64::VisitVecHalvingAdd(HVecHalvingAdd* instruct : __ Ave_sH(dst, lhs, rhs); break; 
default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -515,11 +523,19 @@ void InstructionCodeGeneratorMIPS64::VisitVecSub(HVecSub* instruction) { __ FsubD(dst, lhs, rhs); break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } +void LocationsBuilderMIPS64::VisitVecSaturationSub(HVecSaturationSub* instruction) { + CreateVecBinOpLocations(GetGraph()->GetAllocator(), instruction); +} + +void InstructionCodeGeneratorMIPS64::VisitVecSaturationSub(HVecSaturationSub* instruction) { + LOG(FATAL) << "Unsupported SIMD " << instruction->GetId(); +} + void LocationsBuilderMIPS64::VisitVecMul(HVecMul* instruction) { CreateVecBinOpLocations(GetGraph()->GetAllocator(), instruction); } @@ -557,7 +573,7 @@ void InstructionCodeGeneratorMIPS64::VisitVecMul(HVecMul* instruction) { __ FmulD(dst, lhs, rhs); break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -581,7 +597,7 @@ void InstructionCodeGeneratorMIPS64::VisitVecDiv(HVecDiv* instruction) { __ FdivD(dst, lhs, rhs); break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -639,7 +655,7 @@ void InstructionCodeGeneratorMIPS64::VisitVecMin(HVecMin* instruction) { __ FminD(dst, lhs, rhs); break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -697,7 +713,7 @@ void InstructionCodeGeneratorMIPS64::VisitVecMax(HVecMax* instruction) { __ FmaxD(dst, lhs, rhs); break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -726,7 +742,7 @@ void InstructionCodeGeneratorMIPS64::VisitVecAnd(HVecAnd* instruction) { __ AndV(dst, lhs, rhs); // lanes do not matter break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -763,7 +779,7 @@ void InstructionCodeGeneratorMIPS64::VisitVecOr(HVecOr* instruction) { __ OrV(dst, lhs, rhs); // lanes do not matter break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -792,7 +808,7 @@ void InstructionCodeGeneratorMIPS64::VisitVecXor(HVecXor* instruction) { __ XorV(dst, lhs, rhs); // lanes do not matter break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -812,7 +828,7 @@ static void CreateVecShiftLocations(ArenaAllocator* allocator, HVecBinaryOperati locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap); break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -846,7 +862,7 @@ void InstructionCodeGeneratorMIPS64::VisitVecShl(HVecShl* instruction) { __ SlliD(dst, lhs, value); break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -880,7 +896,7 @@ void InstructionCodeGeneratorMIPS64::VisitVecShr(HVecShr* instruction) { __ SraiD(dst, lhs, value); break; default: - 
LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -914,7 +930,7 @@ void InstructionCodeGeneratorMIPS64::VisitVecUShr(HVecUShr* instruction) { __ SrliD(dst, lhs, value); break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -946,7 +962,7 @@ void LocationsBuilderMIPS64::VisitVecSetScalars(HVecSetScalars* instruction) { locations->SetOut(Location::RequiresFpuRegister()); break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -987,7 +1003,7 @@ void InstructionCodeGeneratorMIPS64::VisitVecSetScalars(HVecSetScalars* instruct __ InsertD(dst, locations->InAt(0).AsRegister<GpuRegister>(), 0); break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -1008,7 +1024,7 @@ static void CreateVecAccumLocations(ArenaAllocator* allocator, HVecOperation* in locations->SetOut(Location::SameAsFirstInput()); break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -1058,7 +1074,7 @@ void InstructionCodeGeneratorMIPS64::VisitVecMultiplyAccumulate(HVecMultiplyAccu } break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -1160,7 +1176,7 @@ void InstructionCodeGeneratorMIPS64::VisitVecSADAccumulate(HVecSADAccumulate* in break; } default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } break; @@ -1199,7 +1215,7 @@ void InstructionCodeGeneratorMIPS64::VisitVecSADAccumulate(HVecSADAccumulate* in break; } default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } break; @@ -1229,7 +1245,7 @@ void InstructionCodeGeneratorMIPS64::VisitVecSADAccumulate(HVecSADAccumulate* in break; } default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } break; @@ -1245,17 +1261,25 @@ void InstructionCodeGeneratorMIPS64::VisitVecSADAccumulate(HVecSADAccumulate* in break; } default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } break; } default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } +void LocationsBuilderMIPS64::VisitVecDotProd(HVecDotProd* instruction) { + LOG(FATAL) << "No SIMD for " << instruction->GetId(); +} + +void InstructionCodeGeneratorMIPS64::VisitVecDotProd(HVecDotProd* instruction) { + LOG(FATAL) << "No SIMD for " << instruction->GetId(); +} + // Helper to set up locations for vector memory operations. 
static void CreateVecMemLocations(ArenaAllocator* allocator, HVecMemoryOperation* instruction, @@ -1280,7 +1304,7 @@ static void CreateVecMemLocations(ArenaAllocator* allocator, } break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -1318,7 +1342,7 @@ int32_t InstructionCodeGeneratorMIPS64::VecAddress(LocationSummary* locations, } void LocationsBuilderMIPS64::VisitVecLoad(HVecLoad* instruction) { - CreateVecMemLocations(GetGraph()->GetAllocator(), instruction, /* is_load */ true); + CreateVecMemLocations(GetGraph()->GetAllocator(), instruction, /* is_load= */ true); } void InstructionCodeGeneratorMIPS64::VisitVecLoad(HVecLoad* instruction) { @@ -1355,13 +1379,13 @@ void InstructionCodeGeneratorMIPS64::VisitVecLoad(HVecLoad* instruction) { __ LdD(reg, base, offset); break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } void LocationsBuilderMIPS64::VisitVecStore(HVecStore* instruction) { - CreateVecMemLocations(GetGraph()->GetAllocator(), instruction, /* is_load */ false); + CreateVecMemLocations(GetGraph()->GetAllocator(), instruction, /* is_load= */ false); } void InstructionCodeGeneratorMIPS64::VisitVecStore(HVecStore* instruction) { @@ -1393,7 +1417,7 @@ void InstructionCodeGeneratorMIPS64::VisitVecStore(HVecStore* instruction) { __ StD(reg, base, offset); break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } diff --git a/compiler/optimizing/code_generator_vector_x86.cc b/compiler/optimizing/code_generator_vector_x86.cc index 4945328e2b..0ee00356b9 100644 --- a/compiler/optimizing/code_generator_vector_x86.cc +++ b/compiler/optimizing/code_generator_vector_x86.cc @@ -54,7 +54,7 @@ void LocationsBuilderX86::VisitVecReplicateScalar(HVecReplicateScalar* instructi : Location::SameAsFirstInput()); break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -111,7 +111,7 @@ void InstructionCodeGeneratorX86::VisitVecReplicateScalar(HVecReplicateScalar* i __ shufpd(dst, dst, Immediate(0)); break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -138,7 +138,7 @@ void LocationsBuilderX86::VisitVecExtractScalar(HVecExtractScalar* instruction) locations->SetOut(Location::SameAsFirstInput()); break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -152,7 +152,7 @@ void InstructionCodeGeneratorX86::VisitVecExtractScalar(HVecExtractScalar* instr case DataType::Type::kInt8: case DataType::Type::kUint16: case DataType::Type::kInt16: // TODO: up to here, and? 
- LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); case DataType::Type::kInt32: DCHECK_LE(4u, instruction->GetVectorLength()); @@ -174,7 +174,7 @@ void InstructionCodeGeneratorX86::VisitVecExtractScalar(HVecExtractScalar* instr DCHECK(locations->InAt(0).Equals(locations->Out())); // no code required break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -196,7 +196,7 @@ static void CreateVecUnOpLocations(ArenaAllocator* allocator, HVecUnaryOperation locations->SetOut(Location::RequiresFpuRegister()); break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -205,8 +205,8 @@ void LocationsBuilderX86::VisitVecReduce(HVecReduce* instruction) { CreateVecUnOpLocations(GetGraph()->GetAllocator(), instruction); // Long reduction or min/max require a temporary. if (instruction->GetPackedType() == DataType::Type::kInt64 || - instruction->GetKind() == HVecReduce::kMin || - instruction->GetKind() == HVecReduce::kMax) { + instruction->GetReductionKind() == HVecReduce::kMin || + instruction->GetReductionKind() == HVecReduce::kMax) { instruction->GetLocations()->AddTemp(Location::RequiresFpuRegister()); } } @@ -218,38 +218,23 @@ void InstructionCodeGeneratorX86::VisitVecReduce(HVecReduce* instruction) { switch (instruction->GetPackedType()) { case DataType::Type::kInt32: DCHECK_EQ(4u, instruction->GetVectorLength()); - switch (instruction->GetKind()) { + switch (instruction->GetReductionKind()) { case HVecReduce::kSum: __ movaps(dst, src); __ phaddd(dst, dst); __ phaddd(dst, dst); break; - case HVecReduce::kMin: { - XmmRegister tmp = locations->GetTemp(0).AsFpuRegister<XmmRegister>(); - __ movaps(tmp, src); - __ movaps(dst, src); - __ psrldq(tmp, Immediate(8)); - __ pminsd(dst, tmp); - __ psrldq(tmp, Immediate(4)); - __ pminsd(dst, tmp); - break; - } - case HVecReduce::kMax: { - XmmRegister tmp = locations->GetTemp(0).AsFpuRegister<XmmRegister>(); - __ movaps(tmp, src); - __ movaps(dst, src); - __ psrldq(tmp, Immediate(8)); - __ pmaxsd(dst, tmp); - __ psrldq(tmp, Immediate(4)); - __ pmaxsd(dst, tmp); - break; - } + case HVecReduce::kMin: + case HVecReduce::kMax: + // Historical note: We've had a broken implementation here. b/117863065 + // Do not draw on the old code if we ever want to bring MIN/MAX reduction back. 
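For reference on the kSum path kept above: phaddd of the register with itself, done twice, leaves the total of all four int32 lanes replicated into every lane, which is the whole reduction. A scalar sketch of that result (plain C++ for illustration, not ART code; unsigned arithmetic keeps the wrap-around of the packed adds well defined):

#include <cstdint>

// Equivalent of the 4-lane int32 kSum reduction: after the first phaddd the
// lanes are {v0+v1, v2+v3, v0+v1, v2+v3}; after the second, every lane holds
// v0+v1+v2+v3, wrapping on overflow like the packed adds.
int32_t ReduceSum4(const int32_t v[4]) {
  uint32_t sum = static_cast<uint32_t>(v[0]) + static_cast<uint32_t>(v[1]) +
                 static_cast<uint32_t>(v[2]) + static_cast<uint32_t>(v[3]);
  return static_cast<int32_t>(sum);
}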
+ LOG(FATAL) << "Unsupported reduction type."; } break; case DataType::Type::kInt64: { DCHECK_EQ(2u, instruction->GetVectorLength()); XmmRegister tmp = locations->GetTemp(0).AsFpuRegister<XmmRegister>(); - switch (instruction->GetKind()) { + switch (instruction->GetReductionKind()) { case HVecReduce::kSum: __ movaps(tmp, src); __ movaps(dst, src); @@ -258,12 +243,12 @@ void InstructionCodeGeneratorX86::VisitVecReduce(HVecReduce* instruction) { break; case HVecReduce::kMin: case HVecReduce::kMax: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); } break; } default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -282,7 +267,7 @@ void InstructionCodeGeneratorX86::VisitVecCnv(HVecCnv* instruction) { DCHECK_EQ(4u, instruction->GetVectorLength()); __ cvtdq2ps(dst, src); } else { - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); } } @@ -328,7 +313,7 @@ void InstructionCodeGeneratorX86::VisitVecNeg(HVecNeg* instruction) { __ subpd(dst, src); break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -369,7 +354,7 @@ void InstructionCodeGeneratorX86::VisitVecAbs(HVecAbs* instruction) { __ andpd(dst, src); break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -418,7 +403,7 @@ void InstructionCodeGeneratorX86::VisitVecNot(HVecNot* instruction) { __ xorpd(dst, src); break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -441,7 +426,7 @@ static void CreateVecBinOpLocations(ArenaAllocator* allocator, HVecBinaryOperati locations->SetOut(Location::SameAsFirstInput()); break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -483,7 +468,39 @@ void InstructionCodeGeneratorX86::VisitVecAdd(HVecAdd* instruction) { __ addpd(dst, src); break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); + UNREACHABLE(); + } +} + +void LocationsBuilderX86::VisitVecSaturationAdd(HVecSaturationAdd* instruction) { + CreateVecBinOpLocations(GetGraph()->GetAllocator(), instruction); +} + +void InstructionCodeGeneratorX86::VisitVecSaturationAdd(HVecSaturationAdd* instruction) { + LocationSummary* locations = instruction->GetLocations(); + DCHECK(locations->InAt(0).Equals(locations->Out())); + XmmRegister src = locations->InAt(1).AsFpuRegister<XmmRegister>(); + XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>(); + switch (instruction->GetPackedType()) { + case DataType::Type::kUint8: + DCHECK_EQ(16u, instruction->GetVectorLength()); + __ paddusb(dst, src); + break; + case DataType::Type::kInt8: + DCHECK_EQ(16u, instruction->GetVectorLength()); + __ paddsb(dst, src); + break; + case DataType::Type::kUint16: + DCHECK_EQ(8u, instruction->GetVectorLength()); + __ paddusw(dst, src); + break; + case DataType::Type::kInt16: + DCHECK_EQ(8u, instruction->GetVectorLength()); + __ paddsw(dst, src); + break; + default: + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -503,14 +520,14 @@ void 
InstructionCodeGeneratorX86::VisitVecHalvingAdd(HVecHalvingAdd* instruction switch (instruction->GetPackedType()) { case DataType::Type::kUint8: DCHECK_EQ(16u, instruction->GetVectorLength()); - __ pavgb(dst, src); - return; + __ pavgb(dst, src); + break; case DataType::Type::kUint16: DCHECK_EQ(8u, instruction->GetVectorLength()); __ pavgw(dst, src); - return; + break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -552,7 +569,39 @@ void InstructionCodeGeneratorX86::VisitVecSub(HVecSub* instruction) { __ subpd(dst, src); break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); + UNREACHABLE(); + } +} + +void LocationsBuilderX86::VisitVecSaturationSub(HVecSaturationSub* instruction) { + CreateVecBinOpLocations(GetGraph()->GetAllocator(), instruction); +} + +void InstructionCodeGeneratorX86::VisitVecSaturationSub(HVecSaturationSub* instruction) { + LocationSummary* locations = instruction->GetLocations(); + DCHECK(locations->InAt(0).Equals(locations->Out())); + XmmRegister src = locations->InAt(1).AsFpuRegister<XmmRegister>(); + XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>(); + switch (instruction->GetPackedType()) { + case DataType::Type::kUint8: + DCHECK_EQ(16u, instruction->GetVectorLength()); + __ psubusb(dst, src); + break; + case DataType::Type::kInt8: + DCHECK_EQ(16u, instruction->GetVectorLength()); + __ psubsb(dst, src); + break; + case DataType::Type::kUint16: + DCHECK_EQ(8u, instruction->GetVectorLength()); + __ psubusw(dst, src); + break; + case DataType::Type::kInt16: + DCHECK_EQ(8u, instruction->GetVectorLength()); + __ psubsw(dst, src); + break; + default: + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -585,7 +634,7 @@ void InstructionCodeGeneratorX86::VisitVecMul(HVecMul* instruction) { __ mulpd(dst, src); break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -609,7 +658,7 @@ void InstructionCodeGeneratorX86::VisitVecDiv(HVecDiv* instruction) { __ divpd(dst, src); break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -658,7 +707,7 @@ void InstructionCodeGeneratorX86::VisitVecMin(HVecMin* instruction) { __ minpd(dst, src); break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -707,7 +756,7 @@ void InstructionCodeGeneratorX86::VisitVecMax(HVecMax* instruction) { __ maxpd(dst, src); break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -742,7 +791,7 @@ void InstructionCodeGeneratorX86::VisitVecAnd(HVecAnd* instruction) { __ andpd(dst, src); break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -777,7 +826,7 @@ void InstructionCodeGeneratorX86::VisitVecAndNot(HVecAndNot* instruction) { __ andnpd(dst, src); break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -812,7 +861,7 @@ void InstructionCodeGeneratorX86::VisitVecOr(HVecOr* instruction) { __ 
orpd(dst, src); break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -847,7 +896,7 @@ void InstructionCodeGeneratorX86::VisitVecXor(HVecXor* instruction) { __ xorpd(dst, src); break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -865,7 +914,7 @@ static void CreateVecShiftLocations(ArenaAllocator* allocator, HVecBinaryOperati locations->SetOut(Location::SameAsFirstInput()); break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -894,7 +943,7 @@ void InstructionCodeGeneratorX86::VisitVecShl(HVecShl* instruction) { __ psllq(dst, Immediate(static_cast<uint8_t>(value))); break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -919,7 +968,7 @@ void InstructionCodeGeneratorX86::VisitVecShr(HVecShr* instruction) { __ psrad(dst, Immediate(static_cast<uint8_t>(value))); break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -948,7 +997,7 @@ void InstructionCodeGeneratorX86::VisitVecUShr(HVecUShr* instruction) { __ psrlq(dst, Immediate(static_cast<uint8_t>(value))); break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -985,7 +1034,7 @@ void LocationsBuilderX86::VisitVecSetScalars(HVecSetScalars* instruction) { locations->SetOut(Location::RequiresFpuRegister()); break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -1011,7 +1060,7 @@ void InstructionCodeGeneratorX86::VisitVecSetScalars(HVecSetScalars* instruction case DataType::Type::kInt8: case DataType::Type::kUint16: case DataType::Type::kInt16: // TODO: up to here, and? - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); case DataType::Type::kInt32: DCHECK_EQ(4u, instruction->GetVectorLength()); @@ -1035,7 +1084,7 @@ void InstructionCodeGeneratorX86::VisitVecSetScalars(HVecSetScalars* instruction __ movsd(dst, locations->InAt(1).AsFpuRegister<XmmRegister>()); break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -1056,7 +1105,7 @@ static void CreateVecAccumLocations(ArenaAllocator* allocator, HVecOperation* in locations->SetOut(Location::SameAsFirstInput()); break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -1079,6 +1128,14 @@ void InstructionCodeGeneratorX86::VisitVecSADAccumulate(HVecSADAccumulate* instr LOG(FATAL) << "No SIMD for " << instruction->GetId(); } +void LocationsBuilderX86::VisitVecDotProd(HVecDotProd* instruction) { + LOG(FATAL) << "No SIMD for " << instruction->GetId(); +} + +void InstructionCodeGeneratorX86::VisitVecDotProd(HVecDotProd* instruction) { + LOG(FATAL) << "No SIMD for " << instruction->GetId(); +} + // Helper to set up locations for vector memory operations. 
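The paddusb/paddsb/paddusw/paddsw and psubusb/psubsb/psubusw/psubsw handlers added above implement lane-wise saturating arithmetic: instead of wrapping, results are clamped to the lane's representable range. A scalar sketch of that clamping for two of the lane types (helper names are illustrative, not ART code):

#include <algorithm>
#include <cstdint>

// Saturating signed 8-bit add, as paddsb does per lane: clamp to [-128, 127].
int8_t SatAddS8(int8_t a, int8_t b) {
  int32_t wide = static_cast<int32_t>(a) + static_cast<int32_t>(b);
  return static_cast<int8_t>(std::min(127, std::max(-128, wide)));
}

// Saturating unsigned 16-bit subtract, as psubusw does per lane: clamp at 0.
uint16_t SatSubU16(uint16_t a, uint16_t b) {
  return (a > b) ? static_cast<uint16_t>(a - b) : static_cast<uint16_t>(0);
}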
static void CreateVecMemLocations(ArenaAllocator* allocator, HVecMemoryOperation* instruction, @@ -1103,7 +1160,7 @@ static void CreateVecMemLocations(ArenaAllocator* allocator, } break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -1184,7 +1241,7 @@ void InstructionCodeGeneratorX86::VisitVecLoad(HVecLoad* instruction) { is_aligned16 ? __ movapd(reg, address) : __ movupd(reg, address); break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -1220,7 +1277,7 @@ void InstructionCodeGeneratorX86::VisitVecStore(HVecStore* instruction) { is_aligned16 ? __ movapd(address, reg) : __ movupd(address, reg); break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } diff --git a/compiler/optimizing/code_generator_vector_x86_64.cc b/compiler/optimizing/code_generator_vector_x86_64.cc index a77c7d6838..9c2882766c 100644 --- a/compiler/optimizing/code_generator_vector_x86_64.cc +++ b/compiler/optimizing/code_generator_vector_x86_64.cc @@ -49,7 +49,7 @@ void LocationsBuilderX86_64::VisitVecReplicateScalar(HVecReplicateScalar* instru : Location::SameAsFirstInput()); break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -102,7 +102,7 @@ void InstructionCodeGeneratorX86_64::VisitVecReplicateScalar(HVecReplicateScalar __ shufpd(dst, dst, Immediate(0)); break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -126,7 +126,7 @@ void LocationsBuilderX86_64::VisitVecExtractScalar(HVecExtractScalar* instructio locations->SetOut(Location::SameAsFirstInput()); break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -140,7 +140,7 @@ void InstructionCodeGeneratorX86_64::VisitVecExtractScalar(HVecExtractScalar* in case DataType::Type::kInt8: case DataType::Type::kUint16: case DataType::Type::kInt16: // TODO: up to here, and? - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); case DataType::Type::kInt32: DCHECK_EQ(4u, instruction->GetVectorLength()); @@ -157,7 +157,7 @@ void InstructionCodeGeneratorX86_64::VisitVecExtractScalar(HVecExtractScalar* in DCHECK(locations->InAt(0).Equals(locations->Out())); // no code required break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -179,7 +179,7 @@ static void CreateVecUnOpLocations(ArenaAllocator* allocator, HVecUnaryOperation locations->SetOut(Location::RequiresFpuRegister()); break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -188,8 +188,8 @@ void LocationsBuilderX86_64::VisitVecReduce(HVecReduce* instruction) { CreateVecUnOpLocations(GetGraph()->GetAllocator(), instruction); // Long reduction or min/max require a temporary. 
if (instruction->GetPackedType() == DataType::Type::kInt64 || - instruction->GetKind() == HVecReduce::kMin || - instruction->GetKind() == HVecReduce::kMax) { + instruction->GetReductionKind() == HVecReduce::kMin || + instruction->GetReductionKind() == HVecReduce::kMax) { instruction->GetLocations()->AddTemp(Location::RequiresFpuRegister()); } } @@ -201,38 +201,23 @@ void InstructionCodeGeneratorX86_64::VisitVecReduce(HVecReduce* instruction) { switch (instruction->GetPackedType()) { case DataType::Type::kInt32: DCHECK_EQ(4u, instruction->GetVectorLength()); - switch (instruction->GetKind()) { + switch (instruction->GetReductionKind()) { case HVecReduce::kSum: __ movaps(dst, src); __ phaddd(dst, dst); __ phaddd(dst, dst); break; - case HVecReduce::kMin: { - XmmRegister tmp = locations->GetTemp(0).AsFpuRegister<XmmRegister>(); - __ movaps(tmp, src); - __ movaps(dst, src); - __ psrldq(tmp, Immediate(8)); - __ pminsd(dst, tmp); - __ psrldq(tmp, Immediate(4)); - __ pminsd(dst, tmp); - break; - } - case HVecReduce::kMax: { - XmmRegister tmp = locations->GetTemp(0).AsFpuRegister<XmmRegister>(); - __ movaps(tmp, src); - __ movaps(dst, src); - __ psrldq(tmp, Immediate(8)); - __ pmaxsd(dst, tmp); - __ psrldq(tmp, Immediate(4)); - __ pmaxsd(dst, tmp); - break; - } + case HVecReduce::kMin: + case HVecReduce::kMax: + // Historical note: We've had a broken implementation here. b/117863065 + // Do not draw on the old code if we ever want to bring MIN/MAX reduction back. + LOG(FATAL) << "Unsupported reduction type."; } break; case DataType::Type::kInt64: { DCHECK_EQ(2u, instruction->GetVectorLength()); XmmRegister tmp = locations->GetTemp(0).AsFpuRegister<XmmRegister>(); - switch (instruction->GetKind()) { + switch (instruction->GetReductionKind()) { case HVecReduce::kSum: __ movaps(tmp, src); __ movaps(dst, src); @@ -241,12 +226,12 @@ void InstructionCodeGeneratorX86_64::VisitVecReduce(HVecReduce* instruction) { break; case HVecReduce::kMin: case HVecReduce::kMax: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); } break; } default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -265,7 +250,7 @@ void InstructionCodeGeneratorX86_64::VisitVecCnv(HVecCnv* instruction) { DCHECK_EQ(4u, instruction->GetVectorLength()); __ cvtdq2ps(dst, src); } else { - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); } } @@ -311,7 +296,7 @@ void InstructionCodeGeneratorX86_64::VisitVecNeg(HVecNeg* instruction) { __ subpd(dst, src); break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -352,7 +337,7 @@ void InstructionCodeGeneratorX86_64::VisitVecAbs(HVecAbs* instruction) { __ andpd(dst, src); break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -401,7 +386,7 @@ void InstructionCodeGeneratorX86_64::VisitVecNot(HVecNot* instruction) { __ xorpd(dst, src); break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -424,7 +409,7 @@ static void CreateVecBinOpLocations(ArenaAllocator* allocator, HVecBinaryOperati locations->SetOut(Location::SameAsFirstInput()); break; default: - LOG(FATAL) << "Unsupported SIMD type"; + 
LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -466,7 +451,39 @@ void InstructionCodeGeneratorX86_64::VisitVecAdd(HVecAdd* instruction) { __ addpd(dst, src); break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); + UNREACHABLE(); + } +} + +void LocationsBuilderX86_64::VisitVecSaturationAdd(HVecSaturationAdd* instruction) { + CreateVecBinOpLocations(GetGraph()->GetAllocator(), instruction); +} + +void InstructionCodeGeneratorX86_64::VisitVecSaturationAdd(HVecSaturationAdd* instruction) { + LocationSummary* locations = instruction->GetLocations(); + DCHECK(locations->InAt(0).Equals(locations->Out())); + XmmRegister src = locations->InAt(1).AsFpuRegister<XmmRegister>(); + XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>(); + switch (instruction->GetPackedType()) { + case DataType::Type::kUint8: + DCHECK_EQ(16u, instruction->GetVectorLength()); + __ paddusb(dst, src); + break; + case DataType::Type::kInt8: + DCHECK_EQ(16u, instruction->GetVectorLength()); + __ paddsb(dst, src); + break; + case DataType::Type::kUint16: + DCHECK_EQ(8u, instruction->GetVectorLength()); + __ paddusw(dst, src); + break; + case DataType::Type::kInt16: + DCHECK_EQ(8u, instruction->GetVectorLength()); + __ paddsw(dst, src); + break; + default: + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -486,14 +503,14 @@ void InstructionCodeGeneratorX86_64::VisitVecHalvingAdd(HVecHalvingAdd* instruct switch (instruction->GetPackedType()) { case DataType::Type::kUint8: DCHECK_EQ(16u, instruction->GetVectorLength()); - __ pavgb(dst, src); - return; + __ pavgb(dst, src); + break; case DataType::Type::kUint16: DCHECK_EQ(8u, instruction->GetVectorLength()); __ pavgw(dst, src); - return; + break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -535,7 +552,39 @@ void InstructionCodeGeneratorX86_64::VisitVecSub(HVecSub* instruction) { __ subpd(dst, src); break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); + UNREACHABLE(); + } +} + +void LocationsBuilderX86_64::VisitVecSaturationSub(HVecSaturationSub* instruction) { + CreateVecBinOpLocations(GetGraph()->GetAllocator(), instruction); +} + +void InstructionCodeGeneratorX86_64::VisitVecSaturationSub(HVecSaturationSub* instruction) { + LocationSummary* locations = instruction->GetLocations(); + DCHECK(locations->InAt(0).Equals(locations->Out())); + XmmRegister src = locations->InAt(1).AsFpuRegister<XmmRegister>(); + XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>(); + switch (instruction->GetPackedType()) { + case DataType::Type::kUint8: + DCHECK_EQ(16u, instruction->GetVectorLength()); + __ psubusb(dst, src); + break; + case DataType::Type::kInt8: + DCHECK_EQ(16u, instruction->GetVectorLength()); + __ psubsb(dst, src); + break; + case DataType::Type::kUint16: + DCHECK_EQ(8u, instruction->GetVectorLength()); + __ psubusw(dst, src); + break; + case DataType::Type::kInt16: + DCHECK_EQ(8u, instruction->GetVectorLength()); + __ psubsw(dst, src); + break; + default: + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -568,7 +617,7 @@ void InstructionCodeGeneratorX86_64::VisitVecMul(HVecMul* instruction) { __ mulpd(dst, src); break; default: - LOG(FATAL) << "Unsupported SIMD type"; 
+ LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -592,7 +641,7 @@ void InstructionCodeGeneratorX86_64::VisitVecDiv(HVecDiv* instruction) { __ divpd(dst, src); break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -641,7 +690,7 @@ void InstructionCodeGeneratorX86_64::VisitVecMin(HVecMin* instruction) { __ minpd(dst, src); break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -690,7 +739,7 @@ void InstructionCodeGeneratorX86_64::VisitVecMax(HVecMax* instruction) { __ maxpd(dst, src); break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -725,7 +774,7 @@ void InstructionCodeGeneratorX86_64::VisitVecAnd(HVecAnd* instruction) { __ andpd(dst, src); break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -760,7 +809,7 @@ void InstructionCodeGeneratorX86_64::VisitVecAndNot(HVecAndNot* instruction) { __ andnpd(dst, src); break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -795,7 +844,7 @@ void InstructionCodeGeneratorX86_64::VisitVecOr(HVecOr* instruction) { __ orpd(dst, src); break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -830,7 +879,7 @@ void InstructionCodeGeneratorX86_64::VisitVecXor(HVecXor* instruction) { __ xorpd(dst, src); break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -848,7 +897,7 @@ static void CreateVecShiftLocations(ArenaAllocator* allocator, HVecBinaryOperati locations->SetOut(Location::SameAsFirstInput()); break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -877,7 +926,7 @@ void InstructionCodeGeneratorX86_64::VisitVecShl(HVecShl* instruction) { __ psllq(dst, Immediate(static_cast<int8_t>(value))); break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -902,7 +951,7 @@ void InstructionCodeGeneratorX86_64::VisitVecShr(HVecShr* instruction) { __ psrad(dst, Immediate(static_cast<int8_t>(value))); break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -931,7 +980,7 @@ void InstructionCodeGeneratorX86_64::VisitVecUShr(HVecUShr* instruction) { __ psrlq(dst, Immediate(static_cast<int8_t>(value))); break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -963,7 +1012,7 @@ void LocationsBuilderX86_64::VisitVecSetScalars(HVecSetScalars* instruction) { locations->SetOut(Location::RequiresFpuRegister()); break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -989,7 +1038,7 @@ void InstructionCodeGeneratorX86_64::VisitVecSetScalars(HVecSetScalars* instruct case 
DataType::Type::kInt8: case DataType::Type::kUint16: case DataType::Type::kInt16: // TODO: up to here, and? - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); case DataType::Type::kInt32: DCHECK_EQ(4u, instruction->GetVectorLength()); @@ -1008,7 +1057,7 @@ void InstructionCodeGeneratorX86_64::VisitVecSetScalars(HVecSetScalars* instruct __ movsd(dst, locations->InAt(0).AsFpuRegister<XmmRegister>()); break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -1029,7 +1078,7 @@ static void CreateVecAccumLocations(ArenaAllocator* allocator, HVecOperation* in locations->SetOut(Location::SameAsFirstInput()); break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -1052,6 +1101,14 @@ void InstructionCodeGeneratorX86_64::VisitVecSADAccumulate(HVecSADAccumulate* in LOG(FATAL) << "No SIMD for " << instruction->GetId(); } +void LocationsBuilderX86_64::VisitVecDotProd(HVecDotProd* instruction) { + LOG(FATAL) << "No SIMD for " << instruction->GetId(); +} + +void InstructionCodeGeneratorX86_64::VisitVecDotProd(HVecDotProd* instruction) { + LOG(FATAL) << "No SIMD for " << instruction->GetId(); +} + // Helper to set up locations for vector memory operations. static void CreateVecMemLocations(ArenaAllocator* allocator, HVecMemoryOperation* instruction, @@ -1076,7 +1133,7 @@ static void CreateVecMemLocations(ArenaAllocator* allocator, } break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -1157,7 +1214,7 @@ void InstructionCodeGeneratorX86_64::VisitVecLoad(HVecLoad* instruction) { is_aligned16 ? __ movapd(reg, address) : __ movupd(reg, address); break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -1193,7 +1250,7 @@ void InstructionCodeGeneratorX86_64::VisitVecStore(HVecStore* instruction) { is_aligned16 ? 
__ movapd(address, reg) : __ movupd(address, reg); break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } diff --git a/compiler/optimizing/code_generator_x86.cc b/compiler/optimizing/code_generator_x86.cc index 6bf045885d..95118b0b6d 100644 --- a/compiler/optimizing/code_generator_x86.cc +++ b/compiler/optimizing/code_generator_x86.cc @@ -23,6 +23,7 @@ #include "entrypoints/quick/quick_entrypoints.h" #include "entrypoints/quick/quick_entrypoints_enum.h" #include "gc/accounting/card_table.h" +#include "gc/space/image_space.h" #include "heap_poisoning.h" #include "intrinsics.h" #include "intrinsics_x86.h" @@ -51,6 +52,18 @@ static constexpr int kC2ConditionMask = 0x400; static constexpr int kFakeReturnRegister = Register(8); +static constexpr int64_t kDoubleNaN = INT64_C(0x7FF8000000000000); +static constexpr int32_t kFloatNaN = INT32_C(0x7FC00000); + +static RegisterSet OneRegInReferenceOutSaveEverythingCallerSaves() { + InvokeRuntimeCallingConvention calling_convention; + RegisterSet caller_saves = RegisterSet::Empty(); + caller_saves.Add(Location::RegisterLocation(calling_convention.GetRegisterAt(0))); + // TODO: Add GetReturnLocation() to the calling convention so that we can DCHECK() + // that the the kPrimNot result register is the same as the first argument register. + return caller_saves; +} + // NOLINT on __ macro to suppress wrong warning/fix (misc-macro-parentheses) from clang-tidy. #define __ down_cast<X86Assembler*>(codegen->GetAssembler())-> // NOLINT #define QUICK_ENTRY_POINT(x) QUICK_ENTRYPOINT_OFFSET(kX86PointerSize, x).Int32Value() @@ -59,7 +72,7 @@ class NullCheckSlowPathX86 : public SlowPathCode { public: explicit NullCheckSlowPathX86(HNullCheck* instruction) : SlowPathCode(instruction) {} - void EmitNativeCode(CodeGenerator* codegen) OVERRIDE { + void EmitNativeCode(CodeGenerator* codegen) override { CodeGeneratorX86* x86_codegen = down_cast<CodeGeneratorX86*>(codegen); __ Bind(GetEntryLabel()); if (instruction_->CanThrowIntoCatchBlock()) { @@ -73,9 +86,9 @@ class NullCheckSlowPathX86 : public SlowPathCode { CheckEntrypointTypes<kQuickThrowNullPointer, void, void>(); } - bool IsFatal() const OVERRIDE { return true; } + bool IsFatal() const override { return true; } - const char* GetDescription() const OVERRIDE { return "NullCheckSlowPathX86"; } + const char* GetDescription() const override { return "NullCheckSlowPathX86"; } private: DISALLOW_COPY_AND_ASSIGN(NullCheckSlowPathX86); @@ -85,16 +98,16 @@ class DivZeroCheckSlowPathX86 : public SlowPathCode { public: explicit DivZeroCheckSlowPathX86(HDivZeroCheck* instruction) : SlowPathCode(instruction) {} - void EmitNativeCode(CodeGenerator* codegen) OVERRIDE { + void EmitNativeCode(CodeGenerator* codegen) override { CodeGeneratorX86* x86_codegen = down_cast<CodeGeneratorX86*>(codegen); __ Bind(GetEntryLabel()); x86_codegen->InvokeRuntime(kQuickThrowDivZero, instruction_, instruction_->GetDexPc(), this); CheckEntrypointTypes<kQuickThrowDivZero, void, void>(); } - bool IsFatal() const OVERRIDE { return true; } + bool IsFatal() const override { return true; } - const char* GetDescription() const OVERRIDE { return "DivZeroCheckSlowPathX86"; } + const char* GetDescription() const override { return "DivZeroCheckSlowPathX86"; } private: DISALLOW_COPY_AND_ASSIGN(DivZeroCheckSlowPathX86); @@ -105,7 +118,7 @@ class DivRemMinusOneSlowPathX86 : public SlowPathCode { DivRemMinusOneSlowPathX86(HInstruction* instruction, Register reg, bool 
is_div) : SlowPathCode(instruction), reg_(reg), is_div_(is_div) {} - void EmitNativeCode(CodeGenerator* codegen) OVERRIDE { + void EmitNativeCode(CodeGenerator* codegen) override { __ Bind(GetEntryLabel()); if (is_div_) { __ negl(reg_); @@ -115,7 +128,7 @@ class DivRemMinusOneSlowPathX86 : public SlowPathCode { __ jmp(GetExitLabel()); } - const char* GetDescription() const OVERRIDE { return "DivRemMinusOneSlowPathX86"; } + const char* GetDescription() const override { return "DivRemMinusOneSlowPathX86"; } private: Register reg_; @@ -127,7 +140,7 @@ class BoundsCheckSlowPathX86 : public SlowPathCode { public: explicit BoundsCheckSlowPathX86(HBoundsCheck* instruction) : SlowPathCode(instruction) {} - void EmitNativeCode(CodeGenerator* codegen) OVERRIDE { + void EmitNativeCode(CodeGenerator* codegen) override { LocationSummary* locations = instruction_->GetLocations(); CodeGeneratorX86* x86_codegen = down_cast<CodeGeneratorX86*>(codegen); __ Bind(GetEntryLabel()); @@ -174,9 +187,9 @@ class BoundsCheckSlowPathX86 : public SlowPathCode { CheckEntrypointTypes<kQuickThrowArrayBounds, void, int32_t, int32_t>(); } - bool IsFatal() const OVERRIDE { return true; } + bool IsFatal() const override { return true; } - const char* GetDescription() const OVERRIDE { return "BoundsCheckSlowPathX86"; } + const char* GetDescription() const override { return "BoundsCheckSlowPathX86"; } private: DISALLOW_COPY_AND_ASSIGN(BoundsCheckSlowPathX86); @@ -187,7 +200,7 @@ class SuspendCheckSlowPathX86 : public SlowPathCode { SuspendCheckSlowPathX86(HSuspendCheck* instruction, HBasicBlock* successor) : SlowPathCode(instruction), successor_(successor) {} - void EmitNativeCode(CodeGenerator* codegen) OVERRIDE { + void EmitNativeCode(CodeGenerator* codegen) override { LocationSummary* locations = instruction_->GetLocations(); CodeGeneratorX86* x86_codegen = down_cast<CodeGeneratorX86*>(codegen); __ Bind(GetEntryLabel()); @@ -211,7 +224,7 @@ class SuspendCheckSlowPathX86 : public SlowPathCode { return successor_; } - const char* GetDescription() const OVERRIDE { return "SuspendCheckSlowPathX86"; } + const char* GetDescription() const override { return "SuspendCheckSlowPathX86"; } private: HBasicBlock* const successor_; @@ -224,7 +237,7 @@ class LoadStringSlowPathX86 : public SlowPathCode { public: explicit LoadStringSlowPathX86(HLoadString* instruction): SlowPathCode(instruction) {} - void EmitNativeCode(CodeGenerator* codegen) OVERRIDE { + void EmitNativeCode(CodeGenerator* codegen) override { LocationSummary* locations = instruction_->GetLocations(); DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(locations->Out().reg())); @@ -243,7 +256,7 @@ class LoadStringSlowPathX86 : public SlowPathCode { __ jmp(GetExitLabel()); } - const char* GetDescription() const OVERRIDE { return "LoadStringSlowPathX86"; } + const char* GetDescription() const override { return "LoadStringSlowPathX86"; } private: DISALLOW_COPY_AND_ASSIGN(LoadStringSlowPathX86); @@ -251,36 +264,42 @@ class LoadStringSlowPathX86 : public SlowPathCode { class LoadClassSlowPathX86 : public SlowPathCode { public: - LoadClassSlowPathX86(HLoadClass* cls, - HInstruction* at, - uint32_t dex_pc, - bool do_clinit) - : SlowPathCode(at), cls_(cls), dex_pc_(dex_pc), do_clinit_(do_clinit) { + LoadClassSlowPathX86(HLoadClass* cls, HInstruction* at) + : SlowPathCode(at), cls_(cls) { DCHECK(at->IsLoadClass() || at->IsClinitCheck()); + DCHECK_EQ(instruction_->IsLoadClass(), cls_ == instruction_); } - void EmitNativeCode(CodeGenerator* codegen) OVERRIDE { + void 
EmitNativeCode(CodeGenerator* codegen) override { LocationSummary* locations = instruction_->GetLocations(); + Location out = locations->Out(); + const uint32_t dex_pc = instruction_->GetDexPc(); + bool must_resolve_type = instruction_->IsLoadClass() && cls_->MustResolveTypeOnSlowPath(); + bool must_do_clinit = instruction_->IsClinitCheck() || cls_->MustGenerateClinitCheck(); + CodeGeneratorX86* x86_codegen = down_cast<CodeGeneratorX86*>(codegen); __ Bind(GetEntryLabel()); SaveLiveRegisters(codegen, locations); InvokeRuntimeCallingConvention calling_convention; - dex::TypeIndex type_index = cls_->GetTypeIndex(); - __ movl(calling_convention.GetRegisterAt(0), Immediate(type_index.index_)); - x86_codegen->InvokeRuntime(do_clinit_ ? kQuickInitializeStaticStorage - : kQuickInitializeType, - instruction_, - dex_pc_, - this); - if (do_clinit_) { - CheckEntrypointTypes<kQuickInitializeStaticStorage, void*, uint32_t>(); + if (must_resolve_type) { + DCHECK(IsSameDexFile(cls_->GetDexFile(), x86_codegen->GetGraph()->GetDexFile())); + dex::TypeIndex type_index = cls_->GetTypeIndex(); + __ movl(calling_convention.GetRegisterAt(0), Immediate(type_index.index_)); + x86_codegen->InvokeRuntime(kQuickResolveType, instruction_, dex_pc, this); + CheckEntrypointTypes<kQuickResolveType, void*, uint32_t>(); + // If we also must_do_clinit, the resolved type is now in the correct register. } else { - CheckEntrypointTypes<kQuickInitializeType, void*, uint32_t>(); + DCHECK(must_do_clinit); + Location source = instruction_->IsLoadClass() ? out : locations->InAt(0); + x86_codegen->Move32(Location::RegisterLocation(calling_convention.GetRegisterAt(0)), source); + } + if (must_do_clinit) { + x86_codegen->InvokeRuntime(kQuickInitializeStaticStorage, instruction_, dex_pc, this); + CheckEntrypointTypes<kQuickInitializeStaticStorage, void*, mirror::Class*>(); } // Move the class to the desired location. - Location out = locations->Out(); if (out.IsValid()) { DCHECK(out.IsRegister() && !locations->GetLiveRegisters()->ContainsCoreRegister(out.reg())); x86_codegen->Move32(out, Location::RegisterLocation(EAX)); @@ -289,18 +308,12 @@ class LoadClassSlowPathX86 : public SlowPathCode { __ jmp(GetExitLabel()); } - const char* GetDescription() const OVERRIDE { return "LoadClassSlowPathX86"; } + const char* GetDescription() const override { return "LoadClassSlowPathX86"; } private: // The class this slow path will load. HLoadClass* const cls_; - // The dex PC of `at_`. - const uint32_t dex_pc_; - - // Whether to initialize the class. 
- const bool do_clinit_; - DISALLOW_COPY_AND_ASSIGN(LoadClassSlowPathX86); }; @@ -309,7 +322,7 @@ class TypeCheckSlowPathX86 : public SlowPathCode { TypeCheckSlowPathX86(HInstruction* instruction, bool is_fatal) : SlowPathCode(instruction), is_fatal_(is_fatal) {} - void EmitNativeCode(CodeGenerator* codegen) OVERRIDE { + void EmitNativeCode(CodeGenerator* codegen) override { LocationSummary* locations = instruction_->GetLocations(); DCHECK(instruction_->IsCheckCast() || !locations->GetLiveRegisters()->ContainsCoreRegister(locations->Out().reg())); @@ -362,8 +375,8 @@ class TypeCheckSlowPathX86 : public SlowPathCode { } } - const char* GetDescription() const OVERRIDE { return "TypeCheckSlowPathX86"; } - bool IsFatal() const OVERRIDE { return is_fatal_; } + const char* GetDescription() const override { return "TypeCheckSlowPathX86"; } + bool IsFatal() const override { return is_fatal_; } private: const bool is_fatal_; @@ -376,7 +389,7 @@ class DeoptimizationSlowPathX86 : public SlowPathCode { explicit DeoptimizationSlowPathX86(HDeoptimize* instruction) : SlowPathCode(instruction) {} - void EmitNativeCode(CodeGenerator* codegen) OVERRIDE { + void EmitNativeCode(CodeGenerator* codegen) override { CodeGeneratorX86* x86_codegen = down_cast<CodeGeneratorX86*>(codegen); __ Bind(GetEntryLabel()); LocationSummary* locations = instruction_->GetLocations(); @@ -389,7 +402,7 @@ class DeoptimizationSlowPathX86 : public SlowPathCode { CheckEntrypointTypes<kQuickDeoptimize, void, DeoptimizationKind>(); } - const char* GetDescription() const OVERRIDE { return "DeoptimizationSlowPathX86"; } + const char* GetDescription() const override { return "DeoptimizationSlowPathX86"; } private: DISALLOW_COPY_AND_ASSIGN(DeoptimizationSlowPathX86); @@ -399,7 +412,7 @@ class ArraySetSlowPathX86 : public SlowPathCode { public: explicit ArraySetSlowPathX86(HInstruction* instruction) : SlowPathCode(instruction) {} - void EmitNativeCode(CodeGenerator* codegen) OVERRIDE { + void EmitNativeCode(CodeGenerator* codegen) override { LocationSummary* locations = instruction_->GetLocations(); __ Bind(GetEntryLabel()); SaveLiveRegisters(codegen, locations); @@ -430,7 +443,7 @@ class ArraySetSlowPathX86 : public SlowPathCode { __ jmp(GetExitLabel()); } - const char* GetDescription() const OVERRIDE { return "ArraySetSlowPathX86"; } + const char* GetDescription() const override { return "ArraySetSlowPathX86"; } private: DISALLOW_COPY_AND_ASSIGN(ArraySetSlowPathX86); @@ -458,9 +471,9 @@ class ReadBarrierMarkSlowPathX86 : public SlowPathCode { DCHECK(kEmitCompilerReadBarrier); } - const char* GetDescription() const OVERRIDE { return "ReadBarrierMarkSlowPathX86"; } + const char* GetDescription() const override { return "ReadBarrierMarkSlowPathX86"; } - void EmitNativeCode(CodeGenerator* codegen) OVERRIDE { + void EmitNativeCode(CodeGenerator* codegen) override { LocationSummary* locations = instruction_->GetLocations(); Register ref_reg = ref_.AsRegister<Register>(); DCHECK(locations->CanCall()); @@ -545,9 +558,9 @@ class ReadBarrierMarkAndUpdateFieldSlowPathX86 : public SlowPathCode { DCHECK(kEmitCompilerReadBarrier); } - const char* GetDescription() const OVERRIDE { return "ReadBarrierMarkAndUpdateFieldSlowPathX86"; } + const char* GetDescription() const override { return "ReadBarrierMarkAndUpdateFieldSlowPathX86"; } - void EmitNativeCode(CodeGenerator* codegen) OVERRIDE { + void EmitNativeCode(CodeGenerator* codegen) override { LocationSummary* locations = instruction_->GetLocations(); Register ref_reg = ref_.AsRegister<Register>(); 
DCHECK(locations->CanCall()); @@ -711,7 +724,7 @@ class ReadBarrierForHeapReferenceSlowPathX86 : public SlowPathCode { DCHECK(!obj.Equals(ref)) << "obj=" << obj << " ref=" << ref; } - void EmitNativeCode(CodeGenerator* codegen) OVERRIDE { + void EmitNativeCode(CodeGenerator* codegen) override { CodeGeneratorX86* x86_codegen = down_cast<CodeGeneratorX86*>(codegen); LocationSummary* locations = instruction_->GetLocations(); Register reg_out = out_.AsRegister<Register>(); @@ -830,7 +843,7 @@ class ReadBarrierForHeapReferenceSlowPathX86 : public SlowPathCode { __ jmp(GetExitLabel()); } - const char* GetDescription() const OVERRIDE { return "ReadBarrierForHeapReferenceSlowPathX86"; } + const char* GetDescription() const override { return "ReadBarrierForHeapReferenceSlowPathX86"; } private: Register FindAvailableCallerSaveRegister(CodeGenerator* codegen) { @@ -870,7 +883,7 @@ class ReadBarrierForRootSlowPathX86 : public SlowPathCode { DCHECK(kEmitCompilerReadBarrier); } - void EmitNativeCode(CodeGenerator* codegen) OVERRIDE { + void EmitNativeCode(CodeGenerator* codegen) override { LocationSummary* locations = instruction_->GetLocations(); Register reg_out = out_.AsRegister<Register>(); DCHECK(locations->CanCall()); @@ -896,7 +909,7 @@ class ReadBarrierForRootSlowPathX86 : public SlowPathCode { __ jmp(GetExitLabel()); } - const char* GetDescription() const OVERRIDE { return "ReadBarrierForRootSlowPathX86"; } + const char* GetDescription() const override { return "ReadBarrierForRootSlowPathX86"; } private: const Location out_; @@ -954,6 +967,10 @@ void CodeGeneratorX86::DumpFloatingPointRegister(std::ostream& stream, int reg) stream << XmmRegister(reg); } +const X86InstructionSetFeatures& CodeGeneratorX86::GetInstructionSetFeatures() const { + return *GetCompilerOptions().GetInstructionSetFeatures()->AsX86InstructionSetFeatures(); +} + size_t CodeGeneratorX86::SaveCoreRegister(size_t stack_index, uint32_t reg_id) { __ movl(Address(ESP, stack_index), static_cast<Register>(reg_id)); return kX86WordSize; @@ -1005,7 +1022,6 @@ void CodeGeneratorX86::GenerateInvokeRuntime(int32_t entry_point_offset) { } CodeGeneratorX86::CodeGeneratorX86(HGraph* graph, - const X86InstructionSetFeatures& isa_features, const CompilerOptions& compiler_options, OptimizingCompilerStats* stats) : CodeGenerator(graph, @@ -1023,13 +1039,13 @@ CodeGeneratorX86::CodeGeneratorX86(HGraph* graph, instruction_visitor_(graph, this), move_resolver_(graph->GetAllocator(), this), assembler_(graph->GetAllocator()), - isa_features_(isa_features), boot_image_method_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)), method_bss_entry_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)), boot_image_type_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)), type_bss_entry_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)), boot_image_string_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)), string_bss_entry_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)), + boot_image_intrinsic_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)), jit_string_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)), jit_class_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)), constant_area_start_(-1), @@ -1212,7 +1228,7 @@ Location InvokeDexCallingConventionVisitorX86::GetNextLocation(DataType::Type ty case DataType::Type::kUint64: case DataType::Type::kVoid: LOG(FATAL) << "Unexpected parameter type " << type; - break; + 
UNREACHABLE(); } return Location::NoLocation(); } @@ -1704,7 +1720,7 @@ void InstructionCodeGeneratorX86::VisitIf(HIf* if_instr) { nullptr : codegen_->GetLabelOf(true_successor); Label* false_target = codegen_->GoesToNextBlock(if_instr->GetBlock(), false_successor) ? nullptr : codegen_->GetLabelOf(false_successor); - GenerateTestAndBranch(if_instr, /* condition_input_index */ 0, true_target, false_target); + GenerateTestAndBranch(if_instr, /* condition_input_index= */ 0, true_target, false_target); } void LocationsBuilderX86::VisitDeoptimize(HDeoptimize* deoptimize) { @@ -1722,9 +1738,9 @@ void LocationsBuilderX86::VisitDeoptimize(HDeoptimize* deoptimize) { void InstructionCodeGeneratorX86::VisitDeoptimize(HDeoptimize* deoptimize) { SlowPathCode* slow_path = deopt_slow_paths_.NewSlowPath<DeoptimizationSlowPathX86>(deoptimize); GenerateTestAndBranch<Label>(deoptimize, - /* condition_input_index */ 0, + /* condition_input_index= */ 0, slow_path->GetEntryLabel(), - /* false_target */ nullptr); + /* false_target= */ nullptr); } void LocationsBuilderX86::VisitShouldDeoptimizeFlag(HShouldDeoptimizeFlag* flag) { @@ -1847,7 +1863,7 @@ void InstructionCodeGeneratorX86::VisitSelect(HSelect* select) { } else { NearLabel false_target; GenerateTestAndBranch<NearLabel>( - select, /* condition_input_index */ 2, /* true_target */ nullptr, &false_target); + select, /* condition_input_index= */ 2, /* true_target= */ nullptr, &false_target); codegen_->MoveLocation(locations->Out(), locations->InAt(1), select->GetType()); __ Bind(&false_target); } @@ -2185,7 +2201,9 @@ void LocationsBuilderX86::VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invok IntrinsicLocationsBuilderX86 intrinsic(codegen_); if (intrinsic.TryDispatch(invoke)) { - if (invoke->GetLocations()->CanCall() && invoke->HasPcRelativeMethodLoadKind()) { + if (invoke->GetLocations()->CanCall() && + invoke->HasPcRelativeMethodLoadKind() && + invoke->GetLocations()->InAt(invoke->GetSpecialInputIndex()).IsInvalid()) { invoke->GetLocations()->SetInAt(invoke->GetSpecialInputIndex(), Location::Any()); } return; @@ -2308,6 +2326,14 @@ void InstructionCodeGeneratorX86::VisitInvokePolymorphic(HInvokePolymorphic* inv codegen_->GenerateInvokePolymorphicCall(invoke); } +void LocationsBuilderX86::VisitInvokeCustom(HInvokeCustom* invoke) { + HandleInvoke(invoke); +} + +void InstructionCodeGeneratorX86::VisitInvokeCustom(HInvokeCustom* invoke) { + codegen_->GenerateInvokeCustomCall(invoke); +} + void LocationsBuilderX86::VisitNeg(HNeg* neg) { LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(neg, LocationSummary::kNoCall); @@ -2963,7 +2989,7 @@ void LocationsBuilderX86::VisitAdd(HAdd* add) { default: LOG(FATAL) << "Unexpected add type " << add->GetResultType(); - break; + UNREACHABLE(); } } @@ -3408,8 +3434,8 @@ void InstructionCodeGeneratorX86::GenerateRemFP(HRem *rem) { // Load the values to the FP stack in reverse order, using temporaries if needed. const bool is_wide = !is_float; - PushOntoFPStack(second, elem_size, 2 * elem_size, /* is_fp */ true, is_wide); - PushOntoFPStack(first, 0, 2 * elem_size, /* is_fp */ true, is_wide); + PushOntoFPStack(second, elem_size, 2 * elem_size, /* is_fp= */ true, is_wide); + PushOntoFPStack(first, 0, 2 * elem_size, /* is_fp= */ true, is_wide); // Loop doing FPREM until we stabilize. 
NearLabel retry; @@ -3471,6 +3497,27 @@ void InstructionCodeGeneratorX86::DivRemOneOrMinusOne(HBinaryOperation* instruct } } +void InstructionCodeGeneratorX86::RemByPowerOfTwo(HRem* instruction) { + LocationSummary* locations = instruction->GetLocations(); + Location second = locations->InAt(1); + + Register out = locations->Out().AsRegister<Register>(); + Register numerator = locations->InAt(0).AsRegister<Register>(); + + int32_t imm = Int64FromConstant(second.GetConstant()); + DCHECK(IsPowerOfTwo(AbsOrMin(imm))); + uint32_t abs_imm = static_cast<uint32_t>(AbsOrMin(imm)); + + Register tmp = locations->GetTemp(0).AsRegister<Register>(); + NearLabel done; + __ movl(out, numerator); + __ andl(out, Immediate(abs_imm-1)); + __ j(Condition::kZero, &done); + __ leal(tmp, Address(out, static_cast<int32_t>(~(abs_imm-1)))); + __ testl(numerator, numerator); + __ cmovl(Condition::kLess, out, tmp); + __ Bind(&done); +} void InstructionCodeGeneratorX86::DivByPowerOfTwo(HDiv* instruction) { LocationSummary* locations = instruction->GetLocations(); @@ -3525,7 +3572,7 @@ void InstructionCodeGeneratorX86::GenerateDivRemWithAnyConstant(HBinaryOperation int64_t magic; int shift; - CalculateMagicAndShiftForDivRem(imm, false /* is_long */, &magic, &shift); + CalculateMagicAndShiftForDivRem(imm, /* is_long= */ false, &magic, &shift); // Save the numerator. __ movl(num, eax); @@ -3584,8 +3631,12 @@ void InstructionCodeGeneratorX86::GenerateDivRemIntegral(HBinaryOperation* instr // Do not generate anything for 0. DivZeroCheck would forbid any generated code. } else if (imm == 1 || imm == -1) { DivRemOneOrMinusOne(instruction); - } else if (is_div && IsPowerOfTwo(AbsOrMin(imm))) { - DivByPowerOfTwo(instruction->AsDiv()); + } else if (IsPowerOfTwo(AbsOrMin(imm))) { + if (is_div) { + DivByPowerOfTwo(instruction->AsDiv()); + } else { + RemByPowerOfTwo(instruction->AsRem()); + } } else { DCHECK(imm <= -2 || imm >= 2); GenerateDivRemWithAnyConstant(instruction); @@ -3802,6 +3853,301 @@ void InstructionCodeGeneratorX86::VisitRem(HRem* rem) { } } +static void CreateMinMaxLocations(ArenaAllocator* allocator, HBinaryOperation* minmax) { + LocationSummary* locations = new (allocator) LocationSummary(minmax); + switch (minmax->GetResultType()) { + case DataType::Type::kInt32: + locations->SetInAt(0, Location::RequiresRegister()); + locations->SetInAt(1, Location::RequiresRegister()); + locations->SetOut(Location::SameAsFirstInput()); + break; + case DataType::Type::kInt64: + locations->SetInAt(0, Location::RequiresRegister()); + locations->SetInAt(1, Location::RequiresRegister()); + locations->SetOut(Location::SameAsFirstInput()); + // Register to use to perform a long subtract to set cc. 
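A scalar equivalent of the RemByPowerOfTwo sequence added above (andl with abs_imm-1, leal with ~(abs_imm-1), testl/cmovl on the dividend's sign): it yields the remainder of a signed 32-bit dividend by a power-of-two magnitude, with the sign of the dividend, which is the Java % rule. Plain C++ for illustration only; the function name is not ART's:

#include <cstdint>

// n % d for a power-of-two magnitude d (abs_imm), keeping the dividend's sign.
// Unsigned arithmetic mirrors the wrapping machine ops (including abs_imm == 1u << 31).
int32_t RemByPowerOfTwoRef(int32_t n, uint32_t abs_imm) {
  uint32_t mask = abs_imm - 1u;                    // andl(out, Immediate(abs_imm - 1))
  uint32_t rem = static_cast<uint32_t>(n) & mask;
  if (rem != 0u && n < 0) {                        // j(kZero, &done); testl; cmovl(kLess)
    rem -= abs_imm;                                // leal(tmp, Address(out, ~(abs_imm - 1)))
  }
  return static_cast<int32_t>(rem);                // e.g. -5 % 4 == -1, -8 % 4 == 0
}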
+ locations->AddTemp(Location::RequiresRegister()); + break; + case DataType::Type::kFloat32: + locations->SetInAt(0, Location::RequiresFpuRegister()); + locations->SetInAt(1, Location::RequiresFpuRegister()); + locations->SetOut(Location::SameAsFirstInput()); + locations->AddTemp(Location::RequiresRegister()); + break; + case DataType::Type::kFloat64: + locations->SetInAt(0, Location::RequiresFpuRegister()); + locations->SetInAt(1, Location::RequiresFpuRegister()); + locations->SetOut(Location::SameAsFirstInput()); + break; + default: + LOG(FATAL) << "Unexpected type for HMinMax " << minmax->GetResultType(); + } +} + +void InstructionCodeGeneratorX86::GenerateMinMaxInt(LocationSummary* locations, + bool is_min, + DataType::Type type) { + Location op1_loc = locations->InAt(0); + Location op2_loc = locations->InAt(1); + + // Shortcut for same input locations. + if (op1_loc.Equals(op2_loc)) { + // Can return immediately, as op1_loc == out_loc. + // Note: if we ever support separate registers, e.g., output into memory, we need to check for + // a copy here. + DCHECK(locations->Out().Equals(op1_loc)); + return; + } + + if (type == DataType::Type::kInt64) { + // Need to perform a subtract to get the sign right. + // op1 is already in the same location as the output. + Location output = locations->Out(); + Register output_lo = output.AsRegisterPairLow<Register>(); + Register output_hi = output.AsRegisterPairHigh<Register>(); + + Register op2_lo = op2_loc.AsRegisterPairLow<Register>(); + Register op2_hi = op2_loc.AsRegisterPairHigh<Register>(); + + // The comparison is performed by subtracting the second operand from + // the first operand and then setting the status flags in the same + // manner as the SUB instruction." + __ cmpl(output_lo, op2_lo); + + // Now use a temp and the borrow to finish the subtraction of op2_hi. + Register temp = locations->GetTemp(0).AsRegister<Register>(); + __ movl(temp, output_hi); + __ sbbl(temp, op2_hi); + + // Now the condition code is correct. + Condition cond = is_min ? Condition::kGreaterEqual : Condition::kLess; + __ cmovl(cond, output_lo, op2_lo); + __ cmovl(cond, output_hi, op2_hi); + } else { + DCHECK_EQ(type, DataType::Type::kInt32); + Register out = locations->Out().AsRegister<Register>(); + Register op2 = op2_loc.AsRegister<Register>(); + + // (out := op1) + // out <=? op2 + // if out is min jmp done + // out := op2 + // done: + + __ cmpl(out, op2); + Condition cond = is_min ? Condition::kGreater : Condition::kLess; + __ cmovl(cond, out, op2); + } +} + +void InstructionCodeGeneratorX86::GenerateMinMaxFP(LocationSummary* locations, + bool is_min, + DataType::Type type) { + Location op1_loc = locations->InAt(0); + Location op2_loc = locations->InAt(1); + Location out_loc = locations->Out(); + XmmRegister out = out_loc.AsFpuRegister<XmmRegister>(); + + // Shortcut for same input locations. + if (op1_loc.Equals(op2_loc)) { + DCHECK(out_loc.Equals(op1_loc)); + return; + } + + // (out := op1) + // out <=? op2 + // if Nan jmp Nan_label + // if out is min jmp done + // if op2 is min jmp op2_label + // handle -0/+0 + // jmp done + // Nan_label: + // out := NaN + // op2_label: + // out := op2 + // done: + // + // This removes one jmp, but needs to copy one input (op1) to out. + // + // TODO: This is straight from Quick (except literal pool). Make NaN an out-of-line slowpath? 
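GenerateMinMaxInt above derives the signed 64-bit comparison from the two register-pair halves: cmpl on the low words produces the borrow, sbbl folds it into the high-word subtraction, and the kLess/kGreaterEqual condition afterwards reflects the sign of the full 64-bit difference, which is all the two cmov instructions need. A standalone C++ sketch of why that flag sequence equals a 64-bit signed less-than (illustrative only, not ART code):

#include <cassert>
#include <cstdint>

// True iff a < b as signed 64-bit values, computed the way the emitted
// cmpl/sbbl pair computes it from 32-bit halves.
bool SignedLessThan64(int64_t a, int64_t b) {
  uint32_t a_lo = static_cast<uint32_t>(a);
  uint32_t b_lo = static_cast<uint32_t>(b);
  int32_t a_hi = static_cast<int32_t>(static_cast<uint64_t>(a) >> 32);
  int32_t b_hi = static_cast<int32_t>(static_cast<uint64_t>(b) >> 32);
  bool borrow = a_lo < b_lo;  // cmpl output_lo, op2_lo
  // sbbl temp, op2_hi: the signed "less" condition (SF != OF) afterwards is
  // the sign of the exact value a_hi - b_hi - borrow, i.e. the sign of a - b.
  int64_t high_diff =
      static_cast<int64_t>(a_hi) - static_cast<int64_t>(b_hi) - (borrow ? 1 : 0);
  return high_diff < 0;
}

int main() {
  const int64_t samples[] = {0, 1, -1, 42, -42, INT64_MIN, INT64_MAX, 1LL << 33};
  for (int64_t a : samples) {
    for (int64_t b : samples) {
      assert(SignedLessThan64(a, b) == (a < b));
    }
  }
  return 0;
}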
+ + XmmRegister op2 = op2_loc.AsFpuRegister<XmmRegister>(); + + NearLabel nan, done, op2_label; + if (type == DataType::Type::kFloat64) { + __ ucomisd(out, op2); + } else { + DCHECK_EQ(type, DataType::Type::kFloat32); + __ ucomiss(out, op2); + } + + __ j(Condition::kParityEven, &nan); + + __ j(is_min ? Condition::kAbove : Condition::kBelow, &op2_label); + __ j(is_min ? Condition::kBelow : Condition::kAbove, &done); + + // Handle 0.0/-0.0. + if (is_min) { + if (type == DataType::Type::kFloat64) { + __ orpd(out, op2); + } else { + __ orps(out, op2); + } + } else { + if (type == DataType::Type::kFloat64) { + __ andpd(out, op2); + } else { + __ andps(out, op2); + } + } + __ jmp(&done); + + // NaN handling. + __ Bind(&nan); + if (type == DataType::Type::kFloat64) { + // TODO: Use a constant from the constant table (requires extra input). + __ LoadLongConstant(out, kDoubleNaN); + } else { + Register constant = locations->GetTemp(0).AsRegister<Register>(); + __ movl(constant, Immediate(kFloatNaN)); + __ movd(out, constant); + } + __ jmp(&done); + + // out := op2; + __ Bind(&op2_label); + if (type == DataType::Type::kFloat64) { + __ movsd(out, op2); + } else { + __ movss(out, op2); + } + + // Done. + __ Bind(&done); +} + +void InstructionCodeGeneratorX86::GenerateMinMax(HBinaryOperation* minmax, bool is_min) { + DataType::Type type = minmax->GetResultType(); + switch (type) { + case DataType::Type::kInt32: + case DataType::Type::kInt64: + GenerateMinMaxInt(minmax->GetLocations(), is_min, type); + break; + case DataType::Type::kFloat32: + case DataType::Type::kFloat64: + GenerateMinMaxFP(minmax->GetLocations(), is_min, type); + break; + default: + LOG(FATAL) << "Unexpected type for HMinMax " << type; + } +} + +void LocationsBuilderX86::VisitMin(HMin* min) { + CreateMinMaxLocations(GetGraph()->GetAllocator(), min); +} + +void InstructionCodeGeneratorX86::VisitMin(HMin* min) { + GenerateMinMax(min, /*is_min*/ true); +} + +void LocationsBuilderX86::VisitMax(HMax* max) { + CreateMinMaxLocations(GetGraph()->GetAllocator(), max); +} + +void InstructionCodeGeneratorX86::VisitMax(HMax* max) { + GenerateMinMax(max, /*is_min*/ false); +} + +void LocationsBuilderX86::VisitAbs(HAbs* abs) { + LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(abs); + switch (abs->GetResultType()) { + case DataType::Type::kInt32: + locations->SetInAt(0, Location::RegisterLocation(EAX)); + locations->SetOut(Location::SameAsFirstInput()); + locations->AddTemp(Location::RegisterLocation(EDX)); + break; + case DataType::Type::kInt64: + locations->SetInAt(0, Location::RequiresRegister()); + locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap); + locations->AddTemp(Location::RequiresRegister()); + break; + case DataType::Type::kFloat32: + locations->SetInAt(0, Location::RequiresFpuRegister()); + locations->SetOut(Location::SameAsFirstInput()); + locations->AddTemp(Location::RequiresFpuRegister()); + locations->AddTemp(Location::RequiresRegister()); + break; + case DataType::Type::kFloat64: + locations->SetInAt(0, Location::RequiresFpuRegister()); + locations->SetOut(Location::SameAsFirstInput()); + locations->AddTemp(Location::RequiresFpuRegister()); + break; + default: + LOG(FATAL) << "Unexpected type for HAbs " << abs->GetResultType(); + } +} + +void InstructionCodeGeneratorX86::VisitAbs(HAbs* abs) { + LocationSummary* locations = abs->GetLocations(); + switch (abs->GetResultType()) { + case DataType::Type::kInt32: { + Register out = locations->Out().AsRegister<Register>(); + 
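The GenerateMinMaxFP sequence whose pseudocode appears above handles the two cases an ordinary compare-and-select would get wrong: a NaN operand, detected through the parity flag that ucomiss/ucomisd sets, and equal operands that differ only in sign, where the +-0.0 tie is resolved by OR-ing the bit patterns for min (orps/orpd) or AND-ing them for max (andps/andpd). A standalone C++ sketch of the float min variant, illustrative only and not the ART code:

#include <cassert>
#include <cmath>
#include <cstdint>
#include <cstring>
#include <limits>

float MinLikeGenerated(float a, float b) {
  if (std::isnan(a) || std::isnan(b)) {              // kParityEven after ucomiss
    return std::numeric_limits<float>::quiet_NaN();  // out := kFloatNaN
  }
  if (a < b) return a;                               // kBelow: keep op1 (already in out)
  if (b < a) return b;                               // kAbove: out := op2
  // Operands compare equal: possibly +0.0 vs -0.0.  OR the raw bits so the
  // sign bit survives and -0.0 wins for min (the orps path).
  uint32_t ua, ub;
  std::memcpy(&ua, &a, sizeof(ua));
  std::memcpy(&ub, &b, sizeof(ub));
  const uint32_t bits = ua | ub;
  float result;
  std::memcpy(&result, &bits, sizeof(result));
  return result;
}

int main() {
  assert(std::signbit(MinLikeGenerated(-0.0f, +0.0f)));
  assert(std::signbit(MinLikeGenerated(+0.0f, -0.0f)));
  assert(MinLikeGenerated(1.0f, 2.0f) == 1.0f);
  assert(std::isnan(MinLikeGenerated(1.0f, std::numeric_limits<float>::quiet_NaN())));
  return 0;
}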
DCHECK_EQ(out, EAX); + Register temp = locations->GetTemp(0).AsRegister<Register>(); + DCHECK_EQ(temp, EDX); + // Sign extend EAX into EDX. + __ cdq(); + // XOR EAX with sign. + __ xorl(EAX, EDX); + // Subtract out sign to correct. + __ subl(EAX, EDX); + // The result is in EAX. + break; + } + case DataType::Type::kInt64: { + Location input = locations->InAt(0); + Register input_lo = input.AsRegisterPairLow<Register>(); + Register input_hi = input.AsRegisterPairHigh<Register>(); + Location output = locations->Out(); + Register output_lo = output.AsRegisterPairLow<Register>(); + Register output_hi = output.AsRegisterPairHigh<Register>(); + Register temp = locations->GetTemp(0).AsRegister<Register>(); + // Compute the sign into the temporary. + __ movl(temp, input_hi); + __ sarl(temp, Immediate(31)); + // Store the sign into the output. + __ movl(output_lo, temp); + __ movl(output_hi, temp); + // XOR the input to the output. + __ xorl(output_lo, input_lo); + __ xorl(output_hi, input_hi); + // Subtract the sign. + __ subl(output_lo, temp); + __ sbbl(output_hi, temp); + break; + } + case DataType::Type::kFloat32: { + XmmRegister out = locations->Out().AsFpuRegister<XmmRegister>(); + XmmRegister temp = locations->GetTemp(0).AsFpuRegister<XmmRegister>(); + Register constant = locations->GetTemp(1).AsRegister<Register>(); + __ movl(constant, Immediate(INT32_C(0x7FFFFFFF))); + __ movd(temp, constant); + __ andps(out, temp); + break; + } + case DataType::Type::kFloat64: { + XmmRegister out = locations->Out().AsFpuRegister<XmmRegister>(); + XmmRegister temp = locations->GetTemp(0).AsFpuRegister<XmmRegister>(); + // TODO: Use a constant from the constant table (requires extra input). + __ LoadLongConstant(temp, INT64_C(0x7FFFFFFFFFFFFFFF)); + __ andpd(out, temp); + break; + } + default: + LOG(FATAL) << "Unexpected type for HAbs " << abs->GetResultType(); + } +} + void LocationsBuilderX86::VisitDivZeroCheck(HDivZeroCheck* instruction) { LocationSummary* locations = codegen_->CreateThrowingSlowPathLocations(instruction); switch (instruction->GetType()) { @@ -4184,29 +4530,14 @@ void LocationsBuilderX86::VisitNewInstance(HNewInstance* instruction) { LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary( instruction, LocationSummary::kCallOnMainOnly); locations->SetOut(Location::RegisterLocation(EAX)); - if (instruction->IsStringAlloc()) { - locations->AddTemp(Location::RegisterLocation(kMethodRegisterArgument)); - } else { - InvokeRuntimeCallingConvention calling_convention; - locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0))); - } + InvokeRuntimeCallingConvention calling_convention; + locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0))); } void InstructionCodeGeneratorX86::VisitNewInstance(HNewInstance* instruction) { - // Note: if heap poisoning is enabled, the entry point takes cares - // of poisoning the reference. - if (instruction->IsStringAlloc()) { - // String is allocated through StringFactory. Call NewEmptyString entry point. 
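The int32 HAbs lowering above is the classic sign-mask trick: cdq replicates the sign of EAX into EDX, and (x ^ sign) - sign is |x| with no branch; the int64 case extends it with sbbl, and the FP cases simply clear the sign bit with an andps/andpd mask. A tiny standalone C++ sketch (illustrative only, not ART code):

#include <cassert>
#include <cstdint>

int32_t BranchlessAbs(int32_t x) {
  // cdq: sign is 0 for x >= 0 and 0xFFFFFFFF for x < 0 (arithmetic shift
  // assumed, as on the compilers ART targets).
  const uint32_t sign = static_cast<uint32_t>(x >> 31);
  // xorl EAX, EDX ; subl EAX, EDX -- done in unsigned arithmetic so the
  // INT32_MIN wraparound of the emitted code is reproduced without UB.
  return static_cast<int32_t>((static_cast<uint32_t>(x) ^ sign) - sign);
}

int main() {
  assert(BranchlessAbs(-5) == 5);
  assert(BranchlessAbs(7) == 7);
  assert(BranchlessAbs(0) == 0);
  return 0;
}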
- Register temp = instruction->GetLocations()->GetTemp(0).AsRegister<Register>(); - MemberOffset code_offset = ArtMethod::EntryPointFromQuickCompiledCodeOffset(kX86PointerSize); - __ fs()->movl(temp, Address::Absolute(QUICK_ENTRY_POINT(pNewEmptyString))); - __ call(Address(temp, code_offset.Int32Value())); - codegen_->RecordPcInfo(instruction, instruction->GetDexPc()); - } else { - codegen_->InvokeRuntime(instruction->GetEntrypoint(), instruction, instruction->GetDexPc()); - CheckEntrypointTypes<kQuickAllocObjectWithChecks, void*, mirror::Class*>(); - DCHECK(!codegen_->IsLeafMethod()); - } + codegen_->InvokeRuntime(instruction->GetEntrypoint(), instruction, instruction->GetDexPc()); + CheckEntrypointTypes<kQuickAllocObjectWithChecks, void*, mirror::Class*>(); + DCHECK(!codegen_->IsLeafMethod()); } void LocationsBuilderX86::VisitNewArray(HNewArray* instruction) { @@ -4219,10 +4550,8 @@ void LocationsBuilderX86::VisitNewArray(HNewArray* instruction) { } void InstructionCodeGeneratorX86::VisitNewArray(HNewArray* instruction) { - // Note: if heap poisoning is enabled, the entry point takes cares - // of poisoning the reference. - QuickEntrypointEnum entrypoint = - CodeGenerator::GetArrayAllocationEntrypoint(instruction->GetLoadClass()->GetClass()); + // Note: if heap poisoning is enabled, the entry point takes care of poisoning the reference. + QuickEntrypointEnum entrypoint = CodeGenerator::GetArrayAllocationEntrypoint(instruction); codegen_->InvokeRuntime(entrypoint, instruction, instruction->GetDexPc()); CheckEntrypointTypes<kQuickAllocArrayResolved, void*, mirror::Class*, int32_t>(); DCHECK(!codegen_->IsLeafMethod()); @@ -4472,14 +4801,14 @@ void CodeGeneratorX86::GenerateMemoryBarrier(MemBarrierKind kind) { } case MemBarrierKind::kNTStoreStore: // Non-Temporal Store/Store needs an explicit fence. - MemoryFence(/* non-temporal */ true); + MemoryFence(/* non-temporal= */ true); break; } } HInvokeStaticOrDirect::DispatchInfo CodeGeneratorX86::GetSupportedInvokeStaticOrDirectDispatch( const HInvokeStaticOrDirect::DispatchInfo& desired_dispatch_info, - HInvokeStaticOrDirect* invoke ATTRIBUTE_UNUSED) { + ArtMethod* method ATTRIBUTE_UNUSED) { return desired_dispatch_info; } @@ -4531,9 +4860,15 @@ void CodeGeneratorX86::GenerateStaticOrDirectCall( RecordBootImageMethodPatch(invoke); break; } - case HInvokeStaticOrDirect::MethodLoadKind::kDirectAddress: - __ movl(temp.AsRegister<Register>(), Immediate(invoke->GetMethodAddress())); + case HInvokeStaticOrDirect::MethodLoadKind::kBootImageRelRo: { + Register base_reg = GetInvokeStaticOrDirectExtraParameter(invoke, + temp.AsRegister<Register>()); + __ movl(temp.AsRegister<Register>(), Address(base_reg, kDummy32BitOffset)); + RecordBootImageRelRoPatch( + invoke->InputAt(invoke->GetSpecialInputIndex())->AsX86ComputeBaseMethodAddress(), + GetBootImageOffset(invoke)); break; + } case HInvokeStaticOrDirect::MethodLoadKind::kBssEntry: { Register base_reg = GetInvokeStaticOrDirectExtraParameter(invoke, temp.AsRegister<Register>()); @@ -4541,6 +4876,9 @@ void CodeGeneratorX86::GenerateStaticOrDirectCall( RecordMethodBssEntryPatch(invoke); break; } + case HInvokeStaticOrDirect::MethodLoadKind::kJitDirectAddress: + __ movl(temp.AsRegister<Register>(), Immediate(invoke->GetMethodAddress())); + break; case HInvokeStaticOrDirect::MethodLoadKind::kRuntimeCall: { GenerateInvokeStaticOrDirectRuntimeCall(invoke, temp, slow_path); return; // No code pointer retrieval; the runtime performs the call directly. 
@@ -4595,6 +4933,20 @@ void CodeGeneratorX86::GenerateVirtualCall( RecordPcInfo(invoke, invoke->GetDexPc(), slow_path); } +void CodeGeneratorX86::RecordBootImageIntrinsicPatch(HX86ComputeBaseMethodAddress* method_address, + uint32_t intrinsic_data) { + boot_image_intrinsic_patches_.emplace_back( + method_address, /* target_dex_file= */ nullptr, intrinsic_data); + __ Bind(&boot_image_intrinsic_patches_.back().label); +} + +void CodeGeneratorX86::RecordBootImageRelRoPatch(HX86ComputeBaseMethodAddress* method_address, + uint32_t boot_image_offset) { + boot_image_method_patches_.emplace_back( + method_address, /* target_dex_file= */ nullptr, boot_image_offset); + __ Bind(&boot_image_method_patches_.back().label); +} + void CodeGeneratorX86::RecordBootImageMethodPatch(HInvokeStaticOrDirect* invoke) { DCHECK_EQ(invoke->InputCount(), invoke->GetNumberOfArguments() + 1u); HX86ComputeBaseMethodAddress* method_address = @@ -4639,7 +4991,6 @@ void CodeGeneratorX86::RecordBootImageStringPatch(HLoadString* load_string) { } Label* CodeGeneratorX86::NewStringBssEntryPatch(HLoadString* load_string) { - DCHECK(!GetCompilerOptions().IsBootImage()); HX86ComputeBaseMethodAddress* method_address = load_string->InputAt(0)->AsX86ComputeBaseMethodAddress(); string_bss_entry_patches_.emplace_back( @@ -4647,6 +4998,62 @@ Label* CodeGeneratorX86::NewStringBssEntryPatch(HLoadString* load_string) { return &string_bss_entry_patches_.back().label; } +void CodeGeneratorX86::LoadBootImageAddress(Register reg, + uint32_t boot_image_reference, + HInvokeStaticOrDirect* invoke) { + if (GetCompilerOptions().IsBootImage()) { + DCHECK_EQ(invoke->InputCount(), invoke->GetNumberOfArguments() + 1u); + HX86ComputeBaseMethodAddress* method_address = + invoke->InputAt(invoke->GetSpecialInputIndex())->AsX86ComputeBaseMethodAddress(); + DCHECK(method_address != nullptr); + Register method_address_reg = + invoke->GetLocations()->InAt(invoke->GetSpecialInputIndex()).AsRegister<Register>(); + __ leal(reg, Address(method_address_reg, CodeGeneratorX86::kDummy32BitOffset)); + RecordBootImageIntrinsicPatch(method_address, boot_image_reference); + } else if (GetCompilerOptions().GetCompilePic()) { + DCHECK_EQ(invoke->InputCount(), invoke->GetNumberOfArguments() + 1u); + HX86ComputeBaseMethodAddress* method_address = + invoke->InputAt(invoke->GetSpecialInputIndex())->AsX86ComputeBaseMethodAddress(); + DCHECK(method_address != nullptr); + Register method_address_reg = + invoke->GetLocations()->InAt(invoke->GetSpecialInputIndex()).AsRegister<Register>(); + __ movl(reg, Address(method_address_reg, CodeGeneratorX86::kDummy32BitOffset)); + RecordBootImageRelRoPatch(method_address, boot_image_reference); + } else { + DCHECK(Runtime::Current()->UseJitCompilation()); + gc::Heap* heap = Runtime::Current()->GetHeap(); + DCHECK(!heap->GetBootImageSpaces().empty()); + const uint8_t* address = heap->GetBootImageSpaces()[0]->Begin() + boot_image_reference; + __ movl(reg, Immediate(dchecked_integral_cast<uint32_t>(reinterpret_cast<uintptr_t>(address)))); + } +} + +void CodeGeneratorX86::AllocateInstanceForIntrinsic(HInvokeStaticOrDirect* invoke, + uint32_t boot_image_offset) { + DCHECK(invoke->IsStatic()); + InvokeRuntimeCallingConvention calling_convention; + Register argument = calling_convention.GetRegisterAt(0); + if (GetCompilerOptions().IsBootImage()) { + DCHECK_EQ(boot_image_offset, IntrinsicVisitor::IntegerValueOfInfo::kInvalidReference); + // Load the class the same way as for HLoadClass::LoadKind::kBootImageLinkTimePcRelative. 
+ DCHECK_EQ(invoke->InputCount(), invoke->GetNumberOfArguments() + 1u); + HX86ComputeBaseMethodAddress* method_address = + invoke->InputAt(invoke->GetSpecialInputIndex())->AsX86ComputeBaseMethodAddress(); + DCHECK(method_address != nullptr); + Register method_address_reg = + invoke->GetLocations()->InAt(invoke->GetSpecialInputIndex()).AsRegister<Register>(); + __ leal(argument, Address(method_address_reg, CodeGeneratorX86::kDummy32BitOffset)); + MethodReference target_method = invoke->GetTargetMethod(); + dex::TypeIndex type_idx = target_method.dex_file->GetMethodId(target_method.index).class_idx_; + boot_image_type_patches_.emplace_back(method_address, target_method.dex_file, type_idx.index_); + __ Bind(&boot_image_type_patches_.back().label); + } else { + LoadBootImageAddress(argument, boot_image_offset, invoke); + } + InvokeRuntime(kQuickAllocObjectInitialized, invoke, invoke->GetDexPc()); + CheckEntrypointTypes<kQuickAllocObjectWithChecks, void*, mirror::Class*>(); +} + // The label points to the end of the "movl" or another instruction but the literal offset // for method patch needs to point to the embedded constant which occupies the last 4 bytes. constexpr uint32_t kLabelPositionToLiteralOffsetAdjustment = 4u; @@ -4664,6 +5071,15 @@ inline void CodeGeneratorX86::EmitPcRelativeLinkerPatches( } } +template <linker::LinkerPatch (*Factory)(size_t, uint32_t, uint32_t)> +linker::LinkerPatch NoDexFileAdapter(size_t literal_offset, + const DexFile* target_dex_file, + uint32_t pc_insn_offset, + uint32_t boot_image_offset) { + DCHECK(target_dex_file == nullptr); // Unused for these patches, should be null. + return Factory(literal_offset, pc_insn_offset, boot_image_offset); +} + void CodeGeneratorX86::EmitLinkerPatches(ArenaVector<linker::LinkerPatch>* linker_patches) { DCHECK(linker_patches->empty()); size_t size = @@ -4672,7 +5088,8 @@ void CodeGeneratorX86::EmitLinkerPatches(ArenaVector<linker::LinkerPatch>* linke boot_image_type_patches_.size() + type_bss_entry_patches_.size() + boot_image_string_patches_.size() + - string_bss_entry_patches_.size(); + string_bss_entry_patches_.size() + + boot_image_intrinsic_patches_.size(); linker_patches->reserve(size); if (GetCompilerOptions().IsBootImage()) { EmitPcRelativeLinkerPatches<linker::LinkerPatch::RelativeMethodPatch>( @@ -4681,12 +5098,14 @@ void CodeGeneratorX86::EmitLinkerPatches(ArenaVector<linker::LinkerPatch>* linke boot_image_type_patches_, linker_patches); EmitPcRelativeLinkerPatches<linker::LinkerPatch::RelativeStringPatch>( boot_image_string_patches_, linker_patches); + EmitPcRelativeLinkerPatches<NoDexFileAdapter<linker::LinkerPatch::IntrinsicReferencePatch>>( + boot_image_intrinsic_patches_, linker_patches); } else { - DCHECK(boot_image_method_patches_.empty()); - EmitPcRelativeLinkerPatches<linker::LinkerPatch::TypeClassTablePatch>( - boot_image_type_patches_, linker_patches); - EmitPcRelativeLinkerPatches<linker::LinkerPatch::StringInternTablePatch>( - boot_image_string_patches_, linker_patches); + EmitPcRelativeLinkerPatches<NoDexFileAdapter<linker::LinkerPatch::DataBimgRelRoPatch>>( + boot_image_method_patches_, linker_patches); + DCHECK(boot_image_type_patches_.empty()); + DCHECK(boot_image_string_patches_.empty()); + DCHECK(boot_image_intrinsic_patches_.empty()); } EmitPcRelativeLinkerPatches<linker::LinkerPatch::MethodBssEntryPatch>( method_bss_entry_patches_, linker_patches); @@ -4707,9 +5126,25 @@ void CodeGeneratorX86::MarkGCCard(Register temp, __ testl(value, value); __ j(kEqual, &is_null); } + // Load the address of 
the card table into `card`. __ fs()->movl(card, Address::Absolute(Thread::CardTableOffset<kX86PointerSize>().Int32Value())); + // Calculate the offset (in the card table) of the card corresponding to + // `object`. __ movl(temp, object); __ shrl(temp, Immediate(gc::accounting::CardTable::kCardShift)); + // Write the `art::gc::accounting::CardTable::kCardDirty` value into the + // `object`'s card. + // + // Register `card` contains the address of the card table. Note that the card + // table's base is biased during its creation so that it always starts at an + // address whose least-significant byte is equal to `kCardDirty` (see + // art::gc::accounting::CardTable::Create). Therefore the MOVB instruction + // below writes the `kCardDirty` (byte) value into the `object`'s card + // (located at `card + object >> kCardShift`). + // + // This dual use of the value in register `card` (1. to calculate the location + // of the card to mark; and 2. to load the `kCardDirty` value) saves a load + // (no need to explicitly load `kCardDirty` as an immediate value). __ movb(Address(temp, card, TIMES_1, 0), X86ManagedRegister::FromCpuRegister(card).AsByteRegister()); if (value_can_be_null) { @@ -4801,7 +5236,7 @@ void InstructionCodeGeneratorX86::HandleFieldGet(HInstruction* instruction, // Note that a potential implicit null check is handled in this // CodeGeneratorX86::GenerateFieldLoadWithBakerReadBarrier call. codegen_->GenerateFieldLoadWithBakerReadBarrier( - instruction, out, base, offset, /* needs_null_check */ true); + instruction, out, base, offset, /* needs_null_check= */ true); if (is_volatile) { codegen_->GenerateMemoryBarrier(MemBarrierKind::kLoadAny); } @@ -5284,7 +5719,7 @@ void InstructionCodeGeneratorX86::VisitArrayGet(HArrayGet* instruction) { // Note that a potential implicit null check is handled in this // CodeGeneratorX86::GenerateArrayLoadWithBakerReadBarrier call. 
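The expanded comment above spells out the whole write barrier emitted by MarkGCCard; the sketch below models just the store it performs. In the generated code the register holding the card-table base also supplies the kCardDirty byte, because the table is biased at creation so that the base address's low byte equals kCardDirty; the sketch passes the value explicitly, rebases object addresses so an ordinary vector can stand in for the table, and the shift/value constants are assumptions made for illustration (not ART code):

#include <cassert>
#include <cstdint>
#include <vector>

constexpr uintptr_t kCardShift = 10;  // one card per 1 KiB of heap, assumed for the sketch
constexpr uint8_t kCardDirty = 0x70;  // assumed dirty-marker value

// movl temp, object ; shrl temp, kCardShift ; movb [card + temp], <low byte of card>
void MarkCard(std::vector<uint8_t>& card_table, uintptr_t heap_begin, uintptr_t object) {
  card_table[(object - heap_begin) >> kCardShift] = kCardDirty;
}

int main() {
  const uintptr_t heap_begin = 0x12340000;
  std::vector<uint8_t> cards(1024, 0);
  MarkCard(cards, heap_begin, heap_begin + 5 * 1024 + 16);  // object lives in card #5
  assert(cards[5] == kCardDirty);
  assert(cards[4] == 0);
  return 0;
}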
codegen_->GenerateArrayLoadWithBakerReadBarrier( - instruction, out_loc, obj, data_offset, index, /* needs_null_check */ true); + instruction, out_loc, obj, data_offset, index, /* needs_null_check= */ true); } else { Register out = out_loc.AsRegister<Register>(); __ movl(out, CodeGeneratorX86::ArrayAddress(obj, index, TIMES_4, data_offset)); @@ -6055,14 +6490,14 @@ HLoadClass::LoadKind CodeGeneratorX86::GetSupportedLoadClassKind( case HLoadClass::LoadKind::kReferrersClass: break; case HLoadClass::LoadKind::kBootImageLinkTimePcRelative: - case HLoadClass::LoadKind::kBootImageClassTable: + case HLoadClass::LoadKind::kBootImageRelRo: case HLoadClass::LoadKind::kBssEntry: DCHECK(!Runtime::Current()->UseJitCompilation()); break; + case HLoadClass::LoadKind::kJitBootImageAddress: case HLoadClass::LoadKind::kJitTableAddress: DCHECK(Runtime::Current()->UseJitCompilation()); break; - case HLoadClass::LoadKind::kBootImageAddress: case HLoadClass::LoadKind::kRuntimeCall: break; } @@ -6093,7 +6528,7 @@ void LocationsBuilderX86::VisitLoadClass(HLoadClass* cls) { if (load_kind == HLoadClass::LoadKind::kReferrersClass || load_kind == HLoadClass::LoadKind::kBootImageLinkTimePcRelative || - load_kind == HLoadClass::LoadKind::kBootImageClassTable || + load_kind == HLoadClass::LoadKind::kBootImageRelRo || load_kind == HLoadClass::LoadKind::kBssEntry) { locations->SetInAt(0, Location::RequiresRegister()); } @@ -6101,10 +6536,7 @@ void LocationsBuilderX86::VisitLoadClass(HLoadClass* cls) { if (load_kind == HLoadClass::LoadKind::kBssEntry) { if (!kUseReadBarrier || kUseBakerReadBarrier) { // Rely on the type resolution and/or initialization to save everything. - RegisterSet caller_saves = RegisterSet::Empty(); - InvokeRuntimeCallingConvention calling_convention; - caller_saves.Add(Location::RegisterLocation(calling_convention.GetRegisterAt(0))); - locations->SetCustomSlowPathCallerSaves(caller_saves); + locations->SetCustomSlowPathCallerSaves(OneRegInReferenceOutSaveEverythingCallerSaves()); } else { // For non-Baker read barrier we have a temp-clobbering call. } @@ -6149,7 +6581,7 @@ void InstructionCodeGeneratorX86::VisitLoadClass(HLoadClass* cls) NO_THREAD_SAFE cls, out_loc, Address(current_method, ArtMethod::DeclaringClassOffset().Int32Value()), - /* fixup_label */ nullptr, + /* fixup_label= */ nullptr, read_barrier_option); break; } @@ -6161,25 +6593,12 @@ void InstructionCodeGeneratorX86::VisitLoadClass(HLoadClass* cls) NO_THREAD_SAFE codegen_->RecordBootImageTypePatch(cls); break; } - case HLoadClass::LoadKind::kBootImageAddress: { - DCHECK_EQ(read_barrier_option, kWithoutReadBarrier); - uint32_t address = dchecked_integral_cast<uint32_t>( - reinterpret_cast<uintptr_t>(cls->GetClass().Get())); - DCHECK_NE(address, 0u); - __ movl(out, Immediate(address)); - break; - } - case HLoadClass::LoadKind::kBootImageClassTable: { + case HLoadClass::LoadKind::kBootImageRelRo: { DCHECK(!codegen_->GetCompilerOptions().IsBootImage()); Register method_address = locations->InAt(0).AsRegister<Register>(); __ movl(out, Address(method_address, CodeGeneratorX86::kDummy32BitOffset)); - codegen_->RecordBootImageTypePatch(cls); - // Extract the reference from the slot data, i.e. clear the hash bits. 
- int32_t masked_hash = ClassTable::TableSlot::MaskHash( - ComputeModifiedUtf8Hash(cls->GetDexFile().StringByTypeIdx(cls->GetTypeIndex()))); - if (masked_hash != 0) { - __ subl(out, Immediate(masked_hash)); - } + codegen_->RecordBootImageRelRoPatch(cls->InputAt(0)->AsX86ComputeBaseMethodAddress(), + codegen_->GetBootImageOffset(cls)); break; } case HLoadClass::LoadKind::kBssEntry: { @@ -6190,6 +6609,13 @@ void InstructionCodeGeneratorX86::VisitLoadClass(HLoadClass* cls) NO_THREAD_SAFE generate_null_check = true; break; } + case HLoadClass::LoadKind::kJitBootImageAddress: { + DCHECK_EQ(read_barrier_option, kWithoutReadBarrier); + uint32_t address = reinterpret_cast32<uint32_t>(cls->GetClass().Get()); + DCHECK_NE(address, 0u); + __ movl(out, Immediate(address)); + break; + } case HLoadClass::LoadKind::kJitTableAddress: { Address address = Address::Absolute(CodeGeneratorX86::kDummy32BitOffset); Label* fixup_label = codegen_->NewJitRootClassPatch( @@ -6206,8 +6632,7 @@ void InstructionCodeGeneratorX86::VisitLoadClass(HLoadClass* cls) NO_THREAD_SAFE if (generate_null_check || cls->MustGenerateClinitCheck()) { DCHECK(cls->CanCallRuntime()); - SlowPathCode* slow_path = new (codegen_->GetScopedAllocator()) LoadClassSlowPathX86( - cls, cls, cls->GetDexPc(), cls->MustGenerateClinitCheck()); + SlowPathCode* slow_path = new (codegen_->GetScopedAllocator()) LoadClassSlowPathX86(cls, cls); codegen_->AddSlowPath(slow_path); if (generate_null_check) { @@ -6223,6 +6648,26 @@ void InstructionCodeGeneratorX86::VisitLoadClass(HLoadClass* cls) NO_THREAD_SAFE } } +void LocationsBuilderX86::VisitLoadMethodHandle(HLoadMethodHandle* load) { + InvokeRuntimeCallingConvention calling_convention; + Location location = Location::RegisterLocation(calling_convention.GetRegisterAt(0)); + CodeGenerator::CreateLoadMethodHandleRuntimeCallLocationSummary(load, location, location); +} + +void InstructionCodeGeneratorX86::VisitLoadMethodHandle(HLoadMethodHandle* load) { + codegen_->GenerateLoadMethodHandleRuntimeCall(load); +} + +void LocationsBuilderX86::VisitLoadMethodType(HLoadMethodType* load) { + InvokeRuntimeCallingConvention calling_convention; + Location location = Location::RegisterLocation(calling_convention.GetRegisterAt(0)); + CodeGenerator::CreateLoadMethodTypeRuntimeCallLocationSummary(load, location, location); +} + +void InstructionCodeGeneratorX86::VisitLoadMethodType(HLoadMethodType* load) { + codegen_->GenerateLoadMethodTypeRuntimeCall(load); +} + void LocationsBuilderX86::VisitClinitCheck(HClinitCheck* check) { LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(check, LocationSummary::kCallOnSlowPath); @@ -6230,12 +6675,14 @@ void LocationsBuilderX86::VisitClinitCheck(HClinitCheck* check) { if (check->HasUses()) { locations->SetOut(Location::SameAsFirstInput()); } + // Rely on the type initialization to save everything we need. + locations->SetCustomSlowPathCallerSaves(OneRegInReferenceOutSaveEverythingCallerSaves()); } void InstructionCodeGeneratorX86::VisitClinitCheck(HClinitCheck* check) { // We assume the class to not be null. 
- SlowPathCode* slow_path = new (codegen_->GetScopedAllocator()) LoadClassSlowPathX86( - check->GetLoadClass(), check, check->GetDexPc(), true); + SlowPathCode* slow_path = + new (codegen_->GetScopedAllocator()) LoadClassSlowPathX86(check->GetLoadClass(), check); codegen_->AddSlowPath(slow_path); GenerateClassInitializationCheck(slow_path, check->GetLocations()->InAt(0).AsRegister<Register>()); @@ -6255,18 +6702,38 @@ void InstructionCodeGeneratorX86::GenerateClassInitializationCheck( // No need for memory fence, thanks to the X86 memory model. } +void InstructionCodeGeneratorX86::GenerateBitstringTypeCheckCompare(HTypeCheckInstruction* check, + Register temp) { + uint32_t path_to_root = check->GetBitstringPathToRoot(); + uint32_t mask = check->GetBitstringMask(); + DCHECK(IsPowerOfTwo(mask + 1)); + size_t mask_bits = WhichPowerOf2(mask + 1); + + if (mask_bits == 16u) { + // Compare the bitstring in memory. + __ cmpw(Address(temp, mirror::Class::StatusOffset()), Immediate(path_to_root)); + } else { + // /* uint32_t */ temp = temp->status_ + __ movl(temp, Address(temp, mirror::Class::StatusOffset())); + // Compare the bitstring bits using SUB. + __ subl(temp, Immediate(path_to_root)); + // Shift out bits that do not contribute to the comparison. + __ shll(temp, Immediate(32u - mask_bits)); + } +} + HLoadString::LoadKind CodeGeneratorX86::GetSupportedLoadStringKind( HLoadString::LoadKind desired_string_load_kind) { switch (desired_string_load_kind) { case HLoadString::LoadKind::kBootImageLinkTimePcRelative: - case HLoadString::LoadKind::kBootImageInternTable: + case HLoadString::LoadKind::kBootImageRelRo: case HLoadString::LoadKind::kBssEntry: DCHECK(!Runtime::Current()->UseJitCompilation()); break; + case HLoadString::LoadKind::kJitBootImageAddress: case HLoadString::LoadKind::kJitTableAddress: DCHECK(Runtime::Current()->UseJitCompilation()); break; - case HLoadString::LoadKind::kBootImageAddress: case HLoadString::LoadKind::kRuntimeCall: break; } @@ -6278,7 +6745,7 @@ void LocationsBuilderX86::VisitLoadString(HLoadString* load) { LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(load, call_kind); HLoadString::LoadKind load_kind = load->GetLoadKind(); if (load_kind == HLoadString::LoadKind::kBootImageLinkTimePcRelative || - load_kind == HLoadString::LoadKind::kBootImageInternTable || + load_kind == HLoadString::LoadKind::kBootImageRelRo || load_kind == HLoadString::LoadKind::kBssEntry) { locations->SetInAt(0, Location::RequiresRegister()); } @@ -6289,10 +6756,7 @@ void LocationsBuilderX86::VisitLoadString(HLoadString* load) { if (load_kind == HLoadString::LoadKind::kBssEntry) { if (!kUseReadBarrier || kUseBakerReadBarrier) { // Rely on the pResolveString to save everything. - RegisterSet caller_saves = RegisterSet::Empty(); - InvokeRuntimeCallingConvention calling_convention; - caller_saves.Add(Location::RegisterLocation(calling_convention.GetRegisterAt(0))); - locations->SetCustomSlowPathCallerSaves(caller_saves); + locations->SetCustomSlowPathCallerSaves(OneRegInReferenceOutSaveEverythingCallerSaves()); } else { // For non-Baker read barrier we have a temp-clobbering call. 
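GenerateBitstringTypeCheckCompare above performs the subtype test as a prefix comparison on the class status word: subtract the target's path-to-root, then shift away every bit outside the mask, so the zero flag ends up set exactly when (status & mask) == path_to_root (the 16-bit-mask case gets the same effect from a single cmpw against memory). A standalone C++ sketch of that equivalence; the example values are hypothetical and this is not the ART code:

#include <cassert>
#include <cstdint>

bool BitstringMatches(uint32_t status, uint32_t path_to_root, uint32_t mask) {
  // The real code DCHECKs IsPowerOfTwo(mask + 1), i.e. the mask is a contiguous
  // block of low bits; mask_bits corresponds to WhichPowerOf2(mask + 1).
  uint32_t mask_bits = 0;
  for (uint32_t m = mask; m != 0; m >>= 1) {
    ++mask_bits;
  }
  // subl temp, path_to_root ; shll temp, 32 - mask_bits ; test the zero flag.
  const uint32_t shifted = (status - path_to_root) << (32u - mask_bits);
  return shifted == 0;  // same as (status & mask) == path_to_root
}

int main() {
  const uint32_t path = 0b0101;  // hypothetical path-to-root of the target class
  const uint32_t mask = 0b1111;  // hypothetical 4-bit bitstring mask
  assert(BitstringMatches(0xABCD0000u | path, path, mask));      // in the subtree: matches
  assert(!BitstringMatches(0xABCD0000u | 0b0111u, path, mask));  // outside it: fails
  return 0;
}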
} @@ -6325,18 +6789,12 @@ void InstructionCodeGeneratorX86::VisitLoadString(HLoadString* load) NO_THREAD_S codegen_->RecordBootImageStringPatch(load); return; } - case HLoadString::LoadKind::kBootImageAddress: { - uint32_t address = dchecked_integral_cast<uint32_t>( - reinterpret_cast<uintptr_t>(load->GetString().Get())); - DCHECK_NE(address, 0u); - __ movl(out, Immediate(address)); - return; - } - case HLoadString::LoadKind::kBootImageInternTable: { + case HLoadString::LoadKind::kBootImageRelRo: { DCHECK(!codegen_->GetCompilerOptions().IsBootImage()); Register method_address = locations->InAt(0).AsRegister<Register>(); __ movl(out, Address(method_address, CodeGeneratorX86::kDummy32BitOffset)); - codegen_->RecordBootImageStringPatch(load); + codegen_->RecordBootImageRelRoPatch(load->InputAt(0)->AsX86ComputeBaseMethodAddress(), + codegen_->GetBootImageOffset(load)); return; } case HLoadString::LoadKind::kBssEntry: { @@ -6352,6 +6810,12 @@ void InstructionCodeGeneratorX86::VisitLoadString(HLoadString* load) NO_THREAD_S __ Bind(slow_path->GetExitLabel()); return; } + case HLoadString::LoadKind::kJitBootImageAddress: { + uint32_t address = reinterpret_cast32<uint32_t>(load->GetString().Get()); + DCHECK_NE(address, 0u); + __ movl(out, Immediate(address)); + return; + } case HLoadString::LoadKind::kJitTableAddress: { Address address = Address::Absolute(CodeGeneratorX86::kDummy32BitOffset); Label* fixup_label = codegen_->NewJitRootStringPatch( @@ -6418,8 +6882,8 @@ static size_t NumberOfInstanceOfTemps(TypeCheckKind type_check_kind) { return 0; } -// Interface case has 3 temps, one for holding the number of interfaces, one for the current -// interface pointer, one for loading the current interface. +// Interface case has 2 temps, one for holding the number of interfaces, one for the current +// interface pointer, the current interface is compared in memory. // The other checks have one temp for loading the object's class. static size_t NumberOfCheckCastTemps(TypeCheckKind type_check_kind) { if (type_check_kind == TypeCheckKind::kInterfaceCheck) { @@ -6447,6 +6911,8 @@ void LocationsBuilderX86::VisitInstanceOf(HInstanceOf* instruction) { case TypeCheckKind::kInterfaceCheck: call_kind = LocationSummary::kCallOnSlowPath; break; + case TypeCheckKind::kBitstringCheck: + break; } LocationSummary* locations = @@ -6455,7 +6921,13 @@ void LocationsBuilderX86::VisitInstanceOf(HInstanceOf* instruction) { locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty()); // No caller-save registers. } locations->SetInAt(0, Location::RequiresRegister()); - locations->SetInAt(1, Location::Any()); + if (type_check_kind == TypeCheckKind::kBitstringCheck) { + locations->SetInAt(1, Location::ConstantLocation(instruction->InputAt(1)->AsConstant())); + locations->SetInAt(2, Location::ConstantLocation(instruction->InputAt(2)->AsConstant())); + locations->SetInAt(3, Location::ConstantLocation(instruction->InputAt(3)->AsConstant())); + } else { + locations->SetInAt(1, Location::Any()); + } // Note that TypeCheckSlowPathX86 uses this "out" register too. locations->SetOut(Location::RequiresRegister()); // When read barriers are enabled, we need a temporary register for some cases. 
@@ -6636,7 +7108,7 @@ void InstructionCodeGeneratorX86::VisitInstanceOf(HInstanceOf* instruction) { } DCHECK(locations->OnlyCallsOnSlowPath()); slow_path = new (codegen_->GetScopedAllocator()) TypeCheckSlowPathX86( - instruction, /* is_fatal */ false); + instruction, /* is_fatal= */ false); codegen_->AddSlowPath(slow_path); __ j(kNotEqual, slow_path->GetEntryLabel()); __ movl(out, Immediate(1)); @@ -6668,7 +7140,7 @@ void InstructionCodeGeneratorX86::VisitInstanceOf(HInstanceOf* instruction) { // This should also be beneficial for the other cases above. DCHECK(locations->OnlyCallsOnSlowPath()); slow_path = new (codegen_->GetScopedAllocator()) TypeCheckSlowPathX86( - instruction, /* is_fatal */ false); + instruction, /* is_fatal= */ false); codegen_->AddSlowPath(slow_path); __ jmp(slow_path->GetEntryLabel()); if (zero.IsLinked()) { @@ -6676,6 +7148,21 @@ void InstructionCodeGeneratorX86::VisitInstanceOf(HInstanceOf* instruction) { } break; } + + case TypeCheckKind::kBitstringCheck: { + // /* HeapReference<Class> */ temp = obj->klass_ + GenerateReferenceLoadTwoRegisters(instruction, + out_loc, + obj_loc, + class_offset, + kWithoutReadBarrier); + + GenerateBitstringTypeCheckCompare(instruction, out); + __ j(kNotEqual, &zero); + __ movl(out, Immediate(1)); + __ jmp(&done); + break; + } } if (zero.IsLinked()) { @@ -6702,12 +7189,14 @@ void LocationsBuilderX86::VisitCheckCast(HCheckCast* instruction) { // Require a register for the interface check since there is a loop that compares the class to // a memory address. locations->SetInAt(1, Location::RequiresRegister()); + } else if (type_check_kind == TypeCheckKind::kBitstringCheck) { + locations->SetInAt(1, Location::ConstantLocation(instruction->InputAt(1)->AsConstant())); + locations->SetInAt(2, Location::ConstantLocation(instruction->InputAt(2)->AsConstant())); + locations->SetInAt(3, Location::ConstantLocation(instruction->InputAt(3)->AsConstant())); } else { locations->SetInAt(1, Location::Any()); } - // Note that TypeCheckSlowPathX86 uses this "temp" register too. - locations->AddTemp(Location::RequiresRegister()); - // When read barriers are enabled, we need an additional temporary register for some cases. + // Add temps for read barriers and other uses. One is used by TypeCheckSlowPathX86. 
locations->AddRegisterTemps(NumberOfCheckCastTemps(type_check_kind)); } @@ -6921,6 +7410,19 @@ void InstructionCodeGeneratorX86::VisitCheckCast(HCheckCast* instruction) { __ MaybeUnpoisonHeapReference(cls.AsRegister<Register>()); break; } + + case TypeCheckKind::kBitstringCheck: { + // /* HeapReference<Class> */ temp = obj->klass_ + GenerateReferenceLoadTwoRegisters(instruction, + temp_loc, + obj_loc, + class_offset, + kWithoutReadBarrier); + + GenerateBitstringTypeCheckCompare(instruction, temp); + __ j(kNotEqual, type_check_slow_path->GetEntryLabel()); + break; + } } __ Bind(&done); @@ -6946,6 +7448,61 @@ void InstructionCodeGeneratorX86::VisitMonitorOperation(HMonitorOperation* instr } } +void LocationsBuilderX86::VisitX86AndNot(HX86AndNot* instruction) { + DCHECK(codegen_->GetInstructionSetFeatures().HasAVX2()); + DCHECK(DataType::IsIntOrLongType(instruction->GetType())) << instruction->GetType(); + LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(instruction); + locations->SetInAt(0, Location::RequiresRegister()); + locations->SetInAt(1, Location::RequiresRegister()); + locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); +} + +void InstructionCodeGeneratorX86::VisitX86AndNot(HX86AndNot* instruction) { + LocationSummary* locations = instruction->GetLocations(); + Location first = locations->InAt(0); + Location second = locations->InAt(1); + Location dest = locations->Out(); + if (instruction->GetResultType() == DataType::Type::kInt32) { + __ andn(dest.AsRegister<Register>(), + first.AsRegister<Register>(), + second.AsRegister<Register>()); + } else { + DCHECK_EQ(instruction->GetResultType(), DataType::Type::kInt64); + __ andn(dest.AsRegisterPairLow<Register>(), + first.AsRegisterPairLow<Register>(), + second.AsRegisterPairLow<Register>()); + __ andn(dest.AsRegisterPairHigh<Register>(), + first.AsRegisterPairHigh<Register>(), + second.AsRegisterPairHigh<Register>()); + } +} + +void LocationsBuilderX86::VisitX86MaskOrResetLeastSetBit(HX86MaskOrResetLeastSetBit* instruction) { + DCHECK(codegen_->GetInstructionSetFeatures().HasAVX2()); + DCHECK(instruction->GetType() == DataType::Type::kInt32) << instruction->GetType(); + LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(instruction); + locations->SetInAt(0, Location::RequiresRegister()); + locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); +} + +void InstructionCodeGeneratorX86::VisitX86MaskOrResetLeastSetBit( + HX86MaskOrResetLeastSetBit* instruction) { + LocationSummary* locations = instruction->GetLocations(); + Location src = locations->InAt(0); + Location dest = locations->Out(); + DCHECK(instruction->GetResultType() == DataType::Type::kInt32); + switch (instruction->GetOpKind()) { + case HInstruction::kAnd: + __ blsr(dest.AsRegister<Register>(), src.AsRegister<Register>()); + break; + case HInstruction::kXor: + __ blsmsk(dest.AsRegister<Register>(), src.AsRegister<Register>()); + break; + default: + LOG(FATAL) << "Unreachable"; + } +} + void LocationsBuilderX86::VisitAnd(HAnd* instruction) { HandleBitwiseOperation(instruction); } void LocationsBuilderX86::VisitOr(HOr* instruction) { HandleBitwiseOperation(instruction); } void LocationsBuilderX86::VisitXor(HXor* instruction) { HandleBitwiseOperation(instruction); } @@ -7092,7 +7649,7 @@ void InstructionCodeGeneratorX86::GenerateReferenceLoadOneRegister( // Load with fast path based Baker's read barrier. 
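The two X86-specific nodes above lower straight to BMI instructions: VisitX86AndNot emits andn (dest = ~src1 & src2, done pairwise for the 64-bit register pair), and VisitX86MaskOrResetLeastSetBit emits blsr for the kAnd op kind and blsmsk for kXor. A tiny standalone C++ sketch of the scalar identities those instructions compute (illustrative only, not ART code):

#include <cassert>
#include <cstdint>

uint32_t AndNot(uint32_t a, uint32_t b) { return ~a & b; }  // andn dest, a, b
uint32_t Blsr(uint32_t x) { return x & (x - 1u); }          // reset lowest set bit (kAnd)
uint32_t Blsmsk(uint32_t x) { return x ^ (x - 1u); }        // mask up to lowest set bit (kXor)

int main() {
  assert(AndNot(0b1100u, 0b1010u) == 0b0010u);
  assert(Blsr(0b101100u) == 0b101000u);
  assert(Blsmsk(0b101100u) == 0b000111u);
  assert(Blsr(0u) == 0u);             // blsr leaves 0 unchanged
  assert(Blsmsk(0u) == 0xFFFFFFFFu);  // blsmsk of 0 sets every bit
  return 0;
}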
// /* HeapReference<Object> */ out = *(out + offset) codegen_->GenerateFieldLoadWithBakerReadBarrier( - instruction, out, out_reg, offset, /* needs_null_check */ false); + instruction, out, out_reg, offset, /* needs_null_check= */ false); } else { // Load with slow path based read barrier. // Save the value of `out` into `maybe_temp` before overwriting it @@ -7126,7 +7683,7 @@ void InstructionCodeGeneratorX86::GenerateReferenceLoadTwoRegisters( // Load with fast path based Baker's read barrier. // /* HeapReference<Object> */ out = *(obj + offset) codegen_->GenerateFieldLoadWithBakerReadBarrier( - instruction, out, obj_reg, offset, /* needs_null_check */ false); + instruction, out, obj_reg, offset, /* needs_null_check= */ false); } else { // Load with slow path based read barrier. // /* HeapReference<Object> */ out = *(obj + offset) @@ -7175,7 +7732,7 @@ void InstructionCodeGeneratorX86::GenerateGcRootFieldLoad( // Slow path marking the GC root `root`. SlowPathCode* slow_path = new (codegen_->GetScopedAllocator()) ReadBarrierMarkSlowPathX86( - instruction, root, /* unpoison_ref_before_marking */ false); + instruction, root, /* unpoison_ref_before_marking= */ false); codegen_->AddSlowPath(slow_path); // Test the entrypoint (`Thread::Current()->pReadBarrierMarkReg ## root.reg()`). @@ -7277,7 +7834,7 @@ void CodeGeneratorX86::GenerateReferenceLoadWithBakerReadBarrier(HInstruction* i uint32_t monitor_offset = mirror::Object::MonitorOffset().Int32Value(); // Given the numeric representation, it's enough to check the low bit of the rb_state. - static_assert(ReadBarrier::WhiteState() == 0, "Expecting white to have value 0"); + static_assert(ReadBarrier::NonGrayState() == 0, "Expecting non-gray to have value 0"); static_assert(ReadBarrier::GrayState() == 1, "Expecting gray to have value 1"); constexpr uint32_t gray_byte_position = LockWord::kReadBarrierStateShift / kBitsPerByte; constexpr uint32_t gray_bit_position = LockWord::kReadBarrierStateShift % kBitsPerByte; @@ -7305,10 +7862,10 @@ void CodeGeneratorX86::GenerateReferenceLoadWithBakerReadBarrier(HInstruction* i if (always_update_field) { DCHECK(temp != nullptr); slow_path = new (GetScopedAllocator()) ReadBarrierMarkAndUpdateFieldSlowPathX86( - instruction, ref, obj, src, /* unpoison_ref_before_marking */ true, *temp); + instruction, ref, obj, src, /* unpoison_ref_before_marking= */ true, *temp); } else { slow_path = new (GetScopedAllocator()) ReadBarrierMarkSlowPathX86( - instruction, ref, /* unpoison_ref_before_marking */ true); + instruction, ref, /* unpoison_ref_before_marking= */ true); } AddSlowPath(slow_path); @@ -7620,7 +8177,7 @@ class RIPFixup : public AssemblerFixup, public ArenaObject<kArenaAllocCodeGenera HX86ComputeBaseMethodAddress* base_method_address_; private: - void Process(const MemoryRegion& region, int pos) OVERRIDE { + void Process(const MemoryRegion& region, int pos) override { // Patch the correct offset for the instruction. The place to patch is the // last 4 bytes of the instruction. 
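The WhiteState-to-NonGrayState renaming above does not change the Baker read-barrier fast path: with non-gray encoded as 0 and gray as 1, the generated code only inspects the read-barrier-state bit of the lock word and branches to the marking slow path when it is set. A standalone C++ sketch of that predicate; the bit position is an assumption made for the example, not a value taken from this patch:

#include <cassert>
#include <cstdint>

// Assumed for the sketch: the read-barrier state is a single bit of the
// 32-bit lock word at this shift.
constexpr uint32_t kReadBarrierStateShift = 28;

// testb [obj + monitor_offset + shift/8], Immediate(1 << (shift % 8)) ; j kNotZero -> slow path
bool IsGray(uint32_t lock_word) {
  return ((lock_word >> kReadBarrierStateShift) & 1u) != 0u;  // GrayState() == 1
}

int main() {
  assert(!IsGray(0u));                           // NonGrayState() == 0: take the fast path
  assert(IsGray(1u << kReadBarrierStateShift));  // gray: mark through the slow path
  assert(!IsGray(0x0FFFFFFFu));                  // other lock-word bits do not matter
  return 0;
}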
// The value to patch is the distance from the offset in the constant area @@ -7821,7 +8378,7 @@ void CodeGeneratorX86::PatchJitRootUse(uint8_t* code, uint32_t code_offset = info.label.Position() - kLabelPositionToLiteralOffsetAdjustment; uintptr_t address = reinterpret_cast<uintptr_t>(roots_data) + index_in_table * sizeof(GcRoot<mirror::Object>); - typedef __attribute__((__aligned__(1))) uint32_t unaligned_uint32_t; + using unaligned_uint32_t __attribute__((__aligned__(1))) = uint32_t; reinterpret_cast<unaligned_uint32_t*>(code + code_offset)[0] = dchecked_integral_cast<uint32_t>(address); } diff --git a/compiler/optimizing/code_generator_x86.h b/compiler/optimizing/code_generator_x86.h index 51e5bca00b..deeef888e2 100644 --- a/compiler/optimizing/code_generator_x86.h +++ b/compiler/optimizing/code_generator_x86.h @@ -83,9 +83,9 @@ class InvokeDexCallingConventionVisitorX86 : public InvokeDexCallingConventionVi InvokeDexCallingConventionVisitorX86() {} virtual ~InvokeDexCallingConventionVisitorX86() {} - Location GetNextLocation(DataType::Type type) OVERRIDE; - Location GetReturnLocation(DataType::Type type) const OVERRIDE; - Location GetMethodLocation() const OVERRIDE; + Location GetNextLocation(DataType::Type type) override; + Location GetReturnLocation(DataType::Type type) const override; + Location GetMethodLocation() const override; private: InvokeDexCallingConvention calling_convention; @@ -97,18 +97,18 @@ class FieldAccessCallingConventionX86 : public FieldAccessCallingConvention { public: FieldAccessCallingConventionX86() {} - Location GetObjectLocation() const OVERRIDE { + Location GetObjectLocation() const override { return Location::RegisterLocation(ECX); } - Location GetFieldIndexLocation() const OVERRIDE { + Location GetFieldIndexLocation() const override { return Location::RegisterLocation(EAX); } - Location GetReturnLocation(DataType::Type type) const OVERRIDE { + Location GetReturnLocation(DataType::Type type) const override { return DataType::Is64BitType(type) ? Location::RegisterPairLocation(EAX, EDX) : Location::RegisterLocation(EAX); } - Location GetSetValueLocation(DataType::Type type, bool is_instance) const OVERRIDE { + Location GetSetValueLocation(DataType::Type type, bool is_instance) const override { return DataType::Is64BitType(type) ? (is_instance ? Location::RegisterPairLocation(EDX, EBX) @@ -117,7 +117,7 @@ class FieldAccessCallingConventionX86 : public FieldAccessCallingConvention { ? 
Location::RegisterLocation(EDX) : Location::RegisterLocation(ECX)); } - Location GetFpuLocation(DataType::Type type ATTRIBUTE_UNUSED) const OVERRIDE { + Location GetFpuLocation(DataType::Type type ATTRIBUTE_UNUSED) const override { return Location::FpuRegisterLocation(XMM0); } @@ -130,10 +130,10 @@ class ParallelMoveResolverX86 : public ParallelMoveResolverWithSwap { ParallelMoveResolverX86(ArenaAllocator* allocator, CodeGeneratorX86* codegen) : ParallelMoveResolverWithSwap(allocator), codegen_(codegen) {} - void EmitMove(size_t index) OVERRIDE; - void EmitSwap(size_t index) OVERRIDE; - void SpillScratch(int reg) OVERRIDE; - void RestoreScratch(int reg) OVERRIDE; + void EmitMove(size_t index) override; + void EmitSwap(size_t index) override; + void SpillScratch(int reg) override; + void RestoreScratch(int reg) override; X86Assembler* GetAssembler() const; @@ -155,14 +155,15 @@ class LocationsBuilderX86 : public HGraphVisitor { : HGraphVisitor(graph), codegen_(codegen) {} #define DECLARE_VISIT_INSTRUCTION(name, super) \ - void Visit##name(H##name* instr) OVERRIDE; + void Visit##name(H##name* instr) override; FOR_EACH_CONCRETE_INSTRUCTION_COMMON(DECLARE_VISIT_INSTRUCTION) FOR_EACH_CONCRETE_INSTRUCTION_X86(DECLARE_VISIT_INSTRUCTION) + FOR_EACH_CONCRETE_INSTRUCTION_X86_COMMON(DECLARE_VISIT_INSTRUCTION) #undef DECLARE_VISIT_INSTRUCTION - void VisitInstruction(HInstruction* instruction) OVERRIDE { + void VisitInstruction(HInstruction* instruction) override { LOG(FATAL) << "Unreachable instruction " << instruction->DebugName() << " (id " << instruction->GetId() << ")"; } @@ -186,14 +187,15 @@ class InstructionCodeGeneratorX86 : public InstructionCodeGenerator { InstructionCodeGeneratorX86(HGraph* graph, CodeGeneratorX86* codegen); #define DECLARE_VISIT_INSTRUCTION(name, super) \ - void Visit##name(H##name* instr) OVERRIDE; + void Visit##name(H##name* instr) override; FOR_EACH_CONCRETE_INSTRUCTION_COMMON(DECLARE_VISIT_INSTRUCTION) FOR_EACH_CONCRETE_INSTRUCTION_X86(DECLARE_VISIT_INSTRUCTION) + FOR_EACH_CONCRETE_INSTRUCTION_X86_COMMON(DECLARE_VISIT_INSTRUCTION) #undef DECLARE_VISIT_INSTRUCTION - void VisitInstruction(HInstruction* instruction) OVERRIDE { + void VisitInstruction(HInstruction* instruction) override { LOG(FATAL) << "Unreachable instruction " << instruction->DebugName() << " (id " << instruction->GetId() << ")"; } @@ -211,10 +213,12 @@ class InstructionCodeGeneratorX86 : public InstructionCodeGenerator { // the suspend call. 
void GenerateSuspendCheck(HSuspendCheck* check, HBasicBlock* successor); void GenerateClassInitializationCheck(SlowPathCode* slow_path, Register class_reg); + void GenerateBitstringTypeCheckCompare(HTypeCheckInstruction* check, Register temp); void HandleBitwiseOperation(HBinaryOperation* instruction); void GenerateDivRemIntegral(HBinaryOperation* instruction); void DivRemOneOrMinusOne(HBinaryOperation* instruction); void DivByPowerOfTwo(HDiv* instruction); + void RemByPowerOfTwo(HRem* instruction); void GenerateDivRemWithAnyConstant(HBinaryOperation* instruction); void GenerateRemFP(HRem* rem); void HandleCondition(HCondition* condition); @@ -225,6 +229,9 @@ class InstructionCodeGeneratorX86 : public InstructionCodeGenerator { void GenerateShlLong(const Location& loc, int shift); void GenerateShrLong(const Location& loc, int shift); void GenerateUShrLong(const Location& loc, int shift); + void GenerateMinMaxInt(LocationSummary* locations, bool is_min, DataType::Type type); + void GenerateMinMaxFP(LocationSummary* locations, bool is_min, DataType::Type type); + void GenerateMinMax(HBinaryOperation* minmax, bool is_min); void HandleFieldSet(HInstruction* instruction, const FieldInfo& field_info, @@ -312,28 +319,27 @@ class JumpTableRIPFixup; class CodeGeneratorX86 : public CodeGenerator { public: CodeGeneratorX86(HGraph* graph, - const X86InstructionSetFeatures& isa_features, const CompilerOptions& compiler_options, OptimizingCompilerStats* stats = nullptr); virtual ~CodeGeneratorX86() {} - void GenerateFrameEntry() OVERRIDE; - void GenerateFrameExit() OVERRIDE; - void Bind(HBasicBlock* block) OVERRIDE; - void MoveConstant(Location destination, int32_t value) OVERRIDE; - void MoveLocation(Location dst, Location src, DataType::Type dst_type) OVERRIDE; - void AddLocationAsTemp(Location location, LocationSummary* locations) OVERRIDE; + void GenerateFrameEntry() override; + void GenerateFrameExit() override; + void Bind(HBasicBlock* block) override; + void MoveConstant(Location destination, int32_t value) override; + void MoveLocation(Location dst, Location src, DataType::Type dst_type) override; + void AddLocationAsTemp(Location location, LocationSummary* locations) override; - size_t SaveCoreRegister(size_t stack_index, uint32_t reg_id) OVERRIDE; - size_t RestoreCoreRegister(size_t stack_index, uint32_t reg_id) OVERRIDE; - size_t SaveFloatingPointRegister(size_t stack_index, uint32_t reg_id) OVERRIDE; - size_t RestoreFloatingPointRegister(size_t stack_index, uint32_t reg_id) OVERRIDE; + size_t SaveCoreRegister(size_t stack_index, uint32_t reg_id) override; + size_t RestoreCoreRegister(size_t stack_index, uint32_t reg_id) override; + size_t SaveFloatingPointRegister(size_t stack_index, uint32_t reg_id) override; + size_t RestoreFloatingPointRegister(size_t stack_index, uint32_t reg_id) override; // Generate code to invoke a runtime entry point. void InvokeRuntime(QuickEntrypointEnum entrypoint, HInstruction* instruction, uint32_t dex_pc, - SlowPathCode* slow_path = nullptr) OVERRIDE; + SlowPathCode* slow_path = nullptr) override; // Generate code to invoke a runtime entry point, but do not record // PC-related information in a stack map. 
@@ -343,49 +349,51 @@ class CodeGeneratorX86 : public CodeGenerator { void GenerateInvokeRuntime(int32_t entry_point_offset); - size_t GetWordSize() const OVERRIDE { + size_t GetWordSize() const override { return kX86WordSize; } - size_t GetFloatingPointSpillSlotSize() const OVERRIDE { + size_t GetFloatingPointSpillSlotSize() const override { return GetGraph()->HasSIMD() ? 4 * kX86WordSize // 16 bytes == 4 words for each spill : 2 * kX86WordSize; // 8 bytes == 2 words for each spill } - HGraphVisitor* GetLocationBuilder() OVERRIDE { + HGraphVisitor* GetLocationBuilder() override { return &location_builder_; } - HGraphVisitor* GetInstructionVisitor() OVERRIDE { + HGraphVisitor* GetInstructionVisitor() override { return &instruction_visitor_; } - X86Assembler* GetAssembler() OVERRIDE { + X86Assembler* GetAssembler() override { return &assembler_; } - const X86Assembler& GetAssembler() const OVERRIDE { + const X86Assembler& GetAssembler() const override { return assembler_; } - uintptr_t GetAddressOf(HBasicBlock* block) OVERRIDE { + uintptr_t GetAddressOf(HBasicBlock* block) override { return GetLabelOf(block)->Position(); } - void SetupBlockedRegisters() const OVERRIDE; + void SetupBlockedRegisters() const override; - void DumpCoreRegister(std::ostream& stream, int reg) const OVERRIDE; - void DumpFloatingPointRegister(std::ostream& stream, int reg) const OVERRIDE; + void DumpCoreRegister(std::ostream& stream, int reg) const override; + void DumpFloatingPointRegister(std::ostream& stream, int reg) const override; - ParallelMoveResolverX86* GetMoveResolver() OVERRIDE { + ParallelMoveResolverX86* GetMoveResolver() override { return &move_resolver_; } - InstructionSet GetInstructionSet() const OVERRIDE { + InstructionSet GetInstructionSet() const override { return InstructionSet::kX86; } + const X86InstructionSetFeatures& GetInstructionSetFeatures() const; + // Helper method to move a 32bits value between two locations. void Move32(Location destination, Location source); // Helper method to move a 64bits value between two locations. @@ -394,32 +402,42 @@ class CodeGeneratorX86 : public CodeGenerator { // Check if the desired_string_load_kind is supported. If it is, return it, // otherwise return a fall-back kind that should be used instead. HLoadString::LoadKind GetSupportedLoadStringKind( - HLoadString::LoadKind desired_string_load_kind) OVERRIDE; + HLoadString::LoadKind desired_string_load_kind) override; // Check if the desired_class_load_kind is supported. If it is, return it, // otherwise return a fall-back kind that should be used instead. HLoadClass::LoadKind GetSupportedLoadClassKind( - HLoadClass::LoadKind desired_class_load_kind) OVERRIDE; + HLoadClass::LoadKind desired_class_load_kind) override; // Check if the desired_dispatch_info is supported. If it is, return it, // otherwise return a fall-back info that should be used instead. HInvokeStaticOrDirect::DispatchInfo GetSupportedInvokeStaticOrDirectDispatch( const HInvokeStaticOrDirect::DispatchInfo& desired_dispatch_info, - HInvokeStaticOrDirect* invoke) OVERRIDE; + ArtMethod* method) override; // Generate a call to a static or direct method. void GenerateStaticOrDirectCall( - HInvokeStaticOrDirect* invoke, Location temp, SlowPathCode* slow_path = nullptr) OVERRIDE; + HInvokeStaticOrDirect* invoke, Location temp, SlowPathCode* slow_path = nullptr) override; // Generate a call to a virtual method. 
void GenerateVirtualCall( - HInvokeVirtual* invoke, Location temp, SlowPathCode* slow_path = nullptr) OVERRIDE; + HInvokeVirtual* invoke, Location temp, SlowPathCode* slow_path = nullptr) override; + void RecordBootImageIntrinsicPatch(HX86ComputeBaseMethodAddress* method_address, + uint32_t intrinsic_data); + void RecordBootImageRelRoPatch(HX86ComputeBaseMethodAddress* method_address, + uint32_t boot_image_offset); void RecordBootImageMethodPatch(HInvokeStaticOrDirect* invoke); void RecordMethodBssEntryPatch(HInvokeStaticOrDirect* invoke); void RecordBootImageTypePatch(HLoadClass* load_class); Label* NewTypeBssEntryPatch(HLoadClass* load_class); void RecordBootImageStringPatch(HLoadString* load_string); Label* NewStringBssEntryPatch(HLoadString* load_string); + + void LoadBootImageAddress(Register reg, + uint32_t boot_image_reference, + HInvokeStaticOrDirect* invoke); + void AllocateInstanceForIntrinsic(HInvokeStaticOrDirect* invoke, uint32_t boot_image_offset); + Label* NewJitRootStringPatch(const DexFile& dex_file, dex::StringIndex string_index, Handle<mirror::String> handle); @@ -427,16 +445,16 @@ class CodeGeneratorX86 : public CodeGenerator { dex::TypeIndex type_index, Handle<mirror::Class> handle); - void MoveFromReturnRegister(Location trg, DataType::Type type) OVERRIDE; + void MoveFromReturnRegister(Location trg, DataType::Type type) override; // Emit linker patches. - void EmitLinkerPatches(ArenaVector<linker::LinkerPatch>* linker_patches) OVERRIDE; + void EmitLinkerPatches(ArenaVector<linker::LinkerPatch>* linker_patches) override; void PatchJitRootUse(uint8_t* code, const uint8_t* roots_data, const PatchInfo<Label>& info, uint64_t index_in_table) const; - void EmitJitRootPatches(uint8_t* code, const uint8_t* roots_data) OVERRIDE; + void EmitJitRootPatches(uint8_t* code, const uint8_t* roots_data) override; // Emit a write barrier. void MarkGCCard(Register temp, @@ -451,22 +469,18 @@ class CodeGeneratorX86 : public CodeGenerator { return CommonGetLabelOf<Label>(block_labels_, block); } - void Initialize() OVERRIDE { + void Initialize() override { block_labels_ = CommonInitializeLabels<Label>(); } - bool NeedsTwoRegisters(DataType::Type type) const OVERRIDE { + bool NeedsTwoRegisters(DataType::Type type) const override { return type == DataType::Type::kInt64; } - bool ShouldSplitLongMoves() const OVERRIDE { return true; } + bool ShouldSplitLongMoves() const override { return true; } Label* GetFrameEntryLabel() { return &frame_entry_label_; } - const X86InstructionSetFeatures& GetInstructionSetFeatures() const { - return isa_features_; - } - void AddMethodAddressOffset(HX86ComputeBaseMethodAddress* method_base, int32_t offset) { method_address_offset_.Put(method_base->GetId(), offset); } @@ -502,7 +516,7 @@ class CodeGeneratorX86 : public CodeGenerator { Address LiteralCaseTable(HX86PackedSwitch* switch_instr, Register reg, Register value); - void Finalize(CodeAllocator* allocator) OVERRIDE; + void Finalize(CodeAllocator* allocator) override; // Fast path implementation of ReadBarrier::Barrier for a heap // reference field load when Baker's read barriers are used. 
@@ -598,9 +612,9 @@ class CodeGeneratorX86 : public CodeGenerator { } } - void GenerateNop() OVERRIDE; - void GenerateImplicitNullCheck(HNullCheck* instruction) OVERRIDE; - void GenerateExplicitNullCheck(HNullCheck* instruction) OVERRIDE; + void GenerateNop() override; + void GenerateImplicitNullCheck(HNullCheck* instruction) override; + void GenerateExplicitNullCheck(HNullCheck* instruction) override; // When we don't know the proper offset for the value, we use kDummy32BitOffset. // The correct value will be inserted when processing Assembler fixups. @@ -629,20 +643,22 @@ class CodeGeneratorX86 : public CodeGenerator { InstructionCodeGeneratorX86 instruction_visitor_; ParallelMoveResolverX86 move_resolver_; X86Assembler assembler_; - const X86InstructionSetFeatures& isa_features_; - // PC-relative method patch info for kBootImageLinkTimePcRelative. + // PC-relative method patch info for kBootImageLinkTimePcRelative/kBootImageRelRo. + // Also used for type/string patches for kBootImageRelRo (same linker patch as for methods). ArenaDeque<X86PcRelativePatchInfo> boot_image_method_patches_; // PC-relative method patch info for kBssEntry. ArenaDeque<X86PcRelativePatchInfo> method_bss_entry_patches_; // PC-relative type patch info for kBootImageLinkTimePcRelative. ArenaDeque<X86PcRelativePatchInfo> boot_image_type_patches_; - // Type patch locations for kBssEntry. + // PC-relative type patch info for kBssEntry. ArenaDeque<X86PcRelativePatchInfo> type_bss_entry_patches_; - // String patch locations; type depends on configuration (intern table or boot image PIC). + // PC-relative String patch info for kBootImageLinkTimePcRelative. ArenaDeque<X86PcRelativePatchInfo> boot_image_string_patches_; - // String patch locations for kBssEntry. + // PC-relative String patch info for kBssEntry. ArenaDeque<X86PcRelativePatchInfo> string_bss_entry_patches_; + // PC-relative patch info for IntrinsicObjects. + ArenaDeque<X86PcRelativePatchInfo> boot_image_intrinsic_patches_; // Patches for string root accesses in JIT compiled code. ArenaDeque<PatchInfo<Label>> jit_string_patches_; diff --git a/compiler/optimizing/code_generator_x86_64.cc b/compiler/optimizing/code_generator_x86_64.cc index 7be360536b..7c293b8605 100644 --- a/compiler/optimizing/code_generator_x86_64.cc +++ b/compiler/optimizing/code_generator_x86_64.cc @@ -22,6 +22,7 @@ #include "compiled_method.h" #include "entrypoints/quick/quick_entrypoints.h" #include "gc/accounting/card_table.h" +#include "gc/space/image_space.h" #include "heap_poisoning.h" #include "intrinsics.h" #include "intrinsics_x86_64.h" @@ -55,6 +56,13 @@ static constexpr FloatRegister kFpuCalleeSaves[] = { XMM12, XMM13, XMM14, XMM15 static constexpr int kC2ConditionMask = 0x400; +static RegisterSet OneRegInReferenceOutSaveEverythingCallerSaves() { + // Custom calling convention: RAX serves as both input and output. + RegisterSet caller_saves = RegisterSet::Empty(); + caller_saves.Add(Location::RegisterLocation(RAX)); + return caller_saves; +} + // NOLINT on __ macro to suppress wrong warning/fix (misc-macro-parentheses) from clang-tidy. 
#define __ down_cast<X86_64Assembler*>(codegen->GetAssembler())-> // NOLINT #define QUICK_ENTRY_POINT(x) QUICK_ENTRYPOINT_OFFSET(kX86_64PointerSize, x).Int32Value() @@ -63,7 +71,7 @@ class NullCheckSlowPathX86_64 : public SlowPathCode { public: explicit NullCheckSlowPathX86_64(HNullCheck* instruction) : SlowPathCode(instruction) {} - void EmitNativeCode(CodeGenerator* codegen) OVERRIDE { + void EmitNativeCode(CodeGenerator* codegen) override { CodeGeneratorX86_64* x86_64_codegen = down_cast<CodeGeneratorX86_64*>(codegen); __ Bind(GetEntryLabel()); if (instruction_->CanThrowIntoCatchBlock()) { @@ -77,9 +85,9 @@ class NullCheckSlowPathX86_64 : public SlowPathCode { CheckEntrypointTypes<kQuickThrowNullPointer, void, void>(); } - bool IsFatal() const OVERRIDE { return true; } + bool IsFatal() const override { return true; } - const char* GetDescription() const OVERRIDE { return "NullCheckSlowPathX86_64"; } + const char* GetDescription() const override { return "NullCheckSlowPathX86_64"; } private: DISALLOW_COPY_AND_ASSIGN(NullCheckSlowPathX86_64); @@ -89,16 +97,16 @@ class DivZeroCheckSlowPathX86_64 : public SlowPathCode { public: explicit DivZeroCheckSlowPathX86_64(HDivZeroCheck* instruction) : SlowPathCode(instruction) {} - void EmitNativeCode(CodeGenerator* codegen) OVERRIDE { + void EmitNativeCode(CodeGenerator* codegen) override { CodeGeneratorX86_64* x86_64_codegen = down_cast<CodeGeneratorX86_64*>(codegen); __ Bind(GetEntryLabel()); x86_64_codegen->InvokeRuntime(kQuickThrowDivZero, instruction_, instruction_->GetDexPc(), this); CheckEntrypointTypes<kQuickThrowDivZero, void, void>(); } - bool IsFatal() const OVERRIDE { return true; } + bool IsFatal() const override { return true; } - const char* GetDescription() const OVERRIDE { return "DivZeroCheckSlowPathX86_64"; } + const char* GetDescription() const override { return "DivZeroCheckSlowPathX86_64"; } private: DISALLOW_COPY_AND_ASSIGN(DivZeroCheckSlowPathX86_64); @@ -109,7 +117,7 @@ class DivRemMinusOneSlowPathX86_64 : public SlowPathCode { DivRemMinusOneSlowPathX86_64(HInstruction* at, Register reg, DataType::Type type, bool is_div) : SlowPathCode(at), cpu_reg_(CpuRegister(reg)), type_(type), is_div_(is_div) {} - void EmitNativeCode(CodeGenerator* codegen) OVERRIDE { + void EmitNativeCode(CodeGenerator* codegen) override { __ Bind(GetEntryLabel()); if (type_ == DataType::Type::kInt32) { if (is_div_) { @@ -129,7 +137,7 @@ class DivRemMinusOneSlowPathX86_64 : public SlowPathCode { __ jmp(GetExitLabel()); } - const char* GetDescription() const OVERRIDE { return "DivRemMinusOneSlowPathX86_64"; } + const char* GetDescription() const override { return "DivRemMinusOneSlowPathX86_64"; } private: const CpuRegister cpu_reg_; @@ -143,7 +151,7 @@ class SuspendCheckSlowPathX86_64 : public SlowPathCode { SuspendCheckSlowPathX86_64(HSuspendCheck* instruction, HBasicBlock* successor) : SlowPathCode(instruction), successor_(successor) {} - void EmitNativeCode(CodeGenerator* codegen) OVERRIDE { + void EmitNativeCode(CodeGenerator* codegen) override { LocationSummary* locations = instruction_->GetLocations(); CodeGeneratorX86_64* x86_64_codegen = down_cast<CodeGeneratorX86_64*>(codegen); __ Bind(GetEntryLabel()); @@ -167,7 +175,7 @@ class SuspendCheckSlowPathX86_64 : public SlowPathCode { return successor_; } - const char* GetDescription() const OVERRIDE { return "SuspendCheckSlowPathX86_64"; } + const char* GetDescription() const override { return "SuspendCheckSlowPathX86_64"; } private: HBasicBlock* const successor_; @@ -181,7 +189,7 @@ class 
BoundsCheckSlowPathX86_64 : public SlowPathCode { explicit BoundsCheckSlowPathX86_64(HBoundsCheck* instruction) : SlowPathCode(instruction) {} - void EmitNativeCode(CodeGenerator* codegen) OVERRIDE { + void EmitNativeCode(CodeGenerator* codegen) override { LocationSummary* locations = instruction_->GetLocations(); CodeGeneratorX86_64* x86_64_codegen = down_cast<CodeGeneratorX86_64*>(codegen); __ Bind(GetEntryLabel()); @@ -228,9 +236,9 @@ class BoundsCheckSlowPathX86_64 : public SlowPathCode { CheckEntrypointTypes<kQuickThrowArrayBounds, void, int32_t, int32_t>(); } - bool IsFatal() const OVERRIDE { return true; } + bool IsFatal() const override { return true; } - const char* GetDescription() const OVERRIDE { return "BoundsCheckSlowPathX86_64"; } + const char* GetDescription() const override { return "BoundsCheckSlowPathX86_64"; } private: DISALLOW_COPY_AND_ASSIGN(BoundsCheckSlowPathX86_64); @@ -238,34 +246,41 @@ class BoundsCheckSlowPathX86_64 : public SlowPathCode { class LoadClassSlowPathX86_64 : public SlowPathCode { public: - LoadClassSlowPathX86_64(HLoadClass* cls, - HInstruction* at, - uint32_t dex_pc, - bool do_clinit) - : SlowPathCode(at), cls_(cls), dex_pc_(dex_pc), do_clinit_(do_clinit) { + LoadClassSlowPathX86_64(HLoadClass* cls, HInstruction* at) + : SlowPathCode(at), cls_(cls) { DCHECK(at->IsLoadClass() || at->IsClinitCheck()); + DCHECK_EQ(instruction_->IsLoadClass(), cls_ == instruction_); } - void EmitNativeCode(CodeGenerator* codegen) OVERRIDE { + void EmitNativeCode(CodeGenerator* codegen) override { LocationSummary* locations = instruction_->GetLocations(); + Location out = locations->Out(); + const uint32_t dex_pc = instruction_->GetDexPc(); + bool must_resolve_type = instruction_->IsLoadClass() && cls_->MustResolveTypeOnSlowPath(); + bool must_do_clinit = instruction_->IsClinitCheck() || cls_->MustGenerateClinitCheck(); + CodeGeneratorX86_64* x86_64_codegen = down_cast<CodeGeneratorX86_64*>(codegen); __ Bind(GetEntryLabel()); - SaveLiveRegisters(codegen, locations); // Custom calling convention: RAX serves as both input and output. - __ movl(CpuRegister(RAX), Immediate(cls_->GetTypeIndex().index_)); - x86_64_codegen->InvokeRuntime(do_clinit_ ? kQuickInitializeStaticStorage : kQuickInitializeType, - instruction_, - dex_pc_, - this); - if (do_clinit_) { - CheckEntrypointTypes<kQuickInitializeStaticStorage, void*, uint32_t>(); + if (must_resolve_type) { + DCHECK(IsSameDexFile(cls_->GetDexFile(), x86_64_codegen->GetGraph()->GetDexFile())); + dex::TypeIndex type_index = cls_->GetTypeIndex(); + __ movl(CpuRegister(RAX), Immediate(type_index.index_)); + x86_64_codegen->InvokeRuntime(kQuickResolveType, instruction_, dex_pc, this); + CheckEntrypointTypes<kQuickResolveType, void*, uint32_t>(); + // If we also must_do_clinit, the resolved type is now in the correct register. } else { - CheckEntrypointTypes<kQuickInitializeType, void*, uint32_t>(); + DCHECK(must_do_clinit); + Location source = instruction_->IsLoadClass() ? out : locations->InAt(0); + x86_64_codegen->Move(Location::RegisterLocation(RAX), source); + } + if (must_do_clinit) { + x86_64_codegen->InvokeRuntime(kQuickInitializeStaticStorage, instruction_, dex_pc, this); + CheckEntrypointTypes<kQuickInitializeStaticStorage, void*, mirror::Class*>(); } - Location out = locations->Out(); // Move the class to the desired location. 
if (out.IsValid()) { DCHECK(out.IsRegister() && !locations->GetLiveRegisters()->ContainsCoreRegister(out.reg())); @@ -276,18 +291,12 @@ class LoadClassSlowPathX86_64 : public SlowPathCode { __ jmp(GetExitLabel()); } - const char* GetDescription() const OVERRIDE { return "LoadClassSlowPathX86_64"; } + const char* GetDescription() const override { return "LoadClassSlowPathX86_64"; } private: // The class this slow path will load. HLoadClass* const cls_; - // The dex PC of `at_`. - const uint32_t dex_pc_; - - // Whether to initialize the class. - const bool do_clinit_; - DISALLOW_COPY_AND_ASSIGN(LoadClassSlowPathX86_64); }; @@ -295,7 +304,7 @@ class LoadStringSlowPathX86_64 : public SlowPathCode { public: explicit LoadStringSlowPathX86_64(HLoadString* instruction) : SlowPathCode(instruction) {} - void EmitNativeCode(CodeGenerator* codegen) OVERRIDE { + void EmitNativeCode(CodeGenerator* codegen) override { LocationSummary* locations = instruction_->GetLocations(); DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(locations->Out().reg())); @@ -317,7 +326,7 @@ class LoadStringSlowPathX86_64 : public SlowPathCode { __ jmp(GetExitLabel()); } - const char* GetDescription() const OVERRIDE { return "LoadStringSlowPathX86_64"; } + const char* GetDescription() const override { return "LoadStringSlowPathX86_64"; } private: DISALLOW_COPY_AND_ASSIGN(LoadStringSlowPathX86_64); @@ -328,7 +337,7 @@ class TypeCheckSlowPathX86_64 : public SlowPathCode { TypeCheckSlowPathX86_64(HInstruction* instruction, bool is_fatal) : SlowPathCode(instruction), is_fatal_(is_fatal) {} - void EmitNativeCode(CodeGenerator* codegen) OVERRIDE { + void EmitNativeCode(CodeGenerator* codegen) override { LocationSummary* locations = instruction_->GetLocations(); uint32_t dex_pc = instruction_->GetDexPc(); DCHECK(instruction_->IsCheckCast() @@ -376,9 +385,9 @@ class TypeCheckSlowPathX86_64 : public SlowPathCode { } } - const char* GetDescription() const OVERRIDE { return "TypeCheckSlowPathX86_64"; } + const char* GetDescription() const override { return "TypeCheckSlowPathX86_64"; } - bool IsFatal() const OVERRIDE { return is_fatal_; } + bool IsFatal() const override { return is_fatal_; } private: const bool is_fatal_; @@ -391,7 +400,7 @@ class DeoptimizationSlowPathX86_64 : public SlowPathCode { explicit DeoptimizationSlowPathX86_64(HDeoptimize* instruction) : SlowPathCode(instruction) {} - void EmitNativeCode(CodeGenerator* codegen) OVERRIDE { + void EmitNativeCode(CodeGenerator* codegen) override { CodeGeneratorX86_64* x86_64_codegen = down_cast<CodeGeneratorX86_64*>(codegen); __ Bind(GetEntryLabel()); LocationSummary* locations = instruction_->GetLocations(); @@ -404,7 +413,7 @@ class DeoptimizationSlowPathX86_64 : public SlowPathCode { CheckEntrypointTypes<kQuickDeoptimize, void, DeoptimizationKind>(); } - const char* GetDescription() const OVERRIDE { return "DeoptimizationSlowPathX86_64"; } + const char* GetDescription() const override { return "DeoptimizationSlowPathX86_64"; } private: DISALLOW_COPY_AND_ASSIGN(DeoptimizationSlowPathX86_64); @@ -414,7 +423,7 @@ class ArraySetSlowPathX86_64 : public SlowPathCode { public: explicit ArraySetSlowPathX86_64(HInstruction* instruction) : SlowPathCode(instruction) {} - void EmitNativeCode(CodeGenerator* codegen) OVERRIDE { + void EmitNativeCode(CodeGenerator* codegen) override { LocationSummary* locations = instruction_->GetLocations(); __ Bind(GetEntryLabel()); SaveLiveRegisters(codegen, locations); @@ -445,7 +454,7 @@ class ArraySetSlowPathX86_64 : public SlowPathCode { __ 
jmp(GetExitLabel()); } - const char* GetDescription() const OVERRIDE { return "ArraySetSlowPathX86_64"; } + const char* GetDescription() const override { return "ArraySetSlowPathX86_64"; } private: DISALLOW_COPY_AND_ASSIGN(ArraySetSlowPathX86_64); @@ -473,9 +482,9 @@ class ReadBarrierMarkSlowPathX86_64 : public SlowPathCode { DCHECK(kEmitCompilerReadBarrier); } - const char* GetDescription() const OVERRIDE { return "ReadBarrierMarkSlowPathX86_64"; } + const char* GetDescription() const override { return "ReadBarrierMarkSlowPathX86_64"; } - void EmitNativeCode(CodeGenerator* codegen) OVERRIDE { + void EmitNativeCode(CodeGenerator* codegen) override { LocationSummary* locations = instruction_->GetLocations(); CpuRegister ref_cpu_reg = ref_.AsRegister<CpuRegister>(); Register ref_reg = ref_cpu_reg.AsRegister(); @@ -564,11 +573,11 @@ class ReadBarrierMarkAndUpdateFieldSlowPathX86_64 : public SlowPathCode { DCHECK(kEmitCompilerReadBarrier); } - const char* GetDescription() const OVERRIDE { + const char* GetDescription() const override { return "ReadBarrierMarkAndUpdateFieldSlowPathX86_64"; } - void EmitNativeCode(CodeGenerator* codegen) OVERRIDE { + void EmitNativeCode(CodeGenerator* codegen) override { LocationSummary* locations = instruction_->GetLocations(); CpuRegister ref_cpu_reg = ref_.AsRegister<CpuRegister>(); Register ref_reg = ref_cpu_reg.AsRegister(); @@ -736,7 +745,7 @@ class ReadBarrierForHeapReferenceSlowPathX86_64 : public SlowPathCode { DCHECK(!obj.Equals(ref)) << "obj=" << obj << " ref=" << ref; } - void EmitNativeCode(CodeGenerator* codegen) OVERRIDE { + void EmitNativeCode(CodeGenerator* codegen) override { CodeGeneratorX86_64* x86_64_codegen = down_cast<CodeGeneratorX86_64*>(codegen); LocationSummary* locations = instruction_->GetLocations(); CpuRegister reg_out = out_.AsRegister<CpuRegister>(); @@ -855,7 +864,7 @@ class ReadBarrierForHeapReferenceSlowPathX86_64 : public SlowPathCode { __ jmp(GetExitLabel()); } - const char* GetDescription() const OVERRIDE { + const char* GetDescription() const override { return "ReadBarrierForHeapReferenceSlowPathX86_64"; } @@ -897,7 +906,7 @@ class ReadBarrierForRootSlowPathX86_64 : public SlowPathCode { DCHECK(kEmitCompilerReadBarrier); } - void EmitNativeCode(CodeGenerator* codegen) OVERRIDE { + void EmitNativeCode(CodeGenerator* codegen) override { LocationSummary* locations = instruction_->GetLocations(); DCHECK(locations->CanCall()); DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(out_.reg())); @@ -922,7 +931,7 @@ class ReadBarrierForRootSlowPathX86_64 : public SlowPathCode { __ jmp(GetExitLabel()); } - const char* GetDescription() const OVERRIDE { return "ReadBarrierForRootSlowPathX86_64"; } + const char* GetDescription() const override { return "ReadBarrierForRootSlowPathX86_64"; } private: const Location out_; @@ -969,7 +978,7 @@ inline Condition X86_64FPCondition(IfCondition cond) { HInvokeStaticOrDirect::DispatchInfo CodeGeneratorX86_64::GetSupportedInvokeStaticOrDirectDispatch( const HInvokeStaticOrDirect::DispatchInfo& desired_dispatch_info, - HInvokeStaticOrDirect* invoke ATTRIBUTE_UNUSED) { + ArtMethod* method ATTRIBUTE_UNUSED) { return desired_dispatch_info; } @@ -983,7 +992,7 @@ void CodeGeneratorX86_64::GenerateStaticOrDirectCall( // temp = thread->string_init_entrypoint uint32_t offset = GetThreadOffset<kX86_64PointerSize>(invoke->GetStringInitEntryPoint()).Int32Value(); - __ gs()->movq(temp.AsRegister<CpuRegister>(), Address::Absolute(offset, /* no_rip */ true)); + __ 
gs()->movq(temp.AsRegister<CpuRegister>(), Address::Absolute(offset, /* no_rip= */ true)); break; } case HInvokeStaticOrDirect::MethodLoadKind::kRecursive: @@ -992,18 +1001,25 @@ void CodeGeneratorX86_64::GenerateStaticOrDirectCall( case HInvokeStaticOrDirect::MethodLoadKind::kBootImageLinkTimePcRelative: DCHECK(GetCompilerOptions().IsBootImage()); __ leal(temp.AsRegister<CpuRegister>(), - Address::Absolute(kDummy32BitOffset, /* no_rip */ false)); + Address::Absolute(kDummy32BitOffset, /* no_rip= */ false)); RecordBootImageMethodPatch(invoke); break; - case HInvokeStaticOrDirect::MethodLoadKind::kDirectAddress: - Load64BitValue(temp.AsRegister<CpuRegister>(), invoke->GetMethodAddress()); + case HInvokeStaticOrDirect::MethodLoadKind::kBootImageRelRo: { + // Note: Boot image is in the low 4GiB and the entry is 32-bit, so emit a 32-bit load. + __ movl(temp.AsRegister<CpuRegister>(), + Address::Absolute(kDummy32BitOffset, /* no_rip= */ false)); + RecordBootImageRelRoPatch(GetBootImageOffset(invoke)); break; + } case HInvokeStaticOrDirect::MethodLoadKind::kBssEntry: { __ movq(temp.AsRegister<CpuRegister>(), - Address::Absolute(kDummy32BitOffset, /* no_rip */ false)); + Address::Absolute(kDummy32BitOffset, /* no_rip= */ false)); RecordMethodBssEntryPatch(invoke); break; } + case HInvokeStaticOrDirect::MethodLoadKind::kJitDirectAddress: + Load64BitValue(temp.AsRegister<CpuRegister>(), invoke->GetMethodAddress()); + break; case HInvokeStaticOrDirect::MethodLoadKind::kRuntimeCall: { GenerateInvokeStaticOrDirectRuntimeCall(invoke, temp, slow_path); return; // No code pointer retrieval; the runtime performs the call directly. @@ -1059,6 +1075,16 @@ void CodeGeneratorX86_64::GenerateVirtualCall( RecordPcInfo(invoke, invoke->GetDexPc(), slow_path); } +void CodeGeneratorX86_64::RecordBootImageIntrinsicPatch(uint32_t intrinsic_data) { + boot_image_intrinsic_patches_.emplace_back(/* target_dex_file= */ nullptr, intrinsic_data); + __ Bind(&boot_image_intrinsic_patches_.back().label); +} + +void CodeGeneratorX86_64::RecordBootImageRelRoPatch(uint32_t boot_image_offset) { + boot_image_method_patches_.emplace_back(/* target_dex_file= */ nullptr, boot_image_offset); + __ Bind(&boot_image_method_patches_.back().label); +} + void CodeGeneratorX86_64::RecordBootImageMethodPatch(HInvokeStaticOrDirect* invoke) { boot_image_method_patches_.emplace_back( invoke->GetTargetMethod().dex_file, invoke->GetTargetMethod().index); @@ -1089,12 +1115,48 @@ void CodeGeneratorX86_64::RecordBootImageStringPatch(HLoadString* load_string) { } Label* CodeGeneratorX86_64::NewStringBssEntryPatch(HLoadString* load_string) { - DCHECK(!GetCompilerOptions().IsBootImage()); string_bss_entry_patches_.emplace_back( &load_string->GetDexFile(), load_string->GetStringIndex().index_); return &string_bss_entry_patches_.back().label; } +void CodeGeneratorX86_64::LoadBootImageAddress(CpuRegister reg, uint32_t boot_image_reference) { + if (GetCompilerOptions().IsBootImage()) { + __ leal(reg, Address::Absolute(CodeGeneratorX86_64::kDummy32BitOffset, /* no_rip= */ false)); + RecordBootImageIntrinsicPatch(boot_image_reference); + } else if (GetCompilerOptions().GetCompilePic()) { + __ movl(reg, Address::Absolute(CodeGeneratorX86_64::kDummy32BitOffset, /* no_rip= */ false)); + RecordBootImageRelRoPatch(boot_image_reference); + } else { + DCHECK(Runtime::Current()->UseJitCompilation()); + gc::Heap* heap = Runtime::Current()->GetHeap(); + DCHECK(!heap->GetBootImageSpaces().empty()); + const uint8_t* address = heap->GetBootImageSpaces()[0]->Begin() + 
boot_image_reference; + __ movl(reg, Immediate(dchecked_integral_cast<uint32_t>(reinterpret_cast<uintptr_t>(address)))); + } +} + +void CodeGeneratorX86_64::AllocateInstanceForIntrinsic(HInvokeStaticOrDirect* invoke, + uint32_t boot_image_offset) { + DCHECK(invoke->IsStatic()); + InvokeRuntimeCallingConvention calling_convention; + CpuRegister argument = CpuRegister(calling_convention.GetRegisterAt(0)); + if (GetCompilerOptions().IsBootImage()) { + DCHECK_EQ(boot_image_offset, IntrinsicVisitor::IntegerValueOfInfo::kInvalidReference); + // Load the class the same way as for HLoadClass::LoadKind::kBootImageLinkTimePcRelative. + __ leal(argument, + Address::Absolute(CodeGeneratorX86_64::kDummy32BitOffset, /* no_rip= */ false)); + MethodReference target_method = invoke->GetTargetMethod(); + dex::TypeIndex type_idx = target_method.dex_file->GetMethodId(target_method.index).class_idx_; + boot_image_type_patches_.emplace_back(target_method.dex_file, type_idx.index_); + __ Bind(&boot_image_type_patches_.back().label); + } else { + LoadBootImageAddress(argument, boot_image_offset); + } + InvokeRuntime(kQuickAllocObjectInitialized, invoke, invoke->GetDexPc()); + CheckEntrypointTypes<kQuickAllocObjectWithChecks, void*, mirror::Class*>(); +} + // The label points to the end of the "movl" or another instruction but the literal offset // for method patch needs to point to the embedded constant which occupies the last 4 bytes. constexpr uint32_t kLabelPositionToLiteralOffsetAdjustment = 4u; @@ -1110,6 +1172,15 @@ inline void CodeGeneratorX86_64::EmitPcRelativeLinkerPatches( } } +template <linker::LinkerPatch (*Factory)(size_t, uint32_t, uint32_t)> +linker::LinkerPatch NoDexFileAdapter(size_t literal_offset, + const DexFile* target_dex_file, + uint32_t pc_insn_offset, + uint32_t boot_image_offset) { + DCHECK(target_dex_file == nullptr); // Unused for these patches, should be null. 
+ return Factory(literal_offset, pc_insn_offset, boot_image_offset); +} + void CodeGeneratorX86_64::EmitLinkerPatches(ArenaVector<linker::LinkerPatch>* linker_patches) { DCHECK(linker_patches->empty()); size_t size = @@ -1118,7 +1189,8 @@ void CodeGeneratorX86_64::EmitLinkerPatches(ArenaVector<linker::LinkerPatch>* li boot_image_type_patches_.size() + type_bss_entry_patches_.size() + boot_image_string_patches_.size() + - string_bss_entry_patches_.size(); + string_bss_entry_patches_.size() + + boot_image_intrinsic_patches_.size(); linker_patches->reserve(size); if (GetCompilerOptions().IsBootImage()) { EmitPcRelativeLinkerPatches<linker::LinkerPatch::RelativeMethodPatch>( @@ -1127,12 +1199,14 @@ void CodeGeneratorX86_64::EmitLinkerPatches(ArenaVector<linker::LinkerPatch>* li boot_image_type_patches_, linker_patches); EmitPcRelativeLinkerPatches<linker::LinkerPatch::RelativeStringPatch>( boot_image_string_patches_, linker_patches); + EmitPcRelativeLinkerPatches<NoDexFileAdapter<linker::LinkerPatch::IntrinsicReferencePatch>>( + boot_image_intrinsic_patches_, linker_patches); } else { - DCHECK(boot_image_method_patches_.empty()); - EmitPcRelativeLinkerPatches<linker::LinkerPatch::TypeClassTablePatch>( - boot_image_type_patches_, linker_patches); - EmitPcRelativeLinkerPatches<linker::LinkerPatch::StringInternTablePatch>( - boot_image_string_patches_, linker_patches); + EmitPcRelativeLinkerPatches<NoDexFileAdapter<linker::LinkerPatch::DataBimgRelRoPatch>>( + boot_image_method_patches_, linker_patches); + DCHECK(boot_image_type_patches_.empty()); + DCHECK(boot_image_string_patches_.empty()); + DCHECK(boot_image_intrinsic_patches_.empty()); } EmitPcRelativeLinkerPatches<linker::LinkerPatch::MethodBssEntryPatch>( method_bss_entry_patches_, linker_patches); @@ -1151,6 +1225,10 @@ void CodeGeneratorX86_64::DumpFloatingPointRegister(std::ostream& stream, int re stream << FloatRegister(reg); } +const X86_64InstructionSetFeatures& CodeGeneratorX86_64::GetInstructionSetFeatures() const { + return *GetCompilerOptions().GetInstructionSetFeatures()->AsX86_64InstructionSetFeatures(); +} + size_t CodeGeneratorX86_64::SaveCoreRegister(size_t stack_index, uint32_t reg_id) { __ movq(Address(CpuRegister(RSP), stack_index), CpuRegister(reg_id)); return kX86_64WordSize; @@ -1198,14 +1276,13 @@ void CodeGeneratorX86_64::InvokeRuntimeWithoutRecordingPcInfo(int32_t entry_poin } void CodeGeneratorX86_64::GenerateInvokeRuntime(int32_t entry_point_offset) { - __ gs()->call(Address::Absolute(entry_point_offset, /* no_rip */ true)); + __ gs()->call(Address::Absolute(entry_point_offset, /* no_rip= */ true)); } static constexpr int kNumberOfCpuRegisterPairs = 0; // Use a fake return address register to mimic Quick. 
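The NoDexFileAdapter template above is a small signature adapter: the shared EmitPcRelativeLinkerPatches helper always invokes a four-argument factory, while the RelRo and intrinsic patches ignore the dex-file argument, so the adapter asserts it is null and forwards the rest. A self-contained sketch of the same idiom with invented Patch/emitter types (not the real linker API):

#include <cassert>
#include <cstddef>
#include <cstdint>
#include <cstdio>
#include <vector>

struct DexFileStub {};  // Stand-in for the dex-file type; unused by these patch kinds.
struct Patch { size_t literal_offset; uint32_t pc_insn_offset; uint32_t data; };

// Factory that only needs three values (compare LinkerPatch::DataBimgRelRoPatch).
Patch RelRoPatch(size_t literal_offset, uint32_t pc_insn_offset, uint32_t boot_image_offset) {
  return Patch{literal_offset, pc_insn_offset, boot_image_offset};
}

// Adapter exposing the four-argument shape the generic emitter expects.
template <Patch (*Factory)(size_t, uint32_t, uint32_t)>
Patch NoDexFileAdapter(size_t literal_offset,
                       const DexFileStub* target_dex_file,
                       uint32_t pc_insn_offset,
                       uint32_t boot_image_offset) {
  assert(target_dex_file == nullptr);  // Unused for these patch kinds.
  return Factory(literal_offset, pc_insn_offset, boot_image_offset);
}

// Generic emitter that always supplies all four arguments.
template <Patch (*Factory)(size_t, const DexFileStub*, uint32_t, uint32_t)>
void EmitPatches(const std::vector<uint32_t>& offsets, std::vector<Patch>* out) {
  for (uint32_t off : offsets) {
    out->push_back(Factory(off, /* target_dex_file= */ nullptr, off, /* data= */ 42u));
  }
}

int main() {
  std::vector<Patch> patches;
  EmitPatches<NoDexFileAdapter<RelRoPatch>>({4u, 16u}, &patches);
  std::printf("emitted %zu patches\n", patches.size());
  return 0;
}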
static constexpr Register kFakeReturnRegister = Register(kLastCpuRegister + 1); CodeGeneratorX86_64::CodeGeneratorX86_64(HGraph* graph, - const X86_64InstructionSetFeatures& isa_features, const CompilerOptions& compiler_options, OptimizingCompilerStats* stats) : CodeGenerator(graph, @@ -1224,7 +1301,6 @@ CodeGeneratorX86_64::CodeGeneratorX86_64(HGraph* graph, instruction_visitor_(graph, this), move_resolver_(graph->GetAllocator(), this), assembler_(graph->GetAllocator()), - isa_features_(isa_features), constant_area_start_(0), boot_image_method_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)), method_bss_entry_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)), @@ -1232,6 +1308,7 @@ CodeGeneratorX86_64::CodeGeneratorX86_64(HGraph* graph, type_bss_entry_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)), boot_image_string_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)), string_bss_entry_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)), + boot_image_intrinsic_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)), jit_string_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)), jit_class_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)), fixups_to_jump_tables_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)) { @@ -1721,7 +1798,7 @@ void InstructionCodeGeneratorX86_64::VisitIf(HIf* if_instr) { nullptr : codegen_->GetLabelOf(true_successor); Label* false_target = codegen_->GoesToNextBlock(if_instr->GetBlock(), false_successor) ? nullptr : codegen_->GetLabelOf(false_successor); - GenerateTestAndBranch(if_instr, /* condition_input_index */ 0, true_target, false_target); + GenerateTestAndBranch(if_instr, /* condition_input_index= */ 0, true_target, false_target); } void LocationsBuilderX86_64::VisitDeoptimize(HDeoptimize* deoptimize) { @@ -1739,9 +1816,9 @@ void LocationsBuilderX86_64::VisitDeoptimize(HDeoptimize* deoptimize) { void InstructionCodeGeneratorX86_64::VisitDeoptimize(HDeoptimize* deoptimize) { SlowPathCode* slow_path = deopt_slow_paths_.NewSlowPath<DeoptimizationSlowPathX86_64>(deoptimize); GenerateTestAndBranch<Label>(deoptimize, - /* condition_input_index */ 0, + /* condition_input_index= */ 0, slow_path->GetEntryLabel(), - /* false_target */ nullptr); + /* false_target= */ nullptr); } void LocationsBuilderX86_64::VisitShouldDeoptimizeFlag(HShouldDeoptimizeFlag* flag) { @@ -1844,8 +1921,8 @@ void InstructionCodeGeneratorX86_64::VisitSelect(HSelect* select) { } else { NearLabel false_target; GenerateTestAndBranch<NearLabel>(select, - /* condition_input_index */ 2, - /* true_target */ nullptr, + /* condition_input_index= */ 2, + /* true_target= */ nullptr, &false_target); codegen_->MoveLocation(locations->Out(), locations->InAt(1), select->GetType()); __ Bind(&false_target); @@ -2347,7 +2424,7 @@ Location InvokeDexCallingConventionVisitorX86_64::GetNextLocation(DataType::Type case DataType::Type::kUint64: case DataType::Type::kVoid: LOG(FATAL) << "Unexpected parameter type " << type; - break; + UNREACHABLE(); } return Location::NoLocation(); } @@ -2482,6 +2559,14 @@ void InstructionCodeGeneratorX86_64::VisitInvokePolymorphic(HInvokePolymorphic* codegen_->GenerateInvokePolymorphicCall(invoke); } +void LocationsBuilderX86_64::VisitInvokeCustom(HInvokeCustom* invoke) { + HandleInvoke(invoke); +} + +void InstructionCodeGeneratorX86_64::VisitInvokeCustom(HInvokeCustom* invoke) { + codegen_->GenerateInvokeCustomCall(invoke); +} + void 
LocationsBuilderX86_64::VisitNeg(HNeg* neg) { LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(neg, LocationSummary::kNoCall); @@ -3474,7 +3559,40 @@ void InstructionCodeGeneratorX86_64::DivRemOneOrMinusOne(HBinaryOperation* instr LOG(FATAL) << "Unexpected type for div by (-)1 " << instruction->GetResultType(); } } +void InstructionCodeGeneratorX86_64::RemByPowerOfTwo(HRem* instruction) { + LocationSummary* locations = instruction->GetLocations(); + Location second = locations->InAt(1); + CpuRegister out = locations->Out().AsRegister<CpuRegister>(); + CpuRegister numerator = locations->InAt(0).AsRegister<CpuRegister>(); + int64_t imm = Int64FromConstant(second.GetConstant()); + DCHECK(IsPowerOfTwo(AbsOrMin(imm))); + uint64_t abs_imm = AbsOrMin(imm); + CpuRegister tmp = locations->GetTemp(0).AsRegister<CpuRegister>(); + if (instruction->GetResultType() == DataType::Type::kInt32) { + NearLabel done; + __ movl(out, numerator); + __ andl(out, Immediate(abs_imm-1)); + __ j(Condition::kZero, &done); + __ leal(tmp, Address(out, static_cast<int32_t>(~(abs_imm-1)))); + __ testl(numerator, numerator); + __ cmov(Condition::kLess, out, tmp, false); + __ Bind(&done); + + } else { + DCHECK_EQ(instruction->GetResultType(), DataType::Type::kInt64); + codegen_->Load64BitValue(tmp, abs_imm - 1); + NearLabel done; + __ movq(out, numerator); + __ andq(out, tmp); + __ j(Condition::kZero, &done); + __ movq(tmp, numerator); + __ sarq(tmp, Immediate(63)); + __ shlq(tmp, Immediate(WhichPowerOf2(abs_imm))); + __ orq(out, tmp); + __ Bind(&done); + } +} void InstructionCodeGeneratorX86_64::DivByPowerOfTwo(HDiv* instruction) { LocationSummary* locations = instruction->GetLocations(); Location second = locations->InAt(1); @@ -3489,9 +3607,17 @@ void InstructionCodeGeneratorX86_64::DivByPowerOfTwo(HDiv* instruction) { CpuRegister tmp = locations->GetTemp(0).AsRegister<CpuRegister>(); if (instruction->GetResultType() == DataType::Type::kInt32) { - __ leal(tmp, Address(numerator, abs_imm - 1)); - __ testl(numerator, numerator); - __ cmov(kGreaterEqual, tmp, numerator); + // When denominator is equal to 2, we can add signed bit and numerator to tmp. + // Below we are using addl instruction instead of cmov which give us 1 cycle benefit. 
+ if (abs_imm == 2) { + __ leal(tmp, Address(numerator, 0)); + __ shrl(tmp, Immediate(31)); + __ addl(tmp, numerator); + } else { + __ leal(tmp, Address(numerator, abs_imm - 1)); + __ testl(numerator, numerator); + __ cmov(kGreaterEqual, tmp, numerator); + } int shift = CTZ(imm); __ sarl(tmp, Immediate(shift)); @@ -3503,11 +3629,16 @@ void InstructionCodeGeneratorX86_64::DivByPowerOfTwo(HDiv* instruction) { } else { DCHECK_EQ(instruction->GetResultType(), DataType::Type::kInt64); CpuRegister rdx = locations->GetTemp(0).AsRegister<CpuRegister>(); - - codegen_->Load64BitValue(rdx, abs_imm - 1); - __ addq(rdx, numerator); - __ testq(numerator, numerator); - __ cmov(kGreaterEqual, rdx, numerator); + if (abs_imm == 2) { + __ movq(rdx, numerator); + __ shrq(rdx, Immediate(63)); + __ addq(rdx, numerator); + } else { + codegen_->Load64BitValue(rdx, abs_imm - 1); + __ addq(rdx, numerator); + __ testq(numerator, numerator); + __ cmov(kGreaterEqual, rdx, numerator); + } int shift = CTZ(imm); __ sarq(rdx, Immediate(shift)); @@ -3547,7 +3678,7 @@ void InstructionCodeGeneratorX86_64::GenerateDivRemWithAnyConstant(HBinaryOperat if (instruction->GetResultType() == DataType::Type::kInt32) { int imm = second.GetConstant()->AsIntConstant()->GetValue(); - CalculateMagicAndShiftForDivRem(imm, false /* is_long */, &magic, &shift); + CalculateMagicAndShiftForDivRem(imm, false /* is_long= */, &magic, &shift); __ movl(numerator, eax); @@ -3584,7 +3715,7 @@ void InstructionCodeGeneratorX86_64::GenerateDivRemWithAnyConstant(HBinaryOperat CpuRegister rax = eax; CpuRegister rdx = edx; - CalculateMagicAndShiftForDivRem(imm, true /* is_long */, &magic, &shift); + CalculateMagicAndShiftForDivRem(imm, true /* is_long= */, &magic, &shift); // Save the numerator. __ movq(numerator, rax); @@ -3651,8 +3782,12 @@ void InstructionCodeGeneratorX86_64::GenerateDivRemIntegral(HBinaryOperation* in // Do not generate anything. DivZeroCheck would prevent any code to be executed. } else if (imm == 1 || imm == -1) { DivRemOneOrMinusOne(instruction); - } else if (instruction->IsDiv() && IsPowerOfTwo(AbsOrMin(imm))) { - DivByPowerOfTwo(instruction->AsDiv()); + } else if (IsPowerOfTwo(AbsOrMin(imm))) { + if (is_div) { + DivByPowerOfTwo(instruction->AsDiv()); + } else { + RemByPowerOfTwo(instruction->AsRem()); + } } else { DCHECK(imm <= -2 || imm >= 2); GenerateDivRemWithAnyConstant(instruction); @@ -3821,6 +3956,241 @@ void InstructionCodeGeneratorX86_64::VisitRem(HRem* rem) { } } +static void CreateMinMaxLocations(ArenaAllocator* allocator, HBinaryOperation* minmax) { + LocationSummary* locations = new (allocator) LocationSummary(minmax); + switch (minmax->GetResultType()) { + case DataType::Type::kInt32: + case DataType::Type::kInt64: + locations->SetInAt(0, Location::RequiresRegister()); + locations->SetInAt(1, Location::RequiresRegister()); + locations->SetOut(Location::SameAsFirstInput()); + break; + case DataType::Type::kFloat32: + case DataType::Type::kFloat64: + locations->SetInAt(0, Location::RequiresFpuRegister()); + locations->SetInAt(1, Location::RequiresFpuRegister()); + // The following is sub-optimal, but all we can do for now. It would be fine to also accept + // the second input to be the output (we can simply swap inputs). 
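The DivByPowerOfTwo and RemByPowerOfTwo paths above avoid idiv for divisors of magnitude 2^k while keeping Java's round-toward-zero semantics; the new abs_imm == 2 special case trades the cmov for a shr/add pair. A scalar sketch of the underlying arithmetic (positive divisors only; the negative-divisor negation and the 64-bit variants are omitted, and an arithmetic right shift of signed values is assumed):

#include <cassert>
#include <cstdint>
#include <cstdio>

// Round-toward-zero division by 2^k without idiv: bias negative numerators by
// (2^k - 1) before the arithmetic shift (the generated code uses lea/test/cmov,
// or shr + add when k == 1).
int32_t DivByPowerOfTwo(int32_t n, int k) {
  int32_t bias = (n < 0) ? ((1 << k) - 1) : 0;
  return (n + bias) >> k;
}

// Truncated remainder by 2^k: mask the low bits, then subtract 2^k when the
// numerator is negative and the masked bits are non-zero (mirrors RemByPowerOfTwo).
int32_t RemByPowerOfTwo(int32_t n, int k) {
  int32_t mask = (1 << k) - 1;
  int32_t r = n & mask;
  if (r != 0 && n < 0) {
    r -= (1 << k);
  }
  return r;
}

int main() {
  for (int32_t n : {7, -7, 8, -8, 1, -1, 0}) {
    assert(DivByPowerOfTwo(n, 2) == n / 4);
    assert(RemByPowerOfTwo(n, 2) == n % 4);
  }
  std::printf("ok\n");
  return 0;
}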
+ locations->SetOut(Location::SameAsFirstInput()); + break; + default: + LOG(FATAL) << "Unexpected type for HMinMax " << minmax->GetResultType(); + } +} + +void InstructionCodeGeneratorX86_64::GenerateMinMaxInt(LocationSummary* locations, + bool is_min, + DataType::Type type) { + Location op1_loc = locations->InAt(0); + Location op2_loc = locations->InAt(1); + + // Shortcut for same input locations. + if (op1_loc.Equals(op2_loc)) { + // Can return immediately, as op1_loc == out_loc. + // Note: if we ever support separate registers, e.g., output into memory, we need to check for + // a copy here. + DCHECK(locations->Out().Equals(op1_loc)); + return; + } + + CpuRegister out = locations->Out().AsRegister<CpuRegister>(); + CpuRegister op2 = op2_loc.AsRegister<CpuRegister>(); + + // (out := op1) + // out <=? op2 + // if out is min jmp done + // out := op2 + // done: + + if (type == DataType::Type::kInt64) { + __ cmpq(out, op2); + __ cmov(is_min ? Condition::kGreater : Condition::kLess, out, op2, /*is64bit*/ true); + } else { + DCHECK_EQ(type, DataType::Type::kInt32); + __ cmpl(out, op2); + __ cmov(is_min ? Condition::kGreater : Condition::kLess, out, op2, /*is64bit*/ false); + } +} + +void InstructionCodeGeneratorX86_64::GenerateMinMaxFP(LocationSummary* locations, + bool is_min, + DataType::Type type) { + Location op1_loc = locations->InAt(0); + Location op2_loc = locations->InAt(1); + Location out_loc = locations->Out(); + XmmRegister out = out_loc.AsFpuRegister<XmmRegister>(); + + // Shortcut for same input locations. + if (op1_loc.Equals(op2_loc)) { + DCHECK(out_loc.Equals(op1_loc)); + return; + } + + // (out := op1) + // out <=? op2 + // if Nan jmp Nan_label + // if out is min jmp done + // if op2 is min jmp op2_label + // handle -0/+0 + // jmp done + // Nan_label: + // out := NaN + // op2_label: + // out := op2 + // done: + // + // This removes one jmp, but needs to copy one input (op1) to out. + // + // TODO: This is straight from Quick. Make NaN an out-of-line slowpath? + + XmmRegister op2 = op2_loc.AsFpuRegister<XmmRegister>(); + + NearLabel nan, done, op2_label; + if (type == DataType::Type::kFloat64) { + __ ucomisd(out, op2); + } else { + DCHECK_EQ(type, DataType::Type::kFloat32); + __ ucomiss(out, op2); + } + + __ j(Condition::kParityEven, &nan); + + __ j(is_min ? Condition::kAbove : Condition::kBelow, &op2_label); + __ j(is_min ? Condition::kBelow : Condition::kAbove, &done); + + // Handle 0.0/-0.0. + if (is_min) { + if (type == DataType::Type::kFloat64) { + __ orpd(out, op2); + } else { + __ orps(out, op2); + } + } else { + if (type == DataType::Type::kFloat64) { + __ andpd(out, op2); + } else { + __ andps(out, op2); + } + } + __ jmp(&done); + + // NaN handling. + __ Bind(&nan); + if (type == DataType::Type::kFloat64) { + __ movsd(out, codegen_->LiteralInt64Address(INT64_C(0x7FF8000000000000))); + } else { + __ movss(out, codegen_->LiteralInt32Address(INT32_C(0x7FC00000))); + } + __ jmp(&done); + + // out := op2; + __ Bind(&op2_label); + if (type == DataType::Type::kFloat64) { + __ movsd(out, op2); + } else { + __ movss(out, op2); + } + + // Done. 
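GenerateMinMaxFP above follows Java's Math.min/Math.max rules: a NaN operand produces the canonical NaN, and -0.0 orders below +0.0, which the code handles by OR-ing (for min) or AND-ing (for max) the raw bit patterns when ucomis reports equality. A scalar sketch of the min case under those assumptions:

#include <cassert>
#include <cmath>
#include <cstdint>
#include <cstring>
#include <limits>

double JavaStyleMin(double a, double b) {
  if (std::isnan(a) || std::isnan(b)) {
    // The generated code loads the canonical quiet NaN literal (0x7FF8000000000000).
    return std::numeric_limits<double>::quiet_NaN();
  }
  if (a < b) return a;
  if (b < a) return b;
  // Equal operands: distinguish -0.0 from +0.0 by OR-ing the bit patterns, which
  // keeps the sign bit set if either operand is -0.0 (like the orpd above).
  uint64_t ba, bb;
  std::memcpy(&ba, &a, sizeof(a));
  std::memcpy(&bb, &b, sizeof(b));
  uint64_t br = ba | bb;
  double out;
  std::memcpy(&out, &br, sizeof(out));
  return out;
}

int main() {
  assert(std::signbit(JavaStyleMin(0.0, -0.0)));        // min(+0.0, -0.0) is -0.0.
  assert(std::isnan(JavaStyleMin(1.0, std::nan(""))));  // NaN propagates.
  assert(JavaStyleMin(1.0, 2.0) == 1.0);
  return 0;
}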
+ __ Bind(&done); +} + +void InstructionCodeGeneratorX86_64::GenerateMinMax(HBinaryOperation* minmax, bool is_min) { + DataType::Type type = minmax->GetResultType(); + switch (type) { + case DataType::Type::kInt32: + case DataType::Type::kInt64: + GenerateMinMaxInt(minmax->GetLocations(), is_min, type); + break; + case DataType::Type::kFloat32: + case DataType::Type::kFloat64: + GenerateMinMaxFP(minmax->GetLocations(), is_min, type); + break; + default: + LOG(FATAL) << "Unexpected type for HMinMax " << type; + } +} + +void LocationsBuilderX86_64::VisitMin(HMin* min) { + CreateMinMaxLocations(GetGraph()->GetAllocator(), min); +} + +void InstructionCodeGeneratorX86_64::VisitMin(HMin* min) { + GenerateMinMax(min, /*is_min*/ true); +} + +void LocationsBuilderX86_64::VisitMax(HMax* max) { + CreateMinMaxLocations(GetGraph()->GetAllocator(), max); +} + +void InstructionCodeGeneratorX86_64::VisitMax(HMax* max) { + GenerateMinMax(max, /*is_min*/ false); +} + +void LocationsBuilderX86_64::VisitAbs(HAbs* abs) { + LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(abs); + switch (abs->GetResultType()) { + case DataType::Type::kInt32: + case DataType::Type::kInt64: + locations->SetInAt(0, Location::RequiresRegister()); + locations->SetOut(Location::SameAsFirstInput()); + locations->AddTemp(Location::RequiresRegister()); + break; + case DataType::Type::kFloat32: + case DataType::Type::kFloat64: + locations->SetInAt(0, Location::RequiresFpuRegister()); + locations->SetOut(Location::SameAsFirstInput()); + locations->AddTemp(Location::RequiresFpuRegister()); + break; + default: + LOG(FATAL) << "Unexpected type for HAbs " << abs->GetResultType(); + } +} + +void InstructionCodeGeneratorX86_64::VisitAbs(HAbs* abs) { + LocationSummary* locations = abs->GetLocations(); + switch (abs->GetResultType()) { + case DataType::Type::kInt32: { + CpuRegister out = locations->Out().AsRegister<CpuRegister>(); + CpuRegister mask = locations->GetTemp(0).AsRegister<CpuRegister>(); + // Create mask. + __ movl(mask, out); + __ sarl(mask, Immediate(31)); + // Add mask. + __ addl(out, mask); + __ xorl(out, mask); + break; + } + case DataType::Type::kInt64: { + CpuRegister out = locations->Out().AsRegister<CpuRegister>(); + CpuRegister mask = locations->GetTemp(0).AsRegister<CpuRegister>(); + // Create mask. + __ movq(mask, out); + __ sarq(mask, Immediate(63)); + // Add mask. 
+ __ addq(out, mask); + __ xorq(out, mask); + break; + } + case DataType::Type::kFloat32: { + XmmRegister out = locations->Out().AsFpuRegister<XmmRegister>(); + XmmRegister mask = locations->GetTemp(0).AsFpuRegister<XmmRegister>(); + __ movss(mask, codegen_->LiteralInt32Address(INT32_C(0x7FFFFFFF))); + __ andps(out, mask); + break; + } + case DataType::Type::kFloat64: { + XmmRegister out = locations->Out().AsFpuRegister<XmmRegister>(); + XmmRegister mask = locations->GetTemp(0).AsFpuRegister<XmmRegister>(); + __ movsd(mask, codegen_->LiteralInt64Address(INT64_C(0x7FFFFFFFFFFFFFFF))); + __ andpd(out, mask); + break; + } + default: + LOG(FATAL) << "Unexpected type for HAbs " << abs->GetResultType(); + } +} + void LocationsBuilderX86_64::VisitDivZeroCheck(HDivZeroCheck* instruction) { LocationSummary* locations = codegen_->CreateThrowingSlowPathLocations(instruction); locations->SetInAt(0, Location::Any()); @@ -4030,29 +4400,14 @@ void LocationsBuilderX86_64::VisitNewInstance(HNewInstance* instruction) { LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary( instruction, LocationSummary::kCallOnMainOnly); InvokeRuntimeCallingConvention calling_convention; - if (instruction->IsStringAlloc()) { - locations->AddTemp(Location::RegisterLocation(kMethodRegisterArgument)); - } else { - locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0))); - } + locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0))); locations->SetOut(Location::RegisterLocation(RAX)); } void InstructionCodeGeneratorX86_64::VisitNewInstance(HNewInstance* instruction) { - // Note: if heap poisoning is enabled, the entry point takes cares - // of poisoning the reference. - if (instruction->IsStringAlloc()) { - // String is allocated through StringFactory. Call NewEmptyString entry point. - CpuRegister temp = instruction->GetLocations()->GetTemp(0).AsRegister<CpuRegister>(); - MemberOffset code_offset = ArtMethod::EntryPointFromQuickCompiledCodeOffset(kX86_64PointerSize); - __ gs()->movq(temp, Address::Absolute(QUICK_ENTRY_POINT(pNewEmptyString), /* no_rip */ true)); - __ call(Address(temp, code_offset.SizeValue())); - codegen_->RecordPcInfo(instruction, instruction->GetDexPc()); - } else { - codegen_->InvokeRuntime(instruction->GetEntrypoint(), instruction, instruction->GetDexPc()); - CheckEntrypointTypes<kQuickAllocObjectWithChecks, void*, mirror::Class*>(); - DCHECK(!codegen_->IsLeafMethod()); - } + codegen_->InvokeRuntime(instruction->GetEntrypoint(), instruction, instruction->GetDexPc()); + CheckEntrypointTypes<kQuickAllocObjectWithChecks, void*, mirror::Class*>(); + DCHECK(!codegen_->IsLeafMethod()); } void LocationsBuilderX86_64::VisitNewArray(HNewArray* instruction) { @@ -4065,10 +4420,8 @@ void LocationsBuilderX86_64::VisitNewArray(HNewArray* instruction) { } void InstructionCodeGeneratorX86_64::VisitNewArray(HNewArray* instruction) { - // Note: if heap poisoning is enabled, the entry point takes cares - // of poisoning the reference. - QuickEntrypointEnum entrypoint = - CodeGenerator::GetArrayAllocationEntrypoint(instruction->GetLoadClass()->GetClass()); + // Note: if heap poisoning is enabled, the entry point takes care of poisoning the reference. 
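VisitAbs above is branch-free: the integer paths build an all-ones or all-zeros mask with an arithmetic shift and then apply add-then-xor, while the floating-point paths clear the sign bit with a constant AND mask. The same tricks in scalar form (illustrative; INT32_MIN wraps to itself, as with the generated code):

#include <cassert>
#include <cstdint>
#include <cstring>

int32_t AbsInt32(int32_t x) {
  uint32_t mask = static_cast<uint32_t>(x >> 31);  // 0 if x >= 0, 0xFFFFFFFF if x < 0.
  // Adding the mask and xor-ing with it is a no-op for mask == 0 and a
  // two's-complement negation for mask == 0xFFFFFFFF.
  return static_cast<int32_t>((static_cast<uint32_t>(x) + mask) ^ mask);
}

double AbsDouble(double x) {
  uint64_t bits;
  std::memcpy(&bits, &x, sizeof(x));
  bits &= UINT64_C(0x7FFFFFFFFFFFFFFF);  // Clear the sign bit, like the andpd mask.
  std::memcpy(&x, &bits, sizeof(x));
  return x;
}

int main() {
  assert(AbsInt32(-5) == 5);
  assert(AbsInt32(7) == 7);
  assert(AbsDouble(-2.5) == 2.5);
  return 0;
}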
+ QuickEntrypointEnum entrypoint = CodeGenerator::GetArrayAllocationEntrypoint(instruction); codegen_->InvokeRuntime(entrypoint, instruction, instruction->GetDexPc()); CheckEntrypointTypes<kQuickAllocArrayResolved, void*, mirror::Class*, int32_t>(); DCHECK(!codegen_->IsLeafMethod()); @@ -4200,7 +4553,7 @@ void CodeGeneratorX86_64::GenerateMemoryBarrier(MemBarrierKind kind) { } case MemBarrierKind::kNTStoreStore: // Non-Temporal Store/Store needs an explicit fence. - MemoryFence(/* non-temporal */ true); + MemoryFence(/* non-temporal= */ true); break; } } @@ -4277,7 +4630,7 @@ void InstructionCodeGeneratorX86_64::HandleFieldGet(HInstruction* instruction, // Note that a potential implicit null check is handled in this // CodeGeneratorX86_64::GenerateFieldLoadWithBakerReadBarrier call. codegen_->GenerateFieldLoadWithBakerReadBarrier( - instruction, out, base, offset, /* needs_null_check */ true); + instruction, out, base, offset, /* needs_null_check= */ true); if (is_volatile) { codegen_->GenerateMemoryBarrier(MemBarrierKind::kLoadAny); } @@ -4732,7 +5085,7 @@ void InstructionCodeGeneratorX86_64::VisitArrayGet(HArrayGet* instruction) { // Note that a potential implicit null check is handled in this // CodeGeneratorX86_64::GenerateArrayLoadWithBakerReadBarrier call. codegen_->GenerateArrayLoadWithBakerReadBarrier( - instruction, out_loc, obj, data_offset, index, /* needs_null_check */ true); + instruction, out_loc, obj, data_offset, index, /* needs_null_check= */ true); } else { CpuRegister out = out_loc.AsRegister<CpuRegister>(); __ movl(out, CodeGeneratorX86_64::ArrayAddress(obj, index, TIMES_4, data_offset)); @@ -5130,10 +5483,26 @@ void CodeGeneratorX86_64::MarkGCCard(CpuRegister temp, __ testl(value, value); __ j(kEqual, &is_null); } + // Load the address of the card table into `card`. __ gs()->movq(card, Address::Absolute(Thread::CardTableOffset<kX86_64PointerSize>().Int32Value(), - /* no_rip */ true)); + /* no_rip= */ true)); + // Calculate the offset (in the card table) of the card corresponding to + // `object`. __ movq(temp, object); __ shrq(temp, Immediate(gc::accounting::CardTable::kCardShift)); + // Write the `art::gc::accounting::CardTable::kCardDirty` value into the + // `object`'s card. + // + // Register `card` contains the address of the card table. Note that the card + // table's base is biased during its creation so that it always starts at an + // address whose least-significant byte is equal to `kCardDirty` (see + // art::gc::accounting::CardTable::Create). Therefore the MOVB instruction + // below writes the `kCardDirty` (byte) value into the `object`'s card + // (located at `card + object >> kCardShift`). + // + // This dual use of the value in register `card` (1. to calculate the location + // of the card to mark; and 2. to load the `kCardDirty` value) saves a load + // (no need to explicitly load `kCardDirty` as an immediate value). __ movb(Address(temp, card, TIMES_1, 0), card); if (value_can_be_null) { __ Bind(&is_null); @@ -5196,7 +5565,7 @@ void InstructionCodeGeneratorX86_64::GenerateSuspendCheck(HSuspendCheck* instruc } __ gs()->cmpw(Address::Absolute(Thread::ThreadFlagsOffset<kX86_64PointerSize>().Int32Value(), - /* no_rip */ true), + /* no_rip= */ true), Immediate(0)); if (successor == nullptr) { __ j(kNotEqual, slow_path->GetEntryLabel()); @@ -5462,6 +5831,26 @@ void InstructionCodeGeneratorX86_64::GenerateClassInitializationCheck( // No need for memory fence, thanks to the x86-64 memory model. 
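The expanded comment in MarkGCCard above documents the biased card table trick: the table base is created so that its least-significant byte equals kCardDirty, so a single register supplies both the card address and the value stored by the movb. A toy sketch of that write barrier (the constants are representative rather than authoritative, and the heap-base part of the bias is ignored):

#include <cassert>
#include <cstddef>
#include <cstdint>
#include <vector>

constexpr size_t kCardShift = 10;     // Each card byte covers 2^kCardShift heap bytes.
constexpr uint8_t kCardDirty = 0x70;  // Byte value marking a dirty card.

// 'biased_base' is the card-table base, biased at creation time so that its
// least-significant address byte equals kCardDirty.
void MarkCard(uint8_t* biased_base, uintptr_t object_address) {
  uint8_t* card = biased_base + (object_address >> kCardShift);
  // Dual use of the base: its low byte is exactly kCardDirty, so no separate
  // immediate load is needed (this is the movb in MarkGCCard above).
  *card = static_cast<uint8_t>(reinterpret_cast<uintptr_t>(biased_base));
}

int main() {
  // Toy heap starting at address 0, spanning 16 cards.
  std::vector<uint8_t> storage(16 + 256, 0);
  uint8_t* biased_base = storage.data();
  // Pick a base inside 'storage' whose low address byte happens to be kCardDirty.
  while ((reinterpret_cast<uintptr_t>(biased_base) & 0xFF) != kCardDirty) ++biased_base;
  MarkCard(biased_base, /* object_address= */ static_cast<uintptr_t>(3) << kCardShift);
  assert(biased_base[3] == kCardDirty);
  return 0;
}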
} +void InstructionCodeGeneratorX86_64::GenerateBitstringTypeCheckCompare(HTypeCheckInstruction* check, + CpuRegister temp) { + uint32_t path_to_root = check->GetBitstringPathToRoot(); + uint32_t mask = check->GetBitstringMask(); + DCHECK(IsPowerOfTwo(mask + 1)); + size_t mask_bits = WhichPowerOf2(mask + 1); + + if (mask_bits == 16u) { + // Compare the bitstring in memory. + __ cmpw(Address(temp, mirror::Class::StatusOffset()), Immediate(path_to_root)); + } else { + // /* uint32_t */ temp = temp->status_ + __ movl(temp, Address(temp, mirror::Class::StatusOffset())); + // Compare the bitstring bits using SUB. + __ subl(temp, Immediate(path_to_root)); + // Shift out bits that do not contribute to the comparison. + __ shll(temp, Immediate(32u - mask_bits)); + } +} + HLoadClass::LoadKind CodeGeneratorX86_64::GetSupportedLoadClassKind( HLoadClass::LoadKind desired_class_load_kind) { switch (desired_class_load_kind) { @@ -5471,14 +5860,14 @@ HLoadClass::LoadKind CodeGeneratorX86_64::GetSupportedLoadClassKind( case HLoadClass::LoadKind::kReferrersClass: break; case HLoadClass::LoadKind::kBootImageLinkTimePcRelative: - case HLoadClass::LoadKind::kBootImageClassTable: + case HLoadClass::LoadKind::kBootImageRelRo: case HLoadClass::LoadKind::kBssEntry: DCHECK(!Runtime::Current()->UseJitCompilation()); break; + case HLoadClass::LoadKind::kJitBootImageAddress: case HLoadClass::LoadKind::kJitTableAddress: DCHECK(Runtime::Current()->UseJitCompilation()); break; - case HLoadClass::LoadKind::kBootImageAddress: case HLoadClass::LoadKind::kRuntimeCall: break; } @@ -5513,10 +5902,7 @@ void LocationsBuilderX86_64::VisitLoadClass(HLoadClass* cls) { if (load_kind == HLoadClass::LoadKind::kBssEntry) { if (!kUseReadBarrier || kUseBakerReadBarrier) { // Rely on the type resolution and/or initialization to save everything. - // Custom calling convention: RAX serves as both input and output. - RegisterSet caller_saves = RegisterSet::Empty(); - caller_saves.Add(Location::RegisterLocation(RAX)); - locations->SetCustomSlowPathCallerSaves(caller_saves); + locations->SetCustomSlowPathCallerSaves(OneRegInReferenceOutSaveEverythingCallerSaves()); } else { // For non-Baker read barrier we have a temp-clobbering call. } @@ -5561,48 +5947,41 @@ void InstructionCodeGeneratorX86_64::VisitLoadClass(HLoadClass* cls) NO_THREAD_S cls, out_loc, Address(current_method, ArtMethod::DeclaringClassOffset().Int32Value()), - /* fixup_label */ nullptr, + /* fixup_label= */ nullptr, read_barrier_option); break; } case HLoadClass::LoadKind::kBootImageLinkTimePcRelative: DCHECK(codegen_->GetCompilerOptions().IsBootImage()); DCHECK_EQ(read_barrier_option, kWithoutReadBarrier); - __ leal(out, Address::Absolute(CodeGeneratorX86_64::kDummy32BitOffset, /* no_rip */ false)); + __ leal(out, Address::Absolute(CodeGeneratorX86_64::kDummy32BitOffset, /* no_rip= */ false)); codegen_->RecordBootImageTypePatch(cls); break; - case HLoadClass::LoadKind::kBootImageAddress: { - DCHECK_EQ(read_barrier_option, kWithoutReadBarrier); - uint32_t address = dchecked_integral_cast<uint32_t>( - reinterpret_cast<uintptr_t>(cls->GetClass().Get())); - DCHECK_NE(address, 0u); - __ movl(out, Immediate(static_cast<int32_t>(address))); // Zero-extended. 
- break; - } - case HLoadClass::LoadKind::kBootImageClassTable: { + case HLoadClass::LoadKind::kBootImageRelRo: { DCHECK(!codegen_->GetCompilerOptions().IsBootImage()); - __ movl(out, Address::Absolute(CodeGeneratorX86_64::kDummy32BitOffset, /* no_rip */ false)); - codegen_->RecordBootImageTypePatch(cls); - // Extract the reference from the slot data, i.e. clear the hash bits. - int32_t masked_hash = ClassTable::TableSlot::MaskHash( - ComputeModifiedUtf8Hash(cls->GetDexFile().StringByTypeIdx(cls->GetTypeIndex()))); - if (masked_hash != 0) { - __ subl(out, Immediate(masked_hash)); - } + __ movl(out, Address::Absolute(CodeGeneratorX86_64::kDummy32BitOffset, /* no_rip= */ false)); + codegen_->RecordBootImageRelRoPatch(codegen_->GetBootImageOffset(cls)); break; } case HLoadClass::LoadKind::kBssEntry: { Address address = Address::Absolute(CodeGeneratorX86_64::kDummy32BitOffset, - /* no_rip */ false); + /* no_rip= */ false); Label* fixup_label = codegen_->NewTypeBssEntryPatch(cls); // /* GcRoot<mirror::Class> */ out = *address /* PC-relative */ GenerateGcRootFieldLoad(cls, out_loc, address, fixup_label, read_barrier_option); generate_null_check = true; break; } + case HLoadClass::LoadKind::kJitBootImageAddress: { + DCHECK_EQ(read_barrier_option, kWithoutReadBarrier); + uint32_t address = reinterpret_cast32<uint32_t>(cls->GetClass().Get()); + DCHECK_NE(address, 0u); + __ movl(out, Immediate(static_cast<int32_t>(address))); // Zero-extended. + break; + } case HLoadClass::LoadKind::kJitTableAddress: { Address address = Address::Absolute(CodeGeneratorX86_64::kDummy32BitOffset, - /* no_rip */ true); + /* no_rip= */ true); Label* fixup_label = codegen_->NewJitRootClassPatch(cls->GetDexFile(), cls->GetTypeIndex(), cls->GetClass()); // /* GcRoot<mirror::Class> */ out = *address @@ -5616,8 +5995,8 @@ void InstructionCodeGeneratorX86_64::VisitLoadClass(HLoadClass* cls) NO_THREAD_S if (generate_null_check || cls->MustGenerateClinitCheck()) { DCHECK(cls->CanCallRuntime()); - SlowPathCode* slow_path = new (codegen_->GetScopedAllocator()) LoadClassSlowPathX86_64( - cls, cls, cls->GetDexPc(), cls->MustGenerateClinitCheck()); + SlowPathCode* slow_path = + new (codegen_->GetScopedAllocator()) LoadClassSlowPathX86_64(cls, cls); codegen_->AddSlowPath(slow_path); if (generate_null_check) { __ testl(out, out); @@ -5638,12 +6017,34 @@ void LocationsBuilderX86_64::VisitClinitCheck(HClinitCheck* check) { if (check->HasUses()) { locations->SetOut(Location::SameAsFirstInput()); } + // Rely on the type initialization to save everything we need. + locations->SetCustomSlowPathCallerSaves(OneRegInReferenceOutSaveEverythingCallerSaves()); +} + +void LocationsBuilderX86_64::VisitLoadMethodHandle(HLoadMethodHandle* load) { + // Custom calling convention: RAX serves as both input and output. + Location location = Location::RegisterLocation(RAX); + CodeGenerator::CreateLoadMethodHandleRuntimeCallLocationSummary(load, location, location); +} + +void InstructionCodeGeneratorX86_64::VisitLoadMethodHandle(HLoadMethodHandle* load) { + codegen_->GenerateLoadMethodHandleRuntimeCall(load); +} + +void LocationsBuilderX86_64::VisitLoadMethodType(HLoadMethodType* load) { + // Custom calling convention: RAX serves as both input and output. 
+ Location location = Location::RegisterLocation(RAX); + CodeGenerator::CreateLoadMethodTypeRuntimeCallLocationSummary(load, location, location); +} + +void InstructionCodeGeneratorX86_64::VisitLoadMethodType(HLoadMethodType* load) { + codegen_->GenerateLoadMethodTypeRuntimeCall(load); } void InstructionCodeGeneratorX86_64::VisitClinitCheck(HClinitCheck* check) { // We assume the class to not be null. - SlowPathCode* slow_path = new (codegen_->GetScopedAllocator()) LoadClassSlowPathX86_64( - check->GetLoadClass(), check, check->GetDexPc(), true); + SlowPathCode* slow_path = + new (codegen_->GetScopedAllocator()) LoadClassSlowPathX86_64(check->GetLoadClass(), check); codegen_->AddSlowPath(slow_path); GenerateClassInitializationCheck(slow_path, check->GetLocations()->InAt(0).AsRegister<CpuRegister>()); @@ -5653,14 +6054,14 @@ HLoadString::LoadKind CodeGeneratorX86_64::GetSupportedLoadStringKind( HLoadString::LoadKind desired_string_load_kind) { switch (desired_string_load_kind) { case HLoadString::LoadKind::kBootImageLinkTimePcRelative: - case HLoadString::LoadKind::kBootImageInternTable: + case HLoadString::LoadKind::kBootImageRelRo: case HLoadString::LoadKind::kBssEntry: DCHECK(!Runtime::Current()->UseJitCompilation()); break; + case HLoadString::LoadKind::kJitBootImageAddress: case HLoadString::LoadKind::kJitTableAddress: DCHECK(Runtime::Current()->UseJitCompilation()); break; - case HLoadString::LoadKind::kBootImageAddress: case HLoadString::LoadKind::kRuntimeCall: break; } @@ -5677,10 +6078,7 @@ void LocationsBuilderX86_64::VisitLoadString(HLoadString* load) { if (load->GetLoadKind() == HLoadString::LoadKind::kBssEntry) { if (!kUseReadBarrier || kUseBakerReadBarrier) { // Rely on the pResolveString to save everything. - // Custom calling convention: RAX serves as both input and output. - RegisterSet caller_saves = RegisterSet::Empty(); - caller_saves.Add(Location::RegisterLocation(RAX)); - locations->SetCustomSlowPathCallerSaves(caller_saves); + locations->SetCustomSlowPathCallerSaves(OneRegInReferenceOutSaveEverythingCallerSaves()); } else { // For non-Baker read barrier we have a temp-clobbering call. } @@ -5708,26 +6106,19 @@ void InstructionCodeGeneratorX86_64::VisitLoadString(HLoadString* load) NO_THREA switch (load->GetLoadKind()) { case HLoadString::LoadKind::kBootImageLinkTimePcRelative: { DCHECK(codegen_->GetCompilerOptions().IsBootImage()); - __ leal(out, Address::Absolute(CodeGeneratorX86_64::kDummy32BitOffset, /* no_rip */ false)); + __ leal(out, Address::Absolute(CodeGeneratorX86_64::kDummy32BitOffset, /* no_rip= */ false)); codegen_->RecordBootImageStringPatch(load); return; } - case HLoadString::LoadKind::kBootImageAddress: { - uint32_t address = dchecked_integral_cast<uint32_t>( - reinterpret_cast<uintptr_t>(load->GetString().Get())); - DCHECK_NE(address, 0u); - __ movl(out, Immediate(static_cast<int32_t>(address))); // Zero-extended. 
- return; - } - case HLoadString::LoadKind::kBootImageInternTable: { + case HLoadString::LoadKind::kBootImageRelRo: { DCHECK(!codegen_->GetCompilerOptions().IsBootImage()); - __ movl(out, Address::Absolute(CodeGeneratorX86_64::kDummy32BitOffset, /* no_rip */ false)); - codegen_->RecordBootImageStringPatch(load); + __ movl(out, Address::Absolute(CodeGeneratorX86_64::kDummy32BitOffset, /* no_rip= */ false)); + codegen_->RecordBootImageRelRoPatch(codegen_->GetBootImageOffset(load)); return; } case HLoadString::LoadKind::kBssEntry: { Address address = Address::Absolute(CodeGeneratorX86_64::kDummy32BitOffset, - /* no_rip */ false); + /* no_rip= */ false); Label* fixup_label = codegen_->NewStringBssEntryPatch(load); // /* GcRoot<mirror::Class> */ out = *address /* PC-relative */ GenerateGcRootFieldLoad(load, out_loc, address, fixup_label, kCompilerReadBarrierOption); @@ -5738,9 +6129,15 @@ void InstructionCodeGeneratorX86_64::VisitLoadString(HLoadString* load) NO_THREA __ Bind(slow_path->GetExitLabel()); return; } + case HLoadString::LoadKind::kJitBootImageAddress: { + uint32_t address = reinterpret_cast32<uint32_t>(load->GetString().Get()); + DCHECK_NE(address, 0u); + __ movl(out, Immediate(static_cast<int32_t>(address))); // Zero-extended. + return; + } case HLoadString::LoadKind::kJitTableAddress: { Address address = Address::Absolute(CodeGeneratorX86_64::kDummy32BitOffset, - /* no_rip */ true); + /* no_rip= */ true); Label* fixup_label = codegen_->NewJitRootStringPatch( load->GetDexFile(), load->GetStringIndex(), load->GetString()); // /* GcRoot<mirror::String> */ out = *address @@ -5762,7 +6159,7 @@ void InstructionCodeGeneratorX86_64::VisitLoadString(HLoadString* load) NO_THREA static Address GetExceptionTlsAddress() { return Address::Absolute(Thread::ExceptionOffset<kX86_64PointerSize>().Int32Value(), - /* no_rip */ true); + /* no_rip= */ true); } void LocationsBuilderX86_64::VisitLoadException(HLoadException* load) { @@ -5795,24 +6192,26 @@ void InstructionCodeGeneratorX86_64::VisitThrow(HThrow* instruction) { CheckEntrypointTypes<kQuickDeliverException, void, mirror::Object*>(); } -static bool CheckCastTypeCheckNeedsATemporary(TypeCheckKind type_check_kind) { - if (type_check_kind == TypeCheckKind::kInterfaceCheck) { - // We need a temporary for holding the iftable length. - return true; - } - return kEmitCompilerReadBarrier && +// Temp is used for read barrier. +static size_t NumberOfInstanceOfTemps(TypeCheckKind type_check_kind) { + if (kEmitCompilerReadBarrier && !kUseBakerReadBarrier && (type_check_kind == TypeCheckKind::kAbstractClassCheck || type_check_kind == TypeCheckKind::kClassHierarchyCheck || - type_check_kind == TypeCheckKind::kArrayObjectCheck); + type_check_kind == TypeCheckKind::kArrayObjectCheck)) { + return 1; + } + return 0; } -static bool InstanceOfTypeCheckNeedsATemporary(TypeCheckKind type_check_kind) { - return kEmitCompilerReadBarrier && - !kUseBakerReadBarrier && - (type_check_kind == TypeCheckKind::kAbstractClassCheck || - type_check_kind == TypeCheckKind::kClassHierarchyCheck || - type_check_kind == TypeCheckKind::kArrayObjectCheck); +// Interface case has 2 temps, one for holding the number of interfaces, one for the current +// interface pointer, the current interface is compared in memory. +// The other checks have one temp for loading the object's class. 
+static size_t NumberOfCheckCastTemps(TypeCheckKind type_check_kind) { + if (type_check_kind == TypeCheckKind::kInterfaceCheck) { + return 2; + } + return 1 + NumberOfInstanceOfTemps(type_check_kind); } void LocationsBuilderX86_64::VisitInstanceOf(HInstanceOf* instruction) { @@ -5834,6 +6233,8 @@ void LocationsBuilderX86_64::VisitInstanceOf(HInstanceOf* instruction) { case TypeCheckKind::kInterfaceCheck: call_kind = LocationSummary::kCallOnSlowPath; break; + case TypeCheckKind::kBitstringCheck: + break; } LocationSummary* locations = @@ -5842,14 +6243,16 @@ void LocationsBuilderX86_64::VisitInstanceOf(HInstanceOf* instruction) { locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty()); // No caller-save registers. } locations->SetInAt(0, Location::RequiresRegister()); - locations->SetInAt(1, Location::Any()); + if (type_check_kind == TypeCheckKind::kBitstringCheck) { + locations->SetInAt(1, Location::ConstantLocation(instruction->InputAt(1)->AsConstant())); + locations->SetInAt(2, Location::ConstantLocation(instruction->InputAt(2)->AsConstant())); + locations->SetInAt(3, Location::ConstantLocation(instruction->InputAt(3)->AsConstant())); + } else { + locations->SetInAt(1, Location::Any()); + } // Note that TypeCheckSlowPathX86_64 uses this "out" register too. locations->SetOut(Location::RequiresRegister()); - // When read barriers are enabled, we need a temporary register for - // some cases. - if (InstanceOfTypeCheckNeedsATemporary(type_check_kind)) { - locations->AddTemp(Location::RequiresRegister()); - } + locations->AddRegisterTemps(NumberOfInstanceOfTemps(type_check_kind)); } void InstructionCodeGeneratorX86_64::VisitInstanceOf(HInstanceOf* instruction) { @@ -5860,9 +6263,9 @@ void InstructionCodeGeneratorX86_64::VisitInstanceOf(HInstanceOf* instruction) { Location cls = locations->InAt(1); Location out_loc = locations->Out(); CpuRegister out = out_loc.AsRegister<CpuRegister>(); - Location maybe_temp_loc = InstanceOfTypeCheckNeedsATemporary(type_check_kind) ? - locations->GetTemp(0) : - Location::NoLocation(); + const size_t num_temps = NumberOfInstanceOfTemps(type_check_kind); + DCHECK_LE(num_temps, 1u); + Location maybe_temp_loc = (num_temps >= 1u) ? locations->GetTemp(0) : Location::NoLocation(); uint32_t class_offset = mirror::Object::ClassOffset().Int32Value(); uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value(); uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value(); @@ -6031,7 +6434,7 @@ void InstructionCodeGeneratorX86_64::VisitInstanceOf(HInstanceOf* instruction) { } DCHECK(locations->OnlyCallsOnSlowPath()); slow_path = new (codegen_->GetScopedAllocator()) TypeCheckSlowPathX86_64( - instruction, /* is_fatal */ false); + instruction, /* is_fatal= */ false); codegen_->AddSlowPath(slow_path); __ j(kNotEqual, slow_path->GetEntryLabel()); __ movl(out, Immediate(1)); @@ -6063,7 +6466,7 @@ void InstructionCodeGeneratorX86_64::VisitInstanceOf(HInstanceOf* instruction) { // This should also be beneficial for the other cases above. 
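For reference, a standalone restatement of the temp-counting rules introduced above; the enum and function names here are illustrative stand-ins, not ART's TypeCheckKind or the helpers in the patch:

#include <cstddef>
// Interface checks need two temps (iftable length + current interface);
// the other CheckCast kinds need one temp for the object's class, plus one
// extra temp only when a slow-path (non-Baker) read barrier walks the hierarchy.
enum class Kind { kExact, kAbstractClass, kClassHierarchy, kArrayObject,
                  kArrayExact, kUnresolved, kInterface, kBitstring };
size_t InstanceOfTemps(Kind k, bool slow_path_read_barrier) {
  bool walks_hierarchy = k == Kind::kAbstractClass ||
                         k == Kind::kClassHierarchy ||
                         k == Kind::kArrayObject;
  return (slow_path_read_barrier && walks_hierarchy) ? 1u : 0u;
}
size_t CheckCastTemps(Kind k, bool slow_path_read_barrier) {
  return (k == Kind::kInterface) ? 2u : 1u + InstanceOfTemps(k, slow_path_read_barrier);
}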
DCHECK(locations->OnlyCallsOnSlowPath()); slow_path = new (codegen_->GetScopedAllocator()) TypeCheckSlowPathX86_64( - instruction, /* is_fatal */ false); + instruction, /* is_fatal= */ false); codegen_->AddSlowPath(slow_path); __ jmp(slow_path->GetEntryLabel()); if (zero.IsLinked()) { @@ -6071,6 +6474,27 @@ void InstructionCodeGeneratorX86_64::VisitInstanceOf(HInstanceOf* instruction) { } break; } + + case TypeCheckKind::kBitstringCheck: { + // /* HeapReference<Class> */ temp = obj->klass_ + GenerateReferenceLoadTwoRegisters(instruction, + out_loc, + obj_loc, + class_offset, + kWithoutReadBarrier); + + GenerateBitstringTypeCheckCompare(instruction, out); + if (zero.IsLinked()) { + __ j(kNotEqual, &zero); + __ movl(out, Immediate(1)); + __ jmp(&done); + } else { + __ setcc(kEqual, out); + // setcc only sets the low byte. + __ andl(out, Immediate(1)); + } + break; + } } if (zero.IsLinked()) { @@ -6097,17 +6521,15 @@ void LocationsBuilderX86_64::VisitCheckCast(HCheckCast* instruction) { // Require a register for the interface check since there is a loop that compares the class to // a memory address. locations->SetInAt(1, Location::RequiresRegister()); + } else if (type_check_kind == TypeCheckKind::kBitstringCheck) { + locations->SetInAt(1, Location::ConstantLocation(instruction->InputAt(1)->AsConstant())); + locations->SetInAt(2, Location::ConstantLocation(instruction->InputAt(2)->AsConstant())); + locations->SetInAt(3, Location::ConstantLocation(instruction->InputAt(3)->AsConstant())); } else { locations->SetInAt(1, Location::Any()); } - - // Note that TypeCheckSlowPathX86_64 uses this "temp" register too. - locations->AddTemp(Location::RequiresRegister()); - // When read barriers are enabled, we need an additional temporary - // register for some cases. - if (CheckCastTypeCheckNeedsATemporary(type_check_kind)) { - locations->AddTemp(Location::RequiresRegister()); - } + // Add temps for read barriers and other uses. One is used by TypeCheckSlowPathX86. + locations->AddRegisterTemps(NumberOfCheckCastTemps(type_check_kind)); } void InstructionCodeGeneratorX86_64::VisitCheckCast(HCheckCast* instruction) { @@ -6118,9 +6540,10 @@ void InstructionCodeGeneratorX86_64::VisitCheckCast(HCheckCast* instruction) { Location cls = locations->InAt(1); Location temp_loc = locations->GetTemp(0); CpuRegister temp = temp_loc.AsRegister<CpuRegister>(); - Location maybe_temp2_loc = CheckCastTypeCheckNeedsATemporary(type_check_kind) ? - locations->GetTemp(1) : - Location::NoLocation(); + const size_t num_temps = NumberOfCheckCastTemps(type_check_kind); + DCHECK_GE(num_temps, 1u); + DCHECK_LE(num_temps, 2u); + Location maybe_temp2_loc = (num_temps >= 2u) ? locations->GetTemp(1) : Location::NoLocation(); const uint32_t class_offset = mirror::Object::ClassOffset().Int32Value(); const uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value(); const uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value(); @@ -6283,7 +6706,7 @@ void InstructionCodeGeneratorX86_64::VisitCheckCast(HCheckCast* instruction) { break; } - case TypeCheckKind::kInterfaceCheck: + case TypeCheckKind::kInterfaceCheck: { // Fast path for the interface check. Try to avoid read barriers to improve the fast path. // We can not get false positives by doing this. // /* HeapReference<Class> */ temp = obj->klass_ @@ -6319,6 +6742,20 @@ void InstructionCodeGeneratorX86_64::VisitCheckCast(HCheckCast* instruction) { // If `cls` was poisoned above, unpoison it. 
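A conceptual sketch of the kBitstringCheck cases in this hunk, under the usual bitstring-typing scheme: each class is assigned a bitstring encoding its path from the root of the hierarchy, so a check against a statically known target reduces to one AND and one compare using the bitstring value and mask constants that the builder attached as extra inputs to the check. The struct and field names below are illustrative, not ART's class layout:

#include <cstdint>
struct KlassBits {
  uint32_t path_to_root;  // bitstring assigned during class hierarchy analysis
};
// Returns true iff `klass` is a subtype of the target described by
// (target_value, target_mask); a single AND plus compare, no hierarchy walk.
bool MatchesBitstring(const KlassBits& klass, uint32_t target_value, uint32_t target_mask) {
  return (klass.path_to_root & target_mask) == target_value;
}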
__ MaybeUnpoisonHeapReference(cls.AsRegister<CpuRegister>()); break; + } + + case TypeCheckKind::kBitstringCheck: { + // /* HeapReference<Class> */ temp = obj->klass_ + GenerateReferenceLoadTwoRegisters(instruction, + temp_loc, + obj_loc, + class_offset, + kWithoutReadBarrier); + + GenerateBitstringTypeCheckCompare(instruction, temp); + __ j(kNotEqual, type_check_slow_path->GetEntryLabel()); + break; + } } if (done.IsLinked()) { @@ -6346,6 +6783,48 @@ void InstructionCodeGeneratorX86_64::VisitMonitorOperation(HMonitorOperation* in } } +void LocationsBuilderX86_64::VisitX86AndNot(HX86AndNot* instruction) { + DCHECK(codegen_->GetInstructionSetFeatures().HasAVX2()); + DCHECK(DataType::IsIntOrLongType(instruction->GetType())) << instruction->GetType(); + LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(instruction); + locations->SetInAt(0, Location::RequiresRegister()); + // There is no immediate variant of negated bitwise and in X86. + locations->SetInAt(1, Location::RequiresRegister()); + locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); +} + +void LocationsBuilderX86_64::VisitX86MaskOrResetLeastSetBit(HX86MaskOrResetLeastSetBit* instruction) { + DCHECK(codegen_->GetInstructionSetFeatures().HasAVX2()); + DCHECK(DataType::IsIntOrLongType(instruction->GetType())) << instruction->GetType(); + LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(instruction); + locations->SetInAt(0, Location::RequiresRegister()); + locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); +} + +void InstructionCodeGeneratorX86_64::VisitX86AndNot(HX86AndNot* instruction) { + LocationSummary* locations = instruction->GetLocations(); + Location first = locations->InAt(0); + Location second = locations->InAt(1); + Location dest = locations->Out(); + __ andn(dest.AsRegister<CpuRegister>(), first.AsRegister<CpuRegister>(), second.AsRegister<CpuRegister>()); +} + +void InstructionCodeGeneratorX86_64::VisitX86MaskOrResetLeastSetBit(HX86MaskOrResetLeastSetBit* instruction) { + LocationSummary* locations = instruction->GetLocations(); + Location src = locations->InAt(0); + Location dest = locations->Out(); + switch (instruction->GetOpKind()) { + case HInstruction::kAnd: + __ blsr(dest.AsRegister<CpuRegister>(), src.AsRegister<CpuRegister>()); + break; + case HInstruction::kXor: + __ blsmsk(dest.AsRegister<CpuRegister>(), src.AsRegister<CpuRegister>()); + break; + default: + LOG(FATAL) << "Unreachable"; + } +} + void LocationsBuilderX86_64::VisitAnd(HAnd* instruction) { HandleBitwiseOperation(instruction); } void LocationsBuilderX86_64::VisitOr(HOr* instruction) { HandleBitwiseOperation(instruction); } void LocationsBuilderX86_64::VisitXor(HXor* instruction) { HandleBitwiseOperation(instruction); } @@ -6474,7 +6953,7 @@ void InstructionCodeGeneratorX86_64::GenerateReferenceLoadOneRegister( // Load with fast path based Baker's read barrier. // /* HeapReference<Object> */ out = *(out + offset) codegen_->GenerateFieldLoadWithBakerReadBarrier( - instruction, out, out_reg, offset, /* needs_null_check */ false); + instruction, out, out_reg, offset, /* needs_null_check= */ false); } else { // Load with slow path based read barrier. // Save the value of `out` into `maybe_temp` before overwriting it @@ -6508,7 +6987,7 @@ void InstructionCodeGeneratorX86_64::GenerateReferenceLoadTwoRegisters( // Load with fast path based Baker's read barrier. 
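For readers unfamiliar with the andn/blsr/blsmsk instructions emitted by the VisitX86AndNot and VisitX86MaskOrResetLeastSetBit code above, these are their scalar equivalents (illustrative only, not part of the patch):

#include <cstdint>
//   andn dst, a, b  =>  dst = ~a & b
//   blsr dst, x     =>  dst = x & (x - 1)   (reset lowest set bit)
//   blsmsk dst, x   =>  dst = x ^ (x - 1)   (mask up to and including lowest set bit)
uint64_t AndNot(uint64_t a, uint64_t b) { return ~a & b; }
uint64_t ResetLowestSetBit(uint64_t x) { return x & (x - 1u); }
uint64_t MaskUpToLowestSetBit(uint64_t x) { return x ^ (x - 1u); }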
// /* HeapReference<Object> */ out = *(obj + offset) codegen_->GenerateFieldLoadWithBakerReadBarrier( - instruction, out, obj_reg, offset, /* needs_null_check */ false); + instruction, out, obj_reg, offset, /* needs_null_check= */ false); } else { // Load with slow path based read barrier. // /* HeapReference<Object> */ out = *(obj + offset) @@ -6557,13 +7036,13 @@ void InstructionCodeGeneratorX86_64::GenerateGcRootFieldLoad( // Slow path marking the GC root `root`. SlowPathCode* slow_path = new (codegen_->GetScopedAllocator()) ReadBarrierMarkSlowPathX86_64( - instruction, root, /* unpoison_ref_before_marking */ false); + instruction, root, /* unpoison_ref_before_marking= */ false); codegen_->AddSlowPath(slow_path); // Test the `Thread::Current()->pReadBarrierMarkReg ## root.reg()` entrypoint. const int32_t entry_point_offset = Thread::ReadBarrierMarkEntryPointsOffset<kX86_64PointerSize>(root.reg()); - __ gs()->cmpl(Address::Absolute(entry_point_offset, /* no_rip */ true), Immediate(0)); + __ gs()->cmpl(Address::Absolute(entry_point_offset, /* no_rip= */ true), Immediate(0)); // The entrypoint is null when the GC is not marking. __ j(kNotEqual, slow_path->GetEntryLabel()); __ Bind(slow_path->GetExitLabel()); @@ -6660,7 +7139,7 @@ void CodeGeneratorX86_64::GenerateReferenceLoadWithBakerReadBarrier(HInstruction uint32_t monitor_offset = mirror::Object::MonitorOffset().Int32Value(); // Given the numeric representation, it's enough to check the low bit of the rb_state. - static_assert(ReadBarrier::WhiteState() == 0, "Expecting white to have value 0"); + static_assert(ReadBarrier::NonGrayState() == 0, "Expecting non-gray to have value 0"); static_assert(ReadBarrier::GrayState() == 1, "Expecting gray to have value 1"); constexpr uint32_t gray_byte_position = LockWord::kReadBarrierStateShift / kBitsPerByte; constexpr uint32_t gray_bit_position = LockWord::kReadBarrierStateShift % kBitsPerByte; @@ -6689,10 +7168,10 @@ void CodeGeneratorX86_64::GenerateReferenceLoadWithBakerReadBarrier(HInstruction DCHECK(temp1 != nullptr); DCHECK(temp2 != nullptr); slow_path = new (GetScopedAllocator()) ReadBarrierMarkAndUpdateFieldSlowPathX86_64( - instruction, ref, obj, src, /* unpoison_ref_before_marking */ true, *temp1, *temp2); + instruction, ref, obj, src, /* unpoison_ref_before_marking= */ true, *temp1, *temp2); } else { slow_path = new (GetScopedAllocator()) ReadBarrierMarkSlowPathX86_64( - instruction, ref, /* unpoison_ref_before_marking */ true); + instruction, ref, /* unpoison_ref_before_marking= */ true); } AddSlowPath(slow_path); @@ -7005,7 +7484,7 @@ class RIPFixup : public AssemblerFixup, public ArenaObject<kArenaAllocCodeGenera CodeGeneratorX86_64* codegen_; private: - void Process(const MemoryRegion& region, int pos) OVERRIDE { + void Process(const MemoryRegion& region, int pos) override { // Patch the correct offset for the instruction. We use the address of the // 'next' instruction, which is 'pos' (patch the 4 bytes before). 
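The NonGray/Gray static_asserts in the Baker read barrier code above rely on the read-barrier state being a single bit of the lock word. A minimal sketch of that test; the shift value is an assumed stand-in for LockWord::kReadBarrierStateShift, used only for illustration:

#include <cstdint>
constexpr uint32_t kAssumedReadBarrierStateShift = 28;  // illustrative, not ART's constant
bool IsGray(uint32_t lock_word) {
  // With NonGray == 0 and Gray == 1, testing the single rb_state bit suffices.
  return ((lock_word >> kAssumedReadBarrierStateShift) & 1u) != 0u;
}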
int32_t constant_offset = codegen_->ConstantAreaStart() + offset_into_constant_area_; @@ -7152,7 +7631,7 @@ void CodeGeneratorX86_64::PatchJitRootUse(uint8_t* code, uint32_t code_offset = info.label.Position() - kLabelPositionToLiteralOffsetAdjustment; uintptr_t address = reinterpret_cast<uintptr_t>(roots_data) + index_in_table * sizeof(GcRoot<mirror::Object>); - typedef __attribute__((__aligned__(1))) uint32_t unaligned_uint32_t; + using unaligned_uint32_t __attribute__((__aligned__(1))) = uint32_t; reinterpret_cast<unaligned_uint32_t*>(code + code_offset)[0] = dchecked_integral_cast<uint32_t>(address); } diff --git a/compiler/optimizing/code_generator_x86_64.h b/compiler/optimizing/code_generator_x86_64.h index 1079e94dfc..f74e130702 100644 --- a/compiler/optimizing/code_generator_x86_64.h +++ b/compiler/optimizing/code_generator_x86_64.h @@ -83,22 +83,22 @@ class FieldAccessCallingConventionX86_64 : public FieldAccessCallingConvention { public: FieldAccessCallingConventionX86_64() {} - Location GetObjectLocation() const OVERRIDE { + Location GetObjectLocation() const override { return Location::RegisterLocation(RSI); } - Location GetFieldIndexLocation() const OVERRIDE { + Location GetFieldIndexLocation() const override { return Location::RegisterLocation(RDI); } - Location GetReturnLocation(DataType::Type type ATTRIBUTE_UNUSED) const OVERRIDE { + Location GetReturnLocation(DataType::Type type ATTRIBUTE_UNUSED) const override { return Location::RegisterLocation(RAX); } Location GetSetValueLocation(DataType::Type type ATTRIBUTE_UNUSED, bool is_instance) - const OVERRIDE { + const override { return is_instance ? Location::RegisterLocation(RDX) : Location::RegisterLocation(RSI); } - Location GetFpuLocation(DataType::Type type ATTRIBUTE_UNUSED) const OVERRIDE { + Location GetFpuLocation(DataType::Type type ATTRIBUTE_UNUSED) const override { return Location::FpuRegisterLocation(XMM0); } @@ -112,9 +112,9 @@ class InvokeDexCallingConventionVisitorX86_64 : public InvokeDexCallingConventio InvokeDexCallingConventionVisitorX86_64() {} virtual ~InvokeDexCallingConventionVisitorX86_64() {} - Location GetNextLocation(DataType::Type type) OVERRIDE; - Location GetReturnLocation(DataType::Type type) const OVERRIDE; - Location GetMethodLocation() const OVERRIDE; + Location GetNextLocation(DataType::Type type) override; + Location GetReturnLocation(DataType::Type type) const override; + Location GetMethodLocation() const override; private: InvokeDexCallingConvention calling_convention; @@ -129,10 +129,10 @@ class ParallelMoveResolverX86_64 : public ParallelMoveResolverWithSwap { ParallelMoveResolverX86_64(ArenaAllocator* allocator, CodeGeneratorX86_64* codegen) : ParallelMoveResolverWithSwap(allocator), codegen_(codegen) {} - void EmitMove(size_t index) OVERRIDE; - void EmitSwap(size_t index) OVERRIDE; - void SpillScratch(int reg) OVERRIDE; - void RestoreScratch(int reg) OVERRIDE; + void EmitMove(size_t index) override; + void EmitSwap(size_t index) override; + void SpillScratch(int reg) override; + void RestoreScratch(int reg) override; X86_64Assembler* GetAssembler() const; @@ -157,14 +157,15 @@ class LocationsBuilderX86_64 : public HGraphVisitor { : HGraphVisitor(graph), codegen_(codegen) {} #define DECLARE_VISIT_INSTRUCTION(name, super) \ - void Visit##name(H##name* instr) OVERRIDE; + void Visit##name(H##name* instr) override; FOR_EACH_CONCRETE_INSTRUCTION_COMMON(DECLARE_VISIT_INSTRUCTION) FOR_EACH_CONCRETE_INSTRUCTION_X86_64(DECLARE_VISIT_INSTRUCTION) + 
FOR_EACH_CONCRETE_INSTRUCTION_X86_COMMON(DECLARE_VISIT_INSTRUCTION) #undef DECLARE_VISIT_INSTRUCTION - void VisitInstruction(HInstruction* instruction) OVERRIDE { + void VisitInstruction(HInstruction* instruction) override { LOG(FATAL) << "Unreachable instruction " << instruction->DebugName() << " (id " << instruction->GetId() << ")"; } @@ -188,14 +189,15 @@ class InstructionCodeGeneratorX86_64 : public InstructionCodeGenerator { InstructionCodeGeneratorX86_64(HGraph* graph, CodeGeneratorX86_64* codegen); #define DECLARE_VISIT_INSTRUCTION(name, super) \ - void Visit##name(H##name* instr) OVERRIDE; + void Visit##name(H##name* instr) override; FOR_EACH_CONCRETE_INSTRUCTION_COMMON(DECLARE_VISIT_INSTRUCTION) FOR_EACH_CONCRETE_INSTRUCTION_X86_64(DECLARE_VISIT_INSTRUCTION) + FOR_EACH_CONCRETE_INSTRUCTION_X86_COMMON(DECLARE_VISIT_INSTRUCTION) #undef DECLARE_VISIT_INSTRUCTION - void VisitInstruction(HInstruction* instruction) OVERRIDE { + void VisitInstruction(HInstruction* instruction) override { LOG(FATAL) << "Unreachable instruction " << instruction->DebugName() << " (id " << instruction->GetId() << ")"; } @@ -208,10 +210,12 @@ class InstructionCodeGeneratorX86_64 : public InstructionCodeGenerator { // the suspend call. void GenerateSuspendCheck(HSuspendCheck* instruction, HBasicBlock* successor); void GenerateClassInitializationCheck(SlowPathCode* slow_path, CpuRegister class_reg); + void GenerateBitstringTypeCheckCompare(HTypeCheckInstruction* check, CpuRegister temp); void HandleBitwiseOperation(HBinaryOperation* operation); void GenerateRemFP(HRem* rem); void DivRemOneOrMinusOne(HBinaryOperation* instruction); void DivByPowerOfTwo(HDiv* instruction); + void RemByPowerOfTwo(HRem* instruction); void GenerateDivRemWithAnyConstant(HBinaryOperation* instruction); void GenerateDivRemIntegral(HBinaryOperation* instruction); void HandleCondition(HCondition* condition); @@ -222,6 +226,10 @@ class InstructionCodeGeneratorX86_64 : public InstructionCodeGenerator { bool value_can_be_null); void HandleFieldGet(HInstruction* instruction, const FieldInfo& field_info); + void GenerateMinMaxInt(LocationSummary* locations, bool is_min, DataType::Type type); + void GenerateMinMaxFP(LocationSummary* locations, bool is_min, DataType::Type type); + void GenerateMinMax(HBinaryOperation* minmax, bool is_min); + // Generate a heap reference load using one register `out`: // // out <- *(out + offset) @@ -291,28 +299,27 @@ class JumpTableRIPFixup; class CodeGeneratorX86_64 : public CodeGenerator { public: CodeGeneratorX86_64(HGraph* graph, - const X86_64InstructionSetFeatures& isa_features, const CompilerOptions& compiler_options, OptimizingCompilerStats* stats = nullptr); virtual ~CodeGeneratorX86_64() {} - void GenerateFrameEntry() OVERRIDE; - void GenerateFrameExit() OVERRIDE; - void Bind(HBasicBlock* block) OVERRIDE; - void MoveConstant(Location destination, int32_t value) OVERRIDE; - void MoveLocation(Location dst, Location src, DataType::Type dst_type) OVERRIDE; - void AddLocationAsTemp(Location location, LocationSummary* locations) OVERRIDE; + void GenerateFrameEntry() override; + void GenerateFrameExit() override; + void Bind(HBasicBlock* block) override; + void MoveConstant(Location destination, int32_t value) override; + void MoveLocation(Location dst, Location src, DataType::Type dst_type) override; + void AddLocationAsTemp(Location location, LocationSummary* locations) override; - size_t SaveCoreRegister(size_t stack_index, uint32_t reg_id) OVERRIDE; - size_t RestoreCoreRegister(size_t stack_index, 
uint32_t reg_id) OVERRIDE; - size_t SaveFloatingPointRegister(size_t stack_index, uint32_t reg_id) OVERRIDE; - size_t RestoreFloatingPointRegister(size_t stack_index, uint32_t reg_id) OVERRIDE; + size_t SaveCoreRegister(size_t stack_index, uint32_t reg_id) override; + size_t RestoreCoreRegister(size_t stack_index, uint32_t reg_id) override; + size_t SaveFloatingPointRegister(size_t stack_index, uint32_t reg_id) override; + size_t RestoreFloatingPointRegister(size_t stack_index, uint32_t reg_id) override; // Generate code to invoke a runtime entry point. void InvokeRuntime(QuickEntrypointEnum entrypoint, HInstruction* instruction, uint32_t dex_pc, - SlowPathCode* slow_path = nullptr) OVERRIDE; + SlowPathCode* slow_path = nullptr) override; // Generate code to invoke a runtime entry point, but do not record // PC-related information in a stack map. @@ -322,49 +329,51 @@ class CodeGeneratorX86_64 : public CodeGenerator { void GenerateInvokeRuntime(int32_t entry_point_offset); - size_t GetWordSize() const OVERRIDE { + size_t GetWordSize() const override { return kX86_64WordSize; } - size_t GetFloatingPointSpillSlotSize() const OVERRIDE { + size_t GetFloatingPointSpillSlotSize() const override { return GetGraph()->HasSIMD() ? 2 * kX86_64WordSize // 16 bytes == 2 x86_64 words for each spill : 1 * kX86_64WordSize; // 8 bytes == 1 x86_64 words for each spill } - HGraphVisitor* GetLocationBuilder() OVERRIDE { + HGraphVisitor* GetLocationBuilder() override { return &location_builder_; } - HGraphVisitor* GetInstructionVisitor() OVERRIDE { + HGraphVisitor* GetInstructionVisitor() override { return &instruction_visitor_; } - X86_64Assembler* GetAssembler() OVERRIDE { + X86_64Assembler* GetAssembler() override { return &assembler_; } - const X86_64Assembler& GetAssembler() const OVERRIDE { + const X86_64Assembler& GetAssembler() const override { return assembler_; } - ParallelMoveResolverX86_64* GetMoveResolver() OVERRIDE { + ParallelMoveResolverX86_64* GetMoveResolver() override { return &move_resolver_; } - uintptr_t GetAddressOf(HBasicBlock* block) OVERRIDE { + uintptr_t GetAddressOf(HBasicBlock* block) override { return GetLabelOf(block)->Position(); } - void SetupBlockedRegisters() const OVERRIDE; - void DumpCoreRegister(std::ostream& stream, int reg) const OVERRIDE; - void DumpFloatingPointRegister(std::ostream& stream, int reg) const OVERRIDE; - void Finalize(CodeAllocator* allocator) OVERRIDE; + void SetupBlockedRegisters() const override; + void DumpCoreRegister(std::ostream& stream, int reg) const override; + void DumpFloatingPointRegister(std::ostream& stream, int reg) const override; + void Finalize(CodeAllocator* allocator) override; - InstructionSet GetInstructionSet() const OVERRIDE { + InstructionSet GetInstructionSet() const override { return InstructionSet::kX86_64; } + const X86_64InstructionSetFeatures& GetInstructionSetFeatures() const; + // Emit a write barrier. void MarkGCCard(CpuRegister temp, CpuRegister card, @@ -381,35 +390,37 @@ class CodeGeneratorX86_64 : public CodeGenerator { return CommonGetLabelOf<Label>(block_labels_, block); } - void Initialize() OVERRIDE { + void Initialize() override { block_labels_ = CommonInitializeLabels<Label>(); } - bool NeedsTwoRegisters(DataType::Type type ATTRIBUTE_UNUSED) const OVERRIDE { + bool NeedsTwoRegisters(DataType::Type type ATTRIBUTE_UNUSED) const override { return false; } // Check if the desired_string_load_kind is supported. If it is, return it, // otherwise return a fall-back kind that should be used instead. 
HLoadString::LoadKind GetSupportedLoadStringKind( - HLoadString::LoadKind desired_string_load_kind) OVERRIDE; + HLoadString::LoadKind desired_string_load_kind) override; // Check if the desired_class_load_kind is supported. If it is, return it, // otherwise return a fall-back kind that should be used instead. HLoadClass::LoadKind GetSupportedLoadClassKind( - HLoadClass::LoadKind desired_class_load_kind) OVERRIDE; + HLoadClass::LoadKind desired_class_load_kind) override; // Check if the desired_dispatch_info is supported. If it is, return it, // otherwise return a fall-back info that should be used instead. HInvokeStaticOrDirect::DispatchInfo GetSupportedInvokeStaticOrDirectDispatch( const HInvokeStaticOrDirect::DispatchInfo& desired_dispatch_info, - HInvokeStaticOrDirect* invoke) OVERRIDE; + ArtMethod* method) override; void GenerateStaticOrDirectCall( - HInvokeStaticOrDirect* invoke, Location temp, SlowPathCode* slow_path = nullptr) OVERRIDE; + HInvokeStaticOrDirect* invoke, Location temp, SlowPathCode* slow_path = nullptr) override; void GenerateVirtualCall( - HInvokeVirtual* invoke, Location temp, SlowPathCode* slow_path = nullptr) OVERRIDE; + HInvokeVirtual* invoke, Location temp, SlowPathCode* slow_path = nullptr) override; + void RecordBootImageIntrinsicPatch(uint32_t intrinsic_data); + void RecordBootImageRelRoPatch(uint32_t boot_image_offset); void RecordBootImageMethodPatch(HInvokeStaticOrDirect* invoke); void RecordMethodBssEntryPatch(HInvokeStaticOrDirect* invoke); void RecordBootImageTypePatch(HLoadClass* load_class); @@ -423,20 +434,17 @@ class CodeGeneratorX86_64 : public CodeGenerator { dex::TypeIndex type_index, Handle<mirror::Class> handle); - void MoveFromReturnRegister(Location trg, DataType::Type type) OVERRIDE; + void LoadBootImageAddress(CpuRegister reg, uint32_t boot_image_reference); + void AllocateInstanceForIntrinsic(HInvokeStaticOrDirect* invoke, uint32_t boot_image_offset); - void EmitLinkerPatches(ArenaVector<linker::LinkerPatch>* linker_patches) OVERRIDE; + void EmitLinkerPatches(ArenaVector<linker::LinkerPatch>* linker_patches) override; void PatchJitRootUse(uint8_t* code, const uint8_t* roots_data, const PatchInfo<Label>& info, uint64_t index_in_table) const; - void EmitJitRootPatches(uint8_t* code, const uint8_t* roots_data) OVERRIDE; - - const X86_64InstructionSetFeatures& GetInstructionSetFeatures() const { - return isa_features_; - } + void EmitJitRootPatches(uint8_t* code, const uint8_t* roots_data) override; // Fast path implementation of ReadBarrier::Barrier for a heap // reference field load when Baker's read barriers are used. @@ -560,6 +568,8 @@ class CodeGeneratorX86_64 : public CodeGenerator { // Store a 64 bit value into a DoubleStackSlot in the most efficient manner. void Store64BitValueToStack(Location dest, int64_t value); + void MoveFromReturnRegister(Location trg, DataType::Type type) override; + // Assign a 64 bit constant to an address. void MoveInt64ToAddress(const Address& addr_low, const Address& addr_high, @@ -578,9 +588,9 @@ class CodeGeneratorX86_64 : public CodeGenerator { } } - void GenerateNop() OVERRIDE; - void GenerateImplicitNullCheck(HNullCheck* instruction) OVERRIDE; - void GenerateExplicitNullCheck(HNullCheck* instruction) OVERRIDE; + void GenerateNop() override; + void GenerateImplicitNullCheck(HNullCheck* instruction) override; + void GenerateExplicitNullCheck(HNullCheck* instruction) override; // When we don't know the proper offset for the value, we use kDummy32BitOffset. 
// We will fix this up in the linker later to have the right value. @@ -598,24 +608,26 @@ class CodeGeneratorX86_64 : public CodeGenerator { InstructionCodeGeneratorX86_64 instruction_visitor_; ParallelMoveResolverX86_64 move_resolver_; X86_64Assembler assembler_; - const X86_64InstructionSetFeatures& isa_features_; // Offset to the start of the constant area in the assembled code. // Used for fixups to the constant area. int constant_area_start_; - // PC-relative method patch info for kBootImageLinkTimePcRelative. + // PC-relative method patch info for kBootImageLinkTimePcRelative/kBootImageRelRo. + // Also used for type/string patches for kBootImageRelRo (same linker patch as for methods). ArenaDeque<PatchInfo<Label>> boot_image_method_patches_; // PC-relative method patch info for kBssEntry. ArenaDeque<PatchInfo<Label>> method_bss_entry_patches_; // PC-relative type patch info for kBootImageLinkTimePcRelative. ArenaDeque<PatchInfo<Label>> boot_image_type_patches_; - // Type patch locations for kBssEntry. + // PC-relative type patch info for kBssEntry. ArenaDeque<PatchInfo<Label>> type_bss_entry_patches_; - // String patch locations; type depends on configuration (intern table or boot image PIC). + // PC-relative String patch info for kBootImageLinkTimePcRelative. ArenaDeque<PatchInfo<Label>> boot_image_string_patches_; - // String patch locations for kBssEntry. + // PC-relative String patch info for kBssEntry. ArenaDeque<PatchInfo<Label>> string_bss_entry_patches_; + // PC-relative patch info for IntrinsicObjects. + ArenaDeque<PatchInfo<Label>> boot_image_intrinsic_patches_; // Patches for string literals in JIT compiled code. ArenaDeque<PatchInfo<Label>> jit_string_patches_; diff --git a/compiler/optimizing/code_sinking.cc b/compiler/optimizing/code_sinking.cc index 2e31d35584..f406983fc2 100644 --- a/compiler/optimizing/code_sinking.cc +++ b/compiler/optimizing/code_sinking.cc @@ -25,11 +25,11 @@ namespace art { -void CodeSinking::Run() { +bool CodeSinking::Run() { HBasicBlock* exit = graph_->GetExitBlock(); if (exit == nullptr) { // Infinite loop, just bail. - return; + return false; } // TODO(ngeoffray): we do not profile branches yet, so use throw instructions // as an indicator of an uncommon branch. @@ -40,6 +40,7 @@ void CodeSinking::Run() { SinkCodeToUncommonBranch(exit_predecessor); } } + return true; } static bool IsInterestingInstruction(HInstruction* instruction) { @@ -179,7 +180,7 @@ static HInstruction* FindIdealPosition(HInstruction* instruction, DCHECK(!instruction->IsPhi()); // Makes no sense for Phi. // Find the target block. 
- CommonDominator finder(/* start_block */ nullptr); + CommonDominator finder(/* block= */ nullptr); for (const HUseListNode<HInstruction*>& use : instruction->GetUses()) { HInstruction* user = use.GetUser(); if (!(filter && ShouldFilterUse(instruction, user, post_dominated))) { @@ -258,12 +259,12 @@ void CodeSinking::SinkCodeToUncommonBranch(HBasicBlock* end_block) { size_t number_of_instructions = graph_->GetCurrentInstructionId(); ScopedArenaVector<HInstruction*> worklist(allocator.Adapter(kArenaAllocMisc)); - ArenaBitVector processed_instructions(&allocator, number_of_instructions, /* expandable */ false); + ArenaBitVector processed_instructions(&allocator, number_of_instructions, /* expandable= */ false); processed_instructions.ClearAllBits(); - ArenaBitVector post_dominated(&allocator, graph_->GetBlocks().size(), /* expandable */ false); + ArenaBitVector post_dominated(&allocator, graph_->GetBlocks().size(), /* expandable= */ false); post_dominated.ClearAllBits(); ArenaBitVector instructions_that_can_move( - &allocator, number_of_instructions, /* expandable */ false); + &allocator, number_of_instructions, /* expandable= */ false); instructions_that_can_move.ClearAllBits(); ScopedArenaVector<HInstruction*> move_in_order(allocator.Adapter(kArenaAllocMisc)); @@ -413,7 +414,7 @@ void CodeSinking::SinkCodeToUncommonBranch(HBasicBlock* end_block) { } // Find the position of the instruction we're storing into, filtering out this // store and all other stores to that instruction. - position = FindIdealPosition(instruction->InputAt(0), post_dominated, /* filter */ true); + position = FindIdealPosition(instruction->InputAt(0), post_dominated, /* filter= */ true); // The position needs to be dominated by the store, in order for the store to move there. if (position == nullptr || !instruction->GetBlock()->Dominates(position->GetBlock())) { @@ -433,7 +434,7 @@ void CodeSinking::SinkCodeToUncommonBranch(HBasicBlock* end_block) { continue; } MaybeRecordStat(stats_, MethodCompilationStat::kInstructionSunk); - instruction->MoveBefore(position, /* ensure_safety */ false); + instruction->MoveBefore(position, /* do_checks= */ false); } } diff --git a/compiler/optimizing/code_sinking.h b/compiler/optimizing/code_sinking.h index 836d9d4f67..8eb3a520c3 100644 --- a/compiler/optimizing/code_sinking.h +++ b/compiler/optimizing/code_sinking.h @@ -33,7 +33,7 @@ class CodeSinking : public HOptimization { const char* name = kCodeSinkingPassName) : HOptimization(graph, name, stats) {} - void Run() OVERRIDE; + bool Run() override; static constexpr const char* kCodeSinkingPassName = "code_sinking"; diff --git a/compiler/optimizing/codegen_test.cc b/compiler/optimizing/codegen_test.cc index a0fd5ffcb1..b5a7c137f6 100644 --- a/compiler/optimizing/codegen_test.cc +++ b/compiler/optimizing/codegen_test.cc @@ -89,7 +89,8 @@ void CodegenTest::TestCode(const std::vector<uint16_t>& data, bool has_result, i HGraph* graph = CreateCFG(data); // Remove suspend checks, they cannot be executed in this context. RemoveSuspendChecks(graph); - RunCode(target_config, graph, [](HGraph*) {}, has_result, expected); + OverrideInstructionSetFeatures(target_config.GetInstructionSet(), "default"); + RunCode(target_config, *compiler_options_, graph, [](HGraph*) {}, has_result, expected); } } @@ -100,7 +101,8 @@ void CodegenTest::TestCodeLong(const std::vector<uint16_t>& data, HGraph* graph = CreateCFG(data, DataType::Type::kInt64); // Remove suspend checks, they cannot be executed in this context. 
RemoveSuspendChecks(graph); - RunCode(target_config, graph, [](HGraph*) {}, has_result, expected); + OverrideInstructionSetFeatures(target_config.GetInstructionSet(), "default"); + RunCode(target_config, *compiler_options_, graph, [](HGraph*) {}, has_result, expected); } } @@ -451,7 +453,7 @@ TEST_F(CodegenTest, NonMaterializedCondition) { ASSERT_FALSE(equal->IsEmittedAtUseSite()); graph->BuildDominatorTree(); - PrepareForRegisterAllocation(graph).Run(); + PrepareForRegisterAllocation(graph, *compiler_options_).Run(); ASSERT_TRUE(equal->IsEmittedAtUseSite()); auto hook_before_codegen = [](HGraph* graph_in) { @@ -460,7 +462,8 @@ TEST_F(CodegenTest, NonMaterializedCondition) { block->InsertInstructionBefore(move, block->GetLastInstruction()); }; - RunCode(target_config, graph, hook_before_codegen, true, 0); + OverrideInstructionSetFeatures(target_config.GetInstructionSet(), "default"); + RunCode(target_config, *compiler_options_, graph, hook_before_codegen, true, 0); } } @@ -506,7 +509,8 @@ TEST_F(CodegenTest, MaterializedCondition1) { new (graph_in->GetAllocator()) HParallelMove(graph_in->GetAllocator()); block->InsertInstructionBefore(move, block->GetLastInstruction()); }; - RunCode(target_config, graph, hook_before_codegen, true, lhs[i] < rhs[i]); + OverrideInstructionSetFeatures(target_config.GetInstructionSet(), "default"); + RunCode(target_config, *compiler_options_, graph, hook_before_codegen, true, lhs[i] < rhs[i]); } } } @@ -573,7 +577,8 @@ TEST_F(CodegenTest, MaterializedCondition2) { new (graph_in->GetAllocator()) HParallelMove(graph_in->GetAllocator()); block->InsertInstructionBefore(move, block->GetLastInstruction()); }; - RunCode(target_config, graph, hook_before_codegen, true, lhs[i] < rhs[i]); + OverrideInstructionSetFeatures(target_config.GetInstructionSet(), "default"); + RunCode(target_config, *compiler_options_, graph, hook_before_codegen, true, lhs[i] < rhs[i]); } } } @@ -682,7 +687,8 @@ void CodegenTest::TestComparison(IfCondition condition, block->AddInstruction(new (GetAllocator()) HReturn(comparison)); graph->BuildDominatorTree(); - RunCode(target_config, graph, [](HGraph*) {}, true, expected_result); + OverrideInstructionSetFeatures(target_config.GetInstructionSet(), "default"); + RunCode(target_config, *compiler_options_, graph, [](HGraph*) {}, true, expected_result); } TEST_F(CodegenTest, ComparisonsInt) { @@ -713,10 +719,9 @@ TEST_F(CodegenTest, ComparisonsLong) { #ifdef ART_ENABLE_CODEGEN_arm TEST_F(CodegenTest, ARMVIXLParallelMoveResolver) { - std::unique_ptr<const ArmInstructionSetFeatures> features( - ArmInstructionSetFeatures::FromCppDefines()); + OverrideInstructionSetFeatures(InstructionSet::kThumb2, "default"); HGraph* graph = CreateGraph(); - arm::CodeGeneratorARMVIXL codegen(graph, *features.get(), CompilerOptions()); + arm::CodeGeneratorARMVIXL codegen(graph, *compiler_options_); codegen.Initialize(); @@ -737,10 +742,9 @@ TEST_F(CodegenTest, ARMVIXLParallelMoveResolver) { #ifdef ART_ENABLE_CODEGEN_arm64 // Regression test for b/34760542. 
TEST_F(CodegenTest, ARM64ParallelMoveResolverB34760542) { - std::unique_ptr<const Arm64InstructionSetFeatures> features( - Arm64InstructionSetFeatures::FromCppDefines()); + OverrideInstructionSetFeatures(InstructionSet::kArm64, "default"); HGraph* graph = CreateGraph(); - arm64::CodeGeneratorARM64 codegen(graph, *features.get(), CompilerOptions()); + arm64::CodeGeneratorARM64 codegen(graph, *compiler_options_); codegen.Initialize(); @@ -787,10 +791,9 @@ TEST_F(CodegenTest, ARM64ParallelMoveResolverB34760542) { // Check that ParallelMoveResolver works fine for ARM64 for both cases when SIMD is on and off. TEST_F(CodegenTest, ARM64ParallelMoveResolverSIMD) { - std::unique_ptr<const Arm64InstructionSetFeatures> features( - Arm64InstructionSetFeatures::FromCppDefines()); + OverrideInstructionSetFeatures(InstructionSet::kArm64, "default"); HGraph* graph = CreateGraph(); - arm64::CodeGeneratorARM64 codegen(graph, *features.get(), CompilerOptions()); + arm64::CodeGeneratorARM64 codegen(graph, *compiler_options_); codegen.Initialize(); @@ -820,13 +823,40 @@ TEST_F(CodegenTest, ARM64ParallelMoveResolverSIMD) { InternalCodeAllocator code_allocator; codegen.Finalize(&code_allocator); } + +// Check that ART ISA Features are propagated to VIXL for arm64 (using cortex-a75 as example). +TEST_F(CodegenTest, ARM64IsaVIXLFeaturesA75) { + OverrideInstructionSetFeatures(InstructionSet::kArm64, "cortex-a75"); + HGraph* graph = CreateGraph(); + arm64::CodeGeneratorARM64 codegen(graph, *compiler_options_); + vixl::CPUFeatures* features = codegen.GetVIXLAssembler()->GetCPUFeatures(); + + EXPECT_TRUE(features->Has(vixl::CPUFeatures::kCRC32)); + EXPECT_TRUE(features->Has(vixl::CPUFeatures::kDotProduct)); + EXPECT_TRUE(features->Has(vixl::CPUFeatures::kFPHalf)); + EXPECT_TRUE(features->Has(vixl::CPUFeatures::kAtomics)); +} + +// Check that ART ISA Features are propagated to VIXL for arm64 (using cortex-a53 as example). +TEST_F(CodegenTest, ARM64IsaVIXLFeaturesA53) { + OverrideInstructionSetFeatures(InstructionSet::kArm64, "cortex-a53"); + HGraph* graph = CreateGraph(); + arm64::CodeGeneratorARM64 codegen(graph, *compiler_options_); + vixl::CPUFeatures* features = codegen.GetVIXLAssembler()->GetCPUFeatures(); + + EXPECT_TRUE(features->Has(vixl::CPUFeatures::kCRC32)); + EXPECT_FALSE(features->Has(vixl::CPUFeatures::kDotProduct)); + EXPECT_FALSE(features->Has(vixl::CPUFeatures::kFPHalf)); + EXPECT_FALSE(features->Has(vixl::CPUFeatures::kAtomics)); +} + #endif #ifdef ART_ENABLE_CODEGEN_mips TEST_F(CodegenTest, MipsClobberRA) { - std::unique_ptr<const MipsInstructionSetFeatures> features_mips( - MipsInstructionSetFeatures::FromCppDefines()); - if (!CanExecute(InstructionSet::kMips) || features_mips->IsR6()) { + OverrideInstructionSetFeatures(InstructionSet::kMips, "mips32r"); + CHECK(!instruction_set_features_->AsMipsInstructionSetFeatures()->IsR6()); + if (!CanExecute(InstructionSet::kMips)) { // HMipsComputeBaseMethodAddress and the NAL instruction behind it // should only be generated on non-R6. return; @@ -860,7 +890,7 @@ TEST_F(CodegenTest, MipsClobberRA) { graph->BuildDominatorTree(); - mips::CodeGeneratorMIPS codegenMIPS(graph, *features_mips.get(), CompilerOptions()); + mips::CodeGeneratorMIPS codegenMIPS(graph, *compiler_options_); // Since there isn't HLoadClass or HLoadString, we need to manually indicate // that RA is clobbered and the method entry code should generate a stack frame // and preserve RA in it. And this is what we're testing here. 
diff --git a/compiler/optimizing/codegen_test_utils.h b/compiler/optimizing/codegen_test_utils.h index c41c290c8b..dde39d46f3 100644 --- a/compiler/optimizing/codegen_test_utils.h +++ b/compiler/optimizing/codegen_test_utils.h @@ -17,17 +17,11 @@ #ifndef ART_COMPILER_OPTIMIZING_CODEGEN_TEST_UTILS_H_ #define ART_COMPILER_OPTIMIZING_CODEGEN_TEST_UTILS_H_ -#include "arch/arm/instruction_set_features_arm.h" #include "arch/arm/registers_arm.h" -#include "arch/arm64/instruction_set_features_arm64.h" #include "arch/instruction_set.h" -#include "arch/mips/instruction_set_features_mips.h" #include "arch/mips/registers_mips.h" -#include "arch/mips64/instruction_set_features_mips64.h" #include "arch/mips64/registers_mips64.h" -#include "arch/x86/instruction_set_features_x86.h" #include "arch/x86/registers_x86.h" -#include "arch/x86_64/instruction_set_features_x86_64.h" #include "code_simulator.h" #include "code_simulator_container.h" #include "common_compiler_test.h" @@ -101,15 +95,13 @@ class CodegenTargetConfig { // to just overwrite the code generator. class TestCodeGeneratorARMVIXL : public arm::CodeGeneratorARMVIXL { public: - TestCodeGeneratorARMVIXL(HGraph* graph, - const ArmInstructionSetFeatures& isa_features, - const CompilerOptions& compiler_options) - : arm::CodeGeneratorARMVIXL(graph, isa_features, compiler_options) { + TestCodeGeneratorARMVIXL(HGraph* graph, const CompilerOptions& compiler_options) + : arm::CodeGeneratorARMVIXL(graph, compiler_options) { AddAllocatedRegister(Location::RegisterLocation(arm::R6)); AddAllocatedRegister(Location::RegisterLocation(arm::R7)); } - void SetupBlockedRegisters() const OVERRIDE { + void SetupBlockedRegisters() const override { arm::CodeGeneratorARMVIXL::SetupBlockedRegisters(); blocked_core_registers_[arm::R4] = true; blocked_core_registers_[arm::R6] = false; @@ -117,7 +109,7 @@ class TestCodeGeneratorARMVIXL : public arm::CodeGeneratorARMVIXL { } void MaybeGenerateMarkingRegisterCheck(int code ATTRIBUTE_UNUSED, - Location temp_loc ATTRIBUTE_UNUSED) OVERRIDE { + Location temp_loc ATTRIBUTE_UNUSED) override { // When turned on, the marking register checks in // CodeGeneratorARMVIXL::MaybeGenerateMarkingRegisterCheck expects the // Thread Register and the Marking Register to be set to @@ -145,13 +137,11 @@ class TestCodeGeneratorARMVIXL : public arm::CodeGeneratorARMVIXL { // function. 
class TestCodeGeneratorARM64 : public arm64::CodeGeneratorARM64 { public: - TestCodeGeneratorARM64(HGraph* graph, - const Arm64InstructionSetFeatures& isa_features, - const CompilerOptions& compiler_options) - : arm64::CodeGeneratorARM64(graph, isa_features, compiler_options) {} + TestCodeGeneratorARM64(HGraph* graph, const CompilerOptions& compiler_options) + : arm64::CodeGeneratorARM64(graph, compiler_options) {} void MaybeGenerateMarkingRegisterCheck(int codem ATTRIBUTE_UNUSED, - Location temp_loc ATTRIBUTE_UNUSED) OVERRIDE { + Location temp_loc ATTRIBUTE_UNUSED) override { // When turned on, the marking register checks in // CodeGeneratorARM64::MaybeGenerateMarkingRegisterCheck expect the // Thread Register and the Marking Register to be set to @@ -165,15 +155,13 @@ class TestCodeGeneratorARM64 : public arm64::CodeGeneratorARM64 { #ifdef ART_ENABLE_CODEGEN_x86 class TestCodeGeneratorX86 : public x86::CodeGeneratorX86 { public: - TestCodeGeneratorX86(HGraph* graph, - const X86InstructionSetFeatures& isa_features, - const CompilerOptions& compiler_options) - : x86::CodeGeneratorX86(graph, isa_features, compiler_options) { + TestCodeGeneratorX86(HGraph* graph, const CompilerOptions& compiler_options) + : x86::CodeGeneratorX86(graph, compiler_options) { // Save edi, we need it for getting enough registers for long multiplication. AddAllocatedRegister(Location::RegisterLocation(x86::EDI)); } - void SetupBlockedRegisters() const OVERRIDE { + void SetupBlockedRegisters() const override { x86::CodeGeneratorX86::SetupBlockedRegisters(); // ebx is a callee-save register in C, but caller-save for ART. blocked_core_registers_[x86::EBX] = true; @@ -188,14 +176,16 @@ class InternalCodeAllocator : public CodeAllocator { public: InternalCodeAllocator() : size_(0) { } - virtual uint8_t* Allocate(size_t size) { + uint8_t* Allocate(size_t size) override { size_ = size; memory_.reset(new uint8_t[size]); return memory_.get(); } size_t GetSize() const { return size_; } - uint8_t* GetMemory() const { return memory_.get(); } + ArrayRef<const uint8_t> GetMemory() const override { + return ArrayRef<const uint8_t>(memory_.get(), size_); + } private: size_t size_; @@ -269,8 +259,8 @@ static void Run(const InternalCodeAllocator& allocator, InstructionSet target_isa = codegen.GetInstructionSet(); typedef Expected (*fptr)(); - CommonCompilerTest::MakeExecutable(allocator.GetMemory(), allocator.GetSize()); - fptr f = reinterpret_cast<fptr>(allocator.GetMemory()); + CommonCompilerTest::MakeExecutable(allocator.GetMemory().data(), allocator.GetMemory().size()); + fptr f = reinterpret_cast<fptr>(reinterpret_cast<uintptr_t>(allocator.GetMemory().data())); if (target_isa == InstructionSet::kThumb2) { // For thumb we need the bottom bit set. 
f = reinterpret_cast<fptr>(reinterpret_cast<uintptr_t>(f) + 1); @@ -298,7 +288,7 @@ static void RunCodeNoCheck(CodeGenerator* codegen, { ScopedArenaAllocator local_allocator(graph->GetArenaStack()); SsaLivenessAnalysis liveness(graph, codegen, &local_allocator); - PrepareForRegisterAllocation(graph).Run(); + PrepareForRegisterAllocation(graph, codegen->GetCompilerOptions()).Run(); liveness.Analyze(); std::unique_ptr<RegisterAllocator> register_allocator = RegisterAllocator::Create(&local_allocator, codegen, liveness); @@ -322,11 +312,11 @@ static void RunCode(CodeGenerator* codegen, template <typename Expected> static void RunCode(CodegenTargetConfig target_config, + const CompilerOptions& compiler_options, HGraph* graph, std::function<void(HGraph*)> hook_before_codegen, bool has_result, Expected expected) { - CompilerOptions compiler_options; std::unique_ptr<CodeGenerator> codegen(target_config.CreateCodeGenerator(graph, compiler_options)); RunCode(codegen.get(), graph, hook_before_codegen, has_result, expected); @@ -334,55 +324,37 @@ static void RunCode(CodegenTargetConfig target_config, #ifdef ART_ENABLE_CODEGEN_arm CodeGenerator* create_codegen_arm_vixl32(HGraph* graph, const CompilerOptions& compiler_options) { - std::unique_ptr<const ArmInstructionSetFeatures> features_arm( - ArmInstructionSetFeatures::FromCppDefines()); - return new (graph->GetAllocator()) - TestCodeGeneratorARMVIXL(graph, *features_arm.get(), compiler_options); + return new (graph->GetAllocator()) TestCodeGeneratorARMVIXL(graph, compiler_options); } #endif #ifdef ART_ENABLE_CODEGEN_arm64 CodeGenerator* create_codegen_arm64(HGraph* graph, const CompilerOptions& compiler_options) { - std::unique_ptr<const Arm64InstructionSetFeatures> features_arm64( - Arm64InstructionSetFeatures::FromCppDefines()); - return new (graph->GetAllocator()) - TestCodeGeneratorARM64(graph, *features_arm64.get(), compiler_options); + return new (graph->GetAllocator()) TestCodeGeneratorARM64(graph, compiler_options); } #endif #ifdef ART_ENABLE_CODEGEN_x86 CodeGenerator* create_codegen_x86(HGraph* graph, const CompilerOptions& compiler_options) { - std::unique_ptr<const X86InstructionSetFeatures> features_x86( - X86InstructionSetFeatures::FromCppDefines()); - return new (graph->GetAllocator()) TestCodeGeneratorX86( - graph, *features_x86.get(), compiler_options); + return new (graph->GetAllocator()) TestCodeGeneratorX86(graph, compiler_options); } #endif #ifdef ART_ENABLE_CODEGEN_x86_64 CodeGenerator* create_codegen_x86_64(HGraph* graph, const CompilerOptions& compiler_options) { - std::unique_ptr<const X86_64InstructionSetFeatures> features_x86_64( - X86_64InstructionSetFeatures::FromCppDefines()); - return new (graph->GetAllocator()) - x86_64::CodeGeneratorX86_64(graph, *features_x86_64.get(), compiler_options); + return new (graph->GetAllocator()) x86_64::CodeGeneratorX86_64(graph, compiler_options); } #endif #ifdef ART_ENABLE_CODEGEN_mips CodeGenerator* create_codegen_mips(HGraph* graph, const CompilerOptions& compiler_options) { - std::unique_ptr<const MipsInstructionSetFeatures> features_mips( - MipsInstructionSetFeatures::FromCppDefines()); - return new (graph->GetAllocator()) - mips::CodeGeneratorMIPS(graph, *features_mips.get(), compiler_options); + return new (graph->GetAllocator()) mips::CodeGeneratorMIPS(graph, compiler_options); } #endif #ifdef ART_ENABLE_CODEGEN_mips64 CodeGenerator* create_codegen_mips64(HGraph* graph, const CompilerOptions& compiler_options) { - std::unique_ptr<const Mips64InstructionSetFeatures> 
features_mips64( - Mips64InstructionSetFeatures::FromCppDefines()); - return new (graph->GetAllocator()) - mips64::CodeGeneratorMIPS64(graph, *features_mips64.get(), compiler_options); + return new (graph->GetAllocator()) mips64::CodeGeneratorMIPS64(graph, compiler_options); } #endif diff --git a/compiler/optimizing/common_arm.h b/compiler/optimizing/common_arm.h index 356ff9f41f..7d3af9521a 100644 --- a/compiler/optimizing/common_arm.h +++ b/compiler/optimizing/common_arm.h @@ -17,7 +17,7 @@ #ifndef ART_COMPILER_OPTIMIZING_COMMON_ARM_H_ #define ART_COMPILER_OPTIMIZING_COMMON_ARM_H_ -#include "debug/dwarf/register.h" +#include "dwarf/register.h" #include "instruction_simplifier_shared.h" #include "locations.h" #include "nodes.h" diff --git a/compiler/optimizing/common_arm64.h b/compiler/optimizing/common_arm64.h index ed2f8e995d..5556f16740 100644 --- a/compiler/optimizing/common_arm64.h +++ b/compiler/optimizing/common_arm64.h @@ -151,23 +151,15 @@ inline vixl::aarch64::CPURegister InputCPURegisterOrZeroRegAt(HInstruction* inst return InputCPURegisterAt(instr, index); } -inline int64_t Int64ConstantFrom(Location location) { - HConstant* instr = location.GetConstant(); - if (instr->IsIntConstant()) { - return instr->AsIntConstant()->GetValue(); - } else if (instr->IsNullConstant()) { - return 0; - } else { - DCHECK(instr->IsLongConstant()) << instr->DebugName(); - return instr->AsLongConstant()->GetValue(); - } +inline int64_t Int64FromLocation(Location location) { + return Int64FromConstant(location.GetConstant()); } inline vixl::aarch64::Operand OperandFrom(Location location, DataType::Type type) { if (location.IsRegister()) { return vixl::aarch64::Operand(RegisterFrom(location, type)); } else { - return vixl::aarch64::Operand(Int64ConstantFrom(location)); + return vixl::aarch64::Operand(Int64FromLocation(location)); } } @@ -234,6 +226,13 @@ inline vixl::aarch64::Operand OperandFromMemOperand( } } +inline bool AddSubCanEncodeAsImmediate(int64_t value) { + // If `value` does not fit but `-value` does, VIXL will automatically use + // the 'opposite' instruction. + return vixl::aarch64::Assembler::IsImmAddSub(value) + || vixl::aarch64::Assembler::IsImmAddSub(-value); +} + inline bool Arm64CanEncodeConstantAsImmediate(HConstant* constant, HInstruction* instr) { int64_t value = CodeGenerator::GetInt64ValueOf(constant); @@ -249,6 +248,20 @@ inline bool Arm64CanEncodeConstantAsImmediate(HConstant* constant, HInstruction* return IsUint<8>(value); } + // Code generation for Min/Max: + // Cmp left_op, right_op + // Csel dst, left_op, right_op, cond + if (instr->IsMin() || instr->IsMax()) { + if (constant->GetUses().HasExactlyOneElement()) { + // If value can be encoded as immediate for the Cmp, then let VIXL handle + // the constant generation for the Csel. + return AddSubCanEncodeAsImmediate(value); + } + // These values are encodable as immediates for Cmp and VIXL will use csinc and csinv + // with the zr register as right_op, hence no constant generation is required. + return constant->IsZeroBitPattern() || constant->IsOne() || constant->IsMinusOne(); + } + // For single uses we let VIXL handle the constant generation since it will // use registers that are not managed by the register allocator (wip0, wip1). if (constant->GetUses().HasExactlyOneElement()) { @@ -275,10 +288,7 @@ inline bool Arm64CanEncodeConstantAsImmediate(HConstant* constant, HInstruction* instr->IsSub()) << instr->DebugName(); // Uses aliases of ADD/SUB instructions. 
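A sketch of the encodability rule behind the AddSubCanEncodeAsImmediate() helper added above: A64 ADD/SUB (immediate) accepts a 12-bit unsigned value, optionally shifted left by 12, and the helper also accepts the negated value because VIXL can flip between ADD and SUB. This is an illustrative reimplementation, not VIXL's IsImmAddSub:

#include <cstdint>
bool IsImmAddSubSketch(int64_t v) {
  if (v < 0) return false;
  uint64_t u = static_cast<uint64_t>(v);
  return u < (1u << 12) || ((u & 0xfffu) == 0 && u < (1ull << 24));
}
bool AddSubCanEncodeAsImmediateSketch(int64_t v) {
  return IsImmAddSubSketch(v) || IsImmAddSubSketch(-v);
}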
- // If `value` does not fit but `-value` does, VIXL will automatically use - // the 'opposite' instruction. - return vixl::aarch64::Assembler::IsImmAddSub(value) - || vixl::aarch64::Assembler::IsImmAddSub(-value); + return AddSubCanEncodeAsImmediate(value); } } diff --git a/compiler/optimizing/constant_folding.cc b/compiler/optimizing/constant_folding.cc index 6f11e628ee..09e7cabfa4 100644 --- a/compiler/optimizing/constant_folding.cc +++ b/compiler/optimizing/constant_folding.cc @@ -26,13 +26,13 @@ class HConstantFoldingVisitor : public HGraphDelegateVisitor { : HGraphDelegateVisitor(graph) {} private: - void VisitBasicBlock(HBasicBlock* block) OVERRIDE; + void VisitBasicBlock(HBasicBlock* block) override; - void VisitUnaryOperation(HUnaryOperation* inst) OVERRIDE; - void VisitBinaryOperation(HBinaryOperation* inst) OVERRIDE; + void VisitUnaryOperation(HUnaryOperation* inst) override; + void VisitBinaryOperation(HBinaryOperation* inst) override; - void VisitTypeConversion(HTypeConversion* inst) OVERRIDE; - void VisitDivZeroCheck(HDivZeroCheck* inst) OVERRIDE; + void VisitTypeConversion(HTypeConversion* inst) override; + void VisitDivZeroCheck(HDivZeroCheck* inst) override; DISALLOW_COPY_AND_ASSIGN(HConstantFoldingVisitor); }; @@ -47,34 +47,35 @@ class InstructionWithAbsorbingInputSimplifier : public HGraphVisitor { private: void VisitShift(HBinaryOperation* shift); - void VisitEqual(HEqual* instruction) OVERRIDE; - void VisitNotEqual(HNotEqual* instruction) OVERRIDE; - - void VisitAbove(HAbove* instruction) OVERRIDE; - void VisitAboveOrEqual(HAboveOrEqual* instruction) OVERRIDE; - void VisitBelow(HBelow* instruction) OVERRIDE; - void VisitBelowOrEqual(HBelowOrEqual* instruction) OVERRIDE; - - void VisitAnd(HAnd* instruction) OVERRIDE; - void VisitCompare(HCompare* instruction) OVERRIDE; - void VisitMul(HMul* instruction) OVERRIDE; - void VisitOr(HOr* instruction) OVERRIDE; - void VisitRem(HRem* instruction) OVERRIDE; - void VisitShl(HShl* instruction) OVERRIDE; - void VisitShr(HShr* instruction) OVERRIDE; - void VisitSub(HSub* instruction) OVERRIDE; - void VisitUShr(HUShr* instruction) OVERRIDE; - void VisitXor(HXor* instruction) OVERRIDE; + void VisitEqual(HEqual* instruction) override; + void VisitNotEqual(HNotEqual* instruction) override; + + void VisitAbove(HAbove* instruction) override; + void VisitAboveOrEqual(HAboveOrEqual* instruction) override; + void VisitBelow(HBelow* instruction) override; + void VisitBelowOrEqual(HBelowOrEqual* instruction) override; + + void VisitAnd(HAnd* instruction) override; + void VisitCompare(HCompare* instruction) override; + void VisitMul(HMul* instruction) override; + void VisitOr(HOr* instruction) override; + void VisitRem(HRem* instruction) override; + void VisitShl(HShl* instruction) override; + void VisitShr(HShr* instruction) override; + void VisitSub(HSub* instruction) override; + void VisitUShr(HUShr* instruction) override; + void VisitXor(HXor* instruction) override; }; -void HConstantFolding::Run() { +bool HConstantFolding::Run() { HConstantFoldingVisitor visitor(graph_); // Process basic blocks in reverse post-order in the dominator tree, // so that an instruction turned into a constant, used as input of // another instruction, may possibly be used to turn that second // instruction into a constant as well. 
visitor.VisitReversePostOrder(); + return true; } diff --git a/compiler/optimizing/constant_folding.h b/compiler/optimizing/constant_folding.h index 05c6df4a93..72bd95b3cb 100644 --- a/compiler/optimizing/constant_folding.h +++ b/compiler/optimizing/constant_folding.h @@ -41,7 +41,7 @@ class HConstantFolding : public HOptimization { public: HConstantFolding(HGraph* graph, const char* name) : HOptimization(graph, name) {} - void Run() OVERRIDE; + bool Run() override; static constexpr const char* kConstantFoldingPassName = "constant_folding"; diff --git a/compiler/optimizing/constant_folding_test.cc b/compiler/optimizing/constant_folding_test.cc index d27104752b..74d9d3a993 100644 --- a/compiler/optimizing/constant_folding_test.cc +++ b/compiler/optimizing/constant_folding_test.cc @@ -16,8 +16,6 @@ #include <functional> -#include "arch/x86/instruction_set_features_x86.h" -#include "code_generator_x86.h" #include "constant_folding.h" #include "dead_code_elimination.h" #include "driver/compiler_options.h" @@ -60,9 +58,6 @@ class ConstantFoldingTest : public OptimizingUnitTest { std::string actual_before = printer_before.str(); EXPECT_EQ(expected_before, actual_before); - std::unique_ptr<const X86InstructionSetFeatures> features_x86( - X86InstructionSetFeatures::FromCppDefines()); - x86::CodeGeneratorX86 codegenX86(graph_, *features_x86.get(), CompilerOptions()); HConstantFolding(graph_, "constant_folding").Run(); GraphChecker graph_checker_cf(graph_); graph_checker_cf.Run(); @@ -75,7 +70,7 @@ class ConstantFoldingTest : public OptimizingUnitTest { check_after_cf(graph_); - HDeadCodeElimination(graph_, nullptr /* stats */, "dead_code_elimination").Run(); + HDeadCodeElimination(graph_, /* stats= */ nullptr, "dead_code_elimination").Run(); GraphChecker graph_checker_dce(graph_); graph_checker_dce.Run(); ASSERT_TRUE(graph_checker_dce.IsValid()); diff --git a/compiler/optimizing/constructor_fence_redundancy_elimination.cc b/compiler/optimizing/constructor_fence_redundancy_elimination.cc index 4a66cd2265..3a1a9e023d 100644 --- a/compiler/optimizing/constructor_fence_redundancy_elimination.cc +++ b/compiler/optimizing/constructor_fence_redundancy_elimination.cc @@ -34,7 +34,7 @@ class CFREVisitor : public HGraphVisitor { candidate_fence_targets_(scoped_allocator_.Adapter(kArenaAllocCFRE)), stats_(stats) {} - void VisitBasicBlock(HBasicBlock* block) OVERRIDE { + void VisitBasicBlock(HBasicBlock* block) override { // Visit all instructions in block. 
HGraphVisitor::VisitBasicBlock(block); @@ -43,86 +43,86 @@ class CFREVisitor : public HGraphVisitor { MergeCandidateFences(); } - void VisitConstructorFence(HConstructorFence* constructor_fence) OVERRIDE { + void VisitConstructorFence(HConstructorFence* constructor_fence) override { candidate_fences_.push_back(constructor_fence); for (size_t input_idx = 0; input_idx < constructor_fence->InputCount(); ++input_idx) { - candidate_fence_targets_.Insert(constructor_fence->InputAt(input_idx)); + candidate_fence_targets_.insert(constructor_fence->InputAt(input_idx)); } } - void VisitBoundType(HBoundType* bound_type) OVERRIDE { + void VisitBoundType(HBoundType* bound_type) override { VisitAlias(bound_type); } - void VisitNullCheck(HNullCheck* null_check) OVERRIDE { + void VisitNullCheck(HNullCheck* null_check) override { VisitAlias(null_check); } - void VisitSelect(HSelect* select) OVERRIDE { + void VisitSelect(HSelect* select) override { VisitAlias(select); } - void VisitInstanceFieldSet(HInstanceFieldSet* instruction) OVERRIDE { + void VisitInstanceFieldSet(HInstanceFieldSet* instruction) override { HInstruction* value = instruction->InputAt(1); VisitSetLocation(instruction, value); } - void VisitStaticFieldSet(HStaticFieldSet* instruction) OVERRIDE { + void VisitStaticFieldSet(HStaticFieldSet* instruction) override { HInstruction* value = instruction->InputAt(1); VisitSetLocation(instruction, value); } - void VisitArraySet(HArraySet* instruction) OVERRIDE { + void VisitArraySet(HArraySet* instruction) override { HInstruction* value = instruction->InputAt(2); VisitSetLocation(instruction, value); } - void VisitDeoptimize(HDeoptimize* instruction ATTRIBUTE_UNUSED) { + void VisitDeoptimize(HDeoptimize* instruction ATTRIBUTE_UNUSED) override { // Pessimize: Merge all fences. MergeCandidateFences(); } - void VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) OVERRIDE { + void VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) override { HandleInvoke(invoke); } - void VisitInvokeVirtual(HInvokeVirtual* invoke) OVERRIDE { + void VisitInvokeVirtual(HInvokeVirtual* invoke) override { HandleInvoke(invoke); } - void VisitInvokeInterface(HInvokeInterface* invoke) OVERRIDE { + void VisitInvokeInterface(HInvokeInterface* invoke) override { HandleInvoke(invoke); } - void VisitInvokeUnresolved(HInvokeUnresolved* invoke) OVERRIDE { + void VisitInvokeUnresolved(HInvokeUnresolved* invoke) override { HandleInvoke(invoke); } - void VisitInvokePolymorphic(HInvokePolymorphic* invoke) OVERRIDE { + void VisitInvokePolymorphic(HInvokePolymorphic* invoke) override { HandleInvoke(invoke); } - void VisitClinitCheck(HClinitCheck* clinit) OVERRIDE { + void VisitClinitCheck(HClinitCheck* clinit) override { HandleInvoke(clinit); } - void VisitUnresolvedInstanceFieldGet(HUnresolvedInstanceFieldGet* instruction) OVERRIDE { + void VisitUnresolvedInstanceFieldGet(HUnresolvedInstanceFieldGet* instruction) override { // Conservatively treat it as an invocation. HandleInvoke(instruction); } - void VisitUnresolvedInstanceFieldSet(HUnresolvedInstanceFieldSet* instruction) OVERRIDE { + void VisitUnresolvedInstanceFieldSet(HUnresolvedInstanceFieldSet* instruction) override { // Conservatively treat it as an invocation. HandleInvoke(instruction); } - void VisitUnresolvedStaticFieldGet(HUnresolvedStaticFieldGet* instruction) OVERRIDE { + void VisitUnresolvedStaticFieldGet(HUnresolvedStaticFieldGet* instruction) override { // Conservatively treat it as an invocation. 
HandleInvoke(instruction); } - void VisitUnresolvedStaticFieldSet(HUnresolvedStaticFieldSet* instruction) OVERRIDE { + void VisitUnresolvedStaticFieldSet(HUnresolvedStaticFieldSet* instruction) override { // Conservatively treat it as an invocation. HandleInvoke(instruction); } @@ -208,13 +208,13 @@ class CFREVisitor : public HGraphVisitor { // there is no benefit to this extra complexity unless we also reordered // the stores to come later. candidate_fences_.clear(); - candidate_fence_targets_.Clear(); + candidate_fence_targets_.clear(); } // A publishing 'store' is only interesting if the value being stored // is one of the fence `targets` in `candidate_fences`. bool IsInterestingPublishTarget(HInstruction* store_input) const { - return candidate_fence_targets_.Find(store_input) != candidate_fence_targets_.end(); + return candidate_fence_targets_.find(store_input) != candidate_fence_targets_.end(); } void MaybeMerge(HConstructorFence* target, HConstructorFence* src) { @@ -250,13 +250,14 @@ class CFREVisitor : public HGraphVisitor { DISALLOW_COPY_AND_ASSIGN(CFREVisitor); }; -void ConstructorFenceRedundancyElimination::Run() { +bool ConstructorFenceRedundancyElimination::Run() { CFREVisitor cfre_visitor(graph_, stats_); // Arbitrarily visit in reverse-post order. // The exact block visit order does not matter, as the algorithm // only operates on a single block at a time. cfre_visitor.VisitReversePostOrder(); + return true; } } // namespace art diff --git a/compiler/optimizing/constructor_fence_redundancy_elimination.h b/compiler/optimizing/constructor_fence_redundancy_elimination.h index f4b06d5544..014b342258 100644 --- a/compiler/optimizing/constructor_fence_redundancy_elimination.h +++ b/compiler/optimizing/constructor_fence_redundancy_elimination.h @@ -52,7 +52,7 @@ class ConstructorFenceRedundancyElimination : public HOptimization { const char* name = kCFREPassName) : HOptimization(graph, name, stats) {} - void Run() OVERRIDE; + bool Run() override; static constexpr const char* kCFREPassName = "constructor_fence_redundancy_elimination"; diff --git a/compiler/optimizing/data_type.h b/compiler/optimizing/data_type.h index 4a6c91459f..3cbcc9e0c3 100644 --- a/compiler/optimizing/data_type.h +++ b/compiler/optimizing/data_type.h @@ -210,6 +210,42 @@ class DataType { static bool IsTypeConversionImplicit(Type input_type, Type result_type); static bool IsTypeConversionImplicit(int64_t value, Type result_type); + static bool IsZeroExtension(Type input_type, Type result_type) { + return IsIntOrLongType(result_type) && + IsUnsignedType(input_type) && + Size(result_type) > Size(input_type); + } + + static Type ToSigned(Type type) { + switch (type) { + case Type::kUint8: + return Type::kInt8; + case Type::kUint16: + return Type::kInt16; + case Type::kUint32: + return Type::kInt32; + case Type::kUint64: + return Type::kInt64; + default: + return type; + } + } + + static Type ToUnsigned(Type type) { + switch (type) { + case Type::kInt8: + return Type::kUint8; + case Type::kInt16: + return Type::kUint16; + case Type::kInt32: + return Type::kUint32; + case Type::kInt64: + return Type::kUint64; + default: + return type; + } + } + static const char* PrettyDescriptor(Type type); private: diff --git a/compiler/optimizing/dead_code_elimination.cc b/compiler/optimizing/dead_code_elimination.cc index 9fa0f72e80..1dc10948cc 100644 --- a/compiler/optimizing/dead_code_elimination.cc +++ b/compiler/optimizing/dead_code_elimination.cc @@ -508,7 +508,7 @@ void HDeadCodeElimination::RemoveDeadInstructions() { } } 
-void HDeadCodeElimination::Run() { +bool HDeadCodeElimination::Run() { // Do not eliminate dead blocks if the graph has irreducible loops. We could // support it, but that would require changes in our loop representation to handle // multiple entry points. We decided it was not worth the complexity. @@ -526,6 +526,7 @@ void HDeadCodeElimination::Run() { } SsaRedundantPhiElimination(graph_).Run(); RemoveDeadInstructions(); + return true; } } // namespace art diff --git a/compiler/optimizing/dead_code_elimination.h b/compiler/optimizing/dead_code_elimination.h index 92a7f562e1..799721acf2 100644 --- a/compiler/optimizing/dead_code_elimination.h +++ b/compiler/optimizing/dead_code_elimination.h @@ -32,7 +32,8 @@ class HDeadCodeElimination : public HOptimization { HDeadCodeElimination(HGraph* graph, OptimizingCompilerStats* stats, const char* name) : HOptimization(graph, name, stats) {} - void Run() OVERRIDE; + bool Run() override; + static constexpr const char* kDeadCodeEliminationPassName = "dead_code_elimination"; private: diff --git a/compiler/optimizing/dead_code_elimination_test.cc b/compiler/optimizing/dead_code_elimination_test.cc index adb6ce1187..f5cd4dc27a 100644 --- a/compiler/optimizing/dead_code_elimination_test.cc +++ b/compiler/optimizing/dead_code_elimination_test.cc @@ -16,8 +16,6 @@ #include "dead_code_elimination.h" -#include "arch/x86/instruction_set_features_x86.h" -#include "code_generator_x86.h" #include "driver/compiler_options.h" #include "graph_checker.h" #include "optimizing_unit_test.h" @@ -45,10 +43,7 @@ void DeadCodeEliminationTest::TestCode(const std::vector<uint16_t>& data, std::string actual_before = printer_before.str(); ASSERT_EQ(actual_before, expected_before); - std::unique_ptr<const X86InstructionSetFeatures> features_x86( - X86InstructionSetFeatures::FromCppDefines()); - x86::CodeGeneratorX86 codegenX86(graph, *features_x86.get(), CompilerOptions()); - HDeadCodeElimination(graph, nullptr /* stats */, "dead_code_elimination").Run(); + HDeadCodeElimination(graph, /* stats= */ nullptr, "dead_code_elimination").Run(); GraphChecker graph_checker(graph); graph_checker.Run(); ASSERT_TRUE(graph_checker.IsValid()); diff --git a/compiler/optimizing/emit_swap_mips_test.cc b/compiler/optimizing/emit_swap_mips_test.cc index b63914faf7..63a370a47b 100644 --- a/compiler/optimizing/emit_swap_mips_test.cc +++ b/compiler/optimizing/emit_swap_mips_test.cc @@ -27,12 +27,13 @@ namespace art { class EmitSwapMipsTest : public OptimizingUnitTest { public: - void SetUp() OVERRIDE { + void SetUp() override { + instruction_set_ = InstructionSet::kMips; + instruction_set_features_ = MipsInstructionSetFeatures::FromCppDefines(); + OptimizingUnitTest::SetUp(); graph_ = CreateGraph(); - isa_features_ = MipsInstructionSetFeatures::FromCppDefines(); - codegen_ = new (graph_->GetAllocator()) mips::CodeGeneratorMIPS(graph_, - *isa_features_.get(), - CompilerOptions()); + codegen_.reset( + new (graph_->GetAllocator()) mips::CodeGeneratorMIPS(graph_, *compiler_options_)); moves_ = new (GetAllocator()) HParallelMove(GetAllocator()); test_helper_.reset( new AssemblerTestInfrastructure(GetArchitectureString(), @@ -45,10 +46,12 @@ class EmitSwapMipsTest : public OptimizingUnitTest { GetAssemblyHeader())); } - void TearDown() OVERRIDE { + void TearDown() override { test_helper_.reset(); - isa_features_.reset(); + codegen_.reset(); + graph_ = nullptr; ResetPoolAndAllocator(); + OptimizingUnitTest::TearDown(); } // Get the typically used name for this architecture. 
@@ -106,10 +109,9 @@ class EmitSwapMipsTest : public OptimizingUnitTest { protected: HGraph* graph_; HParallelMove* moves_; - mips::CodeGeneratorMIPS* codegen_; + std::unique_ptr<mips::CodeGeneratorMIPS> codegen_; mips::MipsAssembler* assembler_; std::unique_ptr<AssemblerTestInfrastructure> test_helper_; - std::unique_ptr<const MipsInstructionSetFeatures> isa_features_; }; TEST_F(EmitSwapMipsTest, TwoRegisters) { diff --git a/compiler/optimizing/graph_checker.cc b/compiler/optimizing/graph_checker.cc index c88baa8610..01d9603802 100644 --- a/compiler/optimizing/graph_checker.cc +++ b/compiler/optimizing/graph_checker.cc @@ -25,6 +25,11 @@ #include "base/bit_vector-inl.h" #include "base/scoped_arena_allocator.h" #include "base/scoped_arena_containers.h" +#include "handle.h" +#include "mirror/class.h" +#include "obj_ptr-inl.h" +#include "scoped_thread_state_change-inl.h" +#include "subtype_check.h" namespace art { @@ -53,6 +58,30 @@ static bool IsExitTryBoundaryIntoExitBlock(HBasicBlock* block) { !boundary->IsEntry(); } + +size_t GraphChecker::Run(bool pass_change, size_t last_size) { + size_t current_size = GetGraph()->GetReversePostOrder().size(); + if (!pass_change) { + // Nothing changed for certain. Do a quick sanity check on that assertion + // for anything other than the first call (when last size was still 0). + if (last_size != 0) { + if (current_size != last_size) { + AddError(StringPrintf("Incorrect no-change assertion, " + "last graph size %zu vs current graph size %zu", + last_size, current_size)); + } + } + // TODO: if we would trust the "false" value of the flag completely, we + // could skip checking the graph at this point. + } + + // VisitReversePostOrder is used instead of VisitInsertionOrder, + // as the latter might visit dead blocks removed by the dominator + // computation. 
+ VisitReversePostOrder(); + return current_size; +} + void GraphChecker::VisitBasicBlock(HBasicBlock* block) { current_block_ = block; @@ -548,30 +577,85 @@ void GraphChecker::VisitReturnVoid(HReturnVoid* ret) { } } -void GraphChecker::VisitCheckCast(HCheckCast* check) { - VisitInstruction(check); - HInstruction* input = check->InputAt(1); - if (!input->IsLoadClass()) { - AddError(StringPrintf("%s:%d expects a HLoadClass as second input, not %s:%d.", +void GraphChecker::CheckTypeCheckBitstringInput(HTypeCheckInstruction* check, + size_t input_pos, + bool check_value, + uint32_t expected_value, + const char* name) { + if (!check->InputAt(input_pos)->IsIntConstant()) { + AddError(StringPrintf("%s:%d (bitstring) expects a HIntConstant input %zu (%s), not %s:%d.", check->DebugName(), check->GetId(), - input->DebugName(), - input->GetId())); + input_pos, + name, + check->InputAt(2)->DebugName(), + check->InputAt(2)->GetId())); + } else if (check_value) { + uint32_t actual_value = + static_cast<uint32_t>(check->InputAt(input_pos)->AsIntConstant()->GetValue()); + if (actual_value != expected_value) { + AddError(StringPrintf("%s:%d (bitstring) has %s 0x%x, not 0x%x as expected.", + check->DebugName(), + check->GetId(), + name, + actual_value, + expected_value)); + } } } -void GraphChecker::VisitInstanceOf(HInstanceOf* instruction) { - VisitInstruction(instruction); - HInstruction* input = instruction->InputAt(1); - if (!input->IsLoadClass()) { - AddError(StringPrintf("%s:%d expects a HLoadClass as second input, not %s:%d.", - instruction->DebugName(), - instruction->GetId(), - input->DebugName(), - input->GetId())); +void GraphChecker::HandleTypeCheckInstruction(HTypeCheckInstruction* check) { + VisitInstruction(check); + HInstruction* input = check->InputAt(1); + if (check->GetTypeCheckKind() == TypeCheckKind::kBitstringCheck) { + if (!input->IsNullConstant()) { + AddError(StringPrintf("%s:%d (bitstring) expects a HNullConstant as second input, not %s:%d.", + check->DebugName(), + check->GetId(), + input->DebugName(), + input->GetId())); + } + bool check_values = false; + BitString::StorageType expected_path_to_root = 0u; + BitString::StorageType expected_mask = 0u; + { + ScopedObjectAccess soa(Thread::Current()); + ObjPtr<mirror::Class> klass = check->GetClass().Get(); + MutexLock subtype_check_lock(Thread::Current(), *Locks::subtype_check_lock_); + SubtypeCheckInfo::State state = SubtypeCheck<ObjPtr<mirror::Class>>::GetState(klass); + if (state == SubtypeCheckInfo::kAssigned) { + expected_path_to_root = + SubtypeCheck<ObjPtr<mirror::Class>>::GetEncodedPathToRootForTarget(klass); + expected_mask = SubtypeCheck<ObjPtr<mirror::Class>>::GetEncodedPathToRootMask(klass); + check_values = true; + } else { + AddError(StringPrintf("%s:%d (bitstring) references a class with unassigned bitstring.", + check->DebugName(), + check->GetId())); + } + } + CheckTypeCheckBitstringInput( + check, /* input_pos= */ 2, check_values, expected_path_to_root, "path_to_root"); + CheckTypeCheckBitstringInput(check, /* input_pos= */ 3, check_values, expected_mask, "mask"); + } else { + if (!input->IsLoadClass()) { + AddError(StringPrintf("%s:%d (classic) expects a HLoadClass as second input, not %s:%d.", + check->DebugName(), + check->GetId(), + input->DebugName(), + input->GetId())); + } } } +void GraphChecker::VisitCheckCast(HCheckCast* check) { + HandleTypeCheckInstruction(check); +} + +void GraphChecker::VisitInstanceOf(HInstanceOf* instruction) { + HandleTypeCheckInstruction(instruction); +} + void 
GraphChecker::HandleLoop(HBasicBlock* loop_header) { int id = loop_header->GetBlockId(); HLoopInformation* loop_information = loop_header->GetLoopInformation(); @@ -847,7 +931,7 @@ void GraphChecker::VisitPhi(HPhi* phi) { // because the BitVector reallocation strategy has very bad worst-case behavior. ArenaBitVector visited(&allocator, GetGraph()->GetCurrentInstructionId(), - /* expandable */ false, + /* expandable= */ false, kArenaAllocGraphChecker); visited.ClearAllBits(); if (!IsConstantEquivalent(phi, other_phi, &visited)) { diff --git a/compiler/optimizing/graph_checker.h b/compiler/optimizing/graph_checker.h index 0f0b49d240..d085609197 100644 --- a/compiler/optimizing/graph_checker.h +++ b/compiler/optimizing/graph_checker.h @@ -38,39 +38,43 @@ class GraphChecker : public HGraphDelegateVisitor { seen_ids_.ClearAllBits(); } - // Check the whole graph (in reverse post-order). - void Run() { - // VisitReversePostOrder is used instead of VisitInsertionOrder, - // as the latter might visit dead blocks removed by the dominator - // computation. - VisitReversePostOrder(); - } - - void VisitBasicBlock(HBasicBlock* block) OVERRIDE; - - void VisitInstruction(HInstruction* instruction) OVERRIDE; - void VisitPhi(HPhi* phi) OVERRIDE; - - void VisitBinaryOperation(HBinaryOperation* op) OVERRIDE; - void VisitBooleanNot(HBooleanNot* instruction) OVERRIDE; - void VisitBoundType(HBoundType* instruction) OVERRIDE; - void VisitBoundsCheck(HBoundsCheck* check) OVERRIDE; - void VisitCheckCast(HCheckCast* check) OVERRIDE; - void VisitCondition(HCondition* op) OVERRIDE; - void VisitConstant(HConstant* instruction) OVERRIDE; - void VisitDeoptimize(HDeoptimize* instruction) OVERRIDE; - void VisitIf(HIf* instruction) OVERRIDE; - void VisitInstanceOf(HInstanceOf* check) OVERRIDE; - void VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) OVERRIDE; - void VisitLoadException(HLoadException* load) OVERRIDE; - void VisitNeg(HNeg* instruction) OVERRIDE; - void VisitPackedSwitch(HPackedSwitch* instruction) OVERRIDE; - void VisitReturn(HReturn* ret) OVERRIDE; - void VisitReturnVoid(HReturnVoid* ret) OVERRIDE; - void VisitSelect(HSelect* instruction) OVERRIDE; - void VisitTryBoundary(HTryBoundary* try_boundary) OVERRIDE; - void VisitTypeConversion(HTypeConversion* instruction) OVERRIDE; - + // Check the whole graph. The pass_change parameter indicates whether changes + // may have occurred during the just executed pass. The default value is + // conservatively "true" (something may have changed). The last_size parameter + // and return value pass along the observed graph sizes. 
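A condensed usage sketch of the new checker entry point declared just below (editor's illustration, not part of the patch; the pass object and driver loop are assumed): the size returned by one call is threaded into the next, and a pass that reports "no change" gets the cheap size-consistency assertion.

    GraphChecker checker(graph);
    size_t last_size = checker.Run();                 // first run: pass_change=true, last_size=0 by default
    bool changed = pass->Run();                       // HOptimization::Run() now returns a bool
    last_size = checker.Run(/* pass_change= */ changed, last_size);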
+ size_t Run(bool pass_change = true, size_t last_size = 0); + + void VisitBasicBlock(HBasicBlock* block) override; + + void VisitInstruction(HInstruction* instruction) override; + void VisitPhi(HPhi* phi) override; + + void VisitBinaryOperation(HBinaryOperation* op) override; + void VisitBooleanNot(HBooleanNot* instruction) override; + void VisitBoundType(HBoundType* instruction) override; + void VisitBoundsCheck(HBoundsCheck* check) override; + void VisitCheckCast(HCheckCast* check) override; + void VisitCondition(HCondition* op) override; + void VisitConstant(HConstant* instruction) override; + void VisitDeoptimize(HDeoptimize* instruction) override; + void VisitIf(HIf* instruction) override; + void VisitInstanceOf(HInstanceOf* check) override; + void VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) override; + void VisitLoadException(HLoadException* load) override; + void VisitNeg(HNeg* instruction) override; + void VisitPackedSwitch(HPackedSwitch* instruction) override; + void VisitReturn(HReturn* ret) override; + void VisitReturnVoid(HReturnVoid* ret) override; + void VisitSelect(HSelect* instruction) override; + void VisitTryBoundary(HTryBoundary* try_boundary) override; + void VisitTypeConversion(HTypeConversion* instruction) override; + + void CheckTypeCheckBitstringInput(HTypeCheckInstruction* check, + size_t input_pos, + bool check_value, + uint32_t expected_value, + const char* name); + void HandleTypeCheckInstruction(HTypeCheckInstruction* instruction); void HandleLoop(HBasicBlock* loop_header); void HandleBooleanInput(HInstruction* instruction, size_t input_index); diff --git a/compiler/optimizing/graph_visualizer.cc b/compiler/optimizing/graph_visualizer.cc index 5ff31cead5..2a7bbcb72f 100644 --- a/compiler/optimizing/graph_visualizer.cc +++ b/compiler/optimizing/graph_visualizer.cc @@ -106,8 +106,7 @@ std::ostream& operator<<(std::ostream& os, const StringList& list) { } } -typedef Disassembler* create_disasm_prototype(InstructionSet instruction_set, - DisassemblerOptions* options); +using create_disasm_prototype = Disassembler*(InstructionSet, DisassemblerOptions*); class HGraphVisualizerDisassembler { public: HGraphVisualizerDisassembler(InstructionSet instruction_set, @@ -131,10 +130,10 @@ class HGraphVisualizerDisassembler { // been generated, so we can read data in literal pools. disassembler_ = std::unique_ptr<Disassembler>((*create_disassembler)( instruction_set, - new DisassemblerOptions(/* absolute_addresses */ false, + new DisassemblerOptions(/* absolute_addresses= */ false, base_address, end_address, - /* can_read_literals */ true, + /* can_read_literals= */ true, Is64BitInstructionSet(instruction_set) ? 
&Thread::DumpThreadOffset<PointerSize::k64> : &Thread::DumpThreadOffset<PointerSize::k32>))); @@ -333,7 +332,7 @@ class HGraphVisualizerPrinter : public HGraphDelegateVisitor { return output_; } - void VisitParallelMove(HParallelMove* instruction) OVERRIDE { + void VisitParallelMove(HParallelMove* instruction) override { StartAttributeStream("liveness") << instruction->GetLifetimePosition(); StringList moves; for (size_t i = 0, e = instruction->NumMoves(); i < e; ++i) { @@ -346,36 +345,36 @@ class HGraphVisualizerPrinter : public HGraphDelegateVisitor { StartAttributeStream("moves") << moves; } - void VisitIntConstant(HIntConstant* instruction) OVERRIDE { + void VisitIntConstant(HIntConstant* instruction) override { StartAttributeStream() << instruction->GetValue(); } - void VisitLongConstant(HLongConstant* instruction) OVERRIDE { + void VisitLongConstant(HLongConstant* instruction) override { StartAttributeStream() << instruction->GetValue(); } - void VisitFloatConstant(HFloatConstant* instruction) OVERRIDE { + void VisitFloatConstant(HFloatConstant* instruction) override { StartAttributeStream() << instruction->GetValue(); } - void VisitDoubleConstant(HDoubleConstant* instruction) OVERRIDE { + void VisitDoubleConstant(HDoubleConstant* instruction) override { StartAttributeStream() << instruction->GetValue(); } - void VisitPhi(HPhi* phi) OVERRIDE { + void VisitPhi(HPhi* phi) override { StartAttributeStream("reg") << phi->GetRegNumber(); StartAttributeStream("is_catch_phi") << std::boolalpha << phi->IsCatchPhi() << std::noboolalpha; } - void VisitMemoryBarrier(HMemoryBarrier* barrier) OVERRIDE { + void VisitMemoryBarrier(HMemoryBarrier* barrier) override { StartAttributeStream("kind") << barrier->GetBarrierKind(); } - void VisitMonitorOperation(HMonitorOperation* monitor) OVERRIDE { + void VisitMonitorOperation(HMonitorOperation* monitor) override { StartAttributeStream("kind") << (monitor->IsEnter() ? 
"enter" : "exit"); } - void VisitLoadClass(HLoadClass* load_class) OVERRIDE { + void VisitLoadClass(HLoadClass* load_class) override { StartAttributeStream("load_kind") << load_class->GetLoadKind(); const char* descriptor = load_class->GetDexFile().GetTypeDescriptor( load_class->GetDexFile().GetTypeId(load_class->GetTypeIndex())); @@ -386,23 +385,42 @@ class HGraphVisualizerPrinter : public HGraphDelegateVisitor { << load_class->NeedsAccessCheck() << std::noboolalpha; } - void VisitLoadString(HLoadString* load_string) OVERRIDE { + void VisitLoadMethodHandle(HLoadMethodHandle* load_method_handle) override { + StartAttributeStream("load_kind") << "RuntimeCall"; + StartAttributeStream("method_handle_index") << load_method_handle->GetMethodHandleIndex(); + } + + void VisitLoadMethodType(HLoadMethodType* load_method_type) override { + StartAttributeStream("load_kind") << "RuntimeCall"; + const DexFile& dex_file = load_method_type->GetDexFile(); + const dex::ProtoId& proto_id = dex_file.GetProtoId(load_method_type->GetProtoIndex()); + StartAttributeStream("method_type") << dex_file.GetProtoSignature(proto_id); + } + + void VisitLoadString(HLoadString* load_string) override { StartAttributeStream("load_kind") << load_string->GetLoadKind(); } - void VisitCheckCast(HCheckCast* check_cast) OVERRIDE { - StartAttributeStream("check_kind") << check_cast->GetTypeCheckKind(); + void HandleTypeCheckInstruction(HTypeCheckInstruction* check) { + StartAttributeStream("check_kind") << check->GetTypeCheckKind(); StartAttributeStream("must_do_null_check") << std::boolalpha - << check_cast->MustDoNullCheck() << std::noboolalpha; + << check->MustDoNullCheck() << std::noboolalpha; + if (check->GetTypeCheckKind() == TypeCheckKind::kBitstringCheck) { + StartAttributeStream("path_to_root") << std::hex + << "0x" << check->GetBitstringPathToRoot() << std::dec; + StartAttributeStream("mask") << std::hex << "0x" << check->GetBitstringMask() << std::dec; + } } - void VisitInstanceOf(HInstanceOf* instance_of) OVERRIDE { - StartAttributeStream("check_kind") << instance_of->GetTypeCheckKind(); - StartAttributeStream("must_do_null_check") << std::boolalpha - << instance_of->MustDoNullCheck() << std::noboolalpha; + void VisitCheckCast(HCheckCast* check_cast) override { + HandleTypeCheckInstruction(check_cast); + } + + void VisitInstanceOf(HInstanceOf* instance_of) override { + HandleTypeCheckInstruction(instance_of); } - void VisitArrayLength(HArrayLength* array_length) OVERRIDE { + void VisitArrayLength(HArrayLength* array_length) override { StartAttributeStream("is_string_length") << std::boolalpha << array_length->IsStringLength() << std::noboolalpha; if (array_length->IsEmittedAtUseSite()) { @@ -410,31 +428,31 @@ class HGraphVisualizerPrinter : public HGraphDelegateVisitor { } } - void VisitBoundsCheck(HBoundsCheck* bounds_check) OVERRIDE { + void VisitBoundsCheck(HBoundsCheck* bounds_check) override { StartAttributeStream("is_string_char_at") << std::boolalpha << bounds_check->IsStringCharAt() << std::noboolalpha; } - void VisitArrayGet(HArrayGet* array_get) OVERRIDE { + void VisitArrayGet(HArrayGet* array_get) override { StartAttributeStream("is_string_char_at") << std::boolalpha << array_get->IsStringCharAt() << std::noboolalpha; } - void VisitArraySet(HArraySet* array_set) OVERRIDE { + void VisitArraySet(HArraySet* array_set) override { StartAttributeStream("value_can_be_null") << std::boolalpha << array_set->GetValueCanBeNull() << std::noboolalpha; StartAttributeStream("needs_type_check") << std::boolalpha << 
array_set->NeedsTypeCheck() << std::noboolalpha; } - void VisitCompare(HCompare* compare) OVERRIDE { + void VisitCompare(HCompare* compare) override { ComparisonBias bias = compare->GetBias(); StartAttributeStream("bias") << (bias == ComparisonBias::kGtBias ? "gt" : (bias == ComparisonBias::kLtBias ? "lt" : "none")); } - void VisitInvoke(HInvoke* invoke) OVERRIDE { + void VisitInvoke(HInvoke* invoke) override { StartAttributeStream("dex_file_index") << invoke->GetDexMethodIndex(); ArtMethod* method = invoke->GetResolvedMethod(); // We don't print signatures, which conflict with c1visualizer format. @@ -451,12 +469,12 @@ class HGraphVisualizerPrinter : public HGraphDelegateVisitor { << std::noboolalpha; } - void VisitInvokeUnresolved(HInvokeUnresolved* invoke) OVERRIDE { + void VisitInvokeUnresolved(HInvokeUnresolved* invoke) override { VisitInvoke(invoke); StartAttributeStream("invoke_type") << invoke->GetInvokeType(); } - void VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) OVERRIDE { + void VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) override { VisitInvoke(invoke); StartAttributeStream("method_load_kind") << invoke->GetMethodLoadKind(); StartAttributeStream("intrinsic") << invoke->GetIntrinsic(); @@ -465,96 +483,104 @@ class HGraphVisualizerPrinter : public HGraphDelegateVisitor { } } - void VisitInvokeVirtual(HInvokeVirtual* invoke) OVERRIDE { + void VisitInvokeVirtual(HInvokeVirtual* invoke) override { VisitInvoke(invoke); StartAttributeStream("intrinsic") << invoke->GetIntrinsic(); } - void VisitInvokePolymorphic(HInvokePolymorphic* invoke) OVERRIDE { + void VisitInvokePolymorphic(HInvokePolymorphic* invoke) override { VisitInvoke(invoke); StartAttributeStream("invoke_type") << "InvokePolymorphic"; } - void VisitInstanceFieldGet(HInstanceFieldGet* iget) OVERRIDE { + void VisitInstanceFieldGet(HInstanceFieldGet* iget) override { StartAttributeStream("field_name") << iget->GetFieldInfo().GetDexFile().PrettyField(iget->GetFieldInfo().GetFieldIndex(), /* with type */ false); StartAttributeStream("field_type") << iget->GetFieldType(); } - void VisitInstanceFieldSet(HInstanceFieldSet* iset) OVERRIDE { + void VisitInstanceFieldSet(HInstanceFieldSet* iset) override { StartAttributeStream("field_name") << iset->GetFieldInfo().GetDexFile().PrettyField(iset->GetFieldInfo().GetFieldIndex(), /* with type */ false); StartAttributeStream("field_type") << iset->GetFieldType(); } - void VisitStaticFieldGet(HStaticFieldGet* sget) OVERRIDE { + void VisitStaticFieldGet(HStaticFieldGet* sget) override { StartAttributeStream("field_name") << sget->GetFieldInfo().GetDexFile().PrettyField(sget->GetFieldInfo().GetFieldIndex(), /* with type */ false); StartAttributeStream("field_type") << sget->GetFieldType(); } - void VisitStaticFieldSet(HStaticFieldSet* sset) OVERRIDE { + void VisitStaticFieldSet(HStaticFieldSet* sset) override { StartAttributeStream("field_name") << sset->GetFieldInfo().GetDexFile().PrettyField(sset->GetFieldInfo().GetFieldIndex(), /* with type */ false); StartAttributeStream("field_type") << sset->GetFieldType(); } - void VisitUnresolvedInstanceFieldGet(HUnresolvedInstanceFieldGet* field_access) OVERRIDE { + void VisitUnresolvedInstanceFieldGet(HUnresolvedInstanceFieldGet* field_access) override { StartAttributeStream("field_type") << field_access->GetFieldType(); } - void VisitUnresolvedInstanceFieldSet(HUnresolvedInstanceFieldSet* field_access) OVERRIDE { + void VisitUnresolvedInstanceFieldSet(HUnresolvedInstanceFieldSet* field_access) override { 
StartAttributeStream("field_type") << field_access->GetFieldType(); } - void VisitUnresolvedStaticFieldGet(HUnresolvedStaticFieldGet* field_access) OVERRIDE { + void VisitUnresolvedStaticFieldGet(HUnresolvedStaticFieldGet* field_access) override { StartAttributeStream("field_type") << field_access->GetFieldType(); } - void VisitUnresolvedStaticFieldSet(HUnresolvedStaticFieldSet* field_access) OVERRIDE { + void VisitUnresolvedStaticFieldSet(HUnresolvedStaticFieldSet* field_access) override { StartAttributeStream("field_type") << field_access->GetFieldType(); } - void VisitTryBoundary(HTryBoundary* try_boundary) OVERRIDE { + void VisitTryBoundary(HTryBoundary* try_boundary) override { StartAttributeStream("kind") << (try_boundary->IsEntry() ? "entry" : "exit"); } - void VisitDeoptimize(HDeoptimize* deoptimize) OVERRIDE { + void VisitDeoptimize(HDeoptimize* deoptimize) override { StartAttributeStream("kind") << deoptimize->GetKind(); } - void VisitVecOperation(HVecOperation* vec_operation) OVERRIDE { + void VisitVecOperation(HVecOperation* vec_operation) override { StartAttributeStream("packed_type") << vec_operation->GetPackedType(); } - void VisitVecMemoryOperation(HVecMemoryOperation* vec_mem_operation) OVERRIDE { + void VisitVecMemoryOperation(HVecMemoryOperation* vec_mem_operation) override { StartAttributeStream("alignment") << vec_mem_operation->GetAlignment().ToString(); } - void VisitVecHalvingAdd(HVecHalvingAdd* hadd) OVERRIDE { + void VisitVecHalvingAdd(HVecHalvingAdd* hadd) override { VisitVecBinaryOperation(hadd); StartAttributeStream("rounded") << std::boolalpha << hadd->IsRounded() << std::noboolalpha; } - void VisitVecMultiplyAccumulate(HVecMultiplyAccumulate* instruction) OVERRIDE { + void VisitVecMultiplyAccumulate(HVecMultiplyAccumulate* instruction) override { VisitVecOperation(instruction); StartAttributeStream("kind") << instruction->GetOpKind(); } + void VisitVecDotProd(HVecDotProd* instruction) override { + VisitVecOperation(instruction); + DataType::Type arg_type = instruction->InputAt(1)->AsVecOperation()->GetPackedType(); + StartAttributeStream("type") << (instruction->IsZeroExtending() ? 
+ DataType::ToUnsigned(arg_type) : + DataType::ToSigned(arg_type)); + } + #if defined(ART_ENABLE_CODEGEN_arm) || defined(ART_ENABLE_CODEGEN_arm64) - void VisitMultiplyAccumulate(HMultiplyAccumulate* instruction) OVERRIDE { + void VisitMultiplyAccumulate(HMultiplyAccumulate* instruction) override { StartAttributeStream("kind") << instruction->GetOpKind(); } - void VisitBitwiseNegatedRight(HBitwiseNegatedRight* instruction) OVERRIDE { + void VisitBitwiseNegatedRight(HBitwiseNegatedRight* instruction) override { StartAttributeStream("kind") << instruction->GetOpKind(); } - void VisitDataProcWithShifterOp(HDataProcWithShifterOp* instruction) OVERRIDE { + void VisitDataProcWithShifterOp(HDataProcWithShifterOp* instruction) override { StartAttributeStream("kind") << instruction->GetInstrKind() << "+" << instruction->GetOpKind(); if (HDataProcWithShifterOp::IsShiftOp(instruction->GetOpKind())) { StartAttributeStream("shift") << instruction->GetShiftAmount(); @@ -576,6 +602,11 @@ class HGraphVisualizerPrinter : public HGraphDelegateVisitor { } StartAttributeStream() << input_list; } + if (instruction->GetDexPc() != kNoDexPc) { + StartAttributeStream("dex_pc") << instruction->GetDexPc(); + } else { + StartAttributeStream("dex_pc") << "n/a"; + } instruction->Accept(this); if (instruction->HasEnvironment()) { StringList envs; @@ -641,20 +672,32 @@ class HGraphVisualizerPrinter : public HGraphDelegateVisitor { << std::boolalpha << loop_info->IsIrreducible() << std::noboolalpha; } + // For the builder and the inliner, we want to add extra information on HInstructions + // that have reference types, and also HInstanceOf/HCheckcast. if ((IsPass(HGraphBuilder::kBuilderPassName) || IsPass(HInliner::kInlinerPassName)) - && (instruction->GetType() == DataType::Type::kReference)) { - ReferenceTypeInfo info = instruction->IsLoadClass() - ? instruction->AsLoadClass()->GetLoadedClassRTI() - : instruction->GetReferenceTypeInfo(); + && (instruction->GetType() == DataType::Type::kReference || + instruction->IsInstanceOf() || + instruction->IsCheckCast())) { + ReferenceTypeInfo info = (instruction->GetType() == DataType::Type::kReference) + ? instruction->IsLoadClass() + ? instruction->AsLoadClass()->GetLoadedClassRTI() + : instruction->GetReferenceTypeInfo() + : instruction->IsInstanceOf() + ? 
instruction->AsInstanceOf()->GetTargetClassRTI() + : instruction->AsCheckCast()->GetTargetClassRTI(); ScopedObjectAccess soa(Thread::Current()); if (info.IsValid()) { StartAttributeStream("klass") << mirror::Class::PrettyDescriptor(info.GetTypeHandle().Get()); - StartAttributeStream("can_be_null") - << std::boolalpha << instruction->CanBeNull() << std::noboolalpha; + if (instruction->GetType() == DataType::Type::kReference) { + StartAttributeStream("can_be_null") + << std::boolalpha << instruction->CanBeNull() << std::noboolalpha; + } StartAttributeStream("exact") << std::boolalpha << info.IsExact() << std::noboolalpha; - } else if (instruction->IsLoadClass()) { + } else if (instruction->IsLoadClass() || + instruction->IsInstanceOf() || + instruction->IsCheckCast()) { StartAttributeStream("klass") << "unresolved"; } else { // The NullConstant may be added to the graph during other passes that happen between @@ -778,7 +821,7 @@ class HGraphVisualizerPrinter : public HGraphDelegateVisitor { Flush(); } - void VisitBasicBlock(HBasicBlock* block) OVERRIDE { + void VisitBasicBlock(HBasicBlock* block) override { StartTag("block"); PrintProperty("name", "B", block->GetBlockId()); if (block->GetLifetimeStart() != kNoLifetime) { @@ -881,8 +924,8 @@ void HGraphVisualizer::DumpGraphWithDisassembly() const { HGraphVisualizerPrinter printer(graph_, *output_, "disassembly", - /* is_after_pass */ true, - /* graph_in_bad_state */ false, + /* is_after_pass= */ true, + /* graph_in_bad_state= */ false, codegen_, codegen_.GetDisassemblyInformation()); printer.Run(); diff --git a/compiler/optimizing/gvn.cc b/compiler/optimizing/gvn.cc index f05159b735..e8460a843f 100644 --- a/compiler/optimizing/gvn.cc +++ b/compiler/optimizing/gvn.cc @@ -43,7 +43,6 @@ class ValueSet : public ArenaObject<kArenaAllocGvn> { buckets_(allocator->AllocArray<Node*>(num_buckets_, kArenaAllocGvn)), buckets_owned_(allocator, num_buckets_, false, kArenaAllocGvn), num_entries_(0u) { - // ArenaAllocator returns zeroed memory, so no need to set buckets to null. DCHECK(IsPowerOfTwo(num_buckets_)); std::fill_n(buckets_, num_buckets_, nullptr); buckets_owned_.SetInitialBits(num_buckets_); @@ -57,8 +56,6 @@ class ValueSet : public ArenaObject<kArenaAllocGvn> { buckets_(allocator->AllocArray<Node*>(num_buckets_, kArenaAllocGvn)), buckets_owned_(allocator, num_buckets_, false, kArenaAllocGvn), num_entries_(0u) { - // ArenaAllocator returns zeroed memory, so entries of buckets_ and - // buckets_owned_ are initialized to null and false, respectively. DCHECK(IsPowerOfTwo(num_buckets_)); PopulateFromInternal(other); } @@ -348,11 +345,11 @@ class GlobalValueNumberer : public ValueObject { side_effects_(side_effects), sets_(graph->GetBlocks().size(), nullptr, allocator_.Adapter(kArenaAllocGvn)), visited_blocks_( - &allocator_, graph->GetBlocks().size(), /* expandable */ false, kArenaAllocGvn) { + &allocator_, graph->GetBlocks().size(), /* expandable= */ false, kArenaAllocGvn) { visited_blocks_.ClearAllBits(); } - void Run(); + bool Run(); private: // Per-block GVN. 
Will also update the ValueSet of the dominated and @@ -397,7 +394,7 @@ class GlobalValueNumberer : public ValueObject { DISALLOW_COPY_AND_ASSIGN(GlobalValueNumberer); }; -void GlobalValueNumberer::Run() { +bool GlobalValueNumberer::Run() { DCHECK(side_effects_.HasRun()); sets_[graph_->GetEntryBlock()->GetBlockId()] = new (&allocator_) ValueSet(&allocator_); @@ -406,6 +403,7 @@ void GlobalValueNumberer::Run() { for (HBasicBlock* block : graph_->GetReversePostOrder()) { VisitBasicBlock(block); } + return true; } void GlobalValueNumberer::VisitBasicBlock(HBasicBlock* block) { @@ -478,7 +476,10 @@ void GlobalValueNumberer::VisitBasicBlock(HBasicBlock* block) { HInstruction* next = current->GetNext(); // Do not kill the set with the side effects of the instruction just now: if // the instruction is GVN'ed, we don't need to kill. - if (current->CanBeMoved()) { + // + // BoundType is a special case example of an instruction which shouldn't be moved but can be + // GVN'ed. + if (current->CanBeMoved() || current->IsBoundType()) { if (current->IsBinaryOperation() && current->AsBinaryOperation()->IsCommutative()) { // For commutative ops, (x op y) will be treated the same as (y op x) // after fixed ordering. @@ -542,12 +543,12 @@ HBasicBlock* GlobalValueNumberer::FindVisitedBlockWithRecyclableSet( // that is larger, we return it if no perfectly-matching set is found. // Note that we defer testing WillBeReferencedAgain until all other criteria // have been satisfied because it might be expensive. - if (current_set->CanHoldCopyOf(reference_set, /* exact_match */ true)) { + if (current_set->CanHoldCopyOf(reference_set, /* exact_match= */ true)) { if (!WillBeReferencedAgain(current_block)) { return current_block; } } else if (secondary_match == nullptr && - current_set->CanHoldCopyOf(reference_set, /* exact_match */ false)) { + current_set->CanHoldCopyOf(reference_set, /* exact_match= */ false)) { if (!WillBeReferencedAgain(current_block)) { secondary_match = current_block; } @@ -557,9 +558,9 @@ HBasicBlock* GlobalValueNumberer::FindVisitedBlockWithRecyclableSet( return secondary_match; } -void GVNOptimization::Run() { +bool GVNOptimization::Run() { GlobalValueNumberer gvn(graph_, side_effects_); - gvn.Run(); + return gvn.Run(); } } // namespace art diff --git a/compiler/optimizing/gvn.h b/compiler/optimizing/gvn.h index 4fdba26ebd..bbf2265e98 100644 --- a/compiler/optimizing/gvn.h +++ b/compiler/optimizing/gvn.h @@ -31,7 +31,7 @@ class GVNOptimization : public HOptimization { const char* pass_name = kGlobalValueNumberingPassName) : HOptimization(graph, pass_name), side_effects_(side_effects) {} - void Run() OVERRIDE; + bool Run() override; static constexpr const char* kGlobalValueNumberingPassName = "GVN"; diff --git a/compiler/optimizing/induction_var_analysis.cc b/compiler/optimizing/induction_var_analysis.cc index d270c6a28e..3a10d5831d 100644 --- a/compiler/optimizing/induction_var_analysis.cc +++ b/compiler/optimizing/induction_var_analysis.cc @@ -243,7 +243,7 @@ HInductionVarAnalysis::HInductionVarAnalysis(HGraph* graph, const char* name) graph->GetAllocator()->Adapter(kArenaAllocInductionVarAnalysis)) { } -void HInductionVarAnalysis::Run() { +bool HInductionVarAnalysis::Run() { // Detects sequence variables (generalized induction variables) during an outer to inner // traversal of all loops using Gerlek's algorithm. The order is important to enable // range analysis on outer loop while visiting inner loops. 
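Since HInductionVarAnalysis::Run() now reports whether any induction information was recorded (it returns !induction_.empty()), a caller can skip dependent work when nothing was found. A hedged sketch of how that result might be consumed (the consuming code is illustrative, not from the patch):

    HInductionVarAnalysis induction(graph_);
    if (induction.Run()) {
      // At least one loop yielded induction info, so range-based passes
      // (e.g. bounds check elimination via InductionVarRange) have data to query.
    }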
@@ -253,6 +253,7 @@ void HInductionVarAnalysis::Run() { VisitLoop(graph_block->GetLoopInformation()); } } + return !induction_.empty(); } void HInductionVarAnalysis::VisitLoop(HLoopInformation* loop) { @@ -1073,8 +1074,8 @@ bool HInductionVarAnalysis::IsTaken(InductionInfo* lower_expr, && lower_value >= upper_value; default: LOG(FATAL) << "CONDITION UNREACHABLE"; + UNREACHABLE(); } - return false; // not certain, may be untaken } bool HInductionVarAnalysis::IsFinite(InductionInfo* upper_expr, @@ -1098,8 +1099,8 @@ bool HInductionVarAnalysis::IsFinite(InductionInfo* upper_expr, return (IsAtLeast(upper_expr, &value) && value >= (min - stride_value)); default: LOG(FATAL) << "CONDITION UNREACHABLE"; + UNREACHABLE(); } - return false; // not certain, may be infinite } bool HInductionVarAnalysis::FitsNarrowerControl(InductionInfo* lower_expr, diff --git a/compiler/optimizing/induction_var_analysis.h b/compiler/optimizing/induction_var_analysis.h index acad77d35f..a48aa90059 100644 --- a/compiler/optimizing/induction_var_analysis.h +++ b/compiler/optimizing/induction_var_analysis.h @@ -37,7 +37,7 @@ class HInductionVarAnalysis : public HOptimization { public: explicit HInductionVarAnalysis(HGraph* graph, const char* name = kInductionPassName); - void Run() OVERRIDE; + bool Run() override; static constexpr const char* kInductionPassName = "induction_var_analysis"; diff --git a/compiler/optimizing/induction_var_range.cc b/compiler/optimizing/induction_var_range.cc index 99dec11240..4c78fa8f06 100644 --- a/compiler/optimizing/induction_var_range.cc +++ b/compiler/optimizing/induction_var_range.cc @@ -78,22 +78,15 @@ static bool IsGEZero(HInstruction* instruction) { DCHECK(instruction != nullptr); if (instruction->IsArrayLength()) { return true; - } else if (instruction->IsInvokeStaticOrDirect()) { - switch (instruction->AsInvoke()->GetIntrinsic()) { - case Intrinsics::kMathMinIntInt: - case Intrinsics::kMathMinLongLong: - // Instruction MIN(>=0, >=0) is >= 0. - return IsGEZero(instruction->InputAt(0)) && - IsGEZero(instruction->InputAt(1)); - case Intrinsics::kMathAbsInt: - case Intrinsics::kMathAbsLong: - // Instruction ABS(>=0) is >= 0. - // NOTE: ABS(minint) = minint prevents assuming - // >= 0 without looking at the argument. - return IsGEZero(instruction->InputAt(0)); - default: - break; - } + } else if (instruction->IsMin()) { + // Instruction MIN(>=0, >=0) is >= 0. + return IsGEZero(instruction->InputAt(0)) && + IsGEZero(instruction->InputAt(1)); + } else if (instruction->IsAbs()) { + // Instruction ABS(>=0) is >= 0. + // NOTE: ABS(minint) = minint prevents assuming + // >= 0 without looking at the argument. + return IsGEZero(instruction->InputAt(0)); } int64_t value = -1; return IsInt64AndGet(instruction, &value) && value >= 0; @@ -102,21 +95,14 @@ static bool IsGEZero(HInstruction* instruction) { /** Hunts "under the hood" for a suitable instruction at the hint. */ static bool IsMaxAtHint( HInstruction* instruction, HInstruction* hint, /*out*/HInstruction** suitable) { - if (instruction->IsInvokeStaticOrDirect()) { - switch (instruction->AsInvoke()->GetIntrinsic()) { - case Intrinsics::kMathMinIntInt: - case Intrinsics::kMathMinLongLong: - // For MIN(x, y), return most suitable x or y as maximum. - return IsMaxAtHint(instruction->InputAt(0), hint, suitable) || - IsMaxAtHint(instruction->InputAt(1), hint, suitable); - default: - break; - } + if (instruction->IsMin()) { + // For MIN(x, y), return most suitable x or y as maximum. 
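The note above about ABS(minint) is the usual two's-complement corner case: negating the most negative 32-bit value wraps back to itself, so an absolute value alone proves nothing about the sign. A standalone demonstration (editor's sketch; relies on the modular narrowing conversion that C++20 guarantees and two's-complement targets have always provided):

    #include <cstdint>
    #include <iostream>

    int main() {
      int32_t minint = INT32_MIN;
      int64_t negated = -static_cast<int64_t>(minint);  // 2147483648, representable in 64 bits
      int32_t wrapped = static_cast<int32_t>(negated);  // narrows back to INT32_MIN
      std::cout << (wrapped == minint) << '\n';          // prints 1
    }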
+ return IsMaxAtHint(instruction->InputAt(0), hint, suitable) || + IsMaxAtHint(instruction->InputAt(1), hint, suitable); } else { *suitable = instruction; return HuntForDeclaration(instruction) == hint; } - return false; } /** Post-analysis simplification of a minimum value that makes the bound more useful to clients. */ @@ -230,13 +216,13 @@ bool InductionVarRange::GetInductionRange(HInstruction* context, chase_hint_ = chase_hint; bool in_body = context->GetBlock() != loop->GetHeader(); int64_t stride_value = 0; - *min_val = SimplifyMin(GetVal(info, trip, in_body, /* is_min */ true)); - *max_val = SimplifyMax(GetVal(info, trip, in_body, /* is_min */ false), chase_hint); + *min_val = SimplifyMin(GetVal(info, trip, in_body, /* is_min= */ true)); + *max_val = SimplifyMax(GetVal(info, trip, in_body, /* is_min= */ false), chase_hint); *needs_finite_test = NeedsTripCount(info, &stride_value) && IsUnsafeTripCount(trip); chase_hint_ = nullptr; // Retry chasing constants for wrap-around (merge sensitive). if (!min_val->is_known && info->induction_class == HInductionVarAnalysis::kWrapAround) { - *min_val = SimplifyMin(GetVal(info, trip, in_body, /* is_min */ true)); + *min_val = SimplifyMin(GetVal(info, trip, in_body, /* is_min= */ true)); } return true; } @@ -365,14 +351,16 @@ void InductionVarRange::Replace(HInstruction* instruction, } } -bool InductionVarRange::IsFinite(HLoopInformation* loop, /*out*/ int64_t* tc) const { - HInductionVarAnalysis::InductionInfo *trip = - induction_analysis_->LookupInfo(loop, GetLoopControl(loop)); - if (trip != nullptr && !IsUnsafeTripCount(trip)) { - IsConstant(trip->op_a, kExact, tc); - return true; - } - return false; +bool InductionVarRange::IsFinite(HLoopInformation* loop, /*out*/ int64_t* trip_count) const { + bool is_constant_unused = false; + return CheckForFiniteAndConstantProps(loop, &is_constant_unused, trip_count); +} + +bool InductionVarRange::HasKnownTripCount(HLoopInformation* loop, + /*out*/ int64_t* trip_count) const { + bool is_constant = false; + CheckForFiniteAndConstantProps(loop, &is_constant, trip_count); + return is_constant; } bool InductionVarRange::IsUnitStride(HInstruction* context, @@ -431,6 +419,18 @@ HInstruction* InductionVarRange::GenerateTripCount(HLoopInformation* loop, // Private class methods. // +bool InductionVarRange::CheckForFiniteAndConstantProps(HLoopInformation* loop, + /*out*/ bool* is_constant, + /*out*/ int64_t* trip_count) const { + HInductionVarAnalysis::InductionInfo *trip = + induction_analysis_->LookupInfo(loop, GetLoopControl(loop)); + if (trip != nullptr && !IsUnsafeTripCount(trip)) { + *is_constant = IsConstant(trip->op_a, kExact, trip_count); + return true; + } + return false; +} + bool InductionVarRange::IsConstant(HInductionVarAnalysis::InductionInfo* info, ConstantRequest request, /*out*/ int64_t* value) const { @@ -445,8 +445,8 @@ bool InductionVarRange::IsConstant(HInductionVarAnalysis::InductionInfo* info, } // Try range analysis on the invariant, only accept a proper range // to avoid arithmetic wrap-around anomalies. 
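The "proper range" acceptance rule mentioned above boils down to: trust the bounds only when min <= max (anything else indicates wrap-around), require min == max for an exact constant, and take the upper bound when only an at-most value is requested. A minimal standalone restatement (editor's sketch; the function name and the std::optional wrapper are not from ART):

    #include <cstdint>
    #include <optional>

    // Mirrors the acceptance logic described above for a requested constant.
    std::optional<int64_t> AcceptConstant(int64_t min, int64_t max, bool exact) {
      if (min > max) {
        return std::nullopt;  // improper (wrapped) range, reject
      }
      if (exact) {
        return (min == max) ? std::optional<int64_t>(min) : std::nullopt;
      }
      return max;             // "at most" request: the upper bound suffices
    }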
- Value min_val = GetVal(info, nullptr, /* in_body */ true, /* is_min */ true); - Value max_val = GetVal(info, nullptr, /* in_body */ true, /* is_min */ false); + Value min_val = GetVal(info, nullptr, /* in_body= */ true, /* is_min= */ true); + Value max_val = GetVal(info, nullptr, /* in_body= */ true, /* is_min= */ false); if (IsConstantValue(min_val) && IsConstantValue(max_val) && min_val.b_constant <= max_val.b_constant) { if ((request == kExact && min_val.b_constant == max_val.b_constant) || request == kAtMost) { @@ -791,10 +791,10 @@ InductionVarRange::Value InductionVarRange::GetMul(HInductionVarAnalysis::Induct return MulRangeAndConstant(value, info1, trip, in_body, is_min); } // Interval ranges. - Value v1_min = GetVal(info1, trip, in_body, /* is_min */ true); - Value v1_max = GetVal(info1, trip, in_body, /* is_min */ false); - Value v2_min = GetVal(info2, trip, in_body, /* is_min */ true); - Value v2_max = GetVal(info2, trip, in_body, /* is_min */ false); + Value v1_min = GetVal(info1, trip, in_body, /* is_min= */ true); + Value v1_max = GetVal(info1, trip, in_body, /* is_min= */ false); + Value v2_min = GetVal(info2, trip, in_body, /* is_min= */ true); + Value v2_max = GetVal(info2, trip, in_body, /* is_min= */ false); // Positive range vs. positive or negative range. if (IsConstantValue(v1_min) && v1_min.b_constant >= 0) { if (IsConstantValue(v2_min) && v2_min.b_constant >= 0) { @@ -825,10 +825,10 @@ InductionVarRange::Value InductionVarRange::GetDiv(HInductionVarAnalysis::Induct return DivRangeAndConstant(value, info1, trip, in_body, is_min); } // Interval ranges. - Value v1_min = GetVal(info1, trip, in_body, /* is_min */ true); - Value v1_max = GetVal(info1, trip, in_body, /* is_min */ false); - Value v2_min = GetVal(info2, trip, in_body, /* is_min */ true); - Value v2_max = GetVal(info2, trip, in_body, /* is_min */ false); + Value v1_min = GetVal(info1, trip, in_body, /* is_min= */ true); + Value v1_max = GetVal(info1, trip, in_body, /* is_min= */ false); + Value v2_min = GetVal(info2, trip, in_body, /* is_min= */ true); + Value v2_max = GetVal(info2, trip, in_body, /* is_min= */ false); // Positive range vs. positive or negative range. if (IsConstantValue(v1_min) && v1_min.b_constant >= 0) { if (IsConstantValue(v2_min) && v2_min.b_constant >= 0) { @@ -1019,10 +1019,10 @@ bool InductionVarRange::GenerateRangeOrLastValue(HInstruction* context, // Code generation for taken test: generate the code when requested or otherwise analyze // if code generation is feasible when taken test is needed. if (taken_test != nullptr) { - return GenerateCode(trip->op_b, nullptr, graph, block, taken_test, in_body, /* is_min */ false); + return GenerateCode(trip->op_b, nullptr, graph, block, taken_test, in_body, /* is_min= */ false); } else if (*needs_taken_test) { if (!GenerateCode( - trip->op_b, nullptr, nullptr, nullptr, nullptr, in_body, /* is_min */ false)) { + trip->op_b, nullptr, nullptr, nullptr, nullptr, in_body, /* is_min= */ false)) { return false; } } @@ -1030,9 +1030,9 @@ bool InductionVarRange::GenerateRangeOrLastValue(HInstruction* context, return // Success on lower if invariant (not set), or code can be generated. ((info->induction_class == HInductionVarAnalysis::kInvariant) || - GenerateCode(info, trip, graph, block, lower, in_body, /* is_min */ true)) && + GenerateCode(info, trip, graph, block, lower, in_body, /* is_min= */ true)) && // And success on upper. 
- GenerateCode(info, trip, graph, block, upper, in_body, /* is_min */ false); + GenerateCode(info, trip, graph, block, upper, in_body, /* is_min= */ false); } bool InductionVarRange::GenerateLastValuePolynomial(HInductionVarAnalysis::InductionInfo* info, diff --git a/compiler/optimizing/induction_var_range.h b/compiler/optimizing/induction_var_range.h index 0b980f596a..906dc6bb7b 100644 --- a/compiler/optimizing/induction_var_range.h +++ b/compiler/optimizing/induction_var_range.h @@ -161,9 +161,15 @@ class InductionVarRange { } /** - * Checks if header logic of a loop terminates. Sets trip-count tc if known. + * Checks if header logic of a loop terminates. If trip count is known sets 'trip_count' to its + * value. */ - bool IsFinite(HLoopInformation* loop, /*out*/ int64_t* tc) const; + bool IsFinite(HLoopInformation* loop, /*out*/ int64_t* trip_count) const; + + /** + * Checks if a trip count is known for the loop and sets 'trip_count' to its value in this case. + */ + bool HasKnownTripCount(HLoopInformation* loop, /*out*/ int64_t* trip_count) const; /** * Checks if the given instruction is a unit stride induction inside the closest enveloping @@ -194,6 +200,14 @@ class InductionVarRange { }; /** + * Checks if header logic of a loop terminates. If trip count is known (constant) sets + * 'is_constant' to true and 'trip_count' to the trip count value. + */ + bool CheckForFiniteAndConstantProps(HLoopInformation* loop, + /*out*/ bool* is_constant, + /*out*/ int64_t* trip_count) const; + + /** * Returns true if exact or upper/lower bound on the given induction * information is known as a 64-bit constant, which is returned in value. */ diff --git a/compiler/optimizing/induction_var_range_test.cc b/compiler/optimizing/induction_var_range_test.cc index e5bc6ef22c..f6af384af0 100644 --- a/compiler/optimizing/induction_var_range_test.cc +++ b/compiler/optimizing/induction_var_range_test.cc @@ -252,24 +252,24 @@ class InductionVarRangeTest : public OptimizingUnitTest { Value GetMin(HInductionVarAnalysis::InductionInfo* info, HInductionVarAnalysis::InductionInfo* trip) { - return range_.GetVal(info, trip, /* in_body */ true, /* is_min */ true); + return range_.GetVal(info, trip, /* in_body= */ true, /* is_min= */ true); } Value GetMax(HInductionVarAnalysis::InductionInfo* info, HInductionVarAnalysis::InductionInfo* trip) { - return range_.GetVal(info, trip, /* in_body */ true, /* is_min */ false); + return range_.GetVal(info, trip, /* in_body= */ true, /* is_min= */ false); } Value GetMul(HInductionVarAnalysis::InductionInfo* info1, HInductionVarAnalysis::InductionInfo* info2, bool is_min) { - return range_.GetMul(info1, info2, nullptr, /* in_body */ true, is_min); + return range_.GetMul(info1, info2, nullptr, /* in_body= */ true, is_min); } Value GetDiv(HInductionVarAnalysis::InductionInfo* info1, HInductionVarAnalysis::InductionInfo* info2, bool is_min) { - return range_.GetDiv(info1, info2, nullptr, /* in_body */ true, is_min); + return range_.GetDiv(info1, info2, nullptr, /* in_body= */ true, is_min); } Value GetRem(HInductionVarAnalysis::InductionInfo* info1, @@ -701,7 +701,11 @@ TEST_F(InductionVarRangeTest, MaxValue) { TEST_F(InductionVarRangeTest, ArrayLengthAndHints) { // We pass a bogus constant for the class to avoid mocking one. 
- HInstruction* new_array = new (GetAllocator()) HNewArray(x_, x_, 0); + HInstruction* new_array = new (GetAllocator()) HNewArray( + /* cls= */ x_, + /* length= */ x_, + /* dex_pc= */ 0, + /* component_size_shift= */ 0); entry_block_->AddInstruction(new_array); HInstruction* array_length = new (GetAllocator()) HArrayLength(new_array, 0); entry_block_->AddInstruction(array_length); diff --git a/compiler/optimizing/inliner.cc b/compiler/optimizing/inliner.cc index 4fc7262265..205077fb49 100644 --- a/compiler/optimizing/inliner.cc +++ b/compiler/optimizing/inliner.cc @@ -18,15 +18,16 @@ #include "art_method-inl.h" #include "base/enums.h" +#include "base/logging.h" #include "builder.h" #include "class_linker.h" +#include "class_root.h" #include "constant_folding.h" #include "data_type-inl.h" #include "dead_code_elimination.h" #include "dex/inline_method_analyser.h" #include "dex/verification_results.h" #include "dex/verified_method.h" -#include "driver/compiler_driver-inl.h" #include "driver/compiler_options.h" #include "driver/dex_compilation_unit.h" #include "instruction_simplifier.h" @@ -35,8 +36,9 @@ #include "jit/jit_code_cache.h" #include "mirror/class_loader.h" #include "mirror/dex_cache.h" +#include "mirror/object_array-alloc-inl.h" +#include "mirror/object_array-inl.h" #include "nodes.h" -#include "optimizing_compiler.h" #include "reference_type_propagation.h" #include "register_allocator_linear_scan.h" #include "scoped_thread_state_change-inl.h" @@ -124,13 +126,18 @@ void HInliner::UpdateInliningBudget() { } } -void HInliner::Run() { - if (graph_->IsDebuggable()) { +bool HInliner::Run() { + if (codegen_->GetCompilerOptions().GetInlineMaxCodeUnits() == 0) { + // Inlining effectively disabled. + return false; + } else if (graph_->IsDebuggable()) { // For simplicity, we currently never inline when the graph is debuggable. This avoids // doing some logic in the runtime to discover if a method could have been inlined. - return; + return false; } + bool didInline = false; + // Initialize the number of instructions for the method being compiled. Recursive calls // to HInliner::Run have already updated the instruction count. if (outermost_graph_ == graph_) { @@ -143,14 +150,15 @@ void HInliner::Run() { // If we're compiling with a core image (which is only used for // test purposes), honor inlining directives in method names: - // - if a method's name contains the substring "$inline$", ensure - // that this method is actually inlined; // - if a method's name contains the substring "$noinline$", do not - // inline that method. - // We limit this to AOT compilation, as the JIT may or may not inline + // inline that method; + // - if a method's name contains the substring "$inline$", ensure + // that this method is actually inlined. + // We limit the latter to AOT compilation, as the JIT may or may not inline // depending on the state of classes at runtime. - const bool honor_inlining_directives = - IsCompilingWithCoreImage() && Runtime::Current()->IsAotCompiler(); + const bool honor_noinline_directives = codegen_->GetCompilerOptions().CompilingWithCoreImage(); + const bool honor_inline_directives = + honor_noinline_directives && Runtime::Current()->IsAotCompiler(); // Keep a copy of all blocks when starting the visit. ArenaVector<HBasicBlock*> blocks = graph_->GetReversePostOrder(); @@ -164,25 +172,32 @@ void HInliner::Run() { HInvoke* call = instruction->AsInvoke(); // As long as the call is not intrinsified, it is worth trying to inline. 
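The $noinline$/$inline$ directives described above are matched by plain substring search on the pretty method name. A tiny standalone illustration of that check (editor's sketch; the method name is made up, and the real pass only consults $inline$ when $noinline$ is absent and only enforces it for AOT):

    #include <iostream>
    #include <string>

    int main() {
      std::string callee_name = "int Foo.$noinline$compute(int)";  // hypothetical pretty name
      bool never_inline = callee_name.find("$noinline$") != std::string::npos;
      bool must_inline  = callee_name.find("$inline$")   != std::string::npos;
      std::cout << never_inline << ' ' << must_inline << '\n';      // prints "1 0"
    }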
if (call != nullptr && call->GetIntrinsic() == Intrinsics::kNone) { - if (honor_inlining_directives) { + if (honor_noinline_directives) { // Debugging case: directives in method names control or assert on inlining. std::string callee_name = outer_compilation_unit_.GetDexFile()->PrettyMethod( - call->GetDexMethodIndex(), /* with_signature */ false); + call->GetDexMethodIndex(), /* with_signature= */ false); // Tests prevent inlining by having $noinline$ in their method names. if (callee_name.find("$noinline$") == std::string::npos) { - if (!TryInline(call)) { + if (TryInline(call)) { + didInline = true; + } else if (honor_inline_directives) { bool should_have_inlined = (callee_name.find("$inline$") != std::string::npos); CHECK(!should_have_inlined) << "Could not inline " << callee_name; } } } else { + DCHECK(!honor_inline_directives); // Normal case: try to inline. - TryInline(call); + if (TryInline(call)) { + didInline = true; + } } } instruction = next; } } + + return didInline; } static bool IsMethodOrDeclaringClassFinal(ArtMethod* method) @@ -274,7 +289,7 @@ static uint32_t FindMethodIndexIn(ArtMethod* method, } } -static dex::TypeIndex FindClassIndexIn(mirror::Class* cls, +static dex::TypeIndex FindClassIndexIn(ObjPtr<mirror::Class> cls, const DexCompilationUnit& compilation_unit) REQUIRES_SHARED(Locks::mutator_lock_) { const DexFile& dex_file = *compilation_unit.GetDexFile(); @@ -353,7 +368,7 @@ HInliner::InlineCacheType HInliner::GetInlineCacheType( } } -static mirror::Class* GetMonomorphicType(Handle<mirror::ObjectArray<mirror::Class>> classes) +static ObjPtr<mirror::Class> GetMonomorphicType(Handle<mirror::ObjectArray<mirror::Class>> classes) REQUIRES_SHARED(Locks::mutator_lock_) { DCHECK(classes->Get(0) != nullptr); return classes->Get(0); @@ -367,6 +382,11 @@ ArtMethod* HInliner::TryCHADevirtualization(ArtMethod* resolved_method) { // No CHA-based devirtulization for AOT compiler (yet). return nullptr; } + if (Runtime::Current()->IsZygote()) { + // No CHA-based devirtulization for Zygote, as it compiles with + // offline information. + return nullptr; + } if (outermost_graph_->IsCompilingOsr()) { // We do not support HDeoptimize in OSR methods. return nullptr; @@ -392,7 +412,7 @@ ArtMethod* HInliner::TryCHADevirtualization(ArtMethod* resolved_method) { return single_impl; } -static bool IsMethodUnverified(CompilerDriver* const compiler_driver, ArtMethod* method) +static bool IsMethodUnverified(const CompilerOptions& compiler_options, ArtMethod* method) REQUIRES_SHARED(Locks::mutator_lock_) { if (!method->GetDeclaringClass()->IsVerified()) { if (Runtime::Current()->UseJitCompilation()) { @@ -401,8 +421,9 @@ static bool IsMethodUnverified(CompilerDriver* const compiler_driver, ArtMethod* return true; } uint16_t class_def_idx = method->GetDeclaringClass()->GetDexClassDefIndex(); - if (!compiler_driver->IsMethodVerifiedWithoutFailures( - method->GetDexMethodIndex(), class_def_idx, *method->GetDexFile())) { + if (!compiler_options.IsMethodVerifiedWithoutFailures(method->GetDexMethodIndex(), + class_def_idx, + *method->GetDexFile())) { // Method has soft or hard failures, don't analyze. 
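The name-based test directives handled above are spread over several branches; consolidated, the convention is: "$noinline$" always blocks inlining, while "$inline$" (checked only under AOT) asserts that inlining succeeded. A self-contained sketch of that rule (helper name and enum are illustrative, not patch code):

#include <string>

enum class InlineDirective { kNone, kMustInline, kMustNotInline };

// "$noinline$" opts a callee out of inlining; "$inline$" additionally demands it,
// but only for AOT, since the JIT may legitimately decline to inline.
InlineDirective GetInlineDirective(const std::string& callee_name, bool is_aot_compiler) {
  if (callee_name.find("$noinline$") != std::string::npos) {
    return InlineDirective::kMustNotInline;
  }
  if (is_aot_compiler && callee_name.find("$inline$") != std::string::npos) {
    return InlineDirective::kMustInline;
  }
  return InlineDirective::kNone;
}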
return true; } @@ -410,11 +431,11 @@ static bool IsMethodUnverified(CompilerDriver* const compiler_driver, ArtMethod* return false; } -static bool AlwaysThrows(CompilerDriver* const compiler_driver, ArtMethod* method) +static bool AlwaysThrows(const CompilerOptions& compiler_options, ArtMethod* method) REQUIRES_SHARED(Locks::mutator_lock_) { DCHECK(method != nullptr); // Skip non-compilable and unverified methods. - if (!method->IsCompilable() || IsMethodUnverified(compiler_driver, method)) { + if (!method->IsCompilable() || IsMethodUnverified(compiler_options, method)) { return false; } // Skip native methods, methods with try blocks, and methods that are too large. @@ -446,9 +467,10 @@ static bool AlwaysThrows(CompilerDriver* const compiler_driver, ArtMethod* metho bool HInliner::TryInline(HInvoke* invoke_instruction) { if (invoke_instruction->IsInvokeUnresolved() || - invoke_instruction->IsInvokePolymorphic()) { - return false; // Don't bother to move further if we know the method is unresolved or an - // invoke-polymorphic. + invoke_instruction->IsInvokePolymorphic() || + invoke_instruction->IsInvokeCustom()) { + return false; // Don't bother to move further if we know the method is unresolved or the + // invocation is polymorphic (invoke-{polymorphic,custom}). } ScopedObjectAccess soa(Thread::Current()); @@ -487,7 +509,7 @@ bool HInliner::TryInline(HInvoke* invoke_instruction) { bool result = TryInlineAndReplace(invoke_instruction, actual_method, ReferenceTypeInfo::CreateInvalid(), - /* do_rtp */ true, + /* do_rtp= */ true, cha_devirtualize); if (result) { // Successfully inlined. @@ -501,7 +523,7 @@ bool HInliner::TryInline(HInvoke* invoke_instruction) { MaybeRecordStat(stats_, MethodCompilationStat::kInlinedInvokeVirtualOrInterface); } } - } else if (!cha_devirtualize && AlwaysThrows(compiler_driver_, actual_method)) { + } else if (!cha_devirtualize && AlwaysThrows(codegen_->GetCompilerOptions(), actual_method)) { // Set always throws property for non-inlined method call with single target // (unless it was obtained through CHA, because that would imply we have // to add the CHA dependency, which seems not worth it). @@ -524,7 +546,7 @@ static Handle<mirror::ObjectArray<mirror::Class>> AllocateInlineCacheHolder( Handle<mirror::ObjectArray<mirror::Class>> inline_cache = hs->NewHandle( mirror::ObjectArray<mirror::Class>::Alloc( self, - class_linker->GetClassRoot(ClassLinker::kClassArrayClass), + GetClassRoot<mirror::ObjectArray<mirror::Class>>(class_linker), InlineCache::kIndividualCacheSize)); if (inline_cache == nullptr) { // We got an OOME. Just clear the exception, and don't inline. @@ -572,9 +594,12 @@ bool HInliner::TryInlineFromInlineCache(const DexFile& caller_dex_file, StackHandleScope<1> hs(Thread::Current()); Handle<mirror::ObjectArray<mirror::Class>> inline_cache; - InlineCacheType inline_cache_type = Runtime::Current()->IsAotCompiler() - ? GetInlineCacheAOT(caller_dex_file, invoke_instruction, &hs, &inline_cache) - : GetInlineCacheJIT(invoke_instruction, &hs, &inline_cache); + // The Zygote JIT compiles based on a profile, so we shouldn't use runtime inline caches + // for it. + InlineCacheType inline_cache_type = + (Runtime::Current()->IsAotCompiler() || Runtime::Current()->IsZygote()) + ? 
GetInlineCacheAOT(caller_dex_file, invoke_instruction, &hs, &inline_cache) + : GetInlineCacheJIT(invoke_instruction, &hs, &inline_cache); switch (inline_cache_type) { case kInlineCacheNoData: { @@ -662,8 +687,7 @@ HInliner::InlineCacheType HInliner::GetInlineCacheAOT( StackHandleScope<1>* hs, /*out*/Handle<mirror::ObjectArray<mirror::Class>>* inline_cache) REQUIRES_SHARED(Locks::mutator_lock_) { - DCHECK(Runtime::Current()->IsAotCompiler()); - const ProfileCompilationInfo* pci = compiler_driver_->GetProfileCompilationInfo(); + const ProfileCompilationInfo* pci = codegen_->GetCompilerOptions().GetProfileCompilationInfo(); if (pci == nullptr) { return kInlineCacheNoData; } @@ -716,7 +740,7 @@ HInliner::InlineCacheType HInliner::ExtractClassesFromOfflineProfile( offline_profile.dex_references.size()); for (size_t i = 0; i < offline_profile.dex_references.size(); i++) { bool found = false; - for (const DexFile* dex_file : compiler_driver_->GetDexFilesForOatFile()) { + for (const DexFile* dex_file : codegen_->GetCompilerOptions().GetDexFilesForOatFile()) { if (offline_profile.dex_references[i].MatchesDex(dex_file)) { dex_profile_index_to_dex_cache[i] = caller_compilation_unit_.GetClassLinker()->FindDexCache(self, *dex_file); @@ -764,7 +788,7 @@ HInliner::InlineCacheType HInliner::ExtractClassesFromOfflineProfile( HInstanceFieldGet* HInliner::BuildGetReceiverClass(ClassLinker* class_linker, HInstruction* receiver, uint32_t dex_pc) const { - ArtField* field = class_linker->GetClassRoot(ClassLinker::kJavaLangObject)->GetInstanceField(0); + ArtField* field = GetClassRoot<mirror::Object>(class_linker)->GetInstanceField(0); DCHECK_EQ(std::string(field->GetName()), "shadow$_klass_"); HInstanceFieldGet* result = new (graph_->GetAllocator()) HInstanceFieldGet( receiver, @@ -841,9 +865,9 @@ bool HInliner::TryInlineMonomorphicCall(HInvoke* invoke_instruction, HBasicBlock* bb_cursor = invoke_instruction->GetBlock(); if (!TryInlineAndReplace(invoke_instruction, resolved_method, - ReferenceTypeInfo::Create(monomorphic_type, /* is_exact */ true), - /* do_rtp */ false, - /* cha_devirtualize */ false)) { + ReferenceTypeInfo::Create(monomorphic_type, /* is_exact= */ true), + /* do_rtp= */ false, + /* cha_devirtualize= */ false)) { return false; } @@ -854,7 +878,7 @@ bool HInliner::TryInlineMonomorphicCall(HInvoke* invoke_instruction, class_index, monomorphic_type, invoke_instruction, - /* with_deoptimization */ true); + /* with_deoptimization= */ true); // Run type propagation to get the guard typed, and eventually propagate the // type of the receiver. @@ -862,7 +886,7 @@ bool HInliner::TryInlineMonomorphicCall(HInvoke* invoke_instruction, outer_compilation_unit_.GetClassLoader(), outer_compilation_unit_.GetDexCache(), handles_, - /* is_first_run */ false); + /* is_first_run= */ false); rtp_fixup.Run(); MaybeRecordStat(stats_, MethodCompilationStat::kInlinedMonomorphicCall); @@ -932,9 +956,9 @@ HInstruction* HInliner::AddTypeGuard(HInstruction* receiver, klass, is_referrer, invoke_instruction->GetDexPc(), - /* needs_access_check */ false); + /* needs_access_check= */ false); HLoadClass::LoadKind kind = HSharpening::ComputeLoadClassKind( - load_class, codegen_, compiler_driver_, caller_compilation_unit_); + load_class, codegen_, caller_compilation_unit_); DCHECK(kind != HLoadClass::LoadKind::kInvalid) << "We should always be able to reference a class for inline caches"; // Load kind must be set before inserting the instruction into the graph. 
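BuildGetReceiverClass() and AddTypeGuard() above build IR; the runtime behaviour of the guard emitted for a monomorphic inline cache is roughly the following. Plain C++ for illustration only: the `klass` member stands in for the shadow$_klass_ field and is an assumption of this sketch, not the generated code.

// Sketch of the guard semantics; the compiler emits equivalent IR, not this function.
template <typename Receiver, typename InlinedBody, typename Deoptimize>
void GuardedMonomorphicCall(Receiver* receiver,
                            const void* expected_class,
                            InlinedBody inlined_body,
                            Deoptimize deoptimize) {
  const void* receiver_class = receiver->klass;  // BuildGetReceiverClass()
  if (receiver_class != expected_class) {
    deoptimize();                                // AddTypeGuard(..., with_deoptimization=true)
    return;
  }
  inlined_body(receiver);                        // body of the single cached target
}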
@@ -1010,7 +1034,7 @@ bool HInliner::TryInlinePolymorphicCall(HInvoke* invoke_instruction, if (!class_index.IsValid() || !TryBuildAndInline(invoke_instruction, method, - ReferenceTypeInfo::Create(handle, /* is_exact */ true), + ReferenceTypeInfo::Create(handle, /* is_exact= */ true), &return_replacement)) { all_targets_inlined = false; } else { @@ -1062,7 +1086,7 @@ bool HInliner::TryInlinePolymorphicCall(HInvoke* invoke_instruction, outer_compilation_unit_.GetClassLoader(), outer_compilation_unit_.GetDexCache(), handles_, - /* is_first_run */ false); + /* is_first_run= */ false); rtp_fixup.Run(); return true; } @@ -1133,14 +1157,14 @@ void HInliner::CreateDiamondPatternForPolymorphicInline(HInstruction* compare, graph_->UpdateLoopAndTryInformationOfNewBlock( - then, original_invoke_block, /* replace_if_back_edge */ false); + then, original_invoke_block, /* replace_if_back_edge= */ false); graph_->UpdateLoopAndTryInformationOfNewBlock( - otherwise, original_invoke_block, /* replace_if_back_edge */ false); + otherwise, original_invoke_block, /* replace_if_back_edge= */ false); // In case the original invoke location was a back edge, we need to update // the loop to now have the merge block as a back edge. graph_->UpdateLoopAndTryInformationOfNewBlock( - merge, original_invoke_block, /* replace_if_back_edge */ true); + merge, original_invoke_block, /* replace_if_back_edge= */ true); } bool HInliner::TryInlinePolymorphicCallToSameTarget( @@ -1258,7 +1282,7 @@ bool HInliner::TryInlinePolymorphicCallToSameTarget( outer_compilation_unit_.GetClassLoader(), outer_compilation_unit_.GetDexCache(), handles_, - /* is_first_run */ false); + /* is_first_run= */ false); rtp_fixup.Run(); MaybeRecordStat(stats_, MethodCompilationStat::kInlinedPolymorphicCall); @@ -1281,9 +1305,7 @@ bool HInliner::TryInlineAndReplace(HInvoke* invoke_instruction, // If invoke_instruction is devirtualized to a different method, give intrinsics // another chance before we try to inline it. - bool wrong_invoke_type = false; - if (invoke_instruction->GetResolvedMethod() != method && - IntrinsicsRecognizer::Recognize(invoke_instruction, method, &wrong_invoke_type)) { + if (invoke_instruction->GetResolvedMethod() != method && method->IsIntrinsic()) { MaybeRecordStat(stats_, MethodCompilationStat::kIntrinsicRecognized); if (invoke_instruction->IsInvokeInterface()) { // We don't intrinsify an invoke-interface directly. @@ -1296,6 +1318,7 @@ bool HInliner::TryInlineAndReplace(HInvoke* invoke_instruction, invoke_instruction->GetDexMethodIndex(), // Use interface method's dex method index. method, method->GetMethodIndex()); + DCHECK_NE(new_invoke->GetIntrinsic(), Intrinsics::kNone); HInputsRef inputs = invoke_instruction->GetInputs(); for (size_t index = 0; index != inputs.size(); ++index) { new_invoke->SetArgumentAt(index, inputs[index]); @@ -1305,14 +1328,11 @@ bool HInliner::TryInlineAndReplace(HInvoke* invoke_instruction, if (invoke_instruction->GetType() == DataType::Type::kReference) { new_invoke->SetReferenceTypeInfo(invoke_instruction->GetReferenceTypeInfo()); } - // Run intrinsic recognizer again to set new_invoke's intrinsic. - IntrinsicsRecognizer::Recognize(new_invoke, method, &wrong_invoke_type); - DCHECK_NE(new_invoke->GetIntrinsic(), Intrinsics::kNone); return_replacement = new_invoke; // invoke_instruction is replaced with new_invoke. should_remove_invoke_instruction = true; } else { - // invoke_instruction is intrinsified and stays. 
+ invoke_instruction->SetResolvedMethod(method); } } else if (!TryBuildAndInline(invoke_instruction, method, receiver_type, &return_replacement)) { if (invoke_instruction->IsInvokeInterface()) { @@ -1386,7 +1406,7 @@ bool HInliner::TryInlineAndReplace(HInvoke* invoke_instruction, outer_compilation_unit_.GetClassLoader(), outer_compilation_unit_.GetDexCache(), handles_, - /* is_first_run */ false).Run(); + /* is_first_run= */ false).Run(); } return true; } @@ -1403,6 +1423,18 @@ size_t HInliner::CountRecursiveCallsOf(ArtMethod* method) const { return count; } +static inline bool MayInline(const CompilerOptions& compiler_options, + const DexFile& inlined_from, + const DexFile& inlined_into) { + // We're not allowed to inline across dex files if we're the no-inline-from dex file. + if (!IsSameDexFile(inlined_from, inlined_into) && + ContainsElement(compiler_options.GetNoInlineFromDexFile(), &inlined_from)) { + return false; + } + + return true; +} + bool HInliner::TryBuildAndInline(HInvoke* invoke_instruction, ArtMethod* method, ReferenceTypeInfo receiver_type, @@ -1424,8 +1456,9 @@ bool HInliner::TryBuildAndInline(HInvoke* invoke_instruction, // Check whether we're allowed to inline. The outermost compilation unit is the relevant // dex file here (though the transitivity of an inline chain would allow checking the calller). - if (!compiler_driver_->MayInline(method->GetDexFile(), - outer_compilation_unit_.GetDexFile())) { + if (!MayInline(codegen_->GetCompilerOptions(), + *method->GetDexFile(), + *outer_compilation_unit_.GetDexFile())) { if (TryPatternSubstitution(invoke_instruction, method, return_replacement)) { LOG_SUCCESS() << "Successfully replaced pattern of invoke " << method->PrettyMethod(); @@ -1450,7 +1483,7 @@ bool HInliner::TryBuildAndInline(HInvoke* invoke_instruction, return false; } - size_t inline_max_code_units = compiler_driver_->GetCompilerOptions().GetInlineMaxCodeUnits(); + size_t inline_max_code_units = codegen_->GetCompilerOptions().GetInlineMaxCodeUnits(); if (accessor.InsnsSizeInCodeUnits() > inline_max_code_units) { LOG_FAIL(stats_, MethodCompilationStat::kNotInlinedCodeItem) << "Method " << method->PrettyMethod() @@ -1474,7 +1507,7 @@ bool HInliner::TryBuildAndInline(HInvoke* invoke_instruction, return false; } - if (IsMethodUnverified(compiler_driver_, method)) { + if (IsMethodUnverified(codegen_->GetCompilerOptions(), method)) { LOG_FAIL(stats_, MethodCompilationStat::kNotInlinedNotVerified) << "Method " << method->PrettyMethod() << " couldn't be verified, so it cannot be inlined"; @@ -1599,7 +1632,8 @@ bool HInliner::TryPatternSubstitution(HInvoke* invoke_instruction, [](uint16_t index) { return index != DexFile::kDexNoIndex16; })); // Create HInstanceFieldSet for each IPUT that stores non-zero data. - HInstruction* obj = GetInvokeInputForArgVRegIndex(invoke_instruction, /* this */ 0u); + HInstruction* obj = GetInvokeInputForArgVRegIndex(invoke_instruction, + /* arg_vreg_index= */ 0u); bool needs_constructor_barrier = false; for (size_t i = 0; i != number_of_iputs; ++i) { HInstruction* value = GetInvokeInputForArgVRegIndex(invoke_instruction, iput_args[i]); @@ -1617,7 +1651,7 @@ bool HInliner::TryPatternSubstitution(HInvoke* invoke_instruction, } } if (needs_constructor_barrier) { - // See CompilerDriver::RequiresConstructorBarrier for more details. + // See DexCompilationUnit::RequiresConstructorBarrier for more details. 
DCHECK(obj != nullptr) << "only non-static methods can have a constructor fence"; HConstructorFence* constructor_fence = @@ -1641,7 +1675,7 @@ HInstanceFieldGet* HInliner::CreateInstanceFieldGet(uint32_t field_index, REQUIRES_SHARED(Locks::mutator_lock_) { ClassLinker* class_linker = Runtime::Current()->GetClassLinker(); ArtField* resolved_field = - class_linker->LookupResolvedField(field_index, referrer, /* is_static */ false); + class_linker->LookupResolvedField(field_index, referrer, /* is_static= */ false); DCHECK(resolved_field != nullptr); HInstanceFieldGet* iget = new (graph_->GetAllocator()) HInstanceFieldGet( obj, @@ -1654,7 +1688,7 @@ HInstanceFieldGet* HInliner::CreateInstanceFieldGet(uint32_t field_index, *referrer->GetDexFile(), // Read barrier generates a runtime call in slow path and we need a valid // dex pc for the associated stack map. 0 is bogus but valid. Bug: 26854537. - /* dex_pc */ 0); + /* dex_pc= */ 0); if (iget->GetType() == DataType::Type::kReference) { // Use the same dex_cache that we used for field lookup as the hint_dex_cache. Handle<mirror::DexCache> dex_cache = handles_->NewHandle(referrer->GetDexCache()); @@ -1662,7 +1696,7 @@ HInstanceFieldGet* HInliner::CreateInstanceFieldGet(uint32_t field_index, outer_compilation_unit_.GetClassLoader(), dex_cache, handles_, - /* is_first_run */ false); + /* is_first_run= */ false); rtp.Visit(iget); } return iget; @@ -1676,7 +1710,7 @@ HInstanceFieldSet* HInliner::CreateInstanceFieldSet(uint32_t field_index, REQUIRES_SHARED(Locks::mutator_lock_) { ClassLinker* class_linker = Runtime::Current()->GetClassLinker(); ArtField* resolved_field = - class_linker->LookupResolvedField(field_index, referrer, /* is_static */ false); + class_linker->LookupResolvedField(field_index, referrer, /* is_static= */ false); DCHECK(resolved_field != nullptr); if (is_final != nullptr) { // This information is needed only for constructors. @@ -1695,18 +1729,33 @@ HInstanceFieldSet* HInliner::CreateInstanceFieldSet(uint32_t field_index, *referrer->GetDexFile(), // Read barrier generates a runtime call in slow path and we need a valid // dex pc for the associated stack map. 0 is bogus but valid. Bug: 26854537. - /* dex_pc */ 0); + /* dex_pc= */ 0); return iput; } template <typename T> -static inline Handle<T> NewHandleIfDifferent(T* object, +static inline Handle<T> NewHandleIfDifferent(ObjPtr<T> object, Handle<T> hint, VariableSizedHandleScope* handles) REQUIRES_SHARED(Locks::mutator_lock_) { return (object != hint.Get()) ? handles->NewHandle(object) : hint; } +static bool CanEncodeInlinedMethodInStackMap(const DexFile& caller_dex_file, ArtMethod* callee) + REQUIRES_SHARED(Locks::mutator_lock_) { + if (!Runtime::Current()->IsAotCompiler()) { + // JIT can always encode methods in stack maps. + return true; + } + if (IsSameDexFile(caller_dex_file, *callee->GetDexFile())) { + return true; + } + // TODO(ngeoffray): Support more AOT cases for inlining: + // - methods in multidex + // - methods in boot image for on-device non-PIC compilation. 
+ return false; +} + bool HInliner::TryBuildAndInlineHelper(HInvoke* invoke_instruction, ArtMethod* resolved_method, ReferenceTypeInfo receiver_type, @@ -1714,7 +1763,7 @@ bool HInliner::TryBuildAndInlineHelper(HInvoke* invoke_instruction, HInstruction** return_replacement) { DCHECK(!(resolved_method->IsStatic() && receiver_type.IsValid())); ScopedObjectAccess soa(Thread::Current()); - const DexFile::CodeItem* code_item = resolved_method->GetCodeItem(); + const dex::CodeItem* code_item = resolved_method->GetCodeItem(); const DexFile& callee_dex_file = *resolved_method->GetDexFile(); uint32_t method_index = resolved_method->GetDexMethodIndex(); CodeItemDebugInfoAccessor code_item_accessor(resolved_method->DexInstructionDebugInfo()); @@ -1727,6 +1776,7 @@ bool HInliner::TryBuildAndInlineHelper(HInvoke* invoke_instruction, caller_compilation_unit_.GetClassLoader(), handles_); + Handle<mirror::Class> compiling_class = handles_->NewHandle(resolved_method->GetDeclaringClass()); DexCompilationUnit dex_compilation_unit( class_loader, class_linker, @@ -1735,8 +1785,9 @@ bool HInliner::TryBuildAndInlineHelper(HInvoke* invoke_instruction, resolved_method->GetDeclaringClass()->GetDexClassDefIndex(), method_index, resolved_method->GetAccessFlags(), - /* verified_method */ nullptr, - dex_cache); + /* verified_method= */ nullptr, + dex_cache, + compiling_class); InvokeType invoke_type = invoke_instruction->GetInvokeType(); if (invoke_type == kInterface) { @@ -1745,16 +1796,25 @@ bool HInliner::TryBuildAndInlineHelper(HInvoke* invoke_instruction, invoke_type = kVirtual; } + bool caller_dead_reference_safe = graph_->IsDeadReferenceSafe(); + const dex::ClassDef& callee_class = resolved_method->GetClassDef(); + // MethodContainsRSensitiveAccess is currently slow, but HasDeadReferenceSafeAnnotation() + // is currently rarely true. 
+ bool callee_dead_reference_safe = + annotations::HasDeadReferenceSafeAnnotation(callee_dex_file, callee_class) + && !annotations::MethodContainsRSensitiveAccess(callee_dex_file, callee_class, method_index); + const int32_t caller_instruction_counter = graph_->GetCurrentInstructionId(); HGraph* callee_graph = new (graph_->GetAllocator()) HGraph( graph_->GetAllocator(), graph_->GetArenaStack(), callee_dex_file, method_index, - compiler_driver_->GetInstructionSet(), + codegen_->GetCompilerOptions().GetInstructionSet(), invoke_type, + callee_dead_reference_safe, graph_->IsDebuggable(), - /* osr */ false, + /* osr= */ false, caller_instruction_counter); callee_graph->SetArtMethod(resolved_method); @@ -1775,7 +1835,6 @@ bool HInliner::TryBuildAndInlineHelper(HInvoke* invoke_instruction, code_item_accessor, &dex_compilation_unit, &outer_compilation_unit_, - compiler_driver_, codegen_, inline_stats_, resolved_method->GetQuickenedInfo(), @@ -1788,8 +1847,8 @@ bool HInliner::TryBuildAndInlineHelper(HInvoke* invoke_instruction, return false; } - if (!RegisterAllocator::CanAllocateRegistersFor(*callee_graph, - compiler_driver_->GetInstructionSet())) { + if (!RegisterAllocator::CanAllocateRegistersFor( + *callee_graph, codegen_->GetCompilerOptions().GetInstructionSet())) { LOG_FAIL(stats_, MethodCompilationStat::kNotInlinedRegisterAllocator) << "Method " << callee_dex_file.PrettyMethod(method_index) << " cannot be inlined because of the register allocator"; @@ -1836,7 +1895,7 @@ bool HInliner::TryBuildAndInlineHelper(HInvoke* invoke_instruction, outer_compilation_unit_.GetClassLoader(), dex_compilation_unit.GetDexCache(), handles_, - /* is_first_run */ false).Run(); + /* is_first_run= */ false).Run(); } RunOptimizations(callee_graph, code_item, dex_compilation_unit); @@ -1980,23 +2039,26 @@ bool HInliner::TryBuildAndInlineHelper(HInvoke* invoke_instruction, inline_stats_->AddTo(stats_); } + if (caller_dead_reference_safe && !callee_dead_reference_safe) { + // Caller was dead reference safe, but is not anymore, since we inlined dead + // reference unsafe code. Prior transformations remain valid, since they did not + // affect the inlined code. + graph_->MarkDeadReferenceUnsafe(); + } + return true; } void HInliner::RunOptimizations(HGraph* callee_graph, - const DexFile::CodeItem* code_item, + const dex::CodeItem* code_item, const DexCompilationUnit& dex_compilation_unit) { // Note: if the outermost_graph_ is being compiled OSR, we should not run any // optimization that could lead to a HDeoptimize. The following optimizations do not. HDeadCodeElimination dce(callee_graph, inline_stats_, "dead_code_elimination$inliner"); HConstantFolding fold(callee_graph, "constant_folding$inliner"); - HSharpening sharpening(callee_graph, codegen_, compiler_driver_); - InstructionSimplifier simplify(callee_graph, codegen_, compiler_driver_, inline_stats_); - IntrinsicsRecognizer intrinsics(callee_graph, inline_stats_); + InstructionSimplifier simplify(callee_graph, codegen_, inline_stats_); HOptimization* optimizations[] = { - &intrinsics, - &sharpening, &simplify, &fold, &dce, @@ -2031,7 +2093,6 @@ void HInliner::RunOptimizations(HGraph* callee_graph, codegen_, outer_compilation_unit_, dex_compilation_unit, - compiler_driver_, handles_, inline_stats_, total_number_of_dex_registers_ + accessor.RegistersSize(), @@ -2065,7 +2126,7 @@ bool HInliner::ArgumentTypesMoreSpecific(HInvoke* invoke_instruction, ArtMethod* // is more specific than the class which declares the method. 
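The dead-reference-safety bookkeeping above boils down to a conjunction: the graph that results from inlining stays dead-reference-safe only if both the caller and the inlined callee were. A one-function sketch of that rule (illustrative only):

// After inlining, the caller keeps the property only if the callee also had it;
// MarkDeadReferenceUnsafe() above implements the 'true && false -> false' transition.
bool MergedDeadReferenceSafe(bool caller_dead_reference_safe,
                             bool callee_dead_reference_safe) {
  return caller_dead_reference_safe && callee_dead_reference_safe;
}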
if (!resolved_method->IsStatic()) { if (IsReferenceTypeRefinement(GetClassRTI(resolved_method->GetDeclaringClass()), - /* declared_can_be_null */ false, + /* declared_can_be_null= */ false, invoke_instruction->InputAt(0u))) { return true; } @@ -2074,7 +2135,7 @@ bool HInliner::ArgumentTypesMoreSpecific(HInvoke* invoke_instruction, ArtMethod* // Iterate over the list of parameter types and test whether any of the // actual inputs has a more specific reference type than the type declared in // the signature. - const DexFile::TypeList* param_list = resolved_method->GetParameterTypeList(); + const dex::TypeList* param_list = resolved_method->GetParameterTypeList(); for (size_t param_idx = 0, input_idx = resolved_method->IsStatic() ? 0 : 1, e = (param_list == nullptr ? 0 : param_list->Size()); @@ -2085,7 +2146,7 @@ bool HInliner::ArgumentTypesMoreSpecific(HInvoke* invoke_instruction, ArtMethod* ObjPtr<mirror::Class> param_cls = resolved_method->LookupResolvedClassFromTypeIndex( param_list->GetTypeItem(param_idx).type_idx_); if (IsReferenceTypeRefinement(GetClassRTI(param_cls), - /* declared_can_be_null */ true, + /* declared_can_be_null= */ true, input)) { return true; } @@ -2102,14 +2163,13 @@ bool HInliner::ReturnTypeMoreSpecific(HInvoke* invoke_instruction, if (return_replacement->GetType() == DataType::Type::kReference) { // Test if the return type is a refinement of the declared return type. if (IsReferenceTypeRefinement(invoke_instruction->GetReferenceTypeInfo(), - /* declared_can_be_null */ true, + /* declared_can_be_null= */ true, return_replacement)) { return true; } else if (return_replacement->IsInstanceFieldGet()) { HInstanceFieldGet* field_get = return_replacement->AsInstanceFieldGet(); - ClassLinker* class_linker = Runtime::Current()->GetClassLinker(); if (field_get->GetFieldInfo().GetField() == - class_linker->GetClassRoot(ClassLinker::kJavaLangObject)->GetInstanceField(0)) { + GetClassRoot<mirror::Object>()->GetInstanceField(0)) { return true; } } diff --git a/compiler/optimizing/inliner.h b/compiler/optimizing/inliner.h index 02465d37ba..15d7349694 100644 --- a/compiler/optimizing/inliner.h +++ b/compiler/optimizing/inliner.h @@ -19,13 +19,12 @@ #include "dex/dex_file_types.h" #include "dex/invoke_type.h" -#include "jit/profile_compilation_info.h" #include "optimization.h" +#include "profile/profile_compilation_info.h" namespace art { class CodeGenerator; -class CompilerDriver; class DexCompilationUnit; class HGraph; class HInvoke; @@ -38,7 +37,6 @@ class HInliner : public HOptimization { CodeGenerator* codegen, const DexCompilationUnit& outer_compilation_unit, const DexCompilationUnit& caller_compilation_unit, - CompilerDriver* compiler_driver, VariableSizedHandleScope* handles, OptimizingCompilerStats* stats, size_t total_number_of_dex_registers, @@ -51,7 +49,6 @@ class HInliner : public HOptimization { outer_compilation_unit_(outer_compilation_unit), caller_compilation_unit_(caller_compilation_unit), codegen_(codegen), - compiler_driver_(compiler_driver), total_number_of_dex_registers_(total_number_of_dex_registers), total_number_of_instructions_(total_number_of_instructions), parent_(parent), @@ -60,7 +57,7 @@ class HInliner : public HOptimization { handles_(handles), inline_stats_(nullptr) {} - void Run() OVERRIDE; + bool Run() override; static constexpr const char* kInlinerPassName = "inliner"; @@ -101,7 +98,7 @@ class HInliner : public HOptimization { // Run simple optimizations on `callee_graph`. 
void RunOptimizations(HGraph* callee_graph, - const DexFile::CodeItem* code_item, + const dex::CodeItem* code_item, const DexCompilationUnit& dex_compilation_unit) REQUIRES_SHARED(Locks::mutator_lock_); @@ -280,7 +277,6 @@ class HInliner : public HOptimization { const DexCompilationUnit& outer_compilation_unit_; const DexCompilationUnit& caller_compilation_unit_; CodeGenerator* const codegen_; - CompilerDriver* const compiler_driver_; const size_t total_number_of_dex_registers_; size_t total_number_of_instructions_; diff --git a/compiler/optimizing/instruction_builder.cc b/compiler/optimizing/instruction_builder.cc index c7aef3779d..5e7b57523f 100644 --- a/compiler/optimizing/instruction_builder.cc +++ b/compiler/optimizing/instruction_builder.cc @@ -19,12 +19,13 @@ #include "art_method-inl.h" #include "base/arena_bit_vector.h" #include "base/bit_vector-inl.h" +#include "base/logging.h" #include "block_builder.h" -#include "class_linker.h" +#include "class_linker-inl.h" +#include "code_generator.h" #include "data_type-inl.h" #include "dex/bytecode_utils.h" #include "dex/dex_instruction-inl.h" -#include "driver/compiler_driver-inl.h" #include "driver/dex_compilation_unit.h" #include "driver/compiler_options.h" #include "imtable-inl.h" @@ -47,7 +48,6 @@ HInstructionBuilder::HInstructionBuilder(HGraph* graph, DataType::Type return_type, const DexCompilationUnit* dex_compilation_unit, const DexCompilationUnit* outer_compilation_unit, - CompilerDriver* compiler_driver, CodeGenerator* code_generator, ArrayRef<const uint8_t> interpreter_metadata, OptimizingCompilerStats* compiler_stats, @@ -61,7 +61,6 @@ HInstructionBuilder::HInstructionBuilder(HGraph* graph, return_type_(return_type), block_builder_(block_builder), ssa_builder_(ssa_builder), - compiler_driver_(compiler_driver), code_generator_(code_generator), dex_compilation_unit_(dex_compilation_unit), outer_compilation_unit_(outer_compilation_unit), @@ -73,7 +72,8 @@ HInstructionBuilder::HInstructionBuilder(HGraph* graph, current_locals_(nullptr), latest_result_(nullptr), current_this_parameter_(nullptr), - loop_headers_(local_allocator->Adapter(kArenaAllocGraphBuilder)) { + loop_headers_(local_allocator->Adapter(kArenaAllocGraphBuilder)), + class_cache_(std::less<dex::TypeIndex>(), local_allocator->Adapter(kArenaAllocGraphBuilder)) { loop_headers_.reserve(kDefaultNumberOfLoops); } @@ -319,8 +319,8 @@ bool HInstructionBuilder::Build() { // Find locations where we want to generate extra stackmaps for native debugging. // This allows us to generate the info only at interesting points (for example, // at start of java statement) rather than before every dex instruction. - const bool native_debuggable = compiler_driver_ != nullptr && - compiler_driver_->GetCompilerOptions().GetNativeDebuggable(); + const bool native_debuggable = code_generator_ != nullptr && + code_generator_->GetCompilerOptions().GetNativeDebuggable(); ArenaBitVector* native_debug_info_locations = nullptr; if (native_debuggable) { native_debug_info_locations = FindNativeDebugInfoLocations(); @@ -434,7 +434,7 @@ void HInstructionBuilder::BuildIntrinsic(ArtMethod* method) { HInvokeStaticOrDirect::DispatchInfo dispatch_info = { HInvokeStaticOrDirect::MethodLoadKind::kRuntimeCall, HInvokeStaticOrDirect::CodePtrLocation::kCallArtMethod, - /* method_load_data */ 0u + /* method_load_data= */ 0u }; InvokeType invoke_type = dex_compilation_unit_->IsStatic() ? 
kStatic : kDirect; HInvokeStaticOrDirect* invoke = new (allocator_) HInvokeStaticOrDirect( @@ -448,14 +448,8 @@ void HInstructionBuilder::BuildIntrinsic(ArtMethod* method) { invoke_type, target_method, HInvokeStaticOrDirect::ClinitCheckRequirement::kNone); - HandleInvoke(invoke, - in_vregs, - /* args */ nullptr, - graph_->GetNumberOfVRegs() - in_vregs, - /* is_range */ true, - dex_file_->GetMethodShorty(method_idx), - /* clinit_check */ nullptr, - /* is_unresolved */ false); + RangeInstructionOperands operands(graph_->GetNumberOfVRegs() - in_vregs, in_vregs); + HandleInvoke(invoke, operands, dex_file_->GetMethodShorty(method_idx), /* is_unresolved= */ false); // Add the return instruction. if (return_type_ == DataType::Type::kVoid) { @@ -472,22 +466,17 @@ void HInstructionBuilder::BuildIntrinsic(ArtMethod* method) { } ArenaBitVector* HInstructionBuilder::FindNativeDebugInfoLocations() { - // The callback gets called when the line number changes. - // In other words, it marks the start of new java statement. - struct Callback { - static bool Position(void* ctx, const DexFile::PositionInfo& entry) { - static_cast<ArenaBitVector*>(ctx)->SetBit(entry.address_); - return false; - } - }; ArenaBitVector* locations = ArenaBitVector::Create(local_allocator_, code_item_accessor_.InsnsSizeInCodeUnits(), - /* expandable */ false, + /* expandable= */ false, kArenaAllocGraphBuilder); locations->ClearAllBits(); - dex_file_->DecodeDebugPositionInfo(code_item_accessor_.DebugInfoOffset(), - Callback::Position, - locations); + // The visitor gets called when the line number changes. + // In other words, it marks the start of new java statement. + code_item_accessor_.DecodeDebugPositionInfo([&](const DexFile::PositionInfo& entry) { + locations->SetBit(entry.address_); + return false; + }); // Instruction-specific tweaks. for (const DexInstructionPcPair& inst : code_item_accessor_) { switch (inst->Opcode()) { @@ -570,7 +559,7 @@ void HInstructionBuilder::InitializeParameters() { uint16_t locals_index = graph_->GetNumberOfLocalVRegs(); uint16_t parameter_index = 0; - const DexFile::MethodId& referrer_method_id = + const dex::MethodId& referrer_method_id = dex_file_->GetMethodId(dex_compilation_unit_->GetDexMethodIndex()); if (!dex_compilation_unit_->IsStatic()) { // Add the implicit 'this' argument, not expressed in the signature. 
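HandleInvoke() and HandleStringInit() above now take an InstructionOperands reference instead of the (number_of_vreg_arguments, args, register_index, is_range) tuple. A simplified sketch of that abstraction, with a range form matching the RangeInstructionOperands construction shown above (the real classes live in the dex support code; this is only an illustration):

#include <cstddef>
#include <cstdint>

// Uniform view over the argument registers of an invoke, hiding the
// range vs. var-args encoding difference from the instruction builder.
class InstructionOperands {
 public:
  explicit InstructionOperands(size_t num_operands) : num_operands_(num_operands) {}
  virtual ~InstructionOperands() {}
  virtual uint32_t GetOperand(size_t index) const = 0;
  size_t GetNumberOfOperands() const { return num_operands_; }

 private:
  const size_t num_operands_;
};

// invoke-*/range form: the operands are the consecutive registers
// [first_vreg, first_vreg + num_operands).
class RangeInstructionOperands : public InstructionOperands {
 public:
  RangeInstructionOperands(uint32_t first_vreg, size_t num_operands)
      : InstructionOperands(num_operands), first_vreg_(first_vreg) {}
  uint32_t GetOperand(size_t index) const override {
    return first_vreg_ + static_cast<uint32_t>(index);
  }

 private:
  const uint32_t first_vreg_;
};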
@@ -578,7 +567,7 @@ void HInstructionBuilder::InitializeParameters() { referrer_method_id.class_idx_, parameter_index++, DataType::Type::kReference, - /* is_this */ true); + /* is_this= */ true); AppendInstruction(parameter); UpdateLocal(locals_index++, parameter); number_of_parameters--; @@ -587,15 +576,15 @@ void HInstructionBuilder::InitializeParameters() { DCHECK(current_this_parameter_ == nullptr); } - const DexFile::ProtoId& proto = dex_file_->GetMethodPrototype(referrer_method_id); - const DexFile::TypeList* arg_types = dex_file_->GetProtoParameters(proto); + const dex::ProtoId& proto = dex_file_->GetMethodPrototype(referrer_method_id); + const dex::TypeList* arg_types = dex_file_->GetProtoParameters(proto); for (int i = 0, shorty_pos = 1; i < number_of_parameters; i++) { HParameterValue* parameter = new (allocator_) HParameterValue( *dex_file_, arg_types->GetTypeItem(shorty_pos - 1).type_idx_, parameter_index++, DataType::FromShorty(shorty[shorty_pos]), - /* is_this */ false); + /* is_this= */ false); ++shorty_pos; AppendInstruction(parameter); // Store the parameter value in the local that the dex code will use @@ -720,20 +709,18 @@ void HInstructionBuilder::Binop_22b(const Instruction& instruction, bool reverse // Does the method being compiled need any constructor barriers being inserted? // (Always 'false' for methods that aren't <init>.) -static bool RequiresConstructorBarrier(const DexCompilationUnit* cu, CompilerDriver* driver) { +static bool RequiresConstructorBarrier(const DexCompilationUnit* cu) { // Can be null in unit tests only. if (UNLIKELY(cu == nullptr)) { return false; } - Thread* self = Thread::Current(); - return cu->IsConstructor() - && !cu->IsStatic() - // RequiresConstructorBarrier must only be queried for <init> methods; - // it's effectively "false" for every other method. - // - // See CompilerDriver::RequiresConstructBarrier for more explanation. - && driver->RequiresConstructorBarrier(self, cu->GetDexFile(), cu->GetClassDefIndex()); + // Constructor barriers are applicable only for <init> methods. + if (LIKELY(!cu->IsConstructor() || cu->IsStatic())) { + return false; + } + + return cu->RequiresConstructorBarrier(); } // Returns true if `block` has only one successor which starts at the next @@ -779,7 +766,7 @@ void HInstructionBuilder::BuildReturn(const Instruction& instruction, // Only <init> (which is a return-void) could possibly have a constructor fence. // This may insert additional redundant constructor fences from the super constructors. // TODO: remove redundant constructor fences (b/36656456). - if (RequiresConstructorBarrier(dex_compilation_unit_, compiler_driver_)) { + if (RequiresConstructorBarrier(dex_compilation_unit_)) { // Compiling instance constructor. DCHECK_STREQ("<init>", graph_->GetMethodName()); @@ -793,7 +780,7 @@ void HInstructionBuilder::BuildReturn(const Instruction& instruction, } AppendInstruction(new (allocator_) HReturnVoid(dex_pc)); } else { - DCHECK(!RequiresConstructorBarrier(dex_compilation_unit_, compiler_driver_)); + DCHECK(!RequiresConstructorBarrier(dex_compilation_unit_)); HInstruction* value = LoadLocal(instruction.VRegA(), type); AppendInstruction(new (allocator_) HReturn(value, dex_pc)); } @@ -860,7 +847,7 @@ ArtMethod* HInstructionBuilder::ResolveMethod(uint16_t method_idx, InvokeType in // make this an invoke-unresolved to handle cross-dex invokes or abstract super methods, both of // which require runtime handling. 
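RequiresConstructorBarrier() above only decides whether an <init> needs a fence; the reason the fence exists is publication safety for writes performed in the constructor. A small generic C++ analogy (not patch code and not the HConstructorFence implementation):

#include <atomic>

struct Widget {
  int payload = 0;
};

std::atomic<Widget*> g_published{nullptr};

// The constructor barrier plays the role of the release ordering below: field
// writes made while constructing the object must be visible before any other
// thread can observe the published reference.
void ConstructAndPublish() {
  Widget* w = new Widget();
  w->payload = 42;                                  // writes done "inside <init>"
  g_published.store(w, std::memory_order_release);  // publish only after the fence
}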
if (invoke_type == kSuper) { - ObjPtr<mirror::Class> compiling_class = GetCompilingClass(); + ObjPtr<mirror::Class> compiling_class = dex_compilation_unit_->GetCompilingClass().Get(); if (compiling_class == nullptr) { // We could not determine the method's class we need to wait until runtime. DCHECK(Runtime::Current()->IsAotCompiler()); @@ -890,8 +877,8 @@ ArtMethod* HInstructionBuilder::ResolveMethod(uint16_t method_idx, InvokeType in // The back-end code generator relies on this check in order to ensure that it will not // attempt to read the dex_cache with a dex_method_index that is not from the correct // dex_file. If we didn't do this check then the dex_method_index will not be updated in the - // builder, which means that the code-generator (and compiler driver during sharpening and - // inliner, maybe) might invoke an incorrect method. + // builder, which means that the code-generator (and sharpening and inliner, maybe) + // might invoke an incorrect method. // TODO: The actual method could still be referenced in the current dex file, so we // could try locating it. // TODO: Remove the dex_file restriction. @@ -916,16 +903,13 @@ static bool IsStringConstructor(ArtMethod* method) { bool HInstructionBuilder::BuildInvoke(const Instruction& instruction, uint32_t dex_pc, uint32_t method_idx, - uint32_t number_of_vreg_arguments, - bool is_range, - uint32_t* args, - uint32_t register_index) { + const InstructionOperands& operands) { InvokeType invoke_type = GetInvokeTypeFromOpCode(instruction.Opcode()); - const char* descriptor = dex_file_->GetMethodShorty(method_idx); - DataType::Type return_type = DataType::FromShorty(descriptor[0]); + const char* shorty = dex_file_->GetMethodShorty(method_idx); + DataType::Type return_type = DataType::FromShorty(shorty[0]); // Remove the return type from the 'proto'. - size_t number_of_arguments = strlen(descriptor) - 1; + size_t number_of_arguments = strlen(shorty) - 1; if (invoke_type != kStatic) { // instance call // One extra argument for 'this'. number_of_arguments++; @@ -942,14 +926,7 @@ bool HInstructionBuilder::BuildInvoke(const Instruction& instruction, dex_pc, method_idx, invoke_type); - return HandleInvoke(invoke, - number_of_vreg_arguments, - args, - register_index, - is_range, - descriptor, - nullptr, /* clinit_check */ - true /* is_unresolved */); + return HandleInvoke(invoke, operands, shorty, /* is_unresolved= */ true); } // Replace calls to String.<init> with StringFactory. @@ -968,20 +945,15 @@ bool HInstructionBuilder::BuildInvoke(const Instruction& instruction, HInvoke* invoke = new (allocator_) HInvokeStaticOrDirect( allocator_, number_of_arguments - 1, - DataType::Type::kReference /*return_type */, + /* return_type= */ DataType::Type::kReference, dex_pc, method_idx, - nullptr /* resolved_method */, + /* resolved_method= */ nullptr, dispatch_info, invoke_type, target_method, HInvokeStaticOrDirect::ClinitCheckRequirement::kImplicit); - return HandleStringInit(invoke, - number_of_vreg_arguments, - args, - register_index, - is_range, - descriptor); + return HandleStringInit(invoke, operands, shorty); } // Potential class initialization check, in the case of a static method call. 
@@ -994,8 +966,8 @@ bool HInstructionBuilder::BuildInvoke(const Instruction& instruction, = HInvokeStaticOrDirect::ClinitCheckRequirement::kImplicit; ScopedObjectAccess soa(Thread::Current()); if (invoke_type == kStatic) { - clinit_check = ProcessClinitCheckForInvoke( - dex_pc, resolved_method, &clinit_check_requirement); + clinit_check = + ProcessClinitCheckForInvoke(dex_pc, resolved_method, &clinit_check_requirement); } else if (invoke_type == kSuper) { if (IsSameDexFile(*resolved_method->GetDexFile(), *dex_compilation_unit_->GetDexFile())) { // Update the method index to the one resolved. Note that this may be a no-op if @@ -1004,11 +976,8 @@ bool HInstructionBuilder::BuildInvoke(const Instruction& instruction, } } - HInvokeStaticOrDirect::DispatchInfo dispatch_info = { - HInvokeStaticOrDirect::MethodLoadKind::kRuntimeCall, - HInvokeStaticOrDirect::CodePtrLocation::kCallArtMethod, - 0u - }; + HInvokeStaticOrDirect::DispatchInfo dispatch_info = + HSharpening::SharpenInvokeStaticOrDirect(resolved_method, code_generator_); MethodReference target_method(resolved_method->GetDexFile(), resolved_method->GetDexMethodIndex()); invoke = new (allocator_) HInvokeStaticOrDirect(allocator_, @@ -1041,42 +1010,39 @@ bool HInstructionBuilder::BuildInvoke(const Instruction& instruction, resolved_method, ImTable::GetImtIndex(resolved_method)); } - - return HandleInvoke(invoke, - number_of_vreg_arguments, - args, - register_index, - is_range, - descriptor, - clinit_check, - false /* is_unresolved */); + return HandleInvoke(invoke, operands, shorty, /* is_unresolved= */ false, clinit_check); } -bool HInstructionBuilder::BuildInvokePolymorphic(const Instruction& instruction ATTRIBUTE_UNUSED, - uint32_t dex_pc, +bool HInstructionBuilder::BuildInvokePolymorphic(uint32_t dex_pc, uint32_t method_idx, - uint32_t proto_idx, - uint32_t number_of_vreg_arguments, - bool is_range, - uint32_t* args, - uint32_t register_index) { - const char* descriptor = dex_file_->GetShorty(proto_idx); - DCHECK_EQ(1 + ArtMethod::NumArgRegisters(descriptor), number_of_vreg_arguments); - DataType::Type return_type = DataType::FromShorty(descriptor[0]); - size_t number_of_arguments = strlen(descriptor); + dex::ProtoIndex proto_idx, + const InstructionOperands& operands) { + const char* shorty = dex_file_->GetShorty(proto_idx); + DCHECK_EQ(1 + ArtMethod::NumArgRegisters(shorty), operands.GetNumberOfOperands()); + DataType::Type return_type = DataType::FromShorty(shorty[0]); + size_t number_of_arguments = strlen(shorty); HInvoke* invoke = new (allocator_) HInvokePolymorphic(allocator_, number_of_arguments, return_type, dex_pc, method_idx); - return HandleInvoke(invoke, - number_of_vreg_arguments, - args, - register_index, - is_range, - descriptor, - nullptr /* clinit_check */, - false /* is_unresolved */); + return HandleInvoke(invoke, operands, shorty, /* is_unresolved= */ false); +} + + +bool HInstructionBuilder::BuildInvokeCustom(uint32_t dex_pc, + uint32_t call_site_idx, + const InstructionOperands& operands) { + dex::ProtoIndex proto_idx = dex_file_->GetProtoIndexForCallSite(call_site_idx); + const char* shorty = dex_file_->GetShorty(proto_idx); + DataType::Type return_type = DataType::FromShorty(shorty[0]); + size_t number_of_arguments = strlen(shorty) - 1; + HInvoke* invoke = new (allocator_) HInvokeCustom(allocator_, + number_of_arguments, + call_site_idx, + return_type, + dex_pc); + return HandleInvoke(invoke, operands, shorty, /* is_unresolved= */ false); } HNewInstance* HInstructionBuilder::BuildNewInstance(dex::TypeIndex 
type_index, uint32_t dex_pc) { @@ -1099,6 +1065,10 @@ HNewInstance* HInstructionBuilder::BuildNewInstance(dex::TypeIndex type_index, u if (load_class->NeedsAccessCheck() || klass->IsFinalizable() || !klass->IsInstantiable()) { entrypoint = kQuickAllocObjectWithChecks; } + // We will always be able to resolve the string class since it is in the BCP. + if (!klass.IsNull() && klass->IsStringClass()) { + entrypoint = kQuickAllocStringObject; + } // Consider classes we haven't resolved as potentially finalizable. bool finalizable = (klass == nullptr) || klass->IsFinalizable(); @@ -1167,30 +1137,219 @@ void HInstructionBuilder::BuildConstructorFenceForAllocation(HInstruction* alloc MethodCompilationStat::kConstructorFenceGeneratedNew); } +static bool IsInBootImage(ObjPtr<mirror::Class> cls, const CompilerOptions& compiler_options) + REQUIRES_SHARED(Locks::mutator_lock_) { + if (compiler_options.IsBootImage()) { + std::string temp; + const char* descriptor = cls->GetDescriptor(&temp); + return compiler_options.IsImageClass(descriptor); + } else { + return Runtime::Current()->GetHeap()->FindSpaceFromObject(cls, false)->IsImageSpace(); + } +} + static bool IsSubClass(ObjPtr<mirror::Class> to_test, ObjPtr<mirror::Class> super_class) REQUIRES_SHARED(Locks::mutator_lock_) { return to_test != nullptr && !to_test->IsInterface() && to_test->IsSubClass(super_class); } +static bool HasTrivialClinit(ObjPtr<mirror::Class> klass, PointerSize pointer_size) + REQUIRES_SHARED(Locks::mutator_lock_) { + // Check if the class has encoded fields that trigger bytecode execution. + // (Encoded fields are just a different representation of <clinit>.) + if (klass->NumStaticFields() != 0u) { + DCHECK(klass->GetClassDef() != nullptr); + EncodedStaticFieldValueIterator it(klass->GetDexFile(), *klass->GetClassDef()); + for (; it.HasNext(); it.Next()) { + switch (it.GetValueType()) { + case EncodedArrayValueIterator::ValueType::kBoolean: + case EncodedArrayValueIterator::ValueType::kByte: + case EncodedArrayValueIterator::ValueType::kShort: + case EncodedArrayValueIterator::ValueType::kChar: + case EncodedArrayValueIterator::ValueType::kInt: + case EncodedArrayValueIterator::ValueType::kLong: + case EncodedArrayValueIterator::ValueType::kFloat: + case EncodedArrayValueIterator::ValueType::kDouble: + case EncodedArrayValueIterator::ValueType::kNull: + case EncodedArrayValueIterator::ValueType::kString: + // Primitive, null or j.l.String initialization is permitted. + break; + case EncodedArrayValueIterator::ValueType::kType: + // Type initialization can load classes and execute bytecode through a class loader + // which can execute arbitrary bytecode. We do not optimize for known class loaders; + // kType is rarely used (if ever). + return false; + default: + // Other types in the encoded static field list are rejected by the DexFileVerifier. + LOG(FATAL) << "Unexpected type " << it.GetValueType(); + UNREACHABLE(); + } + } + } + // Check if the class has <clinit> that executes arbitrary code. + // Initialization of static fields of the class itself with constants is allowed. 
+ ArtMethod* clinit = klass->FindClassInitializer(pointer_size); + if (clinit != nullptr) { + const DexFile& dex_file = *clinit->GetDexFile(); + CodeItemInstructionAccessor accessor(dex_file, clinit->GetCodeItem()); + for (DexInstructionPcPair it : accessor) { + switch (it->Opcode()) { + case Instruction::CONST_4: + case Instruction::CONST_16: + case Instruction::CONST: + case Instruction::CONST_HIGH16: + case Instruction::CONST_WIDE_16: + case Instruction::CONST_WIDE_32: + case Instruction::CONST_WIDE: + case Instruction::CONST_WIDE_HIGH16: + case Instruction::CONST_STRING: + case Instruction::CONST_STRING_JUMBO: + // Primitive, null or j.l.String initialization is permitted. + break; + case Instruction::RETURN_VOID: + case Instruction::RETURN_VOID_NO_BARRIER: + break; + case Instruction::SPUT: + case Instruction::SPUT_WIDE: + case Instruction::SPUT_OBJECT: + case Instruction::SPUT_BOOLEAN: + case Instruction::SPUT_BYTE: + case Instruction::SPUT_CHAR: + case Instruction::SPUT_SHORT: + // Only initialization of a static field of the same class is permitted. + if (dex_file.GetFieldId(it->VRegB_21c()).class_idx_ != klass->GetDexTypeIndex()) { + return false; + } + break; + case Instruction::NEW_ARRAY: + // Only primitive arrays are permitted. + if (Primitive::GetType(dex_file.GetTypeDescriptor(dex_file.GetTypeId( + dex::TypeIndex(it->VRegC_22c())))[1]) == Primitive::kPrimNot) { + return false; + } + break; + case Instruction::APUT: + case Instruction::APUT_WIDE: + case Instruction::APUT_BOOLEAN: + case Instruction::APUT_BYTE: + case Instruction::APUT_CHAR: + case Instruction::APUT_SHORT: + case Instruction::FILL_ARRAY_DATA: + case Instruction::NOP: + // Allow initialization of primitive arrays (only constants can be stored). + // Note: We expect NOPs used for fill-array-data-payload but accept all NOPs + // (even unreferenced switch payloads if they make it through the verifier). + break; + default: + return false; + } + } + } + return true; +} + +static bool HasTrivialInitialization(ObjPtr<mirror::Class> cls, + const CompilerOptions& compiler_options) + REQUIRES_SHARED(Locks::mutator_lock_) { + Runtime* runtime = Runtime::Current(); + PointerSize pointer_size = runtime->GetClassLinker()->GetImagePointerSize(); + + // Check the superclass chain. + for (ObjPtr<mirror::Class> klass = cls; klass != nullptr; klass = klass->GetSuperClass()) { + if (klass->IsInitialized() && IsInBootImage(klass, compiler_options)) { + break; // `klass` and its superclasses are already initialized in the boot image. + } + if (!HasTrivialClinit(klass, pointer_size)) { + return false; + } + } + + // Also check interfaces with default methods as they need to be initialized as well. + ObjPtr<mirror::IfTable> iftable = cls->GetIfTable(); + DCHECK(iftable != nullptr); + for (int32_t i = 0, count = iftable->Count(); i != count; ++i) { + ObjPtr<mirror::Class> iface = iftable->GetInterface(i); + if (!iface->HasDefaultMethods()) { + continue; // Initializing `cls` does not initialize this interface. + } + if (iface->IsInitialized() && IsInBootImage(iface, compiler_options)) { + continue; // This interface is already initialized in the boot image. + } + if (!HasTrivialClinit(iface, pointer_size)) { + return false; + } + } + return true; +} + bool HInstructionBuilder::IsInitialized(Handle<mirror::Class> cls) const { if (cls == nullptr) { return false; } - // `CanAssumeClassIsLoaded` will return true if we're JITting, or will - // check whether the class is in an image for the AOT compilation. 
- if (cls->IsInitialized() && - compiler_driver_->CanAssumeClassIsLoaded(cls.Get())) { - return true; - } - - if (IsSubClass(GetOutermostCompilingClass(), cls.Get())) { + // Check if the class will be initialized at runtime. + if (cls->IsInitialized()) { + Runtime* runtime = Runtime::Current(); + if (!runtime->IsAotCompiler()) { + DCHECK(runtime->UseJitCompilation()); + // For JIT, the class cannot revert to an uninitialized state. + return true; + } + // Assume loaded only if klass is in the boot image. App classes cannot be assumed + // loaded because we don't even know what class loader will be used to load them. + if (IsInBootImage(cls.Get(), code_generator_->GetCompilerOptions())) { + return true; + } + } + + // We can avoid the class initialization check for `cls` in static methods and constructors + // in the very same class; invoking a static method involves a class initialization check + // and so does the instance allocation that must be executed before invoking a constructor. + // Other instance methods of the same class can run on an escaped instance + // of an erroneous class. Even a superclass may need to be checked as the subclass + // can be completely initialized while the superclass is initializing and the subclass + // remains initialized when the superclass initializer throws afterwards. b/62478025 + // Note: The HClinitCheck+HInvokeStaticOrDirect merging can still apply. + auto is_static_method_or_constructor_of_cls = [cls](const DexCompilationUnit& compilation_unit) + REQUIRES_SHARED(Locks::mutator_lock_) { + return (compilation_unit.GetAccessFlags() & (kAccStatic | kAccConstructor)) != 0u && + compilation_unit.GetCompilingClass().Get() == cls.Get(); + }; + if (is_static_method_or_constructor_of_cls(*outer_compilation_unit_) || + // Check also the innermost method. Though excessive copies of ClinitCheck can be + // eliminated by GVN, that happens only after the decision whether to inline the + // graph or not and that may depend on the presence of the ClinitCheck. + // TODO: We should walk over the entire inlined method chain, but we don't pass that + // information to the builder. + is_static_method_or_constructor_of_cls(*dex_compilation_unit_)) { return true; } - // TODO: We should walk over the inlined methods, but we don't pass - // that information to the builder. - if (IsSubClass(GetCompilingClass(), cls.Get())) { + // Otherwise, we may be able to avoid the check if `cls` is a superclass of a method being + // compiled here (anywhere in the inlining chain) as the `cls` must have started initializing + // before calling any `cls` or subclass methods. Static methods require a clinit check and + // instance methods require an instance which cannot be created before doing a clinit check. + // When a subclass of `cls` starts initializing, it starts initializing its superclass + // chain up to `cls` without running any bytecode, i.e. without any opportunity for circular + // initialization weirdness. + // + // If the initialization of `cls` is trivial (`cls` and its superclasses and superinterfaces + // with default methods initialize only their own static fields using constant values), it must + // complete, either successfully or by throwing and marking `cls` erroneous, without allocating + // any instances of `cls` or subclasses (or any other class) and without calling any methods. 
+ // If it completes by throwing, no instances of `cls` shall be created and no subclass method + // bytecode shall execute (see above), therefore the instruction we're building shall be + // unreachable. By reaching the instruction, we know that `cls` was initialized successfully. + // + // TODO: We should walk over the entire inlined methods chain, but we don't pass that + // information to the builder. (We could also check if we're guaranteed a non-null instance + // of `cls` at this location but that's outside the scope of the instruction builder.) + bool is_subclass = IsSubClass(outer_compilation_unit_->GetCompilingClass().Get(), cls.Get()); + if (dex_compilation_unit_ != outer_compilation_unit_) { + is_subclass = is_subclass || + IsSubClass(dex_compilation_unit_->GetCompilingClass().Get(), cls.Get()); + } + if (is_subclass && HasTrivialInitialization(cls.Get(), code_generator_->GetCompilerOptions())) { return true; } @@ -1198,9 +1357,9 @@ bool HInstructionBuilder::IsInitialized(Handle<mirror::Class> cls) const { } HClinitCheck* HInstructionBuilder::ProcessClinitCheckForInvoke( - uint32_t dex_pc, - ArtMethod* resolved_method, - HInvokeStaticOrDirect::ClinitCheckRequirement* clinit_check_requirement) { + uint32_t dex_pc, + ArtMethod* resolved_method, + HInvokeStaticOrDirect::ClinitCheckRequirement* clinit_check_requirement) { Handle<mirror::Class> klass = handles_->NewHandle(resolved_method->GetDeclaringClass()); HClinitCheck* clinit_check = nullptr; @@ -1211,7 +1370,7 @@ HClinitCheck* HInstructionBuilder::ProcessClinitCheckForInvoke( klass->GetDexFile(), klass, dex_pc, - /* needs_access_check */ false); + /* needs_access_check= */ false); if (cls != nullptr) { *clinit_check_requirement = HInvokeStaticOrDirect::ClinitCheckRequirement::kExplicit; clinit_check = new (allocator_) HClinitCheck(cls, dex_pc); @@ -1222,26 +1381,22 @@ HClinitCheck* HInstructionBuilder::ProcessClinitCheckForInvoke( } bool HInstructionBuilder::SetupInvokeArguments(HInvoke* invoke, - uint32_t number_of_vreg_arguments, - uint32_t* args, - uint32_t register_index, - bool is_range, - const char* descriptor, + const InstructionOperands& operands, + const char* shorty, size_t start_index, size_t* argument_index) { - uint32_t descriptor_index = 1; // Skip the return type. - + uint32_t shorty_index = 1; // Skip the return type. + const size_t number_of_operands = operands.GetNumberOfOperands(); for (size_t i = start_index; // Make sure we don't go over the expected arguments or over the number of // dex registers given. If the instruction was seen as dead by the verifier, // it hasn't been properly checked. - (i < number_of_vreg_arguments) && (*argument_index < invoke->GetNumberOfArguments()); + (i < number_of_operands) && (*argument_index < invoke->GetNumberOfArguments()); i++, (*argument_index)++) { - DataType::Type type = DataType::FromShorty(descriptor[descriptor_index++]); + DataType::Type type = DataType::FromShorty(shorty[shorty_index++]); bool is_wide = (type == DataType::Type::kInt64) || (type == DataType::Type::kFloat64); - if (!is_range - && is_wide - && ((i + 1 == number_of_vreg_arguments) || (args[i] + 1 != args[i + 1]))) { + if (is_wide && ((i + 1 == number_of_operands) || + (operands.GetOperand(i) + 1 != operands.GetOperand(i + 1)))) { // Longs and doubles should be in pairs, that is, sequential registers. The verifier should // reject any class where this is violated. However, the verifier only does these checks // on non trivially dead instructions, so we just bailout the compilation. 
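SetupInvokeArguments() above walks the method shorty in parallel with the operands and bails out when a wide argument is not passed in a consecutive register pair. The shorty convention it relies on: index 0 is the return type, and 'J' (long) and 'D' (double) each occupy two vregs. A small illustrative helper (not patch code):

#include <cstddef>

// Counts the argument vregs a shorty describes, e.g. "VLJ" (void f(ref, long))
// needs 1 + 2 = 3 argument words, because wide types take a register pair.
size_t CountArgumentVRegs(const char* shorty) {
  size_t vregs = 0;
  for (const char* p = shorty + 1; *p != '\0'; ++p) {  // Index 0 is the return type.
    vregs += (*p == 'J' || *p == 'D') ? 2u : 1u;
  }
  return vregs;
}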
@@ -1252,7 +1407,7 @@ bool HInstructionBuilder::SetupInvokeArguments(HInvoke* invoke, MethodCompilationStat::kNotCompiledMalformedOpcode); return false; } - HInstruction* arg = LoadLocal(is_range ? register_index + i : args[i], type); + HInstruction* arg = LoadLocal(operands.GetOperand(i), type); invoke->SetArgumentAt(*argument_index, arg); if (is_wide) { i++; @@ -1279,19 +1434,16 @@ bool HInstructionBuilder::SetupInvokeArguments(HInvoke* invoke, } bool HInstructionBuilder::HandleInvoke(HInvoke* invoke, - uint32_t number_of_vreg_arguments, - uint32_t* args, - uint32_t register_index, - bool is_range, - const char* descriptor, - HClinitCheck* clinit_check, - bool is_unresolved) { + const InstructionOperands& operands, + const char* shorty, + bool is_unresolved, + HClinitCheck* clinit_check) { DCHECK(!invoke->IsInvokeStaticOrDirect() || !invoke->AsInvokeStaticOrDirect()->IsStringInit()); size_t start_index = 0; size_t argument_index = 0; if (invoke->GetInvokeType() != InvokeType::kStatic) { // Instance call. - uint32_t obj_reg = is_range ? register_index : args[0]; + uint32_t obj_reg = operands.GetOperand(0); HInstruction* arg = is_unresolved ? LoadLocal(obj_reg, DataType::Type::kReference) : LoadNullCheckedLocal(obj_reg, invoke->GetDexPc()); @@ -1300,14 +1452,7 @@ bool HInstructionBuilder::HandleInvoke(HInvoke* invoke, argument_index = 1; } - if (!SetupInvokeArguments(invoke, - number_of_vreg_arguments, - args, - register_index, - is_range, - descriptor, - start_index, - &argument_index)) { + if (!SetupInvokeArguments(invoke, operands, shorty, start_index, &argument_index)) { return false; } @@ -1327,24 +1472,14 @@ bool HInstructionBuilder::HandleInvoke(HInvoke* invoke, } bool HInstructionBuilder::HandleStringInit(HInvoke* invoke, - uint32_t number_of_vreg_arguments, - uint32_t* args, - uint32_t register_index, - bool is_range, - const char* descriptor) { + const InstructionOperands& operands, + const char* shorty) { DCHECK(invoke->IsInvokeStaticOrDirect()); DCHECK(invoke->AsInvokeStaticOrDirect()->IsStringInit()); size_t start_index = 1; size_t argument_index = 0; - if (!SetupInvokeArguments(invoke, - number_of_vreg_arguments, - args, - register_index, - is_range, - descriptor, - start_index, - &argument_index)) { + if (!SetupInvokeArguments(invoke, operands, shorty, start_index, &argument_index)) { return false; } @@ -1352,31 +1487,35 @@ bool HInstructionBuilder::HandleStringInit(HInvoke* invoke, // This is a StringFactory call, not an actual String constructor. Its result // replaces the empty String pre-allocated by NewInstance. - uint32_t orig_this_reg = is_range ? register_index : args[0]; + uint32_t orig_this_reg = operands.GetOperand(0); HInstruction* arg_this = LoadLocal(orig_this_reg, DataType::Type::kReference); // Replacing the NewInstance might render it redundant. Keep a list of these - // to be visited once it is clear whether it is has remaining uses. + // to be visited once it is clear whether it has remaining uses. if (arg_this->IsNewInstance()) { ssa_builder_->AddUninitializedString(arg_this->AsNewInstance()); } else { DCHECK(arg_this->IsPhi()); - // NewInstance is not the direct input of the StringFactory call. It might - // be redundant but optimizing this case is not worth the effort. + // We can get a phi as input of a String.<init> if there is a loop between the + // allocation and the String.<init> call. 
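The SetupInvokeArguments/HandleInvoke changes above replace the (args, register_index, is_range) triple with a single operands view. A self-contained model of that idea is sketched below; the class names here are simplified stand-ins, not the real VarArgsInstructionOperands/RangeInstructionOperands, but the wide-pair check matches the one in the patch (consecutive vregs for long/double).

    #include <cstddef>
    #include <cstdint>
    #include <vector>

    // Simplified model: one interface over both invoke encodings, the /range form
    // (first vreg + count) and the var-args form (explicit register list).
    class Operands {
     public:
      virtual ~Operands() = default;
      virtual size_t GetNumberOfOperands() const = 0;
      virtual uint32_t GetOperand(size_t i) const = 0;
    };

    class RangeOperands : public Operands {
     public:
      RangeOperands(uint32_t first, size_t count) : first_(first), count_(count) {}
      size_t GetNumberOfOperands() const override { return count_; }
      uint32_t GetOperand(size_t i) const override { return first_ + static_cast<uint32_t>(i); }
     private:
      uint32_t first_;
      size_t count_;
    };

    class VarArgsOperands : public Operands {
     public:
      explicit VarArgsOperands(std::vector<uint32_t> regs) : regs_(std::move(regs)) {}
      size_t GetNumberOfOperands() const override { return regs_.size(); }
      uint32_t GetOperand(size_t i) const override { return regs_[i]; }
     private:
      std::vector<uint32_t> regs_;
    };

    // A wide (long/double) argument starting at operand i must occupy a register
    // pair, i.e. the next operand must be the next vreg. Range operands satisfy
    // this by construction; var-args operands must be checked, as above.
    inline bool IsValidWidePair(const Operands& ops, size_t i) {
      return i + 1 < ops.GetNumberOfOperands() &&
             ops.GetOperand(i) + 1 == ops.GetOperand(i + 1);
    }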
As we don't know which other phis might alias + // with `arg_this`, we keep a record of those invocations so we can later replace + // the allocation with the invocation. + // Add the actual 'this' input so the analysis knows what is the allocation instruction. + // The input will be removed during the analysis. + invoke->AddInput(arg_this); + ssa_builder_->AddUninitializedStringPhi(invoke); } - // Walk over all vregs and replace any occurrence of `arg_this` with `invoke`. for (size_t vreg = 0, e = current_locals_->size(); vreg < e; ++vreg) { if ((*current_locals_)[vreg] == arg_this) { (*current_locals_)[vreg] = invoke; } } - return true; } static DataType::Type GetFieldAccessType(const DexFile& dex_file, uint16_t field_index) { - const DexFile::FieldId& field_id = dex_file.GetFieldId(field_index); + const dex::FieldId& field_id = dex_file.GetFieldId(field_index); const char* type = dex_file.GetFieldTypeDescriptor(field_id); return DataType::FromShorty(type[0]); } @@ -1400,7 +1539,7 @@ bool HInstructionBuilder::BuildInstanceFieldAccess(const Instruction& instructio } ScopedObjectAccess soa(Thread::Current()); - ArtField* resolved_field = ResolveField(field_index, /* is_static */ false, is_put); + ArtField* resolved_field = ResolveField(field_index, /* is_static= */ false, is_put); // Generate an explicit null check on the reference, unless the field access // is unresolved. In that case, we rely on the runtime to perform various @@ -1463,41 +1602,6 @@ bool HInstructionBuilder::BuildInstanceFieldAccess(const Instruction& instructio return true; } -static ObjPtr<mirror::Class> GetClassFrom(CompilerDriver* driver, - const DexCompilationUnit& compilation_unit) { - ScopedObjectAccess soa(Thread::Current()); - Handle<mirror::ClassLoader> class_loader = compilation_unit.GetClassLoader(); - Handle<mirror::DexCache> dex_cache = compilation_unit.GetDexCache(); - - return driver->ResolveCompilingMethodsClass(soa, dex_cache, class_loader, &compilation_unit); -} - -ObjPtr<mirror::Class> HInstructionBuilder::GetOutermostCompilingClass() const { - return GetClassFrom(compiler_driver_, *outer_compilation_unit_); -} - -ObjPtr<mirror::Class> HInstructionBuilder::GetCompilingClass() const { - return GetClassFrom(compiler_driver_, *dex_compilation_unit_); -} - -bool HInstructionBuilder::IsOutermostCompilingClass(dex::TypeIndex type_index) const { - ScopedObjectAccess soa(Thread::Current()); - StackHandleScope<2> hs(soa.Self()); - Handle<mirror::DexCache> dex_cache = dex_compilation_unit_->GetDexCache(); - Handle<mirror::ClassLoader> class_loader = dex_compilation_unit_->GetClassLoader(); - Handle<mirror::Class> cls(hs.NewHandle(compiler_driver_->ResolveClass( - soa, dex_cache, class_loader, type_index, dex_compilation_unit_))); - Handle<mirror::Class> outer_class(hs.NewHandle(GetOutermostCompilingClass())); - - // GetOutermostCompilingClass returns null when the class is unresolved - // (e.g. if it derives from an unresolved class). This is bogus knowing that - // we are compiling it. - // When this happens we cannot establish a direct relation between the current - // class and the outer class, so we return false. 
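The GetFieldAccessType() helper above only needs the leading character of the field's dex type descriptor. As a reminder of that mapping, here is an illustrative table in plain C++; it is not ART's DataType::FromShorty, just the standard dex descriptor characters collapsed into coarse value categories.

    // Illustrative: leading descriptor character -> value category.
    // Reference types ('L' for classes, '[' for arrays) share one bucket.
    enum class ValueKind { kBool, kByte, kChar, kShort, kInt, kLong, kFloat, kDouble, kReference };

    inline ValueKind KindFromDescriptorChar(char c) {
      switch (c) {
        case 'Z': return ValueKind::kBool;
        case 'B': return ValueKind::kByte;
        case 'C': return ValueKind::kChar;
        case 'S': return ValueKind::kShort;
        case 'I': return ValueKind::kInt;
        case 'J': return ValueKind::kLong;
        case 'F': return ValueKind::kFloat;
        case 'D': return ValueKind::kDouble;
        case 'L':
        case '[': return ValueKind::kReference;
        default:  return ValueKind::kReference;  // 'V' (void) is not a valid field type.
      }
    }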
- // (Note that this is only used for optimizing invokes and field accesses) - return (cls != nullptr) && (outer_class.Get() == cls.Get()); -} - void HInstructionBuilder::BuildUnresolvedStaticFieldAccess(const Instruction& instruction, uint32_t dex_pc, bool is_put, @@ -1517,18 +1621,17 @@ void HInstructionBuilder::BuildUnresolvedStaticFieldAccess(const Instruction& in ArtField* HInstructionBuilder::ResolveField(uint16_t field_idx, bool is_static, bool is_put) { ScopedObjectAccess soa(Thread::Current()); - StackHandleScope<2> hs(soa.Self()); ClassLinker* class_linker = dex_compilation_unit_->GetClassLinker(); Handle<mirror::ClassLoader> class_loader = dex_compilation_unit_->GetClassLoader(); - Handle<mirror::Class> compiling_class(hs.NewHandle(GetCompilingClass())); ArtField* resolved_field = class_linker->ResolveField(field_idx, dex_compilation_unit_->GetDexCache(), class_loader, is_static); + DCHECK_EQ(resolved_field == nullptr, soa.Self()->IsExceptionPending()); if (UNLIKELY(resolved_field == nullptr)) { - // Clean up any exception left by type resolution. + // Clean up any exception left by field resolution. soa.Self()->ClearException(); return nullptr; } @@ -1540,6 +1643,7 @@ ArtField* HInstructionBuilder::ResolveField(uint16_t field_idx, bool is_static, } // Check access. + Handle<mirror::Class> compiling_class = dex_compilation_unit_->GetCompilingClass(); if (compiling_class == nullptr) { if (!resolved_field->IsPublic()) { return nullptr; @@ -1569,7 +1673,7 @@ void HInstructionBuilder::BuildStaticFieldAccess(const Instruction& instruction, uint16_t field_index = instruction.VRegB_21c(); ScopedObjectAccess soa(Thread::Current()); - ArtField* resolved_field = ResolveField(field_index, /* is_static */ true, is_put); + ArtField* resolved_field = ResolveField(field_index, /* is_static= */ true, is_put); if (resolved_field == nullptr) { MaybeRecordStat(compilation_stats_, @@ -1586,7 +1690,7 @@ void HInstructionBuilder::BuildStaticFieldAccess(const Instruction& instruction, klass->GetDexFile(), klass, dex_pc, - /* needs_access_check */ false); + /* needs_access_check= */ false); if (constant == nullptr) { // The class cannot be referenced from this compiled code. 
Generate @@ -1697,17 +1801,27 @@ void HInstructionBuilder::BuildArrayAccess(const Instruction& instruction, graph_->SetHasBoundsChecks(true); } +HNewArray* HInstructionBuilder::BuildNewArray(uint32_t dex_pc, + dex::TypeIndex type_index, + HInstruction* length) { + HLoadClass* cls = BuildLoadClass(type_index, dex_pc); + + const char* descriptor = dex_file_->GetTypeDescriptor(dex_file_->GetTypeId(type_index)); + DCHECK_EQ(descriptor[0], '['); + size_t component_type_shift = Primitive::ComponentSizeShift(Primitive::GetType(descriptor[1])); + + HNewArray* new_array = new (allocator_) HNewArray(cls, length, dex_pc, component_type_shift); + AppendInstruction(new_array); + return new_array; +} + HNewArray* HInstructionBuilder::BuildFilledNewArray(uint32_t dex_pc, dex::TypeIndex type_index, - uint32_t number_of_vreg_arguments, - bool is_range, - uint32_t* args, - uint32_t register_index) { - HInstruction* length = graph_->GetIntConstant(number_of_vreg_arguments, dex_pc); - HLoadClass* cls = BuildLoadClass(type_index, dex_pc); - HNewArray* const object = new (allocator_) HNewArray(cls, length, dex_pc); - AppendInstruction(object); + const InstructionOperands& operands) { + const size_t number_of_operands = operands.GetNumberOfOperands(); + HInstruction* length = graph_->GetIntConstant(number_of_operands, dex_pc); + HNewArray* new_array = BuildNewArray(dex_pc, type_index, length); const char* descriptor = dex_file_->StringByTypeIdx(type_index); DCHECK_EQ(descriptor[0], '[') << descriptor; char primitive = descriptor[1]; @@ -1717,16 +1831,16 @@ HNewArray* HInstructionBuilder::BuildFilledNewArray(uint32_t dex_pc, bool is_reference_array = (primitive == 'L') || (primitive == '['); DataType::Type type = is_reference_array ? DataType::Type::kReference : DataType::Type::kInt32; - for (size_t i = 0; i < number_of_vreg_arguments; ++i) { - HInstruction* value = LoadLocal(is_range ? 
register_index + i : args[i], type); + for (size_t i = 0; i < number_of_operands; ++i) { + HInstruction* value = LoadLocal(operands.GetOperand(i), type); HInstruction* index = graph_->GetIntConstant(i, dex_pc); - HArraySet* aset = new (allocator_) HArraySet(object, index, value, type, dex_pc); + HArraySet* aset = new (allocator_) HArraySet(new_array, index, value, type, dex_pc); ssa_builder_->MaybeAddAmbiguousArraySet(aset); AppendInstruction(aset); } - latest_result_ = object; + latest_result_ = new_array; - return object; + return new_array; } template <typename T> @@ -1815,35 +1929,11 @@ void HInstructionBuilder::BuildFillWideArrayData(HInstruction* object, } } -static TypeCheckKind ComputeTypeCheckKind(Handle<mirror::Class> cls) - REQUIRES_SHARED(Locks::mutator_lock_) { - if (cls == nullptr) { - return TypeCheckKind::kUnresolvedCheck; - } else if (cls->IsInterface()) { - return TypeCheckKind::kInterfaceCheck; - } else if (cls->IsArrayClass()) { - if (cls->GetComponentType()->IsObjectClass()) { - return TypeCheckKind::kArrayObjectCheck; - } else if (cls->CannotBeAssignedFromOtherTypes()) { - return TypeCheckKind::kExactCheck; - } else { - return TypeCheckKind::kArrayCheck; - } - } else if (cls->IsFinal()) { - return TypeCheckKind::kExactCheck; - } else if (cls->IsAbstract()) { - return TypeCheckKind::kAbstractClassCheck; - } else { - return TypeCheckKind::kClassHierarchyCheck; - } -} - void HInstructionBuilder::BuildLoadString(dex::StringIndex string_index, uint32_t dex_pc) { HLoadString* load_string = new (allocator_) HLoadString(graph_->GetCurrentMethod(), string_index, *dex_file_, dex_pc); HSharpening::ProcessLoadString(load_string, code_generator_, - compiler_driver_, *dex_compilation_unit_, handles_); AppendInstruction(load_string); @@ -1852,22 +1942,8 @@ void HInstructionBuilder::BuildLoadString(dex::StringIndex string_index, uint32_ HLoadClass* HInstructionBuilder::BuildLoadClass(dex::TypeIndex type_index, uint32_t dex_pc) { ScopedObjectAccess soa(Thread::Current()); const DexFile& dex_file = *dex_compilation_unit_->GetDexFile(); - Handle<mirror::ClassLoader> class_loader = dex_compilation_unit_->GetClassLoader(); - Handle<mirror::Class> klass = handles_->NewHandle(compiler_driver_->ResolveClass( - soa, dex_compilation_unit_->GetDexCache(), class_loader, type_index, dex_compilation_unit_)); - - bool needs_access_check = true; - if (klass != nullptr) { - if (klass->IsPublic()) { - needs_access_check = false; - } else { - ObjPtr<mirror::Class> compiling_class = GetCompilingClass(); - if (compiling_class != nullptr && compiling_class->CanAccess(klass.Get())) { - needs_access_check = false; - } - } - } - + Handle<mirror::Class> klass = ResolveClass(soa, type_index); + bool needs_access_check = LoadClassNeedsAccessCheck(klass); return BuildLoadClass(type_index, dex_file, klass, dex_pc, needs_access_check); } @@ -1888,18 +1964,19 @@ HLoadClass* HInstructionBuilder::BuildLoadClass(dex::TypeIndex type_index, } // Note: `klass` must be from `handles_`. 
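Back in BuildNewArray above, the component size shift is read straight off the array descriptor ("[I" has 4-byte elements, so shift 2, and so on). The sketch below spells that computation out with hard-coded shifts; treating references as 4-byte (compressed) heap references is an assumption of this sketch, and the function is illustrative rather than ART's Primitive::ComponentSizeShift.

    #include <cstddef>

    // Illustrative: element descriptor character of an array type -> log2(element size).
    inline size_t ComponentSizeShiftFor(char element_descriptor) {
      switch (element_descriptor) {
        case 'Z':                  // boolean
        case 'B': return 0;        // byte    -> 1 byte
        case 'C':                  // char
        case 'S': return 1;        // short   -> 2 bytes
        case 'I':                  // int
        case 'F': return 2;        // float   -> 4 bytes
        case 'J':                  // long
        case 'D': return 3;        // double  -> 8 bytes
        case 'L':
        case '[': return 2;        // reference -> assumed 4 bytes here
        default:  return 0;        // not a valid array element type
      }
    }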
+ bool is_referrers_class = + (klass != nullptr) && (outer_compilation_unit_->GetCompilingClass().Get() == klass.Get()); HLoadClass* load_class = new (allocator_) HLoadClass( graph_->GetCurrentMethod(), type_index, *actual_dex_file, klass, - klass != nullptr && (klass.Get() == GetOutermostCompilingClass()), + is_referrers_class, dex_pc, needs_access_check); HLoadClass::LoadKind load_kind = HSharpening::ComputeLoadClassKind(load_class, code_generator_, - compiler_driver_, *dex_compilation_unit_); if (load_kind == HLoadClass::LoadKind::kInvalid) { @@ -1912,35 +1989,109 @@ HLoadClass* HInstructionBuilder::BuildLoadClass(dex::TypeIndex type_index, return load_class; } +Handle<mirror::Class> HInstructionBuilder::ResolveClass(ScopedObjectAccess& soa, + dex::TypeIndex type_index) { + auto it = class_cache_.find(type_index); + if (it != class_cache_.end()) { + return it->second; + } + + ObjPtr<mirror::Class> klass = dex_compilation_unit_->GetClassLinker()->ResolveType( + type_index, dex_compilation_unit_->GetDexCache(), dex_compilation_unit_->GetClassLoader()); + DCHECK_EQ(klass == nullptr, soa.Self()->IsExceptionPending()); + soa.Self()->ClearException(); // Clean up the exception left by type resolution if any. + + Handle<mirror::Class> h_klass = handles_->NewHandle(klass); + class_cache_.Put(type_index, h_klass); + return h_klass; +} + +bool HInstructionBuilder::LoadClassNeedsAccessCheck(Handle<mirror::Class> klass) { + if (klass == nullptr) { + return true; + } else if (klass->IsPublic()) { + return false; + } else { + ObjPtr<mirror::Class> compiling_class = dex_compilation_unit_->GetCompilingClass().Get(); + return compiling_class == nullptr || !compiling_class->CanAccess(klass.Get()); + } +} + +void HInstructionBuilder::BuildLoadMethodHandle(uint16_t method_handle_index, uint32_t dex_pc) { + const DexFile& dex_file = *dex_compilation_unit_->GetDexFile(); + HLoadMethodHandle* load_method_handle = new (allocator_) HLoadMethodHandle( + graph_->GetCurrentMethod(), method_handle_index, dex_file, dex_pc); + AppendInstruction(load_method_handle); +} + +void HInstructionBuilder::BuildLoadMethodType(dex::ProtoIndex proto_index, uint32_t dex_pc) { + const DexFile& dex_file = *dex_compilation_unit_->GetDexFile(); + HLoadMethodType* load_method_type = + new (allocator_) HLoadMethodType(graph_->GetCurrentMethod(), proto_index, dex_file, dex_pc); + AppendInstruction(load_method_type); +} + void HInstructionBuilder::BuildTypeCheck(const Instruction& instruction, uint8_t destination, uint8_t reference, dex::TypeIndex type_index, uint32_t dex_pc) { HInstruction* object = LoadLocal(reference, DataType::Type::kReference); - HLoadClass* cls = BuildLoadClass(type_index, dex_pc); ScopedObjectAccess soa(Thread::Current()); - TypeCheckKind check_kind = ComputeTypeCheckKind(cls->GetClass()); + const DexFile& dex_file = *dex_compilation_unit_->GetDexFile(); + Handle<mirror::Class> klass = ResolveClass(soa, type_index); + bool needs_access_check = LoadClassNeedsAccessCheck(klass); + TypeCheckKind check_kind = HSharpening::ComputeTypeCheckKind( + klass.Get(), code_generator_, needs_access_check); + + HInstruction* class_or_null = nullptr; + HIntConstant* bitstring_path_to_root = nullptr; + HIntConstant* bitstring_mask = nullptr; + if (check_kind == TypeCheckKind::kBitstringCheck) { + // TODO: Allow using the bitstring check also if we need an access check. 
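For the kBitstringCheck path referenced above, the subtype test compiles down to a mask-and-compare against two constants baked into the graph. The real encoding lives in SubtypeCheck; the toy example below uses invented field widths and class values purely to show the shape of the final test.

    #include <cassert>
    #include <cstdint>

    // Toy bitstring encoding: 4 bits per hierarchy level, least-significant field
    // first. A class stores the sibling indices of its ancestors (and itself);
    // deeper, unused fields stay zero. Widths and values are made up.
    constexpr uint32_t kObjectPath   = 0x000;  // root
    constexpr uint32_t kViewPath     = 0x001;  // child #1 of root
    constexpr uint32_t kTextViewPath = 0x011;  // child #1 of View
    constexpr uint32_t kButtonPath   = 0x111;  // child #1 of TextView
    constexpr uint32_t kStringPath   = 0x002;  // child #2 of root

    constexpr uint32_t kViewMask     = 0x00F;  // covers levels up to View's depth
    constexpr uint32_t kTextViewMask = 0x0FF;

    // The whole "x instanceof Target" test becomes one AND and one compare.
    constexpr bool IsSubtype(uint32_t candidate_path, uint32_t target_path, uint32_t target_mask) {
      return (candidate_path & target_mask) == target_path;
    }

    int main() {
      assert(IsSubtype(kButtonPath, kViewPath, kViewMask));          // Button <: View
      assert(IsSubtype(kButtonPath, kTextViewPath, kTextViewMask));  // Button <: TextView
      assert(!IsSubtype(kStringPath, kViewPath, kViewMask));         // String is not a View
      return 0;
    }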
+ DCHECK(!needs_access_check); + class_or_null = graph_->GetNullConstant(dex_pc); + MutexLock subtype_check_lock(Thread::Current(), *Locks::subtype_check_lock_); + uint32_t path_to_root = + SubtypeCheck<ObjPtr<mirror::Class>>::GetEncodedPathToRootForTarget(klass.Get()); + uint32_t mask = SubtypeCheck<ObjPtr<mirror::Class>>::GetEncodedPathToRootMask(klass.Get()); + bitstring_path_to_root = graph_->GetIntConstant(static_cast<int32_t>(path_to_root), dex_pc); + bitstring_mask = graph_->GetIntConstant(static_cast<int32_t>(mask), dex_pc); + } else { + class_or_null = BuildLoadClass(type_index, dex_file, klass, dex_pc, needs_access_check); + } + DCHECK(class_or_null != nullptr); + if (instruction.Opcode() == Instruction::INSTANCE_OF) { - AppendInstruction(new (allocator_) HInstanceOf(object, cls, check_kind, dex_pc)); + AppendInstruction(new (allocator_) HInstanceOf(object, + class_or_null, + check_kind, + klass, + dex_pc, + allocator_, + bitstring_path_to_root, + bitstring_mask)); UpdateLocal(destination, current_block_->GetLastInstruction()); } else { DCHECK_EQ(instruction.Opcode(), Instruction::CHECK_CAST); // We emit a CheckCast followed by a BoundType. CheckCast is a statement // which may throw. If it succeeds BoundType sets the new type of `object` // for all subsequent uses. - AppendInstruction(new (allocator_) HCheckCast(object, cls, check_kind, dex_pc)); + AppendInstruction( + new (allocator_) HCheckCast(object, + class_or_null, + check_kind, + klass, + dex_pc, + allocator_, + bitstring_path_to_root, + bitstring_mask)); AppendInstruction(new (allocator_) HBoundType(object, dex_pc)); UpdateLocal(reference, current_block_->GetLastInstruction()); } } -bool HInstructionBuilder::NeedsAccessCheck(dex::TypeIndex type_index, bool* finalizable) const { - return !compiler_driver_->CanAccessInstantiableTypeWithoutChecks( - LookupReferrerClass(), LookupResolvedType(type_index, *dex_compilation_unit_), finalizable); -} - bool HInstructionBuilder::CanDecodeQuickenedInfo() const { return !quicken_info_.IsNull(); } @@ -2116,11 +2267,10 @@ bool HInstructionBuilder::ProcessDexInstruction(const Instruction& instruction, } else { method_idx = instruction.VRegB_35c(); } - uint32_t number_of_vreg_arguments = instruction.VRegA_35c(); uint32_t args[5]; - instruction.GetVarArgs(args); - if (!BuildInvoke(instruction, dex_pc, method_idx, - number_of_vreg_arguments, false, args, -1)) { + uint32_t number_of_vreg_arguments = instruction.GetVarArgs(args); + VarArgsInstructionOperands operands(args, number_of_vreg_arguments); + if (!BuildInvoke(instruction, dex_pc, method_idx, operands)) { return false; } break; @@ -2143,10 +2293,8 @@ bool HInstructionBuilder::ProcessDexInstruction(const Instruction& instruction, } else { method_idx = instruction.VRegB_3rc(); } - uint32_t number_of_vreg_arguments = instruction.VRegA_3rc(); - uint32_t register_index = instruction.VRegC(); - if (!BuildInvoke(instruction, dex_pc, method_idx, - number_of_vreg_arguments, true, nullptr, register_index)) { + RangeInstructionOperands operands(instruction.VRegC(), instruction.VRegA_3rc()); + if (!BuildInvoke(instruction, dex_pc, method_idx, operands)) { return false; } break; @@ -2154,33 +2302,32 @@ bool HInstructionBuilder::ProcessDexInstruction(const Instruction& instruction, case Instruction::INVOKE_POLYMORPHIC: { uint16_t method_idx = instruction.VRegB_45cc(); - uint16_t proto_idx = instruction.VRegH_45cc(); - uint32_t number_of_vreg_arguments = instruction.VRegA_45cc(); + dex::ProtoIndex proto_idx(instruction.VRegH_45cc()); uint32_t 
args[5]; - instruction.GetVarArgs(args); - return BuildInvokePolymorphic(instruction, - dex_pc, - method_idx, - proto_idx, - number_of_vreg_arguments, - false, - args, - -1); + uint32_t number_of_vreg_arguments = instruction.GetVarArgs(args); + VarArgsInstructionOperands operands(args, number_of_vreg_arguments); + return BuildInvokePolymorphic(dex_pc, method_idx, proto_idx, operands); } case Instruction::INVOKE_POLYMORPHIC_RANGE: { uint16_t method_idx = instruction.VRegB_4rcc(); - uint16_t proto_idx = instruction.VRegH_4rcc(); - uint32_t number_of_vreg_arguments = instruction.VRegA_4rcc(); - uint32_t register_index = instruction.VRegC_4rcc(); - return BuildInvokePolymorphic(instruction, - dex_pc, - method_idx, - proto_idx, - number_of_vreg_arguments, - true, - nullptr, - register_index); + dex::ProtoIndex proto_idx(instruction.VRegH_4rcc()); + RangeInstructionOperands operands(instruction.VRegC_4rcc(), instruction.VRegA_4rcc()); + return BuildInvokePolymorphic(dex_pc, method_idx, proto_idx, operands); + } + + case Instruction::INVOKE_CUSTOM: { + uint16_t call_site_idx = instruction.VRegB_35c(); + uint32_t args[5]; + uint32_t number_of_vreg_arguments = instruction.GetVarArgs(args); + VarArgsInstructionOperands operands(args, number_of_vreg_arguments); + return BuildInvokeCustom(dex_pc, call_site_idx, operands); + } + + case Instruction::INVOKE_CUSTOM_RANGE: { + uint16_t call_site_idx = instruction.VRegB_3rc(); + RangeInstructionOperands operands(instruction.VRegC_3rc(), instruction.VRegA_3rc()); + return BuildInvokeCustom(dex_pc, call_site_idx, operands); } case Instruction::NEG_INT: { @@ -2718,40 +2865,27 @@ bool HInstructionBuilder::ProcessDexInstruction(const Instruction& instruction, case Instruction::NEW_ARRAY: { dex::TypeIndex type_index(instruction.VRegC_22c()); HInstruction* length = LoadLocal(instruction.VRegB_22c(), DataType::Type::kInt32); - HLoadClass* cls = BuildLoadClass(type_index, dex_pc); + HNewArray* new_array = BuildNewArray(dex_pc, type_index, length); - HNewArray* new_array = new (allocator_) HNewArray(cls, length, dex_pc); - AppendInstruction(new_array); UpdateLocal(instruction.VRegA_22c(), current_block_->GetLastInstruction()); BuildConstructorFenceForAllocation(new_array); break; } case Instruction::FILLED_NEW_ARRAY: { - uint32_t number_of_vreg_arguments = instruction.VRegA_35c(); dex::TypeIndex type_index(instruction.VRegB_35c()); uint32_t args[5]; - instruction.GetVarArgs(args); - HNewArray* new_array = BuildFilledNewArray(dex_pc, - type_index, - number_of_vreg_arguments, - /* is_range */ false, - args, - /* register_index */ 0); + uint32_t number_of_vreg_arguments = instruction.GetVarArgs(args); + VarArgsInstructionOperands operands(args, number_of_vreg_arguments); + HNewArray* new_array = BuildFilledNewArray(dex_pc, type_index, operands); BuildConstructorFenceForAllocation(new_array); break; } case Instruction::FILLED_NEW_ARRAY_RANGE: { - uint32_t number_of_vreg_arguments = instruction.VRegA_3rc(); dex::TypeIndex type_index(instruction.VRegB_3rc()); - uint32_t register_index = instruction.VRegC_3rc(); - HNewArray* new_array = BuildFilledNewArray(dex_pc, - type_index, - number_of_vreg_arguments, - /* is_range */ true, - /* args*/ nullptr, - register_index); + RangeInstructionOperands operands(instruction.VRegC_3rc(), instruction.VRegA_3rc()); + HNewArray* new_array = BuildFilledNewArray(dex_pc, type_index, operands); BuildConstructorFenceForAllocation(new_array); break; } @@ -2812,7 +2946,7 @@ bool HInstructionBuilder::ProcessDexInstruction(const Instruction& 
instruction, case Instruction::IGET_CHAR_QUICK: case Instruction::IGET_SHORT: case Instruction::IGET_SHORT_QUICK: { - if (!BuildInstanceFieldAccess(instruction, dex_pc, /* is_put */ false, quicken_index)) { + if (!BuildInstanceFieldAccess(instruction, dex_pc, /* is_put= */ false, quicken_index)) { return false; } break; @@ -2832,7 +2966,7 @@ bool HInstructionBuilder::ProcessDexInstruction(const Instruction& instruction, case Instruction::IPUT_CHAR_QUICK: case Instruction::IPUT_SHORT: case Instruction::IPUT_SHORT_QUICK: { - if (!BuildInstanceFieldAccess(instruction, dex_pc, /* is_put */ true, quicken_index)) { + if (!BuildInstanceFieldAccess(instruction, dex_pc, /* is_put= */ true, quicken_index)) { return false; } break; @@ -2845,7 +2979,7 @@ bool HInstructionBuilder::ProcessDexInstruction(const Instruction& instruction, case Instruction::SGET_BYTE: case Instruction::SGET_CHAR: case Instruction::SGET_SHORT: { - BuildStaticFieldAccess(instruction, dex_pc, /* is_put */ false); + BuildStaticFieldAccess(instruction, dex_pc, /* is_put= */ false); break; } @@ -2856,7 +2990,7 @@ bool HInstructionBuilder::ProcessDexInstruction(const Instruction& instruction, case Instruction::SPUT_BYTE: case Instruction::SPUT_CHAR: case Instruction::SPUT_SHORT: { - BuildStaticFieldAccess(instruction, dex_pc, /* is_put */ true); + BuildStaticFieldAccess(instruction, dex_pc, /* is_put= */ true); break; } @@ -2906,6 +3040,20 @@ bool HInstructionBuilder::ProcessDexInstruction(const Instruction& instruction, break; } + case Instruction::CONST_METHOD_HANDLE: { + uint16_t method_handle_idx = instruction.VRegB_21c(); + BuildLoadMethodHandle(method_handle_idx, dex_pc); + UpdateLocal(instruction.VRegA_21c(), current_block_->GetLastInstruction()); + break; + } + + case Instruction::CONST_METHOD_TYPE: { + dex::ProtoIndex proto_idx(instruction.VRegB_21c()); + BuildLoadMethodType(proto_idx, dex_pc); + UpdateLocal(instruction.VRegA_21c(), current_block_->GetLastInstruction()); + break; + } + case Instruction::MOVE_EXCEPTION: { AppendInstruction(new (allocator_) HLoadException(dex_pc)); UpdateLocal(instruction.VRegA_11x(), current_block_->GetLastInstruction()); @@ -2959,7 +3107,21 @@ bool HInstructionBuilder::ProcessDexInstruction(const Instruction& instruction, break; } - default: + case Instruction::UNUSED_3E: + case Instruction::UNUSED_3F: + case Instruction::UNUSED_40: + case Instruction::UNUSED_41: + case Instruction::UNUSED_42: + case Instruction::UNUSED_43: + case Instruction::UNUSED_79: + case Instruction::UNUSED_7A: + case Instruction::UNUSED_F3: + case Instruction::UNUSED_F4: + case Instruction::UNUSED_F5: + case Instruction::UNUSED_F6: + case Instruction::UNUSED_F7: + case Instruction::UNUSED_F8: + case Instruction::UNUSED_F9: { VLOG(compiler) << "Did not compile " << dex_file_->PrettyMethod(dex_compilation_unit_->GetDexMethodIndex()) << " because of unhandled instruction " @@ -2967,6 +3129,7 @@ bool HInstructionBuilder::ProcessDexInstruction(const Instruction& instruction, MaybeRecordStat(compilation_stats_, MethodCompilationStat::kNotCompiledUnhandledInstruction); return false; + } } return true; } // NOLINT(readability/fn_size) @@ -2980,7 +3143,7 @@ ObjPtr<mirror::Class> HInstructionBuilder::LookupResolvedType( ObjPtr<mirror::Class> HInstructionBuilder::LookupReferrerClass() const { // TODO: Cache the result in a Handle<mirror::Class>. 
- const DexFile::MethodId& method_id = + const dex::MethodId& method_id = dex_compilation_unit_->GetDexFile()->GetMethodId(dex_compilation_unit_->GetDexMethodIndex()); return LookupResolvedType(method_id.class_idx_, *dex_compilation_unit_); } diff --git a/compiler/optimizing/instruction_builder.h b/compiler/optimizing/instruction_builder.h index 4428c53277..d701445946 100644 --- a/compiler/optimizing/instruction_builder.h +++ b/compiler/optimizing/instruction_builder.h @@ -34,16 +34,18 @@ class ArenaBitVector; class ArtField; class ArtMethod; class CodeGenerator; -class CompilerDriver; class DexCompilationUnit; class HBasicBlockBuilder; class Instruction; +class InstructionOperands; class OptimizingCompilerStats; +class ScopedObjectAccess; class SsaBuilder; class VariableSizedHandleScope; namespace mirror { class Class; +class MethodType; } // namespace mirror class HInstructionBuilder : public ValueObject { @@ -56,7 +58,6 @@ class HInstructionBuilder : public ValueObject { DataType::Type return_type, const DexCompilationUnit* dex_compilation_unit, const DexCompilationUnit* outer_compilation_unit, - CompilerDriver* compiler_driver, CodeGenerator* code_generator, ArrayRef<const uint8_t> interpreter_metadata, OptimizingCompilerStats* compiler_stats, @@ -95,11 +96,6 @@ class HInstructionBuilder : public ValueObject { void InitializeParameters(); - // Returns whether the current method needs access check for the type. - // Output parameter finalizable is set to whether the type is finalizable. - bool NeedsAccessCheck(dex::TypeIndex type_index, /*out*/bool* finalizable) const - REQUIRES_SHARED(Locks::mutator_lock_); - template<typename T> void Unop_12x(const Instruction& instruction, DataType::Type type, uint32_t dex_pc); @@ -166,29 +162,28 @@ class HInstructionBuilder : public ValueObject { bool BuildInvoke(const Instruction& instruction, uint32_t dex_pc, uint32_t method_idx, - uint32_t number_of_vreg_arguments, - bool is_range, - uint32_t* args, - uint32_t register_index); + const InstructionOperands& operands); // Builds an invocation node for invoke-polymorphic and returns whether the // instruction is supported. - bool BuildInvokePolymorphic(const Instruction& instruction, - uint32_t dex_pc, + bool BuildInvokePolymorphic(uint32_t dex_pc, uint32_t method_idx, - uint32_t proto_idx, - uint32_t number_of_vreg_arguments, - bool is_range, - uint32_t* args, - uint32_t register_index); + dex::ProtoIndex proto_idx, + const InstructionOperands& operands); + + // Builds an invocation node for invoke-custom and returns whether the + // instruction is supported. + bool BuildInvokeCustom(uint32_t dex_pc, + uint32_t call_site_idx, + const InstructionOperands& operands); + + // Builds a new array node. + HNewArray* BuildNewArray(uint32_t dex_pc, dex::TypeIndex type_index, HInstruction* length); // Builds a new array node and the instructions that fill it. HNewArray* BuildFilledNewArray(uint32_t dex_pc, dex::TypeIndex type_index, - uint32_t number_of_vreg_arguments, - bool is_range, - uint32_t* args, - uint32_t register_index); + const InstructionOperands& operands); void BuildFillArrayData(const Instruction& instruction, uint32_t dex_pc); @@ -232,43 +227,37 @@ class HInstructionBuilder : public ValueObject { bool needs_access_check) REQUIRES_SHARED(Locks::mutator_lock_); - // Returns the outer-most compiling method's class. 
- ObjPtr<mirror::Class> GetOutermostCompilingClass() const; + Handle<mirror::Class> ResolveClass(ScopedObjectAccess& soa, dex::TypeIndex type_index) + REQUIRES_SHARED(Locks::mutator_lock_); + + bool LoadClassNeedsAccessCheck(Handle<mirror::Class> klass) + REQUIRES_SHARED(Locks::mutator_lock_); - // Returns the class whose method is being compiled. - ObjPtr<mirror::Class> GetCompilingClass() const; + // Builds a `HLoadMethodHandle` loading the given `method_handle_index`. + void BuildLoadMethodHandle(uint16_t method_handle_idx, uint32_t dex_pc); - // Returns whether `type_index` points to the outer-most compiling method's class. - bool IsOutermostCompilingClass(dex::TypeIndex type_index) const; + // Builds a `HLoadMethodType` loading the given `proto_index`. + void BuildLoadMethodType(dex::ProtoIndex proto_index, uint32_t dex_pc); void PotentiallySimplifyFakeString(uint16_t original_dex_register, uint32_t dex_pc, HInvoke* invoke); bool SetupInvokeArguments(HInvoke* invoke, - uint32_t number_of_vreg_arguments, - uint32_t* args, - uint32_t register_index, - bool is_range, - const char* descriptor, + const InstructionOperands& operands, + const char* shorty, size_t start_index, size_t* argument_index); bool HandleInvoke(HInvoke* invoke, - uint32_t number_of_vreg_arguments, - uint32_t* args, - uint32_t register_index, - bool is_range, - const char* descriptor, - HClinitCheck* clinit_check, - bool is_unresolved); + const InstructionOperands& operands, + const char* shorty, + bool is_unresolved, + HClinitCheck* clinit_check = nullptr); bool HandleStringInit(HInvoke* invoke, - uint32_t number_of_vreg_arguments, - uint32_t* args, - uint32_t register_index, - bool is_range, - const char* descriptor); + const InstructionOperands& operands, + const char* shorty); void HandleStringInitResult(HInvokeStaticOrDirect* invoke); HClinitCheck* ProcessClinitCheckForInvoke( @@ -316,8 +305,6 @@ class HInstructionBuilder : public ValueObject { HBasicBlockBuilder* const block_builder_; SsaBuilder* const ssa_builder_; - CompilerDriver* const compiler_driver_; - CodeGenerator* const code_generator_; // The compilation unit of the current method being compiled. Note that @@ -347,6 +334,10 @@ class HInstructionBuilder : public ValueObject { ScopedArenaVector<HBasicBlock*> loop_headers_; + // Cached resolved types for the current compilation unit's DexFile. + // Handle<>s reference entries in the `handles_`. 
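The new class_cache_ field described above is a straightforward memoization of ResolveClass() per type index; note that failed resolutions are cached as null too, so a bad type is not resolved (and its exception cleared) repeatedly. A self-contained sketch of the pattern, with std::map and std::function standing in for the arena map and the class linker:

    #include <cstdint>
    #include <functional>
    #include <map>

    // Stand-ins for the real types; in the builder these are dex::TypeIndex and
    // Handle<mirror::Class> backed by an arena map and a handle scope.
    using TypeIndex = uint32_t;
    using ClassHandle = const void*;  // nullptr == resolution failed

    class ClassCache {
     public:
      explicit ClassCache(std::function<ClassHandle(TypeIndex)> resolver)
          : resolver_(std::move(resolver)) {}

      // Resolve at most once per type index; negative results are cached as well.
      ClassHandle Resolve(TypeIndex type_index) {
        auto it = cache_.find(type_index);
        if (it != cache_.end()) {
          return it->second;
        }
        ClassHandle klass = resolver_(type_index);
        cache_.emplace(type_index, klass);
        return klass;
      }

     private:
      std::function<ClassHandle(TypeIndex)> resolver_;
      std::map<TypeIndex, ClassHandle> cache_;
    };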
+ ScopedArenaSafeMap<dex::TypeIndex, Handle<mirror::Class>> class_cache_; + static constexpr int kDefaultNumberOfLoops = 2; DISALLOW_COPY_AND_ASSIGN(HInstructionBuilder); diff --git a/compiler/optimizing/instruction_simplifier.cc b/compiler/optimizing/instruction_simplifier.cc index a42a85dc1d..a433d7ef73 100644 --- a/compiler/optimizing/instruction_simplifier.cc +++ b/compiler/optimizing/instruction_simplifier.cc @@ -18,6 +18,7 @@ #include "art_method-inl.h" #include "class_linker-inl.h" +#include "class_root.h" #include "data_type-inl.h" #include "escape.h" #include "intrinsics.h" @@ -35,14 +36,12 @@ class InstructionSimplifierVisitor : public HGraphDelegateVisitor { public: InstructionSimplifierVisitor(HGraph* graph, CodeGenerator* codegen, - CompilerDriver* compiler_driver, OptimizingCompilerStats* stats) : HGraphDelegateVisitor(graph), codegen_(codegen), - compiler_driver_(compiler_driver), stats_(stats) {} - void Run(); + bool Run(); private: void RecordSimplification() { @@ -67,44 +66,44 @@ class InstructionSimplifierVisitor : public HGraphDelegateVisitor { bool TryCombineVecMultiplyAccumulate(HVecMul* mul); void VisitShift(HBinaryOperation* shift); - - void VisitEqual(HEqual* equal) OVERRIDE; - void VisitNotEqual(HNotEqual* equal) OVERRIDE; - void VisitBooleanNot(HBooleanNot* bool_not) OVERRIDE; - void VisitInstanceFieldSet(HInstanceFieldSet* equal) OVERRIDE; - void VisitStaticFieldSet(HStaticFieldSet* equal) OVERRIDE; - void VisitArraySet(HArraySet* equal) OVERRIDE; - void VisitTypeConversion(HTypeConversion* instruction) OVERRIDE; - void VisitNullCheck(HNullCheck* instruction) OVERRIDE; - void VisitArrayLength(HArrayLength* instruction) OVERRIDE; - void VisitCheckCast(HCheckCast* instruction) OVERRIDE; - void VisitAdd(HAdd* instruction) OVERRIDE; - void VisitAnd(HAnd* instruction) OVERRIDE; - void VisitCondition(HCondition* instruction) OVERRIDE; - void VisitGreaterThan(HGreaterThan* condition) OVERRIDE; - void VisitGreaterThanOrEqual(HGreaterThanOrEqual* condition) OVERRIDE; - void VisitLessThan(HLessThan* condition) OVERRIDE; - void VisitLessThanOrEqual(HLessThanOrEqual* condition) OVERRIDE; - void VisitBelow(HBelow* condition) OVERRIDE; - void VisitBelowOrEqual(HBelowOrEqual* condition) OVERRIDE; - void VisitAbove(HAbove* condition) OVERRIDE; - void VisitAboveOrEqual(HAboveOrEqual* condition) OVERRIDE; - void VisitDiv(HDiv* instruction) OVERRIDE; - void VisitMul(HMul* instruction) OVERRIDE; - void VisitNeg(HNeg* instruction) OVERRIDE; - void VisitNot(HNot* instruction) OVERRIDE; - void VisitOr(HOr* instruction) OVERRIDE; - void VisitShl(HShl* instruction) OVERRIDE; - void VisitShr(HShr* instruction) OVERRIDE; - void VisitSub(HSub* instruction) OVERRIDE; - void VisitUShr(HUShr* instruction) OVERRIDE; - void VisitXor(HXor* instruction) OVERRIDE; - void VisitSelect(HSelect* select) OVERRIDE; - void VisitIf(HIf* instruction) OVERRIDE; - void VisitInstanceOf(HInstanceOf* instruction) OVERRIDE; - void VisitInvoke(HInvoke* invoke) OVERRIDE; - void VisitDeoptimize(HDeoptimize* deoptimize) OVERRIDE; - void VisitVecMul(HVecMul* instruction) OVERRIDE; + void VisitEqual(HEqual* equal) override; + void VisitNotEqual(HNotEqual* equal) override; + void VisitBooleanNot(HBooleanNot* bool_not) override; + void VisitInstanceFieldSet(HInstanceFieldSet* equal) override; + void VisitStaticFieldSet(HStaticFieldSet* equal) override; + void VisitArraySet(HArraySet* equal) override; + void VisitTypeConversion(HTypeConversion* instruction) override; + void VisitNullCheck(HNullCheck* instruction) 
override; + void VisitArrayLength(HArrayLength* instruction) override; + void VisitCheckCast(HCheckCast* instruction) override; + void VisitAbs(HAbs* instruction) override; + void VisitAdd(HAdd* instruction) override; + void VisitAnd(HAnd* instruction) override; + void VisitCondition(HCondition* instruction) override; + void VisitGreaterThan(HGreaterThan* condition) override; + void VisitGreaterThanOrEqual(HGreaterThanOrEqual* condition) override; + void VisitLessThan(HLessThan* condition) override; + void VisitLessThanOrEqual(HLessThanOrEqual* condition) override; + void VisitBelow(HBelow* condition) override; + void VisitBelowOrEqual(HBelowOrEqual* condition) override; + void VisitAbove(HAbove* condition) override; + void VisitAboveOrEqual(HAboveOrEqual* condition) override; + void VisitDiv(HDiv* instruction) override; + void VisitMul(HMul* instruction) override; + void VisitNeg(HNeg* instruction) override; + void VisitNot(HNot* instruction) override; + void VisitOr(HOr* instruction) override; + void VisitShl(HShl* instruction) override; + void VisitShr(HShr* instruction) override; + void VisitSub(HSub* instruction) override; + void VisitUShr(HUShr* instruction) override; + void VisitXor(HXor* instruction) override; + void VisitSelect(HSelect* select) override; + void VisitIf(HIf* instruction) override; + void VisitInstanceOf(HInstanceOf* instruction) override; + void VisitInvoke(HInvoke* invoke) override; + void VisitDeoptimize(HDeoptimize* deoptimize) override; + void VisitVecMul(HVecMul* instruction) override; bool CanEnsureNotNullAt(HInstruction* instr, HInstruction* at) const; @@ -116,13 +115,16 @@ class InstructionSimplifierVisitor : public HGraphDelegateVisitor { void SimplifyFP2Int(HInvoke* invoke); void SimplifyStringCharAt(HInvoke* invoke); void SimplifyStringIsEmptyOrLength(HInvoke* invoke); + void SimplifyStringIndexOf(HInvoke* invoke); void SimplifyNPEOnArgN(HInvoke* invoke, size_t); void SimplifyReturnThis(HInvoke* invoke); void SimplifyAllocationIntrinsic(HInvoke* invoke); void SimplifyMemBarrier(HInvoke* invoke, MemBarrierKind barrier_kind); + void SimplifyMin(HInvoke* invoke, DataType::Type type); + void SimplifyMax(HInvoke* invoke, DataType::Type type); + void SimplifyAbs(HInvoke* invoke, DataType::Type type); CodeGenerator* codegen_; - CompilerDriver* compiler_driver_; OptimizingCompilerStats* stats_; bool simplification_occurred_ = false; int simplifications_at_current_position_ = 0; @@ -133,17 +135,18 @@ class InstructionSimplifierVisitor : public HGraphDelegateVisitor { static constexpr int kMaxSamePositionSimplifications = 50; }; -void InstructionSimplifier::Run() { +bool InstructionSimplifier::Run() { if (kTestInstructionClonerExhaustively) { CloneAndReplaceInstructionVisitor visitor(graph_); visitor.VisitReversePostOrder(); } - InstructionSimplifierVisitor visitor(graph_, codegen_, compiler_driver_, stats_); - visitor.Run(); + InstructionSimplifierVisitor visitor(graph_, codegen_, stats_); + return visitor.Run(); } -void InstructionSimplifierVisitor::Run() { +bool InstructionSimplifierVisitor::Run() { + bool didSimplify = false; // Iterate in reverse post order to open up more simplifications to users // of instructions that got simplified. 
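Run() above now reports whether anything changed, and each block is revisited while the visitor keeps making progress, with a cap so rewrites cannot ping-pong forever. Stripped of the IR, the control flow is a bounded fixed-point loop; the sketch below is generic (the cap here counts rounds per unit, a simplification of the per-position counter used in the pass).

    #include <cstddef>
    #include <vector>

    // Generic shape of the per-block loop in InstructionSimplifierVisitor::Run().
    template <typename Unit, typename VisitOnce>
    bool RunToFixedPoint(const std::vector<Unit>& units,
                         VisitOnce visit_once,
                         size_t max_rounds_per_unit) {
      bool changed_anything = false;
      for (const Unit& unit : units) {
        size_t rounds = 0;
        bool changed;
        do {
          changed = visit_once(unit);  // corresponds to VisitBasicBlock(block)
          changed_anything |= changed;
          ++rounds;
        } while (changed && rounds < max_rounds_per_unit);
      }
      return changed_anything;  // corresponds to the new bool return value of Run()
    }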
for (HBasicBlock* block : GetGraph()->GetReversePostOrder()) { @@ -153,10 +156,14 @@ void InstructionSimplifierVisitor::Run() { do { simplification_occurred_ = false; VisitBasicBlock(block); + if (simplification_occurred_) { + didSimplify = true; + } } while (simplification_occurred_ && (simplifications_at_current_position_ < kMaxSamePositionSimplifications)); simplifications_at_current_position_ = 0; } + return didSimplify; } namespace { @@ -365,7 +372,7 @@ void InstructionSimplifierVisitor::VisitShift(HBinaryOperation* instruction) { // (as defined by shift semantics). This ensures other // optimizations do not need to special case for such situations. DCHECK_EQ(shift_amount->GetType(), DataType::Type::kInt32); - instruction->ReplaceInput(GetGraph()->GetIntConstant(masked_cst), /* index */ 1); + instruction->ReplaceInput(GetGraph()->GetIntConstant(masked_cst), /* index= */ 1); RecordSimplification(); return; } @@ -576,7 +583,9 @@ bool InstructionSimplifierVisitor::CanEnsureNotNullAt(HInstruction* input, HInst // Returns whether doing a type test between the class of `object` against `klass` has // a statically known outcome. The result of the test is stored in `outcome`. -static bool TypeCheckHasKnownOutcome(HLoadClass* klass, HInstruction* object, bool* outcome) { +static bool TypeCheckHasKnownOutcome(ReferenceTypeInfo class_rti, + HInstruction* object, + /*out*/bool* outcome) { DCHECK(!object->IsNullConstant()) << "Null constants should be special cased"; ReferenceTypeInfo obj_rti = object->GetReferenceTypeInfo(); ScopedObjectAccess soa(Thread::Current()); @@ -586,7 +595,6 @@ static bool TypeCheckHasKnownOutcome(HLoadClass* klass, HInstruction* object, bo return false; } - ReferenceTypeInfo class_rti = klass->GetLoadedClassRTI(); if (!class_rti.IsValid()) { // Happens when the loaded class is unresolved. return false; @@ -611,8 +619,8 @@ static bool TypeCheckHasKnownOutcome(HLoadClass* klass, HInstruction* object, bo void InstructionSimplifierVisitor::VisitCheckCast(HCheckCast* check_cast) { HInstruction* object = check_cast->InputAt(0); - HLoadClass* load_class = check_cast->InputAt(1)->AsLoadClass(); - if (load_class->NeedsAccessCheck()) { + if (check_cast->GetTypeCheckKind() != TypeCheckKind::kBitstringCheck && + check_cast->GetTargetClass()->NeedsAccessCheck()) { // If we need to perform an access check we cannot remove the instruction. return; } @@ -627,18 +635,21 @@ void InstructionSimplifierVisitor::VisitCheckCast(HCheckCast* check_cast) { return; } - // Note: The `outcome` is initialized to please valgrind - the compiler can reorder - // the return value check with the `outcome` check, b/27651442 . + // Historical note: The `outcome` was initialized to please Valgrind - the compiler can reorder + // the return value check with the `outcome` check, b/27651442. bool outcome = false; - if (TypeCheckHasKnownOutcome(load_class, object, &outcome)) { + if (TypeCheckHasKnownOutcome(check_cast->GetTargetClassRTI(), object, &outcome)) { if (outcome) { check_cast->GetBlock()->RemoveInstruction(check_cast); MaybeRecordStat(stats_, MethodCompilationStat::kRemovedCheckedCast); - if (!load_class->HasUses()) { - // We cannot rely on DCE to remove the class because the `HLoadClass` thinks it can throw. - // However, here we know that it cannot because the checkcast was successfull, hence - // the class was already loaded. 
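TypeCheckHasKnownOutcome above now takes the target's ReferenceTypeInfo directly (so it also covers bitstring checks, which have no HLoadClass input), but the folding rule itself is unchanged. In isolation it looks roughly like the sketch below, with a toy single-inheritance model in place of RTI; names and structure are illustrative, not ART's.

    #include <cassert>

    // Toy model: single-inheritance classes with a parent pointer.
    struct Klass {
      const Klass* super = nullptr;
    };

    inline bool IsSubtypeOf(const Klass* k, const Klass* target) {
      for (; k != nullptr; k = k->super) {
        if (k == target) return true;
      }
      return false;
    }

    // Rough folding rule for checkcast/instanceof on a non-null object:
    // - if the object's static type is a subtype of the target, the test always passes;
    // - if the object's type is exact (dynamic type == static type) and not a subtype,
    //   the test always fails;
    // - otherwise the outcome is unknown and the check must stay.
    inline bool KnownOutcome(const Klass* obj_type, bool obj_type_is_exact,
                             const Klass* target, bool* outcome) {
      if (IsSubtypeOf(obj_type, target)) { *outcome = true; return true; }
      if (obj_type_is_exact)             { *outcome = false; return true; }
      return false;
    }

    int main() {
      Klass object_cls;
      Klass string_cls{&object_cls};
      bool outcome = false;
      assert(KnownOutcome(&string_cls, /*obj_type_is_exact=*/true, &object_cls, &outcome) && outcome);
      assert(KnownOutcome(&object_cls, /*obj_type_is_exact=*/true, &string_cls, &outcome) && !outcome);
      return 0;
    }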
- load_class->GetBlock()->RemoveInstruction(load_class); + if (check_cast->GetTypeCheckKind() != TypeCheckKind::kBitstringCheck) { + HLoadClass* load_class = check_cast->GetTargetClass(); + if (!load_class->HasUses()) { + // We cannot rely on DCE to remove the class because the `HLoadClass` thinks it can throw. + // However, here we know that it cannot because the checkcast was successfull, hence + // the class was already loaded. + load_class->GetBlock()->RemoveInstruction(load_class); + } } } else { // Don't do anything for exceptional cases for now. Ideally we should remove @@ -649,8 +660,8 @@ void InstructionSimplifierVisitor::VisitCheckCast(HCheckCast* check_cast) { void InstructionSimplifierVisitor::VisitInstanceOf(HInstanceOf* instruction) { HInstruction* object = instruction->InputAt(0); - HLoadClass* load_class = instruction->InputAt(1)->AsLoadClass(); - if (load_class->NeedsAccessCheck()) { + if (instruction->GetTypeCheckKind() != TypeCheckKind::kBitstringCheck && + instruction->GetTargetClass()->NeedsAccessCheck()) { // If we need to perform an access check we cannot remove the instruction. return; } @@ -670,10 +681,10 @@ void InstructionSimplifierVisitor::VisitInstanceOf(HInstanceOf* instruction) { return; } - // Note: The `outcome` is initialized to please valgrind - the compiler can reorder - // the return value check with the `outcome` check, b/27651442 . + // Historical note: The `outcome` was initialized to please Valgrind - the compiler can reorder + // the return value check with the `outcome` check, b/27651442. bool outcome = false; - if (TypeCheckHasKnownOutcome(load_class, object, &outcome)) { + if (TypeCheckHasKnownOutcome(instruction->GetTargetClassRTI(), object, &outcome)) { MaybeRecordStat(stats_, MethodCompilationStat::kRemovedInstanceOf); if (outcome && can_be_null) { // Type test will succeed, we just need a null test. @@ -686,11 +697,14 @@ void InstructionSimplifierVisitor::VisitInstanceOf(HInstanceOf* instruction) { } RecordSimplification(); instruction->GetBlock()->RemoveInstruction(instruction); - if (outcome && !load_class->HasUses()) { - // We cannot rely on DCE to remove the class because the `HLoadClass` thinks it can throw. - // However, here we know that it cannot because the instanceof check was successfull, hence - // the class was already loaded. - load_class->GetBlock()->RemoveInstruction(load_class); + if (outcome && instruction->GetTypeCheckKind() != TypeCheckKind::kBitstringCheck) { + HLoadClass* load_class = instruction->GetTargetClass(); + if (!load_class->HasUses()) { + // We cannot rely on DCE to remove the class because the `HLoadClass` thinks it can throw. + // However, here we know that it cannot because the instanceof check was successfull, hence + // the class was already loaded. 
+ load_class->GetBlock()->RemoveInstruction(load_class); + } } } } @@ -735,8 +749,8 @@ static HCondition* GetOppositeConditionSwapOps(ArenaAllocator* allocator, HInstr return new (allocator) HBelowOrEqual(rhs, lhs); default: LOG(FATAL) << "Unknown ConditionType " << cond->GetKind(); + UNREACHABLE(); } - return nullptr; } static bool CmpHasBoolType(HInstruction* input, HInstruction* cmp) { @@ -849,35 +863,29 @@ void InstructionSimplifierVisitor::VisitBooleanNot(HBooleanNot* bool_not) { static HInstruction* NewIntegralAbs(ArenaAllocator* allocator, HInstruction* x, HInstruction* cursor) { - DataType::Type type = x->GetType(); - DCHECK(type == DataType::Type::kInt32 || type == DataType::Type::kInt64); - // Construct a fake intrinsic with as much context as is needed to allocate one. - // The intrinsic will always be lowered into code later anyway. - // TODO: b/65164101 : moving towards a real HAbs node makes more sense. - HInvokeStaticOrDirect::DispatchInfo dispatch_info = { - HInvokeStaticOrDirect::MethodLoadKind::kDirectAddress, - HInvokeStaticOrDirect::CodePtrLocation::kCallArtMethod, - 0u - }; - HInvokeStaticOrDirect* invoke = new (allocator) HInvokeStaticOrDirect( - allocator, - 1, - type, - x->GetDexPc(), - /*method_idx*/ -1, - /*resolved_method*/ nullptr, - dispatch_info, - kStatic, - MethodReference(nullptr, dex::kDexNoIndex), - HInvokeStaticOrDirect::ClinitCheckRequirement::kNone); - invoke->SetArgumentAt(0, x); - invoke->SetIntrinsic(type == DataType::Type::kInt32 ? Intrinsics::kMathAbsInt - : Intrinsics::kMathAbsLong, - kNoEnvironmentOrCache, - kNoSideEffects, - kNoThrow); - cursor->GetBlock()->InsertInstructionBefore(invoke, cursor); - return invoke; + DataType::Type type = DataType::Kind(x->GetType()); + DCHECK(type == DataType::Type::kInt32 || type == DataType::Type::kInt64); + HAbs* abs = new (allocator) HAbs(type, x, cursor->GetDexPc()); + cursor->GetBlock()->InsertInstructionBefore(abs, cursor); + return abs; +} + +// Constructs a new MIN/MAX(x, y) node in the HIR. +static HInstruction* NewIntegralMinMax(ArenaAllocator* allocator, + HInstruction* x, + HInstruction* y, + HInstruction* cursor, + bool is_min) { + DataType::Type type = DataType::Kind(x->GetType()); + DCHECK(type == DataType::Type::kInt32 || type == DataType::Type::kInt64); + HBinaryOperation* minmax = nullptr; + if (is_min) { + minmax = new (allocator) HMin(type, x, y, cursor->GetDexPc()); + } else { + minmax = new (allocator) HMax(type, x, y, cursor->GetDexPc()); + } + cursor->GetBlock()->InsertInstructionBefore(minmax, cursor); + return minmax; } // Returns true if operands a and b consists of widening type conversions @@ -899,6 +907,30 @@ static bool AreLowerPrecisionArgs(DataType::Type to_type, HInstruction* a, HInst to_type == DataType::Type::kInt64); } +// Returns an acceptable substitution for "a" on the select +// construct "a <cmp> b ? c : .." during MIN/MAX recognition. +static HInstruction* AllowInMinMax(IfCondition cmp, + HInstruction* a, + HInstruction* b, + HInstruction* c) { + int64_t value = 0; + if (IsInt64AndGet(b, /*out*/ &value) && + (((cmp == kCondLT || cmp == kCondLE) && c->IsMax()) || + ((cmp == kCondGT || cmp == kCondGE) && c->IsMin()))) { + HConstant* other = c->AsBinaryOperation()->GetConstantRight(); + if (other != nullptr && a == c->AsBinaryOperation()->GetLeastConstantLeft()) { + int64_t other_value = Int64FromConstant(other); + bool is_max = (cmp == kCondLT || cmp == kCondLE); + // Allow the max for a < 100 ? max(a, -100) : .. + // or the min for a > -100 ? min(a, 100) : .. 
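The select patterns recognized by the new MIN/MAX/ABS code above are easiest to see at the source level. The identities themselves are exact for the ranges below; this is a plain C++ spot check of those shapes (including the nested clamp form that AllowInMinMax admits), not compiler code.

    #include <algorithm>
    #include <cassert>
    #include <cstdlib>

    // Source-level shapes of the recognized selects, for integral a and b:
    //   a < b ? a : b   ==  min(a, b)        a < b ? b : a   ==  max(a, b)
    //   a < 0 ? -a : a  ==  abs(a)           a > 0 ? a : -a  ==  abs(a)
    // and the nested clamp form:  a < 100 ? max(a, -100) : 100  ==  clamp(a, -100, 100).
    int main() {
      for (int a = -300; a <= 300; ++a) {
        for (int b = -3; b <= 3; ++b) {
          assert((a < b ? a : b) == std::min(a, b));
          assert((a < b ? b : a) == std::max(a, b));
        }
        assert((a < 0 ? -a : a) == std::abs(a));
        int clamped = (a < 100) ? std::max(a, -100) : 100;
        assert(clamped == std::clamp(a, -100, 100));
      }
      return 0;
    }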
+ if (is_max ? (value >= other_value) : (value <= other_value)) { + return c; + } + } + } + return nullptr; +} + void InstructionSimplifierVisitor::VisitSelect(HSelect* select) { HInstruction* replace_with = nullptr; HInstruction* condition = select->GetCondition(); @@ -942,23 +974,35 @@ void InstructionSimplifierVisitor::VisitSelect(HSelect* select) { DataType::Type t_type = true_value->GetType(); DataType::Type f_type = false_value->GetType(); // Here we have a <cmp> b ? true_value : false_value. - // Test if both values are same-typed int or long. - if (t_type == f_type && - (t_type == DataType::Type::kInt32 || t_type == DataType::Type::kInt64)) { - // Try to replace typical integral ABS constructs. - if (true_value->IsNeg()) { - HInstruction* negated = true_value->InputAt(0); - if ((cmp == kCondLT || cmp == kCondLE) && - (a == negated && a == false_value && IsInt64Value(b, 0))) { - // Found a < 0 ? -a : a which can be replaced by ABS(a). - replace_with = NewIntegralAbs(GetGraph()->GetAllocator(), false_value, select); - } - } else if (false_value->IsNeg()) { - HInstruction* negated = false_value->InputAt(0); - if ((cmp == kCondGT || cmp == kCondGE) && - (a == true_value && a == negated && IsInt64Value(b, 0))) { - // Found a > 0 ? a : -a which can be replaced by ABS(a). - replace_with = NewIntegralAbs(GetGraph()->GetAllocator(), true_value, select); + // Test if both values are compatible integral types (resulting MIN/MAX/ABS + // type will be int or long, like the condition). Replacements are general, + // but assume conditions prefer constants on the right. + if (DataType::IsIntegralType(t_type) && DataType::Kind(t_type) == DataType::Kind(f_type)) { + // Allow a < 100 ? max(a, -100) : .. + // or a > -100 ? min(a, 100) : .. + // to use min/max instead of a to detect nested min/max expressions. + HInstruction* new_a = AllowInMinMax(cmp, a, b, true_value); + if (new_a != nullptr) { + a = new_a; + } + // Try to replace typical integral MIN/MAX/ABS constructs. + if ((cmp == kCondLT || cmp == kCondLE || cmp == kCondGT || cmp == kCondGE) && + ((a == true_value && b == false_value) || + (b == true_value && a == false_value))) { + // Found a < b ? a : b (MIN) or a < b ? b : a (MAX) + // or a > b ? a : b (MAX) or a > b ? b : a (MIN). + bool is_min = (cmp == kCondLT || cmp == kCondLE) == (a == true_value); + replace_with = NewIntegralMinMax(GetGraph()->GetAllocator(), a, b, select, is_min); + } else if (((cmp == kCondLT || cmp == kCondLE) && true_value->IsNeg()) || + ((cmp == kCondGT || cmp == kCondGE) && false_value->IsNeg())) { + bool negLeft = (cmp == kCondLT || cmp == kCondLE); + HInstruction* the_negated = negLeft ? true_value->InputAt(0) : false_value->InputAt(0); + HInstruction* not_negated = negLeft ? false_value : true_value; + if (a == the_negated && a == not_negated && IsInt64Value(b, 0)) { + // Found a < 0 ? -a : a + // or a > 0 ? a : -a + // which can be replaced by ABS(a). + replace_with = NewIntegralAbs(GetGraph()->GetAllocator(), a, select); } } else if (true_value->IsSub() && false_value->IsSub()) { HInstruction* true_sub1 = true_value->InputAt(0); @@ -970,8 +1014,8 @@ void InstructionSimplifierVisitor::VisitSelect(HSelect* select) { ((cmp == kCondLT || cmp == kCondLE) && (a == true_sub2 && b == true_sub1 && a == false_sub1 && b == false_sub2))) && AreLowerPrecisionArgs(t_type, a, b)) { - // Found a > b ? a - b : b - a or - // a < b ? b - a : a - b + // Found a > b ? a - b : b - a + // or a < b ? 
b - a : a - b // which can be replaced by ABS(a - b) for lower precision operands a, b. replace_with = NewIntegralAbs(GetGraph()->GetAllocator(), true_value, select); } @@ -1137,8 +1181,7 @@ void InstructionSimplifierVisitor::VisitTypeConversion(HTypeConversion* instruct HInstruction* input = instruction->GetInput(); DataType::Type input_type = input->GetType(); DataType::Type result_type = instruction->GetResultType(); - if (DataType::IsTypeConversionImplicit(input_type, result_type)) { - // Remove the implicit conversion; this includes conversion to the same type. + if (instruction->IsImplicitConversion()) { instruction->ReplaceWith(input); instruction->GetBlock()->RemoveInstruction(instruction); RecordSimplification(); @@ -1230,6 +1273,17 @@ void InstructionSimplifierVisitor::VisitTypeConversion(HTypeConversion* instruct } } +void InstructionSimplifierVisitor::VisitAbs(HAbs* instruction) { + HInstruction* input = instruction->GetInput(); + if (DataType::IsZeroExtension(input->GetType(), instruction->GetResultType())) { + // Zero extension from narrow to wide can never set sign bit in the wider + // operand, making the subsequent Abs redundant (e.g., abs(b & 0xff) for byte b). + instruction->ReplaceWith(input); + instruction->GetBlock()->RemoveInstruction(instruction); + RecordSimplification(); + } +} + void InstructionSimplifierVisitor::VisitAdd(HAdd* instruction) { HConstant* input_cst = instruction->GetConstantRight(); HInstruction* input_other = instruction->GetLeastConstantLeft(); @@ -1262,7 +1316,7 @@ void InstructionSimplifierVisitor::VisitAdd(HAdd* instruction) { } HNeg* neg = left_is_neg ? left->AsNeg() : right->AsNeg(); - if ((left_is_neg ^ right_is_neg) && neg->HasOnlyOneNonEnvironmentUse()) { + if (left_is_neg != right_is_neg && neg->HasOnlyOneNonEnvironmentUse()) { // Replace code looking like // NEG tmp, b // ADD dst, a, tmp @@ -1507,8 +1561,7 @@ static bool RecognizeAndSimplifyClassCheck(HCondition* condition) { { ScopedObjectAccess soa(Thread::Current()); - ClassLinker* class_linker = Runtime::Current()->GetClassLinker(); - ArtField* field = class_linker->GetClassRoot(ClassLinker::kJavaLangObject)->GetInstanceField(0); + ArtField* field = GetClassRoot<mirror::Object>()->GetInstanceField(0); DCHECK_EQ(std::string(field->GetName()), "shadow$_klass_"); if (field_get->GetFieldInfo().GetField() != field) { return false; @@ -2092,22 +2145,6 @@ void InstructionSimplifierVisitor::SimplifyStringEquals(HInvoke* instruction) { ReferenceTypeInfo argument_rti = argument->GetReferenceTypeInfo(); if (argument_rti.IsValid() && argument_rti.IsStringClass()) { optimizations.SetArgumentIsString(); - } else if (kUseReadBarrier) { - DCHECK(instruction->GetResolvedMethod() != nullptr); - DCHECK(instruction->GetResolvedMethod()->GetDeclaringClass()->IsStringClass() || - // Object.equals() can be devirtualized to String.equals(). - instruction->GetResolvedMethod()->GetDeclaringClass()->IsObjectClass()); - Runtime* runtime = Runtime::Current(); - // For AOT, we always assume that the boot image shall contain the String.class and - // we do not need a read barrier for boot image classes as they are non-moveable. - // For JIT, check if we actually have a boot image; if we do, the String.class - // should also be non-moveable. 
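The new VisitAbs() rule above drops an Abs whose input is a zero-extension, since the widened value can never have the sign bit set. The underlying identity, checked exhaustively for bytes in a standalone snippet:

    #include <cassert>
    #include <cstdint>
    #include <cstdlib>

    // abs(zero_extend(x)) == zero_extend(x): a zero-extended narrow value cannot be
    // negative in the wider type, so the Abs is redundant.
    int main() {
      for (int v = 0; v < 256; ++v) {
        uint8_t b = static_cast<uint8_t>(v);
        int widened = b;  // zero-extension, e.g. the result of (x & 0xff)
        assert(std::abs(widened) == widened);
      }
      return 0;
    }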
- if (runtime->IsAotCompiler() || runtime->GetHeap()->HasBootImageSpace()) { - DCHECK(runtime->IsAotCompiler() || - !runtime->GetHeap()->IsMovableObject( - instruction->GetResolvedMethod()->GetDeclaringClass())); - optimizations.SetNoReadBarrierForStringClass(); - } } } } @@ -2214,7 +2251,7 @@ void InstructionSimplifierVisitor::SimplifySystemArrayCopy(HInvoke* instruction) ClassLinker* class_linker = Runtime::Current()->GetClassLinker(); PointerSize image_size = class_linker->GetImagePointerSize(); HInvokeStaticOrDirect* invoke = instruction->AsInvokeStaticOrDirect(); - mirror::Class* system = invoke->GetResolvedMethod()->GetDeclaringClass(); + ObjPtr<mirror::Class> system = invoke->GetResolvedMethod()->GetDeclaringClass(); ArtMethod* method = nullptr; switch (source_component_type) { case DataType::Type::kBool: @@ -2252,7 +2289,7 @@ void InstructionSimplifierVisitor::SimplifySystemArrayCopy(HInvoke* instruction) // the invoke, as we would need to look it up in the current dex file, and it // is unlikely that it exists. The most usual situation for such typed // arraycopy methods is a direct pointer to the boot image. - HSharpening::SharpenInvokeStaticOrDirect(invoke, codegen_, compiler_driver_); + invoke->SetDispatchInfo(HSharpening::SharpenInvokeStaticOrDirect(method, codegen_)); } } } @@ -2324,17 +2361,17 @@ void InstructionSimplifierVisitor::SimplifyStringCharAt(HInvoke* invoke) { ArenaAllocator* allocator = GetGraph()->GetAllocator(); // We treat String as an array to allow DCE and BCE to seamlessly work on strings, // so create the HArrayLength, HBoundsCheck and HArrayGet. - HArrayLength* length = new (allocator) HArrayLength(str, dex_pc, /* is_string_length */ true); + HArrayLength* length = new (allocator) HArrayLength(str, dex_pc, /* is_string_length= */ true); invoke->GetBlock()->InsertInstructionBefore(length, invoke); HBoundsCheck* bounds_check = new (allocator) HBoundsCheck( - index, length, dex_pc, /* is_string_char_at */ true); + index, length, dex_pc, /* is_string_char_at= */ true); invoke->GetBlock()->InsertInstructionBefore(bounds_check, invoke); HArrayGet* array_get = new (allocator) HArrayGet(str, bounds_check, DataType::Type::kUint16, SideEffects::None(), // Strings are immutable. dex_pc, - /* is_string_char_at */ true); + /* is_string_char_at= */ true); invoke->GetBlock()->ReplaceAndRemoveInstructionWith(invoke, array_get); bounds_check->CopyEnvironmentFrom(invoke->GetEnvironment()); GetGraph()->SetHasBoundsChecks(true); @@ -2346,7 +2383,7 @@ void InstructionSimplifierVisitor::SimplifyStringIsEmptyOrLength(HInvoke* invoke // We treat String as an array to allow DCE and BCE to seamlessly work on strings, // so create the HArrayLength. HArrayLength* length = - new (GetGraph()->GetAllocator()) HArrayLength(str, dex_pc, /* is_string_length */ true); + new (GetGraph()->GetAllocator()) HArrayLength(str, dex_pc, /* is_string_length= */ true); HInstruction* replacement; if (invoke->GetIntrinsic() == Intrinsics::kStringIsEmpty) { // For String.isEmpty(), create the `HEqual` representing the `length == 0`. 
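SimplifyStringCharAt and SimplifyStringIsEmptyOrLength above model the string as a character array so that bounds-check elimination and DCE can treat it like any other array access. A rough source-level equivalent of the emitted HIR is sketched below; FakeString and the helper names are hypothetical, the real nodes are HArrayLength, HBoundsCheck and HArrayGet.

    #include <cstdint>
    #include <stdexcept>
    #include <vector>

    struct FakeString {
      std::vector<uint16_t> chars;  // stands in for the string's character storage
    };

    inline uint16_t CharAtLowered(const FakeString& s, int32_t index) {
      int32_t length = static_cast<int32_t>(s.chars.size());     // HArrayLength (is_string_length)
      if (static_cast<uint32_t>(index) >= static_cast<uint32_t>(length)) {
        throw std::out_of_range("StringIndexOutOfBounds");        // HBoundsCheck (is_string_char_at)
      }
      return s.chars[static_cast<size_t>(index)];                 // HArrayGet of a kUint16 element
    }

    inline bool IsEmptyLowered(const FakeString& s) {
      int32_t length = static_cast<int32_t>(s.chars.size());      // HArrayLength
      return length == 0;                                         // HEqual(length, 0)
    }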
@@ -2361,6 +2398,43 @@ void InstructionSimplifierVisitor::SimplifyStringIsEmptyOrLength(HInvoke* invoke invoke->GetBlock()->ReplaceAndRemoveInstructionWith(invoke, replacement); } +void InstructionSimplifierVisitor::SimplifyStringIndexOf(HInvoke* invoke) { + DCHECK(invoke->GetIntrinsic() == Intrinsics::kStringIndexOf || + invoke->GetIntrinsic() == Intrinsics::kStringIndexOfAfter); + if (invoke->InputAt(0)->IsLoadString()) { + HLoadString* load_string = invoke->InputAt(0)->AsLoadString(); + const DexFile& dex_file = load_string->GetDexFile(); + uint32_t utf16_length; + const char* data = + dex_file.StringDataAndUtf16LengthByIdx(load_string->GetStringIndex(), &utf16_length); + if (utf16_length == 0) { + invoke->ReplaceWith(GetGraph()->GetIntConstant(-1)); + invoke->GetBlock()->RemoveInstruction(invoke); + RecordSimplification(); + return; + } + if (utf16_length == 1 && invoke->GetIntrinsic() == Intrinsics::kStringIndexOf) { + // Simplify to HSelect(HEquals(., load_string.charAt(0)), 0, -1). + // If the sought character is supplementary, this gives the correct result, i.e. -1. + uint32_t c = GetUtf16FromUtf8(&data); + DCHECK_EQ(GetTrailingUtf16Char(c), 0u); + DCHECK_EQ(GetLeadingUtf16Char(c), c); + uint32_t dex_pc = invoke->GetDexPc(); + ArenaAllocator* allocator = GetGraph()->GetAllocator(); + HEqual* equal = + new (allocator) HEqual(invoke->InputAt(1), GetGraph()->GetIntConstant(c), dex_pc); + invoke->GetBlock()->InsertInstructionBefore(equal, invoke); + HSelect* result = new (allocator) HSelect(equal, + GetGraph()->GetIntConstant(0), + GetGraph()->GetIntConstant(-1), + dex_pc); + invoke->GetBlock()->ReplaceAndRemoveInstructionWith(invoke, result); + RecordSimplification(); + return; + } + } +} + // This method should only be used on intrinsics whose sole way of throwing an // exception is raising a NPE when the nth argument is null. If that argument // is provably non-null, we can clear the flag. 
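A hedged sketch (illustration only, not part of the change) of the scalar value that the SimplifyStringIndexOf() rewrite above computes for a constant single-character receiver; the function name is invented for the example.

    #include <cassert>
    #include <cstdint>

    // "A".indexOf(ch) becomes HSelect(HEqual(ch, 'A'), 0, -1), and an empty receiver folds
    // straight to the constant -1. A supplementary code point (> 0xFFFF) can never equal a
    // single UTF-16 unit, so it correctly lands in the -1 branch.
    int32_t IndexOfSingleCharReceiver(int32_t ch, char16_t receiver_char) {
      return (ch == receiver_char) ? 0 : -1;
    }

    int main() {
      assert(IndexOfSingleCharReceiver(u'A', u'A') == 0);
      assert(IndexOfSingleCharReceiver(u'B', u'A') == -1);
      assert(IndexOfSingleCharReceiver(0x1F600, u'A') == -1);  // supplementary code point
      return 0;
    }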
@@ -2430,6 +2504,27 @@ void InstructionSimplifierVisitor::SimplifyMemBarrier(HInvoke* invoke, invoke->GetBlock()->ReplaceAndRemoveInstructionWith(invoke, mem_barrier); } +void InstructionSimplifierVisitor::SimplifyMin(HInvoke* invoke, DataType::Type type) { + DCHECK(invoke->IsInvokeStaticOrDirect()); + HMin* min = new (GetGraph()->GetAllocator()) + HMin(type, invoke->InputAt(0), invoke->InputAt(1), invoke->GetDexPc()); + invoke->GetBlock()->ReplaceAndRemoveInstructionWith(invoke, min); +} + +void InstructionSimplifierVisitor::SimplifyMax(HInvoke* invoke, DataType::Type type) { + DCHECK(invoke->IsInvokeStaticOrDirect()); + HMax* max = new (GetGraph()->GetAllocator()) + HMax(type, invoke->InputAt(0), invoke->InputAt(1), invoke->GetDexPc()); + invoke->GetBlock()->ReplaceAndRemoveInstructionWith(invoke, max); +} + +void InstructionSimplifierVisitor::SimplifyAbs(HInvoke* invoke, DataType::Type type) { + DCHECK(invoke->IsInvokeStaticOrDirect()); + HAbs* abs = new (GetGraph()->GetAllocator()) + HAbs(type, invoke->InputAt(0), invoke->GetDexPc()); + invoke->GetBlock()->ReplaceAndRemoveInstructionWith(invoke, abs); +} + void InstructionSimplifierVisitor::VisitInvoke(HInvoke* instruction) { switch (instruction->GetIntrinsic()) { case Intrinsics::kStringEquals: @@ -2439,28 +2534,28 @@ void InstructionSimplifierVisitor::VisitInvoke(HInvoke* instruction) { SimplifySystemArrayCopy(instruction); break; case Intrinsics::kIntegerRotateRight: - SimplifyRotate(instruction, /* is_left */ false, DataType::Type::kInt32); + SimplifyRotate(instruction, /* is_left= */ false, DataType::Type::kInt32); break; case Intrinsics::kLongRotateRight: - SimplifyRotate(instruction, /* is_left */ false, DataType::Type::kInt64); + SimplifyRotate(instruction, /* is_left= */ false, DataType::Type::kInt64); break; case Intrinsics::kIntegerRotateLeft: - SimplifyRotate(instruction, /* is_left */ true, DataType::Type::kInt32); + SimplifyRotate(instruction, /* is_left= */ true, DataType::Type::kInt32); break; case Intrinsics::kLongRotateLeft: - SimplifyRotate(instruction, /* is_left */ true, DataType::Type::kInt64); + SimplifyRotate(instruction, /* is_left= */ true, DataType::Type::kInt64); break; case Intrinsics::kIntegerCompare: - SimplifyCompare(instruction, /* is_signum */ false, DataType::Type::kInt32); + SimplifyCompare(instruction, /* is_signum= */ false, DataType::Type::kInt32); break; case Intrinsics::kLongCompare: - SimplifyCompare(instruction, /* is_signum */ false, DataType::Type::kInt64); + SimplifyCompare(instruction, /* is_signum= */ false, DataType::Type::kInt64); break; case Intrinsics::kIntegerSignum: - SimplifyCompare(instruction, /* is_signum */ true, DataType::Type::kInt32); + SimplifyCompare(instruction, /* is_signum= */ true, DataType::Type::kInt32); break; case Intrinsics::kLongSignum: - SimplifyCompare(instruction, /* is_signum */ true, DataType::Type::kInt64); + SimplifyCompare(instruction, /* is_signum= */ true, DataType::Type::kInt64); break; case Intrinsics::kFloatIsNaN: case Intrinsics::kDoubleIsNaN: @@ -2477,6 +2572,10 @@ void InstructionSimplifierVisitor::VisitInvoke(HInvoke* instruction) { case Intrinsics::kStringLength: SimplifyStringIsEmptyOrLength(instruction); break; + case Intrinsics::kStringIndexOf: + case Intrinsics::kStringIndexOfAfter: + SimplifyStringIndexOf(instruction); + break; case Intrinsics::kStringStringIndexOf: case Intrinsics::kStringStringIndexOfAfter: SimplifyNPEOnArgN(instruction, 1); // 0th has own NullCheck @@ -2513,6 +2612,42 @@ void 
InstructionSimplifierVisitor::VisitInvoke(HInvoke* instruction) { case Intrinsics::kVarHandleStoreStoreFence: SimplifyMemBarrier(instruction, MemBarrierKind::kStoreStore); break; + case Intrinsics::kMathMinIntInt: + SimplifyMin(instruction, DataType::Type::kInt32); + break; + case Intrinsics::kMathMinLongLong: + SimplifyMin(instruction, DataType::Type::kInt64); + break; + case Intrinsics::kMathMinFloatFloat: + SimplifyMin(instruction, DataType::Type::kFloat32); + break; + case Intrinsics::kMathMinDoubleDouble: + SimplifyMin(instruction, DataType::Type::kFloat64); + break; + case Intrinsics::kMathMaxIntInt: + SimplifyMax(instruction, DataType::Type::kInt32); + break; + case Intrinsics::kMathMaxLongLong: + SimplifyMax(instruction, DataType::Type::kInt64); + break; + case Intrinsics::kMathMaxFloatFloat: + SimplifyMax(instruction, DataType::Type::kFloat32); + break; + case Intrinsics::kMathMaxDoubleDouble: + SimplifyMax(instruction, DataType::Type::kFloat64); + break; + case Intrinsics::kMathAbsInt: + SimplifyAbs(instruction, DataType::Type::kInt32); + break; + case Intrinsics::kMathAbsLong: + SimplifyAbs(instruction, DataType::Type::kInt64); + break; + case Intrinsics::kMathAbsFloat: + SimplifyAbs(instruction, DataType::Type::kFloat32); + break; + case Intrinsics::kMathAbsDouble: + SimplifyAbs(instruction, DataType::Type::kFloat64); + break; default: break; } @@ -2553,10 +2688,10 @@ bool InstructionSimplifierVisitor::TryHandleAssociativeAndCommutativeOperation( HConstant* const2; HBinaryOperation* y; - if (instruction->InstructionTypeEquals(left) && right->IsConstant()) { + if (instruction->GetKind() == left->GetKind() && right->IsConstant()) { const2 = right->AsConstant(); y = left->AsBinaryOperation(); - } else if (left->IsConstant() && instruction->InstructionTypeEquals(right)) { + } else if (left->IsConstant() && instruction->GetKind() == right->GetKind()) { const2 = left->AsConstant(); y = right->AsBinaryOperation(); } else { diff --git a/compiler/optimizing/instruction_simplifier.h b/compiler/optimizing/instruction_simplifier.h index 5e2045580b..982a24a6f0 100644 --- a/compiler/optimizing/instruction_simplifier.h +++ b/compiler/optimizing/instruction_simplifier.h @@ -24,7 +24,6 @@ namespace art { class CodeGenerator; -class CompilerDriver; /** * Implements optimizations specific to each instruction. 
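For illustration (not part of the patch), the algebraic shape that TryHandleAssociativeAndCommutativeOperation() above matches, written out in plain C++: two nested operations of the same kind with a constant on each level, whose constants can be folded into one.

    #include <cassert>
    #include <cstdint>

    int main() {
      int32_t x = 12345, c1 = 7, c2 = 35;
      // ADD(ADD(x, c1), c2) -> ADD(x, c1 + c2): the two constants fold at compile time.
      assert((x + c1) + c2 == x + (c1 + c2));
      // The same identity holds for other associative-and-commutative operators, e.g. &, |, ^.
      assert(((x & c1) & c2) == (x & (c1 & c2)));
      return 0;
    }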
@@ -40,20 +39,17 @@ class InstructionSimplifier : public HOptimization { public: InstructionSimplifier(HGraph* graph, CodeGenerator* codegen, - CompilerDriver* compiler_driver, OptimizingCompilerStats* stats = nullptr, const char* name = kInstructionSimplifierPassName) : HOptimization(graph, name, stats), - codegen_(codegen), - compiler_driver_(compiler_driver) {} + codegen_(codegen) {} static constexpr const char* kInstructionSimplifierPassName = "instruction_simplifier"; - void Run() OVERRIDE; + bool Run() override; private: CodeGenerator* codegen_; - CompilerDriver* compiler_driver_; DISALLOW_COPY_AND_ASSIGN(InstructionSimplifier); }; diff --git a/compiler/optimizing/instruction_simplifier_arm.cc b/compiler/optimizing/instruction_simplifier_arm.cc index 92081e30b1..01e9cff6d8 100644 --- a/compiler/optimizing/instruction_simplifier_arm.cc +++ b/compiler/optimizing/instruction_simplifier_arm.cc @@ -43,11 +43,11 @@ class InstructionSimplifierArmVisitor : public HGraphVisitor { bool TryMergeIntoUsersShifterOperand(HInstruction* instruction); bool TryMergeIntoShifterOperand(HInstruction* use, HInstruction* bitfield_op, bool do_merge); bool CanMergeIntoShifterOperand(HInstruction* use, HInstruction* bitfield_op) { - return TryMergeIntoShifterOperand(use, bitfield_op, /* do_merge */ false); + return TryMergeIntoShifterOperand(use, bitfield_op, /* do_merge= */ false); } bool MergeIntoShifterOperand(HInstruction* use, HInstruction* bitfield_op) { DCHECK(CanMergeIntoShifterOperand(use, bitfield_op)); - return TryMergeIntoShifterOperand(use, bitfield_op, /* do_merge */ true); + return TryMergeIntoShifterOperand(use, bitfield_op, /* do_merge= */ true); } /** @@ -56,7 +56,7 @@ class InstructionSimplifierArmVisitor : public HGraphVisitor { * (2) Since statements can be removed in a "forward" fashion, * the visitor should test if each statement is still there. */ - void VisitBasicBlock(HBasicBlock* block) OVERRIDE { + void VisitBasicBlock(HBasicBlock* block) override { // TODO: fragile iteration, provide more robust iterators? for (HInstructionIterator it(block->GetInstructions()); !it.Done(); it.Advance()) { HInstruction* instruction = it.Current(); @@ -66,15 +66,15 @@ class InstructionSimplifierArmVisitor : public HGraphVisitor { } } - void VisitAnd(HAnd* instruction) OVERRIDE; - void VisitArrayGet(HArrayGet* instruction) OVERRIDE; - void VisitArraySet(HArraySet* instruction) OVERRIDE; - void VisitMul(HMul* instruction) OVERRIDE; - void VisitOr(HOr* instruction) OVERRIDE; - void VisitShl(HShl* instruction) OVERRIDE; - void VisitShr(HShr* instruction) OVERRIDE; - void VisitTypeConversion(HTypeConversion* instruction) OVERRIDE; - void VisitUShr(HUShr* instruction) OVERRIDE; + void VisitAnd(HAnd* instruction) override; + void VisitArrayGet(HArrayGet* instruction) override; + void VisitArraySet(HArraySet* instruction) override; + void VisitMul(HMul* instruction) override; + void VisitOr(HOr* instruction) override; + void VisitShl(HShl* instruction) override; + void VisitShr(HShr* instruction) override; + void VisitTypeConversion(HTypeConversion* instruction) override; + void VisitUShr(HUShr* instruction) override; OptimizingCompilerStats* stats_; }; @@ -202,6 +202,11 @@ void InstructionSimplifierArmVisitor::VisitArrayGet(HArrayGet* instruction) { return; } + // TODO: Support intermediate address for object arrays on arm. 
+ if (type == DataType::Type::kReference) { + return; + } + if (type == DataType::Type::kInt64 || type == DataType::Type::kFloat32 || type == DataType::Type::kFloat64) { @@ -283,9 +288,10 @@ void InstructionSimplifierArmVisitor::VisitUShr(HUShr* instruction) { } } -void InstructionSimplifierArm::Run() { +bool InstructionSimplifierArm::Run() { InstructionSimplifierArmVisitor visitor(graph_, stats_); visitor.VisitReversePostOrder(); + return true; } } // namespace arm diff --git a/compiler/optimizing/instruction_simplifier_arm.h b/compiler/optimizing/instruction_simplifier_arm.h index 2f6572931f..fca9341d59 100644 --- a/compiler/optimizing/instruction_simplifier_arm.h +++ b/compiler/optimizing/instruction_simplifier_arm.h @@ -30,7 +30,7 @@ class InstructionSimplifierArm : public HOptimization { static constexpr const char* kInstructionSimplifierArmPassName = "instruction_simplifier_arm"; - void Run() OVERRIDE; + bool Run() override; }; } // namespace arm diff --git a/compiler/optimizing/instruction_simplifier_arm64.cc b/compiler/optimizing/instruction_simplifier_arm64.cc index 1c44e5ac49..e23decbd71 100644 --- a/compiler/optimizing/instruction_simplifier_arm64.cc +++ b/compiler/optimizing/instruction_simplifier_arm64.cc @@ -45,11 +45,11 @@ class InstructionSimplifierArm64Visitor : public HGraphVisitor { HInstruction* bitfield_op, bool do_merge); bool CanMergeIntoShifterOperand(HInstruction* use, HInstruction* bitfield_op) { - return TryMergeIntoShifterOperand(use, bitfield_op, /* do_merge */ false); + return TryMergeIntoShifterOperand(use, bitfield_op, /* do_merge= */ false); } bool MergeIntoShifterOperand(HInstruction* use, HInstruction* bitfield_op) { DCHECK(CanMergeIntoShifterOperand(use, bitfield_op)); - return TryMergeIntoShifterOperand(use, bitfield_op, /* do_merge */ true); + return TryMergeIntoShifterOperand(use, bitfield_op, /* do_merge= */ true); } /** @@ -58,7 +58,7 @@ class InstructionSimplifierArm64Visitor : public HGraphVisitor { * (2) Since statements can be removed in a "forward" fashion, * the visitor should test if each statement is still there. */ - void VisitBasicBlock(HBasicBlock* block) OVERRIDE { + void VisitBasicBlock(HBasicBlock* block) override { // TODO: fragile iteration, provide more robust iterators? for (HInstructionIterator it(block->GetInstructions()); !it.Done(); it.Advance()) { HInstruction* instruction = it.Current(); @@ -69,18 +69,18 @@ class InstructionSimplifierArm64Visitor : public HGraphVisitor { } // HInstruction visitors, sorted alphabetically. 
- void VisitAnd(HAnd* instruction) OVERRIDE; - void VisitArrayGet(HArrayGet* instruction) OVERRIDE; - void VisitArraySet(HArraySet* instruction) OVERRIDE; - void VisitMul(HMul* instruction) OVERRIDE; - void VisitOr(HOr* instruction) OVERRIDE; - void VisitShl(HShl* instruction) OVERRIDE; - void VisitShr(HShr* instruction) OVERRIDE; - void VisitTypeConversion(HTypeConversion* instruction) OVERRIDE; - void VisitUShr(HUShr* instruction) OVERRIDE; - void VisitXor(HXor* instruction) OVERRIDE; - void VisitVecLoad(HVecLoad* instruction) OVERRIDE; - void VisitVecStore(HVecStore* instruction) OVERRIDE; + void VisitAnd(HAnd* instruction) override; + void VisitArrayGet(HArrayGet* instruction) override; + void VisitArraySet(HArraySet* instruction) override; + void VisitMul(HMul* instruction) override; + void VisitOr(HOr* instruction) override; + void VisitShl(HShl* instruction) override; + void VisitShr(HShr* instruction) override; + void VisitTypeConversion(HTypeConversion* instruction) override; + void VisitUShr(HUShr* instruction) override; + void VisitXor(HXor* instruction) override; + void VisitVecLoad(HVecLoad* instruction) override; + void VisitVecStore(HVecStore* instruction) override; OptimizingCompilerStats* stats_; }; @@ -278,9 +278,10 @@ void InstructionSimplifierArm64Visitor::VisitVecStore(HVecStore* instruction) { } } -void InstructionSimplifierArm64::Run() { +bool InstructionSimplifierArm64::Run() { InstructionSimplifierArm64Visitor visitor(graph_, stats_); visitor.VisitReversePostOrder(); + return true; } } // namespace arm64 diff --git a/compiler/optimizing/instruction_simplifier_arm64.h b/compiler/optimizing/instruction_simplifier_arm64.h index d180a8dc46..8d93c01ebf 100644 --- a/compiler/optimizing/instruction_simplifier_arm64.h +++ b/compiler/optimizing/instruction_simplifier_arm64.h @@ -30,7 +30,7 @@ class InstructionSimplifierArm64 : public HOptimization { static constexpr const char* kInstructionSimplifierArm64PassName = "instruction_simplifier_arm64"; - void Run() OVERRIDE; + bool Run() override; }; } // namespace arm64 diff --git a/compiler/optimizing/instruction_simplifier_mips.cc b/compiler/optimizing/instruction_simplifier_mips.cc index fa97401a0c..5d0c63b76b 100644 --- a/compiler/optimizing/instruction_simplifier_mips.cc +++ b/compiler/optimizing/instruction_simplifier_mips.cc @@ -39,8 +39,8 @@ class InstructionSimplifierMipsVisitor : public HGraphVisitor { bool TryExtractArrayAccessIndex(HInstruction* access, HInstruction* index, DataType::Type packed_type); - void VisitArrayGet(HArrayGet* instruction) OVERRIDE; - void VisitArraySet(HArraySet* instruction) OVERRIDE; + void VisitArrayGet(HArrayGet* instruction) override; + void VisitArraySet(HArraySet* instruction) override; OptimizingCompilerStats* stats_; CodeGeneratorMIPS* codegen_; @@ -131,9 +131,10 @@ void InstructionSimplifierMipsVisitor::VisitArraySet(HArraySet* instruction) { } } -void InstructionSimplifierMips::Run() { +bool InstructionSimplifierMips::Run() { InstructionSimplifierMipsVisitor visitor(graph_, codegen_, stats_); visitor.VisitReversePostOrder(); + return true; } } // namespace mips diff --git a/compiler/optimizing/instruction_simplifier_mips.h b/compiler/optimizing/instruction_simplifier_mips.h index 6cb8affe85..b431334811 100644 --- a/compiler/optimizing/instruction_simplifier_mips.h +++ b/compiler/optimizing/instruction_simplifier_mips.h @@ -35,7 +35,7 @@ class InstructionSimplifierMips : public HOptimization { static constexpr const char* kInstructionSimplifierMipsPassName = 
"instruction_simplifier_mips"; - void Run() OVERRIDE; + bool Run() override; private: CodeGeneratorMIPS* codegen_; diff --git a/compiler/optimizing/instruction_simplifier_shared.cc b/compiler/optimizing/instruction_simplifier_shared.cc index ccdcb3532d..0f30f662cd 100644 --- a/compiler/optimizing/instruction_simplifier_shared.cc +++ b/compiler/optimizing/instruction_simplifier_shared.cc @@ -245,11 +245,11 @@ bool TryExtractArrayAccessAddress(HInstruction* access, return false; } if (kEmitCompilerReadBarrier && + !kUseBakerReadBarrier && access->IsArrayGet() && access->GetType() == DataType::Type::kReference) { - // For object arrays, the read barrier instrumentation requires + // For object arrays, the non-Baker read barrier instrumentation requires // the original array pointer. - // TODO: This can be relaxed for Baker CC. return false; } diff --git a/compiler/optimizing/instruction_simplifier_x86.cc b/compiler/optimizing/instruction_simplifier_x86.cc new file mode 100644 index 0000000000..2d8f94a85b --- /dev/null +++ b/compiler/optimizing/instruction_simplifier_x86.cc @@ -0,0 +1,88 @@ +/* Copyright (C) 2018 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "instruction_simplifier_x86.h" +#include "instruction_simplifier_x86_shared.h" +#include "code_generator_x86.h" + +namespace art { + +namespace x86 { + +class InstructionSimplifierX86Visitor : public HGraphVisitor { + public: + InstructionSimplifierX86Visitor(HGraph* graph, + CodeGenerator* codegen, + OptimizingCompilerStats* stats) + : HGraphVisitor(graph), + codegen_(down_cast<CodeGeneratorX86*>(codegen)), + stats_(stats) {} + + void RecordSimplification() { + MaybeRecordStat(stats_, MethodCompilationStat::kInstructionSimplificationsArch); + } + + bool HasAVX2() { + return (codegen_->GetInstructionSetFeatures().HasAVX2()); + } + + void VisitBasicBlock(HBasicBlock* block) override { + for (HInstructionIterator it(block->GetInstructions()); !it.Done(); it.Advance()) { + HInstruction* instruction = it.Current(); + if (instruction->IsInBlock()) { + instruction->Accept(this); + } + } + } + + void VisitAnd(HAnd * instruction) override; + void VisitXor(HXor* instruction) override; + + private: + CodeGeneratorX86* codegen_; + OptimizingCompilerStats* stats_; +}; + + +void InstructionSimplifierX86Visitor::VisitAnd(HAnd* instruction) { + if (TryCombineAndNot(instruction)) { + RecordSimplification(); + } else if (instruction->GetResultType() == DataType::Type::kInt32) { + if (TryGenerateResetLeastSetBit(instruction)) { + RecordSimplification(); + } + } +} + +void InstructionSimplifierX86Visitor::VisitXor(HXor* instruction) { + if (instruction->GetResultType() == DataType::Type::kInt32) { + if (TryGenerateMaskUptoLeastSetBit(instruction)) { + RecordSimplification(); + } + } +} + +bool InstructionSimplifierX86::Run() { + InstructionSimplifierX86Visitor visitor(graph_, codegen_, stats_); + if (visitor.HasAVX2()) { + visitor.VisitReversePostOrder(); + return true; + } + return false; +} + +} // 
namespace x86 +} // namespace art + diff --git a/compiler/optimizing/instruction_simplifier_x86.h b/compiler/optimizing/instruction_simplifier_x86.h new file mode 100644 index 0000000000..6f10006db2 --- /dev/null +++ b/compiler/optimizing/instruction_simplifier_x86.h @@ -0,0 +1,44 @@ +/*Copyright (C) 2018 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef ART_COMPILER_OPTIMIZING_INSTRUCTION_SIMPLIFIER_X86_H_ +#define ART_COMPILER_OPTIMIZING_INSTRUCTION_SIMPLIFIER_X86_H_ + +#include "nodes.h" +#include "optimization.h" + +namespace art { + +class CodeGenerator; +namespace x86 { + +class InstructionSimplifierX86 : public HOptimization { + public: + InstructionSimplifierX86(HGraph* graph, CodeGenerator* codegen, OptimizingCompilerStats* stats) + : HOptimization(graph, kInstructionSimplifierX86PassName, stats), + codegen_(codegen) {} + + static constexpr const char* kInstructionSimplifierX86PassName = "instruction_simplifier_x86"; + + bool Run() override; + + private: + CodeGenerator* codegen_; +}; + +} // namespace x86 +} // namespace art + +#endif // ART_COMPILER_OPTIMIZING_INSTRUCTION_SIMPLIFIER_X86_H_ diff --git a/compiler/optimizing/instruction_simplifier_x86_64.cc b/compiler/optimizing/instruction_simplifier_x86_64.cc new file mode 100644 index 0000000000..56c6b414d7 --- /dev/null +++ b/compiler/optimizing/instruction_simplifier_x86_64.cc @@ -0,0 +1,82 @@ +/* Copyright (C) 2018 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "instruction_simplifier_x86_64.h" +#include "instruction_simplifier_x86_shared.h" +#include "code_generator_x86_64.h" + +namespace art { + +namespace x86_64 { + +class InstructionSimplifierX86_64Visitor : public HGraphVisitor { + public: + InstructionSimplifierX86_64Visitor(HGraph* graph, + CodeGenerator* codegen, + OptimizingCompilerStats* stats) + : HGraphVisitor(graph), + codegen_(down_cast<CodeGeneratorX86_64*>(codegen)), + stats_(stats) {} + + void RecordSimplification() { + MaybeRecordStat(stats_, MethodCompilationStat::kInstructionSimplificationsArch); + } + + bool HasAVX2() { + return codegen_->GetInstructionSetFeatures().HasAVX2(); + } + + void VisitBasicBlock(HBasicBlock* block) override { + for (HInstructionIterator it(block->GetInstructions()); !it.Done(); it.Advance()) { + HInstruction* instruction = it.Current(); + if (instruction->IsInBlock()) { + instruction->Accept(this); + } + } + } + + void VisitAnd(HAnd* instruction) override; + void VisitXor(HXor* instruction) override; + + private: + CodeGeneratorX86_64* codegen_; + OptimizingCompilerStats* stats_; +}; + +void InstructionSimplifierX86_64Visitor::VisitAnd(HAnd* instruction) { + if (TryCombineAndNot(instruction)) { + RecordSimplification(); + } else if (TryGenerateResetLeastSetBit(instruction)) { + RecordSimplification(); + } +} + + +void InstructionSimplifierX86_64Visitor::VisitXor(HXor* instruction) { + if (TryGenerateMaskUptoLeastSetBit(instruction)) { + RecordSimplification(); + } +} + +bool InstructionSimplifierX86_64::Run() { + InstructionSimplifierX86_64Visitor visitor(graph_, codegen_, stats_); + if (visitor.HasAVX2()) { + visitor.VisitReversePostOrder(); + return true; + } + return false; +} +} // namespace x86_64 +} // namespace art diff --git a/compiler/optimizing/instruction_simplifier_x86_64.h b/compiler/optimizing/instruction_simplifier_x86_64.h new file mode 100644 index 0000000000..6cae24d11a --- /dev/null +++ b/compiler/optimizing/instruction_simplifier_x86_64.h @@ -0,0 +1,48 @@ +/* Copyright (C) 2018 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef ART_COMPILER_OPTIMIZING_INSTRUCTION_SIMPLIFIER_X86_64_H_ +#define ART_COMPILER_OPTIMIZING_INSTRUCTION_SIMPLIFIER_X86_64_H_ + +#include "nodes.h" +#include "optimization.h" + +namespace art { + +class CodeGenerator; + +namespace x86_64 { + +class InstructionSimplifierX86_64 : public HOptimization { + public: + InstructionSimplifierX86_64(HGraph* graph, CodeGenerator* codegen, OptimizingCompilerStats* stats) + : HOptimization(graph, kInstructionSimplifierX86_64PassName, stats), + codegen_(codegen) {} + + static constexpr const char* kInstructionSimplifierX86_64PassName = + "instruction_simplifier_x86_64"; + + bool Run() override; + + private: + CodeGenerator* codegen_; +}; + +} // namespace x86_64 +} // namespace art + +#endif // ART_COMPILER_OPTIMIZING_INSTRUCTION_SIMPLIFIER_X86_64_H_ + + diff --git a/compiler/optimizing/instruction_simplifier_x86_shared.cc b/compiler/optimizing/instruction_simplifier_x86_shared.cc new file mode 100644 index 0000000000..2805abb2bb --- /dev/null +++ b/compiler/optimizing/instruction_simplifier_x86_shared.cc @@ -0,0 +1,137 @@ +/* Copyright (C) 2018 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "instruction_simplifier_x86_shared.h" +#include "nodes_x86.h" + +namespace art { + +bool TryCombineAndNot(HAnd* instruction) { + DataType::Type type = instruction->GetType(); + if (!DataType::IsIntOrLongType(type)) { + return false; + } + // Replace code looking like + // Not tmp, y + // And dst, x, tmp + // with + // AndNot dst, x, y + HInstruction* left = instruction->GetLeft(); + HInstruction* right = instruction->GetRight(); + // Perform simplification only when either left or right + // is Not. When both are Not, instruction should be simplified with + // DeMorgan's Laws. + if (left->IsNot() ^ right->IsNot()) { + bool left_is_not = left->IsNot(); + HInstruction* other_ins = (left_is_not ? right : left); + HNot* not_ins = (left_is_not ? left : right)->AsNot(); + // Only do the simplification if instruction has only one use + // and thus can be safely removed.
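// Illustration only (not lines from this patch): the rewrite relies on the identity
//   x & ~y == AndNot(x, y), e.g. x = 0b1100, y = 0b1010 gives x & ~y = 0b0100,
// which is what a single x86 ANDN instruction computes. The helpers below build on the
// related identities x & (x - 1) (clear the lowest set bit, BLSR) and x ^ (x - 1)
// (mask up to and including the lowest set bit, BLSMSK):
//   0b1100 & (0b1100 - 1) == 0b1000,   0b1100 ^ (0b1100 - 1) == 0b0111.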
+ if (not_ins->HasOnlyOneNonEnvironmentUse()) { + ArenaAllocator* arena = instruction->GetBlock()->GetGraph()->GetAllocator(); + HX86AndNot* and_not = new (arena) HX86AndNot(type, + not_ins->GetInput(), + other_ins, + instruction->GetDexPc()); + instruction->GetBlock()->ReplaceAndRemoveInstructionWith(instruction, and_not); + DCHECK(!not_ins->HasUses()); + not_ins->GetBlock()->RemoveInstruction(not_ins); + return true; + } + } + return false; +} + +bool TryGenerateResetLeastSetBit(HAnd* instruction) { + DataType::Type type = instruction->GetType(); + if (!DataType::IsIntOrLongType(type)) { + return false; + } + // Replace code looking like + // Add tmp, x, -1 or Sub tmp, x, 1 + // And dest x, tmp + // with + // MaskOrResetLeastSetBit dest, x + HInstruction* candidate = nullptr; + HInstruction* other = nullptr; + HInstruction* left = instruction->GetLeft(); + HInstruction* right = instruction->GetRight(); + if (AreLeastSetBitInputs(left, right)) { + candidate = left; + other = right; + } else if (AreLeastSetBitInputs(right, left)) { + candidate = right; + other = left; + } + if (candidate != nullptr && candidate->HasOnlyOneNonEnvironmentUse()) { + ArenaAllocator* arena = instruction->GetBlock()->GetGraph()->GetAllocator(); + HX86MaskOrResetLeastSetBit* lsb = new (arena) HX86MaskOrResetLeastSetBit( + type, HInstruction::kAnd, other, instruction->GetDexPc()); + instruction->GetBlock()->ReplaceAndRemoveInstructionWith(instruction, lsb); + DCHECK(!candidate->HasUses()); + candidate->GetBlock()->RemoveInstruction(candidate); + return true; + } + return false; +} + +bool TryGenerateMaskUptoLeastSetBit(HXor* instruction) { + DataType::Type type = instruction->GetType(); + if (!DataType::IsIntOrLongType(type)) { + return false; + } + // Replace code looking like + // Add tmp, x, -1 or Sub tmp, x, 1 + // Xor dest x, tmp + // with + // MaskOrResetLeastSetBit dest, x + HInstruction* left = instruction->GetLeft(); + HInstruction* right = instruction->GetRight(); + HInstruction* other = nullptr; + HInstruction* candidate = nullptr; + if (AreLeastSetBitInputs(left, right)) { + candidate = left; + other = right; + } else if (AreLeastSetBitInputs(right, left)) { + candidate = right; + other = left; + } + if (candidate != nullptr && candidate->HasOnlyOneNonEnvironmentUse()) { + ArenaAllocator* arena = instruction->GetBlock()->GetGraph()->GetAllocator(); + HX86MaskOrResetLeastSetBit* lsb = new (arena) HX86MaskOrResetLeastSetBit( + type, HInstruction::kXor, other, instruction->GetDexPc()); + instruction->GetBlock()->ReplaceAndRemoveInstructionWith(instruction, lsb); + DCHECK(!candidate->HasUses()); + candidate->GetBlock()->RemoveInstruction(candidate); + return true; + } + return false; +} + +bool AreLeastSetBitInputs(HInstruction* to_test, HInstruction* other) { + if (to_test->IsAdd()) { + HAdd* add = to_test->AsAdd(); + HConstant* cst = add->GetConstantRight(); + return cst != nullptr && cst->IsMinusOne() && other == add->GetLeastConstantLeft(); + } + if (to_test->IsSub()) { + HSub* sub = to_test->AsSub(); + HConstant* cst = sub->GetConstantRight(); + return cst != nullptr && cst->IsOne() && other == sub->GetLeastConstantLeft(); + } + return false; +} + +} // namespace art diff --git a/compiler/optimizing/instruction_simplifier_x86_shared.h b/compiler/optimizing/instruction_simplifier_x86_shared.h new file mode 100644 index 0000000000..7f94d7ea4c --- /dev/null +++ b/compiler/optimizing/instruction_simplifier_x86_shared.h @@ -0,0 +1,29 @@ +/* Copyright (C) 2018 The Android Open Source Project + * + * 
Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef ART_COMPILER_OPTIMIZING_INSTRUCTION_SIMPLIFIER_X86_SHARED_H_ +#define ART_COMPILER_OPTIMIZING_INSTRUCTION_SIMPLIFIER_X86_SHARED_H_ + +#include "nodes.h" + +namespace art { +bool TryCombineAndNot(HAnd* instruction); +bool TryGenerateResetLeastSetBit(HAnd* instruction); +bool TryGenerateMaskUptoLeastSetBit(HXor* instruction); +bool AreLeastSetBitInputs(HInstruction* to_test, HInstruction* other); +} // namespace art + +#endif // ART_COMPILER_OPTIMIZING_INSTRUCTION_SIMPLIFIER_X86_SHARED_H_ + diff --git a/compiler/optimizing/intrinsic_objects.cc b/compiler/optimizing/intrinsic_objects.cc new file mode 100644 index 0000000000..c345624a7a --- /dev/null +++ b/compiler/optimizing/intrinsic_objects.cc @@ -0,0 +1,121 @@ +/* + * Copyright (C) 2018 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "intrinsic_objects.h" + +#include "art_field-inl.h" +#include "base/logging.h" +#include "class_root.h" +#include "handle.h" +#include "obj_ptr-inl.h" +#include "mirror/object_array-alloc-inl.h" +#include "mirror/object_array-inl.h" + +namespace art { + +static ObjPtr<mirror::ObjectArray<mirror::Object>> LookupIntegerCache(Thread* self, + ClassLinker* class_linker) + REQUIRES_SHARED(Locks::mutator_lock_) { + ObjPtr<mirror::Class> integer_cache_class = class_linker->LookupClass( + self, "Ljava/lang/Integer$IntegerCache;", /* class_loader= */ nullptr); + if (integer_cache_class == nullptr || !integer_cache_class->IsInitialized()) { + return nullptr; + } + ArtField* cache_field = + integer_cache_class->FindDeclaredStaticField("cache", "[Ljava/lang/Integer;"); + CHECK(cache_field != nullptr); + ObjPtr<mirror::ObjectArray<mirror::Object>> integer_cache = + ObjPtr<mirror::ObjectArray<mirror::Object>>::DownCast( + cache_field->GetObject(integer_cache_class)); + CHECK(integer_cache != nullptr); + return integer_cache; +} + +ObjPtr<mirror::ObjectArray<mirror::Object>> IntrinsicObjects::AllocateBootImageLiveObjects( + Thread* self, + ClassLinker* class_linker) REQUIRES_SHARED(Locks::mutator_lock_) { + // The objects used for the Integer.valueOf() intrinsic must remain live even if references + // to them are removed using reflection. Image roots are not accessible through reflection, + // so the array we construct here shall keep them alive. 
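// Illustration only (not lines from this patch): assuming the IntegerCache class was found,
// the array built below has the layout
//   live_objects[0]     = IntegerCache.cache     (the [Ljava/lang/Integer; array itself)
//   live_objects[1 + i] = IntegerCache.cache[i]  (the boxed java.lang.Integer for value low + i)
// and GetIntegerValueOfCache()/GetIntegerValueOfObject() below index into exactly this shape.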
+ StackHandleScope<1> hs(self); + Handle<mirror::ObjectArray<mirror::Object>> integer_cache = + hs.NewHandle(LookupIntegerCache(self, class_linker)); + size_t live_objects_size = + (integer_cache != nullptr) ? (/* cache */ 1u + integer_cache->GetLength()) : 0u; + ObjPtr<mirror::ObjectArray<mirror::Object>> live_objects = + mirror::ObjectArray<mirror::Object>::Alloc( + self, GetClassRoot<mirror::ObjectArray<mirror::Object>>(class_linker), live_objects_size); + int32_t index = 0; + if (integer_cache != nullptr) { + live_objects->Set(index++, integer_cache.Get()); + for (int32_t i = 0, length = integer_cache->GetLength(); i != length; ++i) { + live_objects->Set(index++, integer_cache->Get(i)); + } + } + CHECK_EQ(index, live_objects->GetLength()); + + if (kIsDebugBuild && integer_cache != nullptr) { + CHECK_EQ(integer_cache.Get(), GetIntegerValueOfCache(live_objects)); + for (int32_t i = 0, len = integer_cache->GetLength(); i != len; ++i) { + CHECK_EQ(integer_cache->GetWithoutChecks(i), GetIntegerValueOfObject(live_objects, i)); + } + } + return live_objects; +} + +ObjPtr<mirror::ObjectArray<mirror::Object>> IntrinsicObjects::GetIntegerValueOfCache( + ObjPtr<mirror::ObjectArray<mirror::Object>> boot_image_live_objects) { + DCHECK(boot_image_live_objects != nullptr); + if (boot_image_live_objects->GetLength() == 0u) { + return nullptr; // No intrinsic objects. + } + // No need for read barrier for boot image object or for verifying the value that was just stored. + ObjPtr<mirror::Object> result = + boot_image_live_objects->GetWithoutChecks<kVerifyNone, kWithoutReadBarrier>(0); + DCHECK(result != nullptr); + DCHECK(result->IsObjectArray()); + DCHECK(result->GetClass()->DescriptorEquals("[Ljava/lang/Integer;")); + return ObjPtr<mirror::ObjectArray<mirror::Object>>::DownCast(result); +} + +ObjPtr<mirror::Object> IntrinsicObjects::GetIntegerValueOfObject( + ObjPtr<mirror::ObjectArray<mirror::Object>> boot_image_live_objects, + uint32_t index) { + DCHECK(boot_image_live_objects != nullptr); + DCHECK_NE(boot_image_live_objects->GetLength(), 0); + DCHECK_LT(index, + static_cast<uint32_t>(GetIntegerValueOfCache(boot_image_live_objects)->GetLength())); + + // No need for read barrier for boot image object or for verifying the value that was just stored. + ObjPtr<mirror::Object> result = + boot_image_live_objects->GetWithoutChecks<kVerifyNone, kWithoutReadBarrier>( + /* skip the IntegerCache.cache */ 1u + index); + DCHECK(result != nullptr); + DCHECK(result->GetClass()->DescriptorEquals("Ljava/lang/Integer;")); + return result; +} + +MemberOffset IntrinsicObjects::GetIntegerValueOfArrayDataOffset( + ObjPtr<mirror::ObjectArray<mirror::Object>> boot_image_live_objects) { + DCHECK_NE(boot_image_live_objects->GetLength(), 0); + MemberOffset result = mirror::ObjectArray<mirror::Object>::OffsetOfElement(1u); + DCHECK_EQ(GetIntegerValueOfObject(boot_image_live_objects, 0u), + (boot_image_live_objects + ->GetFieldObject<mirror::Object, kVerifyNone, kWithoutReadBarrier>(result))); + return result; +} + +} // namespace art diff --git a/compiler/optimizing/intrinsic_objects.h b/compiler/optimizing/intrinsic_objects.h new file mode 100644 index 0000000000..863017be38 --- /dev/null +++ b/compiler/optimizing/intrinsic_objects.h @@ -0,0 +1,83 @@ +/* + * Copyright (C) 2018 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef ART_COMPILER_OPTIMIZING_INTRINSIC_OBJECTS_H_ +#define ART_COMPILER_OPTIMIZING_INTRINSIC_OBJECTS_H_ + +#include "base/bit_field.h" +#include "base/bit_utils.h" +#include "base/mutex.h" + +namespace art { + +class ClassLinker; +template <class MirrorType> class ObjPtr; +class MemberOffset; +class Thread; + +namespace mirror { +class Object; +template <class T> class ObjectArray; +} // namespace mirror + +class IntrinsicObjects { + public: + enum class PatchType { + kIntegerValueOfObject, + kIntegerValueOfArray, + + kLast = kIntegerValueOfArray + }; + + static uint32_t EncodePatch(PatchType patch_type, uint32_t index = 0u) { + DCHECK(patch_type == PatchType::kIntegerValueOfObject || index == 0u); + return PatchTypeField::Encode(static_cast<uint32_t>(patch_type)) | IndexField::Encode(index); + } + + static PatchType DecodePatchType(uint32_t intrinsic_data) { + return static_cast<PatchType>(PatchTypeField::Decode(intrinsic_data)); + } + + static uint32_t DecodePatchIndex(uint32_t intrinsic_data) { + return IndexField::Decode(intrinsic_data); + } + + static ObjPtr<mirror::ObjectArray<mirror::Object>> AllocateBootImageLiveObjects( + Thread* self, + ClassLinker* class_linker) REQUIRES_SHARED(Locks::mutator_lock_); + + // Functions for retrieving data for Integer.valueOf(). + static ObjPtr<mirror::ObjectArray<mirror::Object>> GetIntegerValueOfCache( + ObjPtr<mirror::ObjectArray<mirror::Object>> boot_image_live_objects) + REQUIRES_SHARED(Locks::mutator_lock_); + static ObjPtr<mirror::Object> GetIntegerValueOfObject( + ObjPtr<mirror::ObjectArray<mirror::Object>> boot_image_live_objects, + uint32_t index) REQUIRES_SHARED(Locks::mutator_lock_); + static MemberOffset GetIntegerValueOfArrayDataOffset( + ObjPtr<mirror::ObjectArray<mirror::Object>> boot_image_live_objects) + REQUIRES_SHARED(Locks::mutator_lock_); + + private: + static constexpr size_t kPatchTypeBits = + MinimumBitsToStore(static_cast<uint32_t>(PatchType::kLast)); + static constexpr size_t kIndexBits = BitSizeOf<uint32_t>() - kPatchTypeBits; + using PatchTypeField = BitField<uint32_t, 0u, kPatchTypeBits>; + using IndexField = BitField<uint32_t, kPatchTypeBits, kIndexBits>; +}; + +} // namespace art + +#endif // ART_COMPILER_OPTIMIZING_INTRINSIC_OBJECTS_H_ diff --git a/compiler/optimizing/intrinsics.cc b/compiler/optimizing/intrinsics.cc index f8dc316e45..d9401050df 100644 --- a/compiler/optimizing/intrinsics.cc +++ b/compiler/optimizing/intrinsics.cc @@ -20,309 +20,355 @@ #include "art_method-inl.h" #include "base/utils.h" #include "class_linker.h" +#include "class_root.h" #include "dex/invoke_type.h" -#include "driver/compiler_driver.h" #include "driver/compiler_options.h" -#include "mirror/dex_cache-inl.h" +#include "gc/space/image_space.h" +#include "image-inl.h" +#include "intrinsic_objects.h" #include "nodes.h" +#include "obj_ptr-inl.h" #include "scoped_thread_state_change-inl.h" #include "thread-current-inl.h" namespace art { -// Check that intrinsic enum values fit within space set aside in ArtMethod modifier flags. 
-#define CHECK_INTRINSICS_ENUM_VALUES(Name, IsStatic, NeedsEnvironmentOrCache, SideEffects, Exceptions, ...) \ - static_assert( \ - static_cast<uint32_t>(Intrinsics::k ## Name) <= (kAccIntrinsicBits >> CTZ(kAccIntrinsicBits)), \ - "Instrinsics enumeration space overflow."); -#include "intrinsics_list.h" - INTRINSICS_LIST(CHECK_INTRINSICS_ENUM_VALUES) -#undef INTRINSICS_LIST -#undef CHECK_INTRINSICS_ENUM_VALUES - -// Function that returns whether an intrinsic is static/direct or virtual. -static inline InvokeType GetIntrinsicInvokeType(Intrinsics i) { - switch (i) { +std::ostream& operator<<(std::ostream& os, const Intrinsics& intrinsic) { + switch (intrinsic) { case Intrinsics::kNone: - return kInterface; // Non-sensical for intrinsic. + os << "None"; + break; #define OPTIMIZING_INTRINSICS(Name, IsStatic, NeedsEnvironmentOrCache, SideEffects, Exceptions, ...) \ case Intrinsics::k ## Name: \ - return IsStatic; + os << # Name; \ + break; #include "intrinsics_list.h" INTRINSICS_LIST(OPTIMIZING_INTRINSICS) -#undef INTRINSICS_LIST +#undef STATIC_INTRINSICS_LIST +#undef VIRTUAL_INTRINSICS_LIST #undef OPTIMIZING_INTRINSICS } - return kInterface; + return os; } -// Function that returns whether an intrinsic needs an environment or not. -static inline IntrinsicNeedsEnvironmentOrCache NeedsEnvironmentOrCache(Intrinsics i) { - switch (i) { - case Intrinsics::kNone: - return kNeedsEnvironmentOrCache; // Non-sensical for intrinsic. -#define OPTIMIZING_INTRINSICS(Name, IsStatic, NeedsEnvironmentOrCache, SideEffects, Exceptions, ...) \ - case Intrinsics::k ## Name: \ - return NeedsEnvironmentOrCache; -#include "intrinsics_list.h" - INTRINSICS_LIST(OPTIMIZING_INTRINSICS) -#undef INTRINSICS_LIST -#undef OPTIMIZING_INTRINSICS - } - return kNeedsEnvironmentOrCache; +static const char kIntegerCacheDescriptor[] = "Ljava/lang/Integer$IntegerCache;"; +static const char kIntegerDescriptor[] = "Ljava/lang/Integer;"; +static const char kIntegerArrayDescriptor[] = "[Ljava/lang/Integer;"; +static const char kLowFieldName[] = "low"; +static const char kHighFieldName[] = "high"; +static const char kValueFieldName[] = "value"; + +static ObjPtr<mirror::ObjectArray<mirror::Object>> GetBootImageLiveObjects() + REQUIRES_SHARED(Locks::mutator_lock_) { + gc::Heap* heap = Runtime::Current()->GetHeap(); + const std::vector<gc::space::ImageSpace*>& boot_image_spaces = heap->GetBootImageSpaces(); + DCHECK(!boot_image_spaces.empty()); + const ImageHeader& main_header = boot_image_spaces[0]->GetImageHeader(); + ObjPtr<mirror::ObjectArray<mirror::Object>> boot_image_live_objects = + ObjPtr<mirror::ObjectArray<mirror::Object>>::DownCast( + main_header.GetImageRoot<kWithoutReadBarrier>(ImageHeader::kBootImageLiveObjects)); + DCHECK(boot_image_live_objects != nullptr); + DCHECK(heap->ObjectIsInBootImageSpace(boot_image_live_objects)); + return boot_image_live_objects; } -// Function that returns whether an intrinsic has side effects. -static inline IntrinsicSideEffects GetSideEffects(Intrinsics i) { - switch (i) { - case Intrinsics::kNone: - return kAllSideEffects; -#define OPTIMIZING_INTRINSICS(Name, IsStatic, NeedsEnvironmentOrCache, SideEffects, Exceptions, ...) 
\ - case Intrinsics::k ## Name: \ - return SideEffects; -#include "intrinsics_list.h" - INTRINSICS_LIST(OPTIMIZING_INTRINSICS) -#undef INTRINSICS_LIST -#undef OPTIMIZING_INTRINSICS - } - return kAllSideEffects; +static ObjPtr<mirror::Class> LookupInitializedClass(Thread* self, + ClassLinker* class_linker, + const char* descriptor) + REQUIRES_SHARED(Locks::mutator_lock_) { + ObjPtr<mirror::Class> klass = + class_linker->LookupClass(self, descriptor, /* class_loader= */ nullptr); + DCHECK(klass != nullptr); + DCHECK(klass->IsInitialized()); + return klass; } -// Function that returns whether an intrinsic can throw exceptions. -static inline IntrinsicExceptions GetExceptions(Intrinsics i) { - switch (i) { - case Intrinsics::kNone: - return kCanThrow; -#define OPTIMIZING_INTRINSICS(Name, IsStatic, NeedsEnvironmentOrCache, SideEffects, Exceptions, ...) \ - case Intrinsics::k ## Name: \ - return Exceptions; -#include "intrinsics_list.h" - INTRINSICS_LIST(OPTIMIZING_INTRINSICS) -#undef INTRINSICS_LIST -#undef OPTIMIZING_INTRINSICS - } - return kCanThrow; +static ObjPtr<mirror::ObjectArray<mirror::Object>> GetIntegerCacheArray( + ObjPtr<mirror::Class> cache_class) REQUIRES_SHARED(Locks::mutator_lock_) { + ArtField* cache_field = cache_class->FindDeclaredStaticField("cache", kIntegerArrayDescriptor); + DCHECK(cache_field != nullptr); + return ObjPtr<mirror::ObjectArray<mirror::Object>>::DownCast(cache_field->GetObject(cache_class)); } -static bool CheckInvokeType(Intrinsics intrinsic, HInvoke* invoke) +static int32_t GetIntegerCacheField(ObjPtr<mirror::Class> cache_class, const char* field_name) REQUIRES_SHARED(Locks::mutator_lock_) { - // Whenever the intrinsic is marked as static, report an error if we find an InvokeVirtual. - // - // Whenever the intrinsic is marked as direct and we find an InvokeVirtual, a devirtualization - // failure occured. We might be in a situation where we have inlined a method that calls an - // intrinsic, but that method is in a different dex file on which we do not have a - // verified_method that would have helped the compiler driver sharpen the call. In that case, - // make sure that the intrinsic is actually for some final method (or in a final class), as - // otherwise the intrinsics setup is broken. - // - // For the last direction, we have intrinsics for virtual functions that will perform a check - // inline. If the precise type is known, however, the instruction will be sharpened to an - // InvokeStaticOrDirect. - InvokeType intrinsic_type = GetIntrinsicInvokeType(intrinsic); - InvokeType invoke_type = invoke->GetInvokeType(); - - switch (intrinsic_type) { - case kStatic: - return (invoke_type == kStatic); - - case kDirect: - if (invoke_type == kDirect) { - return true; - } - if (invoke_type == kVirtual) { - ArtMethod* art_method = invoke->GetResolvedMethod(); - return (art_method->IsFinal() || art_method->GetDeclaringClass()->IsFinal()); - } - return false; - - case kVirtual: - // Call might be devirtualized. 
- return (invoke_type == kVirtual || invoke_type == kDirect || invoke_type == kInterface); - - case kSuper: - case kInterface: - case kPolymorphic: - return false; - } - LOG(FATAL) << "Unknown intrinsic invoke type: " << intrinsic_type; - UNREACHABLE(); + ArtField* field = cache_class->FindDeclaredStaticField(field_name, "I"); + DCHECK(field != nullptr); + return field->GetInt(cache_class); } -bool IntrinsicsRecognizer::Recognize(HInvoke* invoke, - ArtMethod* art_method, - /*out*/ bool* wrong_invoke_type) { - if (art_method == nullptr) { - art_method = invoke->GetResolvedMethod(); - } - *wrong_invoke_type = false; - if (art_method == nullptr || !art_method->IsIntrinsic()) { - return false; - } - - // TODO: b/65872996 The intent is that polymorphic signature methods should - // be compiler intrinsics. At present, they are only interpreter intrinsics. - if (art_method->IsPolymorphicSignature()) { - return false; +static bool CheckIntegerCache(Thread* self, + ClassLinker* class_linker, + ObjPtr<mirror::ObjectArray<mirror::Object>> boot_image_live_objects, + ObjPtr<mirror::ObjectArray<mirror::Object>> boot_image_cache) + REQUIRES_SHARED(Locks::mutator_lock_) { + DCHECK(boot_image_cache != nullptr); + + // Since we have a cache in the boot image, both java.lang.Integer and + // java.lang.Integer$IntegerCache must be initialized in the boot image. + ObjPtr<mirror::Class> cache_class = + LookupInitializedClass(self, class_linker, kIntegerCacheDescriptor); + ObjPtr<mirror::Class> integer_class = + LookupInitializedClass(self, class_linker, kIntegerDescriptor); + + // Check that the current cache is the same as the `boot_image_cache`. + ObjPtr<mirror::ObjectArray<mirror::Object>> current_cache = GetIntegerCacheArray(cache_class); + if (current_cache != boot_image_cache) { + return false; // Messed up IntegerCache.cache. } - Intrinsics intrinsic = static_cast<Intrinsics>(art_method->GetIntrinsic()); - if (CheckInvokeType(intrinsic, invoke) == false) { - *wrong_invoke_type = true; - return false; + // Check that the range matches the boot image cache length. + int32_t low = GetIntegerCacheField(cache_class, kLowFieldName); + int32_t high = GetIntegerCacheField(cache_class, kHighFieldName); + if (boot_image_cache->GetLength() != high - low + 1) { + return false; // Messed up IntegerCache.low or IntegerCache.high. } - invoke->SetIntrinsic(intrinsic, - NeedsEnvironmentOrCache(intrinsic), - GetSideEffects(intrinsic), - GetExceptions(intrinsic)); - return true; -} - -void IntrinsicsRecognizer::Run() { - ScopedObjectAccess soa(Thread::Current()); - for (HBasicBlock* block : graph_->GetReversePostOrder()) { - for (HInstructionIterator inst_it(block->GetInstructions()); !inst_it.Done(); - inst_it.Advance()) { - HInstruction* inst = inst_it.Current(); - if (inst->IsInvoke()) { - bool wrong_invoke_type = false; - if (Recognize(inst->AsInvoke(), /* art_method */ nullptr, &wrong_invoke_type)) { - MaybeRecordStat(stats_, MethodCompilationStat::kIntrinsicRecognized); - } else if (wrong_invoke_type) { - LOG(WARNING) - << "Found an intrinsic with unexpected invoke type: " - << inst->AsInvoke()->GetResolvedMethod()->PrettyMethod() << " " - << inst->DebugName(); - } - } + // Check that the elements match the boot image intrinsic objects and check their values as well. 
+ ArtField* value_field = integer_class->FindDeclaredInstanceField(kValueFieldName, "I"); + DCHECK(value_field != nullptr); + for (int32_t i = 0, len = boot_image_cache->GetLength(); i != len; ++i) { + ObjPtr<mirror::Object> boot_image_object = + IntrinsicObjects::GetIntegerValueOfObject(boot_image_live_objects, i); + DCHECK(Runtime::Current()->GetHeap()->ObjectIsInBootImageSpace(boot_image_object)); + // No need for read barrier for comparison with a boot image object. + ObjPtr<mirror::Object> current_object = + boot_image_cache->GetWithoutChecks<kVerifyNone, kWithoutReadBarrier>(i); + if (boot_image_object != current_object) { + return false; // Messed up IntegerCache.cache[i] + } + if (value_field->GetInt(boot_image_object) != low + i) { + return false; // Messed up IntegerCache.cache[i].value. } } -} -std::ostream& operator<<(std::ostream& os, const Intrinsics& intrinsic) { - switch (intrinsic) { - case Intrinsics::kNone: - os << "None"; - break; -#define OPTIMIZING_INTRINSICS(Name, IsStatic, NeedsEnvironmentOrCache, SideEffects, Exceptions, ...) \ - case Intrinsics::k ## Name: \ - os << # Name; \ - break; -#include "intrinsics_list.h" - INTRINSICS_LIST(OPTIMIZING_INTRINSICS) -#undef STATIC_INTRINSICS_LIST -#undef VIRTUAL_INTRINSICS_LIST -#undef OPTIMIZING_INTRINSICS - } - return os; + return true; } void IntrinsicVisitor::ComputeIntegerValueOfLocations(HInvoke* invoke, CodeGenerator* codegen, Location return_location, Location first_argument_location) { - if (Runtime::Current()->IsAotCompiler()) { - if (codegen->GetCompilerOptions().IsBootImage() || - codegen->GetCompilerOptions().GetCompilePic()) { - // TODO(ngeoffray): Support boot image compilation. + // The intrinsic will call if it needs to allocate a j.l.Integer. + LocationSummary::CallKind call_kind = LocationSummary::kCallOnMainOnly; + const CompilerOptions& compiler_options = codegen->GetCompilerOptions(); + if (compiler_options.IsBootImage()) { + // Piggyback on the method load kind to determine whether we can use PC-relative addressing. + // This should cover both the testing config (non-PIC boot image) and codegens that reject + // PC-relative load kinds and fall back to the runtime call. 
+ if (!invoke->AsInvokeStaticOrDirect()->HasPcRelativeMethodLoadKind()) { + return; + } + if (!compiler_options.IsImageClass(kIntegerCacheDescriptor) || + !compiler_options.IsImageClass(kIntegerDescriptor)) { + return; + } + ClassLinker* class_linker = Runtime::Current()->GetClassLinker(); + Thread* self = Thread::Current(); + ScopedObjectAccess soa(self); + ObjPtr<mirror::Class> cache_class = class_linker->LookupClass( + self, kIntegerCacheDescriptor, /* class_loader= */ nullptr); + DCHECK(cache_class != nullptr); + if (UNLIKELY(!cache_class->IsInitialized())) { + LOG(WARNING) << "Image class " << cache_class->PrettyDescriptor() << " is uninitialized."; + return; + } + ObjPtr<mirror::Class> integer_class = + class_linker->LookupClass(self, kIntegerDescriptor, /* class_loader= */ nullptr); + DCHECK(integer_class != nullptr); + if (UNLIKELY(!integer_class->IsInitialized())) { + LOG(WARNING) << "Image class " << integer_class->PrettyDescriptor() << " is uninitialized."; return; } + int32_t low = GetIntegerCacheField(cache_class, kLowFieldName); + int32_t high = GetIntegerCacheField(cache_class, kHighFieldName); + if (kIsDebugBuild) { + ObjPtr<mirror::ObjectArray<mirror::Object>> current_cache = GetIntegerCacheArray(cache_class); + CHECK(current_cache != nullptr); + CHECK_EQ(current_cache->GetLength(), high - low + 1); + ArtField* value_field = integer_class->FindDeclaredInstanceField(kValueFieldName, "I"); + CHECK(value_field != nullptr); + for (int32_t i = 0, len = current_cache->GetLength(); i != len; ++i) { + ObjPtr<mirror::Object> current_object = current_cache->GetWithoutChecks(i); + CHECK(current_object != nullptr); + CHECK_EQ(value_field->GetInt(current_object), low + i); + } + } + if (invoke->InputAt(0)->IsIntConstant()) { + int32_t value = invoke->InputAt(0)->AsIntConstant()->GetValue(); + if (static_cast<uint32_t>(value) - static_cast<uint32_t>(low) < + static_cast<uint32_t>(high - low + 1)) { + // No call, we shall use direct pointer to the Integer object. + call_kind = LocationSummary::kNoCall; + } + } + } else { + Runtime* runtime = Runtime::Current(); + if (runtime->GetHeap()->GetBootImageSpaces().empty()) { + return; // Running without boot image, cannot use required boot image objects. + } + Thread* self = Thread::Current(); + ScopedObjectAccess soa(self); + ObjPtr<mirror::ObjectArray<mirror::Object>> boot_image_live_objects = GetBootImageLiveObjects(); + ObjPtr<mirror::ObjectArray<mirror::Object>> cache = + IntrinsicObjects::GetIntegerValueOfCache(boot_image_live_objects); + if (cache == nullptr) { + return; // No cache in the boot image. + } + if (runtime->UseJitCompilation()) { + if (!CheckIntegerCache(self, runtime->GetClassLinker(), boot_image_live_objects, cache)) { + return; // The cache was somehow messed up, probably by using reflection. + } + } else { + DCHECK(runtime->IsAotCompiler()); + DCHECK(CheckIntegerCache(self, runtime->GetClassLinker(), boot_image_live_objects, cache)); + if (invoke->InputAt(0)->IsIntConstant()) { + int32_t value = invoke->InputAt(0)->AsIntConstant()->GetValue(); + // Retrieve the `value` from the lowest cached Integer. 
+ ObjPtr<mirror::Object> low_integer = + IntrinsicObjects::GetIntegerValueOfObject(boot_image_live_objects, 0u); + ObjPtr<mirror::Class> integer_class = + low_integer->GetClass<kVerifyNone, kWithoutReadBarrier>(); + ArtField* value_field = integer_class->FindDeclaredInstanceField(kValueFieldName, "I"); + DCHECK(value_field != nullptr); + int32_t low = value_field->GetInt(low_integer); + if (static_cast<uint32_t>(value) - static_cast<uint32_t>(low) < + static_cast<uint32_t>(cache->GetLength())) { + // No call, we shall use direct pointer to the Integer object. Note that we cannot + // do this for JIT as the "low" can change through reflection before emitting the code. + call_kind = LocationSummary::kNoCall; + } + } + } } - IntegerValueOfInfo info = ComputeIntegerValueOfInfo(); - - // Most common case is that we have found all we needed (classes are initialized - // and in the boot image). Bail if not. - if (info.integer_cache == nullptr || - info.integer == nullptr || - info.cache == nullptr || - info.value_offset == 0 || - // low and high cannot be 0, per the spec. - info.low == 0 || - info.high == 0) { - LOG(INFO) << "Integer.valueOf will not be optimized"; - return; + ArenaAllocator* allocator = invoke->GetBlock()->GetGraph()->GetAllocator(); + LocationSummary* locations = new (allocator) LocationSummary(invoke, call_kind, kIntrinsified); + if (call_kind == LocationSummary::kCallOnMainOnly) { + locations->SetInAt(0, Location::RegisterOrConstant(invoke->InputAt(0))); + locations->AddTemp(first_argument_location); + locations->SetOut(return_location); + } else { + locations->SetInAt(0, Location::ConstantLocation(invoke->InputAt(0)->AsConstant())); + locations->SetOut(Location::RequiresRegister()); } +} - // The intrinsic will call if it needs to allocate a j.l.Integer. - LocationSummary* locations = new (invoke->GetBlock()->GetGraph()->GetAllocator()) LocationSummary( - invoke, LocationSummary::kCallOnMainOnly, kIntrinsified); - if (!invoke->InputAt(0)->IsConstant()) { - locations->SetInAt(0, Location::RequiresRegister()); - } - locations->AddTemp(first_argument_location); - locations->SetOut(return_location); +static int32_t GetIntegerCacheLowFromIntegerCache(Thread* self, ClassLinker* class_linker) + REQUIRES_SHARED(Locks::mutator_lock_) { + ObjPtr<mirror::Class> cache_class = + LookupInitializedClass(self, class_linker, kIntegerCacheDescriptor); + return GetIntegerCacheField(cache_class, kLowFieldName); } -IntrinsicVisitor::IntegerValueOfInfo IntrinsicVisitor::ComputeIntegerValueOfInfo() { +static uint32_t CalculateBootImageOffset(ObjPtr<mirror::Object> object) + REQUIRES_SHARED(Locks::mutator_lock_) { + gc::Heap* heap = Runtime::Current()->GetHeap(); + DCHECK(heap->ObjectIsInBootImageSpace(object)); + return reinterpret_cast<const uint8_t*>(object.Ptr()) - heap->GetBootImageSpaces()[0]->Begin(); +} + +inline IntrinsicVisitor::IntegerValueOfInfo::IntegerValueOfInfo() + : value_offset(0), + low(0), + length(0u), + integer_boot_image_offset(kInvalidReference), + value_boot_image_reference(kInvalidReference) {} + +IntrinsicVisitor::IntegerValueOfInfo IntrinsicVisitor::ComputeIntegerValueOfInfo( + HInvoke* invoke, const CompilerOptions& compiler_options) { // Note that we could cache all of the data looked up here. but there's no good // location for it. We don't want to add it to WellKnownClasses, to avoid creating global // jni values. Adding it as state to the compiler singleton seems like wrong // separation of concerns. // The need for this data should be pretty rare though. 
- // The most common case is that the classes are in the boot image and initialized, - // which is easy to generate code for. We bail if not. - Thread* self = Thread::Current(); - ScopedObjectAccess soa(self); + // Note that at this point we can no longer abort the code generation. Therefore, + // we need to provide data that shall not lead to a crash even if the fields were + // modified through reflection since ComputeIntegerValueOfLocations() when JITting. + Runtime* runtime = Runtime::Current(); ClassLinker* class_linker = runtime->GetClassLinker(); - gc::Heap* heap = runtime->GetHeap(); - IntegerValueOfInfo info; - info.integer_cache = class_linker->FindSystemClass(self, "Ljava/lang/Integer$IntegerCache;"); - if (info.integer_cache == nullptr) { - self->ClearException(); - return info; - } - if (!heap->ObjectIsInBootImageSpace(info.integer_cache) || !info.integer_cache->IsInitialized()) { - // Optimization only works if the class is initialized and in the boot image. - return info; - } - info.integer = class_linker->FindSystemClass(self, "Ljava/lang/Integer;"); - if (info.integer == nullptr) { - self->ClearException(); - return info; - } - if (!heap->ObjectIsInBootImageSpace(info.integer) || !info.integer->IsInitialized()) { - // Optimization only works if the class is initialized and in the boot image. - return info; - } - - ArtField* field = info.integer_cache->FindDeclaredStaticField("cache", "[Ljava/lang/Integer;"); - if (field == nullptr) { - return info; - } - info.cache = static_cast<mirror::ObjectArray<mirror::Object>*>( - field->GetObject(info.integer_cache).Ptr()); - if (info.cache == nullptr) { - return info; - } - - if (!heap->ObjectIsInBootImageSpace(info.cache)) { - // Optimization only works if the object is in the boot image. - return info; - } + Thread* self = Thread::Current(); + ScopedObjectAccess soa(self); - field = info.integer->FindDeclaredInstanceField("value", "I"); - if (field == nullptr) { - return info; + IntegerValueOfInfo info; + if (compiler_options.IsBootImage()) { + ObjPtr<mirror::Class> integer_class = + LookupInitializedClass(self, class_linker, kIntegerDescriptor); + ArtField* value_field = integer_class->FindDeclaredInstanceField(kValueFieldName, "I"); + DCHECK(value_field != nullptr); + info.value_offset = value_field->GetOffset().Uint32Value(); + ObjPtr<mirror::Class> cache_class = + LookupInitializedClass(self, class_linker, kIntegerCacheDescriptor); + info.low = GetIntegerCacheField(cache_class, kLowFieldName); + int32_t high = GetIntegerCacheField(cache_class, kHighFieldName); + info.length = dchecked_integral_cast<uint32_t>(high - info.low + 1); + + info.integer_boot_image_offset = IntegerValueOfInfo::kInvalidReference; + if (invoke->InputAt(0)->IsIntConstant()) { + int32_t input_value = invoke->InputAt(0)->AsIntConstant()->GetValue(); + uint32_t index = static_cast<uint32_t>(input_value) - static_cast<uint32_t>(info.low); + if (index < static_cast<uint32_t>(info.length)) { + info.value_boot_image_reference = IntrinsicObjects::EncodePatch( + IntrinsicObjects::PatchType::kIntegerValueOfObject, index); + } else { + // Not in the cache. 
+ info.value_boot_image_reference = IntegerValueOfInfo::kInvalidReference; + } + } else { + info.array_data_boot_image_reference = + IntrinsicObjects::EncodePatch(IntrinsicObjects::PatchType::kIntegerValueOfArray); + } + } else { + ObjPtr<mirror::ObjectArray<mirror::Object>> boot_image_live_objects = GetBootImageLiveObjects(); + ObjPtr<mirror::Object> low_integer = + IntrinsicObjects::GetIntegerValueOfObject(boot_image_live_objects, 0u); + ObjPtr<mirror::Class> integer_class = low_integer->GetClass<kVerifyNone, kWithoutReadBarrier>(); + ArtField* value_field = integer_class->FindDeclaredInstanceField(kValueFieldName, "I"); + DCHECK(value_field != nullptr); + info.value_offset = value_field->GetOffset().Uint32Value(); + if (runtime->UseJitCompilation()) { + // Use the current `IntegerCache.low` for JIT to avoid truly surprising behavior if the + // code messes up the `value` field in the lowest cached Integer using reflection. + info.low = GetIntegerCacheLowFromIntegerCache(self, class_linker); + } else { + // For app AOT, the `low_integer->value` should be the same as `IntegerCache.low`. + info.low = value_field->GetInt(low_integer); + DCHECK_EQ(info.low, GetIntegerCacheLowFromIntegerCache(self, class_linker)); + } + // Do not look at `IntegerCache.high`, use the immutable length of the cache array instead. + info.length = dchecked_integral_cast<uint32_t>( + IntrinsicObjects::GetIntegerValueOfCache(boot_image_live_objects)->GetLength()); + + info.integer_boot_image_offset = CalculateBootImageOffset(integer_class); + if (invoke->InputAt(0)->IsIntConstant()) { + int32_t input_value = invoke->InputAt(0)->AsIntConstant()->GetValue(); + uint32_t index = static_cast<uint32_t>(input_value) - static_cast<uint32_t>(info.low); + if (index < static_cast<uint32_t>(info.length)) { + ObjPtr<mirror::Object> integer = + IntrinsicObjects::GetIntegerValueOfObject(boot_image_live_objects, index); + info.value_boot_image_reference = CalculateBootImageOffset(integer); + } else { + // Not in the cache. + info.value_boot_image_reference = IntegerValueOfInfo::kInvalidReference; + } + } else { + info.array_data_boot_image_reference = + CalculateBootImageOffset(boot_image_live_objects) + + IntrinsicObjects::GetIntegerValueOfArrayDataOffset(boot_image_live_objects).Uint32Value(); + } } - info.value_offset = field->GetOffset().Int32Value(); - field = info.integer_cache->FindDeclaredStaticField("low", "I"); - if (field == nullptr) { - return info; - } - info.low = field->GetInt(info.integer_cache); + return info; +} - field = info.integer_cache->FindDeclaredStaticField("high", "I"); - if (field == nullptr) { - return info; +void IntrinsicVisitor::AssertNonMovableStringClass() { + if (kIsDebugBuild) { + ScopedObjectAccess soa(Thread::Current()); + ObjPtr<mirror::Class> string_class = GetClassRoot<art::mirror::String>(); + CHECK(!art::Runtime::Current()->GetHeap()->IsMovableObject(string_class)); } - info.high = field->GetInt(info.integer_cache); - - DCHECK_EQ(info.cache->GetLength(), info.high - info.low + 1); - return info; } } // namespace art diff --git a/compiler/optimizing/intrinsics.h b/compiler/optimizing/intrinsics.h index 62991435c7..ab68cce304 100644 --- a/compiler/optimizing/intrinsics.h +++ b/compiler/optimizing/intrinsics.h @@ -24,7 +24,6 @@ namespace art { -class CompilerDriver; class DexFile; // Positive floating-point infinities. 
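Note on the cache-bounds tests in ComputeIntegerValueOfLocations() and ComputeIntegerValueOfInfo() above: both reduce "is this constant inside the Integer cache?" to a single unsigned comparison. A minimal standalone sketch of that idiom (illustrative only, not ART code; the function name is made up):

  #include <cstdint>

  // True iff `value` lies in [low, low + length), using one unsigned compare.
  // For value < low the unsigned subtraction wraps around to a large number,
  // so the single `<` also rejects values below the cache range.
  bool InIntegerCacheRange(int32_t value, int32_t low, uint32_t length) {
    return static_cast<uint32_t>(value) - static_cast<uint32_t>(low) < length;
  }

When this test succeeds for a constant input, the code above picks LocationSummary::kNoCall and later embeds a direct boot-image reference to the cached java.lang.Integer; otherwise the allocation path (kCallOnMainOnly) is kept.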
@@ -34,28 +33,6 @@ static constexpr uint64_t kPositiveInfinityDouble = UINT64_C(0x7ff0000000000000) static constexpr uint32_t kNanFloat = 0x7fc00000U; static constexpr uint64_t kNanDouble = 0x7ff8000000000000; -// Recognize intrinsics from HInvoke nodes. -class IntrinsicsRecognizer : public HOptimization { - public: - IntrinsicsRecognizer(HGraph* graph, - OptimizingCompilerStats* stats, - const char* name = kIntrinsicsRecognizerPassName) - : HOptimization(graph, name, stats) {} - - void Run() OVERRIDE; - - // Static helper that recognizes intrinsic call. Returns true on success. - // If it fails due to invoke type mismatch, wrong_invoke_type is set. - // Useful to recognize intrinsics on individual calls outside this full pass. - static bool Recognize(HInvoke* invoke, ArtMethod* method, /*out*/ bool* wrong_invoke_type) - REQUIRES_SHARED(Locks::mutator_lock_); - - static constexpr const char* kIntrinsicsRecognizerPassName = "intrinsics_recognition"; - - private: - DISALLOW_COPY_AND_ASSIGN(IntrinsicsRecognizer); -}; - class IntrinsicVisitor : public ValueObject { public: virtual ~IntrinsicVisitor() {} @@ -126,37 +103,47 @@ class IntrinsicVisitor : public ValueObject { Location return_location, Location first_argument_location); - // Temporary data structure for holding Integer.valueOf useful data. We only - // use it if the mirror::Class* are in the boot image, so it is fine to keep raw - // mirror::Class pointers in this structure. + // Temporary data structure for holding Integer.valueOf data for generating code. + // We only use it if the boot image contains the IntegerCache objects. struct IntegerValueOfInfo { - IntegerValueOfInfo() - : integer_cache(nullptr), - integer(nullptr), - cache(nullptr), - low(0), - high(0), - value_offset(0) {} - - // The java.lang.IntegerCache class. - mirror::Class* integer_cache; - // The java.lang.Integer class. - mirror::Class* integer; - // Value of java.lang.IntegerCache#cache. - mirror::ObjectArray<mirror::Object>* cache; - // Value of java.lang.IntegerCache#low. + static constexpr uint32_t kInvalidReference = static_cast<uint32_t>(-1); + + IntegerValueOfInfo(); + + // Offset of the Integer.value field for initializing a newly allocated instance. + uint32_t value_offset; + // The low value in the cache. int32_t low; - // Value of java.lang.IntegerCache#high. - int32_t high; - // The offset of java.lang.Integer.value. - int32_t value_offset; + // The length of the cache array. + uint32_t length; + + // Boot image offset of java.lang.Integer for allocating an instance. + uint32_t integer_boot_image_offset; // Set to kInvalidReference when compiling the boot image. + + // This union contains references to the boot image. For app AOT or JIT compilation, + // these are the boot image offsets of the target. For boot image compilation, the + // location shall be known only at link time, so we encode a symbolic reference using + // IntrinsicObjects::EncodePatch(). + union { + // The target value for a constant input in the cache range. If the constant input + // is out of range (use `low` and `length` to check), this value is bogus (set to + // kInvalidReference) and the code must allocate a new Integer. + uint32_t value_boot_image_reference; + + // The cache array data used for a non-constant input in the cache range. + // If the input is out of range, the code must allocate a new Integer. 
+ uint32_t array_data_boot_image_reference; + }; }; - static IntegerValueOfInfo ComputeIntegerValueOfInfo(); + static IntegerValueOfInfo ComputeIntegerValueOfInfo( + HInvoke* invoke, const CompilerOptions& compiler_options); protected: IntrinsicVisitor() {} + static void AssertNonMovableStringClass(); + private: DISALLOW_COPY_AND_ASSIGN(IntrinsicVisitor); }; @@ -211,7 +198,6 @@ class StringEqualsOptimizations : public IntrinsicOptimizations { INTRINSIC_OPTIMIZATION(ArgumentNotNull, 0); INTRINSIC_OPTIMIZATION(ArgumentIsString, 1); - INTRINSIC_OPTIMIZATION(NoReadBarrierForStringClass, 2); private: DISALLOW_COPY_AND_ASSIGN(StringEqualsOptimizations); @@ -255,17 +241,33 @@ void IntrinsicCodeGenerator ## Arch::Visit ## Name(HInvoke* invoke ATTRIBUTE_UNU // Defines a list of unreached intrinsics: that is, method calls that are recognized as // an intrinsic, and then always converted into HIR instructions before they reach any -// architecture-specific intrinsics code generator. +// architecture-specific intrinsics code generator. This only applies to non-baseline +// compilation. #define UNREACHABLE_INTRINSIC(Arch, Name) \ void IntrinsicLocationsBuilder ## Arch::Visit ## Name(HInvoke* invoke) { \ - LOG(FATAL) << "Unreachable: intrinsic " << invoke->GetIntrinsic() \ - << " should have been converted to HIR"; \ + if (Runtime::Current()->IsAotCompiler() && \ + !codegen_->GetCompilerOptions().IsBaseline()) { \ + LOG(FATAL) << "Unreachable: intrinsic " << invoke->GetIntrinsic() \ + << " should have been converted to HIR"; \ + } \ } \ void IntrinsicCodeGenerator ## Arch::Visit ## Name(HInvoke* invoke) { \ LOG(FATAL) << "Unreachable: intrinsic " << invoke->GetIntrinsic() \ << " should have been converted to HIR"; \ } #define UNREACHABLE_INTRINSICS(Arch) \ +UNREACHABLE_INTRINSIC(Arch, MathMinIntInt) \ +UNREACHABLE_INTRINSIC(Arch, MathMinLongLong) \ +UNREACHABLE_INTRINSIC(Arch, MathMinFloatFloat) \ +UNREACHABLE_INTRINSIC(Arch, MathMinDoubleDouble) \ +UNREACHABLE_INTRINSIC(Arch, MathMaxIntInt) \ +UNREACHABLE_INTRINSIC(Arch, MathMaxLongLong) \ +UNREACHABLE_INTRINSIC(Arch, MathMaxFloatFloat) \ +UNREACHABLE_INTRINSIC(Arch, MathMaxDoubleDouble) \ +UNREACHABLE_INTRINSIC(Arch, MathAbsInt) \ +UNREACHABLE_INTRINSIC(Arch, MathAbsLong) \ +UNREACHABLE_INTRINSIC(Arch, MathAbsFloat) \ +UNREACHABLE_INTRINSIC(Arch, MathAbsDouble) \ UNREACHABLE_INTRINSIC(Arch, FloatFloatToIntBits) \ UNREACHABLE_INTRINSIC(Arch, DoubleDoubleToLongBits) \ UNREACHABLE_INTRINSIC(Arch, FloatIsNaN) \ diff --git a/compiler/optimizing/intrinsics_arm64.cc b/compiler/optimizing/intrinsics_arm64.cc index 0e6485be9f..ec5d17a443 100644 --- a/compiler/optimizing/intrinsics_arm64.cc +++ b/compiler/optimizing/intrinsics_arm64.cc @@ -112,7 +112,7 @@ class IntrinsicSlowPathARM64 : public SlowPathCodeARM64 { explicit IntrinsicSlowPathARM64(HInvoke* invoke) : SlowPathCodeARM64(invoke), invoke_(invoke) { } - void EmitNativeCode(CodeGenerator* codegen_in) OVERRIDE { + void EmitNativeCode(CodeGenerator* codegen_in) override { CodeGeneratorARM64* codegen = down_cast<CodeGeneratorARM64*>(codegen_in); __ Bind(GetEntryLabel()); @@ -145,7 +145,7 @@ class IntrinsicSlowPathARM64 : public SlowPathCodeARM64 { __ B(GetExitLabel()); } - const char* GetDescription() const OVERRIDE { return "IntrinsicSlowPathARM64"; } + const char* GetDescription() const override { return "IntrinsicSlowPathARM64"; } private: // The instruction where this slow path is happening. 
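For a concrete instance of the UNREACHABLE_INTRINSIC change above: UNREACHABLE_INTRINSIC(ARM64, MathAbsInt) now expands to roughly the following (macro expansion with whitespace normalized; MathAbsInt is one of the Min/Max/Abs intrinsics listed in UNREACHABLE_INTRINSICS):

  void IntrinsicLocationsBuilderARM64::VisitMathAbsInt(HInvoke* invoke) {
    // No fatal error for JIT or baseline AOT compilation; only optimized AOT
    // treats reaching this visitor as a compiler bug.
    if (Runtime::Current()->IsAotCompiler() &&
        !codegen_->GetCompilerOptions().IsBaseline()) {
      LOG(FATAL) << "Unreachable: intrinsic " << invoke->GetIntrinsic()
                 << " should have been converted to HIR";
    }
  }
  void IntrinsicCodeGeneratorARM64::VisitMathAbsInt(HInvoke* invoke) {
    LOG(FATAL) << "Unreachable: intrinsic " << invoke->GetIntrinsic()
               << " should have been converted to HIR";
  }

This matches the removal of the hand-written MathAbs/MathMin/MathMax ARM64 implementations in intrinsics_arm64.cc below.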
@@ -163,7 +163,7 @@ class ReadBarrierSystemArrayCopySlowPathARM64 : public SlowPathCodeARM64 { DCHECK(kUseBakerReadBarrier); } - void EmitNativeCode(CodeGenerator* codegen_in) OVERRIDE { + void EmitNativeCode(CodeGenerator* codegen_in) override { CodeGeneratorARM64* codegen = down_cast<CodeGeneratorARM64*>(codegen_in); LocationSummary* locations = instruction_->GetLocations(); DCHECK(locations->CanCall()); @@ -216,7 +216,7 @@ class ReadBarrierSystemArrayCopySlowPathARM64 : public SlowPathCodeARM64 { __ B(GetExitLabel()); } - const char* GetDescription() const OVERRIDE { return "ReadBarrierSystemArrayCopySlowPathARM64"; } + const char* GetDescription() const override { return "ReadBarrierSystemArrayCopySlowPathARM64"; } private: Location tmp_; @@ -272,10 +272,10 @@ void IntrinsicLocationsBuilderARM64::VisitDoubleLongBitsToDouble(HInvoke* invoke } void IntrinsicCodeGeneratorARM64::VisitDoubleDoubleToRawLongBits(HInvoke* invoke) { - MoveFPToInt(invoke->GetLocations(), /* is64bit */ true, GetVIXLAssembler()); + MoveFPToInt(invoke->GetLocations(), /* is64bit= */ true, GetVIXLAssembler()); } void IntrinsicCodeGeneratorARM64::VisitDoubleLongBitsToDouble(HInvoke* invoke) { - MoveIntToFP(invoke->GetLocations(), /* is64bit */ true, GetVIXLAssembler()); + MoveIntToFP(invoke->GetLocations(), /* is64bit= */ true, GetVIXLAssembler()); } void IntrinsicLocationsBuilderARM64::VisitFloatFloatToRawIntBits(HInvoke* invoke) { @@ -286,10 +286,10 @@ void IntrinsicLocationsBuilderARM64::VisitFloatIntBitsToFloat(HInvoke* invoke) { } void IntrinsicCodeGeneratorARM64::VisitFloatFloatToRawIntBits(HInvoke* invoke) { - MoveFPToInt(invoke->GetLocations(), /* is64bit */ false, GetVIXLAssembler()); + MoveFPToInt(invoke->GetLocations(), /* is64bit= */ false, GetVIXLAssembler()); } void IntrinsicCodeGeneratorARM64::VisitFloatIntBitsToFloat(HInvoke* invoke) { - MoveIntToFP(invoke->GetLocations(), /* is64bit */ false, GetVIXLAssembler()); + MoveIntToFP(invoke->GetLocations(), /* is64bit= */ false, GetVIXLAssembler()); } static void CreateIntToIntLocations(ArenaAllocator* allocator, HInvoke* invoke) { @@ -344,14 +344,6 @@ void IntrinsicCodeGeneratorARM64::VisitShortReverseBytes(HInvoke* invoke) { GenReverseBytes(invoke->GetLocations(), DataType::Type::kInt16, GetVIXLAssembler()); } -static void CreateIntIntToIntLocations(ArenaAllocator* allocator, HInvoke* invoke) { - LocationSummary* locations = - new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified); - locations->SetInAt(0, Location::RequiresRegister()); - locations->SetInAt(1, Location::RequiresRegister()); - locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); -} - static void GenNumberOfLeadingZeros(LocationSummary* locations, DataType::Type type, MacroAssembler* masm) { @@ -536,168 +528,6 @@ static void CreateFPToFPLocations(ArenaAllocator* allocator, HInvoke* invoke) { locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap); } -static void MathAbsFP(LocationSummary* locations, bool is64bit, MacroAssembler* masm) { - Location in = locations->InAt(0); - Location out = locations->Out(); - - FPRegister in_reg = is64bit ? DRegisterFrom(in) : SRegisterFrom(in); - FPRegister out_reg = is64bit ? 
DRegisterFrom(out) : SRegisterFrom(out); - - __ Fabs(out_reg, in_reg); -} - -void IntrinsicLocationsBuilderARM64::VisitMathAbsDouble(HInvoke* invoke) { - CreateFPToFPLocations(allocator_, invoke); -} - -void IntrinsicCodeGeneratorARM64::VisitMathAbsDouble(HInvoke* invoke) { - MathAbsFP(invoke->GetLocations(), /* is64bit */ true, GetVIXLAssembler()); -} - -void IntrinsicLocationsBuilderARM64::VisitMathAbsFloat(HInvoke* invoke) { - CreateFPToFPLocations(allocator_, invoke); -} - -void IntrinsicCodeGeneratorARM64::VisitMathAbsFloat(HInvoke* invoke) { - MathAbsFP(invoke->GetLocations(), /* is64bit */ false, GetVIXLAssembler()); -} - -static void GenAbsInteger(LocationSummary* locations, - bool is64bit, - MacroAssembler* masm) { - Location in = locations->InAt(0); - Location output = locations->Out(); - - Register in_reg = is64bit ? XRegisterFrom(in) : WRegisterFrom(in); - Register out_reg = is64bit ? XRegisterFrom(output) : WRegisterFrom(output); - - __ Cmp(in_reg, Operand(0)); - __ Cneg(out_reg, in_reg, lt); -} - -void IntrinsicLocationsBuilderARM64::VisitMathAbsInt(HInvoke* invoke) { - CreateIntToIntLocations(allocator_, invoke); -} - -void IntrinsicCodeGeneratorARM64::VisitMathAbsInt(HInvoke* invoke) { - GenAbsInteger(invoke->GetLocations(), /* is64bit */ false, GetVIXLAssembler()); -} - -void IntrinsicLocationsBuilderARM64::VisitMathAbsLong(HInvoke* invoke) { - CreateIntToIntLocations(allocator_, invoke); -} - -void IntrinsicCodeGeneratorARM64::VisitMathAbsLong(HInvoke* invoke) { - GenAbsInteger(invoke->GetLocations(), /* is64bit */ true, GetVIXLAssembler()); -} - -static void GenMinMaxFP(LocationSummary* locations, - bool is_min, - bool is_double, - MacroAssembler* masm) { - Location op1 = locations->InAt(0); - Location op2 = locations->InAt(1); - Location out = locations->Out(); - - FPRegister op1_reg = is_double ? DRegisterFrom(op1) : SRegisterFrom(op1); - FPRegister op2_reg = is_double ? DRegisterFrom(op2) : SRegisterFrom(op2); - FPRegister out_reg = is_double ? 
DRegisterFrom(out) : SRegisterFrom(out); - if (is_min) { - __ Fmin(out_reg, op1_reg, op2_reg); - } else { - __ Fmax(out_reg, op1_reg, op2_reg); - } -} - -static void CreateFPFPToFPLocations(ArenaAllocator* allocator, HInvoke* invoke) { - LocationSummary* locations = - new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified); - locations->SetInAt(0, Location::RequiresFpuRegister()); - locations->SetInAt(1, Location::RequiresFpuRegister()); - locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap); -} - -void IntrinsicLocationsBuilderARM64::VisitMathMinDoubleDouble(HInvoke* invoke) { - CreateFPFPToFPLocations(allocator_, invoke); -} - -void IntrinsicCodeGeneratorARM64::VisitMathMinDoubleDouble(HInvoke* invoke) { - GenMinMaxFP(invoke->GetLocations(), /* is_min */ true, /* is_double */ true, GetVIXLAssembler()); -} - -void IntrinsicLocationsBuilderARM64::VisitMathMinFloatFloat(HInvoke* invoke) { - CreateFPFPToFPLocations(allocator_, invoke); -} - -void IntrinsicCodeGeneratorARM64::VisitMathMinFloatFloat(HInvoke* invoke) { - GenMinMaxFP(invoke->GetLocations(), /* is_min */ true, /* is_double */ false, GetVIXLAssembler()); -} - -void IntrinsicLocationsBuilderARM64::VisitMathMaxDoubleDouble(HInvoke* invoke) { - CreateFPFPToFPLocations(allocator_, invoke); -} - -void IntrinsicCodeGeneratorARM64::VisitMathMaxDoubleDouble(HInvoke* invoke) { - GenMinMaxFP(invoke->GetLocations(), /* is_min */ false, /* is_double */ true, GetVIXLAssembler()); -} - -void IntrinsicLocationsBuilderARM64::VisitMathMaxFloatFloat(HInvoke* invoke) { - CreateFPFPToFPLocations(allocator_, invoke); -} - -void IntrinsicCodeGeneratorARM64::VisitMathMaxFloatFloat(HInvoke* invoke) { - GenMinMaxFP( - invoke->GetLocations(), /* is_min */ false, /* is_double */ false, GetVIXLAssembler()); -} - -static void GenMinMax(LocationSummary* locations, - bool is_min, - bool is_long, - MacroAssembler* masm) { - Location op1 = locations->InAt(0); - Location op2 = locations->InAt(1); - Location out = locations->Out(); - - Register op1_reg = is_long ? XRegisterFrom(op1) : WRegisterFrom(op1); - Register op2_reg = is_long ? XRegisterFrom(op2) : WRegisterFrom(op2); - Register out_reg = is_long ? XRegisterFrom(out) : WRegisterFrom(out); - - __ Cmp(op1_reg, op2_reg); - __ Csel(out_reg, op1_reg, op2_reg, is_min ? 
lt : gt); -} - -void IntrinsicLocationsBuilderARM64::VisitMathMinIntInt(HInvoke* invoke) { - CreateIntIntToIntLocations(allocator_, invoke); -} - -void IntrinsicCodeGeneratorARM64::VisitMathMinIntInt(HInvoke* invoke) { - GenMinMax(invoke->GetLocations(), /* is_min */ true, /* is_long */ false, GetVIXLAssembler()); -} - -void IntrinsicLocationsBuilderARM64::VisitMathMinLongLong(HInvoke* invoke) { - CreateIntIntToIntLocations(allocator_, invoke); -} - -void IntrinsicCodeGeneratorARM64::VisitMathMinLongLong(HInvoke* invoke) { - GenMinMax(invoke->GetLocations(), /* is_min */ true, /* is_long */ true, GetVIXLAssembler()); -} - -void IntrinsicLocationsBuilderARM64::VisitMathMaxIntInt(HInvoke* invoke) { - CreateIntIntToIntLocations(allocator_, invoke); -} - -void IntrinsicCodeGeneratorARM64::VisitMathMaxIntInt(HInvoke* invoke) { - GenMinMax(invoke->GetLocations(), /* is_min */ false, /* is_long */ false, GetVIXLAssembler()); -} - -void IntrinsicLocationsBuilderARM64::VisitMathMaxLongLong(HInvoke* invoke) { - CreateIntIntToIntLocations(allocator_, invoke); -} - -void IntrinsicCodeGeneratorARM64::VisitMathMaxLongLong(HInvoke* invoke) { - GenMinMax(invoke->GetLocations(), /* is_min */ false, /* is_long */ true, GetVIXLAssembler()); -} - void IntrinsicLocationsBuilderARM64::VisitMathSqrt(HInvoke* invoke) { CreateFPToFPLocations(allocator_, invoke); } @@ -788,7 +618,7 @@ void IntrinsicLocationsBuilderARM64::VisitMathRoundDouble(HInvoke* invoke) { } void IntrinsicCodeGeneratorARM64::VisitMathRoundDouble(HInvoke* invoke) { - GenMathRound(invoke, /* is_double */ true, GetVIXLAssembler()); + GenMathRound(invoke, /* is_double= */ true, GetVIXLAssembler()); } void IntrinsicLocationsBuilderARM64::VisitMathRoundFloat(HInvoke* invoke) { @@ -796,7 +626,7 @@ void IntrinsicLocationsBuilderARM64::VisitMathRoundFloat(HInvoke* invoke) { } void IntrinsicCodeGeneratorARM64::VisitMathRoundFloat(HInvoke* invoke) { - GenMathRound(invoke, /* is_double */ false, GetVIXLAssembler()); + GenMathRound(invoke, /* is_double= */ false, GetVIXLAssembler()); } void IntrinsicLocationsBuilderARM64::VisitMemoryPeekByte(HInvoke* invoke) { @@ -915,20 +745,20 @@ static void GenUnsafeGet(HInvoke* invoke, if (type == DataType::Type::kReference && kEmitCompilerReadBarrier && kUseBakerReadBarrier) { // UnsafeGetObject/UnsafeGetObjectVolatile with Baker's read barrier case. Register temp = WRegisterFrom(locations->GetTemp(0)); - codegen->GenerateReferenceLoadWithBakerReadBarrier(invoke, - trg_loc, - base, - /* offset */ 0u, - /* index */ offset_loc, - /* scale_factor */ 0u, - temp, - /* needs_null_check */ false, - is_volatile); + MacroAssembler* masm = codegen->GetVIXLAssembler(); + // Piggy-back on the field load path using introspection for the Baker read barrier. + __ Add(temp, base, offset.W()); // Offset should not exceed 32 bits. + codegen->GenerateFieldLoadWithBakerReadBarrier(invoke, + trg_loc, + base, + MemOperand(temp.X()), + /* needs_null_check= */ false, + is_volatile); } else { // Other cases. MemOperand mem_op(base.X(), offset); if (is_volatile) { - codegen->LoadAcquire(invoke, trg, mem_op, /* needs_null_check */ true); + codegen->LoadAcquire(invoke, trg, mem_op, /* needs_null_check= */ true); } else { codegen->Load(type, trg, mem_op); } @@ -952,9 +782,9 @@ static void CreateIntIntIntToIntLocations(ArenaAllocator* allocator, HInvoke* in kIntrinsified); if (can_call && kUseBakerReadBarrier) { locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty()); // No caller-save registers. 
- // We need a temporary register for the read barrier marking slow - // path in CodeGeneratorARM64::GenerateReferenceLoadWithBakerReadBarrier. - locations->AddTemp(Location::RequiresRegister()); + // We need a temporary register for the read barrier load in order to use + // CodeGeneratorARM64::GenerateFieldLoadWithBakerReadBarrier(). + locations->AddTemp(FixedTempLocation()); } locations->SetInAt(0, Location::NoLocation()); // Unused receiver. locations->SetInAt(1, Location::RequiresRegister()); @@ -983,22 +813,22 @@ void IntrinsicLocationsBuilderARM64::VisitUnsafeGetObjectVolatile(HInvoke* invok } void IntrinsicCodeGeneratorARM64::VisitUnsafeGet(HInvoke* invoke) { - GenUnsafeGet(invoke, DataType::Type::kInt32, /* is_volatile */ false, codegen_); + GenUnsafeGet(invoke, DataType::Type::kInt32, /* is_volatile= */ false, codegen_); } void IntrinsicCodeGeneratorARM64::VisitUnsafeGetVolatile(HInvoke* invoke) { - GenUnsafeGet(invoke, DataType::Type::kInt32, /* is_volatile */ true, codegen_); + GenUnsafeGet(invoke, DataType::Type::kInt32, /* is_volatile= */ true, codegen_); } void IntrinsicCodeGeneratorARM64::VisitUnsafeGetLong(HInvoke* invoke) { - GenUnsafeGet(invoke, DataType::Type::kInt64, /* is_volatile */ false, codegen_); + GenUnsafeGet(invoke, DataType::Type::kInt64, /* is_volatile= */ false, codegen_); } void IntrinsicCodeGeneratorARM64::VisitUnsafeGetLongVolatile(HInvoke* invoke) { - GenUnsafeGet(invoke, DataType::Type::kInt64, /* is_volatile */ true, codegen_); + GenUnsafeGet(invoke, DataType::Type::kInt64, /* is_volatile= */ true, codegen_); } void IntrinsicCodeGeneratorARM64::VisitUnsafeGetObject(HInvoke* invoke) { - GenUnsafeGet(invoke, DataType::Type::kReference, /* is_volatile */ false, codegen_); + GenUnsafeGet(invoke, DataType::Type::kReference, /* is_volatile= */ false, codegen_); } void IntrinsicCodeGeneratorARM64::VisitUnsafeGetObjectVolatile(HInvoke* invoke) { - GenUnsafeGet(invoke, DataType::Type::kReference, /* is_volatile */ true, codegen_); + GenUnsafeGet(invoke, DataType::Type::kReference, /* is_volatile= */ true, codegen_); } static void CreateIntIntIntIntToVoid(ArenaAllocator* allocator, HInvoke* invoke) { @@ -1066,7 +896,7 @@ static void GenUnsafePut(HInvoke* invoke, } if (is_volatile || is_ordered) { - codegen->StoreRelease(invoke, type, source, mem_op, /* needs_null_check */ false); + codegen->StoreRelease(invoke, type, source, mem_op, /* needs_null_check= */ false); } else { codegen->Store(type, source, mem_op); } @@ -1081,64 +911,64 @@ static void GenUnsafePut(HInvoke* invoke, void IntrinsicCodeGeneratorARM64::VisitUnsafePut(HInvoke* invoke) { GenUnsafePut(invoke, DataType::Type::kInt32, - /* is_volatile */ false, - /* is_ordered */ false, + /* is_volatile= */ false, + /* is_ordered= */ false, codegen_); } void IntrinsicCodeGeneratorARM64::VisitUnsafePutOrdered(HInvoke* invoke) { GenUnsafePut(invoke, DataType::Type::kInt32, - /* is_volatile */ false, - /* is_ordered */ true, + /* is_volatile= */ false, + /* is_ordered= */ true, codegen_); } void IntrinsicCodeGeneratorARM64::VisitUnsafePutVolatile(HInvoke* invoke) { GenUnsafePut(invoke, DataType::Type::kInt32, - /* is_volatile */ true, - /* is_ordered */ false, + /* is_volatile= */ true, + /* is_ordered= */ false, codegen_); } void IntrinsicCodeGeneratorARM64::VisitUnsafePutObject(HInvoke* invoke) { GenUnsafePut(invoke, DataType::Type::kReference, - /* is_volatile */ false, - /* is_ordered */ false, + /* is_volatile= */ false, + /* is_ordered= */ false, codegen_); } void 
IntrinsicCodeGeneratorARM64::VisitUnsafePutObjectOrdered(HInvoke* invoke) { GenUnsafePut(invoke, DataType::Type::kReference, - /* is_volatile */ false, - /* is_ordered */ true, + /* is_volatile= */ false, + /* is_ordered= */ true, codegen_); } void IntrinsicCodeGeneratorARM64::VisitUnsafePutObjectVolatile(HInvoke* invoke) { GenUnsafePut(invoke, DataType::Type::kReference, - /* is_volatile */ true, - /* is_ordered */ false, + /* is_volatile= */ true, + /* is_ordered= */ false, codegen_); } void IntrinsicCodeGeneratorARM64::VisitUnsafePutLong(HInvoke* invoke) { GenUnsafePut(invoke, DataType::Type::kInt64, - /* is_volatile */ false, - /* is_ordered */ false, + /* is_volatile= */ false, + /* is_ordered= */ false, codegen_); } void IntrinsicCodeGeneratorARM64::VisitUnsafePutLongOrdered(HInvoke* invoke) { GenUnsafePut(invoke, DataType::Type::kInt64, - /* is_volatile */ false, - /* is_ordered */ true, + /* is_volatile= */ false, + /* is_ordered= */ true, codegen_); } void IntrinsicCodeGeneratorARM64::VisitUnsafePutLongVolatile(HInvoke* invoke) { GenUnsafePut(invoke, DataType::Type::kInt64, - /* is_volatile */ true, - /* is_ordered */ false, + /* is_volatile= */ true, + /* is_ordered= */ false, codegen_); } @@ -1154,106 +984,155 @@ static void CreateIntIntIntIntIntToInt(ArenaAllocator* allocator, ? LocationSummary::kCallOnSlowPath : LocationSummary::kNoCall, kIntrinsified); + if (can_call) { + locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty()); // No caller-save registers. + } locations->SetInAt(0, Location::NoLocation()); // Unused receiver. locations->SetInAt(1, Location::RequiresRegister()); locations->SetInAt(2, Location::RequiresRegister()); locations->SetInAt(3, Location::RequiresRegister()); locations->SetInAt(4, Location::RequiresRegister()); - // If heap poisoning is enabled, we don't want the unpoisoning - // operations to potentially clobber the output. Likewise when - // emitting a (Baker) read barrier, which may call. - Location::OutputOverlap overlaps = - ((kPoisonHeapReferences && type == DataType::Type::kReference) || can_call) - ? Location::kOutputOverlap - : Location::kNoOutputOverlap; - locations->SetOut(Location::RequiresRegister(), overlaps); + locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); if (type == DataType::Type::kReference && kEmitCompilerReadBarrier && kUseBakerReadBarrier) { - // Temporary register for (Baker) read barrier. + // We need two non-scratch temporary registers for (Baker) read barrier. + locations->AddTemp(Location::RequiresRegister()); locations->AddTemp(Location::RequiresRegister()); } } +class BakerReadBarrierCasSlowPathARM64 : public SlowPathCodeARM64 { + public: + explicit BakerReadBarrierCasSlowPathARM64(HInvoke* invoke) + : SlowPathCodeARM64(invoke) {} + + const char* GetDescription() const override { return "BakerReadBarrierCasSlowPathARM64"; } + + void EmitNativeCode(CodeGenerator* codegen) override { + CodeGeneratorARM64* arm64_codegen = down_cast<CodeGeneratorARM64*>(codegen); + Arm64Assembler* assembler = arm64_codegen->GetAssembler(); + MacroAssembler* masm = assembler->GetVIXLAssembler(); + __ Bind(GetEntryLabel()); + + // Get the locations. + LocationSummary* locations = instruction_->GetLocations(); + Register base = WRegisterFrom(locations->InAt(1)); // Object pointer. + Register offset = XRegisterFrom(locations->InAt(2)); // Long offset. + Register expected = WRegisterFrom(locations->InAt(3)); // Expected. + Register value = WRegisterFrom(locations->InAt(4)); // Value. 
+ + Register old_value = WRegisterFrom(locations->GetTemp(0)); // The old value from main path. + Register marked = WRegisterFrom(locations->GetTemp(1)); // The marked old value. + + // Mark the `old_value` from the main path and compare with `expected`. This clobbers the + // `tmp_ptr` scratch register but we do not want to allocate another non-scratch temporary. + arm64_codegen->GenerateUnsafeCasOldValueMovWithBakerReadBarrier(marked, old_value); + __ Cmp(marked, expected); + __ B(GetExitLabel(), ne); // If taken, Z=false indicates failure. + + // The `old_value` we have read did not match `expected` (which is always a to-space reference) + // but after the read barrier in GenerateUnsafeCasOldValueMovWithBakerReadBarrier() the marked + // to-space value matched, so the `old_value` must be a from-space reference to the same + // object. Do the same CAS loop as the main path but check for both `expected` and the unmarked + // old value representing the to-space and from-space references for the same object. + + UseScratchRegisterScope temps(masm); + Register tmp_ptr = temps.AcquireX(); + Register tmp = temps.AcquireSameSizeAs(value); + + // Recalculate the `tmp_ptr` clobbered above. + __ Add(tmp_ptr, base.X(), Operand(offset)); + + // do { + // tmp_value = [tmp_ptr]; + // } while ((tmp_value == expected || tmp == old_value) && failure([tmp_ptr] <- r_new_value)); + // result = (tmp_value == expected || tmp == old_value); + + vixl::aarch64::Label loop_head; + __ Bind(&loop_head); + __ Ldaxr(tmp, MemOperand(tmp_ptr)); + assembler->MaybeUnpoisonHeapReference(tmp); + __ Cmp(tmp, expected); + __ Ccmp(tmp, old_value, ZFlag, ne); + __ B(GetExitLabel(), ne); // If taken, Z=false indicates failure. + assembler->MaybePoisonHeapReference(value); + __ Stlxr(tmp.W(), value, MemOperand(tmp_ptr)); + assembler->MaybeUnpoisonHeapReference(value); + __ Cbnz(tmp.W(), &loop_head); + + // Z=true from the above CMP+CCMP indicates success. + __ B(GetExitLabel()); + } +}; + static void GenCas(HInvoke* invoke, DataType::Type type, CodeGeneratorARM64* codegen) { - MacroAssembler* masm = codegen->GetVIXLAssembler(); + Arm64Assembler* assembler = codegen->GetAssembler(); + MacroAssembler* masm = assembler->GetVIXLAssembler(); LocationSummary* locations = invoke->GetLocations(); - Location out_loc = locations->Out(); - Register out = WRegisterFrom(out_loc); // Boolean result. - - Register base = WRegisterFrom(locations->InAt(1)); // Object pointer. - Location offset_loc = locations->InAt(2); - Register offset = XRegisterFrom(offset_loc); // Long offset. - Register expected = RegisterFrom(locations->InAt(3), type); // Expected. - Register value = RegisterFrom(locations->InAt(4), type); // Value. + Register out = WRegisterFrom(locations->Out()); // Boolean result. + Register base = WRegisterFrom(locations->InAt(1)); // Object pointer. + Register offset = XRegisterFrom(locations->InAt(2)); // Long offset. + Register expected = RegisterFrom(locations->InAt(3), type); // Expected. + Register value = RegisterFrom(locations->InAt(4), type); // Value. // This needs to be before the temp registers, as MarkGCCard also uses VIXL temps. if (type == DataType::Type::kReference) { // Mark card for object assuming new value is stored. bool value_can_be_null = true; // TODO: Worth finding out this information? codegen->MarkGCCard(base, value, value_can_be_null); - - // The only read barrier implementation supporting the - // UnsafeCASObject intrinsic is the Baker-style read barriers. 
- DCHECK(!kEmitCompilerReadBarrier || kUseBakerReadBarrier); - - if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) { - Register temp = WRegisterFrom(locations->GetTemp(0)); - // Need to make sure the reference stored in the field is a to-space - // one before attempting the CAS or the CAS could fail incorrectly. - codegen->UpdateReferenceFieldWithBakerReadBarrier( - invoke, - out_loc, // Unused, used only as a "temporary" within the read barrier. - base, - /* field_offset */ offset_loc, - temp, - /* needs_null_check */ false, - /* use_load_acquire */ false); - } } UseScratchRegisterScope temps(masm); Register tmp_ptr = temps.AcquireX(); // Pointer to actual memory. - Register tmp_value = temps.AcquireSameSizeAs(value); // Value in memory. + Register old_value; // Value in memory. - Register tmp_32 = tmp_value.W(); + vixl::aarch64::Label exit_loop_label; + vixl::aarch64::Label* exit_loop = &exit_loop_label; + vixl::aarch64::Label* failure = &exit_loop_label; - __ Add(tmp_ptr, base.X(), Operand(offset)); + if (kEmitCompilerReadBarrier && type == DataType::Type::kReference) { + // The only read barrier implementation supporting the + // UnsafeCASObject intrinsic is the Baker-style read barriers. + DCHECK(kUseBakerReadBarrier); - if (kPoisonHeapReferences && type == DataType::Type::kReference) { - codegen->GetAssembler()->PoisonHeapReference(expected); - if (value.Is(expected)) { - // Do not poison `value`, as it is the same register as - // `expected`, which has just been poisoned. - } else { - codegen->GetAssembler()->PoisonHeapReference(value); - } + BakerReadBarrierCasSlowPathARM64* slow_path = + new (codegen->GetScopedAllocator()) BakerReadBarrierCasSlowPathARM64(invoke); + codegen->AddSlowPath(slow_path); + exit_loop = slow_path->GetExitLabel(); + failure = slow_path->GetEntryLabel(); + // We need to store the `old_value` in a non-scratch register to make sure + // the Baker read barrier in the slow path does not clobber it. + old_value = WRegisterFrom(locations->GetTemp(0)); + } else { + old_value = temps.AcquireSameSizeAs(value); } + __ Add(tmp_ptr, base.X(), Operand(offset)); + // do { - // tmp_value = [tmp_ptr] - expected; - // } while (tmp_value == 0 && failure([tmp_ptr] <- r_new_value)); - // result = tmp_value != 0; + // tmp_value = [tmp_ptr]; + // } while (tmp_value == expected && failure([tmp_ptr] <- r_new_value)); + // result = tmp_value == expected; - vixl::aarch64::Label loop_head, exit_loop; + vixl::aarch64::Label loop_head; __ Bind(&loop_head); - __ Ldaxr(tmp_value, MemOperand(tmp_ptr)); - __ Cmp(tmp_value, expected); - __ B(&exit_loop, ne); - __ Stlxr(tmp_32, value, MemOperand(tmp_ptr)); - __ Cbnz(tmp_32, &loop_head); - __ Bind(&exit_loop); - __ Cset(out, eq); - - if (kPoisonHeapReferences && type == DataType::Type::kReference) { - codegen->GetAssembler()->UnpoisonHeapReference(expected); - if (value.Is(expected)) { - // Do not unpoison `value`, as it is the same register as - // `expected`, which has just been unpoisoned. - } else { - codegen->GetAssembler()->UnpoisonHeapReference(value); - } + __ Ldaxr(old_value, MemOperand(tmp_ptr)); + if (type == DataType::Type::kReference) { + assembler->MaybeUnpoisonHeapReference(old_value); + } + __ Cmp(old_value, expected); + __ B(failure, ne); + if (type == DataType::Type::kReference) { + assembler->MaybePoisonHeapReference(value); } + __ Stlxr(old_value.W(), value, MemOperand(tmp_ptr)); // Reuse `old_value` for STLXR result. 
+ if (type == DataType::Type::kReference) { + assembler->MaybeUnpoisonHeapReference(value); + } + __ Cbnz(old_value.W(), &loop_head); + __ Bind(exit_loop); + __ Cset(out, eq); } void IntrinsicLocationsBuilderARM64::VisitUnsafeCASInt(HInvoke* invoke) { @@ -1519,13 +1398,6 @@ static const char* GetConstString(HInstruction* candidate, uint32_t* utf16_lengt } void IntrinsicLocationsBuilderARM64::VisitStringEquals(HInvoke* invoke) { - if (kEmitCompilerReadBarrier && - !StringEqualsOptimizations(invoke).GetArgumentIsString() && - !StringEqualsOptimizations(invoke).GetNoReadBarrierForStringClass()) { - // No support for this odd case (String class is moveable, not in the boot image). - return; - } - LocationSummary* locations = new (allocator_) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified); locations->SetInAt(0, Location::RequiresRegister()); @@ -1592,8 +1464,16 @@ void IntrinsicCodeGeneratorARM64::VisitStringEquals(HInvoke* invoke) { // All string objects must have the same type since String cannot be subclassed. // Receiver must be a string object, so its class field is equal to all strings' class fields. // If the argument is a string object, its class field must be equal to receiver's class field. + // + // As the String class is expected to be non-movable, we can read the class + // field from String.equals' arguments without read barriers. + AssertNonMovableStringClass(); + // /* HeapReference<Class> */ temp = str->klass_ __ Ldr(temp, MemOperand(str.X(), class_offset)); + // /* HeapReference<Class> */ temp1 = arg->klass_ __ Ldr(temp1, MemOperand(arg.X(), class_offset)); + // Also, because we use the previously loaded class references only in the + // following comparison, we don't need to unpoison them. __ Cmp(temp, temp1); __ B(&return_false, ne); } @@ -1766,7 +1646,7 @@ void IntrinsicLocationsBuilderARM64::VisitStringIndexOf(HInvoke* invoke) { } void IntrinsicCodeGeneratorARM64::VisitStringIndexOf(HInvoke* invoke) { - GenerateVisitStringIndexOf(invoke, GetVIXLAssembler(), codegen_, /* start_at_zero */ true); + GenerateVisitStringIndexOf(invoke, GetVIXLAssembler(), codegen_, /* start_at_zero= */ true); } void IntrinsicLocationsBuilderARM64::VisitStringIndexOfAfter(HInvoke* invoke) { @@ -1782,7 +1662,7 @@ void IntrinsicLocationsBuilderARM64::VisitStringIndexOfAfter(HInvoke* invoke) { } void IntrinsicCodeGeneratorARM64::VisitStringIndexOfAfter(HInvoke* invoke) { - GenerateVisitStringIndexOf(invoke, GetVIXLAssembler(), codegen_, /* start_at_zero */ false); + GenerateVisitStringIndexOf(invoke, GetVIXLAssembler(), codegen_, /* start_at_zero= */ false); } void IntrinsicLocationsBuilderARM64::VisitStringNewStringFromBytes(HInvoke* invoke) { @@ -2584,8 +2464,8 @@ void IntrinsicCodeGeneratorARM64::VisitSystemArrayCopy(HInvoke* invoke) { src.W(), class_offset, temp3_loc, - /* needs_null_check */ false, - /* use_load_acquire */ false); + /* needs_null_check= */ false, + /* use_load_acquire= */ false); // Bail out if the source is not a non primitive array. 
// /* HeapReference<Class> */ temp1 = temp1->component_type_ codegen_->GenerateFieldLoadWithBakerReadBarrier(invoke, @@ -2593,8 +2473,8 @@ void IntrinsicCodeGeneratorARM64::VisitSystemArrayCopy(HInvoke* invoke) { temp1, component_offset, temp3_loc, - /* needs_null_check */ false, - /* use_load_acquire */ false); + /* needs_null_check= */ false, + /* use_load_acquire= */ false); __ Cbz(temp1, intrinsic_slow_path->GetEntryLabel()); // If heap poisoning is enabled, `temp1` has been unpoisoned // by the the previous call to GenerateFieldLoadWithBakerReadBarrier. @@ -2610,8 +2490,8 @@ void IntrinsicCodeGeneratorARM64::VisitSystemArrayCopy(HInvoke* invoke) { dest.W(), class_offset, temp3_loc, - /* needs_null_check */ false, - /* use_load_acquire */ false); + /* needs_null_check= */ false, + /* use_load_acquire= */ false); if (!optimizations.GetDestinationIsNonPrimitiveArray()) { // Bail out if the destination is not a non primitive array. @@ -2627,8 +2507,8 @@ void IntrinsicCodeGeneratorARM64::VisitSystemArrayCopy(HInvoke* invoke) { temp1, component_offset, temp3_loc, - /* needs_null_check */ false, - /* use_load_acquire */ false); + /* needs_null_check= */ false, + /* use_load_acquire= */ false); __ Cbz(temp2, intrinsic_slow_path->GetEntryLabel()); // If heap poisoning is enabled, `temp2` has been unpoisoned // by the the previous call to GenerateFieldLoadWithBakerReadBarrier. @@ -2646,8 +2526,8 @@ void IntrinsicCodeGeneratorARM64::VisitSystemArrayCopy(HInvoke* invoke) { src.W(), class_offset, temp3_loc, - /* needs_null_check */ false, - /* use_load_acquire */ false); + /* needs_null_check= */ false, + /* use_load_acquire= */ false); // Note: if heap poisoning is on, we are comparing two unpoisoned references here. __ Cmp(temp1, temp2); @@ -2660,8 +2540,8 @@ void IntrinsicCodeGeneratorARM64::VisitSystemArrayCopy(HInvoke* invoke) { temp1, component_offset, temp3_loc, - /* needs_null_check */ false, - /* use_load_acquire */ false); + /* needs_null_check= */ false, + /* use_load_acquire= */ false); // /* HeapReference<Class> */ temp1 = temp1->super_class_ // We do not need to emit a read barrier for the following // heap reference load, as `temp1` is only used in a @@ -2744,16 +2624,16 @@ void IntrinsicCodeGeneratorARM64::VisitSystemArrayCopy(HInvoke* invoke) { src.W(), class_offset, temp3_loc, - /* needs_null_check */ false, - /* use_load_acquire */ false); + /* needs_null_check= */ false, + /* use_load_acquire= */ false); // /* HeapReference<Class> */ temp2 = temp1->component_type_ codegen_->GenerateFieldLoadWithBakerReadBarrier(invoke, temp2_loc, temp1, component_offset, temp3_loc, - /* needs_null_check */ false, - /* use_load_acquire */ false); + /* needs_null_check= */ false, + /* use_load_acquire= */ false); __ Cbz(temp2, intrinsic_slow_path->GetEntryLabel()); // If heap poisoning is enabled, `temp2` has been unpoisoned // by the the previous call to GenerateFieldLoadWithBakerReadBarrier. @@ -2860,7 +2740,7 @@ void IntrinsicCodeGeneratorARM64::VisitSystemArrayCopy(HInvoke* invoke) { codegen_->AddSlowPath(read_barrier_slow_path); // Given the numeric representation, it's enough to check the low bit of the rb_state. 
- static_assert(ReadBarrier::WhiteState() == 0, "Expecting white to have value 0"); + static_assert(ReadBarrier::NonGrayState() == 0, "Expecting non-gray to have value 0"); static_assert(ReadBarrier::GrayState() == 1, "Expecting gray to have value 1"); __ Tbnz(tmp, LockWord::kReadBarrierStateShift, read_barrier_slow_path->GetEntryLabel()); @@ -2907,7 +2787,7 @@ void IntrinsicCodeGeneratorARM64::VisitSystemArrayCopy(HInvoke* invoke) { } // We only need one card marking on the destination array. - codegen_->MarkGCCard(dest.W(), Register(), /* value_can_be_null */ false); + codegen_->MarkGCCard(dest.W(), Register(), /* value_can_be_null= */ false); __ Bind(intrinsic_slow_path->GetExitLabel()); } @@ -2940,7 +2820,7 @@ void IntrinsicLocationsBuilderARM64::VisitFloatIsInfinite(HInvoke* invoke) { } void IntrinsicCodeGeneratorARM64::VisitFloatIsInfinite(HInvoke* invoke) { - GenIsInfinite(invoke->GetLocations(), /* is64bit */ false, GetVIXLAssembler()); + GenIsInfinite(invoke->GetLocations(), /* is64bit= */ false, GetVIXLAssembler()); } void IntrinsicLocationsBuilderARM64::VisitDoubleIsInfinite(HInvoke* invoke) { @@ -2948,7 +2828,7 @@ void IntrinsicLocationsBuilderARM64::VisitDoubleIsInfinite(HInvoke* invoke) { } void IntrinsicCodeGeneratorARM64::VisitDoubleIsInfinite(HInvoke* invoke) { - GenIsInfinite(invoke->GetLocations(), /* is64bit */ true, GetVIXLAssembler()); + GenIsInfinite(invoke->GetLocations(), /* is64bit= */ true, GetVIXLAssembler()); } void IntrinsicLocationsBuilderARM64::VisitIntegerValueOf(HInvoke* invoke) { @@ -2961,33 +2841,27 @@ void IntrinsicLocationsBuilderARM64::VisitIntegerValueOf(HInvoke* invoke) { } void IntrinsicCodeGeneratorARM64::VisitIntegerValueOf(HInvoke* invoke) { - IntrinsicVisitor::IntegerValueOfInfo info = IntrinsicVisitor::ComputeIntegerValueOfInfo(); + IntrinsicVisitor::IntegerValueOfInfo info = + IntrinsicVisitor::ComputeIntegerValueOfInfo(invoke, codegen_->GetCompilerOptions()); LocationSummary* locations = invoke->GetLocations(); MacroAssembler* masm = GetVIXLAssembler(); Register out = RegisterFrom(locations->Out(), DataType::Type::kReference); UseScratchRegisterScope temps(masm); Register temp = temps.AcquireW(); - InvokeRuntimeCallingConvention calling_convention; - Register argument = calling_convention.GetRegisterAt(0); if (invoke->InputAt(0)->IsConstant()) { int32_t value = invoke->InputAt(0)->AsIntConstant()->GetValue(); - if (value >= info.low && value <= info.high) { + if (static_cast<uint32_t>(value - info.low) < info.length) { // Just embed the j.l.Integer in the code. - ScopedObjectAccess soa(Thread::Current()); - mirror::Object* boxed = info.cache->Get(value + (-info.low)); - DCHECK(boxed != nullptr && Runtime::Current()->GetHeap()->ObjectIsInBootImageSpace(boxed)); - uint32_t address = dchecked_integral_cast<uint32_t>(reinterpret_cast<uintptr_t>(boxed)); - __ Ldr(out.W(), codegen_->DeduplicateBootImageAddressLiteral(address)); + DCHECK_NE(info.value_boot_image_reference, IntegerValueOfInfo::kInvalidReference); + codegen_->LoadBootImageAddress(out, info.value_boot_image_reference); } else { + DCHECK(locations->CanCall()); // Allocate and initialize a new j.l.Integer. // TODO: If we JIT, we could allocate the j.l.Integer now, and store it in the // JIT object table. 
- uint32_t address = - dchecked_integral_cast<uint32_t>(reinterpret_cast<uintptr_t>(info.integer)); - __ Ldr(argument.W(), codegen_->DeduplicateBootImageAddressLiteral(address)); - codegen_->InvokeRuntime(kQuickAllocObjectInitialized, invoke, invoke->GetDexPc()); - CheckEntrypointTypes<kQuickAllocObjectWithChecks, void*, mirror::Class*>(); + codegen_->AllocateInstanceForIntrinsic(invoke->AsInvokeStaticOrDirect(), + info.integer_boot_image_offset); __ Mov(temp.W(), value); __ Str(temp.W(), HeapOperand(out.W(), info.value_offset)); // `value` is a final field :-( Ideally, we'd merge this memory barrier with the allocation @@ -2995,16 +2869,15 @@ void IntrinsicCodeGeneratorARM64::VisitIntegerValueOf(HInvoke* invoke) { codegen_->GenerateMemoryBarrier(MemBarrierKind::kStoreStore); } } else { + DCHECK(locations->CanCall()); Register in = RegisterFrom(locations->InAt(0), DataType::Type::kInt32); // Check bounds of our cache. __ Add(out.W(), in.W(), -info.low); - __ Cmp(out.W(), info.high - info.low + 1); + __ Cmp(out.W(), info.length); vixl::aarch64::Label allocate, done; __ B(&allocate, hs); // If the value is within the bounds, load the j.l.Integer directly from the array. - uint32_t data_offset = mirror::Array::DataOffset(kHeapReferenceSize).Uint32Value(); - uint32_t address = dchecked_integral_cast<uint32_t>(reinterpret_cast<uintptr_t>(info.cache)); - __ Ldr(temp.W(), codegen_->DeduplicateBootImageAddressLiteral(data_offset + address)); + codegen_->LoadBootImageAddress(temp, info.array_data_boot_image_reference); MemOperand source = HeapOperand( temp, out.X(), LSL, DataType::SizeShift(DataType::Type::kReference)); codegen_->Load(DataType::Type::kReference, out, source); @@ -3012,10 +2885,8 @@ void IntrinsicCodeGeneratorARM64::VisitIntegerValueOf(HInvoke* invoke) { __ B(&done); __ Bind(&allocate); // Otherwise allocate and initialize a new j.l.Integer. - address = dchecked_integral_cast<uint32_t>(reinterpret_cast<uintptr_t>(info.integer)); - __ Ldr(argument.W(), codegen_->DeduplicateBootImageAddressLiteral(address)); - codegen_->InvokeRuntime(kQuickAllocObjectInitialized, invoke, invoke->GetDexPc()); - CheckEntrypointTypes<kQuickAllocObjectWithChecks, void*, mirror::Class*>(); + codegen_->AllocateInstanceForIntrinsic(invoke->AsInvokeStaticOrDirect(), + info.integer_boot_image_offset); __ Str(in.W(), HeapOperand(out.W(), info.value_offset)); // `value` is a final field :-( Ideally, we'd merge this memory barrier with the allocation // one. @@ -3053,6 +2924,251 @@ void IntrinsicLocationsBuilderARM64::VisitReachabilityFence(HInvoke* invoke) { void IntrinsicCodeGeneratorARM64::VisitReachabilityFence(HInvoke* invoke ATTRIBUTE_UNUSED) { } +void IntrinsicLocationsBuilderARM64::VisitCRC32Update(HInvoke* invoke) { + if (!codegen_->GetInstructionSetFeatures().HasCRC()) { + return; + } + + LocationSummary* locations = new (allocator_) LocationSummary(invoke, + LocationSummary::kNoCall, + kIntrinsified); + + locations->SetInAt(0, Location::RequiresRegister()); + locations->SetInAt(1, Location::RequiresRegister()); + locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); +} + +// Lower the invoke of CRC32.update(int crc, int b). 
+void IntrinsicCodeGeneratorARM64::VisitCRC32Update(HInvoke* invoke) { + DCHECK(codegen_->GetInstructionSetFeatures().HasCRC()); + + MacroAssembler* masm = GetVIXLAssembler(); + + Register crc = InputRegisterAt(invoke, 0); + Register val = InputRegisterAt(invoke, 1); + Register out = OutputRegister(invoke); + + // The general algorithm of the CRC32 calculation is: + // crc = ~crc + // result = crc32_for_byte(crc, b) + // crc = ~result + // It is directly lowered to three instructions. + + UseScratchRegisterScope temps(masm); + Register tmp = temps.AcquireSameSizeAs(out); + + __ Mvn(tmp, crc); + __ Crc32b(tmp, tmp, val); + __ Mvn(out, tmp); +} + +// Generate code using CRC32 instructions which calculates +// a CRC32 value of a byte. +// +// Parameters: +// masm - VIXL macro assembler +// crc - a register holding an initial CRC value +// ptr - a register holding a memory address of bytes +// length - a register holding a number of bytes to process +// out - a register to put a result of calculation +static void GenerateCodeForCalculationCRC32ValueOfBytes(MacroAssembler* masm, + const Register& crc, + const Register& ptr, + const Register& length, + const Register& out) { + // The algorithm of CRC32 of bytes is: + // crc = ~crc + // process a few first bytes to make the array 8-byte aligned + // while array has 8 bytes do: + // crc = crc32_of_8bytes(crc, 8_bytes(array)) + // if array has 4 bytes: + // crc = crc32_of_4bytes(crc, 4_bytes(array)) + // if array has 2 bytes: + // crc = crc32_of_2bytes(crc, 2_bytes(array)) + // if array has a byte: + // crc = crc32_of_byte(crc, 1_byte(array)) + // crc = ~crc + + vixl::aarch64::Label loop, done; + vixl::aarch64::Label process_4bytes, process_2bytes, process_1byte; + vixl::aarch64::Label aligned2, aligned4, aligned8; + + // Use VIXL scratch registers as the VIXL macro assembler won't use them in + // instructions below. + UseScratchRegisterScope temps(masm); + Register len = temps.AcquireW(); + Register array_elem = temps.AcquireW(); + + __ Mvn(out, crc); + __ Mov(len, length); + + __ Tbz(ptr, 0, &aligned2); + __ Subs(len, len, 1); + __ B(&done, lo); + __ Ldrb(array_elem, MemOperand(ptr, 1, PostIndex)); + __ Crc32b(out, out, array_elem); + + __ Bind(&aligned2); + __ Tbz(ptr, 1, &aligned4); + __ Subs(len, len, 2); + __ B(&process_1byte, lo); + __ Ldrh(array_elem, MemOperand(ptr, 2, PostIndex)); + __ Crc32h(out, out, array_elem); + + __ Bind(&aligned4); + __ Tbz(ptr, 2, &aligned8); + __ Subs(len, len, 4); + __ B(&process_2bytes, lo); + __ Ldr(array_elem, MemOperand(ptr, 4, PostIndex)); + __ Crc32w(out, out, array_elem); + + __ Bind(&aligned8); + __ Subs(len, len, 8); + // If len < 8 go to process data by 4 bytes, 2 bytes and a byte. + __ B(&process_4bytes, lo); + + // The main loop processing data by 8 bytes. + __ Bind(&loop); + __ Ldr(array_elem.X(), MemOperand(ptr, 8, PostIndex)); + __ Subs(len, len, 8); + __ Crc32x(out, out, array_elem.X()); + // if len >= 8, process the next 8 bytes. + __ B(&loop, hs); + + // Process the data which is less than 8 bytes. + // The code generated below works with values of len + // which come in the range [-8, 0]. + // The first three bits are used to detect whether 4 bytes or 2 bytes or + // a byte can be processed. 
+ // The checking order is from bit 2 to bit 0: + // bit 2 is set: at least 4 bytes available + // bit 1 is set: at least 2 bytes available + // bit 0 is set: at least a byte available + __ Bind(&process_4bytes); + // Goto process_2bytes if less than four bytes available + __ Tbz(len, 2, &process_2bytes); + __ Ldr(array_elem, MemOperand(ptr, 4, PostIndex)); + __ Crc32w(out, out, array_elem); + + __ Bind(&process_2bytes); + // Goto process_1bytes if less than two bytes available + __ Tbz(len, 1, &process_1byte); + __ Ldrh(array_elem, MemOperand(ptr, 2, PostIndex)); + __ Crc32h(out, out, array_elem); + + __ Bind(&process_1byte); + // Goto done if no bytes available + __ Tbz(len, 0, &done); + __ Ldrb(array_elem, MemOperand(ptr)); + __ Crc32b(out, out, array_elem); + + __ Bind(&done); + __ Mvn(out, out); +} + +// The threshold for sizes of arrays to use the library provided implementation +// of CRC32.updateBytes instead of the intrinsic. +static constexpr int32_t kCRC32UpdateBytesThreshold = 64 * 1024; + +void IntrinsicLocationsBuilderARM64::VisitCRC32UpdateBytes(HInvoke* invoke) { + if (!codegen_->GetInstructionSetFeatures().HasCRC()) { + return; + } + + LocationSummary* locations = + new (allocator_) LocationSummary(invoke, + LocationSummary::kCallOnSlowPath, + kIntrinsified); + + locations->SetInAt(0, Location::RequiresRegister()); + locations->SetInAt(1, Location::RequiresRegister()); + locations->SetInAt(2, Location::RegisterOrConstant(invoke->InputAt(2))); + locations->SetInAt(3, Location::RequiresRegister()); + locations->AddTemp(Location::RequiresRegister()); + locations->SetOut(Location::RequiresRegister()); +} + +// Lower the invoke of CRC32.updateBytes(int crc, byte[] b, int off, int len) +// +// Note: The intrinsic is not used if len exceeds a threshold. 
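// For reference, a minimal C++ sketch (not part of this change) of the staged algorithm that
// GenerateCodeForCalculationCRC32ValueOfBytes above emits: align the data pointer to 8 bytes,
// run the bulk loop eight bytes at a time, then mop up the 4/2/1-byte tail. Crc32Acc is only
// an illustrative stand-in for the CRC32B/CRC32H/CRC32W/CRC32X instructions; the helper names
// are hypothetical and <cstdint>/<cstddef> are assumed.
static uint32_t Crc32Acc(uint32_t crc, const uint8_t* p, size_t n) {
  // Bitwise reflected CRC-32 (polynomial 0xEDB88320) over n bytes, in memory order.
  for (size_t i = 0; i < n; ++i) {
    crc ^= p[i];
    for (int bit = 0; bit < 8; ++bit) {
      crc = (crc >> 1) ^ ((crc & 1u) != 0u ? 0xEDB88320u : 0u);
    }
  }
  return crc;
}

static uint32_t Crc32OfBytesReference(uint32_t crc, const uint8_t* ptr, size_t len) {
  crc = ~crc;
  // Prologue: consume 1, 2, then 4 bytes as needed until `ptr` is 8-byte aligned
  // (this mirrors the Tbz-guarded entry sequence of the generated code).
  if ((reinterpret_cast<uintptr_t>(ptr) & 1u) != 0u && len >= 1u) {
    crc = Crc32Acc(crc, ptr, 1u); ptr += 1; len -= 1;
  }
  if ((reinterpret_cast<uintptr_t>(ptr) & 2u) != 0u && len >= 2u) {
    crc = Crc32Acc(crc, ptr, 2u); ptr += 2; len -= 2;
  }
  if ((reinterpret_cast<uintptr_t>(ptr) & 4u) != 0u && len >= 4u) {
    crc = Crc32Acc(crc, ptr, 4u); ptr += 4; len -= 4;
  }
  // Main loop: eight bytes per iteration (CRC32X on a 64-bit load in the generated code).
  for (; len >= 8u; ptr += 8, len -= 8) {
    crc = Crc32Acc(crc, ptr, 8u);
  }
  // Epilogue: the remaining 4/2/1 bytes (CRC32W/CRC32H/CRC32B).
  if ((len & 4u) != 0u) { crc = Crc32Acc(crc, ptr, 4u); ptr += 4; }
  if ((len & 2u) != 0u) { crc = Crc32Acc(crc, ptr, 2u); ptr += 2; }
  if ((len & 1u) != 0u) { crc = Crc32Acc(crc, ptr, 1u); }
  return ~crc;
}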
+void IntrinsicCodeGeneratorARM64::VisitCRC32UpdateBytes(HInvoke* invoke) { + DCHECK(codegen_->GetInstructionSetFeatures().HasCRC()); + + MacroAssembler* masm = GetVIXLAssembler(); + LocationSummary* locations = invoke->GetLocations(); + + SlowPathCodeARM64* slow_path = + new (codegen_->GetScopedAllocator()) IntrinsicSlowPathARM64(invoke); + codegen_->AddSlowPath(slow_path); + + Register length = WRegisterFrom(locations->InAt(3)); + __ Cmp(length, kCRC32UpdateBytesThreshold); + __ B(slow_path->GetEntryLabel(), hi); + + const uint32_t array_data_offset = + mirror::Array::DataOffset(Primitive::kPrimByte).Uint32Value(); + Register ptr = XRegisterFrom(locations->GetTemp(0)); + Register array = XRegisterFrom(locations->InAt(1)); + Location offset = locations->InAt(2); + if (offset.IsConstant()) { + int32_t offset_value = offset.GetConstant()->AsIntConstant()->GetValue(); + __ Add(ptr, array, array_data_offset + offset_value); + } else { + __ Add(ptr, array, array_data_offset); + __ Add(ptr, ptr, XRegisterFrom(offset)); + } + + Register crc = WRegisterFrom(locations->InAt(0)); + Register out = WRegisterFrom(locations->Out()); + + GenerateCodeForCalculationCRC32ValueOfBytes(masm, crc, ptr, length, out); + + __ Bind(slow_path->GetExitLabel()); +} + +void IntrinsicLocationsBuilderARM64::VisitCRC32UpdateByteBuffer(HInvoke* invoke) { + if (!codegen_->GetInstructionSetFeatures().HasCRC()) { + return; + } + + LocationSummary* locations = + new (allocator_) LocationSummary(invoke, + LocationSummary::kNoCall, + kIntrinsified); + + locations->SetInAt(0, Location::RequiresRegister()); + locations->SetInAt(1, Location::RequiresRegister()); + locations->SetInAt(2, Location::RequiresRegister()); + locations->SetInAt(3, Location::RequiresRegister()); + locations->AddTemp(Location::RequiresRegister()); + locations->SetOut(Location::RequiresRegister()); +} + +// Lower the invoke of CRC32.updateByteBuffer(int crc, long addr, int off, int len) +// +// There is no need to generate code checking if addr is 0. +// The method updateByteBuffer is a private method of java.util.zip.CRC32. +// This guarantees no calls outside of the CRC32 class. +// An address of DirectBuffer is always passed to the call of updateByteBuffer. +// It might be an implementation of an empty DirectBuffer which can use a zero +// address but it must have the length to be zero. The current generated code +// correctly works with the zero length. 
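// For reference (not part of this change): in terms of the illustrative Crc32OfBytesReference
// sketch above, the ByteBuffer variant below performs the same computation on raw native
// memory, roughly
//   result = Crc32OfBytesReference(crc, reinterpret_cast<const uint8_t*>(addr) + off, len);
// The only codegen difference from updateBytes is that the data pointer is formed from the
// long address plus the offset rather than from an array base plus its data offset, and no
// length threshold or slow path is required.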
+void IntrinsicCodeGeneratorARM64::VisitCRC32UpdateByteBuffer(HInvoke* invoke) { + DCHECK(codegen_->GetInstructionSetFeatures().HasCRC()); + + MacroAssembler* masm = GetVIXLAssembler(); + LocationSummary* locations = invoke->GetLocations(); + + Register addr = XRegisterFrom(locations->InAt(1)); + Register ptr = XRegisterFrom(locations->GetTemp(0)); + __ Add(ptr, addr, XRegisterFrom(locations->InAt(2))); + + Register crc = WRegisterFrom(locations->InAt(0)); + Register length = WRegisterFrom(locations->InAt(3)); + Register out = WRegisterFrom(locations->Out()); + GenerateCodeForCalculationCRC32ValueOfBytes(masm, crc, ptr, length, out); +} + UNIMPLEMENTED_INTRINSIC(ARM64, ReferenceGetReferent) UNIMPLEMENTED_INTRINSIC(ARM64, StringStringIndexOf); diff --git a/compiler/optimizing/intrinsics_arm64.h b/compiler/optimizing/intrinsics_arm64.h index 033a644f34..9c46efddec 100644 --- a/compiler/optimizing/intrinsics_arm64.h +++ b/compiler/optimizing/intrinsics_arm64.h @@ -37,7 +37,7 @@ namespace arm64 { class CodeGeneratorARM64; -class IntrinsicLocationsBuilderARM64 FINAL : public IntrinsicVisitor { +class IntrinsicLocationsBuilderARM64 final : public IntrinsicVisitor { public: explicit IntrinsicLocationsBuilderARM64(ArenaAllocator* allocator, CodeGeneratorARM64* codegen) : allocator_(allocator), codegen_(codegen) {} @@ -45,7 +45,7 @@ class IntrinsicLocationsBuilderARM64 FINAL : public IntrinsicVisitor { // Define visitor methods. #define OPTIMIZING_INTRINSICS(Name, IsStatic, NeedsEnvironmentOrCache, SideEffects, Exceptions, ...) \ - void Visit ## Name(HInvoke* invoke) OVERRIDE; + void Visit ## Name(HInvoke* invoke) override; #include "intrinsics_list.h" INTRINSICS_LIST(OPTIMIZING_INTRINSICS) #undef INTRINSICS_LIST @@ -63,14 +63,14 @@ class IntrinsicLocationsBuilderARM64 FINAL : public IntrinsicVisitor { DISALLOW_COPY_AND_ASSIGN(IntrinsicLocationsBuilderARM64); }; -class IntrinsicCodeGeneratorARM64 FINAL : public IntrinsicVisitor { +class IntrinsicCodeGeneratorARM64 final : public IntrinsicVisitor { public: explicit IntrinsicCodeGeneratorARM64(CodeGeneratorARM64* codegen) : codegen_(codegen) {} // Define visitor methods. #define OPTIMIZING_INTRINSICS(Name, IsStatic, NeedsEnvironmentOrCache, SideEffects, Exceptions, ...) 
\ - void Visit ## Name(HInvoke* invoke) OVERRIDE; + void Visit ## Name(HInvoke* invoke) override; #include "intrinsics_list.h" INTRINSICS_LIST(OPTIMIZING_INTRINSICS) #undef INTRINSICS_LIST diff --git a/compiler/optimizing/intrinsics_arm_vixl.cc b/compiler/optimizing/intrinsics_arm_vixl.cc index 97a145664c..f0aa92e981 100644 --- a/compiler/optimizing/intrinsics_arm_vixl.cc +++ b/compiler/optimizing/intrinsics_arm_vixl.cc @@ -25,7 +25,7 @@ #include "mirror/array-inl.h" #include "mirror/object_array-inl.h" #include "mirror/reference.h" -#include "mirror/string.h" +#include "mirror/string-inl.h" #include "scoped_thread_state_change-inl.h" #include "thread-current-inl.h" @@ -41,19 +41,15 @@ using helpers::HighRegisterFrom; using helpers::InputDRegisterAt; using helpers::InputRegisterAt; using helpers::InputSRegisterAt; -using helpers::InputVRegisterAt; using helpers::Int32ConstantFrom; using helpers::LocationFrom; using helpers::LowRegisterFrom; using helpers::LowSRegisterFrom; using helpers::HighSRegisterFrom; using helpers::OutputDRegister; -using helpers::OutputSRegister; using helpers::OutputRegister; -using helpers::OutputVRegister; using helpers::RegisterFrom; using helpers::SRegisterFrom; -using helpers::DRegisterFromS; using namespace vixl::aarch32; // NOLINT(build/namespaces) @@ -89,7 +85,7 @@ class IntrinsicSlowPathARMVIXL : public SlowPathCodeARMVIXL { return calling_convention_visitor.GetMethodLocation(); } - void EmitNativeCode(CodeGenerator* codegen) OVERRIDE { + void EmitNativeCode(CodeGenerator* codegen) override { ArmVIXLAssembler* assembler = down_cast<ArmVIXLAssembler*>(codegen->GetAssembler()); __ Bind(GetEntryLabel()); @@ -115,7 +111,7 @@ class IntrinsicSlowPathARMVIXL : public SlowPathCodeARMVIXL { __ B(GetExitLabel()); } - const char* GetDescription() const OVERRIDE { return "IntrinsicSlowPath"; } + const char* GetDescription() const override { return "IntrinsicSlowPath"; } private: // The instruction where this slow path is happening. 
@@ -177,7 +173,7 @@ class ReadBarrierSystemArrayCopySlowPathARMVIXL : public SlowPathCodeARMVIXL { DCHECK(kUseBakerReadBarrier); } - void EmitNativeCode(CodeGenerator* codegen) OVERRIDE { + void EmitNativeCode(CodeGenerator* codegen) override { CodeGeneratorARMVIXL* arm_codegen = down_cast<CodeGeneratorARMVIXL*>(codegen); ArmVIXLAssembler* assembler = arm_codegen->GetAssembler(); LocationSummary* locations = instruction_->GetLocations(); @@ -233,11 +229,11 @@ class ReadBarrierSystemArrayCopySlowPathARMVIXL : public SlowPathCodeARMVIXL { assembler->MaybePoisonHeapReference(tmp); __ Str(tmp, MemOperand(dst_curr_addr, element_size, PostIndex)); __ Cmp(src_curr_addr, src_stop_addr); - __ B(ne, &loop, /* far_target */ false); + __ B(ne, &loop, /* is_far_target= */ false); __ B(GetExitLabel()); } - const char* GetDescription() const OVERRIDE { + const char* GetDescription() const override { return "ReadBarrierSystemArrayCopySlowPathARMVIXL"; } @@ -302,10 +298,10 @@ void IntrinsicLocationsBuilderARMVIXL::VisitDoubleLongBitsToDouble(HInvoke* invo } void IntrinsicCodeGeneratorARMVIXL::VisitDoubleDoubleToRawLongBits(HInvoke* invoke) { - MoveFPToInt(invoke->GetLocations(), /* is64bit */ true, GetAssembler()); + MoveFPToInt(invoke->GetLocations(), /* is64bit= */ true, GetAssembler()); } void IntrinsicCodeGeneratorARMVIXL::VisitDoubleLongBitsToDouble(HInvoke* invoke) { - MoveIntToFP(invoke->GetLocations(), /* is64bit */ true, GetAssembler()); + MoveIntToFP(invoke->GetLocations(), /* is64bit= */ true, GetAssembler()); } void IntrinsicLocationsBuilderARMVIXL::VisitFloatFloatToRawIntBits(HInvoke* invoke) { @@ -316,10 +312,10 @@ void IntrinsicLocationsBuilderARMVIXL::VisitFloatIntBitsToFloat(HInvoke* invoke) } void IntrinsicCodeGeneratorARMVIXL::VisitFloatFloatToRawIntBits(HInvoke* invoke) { - MoveFPToInt(invoke->GetLocations(), /* is64bit */ false, GetAssembler()); + MoveFPToInt(invoke->GetLocations(), /* is64bit= */ false, GetAssembler()); } void IntrinsicCodeGeneratorARMVIXL::VisitFloatIntBitsToFloat(HInvoke* invoke) { - MoveIntToFP(invoke->GetLocations(), /* is64bit */ false, GetAssembler()); + MoveIntToFP(invoke->GetLocations(), /* is64bit= */ false, GetAssembler()); } static void CreateIntToIntLocations(ArenaAllocator* allocator, HInvoke* invoke) { @@ -359,7 +355,7 @@ static void GenNumberOfLeadingZeros(HInvoke* invoke, vixl32::Label end; vixl32::Label* final_label = codegen->GetFinalLabel(invoke, &end); __ Clz(out, in_reg_hi); - __ CompareAndBranchIfNonZero(in_reg_hi, final_label, /* far_target */ false); + __ CompareAndBranchIfNonZero(in_reg_hi, final_label, /* is_far_target= */ false); __ Clz(out, in_reg_lo); __ Add(out, out, 32); if (end.IsReferenced()) { @@ -402,7 +398,7 @@ static void GenNumberOfTrailingZeros(HInvoke* invoke, vixl32::Label* final_label = codegen->GetFinalLabel(invoke, &end); __ Rbit(out, in_reg_lo); __ Clz(out, out); - __ CompareAndBranchIfNonZero(in_reg_lo, final_label, /* far_target */ false); + __ CompareAndBranchIfNonZero(in_reg_lo, final_label, /* is_far_target= */ false); __ Rbit(out, in_reg_hi); __ Clz(out, out); __ Add(out, out, 32); @@ -432,341 +428,6 @@ void IntrinsicCodeGeneratorARMVIXL::VisitLongNumberOfTrailingZeros(HInvoke* invo GenNumberOfTrailingZeros(invoke, DataType::Type::kInt64, codegen_); } -static void MathAbsFP(HInvoke* invoke, ArmVIXLAssembler* assembler) { - __ Vabs(OutputVRegister(invoke), InputVRegisterAt(invoke, 0)); -} - -void IntrinsicLocationsBuilderARMVIXL::VisitMathAbsDouble(HInvoke* invoke) { - CreateFPToFPLocations(allocator_, invoke); -} - 
-void IntrinsicCodeGeneratorARMVIXL::VisitMathAbsDouble(HInvoke* invoke) { - MathAbsFP(invoke, GetAssembler()); -} - -void IntrinsicLocationsBuilderARMVIXL::VisitMathAbsFloat(HInvoke* invoke) { - CreateFPToFPLocations(allocator_, invoke); -} - -void IntrinsicCodeGeneratorARMVIXL::VisitMathAbsFloat(HInvoke* invoke) { - MathAbsFP(invoke, GetAssembler()); -} - -static void CreateIntToIntPlusTemp(ArenaAllocator* allocator, HInvoke* invoke) { - LocationSummary* locations = - new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified); - locations->SetInAt(0, Location::RequiresRegister()); - locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); - - locations->AddTemp(Location::RequiresRegister()); -} - -static void GenAbsInteger(LocationSummary* locations, - bool is64bit, - ArmVIXLAssembler* assembler) { - Location in = locations->InAt(0); - Location output = locations->Out(); - - vixl32::Register mask = RegisterFrom(locations->GetTemp(0)); - - if (is64bit) { - vixl32::Register in_reg_lo = LowRegisterFrom(in); - vixl32::Register in_reg_hi = HighRegisterFrom(in); - vixl32::Register out_reg_lo = LowRegisterFrom(output); - vixl32::Register out_reg_hi = HighRegisterFrom(output); - - DCHECK(!out_reg_lo.Is(in_reg_hi)) << "Diagonal overlap unexpected."; - - __ Asr(mask, in_reg_hi, 31); - __ Adds(out_reg_lo, in_reg_lo, mask); - __ Adc(out_reg_hi, in_reg_hi, mask); - __ Eor(out_reg_lo, mask, out_reg_lo); - __ Eor(out_reg_hi, mask, out_reg_hi); - } else { - vixl32::Register in_reg = RegisterFrom(in); - vixl32::Register out_reg = RegisterFrom(output); - - __ Asr(mask, in_reg, 31); - __ Add(out_reg, in_reg, mask); - __ Eor(out_reg, mask, out_reg); - } -} - -void IntrinsicLocationsBuilderARMVIXL::VisitMathAbsInt(HInvoke* invoke) { - CreateIntToIntPlusTemp(allocator_, invoke); -} - -void IntrinsicCodeGeneratorARMVIXL::VisitMathAbsInt(HInvoke* invoke) { - GenAbsInteger(invoke->GetLocations(), /* is64bit */ false, GetAssembler()); -} - - -void IntrinsicLocationsBuilderARMVIXL::VisitMathAbsLong(HInvoke* invoke) { - CreateIntToIntPlusTemp(allocator_, invoke); -} - -void IntrinsicCodeGeneratorARMVIXL::VisitMathAbsLong(HInvoke* invoke) { - GenAbsInteger(invoke->GetLocations(), /* is64bit */ true, GetAssembler()); -} - -static void GenMinMaxFloat(HInvoke* invoke, bool is_min, CodeGeneratorARMVIXL* codegen) { - ArmVIXLAssembler* assembler = codegen->GetAssembler(); - Location op1_loc = invoke->GetLocations()->InAt(0); - Location op2_loc = invoke->GetLocations()->InAt(1); - Location out_loc = invoke->GetLocations()->Out(); - - // Optimization: don't generate any code if inputs are the same. - if (op1_loc.Equals(op2_loc)) { - DCHECK(out_loc.Equals(op1_loc)); // out_loc is set as SameAsFirstInput() in location builder. - return; - } - - vixl32::SRegister op1 = SRegisterFrom(op1_loc); - vixl32::SRegister op2 = SRegisterFrom(op2_loc); - vixl32::SRegister out = OutputSRegister(invoke); - UseScratchRegisterScope temps(assembler->GetVIXLAssembler()); - const vixl32::Register temp1 = temps.Acquire(); - vixl32::Register temp2 = RegisterFrom(invoke->GetLocations()->GetTemp(0)); - vixl32::Label nan, done; - vixl32::Label* final_label = codegen->GetFinalLabel(invoke, &done); - - DCHECK(op1.Is(out)); - - __ Vcmp(op1, op2); - __ Vmrs(RegisterOrAPSR_nzcv(kPcCode), FPSCR); - __ B(vs, &nan, /* far_target */ false); // if un-ordered, go to NaN handling. - - // op1 <> op2 - vixl32::ConditionType cond = is_min ? 
gt : lt; - { - ExactAssemblyScope it_scope(assembler->GetVIXLAssembler(), - 2 * kMaxInstructionSizeInBytes, - CodeBufferCheckScope::kMaximumSize); - __ it(cond); - __ vmov(cond, F32, out, op2); - } - // for <>(not equal), we've done min/max calculation. - __ B(ne, final_label, /* far_target */ false); - - // handle op1 == op2, max(+0.0,-0.0), min(+0.0,-0.0). - __ Vmov(temp1, op1); - __ Vmov(temp2, op2); - if (is_min) { - __ Orr(temp1, temp1, temp2); - } else { - __ And(temp1, temp1, temp2); - } - __ Vmov(out, temp1); - __ B(final_label); - - // handle NaN input. - __ Bind(&nan); - __ Movt(temp1, High16Bits(kNanFloat)); // 0x7FC0xxxx is a NaN. - __ Vmov(out, temp1); - - if (done.IsReferenced()) { - __ Bind(&done); - } -} - -static void CreateFPFPToFPLocations(ArenaAllocator* allocator, HInvoke* invoke) { - LocationSummary* locations = - new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified); - locations->SetInAt(0, Location::RequiresFpuRegister()); - locations->SetInAt(1, Location::RequiresFpuRegister()); - locations->SetOut(Location::SameAsFirstInput()); -} - -void IntrinsicLocationsBuilderARMVIXL::VisitMathMinFloatFloat(HInvoke* invoke) { - CreateFPFPToFPLocations(allocator_, invoke); - invoke->GetLocations()->AddTemp(Location::RequiresRegister()); -} - -void IntrinsicCodeGeneratorARMVIXL::VisitMathMinFloatFloat(HInvoke* invoke) { - GenMinMaxFloat(invoke, /* is_min */ true, codegen_); -} - -void IntrinsicLocationsBuilderARMVIXL::VisitMathMaxFloatFloat(HInvoke* invoke) { - CreateFPFPToFPLocations(allocator_, invoke); - invoke->GetLocations()->AddTemp(Location::RequiresRegister()); -} - -void IntrinsicCodeGeneratorARMVIXL::VisitMathMaxFloatFloat(HInvoke* invoke) { - GenMinMaxFloat(invoke, /* is_min */ false, codegen_); -} - -static void GenMinMaxDouble(HInvoke* invoke, bool is_min, CodeGeneratorARMVIXL* codegen) { - ArmVIXLAssembler* assembler = codegen->GetAssembler(); - Location op1_loc = invoke->GetLocations()->InAt(0); - Location op2_loc = invoke->GetLocations()->InAt(1); - Location out_loc = invoke->GetLocations()->Out(); - - // Optimization: don't generate any code if inputs are the same. - if (op1_loc.Equals(op2_loc)) { - DCHECK(out_loc.Equals(op1_loc)); // out_loc is set as SameAsFirstInput() in. - return; - } - - vixl32::DRegister op1 = DRegisterFrom(op1_loc); - vixl32::DRegister op2 = DRegisterFrom(op2_loc); - vixl32::DRegister out = OutputDRegister(invoke); - vixl32::Label handle_nan_eq, done; - vixl32::Label* final_label = codegen->GetFinalLabel(invoke, &done); - - DCHECK(op1.Is(out)); - - __ Vcmp(op1, op2); - __ Vmrs(RegisterOrAPSR_nzcv(kPcCode), FPSCR); - __ B(vs, &handle_nan_eq, /* far_target */ false); // if un-ordered, go to NaN handling. - - // op1 <> op2 - vixl32::ConditionType cond = is_min ? gt : lt; - { - ExactAssemblyScope it_scope(assembler->GetVIXLAssembler(), - 2 * kMaxInstructionSizeInBytes, - CodeBufferCheckScope::kMaximumSize); - __ it(cond); - __ vmov(cond, F64, out, op2); - } - // for <>(not equal), we've done min/max calculation. - __ B(ne, final_label, /* far_target */ false); - - // handle op1 == op2, max(+0.0,-0.0). - if (!is_min) { - __ Vand(F64, out, op1, op2); - __ B(final_label); - } - - // handle op1 == op2, min(+0.0,-0.0), NaN input. - __ Bind(&handle_nan_eq); - __ Vorr(F64, out, op1, op2); // assemble op1/-0.0/NaN. 
- - if (done.IsReferenced()) { - __ Bind(&done); - } -} - -void IntrinsicLocationsBuilderARMVIXL::VisitMathMinDoubleDouble(HInvoke* invoke) { - CreateFPFPToFPLocations(allocator_, invoke); -} - -void IntrinsicCodeGeneratorARMVIXL::VisitMathMinDoubleDouble(HInvoke* invoke) { - GenMinMaxDouble(invoke, /* is_min */ true , codegen_); -} - -void IntrinsicLocationsBuilderARMVIXL::VisitMathMaxDoubleDouble(HInvoke* invoke) { - CreateFPFPToFPLocations(allocator_, invoke); -} - -void IntrinsicCodeGeneratorARMVIXL::VisitMathMaxDoubleDouble(HInvoke* invoke) { - GenMinMaxDouble(invoke, /* is_min */ false, codegen_); -} - -static void GenMinMaxLong(HInvoke* invoke, bool is_min, ArmVIXLAssembler* assembler) { - Location op1_loc = invoke->GetLocations()->InAt(0); - Location op2_loc = invoke->GetLocations()->InAt(1); - Location out_loc = invoke->GetLocations()->Out(); - - // Optimization: don't generate any code if inputs are the same. - if (op1_loc.Equals(op2_loc)) { - DCHECK(out_loc.Equals(op1_loc)); // out_loc is set as SameAsFirstInput() in location builder. - return; - } - - vixl32::Register op1_lo = LowRegisterFrom(op1_loc); - vixl32::Register op1_hi = HighRegisterFrom(op1_loc); - vixl32::Register op2_lo = LowRegisterFrom(op2_loc); - vixl32::Register op2_hi = HighRegisterFrom(op2_loc); - vixl32::Register out_lo = LowRegisterFrom(out_loc); - vixl32::Register out_hi = HighRegisterFrom(out_loc); - UseScratchRegisterScope temps(assembler->GetVIXLAssembler()); - const vixl32::Register temp = temps.Acquire(); - - DCHECK(op1_lo.Is(out_lo)); - DCHECK(op1_hi.Is(out_hi)); - - // Compare op1 >= op2, or op1 < op2. - __ Cmp(out_lo, op2_lo); - __ Sbcs(temp, out_hi, op2_hi); - - // Now GE/LT condition code is correct for the long comparison. - { - vixl32::ConditionType cond = is_min ? ge : lt; - ExactAssemblyScope it_scope(assembler->GetVIXLAssembler(), - 3 * kMaxInstructionSizeInBytes, - CodeBufferCheckScope::kMaximumSize); - __ itt(cond); - __ mov(cond, out_lo, op2_lo); - __ mov(cond, out_hi, op2_hi); - } -} - -static void CreateLongLongToLongLocations(ArenaAllocator* allocator, HInvoke* invoke) { - LocationSummary* locations = - new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified); - locations->SetInAt(0, Location::RequiresRegister()); - locations->SetInAt(1, Location::RequiresRegister()); - locations->SetOut(Location::SameAsFirstInput()); -} - -void IntrinsicLocationsBuilderARMVIXL::VisitMathMinLongLong(HInvoke* invoke) { - CreateLongLongToLongLocations(allocator_, invoke); -} - -void IntrinsicCodeGeneratorARMVIXL::VisitMathMinLongLong(HInvoke* invoke) { - GenMinMaxLong(invoke, /* is_min */ true, GetAssembler()); -} - -void IntrinsicLocationsBuilderARMVIXL::VisitMathMaxLongLong(HInvoke* invoke) { - CreateLongLongToLongLocations(allocator_, invoke); -} - -void IntrinsicCodeGeneratorARMVIXL::VisitMathMaxLongLong(HInvoke* invoke) { - GenMinMaxLong(invoke, /* is_min */ false, GetAssembler()); -} - -static void GenMinMax(HInvoke* invoke, bool is_min, ArmVIXLAssembler* assembler) { - vixl32::Register op1 = InputRegisterAt(invoke, 0); - vixl32::Register op2 = InputRegisterAt(invoke, 1); - vixl32::Register out = OutputRegister(invoke); - - __ Cmp(op1, op2); - - { - ExactAssemblyScope aas(assembler->GetVIXLAssembler(), - 3 * kMaxInstructionSizeInBytes, - CodeBufferCheckScope::kMaximumSize); - - __ ite(is_min ? lt : gt); - __ mov(is_min ? lt : gt, out, op1); - __ mov(is_min ? 
ge : le, out, op2); - } -} - -static void CreateIntIntToIntLocations(ArenaAllocator* allocator, HInvoke* invoke) { - LocationSummary* locations = - new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified); - locations->SetInAt(0, Location::RequiresRegister()); - locations->SetInAt(1, Location::RequiresRegister()); - locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); -} - -void IntrinsicLocationsBuilderARMVIXL::VisitMathMinIntInt(HInvoke* invoke) { - CreateIntIntToIntLocations(allocator_, invoke); -} - -void IntrinsicCodeGeneratorARMVIXL::VisitMathMinIntInt(HInvoke* invoke) { - GenMinMax(invoke, /* is_min */ true, GetAssembler()); -} - -void IntrinsicLocationsBuilderARMVIXL::VisitMathMaxIntInt(HInvoke* invoke) { - CreateIntIntToIntLocations(allocator_, invoke); -} - -void IntrinsicCodeGeneratorARMVIXL::VisitMathMaxIntInt(HInvoke* invoke) { - GenMinMax(invoke, /* is_min */ false, GetAssembler()); -} - void IntrinsicLocationsBuilderARMVIXL::VisitMathSqrt(HInvoke* invoke) { CreateFPToFPLocations(allocator_, invoke); } @@ -785,7 +446,7 @@ void IntrinsicLocationsBuilderARMVIXL::VisitMathRint(HInvoke* invoke) { void IntrinsicCodeGeneratorARMVIXL::VisitMathRint(HInvoke* invoke) { DCHECK(codegen_->GetInstructionSetFeatures().HasARMv8AInstructions()); ArmVIXLAssembler* assembler = GetAssembler(); - __ Vrintn(F64, F64, OutputDRegister(invoke), InputDRegisterAt(invoke, 0)); + __ Vrintn(F64, OutputDRegister(invoke), InputDRegisterAt(invoke, 0)); } void IntrinsicLocationsBuilderARMVIXL::VisitMathRoundFloat(HInvoke* invoke) { @@ -815,12 +476,12 @@ void IntrinsicCodeGeneratorARMVIXL::VisitMathRoundFloat(HInvoke* invoke) { // For positive, zero or NaN inputs, rounding is done. __ Cmp(out_reg, 0); - __ B(ge, final_label, /* far_target */ false); + __ B(ge, final_label, /* is_far_target= */ false); // Handle input < 0 cases. // If input is negative but not a tie, previous result (round to nearest) is valid. // If input is a negative tie, change rounding direction to positive infinity, out_reg += 1. - __ Vrinta(F32, F32, temp1, in_reg); + __ Vrinta(F32, temp1, in_reg); __ Vmov(temp2, 0.5); __ Vsub(F32, temp1, in_reg, temp1); __ Vcmp(F32, temp1, temp2); @@ -977,8 +638,11 @@ static void GenUnsafeGet(HInvoke* invoke, if (kEmitCompilerReadBarrier) { if (kUseBakerReadBarrier) { Location temp = locations->GetTemp(0); - codegen->GenerateReferenceLoadWithBakerReadBarrier( - invoke, trg_loc, base, 0U, offset_loc, TIMES_1, temp, /* needs_null_check */ false); + // Piggy-back on the field load path using introspection for the Baker read barrier. 
+ __ Add(RegisterFrom(temp), base, Operand(offset)); + MemOperand src(RegisterFrom(temp), 0); + codegen->GenerateFieldLoadWithBakerReadBarrier( + invoke, trg_loc, base, src, /* needs_null_check= */ false); if (is_volatile) { __ Dmb(vixl32::ISH); } @@ -1069,22 +733,22 @@ void IntrinsicLocationsBuilderARMVIXL::VisitUnsafeGetObjectVolatile(HInvoke* inv } void IntrinsicCodeGeneratorARMVIXL::VisitUnsafeGet(HInvoke* invoke) { - GenUnsafeGet(invoke, DataType::Type::kInt32, /* is_volatile */ false, codegen_); + GenUnsafeGet(invoke, DataType::Type::kInt32, /* is_volatile= */ false, codegen_); } void IntrinsicCodeGeneratorARMVIXL::VisitUnsafeGetVolatile(HInvoke* invoke) { - GenUnsafeGet(invoke, DataType::Type::kInt32, /* is_volatile */ true, codegen_); + GenUnsafeGet(invoke, DataType::Type::kInt32, /* is_volatile= */ true, codegen_); } void IntrinsicCodeGeneratorARMVIXL::VisitUnsafeGetLong(HInvoke* invoke) { - GenUnsafeGet(invoke, DataType::Type::kInt64, /* is_volatile */ false, codegen_); + GenUnsafeGet(invoke, DataType::Type::kInt64, /* is_volatile= */ false, codegen_); } void IntrinsicCodeGeneratorARMVIXL::VisitUnsafeGetLongVolatile(HInvoke* invoke) { - GenUnsafeGet(invoke, DataType::Type::kInt64, /* is_volatile */ true, codegen_); + GenUnsafeGet(invoke, DataType::Type::kInt64, /* is_volatile= */ true, codegen_); } void IntrinsicCodeGeneratorARMVIXL::VisitUnsafeGetObject(HInvoke* invoke) { - GenUnsafeGet(invoke, DataType::Type::kReference, /* is_volatile */ false, codegen_); + GenUnsafeGet(invoke, DataType::Type::kReference, /* is_volatile= */ false, codegen_); } void IntrinsicCodeGeneratorARMVIXL::VisitUnsafeGetObjectVolatile(HInvoke* invoke) { - GenUnsafeGet(invoke, DataType::Type::kReference, /* is_volatile */ true, codegen_); + GenUnsafeGet(invoke, DataType::Type::kReference, /* is_volatile= */ true, codegen_); } static void CreateIntIntIntIntToVoid(ArenaAllocator* allocator, @@ -1114,39 +778,39 @@ static void CreateIntIntIntIntToVoid(ArenaAllocator* allocator, void IntrinsicLocationsBuilderARMVIXL::VisitUnsafePut(HInvoke* invoke) { CreateIntIntIntIntToVoid( - allocator_, features_, DataType::Type::kInt32, /* is_volatile */ false, invoke); + allocator_, features_, DataType::Type::kInt32, /* is_volatile= */ false, invoke); } void IntrinsicLocationsBuilderARMVIXL::VisitUnsafePutOrdered(HInvoke* invoke) { CreateIntIntIntIntToVoid( - allocator_, features_, DataType::Type::kInt32, /* is_volatile */ false, invoke); + allocator_, features_, DataType::Type::kInt32, /* is_volatile= */ false, invoke); } void IntrinsicLocationsBuilderARMVIXL::VisitUnsafePutVolatile(HInvoke* invoke) { CreateIntIntIntIntToVoid( - allocator_, features_, DataType::Type::kInt32, /* is_volatile */ true, invoke); + allocator_, features_, DataType::Type::kInt32, /* is_volatile= */ true, invoke); } void IntrinsicLocationsBuilderARMVIXL::VisitUnsafePutObject(HInvoke* invoke) { CreateIntIntIntIntToVoid( - allocator_, features_, DataType::Type::kReference, /* is_volatile */ false, invoke); + allocator_, features_, DataType::Type::kReference, /* is_volatile= */ false, invoke); } void IntrinsicLocationsBuilderARMVIXL::VisitUnsafePutObjectOrdered(HInvoke* invoke) { CreateIntIntIntIntToVoid( - allocator_, features_, DataType::Type::kReference, /* is_volatile */ false, invoke); + allocator_, features_, DataType::Type::kReference, /* is_volatile= */ false, invoke); } void IntrinsicLocationsBuilderARMVIXL::VisitUnsafePutObjectVolatile(HInvoke* invoke) { CreateIntIntIntIntToVoid( - allocator_, features_, DataType::Type::kReference, /* 
is_volatile */ true, invoke); + allocator_, features_, DataType::Type::kReference, /* is_volatile= */ true, invoke); } void IntrinsicLocationsBuilderARMVIXL::VisitUnsafePutLong(HInvoke* invoke) { CreateIntIntIntIntToVoid( - allocator_, features_, DataType::Type::kInt64, /* is_volatile */ false, invoke); + allocator_, features_, DataType::Type::kInt64, /* is_volatile= */ false, invoke); } void IntrinsicLocationsBuilderARMVIXL::VisitUnsafePutLongOrdered(HInvoke* invoke) { CreateIntIntIntIntToVoid( - allocator_, features_, DataType::Type::kInt64, /* is_volatile */ false, invoke); + allocator_, features_, DataType::Type::kInt64, /* is_volatile= */ false, invoke); } void IntrinsicLocationsBuilderARMVIXL::VisitUnsafePutLongVolatile(HInvoke* invoke) { CreateIntIntIntIntToVoid( - allocator_, features_, DataType::Type::kInt64, /* is_volatile */ true, invoke); + allocator_, features_, DataType::Type::kInt64, /* is_volatile= */ true, invoke); } static void GenUnsafePut(LocationSummary* locations, @@ -1180,7 +844,7 @@ static void GenUnsafePut(LocationSummary* locations, __ Ldrexd(temp_lo, temp_hi, MemOperand(temp_reg)); __ Strexd(temp_lo, value_lo, value_hi, MemOperand(temp_reg)); __ Cmp(temp_lo, 0); - __ B(ne, &loop_head, /* far_target */ false); + __ B(ne, &loop_head, /* is_far_target= */ false); } else { __ Strd(value_lo, value_hi, MemOperand(base, offset)); } @@ -1211,70 +875,68 @@ static void GenUnsafePut(LocationSummary* locations, void IntrinsicCodeGeneratorARMVIXL::VisitUnsafePut(HInvoke* invoke) { GenUnsafePut(invoke->GetLocations(), DataType::Type::kInt32, - /* is_volatile */ false, - /* is_ordered */ false, + /* is_volatile= */ false, + /* is_ordered= */ false, codegen_); } void IntrinsicCodeGeneratorARMVIXL::VisitUnsafePutOrdered(HInvoke* invoke) { GenUnsafePut(invoke->GetLocations(), DataType::Type::kInt32, - /* is_volatile */ false, - /* is_ordered */ true, + /* is_volatile= */ false, + /* is_ordered= */ true, codegen_); } void IntrinsicCodeGeneratorARMVIXL::VisitUnsafePutVolatile(HInvoke* invoke) { GenUnsafePut(invoke->GetLocations(), DataType::Type::kInt32, - /* is_volatile */ true, - /* is_ordered */ false, + /* is_volatile= */ true, + /* is_ordered= */ false, codegen_); } void IntrinsicCodeGeneratorARMVIXL::VisitUnsafePutObject(HInvoke* invoke) { GenUnsafePut(invoke->GetLocations(), DataType::Type::kReference, - /* is_volatile */ false, - /* is_ordered */ false, + /* is_volatile= */ false, + /* is_ordered= */ false, codegen_); } void IntrinsicCodeGeneratorARMVIXL::VisitUnsafePutObjectOrdered(HInvoke* invoke) { GenUnsafePut(invoke->GetLocations(), DataType::Type::kReference, - /* is_volatile */ false, - /* is_ordered */ true, + /* is_volatile= */ false, + /* is_ordered= */ true, codegen_); } void IntrinsicCodeGeneratorARMVIXL::VisitUnsafePutObjectVolatile(HInvoke* invoke) { GenUnsafePut(invoke->GetLocations(), DataType::Type::kReference, - /* is_volatile */ true, - /* is_ordered */ false, + /* is_volatile= */ true, + /* is_ordered= */ false, codegen_); } void IntrinsicCodeGeneratorARMVIXL::VisitUnsafePutLong(HInvoke* invoke) { GenUnsafePut(invoke->GetLocations(), DataType::Type::kInt64, - /* is_volatile */ false, - /* is_ordered */ false, + /* is_volatile= */ false, + /* is_ordered= */ false, codegen_); } void IntrinsicCodeGeneratorARMVIXL::VisitUnsafePutLongOrdered(HInvoke* invoke) { GenUnsafePut(invoke->GetLocations(), DataType::Type::kInt64, - /* is_volatile */ false, - /* is_ordered */ true, + /* is_volatile= */ false, + /* is_ordered= */ true, codegen_); } void 
IntrinsicCodeGeneratorARMVIXL::VisitUnsafePutLongVolatile(HInvoke* invoke) { GenUnsafePut(invoke->GetLocations(), DataType::Type::kInt64, - /* is_volatile */ true, - /* is_ordered */ false, + /* is_volatile= */ true, + /* is_ordered= */ false, codegen_); } -static void CreateIntIntIntIntIntToIntPlusTemps(ArenaAllocator* allocator, - HInvoke* invoke, - DataType::Type type) { +static void CreateIntIntIntIntIntToIntPlusTemps(ArenaAllocator* allocator, HInvoke* invoke) { bool can_call = kEmitCompilerReadBarrier && kUseBakerReadBarrier && (invoke->GetIntrinsic() == Intrinsics::kUnsafeCASObject); @@ -1284,20 +946,16 @@ static void CreateIntIntIntIntIntToIntPlusTemps(ArenaAllocator* allocator, ? LocationSummary::kCallOnSlowPath : LocationSummary::kNoCall, kIntrinsified); + if (can_call) { + locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty()); // No caller-save registers. + } locations->SetInAt(0, Location::NoLocation()); // Unused receiver. locations->SetInAt(1, Location::RequiresRegister()); locations->SetInAt(2, Location::RequiresRegister()); locations->SetInAt(3, Location::RequiresRegister()); locations->SetInAt(4, Location::RequiresRegister()); - // If heap poisoning is enabled, we don't want the unpoisoning - // operations to potentially clobber the output. Likewise when - // emitting a (Baker) read barrier, which may call. - Location::OutputOverlap overlaps = - ((kPoisonHeapReferences && type == DataType::Type::kReference) || can_call) - ? Location::kOutputOverlap - : Location::kNoOutputOverlap; - locations->SetOut(Location::RequiresRegister(), overlaps); + locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); // Temporary registers used in CAS. In the object case // (UnsafeCASObject intrinsic), these are also used for @@ -1306,24 +964,92 @@ static void CreateIntIntIntIntIntToIntPlusTemps(ArenaAllocator* allocator, locations->AddTemp(Location::RequiresRegister()); // Temp 1. } +class BakerReadBarrierCasSlowPathARMVIXL : public SlowPathCodeARMVIXL { + public: + explicit BakerReadBarrierCasSlowPathARMVIXL(HInvoke* invoke) + : SlowPathCodeARMVIXL(invoke) {} + + const char* GetDescription() const override { return "BakerReadBarrierCasSlowPathARMVIXL"; } + + void EmitNativeCode(CodeGenerator* codegen) override { + CodeGeneratorARMVIXL* arm_codegen = down_cast<CodeGeneratorARMVIXL*>(codegen); + ArmVIXLAssembler* assembler = arm_codegen->GetAssembler(); + __ Bind(GetEntryLabel()); + + LocationSummary* locations = instruction_->GetLocations(); + vixl32::Register base = InputRegisterAt(instruction_, 1); // Object pointer. + vixl32::Register offset = LowRegisterFrom(locations->InAt(2)); // Offset (discard high 4B). + vixl32::Register expected = InputRegisterAt(instruction_, 3); // Expected. + vixl32::Register value = InputRegisterAt(instruction_, 4); // Value. + + vixl32::Register tmp_ptr = RegisterFrom(locations->GetTemp(0)); // Pointer to actual memory. + vixl32::Register tmp = RegisterFrom(locations->GetTemp(1)); // Temporary. + + // The `tmp` is initialized to `[tmp_ptr] - expected` in the main path. Reconstruct + // and mark the old value and compare with `expected`. We clobber `tmp_ptr` in the + // process due to lack of other temps suitable for the read barrier. 
+ arm_codegen->GenerateUnsafeCasOldValueAddWithBakerReadBarrier(tmp_ptr, tmp, expected); + __ Cmp(tmp_ptr, expected); + __ B(ne, GetExitLabel()); + + // The old value we have read did not match `expected` (which is always a to-space reference) + // but after the read barrier in GenerateUnsafeCasOldValueAddWithBakerReadBarrier() the marked + // to-space value matched, so the old value must be a from-space reference to the same object. + // Do the same CAS loop as the main path but check for both `expected` and the unmarked + // old value representing the to-space and from-space references for the same object. + + UseScratchRegisterScope temps(assembler->GetVIXLAssembler()); + vixl32::Register adjusted_old_value = temps.Acquire(); // For saved `tmp` from main path. + + // Recalculate the `tmp_ptr` clobbered above and store the `adjusted_old_value`, i.e. IP. + __ Add(tmp_ptr, base, offset); + __ Mov(adjusted_old_value, tmp); + + // do { + // tmp = [r_ptr] - expected; + // } while ((tmp == 0 || tmp == adjusted_old_value) && failure([r_ptr] <- r_new_value)); + // result = (tmp == 0 || tmp == adjusted_old_value); + + vixl32::Label loop_head; + __ Bind(&loop_head); + __ Ldrex(tmp, MemOperand(tmp_ptr)); // This can now load null stored by another thread. + assembler->MaybeUnpoisonHeapReference(tmp); + __ Subs(tmp, tmp, expected); // Use SUBS to get non-zero value if both compares fail. + { + // If the newly loaded value did not match `expected`, compare with `adjusted_old_value`. + ExactAssemblyScope aas(assembler->GetVIXLAssembler(), 2 * k16BitT32InstructionSizeInBytes); + __ it(ne); + __ cmp(ne, tmp, adjusted_old_value); + } + __ B(ne, GetExitLabel()); + assembler->MaybePoisonHeapReference(value); + __ Strex(tmp, value, MemOperand(tmp_ptr)); + assembler->MaybeUnpoisonHeapReference(value); + __ Cmp(tmp, 0); + __ B(ne, &loop_head, /* is_far_target= */ false); + __ B(GetExitLabel()); + } +}; + static void GenCas(HInvoke* invoke, DataType::Type type, CodeGeneratorARMVIXL* codegen) { DCHECK_NE(type, DataType::Type::kInt64); ArmVIXLAssembler* assembler = codegen->GetAssembler(); LocationSummary* locations = invoke->GetLocations(); - Location out_loc = locations->Out(); vixl32::Register out = OutputRegister(invoke); // Boolean result. vixl32::Register base = InputRegisterAt(invoke, 1); // Object pointer. - Location offset_loc = locations->InAt(2); - vixl32::Register offset = LowRegisterFrom(offset_loc); // Offset (discard high 4B). + vixl32::Register offset = LowRegisterFrom(locations->InAt(2)); // Offset (discard high 4B). vixl32::Register expected = InputRegisterAt(invoke, 3); // Expected. vixl32::Register value = InputRegisterAt(invoke, 4); // Value. - Location tmp_ptr_loc = locations->GetTemp(0); - vixl32::Register tmp_ptr = RegisterFrom(tmp_ptr_loc); // Pointer to actual memory. - vixl32::Register tmp = RegisterFrom(locations->GetTemp(1)); // Value in memory. + vixl32::Register tmp_ptr = RegisterFrom(locations->GetTemp(0)); // Pointer to actual memory. + vixl32::Register tmp = RegisterFrom(locations->GetTemp(1)); // Temporary. 
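  // For orientation (not part of this change): the code generated below implements the
  // Unsafe.compareAndSwapInt/compareAndSwapObject contract, roughly equivalent to
  //   bool success = __atomic_compare_exchange_n(field, &expected, value, /*weak=*/ false,
  //                                              __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST);
  // but it is built from DMB + LDREX/STREX so that, for object fields with the Baker read
  // barrier, the loaded old value can be re-checked against a from-space reference of the
  // expected object on the slow path above.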
+ + vixl32::Label loop_exit_label; + vixl32::Label* loop_exit = &loop_exit_label; + vixl32::Label* failure = &loop_exit_label; if (type == DataType::Type::kReference) { // The only read barrier implementation supporting the @@ -1336,87 +1062,63 @@ static void GenCas(HInvoke* invoke, DataType::Type type, CodeGeneratorARMVIXL* c codegen->MarkGCCard(tmp_ptr, tmp, base, value, value_can_be_null); if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) { - // Need to make sure the reference stored in the field is a to-space - // one before attempting the CAS or the CAS could fail incorrectly. - codegen->UpdateReferenceFieldWithBakerReadBarrier( - invoke, - out_loc, // Unused, used only as a "temporary" within the read barrier. - base, - /* field_offset */ offset_loc, - tmp_ptr_loc, - /* needs_null_check */ false, - tmp); + // If marking, check if the stored reference is a from-space reference to the same + // object as the to-space reference `expected`. If so, perform a custom CAS loop. + BakerReadBarrierCasSlowPathARMVIXL* slow_path = + new (codegen->GetScopedAllocator()) BakerReadBarrierCasSlowPathARMVIXL(invoke); + codegen->AddSlowPath(slow_path); + failure = slow_path->GetEntryLabel(); + loop_exit = slow_path->GetExitLabel(); } } // Prevent reordering with prior memory operations. // Emit a DMB ISH instruction instead of an DMB ISHST one, as the - // latter allows a preceding load to be delayed past the STXR + // latter allows a preceding load to be delayed past the STREX // instruction below. __ Dmb(vixl32::ISH); __ Add(tmp_ptr, base, offset); - if (kPoisonHeapReferences && type == DataType::Type::kReference) { - codegen->GetAssembler()->PoisonHeapReference(expected); - if (value.Is(expected)) { - // Do not poison `value`, as it is the same register as - // `expected`, which has just been poisoned. - } else { - codegen->GetAssembler()->PoisonHeapReference(value); - } - } - // do { // tmp = [r_ptr] - expected; // } while (tmp == 0 && failure([r_ptr] <- r_new_value)); - // result = tmp != 0; + // result = tmp == 0; vixl32::Label loop_head; __ Bind(&loop_head); - __ Ldrex(tmp, MemOperand(tmp_ptr)); - + if (type == DataType::Type::kReference) { + assembler->MaybeUnpoisonHeapReference(tmp); + } __ Subs(tmp, tmp, expected); - - { - ExactAssemblyScope aas(assembler->GetVIXLAssembler(), - 3 * kMaxInstructionSizeInBytes, - CodeBufferCheckScope::kMaximumSize); - - __ itt(eq); - __ strex(eq, tmp, value, MemOperand(tmp_ptr)); - __ cmp(eq, tmp, 1); + static_cast<vixl32::MacroAssembler*>(assembler->GetVIXLAssembler())-> + B(ne, failure, /* hint= */ (failure == loop_exit) ? kNear : kBranchWithoutHint); + if (type == DataType::Type::kReference) { + assembler->MaybePoisonHeapReference(value); + } + __ Strex(tmp, value, MemOperand(tmp_ptr)); + if (type == DataType::Type::kReference) { + assembler->MaybeUnpoisonHeapReference(value); } + __ Cmp(tmp, 0); + __ B(ne, &loop_head, /* is_far_target= */ false); - __ B(eq, &loop_head, /* far_target */ false); + __ Bind(loop_exit); __ Dmb(vixl32::ISH); - __ Rsbs(out, tmp, 1); + // out = tmp == 0. 
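// For reference, a small sketch (not part of this change) of the CLZ/LSR materialization of
// `tmp == 0` emitted immediately below; CountLeadingZeros32 is an illustrative stand-in for
// the CLZ instruction (which returns 32 for a zero 32-bit input):
static uint32_t CountLeadingZeros32(uint32_t x) {
  uint32_t n = 0;
  for (uint32_t bit = 1u << 31; bit != 0u && (x & bit) == 0u; bit >>= 1) {
    ++n;
  }
  return n;
}

static uint32_t IsZero32(uint32_t x) {
  // CLZ yields 32 only for x == 0; shifting right by log2(32) == 5 maps 32 to 1 and every
  // value in [0, 31] to 0, which is exactly the boolean result the CAS is expected to return.
  return CountLeadingZeros32(x) >> 5;
}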
+ __ Clz(out, tmp); + __ Lsr(out, out, WhichPowerOf2(out.GetSizeInBits())); - { - ExactAssemblyScope aas(assembler->GetVIXLAssembler(), - 2 * kMaxInstructionSizeInBytes, - CodeBufferCheckScope::kMaximumSize); - - __ it(cc); - __ mov(cc, out, 0); - } - - if (kPoisonHeapReferences && type == DataType::Type::kReference) { - codegen->GetAssembler()->UnpoisonHeapReference(expected); - if (value.Is(expected)) { - // Do not unpoison `value`, as it is the same register as - // `expected`, which has just been unpoisoned. - } else { - codegen->GetAssembler()->UnpoisonHeapReference(value); - } + if (type == DataType::Type::kReference) { + codegen->MaybeGenerateMarkingRegisterCheck(/* code= */ 128); } } void IntrinsicLocationsBuilderARMVIXL::VisitUnsafeCASInt(HInvoke* invoke) { - CreateIntIntIntIntIntToIntPlusTemps(allocator_, invoke, DataType::Type::kInt32); + CreateIntIntIntIntIntToIntPlusTemps(allocator_, invoke); } void IntrinsicLocationsBuilderARMVIXL::VisitUnsafeCASObject(HInvoke* invoke) { // The only read barrier implementation supporting the @@ -1425,7 +1127,7 @@ void IntrinsicLocationsBuilderARMVIXL::VisitUnsafeCASObject(HInvoke* invoke) { return; } - CreateIntIntIntIntIntToIntPlusTemps(allocator_, invoke, DataType::Type::kReference); + CreateIntIntIntIntIntToIntPlusTemps(allocator_, invoke); } void IntrinsicCodeGeneratorARMVIXL::VisitUnsafeCASInt(HInvoke* invoke) { GenCas(invoke, DataType::Type::kInt32, codegen_); @@ -1606,23 +1308,23 @@ static void GenerateStringCompareToLoop(ArmVIXLAssembler* assembler, __ Ldr(temp_reg, MemOperand(str, temp1)); __ Ldr(temp2, MemOperand(arg, temp1)); __ Cmp(temp_reg, temp2); - __ B(ne, &find_char_diff, /* far_target */ false); + __ B(ne, &find_char_diff, /* is_far_target= */ false); __ Add(temp1, temp1, char_size * 2); __ Ldr(temp_reg, MemOperand(str, temp1)); __ Ldr(temp2, MemOperand(arg, temp1)); __ Cmp(temp_reg, temp2); - __ B(ne, &find_char_diff_2nd_cmp, /* far_target */ false); + __ B(ne, &find_char_diff_2nd_cmp, /* is_far_target= */ false); __ Add(temp1, temp1, char_size * 2); // With string compression, we have compared 8 bytes, otherwise 4 chars. __ Subs(temp0, temp0, (mirror::kUseStringCompression ? 8 : 4)); - __ B(hi, &loop, /* far_target */ false); + __ B(hi, &loop, /* is_far_target= */ false); __ B(end); __ Bind(&find_char_diff_2nd_cmp); if (mirror::kUseStringCompression) { __ Subs(temp0, temp0, 4); // 4 bytes previously compared. - __ B(ls, end, /* far_target */ false); // Was the second comparison fully beyond the end? + __ B(ls, end, /* is_far_target= */ false); // Was the second comparison fully beyond the end? } else { // Without string compression, we can start treating temp0 as signed // and rely on the signed comparison below. @@ -1650,7 +1352,7 @@ static void GenerateStringCompareToLoop(ArmVIXLAssembler* assembler, // the remaining string data, so just return length diff (out). // The comparison is unsigned for string compression, otherwise signed. __ Cmp(temp0, Operand(temp1, vixl32::LSR, (mirror::kUseStringCompression ? 3 : 4))); - __ B((mirror::kUseStringCompression ? ls : le), end, /* far_target */ false); + __ B((mirror::kUseStringCompression ? ls : le), end, /* is_far_target= */ false); // Extract the characters and calculate the difference. 
if (mirror::kUseStringCompression) { @@ -1717,9 +1419,9 @@ static void GenerateStringCompareToLoop(ArmVIXLAssembler* assembler, __ Ldrb(temp_reg, MemOperand(temp1, c_char_size, PostIndex)); __ Ldrh(temp3, MemOperand(temp2, char_size, PostIndex)); __ Cmp(temp_reg, temp3); - __ B(ne, &different_compression_diff, /* far_target */ false); + __ B(ne, &different_compression_diff, /* is_far_target= */ false); __ Subs(temp0, temp0, 2); - __ B(hi, &different_compression_loop, /* far_target */ false); + __ B(hi, &different_compression_loop, /* is_far_target= */ false); __ B(end); // Calculate the difference. @@ -1757,13 +1459,6 @@ static const char* GetConstString(HInstruction* candidate, uint32_t* utf16_lengt } void IntrinsicLocationsBuilderARMVIXL::VisitStringEquals(HInvoke* invoke) { - if (kEmitCompilerReadBarrier && - !StringEqualsOptimizations(invoke).GetArgumentIsString() && - !StringEqualsOptimizations(invoke).GetNoReadBarrierForStringClass()) { - // No support for this odd case (String class is moveable, not in the boot image). - return; - } - LocationSummary* locations = new (allocator_) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified); InvokeRuntimeCallingConventionARMVIXL calling_convention; @@ -1822,22 +1517,30 @@ void IntrinsicCodeGeneratorARMVIXL::VisitStringEquals(HInvoke* invoke) { StringEqualsOptimizations optimizations(invoke); if (!optimizations.GetArgumentNotNull()) { // Check if input is null, return false if it is. - __ CompareAndBranchIfZero(arg, &return_false, /* far_target */ false); + __ CompareAndBranchIfZero(arg, &return_false, /* is_far_target= */ false); } // Reference equality check, return true if same reference. __ Cmp(str, arg); - __ B(eq, &return_true, /* far_target */ false); + __ B(eq, &return_true, /* is_far_target= */ false); if (!optimizations.GetArgumentIsString()) { // Instanceof check for the argument by comparing class fields. // All string objects must have the same type since String cannot be subclassed. // Receiver must be a string object, so its class field is equal to all strings' class fields. // If the argument is a string object, its class field must be equal to receiver's class field. + // + // As the String class is expected to be non-movable, we can read the class + // field from String.equals' arguments without read barriers. + AssertNonMovableStringClass(); + // /* HeapReference<Class> */ temp = str->klass_ __ Ldr(temp, MemOperand(str, class_offset)); + // /* HeapReference<Class> */ out = arg->klass_ __ Ldr(out, MemOperand(arg, class_offset)); + // Also, because we use the previously loaded class references only in the + // following comparison, we don't need to unpoison them. __ Cmp(temp, out); - __ B(ne, &return_false, /* far_target */ false); + __ B(ne, &return_false, /* is_far_target= */ false); } // Check if one of the inputs is a const string. Do not special-case both strings @@ -1860,7 +1563,7 @@ void IntrinsicCodeGeneratorARMVIXL::VisitStringEquals(HInvoke* invoke) { // Also compares the compression style, if differs return false. __ Ldr(temp, MemOperand(arg, count_offset)); __ Cmp(temp, Operand(mirror::String::GetFlaggedCount(const_string_length, is_compressed))); - __ B(ne, &return_false, /* far_target */ false); + __ B(ne, &return_false, /* is_far_target= */ false); } else { // Load `count` fields of this and argument strings. 
__ Ldr(temp, MemOperand(str, count_offset)); @@ -1868,7 +1571,7 @@ void IntrinsicCodeGeneratorARMVIXL::VisitStringEquals(HInvoke* invoke) { // Check if `count` fields are equal, return false if they're not. // Also compares the compression style, if differs return false. __ Cmp(temp, out); - __ B(ne, &return_false, /* far_target */ false); + __ B(ne, &return_false, /* is_far_target= */ false); } // Assertions that must hold in order to compare strings 4 bytes at a time. @@ -1891,9 +1594,9 @@ void IntrinsicCodeGeneratorARMVIXL::VisitStringEquals(HInvoke* invoke) { __ Ldrd(temp, temp1, MemOperand(str, offset)); __ Ldrd(temp2, out, MemOperand(arg, offset)); __ Cmp(temp, temp2); - __ B(ne, &return_false, /* far_label */ false); + __ B(ne, &return_false, /* is_far_target= */ false); __ Cmp(temp1, out); - __ B(ne, &return_false, /* far_label */ false); + __ B(ne, &return_false, /* is_far_target= */ false); offset += 2u * sizeof(uint32_t); remaining_bytes -= 2u * sizeof(uint32_t); } @@ -1901,13 +1604,13 @@ void IntrinsicCodeGeneratorARMVIXL::VisitStringEquals(HInvoke* invoke) { __ Ldr(temp, MemOperand(str, offset)); __ Ldr(out, MemOperand(arg, offset)); __ Cmp(temp, out); - __ B(ne, &return_false, /* far_label */ false); + __ B(ne, &return_false, /* is_far_target= */ false); } } else { // Return true if both strings are empty. Even with string compression `count == 0` means empty. static_assert(static_cast<uint32_t>(mirror::StringCompressionFlag::kCompressed) == 0u, "Expecting 0=compressed, 1=uncompressed"); - __ CompareAndBranchIfZero(temp, &return_true, /* far_target */ false); + __ CompareAndBranchIfZero(temp, &return_true, /* is_far_target= */ false); if (mirror::kUseStringCompression) { // For string compression, calculate the number of bytes to compare (not chars). @@ -1933,10 +1636,10 @@ void IntrinsicCodeGeneratorARMVIXL::VisitStringEquals(HInvoke* invoke) { __ Ldr(temp2, MemOperand(arg, temp1)); __ Add(temp1, temp1, Operand::From(sizeof(uint32_t))); __ Cmp(out, temp2); - __ B(ne, &return_false, /* far_target */ false); + __ B(ne, &return_false, /* is_far_target= */ false); // With string compression, we have compared 4 bytes, otherwise 2 chars. __ Subs(temp, temp, mirror::kUseStringCompression ? 4 : 2); - __ B(hi, &loop, /* far_target */ false); + __ B(hi, &loop, /* is_far_target= */ false); } // Return true and exit the function. @@ -2017,7 +1720,7 @@ void IntrinsicLocationsBuilderARMVIXL::VisitStringIndexOf(HInvoke* invoke) { } void IntrinsicCodeGeneratorARMVIXL::VisitStringIndexOf(HInvoke* invoke) { - GenerateVisitStringIndexOf(invoke, GetAssembler(), codegen_, /* start_at_zero */ true); + GenerateVisitStringIndexOf(invoke, GetAssembler(), codegen_, /* start_at_zero= */ true); } void IntrinsicLocationsBuilderARMVIXL::VisitStringIndexOfAfter(HInvoke* invoke) { @@ -2033,7 +1736,7 @@ void IntrinsicLocationsBuilderARMVIXL::VisitStringIndexOfAfter(HInvoke* invoke) } void IntrinsicCodeGeneratorARMVIXL::VisitStringIndexOfAfter(HInvoke* invoke) { - GenerateVisitStringIndexOf(invoke, GetAssembler(), codegen_, /* start_at_zero */ false); + GenerateVisitStringIndexOf(invoke, GetAssembler(), codegen_, /* start_at_zero= */ false); } void IntrinsicLocationsBuilderARMVIXL::VisitStringNewStringFromBytes(HInvoke* invoke) { @@ -2137,8 +1840,6 @@ void IntrinsicLocationsBuilderARMVIXL::VisitSystemArrayCopy(HInvoke* invoke) { // is clobbered by ReadBarrierMarkRegX entry points). Get an extra // temporary register from the register allocator. 
locations->AddTemp(Location::RequiresRegister()); - CodeGeneratorARMVIXL* arm_codegen = down_cast<CodeGeneratorARMVIXL*>(codegen_); - arm_codegen->MaybeAddBakerCcEntrypointTempForFields(locations); } } @@ -2257,7 +1958,7 @@ void IntrinsicCodeGeneratorARMVIXL::VisitSystemArrayCopy(HInvoke* invoke) { } else { if (!optimizations.GetDestinationIsSource()) { __ Cmp(src, dest); - __ B(ne, &conditions_on_positions_validated, /* far_target */ false); + __ B(ne, &conditions_on_positions_validated, /* is_far_target= */ false); } __ Cmp(RegisterFrom(dest_pos), src_pos_constant); __ B(gt, intrinsic_slow_path->GetEntryLabel()); @@ -2265,7 +1966,7 @@ void IntrinsicCodeGeneratorARMVIXL::VisitSystemArrayCopy(HInvoke* invoke) { } else { if (!optimizations.GetDestinationIsSource()) { __ Cmp(src, dest); - __ B(ne, &conditions_on_positions_validated, /* far_target */ false); + __ B(ne, &conditions_on_positions_validated, /* is_far_target= */ false); } if (dest_pos.IsConstant()) { int32_t dest_pos_constant = Int32ConstantFrom(dest_pos); @@ -2325,11 +2026,11 @@ void IntrinsicCodeGeneratorARMVIXL::VisitSystemArrayCopy(HInvoke* invoke) { if (!optimizations.GetSourceIsNonPrimitiveArray()) { // /* HeapReference<Class> */ temp1 = src->klass_ codegen_->GenerateFieldLoadWithBakerReadBarrier( - invoke, temp1_loc, src, class_offset, temp2_loc, /* needs_null_check */ false); + invoke, temp1_loc, src, class_offset, temp2_loc, /* needs_null_check= */ false); // Bail out if the source is not a non primitive array. // /* HeapReference<Class> */ temp1 = temp1->component_type_ codegen_->GenerateFieldLoadWithBakerReadBarrier( - invoke, temp1_loc, temp1, component_offset, temp2_loc, /* needs_null_check */ false); + invoke, temp1_loc, temp1, component_offset, temp2_loc, /* needs_null_check= */ false); __ CompareAndBranchIfZero(temp1, intrinsic_slow_path->GetEntryLabel()); // If heap poisoning is enabled, `temp1` has been unpoisoned // by the the previous call to GenerateFieldLoadWithBakerReadBarrier. @@ -2341,7 +2042,7 @@ void IntrinsicCodeGeneratorARMVIXL::VisitSystemArrayCopy(HInvoke* invoke) { // /* HeapReference<Class> */ temp1 = dest->klass_ codegen_->GenerateFieldLoadWithBakerReadBarrier( - invoke, temp1_loc, dest, class_offset, temp2_loc, /* needs_null_check */ false); + invoke, temp1_loc, dest, class_offset, temp2_loc, /* needs_null_check= */ false); if (!optimizations.GetDestinationIsNonPrimitiveArray()) { // Bail out if the destination is not a non primitive array. @@ -2353,7 +2054,7 @@ void IntrinsicCodeGeneratorARMVIXL::VisitSystemArrayCopy(HInvoke* invoke) { // temporaries such a `temp1`. // /* HeapReference<Class> */ temp2 = temp1->component_type_ codegen_->GenerateFieldLoadWithBakerReadBarrier( - invoke, temp2_loc, temp1, component_offset, temp3_loc, /* needs_null_check */ false); + invoke, temp2_loc, temp1, component_offset, temp3_loc, /* needs_null_check= */ false); __ CompareAndBranchIfZero(temp2, intrinsic_slow_path->GetEntryLabel()); // If heap poisoning is enabled, `temp2` has been unpoisoned // by the the previous call to GenerateFieldLoadWithBakerReadBarrier. @@ -2367,16 +2068,16 @@ void IntrinsicCodeGeneratorARMVIXL::VisitSystemArrayCopy(HInvoke* invoke) { // read barrier emitted by GenerateFieldLoadWithBakerReadBarrier below. 
// /* HeapReference<Class> */ temp2 = src->klass_ codegen_->GenerateFieldLoadWithBakerReadBarrier( - invoke, temp2_loc, src, class_offset, temp3_loc, /* needs_null_check */ false); + invoke, temp2_loc, src, class_offset, temp3_loc, /* needs_null_check= */ false); // Note: if heap poisoning is on, we are comparing two unpoisoned references here. __ Cmp(temp1, temp2); if (optimizations.GetDestinationIsTypedObjectArray()) { vixl32::Label do_copy; - __ B(eq, &do_copy, /* far_target */ false); + __ B(eq, &do_copy, /* is_far_target= */ false); // /* HeapReference<Class> */ temp1 = temp1->component_type_ codegen_->GenerateFieldLoadWithBakerReadBarrier( - invoke, temp1_loc, temp1, component_offset, temp2_loc, /* needs_null_check */ false); + invoke, temp1_loc, temp1, component_offset, temp2_loc, /* needs_null_check= */ false); // /* HeapReference<Class> */ temp1 = temp1->super_class_ // We do not need to emit a read barrier for the following // heap reference load, as `temp1` is only used in a @@ -2433,7 +2134,7 @@ void IntrinsicCodeGeneratorARMVIXL::VisitSystemArrayCopy(HInvoke* invoke) { if (optimizations.GetDestinationIsTypedObjectArray()) { vixl32::Label do_copy; - __ B(eq, &do_copy, /* far_target */ false); + __ B(eq, &do_copy, /* is_far_target= */ false); if (!did_unpoison) { assembler->MaybeUnpoisonHeapReference(temp1); } @@ -2455,10 +2156,10 @@ void IntrinsicCodeGeneratorARMVIXL::VisitSystemArrayCopy(HInvoke* invoke) { if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) { // /* HeapReference<Class> */ temp1 = src->klass_ codegen_->GenerateFieldLoadWithBakerReadBarrier( - invoke, temp1_loc, src, class_offset, temp2_loc, /* needs_null_check */ false); + invoke, temp1_loc, src, class_offset, temp2_loc, /* needs_null_check= */ false); // /* HeapReference<Class> */ temp3 = temp1->component_type_ codegen_->GenerateFieldLoadWithBakerReadBarrier( - invoke, temp3_loc, temp1, component_offset, temp2_loc, /* needs_null_check */ false); + invoke, temp3_loc, temp1, component_offset, temp2_loc, /* needs_null_check= */ false); __ CompareAndBranchIfZero(temp3, intrinsic_slow_path->GetEntryLabel()); // If heap poisoning is enabled, `temp3` has been unpoisoned // by the the previous call to GenerateFieldLoadWithBakerReadBarrier. @@ -2486,7 +2187,7 @@ void IntrinsicCodeGeneratorARMVIXL::VisitSystemArrayCopy(HInvoke* invoke) { if (length.IsRegister()) { // Don't enter the copy loop if the length is null. - __ CompareAndBranchIfZero(RegisterFrom(length), &done, /* is_far_target */ false); + __ CompareAndBranchIfZero(RegisterFrom(length), &done, /* is_far_target= */ false); } if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) { @@ -2543,7 +2244,7 @@ void IntrinsicCodeGeneratorARMVIXL::VisitSystemArrayCopy(HInvoke* invoke) { // Given the numeric representation, it's enough to check the low bit of the // rb_state. We do that by shifting the bit out of the lock word with LSRS // which can be a 16-bit instruction unlike the TST immediate. - static_assert(ReadBarrier::WhiteState() == 0, "Expecting white to have value 0"); + static_assert(ReadBarrier::NonGrayState() == 0, "Expecting non-gray to have value 0"); static_assert(ReadBarrier::GrayState() == 1, "Expecting gray to have value 1"); __ Lsrs(temp2, temp2, LockWord::kReadBarrierStateShift + 1); // Carry flag is the last bit shifted out by LSRS. 
@@ -2563,7 +2264,7 @@ void IntrinsicCodeGeneratorARMVIXL::VisitSystemArrayCopy(HInvoke* invoke) { __ Str(temp_reg, MemOperand(temp2, element_size, PostIndex)); } __ Cmp(temp1, temp3); - __ B(ne, &loop, /* far_target */ false); + __ B(ne, &loop, /* is_far_target= */ false); __ Bind(read_barrier_slow_path->GetExitLabel()); } else { @@ -2585,13 +2286,13 @@ void IntrinsicCodeGeneratorARMVIXL::VisitSystemArrayCopy(HInvoke* invoke) { __ Str(temp_reg, MemOperand(temp2, element_size, PostIndex)); } __ Cmp(temp1, temp3); - __ B(ne, &loop, /* far_target */ false); + __ B(ne, &loop, /* is_far_target= */ false); } __ Bind(&done); } // We only need one card marking on the destination array. - codegen_->MarkGCCard(temp1, temp2, dest, NoReg, /* value_can_be_null */ false); + codegen_->MarkGCCard(temp1, temp2, dest, NoReg, /* can_be_null= */ false); __ Bind(intrinsic_slow_path->GetExitLabel()); } @@ -3121,7 +2822,7 @@ void IntrinsicCodeGeneratorARMVIXL::VisitStringGetCharsNoCheck(HInvoke* invoke) __ Subs(num_chr, srcEnd, srcBegin); // Early out for valid zero-length retrievals. - __ B(eq, final_label, /* far_target */ false); + __ B(eq, final_label, /* is_far_target= */ false); // src range to copy. __ Add(src_ptr, srcObj, value_offset); @@ -3137,7 +2838,7 @@ void IntrinsicCodeGeneratorARMVIXL::VisitStringGetCharsNoCheck(HInvoke* invoke) __ Ldr(temp, MemOperand(srcObj, count_offset)); __ Tst(temp, 1); temps.Release(temp); - __ B(eq, &compressed_string_preloop, /* far_target */ false); + __ B(eq, &compressed_string_preloop, /* is_far_target= */ false); } __ Add(src_ptr, src_ptr, Operand(srcBegin, vixl32::LSL, 1)); @@ -3147,7 +2848,7 @@ void IntrinsicCodeGeneratorARMVIXL::VisitStringGetCharsNoCheck(HInvoke* invoke) temp = temps.Acquire(); // Save repairing the value of num_chr on the < 4 character path. __ Subs(temp, num_chr, 4); - __ B(lt, &remainder, /* far_target */ false); + __ B(lt, &remainder, /* is_far_target= */ false); // Keep the result of the earlier subs, we are going to fetch at least 4 characters. __ Mov(num_chr, temp); @@ -3162,10 +2863,10 @@ void IntrinsicCodeGeneratorARMVIXL::VisitStringGetCharsNoCheck(HInvoke* invoke) __ Ldr(temp, MemOperand(src_ptr, char_size * 4, PostIndex)); __ Str(temp, MemOperand(dst_ptr, char_size * 4, PostIndex)); temps.Release(temp); - __ B(ge, &loop, /* far_target */ false); + __ B(ge, &loop, /* is_far_target= */ false); __ Adds(num_chr, num_chr, 4); - __ B(eq, final_label, /* far_target */ false); + __ B(eq, final_label, /* is_far_target= */ false); // Main loop for < 4 character case and remainder handling. Loads and stores one // 16-bit Java character at a time. 
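The String.getChars intrinsic above copies four 16-bit characters per iteration before falling back to a one-character remainder loop, and takes a separate widening loop when the source string is compressed (one byte per character). A rough portable sketch of that copy strategy, illustrative rather than ART code:

    #include <cstddef>
    #include <cstdint>

    // Bulk loop: four UTF-16 code units per iteration, then a remainder loop.
    void CopyChars(const uint16_t* src, uint16_t* dst, size_t count) {
      while (count >= 4) {
        dst[0] = src[0]; dst[1] = src[1]; dst[2] = src[2]; dst[3] = src[3];
        src += 4; dst += 4; count -= 4;
      }
      while (count != 0) {
        *dst++ = *src++;
        --count;
      }
    }

    // Compressed-string path: widen each Latin-1 byte to a 16-bit code unit.
    void CopyCompressedChars(const uint8_t* src, uint16_t* dst, size_t count) {
      for (size_t i = 0; i != count; ++i) {
        dst[i] = src[i];
      }
    }

    int main() {
      const uint16_t utf16[5] = {1, 2, 3, 4, 5};
      uint16_t copied[5] = {};
      CopyChars(utf16, copied, 5);
      const uint8_t latin1[2] = {'h', 'i'};
      uint16_t widened[2] = {};
      CopyCompressedChars(latin1, widened, 2);
      return (copied[4] == 5 && widened[1] == 'i') ? 0 : 1;
    }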
@@ -3175,7 +2876,7 @@ void IntrinsicCodeGeneratorARMVIXL::VisitStringGetCharsNoCheck(HInvoke* invoke) __ Subs(num_chr, num_chr, 1); __ Strh(temp, MemOperand(dst_ptr, char_size, PostIndex)); temps.Release(temp); - __ B(gt, &remainder, /* far_target */ false); + __ B(gt, &remainder, /* is_far_target= */ false); if (mirror::kUseStringCompression) { __ B(final_label); @@ -3191,7 +2892,7 @@ void IntrinsicCodeGeneratorARMVIXL::VisitStringGetCharsNoCheck(HInvoke* invoke) __ Strh(temp, MemOperand(dst_ptr, char_size, PostIndex)); temps.Release(temp); __ Subs(num_chr, num_chr, 1); - __ B(gt, &compressed_string_loop, /* far_target */ false); + __ B(gt, &compressed_string_loop, /* is_far_target= */ false); } if (done.IsReferenced()) { @@ -3252,7 +2953,7 @@ void IntrinsicLocationsBuilderARMVIXL::VisitMathCeil(HInvoke* invoke) { void IntrinsicCodeGeneratorARMVIXL::VisitMathCeil(HInvoke* invoke) { ArmVIXLAssembler* assembler = GetAssembler(); DCHECK(codegen_->GetInstructionSetFeatures().HasARMv8AInstructions()); - __ Vrintp(F64, F64, OutputDRegister(invoke), InputDRegisterAt(invoke, 0)); + __ Vrintp(F64, OutputDRegister(invoke), InputDRegisterAt(invoke, 0)); } void IntrinsicLocationsBuilderARMVIXL::VisitMathFloor(HInvoke* invoke) { @@ -3264,7 +2965,7 @@ void IntrinsicLocationsBuilderARMVIXL::VisitMathFloor(HInvoke* invoke) { void IntrinsicCodeGeneratorARMVIXL::VisitMathFloor(HInvoke* invoke) { ArmVIXLAssembler* assembler = GetAssembler(); DCHECK(codegen_->GetInstructionSetFeatures().HasARMv8AInstructions()); - __ Vrintm(F64, F64, OutputDRegister(invoke), InputDRegisterAt(invoke, 0)); + __ Vrintm(F64, OutputDRegister(invoke), InputDRegisterAt(invoke, 0)); } void IntrinsicLocationsBuilderARMVIXL::VisitIntegerValueOf(HInvoke* invoke) { @@ -3277,33 +2978,27 @@ void IntrinsicLocationsBuilderARMVIXL::VisitIntegerValueOf(HInvoke* invoke) { } void IntrinsicCodeGeneratorARMVIXL::VisitIntegerValueOf(HInvoke* invoke) { - IntrinsicVisitor::IntegerValueOfInfo info = IntrinsicVisitor::ComputeIntegerValueOfInfo(); + IntrinsicVisitor::IntegerValueOfInfo info = + IntrinsicVisitor::ComputeIntegerValueOfInfo(invoke, codegen_->GetCompilerOptions()); LocationSummary* locations = invoke->GetLocations(); ArmVIXLAssembler* const assembler = GetAssembler(); vixl32::Register out = RegisterFrom(locations->Out()); UseScratchRegisterScope temps(assembler->GetVIXLAssembler()); vixl32::Register temp = temps.Acquire(); - InvokeRuntimeCallingConventionARMVIXL calling_convention; - vixl32::Register argument = calling_convention.GetRegisterAt(0); if (invoke->InputAt(0)->IsConstant()) { int32_t value = invoke->InputAt(0)->AsIntConstant()->GetValue(); - if (value >= info.low && value <= info.high) { + if (static_cast<uint32_t>(value - info.low) < info.length) { // Just embed the j.l.Integer in the code. - ScopedObjectAccess soa(Thread::Current()); - mirror::Object* boxed = info.cache->Get(value + (-info.low)); - DCHECK(boxed != nullptr && Runtime::Current()->GetHeap()->ObjectIsInBootImageSpace(boxed)); - uint32_t address = dchecked_integral_cast<uint32_t>(reinterpret_cast<uintptr_t>(boxed)); - __ Ldr(out, codegen_->DeduplicateBootImageAddressLiteral(address)); + DCHECK_NE(info.value_boot_image_reference, IntegerValueOfInfo::kInvalidReference); + codegen_->LoadBootImageAddress(out, info.value_boot_image_reference); } else { + DCHECK(locations->CanCall()); // Allocate and initialize a new j.l.Integer. // TODO: If we JIT, we could allocate the j.l.Integer now, and store it in the // JIT object table. 
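The rewritten cache test above, static_cast<uint32_t>(value - info.low) < info.length, folds the old two-sided check (info.low <= value && value <= info.high) into a single unsigned comparison: when value is below the low bound the subtraction wraps to a large unsigned number and the length test fails. A small self-contained sketch; the [-128, 127] bounds are the usual Java Integer-cache defaults, assumed here only for the demonstration:

    #include <cassert>
    #include <cstdint>

    // One unsigned compare replaces the two signed range checks.
    inline bool InIntegerCache(int32_t value, int32_t low, uint32_t length) {
      return static_cast<uint32_t>(value - low) < length;
    }

    int main() {
      constexpr int32_t kLow = -128;     // assumed cache low bound
      constexpr uint32_t kLength = 256;  // assumed cache size
      assert(InIntegerCache(-128, kLow, kLength));
      assert(InIntegerCache(127, kLow, kLength));
      assert(!InIntegerCache(128, kLow, kLength));   // one past the high bound
      assert(!InIntegerCache(-129, kLow, kLength));  // wraps to a huge unsigned value
      return 0;
    }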
- uint32_t address = - dchecked_integral_cast<uint32_t>(reinterpret_cast<uintptr_t>(info.integer)); - __ Ldr(argument, codegen_->DeduplicateBootImageAddressLiteral(address)); - codegen_->InvokeRuntime(kQuickAllocObjectInitialized, invoke, invoke->GetDexPc()); - CheckEntrypointTypes<kQuickAllocObjectWithChecks, void*, mirror::Class*>(); + codegen_->AllocateInstanceForIntrinsic(invoke->AsInvokeStaticOrDirect(), + info.integer_boot_image_offset); __ Mov(temp, value); assembler->StoreToOffset(kStoreWord, temp, out, info.value_offset); // `value` is a final field :-( Ideally, we'd merge this memory barrier with the allocation @@ -3311,25 +3006,22 @@ void IntrinsicCodeGeneratorARMVIXL::VisitIntegerValueOf(HInvoke* invoke) { codegen_->GenerateMemoryBarrier(MemBarrierKind::kStoreStore); } } else { + DCHECK(locations->CanCall()); vixl32::Register in = RegisterFrom(locations->InAt(0)); // Check bounds of our cache. __ Add(out, in, -info.low); - __ Cmp(out, info.high - info.low + 1); + __ Cmp(out, info.length); vixl32::Label allocate, done; - __ B(hs, &allocate, /* is_far_target */ false); + __ B(hs, &allocate, /* is_far_target= */ false); // If the value is within the bounds, load the j.l.Integer directly from the array. - uint32_t data_offset = mirror::Array::DataOffset(kHeapReferenceSize).Uint32Value(); - uint32_t address = dchecked_integral_cast<uint32_t>(reinterpret_cast<uintptr_t>(info.cache)); - __ Ldr(temp, codegen_->DeduplicateBootImageAddressLiteral(data_offset + address)); + codegen_->LoadBootImageAddress(temp, info.array_data_boot_image_reference); codegen_->LoadFromShiftedRegOffset(DataType::Type::kReference, locations->Out(), temp, out); assembler->MaybeUnpoisonHeapReference(out); __ B(&done); __ Bind(&allocate); // Otherwise allocate and initialize a new j.l.Integer. - address = dchecked_integral_cast<uint32_t>(reinterpret_cast<uintptr_t>(info.integer)); - __ Ldr(argument, codegen_->DeduplicateBootImageAddressLiteral(address)); - codegen_->InvokeRuntime(kQuickAllocObjectInitialized, invoke, invoke->GetDexPc()); - CheckEntrypointTypes<kQuickAllocObjectWithChecks, void*, mirror::Class*>(); + codegen_->AllocateInstanceForIntrinsic(invoke->AsInvokeStaticOrDirect(), + info.integer_boot_image_offset); assembler->StoreToOffset(kStoreWord, in, out, info.value_offset); // `value` is a final field :-( Ideally, we'd merge this memory barrier with the allocation // one. @@ -3353,7 +3045,7 @@ void IntrinsicCodeGeneratorARMVIXL::VisitThreadInterrupted(HInvoke* invoke) { vixl32::Register temp = temps.Acquire(); vixl32::Label done; vixl32::Label* const final_label = codegen_->GetFinalLabel(invoke, &done); - __ CompareAndBranchIfZero(out, final_label, /* far_target */ false); + __ CompareAndBranchIfZero(out, final_label, /* is_far_target= */ false); __ Dmb(vixl32::ISH); __ Mov(temp, 0); assembler->StoreToOffset(kStoreWord, temp, tr, offset); @@ -3375,6 +3067,9 @@ UNIMPLEMENTED_INTRINSIC(ARMVIXL, MathRoundDouble) // Could be done by changing UNIMPLEMENTED_INTRINSIC(ARMVIXL, UnsafeCASLong) // High register pressure. 
UNIMPLEMENTED_INTRINSIC(ARMVIXL, SystemArrayCopyChar) UNIMPLEMENTED_INTRINSIC(ARMVIXL, ReferenceGetReferent) +UNIMPLEMENTED_INTRINSIC(ARMVIXL, CRC32Update) +UNIMPLEMENTED_INTRINSIC(ARMVIXL, CRC32UpdateBytes) +UNIMPLEMENTED_INTRINSIC(ARMVIXL, CRC32UpdateByteBuffer) UNIMPLEMENTED_INTRINSIC(ARMVIXL, StringStringIndexOf); UNIMPLEMENTED_INTRINSIC(ARMVIXL, StringStringIndexOfAfter); diff --git a/compiler/optimizing/intrinsics_arm_vixl.h b/compiler/optimizing/intrinsics_arm_vixl.h index 9c02d0a4ad..1fea776f0d 100644 --- a/compiler/optimizing/intrinsics_arm_vixl.h +++ b/compiler/optimizing/intrinsics_arm_vixl.h @@ -27,14 +27,14 @@ namespace arm { class ArmVIXLAssembler; class CodeGeneratorARMVIXL; -class IntrinsicLocationsBuilderARMVIXL FINAL : public IntrinsicVisitor { +class IntrinsicLocationsBuilderARMVIXL final : public IntrinsicVisitor { public: explicit IntrinsicLocationsBuilderARMVIXL(CodeGeneratorARMVIXL* codegen); // Define visitor methods. #define OPTIMIZING_INTRINSICS(Name, IsStatic, NeedsEnvironmentOrCache, SideEffects, Exceptions, ...) \ - void Visit ## Name(HInvoke* invoke) OVERRIDE; + void Visit ## Name(HInvoke* invoke) override; #include "intrinsics_list.h" INTRINSICS_LIST(OPTIMIZING_INTRINSICS) #undef INTRINSICS_LIST @@ -54,14 +54,14 @@ class IntrinsicLocationsBuilderARMVIXL FINAL : public IntrinsicVisitor { DISALLOW_COPY_AND_ASSIGN(IntrinsicLocationsBuilderARMVIXL); }; -class IntrinsicCodeGeneratorARMVIXL FINAL : public IntrinsicVisitor { +class IntrinsicCodeGeneratorARMVIXL final : public IntrinsicVisitor { public: explicit IntrinsicCodeGeneratorARMVIXL(CodeGeneratorARMVIXL* codegen) : codegen_(codegen) {} // Define visitor methods. #define OPTIMIZING_INTRINSICS(Name, IsStatic, NeedsEnvironmentOrCache, SideEffects, Exceptions, ...) \ - void Visit ## Name(HInvoke* invoke) OVERRIDE; + void Visit ## Name(HInvoke* invoke) override; #include "intrinsics_list.h" INTRINSICS_LIST(OPTIMIZING_INTRINSICS) #undef INTRINSICS_LIST diff --git a/compiler/optimizing/intrinsics_mips.cc b/compiler/optimizing/intrinsics_mips.cc index b7936b9c8e..3da0e578bf 100644 --- a/compiler/optimizing/intrinsics_mips.cc +++ b/compiler/optimizing/intrinsics_mips.cc @@ -58,6 +58,10 @@ inline bool IntrinsicCodeGeneratorMIPS::Is32BitFPU() const { return codegen_->GetInstructionSetFeatures().Is32BitFloatingPoint(); } +inline bool IntrinsicCodeGeneratorMIPS::HasMsa() const { + return codegen_->GetInstructionSetFeatures().HasMsa(); +} + #define __ codegen->GetAssembler()-> static void MoveFromReturnRegister(Location trg, @@ -104,7 +108,7 @@ class IntrinsicSlowPathMIPS : public SlowPathCodeMIPS { public: explicit IntrinsicSlowPathMIPS(HInvoke* invoke) : SlowPathCodeMIPS(invoke), invoke_(invoke) { } - void EmitNativeCode(CodeGenerator* codegen_in) OVERRIDE { + void EmitNativeCode(CodeGenerator* codegen_in) override { CodeGeneratorMIPS* codegen = down_cast<CodeGeneratorMIPS*>(codegen_in); __ Bind(GetEntryLabel()); @@ -133,7 +137,7 @@ class IntrinsicSlowPathMIPS : public SlowPathCodeMIPS { __ B(GetExitLabel()); } - const char* GetDescription() const OVERRIDE { return "IntrinsicSlowPathMIPS"; } + const char* GetDescription() const override { return "IntrinsicSlowPathMIPS"; } private: // The instruction where this slow path is happening. 
@@ -181,7 +185,7 @@ void IntrinsicLocationsBuilderMIPS::VisitDoubleDoubleToRawLongBits(HInvoke* invo } void IntrinsicCodeGeneratorMIPS::VisitDoubleDoubleToRawLongBits(HInvoke* invoke) { - MoveFPToInt(invoke->GetLocations(), /* is64bit */ true, GetAssembler()); + MoveFPToInt(invoke->GetLocations(), /* is64bit= */ true, GetAssembler()); } // int java.lang.Float.floatToRawIntBits(float) @@ -190,7 +194,7 @@ void IntrinsicLocationsBuilderMIPS::VisitFloatFloatToRawIntBits(HInvoke* invoke) } void IntrinsicCodeGeneratorMIPS::VisitFloatFloatToRawIntBits(HInvoke* invoke) { - MoveFPToInt(invoke->GetLocations(), /* is64bit */ false, GetAssembler()); + MoveFPToInt(invoke->GetLocations(), /* is64bit= */ false, GetAssembler()); } static void CreateIntToFPLocations(ArenaAllocator* allocator, HInvoke* invoke) { @@ -222,7 +226,7 @@ void IntrinsicLocationsBuilderMIPS::VisitDoubleLongBitsToDouble(HInvoke* invoke) } void IntrinsicCodeGeneratorMIPS::VisitDoubleLongBitsToDouble(HInvoke* invoke) { - MoveIntToFP(invoke->GetLocations(), /* is64bit */ true, GetAssembler()); + MoveIntToFP(invoke->GetLocations(), /* is64bit= */ true, GetAssembler()); } // float java.lang.Float.intBitsToFloat(int) @@ -231,7 +235,7 @@ void IntrinsicLocationsBuilderMIPS::VisitFloatIntBitsToFloat(HInvoke* invoke) { } void IntrinsicCodeGeneratorMIPS::VisitFloatIntBitsToFloat(HInvoke* invoke) { - MoveIntToFP(invoke->GetLocations(), /* is64bit */ false, GetAssembler()); + MoveIntToFP(invoke->GetLocations(), /* is64bit= */ false, GetAssembler()); } static void CreateIntToIntLocations(ArenaAllocator* allocator, @@ -407,7 +411,7 @@ void IntrinsicCodeGeneratorMIPS::VisitIntegerReverseBytes(HInvoke* invoke) { DataType::Type::kInt32, IsR2OrNewer(), IsR6(), - /* reverseBits */ false, + /* reverseBits= */ false, GetAssembler()); } @@ -421,7 +425,7 @@ void IntrinsicCodeGeneratorMIPS::VisitLongReverseBytes(HInvoke* invoke) { DataType::Type::kInt64, IsR2OrNewer(), IsR6(), - /* reverseBits */ false, + /* reverseBits= */ false, GetAssembler()); } @@ -435,7 +439,7 @@ void IntrinsicCodeGeneratorMIPS::VisitShortReverseBytes(HInvoke* invoke) { DataType::Type::kInt16, IsR2OrNewer(), IsR6(), - /* reverseBits */ false, + /* reverseBits= */ false, GetAssembler()); } @@ -475,7 +479,7 @@ void IntrinsicLocationsBuilderMIPS::VisitIntegerNumberOfLeadingZeros(HInvoke* in } void IntrinsicCodeGeneratorMIPS::VisitIntegerNumberOfLeadingZeros(HInvoke* invoke) { - GenNumberOfLeadingZeroes(invoke->GetLocations(), /* is64bit */ false, IsR6(), GetAssembler()); + GenNumberOfLeadingZeroes(invoke->GetLocations(), /* is64bit= */ false, IsR6(), GetAssembler()); } // int java.lang.Long.numberOfLeadingZeros(long i) @@ -484,7 +488,7 @@ void IntrinsicLocationsBuilderMIPS::VisitLongNumberOfLeadingZeros(HInvoke* invok } void IntrinsicCodeGeneratorMIPS::VisitLongNumberOfLeadingZeros(HInvoke* invoke) { - GenNumberOfLeadingZeroes(invoke->GetLocations(), /* is64bit */ true, IsR6(), GetAssembler()); + GenNumberOfLeadingZeroes(invoke->GetLocations(), /* is64bit= */ true, IsR6(), GetAssembler()); } static void GenNumberOfTrailingZeroes(LocationSummary* locations, @@ -562,7 +566,7 @@ void IntrinsicLocationsBuilderMIPS::VisitIntegerNumberOfTrailingZeros(HInvoke* i } void IntrinsicCodeGeneratorMIPS::VisitIntegerNumberOfTrailingZeros(HInvoke* invoke) { - GenNumberOfTrailingZeroes(invoke->GetLocations(), /* is64bit */ false, IsR6(), GetAssembler()); + GenNumberOfTrailingZeroes(invoke->GetLocations(), /* is64bit= */ false, IsR6(), GetAssembler()); } // int java.lang.Long.numberOfTrailingZeros(long i) 
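Much of the mechanical churn in these hunks changes boolean argument comments from /* name */ to /* name= */. The trailing '=' form appears to be the one that argument-comment tooling (for example clang-tidy's bugprone-argument-comment check) can match against the callee's parameter names, so stale or mismatched comments get flagged. A trivial illustration of the convention; the function and parameters here are invented:

    // The comment names the parameter, with a trailing '=', so automated checks
    // can compare it against the callee's declaration.
    static void StoreValue(bool is64bit, bool is_volatile) {
      (void)is64bit;
      (void)is_volatile;
    }

    int main() {
      StoreValue(/* is64bit= */ true, /* is_volatile= */ false);
      return 0;
    }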
@@ -571,7 +575,7 @@ void IntrinsicLocationsBuilderMIPS::VisitLongNumberOfTrailingZeros(HInvoke* invo } void IntrinsicCodeGeneratorMIPS::VisitLongNumberOfTrailingZeros(HInvoke* invoke) { - GenNumberOfTrailingZeroes(invoke->GetLocations(), /* is64bit */ true, IsR6(), GetAssembler()); + GenNumberOfTrailingZeroes(invoke->GetLocations(), /* is64bit= */ true, IsR6(), GetAssembler()); } // int java.lang.Integer.reverse(int) @@ -584,7 +588,7 @@ void IntrinsicCodeGeneratorMIPS::VisitIntegerReverse(HInvoke* invoke) { DataType::Type::kInt32, IsR2OrNewer(), IsR6(), - /* reverseBits */ true, + /* reverseBits= */ true, GetAssembler()); } @@ -598,7 +602,7 @@ void IntrinsicCodeGeneratorMIPS::VisitLongReverse(HInvoke* invoke) { DataType::Type::kInt64, IsR2OrNewer(), IsR6(), - /* reverseBits */ true, + /* reverseBits= */ true, GetAssembler()); } @@ -612,6 +616,7 @@ static void CreateFPToFPLocations(ArenaAllocator* allocator, HInvoke* invoke) { static void GenBitCount(LocationSummary* locations, DataType::Type type, bool isR6, + bool hasMsa, MipsAssembler* assembler) { Register out = locations->Out().AsRegister<Register>(); @@ -637,85 +642,102 @@ static void GenBitCount(LocationSummary* locations, // instructions compared to a loop-based algorithm which required 47 // instructions. - if (type == DataType::Type::kInt32) { - Register in = locations->InAt(0).AsRegister<Register>(); - - __ Srl(TMP, in, 1); - __ LoadConst32(AT, 0x55555555); - __ And(TMP, TMP, AT); - __ Subu(TMP, in, TMP); - __ LoadConst32(AT, 0x33333333); - __ And(out, TMP, AT); - __ Srl(TMP, TMP, 2); - __ And(TMP, TMP, AT); - __ Addu(TMP, out, TMP); - __ Srl(out, TMP, 4); - __ Addu(out, out, TMP); - __ LoadConst32(AT, 0x0F0F0F0F); - __ And(out, out, AT); - __ LoadConst32(TMP, 0x01010101); - if (isR6) { - __ MulR6(out, out, TMP); + if (hasMsa) { + if (type == DataType::Type::kInt32) { + Register in = locations->InAt(0).AsRegister<Register>(); + __ Mtc1(in, FTMP); + __ PcntW(static_cast<VectorRegister>(FTMP), static_cast<VectorRegister>(FTMP)); + __ Mfc1(out, FTMP); } else { - __ MulR2(out, out, TMP); + DCHECK_EQ(type, DataType::Type::kInt64); + Register in_lo = locations->InAt(0).AsRegisterPairLow<Register>(); + Register in_hi = locations->InAt(0).AsRegisterPairHigh<Register>(); + __ Mtc1(in_lo, FTMP); + __ Mthc1(in_hi, FTMP); + __ PcntD(static_cast<VectorRegister>(FTMP), static_cast<VectorRegister>(FTMP)); + __ Mfc1(out, FTMP); } - __ Srl(out, out, 24); } else { - DCHECK_EQ(type, DataType::Type::kInt64); - Register in_lo = locations->InAt(0).AsRegisterPairLow<Register>(); - Register in_hi = locations->InAt(0).AsRegisterPairHigh<Register>(); - Register tmp_hi = locations->GetTemp(0).AsRegister<Register>(); - Register out_hi = locations->GetTemp(1).AsRegister<Register>(); - Register tmp_lo = TMP; - Register out_lo = out; + if (type == DataType::Type::kInt32) { + Register in = locations->InAt(0).AsRegister<Register>(); - __ Srl(tmp_lo, in_lo, 1); - __ Srl(tmp_hi, in_hi, 1); + __ Srl(TMP, in, 1); + __ LoadConst32(AT, 0x55555555); + __ And(TMP, TMP, AT); + __ Subu(TMP, in, TMP); + __ LoadConst32(AT, 0x33333333); + __ And(out, TMP, AT); + __ Srl(TMP, TMP, 2); + __ And(TMP, TMP, AT); + __ Addu(TMP, out, TMP); + __ Srl(out, TMP, 4); + __ Addu(out, out, TMP); + __ LoadConst32(AT, 0x0F0F0F0F); + __ And(out, out, AT); + __ LoadConst32(TMP, 0x01010101); + if (isR6) { + __ MulR6(out, out, TMP); + } else { + __ MulR2(out, out, TMP); + } + __ Srl(out, out, 24); + } else { + DCHECK_EQ(type, DataType::Type::kInt64); + Register in_lo = 
locations->InAt(0).AsRegisterPairLow<Register>(); + Register in_hi = locations->InAt(0).AsRegisterPairHigh<Register>(); + Register tmp_hi = locations->GetTemp(0).AsRegister<Register>(); + Register out_hi = locations->GetTemp(1).AsRegister<Register>(); + Register tmp_lo = TMP; + Register out_lo = out; - __ LoadConst32(AT, 0x55555555); + __ Srl(tmp_lo, in_lo, 1); + __ Srl(tmp_hi, in_hi, 1); - __ And(tmp_lo, tmp_lo, AT); - __ Subu(tmp_lo, in_lo, tmp_lo); + __ LoadConst32(AT, 0x55555555); - __ And(tmp_hi, tmp_hi, AT); - __ Subu(tmp_hi, in_hi, tmp_hi); + __ And(tmp_lo, tmp_lo, AT); + __ Subu(tmp_lo, in_lo, tmp_lo); - __ LoadConst32(AT, 0x33333333); + __ And(tmp_hi, tmp_hi, AT); + __ Subu(tmp_hi, in_hi, tmp_hi); - __ And(out_lo, tmp_lo, AT); - __ Srl(tmp_lo, tmp_lo, 2); - __ And(tmp_lo, tmp_lo, AT); - __ Addu(tmp_lo, out_lo, tmp_lo); + __ LoadConst32(AT, 0x33333333); - __ And(out_hi, tmp_hi, AT); - __ Srl(tmp_hi, tmp_hi, 2); - __ And(tmp_hi, tmp_hi, AT); - __ Addu(tmp_hi, out_hi, tmp_hi); + __ And(out_lo, tmp_lo, AT); + __ Srl(tmp_lo, tmp_lo, 2); + __ And(tmp_lo, tmp_lo, AT); + __ Addu(tmp_lo, out_lo, tmp_lo); - // Here we deviate from the original algorithm a bit. We've reached - // the stage where the bitfields holding the subtotals are large - // enough to hold the combined subtotals for both the low word, and - // the high word. This means that we can add the subtotals for the - // the high, and low words into a single word, and compute the final - // result for both the high, and low words using fewer instructions. - __ LoadConst32(AT, 0x0F0F0F0F); + __ And(out_hi, tmp_hi, AT); + __ Srl(tmp_hi, tmp_hi, 2); + __ And(tmp_hi, tmp_hi, AT); + __ Addu(tmp_hi, out_hi, tmp_hi); - __ Addu(TMP, tmp_hi, tmp_lo); + // Here we deviate from the original algorithm a bit. We've reached + // the stage where the bitfields holding the subtotals are large + // enough to hold the combined subtotals for both the low word, and + // the high word. This means that we can add the subtotals for the + // the high, and low words into a single word, and compute the final + // result for both the high, and low words using fewer instructions. 
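When MSA's pcnt instruction is unavailable, the code in this hunk emits the classic parallel bit count: 2-bit, then 4-bit, then 8-bit partial sums, finished by a multiply that folds every byte count into the top byte. A portable 32-bit version with the same masks, illustrative rather than ART code:

    #include <cassert>
    #include <cstdint>

    uint32_t PopCount32(uint32_t in) {
      uint32_t x = in - ((in >> 1) & 0x55555555u);       // sums of adjacent bit pairs
      x = (x & 0x33333333u) + ((x >> 2) & 0x33333333u);  // 4-bit sums
      x = (x + (x >> 4)) & 0x0F0F0F0Fu;                  // 8-bit sums
      return (x * 0x01010101u) >> 24;                    // add all bytes, keep the top one
    }

    int main() {
      assert(PopCount32(0u) == 0u);
      assert(PopCount32(0x80000001u) == 2u);
      assert(PopCount32(0xFFFFFFFFu) == 32u);
      return 0;
    }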
+ __ LoadConst32(AT, 0x0F0F0F0F); - __ Srl(out, TMP, 4); - __ And(out, out, AT); - __ And(TMP, TMP, AT); - __ Addu(out, out, TMP); + __ Addu(TMP, tmp_hi, tmp_lo); - __ LoadConst32(AT, 0x01010101); + __ Srl(out, TMP, 4); + __ And(out, out, AT); + __ And(TMP, TMP, AT); + __ Addu(out, out, TMP); - if (isR6) { - __ MulR6(out, out, AT); - } else { - __ MulR2(out, out, AT); - } + __ LoadConst32(AT, 0x01010101); - __ Srl(out, out, 24); + if (isR6) { + __ MulR6(out, out, AT); + } else { + __ MulR2(out, out, AT); + } + + __ Srl(out, out, 24); + } } } @@ -725,7 +747,7 @@ void IntrinsicLocationsBuilderMIPS::VisitIntegerBitCount(HInvoke* invoke) { } void IntrinsicCodeGeneratorMIPS::VisitIntegerBitCount(HInvoke* invoke) { - GenBitCount(invoke->GetLocations(), DataType::Type::kInt32, IsR6(), GetAssembler()); + GenBitCount(invoke->GetLocations(), DataType::Type::kInt32, IsR6(), HasMsa(), GetAssembler()); } // int java.lang.Long.bitCount(int) @@ -739,575 +761,7 @@ void IntrinsicLocationsBuilderMIPS::VisitLongBitCount(HInvoke* invoke) { } void IntrinsicCodeGeneratorMIPS::VisitLongBitCount(HInvoke* invoke) { - GenBitCount(invoke->GetLocations(), DataType::Type::kInt64, IsR6(), GetAssembler()); -} - -static void MathAbsFP(LocationSummary* locations, - bool is64bit, - bool isR2OrNewer, - bool isR6, - MipsAssembler* assembler) { - FRegister in = locations->InAt(0).AsFpuRegister<FRegister>(); - FRegister out = locations->Out().AsFpuRegister<FRegister>(); - - // Note, as a "quality of implementation", rather than pure "spec compliance", we require that - // Math.abs() clears the sign bit (but changes nothing else) for all numbers, including NaN - // (signaling NaN may become quiet though). - // - // The ABS.fmt instructions (abs.s and abs.d) do exactly that when NAN2008=1 (R6). For this case, - // both regular floating point numbers and NAN values are treated alike, only the sign bit is - // affected by this instruction. - // But when NAN2008=0 (R2 and before), the ABS.fmt instructions can't be used. For this case, any - // NaN operand signals invalid operation. This means that other bits (not just sign bit) might be - // changed when doing abs(NaN). Because of that, we clear sign bit in a different way. - if (isR6) { - if (is64bit) { - __ AbsD(out, in); - } else { - __ AbsS(out, in); - } - } else { - if (is64bit) { - if (in != out) { - __ MovD(out, in); - } - __ MoveFromFpuHigh(TMP, in); - // ins instruction is not available for R1. - if (isR2OrNewer) { - __ Ins(TMP, ZERO, 31, 1); - } else { - __ Sll(TMP, TMP, 1); - __ Srl(TMP, TMP, 1); - } - __ MoveToFpuHigh(TMP, out); - } else { - __ Mfc1(TMP, in); - // ins instruction is not available for R1. 
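The MathAbsFP helper being removed here (presumably because abs, min and max are handled as regular HIR nodes after this change) only ever touches the sign bit, so a NaN keeps its payload; as its comment notes, the ABS.fmt instruction on R6 may still quieten a signaling NaN, whereas a pure bit manipulation never does. A bit-level sketch of the 32-bit case, illustrative rather than ART code:

    #include <cassert>
    #include <cmath>
    #include <cstdint>
    #include <cstring>

    // Clear only bit 31; everything else, including a NaN payload, is preserved.
    float AbsBits(float in) {
      uint32_t bits;
      std::memcpy(&bits, &in, sizeof(bits));
      bits &= 0x7FFFFFFFu;
      float out;
      std::memcpy(&out, &bits, sizeof(out));
      return out;
    }

    int main() {
      assert(AbsBits(-2.5f) == 2.5f);
      assert(std::isnan(AbsBits(-std::nanf(""))));
      return 0;
    }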
- if (isR2OrNewer) { - __ Ins(TMP, ZERO, 31, 1); - } else { - __ Sll(TMP, TMP, 1); - __ Srl(TMP, TMP, 1); - } - __ Mtc1(TMP, out); - } - } -} - -// double java.lang.Math.abs(double) -void IntrinsicLocationsBuilderMIPS::VisitMathAbsDouble(HInvoke* invoke) { - CreateFPToFPLocations(allocator_, invoke); -} - -void IntrinsicCodeGeneratorMIPS::VisitMathAbsDouble(HInvoke* invoke) { - MathAbsFP(invoke->GetLocations(), /* is64bit */ true, IsR2OrNewer(), IsR6(), GetAssembler()); -} - -// float java.lang.Math.abs(float) -void IntrinsicLocationsBuilderMIPS::VisitMathAbsFloat(HInvoke* invoke) { - CreateFPToFPLocations(allocator_, invoke); -} - -void IntrinsicCodeGeneratorMIPS::VisitMathAbsFloat(HInvoke* invoke) { - MathAbsFP(invoke->GetLocations(), /* is64bit */ false, IsR2OrNewer(), IsR6(), GetAssembler()); -} - -static void GenAbsInteger(LocationSummary* locations, bool is64bit, MipsAssembler* assembler) { - if (is64bit) { - Register in_lo = locations->InAt(0).AsRegisterPairLow<Register>(); - Register in_hi = locations->InAt(0).AsRegisterPairHigh<Register>(); - Register out_lo = locations->Out().AsRegisterPairLow<Register>(); - Register out_hi = locations->Out().AsRegisterPairHigh<Register>(); - - // The comments in this section show the analogous operations which would - // be performed if we had 64-bit registers "in", and "out". - // __ Dsra32(AT, in, 31); - __ Sra(AT, in_hi, 31); - // __ Xor(out, in, AT); - __ Xor(TMP, in_lo, AT); - __ Xor(out_hi, in_hi, AT); - // __ Dsubu(out, out, AT); - __ Subu(out_lo, TMP, AT); - __ Sltu(TMP, out_lo, TMP); - __ Addu(out_hi, out_hi, TMP); - } else { - Register in = locations->InAt(0).AsRegister<Register>(); - Register out = locations->Out().AsRegister<Register>(); - - __ Sra(AT, in, 31); - __ Xor(out, in, AT); - __ Subu(out, out, AT); - } -} - -// int java.lang.Math.abs(int) -void IntrinsicLocationsBuilderMIPS::VisitMathAbsInt(HInvoke* invoke) { - CreateIntToIntLocations(allocator_, invoke); -} - -void IntrinsicCodeGeneratorMIPS::VisitMathAbsInt(HInvoke* invoke) { - GenAbsInteger(invoke->GetLocations(), /* is64bit */ false, GetAssembler()); -} - -// long java.lang.Math.abs(long) -void IntrinsicLocationsBuilderMIPS::VisitMathAbsLong(HInvoke* invoke) { - CreateIntToIntLocations(allocator_, invoke); -} - -void IntrinsicCodeGeneratorMIPS::VisitMathAbsLong(HInvoke* invoke) { - GenAbsInteger(invoke->GetLocations(), /* is64bit */ true, GetAssembler()); -} - -static void GenMinMaxFP(LocationSummary* locations, - bool is_min, - DataType::Type type, - bool is_R6, - MipsAssembler* assembler) { - FRegister out = locations->Out().AsFpuRegister<FRegister>(); - FRegister a = locations->InAt(0).AsFpuRegister<FRegister>(); - FRegister b = locations->InAt(1).AsFpuRegister<FRegister>(); - - if (is_R6) { - MipsLabel noNaNs; - MipsLabel done; - FRegister ftmp = ((out != a) && (out != b)) ? out : FTMP; - - // When Java computes min/max it prefers a NaN to a number; the - // behavior of MIPSR6 is to prefer numbers to NaNs, i.e., if one of - // the inputs is a NaN and the other is a valid number, the MIPS - // instruction will return the number; Java wants the NaN value - // returned. This is why there is extra logic preceding the use of - // the MIPS min.fmt/max.fmt instructions. If either a, or b holds a - // NaN, return the NaN, otherwise return the min/max. - if (type == DataType::Type::kFloat64) { - __ CmpUnD(FTMP, a, b); - __ Bc1eqz(FTMP, &noNaNs); - - // One of the inputs is a NaN - __ CmpEqD(ftmp, a, a); - // If a == a then b is the NaN, otherwise a is the NaN. 
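The GenMinMaxFP helper being removed spends most of its code on two corner cases spelled out in its comments: a NaN input must win (the raw MIPS min.fmt/max.fmt instructions prefer the number), and for equal zeroes -0.0 prevails for min while +0.0 prevails for max, which the non-R6 path below handles by OR-ing or AND-ing the sign words. A reference version of those semantics for min, illustrative rather than ART code:

    #include <cassert>
    #include <cmath>
    #include <limits>

    double JavaMinDouble(double a, double b) {
      if (std::isnan(a) || std::isnan(b)) {
        return std::numeric_limits<double>::quiet_NaN();  // NaN beats any number
      }
      if (a == 0.0 && b == 0.0) {
        return std::signbit(a) ? a : b;  // -0.0 prevails over +0.0 for min
      }
      return (a < b) ? a : b;
    }

    int main() {
      assert(std::isnan(JavaMinDouble(1.0, std::nan(""))));
      assert(std::signbit(JavaMinDouble(+0.0, -0.0)));
      assert(JavaMinDouble(-1.0, 2.0) == -1.0);
      return 0;
    }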
- __ SelD(ftmp, a, b); - - if (ftmp != out) { - __ MovD(out, ftmp); - } - - __ B(&done); - - __ Bind(&noNaNs); - - if (is_min) { - __ MinD(out, a, b); - } else { - __ MaxD(out, a, b); - } - } else { - DCHECK_EQ(type, DataType::Type::kFloat32); - __ CmpUnS(FTMP, a, b); - __ Bc1eqz(FTMP, &noNaNs); - - // One of the inputs is a NaN - __ CmpEqS(ftmp, a, a); - // If a == a then b is the NaN, otherwise a is the NaN. - __ SelS(ftmp, a, b); - - if (ftmp != out) { - __ MovS(out, ftmp); - } - - __ B(&done); - - __ Bind(&noNaNs); - - if (is_min) { - __ MinS(out, a, b); - } else { - __ MaxS(out, a, b); - } - } - - __ Bind(&done); - } else { - MipsLabel ordered; - MipsLabel compare; - MipsLabel select; - MipsLabel done; - - if (type == DataType::Type::kFloat64) { - __ CunD(a, b); - } else { - DCHECK_EQ(type, DataType::Type::kFloat32); - __ CunS(a, b); - } - __ Bc1f(&ordered); - - // a or b (or both) is a NaN. Return one, which is a NaN. - if (type == DataType::Type::kFloat64) { - __ CeqD(b, b); - } else { - __ CeqS(b, b); - } - __ B(&select); - - __ Bind(&ordered); - - // Neither is a NaN. - // a == b? (-0.0 compares equal with +0.0) - // If equal, handle zeroes, else compare further. - if (type == DataType::Type::kFloat64) { - __ CeqD(a, b); - } else { - __ CeqS(a, b); - } - __ Bc1f(&compare); - - // a == b either bit for bit or one is -0.0 and the other is +0.0. - if (type == DataType::Type::kFloat64) { - __ MoveFromFpuHigh(TMP, a); - __ MoveFromFpuHigh(AT, b); - } else { - __ Mfc1(TMP, a); - __ Mfc1(AT, b); - } - - if (is_min) { - // -0.0 prevails over +0.0. - __ Or(TMP, TMP, AT); - } else { - // +0.0 prevails over -0.0. - __ And(TMP, TMP, AT); - } - - if (type == DataType::Type::kFloat64) { - __ Mfc1(AT, a); - __ Mtc1(AT, out); - __ MoveToFpuHigh(TMP, out); - } else { - __ Mtc1(TMP, out); - } - __ B(&done); - - __ Bind(&compare); - - if (type == DataType::Type::kFloat64) { - if (is_min) { - // return (a <= b) ? a : b; - __ ColeD(a, b); - } else { - // return (a >= b) ? a : b; - __ ColeD(b, a); // b <= a - } - } else { - if (is_min) { - // return (a <= b) ? a : b; - __ ColeS(a, b); - } else { - // return (a >= b) ? 
a : b; - __ ColeS(b, a); // b <= a - } - } - - __ Bind(&select); - - if (type == DataType::Type::kFloat64) { - __ MovtD(out, a); - __ MovfD(out, b); - } else { - __ MovtS(out, a); - __ MovfS(out, b); - } - - __ Bind(&done); - } -} - -static void CreateFPFPToFPLocations(ArenaAllocator* allocator, HInvoke* invoke) { - LocationSummary* locations = - new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified); - locations->SetInAt(0, Location::RequiresFpuRegister()); - locations->SetInAt(1, Location::RequiresFpuRegister()); - locations->SetOut(Location::RequiresFpuRegister(), Location::kOutputOverlap); -} - -// double java.lang.Math.min(double, double) -void IntrinsicLocationsBuilderMIPS::VisitMathMinDoubleDouble(HInvoke* invoke) { - CreateFPFPToFPLocations(allocator_, invoke); -} - -void IntrinsicCodeGeneratorMIPS::VisitMathMinDoubleDouble(HInvoke* invoke) { - GenMinMaxFP(invoke->GetLocations(), - /* is_min */ true, - DataType::Type::kFloat64, - IsR6(), - GetAssembler()); -} - -// float java.lang.Math.min(float, float) -void IntrinsicLocationsBuilderMIPS::VisitMathMinFloatFloat(HInvoke* invoke) { - CreateFPFPToFPLocations(allocator_, invoke); -} - -void IntrinsicCodeGeneratorMIPS::VisitMathMinFloatFloat(HInvoke* invoke) { - GenMinMaxFP(invoke->GetLocations(), - /* is_min */ true, - DataType::Type::kFloat32, - IsR6(), - GetAssembler()); -} - -// double java.lang.Math.max(double, double) -void IntrinsicLocationsBuilderMIPS::VisitMathMaxDoubleDouble(HInvoke* invoke) { - CreateFPFPToFPLocations(allocator_, invoke); -} - -void IntrinsicCodeGeneratorMIPS::VisitMathMaxDoubleDouble(HInvoke* invoke) { - GenMinMaxFP(invoke->GetLocations(), - /* is_min */ false, - DataType::Type::kFloat64, - IsR6(), - GetAssembler()); -} - -// float java.lang.Math.max(float, float) -void IntrinsicLocationsBuilderMIPS::VisitMathMaxFloatFloat(HInvoke* invoke) { - CreateFPFPToFPLocations(allocator_, invoke); -} - -void IntrinsicCodeGeneratorMIPS::VisitMathMaxFloatFloat(HInvoke* invoke) { - GenMinMaxFP(invoke->GetLocations(), - /* is_min */ false, - DataType::Type::kFloat32, - IsR6(), - GetAssembler()); -} - -static void CreateIntIntToIntLocations(ArenaAllocator* allocator, HInvoke* invoke) { - LocationSummary* locations = - new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified); - locations->SetInAt(0, Location::RequiresRegister()); - locations->SetInAt(1, Location::RequiresRegister()); - locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); -} - -static void GenMinMax(LocationSummary* locations, - bool is_min, - DataType::Type type, - bool is_R6, - MipsAssembler* assembler) { - if (is_R6) { - // Some architectures, such as ARM and MIPS (prior to r6), have a - // conditional move instruction which only changes the target - // (output) register if the condition is true (MIPS prior to r6 had - // MOVF, MOVT, MOVN, and MOVZ). The SELEQZ and SELNEZ instructions - // always change the target (output) register. If the condition is - // true the output register gets the contents of the "rs" register; - // otherwise, the output register is set to zero. One consequence - // of this is that to implement something like "rd = c==0 ? rs : rt" - // MIPS64r6 needs to use a pair of SELEQZ/SELNEZ instructions. - // After executing this pair of instructions one of the output - // registers from the pair will necessarily contain zero. Then the - // code ORs the output registers from the SELEQZ/SELNEZ instructions - // to get the final result. 
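The long comment above describes how R6 builds a conditional move from SELEQZ/SELNEZ: each instruction yields either its source register or zero, so OR-ing the pair gives "rd = (c == 0) ? rs : rt" without a branch. A direct C++ transcription of that idiom, illustrative rather than ART code:

    #include <cassert>
    #include <cstdint>

    uint32_t SelEqz(uint32_t rs, uint32_t rt) { return rt == 0 ? rs : 0; }  // like SELEQZ
    uint32_t SelNez(uint32_t rs, uint32_t rt) { return rt != 0 ? rs : 0; }  // like SELNEZ

    // rd = (c == 0) ? rs : rt, with no branch: exactly one select is non-zero.
    uint32_t ConditionalSelect(uint32_t rs, uint32_t rt, uint32_t c) {
      return SelEqz(rs, c) | SelNez(rt, c);
    }

    int main() {
      assert(ConditionalSelect(7, 9, 0) == 7);
      assert(ConditionalSelect(7, 9, 1) == 9);
      return 0;
    }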
- // - // The initial test to see if the output register is same as the - // first input register is needed to make sure that value in the - // first input register isn't clobbered before we've finished - // computing the output value. The logic in the corresponding else - // clause performs the same task but makes sure the second input - // register isn't clobbered in the event that it's the same register - // as the output register; the else clause also handles the case - // where the output register is distinct from both the first, and the - // second input registers. - if (type == DataType::Type::kInt64) { - Register a_lo = locations->InAt(0).AsRegisterPairLow<Register>(); - Register a_hi = locations->InAt(0).AsRegisterPairHigh<Register>(); - Register b_lo = locations->InAt(1).AsRegisterPairLow<Register>(); - Register b_hi = locations->InAt(1).AsRegisterPairHigh<Register>(); - Register out_lo = locations->Out().AsRegisterPairLow<Register>(); - Register out_hi = locations->Out().AsRegisterPairHigh<Register>(); - - MipsLabel compare_done; - - if (a_lo == b_lo) { - if (out_lo != a_lo) { - __ Move(out_lo, a_lo); - __ Move(out_hi, a_hi); - } - } else { - __ Slt(TMP, b_hi, a_hi); - __ Bne(b_hi, a_hi, &compare_done); - - __ Sltu(TMP, b_lo, a_lo); - - __ Bind(&compare_done); - - if (is_min) { - __ Seleqz(AT, a_lo, TMP); - __ Selnez(out_lo, b_lo, TMP); // Safe even if out_lo == a_lo/b_lo - // because at this point we're - // done using a_lo/b_lo. - } else { - __ Selnez(AT, a_lo, TMP); - __ Seleqz(out_lo, b_lo, TMP); // ditto - } - __ Or(out_lo, out_lo, AT); - if (is_min) { - __ Seleqz(AT, a_hi, TMP); - __ Selnez(out_hi, b_hi, TMP); // ditto but for out_hi & a_hi/b_hi - } else { - __ Selnez(AT, a_hi, TMP); - __ Seleqz(out_hi, b_hi, TMP); // ditto but for out_hi & a_hi/b_hi - } - __ Or(out_hi, out_hi, AT); - } - } else { - DCHECK_EQ(type, DataType::Type::kInt32); - Register a = locations->InAt(0).AsRegister<Register>(); - Register b = locations->InAt(1).AsRegister<Register>(); - Register out = locations->Out().AsRegister<Register>(); - - if (a == b) { - if (out != a) { - __ Move(out, a); - } - } else { - __ Slt(AT, b, a); - if (is_min) { - __ Seleqz(TMP, a, AT); - __ Selnez(AT, b, AT); - } else { - __ Selnez(TMP, a, AT); - __ Seleqz(AT, b, AT); - } - __ Or(out, TMP, AT); - } - } - } else { - if (type == DataType::Type::kInt64) { - Register a_lo = locations->InAt(0).AsRegisterPairLow<Register>(); - Register a_hi = locations->InAt(0).AsRegisterPairHigh<Register>(); - Register b_lo = locations->InAt(1).AsRegisterPairLow<Register>(); - Register b_hi = locations->InAt(1).AsRegisterPairHigh<Register>(); - Register out_lo = locations->Out().AsRegisterPairLow<Register>(); - Register out_hi = locations->Out().AsRegisterPairHigh<Register>(); - - MipsLabel compare_done; - - if (a_lo == b_lo) { - if (out_lo != a_lo) { - __ Move(out_lo, a_lo); - __ Move(out_hi, a_hi); - } - } else { - __ Slt(TMP, a_hi, b_hi); - __ Bne(a_hi, b_hi, &compare_done); - - __ Sltu(TMP, a_lo, b_lo); - - __ Bind(&compare_done); - - if (is_min) { - if (out_lo != a_lo) { - __ Movn(out_hi, a_hi, TMP); - __ Movn(out_lo, a_lo, TMP); - } - if (out_lo != b_lo) { - __ Movz(out_hi, b_hi, TMP); - __ Movz(out_lo, b_lo, TMP); - } - } else { - if (out_lo != a_lo) { - __ Movz(out_hi, a_hi, TMP); - __ Movz(out_lo, a_lo, TMP); - } - if (out_lo != b_lo) { - __ Movn(out_hi, b_hi, TMP); - __ Movn(out_lo, b_lo, TMP); - } - } - } - } else { - DCHECK_EQ(type, DataType::Type::kInt32); - Register a = locations->InAt(0).AsRegister<Register>(); - Register b = 
locations->InAt(1).AsRegister<Register>(); - Register out = locations->Out().AsRegister<Register>(); - - if (a == b) { - if (out != a) { - __ Move(out, a); - } - } else { - __ Slt(AT, a, b); - if (is_min) { - if (out != a) { - __ Movn(out, a, AT); - } - if (out != b) { - __ Movz(out, b, AT); - } - } else { - if (out != a) { - __ Movz(out, a, AT); - } - if (out != b) { - __ Movn(out, b, AT); - } - } - } - } - } -} - -// int java.lang.Math.min(int, int) -void IntrinsicLocationsBuilderMIPS::VisitMathMinIntInt(HInvoke* invoke) { - CreateIntIntToIntLocations(allocator_, invoke); -} - -void IntrinsicCodeGeneratorMIPS::VisitMathMinIntInt(HInvoke* invoke) { - GenMinMax(invoke->GetLocations(), - /* is_min */ true, - DataType::Type::kInt32, - IsR6(), - GetAssembler()); -} - -// long java.lang.Math.min(long, long) -void IntrinsicLocationsBuilderMIPS::VisitMathMinLongLong(HInvoke* invoke) { - CreateIntIntToIntLocations(allocator_, invoke); -} - -void IntrinsicCodeGeneratorMIPS::VisitMathMinLongLong(HInvoke* invoke) { - GenMinMax(invoke->GetLocations(), - /* is_min */ true, - DataType::Type::kInt64, - IsR6(), - GetAssembler()); -} - -// int java.lang.Math.max(int, int) -void IntrinsicLocationsBuilderMIPS::VisitMathMaxIntInt(HInvoke* invoke) { - CreateIntIntToIntLocations(allocator_, invoke); -} - -void IntrinsicCodeGeneratorMIPS::VisitMathMaxIntInt(HInvoke* invoke) { - GenMinMax(invoke->GetLocations(), - /* is_min */ false, - DataType::Type::kInt32, - IsR6(), - GetAssembler()); -} - -// long java.lang.Math.max(long, long) -void IntrinsicLocationsBuilderMIPS::VisitMathMaxLongLong(HInvoke* invoke) { - CreateIntIntToIntLocations(allocator_, invoke); -} - -void IntrinsicCodeGeneratorMIPS::VisitMathMaxLongLong(HInvoke* invoke) { - GenMinMax(invoke->GetLocations(), - /* is_min */ false, - DataType::Type::kInt64, - IsR6(), - GetAssembler()); + GenBitCount(invoke->GetLocations(), DataType::Type::kInt64, IsR6(), HasMsa(), GetAssembler()); } // double java.lang.Math.sqrt(double) @@ -1601,11 +1055,11 @@ static void GenUnsafeGet(HInvoke* invoke, codegen->GenerateReferenceLoadWithBakerReadBarrier(invoke, trg_loc, base, - /* offset */ 0U, - /* index */ offset_loc, + /* offset= */ 0U, + /* index= */ offset_loc, TIMES_1, temp, - /* needs_null_check */ false); + /* needs_null_check= */ false); if (is_volatile) { __ Sync(0); } @@ -1623,8 +1077,8 @@ static void GenUnsafeGet(HInvoke* invoke, trg_loc, trg_loc, base_loc, - /* offset */ 0U, - /* index */ offset_loc); + /* offset= */ 0U, + /* index= */ offset_loc); } } else { if (is_R6) { @@ -1653,7 +1107,7 @@ void IntrinsicLocationsBuilderMIPS::VisitUnsafeGet(HInvoke* invoke) { } void IntrinsicCodeGeneratorMIPS::VisitUnsafeGet(HInvoke* invoke) { - GenUnsafeGet(invoke, DataType::Type::kInt32, /* is_volatile */ false, IsR6(), codegen_); + GenUnsafeGet(invoke, DataType::Type::kInt32, /* is_volatile= */ false, IsR6(), codegen_); } // int sun.misc.Unsafe.getIntVolatile(Object o, long offset) @@ -1662,7 +1116,7 @@ void IntrinsicLocationsBuilderMIPS::VisitUnsafeGetVolatile(HInvoke* invoke) { } void IntrinsicCodeGeneratorMIPS::VisitUnsafeGetVolatile(HInvoke* invoke) { - GenUnsafeGet(invoke, DataType::Type::kInt32, /* is_volatile */ true, IsR6(), codegen_); + GenUnsafeGet(invoke, DataType::Type::kInt32, /* is_volatile= */ true, IsR6(), codegen_); } // long sun.misc.Unsafe.getLong(Object o, long offset) @@ -1671,7 +1125,7 @@ void IntrinsicLocationsBuilderMIPS::VisitUnsafeGetLong(HInvoke* invoke) { } void IntrinsicCodeGeneratorMIPS::VisitUnsafeGetLong(HInvoke* invoke) { - 
GenUnsafeGet(invoke, DataType::Type::kInt64, /* is_volatile */ false, IsR6(), codegen_); + GenUnsafeGet(invoke, DataType::Type::kInt64, /* is_volatile= */ false, IsR6(), codegen_); } // Object sun.misc.Unsafe.getObject(Object o, long offset) @@ -1680,7 +1134,7 @@ void IntrinsicLocationsBuilderMIPS::VisitUnsafeGetObject(HInvoke* invoke) { } void IntrinsicCodeGeneratorMIPS::VisitUnsafeGetObject(HInvoke* invoke) { - GenUnsafeGet(invoke, DataType::Type::kReference, /* is_volatile */ false, IsR6(), codegen_); + GenUnsafeGet(invoke, DataType::Type::kReference, /* is_volatile= */ false, IsR6(), codegen_); } // Object sun.misc.Unsafe.getObjectVolatile(Object o, long offset) @@ -1689,7 +1143,7 @@ void IntrinsicLocationsBuilderMIPS::VisitUnsafeGetObjectVolatile(HInvoke* invoke } void IntrinsicCodeGeneratorMIPS::VisitUnsafeGetObjectVolatile(HInvoke* invoke) { - GenUnsafeGet(invoke, DataType::Type::kReference, /* is_volatile */ true, IsR6(), codegen_); + GenUnsafeGet(invoke, DataType::Type::kReference, /* is_volatile= */ true, IsR6(), codegen_); } static void CreateIntIntIntIntToVoidLocations(ArenaAllocator* allocator, HInvoke* invoke) { @@ -1771,8 +1225,8 @@ void IntrinsicLocationsBuilderMIPS::VisitUnsafePut(HInvoke* invoke) { void IntrinsicCodeGeneratorMIPS::VisitUnsafePut(HInvoke* invoke) { GenUnsafePut(invoke->GetLocations(), DataType::Type::kInt32, - /* is_volatile */ false, - /* is_ordered */ false, + /* is_volatile= */ false, + /* is_ordered= */ false, IsR6(), codegen_); } @@ -1785,8 +1239,8 @@ void IntrinsicLocationsBuilderMIPS::VisitUnsafePutOrdered(HInvoke* invoke) { void IntrinsicCodeGeneratorMIPS::VisitUnsafePutOrdered(HInvoke* invoke) { GenUnsafePut(invoke->GetLocations(), DataType::Type::kInt32, - /* is_volatile */ false, - /* is_ordered */ true, + /* is_volatile= */ false, + /* is_ordered= */ true, IsR6(), codegen_); } @@ -1799,8 +1253,8 @@ void IntrinsicLocationsBuilderMIPS::VisitUnsafePutVolatile(HInvoke* invoke) { void IntrinsicCodeGeneratorMIPS::VisitUnsafePutVolatile(HInvoke* invoke) { GenUnsafePut(invoke->GetLocations(), DataType::Type::kInt32, - /* is_volatile */ true, - /* is_ordered */ false, + /* is_volatile= */ true, + /* is_ordered= */ false, IsR6(), codegen_); } @@ -1813,8 +1267,8 @@ void IntrinsicLocationsBuilderMIPS::VisitUnsafePutObject(HInvoke* invoke) { void IntrinsicCodeGeneratorMIPS::VisitUnsafePutObject(HInvoke* invoke) { GenUnsafePut(invoke->GetLocations(), DataType::Type::kReference, - /* is_volatile */ false, - /* is_ordered */ false, + /* is_volatile= */ false, + /* is_ordered= */ false, IsR6(), codegen_); } @@ -1827,8 +1281,8 @@ void IntrinsicLocationsBuilderMIPS::VisitUnsafePutObjectOrdered(HInvoke* invoke) void IntrinsicCodeGeneratorMIPS::VisitUnsafePutObjectOrdered(HInvoke* invoke) { GenUnsafePut(invoke->GetLocations(), DataType::Type::kReference, - /* is_volatile */ false, - /* is_ordered */ true, + /* is_volatile= */ false, + /* is_ordered= */ true, IsR6(), codegen_); } @@ -1841,8 +1295,8 @@ void IntrinsicLocationsBuilderMIPS::VisitUnsafePutObjectVolatile(HInvoke* invoke void IntrinsicCodeGeneratorMIPS::VisitUnsafePutObjectVolatile(HInvoke* invoke) { GenUnsafePut(invoke->GetLocations(), DataType::Type::kReference, - /* is_volatile */ true, - /* is_ordered */ false, + /* is_volatile= */ true, + /* is_ordered= */ false, IsR6(), codegen_); } @@ -1855,8 +1309,8 @@ void IntrinsicLocationsBuilderMIPS::VisitUnsafePutLong(HInvoke* invoke) { void IntrinsicCodeGeneratorMIPS::VisitUnsafePutLong(HInvoke* invoke) { GenUnsafePut(invoke->GetLocations(), 
DataType::Type::kInt64, - /* is_volatile */ false, - /* is_ordered */ false, + /* is_volatile= */ false, + /* is_ordered= */ false, IsR6(), codegen_); } @@ -1869,8 +1323,8 @@ void IntrinsicLocationsBuilderMIPS::VisitUnsafePutLongOrdered(HInvoke* invoke) { void IntrinsicCodeGeneratorMIPS::VisitUnsafePutLongOrdered(HInvoke* invoke) { GenUnsafePut(invoke->GetLocations(), DataType::Type::kInt64, - /* is_volatile */ false, - /* is_ordered */ true, + /* is_volatile= */ false, + /* is_ordered= */ true, IsR6(), codegen_); } @@ -1934,12 +1388,12 @@ static void GenCas(HInvoke* invoke, DataType::Type type, CodeGeneratorMIPS* code invoke, out_loc, // Unused, used only as a "temporary" within the read barrier. base, - /* offset */ 0u, - /* index */ offset_loc, + /* offset= */ 0u, + /* index= */ offset_loc, ScaleFactor::TIMES_1, temp, - /* needs_null_check */ false, - /* always_update_field */ true); + /* needs_null_check= */ false, + /* always_update_field= */ true); } } @@ -2062,13 +1516,6 @@ void IntrinsicCodeGeneratorMIPS::VisitStringCompareTo(HInvoke* invoke) { // boolean java.lang.String.equals(Object anObject) void IntrinsicLocationsBuilderMIPS::VisitStringEquals(HInvoke* invoke) { - if (kEmitCompilerReadBarrier && - !StringEqualsOptimizations(invoke).GetArgumentIsString() && - !StringEqualsOptimizations(invoke).GetNoReadBarrierForStringClass()) { - // No support for this odd case (String class is moveable, not in the boot image). - return; - } - LocationSummary* locations = new (allocator_) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified); locations->SetInAt(0, Location::RequiresRegister()); @@ -2128,8 +1575,16 @@ void IntrinsicCodeGeneratorMIPS::VisitStringEquals(HInvoke* invoke) { // All string objects must have the same type since String cannot be subclassed. // Receiver must be a string object, so its class field is equal to all strings' class fields. // If the argument is a string object, its class field must be equal to receiver's class field. + // + // As the String class is expected to be non-movable, we can read the class + // field from String.equals' arguments without read barriers. + AssertNonMovableStringClass(); + // /* HeapReference<Class> */ temp1 = str->klass_ __ Lw(temp1, str, class_offset); + // /* HeapReference<Class> */ temp2 = arg->klass_ __ Lw(temp2, arg, class_offset); + // Also, because we use the previously loaded class references only in the + // following comparison, we don't need to unpoison them. 
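The String.equals fast path above now reads both klass_ fields without read barriers, since the String class is expected to be non-movable, and the method's count word carries both the length and the compression flag. A very rough outline of the overall check with made-up types; the byte count derived from the low bit (clear meaning compressed) is an assumption based on the compressed-string test in the getChars hunk earlier in this diff, not on the elided part of this method:

    #include <cstddef>
    #include <cstdint>
    #include <cstring>

    // Made-up stand-in for the relevant String fields; not ART's actual layout.
    struct FakeString {
      const void* klass;     // class pointer, compared without read barriers
      uint32_t count;        // length and compression flag packed into one word
      const uint8_t* value;  // character payload
    };

    bool StringEqualsSketch(const FakeString& str, const FakeString& arg) {
      if (&str == &arg) return true;             // same reference
      if (str.klass != arg.klass) return false;  // argument is not a j.l.String
      if (str.count != arg.count) return false;  // length or compression differs
      // Low bit clear is assumed to mean "compressed" (one byte per character).
      size_t char_size = (str.count & 1u) ? 2u : 1u;
      size_t bytes = static_cast<size_t>(str.count >> 1) * char_size;
      return std::memcmp(str.value, arg.value, bytes) == 0;
    }

    int main() {
      const uint8_t payload[2] = {'h', 'i'};
      FakeString a{nullptr, (2u << 1) | 0u, payload};  // compressed, length 2
      FakeString b{nullptr, (2u << 1) | 0u, payload};
      return StringEqualsSketch(a, b) ? 0 : 1;
    }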
__ Bne(temp1, temp2, &return_false); } @@ -2259,7 +1714,7 @@ void IntrinsicLocationsBuilderMIPS::VisitStringIndexOf(HInvoke* invoke) { } void IntrinsicCodeGeneratorMIPS::VisitStringIndexOf(HInvoke* invoke) { - GenerateStringIndexOf(invoke, /* start_at_zero */ true, GetAssembler(), codegen_); + GenerateStringIndexOf(invoke, /* start_at_zero= */ true, GetAssembler(), codegen_); } // int java.lang.String.indexOf(int ch, int fromIndex) @@ -2280,7 +1735,7 @@ void IntrinsicLocationsBuilderMIPS::VisitStringIndexOfAfter(HInvoke* invoke) { } void IntrinsicCodeGeneratorMIPS::VisitStringIndexOfAfter(HInvoke* invoke) { - GenerateStringIndexOf(invoke, /* start_at_zero */ false, GetAssembler(), codegen_); + GenerateStringIndexOf(invoke, /* start_at_zero= */ false, GetAssembler(), codegen_); } // java.lang.StringFactory.newStringFromBytes(byte[] data, int high, int offset, int byteCount) @@ -3147,59 +2602,50 @@ void IntrinsicLocationsBuilderMIPS::VisitIntegerValueOf(HInvoke* invoke) { } void IntrinsicCodeGeneratorMIPS::VisitIntegerValueOf(HInvoke* invoke) { - IntrinsicVisitor::IntegerValueOfInfo info = IntrinsicVisitor::ComputeIntegerValueOfInfo(); + IntrinsicVisitor::IntegerValueOfInfo info = + IntrinsicVisitor::ComputeIntegerValueOfInfo(invoke, codegen_->GetCompilerOptions()); LocationSummary* locations = invoke->GetLocations(); MipsAssembler* assembler = GetAssembler(); InstructionCodeGeneratorMIPS* icodegen = down_cast<InstructionCodeGeneratorMIPS*>(codegen_->GetInstructionVisitor()); Register out = locations->Out().AsRegister<Register>(); - InvokeRuntimeCallingConvention calling_convention; if (invoke->InputAt(0)->IsConstant()) { int32_t value = invoke->InputAt(0)->AsIntConstant()->GetValue(); - if (value >= info.low && value <= info.high) { + if (static_cast<uint32_t>(value - info.low) < info.length) { // Just embed the j.l.Integer in the code. - ScopedObjectAccess soa(Thread::Current()); - mirror::Object* boxed = info.cache->Get(value + (-info.low)); - DCHECK(boxed != nullptr && Runtime::Current()->GetHeap()->ObjectIsInBootImageSpace(boxed)); - uint32_t address = dchecked_integral_cast<uint32_t>(reinterpret_cast<uintptr_t>(boxed)); - __ LoadConst32(out, address); + DCHECK_NE(info.value_boot_image_reference, IntegerValueOfInfo::kInvalidReference); + codegen_->LoadBootImageAddress(out, info.value_boot_image_reference); } else { + DCHECK(locations->CanCall()); // Allocate and initialize a new j.l.Integer. // TODO: If we JIT, we could allocate the j.l.Integer now, and store it in the // JIT object table. - uint32_t address = - dchecked_integral_cast<uint32_t>(reinterpret_cast<uintptr_t>(info.integer)); - __ LoadConst32(calling_convention.GetRegisterAt(0), address); - codegen_->InvokeRuntime(kQuickAllocObjectInitialized, invoke, invoke->GetDexPc()); - CheckEntrypointTypes<kQuickAllocObjectWithChecks, void*, mirror::Class*>(); + codegen_->AllocateInstanceForIntrinsic(invoke->AsInvokeStaticOrDirect(), + info.integer_boot_image_offset); __ StoreConstToOffset(kStoreWord, value, out, info.value_offset, TMP); // `value` is a final field :-( Ideally, we'd merge this memory barrier with the allocation // one. icodegen->GenerateMemoryBarrier(MemBarrierKind::kStoreStore); } } else { + DCHECK(locations->CanCall()); Register in = locations->InAt(0).AsRegister<Register>(); MipsLabel allocate, done; - int32_t count = static_cast<uint32_t>(info.high) - info.low + 1; - // Is (info.low <= in) && (in <= info.high)? __ Addiu32(out, in, -info.low); - // As unsigned quantities is out < (info.high - info.low + 1)? 
- if (IsInt<16>(count)) { - __ Sltiu(AT, out, count); + // As unsigned quantities is out < info.length ? + if (IsUint<15>(info.length)) { + __ Sltiu(AT, out, info.length); } else { - __ LoadConst32(AT, count); + __ LoadConst32(AT, info.length); __ Sltu(AT, out, AT); } - // Branch if out >= (info.high - info.low + 1). - // This means that "in" is outside of the range [info.low, info.high]. + // Branch if out >= info.length. This means that "in" is outside of the valid range. __ Beqz(AT, &allocate); // If the value is within the bounds, load the j.l.Integer directly from the array. - uint32_t data_offset = mirror::Array::DataOffset(kHeapReferenceSize).Uint32Value(); - uint32_t address = dchecked_integral_cast<uint32_t>(reinterpret_cast<uintptr_t>(info.cache)); - __ LoadConst32(TMP, data_offset + address); + codegen_->LoadBootImageAddress(TMP, info.array_data_boot_image_reference); __ ShiftAndAdd(out, out, TMP, TIMES_4); __ Lw(out, out, 0); __ MaybeUnpoisonHeapReference(out); @@ -3207,10 +2653,8 @@ void IntrinsicCodeGeneratorMIPS::VisitIntegerValueOf(HInvoke* invoke) { __ Bind(&allocate); // Otherwise allocate and initialize a new j.l.Integer. - address = dchecked_integral_cast<uint32_t>(reinterpret_cast<uintptr_t>(info.integer)); - __ LoadConst32(calling_convention.GetRegisterAt(0), address); - codegen_->InvokeRuntime(kQuickAllocObjectInitialized, invoke, invoke->GetDexPc()); - CheckEntrypointTypes<kQuickAllocObjectWithChecks, void*, mirror::Class*>(); + codegen_->AllocateInstanceForIntrinsic(invoke->AsInvokeStaticOrDirect(), + info.integer_boot_image_offset); __ StoreToOffset(kStoreWord, in, out, info.value_offset); // `value` is a final field :-( Ideally, we'd merge this memory barrier with the allocation // one. @@ -3260,6 +2704,10 @@ UNIMPLEMENTED_INTRINSIC(MIPS, UnsafeCASLong) UNIMPLEMENTED_INTRINSIC(MIPS, ReferenceGetReferent) UNIMPLEMENTED_INTRINSIC(MIPS, SystemArrayCopy) +UNIMPLEMENTED_INTRINSIC(MIPS, CRC32Update) +UNIMPLEMENTED_INTRINSIC(MIPS, CRC32UpdateBytes) +UNIMPLEMENTED_INTRINSIC(MIPS, CRC32UpdateByteBuffer) + UNIMPLEMENTED_INTRINSIC(MIPS, StringStringIndexOf); UNIMPLEMENTED_INTRINSIC(MIPS, StringStringIndexOfAfter); UNIMPLEMENTED_INTRINSIC(MIPS, StringBufferAppend); diff --git a/compiler/optimizing/intrinsics_mips.h b/compiler/optimizing/intrinsics_mips.h index 13397f11d4..08d4e82139 100644 --- a/compiler/optimizing/intrinsics_mips.h +++ b/compiler/optimizing/intrinsics_mips.h @@ -30,14 +30,14 @@ namespace mips { class CodeGeneratorMIPS; class MipsAssembler; -class IntrinsicLocationsBuilderMIPS FINAL : public IntrinsicVisitor { +class IntrinsicLocationsBuilderMIPS final : public IntrinsicVisitor { public: explicit IntrinsicLocationsBuilderMIPS(CodeGeneratorMIPS* codegen); // Define visitor methods. #define OPTIMIZING_INTRINSICS(Name, IsStatic, NeedsEnvironmentOrCache, SideEffects, Exceptions, ...) \ - void Visit ## Name(HInvoke* invoke) OVERRIDE; + void Visit ## Name(HInvoke* invoke) override; #include "intrinsics_list.h" INTRINSICS_LIST(OPTIMIZING_INTRINSICS) #undef INTRINSICS_LIST @@ -55,14 +55,14 @@ class IntrinsicLocationsBuilderMIPS FINAL : public IntrinsicVisitor { DISALLOW_COPY_AND_ASSIGN(IntrinsicLocationsBuilderMIPS); }; -class IntrinsicCodeGeneratorMIPS FINAL : public IntrinsicVisitor { +class IntrinsicCodeGeneratorMIPS final : public IntrinsicVisitor { public: explicit IntrinsicCodeGeneratorMIPS(CodeGeneratorMIPS* codegen) : codegen_(codegen) {} // Define visitor methods. 
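Both intrinsics headers in this diff declare one Visit method per intrinsic by expanding OPTIMIZING_INTRINSICS over the list in intrinsics_list.h, with the FINAL/OVERRIDE macros giving way to the plain C++11 keywords. A minimal standalone sketch of that X-macro pattern; the list and names below are invented for illustration:

    // Invented list; the real one lives in intrinsics_list.h.
    #define EXAMPLE_INTRINSICS_LIST(V) \
      V(IntegerBitCount)               \
      V(MathSqrt)                      \
      V(StringEquals)

    struct HInvokeStub {};

    class ExampleIntrinsicVisitor {
     public:
      virtual ~ExampleIntrinsicVisitor() {}
      // Expands into one definition per list entry, e.g.
      //   virtual void VisitIntegerBitCount(HInvokeStub* invoke) { ... }
    #define EXAMPLE_OPTIMIZING_INTRINSICS(Name) \
      virtual void Visit ## Name(HInvokeStub* invoke) { (void)invoke; }
      EXAMPLE_INTRINSICS_LIST(EXAMPLE_OPTIMIZING_INTRINSICS)
    #undef EXAMPLE_OPTIMIZING_INTRINSICS
    };

    int main() {
      ExampleIntrinsicVisitor visitor;
      HInvokeStub invoke;
      visitor.VisitMathSqrt(&invoke);
      return 0;
    }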
#define OPTIMIZING_INTRINSICS(Name, IsStatic, NeedsEnvironmentOrCache, SideEffects, Exceptions, ...) \ - void Visit ## Name(HInvoke* invoke) OVERRIDE; + void Visit ## Name(HInvoke* invoke) override; #include "intrinsics_list.h" INTRINSICS_LIST(OPTIMIZING_INTRINSICS) #undef INTRINSICS_LIST @@ -71,6 +71,7 @@ class IntrinsicCodeGeneratorMIPS FINAL : public IntrinsicVisitor { bool IsR2OrNewer() const; bool IsR6() const; bool Is32BitFPU() const; + bool HasMsa() const; private: MipsAssembler* GetAssembler(); diff --git a/compiler/optimizing/intrinsics_mips64.cc b/compiler/optimizing/intrinsics_mips64.cc index 4668c561ed..3e687652d3 100644 --- a/compiler/optimizing/intrinsics_mips64.cc +++ b/compiler/optimizing/intrinsics_mips64.cc @@ -46,6 +46,10 @@ ArenaAllocator* IntrinsicCodeGeneratorMIPS64::GetAllocator() { return codegen_->GetGraph()->GetAllocator(); } +inline bool IntrinsicCodeGeneratorMIPS64::HasMsa() const { + return codegen_->GetInstructionSetFeatures().HasMsa(); +} + #define __ codegen->GetAssembler()-> static void MoveFromReturnRegister(Location trg, @@ -93,7 +97,7 @@ class IntrinsicSlowPathMIPS64 : public SlowPathCodeMIPS64 { explicit IntrinsicSlowPathMIPS64(HInvoke* invoke) : SlowPathCodeMIPS64(invoke), invoke_(invoke) { } - void EmitNativeCode(CodeGenerator* codegen_in) OVERRIDE { + void EmitNativeCode(CodeGenerator* codegen_in) override { CodeGeneratorMIPS64* codegen = down_cast<CodeGeneratorMIPS64*>(codegen_in); __ Bind(GetEntryLabel()); @@ -122,7 +126,7 @@ class IntrinsicSlowPathMIPS64 : public SlowPathCodeMIPS64 { __ Bc(GetExitLabel()); } - const char* GetDescription() const OVERRIDE { return "IntrinsicSlowPathMIPS64"; } + const char* GetDescription() const override { return "IntrinsicSlowPathMIPS64"; } private: // The instruction where this slow path is happening. 
@@ -165,7 +169,7 @@ void IntrinsicLocationsBuilderMIPS64::VisitDoubleDoubleToRawLongBits(HInvoke* in } void IntrinsicCodeGeneratorMIPS64::VisitDoubleDoubleToRawLongBits(HInvoke* invoke) { - MoveFPToInt(invoke->GetLocations(), /* is64bit */ true, GetAssembler()); + MoveFPToInt(invoke->GetLocations(), /* is64bit= */ true, GetAssembler()); } // int java.lang.Float.floatToRawIntBits(float) @@ -174,7 +178,7 @@ void IntrinsicLocationsBuilderMIPS64::VisitFloatFloatToRawIntBits(HInvoke* invok } void IntrinsicCodeGeneratorMIPS64::VisitFloatFloatToRawIntBits(HInvoke* invoke) { - MoveFPToInt(invoke->GetLocations(), /* is64bit */ false, GetAssembler()); + MoveFPToInt(invoke->GetLocations(), /* is64bit= */ false, GetAssembler()); } static void CreateIntToFPLocations(ArenaAllocator* allocator, HInvoke* invoke) { @@ -201,7 +205,7 @@ void IntrinsicLocationsBuilderMIPS64::VisitDoubleLongBitsToDouble(HInvoke* invok } void IntrinsicCodeGeneratorMIPS64::VisitDoubleLongBitsToDouble(HInvoke* invoke) { - MoveIntToFP(invoke->GetLocations(), /* is64bit */ true, GetAssembler()); + MoveIntToFP(invoke->GetLocations(), /* is64bit= */ true, GetAssembler()); } // float java.lang.Float.intBitsToFloat(int) @@ -210,7 +214,7 @@ void IntrinsicLocationsBuilderMIPS64::VisitFloatIntBitsToFloat(HInvoke* invoke) } void IntrinsicCodeGeneratorMIPS64::VisitFloatIntBitsToFloat(HInvoke* invoke) { - MoveIntToFP(invoke->GetLocations(), /* is64bit */ false, GetAssembler()); + MoveIntToFP(invoke->GetLocations(), /* is64bit= */ false, GetAssembler()); } static void CreateIntToIntLocations(ArenaAllocator* allocator, HInvoke* invoke) { @@ -291,7 +295,7 @@ void IntrinsicLocationsBuilderMIPS64::VisitIntegerNumberOfLeadingZeros(HInvoke* } void IntrinsicCodeGeneratorMIPS64::VisitIntegerNumberOfLeadingZeros(HInvoke* invoke) { - GenNumberOfLeadingZeroes(invoke->GetLocations(), /* is64bit */ false, GetAssembler()); + GenNumberOfLeadingZeroes(invoke->GetLocations(), /* is64bit= */ false, GetAssembler()); } // int java.lang.Long.numberOfLeadingZeros(long i) @@ -300,7 +304,7 @@ void IntrinsicLocationsBuilderMIPS64::VisitLongNumberOfLeadingZeros(HInvoke* inv } void IntrinsicCodeGeneratorMIPS64::VisitLongNumberOfLeadingZeros(HInvoke* invoke) { - GenNumberOfLeadingZeroes(invoke->GetLocations(), /* is64bit */ true, GetAssembler()); + GenNumberOfLeadingZeroes(invoke->GetLocations(), /* is64bit= */ true, GetAssembler()); } static void GenNumberOfTrailingZeroes(LocationSummary* locations, @@ -328,7 +332,7 @@ void IntrinsicLocationsBuilderMIPS64::VisitIntegerNumberOfTrailingZeros(HInvoke* } void IntrinsicCodeGeneratorMIPS64::VisitIntegerNumberOfTrailingZeros(HInvoke* invoke) { - GenNumberOfTrailingZeroes(invoke->GetLocations(), /* is64bit */ false, GetAssembler()); + GenNumberOfTrailingZeroes(invoke->GetLocations(), /* is64bit= */ false, GetAssembler()); } // int java.lang.Long.numberOfTrailingZeros(long i) @@ -337,7 +341,7 @@ void IntrinsicLocationsBuilderMIPS64::VisitLongNumberOfTrailingZeros(HInvoke* in } void IntrinsicCodeGeneratorMIPS64::VisitLongNumberOfTrailingZeros(HInvoke* invoke) { - GenNumberOfTrailingZeroes(invoke->GetLocations(), /* is64bit */ true, GetAssembler()); + GenNumberOfTrailingZeroes(invoke->GetLocations(), /* is64bit= */ true, GetAssembler()); } static void GenReverse(LocationSummary* locations, @@ -386,6 +390,7 @@ static void CreateFPToFPLocations(ArenaAllocator* allocator, HInvoke* invoke) { static void GenBitCount(LocationSummary* locations, const DataType::Type type, + const bool hasMsa, Mips64Assembler* assembler) { GpuRegister 
out = locations->Out().AsRegister<GpuRegister>(); GpuRegister in = locations->InAt(0).AsRegister<GpuRegister>(); @@ -414,41 +419,52 @@ static void GenBitCount(LocationSummary* locations, // bits are set but the algorithm here attempts to minimize the total // number of instructions executed even when a large number of bits // are set. - - if (type == DataType::Type::kInt32) { - __ Srl(TMP, in, 1); - __ LoadConst32(AT, 0x55555555); - __ And(TMP, TMP, AT); - __ Subu(TMP, in, TMP); - __ LoadConst32(AT, 0x33333333); - __ And(out, TMP, AT); - __ Srl(TMP, TMP, 2); - __ And(TMP, TMP, AT); - __ Addu(TMP, out, TMP); - __ Srl(out, TMP, 4); - __ Addu(out, out, TMP); - __ LoadConst32(AT, 0x0F0F0F0F); - __ And(out, out, AT); - __ LoadConst32(TMP, 0x01010101); - __ MulR6(out, out, TMP); - __ Srl(out, out, 24); - } else if (type == DataType::Type::kInt64) { - __ Dsrl(TMP, in, 1); - __ LoadConst64(AT, 0x5555555555555555L); - __ And(TMP, TMP, AT); - __ Dsubu(TMP, in, TMP); - __ LoadConst64(AT, 0x3333333333333333L); - __ And(out, TMP, AT); - __ Dsrl(TMP, TMP, 2); - __ And(TMP, TMP, AT); - __ Daddu(TMP, out, TMP); - __ Dsrl(out, TMP, 4); - __ Daddu(out, out, TMP); - __ LoadConst64(AT, 0x0F0F0F0F0F0F0F0FL); - __ And(out, out, AT); - __ LoadConst64(TMP, 0x0101010101010101L); - __ Dmul(out, out, TMP); - __ Dsrl32(out, out, 24); + if (hasMsa) { + if (type == DataType::Type::kInt32) { + __ Mtc1(in, FTMP); + __ PcntW(static_cast<VectorRegister>(FTMP), static_cast<VectorRegister>(FTMP)); + __ Mfc1(out, FTMP); + } else { + __ Dmtc1(in, FTMP); + __ PcntD(static_cast<VectorRegister>(FTMP), static_cast<VectorRegister>(FTMP)); + __ Dmfc1(out, FTMP); + } + } else { + if (type == DataType::Type::kInt32) { + __ Srl(TMP, in, 1); + __ LoadConst32(AT, 0x55555555); + __ And(TMP, TMP, AT); + __ Subu(TMP, in, TMP); + __ LoadConst32(AT, 0x33333333); + __ And(out, TMP, AT); + __ Srl(TMP, TMP, 2); + __ And(TMP, TMP, AT); + __ Addu(TMP, out, TMP); + __ Srl(out, TMP, 4); + __ Addu(out, out, TMP); + __ LoadConst32(AT, 0x0F0F0F0F); + __ And(out, out, AT); + __ LoadConst32(TMP, 0x01010101); + __ MulR6(out, out, TMP); + __ Srl(out, out, 24); + } else { + __ Dsrl(TMP, in, 1); + __ LoadConst64(AT, 0x5555555555555555L); + __ And(TMP, TMP, AT); + __ Dsubu(TMP, in, TMP); + __ LoadConst64(AT, 0x3333333333333333L); + __ And(out, TMP, AT); + __ Dsrl(TMP, TMP, 2); + __ And(TMP, TMP, AT); + __ Daddu(TMP, out, TMP); + __ Dsrl(out, TMP, 4); + __ Daddu(out, out, TMP); + __ LoadConst64(AT, 0x0F0F0F0F0F0F0F0FL); + __ And(out, out, AT); + __ LoadConst64(TMP, 0x0101010101010101L); + __ Dmul(out, out, TMP); + __ Dsrl32(out, out, 24); + } } } @@ -458,7 +474,7 @@ void IntrinsicLocationsBuilderMIPS64::VisitIntegerBitCount(HInvoke* invoke) { } void IntrinsicCodeGeneratorMIPS64::VisitIntegerBitCount(HInvoke* invoke) { - GenBitCount(invoke->GetLocations(), DataType::Type::kInt32, GetAssembler()); + GenBitCount(invoke->GetLocations(), DataType::Type::kInt32, HasMsa(), GetAssembler()); } // int java.lang.Long.bitCount(long) @@ -467,291 +483,7 @@ void IntrinsicLocationsBuilderMIPS64::VisitLongBitCount(HInvoke* invoke) { } void IntrinsicCodeGeneratorMIPS64::VisitLongBitCount(HInvoke* invoke) { - GenBitCount(invoke->GetLocations(), DataType::Type::kInt64, GetAssembler()); -} - -static void MathAbsFP(LocationSummary* locations, bool is64bit, Mips64Assembler* assembler) { - FpuRegister in = locations->InAt(0).AsFpuRegister<FpuRegister>(); - FpuRegister out = locations->Out().AsFpuRegister<FpuRegister>(); - - if (is64bit) { - __ AbsD(out, in); - } else { - __ AbsS(out, 
in); - } -} - -// double java.lang.Math.abs(double) -void IntrinsicLocationsBuilderMIPS64::VisitMathAbsDouble(HInvoke* invoke) { - CreateFPToFPLocations(allocator_, invoke); -} - -void IntrinsicCodeGeneratorMIPS64::VisitMathAbsDouble(HInvoke* invoke) { - MathAbsFP(invoke->GetLocations(), /* is64bit */ true, GetAssembler()); -} - -// float java.lang.Math.abs(float) -void IntrinsicLocationsBuilderMIPS64::VisitMathAbsFloat(HInvoke* invoke) { - CreateFPToFPLocations(allocator_, invoke); -} - -void IntrinsicCodeGeneratorMIPS64::VisitMathAbsFloat(HInvoke* invoke) { - MathAbsFP(invoke->GetLocations(), /* is64bit */ false, GetAssembler()); -} - -static void CreateIntToInt(ArenaAllocator* allocator, HInvoke* invoke) { - LocationSummary* locations = - new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified); - locations->SetInAt(0, Location::RequiresRegister()); - locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); -} - -static void GenAbsInteger(LocationSummary* locations, bool is64bit, Mips64Assembler* assembler) { - GpuRegister in = locations->InAt(0).AsRegister<GpuRegister>(); - GpuRegister out = locations->Out().AsRegister<GpuRegister>(); - - if (is64bit) { - __ Dsra32(AT, in, 31); - __ Xor(out, in, AT); - __ Dsubu(out, out, AT); - } else { - __ Sra(AT, in, 31); - __ Xor(out, in, AT); - __ Subu(out, out, AT); - } -} - -// int java.lang.Math.abs(int) -void IntrinsicLocationsBuilderMIPS64::VisitMathAbsInt(HInvoke* invoke) { - CreateIntToInt(allocator_, invoke); -} - -void IntrinsicCodeGeneratorMIPS64::VisitMathAbsInt(HInvoke* invoke) { - GenAbsInteger(invoke->GetLocations(), /* is64bit */ false, GetAssembler()); -} - -// long java.lang.Math.abs(long) -void IntrinsicLocationsBuilderMIPS64::VisitMathAbsLong(HInvoke* invoke) { - CreateIntToInt(allocator_, invoke); -} - -void IntrinsicCodeGeneratorMIPS64::VisitMathAbsLong(HInvoke* invoke) { - GenAbsInteger(invoke->GetLocations(), /* is64bit */ true, GetAssembler()); -} - -static void GenMinMaxFP(LocationSummary* locations, - bool is_min, - DataType::Type type, - Mips64Assembler* assembler) { - FpuRegister a = locations->InAt(0).AsFpuRegister<FpuRegister>(); - FpuRegister b = locations->InAt(1).AsFpuRegister<FpuRegister>(); - FpuRegister out = locations->Out().AsFpuRegister<FpuRegister>(); - - Mips64Label noNaNs; - Mips64Label done; - FpuRegister ftmp = ((out != a) && (out != b)) ? out : FTMP; - - // When Java computes min/max it prefers a NaN to a number; the - // behavior of MIPSR6 is to prefer numbers to NaNs, i.e., if one of - // the inputs is a NaN and the other is a valid number, the MIPS - // instruction will return the number; Java wants the NaN value - // returned. This is why there is extra logic preceding the use of - // the MIPS min.fmt/max.fmt instructions. If either a, or b holds a - // NaN, return the NaN, otherwise return the min/max. - if (type == DataType::Type::kFloat64) { - __ CmpUnD(FTMP, a, b); - __ Bc1eqz(FTMP, &noNaNs); - - // One of the inputs is a NaN - __ CmpEqD(ftmp, a, a); - // If a == a then b is the NaN, otherwise a is the NaN. - __ SelD(ftmp, a, b); - - if (ftmp != out) { - __ MovD(out, ftmp); - } - - __ Bc(&done); - - __ Bind(&noNaNs); - - if (is_min) { - __ MinD(out, a, b); - } else { - __ MaxD(out, a, b); - } - } else { - DCHECK_EQ(type, DataType::Type::kFloat32); - __ CmpUnS(FTMP, a, b); - __ Bc1eqz(FTMP, &noNaNs); - - // One of the inputs is a NaN - __ CmpEqS(ftmp, a, a); - // If a == a then b is the NaN, otherwise a is the NaN. 
- __ SelS(ftmp, a, b); - - if (ftmp != out) { - __ MovS(out, ftmp); - } - - __ Bc(&done); - - __ Bind(&noNaNs); - - if (is_min) { - __ MinS(out, a, b); - } else { - __ MaxS(out, a, b); - } - } - - __ Bind(&done); -} - -static void CreateFPFPToFPLocations(ArenaAllocator* allocator, HInvoke* invoke) { - LocationSummary* locations = - new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified); - locations->SetInAt(0, Location::RequiresFpuRegister()); - locations->SetInAt(1, Location::RequiresFpuRegister()); - locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap); -} - -// double java.lang.Math.min(double, double) -void IntrinsicLocationsBuilderMIPS64::VisitMathMinDoubleDouble(HInvoke* invoke) { - CreateFPFPToFPLocations(allocator_, invoke); -} - -void IntrinsicCodeGeneratorMIPS64::VisitMathMinDoubleDouble(HInvoke* invoke) { - GenMinMaxFP(invoke->GetLocations(), /* is_min */ true, DataType::Type::kFloat64, GetAssembler()); -} - -// float java.lang.Math.min(float, float) -void IntrinsicLocationsBuilderMIPS64::VisitMathMinFloatFloat(HInvoke* invoke) { - CreateFPFPToFPLocations(allocator_, invoke); -} - -void IntrinsicCodeGeneratorMIPS64::VisitMathMinFloatFloat(HInvoke* invoke) { - GenMinMaxFP(invoke->GetLocations(), /* is_min */ true, DataType::Type::kFloat32, GetAssembler()); -} - -// double java.lang.Math.max(double, double) -void IntrinsicLocationsBuilderMIPS64::VisitMathMaxDoubleDouble(HInvoke* invoke) { - CreateFPFPToFPLocations(allocator_, invoke); -} - -void IntrinsicCodeGeneratorMIPS64::VisitMathMaxDoubleDouble(HInvoke* invoke) { - GenMinMaxFP(invoke->GetLocations(), /* is_min */ false, DataType::Type::kFloat64, GetAssembler()); -} - -// float java.lang.Math.max(float, float) -void IntrinsicLocationsBuilderMIPS64::VisitMathMaxFloatFloat(HInvoke* invoke) { - CreateFPFPToFPLocations(allocator_, invoke); -} - -void IntrinsicCodeGeneratorMIPS64::VisitMathMaxFloatFloat(HInvoke* invoke) { - GenMinMaxFP(invoke->GetLocations(), /* is_min */ false, DataType::Type::kFloat32, GetAssembler()); -} - -static void GenMinMax(LocationSummary* locations, - bool is_min, - Mips64Assembler* assembler) { - GpuRegister lhs = locations->InAt(0).AsRegister<GpuRegister>(); - GpuRegister rhs = locations->InAt(1).AsRegister<GpuRegister>(); - GpuRegister out = locations->Out().AsRegister<GpuRegister>(); - - if (lhs == rhs) { - if (out != lhs) { - __ Move(out, lhs); - } - } else { - // Some architectures, such as ARM and MIPS (prior to r6), have a - // conditional move instruction which only changes the target - // (output) register if the condition is true (MIPS prior to r6 had - // MOVF, MOVT, and MOVZ). The SELEQZ and SELNEZ instructions always - // change the target (output) register. If the condition is true the - // output register gets the contents of the "rs" register; otherwise, - // the output register is set to zero. One consequence of this is - // that to implement something like "rd = c==0 ? rs : rt" MIPS64r6 - // needs to use a pair of SELEQZ/SELNEZ instructions. After - // executing this pair of instructions one of the output registers - // from the pair will necessarily contain zero. Then the code ORs the - // output registers from the SELEQZ/SELNEZ instructions to get the - // final result. - // - // The initial test to see if the output register is same as the - // first input register is needed to make sure that value in the - // first input register isn't clobbered before we've finished - // computing the output value. 
The logic in the corresponding else - // clause performs the same task but makes sure the second input - // register isn't clobbered in the event that it's the same register - // as the output register; the else clause also handles the case - // where the output register is distinct from both the first, and the - // second input registers. - if (out == lhs) { - __ Slt(AT, rhs, lhs); - if (is_min) { - __ Seleqz(out, lhs, AT); - __ Selnez(AT, rhs, AT); - } else { - __ Selnez(out, lhs, AT); - __ Seleqz(AT, rhs, AT); - } - } else { - __ Slt(AT, lhs, rhs); - if (is_min) { - __ Seleqz(out, rhs, AT); - __ Selnez(AT, lhs, AT); - } else { - __ Selnez(out, rhs, AT); - __ Seleqz(AT, lhs, AT); - } - } - __ Or(out, out, AT); - } -} - -static void CreateIntIntToIntLocations(ArenaAllocator* allocator, HInvoke* invoke) { - LocationSummary* locations = - new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified); - locations->SetInAt(0, Location::RequiresRegister()); - locations->SetInAt(1, Location::RequiresRegister()); - locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); -} - -// int java.lang.Math.min(int, int) -void IntrinsicLocationsBuilderMIPS64::VisitMathMinIntInt(HInvoke* invoke) { - CreateIntIntToIntLocations(allocator_, invoke); -} - -void IntrinsicCodeGeneratorMIPS64::VisitMathMinIntInt(HInvoke* invoke) { - GenMinMax(invoke->GetLocations(), /* is_min */ true, GetAssembler()); -} - -// long java.lang.Math.min(long, long) -void IntrinsicLocationsBuilderMIPS64::VisitMathMinLongLong(HInvoke* invoke) { - CreateIntIntToIntLocations(allocator_, invoke); -} - -void IntrinsicCodeGeneratorMIPS64::VisitMathMinLongLong(HInvoke* invoke) { - GenMinMax(invoke->GetLocations(), /* is_min */ true, GetAssembler()); -} - -// int java.lang.Math.max(int, int) -void IntrinsicLocationsBuilderMIPS64::VisitMathMaxIntInt(HInvoke* invoke) { - CreateIntIntToIntLocations(allocator_, invoke); -} - -void IntrinsicCodeGeneratorMIPS64::VisitMathMaxIntInt(HInvoke* invoke) { - GenMinMax(invoke->GetLocations(), /* is_min */ false, GetAssembler()); -} - -// long java.lang.Math.max(long, long) -void IntrinsicLocationsBuilderMIPS64::VisitMathMaxLongLong(HInvoke* invoke) { - CreateIntIntToIntLocations(allocator_, invoke); -} - -void IntrinsicCodeGeneratorMIPS64::VisitMathMaxLongLong(HInvoke* invoke) { - GenMinMax(invoke->GetLocations(), /* is_min */ false, GetAssembler()); + GenBitCount(invoke->GetLocations(), DataType::Type::kInt64, HasMsa(), GetAssembler()); } // double java.lang.Math.sqrt(double) @@ -1179,11 +911,11 @@ static void GenUnsafeGet(HInvoke* invoke, codegen->GenerateReferenceLoadWithBakerReadBarrier(invoke, trg_loc, base, - /* offset */ 0U, - /* index */ offset_loc, + /* offset= */ 0U, + /* index= */ offset_loc, TIMES_1, temp, - /* needs_null_check */ false); + /* needs_null_check= */ false); if (is_volatile) { __ Sync(0); } @@ -1196,8 +928,8 @@ static void GenUnsafeGet(HInvoke* invoke, trg_loc, trg_loc, base_loc, - /* offset */ 0U, - /* index */ offset_loc); + /* offset= */ 0U, + /* index= */ offset_loc); } } else { __ Lwu(trg, TMP, 0); @@ -1220,7 +952,7 @@ void IntrinsicLocationsBuilderMIPS64::VisitUnsafeGet(HInvoke* invoke) { } void IntrinsicCodeGeneratorMIPS64::VisitUnsafeGet(HInvoke* invoke) { - GenUnsafeGet(invoke, DataType::Type::kInt32, /* is_volatile */ false, codegen_); + GenUnsafeGet(invoke, DataType::Type::kInt32, /* is_volatile= */ false, codegen_); } // int sun.misc.Unsafe.getIntVolatile(Object o, long offset) @@ -1229,7 +961,7 @@ void 
IntrinsicLocationsBuilderMIPS64::VisitUnsafeGetVolatile(HInvoke* invoke) { } void IntrinsicCodeGeneratorMIPS64::VisitUnsafeGetVolatile(HInvoke* invoke) { - GenUnsafeGet(invoke, DataType::Type::kInt32, /* is_volatile */ true, codegen_); + GenUnsafeGet(invoke, DataType::Type::kInt32, /* is_volatile= */ true, codegen_); } // long sun.misc.Unsafe.getLong(Object o, long offset) @@ -1238,7 +970,7 @@ void IntrinsicLocationsBuilderMIPS64::VisitUnsafeGetLong(HInvoke* invoke) { } void IntrinsicCodeGeneratorMIPS64::VisitUnsafeGetLong(HInvoke* invoke) { - GenUnsafeGet(invoke, DataType::Type::kInt64, /* is_volatile */ false, codegen_); + GenUnsafeGet(invoke, DataType::Type::kInt64, /* is_volatile= */ false, codegen_); } // long sun.misc.Unsafe.getLongVolatile(Object o, long offset) @@ -1247,7 +979,7 @@ void IntrinsicLocationsBuilderMIPS64::VisitUnsafeGetLongVolatile(HInvoke* invoke } void IntrinsicCodeGeneratorMIPS64::VisitUnsafeGetLongVolatile(HInvoke* invoke) { - GenUnsafeGet(invoke, DataType::Type::kInt64, /* is_volatile */ true, codegen_); + GenUnsafeGet(invoke, DataType::Type::kInt64, /* is_volatile= */ true, codegen_); } // Object sun.misc.Unsafe.getObject(Object o, long offset) @@ -1256,7 +988,7 @@ void IntrinsicLocationsBuilderMIPS64::VisitUnsafeGetObject(HInvoke* invoke) { } void IntrinsicCodeGeneratorMIPS64::VisitUnsafeGetObject(HInvoke* invoke) { - GenUnsafeGet(invoke, DataType::Type::kReference, /* is_volatile */ false, codegen_); + GenUnsafeGet(invoke, DataType::Type::kReference, /* is_volatile= */ false, codegen_); } // Object sun.misc.Unsafe.getObjectVolatile(Object o, long offset) @@ -1265,7 +997,7 @@ void IntrinsicLocationsBuilderMIPS64::VisitUnsafeGetObjectVolatile(HInvoke* invo } void IntrinsicCodeGeneratorMIPS64::VisitUnsafeGetObjectVolatile(HInvoke* invoke) { - GenUnsafeGet(invoke, DataType::Type::kReference, /* is_volatile */ true, codegen_); + GenUnsafeGet(invoke, DataType::Type::kReference, /* is_volatile= */ true, codegen_); } static void CreateIntIntIntIntToVoid(ArenaAllocator* allocator, HInvoke* invoke) { @@ -1335,8 +1067,8 @@ void IntrinsicLocationsBuilderMIPS64::VisitUnsafePut(HInvoke* invoke) { void IntrinsicCodeGeneratorMIPS64::VisitUnsafePut(HInvoke* invoke) { GenUnsafePut(invoke->GetLocations(), DataType::Type::kInt32, - /* is_volatile */ false, - /* is_ordered */ false, + /* is_volatile= */ false, + /* is_ordered= */ false, codegen_); } @@ -1348,8 +1080,8 @@ void IntrinsicLocationsBuilderMIPS64::VisitUnsafePutOrdered(HInvoke* invoke) { void IntrinsicCodeGeneratorMIPS64::VisitUnsafePutOrdered(HInvoke* invoke) { GenUnsafePut(invoke->GetLocations(), DataType::Type::kInt32, - /* is_volatile */ false, - /* is_ordered */ true, + /* is_volatile= */ false, + /* is_ordered= */ true, codegen_); } @@ -1361,8 +1093,8 @@ void IntrinsicLocationsBuilderMIPS64::VisitUnsafePutVolatile(HInvoke* invoke) { void IntrinsicCodeGeneratorMIPS64::VisitUnsafePutVolatile(HInvoke* invoke) { GenUnsafePut(invoke->GetLocations(), DataType::Type::kInt32, - /* is_volatile */ true, - /* is_ordered */ false, + /* is_volatile= */ true, + /* is_ordered= */ false, codegen_); } @@ -1374,8 +1106,8 @@ void IntrinsicLocationsBuilderMIPS64::VisitUnsafePutObject(HInvoke* invoke) { void IntrinsicCodeGeneratorMIPS64::VisitUnsafePutObject(HInvoke* invoke) { GenUnsafePut(invoke->GetLocations(), DataType::Type::kReference, - /* is_volatile */ false, - /* is_ordered */ false, + /* is_volatile= */ false, + /* is_ordered= */ false, codegen_); } @@ -1387,8 +1119,8 @@ void 
IntrinsicLocationsBuilderMIPS64::VisitUnsafePutObjectOrdered(HInvoke* invok void IntrinsicCodeGeneratorMIPS64::VisitUnsafePutObjectOrdered(HInvoke* invoke) { GenUnsafePut(invoke->GetLocations(), DataType::Type::kReference, - /* is_volatile */ false, - /* is_ordered */ true, + /* is_volatile= */ false, + /* is_ordered= */ true, codegen_); } @@ -1400,8 +1132,8 @@ void IntrinsicLocationsBuilderMIPS64::VisitUnsafePutObjectVolatile(HInvoke* invo void IntrinsicCodeGeneratorMIPS64::VisitUnsafePutObjectVolatile(HInvoke* invoke) { GenUnsafePut(invoke->GetLocations(), DataType::Type::kReference, - /* is_volatile */ true, - /* is_ordered */ false, + /* is_volatile= */ true, + /* is_ordered= */ false, codegen_); } @@ -1413,8 +1145,8 @@ void IntrinsicLocationsBuilderMIPS64::VisitUnsafePutLong(HInvoke* invoke) { void IntrinsicCodeGeneratorMIPS64::VisitUnsafePutLong(HInvoke* invoke) { GenUnsafePut(invoke->GetLocations(), DataType::Type::kInt64, - /* is_volatile */ false, - /* is_ordered */ false, + /* is_volatile= */ false, + /* is_ordered= */ false, codegen_); } @@ -1426,8 +1158,8 @@ void IntrinsicLocationsBuilderMIPS64::VisitUnsafePutLongOrdered(HInvoke* invoke) void IntrinsicCodeGeneratorMIPS64::VisitUnsafePutLongOrdered(HInvoke* invoke) { GenUnsafePut(invoke->GetLocations(), DataType::Type::kInt64, - /* is_volatile */ false, - /* is_ordered */ true, + /* is_volatile= */ false, + /* is_ordered= */ true, codegen_); } @@ -1439,8 +1171,8 @@ void IntrinsicLocationsBuilderMIPS64::VisitUnsafePutLongVolatile(HInvoke* invoke void IntrinsicCodeGeneratorMIPS64::VisitUnsafePutLongVolatile(HInvoke* invoke) { GenUnsafePut(invoke->GetLocations(), DataType::Type::kInt64, - /* is_volatile */ true, - /* is_ordered */ false, + /* is_volatile= */ true, + /* is_ordered= */ false, codegen_); } @@ -1502,12 +1234,12 @@ static void GenCas(HInvoke* invoke, DataType::Type type, CodeGeneratorMIPS64* co invoke, out_loc, // Unused, used only as a "temporary" within the read barrier. base, - /* offset */ 0u, - /* index */ offset_loc, + /* offset= */ 0u, + /* index= */ offset_loc, ScaleFactor::TIMES_1, temp, - /* needs_null_check */ false, - /* always_update_field */ true); + /* needs_null_check= */ false, + /* always_update_field= */ true); } } @@ -1637,13 +1369,6 @@ void IntrinsicCodeGeneratorMIPS64::VisitStringCompareTo(HInvoke* invoke) { // boolean java.lang.String.equals(Object anObject) void IntrinsicLocationsBuilderMIPS64::VisitStringEquals(HInvoke* invoke) { - if (kEmitCompilerReadBarrier && - !StringEqualsOptimizations(invoke).GetArgumentIsString() && - !StringEqualsOptimizations(invoke).GetNoReadBarrierForStringClass()) { - // No support for this odd case (String class is moveable, not in the boot image). - return; - } - LocationSummary* locations = new (allocator_) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified); locations->SetInAt(0, Location::RequiresRegister()); @@ -1704,8 +1429,16 @@ void IntrinsicCodeGeneratorMIPS64::VisitStringEquals(HInvoke* invoke) { // All string objects must have the same type since String cannot be subclassed. // Receiver must be a string object, so its class field is equal to all strings' class fields. // If the argument is a string object, its class field must be equal to receiver's class field. + // + // As the String class is expected to be non-movable, we can read the class + // field from String.equals' arguments without read barriers. 
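The comment added above captures why the read-barrier bail-out could be dropped on both MIPS64 and x86: with java.lang.String expected to be non-movable (and asserted as such by the AssertNonMovableStringClass() call that follows), the class fields of the receiver and the argument can be loaded and compared directly. For orientation, the overall shape of the fast path over invented stand-in types (an illustration of the checks, not the generated code):

#include <cstdint>

struct FakeStringObject {
  const void* klass;      // stand-in for the klass_ reference
  int32_t count;          // length (the real field also encodes compression)
  const uint16_t* value;  // character data
};

bool StringEqualsShape(const FakeStringObject* str, const FakeStringObject* arg) {
  if (str == arg) return true;                 // Same reference.
  if (arg == nullptr) return false;            // instanceof fails for null.
  if (str->klass != arg->klass) return false;  // Argument is not a java.lang.String.
  if (str->count != arg->count) return false;  // Different length (or compression).
  for (int32_t i = 0; i < str->count; ++i) {
    if (str->value[i] != arg->value[i]) return false;
  }
  return true;
}

Because the two class references are only compared with each other, they need neither unpoisoning nor a read barrier, which is exactly what the new comments state.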
+ AssertNonMovableStringClass(); + // /* HeapReference<Class> */ temp1 = str->klass_ __ Lw(temp1, str, class_offset); + // /* HeapReference<Class> */ temp2 = arg->klass_ __ Lw(temp2, arg, class_offset); + // Also, because we use the previously loaded class references only in the + // following comparison, we don't need to unpoison them. __ Bnec(temp1, temp2, &return_false); } @@ -1823,7 +1556,7 @@ void IntrinsicLocationsBuilderMIPS64::VisitStringIndexOf(HInvoke* invoke) { } void IntrinsicCodeGeneratorMIPS64::VisitStringIndexOf(HInvoke* invoke) { - GenerateStringIndexOf(invoke, GetAssembler(), codegen_, /* start_at_zero */ true); + GenerateStringIndexOf(invoke, GetAssembler(), codegen_, /* start_at_zero= */ true); } // int java.lang.String.indexOf(int ch, int fromIndex) @@ -1841,7 +1574,7 @@ void IntrinsicLocationsBuilderMIPS64::VisitStringIndexOfAfter(HInvoke* invoke) { } void IntrinsicCodeGeneratorMIPS64::VisitStringIndexOfAfter(HInvoke* invoke) { - GenerateStringIndexOf(invoke, GetAssembler(), codegen_, /* start_at_zero */ false); + GenerateStringIndexOf(invoke, GetAssembler(), codegen_, /* start_at_zero= */ false); } // java.lang.StringFactory.newStringFromBytes(byte[] data, int high, int offset, int byteCount) @@ -1942,7 +1675,7 @@ void IntrinsicLocationsBuilderMIPS64::VisitFloatIsInfinite(HInvoke* invoke) { } void IntrinsicCodeGeneratorMIPS64::VisitFloatIsInfinite(HInvoke* invoke) { - GenIsInfinite(invoke->GetLocations(), /* is64bit */ false, GetAssembler()); + GenIsInfinite(invoke->GetLocations(), /* is64bit= */ false, GetAssembler()); } // boolean java.lang.Double.isInfinite(double) @@ -1951,7 +1684,7 @@ void IntrinsicLocationsBuilderMIPS64::VisitDoubleIsInfinite(HInvoke* invoke) { } void IntrinsicCodeGeneratorMIPS64::VisitDoubleIsInfinite(HInvoke* invoke) { - GenIsInfinite(invoke->GetLocations(), /* is64bit */ true, GetAssembler()); + GenIsInfinite(invoke->GetLocations(), /* is64bit= */ true, GetAssembler()); } // void java.lang.String.getChars(int srcBegin, int srcEnd, char[] dst, int dstBegin) @@ -2535,54 +2268,45 @@ void IntrinsicLocationsBuilderMIPS64::VisitIntegerValueOf(HInvoke* invoke) { } void IntrinsicCodeGeneratorMIPS64::VisitIntegerValueOf(HInvoke* invoke) { - IntrinsicVisitor::IntegerValueOfInfo info = IntrinsicVisitor::ComputeIntegerValueOfInfo(); + IntrinsicVisitor::IntegerValueOfInfo info = + IntrinsicVisitor::ComputeIntegerValueOfInfo(invoke, codegen_->GetCompilerOptions()); LocationSummary* locations = invoke->GetLocations(); Mips64Assembler* assembler = GetAssembler(); InstructionCodeGeneratorMIPS64* icodegen = down_cast<InstructionCodeGeneratorMIPS64*>(codegen_->GetInstructionVisitor()); GpuRegister out = locations->Out().AsRegister<GpuRegister>(); - InvokeRuntimeCallingConvention calling_convention; if (invoke->InputAt(0)->IsConstant()) { int32_t value = invoke->InputAt(0)->AsIntConstant()->GetValue(); - if (value >= info.low && value <= info.high) { + if (static_cast<uint32_t>(value - info.low) < info.length) { // Just embed the j.l.Integer in the code. 
- ScopedObjectAccess soa(Thread::Current()); - mirror::Object* boxed = info.cache->Get(value + (-info.low)); - DCHECK(boxed != nullptr && Runtime::Current()->GetHeap()->ObjectIsInBootImageSpace(boxed)); - uint32_t address = dchecked_integral_cast<uint32_t>(reinterpret_cast<uintptr_t>(boxed)); - __ LoadConst64(out, address); + DCHECK_NE(info.value_boot_image_reference, IntegerValueOfInfo::kInvalidReference); + codegen_->LoadBootImageAddress(out, info.value_boot_image_reference); } else { + DCHECK(locations->CanCall()); // Allocate and initialize a new j.l.Integer. // TODO: If we JIT, we could allocate the j.l.Integer now, and store it in the // JIT object table. - uint32_t address = - dchecked_integral_cast<uint32_t>(reinterpret_cast<uintptr_t>(info.integer)); - __ LoadConst64(calling_convention.GetRegisterAt(0), address); - codegen_->InvokeRuntime(kQuickAllocObjectInitialized, invoke, invoke->GetDexPc()); - CheckEntrypointTypes<kQuickAllocObjectWithChecks, void*, mirror::Class*>(); + codegen_->AllocateInstanceForIntrinsic(invoke->AsInvokeStaticOrDirect(), + info.integer_boot_image_offset); __ StoreConstToOffset(kStoreWord, value, out, info.value_offset, TMP); // `value` is a final field :-( Ideally, we'd merge this memory barrier with the allocation // one. icodegen->GenerateMemoryBarrier(MemBarrierKind::kStoreStore); } } else { + DCHECK(locations->CanCall()); GpuRegister in = locations->InAt(0).AsRegister<GpuRegister>(); Mips64Label allocate, done; - int32_t count = static_cast<uint32_t>(info.high) - info.low + 1; - // Is (info.low <= in) && (in <= info.high)? __ Addiu32(out, in, -info.low); - // As unsigned quantities is out < (info.high - info.low + 1)? - __ LoadConst32(AT, count); - // Branch if out >= (info.high - info.low + 1). - // This means that "in" is outside of the range [info.low, info.high]. + // As unsigned quantities is out < info.length ? + __ LoadConst32(AT, info.length); + // Branch if out >= info.length . This means that "in" is outside of the valid range. __ Bgeuc(out, AT, &allocate); // If the value is within the bounds, load the j.l.Integer directly from the array. - uint32_t data_offset = mirror::Array::DataOffset(kHeapReferenceSize).Uint32Value(); - uint32_t address = dchecked_integral_cast<uint32_t>(reinterpret_cast<uintptr_t>(info.cache)); - __ LoadConst64(TMP, data_offset + address); + codegen_->LoadBootImageAddress(TMP, info.array_data_boot_image_reference); __ Dlsa(out, out, TMP, TIMES_4); __ Lwu(out, out, 0); __ MaybeUnpoisonHeapReference(out); @@ -2590,10 +2314,8 @@ void IntrinsicCodeGeneratorMIPS64::VisitIntegerValueOf(HInvoke* invoke) { __ Bind(&allocate); // Otherwise allocate and initialize a new j.l.Integer. - address = dchecked_integral_cast<uint32_t>(reinterpret_cast<uintptr_t>(info.integer)); - __ LoadConst64(calling_convention.GetRegisterAt(0), address); - codegen_->InvokeRuntime(kQuickAllocObjectInitialized, invoke, invoke->GetDexPc()); - CheckEntrypointTypes<kQuickAllocObjectWithChecks, void*, mirror::Class*>(); + codegen_->AllocateInstanceForIntrinsic(invoke->AsInvokeStaticOrDirect(), + info.integer_boot_image_offset); __ StoreToOffset(kStoreWord, in, out, info.value_offset); // `value` is a final field :-( Ideally, we'd merge this memory barrier with the allocation // one. 
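The rewritten constant path replaces the signed pair of comparisons against info.low and info.high with a single unsigned comparison against info.length; the non-constant path emits the same check with Addiu32 followed by Bgeuc, then reuses the difference as the cache index for the Dlsa/Lwu load. The trick in isolation, with assumed example bounds standing in for info.low and info.length (a sketch, not ART code):

#include <cstdint>

constexpr int32_t kLow = -128;     // assumption: typical Integer cache lower bound
constexpr uint32_t kLength = 256;  // assumption: cache covers [-128, 127]

bool InIntegerCache(int32_t value) {
  // If value < kLow, the subtraction wraps to a large unsigned number and the
  // test fails; if value > kLow + kLength - 1, the difference itself is >= kLength.
  return static_cast<uint32_t>(value - kLow) < kLength;
}

On the in-range path the same difference doubles as the array index, so no further arithmetic is needed before scaling by the reference size.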
@@ -2632,6 +2354,9 @@ void IntrinsicCodeGeneratorMIPS64::VisitReachabilityFence(HInvoke* invoke ATTRIB UNIMPLEMENTED_INTRINSIC(MIPS64, ReferenceGetReferent) UNIMPLEMENTED_INTRINSIC(MIPS64, SystemArrayCopy) +UNIMPLEMENTED_INTRINSIC(MIPS64, CRC32Update) +UNIMPLEMENTED_INTRINSIC(MIPS64, CRC32UpdateBytes) +UNIMPLEMENTED_INTRINSIC(MIPS64, CRC32UpdateByteBuffer) UNIMPLEMENTED_INTRINSIC(MIPS64, StringStringIndexOf); UNIMPLEMENTED_INTRINSIC(MIPS64, StringStringIndexOfAfter); diff --git a/compiler/optimizing/intrinsics_mips64.h b/compiler/optimizing/intrinsics_mips64.h index 6f40d90ddb..ca8bc8f55a 100644 --- a/compiler/optimizing/intrinsics_mips64.h +++ b/compiler/optimizing/intrinsics_mips64.h @@ -30,14 +30,14 @@ namespace mips64 { class CodeGeneratorMIPS64; class Mips64Assembler; -class IntrinsicLocationsBuilderMIPS64 FINAL : public IntrinsicVisitor { +class IntrinsicLocationsBuilderMIPS64 final : public IntrinsicVisitor { public: explicit IntrinsicLocationsBuilderMIPS64(CodeGeneratorMIPS64* codegen); // Define visitor methods. #define OPTIMIZING_INTRINSICS(Name, IsStatic, NeedsEnvironmentOrCache, SideEffects, Exceptions, ...) \ - void Visit ## Name(HInvoke* invoke) OVERRIDE; + void Visit ## Name(HInvoke* invoke) override; #include "intrinsics_list.h" INTRINSICS_LIST(OPTIMIZING_INTRINSICS) #undef INTRINSICS_LIST @@ -55,19 +55,21 @@ class IntrinsicLocationsBuilderMIPS64 FINAL : public IntrinsicVisitor { DISALLOW_COPY_AND_ASSIGN(IntrinsicLocationsBuilderMIPS64); }; -class IntrinsicCodeGeneratorMIPS64 FINAL : public IntrinsicVisitor { +class IntrinsicCodeGeneratorMIPS64 final : public IntrinsicVisitor { public: explicit IntrinsicCodeGeneratorMIPS64(CodeGeneratorMIPS64* codegen) : codegen_(codegen) {} // Define visitor methods. #define OPTIMIZING_INTRINSICS(Name, IsStatic, NeedsEnvironmentOrCache, SideEffects, Exceptions, ...) \ - void Visit ## Name(HInvoke* invoke) OVERRIDE; + void Visit ## Name(HInvoke* invoke) override; #include "intrinsics_list.h" INTRINSICS_LIST(OPTIMIZING_INTRINSICS) #undef INTRINSICS_LIST #undef OPTIMIZING_INTRINSICS + bool HasMsa() const; + private: Mips64Assembler* GetAssembler(); diff --git a/compiler/optimizing/intrinsics_utils.h b/compiler/optimizing/intrinsics_utils.h index 8c69d9b643..41947f1ccd 100644 --- a/compiler/optimizing/intrinsics_utils.h +++ b/compiler/optimizing/intrinsics_utils.h @@ -47,7 +47,7 @@ class IntrinsicSlowPath : public SlowPathCode { return calling_convention_visitor.GetMethodLocation(); } - void EmitNativeCode(CodeGenerator* codegen) OVERRIDE { + void EmitNativeCode(CodeGenerator* codegen) override { Assembler* assembler = codegen->GetAssembler(); assembler->Bind(GetEntryLabel()); @@ -73,7 +73,7 @@ class IntrinsicSlowPath : public SlowPathCode { assembler->Jump(GetExitLabel()); } - const char* GetDescription() const OVERRIDE { return "IntrinsicSlowPath"; } + const char* GetDescription() const override { return "IntrinsicSlowPath"; } private: // The instruction where this slow path is happening. 
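Both headers touched above rely on the intrinsics X-macro: intrinsics_list.h defines INTRINSICS_LIST(V) as one V(Name, ...) invocation per recognized intrinsic, and each visitor stamps out a Visit##Name declaration per entry, now spelled with the standard override keyword instead of the OVERRIDE macro. A stripped-down version of the pattern with an invented three-entry list, to show what the preprocessor produces:

// The real list carries many more entries and extra parameters
// (static-ness, environment needs, side effects, exceptions, ...).
#define EXAMPLE_INTRINSICS_LIST(V) \
  V(IntegerBitCount)               \
  V(LongBitCount)                  \
  V(MathSqrt)

class ExampleIntrinsicVisitor {
 public:
#define DECLARE_VISIT(Name) void Visit##Name();
  EXAMPLE_INTRINSICS_LIST(DECLARE_VISIT)
#undef DECLARE_VISIT
  // Expands to:
  //   void VisitIntegerBitCount();
  //   void VisitLongBitCount();
  //   void VisitMathSqrt();
};

Adding a new intrinsic therefore only touches the list; every locations builder and per-architecture code generator picks up the new Visit method automatically.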
diff --git a/compiler/optimizing/intrinsics_x86.cc b/compiler/optimizing/intrinsics_x86.cc index 0763ef2352..de697f0f96 100644 --- a/compiler/optimizing/intrinsics_x86.cc +++ b/compiler/optimizing/intrinsics_x86.cc @@ -40,11 +40,6 @@ namespace art { namespace x86 { -static constexpr int kDoubleNaNHigh = 0x7FF80000; -static constexpr int kDoubleNaNLow = 0x00000000; -static constexpr int64_t kDoubleNaN = INT64_C(0x7FF8000000000000); -static constexpr int32_t kFloatNaN = INT32_C(0x7FC00000); - IntrinsicLocationsBuilderX86::IntrinsicLocationsBuilderX86(CodeGeneratorX86* codegen) : allocator_(codegen->GetGraph()->GetAllocator()), codegen_(codegen) { @@ -87,7 +82,7 @@ class ReadBarrierSystemArrayCopySlowPathX86 : public SlowPathCode { DCHECK(kUseBakerReadBarrier); } - void EmitNativeCode(CodeGenerator* codegen) OVERRIDE { + void EmitNativeCode(CodeGenerator* codegen) override { CodeGeneratorX86* x86_codegen = down_cast<CodeGeneratorX86*>(codegen); LocationSummary* locations = instruction_->GetLocations(); DCHECK(locations->CanCall()); @@ -165,7 +160,7 @@ class ReadBarrierSystemArrayCopySlowPathX86 : public SlowPathCode { __ jmp(GetExitLabel()); } - const char* GetDescription() const OVERRIDE { return "ReadBarrierSystemArrayCopySlowPathX86"; } + const char* GetDescription() const override { return "ReadBarrierSystemArrayCopySlowPathX86"; } private: DISALLOW_COPY_AND_ASSIGN(ReadBarrierSystemArrayCopySlowPathX86); @@ -228,31 +223,31 @@ static void MoveIntToFP(LocationSummary* locations, bool is64bit, X86Assembler* } void IntrinsicLocationsBuilderX86::VisitDoubleDoubleToRawLongBits(HInvoke* invoke) { - CreateFPToIntLocations(allocator_, invoke, /* is64bit */ true); + CreateFPToIntLocations(allocator_, invoke, /* is64bit= */ true); } void IntrinsicLocationsBuilderX86::VisitDoubleLongBitsToDouble(HInvoke* invoke) { - CreateIntToFPLocations(allocator_, invoke, /* is64bit */ true); + CreateIntToFPLocations(allocator_, invoke, /* is64bit= */ true); } void IntrinsicCodeGeneratorX86::VisitDoubleDoubleToRawLongBits(HInvoke* invoke) { - MoveFPToInt(invoke->GetLocations(), /* is64bit */ true, GetAssembler()); + MoveFPToInt(invoke->GetLocations(), /* is64bit= */ true, GetAssembler()); } void IntrinsicCodeGeneratorX86::VisitDoubleLongBitsToDouble(HInvoke* invoke) { - MoveIntToFP(invoke->GetLocations(), /* is64bit */ true, GetAssembler()); + MoveIntToFP(invoke->GetLocations(), /* is64bit= */ true, GetAssembler()); } void IntrinsicLocationsBuilderX86::VisitFloatFloatToRawIntBits(HInvoke* invoke) { - CreateFPToIntLocations(allocator_, invoke, /* is64bit */ false); + CreateFPToIntLocations(allocator_, invoke, /* is64bit= */ false); } void IntrinsicLocationsBuilderX86::VisitFloatIntBitsToFloat(HInvoke* invoke) { - CreateIntToFPLocations(allocator_, invoke, /* is64bit */ false); + CreateIntToFPLocations(allocator_, invoke, /* is64bit= */ false); } void IntrinsicCodeGeneratorX86::VisitFloatFloatToRawIntBits(HInvoke* invoke) { - MoveFPToInt(invoke->GetLocations(), /* is64bit */ false, GetAssembler()); + MoveFPToInt(invoke->GetLocations(), /* is64bit= */ false, GetAssembler()); } void IntrinsicCodeGeneratorX86::VisitFloatIntBitsToFloat(HInvoke* invoke) { - MoveIntToFP(invoke->GetLocations(), /* is64bit */ false, GetAssembler()); + MoveIntToFP(invoke->GetLocations(), /* is64bit= */ false, GetAssembler()); } static void CreateIntToIntLocations(ArenaAllocator* allocator, HInvoke* invoke) { @@ -333,432 +328,6 @@ void IntrinsicCodeGeneratorX86::VisitShortReverseBytes(HInvoke* invoke) { GenReverseBytes(invoke->GetLocations(), 
DataType::Type::kInt16, GetAssembler()); } - -// TODO: Consider Quick's way of doing Double abs through integer operations, as the immediate we -// need is 64b. - -static void CreateFloatToFloat(ArenaAllocator* allocator, HInvoke* invoke) { - // TODO: Enable memory operations when the assembler supports them. - LocationSummary* locations = - new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified); - locations->SetInAt(0, Location::RequiresFpuRegister()); - locations->SetOut(Location::SameAsFirstInput()); - HInvokeStaticOrDirect* static_or_direct = invoke->AsInvokeStaticOrDirect(); - DCHECK(static_or_direct != nullptr); - if (static_or_direct->HasSpecialInput() && - invoke->InputAt(static_or_direct->GetSpecialInputIndex())->IsX86ComputeBaseMethodAddress()) { - // We need addressibility for the constant area. - locations->SetInAt(1, Location::RequiresRegister()); - // We need a temporary to hold the constant. - locations->AddTemp(Location::RequiresFpuRegister()); - } -} - -static void MathAbsFP(HInvoke* invoke, - bool is64bit, - X86Assembler* assembler, - CodeGeneratorX86* codegen) { - LocationSummary* locations = invoke->GetLocations(); - Location output = locations->Out(); - - DCHECK(output.IsFpuRegister()); - if (locations->GetInputCount() == 2 && locations->InAt(1).IsValid()) { - HX86ComputeBaseMethodAddress* method_address = - invoke->InputAt(1)->AsX86ComputeBaseMethodAddress(); - DCHECK(locations->InAt(1).IsRegister()); - // We also have a constant area pointer. - Register constant_area = locations->InAt(1).AsRegister<Register>(); - XmmRegister temp = locations->GetTemp(0).AsFpuRegister<XmmRegister>(); - if (is64bit) { - __ movsd(temp, codegen->LiteralInt64Address( - INT64_C(0x7FFFFFFFFFFFFFFF), method_address, constant_area)); - __ andpd(output.AsFpuRegister<XmmRegister>(), temp); - } else { - __ movss(temp, codegen->LiteralInt32Address( - INT32_C(0x7FFFFFFF), method_address, constant_area)); - __ andps(output.AsFpuRegister<XmmRegister>(), temp); - } - } else { - // Create the right constant on an aligned stack. 
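The x86 MathAbsFP implementation being deleted computed floating-point abs by clearing the IEEE-754 sign bit with andpd/andps, taking the mask either from the constant area or, as in the branch that continues below, from a constant assembled on the stack. The same operation in portable C++, as a reading aid only:

#include <cstdint>
#include <cstring>

double AbsViaSignMask(double x) {
  uint64_t bits;
  std::memcpy(&bits, &x, sizeof(bits));
  bits &= UINT64_C(0x7FFFFFFFFFFFFFFF);  // clear the sign bit, keep exponent and mantissa
  std::memcpy(&x, &bits, sizeof(bits));
  return x;
}

float AbsViaSignMaskF(float x) {
  uint32_t bits;
  std::memcpy(&bits, &x, sizeof(bits));
  bits &= UINT32_C(0x7FFFFFFF);
  std::memcpy(&x, &bits, sizeof(bits));
  return x;
}

Unlike a compare-and-negate sequence, the mask form is branch-free and handles NaNs and signed zeros uniformly.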
- if (is64bit) { - __ subl(ESP, Immediate(8)); - __ pushl(Immediate(0x7FFFFFFF)); - __ pushl(Immediate(0xFFFFFFFF)); - __ andpd(output.AsFpuRegister<XmmRegister>(), Address(ESP, 0)); - } else { - __ subl(ESP, Immediate(12)); - __ pushl(Immediate(0x7FFFFFFF)); - __ andps(output.AsFpuRegister<XmmRegister>(), Address(ESP, 0)); - } - __ addl(ESP, Immediate(16)); - } -} - -void IntrinsicLocationsBuilderX86::VisitMathAbsDouble(HInvoke* invoke) { - CreateFloatToFloat(allocator_, invoke); -} - -void IntrinsicCodeGeneratorX86::VisitMathAbsDouble(HInvoke* invoke) { - MathAbsFP(invoke, /* is64bit */ true, GetAssembler(), codegen_); -} - -void IntrinsicLocationsBuilderX86::VisitMathAbsFloat(HInvoke* invoke) { - CreateFloatToFloat(allocator_, invoke); -} - -void IntrinsicCodeGeneratorX86::VisitMathAbsFloat(HInvoke* invoke) { - MathAbsFP(invoke, /* is64bit */ false, GetAssembler(), codegen_); -} - -static void CreateAbsIntLocation(ArenaAllocator* allocator, HInvoke* invoke) { - LocationSummary* locations = - new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified); - locations->SetInAt(0, Location::RegisterLocation(EAX)); - locations->SetOut(Location::SameAsFirstInput()); - locations->AddTemp(Location::RegisterLocation(EDX)); -} - -static void GenAbsInteger(LocationSummary* locations, X86Assembler* assembler) { - Location output = locations->Out(); - Register out = output.AsRegister<Register>(); - DCHECK_EQ(out, EAX); - Register temp = locations->GetTemp(0).AsRegister<Register>(); - DCHECK_EQ(temp, EDX); - - // Sign extend EAX into EDX. - __ cdq(); - - // XOR EAX with sign. - __ xorl(EAX, EDX); - - // Subtract out sign to correct. - __ subl(EAX, EDX); - - // The result is in EAX. -} - -static void CreateAbsLongLocation(ArenaAllocator* allocator, HInvoke* invoke) { - LocationSummary* locations = - new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified); - locations->SetInAt(0, Location::RequiresRegister()); - locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap); - locations->AddTemp(Location::RequiresRegister()); -} - -static void GenAbsLong(LocationSummary* locations, X86Assembler* assembler) { - Location input = locations->InAt(0); - Register input_lo = input.AsRegisterPairLow<Register>(); - Register input_hi = input.AsRegisterPairHigh<Register>(); - Location output = locations->Out(); - Register output_lo = output.AsRegisterPairLow<Register>(); - Register output_hi = output.AsRegisterPairHigh<Register>(); - Register temp = locations->GetTemp(0).AsRegister<Register>(); - - // Compute the sign into the temporary. - __ movl(temp, input_hi); - __ sarl(temp, Immediate(31)); - - // Store the sign into the output. - __ movl(output_lo, temp); - __ movl(output_hi, temp); - - // XOR the input to the output. - __ xorl(output_lo, input_lo); - __ xorl(output_hi, input_hi); - - // Subtract the sign. 
- __ subl(output_lo, temp); - __ sbbl(output_hi, temp); -} - -void IntrinsicLocationsBuilderX86::VisitMathAbsInt(HInvoke* invoke) { - CreateAbsIntLocation(allocator_, invoke); -} - -void IntrinsicCodeGeneratorX86::VisitMathAbsInt(HInvoke* invoke) { - GenAbsInteger(invoke->GetLocations(), GetAssembler()); -} - -void IntrinsicLocationsBuilderX86::VisitMathAbsLong(HInvoke* invoke) { - CreateAbsLongLocation(allocator_, invoke); -} - -void IntrinsicCodeGeneratorX86::VisitMathAbsLong(HInvoke* invoke) { - GenAbsLong(invoke->GetLocations(), GetAssembler()); -} - -static void GenMinMaxFP(HInvoke* invoke, - bool is_min, - bool is_double, - X86Assembler* assembler, - CodeGeneratorX86* codegen) { - LocationSummary* locations = invoke->GetLocations(); - Location op1_loc = locations->InAt(0); - Location op2_loc = locations->InAt(1); - Location out_loc = locations->Out(); - XmmRegister out = out_loc.AsFpuRegister<XmmRegister>(); - - // Shortcut for same input locations. - if (op1_loc.Equals(op2_loc)) { - DCHECK(out_loc.Equals(op1_loc)); - return; - } - - // (out := op1) - // out <=? op2 - // if Nan jmp Nan_label - // if out is min jmp done - // if op2 is min jmp op2_label - // handle -0/+0 - // jmp done - // Nan_label: - // out := NaN - // op2_label: - // out := op2 - // done: - // - // This removes one jmp, but needs to copy one input (op1) to out. - // - // TODO: This is straight from Quick (except literal pool). Make NaN an out-of-line slowpath? - - XmmRegister op2 = op2_loc.AsFpuRegister<XmmRegister>(); - - NearLabel nan, done, op2_label; - if (is_double) { - __ ucomisd(out, op2); - } else { - __ ucomiss(out, op2); - } - - __ j(Condition::kParityEven, &nan); - - __ j(is_min ? Condition::kAbove : Condition::kBelow, &op2_label); - __ j(is_min ? Condition::kBelow : Condition::kAbove, &done); - - // Handle 0.0/-0.0. - if (is_min) { - if (is_double) { - __ orpd(out, op2); - } else { - __ orps(out, op2); - } - } else { - if (is_double) { - __ andpd(out, op2); - } else { - __ andps(out, op2); - } - } - __ jmp(&done); - - // NaN handling. - __ Bind(&nan); - // Do we have a constant area pointer? - if (locations->GetInputCount() == 3 && locations->InAt(2).IsValid()) { - HX86ComputeBaseMethodAddress* method_address = - invoke->InputAt(2)->AsX86ComputeBaseMethodAddress(); - DCHECK(locations->InAt(2).IsRegister()); - Register constant_area = locations->InAt(2).AsRegister<Register>(); - if (is_double) { - __ movsd(out, codegen->LiteralInt64Address(kDoubleNaN, method_address, constant_area)); - } else { - __ movss(out, codegen->LiteralInt32Address(kFloatNaN, method_address, constant_area)); - } - } else { - if (is_double) { - __ pushl(Immediate(kDoubleNaNHigh)); - __ pushl(Immediate(kDoubleNaNLow)); - __ movsd(out, Address(ESP, 0)); - __ addl(ESP, Immediate(8)); - } else { - __ pushl(Immediate(kFloatNaN)); - __ movss(out, Address(ESP, 0)); - __ addl(ESP, Immediate(4)); - } - } - __ jmp(&done); - - // out := op2; - __ Bind(&op2_label); - if (is_double) { - __ movsd(out, op2); - } else { - __ movss(out, op2); - } - - // Done. - __ Bind(&done); -} - -static void CreateFPFPToFPLocations(ArenaAllocator* allocator, HInvoke* invoke) { - LocationSummary* locations = - new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified); - locations->SetInAt(0, Location::RequiresFpuRegister()); - locations->SetInAt(1, Location::RequiresFpuRegister()); - // The following is sub-optimal, but all we can do for now. It would be fine to also accept - // the second input to be the output (we can simply swap inputs). 
- locations->SetOut(Location::SameAsFirstInput()); - HInvokeStaticOrDirect* static_or_direct = invoke->AsInvokeStaticOrDirect(); - DCHECK(static_or_direct != nullptr); - if (static_or_direct->HasSpecialInput() && - invoke->InputAt(static_or_direct->GetSpecialInputIndex())->IsX86ComputeBaseMethodAddress()) { - locations->SetInAt(2, Location::RequiresRegister()); - } -} - -void IntrinsicLocationsBuilderX86::VisitMathMinDoubleDouble(HInvoke* invoke) { - CreateFPFPToFPLocations(allocator_, invoke); -} - -void IntrinsicCodeGeneratorX86::VisitMathMinDoubleDouble(HInvoke* invoke) { - GenMinMaxFP(invoke, - /* is_min */ true, - /* is_double */ true, - GetAssembler(), - codegen_); -} - -void IntrinsicLocationsBuilderX86::VisitMathMinFloatFloat(HInvoke* invoke) { - CreateFPFPToFPLocations(allocator_, invoke); -} - -void IntrinsicCodeGeneratorX86::VisitMathMinFloatFloat(HInvoke* invoke) { - GenMinMaxFP(invoke, - /* is_min */ true, - /* is_double */ false, - GetAssembler(), - codegen_); -} - -void IntrinsicLocationsBuilderX86::VisitMathMaxDoubleDouble(HInvoke* invoke) { - CreateFPFPToFPLocations(allocator_, invoke); -} - -void IntrinsicCodeGeneratorX86::VisitMathMaxDoubleDouble(HInvoke* invoke) { - GenMinMaxFP(invoke, - /* is_min */ false, - /* is_double */ true, - GetAssembler(), - codegen_); -} - -void IntrinsicLocationsBuilderX86::VisitMathMaxFloatFloat(HInvoke* invoke) { - CreateFPFPToFPLocations(allocator_, invoke); -} - -void IntrinsicCodeGeneratorX86::VisitMathMaxFloatFloat(HInvoke* invoke) { - GenMinMaxFP(invoke, - /* is_min */ false, - /* is_double */ false, - GetAssembler(), - codegen_); -} - -static void GenMinMax(LocationSummary* locations, bool is_min, bool is_long, - X86Assembler* assembler) { - Location op1_loc = locations->InAt(0); - Location op2_loc = locations->InAt(1); - - // Shortcut for same input locations. - if (op1_loc.Equals(op2_loc)) { - // Can return immediately, as op1_loc == out_loc. - // Note: if we ever support separate registers, e.g., output into memory, we need to check for - // a copy here. - DCHECK(locations->Out().Equals(op1_loc)); - return; - } - - if (is_long) { - // Need to perform a subtract to get the sign right. - // op1 is already in the same location as the output. - Location output = locations->Out(); - Register output_lo = output.AsRegisterPairLow<Register>(); - Register output_hi = output.AsRegisterPairHigh<Register>(); - - Register op2_lo = op2_loc.AsRegisterPairLow<Register>(); - Register op2_hi = op2_loc.AsRegisterPairHigh<Register>(); - - // Spare register to compute the subtraction to set condition code. - Register temp = locations->GetTemp(0).AsRegister<Register>(); - - // Subtract off op2_low. - __ movl(temp, output_lo); - __ subl(temp, op2_lo); - - // Now use the same tempo and the borrow to finish the subtraction of op2_hi. - __ movl(temp, output_hi); - __ sbbl(temp, op2_hi); - - // Now the condition code is correct. - Condition cond = is_min ? Condition::kGreaterEqual : Condition::kLess; - __ cmovl(cond, output_lo, op2_lo); - __ cmovl(cond, output_hi, op2_hi); - } else { - Register out = locations->Out().AsRegister<Register>(); - Register op2 = op2_loc.AsRegister<Register>(); - - // (out := op1) - // out <=? op2 - // if out is min jmp done - // out := op2 - // done: - - __ cmpl(out, op2); - Condition cond = is_min ? 
Condition::kGreater : Condition::kLess; - __ cmovl(cond, out, op2); - } -} - -static void CreateIntIntToIntLocations(ArenaAllocator* allocator, HInvoke* invoke) { - LocationSummary* locations = - new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified); - locations->SetInAt(0, Location::RequiresRegister()); - locations->SetInAt(1, Location::RequiresRegister()); - locations->SetOut(Location::SameAsFirstInput()); -} - -static void CreateLongLongToLongLocations(ArenaAllocator* allocator, HInvoke* invoke) { - LocationSummary* locations = - new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified); - locations->SetInAt(0, Location::RequiresRegister()); - locations->SetInAt(1, Location::RequiresRegister()); - locations->SetOut(Location::SameAsFirstInput()); - // Register to use to perform a long subtract to set cc. - locations->AddTemp(Location::RequiresRegister()); -} - -void IntrinsicLocationsBuilderX86::VisitMathMinIntInt(HInvoke* invoke) { - CreateIntIntToIntLocations(allocator_, invoke); -} - -void IntrinsicCodeGeneratorX86::VisitMathMinIntInt(HInvoke* invoke) { - GenMinMax(invoke->GetLocations(), /* is_min */ true, /* is_long */ false, GetAssembler()); -} - -void IntrinsicLocationsBuilderX86::VisitMathMinLongLong(HInvoke* invoke) { - CreateLongLongToLongLocations(allocator_, invoke); -} - -void IntrinsicCodeGeneratorX86::VisitMathMinLongLong(HInvoke* invoke) { - GenMinMax(invoke->GetLocations(), /* is_min */ true, /* is_long */ true, GetAssembler()); -} - -void IntrinsicLocationsBuilderX86::VisitMathMaxIntInt(HInvoke* invoke) { - CreateIntIntToIntLocations(allocator_, invoke); -} - -void IntrinsicCodeGeneratorX86::VisitMathMaxIntInt(HInvoke* invoke) { - GenMinMax(invoke->GetLocations(), /* is_min */ false, /* is_long */ false, GetAssembler()); -} - -void IntrinsicLocationsBuilderX86::VisitMathMaxLongLong(HInvoke* invoke) { - CreateLongLongToLongLocations(allocator_, invoke); -} - -void IntrinsicCodeGeneratorX86::VisitMathMaxLongLong(HInvoke* invoke) { - GenMinMax(invoke->GetLocations(), /* is_min */ false, /* is_long */ true, GetAssembler()); -} - static void CreateFPToFPLocations(ArenaAllocator* allocator, HInvoke* invoke) { LocationSummary* locations = new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified); @@ -976,6 +545,96 @@ static void GenFPToFPCall(HInvoke* invoke, CodeGeneratorX86* codegen, QuickEntry __ cfi().AdjustCFAOffset(-16); } +static void CreateLowestOneBitLocations(ArenaAllocator* allocator, bool is_long, HInvoke* invoke) { + LocationSummary* locations = + new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified); + if (is_long) { + locations->SetInAt(0, Location::RequiresRegister()); + } else { + locations->SetInAt(0, Location::Any()); + } + locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap); +} + +static void GenLowestOneBit(X86Assembler* assembler, + CodeGeneratorX86* codegen, + bool is_long, + HInvoke* invoke) { + LocationSummary* locations = invoke->GetLocations(); + Location src = locations->InAt(0); + Location out_loc = locations->Out(); + + if (invoke->InputAt(0)->IsConstant()) { + // Evaluate this at compile time. 
+ int64_t value = Int64FromConstant(invoke->InputAt(0)->AsConstant()); + if (value == 0) { + if (is_long) { + __ xorl(out_loc.AsRegisterPairLow<Register>(), out_loc.AsRegisterPairLow<Register>()); + __ xorl(out_loc.AsRegisterPairHigh<Register>(), out_loc.AsRegisterPairHigh<Register>()); + } else { + __ xorl(out_loc.AsRegister<Register>(), out_loc.AsRegister<Register>()); + } + return; + } + // Nonzero value. + value = is_long ? CTZ(static_cast<uint64_t>(value)) + : CTZ(static_cast<uint32_t>(value)); + if (is_long) { + if (value >= 32) { + int shift = value-32; + codegen->Load32BitValue(out_loc.AsRegisterPairLow<Register>(), 0); + codegen->Load32BitValue(out_loc.AsRegisterPairHigh<Register>(), 1 << shift); + } else { + codegen->Load32BitValue(out_loc.AsRegisterPairLow<Register>(), 1 << value); + codegen->Load32BitValue(out_loc.AsRegisterPairHigh<Register>(), 0); + } + } else { + codegen->Load32BitValue(out_loc.AsRegister<Register>(), 1 << value); + } + return; + } + // Handle non constant case + if (is_long) { + DCHECK(src.IsRegisterPair()); + Register src_lo = src.AsRegisterPairLow<Register>(); + Register src_hi = src.AsRegisterPairHigh<Register>(); + + Register out_lo = out_loc.AsRegisterPairLow<Register>(); + Register out_hi = out_loc.AsRegisterPairHigh<Register>(); + + __ movl(out_lo, src_lo); + __ movl(out_hi, src_hi); + + __ negl(out_lo); + __ adcl(out_hi, Immediate(0)); + __ negl(out_hi); + + __ andl(out_lo, src_lo); + __ andl(out_hi, src_hi); + } else { + if (codegen->GetInstructionSetFeatures().HasAVX2() && src.IsRegister()) { + Register out = out_loc.AsRegister<Register>(); + __ blsi(out, src.AsRegister<Register>()); + } else { + Register out = out_loc.AsRegister<Register>(); + // Do tmp & -tmp + if (src.IsRegister()) { + __ movl(out, src.AsRegister<Register>()); + } else { + DCHECK(src.IsStackSlot()); + __ movl(out, Address(ESP, src.GetStackIndex())); + } + __ negl(out); + + if (src.IsRegister()) { + __ andl(out, src.AsRegister<Register>()); + } else { + __ andl(out, Address(ESP, src.GetStackIndex())); + } + } + } +} + void IntrinsicLocationsBuilderX86::VisitMathCos(HInvoke* invoke) { CreateFPToFPCallLocations(allocator_, invoke); } @@ -1088,6 +747,21 @@ void IntrinsicCodeGeneratorX86::VisitMathTanh(HInvoke* invoke) { GenFPToFPCall(invoke, codegen_, kQuickTanh); } +void IntrinsicLocationsBuilderX86::VisitIntegerLowestOneBit(HInvoke* invoke) { + CreateLowestOneBitLocations(allocator_, /*is_long=*/ false, invoke); +} +void IntrinsicCodeGeneratorX86::VisitIntegerLowestOneBit(HInvoke* invoke) { + GenLowestOneBit(GetAssembler(), codegen_, /*is_long=*/ false, invoke); +} + +void IntrinsicLocationsBuilderX86::VisitLongLowestOneBit(HInvoke* invoke) { + CreateLowestOneBitLocations(allocator_, /*is_long=*/ true, invoke); +} + +void IntrinsicCodeGeneratorX86::VisitLongLowestOneBit(HInvoke* invoke) { + GenLowestOneBit(GetAssembler(), codegen_, /*is_long=*/ true, invoke); +} + static void CreateFPFPToFPCallLocations(ArenaAllocator* allocator, HInvoke* invoke) { LocationSummary* locations = new (allocator) LocationSummary(invoke, LocationSummary::kCallOnMainOnly, kIntrinsified); @@ -1353,13 +1027,6 @@ void IntrinsicCodeGeneratorX86::VisitStringCompareTo(HInvoke* invoke) { } void IntrinsicLocationsBuilderX86::VisitStringEquals(HInvoke* invoke) { - if (kEmitCompilerReadBarrier && - !StringEqualsOptimizations(invoke).GetArgumentIsString() && - !StringEqualsOptimizations(invoke).GetNoReadBarrierForStringClass()) { - // No support for this odd case (String class is moveable, not in the boot image). 
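The GenLowestOneBit helper added for x86 above implements Integer.lowestOneBit and Long.lowestOneBit either with blsi (behind the HasAVX2() feature check) or with the negate-and-AND identity; for the 64-bit case on x86-32 the negation is spread over the register pair as negl/adcl/negl before the two andl instructions. The identity itself, as a plain C++ reference (not ART code):

#include <cstdint>

// x & -x keeps only the lowest set bit and maps 0 to 0; blsi encodes the same
// computation in a single instruction.
uint32_t LowestOneBit32(uint32_t x) {
  return x & (0u - x);
}

uint64_t LowestOneBit64(uint64_t x) {
  return x & (0ull - x);
}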
- return; - } - LocationSummary* locations = new (allocator_) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified); locations->SetInAt(0, Location::RequiresRegister()); @@ -1405,7 +1072,15 @@ void IntrinsicCodeGeneratorX86::VisitStringEquals(HInvoke* invoke) { // All string objects must have the same type since String cannot be subclassed. // Receiver must be a string object, so its class field is equal to all strings' class fields. // If the argument is a string object, its class field must be equal to receiver's class field. + // + // As the String class is expected to be non-movable, we can read the class + // field from String.equals' arguments without read barriers. + AssertNonMovableStringClass(); + // Also, because we use the loaded class references only to compare them, we + // don't need to unpoison them. + // /* HeapReference<Class> */ ecx = str->klass_ __ movl(ecx, Address(str, class_offset)); + // if (ecx != /* HeapReference<Class> */ arg->klass_) return false __ cmpl(ecx, Address(arg, class_offset)); __ j(kNotEqual, &return_false); } @@ -1650,19 +1325,19 @@ static void GenerateStringIndexOf(HInvoke* invoke, } void IntrinsicLocationsBuilderX86::VisitStringIndexOf(HInvoke* invoke) { - CreateStringIndexOfLocations(invoke, allocator_, /* start_at_zero */ true); + CreateStringIndexOfLocations(invoke, allocator_, /* start_at_zero= */ true); } void IntrinsicCodeGeneratorX86::VisitStringIndexOf(HInvoke* invoke) { - GenerateStringIndexOf(invoke, GetAssembler(), codegen_, /* start_at_zero */ true); + GenerateStringIndexOf(invoke, GetAssembler(), codegen_, /* start_at_zero= */ true); } void IntrinsicLocationsBuilderX86::VisitStringIndexOfAfter(HInvoke* invoke) { - CreateStringIndexOfLocations(invoke, allocator_, /* start_at_zero */ false); + CreateStringIndexOfLocations(invoke, allocator_, /* start_at_zero= */ false); } void IntrinsicCodeGeneratorX86::VisitStringIndexOfAfter(HInvoke* invoke) { - GenerateStringIndexOf(invoke, GetAssembler(), codegen_, /* start_at_zero */ false); + GenerateStringIndexOf(invoke, GetAssembler(), codegen_, /* start_at_zero= */ false); } void IntrinsicLocationsBuilderX86::VisitStringNewStringFromBytes(HInvoke* invoke) { @@ -2024,7 +1699,7 @@ static void GenUnsafeGet(HInvoke* invoke, if (kUseBakerReadBarrier) { Address src(base, offset, ScaleFactor::TIMES_1, 0); codegen->GenerateReferenceLoadWithBakerReadBarrier( - invoke, output_loc, base, src, /* needs_null_check */ false); + invoke, output_loc, base, src, /* needs_null_check= */ false); } else { __ movl(output, Address(base, offset, ScaleFactor::TIMES_1, 0)); codegen->GenerateReadBarrierSlow( @@ -2095,45 +1770,45 @@ static void CreateIntIntIntToIntLocations(ArenaAllocator* allocator, void IntrinsicLocationsBuilderX86::VisitUnsafeGet(HInvoke* invoke) { CreateIntIntIntToIntLocations( - allocator_, invoke, DataType::Type::kInt32, /* is_volatile */ false); + allocator_, invoke, DataType::Type::kInt32, /* is_volatile= */ false); } void IntrinsicLocationsBuilderX86::VisitUnsafeGetVolatile(HInvoke* invoke) { - CreateIntIntIntToIntLocations(allocator_, invoke, DataType::Type::kInt32, /* is_volatile */ true); + CreateIntIntIntToIntLocations(allocator_, invoke, DataType::Type::kInt32, /* is_volatile= */ true); } void IntrinsicLocationsBuilderX86::VisitUnsafeGetLong(HInvoke* invoke) { CreateIntIntIntToIntLocations( - allocator_, invoke, DataType::Type::kInt64, /* is_volatile */ false); + allocator_, invoke, DataType::Type::kInt64, /* is_volatile= */ false); } void 
IntrinsicLocationsBuilderX86::VisitUnsafeGetLongVolatile(HInvoke* invoke) { - CreateIntIntIntToIntLocations(allocator_, invoke, DataType::Type::kInt64, /* is_volatile */ true); + CreateIntIntIntToIntLocations(allocator_, invoke, DataType::Type::kInt64, /* is_volatile= */ true); } void IntrinsicLocationsBuilderX86::VisitUnsafeGetObject(HInvoke* invoke) { CreateIntIntIntToIntLocations( - allocator_, invoke, DataType::Type::kReference, /* is_volatile */ false); + allocator_, invoke, DataType::Type::kReference, /* is_volatile= */ false); } void IntrinsicLocationsBuilderX86::VisitUnsafeGetObjectVolatile(HInvoke* invoke) { CreateIntIntIntToIntLocations( - allocator_, invoke, DataType::Type::kReference, /* is_volatile */ true); + allocator_, invoke, DataType::Type::kReference, /* is_volatile= */ true); } void IntrinsicCodeGeneratorX86::VisitUnsafeGet(HInvoke* invoke) { - GenUnsafeGet(invoke, DataType::Type::kInt32, /* is_volatile */ false, codegen_); + GenUnsafeGet(invoke, DataType::Type::kInt32, /* is_volatile= */ false, codegen_); } void IntrinsicCodeGeneratorX86::VisitUnsafeGetVolatile(HInvoke* invoke) { - GenUnsafeGet(invoke, DataType::Type::kInt32, /* is_volatile */ true, codegen_); + GenUnsafeGet(invoke, DataType::Type::kInt32, /* is_volatile= */ true, codegen_); } void IntrinsicCodeGeneratorX86::VisitUnsafeGetLong(HInvoke* invoke) { - GenUnsafeGet(invoke, DataType::Type::kInt64, /* is_volatile */ false, codegen_); + GenUnsafeGet(invoke, DataType::Type::kInt64, /* is_volatile= */ false, codegen_); } void IntrinsicCodeGeneratorX86::VisitUnsafeGetLongVolatile(HInvoke* invoke) { - GenUnsafeGet(invoke, DataType::Type::kInt64, /* is_volatile */ true, codegen_); + GenUnsafeGet(invoke, DataType::Type::kInt64, /* is_volatile= */ true, codegen_); } void IntrinsicCodeGeneratorX86::VisitUnsafeGetObject(HInvoke* invoke) { - GenUnsafeGet(invoke, DataType::Type::kReference, /* is_volatile */ false, codegen_); + GenUnsafeGet(invoke, DataType::Type::kReference, /* is_volatile= */ false, codegen_); } void IntrinsicCodeGeneratorX86::VisitUnsafeGetObjectVolatile(HInvoke* invoke) { - GenUnsafeGet(invoke, DataType::Type::kReference, /* is_volatile */ true, codegen_); + GenUnsafeGet(invoke, DataType::Type::kReference, /* is_volatile= */ true, codegen_); } @@ -2160,39 +1835,39 @@ static void CreateIntIntIntIntToVoidPlusTempsLocations(ArenaAllocator* allocator void IntrinsicLocationsBuilderX86::VisitUnsafePut(HInvoke* invoke) { CreateIntIntIntIntToVoidPlusTempsLocations( - allocator_, DataType::Type::kInt32, invoke, /* is_volatile */ false); + allocator_, DataType::Type::kInt32, invoke, /* is_volatile= */ false); } void IntrinsicLocationsBuilderX86::VisitUnsafePutOrdered(HInvoke* invoke) { CreateIntIntIntIntToVoidPlusTempsLocations( - allocator_, DataType::Type::kInt32, invoke, /* is_volatile */ false); + allocator_, DataType::Type::kInt32, invoke, /* is_volatile= */ false); } void IntrinsicLocationsBuilderX86::VisitUnsafePutVolatile(HInvoke* invoke) { CreateIntIntIntIntToVoidPlusTempsLocations( - allocator_, DataType::Type::kInt32, invoke, /* is_volatile */ true); + allocator_, DataType::Type::kInt32, invoke, /* is_volatile= */ true); } void IntrinsicLocationsBuilderX86::VisitUnsafePutObject(HInvoke* invoke) { CreateIntIntIntIntToVoidPlusTempsLocations( - allocator_, DataType::Type::kReference, invoke, /* is_volatile */ false); + allocator_, DataType::Type::kReference, invoke, /* is_volatile= */ false); } void IntrinsicLocationsBuilderX86::VisitUnsafePutObjectOrdered(HInvoke* invoke) { 
CreateIntIntIntIntToVoidPlusTempsLocations( - allocator_, DataType::Type::kReference, invoke, /* is_volatile */ false); + allocator_, DataType::Type::kReference, invoke, /* is_volatile= */ false); } void IntrinsicLocationsBuilderX86::VisitUnsafePutObjectVolatile(HInvoke* invoke) { CreateIntIntIntIntToVoidPlusTempsLocations( - allocator_, DataType::Type::kReference, invoke, /* is_volatile */ true); + allocator_, DataType::Type::kReference, invoke, /* is_volatile= */ true); } void IntrinsicLocationsBuilderX86::VisitUnsafePutLong(HInvoke* invoke) { CreateIntIntIntIntToVoidPlusTempsLocations( - allocator_, DataType::Type::kInt64, invoke, /* is_volatile */ false); + allocator_, DataType::Type::kInt64, invoke, /* is_volatile= */ false); } void IntrinsicLocationsBuilderX86::VisitUnsafePutLongOrdered(HInvoke* invoke) { CreateIntIntIntIntToVoidPlusTempsLocations( - allocator_, DataType::Type::kInt64, invoke, /* is_volatile */ false); + allocator_, DataType::Type::kInt64, invoke, /* is_volatile= */ false); } void IntrinsicLocationsBuilderX86::VisitUnsafePutLongVolatile(HInvoke* invoke) { CreateIntIntIntIntToVoidPlusTempsLocations( - allocator_, DataType::Type::kInt64, invoke, /* is_volatile */ true); + allocator_, DataType::Type::kInt64, invoke, /* is_volatile= */ true); } // We don't care for ordered: it requires an AnyStore barrier, which is already given by the x86 @@ -2244,34 +1919,34 @@ static void GenUnsafePut(LocationSummary* locations, } void IntrinsicCodeGeneratorX86::VisitUnsafePut(HInvoke* invoke) { - GenUnsafePut(invoke->GetLocations(), DataType::Type::kInt32, /* is_volatile */ false, codegen_); + GenUnsafePut(invoke->GetLocations(), DataType::Type::kInt32, /* is_volatile= */ false, codegen_); } void IntrinsicCodeGeneratorX86::VisitUnsafePutOrdered(HInvoke* invoke) { - GenUnsafePut(invoke->GetLocations(), DataType::Type::kInt32, /* is_volatile */ false, codegen_); + GenUnsafePut(invoke->GetLocations(), DataType::Type::kInt32, /* is_volatile= */ false, codegen_); } void IntrinsicCodeGeneratorX86::VisitUnsafePutVolatile(HInvoke* invoke) { - GenUnsafePut(invoke->GetLocations(), DataType::Type::kInt32, /* is_volatile */ true, codegen_); + GenUnsafePut(invoke->GetLocations(), DataType::Type::kInt32, /* is_volatile= */ true, codegen_); } void IntrinsicCodeGeneratorX86::VisitUnsafePutObject(HInvoke* invoke) { GenUnsafePut( - invoke->GetLocations(), DataType::Type::kReference, /* is_volatile */ false, codegen_); + invoke->GetLocations(), DataType::Type::kReference, /* is_volatile= */ false, codegen_); } void IntrinsicCodeGeneratorX86::VisitUnsafePutObjectOrdered(HInvoke* invoke) { GenUnsafePut( - invoke->GetLocations(), DataType::Type::kReference, /* is_volatile */ false, codegen_); + invoke->GetLocations(), DataType::Type::kReference, /* is_volatile= */ false, codegen_); } void IntrinsicCodeGeneratorX86::VisitUnsafePutObjectVolatile(HInvoke* invoke) { GenUnsafePut( - invoke->GetLocations(), DataType::Type::kReference, /* is_volatile */ true, codegen_); + invoke->GetLocations(), DataType::Type::kReference, /* is_volatile= */ true, codegen_); } void IntrinsicCodeGeneratorX86::VisitUnsafePutLong(HInvoke* invoke) { - GenUnsafePut(invoke->GetLocations(), DataType::Type::kInt64, /* is_volatile */ false, codegen_); + GenUnsafePut(invoke->GetLocations(), DataType::Type::kInt64, /* is_volatile= */ false, codegen_); } void IntrinsicCodeGeneratorX86::VisitUnsafePutLongOrdered(HInvoke* invoke) { - GenUnsafePut(invoke->GetLocations(), DataType::Type::kInt64, /* is_volatile */ false, codegen_); + 
GenUnsafePut(invoke->GetLocations(), DataType::Type::kInt64, /* is_volatile= */ false, codegen_); } void IntrinsicCodeGeneratorX86::VisitUnsafePutLongVolatile(HInvoke* invoke) { - GenUnsafePut(invoke->GetLocations(), DataType::Type::kInt64, /* is_volatile */ true, codegen_); + GenUnsafePut(invoke->GetLocations(), DataType::Type::kInt64, /* is_volatile= */ true, codegen_); } static void CreateIntIntIntIntIntToInt(ArenaAllocator* allocator, @@ -2368,8 +2043,8 @@ static void GenCAS(DataType::Type type, HInvoke* invoke, CodeGeneratorX86* codeg temp1_loc, // Unused, used only as a "temporary" within the read barrier. base, field_addr, - /* needs_null_check */ false, - /* always_update_field */ true, + /* needs_null_check= */ false, + /* always_update_field= */ true, &temp2); } @@ -2600,19 +2275,19 @@ static void GenBitCount(X86Assembler* assembler, } void IntrinsicLocationsBuilderX86::VisitIntegerBitCount(HInvoke* invoke) { - CreateBitCountLocations(allocator_, codegen_, invoke, /* is_long */ false); + CreateBitCountLocations(allocator_, codegen_, invoke, /* is_long= */ false); } void IntrinsicCodeGeneratorX86::VisitIntegerBitCount(HInvoke* invoke) { - GenBitCount(GetAssembler(), codegen_, invoke, /* is_long */ false); + GenBitCount(GetAssembler(), codegen_, invoke, /* is_long= */ false); } void IntrinsicLocationsBuilderX86::VisitLongBitCount(HInvoke* invoke) { - CreateBitCountLocations(allocator_, codegen_, invoke, /* is_long */ true); + CreateBitCountLocations(allocator_, codegen_, invoke, /* is_long= */ true); } void IntrinsicCodeGeneratorX86::VisitLongBitCount(HInvoke* invoke) { - GenBitCount(GetAssembler(), codegen_, invoke, /* is_long */ true); + GenBitCount(GetAssembler(), codegen_, invoke, /* is_long= */ true); } static void CreateLeadingZeroLocations(ArenaAllocator* allocator, HInvoke* invoke, bool is_long) { @@ -2704,19 +2379,19 @@ static void GenLeadingZeros(X86Assembler* assembler, } void IntrinsicLocationsBuilderX86::VisitIntegerNumberOfLeadingZeros(HInvoke* invoke) { - CreateLeadingZeroLocations(allocator_, invoke, /* is_long */ false); + CreateLeadingZeroLocations(allocator_, invoke, /* is_long= */ false); } void IntrinsicCodeGeneratorX86::VisitIntegerNumberOfLeadingZeros(HInvoke* invoke) { - GenLeadingZeros(GetAssembler(), codegen_, invoke, /* is_long */ false); + GenLeadingZeros(GetAssembler(), codegen_, invoke, /* is_long= */ false); } void IntrinsicLocationsBuilderX86::VisitLongNumberOfLeadingZeros(HInvoke* invoke) { - CreateLeadingZeroLocations(allocator_, invoke, /* is_long */ true); + CreateLeadingZeroLocations(allocator_, invoke, /* is_long= */ true); } void IntrinsicCodeGeneratorX86::VisitLongNumberOfLeadingZeros(HInvoke* invoke) { - GenLeadingZeros(GetAssembler(), codegen_, invoke, /* is_long */ true); + GenLeadingZeros(GetAssembler(), codegen_, invoke, /* is_long= */ true); } static void CreateTrailingZeroLocations(ArenaAllocator* allocator, HInvoke* invoke, bool is_long) { @@ -2795,19 +2470,19 @@ static void GenTrailingZeros(X86Assembler* assembler, } void IntrinsicLocationsBuilderX86::VisitIntegerNumberOfTrailingZeros(HInvoke* invoke) { - CreateTrailingZeroLocations(allocator_, invoke, /* is_long */ false); + CreateTrailingZeroLocations(allocator_, invoke, /* is_long= */ false); } void IntrinsicCodeGeneratorX86::VisitIntegerNumberOfTrailingZeros(HInvoke* invoke) { - GenTrailingZeros(GetAssembler(), codegen_, invoke, /* is_long */ false); + GenTrailingZeros(GetAssembler(), codegen_, invoke, /* is_long= */ false); } void 
IntrinsicLocationsBuilderX86::VisitLongNumberOfTrailingZeros(HInvoke* invoke) { - CreateTrailingZeroLocations(allocator_, invoke, /* is_long */ true); + CreateTrailingZeroLocations(allocator_, invoke, /* is_long= */ true); } void IntrinsicCodeGeneratorX86::VisitLongNumberOfTrailingZeros(HInvoke* invoke) { - GenTrailingZeros(GetAssembler(), codegen_, invoke, /* is_long */ true); + GenTrailingZeros(GetAssembler(), codegen_, invoke, /* is_long= */ true); } static bool IsSameInput(HInstruction* instruction, size_t input0, size_t input1) { @@ -3015,11 +2690,11 @@ void IntrinsicCodeGeneratorX86::VisitSystemArrayCopy(HInvoke* invoke) { if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) { // /* HeapReference<Class> */ temp1 = src->klass_ codegen_->GenerateFieldLoadWithBakerReadBarrier( - invoke, temp1_loc, src, class_offset, /* needs_null_check */ false); + invoke, temp1_loc, src, class_offset, /* needs_null_check= */ false); // Bail out if the source is not a non primitive array. // /* HeapReference<Class> */ temp1 = temp1->component_type_ codegen_->GenerateFieldLoadWithBakerReadBarrier( - invoke, temp1_loc, temp1, component_offset, /* needs_null_check */ false); + invoke, temp1_loc, temp1, component_offset, /* needs_null_check= */ false); __ testl(temp1, temp1); __ j(kEqual, intrinsic_slow_path->GetEntryLabel()); // If heap poisoning is enabled, `temp1` has been unpoisoned @@ -3052,7 +2727,7 @@ void IntrinsicCodeGeneratorX86::VisitSystemArrayCopy(HInvoke* invoke) { // /* HeapReference<Class> */ temp1 = dest->klass_ codegen_->GenerateFieldLoadWithBakerReadBarrier( - invoke, temp1_loc, dest, class_offset, /* needs_null_check */ false); + invoke, temp1_loc, dest, class_offset, /* needs_null_check= */ false); if (!optimizations.GetDestinationIsNonPrimitiveArray()) { // Bail out if the destination is not a non primitive array. @@ -3064,7 +2739,7 @@ void IntrinsicCodeGeneratorX86::VisitSystemArrayCopy(HInvoke* invoke) { // temporaries such a `temp1`. // /* HeapReference<Class> */ temp2 = temp1->component_type_ codegen_->GenerateFieldLoadWithBakerReadBarrier( - invoke, temp2_loc, temp1, component_offset, /* needs_null_check */ false); + invoke, temp2_loc, temp1, component_offset, /* needs_null_check= */ false); __ testl(temp2, temp2); __ j(kEqual, intrinsic_slow_path->GetEntryLabel()); // If heap poisoning is enabled, `temp2` has been unpoisoned @@ -3077,7 +2752,7 @@ void IntrinsicCodeGeneratorX86::VisitSystemArrayCopy(HInvoke* invoke) { // read barrier emitted by GenerateFieldLoadWithBakerReadBarrier below. // /* HeapReference<Class> */ temp2 = src->klass_ codegen_->GenerateFieldLoadWithBakerReadBarrier( - invoke, temp2_loc, src, class_offset, /* needs_null_check */ false); + invoke, temp2_loc, src, class_offset, /* needs_null_check= */ false); // Note: if heap poisoning is on, we are comparing two unpoisoned references here. 
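// Sketch (not from the patch): the class-reference comparisons in this region are safe
// with heap poisoning for two reasons: the Baker read-barrier loads already unpoison the
// values, and, more generally, poisoning is a reversible per-reference transform (assumed
// here to be negation, as on x86), so plain equality is preserved either way. That is also
// why the String.equals change elsewhere in this patch skips unpoisoning entirely:
#include <cstdint>
inline uint32_t PoisonRef(uint32_t ref) { return 0u - ref; }  // assumed scheme: negation
// PoisonRef(a) == PoisonRef(b) holds exactly when a == b, so references compared only
// for equality never need to be unpoisoned first.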
__ cmpl(temp1, temp2); @@ -3086,7 +2761,7 @@ void IntrinsicCodeGeneratorX86::VisitSystemArrayCopy(HInvoke* invoke) { __ j(kEqual, &do_copy); // /* HeapReference<Class> */ temp1 = temp1->component_type_ codegen_->GenerateFieldLoadWithBakerReadBarrier( - invoke, temp1_loc, temp1, component_offset, /* needs_null_check */ false); + invoke, temp1_loc, temp1, component_offset, /* needs_null_check= */ false); // We do not need to emit a read barrier for the following // heap reference load, as `temp1` is only used in a // comparison with null below, and this reference is not @@ -3140,10 +2815,10 @@ void IntrinsicCodeGeneratorX86::VisitSystemArrayCopy(HInvoke* invoke) { if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) { // /* HeapReference<Class> */ temp1 = src->klass_ codegen_->GenerateFieldLoadWithBakerReadBarrier( - invoke, temp1_loc, src, class_offset, /* needs_null_check */ false); + invoke, temp1_loc, src, class_offset, /* needs_null_check= */ false); // /* HeapReference<Class> */ temp1 = temp1->component_type_ codegen_->GenerateFieldLoadWithBakerReadBarrier( - invoke, temp1_loc, temp1, component_offset, /* needs_null_check */ false); + invoke, temp1_loc, temp1, component_offset, /* needs_null_check= */ false); __ testl(temp1, temp1); __ j(kEqual, intrinsic_slow_path->GetEntryLabel()); // If heap poisoning is enabled, `temp1` has been unpoisoned @@ -3212,7 +2887,7 @@ void IntrinsicCodeGeneratorX86::VisitSystemArrayCopy(HInvoke* invoke) { __ j(kEqual, &done); // Given the numeric representation, it's enough to check the low bit of the rb_state. - static_assert(ReadBarrier::WhiteState() == 0, "Expecting white to have value 0"); + static_assert(ReadBarrier::NonGrayState() == 0, "Expecting non-gray to have value 0"); static_assert(ReadBarrier::GrayState() == 1, "Expecting gray to have value 1"); constexpr uint32_t gray_byte_position = LockWord::kReadBarrierStateShift / kBitsPerByte; constexpr uint32_t gray_bit_position = LockWord::kReadBarrierStateShift % kBitsPerByte; @@ -3276,22 +2951,36 @@ void IntrinsicCodeGeneratorX86::VisitSystemArrayCopy(HInvoke* invoke) { } // We only need one card marking on the destination array. 
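// Sketch (not from the patch): MarkGCCard dirties the card-table entry covering `dest` so
// the GC rescans that region for the references just copied; `value_can_be_null= false`
// skips the null check on the stored value. Minimal illustration with assumed constants
// (the real shift and dirty value come from the runtime's CardTable):
#include <cstdint>
constexpr uintptr_t kCardShiftSketch = 7;   // assumed: one card per 128 bytes of heap
constexpr uint8_t kCardDirtySketch = 0x70;  // assumed dirty marker value
inline void MarkCard(uint8_t* biased_card_table, uintptr_t dest_addr) {
  biased_card_table[dest_addr >> kCardShiftSketch] = kCardDirtySketch;  // one plain store
}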
- codegen_->MarkGCCard(temp1, temp2, dest, Register(kNoRegister), /* value_can_be_null */ false); + codegen_->MarkGCCard(temp1, temp2, dest, Register(kNoRegister), /* value_can_be_null= */ false); __ Bind(intrinsic_slow_path->GetExitLabel()); } void IntrinsicLocationsBuilderX86::VisitIntegerValueOf(HInvoke* invoke) { + DCHECK(invoke->IsInvokeStaticOrDirect()); InvokeRuntimeCallingConvention calling_convention; IntrinsicVisitor::ComputeIntegerValueOfLocations( invoke, codegen_, Location::RegisterLocation(EAX), Location::RegisterLocation(calling_convention.GetRegisterAt(0))); + + LocationSummary* locations = invoke->GetLocations(); + if (locations != nullptr) { + HInvokeStaticOrDirect* invoke_static_or_direct = invoke->AsInvokeStaticOrDirect(); + if (invoke_static_or_direct->HasSpecialInput() && + invoke->InputAt(invoke_static_or_direct->GetSpecialInputIndex()) + ->IsX86ComputeBaseMethodAddress()) { + locations->SetInAt(invoke_static_or_direct->GetSpecialInputIndex(), + Location::RequiresRegister()); + } + } } void IntrinsicCodeGeneratorX86::VisitIntegerValueOf(HInvoke* invoke) { - IntrinsicVisitor::IntegerValueOfInfo info = IntrinsicVisitor::ComputeIntegerValueOfInfo(); + DCHECK(invoke->IsInvokeStaticOrDirect()); + IntrinsicVisitor::IntegerValueOfInfo info = + IntrinsicVisitor::ComputeIntegerValueOfInfo(invoke, codegen_->GetCompilerOptions()); LocationSummary* locations = invoke->GetLocations(); X86Assembler* assembler = GetAssembler(); @@ -3299,42 +2988,58 @@ void IntrinsicCodeGeneratorX86::VisitIntegerValueOf(HInvoke* invoke) { InvokeRuntimeCallingConvention calling_convention; if (invoke->InputAt(0)->IsConstant()) { int32_t value = invoke->InputAt(0)->AsIntConstant()->GetValue(); - if (value >= info.low && value <= info.high) { + if (static_cast<uint32_t>(value - info.low) < info.length) { // Just embed the j.l.Integer in the code. - ScopedObjectAccess soa(Thread::Current()); - mirror::Object* boxed = info.cache->Get(value + (-info.low)); - DCHECK(boxed != nullptr && Runtime::Current()->GetHeap()->ObjectIsInBootImageSpace(boxed)); - uint32_t address = dchecked_integral_cast<uint32_t>(reinterpret_cast<uintptr_t>(boxed)); - __ movl(out, Immediate(address)); + DCHECK_NE(info.value_boot_image_reference, IntegerValueOfInfo::kInvalidReference); + codegen_->LoadBootImageAddress( + out, info.value_boot_image_reference, invoke->AsInvokeStaticOrDirect()); } else { + DCHECK(locations->CanCall()); // Allocate and initialize a new j.l.Integer. // TODO: If we JIT, we could allocate the j.l.Integer now, and store it in the // JIT object table. - uint32_t address = dchecked_integral_cast<uint32_t>(reinterpret_cast<uintptr_t>(info.integer)); - __ movl(calling_convention.GetRegisterAt(0), Immediate(address)); - codegen_->InvokeRuntime(kQuickAllocObjectInitialized, invoke, invoke->GetDexPc()); - CheckEntrypointTypes<kQuickAllocObjectWithChecks, void*, mirror::Class*>(); + codegen_->AllocateInstanceForIntrinsic(invoke->AsInvokeStaticOrDirect(), + info.integer_boot_image_offset); __ movl(Address(out, info.value_offset), Immediate(value)); } } else { + DCHECK(locations->CanCall()); Register in = locations->InAt(0).AsRegister<Register>(); // Check bounds of our cache. __ leal(out, Address(in, -info.low)); - __ cmpl(out, Immediate(info.high - info.low + 1)); + __ cmpl(out, Immediate(info.length)); NearLabel allocate, done; __ j(kAboveEqual, &allocate); // If the value is within the bounds, load the j.l.Integer directly from the array. 
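// Sketch (not from the patch): the new bounds test
// `static_cast<uint32_t>(value - info.low) < info.length` folds the old two-sided check
// `value >= info.low && value <= info.high` into one unsigned comparison; it is also what
// the leal + cmpl + j(kAboveEqual) sequence implements for the non-constant case:
#include <cstdint>
inline bool InIntegerCache(int32_t value, int32_t low, uint32_t length) {
  // Values below `low` wrap to a huge unsigned index, so a single compare covers both bounds.
  return static_cast<uint32_t>(value - low) < length;
}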
- uint32_t data_offset = mirror::Array::DataOffset(kHeapReferenceSize).Uint32Value(); - uint32_t address = dchecked_integral_cast<uint32_t>(reinterpret_cast<uintptr_t>(info.cache)); - __ movl(out, Address(out, TIMES_4, data_offset + address)); + constexpr size_t kElementSize = sizeof(mirror::HeapReference<mirror::Object>); + static_assert((1u << TIMES_4) == sizeof(mirror::HeapReference<mirror::Object>), + "Check heap reference size."); + if (codegen_->GetCompilerOptions().IsBootImage()) { + DCHECK_EQ(invoke->InputCount(), invoke->GetNumberOfArguments() + 1u); + size_t method_address_index = invoke->AsInvokeStaticOrDirect()->GetSpecialInputIndex(); + HX86ComputeBaseMethodAddress* method_address = + invoke->InputAt(method_address_index)->AsX86ComputeBaseMethodAddress(); + DCHECK(method_address != nullptr); + Register method_address_reg = + invoke->GetLocations()->InAt(method_address_index).AsRegister<Register>(); + __ movl(out, Address(method_address_reg, out, TIMES_4, CodeGeneratorX86::kDummy32BitOffset)); + codegen_->RecordBootImageIntrinsicPatch(method_address, info.array_data_boot_image_reference); + } else { + // Note: We're about to clobber the index in `out`, so we need to use `in` and + // adjust the offset accordingly. + uint32_t mid_array_boot_image_offset = + info.array_data_boot_image_reference - info.low * kElementSize; + codegen_->LoadBootImageAddress( + out, mid_array_boot_image_offset, invoke->AsInvokeStaticOrDirect()); + DCHECK_NE(out, in); + __ movl(out, Address(out, in, TIMES_4, 0)); + } __ MaybeUnpoisonHeapReference(out); __ jmp(&done); __ Bind(&allocate); // Otherwise allocate and initialize a new j.l.Integer. - address = dchecked_integral_cast<uint32_t>(reinterpret_cast<uintptr_t>(info.integer)); - __ movl(calling_convention.GetRegisterAt(0), Immediate(address)); - codegen_->InvokeRuntime(kQuickAllocObjectInitialized, invoke, invoke->GetDexPc()); - CheckEntrypointTypes<kQuickAllocObjectWithChecks, void*, mirror::Class*>(); + codegen_->AllocateInstanceForIntrinsic(invoke->AsInvokeStaticOrDirect(), + info.integer_boot_image_offset); __ movl(Address(out, info.value_offset), in); __ Bind(&done); } @@ -3373,8 +3078,9 @@ UNIMPLEMENTED_INTRINSIC(X86, FloatIsInfinite) UNIMPLEMENTED_INTRINSIC(X86, DoubleIsInfinite) UNIMPLEMENTED_INTRINSIC(X86, IntegerHighestOneBit) UNIMPLEMENTED_INTRINSIC(X86, LongHighestOneBit) -UNIMPLEMENTED_INTRINSIC(X86, IntegerLowestOneBit) -UNIMPLEMENTED_INTRINSIC(X86, LongLowestOneBit) +UNIMPLEMENTED_INTRINSIC(X86, CRC32Update) +UNIMPLEMENTED_INTRINSIC(X86, CRC32UpdateBytes) +UNIMPLEMENTED_INTRINSIC(X86, CRC32UpdateByteBuffer) UNIMPLEMENTED_INTRINSIC(X86, StringStringIndexOf); UNIMPLEMENTED_INTRINSIC(X86, StringStringIndexOfAfter); diff --git a/compiler/optimizing/intrinsics_x86.h b/compiler/optimizing/intrinsics_x86.h index e3555e78fc..ae150dad43 100644 --- a/compiler/optimizing/intrinsics_x86.h +++ b/compiler/optimizing/intrinsics_x86.h @@ -30,14 +30,14 @@ namespace x86 { class CodeGeneratorX86; class X86Assembler; -class IntrinsicLocationsBuilderX86 FINAL : public IntrinsicVisitor { +class IntrinsicLocationsBuilderX86 final : public IntrinsicVisitor { public: explicit IntrinsicLocationsBuilderX86(CodeGeneratorX86* codegen); // Define visitor methods. #define OPTIMIZING_INTRINSICS(Name, IsStatic, NeedsEnvironmentOrCache, SideEffects, Exceptions, ...) 
\ - void Visit ## Name(HInvoke* invoke) OVERRIDE; + void Visit ## Name(HInvoke* invoke) override; #include "intrinsics_list.h" INTRINSICS_LIST(OPTIMIZING_INTRINSICS) #undef INTRINSICS_LIST @@ -55,14 +55,14 @@ class IntrinsicLocationsBuilderX86 FINAL : public IntrinsicVisitor { DISALLOW_COPY_AND_ASSIGN(IntrinsicLocationsBuilderX86); }; -class IntrinsicCodeGeneratorX86 FINAL : public IntrinsicVisitor { +class IntrinsicCodeGeneratorX86 final : public IntrinsicVisitor { public: explicit IntrinsicCodeGeneratorX86(CodeGeneratorX86* codegen) : codegen_(codegen) {} // Define visitor methods. #define OPTIMIZING_INTRINSICS(Name, IsStatic, NeedsEnvironmentOrCache, SideEffects, Exceptions, ...) \ - void Visit ## Name(HInvoke* invoke) OVERRIDE; + void Visit ## Name(HInvoke* invoke) override; #include "intrinsics_list.h" INTRINSICS_LIST(OPTIMIZING_INTRINSICS) #undef INTRINSICS_LIST diff --git a/compiler/optimizing/intrinsics_x86_64.cc b/compiler/optimizing/intrinsics_x86_64.cc index 91a505ede1..e79c0c9adf 100644 --- a/compiler/optimizing/intrinsics_x86_64.cc +++ b/compiler/optimizing/intrinsics_x86_64.cc @@ -80,7 +80,7 @@ class ReadBarrierSystemArrayCopySlowPathX86_64 : public SlowPathCode { DCHECK(kUseBakerReadBarrier); } - void EmitNativeCode(CodeGenerator* codegen) OVERRIDE { + void EmitNativeCode(CodeGenerator* codegen) override { CodeGeneratorX86_64* x86_64_codegen = down_cast<CodeGeneratorX86_64*>(codegen); LocationSummary* locations = instruction_->GetLocations(); DCHECK(locations->CanCall()); @@ -118,7 +118,7 @@ class ReadBarrierSystemArrayCopySlowPathX86_64 : public SlowPathCode { __ jmp(GetExitLabel()); } - const char* GetDescription() const OVERRIDE { return "ReadBarrierSystemArrayCopySlowPathX86_64"; } + const char* GetDescription() const override { return "ReadBarrierSystemArrayCopySlowPathX86_64"; } private: DISALLOW_COPY_AND_ASSIGN(ReadBarrierSystemArrayCopySlowPathX86_64); @@ -162,10 +162,10 @@ void IntrinsicLocationsBuilderX86_64::VisitDoubleLongBitsToDouble(HInvoke* invok } void IntrinsicCodeGeneratorX86_64::VisitDoubleDoubleToRawLongBits(HInvoke* invoke) { - MoveFPToInt(invoke->GetLocations(), /* is64bit */ true, GetAssembler()); + MoveFPToInt(invoke->GetLocations(), /* is64bit= */ true, GetAssembler()); } void IntrinsicCodeGeneratorX86_64::VisitDoubleLongBitsToDouble(HInvoke* invoke) { - MoveIntToFP(invoke->GetLocations(), /* is64bit */ true, GetAssembler()); + MoveIntToFP(invoke->GetLocations(), /* is64bit= */ true, GetAssembler()); } void IntrinsicLocationsBuilderX86_64::VisitFloatFloatToRawIntBits(HInvoke* invoke) { @@ -176,10 +176,10 @@ void IntrinsicLocationsBuilderX86_64::VisitFloatIntBitsToFloat(HInvoke* invoke) } void IntrinsicCodeGeneratorX86_64::VisitFloatFloatToRawIntBits(HInvoke* invoke) { - MoveFPToInt(invoke->GetLocations(), /* is64bit */ false, GetAssembler()); + MoveFPToInt(invoke->GetLocations(), /* is64bit= */ false, GetAssembler()); } void IntrinsicCodeGeneratorX86_64::VisitFloatIntBitsToFloat(HInvoke* invoke) { - MoveIntToFP(invoke->GetLocations(), /* is64bit */ false, GetAssembler()); + MoveIntToFP(invoke->GetLocations(), /* is64bit= */ false, GetAssembler()); } static void CreateIntToIntLocations(ArenaAllocator* allocator, HInvoke* invoke) { @@ -236,304 +236,6 @@ void IntrinsicCodeGeneratorX86_64::VisitShortReverseBytes(HInvoke* invoke) { GenReverseBytes(invoke->GetLocations(), DataType::Type::kInt16, GetAssembler()); } - -// TODO: Consider Quick's way of doing Double abs through integer operations, as the immediate we -// need is 64b. 
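// Sketch (not from the patch): the Math.abs/min/max intrinsic implementations are deleted
// from intrinsics_x86_64.cc in the hunks that follow, presumably because these operations
// are now handled generically by the optimizing compiler rather than per backend. For
// reference, the deleted FP-abs code cleared the sign bit with an andpd/andps mask; the
// same idea in portable C++:
#include <cstdint>
#include <cstring>
inline double AbsViaSignMask(double x) {
  uint64_t bits;
  std::memcpy(&bits, &x, sizeof(bits));   // reinterpret the double as raw bits
  bits &= UINT64_C(0x7FFFFFFFFFFFFFFF);   // clear only the sign bit
  std::memcpy(&x, &bits, sizeof(x));
  return x;
}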
- -static void CreateFloatToFloatPlusTemps(ArenaAllocator* allocator, HInvoke* invoke) { - // TODO: Enable memory operations when the assembler supports them. - LocationSummary* locations = - new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified); - locations->SetInAt(0, Location::RequiresFpuRegister()); - locations->SetOut(Location::SameAsFirstInput()); - locations->AddTemp(Location::RequiresFpuRegister()); // FP reg to hold mask. -} - -static void MathAbsFP(LocationSummary* locations, - bool is64bit, - X86_64Assembler* assembler, - CodeGeneratorX86_64* codegen) { - Location output = locations->Out(); - - DCHECK(output.IsFpuRegister()); - XmmRegister xmm_temp = locations->GetTemp(0).AsFpuRegister<XmmRegister>(); - - // TODO: Can mask directly with constant area using pand if we can guarantee - // that the literal is aligned on a 16 byte boundary. This will avoid a - // temporary. - if (is64bit) { - __ movsd(xmm_temp, codegen->LiteralInt64Address(INT64_C(0x7FFFFFFFFFFFFFFF))); - __ andpd(output.AsFpuRegister<XmmRegister>(), xmm_temp); - } else { - __ movss(xmm_temp, codegen->LiteralInt32Address(INT32_C(0x7FFFFFFF))); - __ andps(output.AsFpuRegister<XmmRegister>(), xmm_temp); - } -} - -void IntrinsicLocationsBuilderX86_64::VisitMathAbsDouble(HInvoke* invoke) { - CreateFloatToFloatPlusTemps(allocator_, invoke); -} - -void IntrinsicCodeGeneratorX86_64::VisitMathAbsDouble(HInvoke* invoke) { - MathAbsFP(invoke->GetLocations(), /* is64bit */ true, GetAssembler(), codegen_); -} - -void IntrinsicLocationsBuilderX86_64::VisitMathAbsFloat(HInvoke* invoke) { - CreateFloatToFloatPlusTemps(allocator_, invoke); -} - -void IntrinsicCodeGeneratorX86_64::VisitMathAbsFloat(HInvoke* invoke) { - MathAbsFP(invoke->GetLocations(), /* is64bit */ false, GetAssembler(), codegen_); -} - -static void CreateIntToIntPlusTemp(ArenaAllocator* allocator, HInvoke* invoke) { - LocationSummary* locations = - new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified); - locations->SetInAt(0, Location::RequiresRegister()); - locations->SetOut(Location::SameAsFirstInput()); - locations->AddTemp(Location::RequiresRegister()); -} - -static void GenAbsInteger(LocationSummary* locations, bool is64bit, X86_64Assembler* assembler) { - Location output = locations->Out(); - CpuRegister out = output.AsRegister<CpuRegister>(); - CpuRegister mask = locations->GetTemp(0).AsRegister<CpuRegister>(); - - if (is64bit) { - // Create mask. - __ movq(mask, out); - __ sarq(mask, Immediate(63)); - // Add mask. - __ addq(out, mask); - __ xorq(out, mask); - } else { - // Create mask. - __ movl(mask, out); - __ sarl(mask, Immediate(31)); - // Add mask. 
- __ addl(out, mask); - __ xorl(out, mask); - } -} - -void IntrinsicLocationsBuilderX86_64::VisitMathAbsInt(HInvoke* invoke) { - CreateIntToIntPlusTemp(allocator_, invoke); -} - -void IntrinsicCodeGeneratorX86_64::VisitMathAbsInt(HInvoke* invoke) { - GenAbsInteger(invoke->GetLocations(), /* is64bit */ false, GetAssembler()); -} - -void IntrinsicLocationsBuilderX86_64::VisitMathAbsLong(HInvoke* invoke) { - CreateIntToIntPlusTemp(allocator_, invoke); -} - -void IntrinsicCodeGeneratorX86_64::VisitMathAbsLong(HInvoke* invoke) { - GenAbsInteger(invoke->GetLocations(), /* is64bit */ true, GetAssembler()); -} - -static void GenMinMaxFP(LocationSummary* locations, - bool is_min, - bool is_double, - X86_64Assembler* assembler, - CodeGeneratorX86_64* codegen) { - Location op1_loc = locations->InAt(0); - Location op2_loc = locations->InAt(1); - Location out_loc = locations->Out(); - XmmRegister out = out_loc.AsFpuRegister<XmmRegister>(); - - // Shortcut for same input locations. - if (op1_loc.Equals(op2_loc)) { - DCHECK(out_loc.Equals(op1_loc)); - return; - } - - // (out := op1) - // out <=? op2 - // if Nan jmp Nan_label - // if out is min jmp done - // if op2 is min jmp op2_label - // handle -0/+0 - // jmp done - // Nan_label: - // out := NaN - // op2_label: - // out := op2 - // done: - // - // This removes one jmp, but needs to copy one input (op1) to out. - // - // TODO: This is straight from Quick. Make NaN an out-of-line slowpath? - - XmmRegister op2 = op2_loc.AsFpuRegister<XmmRegister>(); - - NearLabel nan, done, op2_label; - if (is_double) { - __ ucomisd(out, op2); - } else { - __ ucomiss(out, op2); - } - - __ j(Condition::kParityEven, &nan); - - __ j(is_min ? Condition::kAbove : Condition::kBelow, &op2_label); - __ j(is_min ? Condition::kBelow : Condition::kAbove, &done); - - // Handle 0.0/-0.0. - if (is_min) { - if (is_double) { - __ orpd(out, op2); - } else { - __ orps(out, op2); - } - } else { - if (is_double) { - __ andpd(out, op2); - } else { - __ andps(out, op2); - } - } - __ jmp(&done); - - // NaN handling. - __ Bind(&nan); - if (is_double) { - __ movsd(out, codegen->LiteralInt64Address(INT64_C(0x7FF8000000000000))); - } else { - __ movss(out, codegen->LiteralInt32Address(INT32_C(0x7FC00000))); - } - __ jmp(&done); - - // out := op2; - __ Bind(&op2_label); - if (is_double) { - __ movsd(out, op2); - } else { - __ movss(out, op2); - } - - // Done. - __ Bind(&done); -} - -static void CreateFPFPToFP(ArenaAllocator* allocator, HInvoke* invoke) { - LocationSummary* locations = - new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified); - locations->SetInAt(0, Location::RequiresFpuRegister()); - locations->SetInAt(1, Location::RequiresFpuRegister()); - // The following is sub-optimal, but all we can do for now. It would be fine to also accept - // the second input to be the output (we can simply swap inputs). 
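// Sketch (not from the patch): the deleted GenMinMaxFP above preserved Java's Math.min/max
// semantics that a bare SSE minsd/maxsd would not: any NaN operand yields NaN, and
// +0.0/-0.0 are distinguished (min prefers -0.0 via orpd, max prefers +0.0 via andpd).
// Equivalent logic in plain C++ for the min case:
#include <cmath>
#include <limits>
inline double JavaStyleMin(double a, double b) {
  if (std::isnan(a) || std::isnan(b)) {
    return std::numeric_limits<double>::quiet_NaN();  // NaN handling path
  }
  if (a == 0.0 && b == 0.0) {
    return std::signbit(a) ? a : b;                   // -0.0 counts as smaller than +0.0
  }
  return a < b ? a : b;
}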
- locations->SetOut(Location::SameAsFirstInput()); -} - -void IntrinsicLocationsBuilderX86_64::VisitMathMinDoubleDouble(HInvoke* invoke) { - CreateFPFPToFP(allocator_, invoke); -} - -void IntrinsicCodeGeneratorX86_64::VisitMathMinDoubleDouble(HInvoke* invoke) { - GenMinMaxFP( - invoke->GetLocations(), /* is_min */ true, /* is_double */ true, GetAssembler(), codegen_); -} - -void IntrinsicLocationsBuilderX86_64::VisitMathMinFloatFloat(HInvoke* invoke) { - CreateFPFPToFP(allocator_, invoke); -} - -void IntrinsicCodeGeneratorX86_64::VisitMathMinFloatFloat(HInvoke* invoke) { - GenMinMaxFP( - invoke->GetLocations(), /* is_min */ true, /* is_double */ false, GetAssembler(), codegen_); -} - -void IntrinsicLocationsBuilderX86_64::VisitMathMaxDoubleDouble(HInvoke* invoke) { - CreateFPFPToFP(allocator_, invoke); -} - -void IntrinsicCodeGeneratorX86_64::VisitMathMaxDoubleDouble(HInvoke* invoke) { - GenMinMaxFP( - invoke->GetLocations(), /* is_min */ false, /* is_double */ true, GetAssembler(), codegen_); -} - -void IntrinsicLocationsBuilderX86_64::VisitMathMaxFloatFloat(HInvoke* invoke) { - CreateFPFPToFP(allocator_, invoke); -} - -void IntrinsicCodeGeneratorX86_64::VisitMathMaxFloatFloat(HInvoke* invoke) { - GenMinMaxFP( - invoke->GetLocations(), /* is_min */ false, /* is_double */ false, GetAssembler(), codegen_); -} - -static void GenMinMax(LocationSummary* locations, bool is_min, bool is_long, - X86_64Assembler* assembler) { - Location op1_loc = locations->InAt(0); - Location op2_loc = locations->InAt(1); - - // Shortcut for same input locations. - if (op1_loc.Equals(op2_loc)) { - // Can return immediately, as op1_loc == out_loc. - // Note: if we ever support separate registers, e.g., output into memory, we need to check for - // a copy here. - DCHECK(locations->Out().Equals(op1_loc)); - return; - } - - CpuRegister out = locations->Out().AsRegister<CpuRegister>(); - CpuRegister op2 = op2_loc.AsRegister<CpuRegister>(); - - // (out := op1) - // out <=? op2 - // if out is min jmp done - // out := op2 - // done: - - if (is_long) { - __ cmpq(out, op2); - } else { - __ cmpl(out, op2); - } - - __ cmov(is_min ? 
Condition::kGreater : Condition::kLess, out, op2, is_long); -} - -static void CreateIntIntToIntLocations(ArenaAllocator* allocator, HInvoke* invoke) { - LocationSummary* locations = - new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified); - locations->SetInAt(0, Location::RequiresRegister()); - locations->SetInAt(1, Location::RequiresRegister()); - locations->SetOut(Location::SameAsFirstInput()); -} - -void IntrinsicLocationsBuilderX86_64::VisitMathMinIntInt(HInvoke* invoke) { - CreateIntIntToIntLocations(allocator_, invoke); -} - -void IntrinsicCodeGeneratorX86_64::VisitMathMinIntInt(HInvoke* invoke) { - GenMinMax(invoke->GetLocations(), /* is_min */ true, /* is_long */ false, GetAssembler()); -} - -void IntrinsicLocationsBuilderX86_64::VisitMathMinLongLong(HInvoke* invoke) { - CreateIntIntToIntLocations(allocator_, invoke); -} - -void IntrinsicCodeGeneratorX86_64::VisitMathMinLongLong(HInvoke* invoke) { - GenMinMax(invoke->GetLocations(), /* is_min */ true, /* is_long */ true, GetAssembler()); -} - -void IntrinsicLocationsBuilderX86_64::VisitMathMaxIntInt(HInvoke* invoke) { - CreateIntIntToIntLocations(allocator_, invoke); -} - -void IntrinsicCodeGeneratorX86_64::VisitMathMaxIntInt(HInvoke* invoke) { - GenMinMax(invoke->GetLocations(), /* is_min */ false, /* is_long */ false, GetAssembler()); -} - -void IntrinsicLocationsBuilderX86_64::VisitMathMaxLongLong(HInvoke* invoke) { - CreateIntIntToIntLocations(allocator_, invoke); -} - -void IntrinsicCodeGeneratorX86_64::VisitMathMaxLongLong(HInvoke* invoke) { - GenMinMax(invoke->GetLocations(), /* is_min */ false, /* is_long */ true, GetAssembler()); -} - static void CreateFPToFPLocations(ArenaAllocator* allocator, HInvoke* invoke) { LocationSummary* locations = new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified); @@ -728,12 +430,12 @@ void IntrinsicCodeGeneratorX86_64::VisitMathRoundDouble(HInvoke* invoke) { // direct x86 instruction, since NaN should map to 0 and large positive // values need to be clipped to the extreme value. codegen_->Load64BitValue(out, kPrimLongMax); - __ cvtsi2sd(t2, out, /* is64bit */ true); + __ cvtsi2sd(t2, out, /* is64bit= */ true); __ comisd(t1, t2); __ j(kAboveEqual, &done); // clipped to max (already in out), does not jump on unordered __ movl(out, Immediate(0)); // does not change flags, implicit zero extension to 64-bit __ j(kUnordered, &done); // NaN mapped to 0 (just moved in out) - __ cvttsd2si(out, t1, /* is64bit */ true); + __ cvttsd2si(out, t1, /* is64bit= */ true); __ Bind(&done); } @@ -1277,7 +979,7 @@ void IntrinsicCodeGeneratorX86_64::VisitSystemArrayCopy(HInvoke* invoke) { if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) { // /* HeapReference<Class> */ temp1 = dest->klass_ codegen_->GenerateFieldLoadWithBakerReadBarrier( - invoke, temp1_loc, dest, class_offset, /* needs_null_check */ false); + invoke, temp1_loc, dest, class_offset, /* needs_null_check= */ false); // Register `temp1` is not trashed by the read barrier emitted // by GenerateFieldLoadWithBakerReadBarrier below, as that // method produces a call to a ReadBarrierMarkRegX entry point, @@ -1285,7 +987,7 @@ void IntrinsicCodeGeneratorX86_64::VisitSystemArrayCopy(HInvoke* invoke) { // temporaries such a `temp1`. 
// /* HeapReference<Class> */ temp2 = src->klass_ codegen_->GenerateFieldLoadWithBakerReadBarrier( - invoke, temp2_loc, src, class_offset, /* needs_null_check */ false); + invoke, temp2_loc, src, class_offset, /* needs_null_check= */ false); // If heap poisoning is enabled, `temp1` and `temp2` have been // unpoisoned by the the previous calls to // GenerateFieldLoadWithBakerReadBarrier. @@ -1309,7 +1011,7 @@ void IntrinsicCodeGeneratorX86_64::VisitSystemArrayCopy(HInvoke* invoke) { if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) { // /* HeapReference<Class> */ TMP = temp1->component_type_ codegen_->GenerateFieldLoadWithBakerReadBarrier( - invoke, TMP_loc, temp1, component_offset, /* needs_null_check */ false); + invoke, TMP_loc, temp1, component_offset, /* needs_null_check= */ false); __ testl(CpuRegister(TMP), CpuRegister(TMP)); __ j(kEqual, intrinsic_slow_path->GetEntryLabel()); // If heap poisoning is enabled, `TMP` has been unpoisoned by @@ -1332,7 +1034,7 @@ void IntrinsicCodeGeneratorX86_64::VisitSystemArrayCopy(HInvoke* invoke) { // read barrier emitted by GenerateFieldLoadWithBakerReadBarrier below. // /* HeapReference<Class> */ TMP = temp2->component_type_ codegen_->GenerateFieldLoadWithBakerReadBarrier( - invoke, TMP_loc, temp2, component_offset, /* needs_null_check */ false); + invoke, TMP_loc, temp2, component_offset, /* needs_null_check= */ false); __ testl(CpuRegister(TMP), CpuRegister(TMP)); __ j(kEqual, intrinsic_slow_path->GetEntryLabel()); // If heap poisoning is enabled, `TMP` has been unpoisoned by @@ -1356,7 +1058,7 @@ void IntrinsicCodeGeneratorX86_64::VisitSystemArrayCopy(HInvoke* invoke) { if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) { // /* HeapReference<Class> */ temp1 = temp1->component_type_ codegen_->GenerateFieldLoadWithBakerReadBarrier( - invoke, temp1_loc, temp1, component_offset, /* needs_null_check */ false); + invoke, temp1_loc, temp1, component_offset, /* needs_null_check= */ false); // We do not need to emit a read barrier for the following // heap reference load, as `temp1` is only used in a // comparison with null below, and this reference is not @@ -1384,10 +1086,10 @@ void IntrinsicCodeGeneratorX86_64::VisitSystemArrayCopy(HInvoke* invoke) { if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) { // /* HeapReference<Class> */ temp1 = src->klass_ codegen_->GenerateFieldLoadWithBakerReadBarrier( - invoke, temp1_loc, src, class_offset, /* needs_null_check */ false); + invoke, temp1_loc, src, class_offset, /* needs_null_check= */ false); // /* HeapReference<Class> */ TMP = temp1->component_type_ codegen_->GenerateFieldLoadWithBakerReadBarrier( - invoke, TMP_loc, temp1, component_offset, /* needs_null_check */ false); + invoke, TMP_loc, temp1, component_offset, /* needs_null_check= */ false); __ testl(CpuRegister(TMP), CpuRegister(TMP)); __ j(kEqual, intrinsic_slow_path->GetEntryLabel()); } else { @@ -1441,7 +1143,7 @@ void IntrinsicCodeGeneratorX86_64::VisitSystemArrayCopy(HInvoke* invoke) { __ j(kEqual, &done); // Given the numeric representation, it's enough to check the low bit of the rb_state. 
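// Sketch (not from the patch): ReadBarrier::WhiteState() is renamed to NonGrayState() in
// the hunk below; the encoding is unchanged (non-gray == 0, gray == 1), so a single-bit
// test of the lock word still answers "does this object need the read-barrier slow path?".
// Illustration with an assumed shift value (the real one comes from LockWord):
#include <cstdint>
constexpr uint32_t kReadBarrierStateShiftSketch = 28;  // assumed bit position of rb_state
inline bool IsGray(uint32_t lock_word) {
  return ((lock_word >> kReadBarrierStateShiftSketch) & 1u) != 0;  // low bit of rb_state
}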
- static_assert(ReadBarrier::WhiteState() == 0, "Expecting white to have value 0"); + static_assert(ReadBarrier::NonGrayState() == 0, "Expecting non-gray to have value 0"); static_assert(ReadBarrier::GrayState() == 1, "Expecting gray to have value 1"); constexpr uint32_t gray_byte_position = LockWord::kReadBarrierStateShift / kBitsPerByte; constexpr uint32_t gray_bit_position = LockWord::kReadBarrierStateShift % kBitsPerByte; @@ -1496,7 +1198,7 @@ void IntrinsicCodeGeneratorX86_64::VisitSystemArrayCopy(HInvoke* invoke) { } // We only need one card marking on the destination array. - codegen_->MarkGCCard(temp1, temp2, dest, CpuRegister(kNoRegister), /* value_can_be_null */ false); + codegen_->MarkGCCard(temp1, temp2, dest, CpuRegister(kNoRegister), /* value_can_be_null= */ false); __ Bind(intrinsic_slow_path->GetExitLabel()); } @@ -1528,13 +1230,6 @@ void IntrinsicCodeGeneratorX86_64::VisitStringCompareTo(HInvoke* invoke) { } void IntrinsicLocationsBuilderX86_64::VisitStringEquals(HInvoke* invoke) { - if (kEmitCompilerReadBarrier && - !StringEqualsOptimizations(invoke).GetArgumentIsString() && - !StringEqualsOptimizations(invoke).GetNoReadBarrierForStringClass()) { - // No support for this odd case (String class is moveable, not in the boot image). - return; - } - LocationSummary* locations = new (allocator_) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified); locations->SetInAt(0, Location::RequiresRegister()); @@ -1580,7 +1275,15 @@ void IntrinsicCodeGeneratorX86_64::VisitStringEquals(HInvoke* invoke) { // All string objects must have the same type since String cannot be subclassed. // Receiver must be a string object, so its class field is equal to all strings' class fields. // If the argument is a string object, its class field must be equal to receiver's class field. + // + // As the String class is expected to be non-movable, we can read the class + // field from String.equals' arguments without read barriers. + AssertNonMovableStringClass(); + // Also, because we use the loaded class references only to compare them, we + // don't need to unpoison them. + // /* HeapReference<Class> */ rcx = str->klass_ __ movl(rcx, Address(str, class_offset)); + // if (rcx != /* HeapReference<Class> */ arg->klass_) return false __ cmpl(rcx, Address(arg, class_offset)); __ j(kNotEqual, &return_false); } @@ -1749,7 +1452,7 @@ static void GenerateStringIndexOf(HInvoke* invoke, // Ensure we have a start index >= 0; __ xorl(counter, counter); __ cmpl(start_index, Immediate(0)); - __ cmov(kGreater, counter, start_index, /* is64bit */ false); // 32-bit copy is enough. + __ cmov(kGreater, counter, start_index, /* is64bit= */ false); // 32-bit copy is enough. 
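// Sketch (not from the patch): the xorl/cmpl/cmov sequence just above clamps the start
// index to zero without a branch; the cmov overwrites the zeroed counter only when
// start_index > 0. The same computation spelled out in C++:
#include <cstdint>
inline int32_t ClampStartIndex(int32_t start_index) {
  int32_t counter = 0;          // xorl(counter, counter)
  if (start_index > 0) {        // cmpl + cmov(kGreater, counter, start_index)
    counter = start_index;
  }
  return counter;               // i.e. max(start_index, 0)
}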
if (mirror::kUseStringCompression) { NearLabel modify_counter, offset_uncompressed_label; @@ -1811,19 +1514,19 @@ static void GenerateStringIndexOf(HInvoke* invoke, } void IntrinsicLocationsBuilderX86_64::VisitStringIndexOf(HInvoke* invoke) { - CreateStringIndexOfLocations(invoke, allocator_, /* start_at_zero */ true); + CreateStringIndexOfLocations(invoke, allocator_, /* start_at_zero= */ true); } void IntrinsicCodeGeneratorX86_64::VisitStringIndexOf(HInvoke* invoke) { - GenerateStringIndexOf(invoke, GetAssembler(), codegen_, /* start_at_zero */ true); + GenerateStringIndexOf(invoke, GetAssembler(), codegen_, /* start_at_zero= */ true); } void IntrinsicLocationsBuilderX86_64::VisitStringIndexOfAfter(HInvoke* invoke) { - CreateStringIndexOfLocations(invoke, allocator_, /* start_at_zero */ false); + CreateStringIndexOfLocations(invoke, allocator_, /* start_at_zero= */ false); } void IntrinsicCodeGeneratorX86_64::VisitStringIndexOfAfter(HInvoke* invoke) { - GenerateStringIndexOf(invoke, GetAssembler(), codegen_, /* start_at_zero */ false); + GenerateStringIndexOf(invoke, GetAssembler(), codegen_, /* start_at_zero= */ false); } void IntrinsicLocationsBuilderX86_64::VisitStringNewStringFromBytes(HInvoke* invoke) { @@ -2137,7 +1840,7 @@ void IntrinsicLocationsBuilderX86_64::VisitThreadCurrentThread(HInvoke* invoke) void IntrinsicCodeGeneratorX86_64::VisitThreadCurrentThread(HInvoke* invoke) { CpuRegister out = invoke->GetLocations()->Out().AsRegister<CpuRegister>(); GetAssembler()->gs()->movl(out, Address::Absolute(Thread::PeerOffset<kX86_64PointerSize>(), - /* no_rip */ true)); + /* no_rip= */ true)); } static void GenUnsafeGet(HInvoke* invoke, @@ -2163,7 +1866,7 @@ static void GenUnsafeGet(HInvoke* invoke, if (kUseBakerReadBarrier) { Address src(base, offset, ScaleFactor::TIMES_1, 0); codegen->GenerateReferenceLoadWithBakerReadBarrier( - invoke, output_loc, base, src, /* needs_null_check */ false); + invoke, output_loc, base, src, /* needs_null_check= */ false); } else { __ movl(output, Address(base, offset, ScaleFactor::TIMES_1, 0)); codegen->GenerateReadBarrierSlow( @@ -2227,22 +1930,22 @@ void IntrinsicLocationsBuilderX86_64::VisitUnsafeGetObjectVolatile(HInvoke* invo void IntrinsicCodeGeneratorX86_64::VisitUnsafeGet(HInvoke* invoke) { - GenUnsafeGet(invoke, DataType::Type::kInt32, /* is_volatile */ false, codegen_); + GenUnsafeGet(invoke, DataType::Type::kInt32, /* is_volatile= */ false, codegen_); } void IntrinsicCodeGeneratorX86_64::VisitUnsafeGetVolatile(HInvoke* invoke) { - GenUnsafeGet(invoke, DataType::Type::kInt32, /* is_volatile */ true, codegen_); + GenUnsafeGet(invoke, DataType::Type::kInt32, /* is_volatile= */ true, codegen_); } void IntrinsicCodeGeneratorX86_64::VisitUnsafeGetLong(HInvoke* invoke) { - GenUnsafeGet(invoke, DataType::Type::kInt64, /* is_volatile */ false, codegen_); + GenUnsafeGet(invoke, DataType::Type::kInt64, /* is_volatile= */ false, codegen_); } void IntrinsicCodeGeneratorX86_64::VisitUnsafeGetLongVolatile(HInvoke* invoke) { - GenUnsafeGet(invoke, DataType::Type::kInt64, /* is_volatile */ true, codegen_); + GenUnsafeGet(invoke, DataType::Type::kInt64, /* is_volatile= */ true, codegen_); } void IntrinsicCodeGeneratorX86_64::VisitUnsafeGetObject(HInvoke* invoke) { - GenUnsafeGet(invoke, DataType::Type::kReference, /* is_volatile */ false, codegen_); + GenUnsafeGet(invoke, DataType::Type::kReference, /* is_volatile= */ false, codegen_); } void IntrinsicCodeGeneratorX86_64::VisitUnsafeGetObjectVolatile(HInvoke* invoke) { - GenUnsafeGet(invoke, 
DataType::Type::kReference, /* is_volatile */ true, codegen_); + GenUnsafeGet(invoke, DataType::Type::kReference, /* is_volatile= */ true, codegen_); } @@ -2325,34 +2028,34 @@ static void GenUnsafePut(LocationSummary* locations, DataType::Type type, bool i } void IntrinsicCodeGeneratorX86_64::VisitUnsafePut(HInvoke* invoke) { - GenUnsafePut(invoke->GetLocations(), DataType::Type::kInt32, /* is_volatile */ false, codegen_); + GenUnsafePut(invoke->GetLocations(), DataType::Type::kInt32, /* is_volatile= */ false, codegen_); } void IntrinsicCodeGeneratorX86_64::VisitUnsafePutOrdered(HInvoke* invoke) { - GenUnsafePut(invoke->GetLocations(), DataType::Type::kInt32, /* is_volatile */ false, codegen_); + GenUnsafePut(invoke->GetLocations(), DataType::Type::kInt32, /* is_volatile= */ false, codegen_); } void IntrinsicCodeGeneratorX86_64::VisitUnsafePutVolatile(HInvoke* invoke) { - GenUnsafePut(invoke->GetLocations(), DataType::Type::kInt32, /* is_volatile */ true, codegen_); + GenUnsafePut(invoke->GetLocations(), DataType::Type::kInt32, /* is_volatile= */ true, codegen_); } void IntrinsicCodeGeneratorX86_64::VisitUnsafePutObject(HInvoke* invoke) { GenUnsafePut( - invoke->GetLocations(), DataType::Type::kReference, /* is_volatile */ false, codegen_); + invoke->GetLocations(), DataType::Type::kReference, /* is_volatile= */ false, codegen_); } void IntrinsicCodeGeneratorX86_64::VisitUnsafePutObjectOrdered(HInvoke* invoke) { GenUnsafePut( - invoke->GetLocations(), DataType::Type::kReference, /* is_volatile */ false, codegen_); + invoke->GetLocations(), DataType::Type::kReference, /* is_volatile= */ false, codegen_); } void IntrinsicCodeGeneratorX86_64::VisitUnsafePutObjectVolatile(HInvoke* invoke) { GenUnsafePut( - invoke->GetLocations(), DataType::Type::kReference, /* is_volatile */ true, codegen_); + invoke->GetLocations(), DataType::Type::kReference, /* is_volatile= */ true, codegen_); } void IntrinsicCodeGeneratorX86_64::VisitUnsafePutLong(HInvoke* invoke) { - GenUnsafePut(invoke->GetLocations(), DataType::Type::kInt64, /* is_volatile */ false, codegen_); + GenUnsafePut(invoke->GetLocations(), DataType::Type::kInt64, /* is_volatile= */ false, codegen_); } void IntrinsicCodeGeneratorX86_64::VisitUnsafePutLongOrdered(HInvoke* invoke) { - GenUnsafePut(invoke->GetLocations(), DataType::Type::kInt64, /* is_volatile */ false, codegen_); + GenUnsafePut(invoke->GetLocations(), DataType::Type::kInt64, /* is_volatile= */ false, codegen_); } void IntrinsicCodeGeneratorX86_64::VisitUnsafePutLongVolatile(HInvoke* invoke) { - GenUnsafePut(invoke->GetLocations(), DataType::Type::kInt64, /* is_volatile */ true, codegen_); + GenUnsafePut(invoke->GetLocations(), DataType::Type::kInt64, /* is_volatile= */ true, codegen_); } static void CreateIntIntIntIntIntToInt(ArenaAllocator* allocator, @@ -2437,8 +2140,8 @@ static void GenCAS(DataType::Type type, HInvoke* invoke, CodeGeneratorX86_64* co out_loc, // Unused, used only as a "temporary" within the read barrier. 
base, field_addr, - /* needs_null_check */ false, - /* always_update_field */ true, + /* needs_null_check= */ false, + /* always_update_field= */ true, &temp1, &temp2); } @@ -2666,7 +2369,7 @@ void IntrinsicLocationsBuilderX86_64::VisitIntegerBitCount(HInvoke* invoke) { } void IntrinsicCodeGeneratorX86_64::VisitIntegerBitCount(HInvoke* invoke) { - GenBitCount(GetAssembler(), codegen_, invoke, /* is_long */ false); + GenBitCount(GetAssembler(), codegen_, invoke, /* is_long= */ false); } void IntrinsicLocationsBuilderX86_64::VisitLongBitCount(HInvoke* invoke) { @@ -2674,7 +2377,7 @@ void IntrinsicLocationsBuilderX86_64::VisitLongBitCount(HInvoke* invoke) { } void IntrinsicCodeGeneratorX86_64::VisitLongBitCount(HInvoke* invoke) { - GenBitCount(GetAssembler(), codegen_, invoke, /* is_long */ true); + GenBitCount(GetAssembler(), codegen_, invoke, /* is_long= */ true); } static void CreateOneBitLocations(ArenaAllocator* allocator, HInvoke* invoke, bool is_high) { @@ -2718,93 +2421,98 @@ static void GenOneBit(X86_64Assembler* assembler, } // Handle the non-constant cases. - CpuRegister tmp = locations->GetTemp(0).AsRegister<CpuRegister>(); - if (is_high) { - // Use architectural support: basically 1 << bsr. - if (src.IsRegister()) { + if (!is_high && codegen->GetInstructionSetFeatures().HasAVX2() && + src.IsRegister()) { + __ blsi(out, src.AsRegister<CpuRegister>()); + } else { + CpuRegister tmp = locations->GetTemp(0).AsRegister<CpuRegister>(); + if (is_high) { + // Use architectural support: basically 1 << bsr. + if (src.IsRegister()) { + if (is_long) { + __ bsrq(tmp, src.AsRegister<CpuRegister>()); + } else { + __ bsrl(tmp, src.AsRegister<CpuRegister>()); + } + } else if (is_long) { + DCHECK(src.IsDoubleStackSlot()); + __ bsrq(tmp, Address(CpuRegister(RSP), src.GetStackIndex())); + } else { + DCHECK(src.IsStackSlot()); + __ bsrl(tmp, Address(CpuRegister(RSP), src.GetStackIndex())); + } + // BSR sets ZF if the input was zero. + NearLabel is_zero, done; + __ j(kEqual, &is_zero); + __ movl(out, Immediate(1)); // Clears upper bits too. if (is_long) { - __ bsrq(tmp, src.AsRegister<CpuRegister>()); + __ shlq(out, tmp); } else { - __ bsrl(tmp, src.AsRegister<CpuRegister>()); + __ shll(out, tmp); } - } else if (is_long) { - DCHECK(src.IsDoubleStackSlot()); - __ bsrq(tmp, Address(CpuRegister(RSP), src.GetStackIndex())); - } else { - DCHECK(src.IsStackSlot()); - __ bsrl(tmp, Address(CpuRegister(RSP), src.GetStackIndex())); - } - // BSR sets ZF if the input was zero. - NearLabel is_zero, done; - __ j(kEqual, &is_zero); - __ movl(out, Immediate(1)); // Clears upper bits too. - if (is_long) { - __ shlq(out, tmp); - } else { - __ shll(out, tmp); - } - __ jmp(&done); - __ Bind(&is_zero); - __ xorl(out, out); // Clears upper bits too. - __ Bind(&done); - } else { - // Copy input into temporary. - if (src.IsRegister()) { + __ jmp(&done); + __ Bind(&is_zero); + __ xorl(out, out); // Clears upper bits too. + __ Bind(&done); + } else { + // Copy input into temporary. 
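// Sketch (not from the patch): the GenOneBit change around this point adds a BLSI fast
// path for Integer/Long.lowestOneBit when the target supports it; BLSI computes exactly
// what the fallback path below still does with the classic bit twiddle:
#include <cstdint>
inline uint64_t LowestOneBit(uint64_t x) {
  return x & (~x + 1u);   // equivalent to x & -x; isolates the lowest set bit, zero stays zero
}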
+ if (src.IsRegister()) { + if (is_long) { + __ movq(tmp, src.AsRegister<CpuRegister>()); + } else { + __ movl(tmp, src.AsRegister<CpuRegister>()); + } + } else if (is_long) { + DCHECK(src.IsDoubleStackSlot()); + __ movq(tmp, Address(CpuRegister(RSP), src.GetStackIndex())); + } else { + DCHECK(src.IsStackSlot()); + __ movl(tmp, Address(CpuRegister(RSP), src.GetStackIndex())); + } + // Do the bit twiddling: basically tmp & -tmp; if (is_long) { - __ movq(tmp, src.AsRegister<CpuRegister>()); + __ movq(out, tmp); + __ negq(tmp); + __ andq(out, tmp); } else { - __ movl(tmp, src.AsRegister<CpuRegister>()); + __ movl(out, tmp); + __ negl(tmp); + __ andl(out, tmp); } - } else if (is_long) { - DCHECK(src.IsDoubleStackSlot()); - __ movq(tmp, Address(CpuRegister(RSP), src.GetStackIndex())); - } else { - DCHECK(src.IsStackSlot()); - __ movl(tmp, Address(CpuRegister(RSP), src.GetStackIndex())); - } - // Do the bit twiddling: basically tmp & -tmp; - if (is_long) { - __ movq(out, tmp); - __ negq(tmp); - __ andq(out, tmp); - } else { - __ movl(out, tmp); - __ negl(tmp); - __ andl(out, tmp); } } } void IntrinsicLocationsBuilderX86_64::VisitIntegerHighestOneBit(HInvoke* invoke) { - CreateOneBitLocations(allocator_, invoke, /* is_high */ true); + CreateOneBitLocations(allocator_, invoke, /* is_high= */ true); } void IntrinsicCodeGeneratorX86_64::VisitIntegerHighestOneBit(HInvoke* invoke) { - GenOneBit(GetAssembler(), codegen_, invoke, /* is_high */ true, /* is_long */ false); + GenOneBit(GetAssembler(), codegen_, invoke, /* is_high= */ true, /* is_long= */ false); } void IntrinsicLocationsBuilderX86_64::VisitLongHighestOneBit(HInvoke* invoke) { - CreateOneBitLocations(allocator_, invoke, /* is_high */ true); + CreateOneBitLocations(allocator_, invoke, /* is_high= */ true); } void IntrinsicCodeGeneratorX86_64::VisitLongHighestOneBit(HInvoke* invoke) { - GenOneBit(GetAssembler(), codegen_, invoke, /* is_high */ true, /* is_long */ true); + GenOneBit(GetAssembler(), codegen_, invoke, /* is_high= */ true, /* is_long= */ true); } void IntrinsicLocationsBuilderX86_64::VisitIntegerLowestOneBit(HInvoke* invoke) { - CreateOneBitLocations(allocator_, invoke, /* is_high */ false); + CreateOneBitLocations(allocator_, invoke, /* is_high= */ false); } void IntrinsicCodeGeneratorX86_64::VisitIntegerLowestOneBit(HInvoke* invoke) { - GenOneBit(GetAssembler(), codegen_, invoke, /* is_high */ false, /* is_long */ false); + GenOneBit(GetAssembler(), codegen_, invoke, /* is_high= */ false, /* is_long= */ false); } void IntrinsicLocationsBuilderX86_64::VisitLongLowestOneBit(HInvoke* invoke) { - CreateOneBitLocations(allocator_, invoke, /* is_high */ false); + CreateOneBitLocations(allocator_, invoke, /* is_high= */ false); } void IntrinsicCodeGeneratorX86_64::VisitLongLowestOneBit(HInvoke* invoke) { - GenOneBit(GetAssembler(), codegen_, invoke, /* is_high */ false, /* is_long */ true); + GenOneBit(GetAssembler(), codegen_, invoke, /* is_high= */ false, /* is_long= */ true); } static void CreateLeadingZeroLocations(ArenaAllocator* allocator, HInvoke* invoke) { @@ -2869,7 +2577,7 @@ void IntrinsicLocationsBuilderX86_64::VisitIntegerNumberOfLeadingZeros(HInvoke* } void IntrinsicCodeGeneratorX86_64::VisitIntegerNumberOfLeadingZeros(HInvoke* invoke) { - GenLeadingZeros(GetAssembler(), codegen_, invoke, /* is_long */ false); + GenLeadingZeros(GetAssembler(), codegen_, invoke, /* is_long= */ false); } void IntrinsicLocationsBuilderX86_64::VisitLongNumberOfLeadingZeros(HInvoke* invoke) { @@ -2877,7 +2585,7 @@ void 
IntrinsicLocationsBuilderX86_64::VisitLongNumberOfLeadingZeros(HInvoke* inv } void IntrinsicCodeGeneratorX86_64::VisitLongNumberOfLeadingZeros(HInvoke* invoke) { - GenLeadingZeros(GetAssembler(), codegen_, invoke, /* is_long */ true); + GenLeadingZeros(GetAssembler(), codegen_, invoke, /* is_long= */ true); } static void CreateTrailingZeroLocations(ArenaAllocator* allocator, HInvoke* invoke) { @@ -2937,7 +2645,7 @@ void IntrinsicLocationsBuilderX86_64::VisitIntegerNumberOfTrailingZeros(HInvoke* } void IntrinsicCodeGeneratorX86_64::VisitIntegerNumberOfTrailingZeros(HInvoke* invoke) { - GenTrailingZeros(GetAssembler(), codegen_, invoke, /* is_long */ false); + GenTrailingZeros(GetAssembler(), codegen_, invoke, /* is_long= */ false); } void IntrinsicLocationsBuilderX86_64::VisitLongNumberOfTrailingZeros(HInvoke* invoke) { @@ -2945,7 +2653,7 @@ void IntrinsicLocationsBuilderX86_64::VisitLongNumberOfTrailingZeros(HInvoke* in } void IntrinsicCodeGeneratorX86_64::VisitLongNumberOfTrailingZeros(HInvoke* invoke) { - GenTrailingZeros(GetAssembler(), codegen_, invoke, /* is_long */ true); + GenTrailingZeros(GetAssembler(), codegen_, invoke, /* is_long= */ true); } void IntrinsicLocationsBuilderX86_64::VisitIntegerValueOf(HInvoke* invoke) { @@ -2958,58 +2666,49 @@ void IntrinsicLocationsBuilderX86_64::VisitIntegerValueOf(HInvoke* invoke) { } void IntrinsicCodeGeneratorX86_64::VisitIntegerValueOf(HInvoke* invoke) { - IntrinsicVisitor::IntegerValueOfInfo info = IntrinsicVisitor::ComputeIntegerValueOfInfo(); + IntrinsicVisitor::IntegerValueOfInfo info = + IntrinsicVisitor::ComputeIntegerValueOfInfo(invoke, codegen_->GetCompilerOptions()); LocationSummary* locations = invoke->GetLocations(); X86_64Assembler* assembler = GetAssembler(); CpuRegister out = locations->Out().AsRegister<CpuRegister>(); InvokeRuntimeCallingConvention calling_convention; - if (invoke->InputAt(0)->IsConstant()) { + CpuRegister argument = CpuRegister(calling_convention.GetRegisterAt(0)); + if (invoke->InputAt(0)->IsIntConstant()) { int32_t value = invoke->InputAt(0)->AsIntConstant()->GetValue(); - if (value >= info.low && value <= info.high) { + if (static_cast<uint32_t>(value - info.low) < info.length) { // Just embed the j.l.Integer in the code. - ScopedObjectAccess soa(Thread::Current()); - mirror::Object* boxed = info.cache->Get(value + (-info.low)); - DCHECK(boxed != nullptr && Runtime::Current()->GetHeap()->ObjectIsInBootImageSpace(boxed)); - uint32_t address = dchecked_integral_cast<uint32_t>(reinterpret_cast<uintptr_t>(boxed)); - __ movl(out, Immediate(static_cast<int32_t>(address))); + DCHECK_NE(info.value_boot_image_reference, IntegerValueOfInfo::kInvalidReference); + codegen_->LoadBootImageAddress(out, info.value_boot_image_reference); } else { + DCHECK(locations->CanCall()); // Allocate and initialize a new j.l.Integer. // TODO: If we JIT, we could allocate the j.l.Integer now, and store it in the // JIT object table. 
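// Sketch (not from the patch): the Integer.valueOf rewrite in this hunk stops embedding
// raw mirror::Object* addresses (which needed a ScopedObjectAccess and were only valid for
// the current heap layout) and instead loads boot-image references through
// codegen_->LoadBootImageAddress(), with the slow path funneled through
// AllocateInstanceForIntrinsic(). The cache logic itself is unchanged in spirit:
// compute index = value - low, use the cached box when index < length, otherwise allocate.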
- CpuRegister argument = CpuRegister(calling_convention.GetRegisterAt(0)); - uint32_t address = dchecked_integral_cast<uint32_t>(reinterpret_cast<uintptr_t>(info.integer)); - __ movl(argument, Immediate(static_cast<int32_t>(address))); - codegen_->InvokeRuntime(kQuickAllocObjectInitialized, invoke, invoke->GetDexPc()); - CheckEntrypointTypes<kQuickAllocObjectWithChecks, void*, mirror::Class*>(); + codegen_->AllocateInstanceForIntrinsic(invoke->AsInvokeStaticOrDirect(), + info.integer_boot_image_offset); __ movl(Address(out, info.value_offset), Immediate(value)); } } else { + DCHECK(locations->CanCall()); CpuRegister in = locations->InAt(0).AsRegister<CpuRegister>(); // Check bounds of our cache. __ leal(out, Address(in, -info.low)); - __ cmpl(out, Immediate(info.high - info.low + 1)); + __ cmpl(out, Immediate(info.length)); NearLabel allocate, done; __ j(kAboveEqual, &allocate); // If the value is within the bounds, load the j.l.Integer directly from the array. - uint32_t data_offset = mirror::Array::DataOffset(kHeapReferenceSize).Uint32Value(); - uint32_t address = dchecked_integral_cast<uint32_t>(reinterpret_cast<uintptr_t>(info.cache)); - if (data_offset + address <= std::numeric_limits<int32_t>::max()) { - __ movl(out, Address(out, TIMES_4, data_offset + address)); - } else { - CpuRegister temp = CpuRegister(calling_convention.GetRegisterAt(0)); - __ movl(temp, Immediate(static_cast<int32_t>(data_offset + address))); - __ movl(out, Address(temp, out, TIMES_4, 0)); - } + DCHECK_NE(out.AsRegister(), argument.AsRegister()); + codegen_->LoadBootImageAddress(argument, info.array_data_boot_image_reference); + static_assert((1u << TIMES_4) == sizeof(mirror::HeapReference<mirror::Object>), + "Check heap reference size."); + __ movl(out, Address(argument, out, TIMES_4, 0)); __ MaybeUnpoisonHeapReference(out); __ jmp(&done); __ Bind(&allocate); // Otherwise allocate and initialize a new j.l.Integer. 
- CpuRegister argument = CpuRegister(calling_convention.GetRegisterAt(0)); - address = dchecked_integral_cast<uint32_t>(reinterpret_cast<uintptr_t>(info.integer)); - __ movl(argument, Immediate(static_cast<int32_t>(address))); - codegen_->InvokeRuntime(kQuickAllocObjectInitialized, invoke, invoke->GetDexPc()); - CheckEntrypointTypes<kQuickAllocObjectWithChecks, void*, mirror::Class*>(); + codegen_->AllocateInstanceForIntrinsic(invoke->AsInvokeStaticOrDirect(), + info.integer_boot_image_offset); __ movl(Address(out, info.value_offset), in); __ Bind(&done); } @@ -3025,7 +2724,7 @@ void IntrinsicCodeGeneratorX86_64::VisitThreadInterrupted(HInvoke* invoke) { X86_64Assembler* assembler = GetAssembler(); CpuRegister out = invoke->GetLocations()->Out().AsRegister<CpuRegister>(); Address address = Address::Absolute - (Thread::InterruptedOffset<kX86_64PointerSize>().Int32Value(), /* no_rip */ true); + (Thread::InterruptedOffset<kX86_64PointerSize>().Int32Value(), /* no_rip= */ true); NearLabel done; __ gs()->movl(out, address); __ testl(out, out); @@ -3046,6 +2745,9 @@ void IntrinsicCodeGeneratorX86_64::VisitReachabilityFence(HInvoke* invoke ATTRIB UNIMPLEMENTED_INTRINSIC(X86_64, ReferenceGetReferent) UNIMPLEMENTED_INTRINSIC(X86_64, FloatIsInfinite) UNIMPLEMENTED_INTRINSIC(X86_64, DoubleIsInfinite) +UNIMPLEMENTED_INTRINSIC(X86_64, CRC32Update) +UNIMPLEMENTED_INTRINSIC(X86_64, CRC32UpdateBytes) +UNIMPLEMENTED_INTRINSIC(X86_64, CRC32UpdateByteBuffer) UNIMPLEMENTED_INTRINSIC(X86_64, StringStringIndexOf); UNIMPLEMENTED_INTRINSIC(X86_64, StringStringIndexOfAfter); diff --git a/compiler/optimizing/intrinsics_x86_64.h b/compiler/optimizing/intrinsics_x86_64.h index 5cb601edfe..199cfede1a 100644 --- a/compiler/optimizing/intrinsics_x86_64.h +++ b/compiler/optimizing/intrinsics_x86_64.h @@ -30,14 +30,14 @@ namespace x86_64 { class CodeGeneratorX86_64; class X86_64Assembler; -class IntrinsicLocationsBuilderX86_64 FINAL : public IntrinsicVisitor { +class IntrinsicLocationsBuilderX86_64 final : public IntrinsicVisitor { public: explicit IntrinsicLocationsBuilderX86_64(CodeGeneratorX86_64* codegen); // Define visitor methods. #define OPTIMIZING_INTRINSICS(Name, IsStatic, NeedsEnvironmentOrCache, SideEffects, Exceptions, ...) \ - void Visit ## Name(HInvoke* invoke) OVERRIDE; + void Visit ## Name(HInvoke* invoke) override; #include "intrinsics_list.h" INTRINSICS_LIST(OPTIMIZING_INTRINSICS) #undef INTRINSICS_LIST @@ -55,14 +55,14 @@ class IntrinsicLocationsBuilderX86_64 FINAL : public IntrinsicVisitor { DISALLOW_COPY_AND_ASSIGN(IntrinsicLocationsBuilderX86_64); }; -class IntrinsicCodeGeneratorX86_64 FINAL : public IntrinsicVisitor { +class IntrinsicCodeGeneratorX86_64 final : public IntrinsicVisitor { public: explicit IntrinsicCodeGeneratorX86_64(CodeGeneratorX86_64* codegen) : codegen_(codegen) {} // Define visitor methods. #define OPTIMIZING_INTRINSICS(Name, IsStatic, NeedsEnvironmentOrCache, SideEffects, Exceptions, ...) \ - void Visit ## Name(HInvoke* invoke) OVERRIDE; + void Visit ## Name(HInvoke* invoke) override; #include "intrinsics_list.h" INTRINSICS_LIST(OPTIMIZING_INTRINSICS) #undef INTRINSICS_LIST diff --git a/compiler/optimizing/licm.cc b/compiler/optimizing/licm.cc index d3a0376e9c..0edb23b857 100644 --- a/compiler/optimizing/licm.cc +++ b/compiler/optimizing/licm.cc @@ -78,7 +78,8 @@ static void UpdateLoopPhisIn(HEnvironment* environment, HLoopInformation* info) } } -void LICM::Run() { +bool LICM::Run() { + bool didLICM = false; DCHECK(side_effects_.HasRun()); // Only used during debug. 
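LICM::Run(), like the other passes converted in this change, now reports whether it modified the graph. A sketch of how a caller can consume such a flag; the driver shown here is hypothetical and is not ART's pass manager:

#include <vector>

// Hypothetical pass interface mirroring the bool-returning Run() above.
class Pass {
 public:
  virtual ~Pass() {}
  virtual bool Run() = 0;  // true iff the pass changed the graph
};

// With the return value, a driver can stop iterating (or skip re-running
// dependent analyses) as soon as a whole sweep reports no change.
bool RunToFixedPoint(const std::vector<Pass*>& passes, int max_sweeps) {
  bool changed_any = false;
  for (int sweep = 0; sweep < max_sweeps; ++sweep) {
    bool changed = false;
    for (Pass* pass : passes) {
      changed = pass->Run() || changed;
    }
    if (!changed) {
      break;
    }
    changed_any = true;
  }
  return changed_any;
}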
@@ -157,6 +158,7 @@ void LICM::Run() { } instruction->MoveBefore(pre_header->GetLastInstruction()); MaybeRecordStat(stats_, MethodCompilationStat::kLoopInvariantMoved); + didLICM = true; } if (!can_move && (instruction->CanThrow() || instruction->DoesAnyWrite())) { @@ -167,6 +169,7 @@ void LICM::Run() { } } } + return didLICM; } } // namespace art diff --git a/compiler/optimizing/licm.h b/compiler/optimizing/licm.h index ee567aeb20..9cafddb05a 100644 --- a/compiler/optimizing/licm.h +++ b/compiler/optimizing/licm.h @@ -33,7 +33,7 @@ class LICM : public HOptimization { : HOptimization(graph, name, stats), side_effects_(side_effects) {} - void Run() OVERRIDE; + bool Run() override; static constexpr const char* kLoopInvariantCodeMotionPassName = "licm"; diff --git a/compiler/optimizing/linearize_test.cc b/compiler/optimizing/linearize_test.cc index 9fa5b74c62..50bfe843b5 100644 --- a/compiler/optimizing/linearize_test.cc +++ b/compiler/optimizing/linearize_test.cc @@ -16,11 +16,9 @@ #include <fstream> -#include "arch/x86/instruction_set_features_x86.h" #include "base/arena_allocator.h" #include "builder.h" #include "code_generator.h" -#include "code_generator_x86.h" #include "dex/dex_file.h" #include "dex/dex_instruction.h" #include "driver/compiler_options.h" @@ -43,10 +41,8 @@ template <size_t number_of_blocks> void LinearizeTest::TestCode(const std::vector<uint16_t>& data, const uint32_t (&expected_order)[number_of_blocks]) { HGraph* graph = CreateCFG(data); - std::unique_ptr<const X86InstructionSetFeatures> features_x86( - X86InstructionSetFeatures::FromCppDefines()); - x86::CodeGeneratorX86 codegen(graph, *features_x86.get(), CompilerOptions()); - SsaLivenessAnalysis liveness(graph, &codegen, GetScopedAllocator()); + std::unique_ptr<CodeGenerator> codegen = CodeGenerator::Create(graph, *compiler_options_); + SsaLivenessAnalysis liveness(graph, codegen.get(), GetScopedAllocator()); liveness.Analyze(); ASSERT_EQ(graph->GetLinearOrder().size(), number_of_blocks); diff --git a/compiler/optimizing/live_ranges_test.cc b/compiler/optimizing/live_ranges_test.cc index 66660662e4..60f513ca48 100644 --- a/compiler/optimizing/live_ranges_test.cc +++ b/compiler/optimizing/live_ranges_test.cc @@ -14,11 +14,9 @@ * limitations under the License. */ -#include "arch/x86/instruction_set_features_x86.h" #include "base/arena_allocator.h" #include "builder.h" #include "code_generator.h" -#include "code_generator_x86.h" #include "dex/dex_file.h" #include "dex/dex_instruction.h" #include "driver/compiler_options.h" @@ -40,7 +38,7 @@ HGraph* LiveRangesTest::BuildGraph(const std::vector<uint16_t>& data) { // on how instructions are ordered. RemoveSuspendChecks(graph); // `Inline` conditions into ifs. 
- PrepareForRegisterAllocation(graph).Run(); + PrepareForRegisterAllocation(graph, *compiler_options_).Run(); return graph; } @@ -63,10 +61,8 @@ TEST_F(LiveRangesTest, CFG1) { HGraph* graph = BuildGraph(data); - std::unique_ptr<const X86InstructionSetFeatures> features_x86( - X86InstructionSetFeatures::FromCppDefines()); - x86::CodeGeneratorX86 codegen(graph, *features_x86.get(), CompilerOptions()); - SsaLivenessAnalysis liveness(graph, &codegen, GetScopedAllocator()); + std::unique_ptr<CodeGenerator> codegen = CodeGenerator::Create(graph, *compiler_options_); + SsaLivenessAnalysis liveness(graph, codegen.get(), GetScopedAllocator()); liveness.Analyze(); LiveInterval* interval = liveness.GetInstructionFromSsaIndex(0)->GetLiveInterval(); @@ -109,10 +105,8 @@ TEST_F(LiveRangesTest, CFG2) { Instruction::RETURN | 0 << 8); HGraph* graph = BuildGraph(data); - std::unique_ptr<const X86InstructionSetFeatures> features_x86( - X86InstructionSetFeatures::FromCppDefines()); - x86::CodeGeneratorX86 codegen(graph, *features_x86.get(), CompilerOptions()); - SsaLivenessAnalysis liveness(graph, &codegen, GetScopedAllocator()); + std::unique_ptr<CodeGenerator> codegen = CodeGenerator::Create(graph, *compiler_options_); + SsaLivenessAnalysis liveness(graph, codegen.get(), GetScopedAllocator()); liveness.Analyze(); LiveInterval* interval = liveness.GetInstructionFromSsaIndex(0)->GetLiveInterval(); @@ -158,10 +152,8 @@ TEST_F(LiveRangesTest, CFG3) { Instruction::RETURN | 0 << 8); HGraph* graph = BuildGraph(data); - std::unique_ptr<const X86InstructionSetFeatures> features_x86( - X86InstructionSetFeatures::FromCppDefines()); - x86::CodeGeneratorX86 codegen(graph, *features_x86.get(), CompilerOptions()); - SsaLivenessAnalysis liveness(graph, &codegen, GetScopedAllocator()); + std::unique_ptr<CodeGenerator> codegen = CodeGenerator::Create(graph, *compiler_options_); + SsaLivenessAnalysis liveness(graph, codegen.get(), GetScopedAllocator()); liveness.Analyze(); // Test for the 4 constant. @@ -235,10 +227,8 @@ TEST_F(LiveRangesTest, Loop1) { HGraph* graph = BuildGraph(data); RemoveSuspendChecks(graph); - std::unique_ptr<const X86InstructionSetFeatures> features_x86( - X86InstructionSetFeatures::FromCppDefines()); - x86::CodeGeneratorX86 codegen(graph, *features_x86.get(), CompilerOptions()); - SsaLivenessAnalysis liveness(graph, &codegen, GetScopedAllocator()); + std::unique_ptr<CodeGenerator> codegen = CodeGenerator::Create(graph, *compiler_options_); + SsaLivenessAnalysis liveness(graph, codegen.get(), GetScopedAllocator()); liveness.Analyze(); // Test for the 0 constant. @@ -312,10 +302,8 @@ TEST_F(LiveRangesTest, Loop2) { Instruction::RETURN | 0 << 8); HGraph* graph = BuildGraph(data); - std::unique_ptr<const X86InstructionSetFeatures> features_x86( - X86InstructionSetFeatures::FromCppDefines()); - x86::CodeGeneratorX86 codegen(graph, *features_x86.get(), CompilerOptions()); - SsaLivenessAnalysis liveness(graph, &codegen, GetScopedAllocator()); + std::unique_ptr<CodeGenerator> codegen = CodeGenerator::Create(graph, *compiler_options_); + SsaLivenessAnalysis liveness(graph, codegen.get(), GetScopedAllocator()); liveness.Analyze(); // Test for the 0 constant. 
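These liveness tests now obtain their code generator from CodeGenerator::Create(graph, *compiler_options_) instead of constructing the x86 backend directly, so they follow whatever ISA the test's compiler options select. A rough sketch of that factory shape with hypothetical stand-in types; the real Create() is not part of this hunk:

#include <memory>

// Hypothetical stand-ins; only the shape of the factory matters here.
enum class Isa { kArm64, kX86, kX86_64 };
struct Graph {};
struct Options { Isa isa; };

class CodeGen {
 public:
  virtual ~CodeGen() {}
};
class Arm64CodeGen : public CodeGen {};
class X86_64CodeGen : public CodeGen {};

// A Create()-style factory keeps target-independent tests (and callers in
// general) from naming a concrete backend; unsupported targets surface as
// nullptr instead of a compile-time dependency.
std::unique_ptr<CodeGen> Create(Graph* /* graph */, const Options& options) {
  switch (options.isa) {
    case Isa::kArm64:  return std::make_unique<Arm64CodeGen>();
    case Isa::kX86_64: return std::make_unique<X86_64CodeGen>();
    default:           return nullptr;
  }
}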
@@ -388,10 +376,8 @@ TEST_F(LiveRangesTest, CFG4) { Instruction::RETURN); HGraph* graph = BuildGraph(data); - std::unique_ptr<const X86InstructionSetFeatures> features_x86( - X86InstructionSetFeatures::FromCppDefines()); - x86::CodeGeneratorX86 codegen(graph, *features_x86.get(), CompilerOptions()); - SsaLivenessAnalysis liveness(graph, &codegen, GetScopedAllocator()); + std::unique_ptr<CodeGenerator> codegen = CodeGenerator::Create(graph, *compiler_options_); + SsaLivenessAnalysis liveness(graph, codegen.get(), GetScopedAllocator()); liveness.Analyze(); // Test for the 0 constant. diff --git a/compiler/optimizing/liveness_test.cc b/compiler/optimizing/liveness_test.cc index 6621a03568..f11f7a9779 100644 --- a/compiler/optimizing/liveness_test.cc +++ b/compiler/optimizing/liveness_test.cc @@ -14,11 +14,9 @@ * limitations under the License. */ -#include "arch/x86/instruction_set_features_x86.h" #include "base/arena_allocator.h" #include "builder.h" #include "code_generator.h" -#include "code_generator_x86.h" #include "dex/dex_file.h" #include "dex/dex_instruction.h" #include "driver/compiler_options.h" @@ -49,11 +47,9 @@ static void DumpBitVector(BitVector* vector, void LivenessTest::TestCode(const std::vector<uint16_t>& data, const char* expected) { HGraph* graph = CreateCFG(data); // `Inline` conditions into ifs. - PrepareForRegisterAllocation(graph).Run(); - std::unique_ptr<const X86InstructionSetFeatures> features_x86( - X86InstructionSetFeatures::FromCppDefines()); - x86::CodeGeneratorX86 codegen(graph, *features_x86.get(), CompilerOptions()); - SsaLivenessAnalysis liveness(graph, &codegen, GetScopedAllocator()); + PrepareForRegisterAllocation(graph, *compiler_options_).Run(); + std::unique_ptr<CodeGenerator> codegen = CodeGenerator::Create(graph, *compiler_options_); + SsaLivenessAnalysis liveness(graph, codegen.get(), GetScopedAllocator()); liveness.Analyze(); std::ostringstream buffer; diff --git a/compiler/optimizing/load_store_analysis.cc b/compiler/optimizing/load_store_analysis.cc index 8b1812a6de..7d7bb94933 100644 --- a/compiler/optimizing/load_store_analysis.cc +++ b/compiler/optimizing/load_store_analysis.cc @@ -152,7 +152,7 @@ bool HeapLocationCollector::CanArrayElementsAlias(const HInstruction* idx1, return true; } -void LoadStoreAnalysis::Run() { +bool LoadStoreAnalysis::Run() { for (HBasicBlock* block : graph_->GetReversePostOrder()) { heap_location_collector_.VisitBasicBlock(block); } @@ -160,22 +160,23 @@ void LoadStoreAnalysis::Run() { if (heap_location_collector_.GetNumberOfHeapLocations() > kMaxNumberOfHeapLocations) { // Bail out if there are too many heap locations to deal with. heap_location_collector_.CleanUp(); - return; + return false; } if (!heap_location_collector_.HasHeapStores()) { // Without heap stores, this pass would act mostly as GVN on heap accesses. heap_location_collector_.CleanUp(); - return; + return false; } if (heap_location_collector_.HasVolatile() || heap_location_collector_.HasMonitorOps()) { // Don't do load/store elimination if the method has volatile field accesses or // monitor operations, for now. // TODO: do it right. 
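LoadStoreAnalysis::Run() now tells its caller whether it produced usable results, returning false on each bail-out. The cap on the number of heap locations exists because the alias information kept for the pass is pairwise over locations; a toy dense version of such a symmetric may-alias matrix (the real pass stores this more compactly in arena memory):

#include <cstddef>
#include <vector>

// Toy may-alias matrix over N heap locations. It is O(N^2) in space and is
// filled by a pairwise O(N^2) pass, which is why the analysis gives up (and
// now returns false) once there are too many locations.
class AliasMatrix {
 public:
  explicit AliasMatrix(size_t n) : n_(n), bits_(n * n, false) {}

  void SetMayAlias(size_t i, size_t j) {
    bits_[i * n_ + j] = true;
    bits_[j * n_ + i] = true;
  }

  bool MayAlias(size_t i, size_t j) const {
    return i == j || bits_[i * n_ + j];
  }

 private:
  size_t n_;
  std::vector<bool> bits_;
};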
heap_location_collector_.CleanUp(); - return; + return false; } heap_location_collector_.BuildAliasingMatrix(); + return true; } } // namespace art diff --git a/compiler/optimizing/load_store_analysis.h b/compiler/optimizing/load_store_analysis.h index 437e6be418..08d9309a3e 100644 --- a/compiler/optimizing/load_store_analysis.h +++ b/compiler/optimizing/load_store_analysis.h @@ -94,11 +94,13 @@ class HeapLocation : public ArenaObject<kArenaAllocLSA> { static constexpr int16_t kDeclaringClassDefIndexForArrays = -1; HeapLocation(ReferenceInfo* ref_info, + DataType::Type type, size_t offset, HInstruction* index, size_t vector_length, int16_t declaring_class_def_index) : ref_info_(ref_info), + type_(DataType::ToSigned(type)), offset_(offset), index_(index), vector_length_(vector_length), @@ -116,6 +118,7 @@ class HeapLocation : public ArenaObject<kArenaAllocLSA> { } ReferenceInfo* GetReferenceInfo() const { return ref_info_; } + DataType::Type GetType() const { return type_; } size_t GetOffset() const { return offset_; } HInstruction* GetIndex() const { return index_; } size_t GetVectorLength() const { return vector_length_; } @@ -149,6 +152,10 @@ class HeapLocation : public ArenaObject<kArenaAllocLSA> { private: // Reference for instance/static field, array element or vector data. ReferenceInfo* const ref_info_; + // Type of data residing at HeapLocation (always signed for integral + // data since e.g. a[i] and a[i] & 0xff are represented by differently + // signed types; char vs short are disambiguated through the reference). + const DataType::Type type_; // Offset of static/instance field. // Invalid when this HeapLocation is not field. const size_t offset_; @@ -237,19 +244,31 @@ class HeapLocationCollector : public HGraphVisitor { DCHECK(object != nullptr); DCHECK(field != nullptr); return FindHeapLocationIndex(FindReferenceInfoOf(HuntForOriginalReference(object)), + field->GetFieldType(), field->GetFieldOffset().SizeValue(), nullptr, HeapLocation::kScalar, field->GetDeclaringClassDefIndex()); } - size_t GetArrayHeapLocation(HInstruction* array, - HInstruction* index, - size_t vector_length = HeapLocation::kScalar) const { - DCHECK(array != nullptr); - DCHECK(index != nullptr); - DCHECK_GE(vector_length, HeapLocation::kScalar); + size_t GetArrayHeapLocation(HInstruction* instruction) const { + DCHECK(instruction != nullptr); + HInstruction* array = instruction->InputAt(0); + HInstruction* index = instruction->InputAt(1); + DataType::Type type = instruction->GetType(); + size_t vector_length = HeapLocation::kScalar; + if (instruction->IsArraySet()) { + type = instruction->AsArraySet()->GetComponentType(); + } else if (instruction->IsVecStore() || + instruction->IsVecLoad()) { + HVecOperation* vec_op = instruction->AsVecOperation(); + type = vec_op->GetPackedType(); + vector_length = vec_op->GetVectorLength(); + } else { + DCHECK(instruction->IsArrayGet()); + } return FindHeapLocationIndex(FindReferenceInfoOf(HuntForOriginalReference(array)), + type, HeapLocation::kInvalidFieldOffset, index, vector_length, @@ -279,13 +298,16 @@ class HeapLocationCollector : public HGraphVisitor { // In later analysis, ComputeMayAlias() and MayAlias() compute and tell whether // these indexes alias. 
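FindHeapLocationIndex below now also matches on the access type, canonicalized to its signed variant as the type_ field's comment explains, so that an Int8 load and the Uint8 load produced by a[i] & 0xff resolve to the same heap location. A standalone sketch of that keying, using an illustrative enum in place of DataType::Type:

#include <cstddef>
#include <cstdint>

// Illustrative stand-in for DataType::Type; only integral signedness matters here.
enum class Type { kBool, kInt8, kUint8, kInt16, kUint16, kInt32, kInt64, kReference };

// Canonicalize to the signed variant so that a signed and an unsigned view of
// the same element key to the same heap location.
Type ToSigned(Type type) {
  switch (type) {
    case Type::kUint8:  return Type::kInt8;
    case Type::kUint16: return Type::kInt16;
    default:            return type;
  }
}

// The fields a location is matched on, with the type stored pre-canonicalized.
struct HeapLocationKey {
  const void* ref_info;
  Type type;
  size_t offset;
  const void* index;
  size_t vector_length;
  int16_t declaring_class_def_index;
};

bool SameLocation(const HeapLocationKey& a, const HeapLocationKey& b) {
  return a.ref_info == b.ref_info &&
         ToSigned(a.type) == ToSigned(b.type) &&
         a.offset == b.offset &&
         a.index == b.index &&
         a.vector_length == b.vector_length &&
         a.declaring_class_def_index == b.declaring_class_def_index;
}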
size_t FindHeapLocationIndex(ReferenceInfo* ref_info, + DataType::Type type, size_t offset, HInstruction* index, size_t vector_length, int16_t declaring_class_def_index) const { + DataType::Type lookup_type = DataType::ToSigned(type); for (size_t i = 0; i < heap_locations_.size(); i++) { HeapLocation* loc = heap_locations_[i]; if (loc->GetReferenceInfo() == ref_info && + loc->GetType() == lookup_type && loc->GetOffset() == offset && loc->GetIndex() == index && loc->GetVectorLength() == vector_length && @@ -425,6 +447,7 @@ class HeapLocationCollector : public HGraphVisitor { } HeapLocation* GetOrCreateHeapLocation(HInstruction* ref, + DataType::Type type, size_t offset, HInstruction* index, size_t vector_length, @@ -432,10 +455,10 @@ class HeapLocationCollector : public HGraphVisitor { HInstruction* original_ref = HuntForOriginalReference(ref); ReferenceInfo* ref_info = GetOrCreateReferenceInfo(original_ref); size_t heap_location_idx = FindHeapLocationIndex( - ref_info, offset, index, vector_length, declaring_class_def_index); + ref_info, type, offset, index, vector_length, declaring_class_def_index); if (heap_location_idx == kHeapLocationNotFound) { HeapLocation* heap_loc = new (GetGraph()->GetAllocator()) - HeapLocation(ref_info, offset, index, vector_length, declaring_class_def_index); + HeapLocation(ref_info, type, offset, index, vector_length, declaring_class_def_index); heap_locations_.push_back(heap_loc); return heap_loc; } @@ -446,29 +469,35 @@ class HeapLocationCollector : public HGraphVisitor { if (field_info.IsVolatile()) { has_volatile_ = true; } + DataType::Type type = field_info.GetFieldType(); const uint16_t declaring_class_def_index = field_info.GetDeclaringClassDefIndex(); const size_t offset = field_info.GetFieldOffset().SizeValue(); return GetOrCreateHeapLocation(ref, + type, offset, nullptr, HeapLocation::kScalar, declaring_class_def_index); } - void VisitArrayAccess(HInstruction* array, HInstruction* index, size_t vector_length) { + void VisitArrayAccess(HInstruction* array, + HInstruction* index, + DataType::Type type, + size_t vector_length) { GetOrCreateHeapLocation(array, + type, HeapLocation::kInvalidFieldOffset, index, vector_length, HeapLocation::kDeclaringClassDefIndexForArrays); } - void VisitInstanceFieldGet(HInstanceFieldGet* instruction) OVERRIDE { + void VisitInstanceFieldGet(HInstanceFieldGet* instruction) override { VisitFieldAccess(instruction->InputAt(0), instruction->GetFieldInfo()); CreateReferenceInfoForReferenceType(instruction); } - void VisitInstanceFieldSet(HInstanceFieldSet* instruction) OVERRIDE { + void VisitInstanceFieldSet(HInstanceFieldSet* instruction) override { HeapLocation* location = VisitFieldAccess(instruction->InputAt(0), instruction->GetFieldInfo()); has_heap_stores_ = true; if (location->GetReferenceInfo()->IsSingleton()) { @@ -494,12 +523,12 @@ class HeapLocationCollector : public HGraphVisitor { } } - void VisitStaticFieldGet(HStaticFieldGet* instruction) OVERRIDE { + void VisitStaticFieldGet(HStaticFieldGet* instruction) override { VisitFieldAccess(instruction->InputAt(0), instruction->GetFieldInfo()); CreateReferenceInfoForReferenceType(instruction); } - void VisitStaticFieldSet(HStaticFieldSet* instruction) OVERRIDE { + void VisitStaticFieldSet(HStaticFieldSet* instruction) override { VisitFieldAccess(instruction->InputAt(0), instruction->GetFieldInfo()); has_heap_stores_ = true; } @@ -507,35 +536,39 @@ class HeapLocationCollector : public HGraphVisitor { // We intentionally don't collect 
HUnresolvedInstanceField/HUnresolvedStaticField accesses // since we cannot accurately track the fields. - void VisitArrayGet(HArrayGet* instruction) OVERRIDE { + void VisitArrayGet(HArrayGet* instruction) override { HInstruction* array = instruction->InputAt(0); HInstruction* index = instruction->InputAt(1); - VisitArrayAccess(array, index, HeapLocation::kScalar); + DataType::Type type = instruction->GetType(); + VisitArrayAccess(array, index, type, HeapLocation::kScalar); CreateReferenceInfoForReferenceType(instruction); } - void VisitArraySet(HArraySet* instruction) OVERRIDE { + void VisitArraySet(HArraySet* instruction) override { HInstruction* array = instruction->InputAt(0); HInstruction* index = instruction->InputAt(1); - VisitArrayAccess(array, index, HeapLocation::kScalar); + DataType::Type type = instruction->GetComponentType(); + VisitArrayAccess(array, index, type, HeapLocation::kScalar); has_heap_stores_ = true; } - void VisitVecLoad(HVecLoad* instruction) OVERRIDE { + void VisitVecLoad(HVecLoad* instruction) override { HInstruction* array = instruction->InputAt(0); HInstruction* index = instruction->InputAt(1); - VisitArrayAccess(array, index, instruction->GetVectorLength()); + DataType::Type type = instruction->GetPackedType(); + VisitArrayAccess(array, index, type, instruction->GetVectorLength()); CreateReferenceInfoForReferenceType(instruction); } - void VisitVecStore(HVecStore* instruction) OVERRIDE { + void VisitVecStore(HVecStore* instruction) override { HInstruction* array = instruction->InputAt(0); HInstruction* index = instruction->InputAt(1); - VisitArrayAccess(array, index, instruction->GetVectorLength()); + DataType::Type type = instruction->GetPackedType(); + VisitArrayAccess(array, index, type, instruction->GetVectorLength()); has_heap_stores_ = true; } - void VisitInstruction(HInstruction* instruction) OVERRIDE { + void VisitInstruction(HInstruction* instruction) override { // Any new-instance or new-array cannot alias with references that // pre-exist the new-instance/new-array. We append entries into // ref_info_array_ which keeps track of the order of creation @@ -547,7 +580,7 @@ class HeapLocationCollector : public HGraphVisitor { CreateReferenceInfoForReferenceType(instruction); } - void VisitMonitorOperation(HMonitorOperation* monitor ATTRIBUTE_UNUSED) OVERRIDE { + void VisitMonitorOperation(HMonitorOperation* monitor ATTRIBUTE_UNUSED) override { has_monitor_operations_ = true; } @@ -572,7 +605,7 @@ class LoadStoreAnalysis : public HOptimization { return heap_location_collector_; } - void Run() OVERRIDE; + bool Run() override; static constexpr const char* kLoadStoreAnalysisPassName = "load_store_analysis"; diff --git a/compiler/optimizing/load_store_analysis_test.cc b/compiler/optimizing/load_store_analysis_test.cc index 56361a8c90..bfe7a4f72f 100644 --- a/compiler/optimizing/load_store_analysis_test.cc +++ b/compiler/optimizing/load_store_analysis_test.cc @@ -78,12 +78,16 @@ TEST_F(LoadStoreAnalysisTest, ArrayHeapLocations) { // Test queries on HeapLocationCollector's ref info and index records. 
ReferenceInfo* ref = heap_location_collector.FindReferenceInfoOf(array); + DataType::Type type = DataType::Type::kInt32; size_t field = HeapLocation::kInvalidFieldOffset; size_t vec = HeapLocation::kScalar; size_t class_def = HeapLocation::kDeclaringClassDefIndexForArrays; - size_t loc1 = heap_location_collector.FindHeapLocationIndex(ref, field, c1, vec, class_def); - size_t loc2 = heap_location_collector.FindHeapLocationIndex(ref, field, c2, vec, class_def); - size_t loc3 = heap_location_collector.FindHeapLocationIndex(ref, field, index, vec, class_def); + size_t loc1 = heap_location_collector.FindHeapLocationIndex( + ref, type, field, c1, vec, class_def); + size_t loc2 = heap_location_collector.FindHeapLocationIndex( + ref, type, field, c2, vec, class_def); + size_t loc3 = heap_location_collector.FindHeapLocationIndex( + ref, type, field, index, vec, class_def); // must find this reference info for array in HeapLocationCollector. ASSERT_TRUE(ref != nullptr); // must find these heap locations; @@ -246,28 +250,28 @@ TEST_F(LoadStoreAnalysisTest, ArrayIndexAliasingTest) { size_t loc2 = HeapLocationCollector::kHeapLocationNotFound; // Test alias: array[0] and array[1] - loc1 = heap_location_collector.GetArrayHeapLocation(array, c0); - loc2 = heap_location_collector.GetArrayHeapLocation(array, c1); + loc1 = heap_location_collector.GetArrayHeapLocation(arr_set1); + loc2 = heap_location_collector.GetArrayHeapLocation(arr_set2); ASSERT_FALSE(heap_location_collector.MayAlias(loc1, loc2)); // Test alias: array[i+0] and array[i-0] - loc1 = heap_location_collector.GetArrayHeapLocation(array, add0); - loc2 = heap_location_collector.GetArrayHeapLocation(array, sub0); + loc1 = heap_location_collector.GetArrayHeapLocation(arr_set3); + loc2 = heap_location_collector.GetArrayHeapLocation(arr_set5); ASSERT_TRUE(heap_location_collector.MayAlias(loc1, loc2)); // Test alias: array[i+1] and array[i-1] - loc1 = heap_location_collector.GetArrayHeapLocation(array, add1); - loc2 = heap_location_collector.GetArrayHeapLocation(array, sub1); + loc1 = heap_location_collector.GetArrayHeapLocation(arr_set4); + loc2 = heap_location_collector.GetArrayHeapLocation(arr_set6); ASSERT_FALSE(heap_location_collector.MayAlias(loc1, loc2)); // Test alias: array[i+1] and array[1-i] - loc1 = heap_location_collector.GetArrayHeapLocation(array, add1); - loc2 = heap_location_collector.GetArrayHeapLocation(array, rev_sub1); + loc1 = heap_location_collector.GetArrayHeapLocation(arr_set4); + loc2 = heap_location_collector.GetArrayHeapLocation(arr_set7); ASSERT_TRUE(heap_location_collector.MayAlias(loc1, loc2)); // Test alias: array[i+1] and array[i-(-1)] - loc1 = heap_location_collector.GetArrayHeapLocation(array, add1); - loc2 = heap_location_collector.GetArrayHeapLocation(array, sub_neg1); + loc1 = heap_location_collector.GetArrayHeapLocation(arr_set4); + loc2 = heap_location_collector.GetArrayHeapLocation(arr_set8); ASSERT_TRUE(heap_location_collector.MayAlias(loc1, loc2)); } @@ -409,70 +413,75 @@ TEST_F(LoadStoreAnalysisTest, ArrayAliasingTest) { size_t loc1, loc2; // Test alias: array[0] and array[0,1,2,3] - loc1 = heap_location_collector.GetArrayHeapLocation(array, c0); - loc2 = heap_location_collector.GetArrayHeapLocation(array, c0, 4); + loc1 = heap_location_collector.GetArrayHeapLocation(arr_set_0); + loc2 = heap_location_collector.GetArrayHeapLocation(vstore_0); ASSERT_TRUE(heap_location_collector.MayAlias(loc1, loc2)); + // Test alias: array[0] and array[1,2,3,4] + loc1 = 
heap_location_collector.GetArrayHeapLocation(arr_set_0); + loc2 = heap_location_collector.GetArrayHeapLocation(vstore_1); + ASSERT_FALSE(heap_location_collector.MayAlias(loc1, loc2)); + // Test alias: array[0] and array[8,9,10,11] - loc1 = heap_location_collector.GetArrayHeapLocation(array, c0); - loc2 = heap_location_collector.GetArrayHeapLocation(array, c8, 4); + loc1 = heap_location_collector.GetArrayHeapLocation(arr_set_0); + loc2 = heap_location_collector.GetArrayHeapLocation(vstore_8); ASSERT_FALSE(heap_location_collector.MayAlias(loc1, loc2)); // Test alias: array[1] and array[8,9,10,11] - loc1 = heap_location_collector.GetArrayHeapLocation(array, c1); - loc2 = heap_location_collector.GetArrayHeapLocation(array, c8, 4); + loc1 = heap_location_collector.GetArrayHeapLocation(arr_set_1); + loc2 = heap_location_collector.GetArrayHeapLocation(vstore_8); ASSERT_FALSE(heap_location_collector.MayAlias(loc1, loc2)); // Test alias: array[1] and array[0,1,2,3] - loc1 = heap_location_collector.GetArrayHeapLocation(array, c1); - loc2 = heap_location_collector.GetArrayHeapLocation(array, c0, 4); + loc1 = heap_location_collector.GetArrayHeapLocation(arr_set_1); + loc2 = heap_location_collector.GetArrayHeapLocation(vstore_0); ASSERT_TRUE(heap_location_collector.MayAlias(loc1, loc2)); // Test alias: array[0,1,2,3] and array[8,9,10,11] - loc1 = heap_location_collector.GetArrayHeapLocation(array, c0, 4); - loc2 = heap_location_collector.GetArrayHeapLocation(array, c8, 4); + loc1 = heap_location_collector.GetArrayHeapLocation(vstore_0); + loc2 = heap_location_collector.GetArrayHeapLocation(vstore_8); ASSERT_FALSE(heap_location_collector.MayAlias(loc1, loc2)); // Test alias: array[0,1,2,3] and array[1,2,3,4] - loc1 = heap_location_collector.GetArrayHeapLocation(array, c1, 4); - loc2 = heap_location_collector.GetArrayHeapLocation(array, c0, 4); + loc1 = heap_location_collector.GetArrayHeapLocation(vstore_0); + loc2 = heap_location_collector.GetArrayHeapLocation(vstore_1); ASSERT_TRUE(heap_location_collector.MayAlias(loc1, loc2)); // Test alias: array[0] and array[i,i+1,i+2,i+3] - loc1 = heap_location_collector.GetArrayHeapLocation(array, c0); - loc2 = heap_location_collector.GetArrayHeapLocation(array, index, 4); + loc1 = heap_location_collector.GetArrayHeapLocation(arr_set_0); + loc2 = heap_location_collector.GetArrayHeapLocation(vstore_i); ASSERT_TRUE(heap_location_collector.MayAlias(loc1, loc2)); // Test alias: array[i] and array[0,1,2,3] - loc1 = heap_location_collector.GetArrayHeapLocation(array, index); - loc2 = heap_location_collector.GetArrayHeapLocation(array, c0, 4); + loc1 = heap_location_collector.GetArrayHeapLocation(arr_set_i); + loc2 = heap_location_collector.GetArrayHeapLocation(vstore_0); ASSERT_TRUE(heap_location_collector.MayAlias(loc1, loc2)); // Test alias: array[i] and array[i,i+1,i+2,i+3] - loc1 = heap_location_collector.GetArrayHeapLocation(array, index); - loc2 = heap_location_collector.GetArrayHeapLocation(array, index, 4); + loc1 = heap_location_collector.GetArrayHeapLocation(arr_set_i); + loc2 = heap_location_collector.GetArrayHeapLocation(vstore_i); ASSERT_TRUE(heap_location_collector.MayAlias(loc1, loc2)); // Test alias: array[i] and array[i+8,i+9,i+10,i+11] - loc1 = heap_location_collector.GetArrayHeapLocation(array, index); - loc2 = heap_location_collector.GetArrayHeapLocation(array, i_add8, 4); + loc1 = heap_location_collector.GetArrayHeapLocation(arr_set_i); + loc2 = heap_location_collector.GetArrayHeapLocation(vstore_i_add8); 
ASSERT_FALSE(heap_location_collector.MayAlias(loc1, loc2)); // Test alias: array[i+6,i+7,i+8,i+9] and array[i+8,i+9,i+10,i+11] // Test partial overlap. - loc1 = heap_location_collector.GetArrayHeapLocation(array, i_add6, 4); - loc2 = heap_location_collector.GetArrayHeapLocation(array, i_add8, 4); + loc1 = heap_location_collector.GetArrayHeapLocation(vstore_i_add6); + loc2 = heap_location_collector.GetArrayHeapLocation(vstore_i_add8); ASSERT_TRUE(heap_location_collector.MayAlias(loc1, loc2)); // Test alias: array[i+6,i+7] and array[i,i+1,i+2,i+3] // Test different vector lengths. - loc1 = heap_location_collector.GetArrayHeapLocation(array, i_add6, 2); - loc2 = heap_location_collector.GetArrayHeapLocation(array, index, 4); + loc1 = heap_location_collector.GetArrayHeapLocation(vstore_i_add6_vlen2); + loc2 = heap_location_collector.GetArrayHeapLocation(vstore_i); ASSERT_FALSE(heap_location_collector.MayAlias(loc1, loc2)); // Test alias: array[i+6,i+7] and array[i+8,i+9,i+10,i+11] - loc1 = heap_location_collector.GetArrayHeapLocation(array, i_add6, 2); - loc2 = heap_location_collector.GetArrayHeapLocation(array, i_add8, 4); + loc1 = heap_location_collector.GetArrayHeapLocation(vstore_i_add6_vlen2); + loc2 = heap_location_collector.GetArrayHeapLocation(vstore_i_add8); ASSERT_FALSE(heap_location_collector.MayAlias(loc1, loc2)); } @@ -563,33 +572,33 @@ TEST_F(LoadStoreAnalysisTest, ArrayIndexCalculationOverflowTest) { size_t loc2 = HeapLocationCollector::kHeapLocationNotFound; // Test alias: array[i+0x80000000] and array[i-0x80000000] - loc1 = heap_location_collector.GetArrayHeapLocation(array, add_0x80000000); - loc2 = heap_location_collector.GetArrayHeapLocation(array, sub_0x80000000); + loc1 = heap_location_collector.GetArrayHeapLocation(arr_set_1); + loc2 = heap_location_collector.GetArrayHeapLocation(arr_set_2); ASSERT_TRUE(heap_location_collector.MayAlias(loc1, loc2)); // Test alias: array[i+0x10] and array[i-0xFFFFFFF0] - loc1 = heap_location_collector.GetArrayHeapLocation(array, add_0x10); - loc2 = heap_location_collector.GetArrayHeapLocation(array, sub_0xFFFFFFF0); + loc1 = heap_location_collector.GetArrayHeapLocation(arr_set_3); + loc2 = heap_location_collector.GetArrayHeapLocation(arr_set_4); ASSERT_TRUE(heap_location_collector.MayAlias(loc1, loc2)); // Test alias: array[i+0x7FFFFFFF] and array[i-0x80000001] - loc1 = heap_location_collector.GetArrayHeapLocation(array, add_0x7FFFFFFF); - loc2 = heap_location_collector.GetArrayHeapLocation(array, sub_0x80000001); + loc1 = heap_location_collector.GetArrayHeapLocation(arr_set_5); + loc2 = heap_location_collector.GetArrayHeapLocation(arr_set_6); ASSERT_TRUE(heap_location_collector.MayAlias(loc1, loc2)); // Test alias: array[i+0] and array[i-0] - loc1 = heap_location_collector.GetArrayHeapLocation(array, add_0); - loc2 = heap_location_collector.GetArrayHeapLocation(array, sub_0); + loc1 = heap_location_collector.GetArrayHeapLocation(arr_set_7); + loc2 = heap_location_collector.GetArrayHeapLocation(arr_set_8); ASSERT_TRUE(heap_location_collector.MayAlias(loc1, loc2)); // Should not alias: - loc1 = heap_location_collector.GetArrayHeapLocation(array, sub_0x80000000); - loc2 = heap_location_collector.GetArrayHeapLocation(array, sub_0x80000001); + loc1 = heap_location_collector.GetArrayHeapLocation(arr_set_2); + loc2 = heap_location_collector.GetArrayHeapLocation(arr_set_6); ASSERT_FALSE(heap_location_collector.MayAlias(loc1, loc2)); // Should not alias: - loc1 = heap_location_collector.GetArrayHeapLocation(array, add_0); - loc2 = 
heap_location_collector.GetArrayHeapLocation(array, sub_0x80000000); + loc1 = heap_location_collector.GetArrayHeapLocation(arr_set_7); + loc2 = heap_location_collector.GetArrayHeapLocation(arr_set_2); ASSERT_FALSE(heap_location_collector.MayAlias(loc1, loc2)); } @@ -647,10 +656,10 @@ TEST_F(LoadStoreAnalysisTest, TestHuntOriginalRef) { // times the original reference has been transformed by BoundType, // NullCheck, IntermediateAddress, etc. ASSERT_EQ(heap_location_collector.GetNumberOfHeapLocations(), 1U); - size_t loc1 = heap_location_collector.GetArrayHeapLocation(array, c1); - size_t loc2 = heap_location_collector.GetArrayHeapLocation(bound_type, c1); - size_t loc3 = heap_location_collector.GetArrayHeapLocation(null_check, c1); - size_t loc4 = heap_location_collector.GetArrayHeapLocation(inter_addr, c1); + size_t loc1 = heap_location_collector.GetArrayHeapLocation(array_get1); + size_t loc2 = heap_location_collector.GetArrayHeapLocation(array_get2); + size_t loc3 = heap_location_collector.GetArrayHeapLocation(array_get3); + size_t loc4 = heap_location_collector.GetArrayHeapLocation(array_get4); ASSERT_TRUE(loc1 != HeapLocationCollector::kHeapLocationNotFound); ASSERT_EQ(loc1, loc2); ASSERT_EQ(loc1, loc3); diff --git a/compiler/optimizing/load_store_elimination.cc b/compiler/optimizing/load_store_elimination.cc index 237ecd3c10..b33d0f488e 100644 --- a/compiler/optimizing/load_store_elimination.cc +++ b/compiler/optimizing/load_store_elimination.cc @@ -107,7 +107,7 @@ class LSEVisitor : public HGraphDelegateVisitor { singleton_new_instances_(allocator_.Adapter(kArenaAllocLSE)) { } - void VisitBasicBlock(HBasicBlock* block) OVERRIDE { + void VisitBasicBlock(HBasicBlock* block) override { // Populate the heap_values array for this block. // TODO: try to reuse the heap_values array from one predecessor if possible. if (block->IsLoopHeader()) { @@ -160,7 +160,7 @@ class LSEVisitor : public HGraphDelegateVisitor { // Scan the list of removed loads to see if we can reuse `type_conversion`, if // the other removed load has the same substitute and type and is dominated - // by `type_conversioni`. + // by `type_conversion`. void TryToReuseTypeConversion(HInstruction* type_conversion, size_t index) { size_t size = removed_loads_.size(); HInstruction* load = removed_loads_[index]; @@ -458,8 +458,13 @@ class LSEVisitor : public HGraphDelegateVisitor { } if (from_all_predecessors) { if (ref_info->IsSingletonAndRemovable() && - block->IsSingleReturnOrReturnVoidAllowingPhis()) { - // Values in the singleton are not needed anymore. + (block->IsSingleReturnOrReturnVoidAllowingPhis() || + (block->EndsWithReturn() && (merged_value != kUnknownHeapValue || + merged_store_value != kUnknownHeapValue)))) { + // Values in the singleton are not needed anymore: + // (1) if this block consists of a sole return, or + // (2) if this block returns and a usable merged value is obtained + // (loads prior to the return will always use that value). } else if (!IsStore(merged_value)) { // We don't track merged value as a store anymore. We have to // hold the stores in predecessors live here. 
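A plain C++ analogy of the case the relaxed singleton condition above admits: a non-escaping ("singleton") object whose stores become dead once the loads before the return have been given the forwarded value. This only mirrors the data flow; no ART types are involved:

// Before store/load forwarding: 'box' never escapes, so the store is only
// observable through the following load.
struct Box { int value; };

int BeforeElimination(int x) {
  Box box;
  box.value = x;        // store to a singleton location
  int y = box.value;    // load from the same location
  return y;             // block ends with a return
}

// After forwarding the stored value into the load, the store (and the object)
// are no longer needed on this return path.
int AfterElimination(int x) {
  return x;
}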
@@ -542,16 +547,7 @@ class LSEVisitor : public HGraphDelegateVisitor { } } - void VisitGetLocation(HInstruction* instruction, - HInstruction* ref, - size_t offset, - HInstruction* index, - size_t vector_length, - int16_t declaring_class_def_index) { - HInstruction* original_ref = heap_location_collector_.HuntForOriginalReference(ref); - ReferenceInfo* ref_info = heap_location_collector_.FindReferenceInfoOf(original_ref); - size_t idx = heap_location_collector_.FindHeapLocationIndex( - ref_info, offset, index, vector_length, declaring_class_def_index); + void VisitGetLocation(HInstruction* instruction, size_t idx) { DCHECK_NE(idx, HeapLocationCollector::kHeapLocationNotFound); ScopedArenaVector<HInstruction*>& heap_values = heap_values_for_[instruction->GetBlock()->GetBlockId()]; @@ -569,23 +565,7 @@ class LSEVisitor : public HGraphDelegateVisitor { heap_values[idx] = instruction; KeepStoresIfAliasedToLocation(heap_values, idx); } else { - if (DataType::Kind(heap_value->GetType()) != DataType::Kind(instruction->GetType())) { - // The only situation where the same heap location has different type is when - // we do an array get on an instruction that originates from the null constant - // (the null could be behind a field access, an array access, a null check or - // a bound type). - // In order to stay properly typed on primitive types, we do not eliminate - // the array gets. - if (kIsDebugBuild) { - DCHECK(heap_value->IsArrayGet()) << heap_value->DebugName(); - DCHECK(instruction->IsArrayGet()) << instruction->DebugName(); - } - // Load isn't eliminated. Put the load as the value into the HeapLocation. - // This acts like GVN but with better aliasing analysis. - heap_values[idx] = instruction; - KeepStoresIfAliasedToLocation(heap_values, idx); - return; - } + // Load is eliminated. AddRemovedLoad(instruction, heap_value); TryRemovingNullCheck(instruction); } @@ -610,21 +590,11 @@ class LSEVisitor : public HGraphDelegateVisitor { return false; } - void VisitSetLocation(HInstruction* instruction, - HInstruction* ref, - size_t offset, - HInstruction* index, - size_t vector_length, - int16_t declaring_class_def_index, - HInstruction* value) { + void VisitSetLocation(HInstruction* instruction, size_t idx, HInstruction* value) { + DCHECK_NE(idx, HeapLocationCollector::kHeapLocationNotFound); DCHECK(!IsStore(value)) << value->DebugName(); // value may already have a substitute. value = FindSubstitute(value); - HInstruction* original_ref = heap_location_collector_.HuntForOriginalReference(ref); - ReferenceInfo* ref_info = heap_location_collector_.FindReferenceInfoOf(original_ref); - size_t idx = heap_location_collector_.FindHeapLocationIndex( - ref_info, offset, index, vector_length, declaring_class_def_index); - DCHECK_NE(idx, HeapLocationCollector::kHeapLocationNotFound); ScopedArenaVector<HInstruction*>& heap_values = heap_values_for_[instruction->GetBlock()->GetBlockId()]; HInstruction* heap_value = heap_values[idx]; @@ -644,7 +614,8 @@ class LSEVisitor : public HGraphDelegateVisitor { } else if (!loop_info->IsIrreducible()) { // instruction is a store in the loop so the loop must do write. DCHECK(side_effects_.GetLoopEffects(loop_info->GetHeader()).DoesAnyWrite()); - if (ref_info->IsSingleton() && !loop_info->IsDefinedOutOfTheLoop(original_ref)) { + ReferenceInfo* ref_info = heap_location_collector_.GetHeapLocation(idx)->GetReferenceInfo(); + if (ref_info->IsSingleton() && !loop_info->IsDefinedOutOfTheLoop(ref_info->GetReference())) { // original_ref is created inside the loop. 
Value stored to it isn't needed at // the loop header. This is true for outer loops also. possibly_redundant = true; @@ -685,83 +656,43 @@ class LSEVisitor : public HGraphDelegateVisitor { } } - void VisitInstanceFieldGet(HInstanceFieldGet* instruction) OVERRIDE { - HInstruction* obj = instruction->InputAt(0); - size_t offset = instruction->GetFieldInfo().GetFieldOffset().SizeValue(); - int16_t declaring_class_def_index = instruction->GetFieldInfo().GetDeclaringClassDefIndex(); - VisitGetLocation(instruction, - obj, - offset, - nullptr, - HeapLocation::kScalar, - declaring_class_def_index); + void VisitInstanceFieldGet(HInstanceFieldGet* instruction) override { + HInstruction* object = instruction->InputAt(0); + const FieldInfo& field = instruction->GetFieldInfo(); + VisitGetLocation(instruction, heap_location_collector_.GetFieldHeapLocation(object, &field)); } - void VisitInstanceFieldSet(HInstanceFieldSet* instruction) OVERRIDE { - HInstruction* obj = instruction->InputAt(0); - size_t offset = instruction->GetFieldInfo().GetFieldOffset().SizeValue(); - int16_t declaring_class_def_index = instruction->GetFieldInfo().GetDeclaringClassDefIndex(); + void VisitInstanceFieldSet(HInstanceFieldSet* instruction) override { + HInstruction* object = instruction->InputAt(0); + const FieldInfo& field = instruction->GetFieldInfo(); HInstruction* value = instruction->InputAt(1); - VisitSetLocation(instruction, - obj, - offset, - nullptr, - HeapLocation::kScalar, - declaring_class_def_index, - value); + size_t idx = heap_location_collector_.GetFieldHeapLocation(object, &field); + VisitSetLocation(instruction, idx, value); } - void VisitStaticFieldGet(HStaticFieldGet* instruction) OVERRIDE { + void VisitStaticFieldGet(HStaticFieldGet* instruction) override { HInstruction* cls = instruction->InputAt(0); - size_t offset = instruction->GetFieldInfo().GetFieldOffset().SizeValue(); - int16_t declaring_class_def_index = instruction->GetFieldInfo().GetDeclaringClassDefIndex(); - VisitGetLocation(instruction, - cls, - offset, - nullptr, - HeapLocation::kScalar, - declaring_class_def_index); + const FieldInfo& field = instruction->GetFieldInfo(); + VisitGetLocation(instruction, heap_location_collector_.GetFieldHeapLocation(cls, &field)); } - void VisitStaticFieldSet(HStaticFieldSet* instruction) OVERRIDE { + void VisitStaticFieldSet(HStaticFieldSet* instruction) override { HInstruction* cls = instruction->InputAt(0); - size_t offset = instruction->GetFieldInfo().GetFieldOffset().SizeValue(); - int16_t declaring_class_def_index = instruction->GetFieldInfo().GetDeclaringClassDefIndex(); - HInstruction* value = instruction->InputAt(1); - VisitSetLocation(instruction, - cls, - offset, - nullptr, - HeapLocation::kScalar, - declaring_class_def_index, - value); - } - - void VisitArrayGet(HArrayGet* instruction) OVERRIDE { - HInstruction* array = instruction->InputAt(0); - HInstruction* index = instruction->InputAt(1); - VisitGetLocation(instruction, - array, - HeapLocation::kInvalidFieldOffset, - index, - HeapLocation::kScalar, - HeapLocation::kDeclaringClassDefIndexForArrays); - } - - void VisitArraySet(HArraySet* instruction) OVERRIDE { - HInstruction* array = instruction->InputAt(0); - HInstruction* index = instruction->InputAt(1); - HInstruction* value = instruction->InputAt(2); - VisitSetLocation(instruction, - array, - HeapLocation::kInvalidFieldOffset, - index, - HeapLocation::kScalar, - HeapLocation::kDeclaringClassDefIndexForArrays, - value); - } - - void VisitDeoptimize(HDeoptimize* instruction) { + const 
FieldInfo& field = instruction->GetFieldInfo(); + size_t idx = heap_location_collector_.GetFieldHeapLocation(cls, &field); + VisitSetLocation(instruction, idx, instruction->InputAt(1)); + } + + void VisitArrayGet(HArrayGet* instruction) override { + VisitGetLocation(instruction, heap_location_collector_.GetArrayHeapLocation(instruction)); + } + + void VisitArraySet(HArraySet* instruction) override { + size_t idx = heap_location_collector_.GetArrayHeapLocation(instruction); + VisitSetLocation(instruction, idx, instruction->InputAt(2)); + } + + void VisitDeoptimize(HDeoptimize* instruction) override { const ScopedArenaVector<HInstruction*>& heap_values = heap_values_for_[instruction->GetBlock()->GetBlockId()]; for (HInstruction* heap_value : heap_values) { @@ -812,15 +743,15 @@ class LSEVisitor : public HGraphDelegateVisitor { } } - void VisitReturn(HReturn* instruction) OVERRIDE { + void VisitReturn(HReturn* instruction) override { HandleExit(instruction->GetBlock()); } - void VisitReturnVoid(HReturnVoid* return_void) OVERRIDE { + void VisitReturnVoid(HReturnVoid* return_void) override { HandleExit(return_void->GetBlock()); } - void VisitThrow(HThrow* throw_instruction) OVERRIDE { + void VisitThrow(HThrow* throw_instruction) override { HandleExit(throw_instruction->GetBlock()); } @@ -846,35 +777,35 @@ class LSEVisitor : public HGraphDelegateVisitor { } } - void VisitInvoke(HInvoke* invoke) OVERRIDE { + void VisitInvoke(HInvoke* invoke) override { HandleInvoke(invoke); } - void VisitClinitCheck(HClinitCheck* clinit) OVERRIDE { + void VisitClinitCheck(HClinitCheck* clinit) override { HandleInvoke(clinit); } - void VisitUnresolvedInstanceFieldGet(HUnresolvedInstanceFieldGet* instruction) OVERRIDE { + void VisitUnresolvedInstanceFieldGet(HUnresolvedInstanceFieldGet* instruction) override { // Conservatively treat it as an invocation. HandleInvoke(instruction); } - void VisitUnresolvedInstanceFieldSet(HUnresolvedInstanceFieldSet* instruction) OVERRIDE { + void VisitUnresolvedInstanceFieldSet(HUnresolvedInstanceFieldSet* instruction) override { // Conservatively treat it as an invocation. HandleInvoke(instruction); } - void VisitUnresolvedStaticFieldGet(HUnresolvedStaticFieldGet* instruction) OVERRIDE { + void VisitUnresolvedStaticFieldGet(HUnresolvedStaticFieldGet* instruction) override { // Conservatively treat it as an invocation. HandleInvoke(instruction); } - void VisitUnresolvedStaticFieldSet(HUnresolvedStaticFieldSet* instruction) OVERRIDE { + void VisitUnresolvedStaticFieldSet(HUnresolvedStaticFieldSet* instruction) override { // Conservatively treat it as an invocation. HandleInvoke(instruction); } - void VisitNewInstance(HNewInstance* new_instance) OVERRIDE { + void VisitNewInstance(HNewInstance* new_instance) override { ReferenceInfo* ref_info = heap_location_collector_.FindReferenceInfoOf(new_instance); if (ref_info == nullptr) { // new_instance isn't used for field accesses. No need to process it. @@ -898,7 +829,7 @@ class LSEVisitor : public HGraphDelegateVisitor { } } - void VisitNewArray(HNewArray* new_array) OVERRIDE { + void VisitNewArray(HNewArray* new_array) override { ReferenceInfo* ref_info = heap_location_collector_.FindReferenceInfoOf(new_array); if (ref_info == nullptr) { // new_array isn't used for array accesses. No need to process it. 
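Several of the visitors above fall back to HandleInvoke, that is, they conservatively treat the instruction as a call. A plausible shape of that conservative treatment, sketched with toy types; the actual HandleInvoke body is not part of this hunk:

#include <vector>

// Toy stand-ins: one tracked value per heap location, plus whether the
// location's base reference is a non-escaping singleton.
struct Location {
  bool is_singleton;  // base object never escapes the method
  int value_id;       // 0 means "unknown heap value"
};

// A call may read or write any location whose base can escape, so every
// non-singleton value is forgotten; singleton values survive because the
// callee cannot observe them.
void ClobberForCall(std::vector<Location>* heap_values) {
  for (Location& loc : *heap_values) {
    if (!loc.is_singleton) {
      loc.value_id = 0;  // back to "unknown"
    }
  }
}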
@@ -948,22 +879,22 @@ class LSEVisitor : public HGraphDelegateVisitor { DISALLOW_COPY_AND_ASSIGN(LSEVisitor); }; -void LoadStoreElimination::Run() { +bool LoadStoreElimination::Run() { if (graph_->IsDebuggable() || graph_->HasTryCatch()) { // Debugger may set heap values or trigger deoptimization of callers. // Try/catch support not implemented yet. // Skip this optimization. - return; + return false; } const HeapLocationCollector& heap_location_collector = lsa_.GetHeapLocationCollector(); if (heap_location_collector.GetNumberOfHeapLocations() == 0) { // No HeapLocation information from LSA, skip this optimization. - return; + return false; } // TODO: analyze VecLoad/VecStore better. if (graph_->HasSIMD()) { - return; + return false; } LSEVisitor lse_visitor(graph_, heap_location_collector, side_effects_, stats_); @@ -971,6 +902,8 @@ void LoadStoreElimination::Run() { lse_visitor.VisitBasicBlock(block); } lse_visitor.RemoveInstructions(); + + return true; } } // namespace art diff --git a/compiler/optimizing/load_store_elimination.h b/compiler/optimizing/load_store_elimination.h index 7153541baf..f7ba41a1af 100644 --- a/compiler/optimizing/load_store_elimination.h +++ b/compiler/optimizing/load_store_elimination.h @@ -35,7 +35,7 @@ class LoadStoreElimination : public HOptimization { side_effects_(side_effects), lsa_(lsa) {} - void Run() OVERRIDE; + bool Run() override; static constexpr const char* kLoadStoreEliminationPassName = "load_store_elimination"; diff --git a/compiler/optimizing/loop_analysis.cc b/compiler/optimizing/loop_analysis.cc new file mode 100644 index 0000000000..2ae3683ffa --- /dev/null +++ b/compiler/optimizing/loop_analysis.cc @@ -0,0 +1,193 @@ +/* + * Copyright (C) 2018 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "loop_analysis.h" + +#include "base/bit_vector-inl.h" +#include "induction_var_range.h" + +namespace art { + +void LoopAnalysis::CalculateLoopBasicProperties(HLoopInformation* loop_info, + LoopAnalysisInfo* analysis_results, + int64_t trip_count) { + analysis_results->trip_count_ = trip_count; + + for (HBlocksInLoopIterator block_it(*loop_info); + !block_it.Done(); + block_it.Advance()) { + HBasicBlock* block = block_it.Current(); + + // Check whether one of the successor is loop exit. + for (HBasicBlock* successor : block->GetSuccessors()) { + if (!loop_info->Contains(*successor)) { + analysis_results->exits_num_++; + + // We track number of invariant loop exits which correspond to HIf instruction and + // can be eliminated by loop peeling; other control flow instruction are ignored and will + // not cause loop peeling to happen as they either cannot be inside a loop, or by + // definition cannot be loop exits (unconditional instructions), or are not beneficial for + // the optimization. 
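An example of the "invariant exit" shape described in the comment above, written as source code rather than HIR; the HIf check that follows in this function is what counts such exits. The exit condition does not depend on the loop, so peeling one iteration lets the in-loop copy of the check be folded away:

// Loop with an invariant exit: 'error' does not change inside the loop.
int SumOrFail(const int* a, int n, bool error) {
  int sum = 0;
  for (int i = 0; i < n; ++i) {
    if (error) {        // loop-invariant condition, so this is an invariant exit
      return -1;
    }
    sum += a[i];
  }
  return sum;
}

// After peeling the first iteration, the invariant check has already executed
// on entry, so the copy inside the remaining loop can be removed.
int SumOrFailPeeled(const int* a, int n, bool error) {
  int sum = 0;
  if (n > 0) {
    if (error) return -1;   // peeled iteration evaluates the invariant exit once
    sum += a[0];
    for (int i = 1; i < n; ++i) {
      sum += a[i];          // invariant check eliminated in the loop body
    }
  }
  return sum;
}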
+ HIf* hif = block->GetLastInstruction()->AsIf(); + if (hif != nullptr && !loop_info->Contains(*hif->InputAt(0)->GetBlock())) { + analysis_results->invariant_exits_num_++; + } + } + } + + for (HInstructionIterator it(block->GetInstructions()); !it.Done(); it.Advance()) { + HInstruction* instruction = it.Current(); + if (it.Current()->GetType() == DataType::Type::kInt64) { + analysis_results->has_long_type_instructions_ = true; + } + if (MakesScalarPeelingUnrollingNonBeneficial(instruction)) { + analysis_results->has_instructions_preventing_scalar_peeling_ = true; + analysis_results->has_instructions_preventing_scalar_unrolling_ = true; + } + analysis_results->instr_num_++; + } + analysis_results->bb_num_++; + } +} + +int64_t LoopAnalysis::GetLoopTripCount(HLoopInformation* loop_info, + const InductionVarRange* induction_range) { + int64_t trip_count; + if (!induction_range->HasKnownTripCount(loop_info, &trip_count)) { + trip_count = LoopAnalysisInfo::kUnknownTripCount; + } + return trip_count; +} + +// Default implementation of loop helper; used for all targets unless a custom implementation +// is provided. Enables scalar loop peeling and unrolling with the most conservative heuristics. +class ArchDefaultLoopHelper : public ArchNoOptsLoopHelper { + public: + // Scalar loop unrolling parameters and heuristics. + // + // Maximum possible unrolling factor. + static constexpr uint32_t kScalarMaxUnrollFactor = 2; + // Loop's maximum instruction count. Loops with higher count will not be peeled/unrolled. + static constexpr uint32_t kScalarHeuristicMaxBodySizeInstr = 17; + // Loop's maximum basic block count. Loops with higher count will not be peeled/unrolled. + static constexpr uint32_t kScalarHeuristicMaxBodySizeBlocks = 6; + // Maximum number of instructions to be created as a result of full unrolling. + static constexpr uint32_t kScalarHeuristicFullyUnrolledMaxInstrThreshold = 35; + + bool IsLoopNonBeneficialForScalarOpts(LoopAnalysisInfo* analysis_info) const override { + return analysis_info->HasLongTypeInstructions() || + IsLoopTooBig(analysis_info, + kScalarHeuristicMaxBodySizeInstr, + kScalarHeuristicMaxBodySizeBlocks); + } + + uint32_t GetScalarUnrollingFactor(const LoopAnalysisInfo* analysis_info) const override { + int64_t trip_count = analysis_info->GetTripCount(); + // Unroll only loops with known trip count. + if (trip_count == LoopAnalysisInfo::kUnknownTripCount) { + return LoopAnalysisInfo::kNoUnrollingFactor; + } + uint32_t desired_unrolling_factor = kScalarMaxUnrollFactor; + if (trip_count < desired_unrolling_factor || trip_count % desired_unrolling_factor != 0) { + return LoopAnalysisInfo::kNoUnrollingFactor; + } + + return desired_unrolling_factor; + } + + bool IsLoopPeelingEnabled() const override { return true; } + + bool IsFullUnrollingBeneficial(LoopAnalysisInfo* analysis_info) const override { + int64_t trip_count = analysis_info->GetTripCount(); + // We assume that trip count is known. 
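The default scalar unrolling decision shown in GetScalarUnrollingFactor above, restated as a small standalone function with worked values; the constants are copied from the class, but this paraphrases the logic rather than reusing it:

#include <cassert>
#include <cstdint>

constexpr int64_t kUnknownTripCount = -1;   // mirrors LoopAnalysisInfo::kUnknownTripCount
constexpr uint32_t kNoUnrollingFactor = 1;  // just one copy of the loop body
constexpr uint32_t kMaxUnrollFactor = 2;    // kScalarMaxUnrollFactor above

uint32_t ScalarUnrollingFactor(int64_t trip_count) {
  if (trip_count == kUnknownTripCount) {
    return kNoUnrollingFactor;            // only unroll loops with a known trip count
  }
  if (trip_count < kMaxUnrollFactor || trip_count % kMaxUnrollFactor != 0) {
    return kNoUnrollingFactor;            // avoid leftover (remainder) iterations
  }
  return kMaxUnrollFactor;
}

int main() {
  assert(ScalarUnrollingFactor(8) == 2);                  // even, known: unroll by 2
  assert(ScalarUnrollingFactor(7) == 1);                  // 7 % 2 != 0: leave as is
  assert(ScalarUnrollingFactor(kUnknownTripCount) == 1);  // unknown trip count
  return 0;
}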
+ DCHECK_NE(trip_count, LoopAnalysisInfo::kUnknownTripCount); + size_t instr_num = analysis_info->GetNumberOfInstructions(); + return (trip_count * instr_num < kScalarHeuristicFullyUnrolledMaxInstrThreshold); + } + + protected: + bool IsLoopTooBig(LoopAnalysisInfo* loop_analysis_info, + size_t instr_threshold, + size_t bb_threshold) const { + size_t instr_num = loop_analysis_info->GetNumberOfInstructions(); + size_t bb_num = loop_analysis_info->GetNumberOfBasicBlocks(); + return (instr_num >= instr_threshold || bb_num >= bb_threshold); + } +}; + +// Custom implementation of loop helper for arm64 target. Enables heuristics for scalar loop +// peeling and unrolling and supports SIMD loop unrolling. +class Arm64LoopHelper : public ArchDefaultLoopHelper { + public: + // SIMD loop unrolling parameters and heuristics. + // + // Maximum possible unrolling factor. + static constexpr uint32_t kArm64SimdMaxUnrollFactor = 8; + // Loop's maximum instruction count. Loops with higher count will not be unrolled. + static constexpr uint32_t kArm64SimdHeuristicMaxBodySizeInstr = 50; + + // Loop's maximum instruction count. Loops with higher count will not be peeled/unrolled. + static constexpr uint32_t kArm64ScalarHeuristicMaxBodySizeInstr = 40; + // Loop's maximum basic block count. Loops with higher count will not be peeled/unrolled. + static constexpr uint32_t kArm64ScalarHeuristicMaxBodySizeBlocks = 8; + + bool IsLoopNonBeneficialForScalarOpts(LoopAnalysisInfo* loop_analysis_info) const override { + return IsLoopTooBig(loop_analysis_info, + kArm64ScalarHeuristicMaxBodySizeInstr, + kArm64ScalarHeuristicMaxBodySizeBlocks); + } + + uint32_t GetSIMDUnrollingFactor(HBasicBlock* block, + int64_t trip_count, + uint32_t max_peel, + uint32_t vector_length) const override { + // Don't unroll with insufficient iterations. + // TODO: Unroll loops with unknown trip count. + DCHECK_NE(vector_length, 0u); + if (trip_count < (2 * vector_length + max_peel)) { + return LoopAnalysisInfo::kNoUnrollingFactor; + } + // Don't unroll for large loop body size. + uint32_t instruction_count = block->GetInstructions().CountSize(); + if (instruction_count >= kArm64SimdHeuristicMaxBodySizeInstr) { + return LoopAnalysisInfo::kNoUnrollingFactor; + } + // Find a beneficial unroll factor with the following restrictions: + // - At least one iteration of the transformed loop should be executed. + // - The loop body shouldn't be "too big" (heuristic). + + uint32_t uf1 = kArm64SimdHeuristicMaxBodySizeInstr / instruction_count; + uint32_t uf2 = (trip_count - max_peel) / vector_length; + uint32_t unroll_factor = + TruncToPowerOfTwo(std::min({uf1, uf2, kArm64SimdMaxUnrollFactor})); + DCHECK_GE(unroll_factor, 1u); + return unroll_factor; + } +}; + +ArchNoOptsLoopHelper* ArchNoOptsLoopHelper::Create(InstructionSet isa, + ArenaAllocator* allocator) { + switch (isa) { + case InstructionSet::kArm64: { + return new (allocator) Arm64LoopHelper; + } + default: { + return new (allocator) ArchDefaultLoopHelper; + } + } +} + +} // namespace art diff --git a/compiler/optimizing/loop_analysis.h b/compiler/optimizing/loop_analysis.h new file mode 100644 index 0000000000..57509ee410 --- /dev/null +++ b/compiler/optimizing/loop_analysis.h @@ -0,0 +1,183 @@ +/* + * Copyright (C) 2018 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef ART_COMPILER_OPTIMIZING_LOOP_ANALYSIS_H_ +#define ART_COMPILER_OPTIMIZING_LOOP_ANALYSIS_H_ + +#include "nodes.h" + +namespace art { + +class InductionVarRange; +class LoopAnalysis; + +// Class to hold cached information on properties of the loop. +class LoopAnalysisInfo : public ValueObject { + public: + // No loop unrolling factor (just one copy of the loop-body). + static constexpr uint32_t kNoUnrollingFactor = 1; + // Used for unknown and non-constant trip counts (see InductionVarRange::HasKnownTripCount). + static constexpr int64_t kUnknownTripCount = -1; + + explicit LoopAnalysisInfo(HLoopInformation* loop_info) + : trip_count_(kUnknownTripCount), + bb_num_(0), + instr_num_(0), + exits_num_(0), + invariant_exits_num_(0), + has_instructions_preventing_scalar_peeling_(false), + has_instructions_preventing_scalar_unrolling_(false), + has_long_type_instructions_(false), + loop_info_(loop_info) {} + + int64_t GetTripCount() const { return trip_count_; } + size_t GetNumberOfBasicBlocks() const { return bb_num_; } + size_t GetNumberOfInstructions() const { return instr_num_; } + size_t GetNumberOfExits() const { return exits_num_; } + size_t GetNumberOfInvariantExits() const { return invariant_exits_num_; } + + bool HasInstructionsPreventingScalarPeeling() const { + return has_instructions_preventing_scalar_peeling_; + } + + bool HasInstructionsPreventingScalarUnrolling() const { + return has_instructions_preventing_scalar_unrolling_; + } + + bool HasInstructionsPreventingScalarOpts() const { + return HasInstructionsPreventingScalarPeeling() || HasInstructionsPreventingScalarUnrolling(); + } + + bool HasLongTypeInstructions() const { + return has_long_type_instructions_; + } + + HLoopInformation* GetLoopInfo() const { return loop_info_; } + + private: + // Trip count of the loop if known, kUnknownTripCount otherwise. + int64_t trip_count_; + // Number of basic blocks in the loop body. + size_t bb_num_; + // Number of instructions in the loop body. + size_t instr_num_; + // Number of loop's exits. + size_t exits_num_; + // Number of "if" loop exits (with HIf instruction) whose condition is loop-invariant. + size_t invariant_exits_num_; + // Whether the loop has instructions which make scalar loop peeling non-beneficial. + bool has_instructions_preventing_scalar_peeling_; + // Whether the loop has instructions which make scalar loop unrolling non-beneficial. + bool has_instructions_preventing_scalar_unrolling_; + // Whether the loop has instructions of primitive long type; unrolling these loop will + // likely introduce spill/fills on 32-bit targets. + bool has_long_type_instructions_; + + // Corresponding HLoopInformation. + HLoopInformation* loop_info_; + + friend class LoopAnalysis; +}; + +// Placeholder class for methods and routines used to analyse loops, calculate loop properties +// and characteristics. +class LoopAnalysis : public ValueObject { + public: + // Calculates loops basic properties like body size, exits number, etc. and fills + // 'analysis_results' with this information. 
+ static void CalculateLoopBasicProperties(HLoopInformation* loop_info, + LoopAnalysisInfo* analysis_results, + int64_t trip_count); + + // Returns the trip count of the loop if it is known and kUnknownTripCount otherwise. + static int64_t GetLoopTripCount(HLoopInformation* loop_info, + const InductionVarRange* induction_range); + + private: + // Returns whether an instruction makes scalar loop peeling/unrolling non-beneficial. + // + // If in the loop body we have a dex/runtime call then its contribution to the whole + // loop performance will probably prevail. So peeling/unrolling optimization will not bring + // any noticeable performance improvement. It will increase the code size. + static bool MakesScalarPeelingUnrollingNonBeneficial(HInstruction* instruction) { + return (instruction->IsNewArray() || + instruction->IsNewInstance() || + instruction->IsUnresolvedInstanceFieldGet() || + instruction->IsUnresolvedInstanceFieldSet() || + instruction->IsUnresolvedStaticFieldGet() || + instruction->IsUnresolvedStaticFieldSet() || + // TODO: Support loops with intrinsified invokes. + instruction->IsInvoke()); + } +}; + +// +// Helper class which holds target-dependent methods and constants needed for loop optimizations. +// +// To support peeling/unrolling for a new architecture one needs to create new helper class, +// inherit it from this and add implementation for the following methods. +// +class ArchNoOptsLoopHelper : public ArenaObject<kArenaAllocOptimization> { + public: + virtual ~ArchNoOptsLoopHelper() {} + + // Creates an instance of specialised helper for the target or default helper if the target + // doesn't support loop peeling and unrolling. + static ArchNoOptsLoopHelper* Create(InstructionSet isa, ArenaAllocator* allocator); + + // Returns whether the loop is not beneficial for loop peeling/unrolling. + // + // For example, if the loop body has too many instructions then peeling/unrolling optimization + // will not bring any noticeable performance improvement however will increase the code size. + // + // Returns 'true' by default, should be overridden by particular target loop helper. + virtual bool IsLoopNonBeneficialForScalarOpts( + LoopAnalysisInfo* loop_analysis_info ATTRIBUTE_UNUSED) const { return true; } + + // Returns optimal scalar unrolling factor for the loop. + // + // Returns kNoUnrollingFactor by default, should be overridden by particular target loop helper. + virtual uint32_t GetScalarUnrollingFactor( + const LoopAnalysisInfo* analysis_info ATTRIBUTE_UNUSED) const { + return LoopAnalysisInfo::kNoUnrollingFactor; + } + + // Returns whether scalar loop peeling is enabled, + // + // Returns 'false' by default, should be overridden by particular target loop helper. + virtual bool IsLoopPeelingEnabled() const { return false; } + + // Returns whether it is beneficial to fully unroll the loop. + // + // Returns 'false' by default, should be overridden by particular target loop helper. + virtual bool IsFullUnrollingBeneficial(LoopAnalysisInfo* analysis_info ATTRIBUTE_UNUSED) const { + return false; + } + + // Returns optimal SIMD unrolling factor for the loop. + // + // Returns kNoUnrollingFactor by default, should be overridden by particular target loop helper. 
+ virtual uint32_t GetSIMDUnrollingFactor(HBasicBlock* block ATTRIBUTE_UNUSED, + int64_t trip_count ATTRIBUTE_UNUSED, + uint32_t max_peel ATTRIBUTE_UNUSED, + uint32_t vector_length ATTRIBUTE_UNUSED) const { + return LoopAnalysisInfo::kNoUnrollingFactor; + } +}; + +} // namespace art + +#endif // ART_COMPILER_OPTIMIZING_LOOP_ANALYSIS_H_ diff --git a/compiler/optimizing/loop_optimization.cc b/compiler/optimizing/loop_optimization.cc index 9f278a9f4e..6c76ab858b 100644 --- a/compiler/optimizing/loop_optimization.cc +++ b/compiler/optimizing/loop_optimization.cc @@ -23,7 +23,7 @@ #include "arch/mips64/instruction_set_features_mips64.h" #include "arch/x86/instruction_set_features_x86.h" #include "arch/x86_64/instruction_set_features_x86_64.h" -#include "driver/compiler_driver.h" +#include "driver/compiler_options.h" #include "linear_order.h" #include "mirror/array-inl.h" #include "mirror/string.h" @@ -33,9 +33,6 @@ namespace art { // Enables vectorization (SIMDization) in the loop optimizer. static constexpr bool kEnableVectorization = true; -// No loop unrolling factor (just one copy of the loop-body). -static constexpr uint32_t kNoUnrollingFactor = 1; - // // Static helpers. // @@ -227,6 +224,7 @@ static bool IsNarrowerOperands(HInstruction* a, /*out*/ HInstruction** r, /*out*/ HInstruction** s, /*out*/ bool* is_unsigned) { + DCHECK(a != nullptr && b != nullptr); // Look for a matching sign extension. DataType::Type stype = HVecOperation::ToSignedType(type); if (IsSignExtensionAndGet(a, stype, r) && IsSignExtensionAndGet(b, stype, s)) { @@ -247,6 +245,7 @@ static bool IsNarrowerOperand(HInstruction* a, DataType::Type type, /*out*/ HInstruction** r, /*out*/ bool* is_unsigned) { + DCHECK(a != nullptr); // Look for a matching sign extension. DataType::Type stype = HVecOperation::ToSignedType(type); if (IsSignExtensionAndGet(a, stype, r)) { @@ -270,20 +269,28 @@ static uint32_t GetOtherVL(DataType::Type other_type, DataType::Type vector_type return vl >> (DataType::SizeShift(other_type) - DataType::SizeShift(vector_type)); } -// Detect up to two instructions a and b, and an acccumulated constant c. -static bool IsAddConstHelper(HInstruction* instruction, - /*out*/ HInstruction** a, - /*out*/ HInstruction** b, - /*out*/ int64_t* c, - int32_t depth) { - static constexpr int32_t kMaxDepth = 8; // don't search too deep +// Detect up to two added operands a and b and an acccumulated constant c. +static bool IsAddConst(HInstruction* instruction, + /*out*/ HInstruction** a, + /*out*/ HInstruction** b, + /*out*/ int64_t* c, + int32_t depth = 8) { // don't search too deep int64_t value = 0; + // Enter add/sub while still within reasonable depth. + if (depth > 0) { + if (instruction->IsAdd()) { + return IsAddConst(instruction->InputAt(0), a, b, c, depth - 1) && + IsAddConst(instruction->InputAt(1), a, b, c, depth - 1); + } else if (instruction->IsSub() && + IsInt64AndGet(instruction->InputAt(1), &value)) { + *c -= value; + return IsAddConst(instruction->InputAt(0), a, b, c, depth - 1); + } + } + // Otherwise, deal with leaf nodes. 
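// As an illustration (not part of the change itself): for (x + y) + 3 the
// recursion above ends up with a = x, b = y and c = 3, while for x - 2 it
// yields a = x, b = nullptr and c = -2; IsAddConst2/IsSubConst2 below then
// materialize the accumulated constant as an HConstant operand when needed.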
if (IsInt64AndGet(instruction, &value)) { *c += value; return true; - } else if (instruction->IsAdd() && depth <= kMaxDepth) { - return IsAddConstHelper(instruction->InputAt(0), a, b, c, depth + 1) && - IsAddConstHelper(instruction->InputAt(1), a, b, c, depth + 1); } else if (*a == nullptr) { *a = instruction; return true; @@ -291,42 +298,40 @@ static bool IsAddConstHelper(HInstruction* instruction, *b = instruction; return true; } - return false; // too many non-const operands + return false; // too many operands } -// Detect a + b + c for an optional constant c. -static bool IsAddConst(HInstruction* instruction, - /*out*/ HInstruction** a, - /*out*/ HInstruction** b, - /*out*/ int64_t* c) { - if (instruction->IsAdd()) { - // Try to find a + b and accumulated c. - if (IsAddConstHelper(instruction->InputAt(0), a, b, c, /*depth*/ 0) && - IsAddConstHelper(instruction->InputAt(1), a, b, c, /*depth*/ 0) && - *b != nullptr) { - return true; +// Detect a + b + c with optional constant c. +static bool IsAddConst2(HGraph* graph, + HInstruction* instruction, + /*out*/ HInstruction** a, + /*out*/ HInstruction** b, + /*out*/ int64_t* c) { + if (IsAddConst(instruction, a, b, c) && *a != nullptr) { + if (*b == nullptr) { + // Constant is usually already present, unless accumulated. + *b = graph->GetConstant(instruction->GetType(), (*c)); + *c = 0; } - // Found a + b. - *a = instruction->InputAt(0); - *b = instruction->InputAt(1); - *c = 0; return true; } return false; } -// Detect a + c for constant c. -static bool IsAddConst(HInstruction* instruction, - /*out*/ HInstruction** a, - /*out*/ int64_t* c) { - if (instruction->IsAdd()) { - if (IsInt64AndGet(instruction->InputAt(0), c)) { - *a = instruction->InputAt(1); - return true; - } else if (IsInt64AndGet(instruction->InputAt(1), c)) { - *a = instruction->InputAt(0); - return true; - } +// Detect a direct a - b or a hidden a - (-c). +static bool IsSubConst2(HGraph* graph, + HInstruction* instruction, + /*out*/ HInstruction** a, + /*out*/ HInstruction** b) { + int64_t c = 0; + if (instruction->IsSub()) { + *a = instruction->InputAt(0); + *b = instruction->InputAt(1); + return true; + } else if (IsAddConst(instruction, a, b, &c) && *a != nullptr && *b == nullptr) { + // Constant for the hidden subtraction. + *b = graph->GetConstant(instruction->GetType(), -c); + return true; } return false; } @@ -346,7 +351,10 @@ static bool HasReductionFormat(HInstruction* reduction, HInstruction* phi) { // Translates vector operation to reduction kind. static HVecReduce::ReductionKind GetReductionKind(HVecOperation* reduction) { - if (reduction->IsVecAdd() || reduction->IsVecSub() || reduction->IsVecSADAccumulate()) { + if (reduction->IsVecAdd() || + reduction->IsVecSub() || + reduction->IsVecSADAccumulate() || + reduction->IsVecDotProd()) { return HVecReduce::kSum; } LOG(FATAL) << "Unsupported SIMD reduction " << reduction->GetId(); @@ -380,17 +388,82 @@ static bool CheckInductionSetFullyRemoved(ScopedArenaSet<HInstruction*>* iset) { return true; } +// Tries to statically evaluate condition of the specified "HIf" for other condition checks. +static void TryToEvaluateIfCondition(HIf* instruction, HGraph* graph) { + HInstruction* cond = instruction->InputAt(0); + + // If a condition 'cond' is evaluated in an HIf instruction then in the successors of the + // IF_BLOCK we statically know the value of the condition 'cond' (TRUE in TRUE_SUCC, FALSE in + // FALSE_SUCC). 
Using that we can replace another evaluation (use) EVAL of the same 'cond' + // with TRUE value (FALSE value) if every path from the ENTRY_BLOCK to EVAL_BLOCK contains the + // edge HIF_BLOCK->TRUE_SUCC (HIF_BLOCK->FALSE_SUCC). + // if (cond) { if(cond) { + // if (cond) {} if (1) {} + // } else { =======> } else { + // if (cond) {} if (0) {} + // } } + if (!cond->IsConstant()) { + HBasicBlock* true_succ = instruction->IfTrueSuccessor(); + HBasicBlock* false_succ = instruction->IfFalseSuccessor(); + + DCHECK_EQ(true_succ->GetPredecessors().size(), 1u); + DCHECK_EQ(false_succ->GetPredecessors().size(), 1u); + + const HUseList<HInstruction*>& uses = cond->GetUses(); + for (auto it = uses.begin(), end = uses.end(); it != end; /* ++it below */) { + HInstruction* user = it->GetUser(); + size_t index = it->GetIndex(); + HBasicBlock* user_block = user->GetBlock(); + // Increment `it` now because `*it` may disappear thanks to user->ReplaceInput(). + ++it; + if (true_succ->Dominates(user_block)) { + user->ReplaceInput(graph->GetIntConstant(1), index); + } else if (false_succ->Dominates(user_block)) { + user->ReplaceInput(graph->GetIntConstant(0), index); + } + } + } +} + +// Peel the first 'count' iterations of the loop. +static void PeelByCount(HLoopInformation* loop_info, + int count, + InductionVarRange* induction_range) { + for (int i = 0; i < count; i++) { + // Perform peeling. + PeelUnrollSimpleHelper helper(loop_info, induction_range); + helper.DoPeeling(); + } +} + +// Returns the narrower type out of instructions a and b types. +static DataType::Type GetNarrowerType(HInstruction* a, HInstruction* b) { + DataType::Type type = a->GetType(); + if (DataType::Size(b->GetType()) < DataType::Size(type)) { + type = b->GetType(); + } + if (a->IsTypeConversion() && + DataType::Size(a->InputAt(0)->GetType()) < DataType::Size(type)) { + type = a->InputAt(0)->GetType(); + } + if (b->IsTypeConversion() && + DataType::Size(b->InputAt(0)->GetType()) < DataType::Size(type)) { + type = b->InputAt(0)->GetType(); + } + return type; +} + // // Public methods. // HLoopOptimization::HLoopOptimization(HGraph* graph, - CompilerDriver* compiler_driver, + const CompilerOptions* compiler_options, HInductionVarAnalysis* induction_analysis, OptimizingCompilerStats* stats, const char* name) : HOptimization(graph, name, stats), - compiler_driver_(compiler_driver), + compiler_options_(compiler_options), induction_range_(induction_analysis), loop_allocator_(nullptr), global_allocator_(graph_->GetAllocator()), @@ -411,14 +484,18 @@ HLoopOptimization::HLoopOptimization(HGraph* graph, vector_preheader_(nullptr), vector_header_(nullptr), vector_body_(nullptr), - vector_index_(nullptr) { + vector_index_(nullptr), + arch_loop_helper_(ArchNoOptsLoopHelper::Create(compiler_options_ != nullptr + ? compiler_options_->GetInstructionSet() + : InstructionSet::kNone, + global_allocator_)) { } -void HLoopOptimization::Run() { +bool HLoopOptimization::Run() { // Skip if there is no loop or the graph has try-catch/irreducible loops. // TODO: make this less of a sledgehammer. if (!graph_->HasLoops() || graph_->HasTryCatch() || graph_->HasIrreducibleLoops()) { - return; + return false; } // Phase-local allocator. @@ -426,7 +503,7 @@ void HLoopOptimization::Run() { loop_allocator_ = &allocator; // Perform loop optimizations. - LocalRun(); + bool didLoopOpt = LocalRun(); if (top_loop_ == nullptr) { graph_->SetHasLoops(false); // no more loops } @@ -434,13 +511,16 @@ void HLoopOptimization::Run() { // Detach. 
loop_allocator_ = nullptr; last_loop_ = top_loop_ = nullptr; + + return didLoopOpt; } // // Loop setup and traversal. // -void HLoopOptimization::LocalRun() { +bool HLoopOptimization::LocalRun() { + bool didLoopOpt = false; // Build the linear order using the phase-local allocator. This step enables building // a loop hierarchy that properly reflects the outer-inner and previous-next relation. ScopedArenaVector<HBasicBlock*> linear_order(loop_allocator_->Adapter(kArenaAllocLinearOrder)); @@ -472,7 +552,7 @@ void HLoopOptimization::LocalRun() { vector_map_ = ↦ vector_permanent_map_ = &perm; // Traverse. - TraverseLoopsInnerToOuter(top_loop_); + didLoopOpt = TraverseLoopsInnerToOuter(top_loop_); // Detach. iset_ = nullptr; reductions_ = nullptr; @@ -480,6 +560,7 @@ void HLoopOptimization::LocalRun() { vector_map_ = nullptr; vector_permanent_map_ = nullptr; } + return didLoopOpt; } void HLoopOptimization::AddLoop(HLoopInformation* loop_info) { @@ -536,6 +617,7 @@ bool HLoopOptimization::TraverseLoopsInnerToOuter(LoopNode* node) { // loop if the induction of any inner loop has changed. if (TraverseLoopsInnerToOuter(node->inner)) { induction_range_.ReVisit(node->loop_info); + changed = true; } // Repeat simplifications in the loop-body until no more changes occur. // Note that since each simplification consists of eliminating code (without @@ -622,7 +704,7 @@ void HLoopOptimization::SimplifyBlocks(LoopNode* node) { } } -bool HLoopOptimization::OptimizeInnerLoop(LoopNode* node) { +bool HLoopOptimization::TryOptimizeInnerLoopFinite(LoopNode* node) { HBasicBlock* header = node->loop_info->GetHeader(); HBasicBlock* preheader = node->loop_info->GetPreHeader(); // Ensure loop header logic is finite. @@ -692,6 +774,146 @@ bool HLoopOptimization::OptimizeInnerLoop(LoopNode* node) { return false; } +bool HLoopOptimization::OptimizeInnerLoop(LoopNode* node) { + return TryOptimizeInnerLoopFinite(node) || TryPeelingAndUnrolling(node); +} + + + +// +// Scalar loop peeling and unrolling: generic part methods. +// + +bool HLoopOptimization::TryUnrollingForBranchPenaltyReduction(LoopAnalysisInfo* analysis_info, + bool generate_code) { + if (analysis_info->GetNumberOfExits() > 1) { + return false; + } + + uint32_t unrolling_factor = arch_loop_helper_->GetScalarUnrollingFactor(analysis_info); + if (unrolling_factor == LoopAnalysisInfo::kNoUnrollingFactor) { + return false; + } + + if (generate_code) { + // TODO: support other unrolling factors. + DCHECK_EQ(unrolling_factor, 2u); + + // Perform unrolling. + HLoopInformation* loop_info = analysis_info->GetLoopInfo(); + PeelUnrollSimpleHelper helper(loop_info, &induction_range_); + helper.DoUnrolling(); + + // Remove the redundant loop check after unrolling. + HIf* copy_hif = + helper.GetBasicBlockMap()->Get(loop_info->GetHeader())->GetLastInstruction()->AsIf(); + int32_t constant = loop_info->Contains(*copy_hif->IfTrueSuccessor()) ? 1 : 0; + copy_hif->ReplaceInput(graph_->GetIntConstant(constant), 0u); + } + return true; +} + +bool HLoopOptimization::TryPeelingForLoopInvariantExitsElimination(LoopAnalysisInfo* analysis_info, + bool generate_code) { + HLoopInformation* loop_info = analysis_info->GetLoopInfo(); + if (!arch_loop_helper_->IsLoopPeelingEnabled()) { + return false; + } + + if (analysis_info->GetNumberOfInvariantExits() == 0) { + return false; + } + + if (generate_code) { + // Perform peeling. 
+ PeelUnrollSimpleHelper helper(loop_info, &induction_range_); + helper.DoPeeling(); + + // Statically evaluate loop check after peeling for loop invariant condition. + const SuperblockCloner::HInstructionMap* hir_map = helper.GetInstructionMap(); + for (auto entry : *hir_map) { + HInstruction* copy = entry.second; + if (copy->IsIf()) { + TryToEvaluateIfCondition(copy->AsIf(), graph_); + } + } + } + + return true; +} + +bool HLoopOptimization::TryFullUnrolling(LoopAnalysisInfo* analysis_info, bool generate_code) { + // Fully unroll loops with a known and small trip count. + int64_t trip_count = analysis_info->GetTripCount(); + if (!arch_loop_helper_->IsLoopPeelingEnabled() || + trip_count == LoopAnalysisInfo::kUnknownTripCount || + !arch_loop_helper_->IsFullUnrollingBeneficial(analysis_info)) { + return false; + } + + if (generate_code) { + // Peeling of the N first iterations (where N equals to the trip count) will effectively + // eliminate the loop: after peeling we will have N sequential iterations copied into the loop + // preheader and the original loop. The trip count of this loop will be 0 as the sequential + // iterations are executed first and there are exactly N of them. Thus we can statically + // evaluate the loop exit condition to 'false' and fully eliminate it. + // + // Here is an example of full unrolling of a loop with a trip count 2: + // + // loop_cond_1 + // loop_body_1 <- First iteration. + // | + // \ v + // ==\ loop_cond_2 + // ==/ loop_body_2 <- Second iteration. + // / | + // <- v <- + // loop_cond \ loop_cond \ <- This cond is always false. + // loop_body _/ loop_body _/ + // + HLoopInformation* loop_info = analysis_info->GetLoopInfo(); + PeelByCount(loop_info, trip_count, &induction_range_); + HIf* loop_hif = loop_info->GetHeader()->GetLastInstruction()->AsIf(); + int32_t constant = loop_info->Contains(*loop_hif->IfTrueSuccessor()) ? 0 : 1; + loop_hif->ReplaceInput(graph_->GetIntConstant(constant), 0u); + } + + return true; +} + +bool HLoopOptimization::TryPeelingAndUnrolling(LoopNode* node) { + // Don't run peeling/unrolling if compiler_options_ is nullptr (i.e., running under tests) + // as InstructionSet is needed. + if (compiler_options_ == nullptr) { + return false; + } + + HLoopInformation* loop_info = node->loop_info; + int64_t trip_count = LoopAnalysis::GetLoopTripCount(loop_info, &induction_range_); + LoopAnalysisInfo analysis_info(loop_info); + LoopAnalysis::CalculateLoopBasicProperties(loop_info, &analysis_info, trip_count); + + if (analysis_info.HasInstructionsPreventingScalarOpts() || + arch_loop_helper_->IsLoopNonBeneficialForScalarOpts(&analysis_info)) { + return false; + } + + if (!TryFullUnrolling(&analysis_info, /*generate_code*/ false) && + !TryPeelingForLoopInvariantExitsElimination(&analysis_info, /*generate_code*/ false) && + !TryUnrollingForBranchPenaltyReduction(&analysis_info, /*generate_code*/ false)) { + return false; + } + + // Run 'IsLoopClonable' the last as it might be time-consuming. + if (!PeelUnrollHelper::IsLoopClonable(loop_info)) { + return false; + } + + return TryFullUnrolling(&analysis_info) || + TryPeelingForLoopInvariantExitsElimination(&analysis_info) || + TryUnrollingForBranchPenaltyReduction(&analysis_info); +} + // // Loop vectorization. The implementation is based on the book by Aart J.C. Bik: // "The Software Vectorization Handbook. Applying Multimedia Extensions for Maximum Performance." 
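For reference, the arithmetic behind Arm64LoopHelper::GetSIMDUnrollingFactor (consulted by Vectorize() in the next hunk) can be sketched in isolation. The helper and constant names below are illustrative stand-ins for TruncToPowerOfTwo and the kArm64Simd* thresholds, not the ART identifiers themselves:

#include <algorithm>
#include <cstdint>

// Round down to a power of two (stand-in for art::TruncToPowerOfTwo).
static uint32_t TruncToPow2(uint32_t x) {
  uint32_t r = 1;
  while (r * 2u <= x) r *= 2u;
  return r;
}

// Sketch of the SIMD unroll-factor computation with illustrative thresholds.
static uint32_t SimdUnrollFactor(uint32_t body_instr_count,
                                 int64_t trip_count,
                                 uint32_t max_peel,
                                 uint32_t vector_length) {
  constexpr uint32_t kMaxBodySizeInstr = 50;  // mirrors kArm64SimdHeuristicMaxBodySizeInstr
  constexpr uint32_t kMaxUnrollFactor = 8;    // mirrors kArm64SimdMaxUnrollFactor
  if (trip_count < 2 * vector_length + max_peel) return 1;  // too few iterations, no unrolling
  if (body_instr_count >= kMaxBodySizeInstr) return 1;      // body too large, no unrolling
  uint32_t uf1 = kMaxBodySizeInstr / body_instr_count;      // keep the unrolled body small
  uint32_t uf2 = static_cast<uint32_t>((trip_count - max_peel) / vector_length);
  return TruncToPow2(std::min({uf1, uf2, kMaxUnrollFactor}));
}

// Example: a 10-instruction body, trip count 128, no peeling and 4 lanes
// (int32 on 128-bit NEON) gives min(5, 32, 8) = 5, truncated to 4 copies
// of the vector body.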
@@ -822,7 +1044,8 @@ void HLoopOptimization::Vectorize(LoopNode* node, HBasicBlock* preheader = node->loop_info->GetPreHeader(); // Pick a loop unrolling factor for the vector loop. - uint32_t unroll = GetUnrollingFactor(block, trip_count); + uint32_t unroll = arch_loop_helper_->GetSIMDUnrollingFactor( + block, trip_count, MaxNumberPeeled(), vector_length_); uint32_t chunk = vector_length_ * unroll; DCHECK(trip_count == 0 || (trip_count >= MaxNumberPeeled() + chunk)); @@ -927,7 +1150,7 @@ void HLoopOptimization::Vectorize(LoopNode* node, vector_index_, ptc, graph_->GetConstant(induc_type, 1), - kNoUnrollingFactor); + LoopAnalysisInfo::kNoUnrollingFactor); } // Generate vector loop, possibly further unrolled: @@ -954,7 +1177,7 @@ void HLoopOptimization::Vectorize(LoopNode* node, vector_index_, stc, graph_->GetConstant(induc_type, 1), - kNoUnrollingFactor); + LoopAnalysisInfo::kNoUnrollingFactor); } // Link reductions to their final uses. @@ -1061,6 +1284,11 @@ bool HLoopOptimization::VectorizeDef(LoopNode* node, HInstruction* index = instruction->InputAt(1); HInstruction* value = instruction->InputAt(2); HInstruction* offset = nullptr; + // For narrow types, explicit type conversion may have been + // optimized way, so set the no hi bits restriction here. + if (DataType::Size(type) <= 2) { + restrictions |= kNoHiBits; + } if (TrySetVectorType(type, &restrictions) && node->loop_info->IsDefinedOutOfTheLoop(base) && induction_range_.IsUnitStride(instruction, index, graph_, &offset) && @@ -1083,6 +1311,7 @@ bool HLoopOptimization::VectorizeDef(LoopNode* node, DataType::Type type = instruction->GetType(); // Recognize SAD idiom or direct reduction. if (VectorizeSADIdiom(node, instruction, generate_code, type, restrictions) || + VectorizeDotProdIdiom(node, instruction, generate_code, type, restrictions) || (TrySetVectorType(type, &restrictions) && VectorizeUse(node, instruction, generate_code, type, restrictions))) { if (generate_code) { @@ -1275,49 +1504,37 @@ bool HLoopOptimization::VectorizeUse(LoopNode* node, return true; } } - } else if (instruction->IsInvokeStaticOrDirect()) { - // Accept particular intrinsics. - HInvokeStaticOrDirect* invoke = instruction->AsInvokeStaticOrDirect(); - switch (invoke->GetIntrinsic()) { - case Intrinsics::kMathAbsInt: - case Intrinsics::kMathAbsLong: - case Intrinsics::kMathAbsFloat: - case Intrinsics::kMathAbsDouble: { - // Deal with vector restrictions. - HInstruction* opa = instruction->InputAt(0); - HInstruction* r = opa; - bool is_unsigned = false; - if (HasVectorRestrictions(restrictions, kNoAbs)) { - return false; - } else if (HasVectorRestrictions(restrictions, kNoHiBits) && - (!IsNarrowerOperand(opa, type, &r, &is_unsigned) || is_unsigned)) { - return false; // reject, unless operand is sign-extension narrower - } - // Accept ABS(x) for vectorizable operand. - DCHECK(r != nullptr); - if (generate_code && vector_mode_ != kVector) { // de-idiom - r = opa; - } - if (VectorizeUse(node, r, generate_code, type, restrictions)) { - if (generate_code) { - GenerateVecOp(instruction, - vector_map_->Get(r), - nullptr, - HVecOperation::ToProperType(type, is_unsigned)); - } - return true; - } - return false; + } else if (instruction->IsAbs()) { + // Deal with vector restrictions. 
+ HInstruction* opa = instruction->InputAt(0); + HInstruction* r = opa; + bool is_unsigned = false; + if (HasVectorRestrictions(restrictions, kNoAbs)) { + return false; + } else if (HasVectorRestrictions(restrictions, kNoHiBits) && + (!IsNarrowerOperand(opa, type, &r, &is_unsigned) || is_unsigned)) { + return false; // reject, unless operand is sign-extension narrower + } + // Accept ABS(x) for vectorizable operand. + DCHECK(r != nullptr); + if (generate_code && vector_mode_ != kVector) { // de-idiom + r = opa; + } + if (VectorizeUse(node, r, generate_code, type, restrictions)) { + if (generate_code) { + GenerateVecOp(instruction, + vector_map_->Get(r), + nullptr, + HVecOperation::ToProperType(type, is_unsigned)); } - default: - return false; - } // switch + return true; + } } return false; } uint32_t HLoopOptimization::GetVectorSizeInBytes() { - switch (compiler_driver_->GetInstructionSet()) { + switch (compiler_options_->GetInstructionSet()) { case InstructionSet::kArm: case InstructionSet::kThumb2: return 8; // 64-bit SIMD @@ -1327,8 +1544,8 @@ uint32_t HLoopOptimization::GetVectorSizeInBytes() { } bool HLoopOptimization::TrySetVectorType(DataType::Type type, uint64_t* restrictions) { - const InstructionSetFeatures* features = compiler_driver_->GetInstructionSetFeatures(); - switch (compiler_driver_->GetInstructionSet()) { + const InstructionSetFeatures* features = compiler_options_->GetInstructionSetFeatures(); + switch (compiler_options_->GetInstructionSet()) { case InstructionSet::kArm: case InstructionSet::kThumb2: // Allow vectorization for all ARM devices, because Android assumes that @@ -1337,11 +1554,11 @@ bool HLoopOptimization::TrySetVectorType(DataType::Type type, uint64_t* restrict case DataType::Type::kBool: case DataType::Type::kUint8: case DataType::Type::kInt8: - *restrictions |= kNoDiv | kNoReduction; + *restrictions |= kNoDiv | kNoReduction | kNoDotProd; return TrySetVectorLength(8); case DataType::Type::kUint16: case DataType::Type::kInt16: - *restrictions |= kNoDiv | kNoStringCharAt | kNoReduction; + *restrictions |= kNoDiv | kNoStringCharAt | kNoReduction | kNoDotProd; return TrySetVectorLength(4); case DataType::Type::kInt32: *restrictions |= kNoDiv | kNoWideSAD; @@ -1386,12 +1603,23 @@ bool HLoopOptimization::TrySetVectorType(DataType::Type type, uint64_t* restrict case DataType::Type::kBool: case DataType::Type::kUint8: case DataType::Type::kInt8: - *restrictions |= - kNoMul | kNoDiv | kNoShift | kNoAbs | kNoSignedHAdd | kNoUnroundedHAdd | kNoSAD; + *restrictions |= kNoMul | + kNoDiv | + kNoShift | + kNoAbs | + kNoSignedHAdd | + kNoUnroundedHAdd | + kNoSAD | + kNoDotProd; return TrySetVectorLength(16); case DataType::Type::kUint16: case DataType::Type::kInt16: - *restrictions |= kNoDiv | kNoAbs | kNoSignedHAdd | kNoUnroundedHAdd | kNoSAD; + *restrictions |= kNoDiv | + kNoAbs | + kNoSignedHAdd | + kNoUnroundedHAdd | + kNoSAD| + kNoDotProd; return TrySetVectorLength(8); case DataType::Type::kInt32: *restrictions |= kNoDiv | kNoSAD; @@ -1416,11 +1644,11 @@ bool HLoopOptimization::TrySetVectorType(DataType::Type type, uint64_t* restrict case DataType::Type::kBool: case DataType::Type::kUint8: case DataType::Type::kInt8: - *restrictions |= kNoDiv; + *restrictions |= kNoDiv | kNoDotProd; return TrySetVectorLength(16); case DataType::Type::kUint16: case DataType::Type::kInt16: - *restrictions |= kNoDiv | kNoStringCharAt; + *restrictions |= kNoDiv | kNoStringCharAt | kNoDotProd; return TrySetVectorLength(8); case DataType::Type::kInt32: *restrictions |= kNoDiv; @@ 
-1445,11 +1673,11 @@ bool HLoopOptimization::TrySetVectorType(DataType::Type type, uint64_t* restrict case DataType::Type::kBool: case DataType::Type::kUint8: case DataType::Type::kInt8: - *restrictions |= kNoDiv; + *restrictions |= kNoDiv | kNoDotProd; return TrySetVectorLength(16); case DataType::Type::kUint16: case DataType::Type::kInt16: - *restrictions |= kNoDiv | kNoStringCharAt; + *restrictions |= kNoDiv | kNoStringCharAt | kNoDotProd; return TrySetVectorLength(8); case DataType::Type::kInt32: *restrictions |= kNoDiv; @@ -1751,57 +1979,11 @@ void HLoopOptimization::GenerateVecOp(HInstruction* org, GENERATE_VEC( new (global_allocator_) HVecUShr(global_allocator_, opa, opb, type, vector_length_, dex_pc), new (global_allocator_) HUShr(org_type, opa, opb, dex_pc)); - case HInstruction::kInvokeStaticOrDirect: { - HInvokeStaticOrDirect* invoke = org->AsInvokeStaticOrDirect(); - if (vector_mode_ == kVector) { - switch (invoke->GetIntrinsic()) { - case Intrinsics::kMathAbsInt: - case Intrinsics::kMathAbsLong: - case Intrinsics::kMathAbsFloat: - case Intrinsics::kMathAbsDouble: - DCHECK(opb == nullptr); - vector = new (global_allocator_) - HVecAbs(global_allocator_, opa, type, vector_length_, dex_pc); - break; - default: - LOG(FATAL) << "Unsupported SIMD intrinsic " << org->GetId(); - UNREACHABLE(); - } // switch invoke - } else { - // In scalar code, simply clone the method invoke, and replace its operands with the - // corresponding new scalar instructions in the loop. The instruction will get an - // environment while being inserted from the instruction map in original program order. - DCHECK(vector_mode_ == kSequential); - size_t num_args = invoke->GetNumberOfArguments(); - HInvokeStaticOrDirect* new_invoke = new (global_allocator_) HInvokeStaticOrDirect( - global_allocator_, - num_args, - invoke->GetType(), - invoke->GetDexPc(), - invoke->GetDexMethodIndex(), - invoke->GetResolvedMethod(), - invoke->GetDispatchInfo(), - invoke->GetInvokeType(), - invoke->GetTargetMethod(), - invoke->GetClinitCheckRequirement()); - HInputsRef inputs = invoke->GetInputs(); - size_t num_inputs = inputs.size(); - DCHECK_LE(num_args, num_inputs); - DCHECK_EQ(num_inputs, new_invoke->GetInputs().size()); // both invokes agree - for (size_t index = 0; index < num_inputs; ++index) { - HInstruction* new_input = index < num_args - ? vector_map_->Get(inputs[index]) - : inputs[index]; // beyond arguments: just pass through - new_invoke->SetArgumentAt(index, new_input); - } - new_invoke->SetIntrinsic(invoke->GetIntrinsic(), - kNeedsEnvironmentOrCache, - kNoSideEffects, - kNoThrow); - vector = new_invoke; - } - break; - } + case HInstruction::kAbs: + DCHECK(opb == nullptr); + GENERATE_VEC( + new (global_allocator_) HVecAbs(global_allocator_, opa, type, vector_length_, dex_pc), + new (global_allocator_) HAbs(org_type, opa, dex_pc)); default: break; } // switch @@ -1838,8 +2020,7 @@ bool HLoopOptimization::VectorizeHalvingAddIdiom(LoopNode* node, HInstruction* a = nullptr; HInstruction* b = nullptr; int64_t c = 0; - if (IsAddConst(instruction->InputAt(0), /*out*/ &a, /*out*/ &b, /*out*/ &c)) { - DCHECK(a != nullptr && b != nullptr); + if (IsAddConst2(graph_, instruction->InputAt(0), /*out*/ &a, /*out*/ &b, /*out*/ &c)) { // Accept c == 1 (rounded) or c == 0 (not rounded). bool is_rounded = false; if (c == 1) { @@ -1861,8 +2042,7 @@ bool HLoopOptimization::VectorizeHalvingAddIdiom(LoopNode* node, } // Accept recognized halving add for vectorizable operands. Vectorized code uses the // shorthand idiomatic operation. 
Sequential code uses the original scalar expressions. - DCHECK(r != nullptr); - DCHECK(s != nullptr); + DCHECK(r != nullptr && s != nullptr); if (generate_code && vector_mode_ != kVector) { // de-idiom r = instruction->InputAt(0); s = instruction->InputAt(1); @@ -1912,21 +2092,11 @@ bool HLoopOptimization::VectorizeSADIdiom(LoopNode* node, HInstruction* v = instruction->InputAt(1); HInstruction* a = nullptr; HInstruction* b = nullptr; - if (v->IsInvokeStaticOrDirect() && - (v->AsInvokeStaticOrDirect()->GetIntrinsic() == Intrinsics::kMathAbsInt || - v->AsInvokeStaticOrDirect()->GetIntrinsic() == Intrinsics::kMathAbsLong)) { - HInstruction* x = v->InputAt(0); - if (x->GetType() == reduction_type) { - int64_t c = 0; - if (x->IsSub()) { - a = x->InputAt(0); - b = x->InputAt(1); - } else if (IsAddConst(x, /*out*/ &a, /*out*/ &c)) { - b = graph_->GetConstant(reduction_type, -c); // hidden SUB! - } - } - } - if (a == nullptr || b == nullptr) { + if (v->IsAbs() && + v->GetType() == reduction_type && + IsSubConst2(graph_, v->InputAt(0), /*out*/ &a, /*out*/ &b)) { + DCHECK(a != nullptr && b != nullptr); + } else { return false; } // Accept same-type or consistent sign extension for narrower-type on operands a and b. @@ -1935,18 +2105,7 @@ bool HLoopOptimization::VectorizeSADIdiom(LoopNode* node, HInstruction* r = a; HInstruction* s = b; bool is_unsigned = false; - DataType::Type sub_type = a->GetType(); - if (DataType::Size(b->GetType()) < DataType::Size(sub_type)) { - sub_type = b->GetType(); - } - if (a->IsTypeConversion() && - DataType::Size(a->InputAt(0)->GetType()) < DataType::Size(sub_type)) { - sub_type = a->InputAt(0)->GetType(); - } - if (b->IsTypeConversion() && - DataType::Size(b->InputAt(0)->GetType()) < DataType::Size(sub_type)) { - sub_type = b->InputAt(0)->GetType(); - } + DataType::Type sub_type = GetNarrowerType(a, b); if (reduction_type != sub_type && (!IsNarrowerOperands(a, b, sub_type, &r, &s, &is_unsigned) || is_unsigned)) { return false; @@ -1959,8 +2118,7 @@ bool HLoopOptimization::VectorizeSADIdiom(LoopNode* node, } // Accept SAD idiom for vectorizable operands. Vectorized code uses the shorthand // idiomatic operation. Sequential code uses the original scalar expressions. - DCHECK(r != nullptr); - DCHECK(s != nullptr); + DCHECK(r != nullptr && s != nullptr); if (generate_code && vector_mode_ != kVector) { // de-idiom r = s = v->InputAt(0); } @@ -1968,14 +2126,13 @@ bool HLoopOptimization::VectorizeSADIdiom(LoopNode* node, VectorizeUse(node, r, generate_code, sub_type, restrictions) && VectorizeUse(node, s, generate_code, sub_type, restrictions)) { if (generate_code) { - reduction_type = HVecOperation::ToProperType(reduction_type, is_unsigned); if (vector_mode_ == kVector) { vector_map_->Put(instruction, new (global_allocator_) HVecSADAccumulate( global_allocator_, vector_map_->Get(q), vector_map_->Get(r), vector_map_->Get(s), - reduction_type, + HVecOperation::ToProperType(reduction_type, is_unsigned), GetOtherVL(reduction_type, sub_type, vector_length_), kNoDexPc)); MaybeRecordStat(stats_, MethodCompilationStat::kLoopVectorizedIdiom); @@ -1989,6 +2146,75 @@ bool HLoopOptimization::VectorizeSADIdiom(LoopNode* node, return false; } +// Method recognises the following dot product idiom: +// q += a * b for operands a, b whose type is narrower than the reduction one. +// Provided that the operands have the same type or are promoted to a wider form. 
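// (An illustrative instance, not taken from the change itself: with two byte
// arrays a and b, a loop body of the form "s += a[i] * b[i]" that accumulates
// into an int s matches this idiom; reduction_type is Int32 while the multiply
// operands are only 8 bits wide before promotion.)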
+// Since this may involve a vector length change, the idiom is handled by going directly +// to a dot product node (rather than relying combining finer grained nodes later). +bool HLoopOptimization::VectorizeDotProdIdiom(LoopNode* node, + HInstruction* instruction, + bool generate_code, + DataType::Type reduction_type, + uint64_t restrictions) { + if (!instruction->IsAdd() || (reduction_type != DataType::Type::kInt32)) { + return false; + } + + HInstruction* q = instruction->InputAt(0); + HInstruction* v = instruction->InputAt(1); + if (!v->IsMul() || v->GetType() != reduction_type) { + return false; + } + + HInstruction* a = v->InputAt(0); + HInstruction* b = v->InputAt(1); + HInstruction* r = a; + HInstruction* s = b; + DataType::Type op_type = GetNarrowerType(a, b); + bool is_unsigned = false; + + if (!IsNarrowerOperands(a, b, op_type, &r, &s, &is_unsigned)) { + return false; + } + op_type = HVecOperation::ToProperType(op_type, is_unsigned); + + if (!TrySetVectorType(op_type, &restrictions) || + HasVectorRestrictions(restrictions, kNoDotProd)) { + return false; + } + + DCHECK(r != nullptr && s != nullptr); + // Accept dot product idiom for vectorizable operands. Vectorized code uses the shorthand + // idiomatic operation. Sequential code uses the original scalar expressions. + if (generate_code && vector_mode_ != kVector) { // de-idiom + r = a; + s = b; + } + if (VectorizeUse(node, q, generate_code, op_type, restrictions) && + VectorizeUse(node, r, generate_code, op_type, restrictions) && + VectorizeUse(node, s, generate_code, op_type, restrictions)) { + if (generate_code) { + if (vector_mode_ == kVector) { + vector_map_->Put(instruction, new (global_allocator_) HVecDotProd( + global_allocator_, + vector_map_->Get(q), + vector_map_->Get(r), + vector_map_->Get(s), + reduction_type, + is_unsigned, + GetOtherVL(reduction_type, op_type, vector_length_), + kNoDexPc)); + MaybeRecordStat(stats_, MethodCompilationStat::kLoopVectorizedIdiom); + } else { + GenerateVecOp(v, vector_map_->Get(r), vector_map_->Get(s), reduction_type); + GenerateVecOp(instruction, vector_map_->Get(q), vector_map_->Get(v), reduction_type); + } + } + return true; + } + return false; +} + // // Vectorization heuristics. // @@ -2048,41 +2274,6 @@ bool HLoopOptimization::IsVectorizationProfitable(int64_t trip_count) { return true; } -static constexpr uint32_t ARM64_SIMD_MAXIMUM_UNROLL_FACTOR = 8; -static constexpr uint32_t ARM64_SIMD_HEURISTIC_MAX_BODY_SIZE = 50; - -uint32_t HLoopOptimization::GetUnrollingFactor(HBasicBlock* block, int64_t trip_count) { - uint32_t max_peel = MaxNumberPeeled(); - switch (compiler_driver_->GetInstructionSet()) { - case InstructionSet::kArm64: { - // Don't unroll with insufficient iterations. - // TODO: Unroll loops with unknown trip count. - DCHECK_NE(vector_length_, 0u); - if (trip_count < (2 * vector_length_ + max_peel)) { - return kNoUnrollingFactor; - } - // Don't unroll for large loop body size. - uint32_t instruction_count = block->GetInstructions().CountSize(); - if (instruction_count >= ARM64_SIMD_HEURISTIC_MAX_BODY_SIZE) { - return kNoUnrollingFactor; - } - // Find a beneficial unroll factor with the following restrictions: - // - At least one iteration of the transformed loop should be executed. - // - The loop body shouldn't be "too big" (heuristic). 
- uint32_t uf1 = ARM64_SIMD_HEURISTIC_MAX_BODY_SIZE / instruction_count; - uint32_t uf2 = (trip_count - max_peel) / vector_length_; - uint32_t unroll_factor = - TruncToPowerOfTwo(std::min({uf1, uf2, ARM64_SIMD_MAXIMUM_UNROLL_FACTOR})); - DCHECK_GE(unroll_factor, 1u); - return unroll_factor; - } - case InstructionSet::kX86: - case InstructionSet::kX86_64: - default: - return kNoUnrollingFactor; - } -} - // // Helpers. // diff --git a/compiler/optimizing/loop_optimization.h b/compiler/optimizing/loop_optimization.h index d70751037b..1a842c4bf3 100644 --- a/compiler/optimizing/loop_optimization.h +++ b/compiler/optimizing/loop_optimization.h @@ -20,12 +20,15 @@ #include "base/scoped_arena_allocator.h" #include "base/scoped_arena_containers.h" #include "induction_var_range.h" +#include "loop_analysis.h" #include "nodes.h" #include "optimization.h" +#include "superblock_cloner.h" namespace art { -class CompilerDriver; +class CompilerOptions; +class ArchNoOptsLoopHelper; /** * Loop optimizations. Builds a loop hierarchy and applies optimizations to @@ -35,12 +38,12 @@ class CompilerDriver; class HLoopOptimization : public HOptimization { public: HLoopOptimization(HGraph* graph, - CompilerDriver* compiler_driver, + const CompilerOptions* compiler_options, HInductionVarAnalysis* induction_analysis, OptimizingCompilerStats* stats, const char* name = kLoopOptimizationPassName); - void Run() OVERRIDE; + bool Run() override; static constexpr const char* kLoopOptimizationPassName = "loop_optimization"; @@ -79,6 +82,7 @@ class HLoopOptimization : public HOptimization { kNoReduction = 1 << 9, // no reduction kNoSAD = 1 << 10, // no sum of absolute differences (SAD) kNoWideSAD = 1 << 11, // no sum of absolute differences (SAD) with operand widening + kNoDotProd = 1 << 12, // no dot product }; /* @@ -118,7 +122,7 @@ class HLoopOptimization : public HOptimization { // Loop setup and traversal. // - void LocalRun(); + bool LocalRun(); void AddLoop(HLoopInformation* loop_info); void RemoveLoop(LoopNode* node); @@ -133,10 +137,34 @@ class HLoopOptimization : public HOptimization { void SimplifyInduction(LoopNode* node); void SimplifyBlocks(LoopNode* node); - // Performs optimizations specific to inner loop (empty loop removal, + // Performs optimizations specific to inner loop with finite header logic (empty loop removal, // unrolling, vectorization). Returns true if anything changed. + bool TryOptimizeInnerLoopFinite(LoopNode* node); + + // Performs optimizations specific to inner loop. Returns true if anything changed. bool OptimizeInnerLoop(LoopNode* node); + // Tries to apply loop unrolling for branch penalty reduction and better instruction scheduling + // opportunities. Returns whether transformation happened. 'generate_code' determines whether the + // optimization should be actually applied. + bool TryUnrollingForBranchPenaltyReduction(LoopAnalysisInfo* analysis_info, + bool generate_code = true); + + // Tries to apply loop peeling for loop invariant exits elimination. Returns whether + // transformation happened. 'generate_code' determines whether the optimization should be + // actually applied. + bool TryPeelingForLoopInvariantExitsElimination(LoopAnalysisInfo* analysis_info, + bool generate_code = true); + + // Tries to perform whole loop unrolling for a small loop with a small trip count to eliminate + // the loop check overhead and to have more opportunities for inter-iteration optimizations. + // Returns whether transformation happened. 
'generate_code' determines whether the optimization + // should be actually applied. + bool TryFullUnrolling(LoopAnalysisInfo* analysis_info, bool generate_code = true); + + // Tries to apply scalar loop peeling and unrolling. + bool TryPeelingAndUnrolling(LoopNode* node); + // // Vectorization analysis and synthesis. // @@ -175,6 +203,11 @@ class HLoopOptimization : public HOptimization { DataType::Type type); // Vectorization idioms. + bool VectorizeSaturationIdiom(LoopNode* node, + HInstruction* instruction, + bool generate_code, + DataType::Type type, + uint64_t restrictions); bool VectorizeHalvingAddIdiom(LoopNode* node, HInstruction* instruction, bool generate_code, @@ -185,6 +218,11 @@ class HLoopOptimization : public HOptimization { bool generate_code, DataType::Type type, uint64_t restrictions); + bool VectorizeDotProdIdiom(LoopNode* node, + HInstruction* instruction, + bool generate_code, + DataType::Type type, + uint64_t restrictions); // Vectorization heuristics. Alignment ComputeAlignment(HInstruction* offset, @@ -195,7 +233,6 @@ class HLoopOptimization : public HOptimization { const ArrayReference* peeling_candidate); uint32_t MaxNumberPeeled(); bool IsVectorizationProfitable(int64_t trip_count); - uint32_t GetUnrollingFactor(HBasicBlock* block, int64_t trip_count); // // Helpers. @@ -225,8 +262,8 @@ class HLoopOptimization : public HOptimization { void RemoveDeadInstructions(const HInstructionList& list); bool CanRemoveCycle(); // Whether the current 'iset_' is removable. - // Compiler driver (to query ISA features). - const CompilerDriver* compiler_driver_; + // Compiler options (to query ISA features). + const CompilerOptions* compiler_options_; // Range information based on prior induction variable analysis. InductionVarRange induction_range_; @@ -289,6 +326,9 @@ class HLoopOptimization : public HOptimization { HBasicBlock* vector_body_; // body of the new loop HInstruction* vector_index_; // normalized index of the new loop + // Helper for target-specific behaviour for loop optimizations. + ArchNoOptsLoopHelper* arch_loop_helper_; + friend class LoopOptimizationTest; DISALLOW_COPY_AND_ASSIGN(HLoopOptimization); diff --git a/compiler/optimizing/loop_optimization_test.cc b/compiler/optimizing/loop_optimization_test.cc index db8368986c..310d98b5b0 100644 --- a/compiler/optimizing/loop_optimization_test.cc +++ b/compiler/optimizing/loop_optimization_test.cc @@ -29,7 +29,8 @@ class LoopOptimizationTest : public OptimizingUnitTest { LoopOptimizationTest() : graph_(CreateGraph()), iva_(new (GetAllocator()) HInductionVarAnalysis(graph_)), - loop_opt_(new (GetAllocator()) HLoopOptimization(graph_, nullptr, iva_, nullptr)) { + loop_opt_(new (GetAllocator()) HLoopOptimization( + graph_, /* compiler_options= */ nullptr, iva_, /* stats= */ nullptr)) { BuildGraph(); } @@ -227,11 +228,14 @@ TEST_F(LoopOptimizationTest, SimplifyLoopReoderPredecessors) { graph_->ClearDominanceInformation(); graph_->BuildDominatorTree(); + // BuildDominatorTree inserts a block beetween loop header and entry block. + EXPECT_EQ(header->GetPredecessors()[0]->GetSinglePredecessor(), entry_block_); + // Check that after optimizations in BuildDominatorTree()/SimplifyCFG() phi inputs // are still mapped correctly to the block predecessors. 
for (size_t i = 0, e = phi->InputCount(); i < e; i++) { HInstruction* input = phi->InputAt(i); - ASSERT_TRUE(input->GetBlock()->Dominates(header->GetPredecessors()[i])); + EXPECT_TRUE(input->GetBlock()->Dominates(header->GetPredecessors()[i])); } } diff --git a/compiler/optimizing/nodes.cc b/compiler/optimizing/nodes.cc index f6ba19f22a..1940d55a9d 100644 --- a/compiler/optimizing/nodes.cc +++ b/compiler/optimizing/nodes.cc @@ -20,8 +20,10 @@ #include "art_method-inl.h" #include "base/bit_utils.h" #include "base/bit_vector-inl.h" +#include "base/logging.h" #include "base/stl_util.h" #include "class_linker-inl.h" +#include "class_root.h" #include "code_generator.h" #include "common_dominator.h" #include "intrinsics.h" @@ -40,10 +42,9 @@ static constexpr bool kEnableFloatingPointStaticEvaluation = (FLT_EVAL_METHOD == void HGraph::InitializeInexactObjectRTI(VariableSizedHandleScope* handles) { ScopedObjectAccess soa(Thread::Current()); // Create the inexact Object reference type and store it in the HGraph. - ClassLinker* linker = Runtime::Current()->GetClassLinker(); inexact_object_rti_ = ReferenceTypeInfo::Create( - handles->NewHandle(linker->GetClassRoot(ClassLinker::kJavaLangObject)), - /* is_exact */ false); + handles->NewHandle(GetClassRoot<mirror::Object>()), + /* is_exact= */ false); } void HGraph::AddBlock(HBasicBlock* block) { @@ -59,7 +60,7 @@ void HGraph::FindBackEdges(ArenaBitVector* visited) { ScopedArenaAllocator allocator(GetArenaStack()); // Nodes that we're currently visiting, indexed by block id. ArenaBitVector visiting( - &allocator, blocks_.size(), /* expandable */ false, kArenaAllocGraphBuilder); + &allocator, blocks_.size(), /* expandable= */ false, kArenaAllocGraphBuilder); visiting.ClearAllBits(); // Number of successors visited from a given node, indexed by block id. ScopedArenaVector<size_t> successors_visited(blocks_.size(), @@ -146,7 +147,9 @@ void HGraph::RemoveInstructionsAsUsersFromDeadBlocks(const ArenaBitVector& visit if (!visited.IsBitSet(i)) { HBasicBlock* block = blocks_[i]; if (block == nullptr) continue; - DCHECK(block->GetPhis().IsEmpty()) << "Phis are not inserted at this stage"; + for (HInstructionIterator it(block->GetPhis()); !it.Done(); it.Advance()) { + RemoveAsUser(it.Current()); + } for (HInstructionIterator it(block->GetInstructions()); !it.Done(); it.Advance()) { RemoveAsUser(it.Current()); } @@ -688,7 +691,7 @@ HCurrentMethod* HGraph::GetCurrentMethod() { } const char* HGraph::GetMethodName() const { - const DexFile::MethodId& method_id = dex_file_.GetMethodId(method_idx_); + const dex::MethodId& method_id = dex_file_.GetMethodId(method_idx_); return dex_file_.GetMethodName(method_id); } @@ -825,7 +828,7 @@ void HLoopInformation::Populate() { ScopedArenaAllocator allocator(graph->GetArenaStack()); ArenaBitVector visited(&allocator, graph->GetBlocks().size(), - /* expandable */ false, + /* expandable= */ false, kArenaAllocGraphBuilder); visited.ClearAllBits(); // Stop marking blocks at the loop header. @@ -1121,6 +1124,23 @@ void HEnvironment::RemoveAsUserOfInput(size_t index) const { user->FixUpUserRecordsAfterEnvUseRemoval(before_env_use_node); } +void HEnvironment::ReplaceInput(HInstruction* replacement, size_t index) { + const HUserRecord<HEnvironment*>& env_use_record = vregs_[index]; + HInstruction* orig_instr = env_use_record.GetInstruction(); + + DCHECK(orig_instr != replacement); + + HUseList<HEnvironment*>::iterator before_use_node = env_use_record.GetBeforeUseNode(); + // Note: fixup_end remains valid across splice_after(). 
+ auto fixup_end = replacement->env_uses_.empty() ? replacement->env_uses_.begin() + : ++replacement->env_uses_.begin(); + replacement->env_uses_.splice_after(replacement->env_uses_.before_begin(), + env_use_record.GetInstruction()->env_uses_, + before_use_node); + replacement->FixUpUserRecordsAfterEnvUseInsertion(fixup_end); + orig_instr->FixUpUserRecordsAfterEnvUseRemoval(before_use_node); +} + HInstruction* HInstruction::GetNextDisregardingMoves() const { HInstruction* next = GetNext(); while (next != nullptr && next->IsParallelMove()) { @@ -1213,7 +1233,7 @@ bool HInstructionList::FoundBefore(const HInstruction* instruction1, } } LOG(FATAL) << "Did not find an order between two instructions of the same block."; - return true; + UNREACHABLE(); } bool HInstruction::StrictlyDominates(HInstruction* other_instruction) const { @@ -1236,7 +1256,7 @@ bool HInstruction::StrictlyDominates(HInstruction* other_instruction) const { } else { // There is no order among phis. LOG(FATAL) << "There is no dominance between phis of a same block."; - return false; + UNREACHABLE(); } } else { // `this` is not a phi. @@ -1284,6 +1304,28 @@ void HInstruction::ReplaceUsesDominatedBy(HInstruction* dominator, HInstruction* ++it; if (dominator->StrictlyDominates(user)) { user->ReplaceInput(replacement, index); + } else if (user->IsPhi() && !user->AsPhi()->IsCatchPhi()) { + // If the input flows from a block dominated by `dominator`, we can replace it. + // We do not perform this for catch phis as we don't have control flow support + // for their inputs. + const ArenaVector<HBasicBlock*>& predecessors = user->GetBlock()->GetPredecessors(); + HBasicBlock* predecessor = predecessors[index]; + if (dominator->GetBlock()->Dominates(predecessor)) { + user->ReplaceInput(replacement, index); + } + } + } +} + +void HInstruction::ReplaceEnvUsesDominatedBy(HInstruction* dominator, HInstruction* replacement) { + const HUseList<HEnvironment*>& uses = GetEnvUses(); + for (auto it = uses.begin(), end = uses.end(); it != end; /* ++it below */) { + HEnvironment* user = it->GetUser(); + size_t index = it->GetIndex(); + // Increment `it` now because `*it` may disappear thanks to user->ReplaceInput(). 
+ ++it; + if (dominator->StrictlyDominates(user->GetHolder())) { + user->ReplaceInput(replacement, index); } } } @@ -1680,10 +1722,9 @@ bool HCondition::IsBeforeWhenDisregardMoves(HInstruction* instruction) const { } bool HInstruction::Equals(const HInstruction* other) const { - if (!InstructionTypeEquals(other)) return false; - DCHECK_EQ(GetKind(), other->GetKind()); - if (!InstructionDataEquals(other)) return false; + if (GetKind() != other->GetKind()) return false; if (GetType() != other->GetType()) return false; + if (!InstructionDataEquals(other)) return false; HConstInputsRef inputs = GetInputs(); HConstInputsRef other_inputs = other->GetInputs(); if (inputs.size() != other_inputs.size()) return false; @@ -1698,7 +1739,7 @@ bool HInstruction::Equals(const HInstruction* other) const { std::ostream& operator<<(std::ostream& os, const HInstruction::InstructionKind& rhs) { #define DECLARE_CASE(type, super) case HInstruction::k##type: os << #type; break; switch (rhs) { - FOR_EACH_INSTRUCTION(DECLARE_CASE) + FOR_EACH_CONCRETE_INSTRUCTION(DECLARE_CASE) default: os << "Unknown instruction kind " << static_cast<int>(rhs); break; @@ -1952,6 +1993,11 @@ bool HBasicBlock::EndsWithControlFlowInstruction() const { return !GetInstructions().IsEmpty() && GetLastInstruction()->IsControlFlow(); } +bool HBasicBlock::EndsWithReturn() const { + return !GetInstructions().IsEmpty() && + (GetLastInstruction()->IsReturn() || GetLastInstruction()->IsReturnVoid()); +} + bool HBasicBlock::EndsWithIf() const { return !GetInstructions().IsEmpty() && GetLastInstruction()->IsIf(); } @@ -2483,7 +2529,7 @@ HInstruction* HGraph::InlineInto(HGraph* outer_graph, HInvoke* invoke) { current->SetGraph(outer_graph); outer_graph->AddBlock(current); outer_graph->reverse_post_order_[++index_of_at] = current; - UpdateLoopAndTryInformationOfNewBlock(current, at, /* replace_if_back_edge */ false); + UpdateLoopAndTryInformationOfNewBlock(current, at, /* replace_if_back_edge= */ false); } } @@ -2493,7 +2539,7 @@ HInstruction* HGraph::InlineInto(HGraph* outer_graph, HInvoke* invoke) { outer_graph->reverse_post_order_[++index_of_at] = to; // Only `to` can become a back edge, as the inlined blocks // are predecessors of `to`. - UpdateLoopAndTryInformationOfNewBlock(to, at, /* replace_if_back_edge */ true); + UpdateLoopAndTryInformationOfNewBlock(to, at, /* replace_if_back_edge= */ true); // Update all predecessors of the exit block (now the `to` block) // to not `HReturn` but `HGoto` instead. 
Special case throwing blocks @@ -2667,13 +2713,13 @@ void HGraph::TransformLoopHeaderForBCE(HBasicBlock* header) { DCHECK((old_pre_header->GetLoopInformation() == nullptr) || !old_pre_header->GetLoopInformation()->IsBackEdge(*old_pre_header)); UpdateLoopAndTryInformationOfNewBlock( - if_block, old_pre_header, /* replace_if_back_edge */ false); + if_block, old_pre_header, /* replace_if_back_edge= */ false); UpdateLoopAndTryInformationOfNewBlock( - true_block, old_pre_header, /* replace_if_back_edge */ false); + true_block, old_pre_header, /* replace_if_back_edge= */ false); UpdateLoopAndTryInformationOfNewBlock( - false_block, old_pre_header, /* replace_if_back_edge */ false); + false_block, old_pre_header, /* replace_if_back_edge= */ false); UpdateLoopAndTryInformationOfNewBlock( - new_pre_header, old_pre_header, /* replace_if_back_edge */ false); + new_pre_header, old_pre_header, /* replace_if_back_edge= */ false); } HBasicBlock* HGraph::TransformLoopForVectorization(HBasicBlock* header, @@ -2765,6 +2811,14 @@ void HInstruction::SetReferenceTypeInfo(ReferenceTypeInfo rti) { SetPackedFlag<kFlagReferenceTypeIsExact>(rti.IsExact()); } +bool HBoundType::InstructionDataEquals(const HInstruction* other) const { + const HBoundType* other_bt = other->AsBoundType(); + ScopedObjectAccess soa(Thread::Current()); + return GetUpperBound().IsEqual(other_bt->GetUpperBound()) && + GetUpperCanBeNull() == other_bt->GetUpperCanBeNull() && + CanBeNull() == other_bt->CanBeNull(); +} + void HBoundType::SetUpperBound(const ReferenceTypeInfo& upper_bound, bool can_be_null) { if (kIsDebugBuild) { ScopedObjectAccess soa(Thread::Current()); @@ -2850,8 +2904,7 @@ void HInvoke::SetIntrinsic(Intrinsics intrinsic, } bool HNewInstance::IsStringAlloc() const { - ScopedObjectAccess soa(Thread::Current()); - return GetReferenceTypeInfo().IsStringClass(); + return GetEntrypoint() == kQuickAllocStringObject; } bool HInvoke::NeedsEnvironment() const { @@ -2889,10 +2942,12 @@ std::ostream& operator<<(std::ostream& os, HInvokeStaticOrDirect::MethodLoadKind return os << "Recursive"; case HInvokeStaticOrDirect::MethodLoadKind::kBootImageLinkTimePcRelative: return os << "BootImageLinkTimePcRelative"; - case HInvokeStaticOrDirect::MethodLoadKind::kDirectAddress: - return os << "DirectAddress"; + case HInvokeStaticOrDirect::MethodLoadKind::kBootImageRelRo: + return os << "BootImageRelRo"; case HInvokeStaticOrDirect::MethodLoadKind::kBssEntry: return os << "BssEntry"; + case HInvokeStaticOrDirect::MethodLoadKind::kJitDirectAddress: + return os << "JitDirectAddress"; case HInvokeStaticOrDirect::MethodLoadKind::kRuntimeCall: return os << "RuntimeCall"; default: @@ -2924,8 +2979,8 @@ bool HLoadClass::InstructionDataEquals(const HInstruction* other) const { return false; } switch (GetLoadKind()) { - case LoadKind::kBootImageAddress: - case LoadKind::kBootImageClassTable: + case LoadKind::kBootImageRelRo: + case LoadKind::kJitBootImageAddress: case LoadKind::kJitTableAddress: { ScopedObjectAccess soa(Thread::Current()); return GetClass().Get() == other_load_class->GetClass().Get(); @@ -2942,12 +2997,12 @@ std::ostream& operator<<(std::ostream& os, HLoadClass::LoadKind rhs) { return os << "ReferrersClass"; case HLoadClass::LoadKind::kBootImageLinkTimePcRelative: return os << "BootImageLinkTimePcRelative"; - case HLoadClass::LoadKind::kBootImageAddress: - return os << "BootImageAddress"; - case HLoadClass::LoadKind::kBootImageClassTable: - return os << "BootImageClassTable"; + case HLoadClass::LoadKind::kBootImageRelRo: + return os << 
"BootImageRelRo"; case HLoadClass::LoadKind::kBssEntry: return os << "BssEntry"; + case HLoadClass::LoadKind::kJitBootImageAddress: + return os << "JitBootImageAddress"; case HLoadClass::LoadKind::kJitTableAddress: return os << "JitTableAddress"; case HLoadClass::LoadKind::kRuntimeCall: @@ -2967,8 +3022,8 @@ bool HLoadString::InstructionDataEquals(const HInstruction* other) const { return false; } switch (GetLoadKind()) { - case LoadKind::kBootImageAddress: - case LoadKind::kBootImageInternTable: + case LoadKind::kBootImageRelRo: + case LoadKind::kJitBootImageAddress: case LoadKind::kJitTableAddress: { ScopedObjectAccess soa(Thread::Current()); return GetString().Get() == other_load_string->GetString().Get(); @@ -2982,12 +3037,12 @@ std::ostream& operator<<(std::ostream& os, HLoadString::LoadKind rhs) { switch (rhs) { case HLoadString::LoadKind::kBootImageLinkTimePcRelative: return os << "BootImageLinkTimePcRelative"; - case HLoadString::LoadKind::kBootImageAddress: - return os << "BootImageAddress"; - case HLoadString::LoadKind::kBootImageInternTable: - return os << "BootImageInternTable"; + case HLoadString::LoadKind::kBootImageRelRo: + return os << "BootImageRelRo"; case HLoadString::LoadKind::kBssEntry: return os << "BssEntry"; + case HLoadString::LoadKind::kJitBootImageAddress: + return os << "JitBootImageAddress"; case HLoadString::LoadKind::kJitTableAddress: return os << "JitTableAddress"; case HLoadString::LoadKind::kRuntimeCall: @@ -3101,6 +3156,8 @@ std::ostream& operator<<(std::ostream& os, TypeCheckKind rhs) { return os << "array_object_check"; case TypeCheckKind::kArrayCheck: return os << "array_check"; + case TypeCheckKind::kBitstringCheck: + return os << "bitstring_check"; default: LOG(FATAL) << "Unknown TypeCheckKind: " << static_cast<int>(rhs); UNREACHABLE(); @@ -3126,4 +3183,77 @@ std::ostream& operator<<(std::ostream& os, const MemBarrierKind& kind) { } } +// Check that intrinsic enum values fit within space set aside in ArtMethod modifier flags. +#define CHECK_INTRINSICS_ENUM_VALUES(Name, InvokeType, _, SideEffects, Exceptions, ...) \ + static_assert( \ + static_cast<uint32_t>(Intrinsics::k ## Name) <= (kAccIntrinsicBits >> CTZ(kAccIntrinsicBits)), \ + "Instrinsics enumeration space overflow."); +#include "intrinsics_list.h" + INTRINSICS_LIST(CHECK_INTRINSICS_ENUM_VALUES) +#undef INTRINSICS_LIST +#undef CHECK_INTRINSICS_ENUM_VALUES + +// Function that returns whether an intrinsic needs an environment or not. +static inline IntrinsicNeedsEnvironmentOrCache NeedsEnvironmentOrCacheIntrinsic(Intrinsics i) { + switch (i) { + case Intrinsics::kNone: + return kNeedsEnvironmentOrCache; // Non-sensical for intrinsic. +#define OPTIMIZING_INTRINSICS(Name, InvokeType, NeedsEnvOrCache, SideEffects, Exceptions, ...) \ + case Intrinsics::k ## Name: \ + return NeedsEnvOrCache; +#include "intrinsics_list.h" + INTRINSICS_LIST(OPTIMIZING_INTRINSICS) +#undef INTRINSICS_LIST +#undef OPTIMIZING_INTRINSICS + } + return kNeedsEnvironmentOrCache; +} + +// Function that returns whether an intrinsic has side effects. +static inline IntrinsicSideEffects GetSideEffectsIntrinsic(Intrinsics i) { + switch (i) { + case Intrinsics::kNone: + return kAllSideEffects; +#define OPTIMIZING_INTRINSICS(Name, InvokeType, NeedsEnvOrCache, SideEffects, Exceptions, ...) 
\ + case Intrinsics::k ## Name: \ + return SideEffects; +#include "intrinsics_list.h" + INTRINSICS_LIST(OPTIMIZING_INTRINSICS) +#undef INTRINSICS_LIST +#undef OPTIMIZING_INTRINSICS + } + return kAllSideEffects; +} + +// Function that returns whether an intrinsic can throw exceptions. +static inline IntrinsicExceptions GetExceptionsIntrinsic(Intrinsics i) { + switch (i) { + case Intrinsics::kNone: + return kCanThrow; +#define OPTIMIZING_INTRINSICS(Name, InvokeType, NeedsEnvOrCache, SideEffects, Exceptions, ...) \ + case Intrinsics::k ## Name: \ + return Exceptions; +#include "intrinsics_list.h" + INTRINSICS_LIST(OPTIMIZING_INTRINSICS) +#undef INTRINSICS_LIST +#undef OPTIMIZING_INTRINSICS + } + return kCanThrow; +} + +void HInvoke::SetResolvedMethod(ArtMethod* method) { + // TODO: b/65872996 The intent is that polymorphic signature methods should + // be compiler intrinsics. At present, they are only interpreter intrinsics. + if (method != nullptr && + method->IsIntrinsic() && + !method->IsPolymorphicSignature()) { + Intrinsics intrinsic = static_cast<Intrinsics>(method->GetIntrinsic()); + SetIntrinsic(intrinsic, + NeedsEnvironmentOrCacheIntrinsic(intrinsic), + GetSideEffectsIntrinsic(intrinsic), + GetExceptionsIntrinsic(intrinsic)); + } + resolved_method_ = method; +} + } // namespace art diff --git a/compiler/optimizing/nodes.h b/compiler/optimizing/nodes.h index fe992a7f39..fedad0c69a 100644 --- a/compiler/optimizing/nodes.h +++ b/compiler/optimizing/nodes.h @@ -26,9 +26,11 @@ #include "base/arena_object.h" #include "base/array_ref.h" #include "base/iteration_range.h" +#include "base/mutex.h" #include "base/quasi_atomic.h" #include "base/stl_util.h" #include "base/transform_array_ref.h" +#include "art_method.h" #include "data_type.h" #include "deoptimization_kind.h" #include "dex/dex_file.h" @@ -41,6 +43,7 @@ #include "intrinsics_enum.h" #include "locations.h" #include "mirror/class.h" +#include "mirror/method_type.h" #include "offsets.h" #include "utils/intrusive_forward_list.h" @@ -127,6 +130,7 @@ enum GraphAnalysisResult { kAnalysisInvalidBytecode, kAnalysisFailThrowCatchLoop, kAnalysisFailAmbiguousArrayOp, + kAnalysisFailIrreducibleLoopAndStringInit, kAnalysisSuccess, }; @@ -313,6 +317,7 @@ class HGraph : public ArenaObject<kArenaAllocGraph> { uint32_t method_idx, InstructionSet instruction_set, InvokeType invoke_type = kInvalidInvokeType, + bool dead_reference_safe = false, bool debuggable = false, bool osr = false, int start_instruction_id = 0) @@ -332,6 +337,7 @@ class HGraph : public ArenaObject<kArenaAllocGraph> { has_simd_(false), has_loops_(false), has_irreducible_loops_(false), + dead_reference_safe_(dead_reference_safe), debuggable_(debuggable), current_instruction_id_(start_instruction_id), dex_file_(dex_file), @@ -522,6 +528,12 @@ class HGraph : public ArenaObject<kArenaAllocGraph> { has_bounds_checks_ = value; } + // Is the code known to be robust against eliminating dead references + // and the effects of early finalization? + bool IsDeadReferenceSafe() const { return dead_reference_safe_; } + + void MarkDeadReferenceUnsafe() { dead_reference_safe_ = false; } + bool IsDebuggable() const { return debuggable_; } // Returns a constant of the given type and value. If it does not exist @@ -700,6 +712,14 @@ class HGraph : public ArenaObject<kArenaAllocGraph> { // so there might be false positives. bool has_irreducible_loops_; + // Is the code known to be robust against eliminating dead references + // and the effects of early finalization? 
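The helpers added above (NeedsEnvironmentOrCacheIntrinsic, GetSideEffectsIntrinsic, GetExceptionsIntrinsic) all expand intrinsics_list.h, so the per-intrinsic property tables can never drift out of sync with the Intrinsics enum itself. A standalone sketch of that X-macro pattern, not part of this patch; MY_OP_LIST and the property enum are made-up stand-ins for INTRINSICS_LIST and IntrinsicSideEffects:

// Every operation and its side-effect class is declared exactly once, here.
#define MY_OP_LIST(V)        \
  V(Add, kNoSideEffects)     \
  V(Store, kAllSideEffects)

enum MySideEffects { kNoSideEffects, kAllSideEffects };

enum class MyOp {
  kNone,
#define MY_OP_ENUMERATOR(Name, SideEffects) k##Name,
  MY_OP_LIST(MY_OP_ENUMERATOR)
#undef MY_OP_ENUMERATOR
};

// The lookup switch is generated from the same list, so adding an entry to
// MY_OP_LIST updates both the enum and the table in one place.
constexpr MySideEffects GetSideEffects(MyOp op) {
  switch (op) {
    case MyOp::kNone:
      return kAllSideEffects;  // Conservative default for "no intrinsic".
#define MY_OP_CASE(Name, SideEffects) \
    case MyOp::k##Name:               \
      return SideEffects;
    MY_OP_LIST(MY_OP_CASE)
#undef MY_OP_CASE
  }
  return kAllSideEffects;
}

static_assert(GetSideEffects(MyOp::kAdd) == kNoSideEffects, "list and switch agree");
static_assert(GetSideEffects(MyOp::kStore) == kAllSideEffects, "list and switch agree");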
If false, dead reference variables + // are kept if they might be visible to the garbage collector. + // Currently this means that the class was declared to be dead-reference-safe, + // the method accesses no reachability-sensitive fields or data, and the same + // is true for any methods that were inlined into the current one. + bool dead_reference_safe_; + // Indicates whether the graph should be compiled in a way that // ensures full debuggability. If false, we can apply more // aggressive optimizations that may limit the level of debugging. @@ -891,7 +911,7 @@ class TryCatchInformation : public ArenaObject<kArenaAllocTryCatchInfo> { explicit TryCatchInformation(const HTryBoundary& try_entry) : try_entry_(&try_entry), catch_dex_file_(nullptr), - catch_type_index_(DexFile::kDexNoIndex16) { + catch_type_index_(dex::TypeIndex::Invalid()) { DCHECK(try_entry_ != nullptr); } @@ -910,9 +930,9 @@ class TryCatchInformation : public ArenaObject<kArenaAllocTryCatchInfo> { bool IsCatchBlock() const { return catch_dex_file_ != nullptr; } - bool IsCatchAllTypeIndex() const { + bool IsValidTypeIndex() const { DCHECK(IsCatchBlock()); - return !catch_type_index_.IsValid(); + return catch_type_index_.IsValid(); } dex::TypeIndex GetCatchTypeIndex() const { @@ -925,6 +945,10 @@ class TryCatchInformation : public ArenaObject<kArenaAllocTryCatchInfo> { return *catch_dex_file_; } + void SetInvalidTypeIndex() { + catch_type_index_ = dex::TypeIndex::Invalid(); + } + private: // One of possibly several TryBoundary instructions entering the block's try. // Only set for try blocks. @@ -932,7 +956,7 @@ class TryCatchInformation : public ArenaObject<kArenaAllocTryCatchInfo> { // Exception type information. Only set for catch blocks. const DexFile* catch_dex_file_; - const dex::TypeIndex catch_type_index_; + dex::TypeIndex catch_type_index_; }; static constexpr size_t kNoLifetime = -1; @@ -1284,6 +1308,7 @@ class HBasicBlock : public ArenaObject<kArenaAllocBasicBlock> { void SetLifetimeEnd(size_t end) { lifetime_end_ = end; } bool EndsWithControlFlowInstruction() const; + bool EndsWithReturn() const; bool EndsWithIf() const; bool EndsWithTryBoundary() const; bool HasSinglePhi() const; @@ -1338,6 +1363,7 @@ class HLoopInformationOutwardIterator : public ValueObject { #define FOR_EACH_CONCRETE_INSTRUCTION_COMMON(M) \ M(Above, Condition) \ M(AboveOrEqual, Condition) \ + M(Abs, UnaryOperation) \ M(Add, BinaryOperation) \ M(And, BinaryOperation) \ M(ArrayGet, Instruction) \ @@ -1377,13 +1403,18 @@ class HLoopInformationOutwardIterator : public ValueObject { M(InvokeStaticOrDirect, Invoke) \ M(InvokeVirtual, Invoke) \ M(InvokePolymorphic, Invoke) \ + M(InvokeCustom, Invoke) \ M(LessThan, Condition) \ M(LessThanOrEqual, Condition) \ M(LoadClass, Instruction) \ M(LoadException, Instruction) \ + M(LoadMethodHandle, Instruction) \ + M(LoadMethodType, Instruction) \ M(LoadString, Instruction) \ M(LongConstant, Constant) \ + M(Max, Instruction) \ M(MemoryBarrier, Instruction) \ + M(Min, BinaryOperation) \ M(MonitorOperation, Instruction) \ M(Mul, BinaryOperation) \ M(NativeDebugInfo, Instruction) \ @@ -1437,12 +1468,15 @@ class HLoopInformationOutwardIterator : public ValueObject { M(VecAndNot, VecBinaryOperation) \ M(VecOr, VecBinaryOperation) \ M(VecXor, VecBinaryOperation) \ + M(VecSaturationAdd, VecBinaryOperation) \ + M(VecSaturationSub, VecBinaryOperation) \ M(VecShl, VecBinaryOperation) \ M(VecShr, VecBinaryOperation) \ M(VecUShr, VecBinaryOperation) \ M(VecSetScalars, VecOperation) \ M(VecMultiplyAccumulate, 
VecOperation) \ M(VecSADAccumulate, VecOperation) \ + M(VecDotProd, VecOperation) \ M(VecLoad, VecMemoryOperation) \ M(VecStore, VecMemoryOperation) \ @@ -1484,6 +1518,14 @@ class HLoopInformationOutwardIterator : public ValueObject { M(X86PackedSwitch, Instruction) #endif +#if defined(ART_ENABLE_CODEGEN_x86) || defined(ART_ENABLE_CODEGEN_x86_64) +#define FOR_EACH_CONCRETE_INSTRUCTION_X86_COMMON(M) \ + M(X86AndNot, Instruction) \ + M(X86MaskOrResetLeastSetBit, Instruction) +#else +#define FOR_EACH_CONCRETE_INSTRUCTION_X86_COMMON(M) +#endif + #define FOR_EACH_CONCRETE_INSTRUCTION_X86_64(M) #define FOR_EACH_CONCRETE_INSTRUCTION(M) \ @@ -1494,7 +1536,8 @@ class HLoopInformationOutwardIterator : public ValueObject { FOR_EACH_CONCRETE_INSTRUCTION_MIPS(M) \ FOR_EACH_CONCRETE_INSTRUCTION_MIPS64(M) \ FOR_EACH_CONCRETE_INSTRUCTION_X86(M) \ - FOR_EACH_CONCRETE_INSTRUCTION_X86_64(M) + FOR_EACH_CONCRETE_INSTRUCTION_X86_64(M) \ + FOR_EACH_CONCRETE_INSTRUCTION_X86_COMMON(M) #define FOR_EACH_ABSTRACT_INSTRUCTION(M) \ M(Condition, BinaryOperation) \ @@ -1519,23 +1562,17 @@ FOR_EACH_INSTRUCTION(FORWARD_DECLARATION) private: \ H##type& operator=(const H##type&) = delete; \ public: \ - const char* DebugName() const OVERRIDE { return #type; } \ - bool InstructionTypeEquals(const HInstruction* other) const OVERRIDE { \ - return other->Is##type(); \ - } \ - HInstruction* Clone(ArenaAllocator* arena) const OVERRIDE { \ + const char* DebugName() const override { return #type; } \ + HInstruction* Clone(ArenaAllocator* arena) const override { \ DCHECK(IsClonable()); \ return new (arena) H##type(*this->As##type()); \ } \ - void Accept(HGraphVisitor* visitor) OVERRIDE + void Accept(HGraphVisitor* visitor) override #define DECLARE_ABSTRACT_INSTRUCTION(type) \ private: \ H##type& operator=(const H##type&) = delete; \ - public: \ - bool Is##type() const { return As##type() != nullptr; } \ - const H##type* As##type() const { return this; } \ - H##type* As##type() { return this; } + public: #define DEFAULT_COPY_CONSTRUCTOR(type) \ explicit H##type(const H##type& other) = default; @@ -1622,6 +1659,21 @@ using HConstInputsRef = TransformArrayRef<const HUserRecord<HInstruction*>, HInp * the same, and any reference read depends on any reference read without * further regard of its type). * + * kDependsOnGCBit is defined in the following way: instructions with kDependsOnGCBit must not be + * alive across the point where garbage collection might happen. + * + * Note: Instructions with kCanTriggerGCBit do not depend on each other. + * + * kCanTriggerGCBit must be used for instructions for which GC might happen on the path across + * those instructions from the compiler perspective (between this instruction and the next one + * in the IR). + * + * Note: Instructions which can cause GC only on a fatal slow path do not need + * kCanTriggerGCBit as the execution never returns to the instruction next to the exceptional + * one. However the execution may return to compiled code if there is a catch block in the + * current method; for this purpose the TryBoundary exit instruction has kCanTriggerGCBit + * set. + * * The internal representation uses 38-bit and is described in the table below. 
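The comment block above distinguishes kCanTriggerGCBit (a GC may happen between this instruction and the next one in the IR) from kDependsOnGCBit (the instruction's value must not be live across a GC point). A minimal sketch of how such a bit-set query can compose; the bit positions and the MayDependOn() rule here are illustrative only, not ART's actual 38-bit encoding with per-type field/array flags:

#include <cstdint>

class SideEffectsSketch {
 public:
  static constexpr uint64_t kCanTriggerGCBit = 1ULL << 0;
  static constexpr uint64_t kDependsOnGCBit  = 1ULL << 1;

  static SideEffectsSketch None() { return SideEffectsSketch(0); }
  static SideEffectsSketch CanTriggerGC() { return SideEffectsSketch(kCanTriggerGCBit); }
  static SideEffectsSketch DependsOnGC() { return SideEffectsSketch(kDependsOnGCBit); }

  SideEffectsSketch Union(SideEffectsSketch other) const {
    return SideEffectsSketch(flags_ | other.flags_);
  }

  // An instruction that depends on GC must not be moved across one that can
  // trigger GC: the GC point would invalidate the value it carries.
  bool MayDependOn(SideEffectsSketch other) const {
    return (flags_ & kDependsOnGCBit) != 0 && (other.flags_ & kCanTriggerGCBit) != 0;
  }

 private:
  explicit SideEffectsSketch(uint64_t flags) : flags_(flags) {}
  uint64_t flags_;
};

Under this model the TryBoundary-exit change above falls out naturally: giving the exit boundary CanTriggerGC() is what keeps DependsOnGC values from being scheduled across the edge into a catch block.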
* The first line indicates the side effect, and for field/array accesses the * second line indicates the type of the access (in the order of the @@ -1694,10 +1746,17 @@ class SideEffects : public ValueObject { return SideEffects(TypeFlag(type, kArrayReadOffset)); } + // Returns whether GC might happen across this instruction from the compiler perspective so + // the next instruction in the IR would see that. + // + // See the SideEffect class comments. static SideEffects CanTriggerGC() { return SideEffects(1ULL << kCanTriggerGCBit); } + // Returns whether the instruction must not be alive across a GC point. + // + // See the SideEffect class comments. static SideEffects DependsOnGC() { return SideEffects(1ULL << kDependsOnGCBit); } @@ -1906,6 +1965,11 @@ class HEnvironment : public ArenaObject<kArenaAllocEnvironment> { void RemoveAsUserOfInput(size_t index) const; + // Replaces the input at the position 'index' with the replacement; the replacement and old + // input instructions' env_uses_ lists are adjusted. The function works similar to + // HInstruction::ReplaceInput. + void ReplaceInput(HInstruction* replacement, size_t index); + size_t Size() const { return vregs_.size(); } HEnvironment* GetParent() const { return parent_; } @@ -1954,12 +2018,15 @@ class HInstruction : public ArenaObject<kArenaAllocInstruction> { public: #define DECLARE_KIND(type, super) k##type, enum InstructionKind { - FOR_EACH_INSTRUCTION(DECLARE_KIND) + FOR_EACH_CONCRETE_INSTRUCTION(DECLARE_KIND) kLastInstructionKind }; #undef DECLARE_KIND HInstruction(InstructionKind kind, SideEffects side_effects, uint32_t dex_pc) + : HInstruction(kind, DataType::Type::kVoid, side_effects, dex_pc) {} + + HInstruction(InstructionKind kind, DataType::Type type, SideEffects side_effects, uint32_t dex_pc) : previous_(nullptr), next_(nullptr), block_(nullptr), @@ -1974,6 +2041,7 @@ class HInstruction : public ArenaObject<kArenaAllocInstruction> { side_effects_(side_effects), reference_type_handle_(ReferenceTypeInfo::CreateInvalid().GetTypeHandle()) { SetPackedField<InstructionKindField>(kind); + SetPackedField<TypeField>(type); SetPackedFlag<kFlagReferenceTypeIsExact>(ReferenceTypeInfo::CreateInvalid().IsExact()); } @@ -2031,7 +2099,9 @@ class HInstruction : public ArenaObject<kArenaAllocInstruction> { virtual void Accept(HGraphVisitor* visitor) = 0; virtual const char* DebugName() const = 0; - virtual DataType::Type GetType() const { return DataType::Type::kVoid; } + DataType::Type GetType() const { + return TypeField::Decode(GetPackedFields()); + } virtual bool NeedsEnvironment() const { return false; } @@ -2064,6 +2134,19 @@ class HInstruction : public ArenaObject<kArenaAllocInstruction> { return false; } + // If this instruction will do an implicit null check, return the `HNullCheck` associated + // with it. Otherwise return null. + HNullCheck* GetImplicitNullCheck() const { + // Find the first previous instruction which is not a move. 
+ HInstruction* first_prev_not_move = GetPreviousDisregardingMoves(); + if (first_prev_not_move != nullptr && + first_prev_not_move->IsNullCheck() && + first_prev_not_move->IsEmittedAtUseSite()) { + return first_prev_not_move->AsNullCheck(); + } + return nullptr; + } + virtual bool IsActualObject() const { return GetType() == DataType::Type::kReference; } @@ -2202,6 +2285,7 @@ class HInstruction : public ArenaObject<kArenaAllocInstruction> { void ReplaceWith(HInstruction* instruction); void ReplaceUsesDominatedBy(HInstruction* dominator, HInstruction* replacement); + void ReplaceEnvUsesDominatedBy(HInstruction* dominator, HInstruction* replacement); void ReplaceInput(HInstruction* replacement, size_t index); // This is almost the same as doing `ReplaceWith()`. But in this helper, the @@ -2223,19 +2307,17 @@ class HInstruction : public ArenaObject<kArenaAllocInstruction> { void MoveBeforeFirstUserAndOutOfLoops(); #define INSTRUCTION_TYPE_CHECK(type, super) \ - bool Is##type() const; \ - const H##type* As##type() const; \ - H##type* As##type(); + bool Is##type() const; - FOR_EACH_CONCRETE_INSTRUCTION(INSTRUCTION_TYPE_CHECK) + FOR_EACH_INSTRUCTION(INSTRUCTION_TYPE_CHECK) #undef INSTRUCTION_TYPE_CHECK -#define INSTRUCTION_TYPE_CHECK(type, super) \ - bool Is##type() const { return (As##type() != nullptr); } \ - virtual const H##type* As##type() const { return nullptr; } \ - virtual H##type* As##type() { return nullptr; } - FOR_EACH_ABSTRACT_INSTRUCTION(INSTRUCTION_TYPE_CHECK) -#undef INSTRUCTION_TYPE_CHECK +#define INSTRUCTION_TYPE_CAST(type, super) \ + const H##type* As##type() const; \ + H##type* As##type(); + + FOR_EACH_INSTRUCTION(INSTRUCTION_TYPE_CAST) +#undef INSTRUCTION_TYPE_CAST // Return a clone of the instruction if it is clonable (shallow copy by default, custom copy // if a custom copy-constructor is provided for a particular type). If IsClonable() is false for @@ -2261,11 +2343,6 @@ class HInstruction : public ArenaObject<kArenaAllocInstruction> { // meanings? split and rename? virtual bool CanBeMoved() const { return false; } - // Returns whether the two instructions are of the same kind. - virtual bool InstructionTypeEquals(const HInstruction* other ATTRIBUTE_UNUSED) const { - return false; - } - // Returns whether any data encoded in the two instructions is equal. // This method does not look at the inputs. Both instructions must be // of the same type, otherwise the method has undefined behavior. @@ -2278,10 +2355,6 @@ class HInstruction : public ArenaObject<kArenaAllocInstruction> { // 2) Their inputs are identical. bool Equals(const HInstruction* other) const; - // TODO: Remove this indirection when the [[pure]] attribute proposal (n3744) - // is adopted and implemented by our C++ compiler(s). Fow now, we need to hide - // the virtual function because the __attribute__((__pure__)) doesn't really - // apply the strong requirement for virtual functions, preventing optimizations. 
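The macro changes above drop the per-class virtual As##type() overrides and instead declare Is##type()/As##type() once per instruction, to be defined out of line. Those definitions are not shown in this hunk; the sketch below only illustrates the obvious kind-based shape such non-virtual checks can take, with hypothetical Instruction/AddInstruction classes:

enum class Kind { kAdd, kMul, kPhi };

struct Instruction {
  explicit Instruction(Kind kind) : kind_(kind) {}
  Kind GetKind() const { return kind_; }
  Kind kind_;
};

struct AddInstruction : Instruction {
  AddInstruction() : Instruction(Kind::kAdd) {}
};

// A single enum compare replaces a virtual call.
inline bool IsAdd(const Instruction* insn) { return insn->GetKind() == Kind::kAdd; }

// Downcast guarded by the kind check; callers test the result for null.
inline const AddInstruction* AsAdd(const Instruction* insn) {
  return IsAdd(insn) ? static_cast<const AddInstruction*>(insn) : nullptr;
}

// Usage: if (const AddInstruction* add = AsAdd(insn)) { /* use add */ }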
InstructionKind GetKind() const { return GetPackedField<InstructionKindField>(); } virtual size_t ComputeHashCode() const { @@ -2337,13 +2410,18 @@ class HInstruction : public ArenaObject<kArenaAllocInstruction> { static constexpr size_t kFieldInstructionKind = kFlagReferenceTypeIsExact + 1; static constexpr size_t kFieldInstructionKindSize = MinimumBitsToStore(static_cast<size_t>(InstructionKind::kLastInstructionKind - 1)); - static constexpr size_t kNumberOfGenericPackedBits = + static constexpr size_t kFieldType = kFieldInstructionKind + kFieldInstructionKindSize; + static constexpr size_t kFieldTypeSize = + MinimumBitsToStore(static_cast<size_t>(DataType::Type::kLast)); + static constexpr size_t kNumberOfGenericPackedBits = kFieldType + kFieldTypeSize; static constexpr size_t kMaxNumberOfPackedBits = sizeof(uint32_t) * kBitsPerByte; static_assert(kNumberOfGenericPackedBits <= kMaxNumberOfPackedBits, "Too many generic packed fields"); + using TypeField = BitField<DataType::Type, kFieldType, kFieldTypeSize>; + const HUserRecord<HInstruction*> InputRecordAt(size_t i) const { return GetInputRecords()[i]; } @@ -2568,7 +2646,7 @@ class HBackwardInstructionIterator : public ValueObject { class HVariableInputSizeInstruction : public HInstruction { public: using HInstruction::GetInputRecords; // Keep the const version visible. - ArrayRef<HUserRecord<HInstruction*>> GetInputRecords() OVERRIDE { + ArrayRef<HUserRecord<HInstruction*>> GetInputRecords() override { return ArrayRef<HUserRecord<HInstruction*>>(inputs_); } @@ -2590,6 +2668,15 @@ class HVariableInputSizeInstruction : public HInstruction { ArenaAllocKind kind) : HInstruction(inst_kind, side_effects, dex_pc), inputs_(number_of_inputs, allocator->Adapter(kind)) {} + HVariableInputSizeInstruction(InstructionKind inst_kind, + DataType::Type type, + SideEffects side_effects, + uint32_t dex_pc, + ArenaAllocator* allocator, + size_t number_of_inputs, + ArenaAllocKind kind) + : HInstruction(inst_kind, type, side_effects, dex_pc), + inputs_(number_of_inputs, allocator->Adapter(kind)) {} DEFAULT_COPY_CONSTRUCTOR(VariableInputSizeInstruction); @@ -2597,19 +2684,24 @@ class HVariableInputSizeInstruction : public HInstruction { }; template<size_t N> -class HTemplateInstruction: public HInstruction { +class HExpression : public HInstruction { public: - HTemplateInstruction<N>(InstructionKind kind, SideEffects side_effects, uint32_t dex_pc) + HExpression<N>(InstructionKind kind, SideEffects side_effects, uint32_t dex_pc) : HInstruction(kind, side_effects, dex_pc), inputs_() {} - virtual ~HTemplateInstruction() {} + HExpression<N>(InstructionKind kind, + DataType::Type type, + SideEffects side_effects, + uint32_t dex_pc) + : HInstruction(kind, type, side_effects, dex_pc), inputs_() {} + virtual ~HExpression() {} using HInstruction::GetInputRecords; // Keep the const version visible. - ArrayRef<HUserRecord<HInstruction*>> GetInputRecords() OVERRIDE FINAL { + ArrayRef<HUserRecord<HInstruction*>> GetInputRecords() final { return ArrayRef<HUserRecord<HInstruction*>>(inputs_); } protected: - DEFAULT_COPY_CONSTRUCTOR(TemplateInstruction<N>); + DEFAULT_COPY_CONSTRUCTOR(Expression<N>); private: std::array<HUserRecord<HInstruction*>, N> inputs_; @@ -2617,64 +2709,35 @@ class HTemplateInstruction: public HInstruction { friend class SsaBuilder; }; -// HTemplateInstruction specialization for N=0. +// HExpression specialization for N=0. 
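The constants above (kFieldType, kFieldTypeSize, TypeField) pack the DataType::Type into the same 32-bit packed-fields word as the InstructionKind, which is what lets GetType() become a plain field decode rather than a virtual call. A compressed sketch of the same idea with hand-rolled shifts and made-up field widths; ART's BitField and MinimumBitsToStore helpers are not reproduced here:

#include <cstdint>

enum class InsnKind : uint32_t { kAdd, kMul, kPhi, kLast = kPhi };
enum class InsnType : uint32_t { kVoid, kInt32, kInt64, kReference, kLast = kReference };

class PackedInstruction {
 public:
  PackedInstruction(InsnKind kind, InsnType type)
      : packed_((static_cast<uint32_t>(kind) << kKindShift) |
                (static_cast<uint32_t>(type) << kTypeShift)) {}

  InsnKind GetKind() const {
    return static_cast<InsnKind>((packed_ >> kKindShift) & kKindMask);
  }
  // The type is a cheap mask/shift, and subclasses no longer override it.
  InsnType GetType() const {
    return static_cast<InsnType>((packed_ >> kTypeShift) & kTypeMask);
  }

 private:
  static constexpr uint32_t kKindShift = 0;
  static constexpr uint32_t kKindMask = 0x3;   // 2 bits for the kind.
  static constexpr uint32_t kTypeShift = 2;
  static constexpr uint32_t kTypeMask = 0x7;   // 3 bits for the type.
  uint32_t packed_;                            // Flags and other fields follow in ART.
};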
template<> -class HTemplateInstruction<0>: public HInstruction { +class HExpression<0> : public HInstruction { public: - explicit HTemplateInstruction<0>(InstructionKind kind, SideEffects side_effects, uint32_t dex_pc) - : HInstruction(kind, side_effects, dex_pc) {} + using HInstruction::HInstruction; - virtual ~HTemplateInstruction() {} + virtual ~HExpression() {} using HInstruction::GetInputRecords; // Keep the const version visible. - ArrayRef<HUserRecord<HInstruction*>> GetInputRecords() OVERRIDE FINAL { + ArrayRef<HUserRecord<HInstruction*>> GetInputRecords() final { return ArrayRef<HUserRecord<HInstruction*>>(); } protected: - DEFAULT_COPY_CONSTRUCTOR(TemplateInstruction<0>); + DEFAULT_COPY_CONSTRUCTOR(Expression<0>); private: friend class SsaBuilder; }; -template<intptr_t N> -class HExpression : public HTemplateInstruction<N> { - public: - using HInstruction::InstructionKind; - HExpression<N>(InstructionKind kind, - DataType::Type type, - SideEffects side_effects, - uint32_t dex_pc) - : HTemplateInstruction<N>(kind, side_effects, dex_pc) { - this->template SetPackedField<TypeField>(type); - } - virtual ~HExpression() {} - - DataType::Type GetType() const OVERRIDE { - return TypeField::Decode(this->GetPackedFields()); - } - - protected: - static constexpr size_t kFieldType = HInstruction::kNumberOfGenericPackedBits; - static constexpr size_t kFieldTypeSize = - MinimumBitsToStore(static_cast<size_t>(DataType::Type::kLast)); - static constexpr size_t kNumberOfExpressionPackedBits = kFieldType + kFieldTypeSize; - static_assert(kNumberOfExpressionPackedBits <= HInstruction::kMaxNumberOfPackedBits, - "Too many packed fields."); - using TypeField = BitField<DataType::Type, kFieldType, kFieldTypeSize>; - DEFAULT_COPY_CONSTRUCTOR(Expression<N>); -}; - // Represents dex's RETURN_VOID opcode. A HReturnVoid is a control flow // instruction that branches to the exit block. -class HReturnVoid FINAL : public HTemplateInstruction<0> { +class HReturnVoid final : public HExpression<0> { public: explicit HReturnVoid(uint32_t dex_pc = kNoDexPc) - : HTemplateInstruction(kReturnVoid, SideEffects::None(), dex_pc) { + : HExpression(kReturnVoid, SideEffects::None(), dex_pc) { } - bool IsControlFlow() const OVERRIDE { return true; } + bool IsControlFlow() const override { return true; } DECLARE_INSTRUCTION(ReturnVoid); @@ -2684,14 +2747,14 @@ class HReturnVoid FINAL : public HTemplateInstruction<0> { // Represents dex's RETURN opcodes. A HReturn is a control flow // instruction that branches to the exit block. 
-class HReturn FINAL : public HTemplateInstruction<1> { +class HReturn final : public HExpression<1> { public: explicit HReturn(HInstruction* value, uint32_t dex_pc = kNoDexPc) - : HTemplateInstruction(kReturn, SideEffects::None(), dex_pc) { + : HExpression(kReturn, SideEffects::None(), dex_pc) { SetRawInputAt(0, value); } - bool IsControlFlow() const OVERRIDE { return true; } + bool IsControlFlow() const override { return true; } DECLARE_INSTRUCTION(Return); @@ -2699,7 +2762,7 @@ class HReturn FINAL : public HTemplateInstruction<1> { DEFAULT_COPY_CONSTRUCTOR(Return); }; -class HPhi FINAL : public HVariableInputSizeInstruction { +class HPhi final : public HVariableInputSizeInstruction { public: HPhi(ArenaAllocator* allocator, uint32_t reg_number, @@ -2708,13 +2771,13 @@ class HPhi FINAL : public HVariableInputSizeInstruction { uint32_t dex_pc = kNoDexPc) : HVariableInputSizeInstruction( kPhi, + ToPhiType(type), SideEffects::None(), dex_pc, allocator, number_of_inputs, kArenaAllocPhiInputs), reg_number_(reg_number) { - SetPackedField<TypeField>(ToPhiType(type)); DCHECK_NE(GetType(), DataType::Type::kVoid); // Phis are constructed live and marked dead if conflicting or unused. // Individual steps of SsaBuilder should assume that if a phi has been @@ -2723,7 +2786,7 @@ class HPhi FINAL : public HVariableInputSizeInstruction { SetPackedFlag<kFlagCanBeNull>(true); } - bool IsClonable() const OVERRIDE { return true; } + bool IsClonable() const override { return true; } // Returns a type equivalent to the given `type`, but that a `HPhi` can hold. static DataType::Type ToPhiType(DataType::Type type) { @@ -2732,7 +2795,6 @@ class HPhi FINAL : public HVariableInputSizeInstruction { bool IsCatchPhi() const { return GetBlock()->IsCatchBlock(); } - DataType::Type GetType() const OVERRIDE { return GetPackedField<TypeField>(); } void SetType(DataType::Type new_type) { // Make sure that only valid type changes occur. The following are allowed: // (1) int -> float/ref (primitive type propagation), @@ -2744,7 +2806,7 @@ class HPhi FINAL : public HVariableInputSizeInstruction { SetPackedField<TypeField>(new_type); } - bool CanBeNull() const OVERRIDE { return GetPackedFlag<kFlagCanBeNull>(); } + bool CanBeNull() const override { return GetPackedFlag<kFlagCanBeNull>(); } void SetCanBeNull(bool can_be_null) { SetPackedFlag<kFlagCanBeNull>(can_be_null); } uint32_t GetRegNumber() const { return reg_number_; } @@ -2791,14 +2853,10 @@ class HPhi FINAL : public HVariableInputSizeInstruction { DEFAULT_COPY_CONSTRUCTOR(Phi); private: - static constexpr size_t kFieldType = HInstruction::kNumberOfGenericPackedBits; - static constexpr size_t kFieldTypeSize = - MinimumBitsToStore(static_cast<size_t>(DataType::Type::kLast)); - static constexpr size_t kFlagIsLive = kFieldType + kFieldTypeSize; + static constexpr size_t kFlagIsLive = HInstruction::kNumberOfGenericPackedBits; static constexpr size_t kFlagCanBeNull = kFlagIsLive + 1; static constexpr size_t kNumberOfPhiPackedBits = kFlagCanBeNull + 1; static_assert(kNumberOfPhiPackedBits <= kMaxNumberOfPackedBits, "Too many packed fields."); - using TypeField = BitField<DataType::Type, kFieldType, kFieldTypeSize>; const uint32_t reg_number_; }; @@ -2806,13 +2864,13 @@ class HPhi FINAL : public HVariableInputSizeInstruction { // The exit instruction is the only instruction of the exit block. // Instructions aborting the method (HThrow and HReturn) must branch to the // exit block. 
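With this change HPhi hands ToPhiType(type) straight to the base-class constructor, so the packed TypeField is normalized at construction instead of being patched afterwards. The normalization itself is delegated to DataType elsewhere; the sketch below is only an assumption about the usual rule, namely that sub-word integral types widen to int32 because a register-based phi cannot hold them:

enum class Ty {
  kBool, kInt8, kUint8, kInt16, kUint16,
  kInt32, kInt64, kFloat32, kFloat64, kReference, kVoid
};

// Hypothetical ToPhiType-style normalization: sub-word integrals collapse to
// kInt32, everything else passes through unchanged.
constexpr Ty ToPhiTypeSketch(Ty type) {
  switch (type) {
    case Ty::kBool:
    case Ty::kInt8:
    case Ty::kUint8:
    case Ty::kInt16:
    case Ty::kUint16:
      return Ty::kInt32;
    default:
      return type;
  }
}

static_assert(ToPhiTypeSketch(Ty::kInt16) == Ty::kInt32, "sub-word widens");
static_assert(ToPhiTypeSketch(Ty::kReference) == Ty::kReference, "others unchanged");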
-class HExit FINAL : public HTemplateInstruction<0> { +class HExit final : public HExpression<0> { public: explicit HExit(uint32_t dex_pc = kNoDexPc) - : HTemplateInstruction(kExit, SideEffects::None(), dex_pc) { + : HExpression(kExit, SideEffects::None(), dex_pc) { } - bool IsControlFlow() const OVERRIDE { return true; } + bool IsControlFlow() const override { return true; } DECLARE_INSTRUCTION(Exit); @@ -2821,14 +2879,14 @@ class HExit FINAL : public HTemplateInstruction<0> { }; // Jumps from one block to another. -class HGoto FINAL : public HTemplateInstruction<0> { +class HGoto final : public HExpression<0> { public: explicit HGoto(uint32_t dex_pc = kNoDexPc) - : HTemplateInstruction(kGoto, SideEffects::None(), dex_pc) { + : HExpression(kGoto, SideEffects::None(), dex_pc) { } - bool IsClonable() const OVERRIDE { return true; } - bool IsControlFlow() const OVERRIDE { return true; } + bool IsClonable() const override { return true; } + bool IsControlFlow() const override { return true; } HBasicBlock* GetSuccessor() const { return GetBlock()->GetSingleSuccessor(); @@ -2846,7 +2904,7 @@ class HConstant : public HExpression<0> { : HExpression(kind, type, SideEffects::None(), dex_pc) { } - bool CanBeMoved() const OVERRIDE { return true; } + bool CanBeMoved() const override { return true; } // Is this constant -1 in the arithmetic sense? virtual bool IsMinusOne() const { return false; } @@ -2865,18 +2923,18 @@ class HConstant : public HExpression<0> { DEFAULT_COPY_CONSTRUCTOR(Constant); }; -class HNullConstant FINAL : public HConstant { +class HNullConstant final : public HConstant { public: - bool InstructionDataEquals(const HInstruction* other ATTRIBUTE_UNUSED) const OVERRIDE { + bool InstructionDataEquals(const HInstruction* other ATTRIBUTE_UNUSED) const override { return true; } - uint64_t GetValueAsUint64() const OVERRIDE { return 0; } + uint64_t GetValueAsUint64() const override { return 0; } - size_t ComputeHashCode() const OVERRIDE { return 0; } + size_t ComputeHashCode() const override { return 0; } // The null constant representation is a 0-bit pattern. - virtual bool IsZeroBitPattern() const { return true; } + bool IsZeroBitPattern() const override { return true; } DECLARE_INSTRUCTION(NullConstant); @@ -2893,25 +2951,25 @@ class HNullConstant FINAL : public HConstant { // Constants of the type int. Those can be from Dex instructions, or // synthesized (for example with the if-eqz instruction). 
-class HIntConstant FINAL : public HConstant { +class HIntConstant final : public HConstant { public: int32_t GetValue() const { return value_; } - uint64_t GetValueAsUint64() const OVERRIDE { + uint64_t GetValueAsUint64() const override { return static_cast<uint64_t>(static_cast<uint32_t>(value_)); } - bool InstructionDataEquals(const HInstruction* other) const OVERRIDE { + bool InstructionDataEquals(const HInstruction* other) const override { DCHECK(other->IsIntConstant()) << other->DebugName(); return other->AsIntConstant()->value_ == value_; } - size_t ComputeHashCode() const OVERRIDE { return GetValue(); } + size_t ComputeHashCode() const override { return GetValue(); } - bool IsMinusOne() const OVERRIDE { return GetValue() == -1; } - bool IsArithmeticZero() const OVERRIDE { return GetValue() == 0; } - bool IsZeroBitPattern() const OVERRIDE { return GetValue() == 0; } - bool IsOne() const OVERRIDE { return GetValue() == 1; } + bool IsMinusOne() const override { return GetValue() == -1; } + bool IsArithmeticZero() const override { return GetValue() == 0; } + bool IsZeroBitPattern() const override { return GetValue() == 0; } + bool IsOne() const override { return GetValue() == 1; } // Integer constants are used to encode Boolean values as well, // where 1 means true and 0 means false. @@ -2939,23 +2997,23 @@ class HIntConstant FINAL : public HConstant { ART_FRIEND_TYPED_TEST(ParallelMoveTest, ConstantLast); }; -class HLongConstant FINAL : public HConstant { +class HLongConstant final : public HConstant { public: int64_t GetValue() const { return value_; } - uint64_t GetValueAsUint64() const OVERRIDE { return value_; } + uint64_t GetValueAsUint64() const override { return value_; } - bool InstructionDataEquals(const HInstruction* other) const OVERRIDE { + bool InstructionDataEquals(const HInstruction* other) const override { DCHECK(other->IsLongConstant()) << other->DebugName(); return other->AsLongConstant()->value_ == value_; } - size_t ComputeHashCode() const OVERRIDE { return static_cast<size_t>(GetValue()); } + size_t ComputeHashCode() const override { return static_cast<size_t>(GetValue()); } - bool IsMinusOne() const OVERRIDE { return GetValue() == -1; } - bool IsArithmeticZero() const OVERRIDE { return GetValue() == 0; } - bool IsZeroBitPattern() const OVERRIDE { return GetValue() == 0; } - bool IsOne() const OVERRIDE { return GetValue() == 1; } + bool IsMinusOne() const override { return GetValue() == -1; } + bool IsArithmeticZero() const override { return GetValue() == 0; } + bool IsZeroBitPattern() const override { return GetValue() == 0; } + bool IsOne() const override { return GetValue() == 1; } DECLARE_INSTRUCTION(LongConstant); @@ -2973,25 +3031,25 @@ class HLongConstant FINAL : public HConstant { friend class HGraph; }; -class HFloatConstant FINAL : public HConstant { +class HFloatConstant final : public HConstant { public: float GetValue() const { return value_; } - uint64_t GetValueAsUint64() const OVERRIDE { + uint64_t GetValueAsUint64() const override { return static_cast<uint64_t>(bit_cast<uint32_t, float>(value_)); } - bool InstructionDataEquals(const HInstruction* other) const OVERRIDE { + bool InstructionDataEquals(const HInstruction* other) const override { DCHECK(other->IsFloatConstant()) << other->DebugName(); return other->AsFloatConstant()->GetValueAsUint64() == GetValueAsUint64(); } - size_t ComputeHashCode() const OVERRIDE { return static_cast<size_t>(GetValue()); } + size_t ComputeHashCode() const override { return static_cast<size_t>(GetValue()); } - bool 
IsMinusOne() const OVERRIDE { + bool IsMinusOne() const override { return bit_cast<uint32_t, float>(value_) == bit_cast<uint32_t, float>((-1.0f)); } - bool IsArithmeticZero() const OVERRIDE { + bool IsArithmeticZero() const override { return std::fpclassify(value_) == FP_ZERO; } bool IsArithmeticPositiveZero() const { @@ -3000,10 +3058,10 @@ class HFloatConstant FINAL : public HConstant { bool IsArithmeticNegativeZero() const { return IsArithmeticZero() && std::signbit(value_); } - bool IsZeroBitPattern() const OVERRIDE { + bool IsZeroBitPattern() const override { return bit_cast<uint32_t, float>(value_) == bit_cast<uint32_t, float>(0.0f); } - bool IsOne() const OVERRIDE { + bool IsOne() const override { return bit_cast<uint32_t, float>(value_) == bit_cast<uint32_t, float>(1.0f); } bool IsNaN() const { @@ -3032,23 +3090,23 @@ class HFloatConstant FINAL : public HConstant { friend class HGraph; }; -class HDoubleConstant FINAL : public HConstant { +class HDoubleConstant final : public HConstant { public: double GetValue() const { return value_; } - uint64_t GetValueAsUint64() const OVERRIDE { return bit_cast<uint64_t, double>(value_); } + uint64_t GetValueAsUint64() const override { return bit_cast<uint64_t, double>(value_); } - bool InstructionDataEquals(const HInstruction* other) const OVERRIDE { + bool InstructionDataEquals(const HInstruction* other) const override { DCHECK(other->IsDoubleConstant()) << other->DebugName(); return other->AsDoubleConstant()->GetValueAsUint64() == GetValueAsUint64(); } - size_t ComputeHashCode() const OVERRIDE { return static_cast<size_t>(GetValue()); } + size_t ComputeHashCode() const override { return static_cast<size_t>(GetValue()); } - bool IsMinusOne() const OVERRIDE { + bool IsMinusOne() const override { return bit_cast<uint64_t, double>(value_) == bit_cast<uint64_t, double>((-1.0)); } - bool IsArithmeticZero() const OVERRIDE { + bool IsArithmeticZero() const override { return std::fpclassify(value_) == FP_ZERO; } bool IsArithmeticPositiveZero() const { @@ -3057,10 +3115,10 @@ class HDoubleConstant FINAL : public HConstant { bool IsArithmeticNegativeZero() const { return IsArithmeticZero() && std::signbit(value_); } - bool IsZeroBitPattern() const OVERRIDE { + bool IsZeroBitPattern() const override { return bit_cast<uint64_t, double>(value_) == bit_cast<uint64_t, double>((0.0)); } - bool IsOne() const OVERRIDE { + bool IsOne() const override { return bit_cast<uint64_t, double>(value_) == bit_cast<uint64_t, double>(1.0); } bool IsNaN() const { @@ -3091,15 +3149,15 @@ class HDoubleConstant FINAL : public HConstant { // Conditional branch. A block ending with an HIf instruction must have // two successors. -class HIf FINAL : public HTemplateInstruction<1> { +class HIf final : public HExpression<1> { public: explicit HIf(HInstruction* input, uint32_t dex_pc = kNoDexPc) - : HTemplateInstruction(kIf, SideEffects::None(), dex_pc) { + : HExpression(kIf, SideEffects::None(), dex_pc) { SetRawInputAt(0, input); } - bool IsClonable() const OVERRIDE { return true; } - bool IsControlFlow() const OVERRIDE { return true; } + bool IsClonable() const override { return true; } + bool IsControlFlow() const override { return true; } HBasicBlock* IfTrueSuccessor() const { return GetBlock()->GetSuccessors()[0]; @@ -3121,7 +3179,7 @@ class HIf FINAL : public HTemplateInstruction<1> { // non-exceptional control flow. // Normal-flow successor is stored at index zero, exception handlers under // higher indices in no particular order. 
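The HFloatConstant/HDoubleConstant hunks above compare equality through GetValueAsUint64(), i.e. by bit pattern, while the arithmetic predicates (IsArithmeticZero, IsOne) go through the floating-point value. The difference matters for NaN and signed zero; a small standalone illustration, with BitsOf standing in for ART's bit_cast helper:

#include <cstdint>
#include <cstring>

static_assert(sizeof(double) == sizeof(uint64_t), "expected 64-bit double");

// Portable reinterpretation of a double's bits.
static inline uint64_t BitsOf(double v) {
  uint64_t bits;
  std::memcpy(&bits, &v, sizeof(bits));
  return bits;
}

// Bit-pattern equality: identical NaN payloads compare equal, and +0.0 vs
// -0.0 compare different -- exactly what constant de-duplication wants.
static inline bool SameConstant(double a, double b) { return BitsOf(a) == BitsOf(b); }

// Value equality: NaN != NaN, and +0.0 == -0.0.
static inline bool SameValue(double a, double b) { return a == b; }

// SameConstant(0.0, -0.0) is false while SameValue(0.0, -0.0) is true, which
// is why IsArithmeticZero() and IsZeroBitPattern() are separate predicates.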
-class HTryBoundary FINAL : public HTemplateInstruction<0> { +class HTryBoundary final : public HExpression<0> { public: enum class BoundaryKind { kEntry, @@ -3129,12 +3187,19 @@ class HTryBoundary FINAL : public HTemplateInstruction<0> { kLast = kExit }; + // SideEffects::CanTriggerGC prevents instructions with SideEffects::DependOnGC to be alive + // across the catch block entering edges as GC might happen during throwing an exception. + // TryBoundary with BoundaryKind::kExit is conservatively used for that as there is no + // HInstruction which a catch block must start from. explicit HTryBoundary(BoundaryKind kind, uint32_t dex_pc = kNoDexPc) - : HTemplateInstruction(kTryBoundary, SideEffects::None(), dex_pc) { + : HExpression(kTryBoundary, + (kind == BoundaryKind::kExit) ? SideEffects::CanTriggerGC() + : SideEffects::None(), + dex_pc) { SetPackedField<BoundaryKindField>(kind); } - bool IsControlFlow() const OVERRIDE { return true; } + bool IsControlFlow() const override { return true; } // Returns the block's non-exceptional successor (index zero). HBasicBlock* GetNormalFlowSuccessor() const { return GetBlock()->GetSuccessors()[0]; } @@ -3180,7 +3245,7 @@ class HTryBoundary FINAL : public HTemplateInstruction<0> { }; // Deoptimize to interpreter, upon checking a condition. -class HDeoptimize FINAL : public HVariableInputSizeInstruction { +class HDeoptimize final : public HVariableInputSizeInstruction { public: // Use this constructor when the `HDeoptimize` acts as a barrier, where no code can move // across. @@ -3193,14 +3258,14 @@ class HDeoptimize FINAL : public HVariableInputSizeInstruction { SideEffects::All(), dex_pc, allocator, - /* number_of_inputs */ 1, + /* number_of_inputs= */ 1, kArenaAllocMisc) { SetPackedFlag<kFieldCanBeMoved>(false); SetPackedField<DeoptimizeKindField>(kind); SetRawInputAt(0, cond); } - bool IsClonable() const OVERRIDE { return true; } + bool IsClonable() const override { return true; } // Use this constructor when the `HDeoptimize` guards an instruction, and any user // that relies on the deoptimization to pass should have its input be the `HDeoptimize` @@ -3214,10 +3279,11 @@ class HDeoptimize FINAL : public HVariableInputSizeInstruction { uint32_t dex_pc) : HVariableInputSizeInstruction( kDeoptimize, + guard->GetType(), SideEffects::CanTriggerGC(), dex_pc, allocator, - /* number_of_inputs */ 2, + /* number_of_inputs= */ 2, kArenaAllocMisc) { SetPackedFlag<kFieldCanBeMoved>(true); SetPackedField<DeoptimizeKindField>(kind); @@ -3225,22 +3291,18 @@ class HDeoptimize FINAL : public HVariableInputSizeInstruction { SetRawInputAt(1, guard); } - bool CanBeMoved() const OVERRIDE { return GetPackedFlag<kFieldCanBeMoved>(); } + bool CanBeMoved() const override { return GetPackedFlag<kFieldCanBeMoved>(); } - bool InstructionDataEquals(const HInstruction* other) const OVERRIDE { + bool InstructionDataEquals(const HInstruction* other) const override { return (other->CanBeMoved() == CanBeMoved()) && (other->AsDeoptimize()->GetKind() == GetKind()); } - bool NeedsEnvironment() const OVERRIDE { return true; } + bool NeedsEnvironment() const override { return true; } - bool CanThrow() const OVERRIDE { return true; } + bool CanThrow() const override { return true; } DeoptimizationKind GetDeoptimizationKind() const { return GetPackedField<DeoptimizeKindField>(); } - DataType::Type GetType() const OVERRIDE { - return GuardsAnInput() ? 
GuardedInput()->GetType() : DataType::Type::kVoid; - } - bool GuardsAnInput() const { return InputCount() == 2; } @@ -3277,12 +3339,13 @@ class HDeoptimize FINAL : public HVariableInputSizeInstruction { // if it's true, starts to do deoptimization. // It has a 4-byte slot on stack. // TODO: allocate a register for this flag. -class HShouldDeoptimizeFlag FINAL : public HVariableInputSizeInstruction { +class HShouldDeoptimizeFlag final : public HVariableInputSizeInstruction { public: // CHA guards are only optimized in a separate pass and it has no side effects // with regard to other passes. HShouldDeoptimizeFlag(ArenaAllocator* allocator, uint32_t dex_pc) : HVariableInputSizeInstruction(kShouldDeoptimizeFlag, + DataType::Type::kInt32, SideEffects::None(), dex_pc, allocator, @@ -3290,13 +3353,11 @@ class HShouldDeoptimizeFlag FINAL : public HVariableInputSizeInstruction { kArenaAllocCHA) { } - DataType::Type GetType() const OVERRIDE { return DataType::Type::kInt32; } - // We do all CHA guard elimination/motion in a single pass, after which there is no // further guard elimination/motion since a guard might have been used for justification // of the elimination of another guard. Therefore, we pretend this guard cannot be moved // to avoid other optimizations trying to move it. - bool CanBeMoved() const OVERRIDE { return false; } + bool CanBeMoved() const override { return false; } DECLARE_INSTRUCTION(ShouldDeoptimizeFlag); @@ -3307,7 +3368,7 @@ class HShouldDeoptimizeFlag FINAL : public HVariableInputSizeInstruction { // Represents the ArtMethod that was passed as a first argument to // the method. It is used by instructions that depend on it, like // instructions that work with the dex cache. -class HCurrentMethod FINAL : public HExpression<0> { +class HCurrentMethod final : public HExpression<0> { public: explicit HCurrentMethod(DataType::Type type, uint32_t dex_pc = kNoDexPc) : HExpression(kCurrentMethod, type, SideEffects::None(), dex_pc) { @@ -3321,7 +3382,7 @@ class HCurrentMethod FINAL : public HExpression<0> { // Fetches an ArtMethod from the virtual table or the interface method table // of a class. -class HClassTableGet FINAL : public HExpression<1> { +class HClassTableGet final : public HExpression<1> { public: enum class TableKind { kVTable, @@ -3339,9 +3400,9 @@ class HClassTableGet FINAL : public HExpression<1> { SetRawInputAt(0, cls); } - bool IsClonable() const OVERRIDE { return true; } - bool CanBeMoved() const OVERRIDE { return true; } - bool InstructionDataEquals(const HInstruction* other) const OVERRIDE { + bool IsClonable() const override { return true; } + bool CanBeMoved() const override { return true; } + bool InstructionDataEquals(const HInstruction* other) const override { return other->AsClassTableGet()->GetIndex() == index_ && other->AsClassTableGet()->GetPackedFields() == GetPackedFields(); } @@ -3355,7 +3416,7 @@ class HClassTableGet FINAL : public HExpression<1> { DEFAULT_COPY_CONSTRUCTOR(ClassTableGet); private: - static constexpr size_t kFieldTableKind = kNumberOfExpressionPackedBits; + static constexpr size_t kFieldTableKind = kNumberOfGenericPackedBits; static constexpr size_t kFieldTableKindSize = MinimumBitsToStore(static_cast<size_t>(TableKind::kLast)); static constexpr size_t kNumberOfClassTableGetPackedBits = kFieldTableKind + kFieldTableKindSize; @@ -3370,21 +3431,21 @@ class HClassTableGet FINAL : public HExpression<1> { // PackedSwitch (jump table). 
A block ending with a PackedSwitch instruction will // have one successor for each entry in the switch table, and the final successor // will be the block containing the next Dex opcode. -class HPackedSwitch FINAL : public HTemplateInstruction<1> { +class HPackedSwitch final : public HExpression<1> { public: HPackedSwitch(int32_t start_value, uint32_t num_entries, HInstruction* input, uint32_t dex_pc = kNoDexPc) - : HTemplateInstruction(kPackedSwitch, SideEffects::None(), dex_pc), + : HExpression(kPackedSwitch, SideEffects::None(), dex_pc), start_value_(start_value), num_entries_(num_entries) { SetRawInputAt(0, input); } - bool IsClonable() const OVERRIDE { return true; } + bool IsClonable() const override { return true; } - bool IsControlFlow() const OVERRIDE { return true; } + bool IsControlFlow() const override { return true; } int32_t GetStartValue() const { return start_value_; } @@ -3415,13 +3476,13 @@ class HUnaryOperation : public HExpression<1> { } // All of the UnaryOperation instructions are clonable. - bool IsClonable() const OVERRIDE { return true; } + bool IsClonable() const override { return true; } HInstruction* GetInput() const { return InputAt(0); } DataType::Type GetResultType() const { return GetType(); } - bool CanBeMoved() const OVERRIDE { return true; } - bool InstructionDataEquals(const HInstruction* other ATTRIBUTE_UNUSED) const OVERRIDE { + bool CanBeMoved() const override { return true; } + bool InstructionDataEquals(const HInstruction* other ATTRIBUTE_UNUSED) const override { return true; } @@ -3456,7 +3517,7 @@ class HBinaryOperation : public HExpression<2> { } // All of the BinaryOperation instructions are clonable. - bool IsClonable() const OVERRIDE { return true; } + bool IsClonable() const override { return true; } HInstruction* GetLeft() const { return InputAt(0); } HInstruction* GetRight() const { return InputAt(1); } @@ -3496,8 +3557,8 @@ class HBinaryOperation : public HExpression<2> { } } - bool CanBeMoved() const OVERRIDE { return true; } - bool InstructionDataEquals(const HInstruction* other ATTRIBUTE_UNUSED) const OVERRIDE { + bool CanBeMoved() const override { return true; } + bool InstructionDataEquals(const HInstruction* other ATTRIBUTE_UNUSED) const override { return true; } @@ -3578,7 +3639,7 @@ class HCondition : public HBinaryOperation { ComparisonBias GetBias() const { return GetPackedField<ComparisonBiasField>(); } void SetBias(ComparisonBias bias) { SetPackedField<ComparisonBiasField>(bias); } - bool InstructionDataEquals(const HInstruction* other) const OVERRIDE { + bool InstructionDataEquals(const HInstruction* other) const override { return GetPackedFields() == other->AsCondition()->GetPackedFields(); } @@ -3606,7 +3667,7 @@ class HCondition : public HBinaryOperation { protected: // Needed if we merge a HCompare into a HCondition. - static constexpr size_t kFieldComparisonBias = kNumberOfExpressionPackedBits; + static constexpr size_t kFieldComparisonBias = kNumberOfGenericPackedBits; static constexpr size_t kFieldComparisonBiasSize = MinimumBitsToStore(static_cast<size_t>(ComparisonBias::kLast)); static constexpr size_t kNumberOfConditionPackedBits = @@ -3635,42 +3696,42 @@ class HCondition : public HBinaryOperation { }; // Instruction to check if two inputs are equal to each other. 
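The Evaluate overloads in the condition classes below fold a previously merged HCompare as `Compare(x, y) OP 0`. A compact sketch of that folding for 64-bit constants, with the comparator supplied by the caller; the names here are illustrative, not ART's:

#include <cstdint>
#include <functional>

// Three-way compare in the spirit of the Compare(x, y) helper the Evaluate
// methods lean on: -1, 0 or 1.
constexpr int32_t ThreeWayCompare(int64_t x, int64_t y) {
  return (x == y) ? 0 : (x < y ? -1 : 1);
}

// Folds "x OP y" expressed as "Compare(x, y) OP 0".
template <typename Op>
constexpr bool FoldMergedCompare(int64_t x, int64_t y, Op op) {
  return op(ThreeWayCompare(x, y), 0);
}

static_assert(FoldMergedCompare(3, 7, std::less<int32_t>()), "3 < 7");
static_assert(!FoldMergedCompare(7, 3, std::equal_to<int32_t>()), "7 != 3");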
-class HEqual FINAL : public HCondition { +class HEqual final : public HCondition { public: HEqual(HInstruction* first, HInstruction* second, uint32_t dex_pc = kNoDexPc) : HCondition(kEqual, first, second, dex_pc) { } - bool IsCommutative() const OVERRIDE { return true; } + bool IsCommutative() const override { return true; } HConstant* Evaluate(HNullConstant* x ATTRIBUTE_UNUSED, - HNullConstant* y ATTRIBUTE_UNUSED) const OVERRIDE { + HNullConstant* y ATTRIBUTE_UNUSED) const override { return MakeConstantCondition(true, GetDexPc()); } - HConstant* Evaluate(HIntConstant* x, HIntConstant* y) const OVERRIDE { + HConstant* Evaluate(HIntConstant* x, HIntConstant* y) const override { return MakeConstantCondition(Compute(x->GetValue(), y->GetValue()), GetDexPc()); } // In the following Evaluate methods, a HCompare instruction has // been merged into this HEqual instruction; evaluate it as // `Compare(x, y) == 0`. - HConstant* Evaluate(HLongConstant* x, HLongConstant* y) const OVERRIDE { + HConstant* Evaluate(HLongConstant* x, HLongConstant* y) const override { return MakeConstantCondition(Compute(Compare(x->GetValue(), y->GetValue()), 0), GetDexPc()); } - HConstant* Evaluate(HFloatConstant* x, HFloatConstant* y) const OVERRIDE { + HConstant* Evaluate(HFloatConstant* x, HFloatConstant* y) const override { return MakeConstantCondition(Compute(CompareFP(x->GetValue(), y->GetValue()), 0), GetDexPc()); } - HConstant* Evaluate(HDoubleConstant* x, HDoubleConstant* y) const OVERRIDE { + HConstant* Evaluate(HDoubleConstant* x, HDoubleConstant* y) const override { return MakeConstantCondition(Compute(CompareFP(x->GetValue(), y->GetValue()), 0), GetDexPc()); } DECLARE_INSTRUCTION(Equal); - IfCondition GetCondition() const OVERRIDE { + IfCondition GetCondition() const override { return kCondEQ; } - IfCondition GetOppositeCondition() const OVERRIDE { + IfCondition GetOppositeCondition() const override { return kCondNE; } @@ -3681,42 +3742,42 @@ class HEqual FINAL : public HCondition { template <typename T> static bool Compute(T x, T y) { return x == y; } }; -class HNotEqual FINAL : public HCondition { +class HNotEqual final : public HCondition { public: HNotEqual(HInstruction* first, HInstruction* second, uint32_t dex_pc = kNoDexPc) : HCondition(kNotEqual, first, second, dex_pc) { } - bool IsCommutative() const OVERRIDE { return true; } + bool IsCommutative() const override { return true; } HConstant* Evaluate(HNullConstant* x ATTRIBUTE_UNUSED, - HNullConstant* y ATTRIBUTE_UNUSED) const OVERRIDE { + HNullConstant* y ATTRIBUTE_UNUSED) const override { return MakeConstantCondition(false, GetDexPc()); } - HConstant* Evaluate(HIntConstant* x, HIntConstant* y) const OVERRIDE { + HConstant* Evaluate(HIntConstant* x, HIntConstant* y) const override { return MakeConstantCondition(Compute(x->GetValue(), y->GetValue()), GetDexPc()); } // In the following Evaluate methods, a HCompare instruction has // been merged into this HNotEqual instruction; evaluate it as // `Compare(x, y) != 0`. 
- HConstant* Evaluate(HLongConstant* x, HLongConstant* y) const OVERRIDE { + HConstant* Evaluate(HLongConstant* x, HLongConstant* y) const override { return MakeConstantCondition(Compute(Compare(x->GetValue(), y->GetValue()), 0), GetDexPc()); } - HConstant* Evaluate(HFloatConstant* x, HFloatConstant* y) const OVERRIDE { + HConstant* Evaluate(HFloatConstant* x, HFloatConstant* y) const override { return MakeConstantCondition(Compute(CompareFP(x->GetValue(), y->GetValue()), 0), GetDexPc()); } - HConstant* Evaluate(HDoubleConstant* x, HDoubleConstant* y) const OVERRIDE { + HConstant* Evaluate(HDoubleConstant* x, HDoubleConstant* y) const override { return MakeConstantCondition(Compute(CompareFP(x->GetValue(), y->GetValue()), 0), GetDexPc()); } DECLARE_INSTRUCTION(NotEqual); - IfCondition GetCondition() const OVERRIDE { + IfCondition GetCondition() const override { return kCondNE; } - IfCondition GetOppositeCondition() const OVERRIDE { + IfCondition GetOppositeCondition() const override { return kCondEQ; } @@ -3727,36 +3788,36 @@ class HNotEqual FINAL : public HCondition { template <typename T> static bool Compute(T x, T y) { return x != y; } }; -class HLessThan FINAL : public HCondition { +class HLessThan final : public HCondition { public: HLessThan(HInstruction* first, HInstruction* second, uint32_t dex_pc = kNoDexPc) : HCondition(kLessThan, first, second, dex_pc) { } - HConstant* Evaluate(HIntConstant* x, HIntConstant* y) const OVERRIDE { + HConstant* Evaluate(HIntConstant* x, HIntConstant* y) const override { return MakeConstantCondition(Compute(x->GetValue(), y->GetValue()), GetDexPc()); } // In the following Evaluate methods, a HCompare instruction has // been merged into this HLessThan instruction; evaluate it as // `Compare(x, y) < 0`. - HConstant* Evaluate(HLongConstant* x, HLongConstant* y) const OVERRIDE { + HConstant* Evaluate(HLongConstant* x, HLongConstant* y) const override { return MakeConstantCondition(Compute(Compare(x->GetValue(), y->GetValue()), 0), GetDexPc()); } - HConstant* Evaluate(HFloatConstant* x, HFloatConstant* y) const OVERRIDE { + HConstant* Evaluate(HFloatConstant* x, HFloatConstant* y) const override { return MakeConstantCondition(Compute(CompareFP(x->GetValue(), y->GetValue()), 0), GetDexPc()); } - HConstant* Evaluate(HDoubleConstant* x, HDoubleConstant* y) const OVERRIDE { + HConstant* Evaluate(HDoubleConstant* x, HDoubleConstant* y) const override { return MakeConstantCondition(Compute(CompareFP(x->GetValue(), y->GetValue()), 0), GetDexPc()); } DECLARE_INSTRUCTION(LessThan); - IfCondition GetCondition() const OVERRIDE { + IfCondition GetCondition() const override { return kCondLT; } - IfCondition GetOppositeCondition() const OVERRIDE { + IfCondition GetOppositeCondition() const override { return kCondGE; } @@ -3767,36 +3828,36 @@ class HLessThan FINAL : public HCondition { template <typename T> static bool Compute(T x, T y) { return x < y; } }; -class HLessThanOrEqual FINAL : public HCondition { +class HLessThanOrEqual final : public HCondition { public: HLessThanOrEqual(HInstruction* first, HInstruction* second, uint32_t dex_pc = kNoDexPc) : HCondition(kLessThanOrEqual, first, second, dex_pc) { } - HConstant* Evaluate(HIntConstant* x, HIntConstant* y) const OVERRIDE { + HConstant* Evaluate(HIntConstant* x, HIntConstant* y) const override { return MakeConstantCondition(Compute(x->GetValue(), y->GetValue()), GetDexPc()); } // In the following Evaluate methods, a HCompare instruction has // been merged into this HLessThanOrEqual instruction; evaluate it as // 
`Compare(x, y) <= 0`. - HConstant* Evaluate(HLongConstant* x, HLongConstant* y) const OVERRIDE { + HConstant* Evaluate(HLongConstant* x, HLongConstant* y) const override { return MakeConstantCondition(Compute(Compare(x->GetValue(), y->GetValue()), 0), GetDexPc()); } - HConstant* Evaluate(HFloatConstant* x, HFloatConstant* y) const OVERRIDE { + HConstant* Evaluate(HFloatConstant* x, HFloatConstant* y) const override { return MakeConstantCondition(Compute(CompareFP(x->GetValue(), y->GetValue()), 0), GetDexPc()); } - HConstant* Evaluate(HDoubleConstant* x, HDoubleConstant* y) const OVERRIDE { + HConstant* Evaluate(HDoubleConstant* x, HDoubleConstant* y) const override { return MakeConstantCondition(Compute(CompareFP(x->GetValue(), y->GetValue()), 0), GetDexPc()); } DECLARE_INSTRUCTION(LessThanOrEqual); - IfCondition GetCondition() const OVERRIDE { + IfCondition GetCondition() const override { return kCondLE; } - IfCondition GetOppositeCondition() const OVERRIDE { + IfCondition GetOppositeCondition() const override { return kCondGT; } @@ -3807,35 +3868,35 @@ class HLessThanOrEqual FINAL : public HCondition { template <typename T> static bool Compute(T x, T y) { return x <= y; } }; -class HGreaterThan FINAL : public HCondition { +class HGreaterThan final : public HCondition { public: HGreaterThan(HInstruction* first, HInstruction* second, uint32_t dex_pc = kNoDexPc) : HCondition(kGreaterThan, first, second, dex_pc) { } - HConstant* Evaluate(HIntConstant* x, HIntConstant* y) const OVERRIDE { + HConstant* Evaluate(HIntConstant* x, HIntConstant* y) const override { return MakeConstantCondition(Compute(x->GetValue(), y->GetValue()), GetDexPc()); } // In the following Evaluate methods, a HCompare instruction has // been merged into this HGreaterThan instruction; evaluate it as // `Compare(x, y) > 0`. 
- HConstant* Evaluate(HLongConstant* x, HLongConstant* y) const OVERRIDE { + HConstant* Evaluate(HLongConstant* x, HLongConstant* y) const override { return MakeConstantCondition(Compute(Compare(x->GetValue(), y->GetValue()), 0), GetDexPc()); } - HConstant* Evaluate(HFloatConstant* x, HFloatConstant* y) const OVERRIDE { + HConstant* Evaluate(HFloatConstant* x, HFloatConstant* y) const override { return MakeConstantCondition(Compute(CompareFP(x->GetValue(), y->GetValue()), 0), GetDexPc()); } - HConstant* Evaluate(HDoubleConstant* x, HDoubleConstant* y) const OVERRIDE { + HConstant* Evaluate(HDoubleConstant* x, HDoubleConstant* y) const override { return MakeConstantCondition(Compute(CompareFP(x->GetValue(), y->GetValue()), 0), GetDexPc()); } DECLARE_INSTRUCTION(GreaterThan); - IfCondition GetCondition() const OVERRIDE { + IfCondition GetCondition() const override { return kCondGT; } - IfCondition GetOppositeCondition() const OVERRIDE { + IfCondition GetOppositeCondition() const override { return kCondLE; } @@ -3846,35 +3907,35 @@ class HGreaterThan FINAL : public HCondition { template <typename T> static bool Compute(T x, T y) { return x > y; } }; -class HGreaterThanOrEqual FINAL : public HCondition { +class HGreaterThanOrEqual final : public HCondition { public: HGreaterThanOrEqual(HInstruction* first, HInstruction* second, uint32_t dex_pc = kNoDexPc) : HCondition(kGreaterThanOrEqual, first, second, dex_pc) { } - HConstant* Evaluate(HIntConstant* x, HIntConstant* y) const OVERRIDE { + HConstant* Evaluate(HIntConstant* x, HIntConstant* y) const override { return MakeConstantCondition(Compute(x->GetValue(), y->GetValue()), GetDexPc()); } // In the following Evaluate methods, a HCompare instruction has // been merged into this HGreaterThanOrEqual instruction; evaluate it as // `Compare(x, y) >= 0`. 
- HConstant* Evaluate(HLongConstant* x, HLongConstant* y) const OVERRIDE { + HConstant* Evaluate(HLongConstant* x, HLongConstant* y) const override { return MakeConstantCondition(Compute(Compare(x->GetValue(), y->GetValue()), 0), GetDexPc()); } - HConstant* Evaluate(HFloatConstant* x, HFloatConstant* y) const OVERRIDE { + HConstant* Evaluate(HFloatConstant* x, HFloatConstant* y) const override { return MakeConstantCondition(Compute(CompareFP(x->GetValue(), y->GetValue()), 0), GetDexPc()); } - HConstant* Evaluate(HDoubleConstant* x, HDoubleConstant* y) const OVERRIDE { + HConstant* Evaluate(HDoubleConstant* x, HDoubleConstant* y) const override { return MakeConstantCondition(Compute(CompareFP(x->GetValue(), y->GetValue()), 0), GetDexPc()); } DECLARE_INSTRUCTION(GreaterThanOrEqual); - IfCondition GetCondition() const OVERRIDE { + IfCondition GetCondition() const override { return kCondGE; } - IfCondition GetOppositeCondition() const OVERRIDE { + IfCondition GetOppositeCondition() const override { return kCondLT; } @@ -3885,36 +3946,36 @@ class HGreaterThanOrEqual FINAL : public HCondition { template <typename T> static bool Compute(T x, T y) { return x >= y; } }; -class HBelow FINAL : public HCondition { +class HBelow final : public HCondition { public: HBelow(HInstruction* first, HInstruction* second, uint32_t dex_pc = kNoDexPc) : HCondition(kBelow, first, second, dex_pc) { } - HConstant* Evaluate(HIntConstant* x, HIntConstant* y) const OVERRIDE { + HConstant* Evaluate(HIntConstant* x, HIntConstant* y) const override { return MakeConstantCondition(Compute(x->GetValue(), y->GetValue()), GetDexPc()); } - HConstant* Evaluate(HLongConstant* x, HLongConstant* y) const OVERRIDE { + HConstant* Evaluate(HLongConstant* x, HLongConstant* y) const override { return MakeConstantCondition(Compute(x->GetValue(), y->GetValue()), GetDexPc()); } HConstant* Evaluate(HFloatConstant* x ATTRIBUTE_UNUSED, - HFloatConstant* y ATTRIBUTE_UNUSED) const OVERRIDE { + HFloatConstant* y ATTRIBUTE_UNUSED) const override { LOG(FATAL) << DebugName() << " is not defined for float values"; UNREACHABLE(); } HConstant* Evaluate(HDoubleConstant* x ATTRIBUTE_UNUSED, - HDoubleConstant* y ATTRIBUTE_UNUSED) const OVERRIDE { + HDoubleConstant* y ATTRIBUTE_UNUSED) const override { LOG(FATAL) << DebugName() << " is not defined for double values"; UNREACHABLE(); } DECLARE_INSTRUCTION(Below); - IfCondition GetCondition() const OVERRIDE { + IfCondition GetCondition() const override { return kCondB; } - IfCondition GetOppositeCondition() const OVERRIDE { + IfCondition GetOppositeCondition() const override { return kCondAE; } @@ -3927,36 +3988,36 @@ class HBelow FINAL : public HCondition { } }; -class HBelowOrEqual FINAL : public HCondition { +class HBelowOrEqual final : public HCondition { public: HBelowOrEqual(HInstruction* first, HInstruction* second, uint32_t dex_pc = kNoDexPc) : HCondition(kBelowOrEqual, first, second, dex_pc) { } - HConstant* Evaluate(HIntConstant* x, HIntConstant* y) const OVERRIDE { + HConstant* Evaluate(HIntConstant* x, HIntConstant* y) const override { return MakeConstantCondition(Compute(x->GetValue(), y->GetValue()), GetDexPc()); } - HConstant* Evaluate(HLongConstant* x, HLongConstant* y) const OVERRIDE { + HConstant* Evaluate(HLongConstant* x, HLongConstant* y) const override { return MakeConstantCondition(Compute(x->GetValue(), y->GetValue()), GetDexPc()); } HConstant* Evaluate(HFloatConstant* x ATTRIBUTE_UNUSED, - HFloatConstant* y ATTRIBUTE_UNUSED) const OVERRIDE { + HFloatConstant* y ATTRIBUTE_UNUSED) 
const override { LOG(FATAL) << DebugName() << " is not defined for float values"; UNREACHABLE(); } HConstant* Evaluate(HDoubleConstant* x ATTRIBUTE_UNUSED, - HDoubleConstant* y ATTRIBUTE_UNUSED) const OVERRIDE { + HDoubleConstant* y ATTRIBUTE_UNUSED) const override { LOG(FATAL) << DebugName() << " is not defined for double values"; UNREACHABLE(); } DECLARE_INSTRUCTION(BelowOrEqual); - IfCondition GetCondition() const OVERRIDE { + IfCondition GetCondition() const override { return kCondBE; } - IfCondition GetOppositeCondition() const OVERRIDE { + IfCondition GetOppositeCondition() const override { return kCondA; } @@ -3969,36 +4030,36 @@ class HBelowOrEqual FINAL : public HCondition { } }; -class HAbove FINAL : public HCondition { +class HAbove final : public HCondition { public: HAbove(HInstruction* first, HInstruction* second, uint32_t dex_pc = kNoDexPc) : HCondition(kAbove, first, second, dex_pc) { } - HConstant* Evaluate(HIntConstant* x, HIntConstant* y) const OVERRIDE { + HConstant* Evaluate(HIntConstant* x, HIntConstant* y) const override { return MakeConstantCondition(Compute(x->GetValue(), y->GetValue()), GetDexPc()); } - HConstant* Evaluate(HLongConstant* x, HLongConstant* y) const OVERRIDE { + HConstant* Evaluate(HLongConstant* x, HLongConstant* y) const override { return MakeConstantCondition(Compute(x->GetValue(), y->GetValue()), GetDexPc()); } HConstant* Evaluate(HFloatConstant* x ATTRIBUTE_UNUSED, - HFloatConstant* y ATTRIBUTE_UNUSED) const OVERRIDE { + HFloatConstant* y ATTRIBUTE_UNUSED) const override { LOG(FATAL) << DebugName() << " is not defined for float values"; UNREACHABLE(); } HConstant* Evaluate(HDoubleConstant* x ATTRIBUTE_UNUSED, - HDoubleConstant* y ATTRIBUTE_UNUSED) const OVERRIDE { + HDoubleConstant* y ATTRIBUTE_UNUSED) const override { LOG(FATAL) << DebugName() << " is not defined for double values"; UNREACHABLE(); } DECLARE_INSTRUCTION(Above); - IfCondition GetCondition() const OVERRIDE { + IfCondition GetCondition() const override { return kCondA; } - IfCondition GetOppositeCondition() const OVERRIDE { + IfCondition GetOppositeCondition() const override { return kCondBE; } @@ -4011,36 +4072,36 @@ class HAbove FINAL : public HCondition { } }; -class HAboveOrEqual FINAL : public HCondition { +class HAboveOrEqual final : public HCondition { public: HAboveOrEqual(HInstruction* first, HInstruction* second, uint32_t dex_pc = kNoDexPc) : HCondition(kAboveOrEqual, first, second, dex_pc) { } - HConstant* Evaluate(HIntConstant* x, HIntConstant* y) const OVERRIDE { + HConstant* Evaluate(HIntConstant* x, HIntConstant* y) const override { return MakeConstantCondition(Compute(x->GetValue(), y->GetValue()), GetDexPc()); } - HConstant* Evaluate(HLongConstant* x, HLongConstant* y) const OVERRIDE { + HConstant* Evaluate(HLongConstant* x, HLongConstant* y) const override { return MakeConstantCondition(Compute(x->GetValue(), y->GetValue()), GetDexPc()); } HConstant* Evaluate(HFloatConstant* x ATTRIBUTE_UNUSED, - HFloatConstant* y ATTRIBUTE_UNUSED) const OVERRIDE { + HFloatConstant* y ATTRIBUTE_UNUSED) const override { LOG(FATAL) << DebugName() << " is not defined for float values"; UNREACHABLE(); } HConstant* Evaluate(HDoubleConstant* x ATTRIBUTE_UNUSED, - HDoubleConstant* y ATTRIBUTE_UNUSED) const OVERRIDE { + HDoubleConstant* y ATTRIBUTE_UNUSED) const override { LOG(FATAL) << DebugName() << " is not defined for double values"; UNREACHABLE(); } DECLARE_INSTRUCTION(AboveOrEqual); - IfCondition GetCondition() const OVERRIDE { + IfCondition GetCondition() const override { return 
kCondAE; } - IfCondition GetOppositeCondition() const OVERRIDE { + IfCondition GetOppositeCondition() const override { return kCondB; } @@ -4055,7 +4116,7 @@ class HAboveOrEqual FINAL : public HCondition { // Instruction to check how two inputs compare to each other. // Result is 0 if input0 == input1, 1 if input0 > input1, or -1 if input0 < input1. -class HCompare FINAL : public HBinaryOperation { +class HCompare final : public HBinaryOperation { public: // Note that `comparison_type` is the type of comparison performed // between the comparison's inputs, not the type of the instantiated @@ -4087,7 +4148,7 @@ class HCompare FINAL : public HBinaryOperation { return std::isunordered(x, y) ? (IsGtBias() ? 1 : -1) : Compute(x, y); } - HConstant* Evaluate(HIntConstant* x, HIntConstant* y) const OVERRIDE { + HConstant* Evaluate(HIntConstant* x, HIntConstant* y) const override { // Note that there is no "cmp-int" Dex instruction so we shouldn't // reach this code path when processing a freshly built HIR // graph. However HCompare integer instructions can be synthesized @@ -4095,17 +4156,17 @@ class HCompare FINAL : public HBinaryOperation { // IntegerSignum intrinsics, so we have to handle this case. return MakeConstantComparison(Compute(x->GetValue(), y->GetValue()), GetDexPc()); } - HConstant* Evaluate(HLongConstant* x, HLongConstant* y) const OVERRIDE { + HConstant* Evaluate(HLongConstant* x, HLongConstant* y) const override { return MakeConstantComparison(Compute(x->GetValue(), y->GetValue()), GetDexPc()); } - HConstant* Evaluate(HFloatConstant* x, HFloatConstant* y) const OVERRIDE { + HConstant* Evaluate(HFloatConstant* x, HFloatConstant* y) const override { return MakeConstantComparison(ComputeFP(x->GetValue(), y->GetValue()), GetDexPc()); } - HConstant* Evaluate(HDoubleConstant* x, HDoubleConstant* y) const OVERRIDE { + HConstant* Evaluate(HDoubleConstant* x, HDoubleConstant* y) const override { return MakeConstantComparison(ComputeFP(x->GetValue(), y->GetValue()), GetDexPc()); } - bool InstructionDataEquals(const HInstruction* other) const OVERRIDE { + bool InstructionDataEquals(const HInstruction* other) const override { return GetPackedFields() == other->AsCompare()->GetPackedFields(); } @@ -4126,7 +4187,7 @@ class HCompare FINAL : public HBinaryOperation { DECLARE_INSTRUCTION(Compare); protected: - static constexpr size_t kFieldComparisonBias = kNumberOfExpressionPackedBits; + static constexpr size_t kFieldComparisonBias = kNumberOfGenericPackedBits; static constexpr size_t kFieldComparisonBiasSize = MinimumBitsToStore(static_cast<size_t>(ComparisonBias::kLast)); static constexpr size_t kNumberOfComparePackedBits = @@ -4144,7 +4205,7 @@ class HCompare FINAL : public HBinaryOperation { DEFAULT_COPY_CONSTRUCTOR(Compare); }; -class HNewInstance FINAL : public HExpression<1> { +class HNewInstance final : public HExpression<1> { public: HNewInstance(HInstruction* cls, uint32_t dex_pc, @@ -4163,16 +4224,16 @@ class HNewInstance FINAL : public HExpression<1> { SetRawInputAt(0, cls); } - bool IsClonable() const OVERRIDE { return true; } + bool IsClonable() const override { return true; } dex::TypeIndex GetTypeIndex() const { return type_index_; } const DexFile& GetDexFile() const { return dex_file_; } // Calls runtime so needs an environment. - bool NeedsEnvironment() const OVERRIDE { return true; } + bool NeedsEnvironment() const override { return true; } // Can throw errors when out-of-memory or if it's not instantiable/accessible. 
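An aside on the ComputeFP bias handling shown in HCompare above: when either operand is NaN the comparison is unordered, and the ComparisonBias decides whether the result is +1 (gt bias, as for the Dex cmpg instructions) or -1 (lt bias, cmpl). A minimal sketch of that rule, outside ART:

  #include <cassert>
  #include <cmath>
  #include <limits>

  // Mirrors the shape of HCompare::ComputeFP: unordered operands yield the bias value.
  static int ComputeFP(float x, float y, bool gt_bias) {
    if (std::isunordered(x, y)) {
      return gt_bias ? 1 : -1;
    }
    return x == y ? 0 : (x > y ? 1 : -1);
  }

  int main() {
    float nan = std::numeric_limits<float>::quiet_NaN();
    assert(ComputeFP(nan, 1.0f, /* gt_bias= */ true) == 1);    // cmpg-style behavior
    assert(ComputeFP(nan, 1.0f, /* gt_bias= */ false) == -1);  // cmpl-style behavior
    assert(ComputeFP(2.0f, 1.0f, /* gt_bias= */ true) == 1);   // ordered: plain compare
    return 0;
  }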
- bool CanThrow() const OVERRIDE { return true; } + bool CanThrow() const override { return true; } bool NeedsChecks() const { return entrypoint_ == kQuickAllocObjectWithChecks; @@ -4180,7 +4241,7 @@ class HNewInstance FINAL : public HExpression<1> { bool IsFinalizable() const { return GetPackedFlag<kFlagFinalizable>(); } - bool CanBeNull() const OVERRIDE { return false; } + bool CanBeNull() const override { return false; } QuickEntrypointEnum GetEntrypoint() const { return entrypoint_; } @@ -4205,7 +4266,7 @@ class HNewInstance FINAL : public HExpression<1> { DEFAULT_COPY_CONSTRUCTOR(NewInstance); private: - static constexpr size_t kFlagFinalizable = kNumberOfExpressionPackedBits; + static constexpr size_t kFlagFinalizable = kNumberOfGenericPackedBits; static constexpr size_t kNumberOfNewInstancePackedBits = kFlagFinalizable + 1; static_assert(kNumberOfNewInstancePackedBits <= kMaxNumberOfPackedBits, "Too many packed fields."); @@ -4234,7 +4295,7 @@ enum IntrinsicExceptions { class HInvoke : public HVariableInputSizeInstruction { public: - bool NeedsEnvironment() const OVERRIDE; + bool NeedsEnvironment() const override; void SetArgumentAt(size_t index, HInstruction* argument) { SetRawInputAt(index, argument); @@ -4246,8 +4307,6 @@ class HInvoke : public HVariableInputSizeInstruction { // inputs at the end of their list of inputs. uint32_t GetNumberOfArguments() const { return number_of_arguments_; } - DataType::Type GetType() const OVERRIDE { return GetPackedField<ReturnTypeField>(); } - uint32_t GetDexMethodIndex() const { return dex_method_index_; } InvokeType GetInvokeType() const { @@ -4269,15 +4328,15 @@ class HInvoke : public HVariableInputSizeInstruction { void SetCanThrow(bool can_throw) { SetPackedFlag<kFlagCanThrow>(can_throw); } - bool CanThrow() const OVERRIDE { return GetPackedFlag<kFlagCanThrow>(); } + bool CanThrow() const override { return GetPackedFlag<kFlagCanThrow>(); } void SetAlwaysThrows(bool always_throws) { SetPackedFlag<kFlagAlwaysThrows>(always_throws); } - bool AlwaysThrows() const OVERRIDE { return GetPackedFlag<kFlagAlwaysThrows>(); } + bool AlwaysThrows() const override { return GetPackedFlag<kFlagAlwaysThrows>(); } - bool CanBeMoved() const OVERRIDE { return IsIntrinsic() && !DoesAnyWrite(); } + bool CanBeMoved() const override { return IsIntrinsic() && !DoesAnyWrite(); } - bool InstructionDataEquals(const HInstruction* other) const OVERRIDE { + bool InstructionDataEquals(const HInstruction* other) const override { return intrinsic_ != Intrinsics::kNone && intrinsic_ == other->AsInvoke()->intrinsic_; } @@ -4292,7 +4351,7 @@ class HInvoke : public HVariableInputSizeInstruction { bool IsIntrinsic() const { return intrinsic_ != Intrinsics::kNone; } ArtMethod* GetResolvedMethod() const { return resolved_method_; } - void SetResolvedMethod(ArtMethod* method) { resolved_method_ = method; } + void SetResolvedMethod(ArtMethod* method) REQUIRES_SHARED(Locks::mutator_lock_); DECLARE_ABSTRACT_INSTRUCTION(Invoke); @@ -4300,16 +4359,11 @@ class HInvoke : public HVariableInputSizeInstruction { static constexpr size_t kFieldInvokeType = kNumberOfGenericPackedBits; static constexpr size_t kFieldInvokeTypeSize = MinimumBitsToStore(static_cast<size_t>(kMaxInvokeType)); - static constexpr size_t kFieldReturnType = - kFieldInvokeType + kFieldInvokeTypeSize; - static constexpr size_t kFieldReturnTypeSize = - MinimumBitsToStore(static_cast<size_t>(DataType::Type::kLast)); - static constexpr size_t kFlagCanThrow = kFieldReturnType + kFieldReturnTypeSize; + static constexpr size_t 
kFlagCanThrow = kFieldInvokeType + kFieldInvokeTypeSize; static constexpr size_t kFlagAlwaysThrows = kFlagCanThrow + 1; static constexpr size_t kNumberOfInvokePackedBits = kFlagAlwaysThrows + 1; static_assert(kNumberOfInvokePackedBits <= kMaxNumberOfPackedBits, "Too many packed fields."); using InvokeTypeField = BitField<InvokeType, kFieldInvokeType, kFieldInvokeTypeSize>; - using ReturnTypeField = BitField<DataType::Type, kFieldReturnType, kFieldReturnTypeSize>; HInvoke(InstructionKind kind, ArenaAllocator* allocator, @@ -4322,19 +4376,21 @@ class HInvoke : public HVariableInputSizeInstruction { InvokeType invoke_type) : HVariableInputSizeInstruction( kind, + return_type, SideEffects::AllExceptGCDependency(), // Assume write/read on all fields/arrays. dex_pc, allocator, number_of_arguments + number_of_other_inputs, kArenaAllocInvokeInputs), number_of_arguments_(number_of_arguments), - resolved_method_(resolved_method), dex_method_index_(dex_method_index), intrinsic_(Intrinsics::kNone), intrinsic_optimizations_(0) { - SetPackedField<ReturnTypeField>(return_type); SetPackedField<InvokeTypeField>(invoke_type); SetPackedFlag<kFlagCanThrow>(true); + // Check mutator lock, constructors lack annotalysis support. + Locks::mutator_lock_->AssertNotExclusiveHeld(Thread::Current()); + SetResolvedMethod(resolved_method); } DEFAULT_COPY_CONSTRUCTOR(Invoke); @@ -4348,7 +4404,7 @@ class HInvoke : public HVariableInputSizeInstruction { uint32_t intrinsic_optimizations_; }; -class HInvokeUnresolved FINAL : public HInvoke { +class HInvokeUnresolved final : public HInvoke { public: HInvokeUnresolved(ArenaAllocator* allocator, uint32_t number_of_arguments, @@ -4359,7 +4415,7 @@ class HInvokeUnresolved FINAL : public HInvoke { : HInvoke(kInvokeUnresolved, allocator, number_of_arguments, - 0u /* number_of_other_inputs */, + /* number_of_other_inputs= */ 0u, return_type, dex_pc, dex_method_index, @@ -4367,7 +4423,7 @@ class HInvokeUnresolved FINAL : public HInvoke { invoke_type) { } - bool IsClonable() const OVERRIDE { return true; } + bool IsClonable() const override { return true; } DECLARE_INSTRUCTION(InvokeUnresolved); @@ -4375,7 +4431,7 @@ class HInvokeUnresolved FINAL : public HInvoke { DEFAULT_COPY_CONSTRUCTOR(InvokeUnresolved); }; -class HInvokePolymorphic FINAL : public HInvoke { +class HInvokePolymorphic final : public HInvoke { public: HInvokePolymorphic(ArenaAllocator* allocator, uint32_t number_of_arguments, @@ -4385,7 +4441,7 @@ class HInvokePolymorphic FINAL : public HInvoke { : HInvoke(kInvokePolymorphic, allocator, number_of_arguments, - 0u /* number_of_other_inputs */, + /* number_of_other_inputs= */ 0u, return_type, dex_pc, dex_method_index, @@ -4393,7 +4449,7 @@ class HInvokePolymorphic FINAL : public HInvoke { kVirtual) { } - bool IsClonable() const OVERRIDE { return true; } + bool IsClonable() const override { return true; } DECLARE_INSTRUCTION(InvokePolymorphic); @@ -4401,7 +4457,39 @@ class HInvokePolymorphic FINAL : public HInvoke { DEFAULT_COPY_CONSTRUCTOR(InvokePolymorphic); }; -class HInvokeStaticOrDirect FINAL : public HInvoke { +class HInvokeCustom final : public HInvoke { + public: + HInvokeCustom(ArenaAllocator* allocator, + uint32_t number_of_arguments, + uint32_t call_site_index, + DataType::Type return_type, + uint32_t dex_pc) + : HInvoke(kInvokeCustom, + allocator, + number_of_arguments, + /* number_of_other_inputs= */ 0u, + return_type, + dex_pc, + /* dex_method_index= */ dex::kDexNoIndex, + /* resolved_method= */ nullptr, + kStatic), + call_site_index_(call_site_index) { + } 
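To make the packed-field shuffle above concrete: with the return type no longer stored as an HInvoke packed field, kFlagCanThrow now sits directly after the invoke-type field. The sketch below reproduces the same offset arithmetic with a hand-rolled bit field; the constants are illustrative assumptions, not ART's actual values:

  #include <cassert>
  #include <cstdint>

  // Illustrative layout: a few generic bits, then the invoke type, then two flags.
  static constexpr size_t kNumberOfGenericPackedBits = 2;  // assumed value
  static constexpr size_t kFieldInvokeType = kNumberOfGenericPackedBits;
  static constexpr size_t kFieldInvokeTypeSize = 3;        // assumed: enough for the enum
  static constexpr size_t kFlagCanThrow = kFieldInvokeType + kFieldInvokeTypeSize;
  static constexpr size_t kFlagAlwaysThrows = kFlagCanThrow + 1;
  static constexpr size_t kNumberOfInvokePackedBits = kFlagAlwaysThrows + 1;
  static_assert(kNumberOfInvokePackedBits <= 32, "Too many packed fields.");

  static uint32_t SetBits(uint32_t storage, size_t pos, size_t size, uint32_t value) {
    uint32_t mask = ((1u << size) - 1u) << pos;
    return (storage & ~mask) | ((value << pos) & mask);
  }

  static uint32_t GetBits(uint32_t storage, size_t pos, size_t size) {
    return (storage >> pos) & ((1u << size) - 1u);
  }

  int main() {
    uint32_t packed = 0;
    packed = SetBits(packed, kFieldInvokeType, kFieldInvokeTypeSize, /* kVirtual-ish */ 4);
    packed = SetBits(packed, kFlagCanThrow, 1, 1);
    assert(GetBits(packed, kFieldInvokeType, kFieldInvokeTypeSize) == 4);
    assert(GetBits(packed, kFlagCanThrow, 1) == 1);
    return 0;
  }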
+ + uint32_t GetCallSiteIndex() const { return call_site_index_; } + + bool IsClonable() const override { return true; } + + DECLARE_INSTRUCTION(InvokeCustom); + + protected: + DEFAULT_COPY_CONSTRUCTOR(InvokeCustom); + + private: + uint32_t call_site_index_; +}; + +class HInvokeStaticOrDirect final : public HInvoke { public: // Requirements of this method call regarding the class // initialization (clinit) check of its declaring class. @@ -4424,14 +4512,18 @@ class HInvokeStaticOrDirect FINAL : public HInvoke { // Used for boot image methods referenced by boot image code. kBootImageLinkTimePcRelative, - // Use ArtMethod* at a known address, embed the direct address in the code. - // Used for app->boot calls with non-relocatable image and for JIT-compiled calls. - kDirectAddress, + // Load from an entry in the .data.bimg.rel.ro using a PC-relative load. + // Used for app->boot calls with relocatable image. + kBootImageRelRo, // Load from an entry in the .bss section using a PC-relative load. - // Used for classes outside boot image when .bss is accessible with a PC-relative load. + // Used for methods outside boot image referenced by AOT-compiled app and boot image code. kBssEntry, + // Use ArtMethod* at a known address, embed the direct address in the code. + // Used for for JIT-compiled calls. + kJitDirectAddress, + // Make a runtime call to resolve and call the method. This is the last-resort-kind // used when other kinds are unimplemented on a particular architecture. kRuntimeCall, @@ -4472,8 +4564,7 @@ class HInvokeStaticOrDirect FINAL : public HInvoke { allocator, number_of_arguments, // There is potentially one extra argument for the HCurrentMethod node, and - // potentially one other if the clinit check is explicit, and potentially - // one other if the method is a string factory. + // potentially one other if the clinit check is explicit. (NeedsCurrentMethodInput(dispatch_info.method_load_kind) ? 1u : 0u) + (clinit_check_requirement == ClinitCheckRequirement::kExplicit ? 1u : 0u), return_type, @@ -4486,7 +4577,7 @@ class HInvokeStaticOrDirect FINAL : public HInvoke { SetPackedField<ClinitCheckRequirementField>(clinit_check_requirement); } - bool IsClonable() const OVERRIDE { return true; } + bool IsClonable() const override { return true; } void SetDispatchInfo(const DispatchInfo& dispatch_info) { bool had_current_method_input = HasCurrentMethodInput(); @@ -4516,7 +4607,7 @@ class HInvokeStaticOrDirect FINAL : public HInvoke { } using HInstruction::GetInputRecords; // Keep the const version visible. - ArrayRef<HUserRecord<HInstruction*>> GetInputRecords() OVERRIDE { + ArrayRef<HUserRecord<HInstruction*>> GetInputRecords() override { ArrayRef<HUserRecord<HInstruction*>> input_records = HInvoke::GetInputRecords(); if (kIsDebugBuild && IsStaticWithExplicitClinitCheck()) { DCHECK(!input_records.empty()); @@ -4534,14 +4625,14 @@ class HInvokeStaticOrDirect FINAL : public HInvoke { return input_records; } - bool CanDoImplicitNullCheckOn(HInstruction* obj ATTRIBUTE_UNUSED) const OVERRIDE { + bool CanDoImplicitNullCheckOn(HInstruction* obj ATTRIBUTE_UNUSED) const override { // We access the method via the dex cache so we can't do an implicit null check. // TODO: for intrinsics we can generate implicit null checks. 
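For the reworked MethodLoadKind enum above: the helpers further down group kBootImageLinkTimePcRelative, kBootImageRelRo and kBssEntry as PC-relative loads (from a link-time patch, .data.bimg.rel.ro, or .bss), while kJitDirectAddress embeds the ArtMethod* directly. A small sketch of that classification, using a stand-in enum rather than the real HInvokeStaticOrDirect:

  #include <cassert>

  // Stand-in for HInvokeStaticOrDirect::MethodLoadKind (names follow the diff above).
  enum class MethodLoadKind {
    kStringInit,
    kRecursive,
    kBootImageLinkTimePcRelative,
    kBootImageRelRo,
    kBssEntry,
    kJitDirectAddress,
    kRuntimeCall,
  };

  // Mirrors HasPcRelativeMethodLoadKind(): these kinds are materialized with a
  // PC-relative load or patch rather than an embedded pointer.
  static bool HasPcRelativeMethodLoadKind(MethodLoadKind kind) {
    return kind == MethodLoadKind::kBootImageLinkTimePcRelative ||
           kind == MethodLoadKind::kBootImageRelRo ||
           kind == MethodLoadKind::kBssEntry;
  }

  int main() {
    assert(HasPcRelativeMethodLoadKind(MethodLoadKind::kBssEntry));
    assert(!HasPcRelativeMethodLoadKind(MethodLoadKind::kJitDirectAddress));
    return 0;
  }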
return false; } - bool CanBeNull() const OVERRIDE { - return GetPackedField<ReturnTypeField>() == DataType::Type::kReference && !IsStringInit(); + bool CanBeNull() const override { + return GetType() == DataType::Type::kReference && !IsStringInit(); } // Get the index of the special input, if any. @@ -4555,11 +4646,12 @@ class HInvokeStaticOrDirect FINAL : public HInvoke { MethodLoadKind GetMethodLoadKind() const { return dispatch_info_.method_load_kind; } CodePtrLocation GetCodePtrLocation() const { return dispatch_info_.code_ptr_location; } bool IsRecursive() const { return GetMethodLoadKind() == MethodLoadKind::kRecursive; } - bool NeedsDexCacheOfDeclaringClass() const OVERRIDE; + bool NeedsDexCacheOfDeclaringClass() const override; bool IsStringInit() const { return GetMethodLoadKind() == MethodLoadKind::kStringInit; } - bool HasMethodAddress() const { return GetMethodLoadKind() == MethodLoadKind::kDirectAddress; } + bool HasMethodAddress() const { return GetMethodLoadKind() == MethodLoadKind::kJitDirectAddress; } bool HasPcRelativeMethodLoadKind() const { return GetMethodLoadKind() == MethodLoadKind::kBootImageLinkTimePcRelative || + GetMethodLoadKind() == MethodLoadKind::kBootImageRelRo || GetMethodLoadKind() == MethodLoadKind::kBssEntry; } bool HasCurrentMethodInput() const { @@ -4655,7 +4747,7 @@ class HInvokeStaticOrDirect FINAL : public HInvoke { std::ostream& operator<<(std::ostream& os, HInvokeStaticOrDirect::MethodLoadKind rhs); std::ostream& operator<<(std::ostream& os, HInvokeStaticOrDirect::ClinitCheckRequirement rhs); -class HInvokeVirtual FINAL : public HInvoke { +class HInvokeVirtual final : public HInvoke { public: HInvokeVirtual(ArenaAllocator* allocator, uint32_t number_of_arguments, @@ -4676,9 +4768,9 @@ class HInvokeVirtual FINAL : public HInvoke { vtable_index_(vtable_index) { } - bool IsClonable() const OVERRIDE { return true; } + bool IsClonable() const override { return true; } - bool CanBeNull() const OVERRIDE { + bool CanBeNull() const override { switch (GetIntrinsic()) { case Intrinsics::kThreadCurrentThread: case Intrinsics::kStringBufferAppend: @@ -4691,9 +4783,9 @@ class HInvokeVirtual FINAL : public HInvoke { } } - bool CanDoImplicitNullCheckOn(HInstruction* obj) const OVERRIDE { + bool CanDoImplicitNullCheckOn(HInstruction* obj) const override { // TODO: Add implicit null checks in intrinsics. - return (obj == InputAt(0)) && !GetLocations()->Intrinsified(); + return (obj == InputAt(0)) && !IsIntrinsic(); } uint32_t GetVTableIndex() const { return vtable_index_; } @@ -4708,7 +4800,7 @@ class HInvokeVirtual FINAL : public HInvoke { const uint32_t vtable_index_; }; -class HInvokeInterface FINAL : public HInvoke { +class HInvokeInterface final : public HInvoke { public: HInvokeInterface(ArenaAllocator* allocator, uint32_t number_of_arguments, @@ -4729,14 +4821,14 @@ class HInvokeInterface FINAL : public HInvoke { imt_index_(imt_index) { } - bool IsClonable() const OVERRIDE { return true; } + bool IsClonable() const override { return true; } - bool CanDoImplicitNullCheckOn(HInstruction* obj) const OVERRIDE { + bool CanDoImplicitNullCheckOn(HInstruction* obj) const override { // TODO: Add implicit null checks in intrinsics. - return (obj == InputAt(0)) && !GetLocations()->Intrinsified(); + return (obj == InputAt(0)) && !IsIntrinsic(); } - bool NeedsDexCacheOfDeclaringClass() const OVERRIDE { + bool NeedsDexCacheOfDeclaringClass() const override { // The assembly stub currently needs it. 
return true; } @@ -4753,7 +4845,7 @@ class HInvokeInterface FINAL : public HInvoke { const uint32_t imt_index_; }; -class HNeg FINAL : public HUnaryOperation { +class HNeg final : public HUnaryOperation { public: HNeg(DataType::Type result_type, HInstruction* input, uint32_t dex_pc = kNoDexPc) : HUnaryOperation(kNeg, result_type, input, dex_pc) { @@ -4762,16 +4854,16 @@ class HNeg FINAL : public HUnaryOperation { template <typename T> static T Compute(T x) { return -x; } - HConstant* Evaluate(HIntConstant* x) const OVERRIDE { + HConstant* Evaluate(HIntConstant* x) const override { return GetBlock()->GetGraph()->GetIntConstant(Compute(x->GetValue()), GetDexPc()); } - HConstant* Evaluate(HLongConstant* x) const OVERRIDE { + HConstant* Evaluate(HLongConstant* x) const override { return GetBlock()->GetGraph()->GetLongConstant(Compute(x->GetValue()), GetDexPc()); } - HConstant* Evaluate(HFloatConstant* x) const OVERRIDE { + HConstant* Evaluate(HFloatConstant* x) const override { return GetBlock()->GetGraph()->GetFloatConstant(Compute(x->GetValue()), GetDexPc()); } - HConstant* Evaluate(HDoubleConstant* x) const OVERRIDE { + HConstant* Evaluate(HDoubleConstant* x) const override { return GetBlock()->GetGraph()->GetDoubleConstant(Compute(x->GetValue()), GetDexPc()); } @@ -4781,23 +4873,24 @@ class HNeg FINAL : public HUnaryOperation { DEFAULT_COPY_CONSTRUCTOR(Neg); }; -class HNewArray FINAL : public HExpression<2> { +class HNewArray final : public HExpression<2> { public: - HNewArray(HInstruction* cls, HInstruction* length, uint32_t dex_pc) + HNewArray(HInstruction* cls, HInstruction* length, uint32_t dex_pc, size_t component_size_shift) : HExpression(kNewArray, DataType::Type::kReference, SideEffects::CanTriggerGC(), dex_pc) { SetRawInputAt(0, cls); SetRawInputAt(1, length); + SetPackedField<ComponentSizeShiftField>(component_size_shift); } - bool IsClonable() const OVERRIDE { return true; } + bool IsClonable() const override { return true; } // Calls runtime so needs an environment. - bool NeedsEnvironment() const OVERRIDE { return true; } + bool NeedsEnvironment() const override { return true; } // May throw NegativeArraySizeException, OutOfMemoryError, etc. 
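The new component_size_shift argument to HNewArray above lets a code generator size the allocation without re-deriving the component type: the shift is log2 of the component size in bytes. A rough sketch of how such a shift would be used; the header size here is an assumption for illustration, not ART's object layout:

  #include <cassert>
  #include <cstddef>

  // Illustrative only: a fixed array header size is assumed here.
  static constexpr size_t kAssumedArrayHeaderSize = 16;

  // component_size_shift = log2(component size): 0 for byte/boolean arrays,
  // 1 for char/short, 2 for int/float, 3 for long/double.
  static size_t ArrayAllocationSize(size_t length, size_t component_size_shift) {
    return kAssumedArrayHeaderSize + (length << component_size_shift);
  }

  int main() {
    assert(ArrayAllocationSize(10, /* component_size_shift= */ 2) == 16 + 40);  // int[10]
    assert(ArrayAllocationSize(4, /* component_size_shift= */ 3) == 16 + 32);   // long[4]
    return 0;
  }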
- bool CanThrow() const OVERRIDE { return true; } + bool CanThrow() const override { return true; } - bool CanBeNull() const OVERRIDE { return false; } + bool CanBeNull() const override { return false; } HLoadClass* GetLoadClass() const { DCHECK(InputAt(0)->IsLoadClass()); @@ -4808,13 +4901,26 @@ class HNewArray FINAL : public HExpression<2> { return InputAt(1); } + size_t GetComponentSizeShift() { + return GetPackedField<ComponentSizeShiftField>(); + } + DECLARE_INSTRUCTION(NewArray); protected: DEFAULT_COPY_CONSTRUCTOR(NewArray); + + private: + static constexpr size_t kFieldComponentSizeShift = kNumberOfGenericPackedBits; + static constexpr size_t kFieldComponentSizeShiftSize = MinimumBitsToStore(3u); + static constexpr size_t kNumberOfNewArrayPackedBits = + kFieldComponentSizeShift + kFieldComponentSizeShiftSize; + static_assert(kNumberOfNewArrayPackedBits <= kMaxNumberOfPackedBits, "Too many packed fields."); + using ComponentSizeShiftField = + BitField<size_t, kFieldComponentSizeShift, kFieldComponentSizeShiftSize>; }; -class HAdd FINAL : public HBinaryOperation { +class HAdd final : public HBinaryOperation { public: HAdd(DataType::Type result_type, HInstruction* left, @@ -4823,23 +4929,23 @@ class HAdd FINAL : public HBinaryOperation { : HBinaryOperation(kAdd, result_type, left, right, SideEffects::None(), dex_pc) { } - bool IsCommutative() const OVERRIDE { return true; } + bool IsCommutative() const override { return true; } template <typename T> static T Compute(T x, T y) { return x + y; } - HConstant* Evaluate(HIntConstant* x, HIntConstant* y) const OVERRIDE { + HConstant* Evaluate(HIntConstant* x, HIntConstant* y) const override { return GetBlock()->GetGraph()->GetIntConstant( Compute(x->GetValue(), y->GetValue()), GetDexPc()); } - HConstant* Evaluate(HLongConstant* x, HLongConstant* y) const OVERRIDE { + HConstant* Evaluate(HLongConstant* x, HLongConstant* y) const override { return GetBlock()->GetGraph()->GetLongConstant( Compute(x->GetValue(), y->GetValue()), GetDexPc()); } - HConstant* Evaluate(HFloatConstant* x, HFloatConstant* y) const OVERRIDE { + HConstant* Evaluate(HFloatConstant* x, HFloatConstant* y) const override { return GetBlock()->GetGraph()->GetFloatConstant( Compute(x->GetValue(), y->GetValue()), GetDexPc()); } - HConstant* Evaluate(HDoubleConstant* x, HDoubleConstant* y) const OVERRIDE { + HConstant* Evaluate(HDoubleConstant* x, HDoubleConstant* y) const override { return GetBlock()->GetGraph()->GetDoubleConstant( Compute(x->GetValue(), y->GetValue()), GetDexPc()); } @@ -4850,7 +4956,7 @@ class HAdd FINAL : public HBinaryOperation { DEFAULT_COPY_CONSTRUCTOR(Add); }; -class HSub FINAL : public HBinaryOperation { +class HSub final : public HBinaryOperation { public: HSub(DataType::Type result_type, HInstruction* left, @@ -4861,19 +4967,19 @@ class HSub FINAL : public HBinaryOperation { template <typename T> static T Compute(T x, T y) { return x - y; } - HConstant* Evaluate(HIntConstant* x, HIntConstant* y) const OVERRIDE { + HConstant* Evaluate(HIntConstant* x, HIntConstant* y) const override { return GetBlock()->GetGraph()->GetIntConstant( Compute(x->GetValue(), y->GetValue()), GetDexPc()); } - HConstant* Evaluate(HLongConstant* x, HLongConstant* y) const OVERRIDE { + HConstant* Evaluate(HLongConstant* x, HLongConstant* y) const override { return GetBlock()->GetGraph()->GetLongConstant( Compute(x->GetValue(), y->GetValue()), GetDexPc()); } - HConstant* Evaluate(HFloatConstant* x, HFloatConstant* y) const OVERRIDE { + HConstant* Evaluate(HFloatConstant* x, 
HFloatConstant* y) const override { return GetBlock()->GetGraph()->GetFloatConstant( Compute(x->GetValue(), y->GetValue()), GetDexPc()); } - HConstant* Evaluate(HDoubleConstant* x, HDoubleConstant* y) const OVERRIDE { + HConstant* Evaluate(HDoubleConstant* x, HDoubleConstant* y) const override { return GetBlock()->GetGraph()->GetDoubleConstant( Compute(x->GetValue(), y->GetValue()), GetDexPc()); } @@ -4884,7 +4990,7 @@ class HSub FINAL : public HBinaryOperation { DEFAULT_COPY_CONSTRUCTOR(Sub); }; -class HMul FINAL : public HBinaryOperation { +class HMul final : public HBinaryOperation { public: HMul(DataType::Type result_type, HInstruction* left, @@ -4893,23 +4999,23 @@ class HMul FINAL : public HBinaryOperation { : HBinaryOperation(kMul, result_type, left, right, SideEffects::None(), dex_pc) { } - bool IsCommutative() const OVERRIDE { return true; } + bool IsCommutative() const override { return true; } template <typename T> static T Compute(T x, T y) { return x * y; } - HConstant* Evaluate(HIntConstant* x, HIntConstant* y) const OVERRIDE { + HConstant* Evaluate(HIntConstant* x, HIntConstant* y) const override { return GetBlock()->GetGraph()->GetIntConstant( Compute(x->GetValue(), y->GetValue()), GetDexPc()); } - HConstant* Evaluate(HLongConstant* x, HLongConstant* y) const OVERRIDE { + HConstant* Evaluate(HLongConstant* x, HLongConstant* y) const override { return GetBlock()->GetGraph()->GetLongConstant( Compute(x->GetValue(), y->GetValue()), GetDexPc()); } - HConstant* Evaluate(HFloatConstant* x, HFloatConstant* y) const OVERRIDE { + HConstant* Evaluate(HFloatConstant* x, HFloatConstant* y) const override { return GetBlock()->GetGraph()->GetFloatConstant( Compute(x->GetValue(), y->GetValue()), GetDexPc()); } - HConstant* Evaluate(HDoubleConstant* x, HDoubleConstant* y) const OVERRIDE { + HConstant* Evaluate(HDoubleConstant* x, HDoubleConstant* y) const override { return GetBlock()->GetGraph()->GetDoubleConstant( Compute(x->GetValue(), y->GetValue()), GetDexPc()); } @@ -4920,7 +5026,7 @@ class HMul FINAL : public HBinaryOperation { DEFAULT_COPY_CONSTRUCTOR(Mul); }; -class HDiv FINAL : public HBinaryOperation { +class HDiv final : public HBinaryOperation { public: HDiv(DataType::Type result_type, HInstruction* left, @@ -4945,19 +5051,19 @@ class HDiv FINAL : public HBinaryOperation { return x / y; } - HConstant* Evaluate(HIntConstant* x, HIntConstant* y) const OVERRIDE { + HConstant* Evaluate(HIntConstant* x, HIntConstant* y) const override { return GetBlock()->GetGraph()->GetIntConstant( ComputeIntegral(x->GetValue(), y->GetValue()), GetDexPc()); } - HConstant* Evaluate(HLongConstant* x, HLongConstant* y) const OVERRIDE { + HConstant* Evaluate(HLongConstant* x, HLongConstant* y) const override { return GetBlock()->GetGraph()->GetLongConstant( ComputeIntegral(x->GetValue(), y->GetValue()), GetDexPc()); } - HConstant* Evaluate(HFloatConstant* x, HFloatConstant* y) const OVERRIDE { + HConstant* Evaluate(HFloatConstant* x, HFloatConstant* y) const override { return GetBlock()->GetGraph()->GetFloatConstant( ComputeFP(x->GetValue(), y->GetValue()), GetDexPc()); } - HConstant* Evaluate(HDoubleConstant* x, HDoubleConstant* y) const OVERRIDE { + HConstant* Evaluate(HDoubleConstant* x, HDoubleConstant* y) const override { return GetBlock()->GetGraph()->GetDoubleConstant( ComputeFP(x->GetValue(), y->GetValue()), GetDexPc()); } @@ -4968,7 +5074,7 @@ class HDiv FINAL : public HBinaryOperation { DEFAULT_COPY_CONSTRUCTOR(Div); }; -class HRem FINAL : public HBinaryOperation { +class HRem final : public 
HBinaryOperation { public: HRem(DataType::Type result_type, HInstruction* left, @@ -4993,19 +5099,19 @@ class HRem FINAL : public HBinaryOperation { return std::fmod(x, y); } - HConstant* Evaluate(HIntConstant* x, HIntConstant* y) const OVERRIDE { + HConstant* Evaluate(HIntConstant* x, HIntConstant* y) const override { return GetBlock()->GetGraph()->GetIntConstant( ComputeIntegral(x->GetValue(), y->GetValue()), GetDexPc()); } - HConstant* Evaluate(HLongConstant* x, HLongConstant* y) const OVERRIDE { + HConstant* Evaluate(HLongConstant* x, HLongConstant* y) const override { return GetBlock()->GetGraph()->GetLongConstant( ComputeIntegral(x->GetValue(), y->GetValue()), GetDexPc()); } - HConstant* Evaluate(HFloatConstant* x, HFloatConstant* y) const OVERRIDE { + HConstant* Evaluate(HFloatConstant* x, HFloatConstant* y) const override { return GetBlock()->GetGraph()->GetFloatConstant( ComputeFP(x->GetValue(), y->GetValue()), GetDexPc()); } - HConstant* Evaluate(HDoubleConstant* x, HDoubleConstant* y) const OVERRIDE { + HConstant* Evaluate(HDoubleConstant* x, HDoubleConstant* y) const override { return GetBlock()->GetGraph()->GetDoubleConstant( ComputeFP(x->GetValue(), y->GetValue()), GetDexPc()); } @@ -5016,25 +5122,136 @@ class HRem FINAL : public HBinaryOperation { DEFAULT_COPY_CONSTRUCTOR(Rem); }; -class HDivZeroCheck FINAL : public HExpression<1> { +class HMin final : public HBinaryOperation { + public: + HMin(DataType::Type result_type, + HInstruction* left, + HInstruction* right, + uint32_t dex_pc) + : HBinaryOperation(kMin, result_type, left, right, SideEffects::None(), dex_pc) {} + + bool IsCommutative() const override { return true; } + + // Evaluation for integral values. + template <typename T> static T ComputeIntegral(T x, T y) { + return (x <= y) ? x : y; + } + + HConstant* Evaluate(HIntConstant* x, HIntConstant* y) const override { + return GetBlock()->GetGraph()->GetIntConstant( + ComputeIntegral(x->GetValue(), y->GetValue()), GetDexPc()); + } + HConstant* Evaluate(HLongConstant* x, HLongConstant* y) const override { + return GetBlock()->GetGraph()->GetLongConstant( + ComputeIntegral(x->GetValue(), y->GetValue()), GetDexPc()); + } + // TODO: Evaluation for floating-point values. + HConstant* Evaluate(HFloatConstant* x ATTRIBUTE_UNUSED, + HFloatConstant* y ATTRIBUTE_UNUSED) const override { return nullptr; } + HConstant* Evaluate(HDoubleConstant* x ATTRIBUTE_UNUSED, + HDoubleConstant* y ATTRIBUTE_UNUSED) const override { return nullptr; } + + DECLARE_INSTRUCTION(Min); + + protected: + DEFAULT_COPY_CONSTRUCTOR(Min); +}; + +class HMax final : public HBinaryOperation { + public: + HMax(DataType::Type result_type, + HInstruction* left, + HInstruction* right, + uint32_t dex_pc) + : HBinaryOperation(kMax, result_type, left, right, SideEffects::None(), dex_pc) {} + + bool IsCommutative() const override { return true; } + + // Evaluation for integral values. + template <typename T> static T ComputeIntegral(T x, T y) { + return (x >= y) ? x : y; + } + + HConstant* Evaluate(HIntConstant* x, HIntConstant* y) const override { + return GetBlock()->GetGraph()->GetIntConstant( + ComputeIntegral(x->GetValue(), y->GetValue()), GetDexPc()); + } + HConstant* Evaluate(HLongConstant* x, HLongConstant* y) const override { + return GetBlock()->GetGraph()->GetLongConstant( + ComputeIntegral(x->GetValue(), y->GetValue()), GetDexPc()); + } + // TODO: Evaluation for floating-point values. 
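The TODO above reflects that integral min/max fold trivially while the floating-point versions are left unevaluated: Java's Math.min/max have special rules for NaN and signed zero that a plain comparison does not capture. A small standalone sketch (not ART code) of why `(x <= y) ? x : y` is not Math.min for floats:

  #include <cassert>
  #include <cmath>
  #include <limits>

  // Plain comparison-based min, as used for the integral evaluation above.
  template <typename T> static T NaiveMin(T x, T y) { return (x <= y) ? x : y; }

  int main() {
    // Integral case: straightforward.
    assert(NaiveMin(3, 5) == 3);

    // Floating-point case: Math.min(NaN, 1.0f) is NaN, but the naive form
    // returns the other operand because every comparison with NaN is false.
    float nan = std::numeric_limits<float>::quiet_NaN();
    assert(!std::isnan(NaiveMin(nan, 1.0f)));

    // Math.min(-0.0f, 0.0f) is -0.0f, but -0.0f <= 0.0f compares equal, so the
    // naive form cannot distinguish the two zeros either.
    return 0;
  }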
+ HConstant* Evaluate(HFloatConstant* x ATTRIBUTE_UNUSED, + HFloatConstant* y ATTRIBUTE_UNUSED) const override { return nullptr; } + HConstant* Evaluate(HDoubleConstant* x ATTRIBUTE_UNUSED, + HDoubleConstant* y ATTRIBUTE_UNUSED) const override { return nullptr; } + + DECLARE_INSTRUCTION(Max); + + protected: + DEFAULT_COPY_CONSTRUCTOR(Max); +}; + +class HAbs final : public HUnaryOperation { + public: + HAbs(DataType::Type result_type, HInstruction* input, uint32_t dex_pc = kNoDexPc) + : HUnaryOperation(kAbs, result_type, input, dex_pc) {} + + // Evaluation for integral values. + template <typename T> static T ComputeIntegral(T x) { + return x < 0 ? -x : x; + } + + // Evaluation for floating-point values. + // Note, as a "quality of implementation", rather than pure "spec compliance", + // we require that Math.abs() clears the sign bit (but changes nothing else) + // for all floating-point numbers, including NaN (signaling NaN may become quiet though). + // http://b/30758343 + template <typename T, typename S> static T ComputeFP(T x) { + S bits = bit_cast<S, T>(x); + return bit_cast<T, S>(bits & std::numeric_limits<S>::max()); + } + + HConstant* Evaluate(HIntConstant* x) const override { + return GetBlock()->GetGraph()->GetIntConstant(ComputeIntegral(x->GetValue()), GetDexPc()); + } + HConstant* Evaluate(HLongConstant* x) const override { + return GetBlock()->GetGraph()->GetLongConstant(ComputeIntegral(x->GetValue()), GetDexPc()); + } + HConstant* Evaluate(HFloatConstant* x) const override { + return GetBlock()->GetGraph()->GetFloatConstant( + ComputeFP<float, int32_t>(x->GetValue()), GetDexPc()); + } + HConstant* Evaluate(HDoubleConstant* x) const override { + return GetBlock()->GetGraph()->GetDoubleConstant( + ComputeFP<double, int64_t>(x->GetValue()), GetDexPc()); + } + + DECLARE_INSTRUCTION(Abs); + + protected: + DEFAULT_COPY_CONSTRUCTOR(Abs); +}; + +class HDivZeroCheck final : public HExpression<1> { public: // `HDivZeroCheck` can trigger GC, as it may call the `ArithmeticException` - // constructor. + // constructor. However it can only do it on a fatal slow path so execution never returns to the + // instruction following the current one; thus 'SideEffects::None()' is used. 
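On the HAbs::ComputeFP above: clearing the sign bit through a bit cast yields |x| for every input, including NaN, where a comparison-based abs would leave a negative NaN's sign untouched. A standalone sketch of the same trick, using memcpy as a portable bit cast:

  #include <cassert>
  #include <cmath>
  #include <cstdint>
  #include <cstring>
  #include <limits>

  // Same idea as HAbs::ComputeFP: reinterpret the bits and mask off the sign bit.
  static float AbsBits(float x) {
    uint32_t bits;
    std::memcpy(&bits, &x, sizeof(bits));         // portable bit_cast
    bits &= std::numeric_limits<int32_t>::max();  // clear only the sign bit
    float result;
    std::memcpy(&result, &bits, sizeof(result));
    return result;
  }

  int main() {
    assert(AbsBits(-2.5f) == 2.5f);
    assert(!std::signbit(AbsBits(-0.0f)));  // -0.0f becomes +0.0f
    float neg_nan = std::copysign(std::numeric_limits<float>::quiet_NaN(), -1.0f);
    assert(std::isnan(AbsBits(neg_nan)) && !std::signbit(AbsBits(neg_nan)));
    return 0;
  }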
HDivZeroCheck(HInstruction* value, uint32_t dex_pc) - : HExpression(kDivZeroCheck, value->GetType(), SideEffects::CanTriggerGC(), dex_pc) { + : HExpression(kDivZeroCheck, value->GetType(), SideEffects::None(), dex_pc) { SetRawInputAt(0, value); } - DataType::Type GetType() const OVERRIDE { return InputAt(0)->GetType(); } + bool IsClonable() const override { return true; } + bool CanBeMoved() const override { return true; } - bool CanBeMoved() const OVERRIDE { return true; } - - bool InstructionDataEquals(const HInstruction* other ATTRIBUTE_UNUSED) const OVERRIDE { + bool InstructionDataEquals(const HInstruction* other ATTRIBUTE_UNUSED) const override { return true; } - bool NeedsEnvironment() const OVERRIDE { return true; } - bool CanThrow() const OVERRIDE { return true; } + bool NeedsEnvironment() const override { return true; } + bool CanThrow() const override { return true; } DECLARE_INSTRUCTION(DivZeroCheck); @@ -5042,7 +5259,7 @@ class HDivZeroCheck FINAL : public HExpression<1> { DEFAULT_COPY_CONSTRUCTOR(DivZeroCheck); }; -class HShl FINAL : public HBinaryOperation { +class HShl final : public HBinaryOperation { public: HShl(DataType::Type result_type, HInstruction* value, @@ -5058,26 +5275,26 @@ class HShl FINAL : public HBinaryOperation { return value << (distance & max_shift_distance); } - HConstant* Evaluate(HIntConstant* value, HIntConstant* distance) const OVERRIDE { + HConstant* Evaluate(HIntConstant* value, HIntConstant* distance) const override { return GetBlock()->GetGraph()->GetIntConstant( Compute(value->GetValue(), distance->GetValue(), kMaxIntShiftDistance), GetDexPc()); } - HConstant* Evaluate(HLongConstant* value, HIntConstant* distance) const OVERRIDE { + HConstant* Evaluate(HLongConstant* value, HIntConstant* distance) const override { return GetBlock()->GetGraph()->GetLongConstant( Compute(value->GetValue(), distance->GetValue(), kMaxLongShiftDistance), GetDexPc()); } HConstant* Evaluate(HLongConstant* value ATTRIBUTE_UNUSED, - HLongConstant* distance ATTRIBUTE_UNUSED) const OVERRIDE { + HLongConstant* distance ATTRIBUTE_UNUSED) const override { LOG(FATAL) << DebugName() << " is not defined for the (long, long) case."; UNREACHABLE(); } HConstant* Evaluate(HFloatConstant* value ATTRIBUTE_UNUSED, - HFloatConstant* distance ATTRIBUTE_UNUSED) const OVERRIDE { + HFloatConstant* distance ATTRIBUTE_UNUSED) const override { LOG(FATAL) << DebugName() << " is not defined for float values"; UNREACHABLE(); } HConstant* Evaluate(HDoubleConstant* value ATTRIBUTE_UNUSED, - HDoubleConstant* distance ATTRIBUTE_UNUSED) const OVERRIDE { + HDoubleConstant* distance ATTRIBUTE_UNUSED) const override { LOG(FATAL) << DebugName() << " is not defined for double values"; UNREACHABLE(); } @@ -5088,7 +5305,7 @@ class HShl FINAL : public HBinaryOperation { DEFAULT_COPY_CONSTRUCTOR(Shl); }; -class HShr FINAL : public HBinaryOperation { +class HShr final : public HBinaryOperation { public: HShr(DataType::Type result_type, HInstruction* value, @@ -5104,26 +5321,26 @@ class HShr FINAL : public HBinaryOperation { return value >> (distance & max_shift_distance); } - HConstant* Evaluate(HIntConstant* value, HIntConstant* distance) const OVERRIDE { + HConstant* Evaluate(HIntConstant* value, HIntConstant* distance) const override { return GetBlock()->GetGraph()->GetIntConstant( Compute(value->GetValue(), distance->GetValue(), kMaxIntShiftDistance), GetDexPc()); } - HConstant* Evaluate(HLongConstant* value, HIntConstant* distance) const OVERRIDE { + HConstant* Evaluate(HLongConstant* value, HIntConstant* 
distance) const override { return GetBlock()->GetGraph()->GetLongConstant( Compute(value->GetValue(), distance->GetValue(), kMaxLongShiftDistance), GetDexPc()); } HConstant* Evaluate(HLongConstant* value ATTRIBUTE_UNUSED, - HLongConstant* distance ATTRIBUTE_UNUSED) const OVERRIDE { + HLongConstant* distance ATTRIBUTE_UNUSED) const override { LOG(FATAL) << DebugName() << " is not defined for the (long, long) case."; UNREACHABLE(); } HConstant* Evaluate(HFloatConstant* value ATTRIBUTE_UNUSED, - HFloatConstant* distance ATTRIBUTE_UNUSED) const OVERRIDE { + HFloatConstant* distance ATTRIBUTE_UNUSED) const override { LOG(FATAL) << DebugName() << " is not defined for float values"; UNREACHABLE(); } HConstant* Evaluate(HDoubleConstant* value ATTRIBUTE_UNUSED, - HDoubleConstant* distance ATTRIBUTE_UNUSED) const OVERRIDE { + HDoubleConstant* distance ATTRIBUTE_UNUSED) const override { LOG(FATAL) << DebugName() << " is not defined for double values"; UNREACHABLE(); } @@ -5134,7 +5351,7 @@ class HShr FINAL : public HBinaryOperation { DEFAULT_COPY_CONSTRUCTOR(Shr); }; -class HUShr FINAL : public HBinaryOperation { +class HUShr final : public HBinaryOperation { public: HUShr(DataType::Type result_type, HInstruction* value, @@ -5152,26 +5369,26 @@ class HUShr FINAL : public HBinaryOperation { return static_cast<T>(ux >> (distance & max_shift_distance)); } - HConstant* Evaluate(HIntConstant* value, HIntConstant* distance) const OVERRIDE { + HConstant* Evaluate(HIntConstant* value, HIntConstant* distance) const override { return GetBlock()->GetGraph()->GetIntConstant( Compute(value->GetValue(), distance->GetValue(), kMaxIntShiftDistance), GetDexPc()); } - HConstant* Evaluate(HLongConstant* value, HIntConstant* distance) const OVERRIDE { + HConstant* Evaluate(HLongConstant* value, HIntConstant* distance) const override { return GetBlock()->GetGraph()->GetLongConstant( Compute(value->GetValue(), distance->GetValue(), kMaxLongShiftDistance), GetDexPc()); } HConstant* Evaluate(HLongConstant* value ATTRIBUTE_UNUSED, - HLongConstant* distance ATTRIBUTE_UNUSED) const OVERRIDE { + HLongConstant* distance ATTRIBUTE_UNUSED) const override { LOG(FATAL) << DebugName() << " is not defined for the (long, long) case."; UNREACHABLE(); } HConstant* Evaluate(HFloatConstant* value ATTRIBUTE_UNUSED, - HFloatConstant* distance ATTRIBUTE_UNUSED) const OVERRIDE { + HFloatConstant* distance ATTRIBUTE_UNUSED) const override { LOG(FATAL) << DebugName() << " is not defined for float values"; UNREACHABLE(); } HConstant* Evaluate(HDoubleConstant* value ATTRIBUTE_UNUSED, - HDoubleConstant* distance ATTRIBUTE_UNUSED) const OVERRIDE { + HDoubleConstant* distance ATTRIBUTE_UNUSED) const override { LOG(FATAL) << DebugName() << " is not defined for double values"; UNREACHABLE(); } @@ -5182,7 +5399,7 @@ class HUShr FINAL : public HBinaryOperation { DEFAULT_COPY_CONSTRUCTOR(UShr); }; -class HAnd FINAL : public HBinaryOperation { +class HAnd final : public HBinaryOperation { public: HAnd(DataType::Type result_type, HInstruction* left, @@ -5191,25 +5408,25 @@ class HAnd FINAL : public HBinaryOperation { : HBinaryOperation(kAnd, result_type, left, right, SideEffects::None(), dex_pc) { } - bool IsCommutative() const OVERRIDE { return true; } + bool IsCommutative() const override { return true; } template <typename T> static T Compute(T x, T y) { return x & y; } - HConstant* Evaluate(HIntConstant* x, HIntConstant* y) const OVERRIDE { + HConstant* Evaluate(HIntConstant* x, HIntConstant* y) const override { return 
GetBlock()->GetGraph()->GetIntConstant( Compute(x->GetValue(), y->GetValue()), GetDexPc()); } - HConstant* Evaluate(HLongConstant* x, HLongConstant* y) const OVERRIDE { + HConstant* Evaluate(HLongConstant* x, HLongConstant* y) const override { return GetBlock()->GetGraph()->GetLongConstant( Compute(x->GetValue(), y->GetValue()), GetDexPc()); } HConstant* Evaluate(HFloatConstant* x ATTRIBUTE_UNUSED, - HFloatConstant* y ATTRIBUTE_UNUSED) const OVERRIDE { + HFloatConstant* y ATTRIBUTE_UNUSED) const override { LOG(FATAL) << DebugName() << " is not defined for float values"; UNREACHABLE(); } HConstant* Evaluate(HDoubleConstant* x ATTRIBUTE_UNUSED, - HDoubleConstant* y ATTRIBUTE_UNUSED) const OVERRIDE { + HDoubleConstant* y ATTRIBUTE_UNUSED) const override { LOG(FATAL) << DebugName() << " is not defined for double values"; UNREACHABLE(); } @@ -5220,7 +5437,7 @@ class HAnd FINAL : public HBinaryOperation { DEFAULT_COPY_CONSTRUCTOR(And); }; -class HOr FINAL : public HBinaryOperation { +class HOr final : public HBinaryOperation { public: HOr(DataType::Type result_type, HInstruction* left, @@ -5229,25 +5446,25 @@ class HOr FINAL : public HBinaryOperation { : HBinaryOperation(kOr, result_type, left, right, SideEffects::None(), dex_pc) { } - bool IsCommutative() const OVERRIDE { return true; } + bool IsCommutative() const override { return true; } template <typename T> static T Compute(T x, T y) { return x | y; } - HConstant* Evaluate(HIntConstant* x, HIntConstant* y) const OVERRIDE { + HConstant* Evaluate(HIntConstant* x, HIntConstant* y) const override { return GetBlock()->GetGraph()->GetIntConstant( Compute(x->GetValue(), y->GetValue()), GetDexPc()); } - HConstant* Evaluate(HLongConstant* x, HLongConstant* y) const OVERRIDE { + HConstant* Evaluate(HLongConstant* x, HLongConstant* y) const override { return GetBlock()->GetGraph()->GetLongConstant( Compute(x->GetValue(), y->GetValue()), GetDexPc()); } HConstant* Evaluate(HFloatConstant* x ATTRIBUTE_UNUSED, - HFloatConstant* y ATTRIBUTE_UNUSED) const OVERRIDE { + HFloatConstant* y ATTRIBUTE_UNUSED) const override { LOG(FATAL) << DebugName() << " is not defined for float values"; UNREACHABLE(); } HConstant* Evaluate(HDoubleConstant* x ATTRIBUTE_UNUSED, - HDoubleConstant* y ATTRIBUTE_UNUSED) const OVERRIDE { + HDoubleConstant* y ATTRIBUTE_UNUSED) const override { LOG(FATAL) << DebugName() << " is not defined for double values"; UNREACHABLE(); } @@ -5258,7 +5475,7 @@ class HOr FINAL : public HBinaryOperation { DEFAULT_COPY_CONSTRUCTOR(Or); }; -class HXor FINAL : public HBinaryOperation { +class HXor final : public HBinaryOperation { public: HXor(DataType::Type result_type, HInstruction* left, @@ -5267,25 +5484,25 @@ class HXor FINAL : public HBinaryOperation { : HBinaryOperation(kXor, result_type, left, right, SideEffects::None(), dex_pc) { } - bool IsCommutative() const OVERRIDE { return true; } + bool IsCommutative() const override { return true; } template <typename T> static T Compute(T x, T y) { return x ^ y; } - HConstant* Evaluate(HIntConstant* x, HIntConstant* y) const OVERRIDE { + HConstant* Evaluate(HIntConstant* x, HIntConstant* y) const override { return GetBlock()->GetGraph()->GetIntConstant( Compute(x->GetValue(), y->GetValue()), GetDexPc()); } - HConstant* Evaluate(HLongConstant* x, HLongConstant* y) const OVERRIDE { + HConstant* Evaluate(HLongConstant* x, HLongConstant* y) const override { return GetBlock()->GetGraph()->GetLongConstant( Compute(x->GetValue(), y->GetValue()), GetDexPc()); } HConstant* Evaluate(HFloatConstant* x 
ATTRIBUTE_UNUSED, - HFloatConstant* y ATTRIBUTE_UNUSED) const OVERRIDE { + HFloatConstant* y ATTRIBUTE_UNUSED) const override { LOG(FATAL) << DebugName() << " is not defined for float values"; UNREACHABLE(); } HConstant* Evaluate(HDoubleConstant* x ATTRIBUTE_UNUSED, - HDoubleConstant* y ATTRIBUTE_UNUSED) const OVERRIDE { + HDoubleConstant* y ATTRIBUTE_UNUSED) const override { LOG(FATAL) << DebugName() << " is not defined for double values"; UNREACHABLE(); } @@ -5296,7 +5513,7 @@ class HXor FINAL : public HBinaryOperation { DEFAULT_COPY_CONSTRUCTOR(Xor); }; -class HRor FINAL : public HBinaryOperation { +class HRor final : public HBinaryOperation { public: HRor(DataType::Type result_type, HInstruction* value, HInstruction* distance) : HBinaryOperation(kRor, result_type, value, distance) { @@ -5317,26 +5534,26 @@ class HRor FINAL : public HBinaryOperation { } } - HConstant* Evaluate(HIntConstant* value, HIntConstant* distance) const OVERRIDE { + HConstant* Evaluate(HIntConstant* value, HIntConstant* distance) const override { return GetBlock()->GetGraph()->GetIntConstant( Compute(value->GetValue(), distance->GetValue(), kMaxIntShiftDistance), GetDexPc()); } - HConstant* Evaluate(HLongConstant* value, HIntConstant* distance) const OVERRIDE { + HConstant* Evaluate(HLongConstant* value, HIntConstant* distance) const override { return GetBlock()->GetGraph()->GetLongConstant( Compute(value->GetValue(), distance->GetValue(), kMaxLongShiftDistance), GetDexPc()); } HConstant* Evaluate(HLongConstant* value ATTRIBUTE_UNUSED, - HLongConstant* distance ATTRIBUTE_UNUSED) const OVERRIDE { + HLongConstant* distance ATTRIBUTE_UNUSED) const override { LOG(FATAL) << DebugName() << " is not defined for the (long, long) case."; UNREACHABLE(); } HConstant* Evaluate(HFloatConstant* value ATTRIBUTE_UNUSED, - HFloatConstant* distance ATTRIBUTE_UNUSED) const OVERRIDE { + HFloatConstant* distance ATTRIBUTE_UNUSED) const override { LOG(FATAL) << DebugName() << " is not defined for float values"; UNREACHABLE(); } HConstant* Evaluate(HDoubleConstant* value ATTRIBUTE_UNUSED, - HDoubleConstant* distance ATTRIBUTE_UNUSED) const OVERRIDE { + HDoubleConstant* distance ATTRIBUTE_UNUSED) const override { LOG(FATAL) << DebugName() << " is not defined for double values"; UNREACHABLE(); } @@ -5349,7 +5566,7 @@ class HRor FINAL : public HBinaryOperation { // The value of a parameter in this method. Its location depends on // the calling convention. -class HParameterValue FINAL : public HExpression<0> { +class HParameterValue final : public HExpression<0> { public: HParameterValue(const DexFile& dex_file, dex::TypeIndex type_index, @@ -5369,7 +5586,7 @@ class HParameterValue FINAL : public HExpression<0> { uint8_t GetIndex() const { return index_; } bool IsThis() const { return GetPackedFlag<kFlagIsThis>(); } - bool CanBeNull() const OVERRIDE { return GetPackedFlag<kFlagCanBeNull>(); } + bool CanBeNull() const override { return GetPackedFlag<kFlagCanBeNull>(); } void SetCanBeNull(bool can_be_null) { SetPackedFlag<kFlagCanBeNull>(can_be_null); } DECLARE_INSTRUCTION(ParameterValue); @@ -5379,7 +5596,7 @@ class HParameterValue FINAL : public HExpression<0> { private: // Whether or not the parameter value corresponds to 'this' argument. 
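The shift and rotate nodes above all mask the distance with kMaxIntShiftDistance or kMaxLongShiftDistance, matching the Java/Dex rule that only the low 5 bits (int) or 6 bits (long) of the distance are used. A quick standalone illustration of the HShl::Compute pattern:

  #include <cassert>
  #include <cstdint>

  static constexpr int32_t kMaxIntShiftDistance = 0x1f;   // 5 bits for 32-bit values
  static constexpr int32_t kMaxLongShiftDistance = 0x3f;  // 6 bits for 64-bit values

  // Mirrors HShl::Compute: the distance is masked before shifting.
  template <typename T>
  static T Shl(T value, int32_t distance, int32_t max_shift_distance) {
    return value << (distance & max_shift_distance);
  }

  int main() {
    // In Java, (1 << 33) == (1 << 1) for int operands; the masking reproduces that.
    assert(Shl<int32_t>(1, 33, kMaxIntShiftDistance) == 2);
    assert(Shl<int64_t>(1, 33, kMaxLongShiftDistance) == (int64_t{1} << 33));
    return 0;
  }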
- static constexpr size_t kFlagIsThis = kNumberOfExpressionPackedBits; + static constexpr size_t kFlagIsThis = kNumberOfGenericPackedBits; static constexpr size_t kFlagCanBeNull = kFlagIsThis + 1; static constexpr size_t kNumberOfParameterValuePackedBits = kFlagCanBeNull + 1; static_assert(kNumberOfParameterValuePackedBits <= kMaxNumberOfPackedBits, @@ -5392,30 +5609,30 @@ class HParameterValue FINAL : public HExpression<0> { const uint8_t index_; }; -class HNot FINAL : public HUnaryOperation { +class HNot final : public HUnaryOperation { public: HNot(DataType::Type result_type, HInstruction* input, uint32_t dex_pc = kNoDexPc) : HUnaryOperation(kNot, result_type, input, dex_pc) { } - bool CanBeMoved() const OVERRIDE { return true; } - bool InstructionDataEquals(const HInstruction* other ATTRIBUTE_UNUSED) const OVERRIDE { + bool CanBeMoved() const override { return true; } + bool InstructionDataEquals(const HInstruction* other ATTRIBUTE_UNUSED) const override { return true; } template <typename T> static T Compute(T x) { return ~x; } - HConstant* Evaluate(HIntConstant* x) const OVERRIDE { + HConstant* Evaluate(HIntConstant* x) const override { return GetBlock()->GetGraph()->GetIntConstant(Compute(x->GetValue()), GetDexPc()); } - HConstant* Evaluate(HLongConstant* x) const OVERRIDE { + HConstant* Evaluate(HLongConstant* x) const override { return GetBlock()->GetGraph()->GetLongConstant(Compute(x->GetValue()), GetDexPc()); } - HConstant* Evaluate(HFloatConstant* x ATTRIBUTE_UNUSED) const OVERRIDE { + HConstant* Evaluate(HFloatConstant* x ATTRIBUTE_UNUSED) const override { LOG(FATAL) << DebugName() << " is not defined for float values"; UNREACHABLE(); } - HConstant* Evaluate(HDoubleConstant* x ATTRIBUTE_UNUSED) const OVERRIDE { + HConstant* Evaluate(HDoubleConstant* x ATTRIBUTE_UNUSED) const override { LOG(FATAL) << DebugName() << " is not defined for double values"; UNREACHABLE(); } @@ -5426,14 +5643,14 @@ class HNot FINAL : public HUnaryOperation { DEFAULT_COPY_CONSTRUCTOR(Not); }; -class HBooleanNot FINAL : public HUnaryOperation { +class HBooleanNot final : public HUnaryOperation { public: explicit HBooleanNot(HInstruction* input, uint32_t dex_pc = kNoDexPc) : HUnaryOperation(kBooleanNot, DataType::Type::kBool, input, dex_pc) { } - bool CanBeMoved() const OVERRIDE { return true; } - bool InstructionDataEquals(const HInstruction* other ATTRIBUTE_UNUSED) const OVERRIDE { + bool CanBeMoved() const override { return true; } + bool InstructionDataEquals(const HInstruction* other ATTRIBUTE_UNUSED) const override { return true; } @@ -5442,18 +5659,18 @@ class HBooleanNot FINAL : public HUnaryOperation { return !x; } - HConstant* Evaluate(HIntConstant* x) const OVERRIDE { + HConstant* Evaluate(HIntConstant* x) const override { return GetBlock()->GetGraph()->GetIntConstant(Compute(x->GetValue()), GetDexPc()); } - HConstant* Evaluate(HLongConstant* x ATTRIBUTE_UNUSED) const OVERRIDE { + HConstant* Evaluate(HLongConstant* x ATTRIBUTE_UNUSED) const override { LOG(FATAL) << DebugName() << " is not defined for long values"; UNREACHABLE(); } - HConstant* Evaluate(HFloatConstant* x ATTRIBUTE_UNUSED) const OVERRIDE { + HConstant* Evaluate(HFloatConstant* x ATTRIBUTE_UNUSED) const override { LOG(FATAL) << DebugName() << " is not defined for float values"; UNREACHABLE(); } - HConstant* Evaluate(HDoubleConstant* x ATTRIBUTE_UNUSED) const OVERRIDE { + HConstant* Evaluate(HDoubleConstant* x ATTRIBUTE_UNUSED) const override { LOG(FATAL) << DebugName() << " is not defined for double values"; UNREACHABLE(); } @@ 
-5464,7 +5681,7 @@ class HBooleanNot FINAL : public HUnaryOperation { DEFAULT_COPY_CONSTRUCTOR(BooleanNot); }; -class HTypeConversion FINAL : public HExpression<1> { +class HTypeConversion final : public HExpression<1> { public: // Instantiate a type conversion of `input` to `result_type`. HTypeConversion(DataType::Type result_type, HInstruction* input, uint32_t dex_pc = kNoDexPc) @@ -5478,10 +5695,15 @@ class HTypeConversion FINAL : public HExpression<1> { DataType::Type GetInputType() const { return GetInput()->GetType(); } DataType::Type GetResultType() const { return GetType(); } - bool CanBeMoved() const OVERRIDE { return true; } - bool InstructionDataEquals(const HInstruction* other ATTRIBUTE_UNUSED) const OVERRIDE { + bool IsClonable() const override { return true; } + bool CanBeMoved() const override { return true; } + bool InstructionDataEquals(const HInstruction* other ATTRIBUTE_UNUSED) const override { return true; } + // Return whether the conversion is implicit. This includes conversion to the same type. + bool IsImplicitConversion() const { + return DataType::IsTypeConversionImplicit(GetInputType(), GetResultType()); + } // Try to statically evaluate the conversion and return a HConstant // containing the result. If the input cannot be converted, return nullptr. @@ -5495,26 +5717,27 @@ class HTypeConversion FINAL : public HExpression<1> { static constexpr uint32_t kNoRegNumber = -1; -class HNullCheck FINAL : public HExpression<1> { +class HNullCheck final : public HExpression<1> { public: // `HNullCheck` can trigger GC, as it may call the `NullPointerException` - // constructor. + // constructor. However it can only do it on a fatal slow path so execution never returns to the + // instruction following the current one; thus 'SideEffects::None()' is used. 
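The IsImplicitConversion() helper added to HTypeConversion above asks whether the conversion needs any code at all; it also covers conversion to the same type. A simplified stand-in predicate is sketched below under the assumption that sub-int values are already held widened to 32 bits in registers, so widening them to int32 is free; the real DataType::IsTypeConversionImplicit handles more cases:

  #include <cassert>

  enum class Type { kBool, kUint8, kInt8, kUint16, kInt16, kInt32, kInt64, kFloat32, kFloat64 };

  // Simplified: a conversion is a no-op if the types match, or if a sub-int value
  // (assumed to live widened to 32 bits in a register) is converted to int32.
  static bool IsTypeConversionImplicit(Type input, Type result) {
    if (input == result) return true;
    bool sub_int_input = input == Type::kBool || input == Type::kUint8 ||
                         input == Type::kInt8 || input == Type::kUint16 ||
                         input == Type::kInt16;
    return result == Type::kInt32 && sub_int_input;
  }

  int main() {
    assert(IsTypeConversionImplicit(Type::kInt16, Type::kInt32));   // short -> int: no code
    assert(!IsTypeConversionImplicit(Type::kInt32, Type::kInt16));  // int -> short: truncation
    assert(!IsTypeConversionImplicit(Type::kInt32, Type::kFloat32));
    return 0;
  }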
HNullCheck(HInstruction* value, uint32_t dex_pc) - : HExpression(kNullCheck, value->GetType(), SideEffects::CanTriggerGC(), dex_pc) { + : HExpression(kNullCheck, value->GetType(), SideEffects::None(), dex_pc) { SetRawInputAt(0, value); } - bool IsClonable() const OVERRIDE { return true; } - bool CanBeMoved() const OVERRIDE { return true; } - bool InstructionDataEquals(const HInstruction* other ATTRIBUTE_UNUSED) const OVERRIDE { + bool IsClonable() const override { return true; } + bool CanBeMoved() const override { return true; } + bool InstructionDataEquals(const HInstruction* other ATTRIBUTE_UNUSED) const override { return true; } - bool NeedsEnvironment() const OVERRIDE { return true; } + bool NeedsEnvironment() const override { return true; } - bool CanThrow() const OVERRIDE { return true; } + bool CanThrow() const override { return true; } - bool CanBeNull() const OVERRIDE { return false; } + bool CanBeNull() const override { return false; } DECLARE_INSTRUCTION(NullCheck); @@ -5559,7 +5782,7 @@ class FieldInfo : public ValueObject { const DexFile& dex_file_; }; -class HInstanceFieldGet FINAL : public HExpression<1> { +class HInstanceFieldGet final : public HExpression<1> { public: HInstanceFieldGet(HInstruction* value, ArtField* field, @@ -5584,19 +5807,19 @@ class HInstanceFieldGet FINAL : public HExpression<1> { SetRawInputAt(0, value); } - bool IsClonable() const OVERRIDE { return true; } - bool CanBeMoved() const OVERRIDE { return !IsVolatile(); } + bool IsClonable() const override { return true; } + bool CanBeMoved() const override { return !IsVolatile(); } - bool InstructionDataEquals(const HInstruction* other) const OVERRIDE { + bool InstructionDataEquals(const HInstruction* other) const override { const HInstanceFieldGet* other_get = other->AsInstanceFieldGet(); return GetFieldOffset().SizeValue() == other_get->GetFieldOffset().SizeValue(); } - bool CanDoImplicitNullCheckOn(HInstruction* obj) const OVERRIDE { + bool CanDoImplicitNullCheckOn(HInstruction* obj) const override { return (obj == InputAt(0)) && art::CanDoImplicitNullCheckOn(GetFieldOffset().Uint32Value()); } - size_t ComputeHashCode() const OVERRIDE { + size_t ComputeHashCode() const override { return (HInstruction::ComputeHashCode() << 7) | GetFieldOffset().SizeValue(); } @@ -5621,7 +5844,7 @@ class HInstanceFieldGet FINAL : public HExpression<1> { const FieldInfo field_info_; }; -class HInstanceFieldSet FINAL : public HTemplateInstruction<2> { +class HInstanceFieldSet final : public HExpression<2> { public: HInstanceFieldSet(HInstruction* object, HInstruction* value, @@ -5633,9 +5856,9 @@ class HInstanceFieldSet FINAL : public HTemplateInstruction<2> { uint16_t declaring_class_def_index, const DexFile& dex_file, uint32_t dex_pc) - : HTemplateInstruction(kInstanceFieldSet, - SideEffects::FieldWriteOfType(field_type, is_volatile), - dex_pc), + : HExpression(kInstanceFieldSet, + SideEffects::FieldWriteOfType(field_type, is_volatile), + dex_pc), field_info_(field, field_offset, field_type, @@ -5648,9 +5871,9 @@ class HInstanceFieldSet FINAL : public HTemplateInstruction<2> { SetRawInputAt(1, value); } - bool IsClonable() const OVERRIDE { return true; } + bool IsClonable() const override { return true; } - bool CanDoImplicitNullCheckOn(HInstruction* obj) const OVERRIDE { + bool CanDoImplicitNullCheckOn(HInstruction* obj) const override { return (obj == InputAt(0)) && art::CanDoImplicitNullCheckOn(GetFieldOffset().Uint32Value()); } @@ -5676,7 +5899,7 @@ class HInstanceFieldSet FINAL : public HTemplateInstruction<2> { 
const FieldInfo field_info_; }; -class HArrayGet FINAL : public HExpression<2> { +class HArrayGet final : public HExpression<2> { public: HArrayGet(HInstruction* array, HInstruction* index, @@ -5687,7 +5910,7 @@ class HArrayGet FINAL : public HExpression<2> { type, SideEffects::ArrayReadOfType(type), dex_pc, - /* is_string_char_at */ false) { + /* is_string_char_at= */ false) { } HArrayGet(HInstruction* array, @@ -5702,12 +5925,12 @@ class HArrayGet FINAL : public HExpression<2> { SetRawInputAt(1, index); } - bool IsClonable() const OVERRIDE { return true; } - bool CanBeMoved() const OVERRIDE { return true; } - bool InstructionDataEquals(const HInstruction* other ATTRIBUTE_UNUSED) const OVERRIDE { + bool IsClonable() const override { return true; } + bool CanBeMoved() const override { return true; } + bool InstructionDataEquals(const HInstruction* other ATTRIBUTE_UNUSED) const override { return true; } - bool CanDoImplicitNullCheckOn(HInstruction* obj ATTRIBUTE_UNUSED) const OVERRIDE { + bool CanDoImplicitNullCheckOn(HInstruction* obj ATTRIBUTE_UNUSED) const override { // TODO: We can be smarter here. // Currently, unless the array is the result of NewArray, the array access is always // preceded by some form of null NullCheck necessary for the bounds check, usually @@ -5761,13 +5984,13 @@ class HArrayGet FINAL : public HExpression<2> { // a particular HArrayGet is actually a String.charAt() by looking at the type // of the input but that requires holding the mutator lock, so we prefer to use // a flag, so that code generators don't need to do the locking. - static constexpr size_t kFlagIsStringCharAt = kNumberOfExpressionPackedBits; + static constexpr size_t kFlagIsStringCharAt = kNumberOfGenericPackedBits; static constexpr size_t kNumberOfArrayGetPackedBits = kFlagIsStringCharAt + 1; static_assert(kNumberOfArrayGetPackedBits <= HInstruction::kMaxNumberOfPackedBits, "Too many packed fields."); }; -class HArraySet FINAL : public HTemplateInstruction<3> { +class HArraySet final : public HExpression<3> { public: HArraySet(HInstruction* array, HInstruction* index, @@ -5789,7 +6012,7 @@ class HArraySet FINAL : public HTemplateInstruction<3> { DataType::Type expected_component_type, SideEffects side_effects, uint32_t dex_pc) - : HTemplateInstruction(kArraySet, side_effects, dex_pc) { + : HExpression(kArraySet, side_effects, dex_pc) { SetPackedField<ExpectedComponentTypeField>(expected_component_type); SetPackedFlag<kFlagNeedsTypeCheck>(value->GetType() == DataType::Type::kReference); SetPackedFlag<kFlagValueCanBeNull>(true); @@ -5799,17 +6022,17 @@ class HArraySet FINAL : public HTemplateInstruction<3> { SetRawInputAt(2, value); } - bool IsClonable() const OVERRIDE { return true; } + bool IsClonable() const override { return true; } - bool NeedsEnvironment() const OVERRIDE { + bool NeedsEnvironment() const override { // We call a runtime method to throw ArrayStoreException. return NeedsTypeCheck(); } // Can throw ArrayStoreException. - bool CanThrow() const OVERRIDE { return NeedsTypeCheck(); } + bool CanThrow() const override { return NeedsTypeCheck(); } - bool CanDoImplicitNullCheckOn(HInstruction* obj ATTRIBUTE_UNUSED) const OVERRIDE { + bool CanDoImplicitNullCheckOn(HInstruction* obj ATTRIBUTE_UNUSED) const override { // TODO: Same as for ArrayGet. 
return false; } @@ -5886,7 +6109,7 @@ class HArraySet FINAL : public HTemplateInstruction<3> { BitField<DataType::Type, kFieldExpectedComponentType, kFieldExpectedComponentTypeSize>; }; -class HArrayLength FINAL : public HExpression<1> { +class HArrayLength final : public HExpression<1> { public: HArrayLength(HInstruction* array, uint32_t dex_pc, bool is_string_length = false) : HExpression(kArrayLength, DataType::Type::kInt32, SideEffects::None(), dex_pc) { @@ -5896,12 +6119,12 @@ class HArrayLength FINAL : public HExpression<1> { SetRawInputAt(0, array); } - bool IsClonable() const OVERRIDE { return true; } - bool CanBeMoved() const OVERRIDE { return true; } - bool InstructionDataEquals(const HInstruction* other ATTRIBUTE_UNUSED) const OVERRIDE { + bool IsClonable() const override { return true; } + bool CanBeMoved() const override { return true; } + bool InstructionDataEquals(const HInstruction* other ATTRIBUTE_UNUSED) const override { return true; } - bool CanDoImplicitNullCheckOn(HInstruction* obj) const OVERRIDE { + bool CanDoImplicitNullCheckOn(HInstruction* obj) const override { return obj == InputAt(0); } @@ -5918,36 +6141,37 @@ class HArrayLength FINAL : public HExpression<1> { // determine whether a particular HArrayLength is actually a String.length() by // looking at the type of the input but that requires holding the mutator lock, so // we prefer to use a flag, so that code generators don't need to do the locking. - static constexpr size_t kFlagIsStringLength = kNumberOfExpressionPackedBits; + static constexpr size_t kFlagIsStringLength = kNumberOfGenericPackedBits; static constexpr size_t kNumberOfArrayLengthPackedBits = kFlagIsStringLength + 1; static_assert(kNumberOfArrayLengthPackedBits <= HInstruction::kMaxNumberOfPackedBits, "Too many packed fields."); }; -class HBoundsCheck FINAL : public HExpression<2> { +class HBoundsCheck final : public HExpression<2> { public: // `HBoundsCheck` can trigger GC, as it may call the `IndexOutOfBoundsException` - // constructor. + // constructor. However it can only do it on a fatal slow path so execution never returns to the + // instruction following the current one; thus 'SideEffects::None()' is used. 
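Several of the rewritten nodes above (HArrayGet, HArrayLength, HBoundsCheck) now start their own packed flags at kNumberOfGenericPackedBits, and HBoundsCheck gains the customary static_assert guard. The underlying idiom is a single bit-packed word that each subclass extends from where its base class stopped; a self-contained sketch of the pattern with simplified names (PackedBits and BoundsCheckLike are illustrative, not the real HInstruction layout):

#include <cstddef>
#include <cstdint>

class PackedBits {
 public:
  static constexpr size_t kMaxNumberOfPackedBits = 32;

 protected:
  template <size_t kBit> bool GetFlag() const { return (bits_ >> kBit) & 1u; }
  template <size_t kBit> void SetFlag(bool value) {
    bits_ = (bits_ & ~(1u << kBit)) | (static_cast<uint32_t>(value) << kBit);
  }

  // The base class reserves the first bits; subclasses append their flags after these.
  static constexpr size_t kNumberOfGenericPackedBits = 2;

 private:
  uint32_t bits_ = 0u;
};

class BoundsCheckLike : public PackedBits {
 public:
  void SetIsStringCharAt(bool value) { SetFlag<kFlagIsStringCharAt>(value); }
  bool IsStringCharAt() const { return GetFlag<kFlagIsStringCharAt>(); }

 private:
  static constexpr size_t kFlagIsStringCharAt = kNumberOfGenericPackedBits;
  static constexpr size_t kNumberOfBoundsCheckPackedBits = kFlagIsStringCharAt + 1;
  static_assert(kNumberOfBoundsCheckPackedBits <= kMaxNumberOfPackedBits,
                "Too many packed fields.");
};
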
HBoundsCheck(HInstruction* index, HInstruction* length, uint32_t dex_pc, bool is_string_char_at = false) - : HExpression(kBoundsCheck, index->GetType(), SideEffects::CanTriggerGC(), dex_pc) { + : HExpression(kBoundsCheck, index->GetType(), SideEffects::None(), dex_pc) { DCHECK_EQ(DataType::Type::kInt32, DataType::Kind(index->GetType())); SetPackedFlag<kFlagIsStringCharAt>(is_string_char_at); SetRawInputAt(0, index); SetRawInputAt(1, length); } - bool IsClonable() const OVERRIDE { return true; } - bool CanBeMoved() const OVERRIDE { return true; } - bool InstructionDataEquals(const HInstruction* other ATTRIBUTE_UNUSED) const OVERRIDE { + bool IsClonable() const override { return true; } + bool CanBeMoved() const override { return true; } + bool InstructionDataEquals(const HInstruction* other ATTRIBUTE_UNUSED) const override { return true; } - bool NeedsEnvironment() const OVERRIDE { return true; } + bool NeedsEnvironment() const override { return true; } - bool CanThrow() const OVERRIDE { return true; } + bool CanThrow() const override { return true; } bool IsStringCharAt() const { return GetPackedFlag<kFlagIsStringCharAt>(); } @@ -5959,19 +6183,22 @@ class HBoundsCheck FINAL : public HExpression<2> { DEFAULT_COPY_CONSTRUCTOR(BoundsCheck); private: - static constexpr size_t kFlagIsStringCharAt = kNumberOfExpressionPackedBits; + static constexpr size_t kFlagIsStringCharAt = kNumberOfGenericPackedBits; + static constexpr size_t kNumberOfBoundsCheckPackedBits = kFlagIsStringCharAt + 1; + static_assert(kNumberOfBoundsCheckPackedBits <= HInstruction::kMaxNumberOfPackedBits, + "Too many packed fields."); }; -class HSuspendCheck FINAL : public HTemplateInstruction<0> { +class HSuspendCheck final : public HExpression<0> { public: explicit HSuspendCheck(uint32_t dex_pc = kNoDexPc) - : HTemplateInstruction(kSuspendCheck, SideEffects::CanTriggerGC(), dex_pc), + : HExpression(kSuspendCheck, SideEffects::CanTriggerGC(), dex_pc), slow_path_(nullptr) { } - bool IsClonable() const OVERRIDE { return true; } + bool IsClonable() const override { return true; } - bool NeedsEnvironment() const OVERRIDE { + bool NeedsEnvironment() const override { return true; } @@ -5991,13 +6218,13 @@ class HSuspendCheck FINAL : public HTemplateInstruction<0> { // Pseudo-instruction which provides the native debugger with mapping information. // It ensures that we can generate line number and local variables at this point. -class HNativeDebugInfo : public HTemplateInstruction<0> { +class HNativeDebugInfo : public HExpression<0> { public: explicit HNativeDebugInfo(uint32_t dex_pc) - : HTemplateInstruction<0>(kNativeDebugInfo, SideEffects::None(), dex_pc) { + : HExpression<0>(kNativeDebugInfo, SideEffects::None(), dex_pc) { } - bool NeedsEnvironment() const OVERRIDE { + bool NeedsEnvironment() const override { return true; } @@ -6010,7 +6237,7 @@ class HNativeDebugInfo : public HTemplateInstruction<0> { /** * Instruction to load a Class object. */ -class HLoadClass FINAL : public HInstruction { +class HLoadClass final : public HInstruction { public: // Determines how to load the Class. enum class LoadKind { @@ -6024,18 +6251,18 @@ class HLoadClass FINAL : public HInstruction { // Used for boot image classes referenced by boot image code. kBootImageLinkTimePcRelative, - // Use a known boot image Class* address, embedded in the code by the codegen. - // Used for boot image classes referenced by apps in AOT- and JIT-compiled code. 
- kBootImageAddress, - - // Use a PC-relative load from a boot image ClassTable mmapped into the .bss - // of the oat file. - kBootImageClassTable, + // Load from an entry in the .data.bimg.rel.ro using a PC-relative load. + // Used for boot image classes referenced by apps in AOT-compiled code. + kBootImageRelRo, // Load from an entry in the .bss section using a PC-relative load. - // Used for classes outside boot image when .bss is accessible with a PC-relative load. + // Used for classes outside boot image referenced by AOT-compiled app and boot image code. kBssEntry, + // Use a known boot image Class* address, embedded in the code by the codegen. + // Used for boot image classes referenced by apps in JIT-compiled code. + kJitBootImageAddress, + // Load from the root table associated with the JIT compiled method. kJitTableAddress, @@ -6053,12 +6280,14 @@ class HLoadClass FINAL : public HInstruction { bool is_referrers_class, uint32_t dex_pc, bool needs_access_check) - : HInstruction(kLoadClass, SideEffectsForArchRuntimeCalls(), dex_pc), + : HInstruction(kLoadClass, + DataType::Type::kReference, + SideEffectsForArchRuntimeCalls(), + dex_pc), special_input_(HUserRecord<HInstruction*>(current_method)), type_index_(type_index), dex_file_(dex_file), - klass_(klass), - loaded_class_rti_(ReferenceTypeInfo::CreateInvalid()) { + klass_(klass) { // Referrers class should not need access check. We never inline unverified // methods so we can't possibly end up in this situation. DCHECK(!is_referrers_class || !needs_access_check); @@ -6068,9 +6297,10 @@ class HLoadClass FINAL : public HInstruction { SetPackedFlag<kFlagNeedsAccessCheck>(needs_access_check); SetPackedFlag<kFlagIsInBootImage>(false); SetPackedFlag<kFlagGenerateClInitCheck>(false); + SetPackedFlag<kFlagValidLoadedClassRTI>(false); } - bool IsClonable() const OVERRIDE { return true; } + bool IsClonable() const override { return true; } void SetLoadKind(LoadKind load_kind); @@ -6078,15 +6308,21 @@ class HLoadClass FINAL : public HInstruction { return GetPackedField<LoadKindField>(); } - bool CanBeMoved() const OVERRIDE { return true; } + bool HasPcRelativeLoadKind() const { + return GetLoadKind() == LoadKind::kBootImageLinkTimePcRelative || + GetLoadKind() == LoadKind::kBootImageRelRo || + GetLoadKind() == LoadKind::kBssEntry; + } - bool InstructionDataEquals(const HInstruction* other) const; + bool CanBeMoved() const override { return true; } - size_t ComputeHashCode() const OVERRIDE { return type_index_.index_; } + bool InstructionDataEquals(const HInstruction* other) const override; - bool CanBeNull() const OVERRIDE { return false; } + size_t ComputeHashCode() const override { return type_index_.index_; } - bool NeedsEnvironment() const OVERRIDE { + bool CanBeNull() const override { return false; } + + bool NeedsEnvironment() const override { return CanCallRuntime(); } @@ -6104,31 +6340,34 @@ class HLoadClass FINAL : public HInstruction { GetLoadKind() == LoadKind::kBssEntry; } - bool CanThrow() const OVERRIDE { + bool CanThrow() const override { return NeedsAccessCheck() || MustGenerateClinitCheck() || // If the class is in the boot image, the lookup in the runtime call cannot throw. - // This keeps CanThrow() consistent between non-PIC (using kBootImageAddress) and - // PIC and subsequently avoids a DCE behavior dependency on the PIC option. 
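The reworked HLoadClass::LoadKind list above drops kBootImageAddress/kBootImageClassTable in favour of kBootImageRelRo and kJitBootImageAddress, and the new HasPcRelativeLoadKind() groups the three kinds that need a PC-relative base. A stripped-down standalone model of just that enum and predicate, mirroring the logic in the diff (the real node of course carries much more state):

enum class LoadKind {
  kReferrersClass,
  kBootImageLinkTimePcRelative,
  kBootImageRelRo,
  kBssEntry,
  kJitBootImageAddress,
  kJitTableAddress,
  kRuntimeCall,
};

constexpr bool HasPcRelativeLoadKind(LoadKind kind) {
  return kind == LoadKind::kBootImageLinkTimePcRelative ||
         kind == LoadKind::kBootImageRelRo ||
         kind == LoadKind::kBssEntry;
}

static_assert(HasPcRelativeLoadKind(LoadKind::kBootImageRelRo), "rel.ro entries are PC-relative");
static_assert(!HasPcRelativeLoadKind(LoadKind::kJitBootImageAddress),
              "JIT embeds the address directly");
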
((GetLoadKind() == LoadKind::kRuntimeCall || GetLoadKind() == LoadKind::kBssEntry) && !IsInBootImage()); } ReferenceTypeInfo GetLoadedClassRTI() { - return loaded_class_rti_; + if (GetPackedFlag<kFlagValidLoadedClassRTI>()) { + // Note: The is_exact flag from the return value should not be used. + return ReferenceTypeInfo::CreateUnchecked(klass_, /* is_exact= */ true); + } else { + return ReferenceTypeInfo::CreateInvalid(); + } } - void SetLoadedClassRTI(ReferenceTypeInfo rti) { - // Make sure we only set exact types (the loaded class should never be merged). - DCHECK(rti.IsExact()); - loaded_class_rti_ = rti; + // Loaded class RTI is marked as valid by RTP if the klass_ is admissible. + void SetValidLoadedClassRTI() REQUIRES_SHARED(Locks::mutator_lock_) { + DCHECK(klass_ != nullptr); + SetPackedFlag<kFlagValidLoadedClassRTI>(true); } dex::TypeIndex GetTypeIndex() const { return type_index_; } const DexFile& GetDexFile() const { return dex_file_; } - bool NeedsDexCacheOfDeclaringClass() const OVERRIDE { + bool NeedsDexCacheOfDeclaringClass() const override { return GetLoadKind() == LoadKind::kRuntimeCall; } @@ -6141,6 +6380,13 @@ class HLoadClass FINAL : public HInstruction { bool IsInBootImage() const { return GetPackedFlag<kFlagIsInBootImage>(); } bool MustGenerateClinitCheck() const { return GetPackedFlag<kFlagGenerateClInitCheck>(); } + bool MustResolveTypeOnSlowPath() const { + // Check that this instruction has a slow path. + DCHECK(GetLoadKind() != LoadKind::kRuntimeCall); // kRuntimeCall calls on main path. + DCHECK(GetLoadKind() == LoadKind::kBssEntry || MustGenerateClinitCheck()); + return GetLoadKind() == LoadKind::kBssEntry; + } + void MarkInBootImage() { SetPackedFlag<kFlagIsInBootImage>(true); } @@ -6148,15 +6394,11 @@ class HLoadClass FINAL : public HInstruction { void AddSpecialInput(HInstruction* special_input); using HInstruction::GetInputRecords; // Keep the const version visible. - ArrayRef<HUserRecord<HInstruction*>> GetInputRecords() OVERRIDE FINAL { + ArrayRef<HUserRecord<HInstruction*>> GetInputRecords() final { return ArrayRef<HUserRecord<HInstruction*>>( &special_input_, (special_input_.GetInstruction() != nullptr) ? 
1u : 0u); } - DataType::Type GetType() const OVERRIDE { - return DataType::Type::kReference; - } - Handle<mirror::Class> GetClass() const { return klass_; } @@ -6175,14 +6417,14 @@ class HLoadClass FINAL : public HInstruction { static constexpr size_t kFieldLoadKind = kFlagGenerateClInitCheck + 1; static constexpr size_t kFieldLoadKindSize = MinimumBitsToStore(static_cast<size_t>(LoadKind::kLast)); - static constexpr size_t kNumberOfLoadClassPackedBits = kFieldLoadKind + kFieldLoadKindSize; + static constexpr size_t kFlagValidLoadedClassRTI = kFieldLoadKind + kFieldLoadKindSize; + static constexpr size_t kNumberOfLoadClassPackedBits = kFlagValidLoadedClassRTI + 1; static_assert(kNumberOfLoadClassPackedBits < kMaxNumberOfPackedBits, "Too many packed fields."); using LoadKindField = BitField<LoadKind, kFieldLoadKind, kFieldLoadKindSize>; static bool HasTypeReference(LoadKind load_kind) { return load_kind == LoadKind::kReferrersClass || load_kind == LoadKind::kBootImageLinkTimePcRelative || - load_kind == LoadKind::kBootImageClassTable || load_kind == LoadKind::kBssEntry || load_kind == LoadKind::kRuntimeCall; } @@ -6203,8 +6445,6 @@ class HLoadClass FINAL : public HInstruction { const DexFile& dex_file_; Handle<mirror::Class> klass_; - - ReferenceTypeInfo loaded_class_rti_; }; std::ostream& operator<<(std::ostream& os, HLoadClass::LoadKind rhs); @@ -6227,15 +6467,15 @@ inline void HLoadClass::AddSpecialInput(HInstruction* special_input) { // The special input is used for PC-relative loads on some architectures, // including literal pool loads, which are PC-relative too. DCHECK(GetLoadKind() == LoadKind::kBootImageLinkTimePcRelative || - GetLoadKind() == LoadKind::kBootImageAddress || - GetLoadKind() == LoadKind::kBootImageClassTable || - GetLoadKind() == LoadKind::kBssEntry) << GetLoadKind(); + GetLoadKind() == LoadKind::kBootImageRelRo || + GetLoadKind() == LoadKind::kBssEntry || + GetLoadKind() == LoadKind::kJitBootImageAddress) << GetLoadKind(); DCHECK(special_input_.GetInstruction() == nullptr); special_input_ = HUserRecord<HInstruction*>(special_input); special_input->AddUseAt(this, 0); } -class HLoadString FINAL : public HInstruction { +class HLoadString final : public HInstruction { public: // Determines how to load the String. enum class LoadKind { @@ -6243,18 +6483,18 @@ class HLoadString FINAL : public HInstruction { // Used for boot image strings referenced by boot image code. kBootImageLinkTimePcRelative, - // Use a known boot image String* address, embedded in the code by the codegen. - // Used for boot image strings referenced by apps in AOT- and JIT-compiled code. - kBootImageAddress, - - // Use a PC-relative load from a boot image InternTable mmapped into the .bss - // of the oat file. - kBootImageInternTable, + // Load from an entry in the .data.bimg.rel.ro using a PC-relative load. + // Used for boot image strings referenced by apps in AOT-compiled code. + kBootImageRelRo, // Load from an entry in the .bss section using a PC-relative load. - // Used for strings outside boot image when .bss is accessible with a PC-relative load. + // Used for strings outside boot image referenced by AOT-compiled app and boot image code. kBssEntry, + // Use a known boot image String* address, embedded in the code by the codegen. + // Used for boot image strings referenced by apps in JIT-compiled code. + kJitBootImageAddress, + // Load from the root table associated with the JIT compiled method. 
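GetLoadedClassRTI()/SetValidLoadedClassRTI() above replace the old loaded_class_rti_ member with a single packed validity bit: the ReferenceTypeInfo is rebuilt from the already-stored klass_ on demand. A minimal standalone sketch of that pattern, with placeholder Klass/ReferenceTypeInfoLike types standing in for Handle<mirror::Class> and ReferenceTypeInfo:

#include <cassert>

struct Klass {};  // stand-in for Handle<mirror::Class>

struct ReferenceTypeInfoLike {
  const Klass* klass;
  bool IsValid() const { return klass != nullptr; }
};

class LoadClassLike {
 public:
  explicit LoadClassLike(const Klass* klass) : klass_(klass) {}

  // Reference type propagation sets the bit once it has checked that klass_ is admissible.
  void SetValidLoadedClassRTI() {
    assert(klass_ != nullptr);
    valid_rti_ = true;
  }

  // Rebuild the RTI on demand instead of caching a whole ReferenceTypeInfo member.
  ReferenceTypeInfoLike GetLoadedClassRTI() const {
    return valid_rti_ ? ReferenceTypeInfoLike{klass_} : ReferenceTypeInfoLike{nullptr};
  }

 private:
  const Klass* klass_;
  bool valid_rti_ = false;  // a packed flag in the real node
};
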
kJitTableAddress, @@ -6269,14 +6509,17 @@ class HLoadString FINAL : public HInstruction { dex::StringIndex string_index, const DexFile& dex_file, uint32_t dex_pc) - : HInstruction(kLoadString, SideEffectsForArchRuntimeCalls(), dex_pc), + : HInstruction(kLoadString, + DataType::Type::kReference, + SideEffectsForArchRuntimeCalls(), + dex_pc), special_input_(HUserRecord<HInstruction*>(current_method)), string_index_(string_index), dex_file_(dex_file) { SetPackedField<LoadKindField>(LoadKind::kRuntimeCall); } - bool IsClonable() const OVERRIDE { return true; } + bool IsClonable() const override { return true; } void SetLoadKind(LoadKind load_kind); @@ -6284,6 +6527,12 @@ class HLoadString FINAL : public HInstruction { return GetPackedField<LoadKindField>(); } + bool HasPcRelativeLoadKind() const { + return GetLoadKind() == LoadKind::kBootImageLinkTimePcRelative || + GetLoadKind() == LoadKind::kBootImageRelRo || + GetLoadKind() == LoadKind::kBssEntry; + } + const DexFile& GetDexFile() const { return dex_file_; } @@ -6300,31 +6549,31 @@ class HLoadString FINAL : public HInstruction { string_ = str; } - bool CanBeMoved() const OVERRIDE { return true; } + bool CanBeMoved() const override { return true; } - bool InstructionDataEquals(const HInstruction* other) const OVERRIDE; + bool InstructionDataEquals(const HInstruction* other) const override; - size_t ComputeHashCode() const OVERRIDE { return string_index_.index_; } + size_t ComputeHashCode() const override { return string_index_.index_; } // Will call the runtime if we need to load the string through // the dex cache and the string is not guaranteed to be there yet. - bool NeedsEnvironment() const OVERRIDE { + bool NeedsEnvironment() const override { LoadKind load_kind = GetLoadKind(); if (load_kind == LoadKind::kBootImageLinkTimePcRelative || - load_kind == LoadKind::kBootImageAddress || - load_kind == LoadKind::kBootImageInternTable || + load_kind == LoadKind::kBootImageRelRo || + load_kind == LoadKind::kJitBootImageAddress || load_kind == LoadKind::kJitTableAddress) { return false; } return true; } - bool NeedsDexCacheOfDeclaringClass() const OVERRIDE { + bool NeedsDexCacheOfDeclaringClass() const override { return GetLoadKind() == LoadKind::kRuntimeCall; } - bool CanBeNull() const OVERRIDE { return false; } - bool CanThrow() const OVERRIDE { return NeedsEnvironment(); } + bool CanBeNull() const override { return false; } + bool CanThrow() const override { return NeedsEnvironment(); } static SideEffects SideEffectsForArchRuntimeCalls() { return SideEffects::CanTriggerGC(); @@ -6333,15 +6582,11 @@ class HLoadString FINAL : public HInstruction { void AddSpecialInput(HInstruction* special_input); using HInstruction::GetInputRecords; // Keep the const version visible. - ArrayRef<HUserRecord<HInstruction*>> GetInputRecords() OVERRIDE FINAL { + ArrayRef<HUserRecord<HInstruction*>> GetInputRecords() final { return ArrayRef<HUserRecord<HInstruction*>>( &special_input_, (special_input_.GetInstruction() != nullptr) ? 1u : 0u); } - DataType::Type GetType() const OVERRIDE { - return DataType::Type::kReference; - } - DECLARE_INSTRUCTION(LoadString); protected: @@ -6389,9 +6634,9 @@ inline void HLoadString::AddSpecialInput(HInstruction* special_input) { // The special input is used for PC-relative loads on some architectures, // including literal pool loads, which are PC-relative too. 
DCHECK(GetLoadKind() == LoadKind::kBootImageLinkTimePcRelative || - GetLoadKind() == LoadKind::kBootImageAddress || - GetLoadKind() == LoadKind::kBootImageInternTable || - GetLoadKind() == LoadKind::kBssEntry) << GetLoadKind(); + GetLoadKind() == LoadKind::kBootImageRelRo || + GetLoadKind() == LoadKind::kBssEntry || + GetLoadKind() == LoadKind::kJitBootImageAddress) << GetLoadKind(); // HLoadString::GetInputRecords() returns an empty array at this point, // so use the GetInputRecords() from the base class to set the input record. DCHECK(special_input_.GetInstruction() == nullptr); @@ -6399,10 +6644,98 @@ inline void HLoadString::AddSpecialInput(HInstruction* special_input) { special_input->AddUseAt(this, 0); } +class HLoadMethodHandle final : public HInstruction { + public: + HLoadMethodHandle(HCurrentMethod* current_method, + uint16_t method_handle_idx, + const DexFile& dex_file, + uint32_t dex_pc) + : HInstruction(kLoadMethodHandle, + DataType::Type::kReference, + SideEffectsForArchRuntimeCalls(), + dex_pc), + special_input_(HUserRecord<HInstruction*>(current_method)), + method_handle_idx_(method_handle_idx), + dex_file_(dex_file) { + } + + using HInstruction::GetInputRecords; // Keep the const version visible. + ArrayRef<HUserRecord<HInstruction*>> GetInputRecords() final { + return ArrayRef<HUserRecord<HInstruction*>>( + &special_input_, (special_input_.GetInstruction() != nullptr) ? 1u : 0u); + } + + bool IsClonable() const override { return true; } + + uint16_t GetMethodHandleIndex() const { return method_handle_idx_; } + + const DexFile& GetDexFile() const { return dex_file_; } + + static SideEffects SideEffectsForArchRuntimeCalls() { + return SideEffects::CanTriggerGC(); + } + + DECLARE_INSTRUCTION(LoadMethodHandle); + + protected: + DEFAULT_COPY_CONSTRUCTOR(LoadMethodHandle); + + private: + // The special input is the HCurrentMethod for kRuntimeCall. + HUserRecord<HInstruction*> special_input_; + + const uint16_t method_handle_idx_; + const DexFile& dex_file_; +}; + +class HLoadMethodType final : public HInstruction { + public: + HLoadMethodType(HCurrentMethod* current_method, + dex::ProtoIndex proto_index, + const DexFile& dex_file, + uint32_t dex_pc) + : HInstruction(kLoadMethodType, + DataType::Type::kReference, + SideEffectsForArchRuntimeCalls(), + dex_pc), + special_input_(HUserRecord<HInstruction*>(current_method)), + proto_index_(proto_index), + dex_file_(dex_file) { + } + + using HInstruction::GetInputRecords; // Keep the const version visible. + ArrayRef<HUserRecord<HInstruction*>> GetInputRecords() final { + return ArrayRef<HUserRecord<HInstruction*>>( + &special_input_, (special_input_.GetInstruction() != nullptr) ? 1u : 0u); + } + + bool IsClonable() const override { return true; } + + dex::ProtoIndex GetProtoIndex() const { return proto_index_; } + + const DexFile& GetDexFile() const { return dex_file_; } + + static SideEffects SideEffectsForArchRuntimeCalls() { + return SideEffects::CanTriggerGC(); + } + + DECLARE_INSTRUCTION(LoadMethodType); + + protected: + DEFAULT_COPY_CONSTRUCTOR(LoadMethodType); + + private: + // The special input is the HCurrentMethod for kRuntimeCall. + HUserRecord<HInstruction*> special_input_; + + const dex::ProtoIndex proto_index_; + const DexFile& dex_file_; +}; + /** * Performs an initialization check on its Class object input. 
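HLoadString above, and the new HLoadMethodHandle/HLoadMethodType nodes, all keep a single optional "special input" (the HCurrentMethod) and expose it through GetInputRecords() as a zero- or one-element view. The same shape, sketched standalone with std::span under the assumption of C++20 (the real code uses ArrayRef and HUserRecord instead):

#include <cassert>
#include <span>

struct Instr {};  // stand-in for HInstruction

class SpecialInputHolder {
 public:
  // Zero inputs while the special input is unset, exactly one afterwards.
  std::span<Instr* const> GetInputs() const {
    return std::span<Instr* const>(&special_input_, special_input_ != nullptr ? 1u : 0u);
  }

  void AddSpecialInput(Instr* input) {
    assert(special_input_ == nullptr);  // the slot may only be filled once
    special_input_ = input;
  }

 private:
  Instr* special_input_ = nullptr;
};
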
*/ -class HClinitCheck FINAL : public HExpression<1> { +class HClinitCheck final : public HExpression<1> { public: HClinitCheck(HLoadClass* constant, uint32_t dex_pc) : HExpression( @@ -6412,19 +6745,18 @@ class HClinitCheck FINAL : public HExpression<1> { dex_pc) { SetRawInputAt(0, constant); } - - bool IsClonable() const OVERRIDE { return true; } - bool CanBeMoved() const OVERRIDE { return true; } - bool InstructionDataEquals(const HInstruction* other ATTRIBUTE_UNUSED) const OVERRIDE { + // TODO: Make ClinitCheck clonable. + bool CanBeMoved() const override { return true; } + bool InstructionDataEquals(const HInstruction* other ATTRIBUTE_UNUSED) const override { return true; } - bool NeedsEnvironment() const OVERRIDE { + bool NeedsEnvironment() const override { // May call runtime to initialize the class. return true; } - bool CanThrow() const OVERRIDE { return true; } + bool CanThrow() const override { return true; } HLoadClass* GetLoadClass() const { DCHECK(InputAt(0)->IsLoadClass()); @@ -6438,7 +6770,7 @@ class HClinitCheck FINAL : public HExpression<1> { DEFAULT_COPY_CONSTRUCTOR(ClinitCheck); }; -class HStaticFieldGet FINAL : public HExpression<1> { +class HStaticFieldGet final : public HExpression<1> { public: HStaticFieldGet(HInstruction* cls, ArtField* field, @@ -6464,15 +6796,15 @@ class HStaticFieldGet FINAL : public HExpression<1> { } - bool IsClonable() const OVERRIDE { return true; } - bool CanBeMoved() const OVERRIDE { return !IsVolatile(); } + bool IsClonable() const override { return true; } + bool CanBeMoved() const override { return !IsVolatile(); } - bool InstructionDataEquals(const HInstruction* other) const OVERRIDE { + bool InstructionDataEquals(const HInstruction* other) const override { const HStaticFieldGet* other_get = other->AsStaticFieldGet(); return GetFieldOffset().SizeValue() == other_get->GetFieldOffset().SizeValue(); } - size_t ComputeHashCode() const OVERRIDE { + size_t ComputeHashCode() const override { return (HInstruction::ComputeHashCode() << 7) | GetFieldOffset().SizeValue(); } @@ -6497,7 +6829,7 @@ class HStaticFieldGet FINAL : public HExpression<1> { const FieldInfo field_info_; }; -class HStaticFieldSet FINAL : public HTemplateInstruction<2> { +class HStaticFieldSet final : public HExpression<2> { public: HStaticFieldSet(HInstruction* cls, HInstruction* value, @@ -6509,9 +6841,9 @@ class HStaticFieldSet FINAL : public HTemplateInstruction<2> { uint16_t declaring_class_def_index, const DexFile& dex_file, uint32_t dex_pc) - : HTemplateInstruction(kStaticFieldSet, - SideEffects::FieldWriteOfType(field_type, is_volatile), - dex_pc), + : HExpression(kStaticFieldSet, + SideEffects::FieldWriteOfType(field_type, is_volatile), + dex_pc), field_info_(field, field_offset, field_type, @@ -6524,7 +6856,7 @@ class HStaticFieldSet FINAL : public HTemplateInstruction<2> { SetRawInputAt(1, value); } - bool IsClonable() const OVERRIDE { return true; } + bool IsClonable() const override { return true; } const FieldInfo& GetFieldInfo() const { return field_info_; } MemberOffset GetFieldOffset() const { return field_info_.GetFieldOffset(); } DataType::Type GetFieldType() const { return field_info_.GetFieldType(); } @@ -6548,7 +6880,7 @@ class HStaticFieldSet FINAL : public HTemplateInstruction<2> { const FieldInfo field_info_; }; -class HUnresolvedInstanceFieldGet FINAL : public HExpression<1> { +class HUnresolvedInstanceFieldGet final : public HExpression<1> { public: HUnresolvedInstanceFieldGet(HInstruction* obj, DataType::Type field_type, @@ -6562,9 +6894,9 @@ 
class HUnresolvedInstanceFieldGet FINAL : public HExpression<1> { SetRawInputAt(0, obj); } - bool IsClonable() const OVERRIDE { return true; } - bool NeedsEnvironment() const OVERRIDE { return true; } - bool CanThrow() const OVERRIDE { return true; } + bool IsClonable() const override { return true; } + bool NeedsEnvironment() const override { return true; } + bool CanThrow() const override { return true; } DataType::Type GetFieldType() const { return GetType(); } uint32_t GetFieldIndex() const { return field_index_; } @@ -6578,16 +6910,14 @@ class HUnresolvedInstanceFieldGet FINAL : public HExpression<1> { const uint32_t field_index_; }; -class HUnresolvedInstanceFieldSet FINAL : public HTemplateInstruction<2> { +class HUnresolvedInstanceFieldSet final : public HExpression<2> { public: HUnresolvedInstanceFieldSet(HInstruction* obj, HInstruction* value, DataType::Type field_type, uint32_t field_index, uint32_t dex_pc) - : HTemplateInstruction(kUnresolvedInstanceFieldSet, - SideEffects::AllExceptGCDependency(), - dex_pc), + : HExpression(kUnresolvedInstanceFieldSet, SideEffects::AllExceptGCDependency(), dex_pc), field_index_(field_index) { SetPackedField<FieldTypeField>(field_type); DCHECK_EQ(DataType::Kind(field_type), DataType::Kind(value->GetType())); @@ -6595,9 +6925,9 @@ class HUnresolvedInstanceFieldSet FINAL : public HTemplateInstruction<2> { SetRawInputAt(1, value); } - bool IsClonable() const OVERRIDE { return true; } - bool NeedsEnvironment() const OVERRIDE { return true; } - bool CanThrow() const OVERRIDE { return true; } + bool IsClonable() const override { return true; } + bool NeedsEnvironment() const override { return true; } + bool CanThrow() const override { return true; } DataType::Type GetFieldType() const { return GetPackedField<FieldTypeField>(); } uint32_t GetFieldIndex() const { return field_index_; } @@ -6620,7 +6950,7 @@ class HUnresolvedInstanceFieldSet FINAL : public HTemplateInstruction<2> { const uint32_t field_index_; }; -class HUnresolvedStaticFieldGet FINAL : public HExpression<0> { +class HUnresolvedStaticFieldGet final : public HExpression<0> { public: HUnresolvedStaticFieldGet(DataType::Type field_type, uint32_t field_index, @@ -6632,9 +6962,9 @@ class HUnresolvedStaticFieldGet FINAL : public HExpression<0> { field_index_(field_index) { } - bool IsClonable() const OVERRIDE { return true; } - bool NeedsEnvironment() const OVERRIDE { return true; } - bool CanThrow() const OVERRIDE { return true; } + bool IsClonable() const override { return true; } + bool NeedsEnvironment() const override { return true; } + bool CanThrow() const override { return true; } DataType::Type GetFieldType() const { return GetType(); } uint32_t GetFieldIndex() const { return field_index_; } @@ -6648,24 +6978,22 @@ class HUnresolvedStaticFieldGet FINAL : public HExpression<0> { const uint32_t field_index_; }; -class HUnresolvedStaticFieldSet FINAL : public HTemplateInstruction<1> { +class HUnresolvedStaticFieldSet final : public HExpression<1> { public: HUnresolvedStaticFieldSet(HInstruction* value, DataType::Type field_type, uint32_t field_index, uint32_t dex_pc) - : HTemplateInstruction(kUnresolvedStaticFieldSet, - SideEffects::AllExceptGCDependency(), - dex_pc), + : HExpression(kUnresolvedStaticFieldSet, SideEffects::AllExceptGCDependency(), dex_pc), field_index_(field_index) { SetPackedField<FieldTypeField>(field_type); DCHECK_EQ(DataType::Kind(field_type), DataType::Kind(value->GetType())); SetRawInputAt(0, value); } - bool IsClonable() const OVERRIDE { return true; } - bool 
NeedsEnvironment() const OVERRIDE { return true; } - bool CanThrow() const OVERRIDE { return true; } + bool IsClonable() const override { return true; } + bool NeedsEnvironment() const override { return true; } + bool CanThrow() const override { return true; } DataType::Type GetFieldType() const { return GetPackedField<FieldTypeField>(); } uint32_t GetFieldIndex() const { return field_index_; } @@ -6689,13 +7017,13 @@ class HUnresolvedStaticFieldSet FINAL : public HTemplateInstruction<1> { }; // Implement the move-exception DEX instruction. -class HLoadException FINAL : public HExpression<0> { +class HLoadException final : public HExpression<0> { public: explicit HLoadException(uint32_t dex_pc = kNoDexPc) : HExpression(kLoadException, DataType::Type::kReference, SideEffects::None(), dex_pc) { } - bool CanBeNull() const OVERRIDE { return false; } + bool CanBeNull() const override { return false; } DECLARE_INSTRUCTION(LoadException); @@ -6705,10 +7033,10 @@ class HLoadException FINAL : public HExpression<0> { // Implicit part of move-exception which clears thread-local exception storage. // Must not be removed because the runtime expects the TLS to get cleared. -class HClearException FINAL : public HTemplateInstruction<0> { +class HClearException final : public HExpression<0> { public: explicit HClearException(uint32_t dex_pc = kNoDexPc) - : HTemplateInstruction(kClearException, SideEffects::AllWrites(), dex_pc) { + : HExpression(kClearException, SideEffects::AllWrites(), dex_pc) { } DECLARE_INSTRUCTION(ClearException); @@ -6717,20 +7045,20 @@ class HClearException FINAL : public HTemplateInstruction<0> { DEFAULT_COPY_CONSTRUCTOR(ClearException); }; -class HThrow FINAL : public HTemplateInstruction<1> { +class HThrow final : public HExpression<1> { public: HThrow(HInstruction* exception, uint32_t dex_pc) - : HTemplateInstruction(kThrow, SideEffects::CanTriggerGC(), dex_pc) { + : HExpression(kThrow, SideEffects::CanTriggerGC(), dex_pc) { SetRawInputAt(0, exception); } - bool IsControlFlow() const OVERRIDE { return true; } + bool IsControlFlow() const override { return true; } - bool NeedsEnvironment() const OVERRIDE { return true; } + bool NeedsEnvironment() const override { return true; } - bool CanThrow() const OVERRIDE { return true; } + bool CanThrow() const override { return true; } - bool AlwaysThrows() const OVERRIDE { return true; } + bool AlwaysThrows() const override { return true; } DECLARE_INSTRUCTION(Throw); @@ -6750,75 +7078,165 @@ enum class TypeCheckKind { kInterfaceCheck, // No optimization yet when checking against an interface. kArrayObjectCheck, // Can just check if the array is not primitive. kArrayCheck, // No optimization yet when checking against a generic array. + kBitstringCheck, // Compare the type check bitstring. kLast = kArrayCheck }; std::ostream& operator<<(std::ostream& os, TypeCheckKind rhs); -class HInstanceOf FINAL : public HExpression<2> { +// Note: HTypeCheckInstruction is just a helper class, not an abstract instruction with an +// `IsTypeCheckInstruction()`. (New virtual methods in the HInstruction class have a high cost.) 
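With the new kBitstringCheck kind, the HTypeCheckInstruction helper defined just below carries two extra HIntConstant inputs, a path-to-root value and a mask, so the generated code can decide a subtype relation with one masked compare against the candidate class's type-check bitstring. The core of that check as a standalone sketch (the bitstring encoding itself is maintained by the runtime and is only assumed here):

#include <cstdint>

// True if the class whose bitstring is `klass_bits` is a subtype of the class encoded by
// (`path_to_root`, `mask`): keep only the bits covering the target's path from the root,
// then compare.
constexpr bool BitstringSubtypeCheck(uint32_t klass_bits,
                                     uint32_t path_to_root,
                                     uint32_t mask) {
  return (klass_bits & mask) == path_to_root;
}

static_assert(BitstringSubtypeCheck(/* klass_bits= */ 0b101101,
                                    /* path_to_root= */ 0b001101,
                                    /* mask= */ 0b001111),
              "a class below the target in the hierarchy passes");
static_assert(!BitstringSubtypeCheck(0b100001, 0b001101, 0b001111),
              "an unrelated class fails");
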
+class HTypeCheckInstruction : public HVariableInputSizeInstruction { public: - HInstanceOf(HInstruction* object, - HLoadClass* target_class, - TypeCheckKind check_kind, - uint32_t dex_pc) - : HExpression(kInstanceOf, - DataType::Type::kBool, - SideEffectsForArchRuntimeCalls(check_kind), - dex_pc) { + HTypeCheckInstruction(InstructionKind kind, + DataType::Type type, + HInstruction* object, + HInstruction* target_class_or_null, + TypeCheckKind check_kind, + Handle<mirror::Class> klass, + uint32_t dex_pc, + ArenaAllocator* allocator, + HIntConstant* bitstring_path_to_root, + HIntConstant* bitstring_mask, + SideEffects side_effects) + : HVariableInputSizeInstruction( + kind, + type, + side_effects, + dex_pc, + allocator, + /* number_of_inputs= */ check_kind == TypeCheckKind::kBitstringCheck ? 4u : 2u, + kArenaAllocTypeCheckInputs), + klass_(klass) { SetPackedField<TypeCheckKindField>(check_kind); SetPackedFlag<kFlagMustDoNullCheck>(true); + SetPackedFlag<kFlagValidTargetClassRTI>(false); SetRawInputAt(0, object); - SetRawInputAt(1, target_class); + SetRawInputAt(1, target_class_or_null); + DCHECK_EQ(check_kind == TypeCheckKind::kBitstringCheck, bitstring_path_to_root != nullptr); + DCHECK_EQ(check_kind == TypeCheckKind::kBitstringCheck, bitstring_mask != nullptr); + if (check_kind == TypeCheckKind::kBitstringCheck) { + DCHECK(target_class_or_null->IsNullConstant()); + SetRawInputAt(2, bitstring_path_to_root); + SetRawInputAt(3, bitstring_mask); + } else { + DCHECK(target_class_or_null->IsLoadClass()); + } } HLoadClass* GetTargetClass() const { + DCHECK_NE(GetTypeCheckKind(), TypeCheckKind::kBitstringCheck); HInstruction* load_class = InputAt(1); DCHECK(load_class->IsLoadClass()); return load_class->AsLoadClass(); } - bool IsClonable() const OVERRIDE { return true; } - bool CanBeMoved() const OVERRIDE { return true; } + uint32_t GetBitstringPathToRoot() const { + DCHECK_EQ(GetTypeCheckKind(), TypeCheckKind::kBitstringCheck); + HInstruction* path_to_root = InputAt(2); + DCHECK(path_to_root->IsIntConstant()); + return static_cast<uint32_t>(path_to_root->AsIntConstant()->GetValue()); + } - bool InstructionDataEquals(const HInstruction* other ATTRIBUTE_UNUSED) const OVERRIDE { - return true; + uint32_t GetBitstringMask() const { + DCHECK_EQ(GetTypeCheckKind(), TypeCheckKind::kBitstringCheck); + HInstruction* mask = InputAt(3); + DCHECK(mask->IsIntConstant()); + return static_cast<uint32_t>(mask->AsIntConstant()->GetValue()); } - bool NeedsEnvironment() const OVERRIDE { - return CanCallRuntime(GetTypeCheckKind()); + bool IsClonable() const override { return true; } + bool CanBeMoved() const override { return true; } + + bool InstructionDataEquals(const HInstruction* other) const override { + DCHECK(other->IsInstanceOf() || other->IsCheckCast()) << other->DebugName(); + return GetPackedFields() == down_cast<const HTypeCheckInstruction*>(other)->GetPackedFields(); } - // Used only in code generation. bool MustDoNullCheck() const { return GetPackedFlag<kFlagMustDoNullCheck>(); } void ClearMustDoNullCheck() { SetPackedFlag<kFlagMustDoNullCheck>(false); } TypeCheckKind GetTypeCheckKind() const { return GetPackedField<TypeCheckKindField>(); } bool IsExactCheck() const { return GetTypeCheckKind() == TypeCheckKind::kExactCheck; } - static bool CanCallRuntime(TypeCheckKind check_kind) { - // Mips currently does runtime calls for any other checks. 
- return check_kind != TypeCheckKind::kExactCheck; + ReferenceTypeInfo GetTargetClassRTI() { + if (GetPackedFlag<kFlagValidTargetClassRTI>()) { + // Note: The is_exact flag from the return value should not be used. + return ReferenceTypeInfo::CreateUnchecked(klass_, /* is_exact= */ true); + } else { + return ReferenceTypeInfo::CreateInvalid(); + } } - static SideEffects SideEffectsForArchRuntimeCalls(TypeCheckKind check_kind) { - return CanCallRuntime(check_kind) ? SideEffects::CanTriggerGC() : SideEffects::None(); + // Target class RTI is marked as valid by RTP if the klass_ is admissible. + void SetValidTargetClassRTI() REQUIRES_SHARED(Locks::mutator_lock_) { + DCHECK(klass_ != nullptr); + SetPackedFlag<kFlagValidTargetClassRTI>(true); } - DECLARE_INSTRUCTION(InstanceOf); + Handle<mirror::Class> GetClass() const { + return klass_; + } protected: - DEFAULT_COPY_CONSTRUCTOR(InstanceOf); + DEFAULT_COPY_CONSTRUCTOR(TypeCheckInstruction); private: - static constexpr size_t kFieldTypeCheckKind = kNumberOfExpressionPackedBits; + static constexpr size_t kFieldTypeCheckKind = kNumberOfGenericPackedBits; static constexpr size_t kFieldTypeCheckKindSize = MinimumBitsToStore(static_cast<size_t>(TypeCheckKind::kLast)); static constexpr size_t kFlagMustDoNullCheck = kFieldTypeCheckKind + kFieldTypeCheckKindSize; - static constexpr size_t kNumberOfInstanceOfPackedBits = kFlagMustDoNullCheck + 1; + static constexpr size_t kFlagValidTargetClassRTI = kFlagMustDoNullCheck + 1; + static constexpr size_t kNumberOfInstanceOfPackedBits = kFlagValidTargetClassRTI + 1; static_assert(kNumberOfInstanceOfPackedBits <= kMaxNumberOfPackedBits, "Too many packed fields."); using TypeCheckKindField = BitField<TypeCheckKind, kFieldTypeCheckKind, kFieldTypeCheckKindSize>; + + Handle<mirror::Class> klass_; }; -class HBoundType FINAL : public HExpression<1> { +class HInstanceOf final : public HTypeCheckInstruction { + public: + HInstanceOf(HInstruction* object, + HInstruction* target_class_or_null, + TypeCheckKind check_kind, + Handle<mirror::Class> klass, + uint32_t dex_pc, + ArenaAllocator* allocator, + HIntConstant* bitstring_path_to_root, + HIntConstant* bitstring_mask) + : HTypeCheckInstruction(kInstanceOf, + DataType::Type::kBool, + object, + target_class_or_null, + check_kind, + klass, + dex_pc, + allocator, + bitstring_path_to_root, + bitstring_mask, + SideEffectsForArchRuntimeCalls(check_kind)) {} + + bool IsClonable() const override { return true; } + + bool NeedsEnvironment() const override { + return CanCallRuntime(GetTypeCheckKind()); + } + + static bool CanCallRuntime(TypeCheckKind check_kind) { + // Mips currently does runtime calls for any other checks. + return check_kind != TypeCheckKind::kExactCheck; + } + + static SideEffects SideEffectsForArchRuntimeCalls(TypeCheckKind check_kind) { + return CanCallRuntime(check_kind) ? 
SideEffects::CanTriggerGC() : SideEffects::None(); + } + + DECLARE_INSTRUCTION(InstanceOf); + + protected: + DEFAULT_COPY_CONSTRUCTOR(InstanceOf); +}; + +class HBoundType final : public HExpression<1> { public: explicit HBoundType(HInstruction* input, uint32_t dex_pc = kNoDexPc) : HExpression(kBoundType, DataType::Type::kReference, SideEffects::None(), dex_pc), @@ -6829,7 +7247,8 @@ class HBoundType FINAL : public HExpression<1> { SetRawInputAt(0, input); } - bool IsClonable() const OVERRIDE { return true; } + bool InstructionDataEquals(const HInstruction* other) const override; + bool IsClonable() const override { return true; } // {Get,Set}Upper* should only be used in reference type propagation. const ReferenceTypeInfo& GetUpperBound() const { return upper_bound_; } @@ -6841,7 +7260,7 @@ class HBoundType FINAL : public HExpression<1> { SetPackedFlag<kFlagCanBeNull>(can_be_null); } - bool CanBeNull() const OVERRIDE { return GetPackedFlag<kFlagCanBeNull>(); } + bool CanBeNull() const override { return GetPackedFlag<kFlagCanBeNull>(); } DECLARE_INSTRUCTION(BoundType); @@ -6851,7 +7270,7 @@ class HBoundType FINAL : public HExpression<1> { private: // Represents the top constraint that can_be_null_ cannot exceed (i.e. if this // is false then CanBeNull() cannot be true). - static constexpr size_t kFlagUpperCanBeNull = kNumberOfExpressionPackedBits; + static constexpr size_t kFlagUpperCanBeNull = kNumberOfGenericPackedBits; static constexpr size_t kFlagCanBeNull = kFlagUpperCanBeNull + 1; static constexpr size_t kNumberOfBoundTypePackedBits = kFlagCanBeNull + 1; static_assert(kNumberOfBoundTypePackedBits <= kMaxNumberOfPackedBits, "Too many packed fields."); @@ -6865,57 +7284,40 @@ class HBoundType FINAL : public HExpression<1> { ReferenceTypeInfo upper_bound_; }; -class HCheckCast FINAL : public HTemplateInstruction<2> { +class HCheckCast final : public HTypeCheckInstruction { public: HCheckCast(HInstruction* object, - HLoadClass* target_class, + HInstruction* target_class_or_null, TypeCheckKind check_kind, - uint32_t dex_pc) - : HTemplateInstruction(kCheckCast, SideEffects::CanTriggerGC(), dex_pc) { - SetPackedField<TypeCheckKindField>(check_kind); - SetPackedFlag<kFlagMustDoNullCheck>(true); - SetRawInputAt(0, object); - SetRawInputAt(1, target_class); - } - - HLoadClass* GetTargetClass() const { - HInstruction* load_class = InputAt(1); - DCHECK(load_class->IsLoadClass()); - return load_class->AsLoadClass(); - } - - bool IsClonable() const OVERRIDE { return true; } - bool CanBeMoved() const OVERRIDE { return true; } - - bool InstructionDataEquals(const HInstruction* other ATTRIBUTE_UNUSED) const OVERRIDE { - return true; - } - - bool NeedsEnvironment() const OVERRIDE { + Handle<mirror::Class> klass, + uint32_t dex_pc, + ArenaAllocator* allocator, + HIntConstant* bitstring_path_to_root, + HIntConstant* bitstring_mask) + : HTypeCheckInstruction(kCheckCast, + DataType::Type::kVoid, + object, + target_class_or_null, + check_kind, + klass, + dex_pc, + allocator, + bitstring_path_to_root, + bitstring_mask, + SideEffects::CanTriggerGC()) {} + + bool IsClonable() const override { return true; } + bool NeedsEnvironment() const override { // Instruction may throw a CheckCastError. 
return true; } - bool CanThrow() const OVERRIDE { return true; } - - bool MustDoNullCheck() const { return GetPackedFlag<kFlagMustDoNullCheck>(); } - void ClearMustDoNullCheck() { SetPackedFlag<kFlagMustDoNullCheck>(false); } - TypeCheckKind GetTypeCheckKind() const { return GetPackedField<TypeCheckKindField>(); } - bool IsExactCheck() const { return GetTypeCheckKind() == TypeCheckKind::kExactCheck; } + bool CanThrow() const override { return true; } DECLARE_INSTRUCTION(CheckCast); protected: DEFAULT_COPY_CONSTRUCTOR(CheckCast); - - private: - static constexpr size_t kFieldTypeCheckKind = kNumberOfGenericPackedBits; - static constexpr size_t kFieldTypeCheckKindSize = - MinimumBitsToStore(static_cast<size_t>(TypeCheckKind::kLast)); - static constexpr size_t kFlagMustDoNullCheck = kFieldTypeCheckKind + kFieldTypeCheckKindSize; - static constexpr size_t kNumberOfCheckCastPackedBits = kFlagMustDoNullCheck + 1; - static_assert(kNumberOfCheckCastPackedBits <= kMaxNumberOfPackedBits, "Too many packed fields."); - using TypeCheckKindField = BitField<TypeCheckKind, kFieldTypeCheckKind, kFieldTypeCheckKindSize>; }; /** @@ -6944,17 +7346,16 @@ enum MemBarrierKind { }; std::ostream& operator<<(std::ostream& os, const MemBarrierKind& kind); -class HMemoryBarrier FINAL : public HTemplateInstruction<0> { +class HMemoryBarrier final : public HExpression<0> { public: explicit HMemoryBarrier(MemBarrierKind barrier_kind, uint32_t dex_pc = kNoDexPc) - : HTemplateInstruction( - kMemoryBarrier, - SideEffects::AllWritesAndReads(), // Assume write/read on all fields/arrays. - dex_pc) { + : HExpression(kMemoryBarrier, + SideEffects::AllWritesAndReads(), // Assume write/read on all fields/arrays. + dex_pc) { SetPackedField<BarrierKindField>(barrier_kind); } - bool IsClonable() const OVERRIDE { return true; } + bool IsClonable() const override { return true; } MemBarrierKind GetBarrierKind() { return GetPackedField<BarrierKindField>(); } @@ -7027,10 +7428,10 @@ class HMemoryBarrier FINAL : public HTemplateInstruction<0> { // } // // See also: -// * CompilerDriver::RequiresConstructorBarrier +// * DexCompilationUnit::RequiresConstructorBarrier // * QuasiAtomic::ThreadFenceForConstructor // -class HConstructorFence FINAL : public HVariableInputSizeInstruction { +class HConstructorFence final : public HVariableInputSizeInstruction { // A fence has variable inputs because the inputs can be removed // after prepare_for_register_allocation phase. // (TODO: In the future a fence could freeze multiple objects @@ -7073,7 +7474,7 @@ class HConstructorFence FINAL : public HVariableInputSizeInstruction { SideEffects::AllReads(), dex_pc, allocator, - /* number_of_inputs */ 1, + /* number_of_inputs= */ 1, kArenaAllocConstructorFenceInputs) { DCHECK(fence_object != nullptr); SetRawInputAt(0, fence_object); @@ -7127,7 +7528,7 @@ class HConstructorFence FINAL : public HVariableInputSizeInstruction { DEFAULT_COPY_CONSTRUCTOR(ConstructorFence); }; -class HMonitorOperation FINAL : public HTemplateInstruction<1> { +class HMonitorOperation final : public HExpression<1> { public: enum class OperationKind { kEnter, @@ -7136,18 +7537,17 @@ class HMonitorOperation FINAL : public HTemplateInstruction<1> { }; HMonitorOperation(HInstruction* object, OperationKind kind, uint32_t dex_pc) - : HTemplateInstruction( - kMonitorOperation, - SideEffects::AllExceptGCDependency(), // Assume write/read on all fields/arrays. - dex_pc) { + : HExpression(kMonitorOperation, + SideEffects::AllExceptGCDependency(), // Assume write/read on all fields/arrays. 
+ dex_pc) { SetPackedField<OperationKindField>(kind); SetRawInputAt(0, object); } // Instruction may go into runtime, so we need an environment. - bool NeedsEnvironment() const OVERRIDE { return true; } + bool NeedsEnvironment() const override { return true; } - bool CanThrow() const OVERRIDE { + bool CanThrow() const override { // Verifier guarantees that monitor-exit cannot throw. // This is important because it allows the HGraphBuilder to remove // a dead throw-catch loop generated for `synchronized` blocks/methods. @@ -7173,7 +7573,7 @@ class HMonitorOperation FINAL : public HTemplateInstruction<1> { using OperationKindField = BitField<OperationKind, kFieldOperationKind, kFieldOperationKindSize>; }; -class HSelect FINAL : public HExpression<3> { +class HSelect final : public HExpression<3> { public: HSelect(HInstruction* condition, HInstruction* true_value, @@ -7191,17 +7591,17 @@ class HSelect FINAL : public HExpression<3> { SetRawInputAt(2, condition); } - bool IsClonable() const OVERRIDE { return true; } + bool IsClonable() const override { return true; } HInstruction* GetFalseValue() const { return InputAt(0); } HInstruction* GetTrueValue() const { return InputAt(1); } HInstruction* GetCondition() const { return InputAt(2); } - bool CanBeMoved() const OVERRIDE { return true; } - bool InstructionDataEquals(const HInstruction* other ATTRIBUTE_UNUSED) const OVERRIDE { + bool CanBeMoved() const override { return true; } + bool InstructionDataEquals(const HInstruction* other ATTRIBUTE_UNUSED) const override { return true; } - bool CanBeNull() const OVERRIDE { + bool CanBeNull() const override { return GetTrueValue()->CanBeNull() || GetFalseValue()->CanBeNull(); } @@ -7289,10 +7689,10 @@ std::ostream& operator<<(std::ostream& os, const MoveOperands& rhs); static constexpr size_t kDefaultNumberOfMoves = 4; -class HParallelMove FINAL : public HTemplateInstruction<0> { +class HParallelMove final : public HExpression<0> { public: explicit HParallelMove(ArenaAllocator* allocator, uint32_t dex_pc = kNoDexPc) - : HTemplateInstruction(kParallelMove, SideEffects::None(), dex_pc), + : HExpression(kParallelMove, SideEffects::None(), dex_pc), moves_(allocator->Adapter(kArenaAllocMoveOperands)) { moves_.reserve(kDefaultNumberOfMoves); } @@ -7351,7 +7751,7 @@ class HParallelMove FINAL : public HTemplateInstruction<0> { // never used across anything that can trigger GC. // The result of this instruction is not a pointer in the sense of `DataType::Type::kreference`. // So we represent it by the type `DataType::Type::kInt`. 
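HIntermediateAddress, defined just below, folds a base object address and a constant data offset into one value so that several accesses can reuse it; since a moving GC could relocate the base, the result is deliberately typed as an integer rather than a reference and must never be live across anything that can trigger GC. A hypothetical lowering, with made-up names, of two array reads sharing one such address:

#include <cstdint>

// addr = base + data_offset        <- HIntermediateAddress (an integer, not a reference)
// x    = load(addr + 4 * index0)   <- first HArrayGet reusing addr
// y    = load(addr + 4 * index1)   <- second HArrayGet reusing addr
// No safepoint may occur between computing addr and the loads that consume it.
uintptr_t IntermediateAddress(uintptr_t base, uintptr_t data_offset) {
  return base + data_offset;
}
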
-class HIntermediateAddress FINAL : public HExpression<2> { +class HIntermediateAddress final : public HExpression<2> { public: HIntermediateAddress(HInstruction* base_address, HInstruction* offset, uint32_t dex_pc) : HExpression(kIntermediateAddress, @@ -7365,12 +7765,12 @@ class HIntermediateAddress FINAL : public HExpression<2> { SetRawInputAt(1, offset); } - bool IsClonable() const OVERRIDE { return true; } - bool CanBeMoved() const OVERRIDE { return true; } - bool InstructionDataEquals(const HInstruction* other ATTRIBUTE_UNUSED) const OVERRIDE { + bool IsClonable() const override { return true; } + bool CanBeMoved() const override { return true; } + bool InstructionDataEquals(const HInstruction* other ATTRIBUTE_UNUSED) const override { return true; } - bool IsActualObject() const OVERRIDE { return false; } + bool IsActualObject() const override { return false; } HInstruction* GetBaseAddress() const { return InputAt(0); } HInstruction* GetOffset() const { return InputAt(1); } @@ -7392,7 +7792,7 @@ class HIntermediateAddress FINAL : public HExpression<2> { #ifdef ART_ENABLE_CODEGEN_mips #include "nodes_mips.h" #endif -#ifdef ART_ENABLE_CODEGEN_x86 +#if defined(ART_ENABLE_CODEGEN_x86) || defined(ART_ENABLE_CODEGEN_x86_64) #include "nodes_x86.h" #endif @@ -7443,7 +7843,7 @@ class HGraphDelegateVisitor : public HGraphVisitor { // Visit functions that delegate to to super class. #define DECLARE_VISIT_INSTRUCTION(name, super) \ - void Visit##name(H##name* instr) OVERRIDE { Visit##super(instr); } + void Visit##name(H##name* instr) override { Visit##super(instr); } FOR_EACH_INSTRUCTION(DECLARE_VISIT_INSTRUCTION) @@ -7465,7 +7865,7 @@ class CloneAndReplaceInstructionVisitor : public HGraphDelegateVisitor { explicit CloneAndReplaceInstructionVisitor(HGraph* graph) : HGraphDelegateVisitor(graph), instr_replaced_by_clones_count_(0) {} - void VisitInstruction(HInstruction* instruction) OVERRIDE { + void VisitInstruction(HInstruction* instruction) override { if (instruction->IsClonable()) { ReplaceInstrOrPhiByClone(instruction); instr_replaced_by_clones_count_++; @@ -7584,8 +7984,30 @@ inline bool IsZeroBitPattern(HInstruction* instruction) { return instruction->IsConstant() && instruction->AsConstant()->IsZeroBitPattern(); } +// Implement HInstruction::Is##type() for concrete instructions. #define INSTRUCTION_TYPE_CHECK(type, super) \ - inline bool HInstruction::Is##type() const { return GetKind() == k##type; } \ + inline bool HInstruction::Is##type() const { return GetKind() == k##type; } + FOR_EACH_CONCRETE_INSTRUCTION(INSTRUCTION_TYPE_CHECK) +#undef INSTRUCTION_TYPE_CHECK + +// Implement HInstruction::Is##type() for abstract instructions. +#define INSTRUCTION_TYPE_CHECK_RESULT(type, super) \ + std::is_base_of<BaseType, H##type>::value, +#define INSTRUCTION_TYPE_CHECK(type, super) \ + inline bool HInstruction::Is##type() const { \ + DCHECK_LT(GetKind(), kLastInstructionKind); \ + using BaseType = H##type; \ + static constexpr bool results[] = { \ + FOR_EACH_CONCRETE_INSTRUCTION(INSTRUCTION_TYPE_CHECK_RESULT) \ + }; \ + return results[static_cast<size_t>(GetKind())]; \ + } + + FOR_EACH_ABSTRACT_INSTRUCTION(INSTRUCTION_TYPE_CHECK) +#undef INSTRUCTION_TYPE_CHECK +#undef INSTRUCTION_TYPE_CHECK_RESULT + +#define INSTRUCTION_TYPE_CAST(type, super) \ inline const H##type* HInstruction::As##type() const { \ return Is##type() ? down_cast<const H##type*>(this) : nullptr; \ } \ @@ -7593,8 +8015,9 @@ inline bool IsZeroBitPattern(HInstruction* instruction) { return Is##type() ? 
static_cast<H##type*>(this) : nullptr; \ } - FOR_EACH_CONCRETE_INSTRUCTION(INSTRUCTION_TYPE_CHECK) -#undef INSTRUCTION_TYPE_CHECK + FOR_EACH_INSTRUCTION(INSTRUCTION_TYPE_CAST) +#undef INSTRUCTION_TYPE_CAST + // Create space in `blocks` for adding `number_of_new_blocks` entries // starting at location `at`. Blocks after `at` are moved accordingly. diff --git a/compiler/optimizing/nodes_mips.h b/compiler/optimizing/nodes_mips.h index d0e0fef946..4993f5737e 100644 --- a/compiler/optimizing/nodes_mips.h +++ b/compiler/optimizing/nodes_mips.h @@ -30,7 +30,7 @@ class HMipsComputeBaseMethodAddress : public HExpression<0> { kNoDexPc) { } - bool CanBeMoved() const OVERRIDE { return true; } + bool CanBeMoved() const override { return true; } DECLARE_INSTRUCTION(MipsComputeBaseMethodAddress); @@ -39,21 +39,21 @@ class HMipsComputeBaseMethodAddress : public HExpression<0> { }; // Mips version of HPackedSwitch that holds a pointer to the base method address. -class HMipsPackedSwitch FINAL : public HTemplateInstruction<2> { +class HMipsPackedSwitch final : public HExpression<2> { public: HMipsPackedSwitch(int32_t start_value, int32_t num_entries, HInstruction* input, HMipsComputeBaseMethodAddress* method_base, uint32_t dex_pc) - : HTemplateInstruction(kMipsPackedSwitch, SideEffects::None(), dex_pc), + : HExpression(kMipsPackedSwitch, SideEffects::None(), dex_pc), start_value_(start_value), num_entries_(num_entries) { SetRawInputAt(0, input); SetRawInputAt(1, method_base); } - bool IsControlFlow() const OVERRIDE { return true; } + bool IsControlFlow() const override { return true; } int32_t GetStartValue() const { return start_value_; } @@ -91,7 +91,7 @@ class HMipsPackedSwitch FINAL : public HTemplateInstruction<2> { // // Note: as the instruction doesn't involve base array address into computations it has no side // effects. 
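In the reworked macros above, Is##type() for a concrete instruction stays a plain kind comparison, while the abstract-instruction version now indexes a per-abstract-type constexpr table of std::is_base_of results by instruction kind, avoiding new virtual methods on HInstruction. The same lookup-table trick, standalone with two dummy kinds (HBase/HBinOp/HAdd/HGoto are illustrative names only):

#include <cstddef>
#include <type_traits>

struct HBase {};           // abstract root
struct HBinOp : HBase {};  // abstract
struct HAdd : HBinOp {};   // concrete, kind 0
struct HGoto : HBase {};   // concrete, kind 1

enum Kind : size_t { kAdd = 0, kGoto = 1 };

template <typename BaseType>
constexpr bool IsKindA(Kind kind) {
  // One entry per concrete kind, in kind order: "does this concrete type derive from BaseType?"
  constexpr bool results[] = {
      std::is_base_of<BaseType, HAdd>::value,   // kAdd
      std::is_base_of<BaseType, HGoto>::value,  // kGoto
  };
  return results[static_cast<size_t>(kind)];
}

static_assert(IsKindA<HBinOp>(kAdd), "HAdd is a binary operation");
static_assert(!IsKindA<HBinOp>(kGoto), "HGoto is not");
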
-class HIntermediateArrayAddressIndex FINAL : public HExpression<2> { +class HIntermediateArrayAddressIndex final : public HExpression<2> { public: HIntermediateArrayAddressIndex(HInstruction* index, HInstruction* shift, uint32_t dex_pc) : HExpression(kIntermediateArrayAddressIndex, @@ -102,11 +102,11 @@ class HIntermediateArrayAddressIndex FINAL : public HExpression<2> { SetRawInputAt(1, shift); } - bool CanBeMoved() const OVERRIDE { return true; } - bool InstructionDataEquals(const HInstruction* other ATTRIBUTE_UNUSED) const OVERRIDE { + bool CanBeMoved() const override { return true; } + bool InstructionDataEquals(const HInstruction* other ATTRIBUTE_UNUSED) const override { return true; } - bool IsActualObject() const OVERRIDE { return false; } + bool IsActualObject() const override { return false; } HInstruction* GetIndex() const { return InputAt(0); } HInstruction* GetShift() const { return InputAt(1); } diff --git a/compiler/optimizing/nodes_shared.h b/compiler/optimizing/nodes_shared.h index 29358e1141..7dcac1787e 100644 --- a/compiler/optimizing/nodes_shared.h +++ b/compiler/optimizing/nodes_shared.h @@ -24,7 +24,7 @@ namespace art { -class HMultiplyAccumulate FINAL : public HExpression<3> { +class HMultiplyAccumulate final : public HExpression<3> { public: HMultiplyAccumulate(DataType::Type type, InstructionKind op, @@ -39,14 +39,14 @@ class HMultiplyAccumulate FINAL : public HExpression<3> { SetRawInputAt(kInputMulRightIndex, mul_right); } - bool IsClonable() const OVERRIDE { return true; } + bool IsClonable() const override { return true; } static constexpr int kInputAccumulatorIndex = 0; static constexpr int kInputMulLeftIndex = 1; static constexpr int kInputMulRightIndex = 2; - bool CanBeMoved() const OVERRIDE { return true; } - bool InstructionDataEquals(const HInstruction* other) const OVERRIDE { + bool CanBeMoved() const override { return true; } + bool InstructionDataEquals(const HInstruction* other) const override { return op_kind_ == other->AsMultiplyAccumulate()->op_kind_; } @@ -62,7 +62,7 @@ class HMultiplyAccumulate FINAL : public HExpression<3> { const InstructionKind op_kind_; }; -class HBitwiseNegatedRight FINAL : public HBinaryOperation { +class HBitwiseNegatedRight final : public HBinaryOperation { public: HBitwiseNegatedRight(DataType::Type result_type, InstructionKind op, @@ -97,21 +97,21 @@ class HBitwiseNegatedRight FINAL : public HBinaryOperation { } } - HConstant* Evaluate(HIntConstant* x, HIntConstant* y) const OVERRIDE { + HConstant* Evaluate(HIntConstant* x, HIntConstant* y) const override { return GetBlock()->GetGraph()->GetIntConstant( Compute(x->GetValue(), y->GetValue()), GetDexPc()); } - HConstant* Evaluate(HLongConstant* x, HLongConstant* y) const OVERRIDE { + HConstant* Evaluate(HLongConstant* x, HLongConstant* y) const override { return GetBlock()->GetGraph()->GetLongConstant( Compute(x->GetValue(), y->GetValue()), GetDexPc()); } HConstant* Evaluate(HFloatConstant* x ATTRIBUTE_UNUSED, - HFloatConstant* y ATTRIBUTE_UNUSED) const OVERRIDE { + HFloatConstant* y ATTRIBUTE_UNUSED) const override { LOG(FATAL) << DebugName() << " is not defined for float values"; UNREACHABLE(); } HConstant* Evaluate(HDoubleConstant* x ATTRIBUTE_UNUSED, - HDoubleConstant* y ATTRIBUTE_UNUSED) const OVERRIDE { + HDoubleConstant* y ATTRIBUTE_UNUSED) const override { LOG(FATAL) << DebugName() << " is not defined for double values"; UNREACHABLE(); } @@ -145,7 +145,7 @@ class HBitwiseNegatedRight FINAL : public HBinaryOperation { // // Note: as the instruction doesn't involve 
base array address into computations it has no side // effects (in comparison of HIntermediateAddress). -class HIntermediateAddressIndex FINAL : public HExpression<3> { +class HIntermediateAddressIndex final : public HExpression<3> { public: HIntermediateAddressIndex( HInstruction* index, HInstruction* offset, HInstruction* shift, uint32_t dex_pc) @@ -158,12 +158,12 @@ class HIntermediateAddressIndex FINAL : public HExpression<3> { SetRawInputAt(2, shift); } - bool IsClonable() const OVERRIDE { return true; } - bool CanBeMoved() const OVERRIDE { return true; } - bool InstructionDataEquals(const HInstruction* other ATTRIBUTE_UNUSED) const OVERRIDE { + bool IsClonable() const override { return true; } + bool CanBeMoved() const override { return true; } + bool InstructionDataEquals(const HInstruction* other ATTRIBUTE_UNUSED) const override { return true; } - bool IsActualObject() const OVERRIDE { return false; } + bool IsActualObject() const override { return false; } HInstruction* GetIndex() const { return InputAt(0); } HInstruction* GetOffset() const { return InputAt(1); } @@ -175,7 +175,7 @@ class HIntermediateAddressIndex FINAL : public HExpression<3> { DEFAULT_COPY_CONSTRUCTOR(IntermediateAddressIndex); }; -class HDataProcWithShifterOp FINAL : public HExpression<2> { +class HDataProcWithShifterOp final : public HExpression<2> { public: enum OpKind { kLSL, // Logical shift left. @@ -212,9 +212,9 @@ class HDataProcWithShifterOp FINAL : public HExpression<2> { SetRawInputAt(1, right); } - bool IsClonable() const OVERRIDE { return true; } - bool CanBeMoved() const OVERRIDE { return true; } - bool InstructionDataEquals(const HInstruction* other_instr) const OVERRIDE { + bool IsClonable() const override { return true; } + bool CanBeMoved() const override { return true; } + bool InstructionDataEquals(const HInstruction* other_instr) const override { const HDataProcWithShifterOp* other = other_instr->AsDataProcWithShifterOp(); return instr_kind_ == other->instr_kind_ && op_kind_ == other->op_kind_ && diff --git a/compiler/optimizing/nodes_vector.h b/compiler/optimizing/nodes_vector.h index 20f6cf01ed..efe4d6b000 100644 --- a/compiler/optimizing/nodes_vector.h +++ b/compiler/optimizing/nodes_vector.h @@ -79,13 +79,14 @@ class HVecOperation : public HVariableInputSizeInstruction { size_t vector_length, uint32_t dex_pc) : HVariableInputSizeInstruction(kind, + kSIMDType, side_effects, dex_pc, allocator, number_of_inputs, kArenaAllocVectorNode), vector_length_(vector_length) { - SetPackedField<TypeField>(packed_type); + SetPackedField<PackedTypeField>(packed_type); DCHECK_LT(1u, vector_length); } @@ -99,14 +100,9 @@ class HVecOperation : public HVariableInputSizeInstruction { return vector_length_ * DataType::Size(GetPackedType()); } - // Returns the type of the vector operation. - DataType::Type GetType() const OVERRIDE { - return kSIMDType; - } - // Returns the true component type packed in a vector. DataType::Type GetPackedType() const { - return GetPackedField<TypeField>(); + return GetPackedField<PackedTypeField>(); } // Assumes vector nodes cannot be moved by default. Each concrete implementation @@ -121,12 +117,12 @@ class HVecOperation : public HVariableInputSizeInstruction { // Note: For newly introduced vector instructions HScheduler${ARCH}::IsSchedulingBarrier must be // altered to return true if the instruction might reside outside the SIMD loop body since SIMD // registers are not kept alive across vector loop boundaries (yet). 
- bool CanBeMoved() const OVERRIDE { return false; } + bool CanBeMoved() const override { return false; } // Tests if all data of a vector node (vector length and packed type) is equal. // Each concrete implementation that adds more fields should test equality of // those fields in its own method *and* call all super methods. - bool InstructionDataEquals(const HInstruction* other) const OVERRIDE { + bool InstructionDataEquals(const HInstruction* other) const override { DCHECK(other->IsVecOperation()); const HVecOperation* o = other->AsVecOperation(); return GetVectorLength() == o->GetVectorLength() && GetPackedType() == o->GetPackedType(); @@ -185,12 +181,12 @@ class HVecOperation : public HVariableInputSizeInstruction { protected: // Additional packed bits. - static constexpr size_t kFieldType = HInstruction::kNumberOfGenericPackedBits; - static constexpr size_t kFieldTypeSize = + static constexpr size_t kFieldPackedType = HInstruction::kNumberOfGenericPackedBits; + static constexpr size_t kFieldPackedTypeSize = MinimumBitsToStore(static_cast<size_t>(DataType::Type::kLast)); - static constexpr size_t kNumberOfVectorOpPackedBits = kFieldType + kFieldTypeSize; + static constexpr size_t kNumberOfVectorOpPackedBits = kFieldPackedType + kFieldPackedTypeSize; static_assert(kNumberOfVectorOpPackedBits <= kMaxNumberOfPackedBits, "Too many packed fields."); - using TypeField = BitField<DataType::Type, kFieldType, kFieldTypeSize>; + using PackedTypeField = BitField<DataType::Type, kFieldPackedType, kFieldPackedTypeSize>; DEFAULT_COPY_CONSTRUCTOR(VecOperation); @@ -211,7 +207,7 @@ class HVecUnaryOperation : public HVecOperation { allocator, packed_type, SideEffects::None(), - /* number_of_inputs */ 1, + /* number_of_inputs= */ 1, vector_length, dex_pc) { SetRawInputAt(0, input); @@ -239,7 +235,7 @@ class HVecBinaryOperation : public HVecOperation { allocator, packed_type, SideEffects::None(), - /* number_of_inputs */ 2, + /* number_of_inputs= */ 2, vector_length, dex_pc) { SetRawInputAt(0, left); @@ -284,7 +280,7 @@ class HVecMemoryOperation : public HVecOperation { HInstruction* GetArray() const { return InputAt(0); } HInstruction* GetIndex() const { return InputAt(1); } - bool InstructionDataEquals(const HInstruction* other) const OVERRIDE { + bool InstructionDataEquals(const HInstruction* other) const override { DCHECK(other->IsVecMemoryOperation()); const HVecMemoryOperation* o = other->AsVecMemoryOperation(); return HVecOperation::InstructionDataEquals(o) && GetAlignment() == o->GetAlignment(); @@ -319,7 +315,7 @@ inline static bool HasConsistentPackedTypes(HInstruction* input, DataType::Type // Replicates the given scalar into a vector, // viz. replicate(x) = [ x, .. , x ]. -class HVecReplicateScalar FINAL : public HVecUnaryOperation { +class HVecReplicateScalar final : public HVecUnaryOperation { public: HVecReplicateScalar(ArenaAllocator* allocator, HInstruction* scalar, @@ -333,7 +329,7 @@ class HVecReplicateScalar FINAL : public HVecUnaryOperation { // A replicate needs to stay in place, since SIMD registers are not // kept alive across vector loop boundaries (yet). - bool CanBeMoved() const OVERRIDE { return false; } + bool CanBeMoved() const override { return false; } DECLARE_INSTRUCTION(VecReplicateScalar); @@ -345,7 +341,7 @@ class HVecReplicateScalar FINAL : public HVecUnaryOperation { // viz. extract[ x1, .. , xn ] = x_i. // // TODO: for now only i == 1 case supported. 
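A side note on the packed-type change above: the vector base class now fixes the node's own type to the SIMD type in its constructor and keeps the element type in the renamed PackedTypeField bit field. The following is a minimal standalone sketch of that bit-field idea, using simplified stand-ins rather than ART's BitField or DataType templates:

#include <cassert>
#include <cstdint>

// Simplified stand-in for DataType::Type.
enum class PackedType : uint32_t {
  kBool, kInt8, kUint8, kInt16, kInt32, kInt64, kFloat32, kFloat64, kLast = kFloat64
};

class VecNodeSketch {
 public:
  explicit VecNodeSketch(PackedType packed_type)
      : packed_fields_((static_cast<uint32_t>(packed_type) & kPackedTypeMask) << kPackedTypeShift) {}

  // The node's own HInstruction type stays "SIMD value"; the element type is a
  // separate query, mirroring GetPackedType() reading PackedTypeField.
  PackedType GetPackedType() const {
    return static_cast<PackedType>((packed_fields_ >> kPackedTypeShift) & kPackedTypeMask);
  }

 private:
  static constexpr uint32_t kPackedTypeShift = 0u;   // analogue of kFieldPackedType
  static constexpr uint32_t kPackedTypeMask = 0xFu;  // wide enough for PackedType::kLast
  uint32_t packed_fields_;
};

int main() {
  VecNodeSketch node(PackedType::kInt16);
  assert(node.GetPackedType() == PackedType::kInt16);
  return 0;
}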
-class HVecExtractScalar FINAL : public HVecUnaryOperation { +class HVecExtractScalar final : public HVecUnaryOperation { public: HVecExtractScalar(ArenaAllocator* allocator, HInstruction* input, @@ -358,16 +354,14 @@ class HVecExtractScalar FINAL : public HVecUnaryOperation { DCHECK(HasConsistentPackedTypes(input, packed_type)); DCHECK_LT(index, vector_length); DCHECK_EQ(index, 0u); - } - - // Yields a single component in the vector. - DataType::Type GetType() const OVERRIDE { - return GetPackedType(); + // Yields a single component in the vector. + // Overrides the kSIMDType set by the VecOperation constructor. + SetPackedField<TypeField>(packed_type); } // An extract needs to stay in place, since SIMD registers are not // kept alive across vector loop boundaries (yet). - bool CanBeMoved() const OVERRIDE { return false; } + bool CanBeMoved() const override { return false; } DECLARE_INSTRUCTION(VecExtractScalar); @@ -378,7 +372,7 @@ class HVecExtractScalar FINAL : public HVecUnaryOperation { // Reduces the given vector into the first element as sum/min/max, // viz. sum-reduce[ x1, .. , xn ] = [ y, ---- ], where y = sum xi // and the "-" denotes "don't care" (implementation dependent). -class HVecReduce FINAL : public HVecUnaryOperation { +class HVecReduce final : public HVecUnaryOperation { public: enum ReductionKind { kSum = 1, @@ -390,21 +384,21 @@ class HVecReduce FINAL : public HVecUnaryOperation { HInstruction* input, DataType::Type packed_type, size_t vector_length, - ReductionKind kind, + ReductionKind reduction_kind, uint32_t dex_pc) : HVecUnaryOperation(kVecReduce, allocator, input, packed_type, vector_length, dex_pc), - kind_(kind) { + reduction_kind_(reduction_kind) { DCHECK(HasConsistentPackedTypes(input, packed_type)); } - ReductionKind GetKind() const { return kind_; } + ReductionKind GetReductionKind() const { return reduction_kind_; } - bool CanBeMoved() const OVERRIDE { return true; } + bool CanBeMoved() const override { return true; } - bool InstructionDataEquals(const HInstruction* other) const OVERRIDE { + bool InstructionDataEquals(const HInstruction* other) const override { DCHECK(other->IsVecReduce()); const HVecReduce* o = other->AsVecReduce(); - return HVecOperation::InstructionDataEquals(o) && GetKind() == o->GetKind(); + return HVecOperation::InstructionDataEquals(o) && GetReductionKind() == o->GetReductionKind(); } DECLARE_INSTRUCTION(VecReduce); @@ -413,12 +407,12 @@ class HVecReduce FINAL : public HVecUnaryOperation { DEFAULT_COPY_CONSTRUCTOR(VecReduce); private: - const ReductionKind kind_; + const ReductionKind reduction_kind_; }; // Converts every component in the vector, // viz. cnv[ x1, .. , xn ] = [ cnv(x1), .. , cnv(xn) ]. -class HVecCnv FINAL : public HVecUnaryOperation { +class HVecCnv final : public HVecUnaryOperation { public: HVecCnv(ArenaAllocator* allocator, HInstruction* input, @@ -433,7 +427,7 @@ class HVecCnv FINAL : public HVecUnaryOperation { DataType::Type GetInputType() const { return InputAt(0)->AsVecOperation()->GetPackedType(); } DataType::Type GetResultType() const { return GetPackedType(); } - bool CanBeMoved() const OVERRIDE { return true; } + bool CanBeMoved() const override { return true; } DECLARE_INSTRUCTION(VecCnv); @@ -443,7 +437,7 @@ class HVecCnv FINAL : public HVecUnaryOperation { // Negates every component in the vector, // viz. neg[ x1, .. , xn ] = [ -x1, .. , -xn ]. 
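For the reduction semantics documented for HVecReduce above (sum/min/max collapsed into the first lane), a scalar reference sketch may help; the array and names below are illustrative stand-ins for the SIMD lanes, not ART code:

#include <algorithm>
#include <array>
#include <cstddef>
#include <cstdint>

enum class ReductionKind { kSum, kMin, kMax };  // illustrative stand-in

template <std::size_t N>
int32_t ReduceReference(const std::array<int32_t, N>& lanes, ReductionKind kind) {
  static_assert(N > 0, "need at least one lane");
  int32_t acc = lanes[0];
  for (std::size_t i = 1; i < N; ++i) {
    switch (kind) {
      case ReductionKind::kSum: acc += lanes[i]; break;
      case ReductionKind::kMin: acc = std::min(acc, lanes[i]); break;
      case ReductionKind::kMax: acc = std::max(acc, lanes[i]); break;
    }
  }
  return acc;  // Conceptually written into lane 0; the other lanes are "don't care".
}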
-class HVecNeg FINAL : public HVecUnaryOperation { +class HVecNeg final : public HVecUnaryOperation { public: HVecNeg(ArenaAllocator* allocator, HInstruction* input, @@ -454,7 +448,7 @@ class HVecNeg FINAL : public HVecUnaryOperation { DCHECK(HasConsistentPackedTypes(input, packed_type)); } - bool CanBeMoved() const OVERRIDE { return true; } + bool CanBeMoved() const override { return true; } DECLARE_INSTRUCTION(VecNeg); @@ -465,7 +459,7 @@ class HVecNeg FINAL : public HVecUnaryOperation { // Takes absolute value of every component in the vector, // viz. abs[ x1, .. , xn ] = [ |x1|, .. , |xn| ] // for signed operand x. -class HVecAbs FINAL : public HVecUnaryOperation { +class HVecAbs final : public HVecUnaryOperation { public: HVecAbs(ArenaAllocator* allocator, HInstruction* input, @@ -476,7 +470,7 @@ class HVecAbs FINAL : public HVecUnaryOperation { DCHECK(HasConsistentPackedTypes(input, packed_type)); } - bool CanBeMoved() const OVERRIDE { return true; } + bool CanBeMoved() const override { return true; } DECLARE_INSTRUCTION(VecAbs); @@ -487,7 +481,7 @@ class HVecAbs FINAL : public HVecUnaryOperation { // Bitwise- or boolean-nots every component in the vector, // viz. not[ x1, .. , xn ] = [ ~x1, .. , ~xn ], or // not[ x1, .. , xn ] = [ !x1, .. , !xn ] for boolean. -class HVecNot FINAL : public HVecUnaryOperation { +class HVecNot final : public HVecUnaryOperation { public: HVecNot(ArenaAllocator* allocator, HInstruction* input, @@ -498,7 +492,7 @@ class HVecNot FINAL : public HVecUnaryOperation { DCHECK(input->IsVecOperation()); } - bool CanBeMoved() const OVERRIDE { return true; } + bool CanBeMoved() const override { return true; } DECLARE_INSTRUCTION(VecNot); @@ -512,7 +506,7 @@ class HVecNot FINAL : public HVecUnaryOperation { // Adds every component in the two vectors, // viz. [ x1, .. , xn ] + [ y1, .. , yn ] = [ x1 + y1, .. , xn + yn ]. -class HVecAdd FINAL : public HVecBinaryOperation { +class HVecAdd final : public HVecBinaryOperation { public: HVecAdd(ArenaAllocator* allocator, HInstruction* left, @@ -525,7 +519,7 @@ class HVecAdd FINAL : public HVecBinaryOperation { DCHECK(HasConsistentPackedTypes(right, packed_type)); } - bool CanBeMoved() const OVERRIDE { return true; } + bool CanBeMoved() const override { return true; } DECLARE_INSTRUCTION(VecAdd); @@ -533,11 +527,36 @@ class HVecAdd FINAL : public HVecBinaryOperation { DEFAULT_COPY_CONSTRUCTOR(VecAdd); }; +// Adds every component in the two vectors using saturation arithmetic, +// viz. [ x1, .. , xn ] + [ y1, .. , yn ] = [ x1 +_sat y1, .. , xn +_sat yn ] +// for either both signed or both unsigned operands x, y (reflected in packed_type). +class HVecSaturationAdd final : public HVecBinaryOperation { + public: + HVecSaturationAdd(ArenaAllocator* allocator, + HInstruction* left, + HInstruction* right, + DataType::Type packed_type, + size_t vector_length, + uint32_t dex_pc) + : HVecBinaryOperation( + kVecSaturationAdd, allocator, left, right, packed_type, vector_length, dex_pc) { + DCHECK(HasConsistentPackedTypes(left, packed_type)); + DCHECK(HasConsistentPackedTypes(right, packed_type)); + } + + bool CanBeMoved() const override { return true; } + + DECLARE_INSTRUCTION(VecSaturationAdd); + + protected: + DEFAULT_COPY_CONSTRUCTOR(VecSaturationAdd); +}; + // Performs halving add on every component in the two vectors, viz. // rounded [ x1, .. , xn ] hradd [ y1, .. , yn ] = [ (x1 + y1 + 1) >> 1, .. , (xn + yn + 1) >> 1 ] // truncated [ x1, .. , xn ] hadd [ y1, .. , yn ] = [ (x1 + y1) >> 1, .. 
, (xn + yn ) >> 1 ] // for either both signed or both unsigned operands x, y (reflected in packed_type). -class HVecHalvingAdd FINAL : public HVecBinaryOperation { +class HVecHalvingAdd final : public HVecBinaryOperation { public: HVecHalvingAdd(ArenaAllocator* allocator, HInstruction* left, @@ -555,9 +574,9 @@ class HVecHalvingAdd FINAL : public HVecBinaryOperation { bool IsRounded() const { return GetPackedFlag<kFieldHAddIsRounded>(); } - bool CanBeMoved() const OVERRIDE { return true; } + bool CanBeMoved() const override { return true; } - bool InstructionDataEquals(const HInstruction* other) const OVERRIDE { + bool InstructionDataEquals(const HInstruction* other) const override { DCHECK(other->IsVecHalvingAdd()); const HVecHalvingAdd* o = other->AsVecHalvingAdd(); return HVecOperation::InstructionDataEquals(o) && IsRounded() == o->IsRounded(); @@ -577,7 +596,7 @@ class HVecHalvingAdd FINAL : public HVecBinaryOperation { // Subtracts every component in the two vectors, // viz. [ x1, .. , xn ] - [ y1, .. , yn ] = [ x1 - y1, .. , xn - yn ]. -class HVecSub FINAL : public HVecBinaryOperation { +class HVecSub final : public HVecBinaryOperation { public: HVecSub(ArenaAllocator* allocator, HInstruction* left, @@ -590,7 +609,7 @@ class HVecSub FINAL : public HVecBinaryOperation { DCHECK(HasConsistentPackedTypes(right, packed_type)); } - bool CanBeMoved() const OVERRIDE { return true; } + bool CanBeMoved() const override { return true; } DECLARE_INSTRUCTION(VecSub); @@ -598,9 +617,34 @@ class HVecSub FINAL : public HVecBinaryOperation { DEFAULT_COPY_CONSTRUCTOR(VecSub); }; +// Subtracts every component in the two vectors using saturation arithmetic, +// viz. [ x1, .. , xn ] - [ y1, .. , yn ] = [ x1 -_sat y1, .. , xn -_sat yn ] +// for either both signed or both unsigned operands x, y (reflected in packed_type). +class HVecSaturationSub final : public HVecBinaryOperation { + public: + HVecSaturationSub(ArenaAllocator* allocator, + HInstruction* left, + HInstruction* right, + DataType::Type packed_type, + size_t vector_length, + uint32_t dex_pc) + : HVecBinaryOperation( + kVecSaturationSub, allocator, left, right, packed_type, vector_length, dex_pc) { + DCHECK(HasConsistentPackedTypes(left, packed_type)); + DCHECK(HasConsistentPackedTypes(right, packed_type)); + } + + bool CanBeMoved() const override { return true; } + + DECLARE_INSTRUCTION(VecSaturationSub); + + protected: + DEFAULT_COPY_CONSTRUCTOR(VecSaturationSub); +}; + // Multiplies every component in the two vectors, // viz. [ x1, .. , xn ] * [ y1, .. , yn ] = [ x1 * y1, .. , xn * yn ]. -class HVecMul FINAL : public HVecBinaryOperation { +class HVecMul final : public HVecBinaryOperation { public: HVecMul(ArenaAllocator* allocator, HInstruction* left, @@ -613,7 +657,7 @@ class HVecMul FINAL : public HVecBinaryOperation { DCHECK(HasConsistentPackedTypes(right, packed_type)); } - bool CanBeMoved() const OVERRIDE { return true; } + bool CanBeMoved() const override { return true; } DECLARE_INSTRUCTION(VecMul); @@ -623,7 +667,7 @@ class HVecMul FINAL : public HVecBinaryOperation { // Divides every component in the two vectors, // viz. [ x1, .. , xn ] / [ y1, .. , yn ] = [ x1 / y1, .. , xn / yn ].
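A scalar reference for the saturating (+_sat / -_sat) and halving-add formulas documented above, assuming 8-bit lanes as an example:

#include <cstdint>

int8_t SatAddS8(int8_t x, int8_t y) {           // x +_sat y, signed 8-bit lanes
  int32_t wide = int32_t{x} + int32_t{y};
  if (wide > 127) wide = 127;
  if (wide < -128) wide = -128;
  return static_cast<int8_t>(wide);
}

uint8_t SatSubU8(uint8_t x, uint8_t y) {        // x -_sat y, unsigned 8-bit lanes
  int32_t wide = int32_t{x} - int32_t{y};
  if (wide < 0) wide = 0;
  return static_cast<uint8_t>(wide);
}

uint8_t HalvingAddU8(uint8_t x, uint8_t y, bool rounded) {
  // rounded: (x + y + 1) >> 1, truncated: (x + y) >> 1, as in the comment above.
  uint32_t sum = uint32_t{x} + uint32_t{y} + (rounded ? 1u : 0u);
  return static_cast<uint8_t>(sum >> 1);
}

// E.g. SatAddS8(100, 100) == 127, SatSubU8(10, 20) == 0, HalvingAddU8(255, 255, true) == 255.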
-class HVecDiv FINAL : public HVecBinaryOperation { +class HVecDiv final : public HVecBinaryOperation { public: HVecDiv(ArenaAllocator* allocator, HInstruction* left, @@ -636,7 +680,7 @@ class HVecDiv FINAL : public HVecBinaryOperation { DCHECK(HasConsistentPackedTypes(right, packed_type)); } - bool CanBeMoved() const OVERRIDE { return true; } + bool CanBeMoved() const override { return true; } DECLARE_INSTRUCTION(VecDiv); @@ -647,7 +691,7 @@ class HVecDiv FINAL : public HVecBinaryOperation { // Takes minimum of every component in the two vectors, // viz. MIN( [ x1, .. , xn ] , [ y1, .. , yn ]) = [ min(x1, y1), .. , min(xn, yn) ] // for either both signed or both unsigned operands x, y (reflected in packed_type). -class HVecMin FINAL : public HVecBinaryOperation { +class HVecMin final : public HVecBinaryOperation { public: HVecMin(ArenaAllocator* allocator, HInstruction* left, @@ -660,7 +704,7 @@ class HVecMin FINAL : public HVecBinaryOperation { DCHECK(HasConsistentPackedTypes(right, packed_type)); } - bool CanBeMoved() const OVERRIDE { return true; } + bool CanBeMoved() const override { return true; } DECLARE_INSTRUCTION(VecMin); @@ -671,7 +715,7 @@ class HVecMin FINAL : public HVecBinaryOperation { // Takes maximum of every component in the two vectors, // viz. MAX( [ x1, .. , xn ] , [ y1, .. , yn ]) = [ max(x1, y1), .. , max(xn, yn) ] // for either both signed or both unsigned operands x, y (reflected in packed_type). -class HVecMax FINAL : public HVecBinaryOperation { +class HVecMax final : public HVecBinaryOperation { public: HVecMax(ArenaAllocator* allocator, HInstruction* left, @@ -684,7 +728,7 @@ class HVecMax FINAL : public HVecBinaryOperation { DCHECK(HasConsistentPackedTypes(right, packed_type)); } - bool CanBeMoved() const OVERRIDE { return true; } + bool CanBeMoved() const override { return true; } DECLARE_INSTRUCTION(VecMax); @@ -694,7 +738,7 @@ class HVecMax FINAL : public HVecBinaryOperation { // Bitwise-ands every component in the two vectors, // viz. [ x1, .. , xn ] & [ y1, .. , yn ] = [ x1 & y1, .. , xn & yn ]. -class HVecAnd FINAL : public HVecBinaryOperation { +class HVecAnd final : public HVecBinaryOperation { public: HVecAnd(ArenaAllocator* allocator, HInstruction* left, @@ -706,7 +750,7 @@ class HVecAnd FINAL : public HVecBinaryOperation { DCHECK(left->IsVecOperation() && right->IsVecOperation()); } - bool CanBeMoved() const OVERRIDE { return true; } + bool CanBeMoved() const override { return true; } DECLARE_INSTRUCTION(VecAnd); @@ -716,7 +760,7 @@ class HVecAnd FINAL : public HVecBinaryOperation { // Bitwise-and-nots every component in the two vectors, // viz. [ x1, .. , xn ] and-not [ y1, .. , yn ] = [ ~x1 & y1, .. , ~xn & yn ]. -class HVecAndNot FINAL : public HVecBinaryOperation { +class HVecAndNot final : public HVecBinaryOperation { public: HVecAndNot(ArenaAllocator* allocator, HInstruction* left, @@ -729,7 +773,7 @@ class HVecAndNot FINAL : public HVecBinaryOperation { DCHECK(left->IsVecOperation() && right->IsVecOperation()); } - bool CanBeMoved() const OVERRIDE { return true; } + bool CanBeMoved() const override { return true; } DECLARE_INSTRUCTION(VecAndNot); @@ -739,7 +783,7 @@ class HVecAndNot FINAL : public HVecBinaryOperation { // Bitwise-ors every component in the two vectors, // viz. [ x1, .. , xn ] | [ y1, .. , yn ] = [ x1 | y1, .. , xn | yn ]. 
-class HVecOr FINAL : public HVecBinaryOperation { +class HVecOr final : public HVecBinaryOperation { public: HVecOr(ArenaAllocator* allocator, HInstruction* left, @@ -751,7 +795,7 @@ class HVecOr FINAL : public HVecBinaryOperation { DCHECK(left->IsVecOperation() && right->IsVecOperation()); } - bool CanBeMoved() const OVERRIDE { return true; } + bool CanBeMoved() const override { return true; } DECLARE_INSTRUCTION(VecOr); @@ -761,7 +805,7 @@ class HVecOr FINAL : public HVecBinaryOperation { // Bitwise-xors every component in the two vectors, // viz. [ x1, .. , xn ] ^ [ y1, .. , yn ] = [ x1 ^ y1, .. , xn ^ yn ]. -class HVecXor FINAL : public HVecBinaryOperation { +class HVecXor final : public HVecBinaryOperation { public: HVecXor(ArenaAllocator* allocator, HInstruction* left, @@ -773,7 +817,7 @@ class HVecXor FINAL : public HVecBinaryOperation { DCHECK(left->IsVecOperation() && right->IsVecOperation()); } - bool CanBeMoved() const OVERRIDE { return true; } + bool CanBeMoved() const override { return true; } DECLARE_INSTRUCTION(VecXor); @@ -783,7 +827,7 @@ class HVecXor FINAL : public HVecBinaryOperation { // Logically shifts every component in the vector left by the given distance, // viz. [ x1, .. , xn ] << d = [ x1 << d, .. , xn << d ]. -class HVecShl FINAL : public HVecBinaryOperation { +class HVecShl final : public HVecBinaryOperation { public: HVecShl(ArenaAllocator* allocator, HInstruction* left, @@ -795,7 +839,7 @@ class HVecShl FINAL : public HVecBinaryOperation { DCHECK(HasConsistentPackedTypes(left, packed_type)); } - bool CanBeMoved() const OVERRIDE { return true; } + bool CanBeMoved() const override { return true; } DECLARE_INSTRUCTION(VecShl); @@ -805,7 +849,7 @@ class HVecShl FINAL : public HVecBinaryOperation { // Arithmetically shifts every component in the vector right by the given distance, // viz. [ x1, .. , xn ] >> d = [ x1 >> d, .. , xn >> d ]. -class HVecShr FINAL : public HVecBinaryOperation { +class HVecShr final : public HVecBinaryOperation { public: HVecShr(ArenaAllocator* allocator, HInstruction* left, @@ -817,7 +861,7 @@ class HVecShr FINAL : public HVecBinaryOperation { DCHECK(HasConsistentPackedTypes(left, packed_type)); } - bool CanBeMoved() const OVERRIDE { return true; } + bool CanBeMoved() const override { return true; } DECLARE_INSTRUCTION(VecShr); @@ -827,7 +871,7 @@ class HVecShr FINAL : public HVecBinaryOperation { // Logically shifts every component in the vector right by the given distance, // viz. [ x1, .. , xn ] >>> d = [ x1 >>> d, .. , xn >>> d ]. -class HVecUShr FINAL : public HVecBinaryOperation { +class HVecUShr final : public HVecBinaryOperation { public: HVecUShr(ArenaAllocator* allocator, HInstruction* left, @@ -839,7 +883,7 @@ class HVecUShr FINAL : public HVecBinaryOperation { DCHECK(HasConsistentPackedTypes(left, packed_type)); } - bool CanBeMoved() const OVERRIDE { return true; } + bool CanBeMoved() const override { return true; } DECLARE_INSTRUCTION(VecUShr); @@ -854,7 +898,7 @@ class HVecUShr FINAL : public HVecBinaryOperation { // Assigns the given scalar elements to a vector, // viz. set( array(x1, .. , xn) ) = [ x1, .. , xn ] if n == m, // set( array(x1, .. , xm) ) = [ x1, .. , xm, 0, .. , 0 ] if m < n. 
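The three shift nodes above mirror per-lane <<, >> and >>>. A scalar sketch of the difference between the arithmetic (HVecShr) and logical (HVecUShr) right shift, assuming 32-bit lanes:

#include <cstdint>

int32_t Shr(int32_t x, int distance) {   // >>  : arithmetic, the sign bit is replicated
  // (arithmetic shift on the usual two's-complement targets)
  return x >> distance;
}

int32_t UShr(int32_t x, int distance) {  // >>> : logical, zeroes are shifted in
  return static_cast<int32_t>(static_cast<uint32_t>(x) >> distance);
}

// E.g. Shr(-8, 1) == -4 while UShr(-8, 1) == 0x7FFFFFFC.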
-class HVecSetScalars FINAL : public HVecOperation { +class HVecSetScalars final : public HVecOperation { public: HVecSetScalars(ArenaAllocator* allocator, HInstruction* scalars[], @@ -877,7 +921,7 @@ class HVecSetScalars FINAL : public HVecOperation { // Setting scalars needs to stay in place, since SIMD registers are not // kept alive across vector loop boundaries (yet). - bool CanBeMoved() const OVERRIDE { return false; } + bool CanBeMoved() const override { return false; } DECLARE_INSTRUCTION(VecSetScalars); @@ -887,7 +931,10 @@ class HVecSetScalars FINAL : public HVecOperation { // Multiplies every component in the two vectors, adds the result vector to the accumulator vector, // viz. [ a1, .. , an ] + [ x1, .. , xn ] * [ y1, .. , yn ] = [ a1 + x1 * y1, .. , an + xn * yn ]. -class HVecMultiplyAccumulate FINAL : public HVecOperation { +// For floating point types, Java rounding behavior must be preserved; the products are rounded to +// the proper precision before being added. "Fused" multiply-add operations available on several +// architectures are not usable since they would violate Java language rules. +class HVecMultiplyAccumulate final : public HVecOperation { public: HVecMultiplyAccumulate(ArenaAllocator* allocator, InstructionKind op, @@ -901,7 +948,7 @@ class HVecMultiplyAccumulate FINAL : public HVecOperation { allocator, packed_type, SideEffects::None(), - /* number_of_inputs */ 3, + /* number_of_inputs= */ 3, vector_length, dex_pc), op_kind_(op) { @@ -909,14 +956,17 @@ class HVecMultiplyAccumulate FINAL : public HVecOperation { DCHECK(HasConsistentPackedTypes(accumulator, packed_type)); DCHECK(HasConsistentPackedTypes(mul_left, packed_type)); DCHECK(HasConsistentPackedTypes(mul_right, packed_type)); + // Remove the following if we add an architecture that supports floating point multiply-add + // with Java-compatible rounding. + DCHECK(DataType::IsIntegralType(packed_type)); SetRawInputAt(0, accumulator); SetRawInputAt(1, mul_left); SetRawInputAt(2, mul_right); } - bool CanBeMoved() const OVERRIDE { return true; } + bool CanBeMoved() const override { return true; } - bool InstructionDataEquals(const HInstruction* other) const OVERRIDE { + bool InstructionDataEquals(const HInstruction* other) const override { DCHECK(other->IsVecMultiplyAccumulate()); const HVecMultiplyAccumulate* o = other->AsVecMultiplyAccumulate(); return HVecOperation::InstructionDataEquals(o) && GetOpKind() == o->GetOpKind(); @@ -939,7 +989,7 @@ class HVecMultiplyAccumulate FINAL : public HVecOperation { // viz. SAD([ a1, .. , am ], [ x1, .. , xn ], [ y1, .. , yn ]) = // [ a1 + sum abs(xi-yi), .. , am + sum abs(xj-yj) ], // for m <= n, non-overlapping sums, and signed operands x, y. -class HVecSADAccumulate FINAL : public HVecOperation { +class HVecSADAccumulate final : public HVecOperation { public: HVecSADAccumulate(ArenaAllocator* allocator, HInstruction* accumulator, @@ -952,7 +1002,7 @@ class HVecSADAccumulate FINAL : public HVecOperation { allocator, packed_type, SideEffects::None(), - /* number_of_inputs */ 3, + /* number_of_inputs= */ 3, vector_length, dex_pc) { DCHECK(HasConsistentPackedTypes(accumulator, packed_type)); @@ -971,9 +1021,69 @@ class HVecSADAccumulate FINAL : public HVecOperation { DEFAULT_COPY_CONSTRUCTOR(VecSADAccumulate); }; +// Performs dot product of two vectors and adds the result to wider precision components in +// the accumulator. +// +// viz. DOT_PRODUCT([ a1, .. , am], [ x1, .. , xn ], [ y1, .. , yn ]) = +// [ a1 + sum(xi * yi), .. 
, am + sum(xj * yj) ], +// for m <= n, non-overlapping sums, +// for either both signed or both unsigned operands x, y. +// +// Notes: +// - packed type reflects the type of sum reduction, not the type of the operands. +// - IsZeroExtending() is used to determine the kind of signed/zero extension to be +// performed for the operands. +// +// TODO: Support types other than kInt32 for packed type. +class HVecDotProd final : public HVecOperation { + public: + HVecDotProd(ArenaAllocator* allocator, + HInstruction* accumulator, + HInstruction* left, + HInstruction* right, + DataType::Type packed_type, + bool is_zero_extending, + size_t vector_length, + uint32_t dex_pc) + : HVecOperation(kVecDotProd, + allocator, + packed_type, + SideEffects::None(), + /* number_of_inputs= */ 3, + vector_length, + dex_pc) { + DCHECK(HasConsistentPackedTypes(accumulator, packed_type)); + DCHECK(DataType::IsIntegralType(packed_type)); + DCHECK(left->IsVecOperation()); + DCHECK(right->IsVecOperation()); + DCHECK_EQ(ToSignedType(left->AsVecOperation()->GetPackedType()), + ToSignedType(right->AsVecOperation()->GetPackedType())); + SetRawInputAt(0, accumulator); + SetRawInputAt(1, left); + SetRawInputAt(2, right); + SetPackedFlag<kFieldHDotProdIsZeroExtending>(is_zero_extending); + } + + bool IsZeroExtending() const { return GetPackedFlag<kFieldHDotProdIsZeroExtending>(); } + + bool CanBeMoved() const override { return true; } + + DECLARE_INSTRUCTION(VecDotProd); + + protected: + DEFAULT_COPY_CONSTRUCTOR(VecDotProd); + + private: + // Additional packed bits. + static constexpr size_t kFieldHDotProdIsZeroExtending = + HVecOperation::kNumberOfVectorOpPackedBits; + static constexpr size_t kNumberOfHDotProdPackedBits = kFieldHDotProdIsZeroExtending + 1; + static_assert(kNumberOfHDotProdPackedBits <= kMaxNumberOfPackedBits, "Too many packed fields."); +}; + // Loads a vector from memory, viz. load(mem, 1) // yield the vector [ mem(1), .. , mem(n) ]. -class HVecLoad FINAL : public HVecMemoryOperation { +class HVecLoad final : public HVecMemoryOperation { public: HVecLoad(ArenaAllocator* allocator, HInstruction* base, @@ -987,7 +1097,7 @@ class HVecLoad FINAL : public HVecMemoryOperation { allocator, packed_type, side_effects, - /* number_of_inputs */ 2, + /* number_of_inputs= */ 2, vector_length, dex_pc) { SetRawInputAt(0, base); @@ -997,9 +1107,9 @@ class HVecLoad FINAL : public HVecMemoryOperation { bool IsStringCharAt() const { return GetPackedFlag<kFieldIsStringCharAt>(); } - bool CanBeMoved() const OVERRIDE { return true; } + bool CanBeMoved() const override { return true; } - bool InstructionDataEquals(const HInstruction* other) const OVERRIDE { + bool InstructionDataEquals(const HInstruction* other) const override { DCHECK(other->IsVecLoad()); const HVecLoad* o = other->AsVecLoad(); return HVecMemoryOperation::InstructionDataEquals(o) && IsStringCharAt() == o->IsStringCharAt(); @@ -1019,7 +1129,7 @@ class HVecLoad FINAL : public HVecMemoryOperation { // Stores a vector to memory, viz. store(m, 1, [x1, .. , xn] ) // sets mem(1) = x1, .. , mem(n) = xn. 
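A scalar reference for the HVecDotProd semantics introduced above, collapsing the per-lane sums into one accumulator lane and assuming int8 operands accumulated into int32 (the TODO above notes that only kInt32 is supported as the packed type for now):

#include <cstddef>
#include <cstdint>

int32_t DotProdReference(const int8_t* x, const int8_t* y, std::size_t n,
                         int32_t accumulator, bool is_zero_extending) {
  for (std::size_t i = 0; i < n; ++i) {
    const int32_t xi = is_zero_extending ? static_cast<uint8_t>(x[i]) : x[i];
    const int32_t yi = is_zero_extending ? static_cast<uint8_t>(y[i]) : y[i];
    accumulator += xi * yi;  // widen, multiply pairwise, accumulate into the wider lane
  }
  return accumulator;
}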
-class HVecStore FINAL : public HVecMemoryOperation { +class HVecStore final : public HVecMemoryOperation { public: HVecStore(ArenaAllocator* allocator, HInstruction* base, @@ -1033,7 +1143,7 @@ class HVecStore FINAL : public HVecMemoryOperation { allocator, packed_type, side_effects, - /* number_of_inputs */ 3, + /* number_of_inputs= */ 3, vector_length, dex_pc) { DCHECK(HasConsistentPackedTypes(value, packed_type)); @@ -1043,7 +1153,7 @@ class HVecStore FINAL : public HVecMemoryOperation { } // A store needs to stay in place. - bool CanBeMoved() const OVERRIDE { return false; } + bool CanBeMoved() const override { return false; } DECLARE_INSTRUCTION(VecStore); diff --git a/compiler/optimizing/nodes_vector_test.cc b/compiler/optimizing/nodes_vector_test.cc index af13449646..b0a665d704 100644 --- a/compiler/optimizing/nodes_vector_test.cc +++ b/compiler/optimizing/nodes_vector_test.cc @@ -401,9 +401,9 @@ TEST_F(NodesVectorTest, VectorKindMattersOnReduce) { EXPECT_TRUE(v2->CanBeMoved()); EXPECT_TRUE(v3->CanBeMoved()); - EXPECT_EQ(HVecReduce::kSum, v1->GetKind()); - EXPECT_EQ(HVecReduce::kMin, v2->GetKind()); - EXPECT_EQ(HVecReduce::kMax, v3->GetKind()); + EXPECT_EQ(HVecReduce::kSum, v1->GetReductionKind()); + EXPECT_EQ(HVecReduce::kMin, v2->GetReductionKind()); + EXPECT_EQ(HVecReduce::kMax, v3->GetReductionKind()); EXPECT_TRUE(v1->Equals(v1)); EXPECT_TRUE(v2->Equals(v2)); diff --git a/compiler/optimizing/nodes_x86.h b/compiler/optimizing/nodes_x86.h index 4c32be7d15..8e8fbc1581 100644 --- a/compiler/optimizing/nodes_x86.h +++ b/compiler/optimizing/nodes_x86.h @@ -20,7 +20,7 @@ namespace art { // Compute the address of the method for X86 Constant area support. -class HX86ComputeBaseMethodAddress FINAL : public HExpression<0> { +class HX86ComputeBaseMethodAddress final : public HExpression<0> { public: // Treat the value as an int32_t, but it is really a 32 bit native pointer. HX86ComputeBaseMethodAddress() @@ -30,7 +30,7 @@ class HX86ComputeBaseMethodAddress FINAL : public HExpression<0> { kNoDexPc) { } - bool CanBeMoved() const OVERRIDE { return true; } + bool CanBeMoved() const override { return true; } DECLARE_INSTRUCTION(X86ComputeBaseMethodAddress); @@ -39,7 +39,7 @@ class HX86ComputeBaseMethodAddress FINAL : public HExpression<0> { }; // Load a constant value from the constant table. -class HX86LoadFromConstantTable FINAL : public HExpression<2> { +class HX86LoadFromConstantTable final : public HExpression<2> { public: HX86LoadFromConstantTable(HX86ComputeBaseMethodAddress* method_base, HConstant* constant) @@ -66,7 +66,7 @@ class HX86LoadFromConstantTable FINAL : public HExpression<2> { }; // Version of HNeg with access to the constant table for FP types. -class HX86FPNeg FINAL : public HExpression<2> { +class HX86FPNeg final : public HExpression<2> { public: HX86FPNeg(DataType::Type result_type, HInstruction* input, @@ -89,21 +89,21 @@ class HX86FPNeg FINAL : public HExpression<2> { }; // X86 version of HPackedSwitch that holds a pointer to the base method address. 
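Regarding HX86FPNeg above: x86 has no direct floating-point negate instruction, so the negation is presumably done by XOR-ing the sign bit with a mask materialized in the constant area, which is why the node carries the method-base/constant-table input. A scalar sketch of the bit trick only, not of the generated code:

#include <cstdint>
#include <cstring>

float NegateViaSignMask(float x) {
  uint32_t bits;
  std::memcpy(&bits, &x, sizeof(bits));
  bits ^= 0x80000000u;  // flip the IEEE-754 sign bit
  std::memcpy(&x, &bits, sizeof(x));
  return x;
}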
-class HX86PackedSwitch FINAL : public HTemplateInstruction<2> { +class HX86PackedSwitch final : public HExpression<2> { public: HX86PackedSwitch(int32_t start_value, int32_t num_entries, HInstruction* input, HX86ComputeBaseMethodAddress* method_base, uint32_t dex_pc) - : HTemplateInstruction(kX86PackedSwitch, SideEffects::None(), dex_pc), + : HExpression(kX86PackedSwitch, SideEffects::None(), dex_pc), start_value_(start_value), num_entries_(num_entries) { SetRawInputAt(0, input); SetRawInputAt(1, method_base); } - bool IsControlFlow() const OVERRIDE { return true; } + bool IsControlFlow() const override { return true; } int32_t GetStartValue() const { return start_value_; } @@ -128,6 +128,92 @@ class HX86PackedSwitch FINAL : public HTemplateInstruction<2> { const int32_t num_entries_; }; +class HX86AndNot final : public HBinaryOperation { + public: + HX86AndNot(DataType::Type result_type, + HInstruction* left, + HInstruction* right, + uint32_t dex_pc = kNoDexPc) + : HBinaryOperation(kX86AndNot, result_type, left, right, SideEffects::None(), dex_pc) { + } + + bool IsCommutative() const override { return false; } + + template <typename T> static T Compute(T x, T y) { return ~x & y; } + + HConstant* Evaluate(HIntConstant* x, HIntConstant* y) const override { + return GetBlock()->GetGraph()->GetIntConstant( + Compute(x->GetValue(), y->GetValue()), GetDexPc()); + } + HConstant* Evaluate(HLongConstant* x, HLongConstant* y) const override { + return GetBlock()->GetGraph()->GetLongConstant( + Compute(x->GetValue(), y->GetValue()), GetDexPc()); + } + HConstant* Evaluate(HFloatConstant* x ATTRIBUTE_UNUSED, + HFloatConstant* y ATTRIBUTE_UNUSED) const override { + LOG(FATAL) << DebugName() << " is not defined for float values"; + UNREACHABLE(); + } + HConstant* Evaluate(HDoubleConstant* x ATTRIBUTE_UNUSED, + HDoubleConstant* y ATTRIBUTE_UNUSED) const override { + LOG(FATAL) << DebugName() << " is not defined for double values"; + UNREACHABLE(); + } + + DECLARE_INSTRUCTION(X86AndNot); + + protected: + DEFAULT_COPY_CONSTRUCTOR(X86AndNot); +}; + +class HX86MaskOrResetLeastSetBit final : public HUnaryOperation { + public: + HX86MaskOrResetLeastSetBit(DataType::Type result_type, InstructionKind op, + HInstruction* input, uint32_t dex_pc = kNoDexPc) + : HUnaryOperation(kX86MaskOrResetLeastSetBit, result_type, input, dex_pc), + op_kind_(op) { + DCHECK_EQ(result_type, DataType::Kind(input->GetType())); + DCHECK(op == HInstruction::kAnd || op == HInstruction::kXor) << op; + } + template <typename T> + auto Compute(T x) const -> decltype(x & (x-1)) { + static_assert(std::is_same<decltype(x & (x-1)), decltype(x ^(x-1))>::value, + "Inconsistent bitwise types"); + switch (op_kind_) { + case HInstruction::kAnd: + return x & (x-1); + case HInstruction::kXor: + return x ^ (x-1); + default: + LOG(FATAL) << "Unreachable"; + UNREACHABLE(); + } + } + + HConstant* Evaluate(HIntConstant* x) const override { + return GetBlock()->GetGraph()->GetIntConstant(Compute(x->GetValue()), GetDexPc()); + } + HConstant* Evaluate(HLongConstant* x) const override { + return GetBlock()->GetGraph()->GetLongConstant(Compute(x->GetValue()), GetDexPc()); + } + HConstant* Evaluate(HFloatConstant* x ATTRIBUTE_UNUSED) const override { + LOG(FATAL) << DebugName() << "is not defined for float values"; + UNREACHABLE(); + } + HConstant* Evaluate(HDoubleConstant* x ATTRIBUTE_UNUSED) const override { + LOG(FATAL) << DebugName() << "is not defined for double values"; + UNREACHABLE(); + } + InstructionKind GetOpKind() const { return op_kind_; } + + 
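  // Worked example for Compute() above, with x = 0b10100:
  //   kAnd: x & (x - 1) = 0b10000, i.e. the lowest set bit is reset (presumably lowered to x86 BLSR).
  //   kXor: x ^ (x - 1) = 0b00111, i.e. a mask up to and including the lowest set bit (presumably BLSMSK).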
DECLARE_INSTRUCTION(X86MaskOrResetLeastSetBit); + + protected: + const InstructionKind op_kind_; + + DEFAULT_COPY_CONSTRUCTOR(X86MaskOrResetLeastSetBit); +}; + } // namespace art #endif // ART_COMPILER_OPTIMIZING_NODES_X86_H_ diff --git a/compiler/optimizing/optimization.cc b/compiler/optimizing/optimization.cc index 57db7a634c..8864a12301 100644 --- a/compiler/optimizing/optimization.cc +++ b/compiler/optimizing/optimization.cc @@ -28,10 +28,14 @@ #endif #ifdef ART_ENABLE_CODEGEN_x86 #include "pc_relative_fixups_x86.h" +#include "instruction_simplifier_x86.h" #endif #if defined(ART_ENABLE_CODEGEN_x86) || defined(ART_ENABLE_CODEGEN_x86_64) #include "x86_memory_gen.h" #endif +#ifdef ART_ENABLE_CODEGEN_x86_64 +#include "instruction_simplifier_x86_64.h" +#endif #include "bounds_check_elimination.h" #include "cha_guard_optimization.h" @@ -40,6 +44,7 @@ #include "constructor_fence_redundancy_elimination.h" #include "dead_code_elimination.h" #include "dex/code_item_accessors-inl.h" +#include "driver/compiler_options.h" #include "driver/dex_compilation_unit.h" #include "gvn.h" #include "induction_var_analysis.h" @@ -83,14 +88,10 @@ const char* OptimizationPassName(OptimizationPass pass) { return HDeadCodeElimination::kDeadCodeEliminationPassName; case OptimizationPass::kInliner: return HInliner::kInlinerPassName; - case OptimizationPass::kSharpening: - return HSharpening::kSharpeningPassName; case OptimizationPass::kSelectGenerator: return HSelectGenerator::kSelectGeneratorPassName; case OptimizationPass::kInstructionSimplifier: return InstructionSimplifier::kInstructionSimplifierPassName; - case OptimizationPass::kIntrinsicsRecognizer: - return IntrinsicsRecognizer::kIntrinsicsRecognizerPassName; case OptimizationPass::kCHAGuardOptimization: return CHAGuardOptimization::kCHAGuardOptimizationPassName; case OptimizationPass::kCodeSinking: @@ -116,17 +117,26 @@ const char* OptimizationPassName(OptimizationPass pass) { #ifdef ART_ENABLE_CODEGEN_x86 case OptimizationPass::kPcRelativeFixupsX86: return x86::PcRelativeFixups::kPcRelativeFixupsX86PassName; + case OptimizationPass::kInstructionSimplifierX86: + return x86::InstructionSimplifierX86::kInstructionSimplifierX86PassName; +#endif +#ifdef ART_ENABLE_CODEGEN_x86_64 + case OptimizationPass::kInstructionSimplifierX86_64: + return x86_64::InstructionSimplifierX86_64::kInstructionSimplifierX86_64PassName; #endif #if defined(ART_ENABLE_CODEGEN_x86) || defined(ART_ENABLE_CODEGEN_x86_64) case OptimizationPass::kX86MemoryOperandGeneration: return x86::X86MemoryOperandGeneration::kX86MemoryOperandGenerationPassName; #endif + case OptimizationPass::kNone: + LOG(FATAL) << "kNone does not represent an actual pass"; + UNREACHABLE(); } } -#define X(x) if (name == OptimizationPassName((x))) return (x) +#define X(x) if (pass_name == OptimizationPassName((x))) return (x) -OptimizationPass OptimizationPassByName(const std::string& name) { +OptimizationPass OptimizationPassByName(const std::string& pass_name) { X(OptimizationPass::kBoundsCheckElimination); X(OptimizationPass::kCHAGuardOptimization); X(OptimizationPass::kCodeSinking); @@ -137,14 +147,12 @@ OptimizationPass OptimizationPassByName(const std::string& name) { X(OptimizationPass::kInductionVarAnalysis); X(OptimizationPass::kInliner); X(OptimizationPass::kInstructionSimplifier); - X(OptimizationPass::kIntrinsicsRecognizer); X(OptimizationPass::kInvariantCodeMotion); X(OptimizationPass::kLoadStoreAnalysis); X(OptimizationPass::kLoadStoreElimination); X(OptimizationPass::kLoopOptimization); 
X(OptimizationPass::kScheduling); X(OptimizationPass::kSelectGenerator); - X(OptimizationPass::kSharpening); X(OptimizationPass::kSideEffectsAnalysis); #ifdef ART_ENABLE_CODEGEN_arm X(OptimizationPass::kInstructionSimplifierArm); @@ -160,7 +168,7 @@ OptimizationPass OptimizationPassByName(const std::string& name) { X(OptimizationPass::kPcRelativeFixupsX86); X(OptimizationPass::kX86MemoryOperandGeneration); #endif - LOG(FATAL) << "Cannot find optimization " << name; + LOG(FATAL) << "Cannot find optimization " << pass_name; UNREACHABLE(); } @@ -173,7 +181,6 @@ ArenaVector<HOptimization*> ConstructOptimizations( HGraph* graph, OptimizingCompilerStats* stats, CodeGenerator* codegen, - CompilerDriver* driver, const DexCompilationUnit& dex_compilation_unit, VariableSizedHandleScope* handles) { ArenaVector<HOptimization*> optimizations(allocator->Adapter()); @@ -187,9 +194,9 @@ ArenaVector<HOptimization*> ConstructOptimizations( // Loop over the requested optimizations. for (size_t i = 0; i < length; i++) { - OptimizationPass pass = definitions[i].first; - const char* alt_name = definitions[i].second; - const char* name = alt_name != nullptr + OptimizationPass pass = definitions[i].pass; + const char* alt_name = definitions[i].pass_name; + const char* pass_name = alt_name != nullptr ? alt_name : OptimizationPassName(pass); HOptimization* opt = nullptr; @@ -199,47 +206,48 @@ ArenaVector<HOptimization*> ConstructOptimizations( // Analysis passes (kept in most recent for subsequent passes). // case OptimizationPass::kSideEffectsAnalysis: - opt = most_recent_side_effects = new (allocator) SideEffectsAnalysis(graph, name); + opt = most_recent_side_effects = new (allocator) SideEffectsAnalysis(graph, pass_name); break; case OptimizationPass::kInductionVarAnalysis: - opt = most_recent_induction = new (allocator) HInductionVarAnalysis(graph, name); + opt = most_recent_induction = new (allocator) HInductionVarAnalysis(graph, pass_name); break; case OptimizationPass::kLoadStoreAnalysis: - opt = most_recent_lsa = new (allocator) LoadStoreAnalysis(graph, name); + opt = most_recent_lsa = new (allocator) LoadStoreAnalysis(graph, pass_name); break; // // Passes that need prior analysis. 
// case OptimizationPass::kGlobalValueNumbering: CHECK(most_recent_side_effects != nullptr); - opt = new (allocator) GVNOptimization(graph, *most_recent_side_effects, name); + opt = new (allocator) GVNOptimization(graph, *most_recent_side_effects, pass_name); break; case OptimizationPass::kInvariantCodeMotion: CHECK(most_recent_side_effects != nullptr); - opt = new (allocator) LICM(graph, *most_recent_side_effects, stats, name); + opt = new (allocator) LICM(graph, *most_recent_side_effects, stats, pass_name); break; case OptimizationPass::kLoopOptimization: CHECK(most_recent_induction != nullptr); - opt = new (allocator) HLoopOptimization(graph, driver, most_recent_induction, stats, name); + opt = new (allocator) HLoopOptimization( + graph, &codegen->GetCompilerOptions(), most_recent_induction, stats, pass_name); break; case OptimizationPass::kBoundsCheckElimination: CHECK(most_recent_side_effects != nullptr && most_recent_induction != nullptr); opt = new (allocator) BoundsCheckElimination( - graph, *most_recent_side_effects, most_recent_induction, name); + graph, *most_recent_side_effects, most_recent_induction, pass_name); break; case OptimizationPass::kLoadStoreElimination: CHECK(most_recent_side_effects != nullptr && most_recent_induction != nullptr); opt = new (allocator) LoadStoreElimination( - graph, *most_recent_side_effects, *most_recent_lsa, stats, name); + graph, *most_recent_side_effects, *most_recent_lsa, stats, pass_name); break; // // Regular passes. // case OptimizationPass::kConstantFolding: - opt = new (allocator) HConstantFolding(graph, name); + opt = new (allocator) HConstantFolding(graph, pass_name); break; case OptimizationPass::kDeadCodeElimination: - opt = new (allocator) HDeadCodeElimination(graph, stats, name); + opt = new (allocator) HDeadCodeElimination(graph, stats, pass_name); break; case OptimizationPass::kInliner: { CodeItemDataAccessor accessor(*dex_compilation_unit.GetDexFile(), @@ -249,40 +257,33 @@ ArenaVector<HOptimization*> ConstructOptimizations( codegen, dex_compilation_unit, // outer_compilation_unit dex_compilation_unit, // outermost_compilation_unit - driver, handles, stats, accessor.RegistersSize(), - /* total_number_of_instructions */ 0, - /* parent */ nullptr, - /* depth */ 0, - name); + /* total_number_of_instructions= */ 0, + /* parent= */ nullptr, + /* depth= */ 0, + pass_name); break; } - case OptimizationPass::kSharpening: - opt = new (allocator) HSharpening(graph, codegen, driver, name); - break; case OptimizationPass::kSelectGenerator: - opt = new (allocator) HSelectGenerator(graph, handles, stats, name); + opt = new (allocator) HSelectGenerator(graph, handles, stats, pass_name); break; case OptimizationPass::kInstructionSimplifier: - opt = new (allocator) InstructionSimplifier(graph, codegen, driver, stats, name); - break; - case OptimizationPass::kIntrinsicsRecognizer: - opt = new (allocator) IntrinsicsRecognizer(graph, stats, name); + opt = new (allocator) InstructionSimplifier(graph, codegen, stats, pass_name); break; case OptimizationPass::kCHAGuardOptimization: - opt = new (allocator) CHAGuardOptimization(graph, name); + opt = new (allocator) CHAGuardOptimization(graph, pass_name); break; case OptimizationPass::kCodeSinking: - opt = new (allocator) CodeSinking(graph, stats, name); + opt = new (allocator) CodeSinking(graph, stats, pass_name); break; case OptimizationPass::kConstructorFenceRedundancyElimination: - opt = new (allocator) ConstructorFenceRedundancyElimination(graph, stats, name); + opt = new (allocator) 
ConstructorFenceRedundancyElimination(graph, stats, pass_name); break; case OptimizationPass::kScheduling: opt = new (allocator) HInstructionScheduling( - graph, driver->GetInstructionSet(), codegen, name); + graph, codegen->GetCompilerOptions().GetInstructionSet(), codegen, pass_name); break; // // Arch-specific passes. @@ -318,12 +319,23 @@ ArenaVector<HOptimization*> ConstructOptimizations( DCHECK(alt_name == nullptr) << "arch-specific pass does not support alternative name"; opt = new (allocator) x86::X86MemoryOperandGeneration(graph, codegen, stats); break; + case OptimizationPass::kInstructionSimplifierX86: + opt = new (allocator) x86::InstructionSimplifierX86(graph, codegen, stats); + break; +#endif +#ifdef ART_ENABLE_CODEGEN_x86_64 + case OptimizationPass::kInstructionSimplifierX86_64: + opt = new (allocator) x86_64::InstructionSimplifierX86_64(graph, codegen, stats); + break; #endif + case OptimizationPass::kNone: + LOG(FATAL) << "kNone does not represent an actual pass"; + UNREACHABLE(); } // switch // Add each next optimization to result vector. CHECK(opt != nullptr); - DCHECK_STREQ(name, opt->GetPassName()); // sanity + DCHECK_STREQ(pass_name, opt->GetPassName()); // sanity optimizations.push_back(opt); } diff --git a/compiler/optimizing/optimization.h b/compiler/optimizing/optimization.h index c170f155fa..b84e03894c 100644 --- a/compiler/optimizing/optimization.h +++ b/compiler/optimizing/optimization.h @@ -24,7 +24,6 @@ namespace art { class CodeGenerator; -class CompilerDriver; class DexCompilationUnit; /** @@ -47,8 +46,9 @@ class HOptimization : public ArenaObject<kArenaAllocOptimization> { // 'instruction_simplifier$before_codegen'. const char* GetPassName() const { return pass_name_; } - // Perform the analysis itself. - virtual void Run() = 0; + // Perform the pass or analysis. Returns false if no optimizations occurred or no useful + // information was computed (this is best effort, returning true is always ok). + virtual bool Run() = 0; protected: HGraph* const graph_; @@ -76,14 +76,12 @@ enum class OptimizationPass { kInductionVarAnalysis, kInliner, kInstructionSimplifier, - kIntrinsicsRecognizer, kInvariantCodeMotion, kLoadStoreAnalysis, kLoadStoreElimination, kLoopOptimization, kScheduling, kSelectGenerator, - kSharpening, kSideEffectsAnalysis, #ifdef ART_ENABLE_CODEGEN_arm kInstructionSimplifierArm, @@ -97,25 +95,40 @@ enum class OptimizationPass { #endif #ifdef ART_ENABLE_CODEGEN_x86 kPcRelativeFixupsX86, + kInstructionSimplifierX86, +#endif +#ifdef ART_ENABLE_CODEGEN_x86_64 + kInstructionSimplifierX86_64, #endif #if defined(ART_ENABLE_CODEGEN_x86) || defined(ART_ENABLE_CODEGEN_x86_64) kX86MemoryOperandGeneration, #endif + kNone, + kLast = kNone }; // Lookup name of optimization pass. const char* OptimizationPassName(OptimizationPass pass); // Lookup optimization pass by name. -OptimizationPass OptimizationPassByName(const std::string& name); +OptimizationPass OptimizationPassByName(const std::string& pass_name); // Optimization definition consisting of an optimization pass -// and an optional alternative name (nullptr denotes default). -typedef std::pair<OptimizationPass, const char*> OptimizationDef; +// an optional alternative name (nullptr denotes default), and +// an optional pass dependence (kNone denotes no dependence). 
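As a usage sketch of the OptimizationDef shape described here and defined just below via OptDef(): this assumes the surrounding ART headers, and both the alternative pass name string and the chosen dependence are illustrative only.

OptimizationDef defs[] = {
    OptDef(OptimizationPass::kSideEffectsAnalysis),
    OptDef(OptimizationPass::kGlobalValueNumbering,
           "GVN$after_arch",                         // hypothetical alternative name
           OptimizationPass::kSideEffectsAnalysis),  // hypothetical declared dependence
};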
+struct OptimizationDef { + OptimizationDef(OptimizationPass p, const char* pn, OptimizationPass d) + : pass(p), pass_name(pn), depends_on(d) {} + OptimizationPass pass; + const char* pass_name; + OptimizationPass depends_on; +}; // Helper method for optimization definition array entries. -inline OptimizationDef OptDef(OptimizationPass pass, const char* name = nullptr) { - return std::make_pair(pass, name); +inline OptimizationDef OptDef(OptimizationPass pass, + const char* pass_name = nullptr, + OptimizationPass depends_on = OptimizationPass::kNone) { + return OptimizationDef(pass, pass_name, depends_on); } // Helper method to construct series of optimization passes. @@ -133,7 +146,6 @@ ArenaVector<HOptimization*> ConstructOptimizations( HGraph* graph, OptimizingCompilerStats* stats, CodeGenerator* codegen, - CompilerDriver* driver, const DexCompilationUnit& dex_compilation_unit, VariableSizedHandleScope* handles); diff --git a/compiler/optimizing/optimizing_cfi_test.cc b/compiler/optimizing/optimizing_cfi_test.cc index d20b681b49..a52031cced 100644 --- a/compiler/optimizing/optimizing_cfi_test.cc +++ b/compiler/optimizing/optimizing_cfi_test.cc @@ -34,8 +34,6 @@ namespace vixl32 = vixl::aarch32; -using vixl32::r0; - namespace art { // Run the tests only on host. @@ -47,25 +45,20 @@ class OptimizingCFITest : public CFITest, public OptimizingUnitTestHelper { static constexpr bool kGenerateExpected = false; OptimizingCFITest() - : pool_and_allocator_(), - opts_(), - isa_features_(), - graph_(nullptr), + : graph_(nullptr), code_gen_(), blocks_(GetAllocator()->Adapter()) {} - ArenaAllocator* GetAllocator() { return pool_and_allocator_.GetAllocator(); } - void SetUpFrame(InstructionSet isa) { + OverrideInstructionSetFeatures(isa, "default"); + // Ensure that slow-debug is off, so that there is no unexpected read-barrier check emitted. SetRuntimeDebugFlagsEnabled(false); // Setup simple context. - std::string error; - isa_features_ = InstructionSetFeatures::FromVariant(isa, "default", &error); graph_ = CreateGraph(); // Generate simple frame with some spills. - code_gen_ = CodeGenerator::Create(graph_, isa, *isa_features_, opts_); + code_gen_ = CodeGenerator::Create(graph_, *compiler_options_); code_gen_->GetAssembler()->cfi().SetEnabled(true); code_gen_->InitializeCodeGenerationData(); const int frame_size = 64; @@ -105,15 +98,15 @@ class OptimizingCFITest : public CFITest, public OptimizingUnitTestHelper { const std::vector<uint8_t>& expected_asm, const std::vector<uint8_t>& expected_cfi) { // Get the outputs. 
- const std::vector<uint8_t>& actual_asm = code_allocator_.GetMemory(); + ArrayRef<const uint8_t> actual_asm = code_allocator_.GetMemory(); Assembler* opt_asm = code_gen_->GetAssembler(); - const std::vector<uint8_t>& actual_cfi = *(opt_asm->cfi().data()); + ArrayRef<const uint8_t> actual_cfi(*(opt_asm->cfi().data())); if (kGenerateExpected) { GenerateExpected(stdout, isa, isa_str, actual_asm, actual_cfi); } else { - EXPECT_EQ(expected_asm, actual_asm); - EXPECT_EQ(expected_cfi, actual_cfi); + EXPECT_EQ(ArrayRef<const uint8_t>(expected_asm), actual_asm); + EXPECT_EQ(ArrayRef<const uint8_t>(expected_cfi), actual_cfi); } } @@ -135,12 +128,12 @@ class OptimizingCFITest : public CFITest, public OptimizingUnitTestHelper { public: InternalCodeAllocator() {} - virtual uint8_t* Allocate(size_t size) { + uint8_t* Allocate(size_t size) override { memory_.resize(size); return memory_.data(); } - const std::vector<uint8_t>& GetMemory() { return memory_; } + ArrayRef<const uint8_t> GetMemory() const override { return ArrayRef<const uint8_t>(memory_); } private: std::vector<uint8_t> memory_; @@ -148,9 +141,6 @@ class OptimizingCFITest : public CFITest, public OptimizingUnitTestHelper { DISALLOW_COPY_AND_ASSIGN(InternalCodeAllocator); }; - ArenaPoolAndAllocator pool_and_allocator_; - CompilerOptions opts_; - std::unique_ptr<const InstructionSetFeatures> isa_features_; HGraph* graph_; std::unique_ptr<CodeGenerator> code_gen_; ArenaVector<HBasicBlock*> blocks_; @@ -202,6 +192,7 @@ TEST_ISA(kMips64) #ifdef ART_ENABLE_CODEGEN_arm TEST_F(OptimizingCFITest, kThumb2Adjust) { + using vixl32::r0; std::vector<uint8_t> expected_asm( expected_asm_kThumb2_adjust, expected_asm_kThumb2_adjust + arraysize(expected_asm_kThumb2_adjust)); diff --git a/compiler/optimizing/optimizing_compiler.cc b/compiler/optimizing/optimizing_compiler.cc index e42dfc10ba..f4bf11d3d3 100644 --- a/compiler/optimizing/optimizing_compiler.cc +++ b/compiler/optimizing/optimizing_compiler.cc @@ -26,11 +26,13 @@ #include "base/arena_allocator.h" #include "base/arena_containers.h" #include "base/dumpable.h" +#include "base/logging.h" #include "base/macros.h" #include "base/mutex.h" #include "base/scoped_arena_allocator.h" #include "base/timing_logger.h" #include "builder.h" +#include "class_root.h" #include "code_generator.h" #include "compiled_method.h" #include "compiler.h" @@ -39,7 +41,7 @@ #include "dex/dex_file_types.h" #include "dex/verification_results.h" #include "dex/verified_method.h" -#include "driver/compiler_driver-inl.h" +#include "driver/compiled_method_storage.h" #include "driver/compiler_options.h" #include "driver/dex_compilation_unit.h" #include "graph_checker.h" @@ -60,6 +62,7 @@ #include "ssa_builder.h" #include "ssa_liveness_analysis.h" #include "ssa_phi_elimination.h" +#include "stack_map_stream.h" #include "utils/assembler.h" #include "verifier/verifier_compiler_binding.h" @@ -72,25 +75,21 @@ static constexpr const char* kPassNameSeparator = "$"; /** * Used by the code generator, to allocate the code in a vector. 
*/ -class CodeVectorAllocator FINAL : public CodeAllocator { +class CodeVectorAllocator final : public CodeAllocator { public: explicit CodeVectorAllocator(ArenaAllocator* allocator) - : memory_(allocator->Adapter(kArenaAllocCodeBuffer)), - size_(0) {} + : memory_(allocator->Adapter(kArenaAllocCodeBuffer)) {} - virtual uint8_t* Allocate(size_t size) { - size_ = size; + uint8_t* Allocate(size_t size) override { memory_.resize(size); return &memory_[0]; } - size_t GetSize() const { return size_; } - const ArenaVector<uint8_t>& GetMemory() const { return memory_; } + ArrayRef<const uint8_t> GetMemory() const override { return ArrayRef<const uint8_t>(memory_); } uint8_t* GetData() { return memory_.data(); } private: ArenaVector<uint8_t> memory_; - size_t size_; DISALLOW_COPY_AND_ASSIGN(CodeVectorAllocator); }; @@ -108,21 +107,22 @@ class PassObserver : public ValueObject { PassObserver(HGraph* graph, CodeGenerator* codegen, std::ostream* visualizer_output, - CompilerDriver* compiler_driver, + const CompilerOptions& compiler_options, Mutex& dump_mutex) : graph_(graph), + last_seen_graph_size_(0), cached_method_name_(), - timing_logger_enabled_(compiler_driver->GetCompilerOptions().GetDumpTimings()), + timing_logger_enabled_(compiler_options.GetDumpPassTimings()), timing_logger_(timing_logger_enabled_ ? GetMethodName() : "", true, true), disasm_info_(graph->GetAllocator()), visualizer_oss_(), visualizer_output_(visualizer_output), - visualizer_enabled_(!compiler_driver->GetCompilerOptions().GetDumpCfgFileName().empty()), + visualizer_enabled_(!compiler_options.GetDumpCfgFileName().empty()), visualizer_(&visualizer_oss_, graph, *codegen), visualizer_dump_mutex_(dump_mutex), graph_in_bad_state_(false) { if (timing_logger_enabled_ || visualizer_enabled_) { - if (!IsVerboseMethod(compiler_driver, GetMethodName())) { + if (!IsVerboseMethod(compiler_options, GetMethodName())) { timing_logger_enabled_ = visualizer_enabled_ = false; } if (visualizer_enabled_) { @@ -162,7 +162,7 @@ class PassObserver : public ValueObject { VLOG(compiler) << "Starting pass: " << pass_name; // Dump graph first, then start timer. if (visualizer_enabled_) { - visualizer_.DumpGraph(pass_name, /* is_after_pass */ false, graph_in_bad_state_); + visualizer_.DumpGraph(pass_name, /* is_after_pass= */ false, graph_in_bad_state_); FlushVisualizer(); } if (timing_logger_enabled_) { @@ -178,13 +178,13 @@ class PassObserver : public ValueObject { visualizer_oss_.clear(); } - void EndPass(const char* pass_name) REQUIRES(!visualizer_dump_mutex_) { + void EndPass(const char* pass_name, bool pass_change) REQUIRES(!visualizer_dump_mutex_) { // Pause timer first, then dump graph. 
if (timing_logger_enabled_) { timing_logger_.EndTiming(); } if (visualizer_enabled_) { - visualizer_.DumpGraph(pass_name, /* is_after_pass */ true, graph_in_bad_state_); + visualizer_.DumpGraph(pass_name, /* is_after_pass= */ true, graph_in_bad_state_); FlushVisualizer(); } @@ -192,7 +192,7 @@ class PassObserver : public ValueObject { if (kIsDebugBuild) { if (!graph_in_bad_state_) { GraphChecker checker(graph_); - checker.Run(); + last_seen_graph_size_ = checker.Run(pass_change, last_seen_graph_size_); if (!checker.IsValid()) { LOG(FATAL) << "Error after " << pass_name << ": " << Dumpable<GraphChecker>(checker); } @@ -200,11 +200,11 @@ class PassObserver : public ValueObject { } } - static bool IsVerboseMethod(CompilerDriver* compiler_driver, const char* method_name) { + static bool IsVerboseMethod(const CompilerOptions& compiler_options, const char* method_name) { // Test an exact match to --verbose-methods. If verbose-methods is set, this overrides an // empty kStringFilter matching all methods. - if (compiler_driver->GetCompilerOptions().HasVerboseMethods()) { - return compiler_driver->GetCompilerOptions().IsVerboseMethod(method_name); + if (compiler_options.HasVerboseMethods()) { + return compiler_options.IsVerboseMethod(method_name); } // Test the kStringFilter sub-string. constexpr helper variable to silence unreachable-code @@ -218,6 +218,7 @@ class PassObserver : public ValueObject { } HGraph* const graph_; + size_t last_seen_graph_size_; std::string cached_method_name_; @@ -245,60 +246,64 @@ class PassScope : public ValueObject { public: PassScope(const char *pass_name, PassObserver* pass_observer) : pass_name_(pass_name), + pass_change_(true), // assume change pass_observer_(pass_observer) { pass_observer_->StartPass(pass_name_); } + void SetPassNotChanged() { + pass_change_ = false; + } + ~PassScope() { - pass_observer_->EndPass(pass_name_); + pass_observer_->EndPass(pass_name_, pass_change_); } private: const char* const pass_name_; + bool pass_change_; PassObserver* const pass_observer_; }; -class OptimizingCompiler FINAL : public Compiler { +class OptimizingCompiler final : public Compiler { public: - explicit OptimizingCompiler(CompilerDriver* driver); - ~OptimizingCompiler() OVERRIDE; + explicit OptimizingCompiler(const CompilerOptions& compiler_options, + CompiledMethodStorage* storage); + ~OptimizingCompiler() override; - bool CanCompileMethod(uint32_t method_idx, const DexFile& dex_file) const OVERRIDE; + bool CanCompileMethod(uint32_t method_idx, const DexFile& dex_file) const override; - CompiledMethod* Compile(const DexFile::CodeItem* code_item, + CompiledMethod* Compile(const dex::CodeItem* code_item, uint32_t access_flags, InvokeType invoke_type, uint16_t class_def_idx, uint32_t method_idx, Handle<mirror::ClassLoader> class_loader, const DexFile& dex_file, - Handle<mirror::DexCache> dex_cache) const OVERRIDE; + Handle<mirror::DexCache> dex_cache) const override; CompiledMethod* JniCompile(uint32_t access_flags, uint32_t method_idx, const DexFile& dex_file, - Handle<mirror::DexCache> dex_cache) const OVERRIDE; + Handle<mirror::DexCache> dex_cache) const override; - uintptr_t GetEntryPointOf(ArtMethod* method) const OVERRIDE + uintptr_t GetEntryPointOf(ArtMethod* method) const override REQUIRES_SHARED(Locks::mutator_lock_) { return reinterpret_cast<uintptr_t>(method->GetEntryPointFromQuickCompiledCodePtrSize( - InstructionSetPointerSize(GetCompilerDriver()->GetInstructionSet()))); + InstructionSetPointerSize(GetCompilerOptions().GetInstructionSet()))); } - void Init() 
OVERRIDE; - - void UnInit() const OVERRIDE; - bool JitCompile(Thread* self, jit::JitCodeCache* code_cache, ArtMethod* method, + bool baseline, bool osr, jit::JitLogger* jit_logger) - OVERRIDE + override REQUIRES_SHARED(Locks::mutator_lock_); private: - void RunOptimizations(HGraph* graph, + bool RunOptimizations(HGraph* graph, CodeGenerator* codegen, const DexCompilationUnit& dex_compilation_unit, PassObserver* pass_observer, @@ -313,25 +318,41 @@ class OptimizingCompiler FINAL : public Compiler { graph, compilation_stats_.get(), codegen, - GetCompilerDriver(), dex_compilation_unit, handles); DCHECK_EQ(length, optimizations.size()); - // Run the optimization passes one by one. + // Run the optimization passes one by one. Any "depends_on" pass refers back to + // the most recent occurrence of that pass, skipped or executed. + std::bitset<static_cast<size_t>(OptimizationPass::kLast) + 1u> pass_changes; + pass_changes[static_cast<size_t>(OptimizationPass::kNone)] = true; + bool change = false; for (size_t i = 0; i < length; ++i) { - PassScope scope(optimizations[i]->GetPassName(), pass_observer); - optimizations[i]->Run(); + if (pass_changes[static_cast<size_t>(definitions[i].depends_on)]) { + // Execute the pass and record whether it changed anything. + PassScope scope(optimizations[i]->GetPassName(), pass_observer); + bool pass_change = optimizations[i]->Run(); + pass_changes[static_cast<size_t>(definitions[i].pass)] = pass_change; + if (pass_change) { + change = true; + } else { + scope.SetPassNotChanged(); + } + } else { + // Skip the pass and record that nothing changed. + pass_changes[static_cast<size_t>(definitions[i].pass)] = false; + } } + return change; } - template <size_t length> void RunOptimizations( + template <size_t length> bool RunOptimizations( HGraph* graph, CodeGenerator* codegen, const DexCompilationUnit& dex_compilation_unit, PassObserver* pass_observer, VariableSizedHandleScope* handles, const OptimizationDef (&definitions)[length]) const { - RunOptimizations( + return RunOptimizations( graph, codegen, dex_compilation_unit, pass_observer, handles, definitions, length); } @@ -346,7 +367,7 @@ class OptimizingCompiler FINAL : public Compiler { CompiledMethod* Emit(ArenaAllocator* allocator, CodeVectorAllocator* code_allocator, CodeGenerator* codegen, - const DexFile::CodeItem* item) const; + const dex::CodeItem* item) const; // Try compiling a method and return the code generator used for // compiling it. 
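The new depends_on machinery above is easiest to see in isolation: every pass records whether it changed the graph, and a dependent pass runs only if its prerequisite reported a change at its most recent occurrence. The following is a standalone sketch of that loop using stand-in types (Pass, PassDef, RunPasses are illustrative names, not the real OptimizationPass/OptimizationDef/HOptimization classes from this patch):

#include <bitset>
#include <cstddef>
#include <functional>
#include <vector>

// Stand-ins for the real OptimizationPass / OptimizationDef / HOptimization types.
enum class Pass : std::size_t { kNone, kConstantFolding, kInliner, kSimplifier, kLast = kSimplifier };

struct PassDef {
  Pass pass;
  Pass depends_on;            // kNone means "run unconditionally".
  std::function<bool()> run;  // Returns true if the pass changed the graph.
};

// Mirrors the RunOptimizations() loop: a pass executes only if the pass it depends
// on reported a change at its most recent occurrence, skipped or executed.
bool RunPasses(const std::vector<PassDef>& defs) {
  std::bitset<static_cast<std::size_t>(Pass::kLast) + 1u> pass_changes;
  pass_changes[static_cast<std::size_t>(Pass::kNone)] = true;
  bool any_change = false;
  for (const PassDef& def : defs) {
    bool change = false;
    if (pass_changes[static_cast<std::size_t>(def.depends_on)]) {
      change = def.run();  // Execute and record whether anything changed.
    }
    pass_changes[static_cast<std::size_t>(def.pass)] = change;
    any_change = any_change || change;
  }
  return any_change;
}

With this scheme, an entry such as "constant_folding$after_inlining" that depends on the inliner (see the pass table further down in this file) is simply skipped when the inliner reported no change.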
@@ -360,6 +381,7 @@ class OptimizingCompiler FINAL : public Compiler { CodeVectorAllocator* code_allocator, const DexCompilationUnit& dex_compilation_unit, ArtMethod* method, + bool baseline, bool osr, VariableSizedHandleScope* handles) const; @@ -370,19 +392,20 @@ class OptimizingCompiler FINAL : public Compiler { ArtMethod* method, VariableSizedHandleScope* handles) const; - void MaybeRunInliner(HGraph* graph, - CodeGenerator* codegen, - const DexCompilationUnit& dex_compilation_unit, - PassObserver* pass_observer, - VariableSizedHandleScope* handles) const; - - void RunArchOptimizations(HGraph* graph, + bool RunArchOptimizations(HGraph* graph, CodeGenerator* codegen, const DexCompilationUnit& dex_compilation_unit, PassObserver* pass_observer, VariableSizedHandleScope* handles) const; - void GenerateJitDebugInfo(ArtMethod* method, debug::MethodDebugInfo method_debug_info) + bool RunBaselineOptimizations(HGraph* graph, + CodeGenerator* codegen, + const DexCompilationUnit& dex_compilation_unit, + PassObserver* pass_observer, + VariableSizedHandleScope* handles) const; + + void GenerateJitDebugInfo(ArtMethod* method, + const debug::MethodDebugInfo& method_debug_info) REQUIRES_SHARED(Locks::mutator_lock_); std::unique_ptr<OptimizingCompilerStats> compilation_stats_; @@ -396,28 +419,22 @@ class OptimizingCompiler FINAL : public Compiler { static const int kMaximumCompilationTimeBeforeWarning = 100; /* ms */ -OptimizingCompiler::OptimizingCompiler(CompilerDriver* driver) - : Compiler(driver, kMaximumCompilationTimeBeforeWarning), - dump_mutex_("Visualizer dump lock") {} - -void OptimizingCompiler::Init() { - // Enable C1visualizer output. Must be done in Init() because the compiler - // driver is not fully initialized when passed to the compiler's constructor. - CompilerDriver* driver = GetCompilerDriver(); - const std::string cfg_file_name = driver->GetCompilerOptions().GetDumpCfgFileName(); +OptimizingCompiler::OptimizingCompiler(const CompilerOptions& compiler_options, + CompiledMethodStorage* storage) + : Compiler(compiler_options, storage, kMaximumCompilationTimeBeforeWarning), + dump_mutex_("Visualizer dump lock") { + // Enable C1visualizer output. + const std::string& cfg_file_name = compiler_options.GetDumpCfgFileName(); if (!cfg_file_name.empty()) { std::ios_base::openmode cfg_file_mode = - driver->GetCompilerOptions().GetDumpCfgAppend() ? std::ofstream::app : std::ofstream::out; + compiler_options.GetDumpCfgAppend() ? 
std::ofstream::app : std::ofstream::out; visualizer_output_.reset(new std::ofstream(cfg_file_name, cfg_file_mode)); } - if (driver->GetCompilerOptions().GetDumpStats()) { + if (compiler_options.GetDumpStats()) { compilation_stats_.reset(new OptimizingCompilerStats()); } } -void OptimizingCompiler::UnInit() const { -} - OptimizingCompiler::~OptimizingCompiler() { if (compilation_stats_.get() != nullptr) { compilation_stats_->Log(); @@ -439,33 +456,54 @@ static bool IsInstructionSetSupported(InstructionSet instruction_set) { || instruction_set == InstructionSet::kX86_64; } -void OptimizingCompiler::MaybeRunInliner(HGraph* graph, - CodeGenerator* codegen, - const DexCompilationUnit& dex_compilation_unit, - PassObserver* pass_observer, - VariableSizedHandleScope* handles) const { - const CompilerOptions& compiler_options = GetCompilerDriver()->GetCompilerOptions(); - bool should_inline = (compiler_options.GetInlineMaxCodeUnits() > 0); - if (!should_inline) { - return; +bool OptimizingCompiler::RunBaselineOptimizations(HGraph* graph, + CodeGenerator* codegen, + const DexCompilationUnit& dex_compilation_unit, + PassObserver* pass_observer, + VariableSizedHandleScope* handles) const { + switch (codegen->GetCompilerOptions().GetInstructionSet()) { +#ifdef ART_ENABLE_CODEGEN_mips + case InstructionSet::kMips: { + OptimizationDef mips_optimizations[] = { + OptDef(OptimizationPass::kPcRelativeFixupsMips) + }; + return RunOptimizations(graph, + codegen, + dex_compilation_unit, + pass_observer, + handles, + mips_optimizations); + } +#endif +#ifdef ART_ENABLE_CODEGEN_x86 + case InstructionSet::kX86: { + OptimizationDef x86_optimizations[] = { + OptDef(OptimizationPass::kPcRelativeFixupsX86), + }; + return RunOptimizations(graph, + codegen, + dex_compilation_unit, + pass_observer, + handles, + x86_optimizations); + } +#endif + default: + UNUSED(graph); + UNUSED(codegen); + UNUSED(dex_compilation_unit); + UNUSED(pass_observer); + UNUSED(handles); + return false; } - OptimizationDef optimizations[] = { - OptDef(OptimizationPass::kInliner) - }; - RunOptimizations(graph, - codegen, - dex_compilation_unit, - pass_observer, - handles, - optimizations); } -void OptimizingCompiler::RunArchOptimizations(HGraph* graph, +bool OptimizingCompiler::RunArchOptimizations(HGraph* graph, CodeGenerator* codegen, const DexCompilationUnit& dex_compilation_unit, PassObserver* pass_observer, VariableSizedHandleScope* handles) const { - switch (GetCompilerDriver()->GetInstructionSet()) { + switch (codegen->GetCompilerOptions().GetInstructionSet()) { #if defined(ART_ENABLE_CODEGEN_arm) case InstructionSet::kThumb2: case InstructionSet::kArm: { @@ -475,13 +513,12 @@ void OptimizingCompiler::RunArchOptimizations(HGraph* graph, OptDef(OptimizationPass::kGlobalValueNumbering, "GVN$after_arch"), OptDef(OptimizationPass::kScheduling) }; - RunOptimizations(graph, - codegen, - dex_compilation_unit, - pass_observer, - handles, - arm_optimizations); - break; + return RunOptimizations(graph, + codegen, + dex_compilation_unit, + pass_observer, + handles, + arm_optimizations); } #endif #ifdef ART_ENABLE_CODEGEN_arm64 @@ -492,13 +529,12 @@ void OptimizingCompiler::RunArchOptimizations(HGraph* graph, OptDef(OptimizationPass::kGlobalValueNumbering, "GVN$after_arch"), OptDef(OptimizationPass::kScheduling) }; - RunOptimizations(graph, - codegen, - dex_compilation_unit, - pass_observer, - handles, - arm64_optimizations); - break; + return RunOptimizations(graph, + codegen, + dex_compilation_unit, + pass_observer, + handles, + arm64_optimizations); 
} #endif #ifdef ART_ENABLE_CODEGEN_mips @@ -509,13 +545,12 @@ void OptimizingCompiler::RunArchOptimizations(HGraph* graph, OptDef(OptimizationPass::kGlobalValueNumbering, "GVN$after_arch"), OptDef(OptimizationPass::kPcRelativeFixupsMips) }; - RunOptimizations(graph, - codegen, - dex_compilation_unit, - pass_observer, - handles, - mips_optimizations); - break; + return RunOptimizations(graph, + codegen, + dex_compilation_unit, + pass_observer, + handles, + mips_optimizations); } #endif #ifdef ART_ENABLE_CODEGEN_mips64 @@ -524,50 +559,49 @@ void OptimizingCompiler::RunArchOptimizations(HGraph* graph, OptDef(OptimizationPass::kSideEffectsAnalysis), OptDef(OptimizationPass::kGlobalValueNumbering, "GVN$after_arch") }; - RunOptimizations(graph, - codegen, - dex_compilation_unit, - pass_observer, - handles, - mips64_optimizations); - break; + return RunOptimizations(graph, + codegen, + dex_compilation_unit, + pass_observer, + handles, + mips64_optimizations); } #endif #ifdef ART_ENABLE_CODEGEN_x86 case InstructionSet::kX86: { OptimizationDef x86_optimizations[] = { + OptDef(OptimizationPass::kInstructionSimplifierX86), OptDef(OptimizationPass::kSideEffectsAnalysis), OptDef(OptimizationPass::kGlobalValueNumbering, "GVN$after_arch"), OptDef(OptimizationPass::kPcRelativeFixupsX86), OptDef(OptimizationPass::kX86MemoryOperandGeneration) }; - RunOptimizations(graph, - codegen, - dex_compilation_unit, - pass_observer, - handles, - x86_optimizations); - break; + return RunOptimizations(graph, + codegen, + dex_compilation_unit, + pass_observer, + handles, + x86_optimizations); } #endif #ifdef ART_ENABLE_CODEGEN_x86_64 case InstructionSet::kX86_64: { OptimizationDef x86_64_optimizations[] = { + OptDef(OptimizationPass::kInstructionSimplifierX86_64), OptDef(OptimizationPass::kSideEffectsAnalysis), OptDef(OptimizationPass::kGlobalValueNumbering, "GVN$after_arch"), OptDef(OptimizationPass::kX86MemoryOperandGeneration) }; - RunOptimizations(graph, - codegen, - dex_compilation_unit, - pass_observer, - handles, - x86_64_optimizations); - break; + return RunOptimizations(graph, + codegen, + dex_compilation_unit, + pass_observer, + handles, + x86_64_optimizations); } #endif default: - break; + return false; } } @@ -580,7 +614,7 @@ static void AllocateRegisters(HGraph* graph, { PassScope scope(PrepareForRegisterAllocation::kPrepareForRegisterAllocationPassName, pass_observer); - PrepareForRegisterAllocation(graph, stats).Run(); + PrepareForRegisterAllocation(graph, codegen->GetCompilerOptions(), stats).Run(); } // Use local allocator shared by SSA liveness analysis and register allocator. // (Register allocator creates new objects in the liveness data.) @@ -609,16 +643,16 @@ void OptimizingCompiler::RunOptimizations(HGraph* graph, const DexCompilationUnit& dex_compilation_unit, PassObserver* pass_observer, VariableSizedHandleScope* handles) const { - const std::vector<std::string>* pass_names = - GetCompilerDriver()->GetCompilerOptions().GetPassesToRun(); + const std::vector<std::string>* pass_names = GetCompilerOptions().GetPassesToRun(); if (pass_names != nullptr) { // If passes were defined on command-line, build the optimization // passes and run these instead of the built-in optimizations. + // TODO: a way to define depends_on via command-line? 
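For the command-line path introduced above, a pass name such as "dead_code_elimination$initial" has to be reduced to its canonical optimization name before the OptimizationPassByName() lookup in the loop that follows. That helper is not part of this diff; assuming it simply cuts the name at kPassNameSeparator, it would amount to something like this illustrative re-implementation:

#include <string>

// Illustrative only: the real ConvertPassNameToOptimizationName lives outside
// this diff and may differ in detail.
static constexpr const char* kPassNameSeparator = "$";

std::string ConvertPassNameToOptimizationName(const std::string& pass_name) {
  std::string::size_type pos = pass_name.find(kPassNameSeparator);
  return (pos == std::string::npos) ? pass_name : pass_name.substr(0, pos);
}

// "dead_code_elimination$initial" -> "dead_code_elimination", which is then
// resolved by name, as in the loop below.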
const size_t length = pass_names->size(); std::vector<OptimizationDef> optimizations; for (const std::string& pass_name : *pass_names) { std::string opt_name = ConvertPassNameToOptimizationName(pass_name); - optimizations.push_back(OptDef(OptimizationPassByName(opt_name.c_str()), pass_name.c_str())); + optimizations.push_back(OptDef(OptimizationPassByName(opt_name), pass_name.c_str())); } RunOptimizations(graph, codegen, @@ -630,49 +664,62 @@ void OptimizingCompiler::RunOptimizations(HGraph* graph, return; } - OptimizationDef optimizations1[] = { - OptDef(OptimizationPass::kIntrinsicsRecognizer), - OptDef(OptimizationPass::kSharpening), + OptimizationDef optimizations[] = { + // Initial optimizations. OptDef(OptimizationPass::kConstantFolding), OptDef(OptimizationPass::kInstructionSimplifier), - OptDef(OptimizationPass::kDeadCodeElimination, "dead_code_elimination$initial") - }; - RunOptimizations(graph, - codegen, - dex_compilation_unit, - pass_observer, - handles, - optimizations1); - - MaybeRunInliner(graph, codegen, dex_compilation_unit, pass_observer, handles); - - OptimizationDef optimizations2[] = { - // SelectGenerator depends on the InstructionSimplifier removing - // redundant suspend checks to recognize empty blocks. - OptDef(OptimizationPass::kSelectGenerator), - // TODO: if we don't inline we can also skip fold2. - OptDef(OptimizationPass::kConstantFolding, "constant_folding$after_inlining"), - OptDef(OptimizationPass::kInstructionSimplifier, "instruction_simplifier$after_inlining"), - OptDef(OptimizationPass::kDeadCodeElimination, "dead_code_elimination$after_inlining"), - OptDef(OptimizationPass::kSideEffectsAnalysis, "side_effects$before_gvn"), + OptDef(OptimizationPass::kDeadCodeElimination, + "dead_code_elimination$initial"), + // Inlining. + OptDef(OptimizationPass::kInliner), + // Simplification (only if inlining occurred). + OptDef(OptimizationPass::kConstantFolding, + "constant_folding$after_inlining", + OptimizationPass::kInliner), + OptDef(OptimizationPass::kInstructionSimplifier, + "instruction_simplifier$after_inlining", + OptimizationPass::kInliner), + OptDef(OptimizationPass::kDeadCodeElimination, + "dead_code_elimination$after_inlining", + OptimizationPass::kInliner), + // GVN. + OptDef(OptimizationPass::kSideEffectsAnalysis, + "side_effects$before_gvn"), OptDef(OptimizationPass::kGlobalValueNumbering), + // Simplification (TODO: only if GVN occurred). + OptDef(OptimizationPass::kSelectGenerator), + OptDef(OptimizationPass::kConstantFolding, + "constant_folding$after_gvn"), + OptDef(OptimizationPass::kInstructionSimplifier, + "instruction_simplifier$after_gvn"), + OptDef(OptimizationPass::kDeadCodeElimination, + "dead_code_elimination$after_gvn"), + // High-level optimizations. + OptDef(OptimizationPass::kSideEffectsAnalysis, + "side_effects$before_licm"), OptDef(OptimizationPass::kInvariantCodeMotion), OptDef(OptimizationPass::kInductionVarAnalysis), OptDef(OptimizationPass::kBoundsCheckElimination), OptDef(OptimizationPass::kLoopOptimization), - // Evaluates code generated by dynamic bce. - OptDef(OptimizationPass::kConstantFolding, "constant_folding$after_bce"), - OptDef(OptimizationPass::kInstructionSimplifier, "instruction_simplifier$after_bce"), - OptDef(OptimizationPass::kSideEffectsAnalysis, "side_effects$before_lse"), + // Simplification. + OptDef(OptimizationPass::kConstantFolding, + "constant_folding$after_bce"), + OptDef(OptimizationPass::kInstructionSimplifier, + "instruction_simplifier$after_bce"), + // Other high-level optimizations. 
+ OptDef(OptimizationPass::kSideEffectsAnalysis, + "side_effects$before_lse"), OptDef(OptimizationPass::kLoadStoreAnalysis), OptDef(OptimizationPass::kLoadStoreElimination), OptDef(OptimizationPass::kCHAGuardOptimization), - OptDef(OptimizationPass::kDeadCodeElimination, "dead_code_elimination$final"), + OptDef(OptimizationPass::kDeadCodeElimination, + "dead_code_elimination$final"), OptDef(OptimizationPass::kCodeSinking), // The codegen has a few assumptions that only the instruction simplifier // can satisfy. For example, the code generator does not expect to see a // HTypeConversion from a type to the same type. - OptDef(OptimizationPass::kInstructionSimplifier, "instruction_simplifier$before_codegen"), + OptDef(OptimizationPass::kInstructionSimplifier, + "instruction_simplifier$before_codegen"), // Eliminate constructor fences after code sinking to avoid // complicated sinking logic to split a fence with many inputs. OptDef(OptimizationPass::kConstructorFenceRedundancyElimination) @@ -682,7 +729,7 @@ void OptimizingCompiler::RunOptimizations(HGraph* graph, dex_compilation_unit, pass_observer, handles, - optimizations2); + optimizations); RunArchOptimizations(graph, codegen, dex_compilation_unit, pass_observer, handles); } @@ -703,34 +750,28 @@ static ArenaVector<linker::LinkerPatch> EmitAndSortLinkerPatches(CodeGenerator* CompiledMethod* OptimizingCompiler::Emit(ArenaAllocator* allocator, CodeVectorAllocator* code_allocator, CodeGenerator* codegen, - const DexFile::CodeItem* code_item_for_osr_check) const { + const dex::CodeItem* code_item_for_osr_check) const { ArenaVector<linker::LinkerPatch> linker_patches = EmitAndSortLinkerPatches(codegen); - ArenaVector<uint8_t> stack_map(allocator->Adapter(kArenaAllocStackMaps)); - ArenaVector<uint8_t> method_info(allocator->Adapter(kArenaAllocStackMaps)); - size_t stack_map_size = 0; - size_t method_info_size = 0; - codegen->ComputeStackMapAndMethodInfoSize(&stack_map_size, &method_info_size); - stack_map.resize(stack_map_size); - method_info.resize(method_info_size); - codegen->BuildStackMaps(MemoryRegion(stack_map.data(), stack_map.size()), - MemoryRegion(method_info.data(), method_info.size()), - code_item_for_osr_check); + ScopedArenaVector<uint8_t> stack_map = codegen->BuildStackMaps(code_item_for_osr_check); + CompiledMethodStorage* storage = GetCompiledMethodStorage(); CompiledMethod* compiled_method = CompiledMethod::SwapAllocCompiledMethod( - GetCompilerDriver(), + storage, codegen->GetInstructionSet(), - ArrayRef<const uint8_t>(code_allocator->GetMemory()), - // Follow Quick's behavior and set the frame size to zero if it is - // considered "empty" (see the definition of - // art::CodeGenerator::HasEmptyFrame). - codegen->HasEmptyFrame() ? 
0 : codegen->GetFrameSize(), - codegen->GetCoreSpillMask(), - codegen->GetFpuSpillMask(), - ArrayRef<const uint8_t>(method_info), + code_allocator->GetMemory(), ArrayRef<const uint8_t>(stack_map), ArrayRef<const uint8_t>(*codegen->GetAssembler()->cfi().data()), ArrayRef<const linker::LinkerPatch>(linker_patches)); + for (const linker::LinkerPatch& patch : linker_patches) { + if (codegen->NeedsThunkCode(patch) && storage->GetThunkCode(patch).empty()) { + ArenaVector<uint8_t> code(allocator->Adapter()); + std::string debug_name; + codegen->EmitThunkCode(patch, &code, &debug_name); + storage->SetThunkCode(patch, ArrayRef<const uint8_t>(code), debug_name); + } + } + return compiled_method; } @@ -739,14 +780,15 @@ CodeGenerator* OptimizingCompiler::TryCompile(ArenaAllocator* allocator, CodeVectorAllocator* code_allocator, const DexCompilationUnit& dex_compilation_unit, ArtMethod* method, + bool baseline, bool osr, VariableSizedHandleScope* handles) const { MaybeRecordStat(compilation_stats_.get(), MethodCompilationStat::kAttemptBytecodeCompilation); - CompilerDriver* compiler_driver = GetCompilerDriver(); - InstructionSet instruction_set = compiler_driver->GetInstructionSet(); + const CompilerOptions& compiler_options = GetCompilerOptions(); + InstructionSet instruction_set = compiler_options.GetInstructionSet(); const DexFile& dex_file = *dex_compilation_unit.GetDexFile(); uint32_t method_idx = dex_compilation_unit.GetDexMethodIndex(); - const DexFile::CodeItem* code_item = dex_compilation_unit.GetCodeItem(); + const dex::CodeItem* code_item = dex_compilation_unit.GetCodeItem(); // Always use the Thumb-2 assembler: some runtime functionality // (like implicit stack overflow checks) assume Thumb-2. @@ -767,7 +809,6 @@ CodeGenerator* OptimizingCompiler::TryCompile(ArenaAllocator* allocator, // Implementation of the space filter: do not compile a code item whose size in // code units is bigger than 128. static constexpr size_t kSpaceFilterOptimizingThreshold = 128; - const CompilerOptions& compiler_options = compiler_driver->GetCompilerOptions(); if ((compiler_options.GetCompilerFilter() == CompilerFilter::kSpace) && (CodeItemInstructionAccessor(dex_file, code_item).InsnsSizeInCodeUnits() > kSpaceFilterOptimizingThreshold)) { @@ -776,43 +817,58 @@ CodeGenerator* OptimizingCompiler::TryCompile(ArenaAllocator* allocator, } CodeItemDebugInfoAccessor code_item_accessor(dex_file, code_item, method_idx); + + bool dead_reference_safe; + ArrayRef<const uint8_t> interpreter_metadata; + // For AOT compilation, we may not get a method, for example if its class is erroneous, + // possibly due to an unavailable superclass. JIT should always have a method. + DCHECK(Runtime::Current()->IsAotCompiler() || method != nullptr); + if (method != nullptr) { + const dex::ClassDef* containing_class; + { + ScopedObjectAccess soa(Thread::Current()); + containing_class = &method->GetClassDef(); + interpreter_metadata = method->GetQuickenedInfo(); + } + // MethodContainsRSensitiveAccess is currently slow, but HasDeadReferenceSafeAnnotation() + // is currently rarely true. + dead_reference_safe = + annotations::HasDeadReferenceSafeAnnotation(dex_file, *containing_class) + && !annotations::MethodContainsRSensitiveAccess(dex_file, *containing_class, method_idx); + } else { + // If we could not resolve the class, conservatively assume it's dead-reference unsafe. 
+ dead_reference_safe = false; + } + HGraph* graph = new (allocator) HGraph( allocator, arena_stack, dex_file, method_idx, - compiler_driver->GetInstructionSet(), + compiler_options.GetInstructionSet(), kInvalidInvokeType, - compiler_driver->GetCompilerOptions().GetDebuggable(), - osr); + dead_reference_safe, + compiler_options.GetDebuggable(), + /* osr= */ osr); - ArrayRef<const uint8_t> interpreter_metadata; - // For AOT compilation, we may not get a method, for example if its class is erroneous. - // JIT should always have a method. - DCHECK(Runtime::Current()->IsAotCompiler() || method != nullptr); if (method != nullptr) { graph->SetArtMethod(method); - ScopedObjectAccess soa(Thread::Current()); - interpreter_metadata = method->GetQuickenedInfo(); } std::unique_ptr<CodeGenerator> codegen( CodeGenerator::Create(graph, - instruction_set, - *compiler_driver->GetInstructionSetFeatures(), - compiler_driver->GetCompilerOptions(), + compiler_options, compilation_stats_.get())); if (codegen.get() == nullptr) { MaybeRecordStat(compilation_stats_.get(), MethodCompilationStat::kNotCompiledNoCodegen); return nullptr; } - codegen->GetAssembler()->cfi().SetEnabled( - compiler_driver->GetCompilerOptions().GenerateAnyDebugInfo()); + codegen->GetAssembler()->cfi().SetEnabled(compiler_options.GenerateAnyDebugInfo()); PassObserver pass_observer(graph, codegen.get(), visualizer_output_.get(), - compiler_driver, + compiler_options, dump_mutex_); { @@ -822,7 +878,6 @@ CodeGenerator* OptimizingCompiler::TryCompile(ArenaAllocator* allocator, code_item_accessor, &dex_compilation_unit, &dex_compilation_unit, - compiler_driver, codegen.get(), compilation_stats_.get(), interpreter_metadata, @@ -833,23 +888,28 @@ CodeGenerator* OptimizingCompiler::TryCompile(ArenaAllocator* allocator, case kAnalysisSkipped: { MaybeRecordStat(compilation_stats_.get(), MethodCompilationStat::kNotCompiledSkipped); - } break; + } case kAnalysisInvalidBytecode: { MaybeRecordStat(compilation_stats_.get(), MethodCompilationStat::kNotCompiledInvalidBytecode); - } break; + } case kAnalysisFailThrowCatchLoop: { MaybeRecordStat(compilation_stats_.get(), MethodCompilationStat::kNotCompiledThrowCatchLoop); - } break; + } case kAnalysisFailAmbiguousArrayOp: { MaybeRecordStat(compilation_stats_.get(), MethodCompilationStat::kNotCompiledAmbiguousArrayOp); + break; } + case kAnalysisFailIrreducibleLoopAndStringInit: { + MaybeRecordStat(compilation_stats_.get(), + MethodCompilationStat::kNotCompiledIrreducibleLoopAndStringInit); break; + } case kAnalysisSuccess: UNREACHABLE(); } @@ -858,11 +918,11 @@ CodeGenerator* OptimizingCompiler::TryCompile(ArenaAllocator* allocator, } } - RunOptimizations(graph, - codegen.get(), - dex_compilation_unit, - &pass_observer, - handles); + if (baseline) { + RunBaselineOptimizations(graph, codegen.get(), dex_compilation_unit, &pass_observer, handles); + } else { + RunOptimizations(graph, codegen.get(), dex_compilation_unit, &pass_observer, handles); + } RegisterAllocator::Strategy regalloc_strategy = compiler_options.GetRegisterAllocationStrategy(); @@ -887,8 +947,8 @@ CodeGenerator* OptimizingCompiler::TryCompileIntrinsic( ArtMethod* method, VariableSizedHandleScope* handles) const { MaybeRecordStat(compilation_stats_.get(), MethodCompilationStat::kAttemptIntrinsicCompilation); - CompilerDriver* compiler_driver = GetCompilerDriver(); - InstructionSet instruction_set = compiler_driver->GetInstructionSet(); + const CompilerOptions& compiler_options = GetCompilerOptions(); + InstructionSet instruction_set = 
compiler_options.GetInstructionSet(); const DexFile& dex_file = *dex_compilation_unit.GetDexFile(); uint32_t method_idx = dex_compilation_unit.GetDexMethodIndex(); @@ -906,10 +966,11 @@ CodeGenerator* OptimizingCompiler::TryCompileIntrinsic( arena_stack, dex_file, method_idx, - compiler_driver->GetInstructionSet(), + compiler_options.GetInstructionSet(), kInvalidInvokeType, - compiler_driver->GetCompilerOptions().GetDebuggable(), - /* osr */ false); + /* dead_reference_safe= */ true, // Intrinsics don't affect dead reference safety. + compiler_options.GetDebuggable(), + /* osr= */ false); DCHECK(Runtime::Current()->IsAotCompiler()); DCHECK(method != nullptr); @@ -917,20 +978,17 @@ CodeGenerator* OptimizingCompiler::TryCompileIntrinsic( std::unique_ptr<CodeGenerator> codegen( CodeGenerator::Create(graph, - instruction_set, - *compiler_driver->GetInstructionSetFeatures(), - compiler_driver->GetCompilerOptions(), + compiler_options, compilation_stats_.get())); if (codegen.get() == nullptr) { return nullptr; } - codegen->GetAssembler()->cfi().SetEnabled( - compiler_driver->GetCompilerOptions().GenerateAnyDebugInfo()); + codegen->GetAssembler()->cfi().SetEnabled(compiler_options.GenerateAnyDebugInfo()); PassObserver pass_observer(graph, codegen.get(), visualizer_output_.get(), - compiler_driver, + compiler_options, dump_mutex_); { @@ -940,18 +998,16 @@ CodeGenerator* OptimizingCompiler::TryCompileIntrinsic( CodeItemDebugInfoAccessor(), // Null code item. &dex_compilation_unit, &dex_compilation_unit, - compiler_driver, codegen.get(), compilation_stats_.get(), - /* interpreter_metadata */ ArrayRef<const uint8_t>(), + /* interpreter_metadata= */ ArrayRef<const uint8_t>(), handles); builder.BuildIntrinsicGraph(method); } OptimizationDef optimizations[] = { - OptDef(OptimizationPass::kIntrinsicsRecognizer), - // Some intrinsics are converted to HIR by the simplifier and the codegen also - // has a few assumptions that only the instruction simplifier can satisfy. + // The codegen has a few assumptions that only the instruction simplifier + // can satisfy. 
OptDef(OptimizationPass::kInstructionSimplifier), }; RunOptimizations(graph, @@ -966,7 +1022,7 @@ CodeGenerator* OptimizingCompiler::TryCompileIntrinsic( AllocateRegisters(graph, codegen.get(), &pass_observer, - compiler_driver->GetCompilerOptions().GetRegisterAllocationStrategy(), + compiler_options.GetRegisterAllocationStrategy(), compilation_stats_.get()); if (!codegen->IsLeafMethod()) { VLOG(compiler) << "Intrinsic method is not leaf: " << method->GetIntrinsic() @@ -983,7 +1039,7 @@ CodeGenerator* OptimizingCompiler::TryCompileIntrinsic( return codegen.release(); } -CompiledMethod* OptimizingCompiler::Compile(const DexFile::CodeItem* code_item, +CompiledMethod* OptimizingCompiler::Compile(const dex::CodeItem* code_item, uint32_t access_flags, InvokeType invoke_type, uint16_t class_def_idx, @@ -991,13 +1047,13 @@ CompiledMethod* OptimizingCompiler::Compile(const DexFile::CodeItem* code_item, Handle<mirror::ClassLoader> jclass_loader, const DexFile& dex_file, Handle<mirror::DexCache> dex_cache) const { - CompilerDriver* compiler_driver = GetCompilerDriver(); + const CompilerOptions& compiler_options = GetCompilerOptions(); CompiledMethod* compiled_method = nullptr; Runtime* runtime = Runtime::Current(); DCHECK(runtime->IsAotCompiler()); - const VerifiedMethod* verified_method = compiler_driver->GetVerifiedMethod(&dex_file, method_idx); + const VerifiedMethod* verified_method = compiler_options.GetVerifiedMethod(&dex_file, method_idx); DCHECK(!verified_method->HasRuntimeThrow()); - if (compiler_driver->IsMethodVerifiedWithoutFailures(method_idx, class_def_idx, dex_file) || + if (compiler_options.IsMethodVerifiedWithoutFailures(method_idx, class_def_idx, dex_file) || verifier::CanCompilerHandleVerificationFailure( verified_method->GetEncounteredVerificationFailures())) { ArenaAllocator allocator(runtime->GetArenaPool()); @@ -1006,6 +1062,15 @@ CompiledMethod* OptimizingCompiler::Compile(const DexFile::CodeItem* code_item, std::unique_ptr<CodeGenerator> codegen; bool compiled_intrinsic = false; { + ScopedObjectAccess soa(Thread::Current()); + ArtMethod* method = + runtime->GetClassLinker()->ResolveMethod<ClassLinker::ResolveMode::kCheckICCEAndIAE>( + method_idx, dex_cache, jclass_loader, /*referrer=*/ nullptr, invoke_type); + DCHECK_EQ(method == nullptr, soa.Self()->IsExceptionPending()); + soa.Self()->ClearException(); // Suppress exception if any. + VariableSizedHandleScope handles(soa.Self()); + Handle<mirror::Class> compiling_class = + handles.NewHandle(method != nullptr ? method->GetDeclaringClass() : nullptr); DexCompilationUnit dex_compilation_unit( jclass_loader, runtime->GetClassLinker(), @@ -1014,16 +1079,13 @@ CompiledMethod* OptimizingCompiler::Compile(const DexFile::CodeItem* code_item, class_def_idx, method_idx, access_flags, - /* verified_method */ nullptr, // Not needed by the Optimizing compiler. - dex_cache); - ScopedObjectAccess soa(Thread::Current()); - ArtMethod* method = compiler_driver->ResolveMethod( - soa, dex_cache, jclass_loader, &dex_compilation_unit, method_idx, invoke_type); - VariableSizedHandleScope handles(soa.Self()); + /*verified_method=*/ nullptr, // Not needed by the Optimizing compiler. + dex_cache, + compiling_class); // Go to native so that we don't block GC during compilation. 
ScopedThreadSuspension sts(soa.Self(), kNative); if (method != nullptr && UNLIKELY(method->IsIntrinsic())) { - DCHECK(compiler_driver->GetCompilerOptions().IsBootImage()); + DCHECK(compiler_options.IsBootImage()); codegen.reset( TryCompileIntrinsic(&allocator, &arena_stack, @@ -1042,7 +1104,8 @@ CompiledMethod* OptimizingCompiler::Compile(const DexFile::CodeItem* code_item, &code_allocator, dex_compilation_unit, method, - /* osr */ false, + compiler_options.IsBaseline(), + /* osr= */ false, &handles)); } } @@ -1070,7 +1133,7 @@ CompiledMethod* OptimizingCompiler::Compile(const DexFile::CodeItem* code_item, } } else { MethodCompilationStat method_stat; - if (compiler_driver->GetCompilerOptions().VerifyAtRuntime()) { + if (compiler_options.VerifyAtRuntime()) { method_stat = MethodCompilationStat::kNotCompiledVerifyAtRuntime; } else { method_stat = MethodCompilationStat::kNotCompiledVerificationError; @@ -1079,8 +1142,8 @@ CompiledMethod* OptimizingCompiler::Compile(const DexFile::CodeItem* code_item, } if (kIsDebugBuild && - IsCompilingWithCoreImage() && - IsInstructionSetSupported(compiler_driver->GetInstructionSet())) { + compiler_options.CompilingWithCoreImage() && + IsInstructionSetSupported(compiler_options.GetInstructionSet())) { // For testing purposes, we put a special marker on method names // that should be compiled with this compiler (when the // instruction set is supported). This makes sure we're not @@ -1093,31 +1156,50 @@ CompiledMethod* OptimizingCompiler::Compile(const DexFile::CodeItem* code_item, return compiled_method; } +static ScopedArenaVector<uint8_t> CreateJniStackMap(ScopedArenaAllocator* allocator, + const JniCompiledMethod& jni_compiled_method) { + // StackMapStream is quite large, so allocate it using the ScopedArenaAllocator + // to stay clear of the frame size limit. + std::unique_ptr<StackMapStream> stack_map_stream( + new (allocator) StackMapStream(allocator, jni_compiled_method.GetInstructionSet())); + stack_map_stream->BeginMethod( + jni_compiled_method.GetFrameSize(), + jni_compiled_method.GetCoreSpillMask(), + jni_compiled_method.GetFpSpillMask(), + /* num_dex_registers= */ 0); + stack_map_stream->EndMethod(); + return stack_map_stream->Encode(); +} + CompiledMethod* OptimizingCompiler::JniCompile(uint32_t access_flags, uint32_t method_idx, const DexFile& dex_file, Handle<mirror::DexCache> dex_cache) const { - if (GetCompilerDriver()->GetCompilerOptions().IsBootImage()) { + Runtime* runtime = Runtime::Current(); + ArenaAllocator allocator(runtime->GetArenaPool()); + ArenaStack arena_stack(runtime->GetArenaPool()); + + const CompilerOptions& compiler_options = GetCompilerOptions(); + if (compiler_options.IsBootImage()) { ScopedObjectAccess soa(Thread::Current()); - Runtime* runtime = Runtime::Current(); ArtMethod* method = runtime->GetClassLinker()->LookupResolvedMethod( - method_idx, dex_cache.Get(), /* class_loader */ nullptr); + method_idx, dex_cache.Get(), /*class_loader=*/ nullptr); if (method != nullptr && UNLIKELY(method->IsIntrinsic())) { + VariableSizedHandleScope handles(soa.Self()); ScopedNullHandle<mirror::ClassLoader> class_loader; // null means boot class path loader. 
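Stepping back to CreateJniStackMap above: the comment about the frame size limit is why the stream is placement-new'd into arena storage rather than declared as a local. A self-contained illustration of that allocation pattern follows; ToyArena and BigBuilder are toy stand-ins, not the real ScopedArenaAllocator or StackMapStream:

#include <cstddef>
#include <cstdint>
#include <memory>
#include <new>
#include <vector>

// Toy arena: hands out storage that is not on the current stack frame, so a
// large builder object cannot overflow the compiler thread's stack.
class ToyArena {
 public:
  void* Alloc(std::size_t bytes) {
    chunks_.emplace_back(new char[bytes]);
    return chunks_.back().get();
  }

 private:
  std::vector<std::unique_ptr<char[]>> chunks_;
};

// Pretend this is a builder with large inline buffers, like StackMapStream.
struct BigBuilder {
  char scratch[64 * 1024] = {};
  std::vector<uint8_t> Encode() const { return std::vector<uint8_t>(4, 0u); }
};

std::vector<uint8_t> BuildOffTheStack(ToyArena* arena) {
  // Placement-new into arena memory; declaring BigBuilder as a local variable
  // here would put 64 KiB on a frame that is expected to stay small.
  BigBuilder* builder = new (arena->Alloc(sizeof(BigBuilder))) BigBuilder();
  return builder->Encode();
}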
+ Handle<mirror::Class> compiling_class = handles.NewHandle(method->GetDeclaringClass()); DexCompilationUnit dex_compilation_unit( class_loader, runtime->GetClassLinker(), dex_file, - /* code_item */ nullptr, - /* class_def_idx */ DexFile::kDexNoIndex16, + /*code_item=*/ nullptr, + /*class_def_idx=*/ DexFile::kDexNoIndex16, method_idx, access_flags, - /* verified_method */ nullptr, - dex_cache); - ArenaAllocator allocator(runtime->GetArenaPool()); - ArenaStack arena_stack(runtime->GetArenaPool()); + /*verified_method=*/ nullptr, + dex_cache, + compiling_class); CodeVectorAllocator code_allocator(&allocator); - VariableSizedHandleScope handles(soa.Self()); // Go to native so that we don't block GC during compilation. ScopedThreadSuspension sts(soa.Self(), kNative); std::unique_ptr<CodeGenerator> codegen( @@ -1131,7 +1213,7 @@ CompiledMethod* OptimizingCompiler::JniCompile(uint32_t access_flags, CompiledMethod* compiled_method = Emit(&allocator, &code_allocator, codegen.get(), - /* code_item_for_osr_check */ nullptr); + /* item= */ nullptr); compiled_method->MarkAsIntrinsic(); return compiled_method; } @@ -1139,28 +1221,24 @@ CompiledMethod* OptimizingCompiler::JniCompile(uint32_t access_flags, } JniCompiledMethod jni_compiled_method = ArtQuickJniCompileMethod( - GetCompilerDriver(), access_flags, method_idx, dex_file); + compiler_options, access_flags, method_idx, dex_file); MaybeRecordStat(compilation_stats_.get(), MethodCompilationStat::kCompiledNativeStub); + + ScopedArenaAllocator stack_map_allocator(&arena_stack); // Will hold the stack map. + ScopedArenaVector<uint8_t> stack_map = CreateJniStackMap(&stack_map_allocator, + jni_compiled_method); return CompiledMethod::SwapAllocCompiledMethod( - GetCompilerDriver(), + GetCompiledMethodStorage(), jni_compiled_method.GetInstructionSet(), jni_compiled_method.GetCode(), - jni_compiled_method.GetFrameSize(), - jni_compiled_method.GetCoreSpillMask(), - jni_compiled_method.GetFpSpillMask(), - /* method_info */ ArrayRef<const uint8_t>(), - /* vmap_table */ ArrayRef<const uint8_t>(), + ArrayRef<const uint8_t>(stack_map), jni_compiled_method.GetCfi(), - /* patches */ ArrayRef<const linker::LinkerPatch>()); + /* patches= */ ArrayRef<const linker::LinkerPatch>()); } -Compiler* CreateOptimizingCompiler(CompilerDriver* driver) { - return new OptimizingCompiler(driver); -} - -bool IsCompilingWithCoreImage() { - const std::string& image = Runtime::Current()->GetImageLocation(); - return CompilerDriver::IsCoreImageFilename(image); +Compiler* CreateOptimizingCompiler(const CompilerOptions& compiler_options, + CompiledMethodStorage* storage) { + return new OptimizingCompiler(compiler_options, storage); } bool EncodeArtMethodInInlineInfo(ArtMethod* method ATTRIBUTE_UNUSED) { @@ -1168,23 +1246,10 @@ bool EncodeArtMethodInInlineInfo(ArtMethod* method ATTRIBUTE_UNUSED) { return Runtime::Current() == nullptr || !Runtime::Current()->IsAotCompiler(); } -bool CanEncodeInlinedMethodInStackMap(const DexFile& caller_dex_file, ArtMethod* callee) { - if (!Runtime::Current()->IsAotCompiler()) { - // JIT can always encode methods in stack maps. - return true; - } - if (IsSameDexFile(caller_dex_file, *callee->GetDexFile())) { - return true; - } - // TODO(ngeoffray): Support more AOT cases for inlining: - // - methods in multidex - // - methods in boot image for on-device non-PIC compilation. 
- return false; -} - bool OptimizingCompiler::JitCompile(Thread* self, jit::JitCodeCache* code_cache, ArtMethod* method, + bool baseline, bool osr, jit::JitLogger* jit_logger) { StackHandleScope<3> hs(self); @@ -1195,7 +1260,7 @@ bool OptimizingCompiler::JitCompile(Thread* self, const DexFile* dex_file = method->GetDexFile(); const uint16_t class_def_idx = method->GetClassDefIndex(); - const DexFile::CodeItem* code_item = dex_file->GetCodeItem(method->GetCodeItemOffset()); + const dex::CodeItem* code_item = dex_file->GetCodeItem(method->GetCodeItemOffset()); const uint32_t method_idx = method->GetDexMethodIndex(); const uint32_t access_flags = method->GetAccessFlags(); @@ -1203,37 +1268,52 @@ bool OptimizingCompiler::JitCompile(Thread* self, ArenaAllocator allocator(runtime->GetJitArenaPool()); if (UNLIKELY(method->IsNative())) { + const CompilerOptions& compiler_options = GetCompilerOptions(); JniCompiledMethod jni_compiled_method = ArtQuickJniCompileMethod( - GetCompilerDriver(), access_flags, method_idx, *dex_file); - ScopedNullHandle<mirror::ObjectArray<mirror::Object>> roots; + compiler_options, access_flags, method_idx, *dex_file); + std::vector<Handle<mirror::Object>> roots; ArenaSet<ArtMethod*, std::less<ArtMethod*>> cha_single_implementation_list( allocator.Adapter(kArenaAllocCHA)); + ArenaStack arena_stack(runtime->GetJitArenaPool()); + // StackMapStream is large and it does not fit into this frame, so we need helper method. + ScopedArenaAllocator stack_map_allocator(&arena_stack); // Will hold the stack map. + ScopedArenaVector<uint8_t> stack_map = CreateJniStackMap(&stack_map_allocator, + jni_compiled_method); + uint8_t* stack_map_data = nullptr; + uint8_t* roots_data = nullptr; + uint32_t data_size = code_cache->ReserveData(self, + stack_map.size(), + /* number_of_roots= */ 0, + method, + &stack_map_data, + &roots_data); + if (stack_map_data == nullptr || roots_data == nullptr) { + MaybeRecordStat(compilation_stats_.get(), MethodCompilationStat::kJitOutOfMemoryForCommit); + return false; + } + memcpy(stack_map_data, stack_map.data(), stack_map.size()); + const void* code = code_cache->CommitCode( self, method, - /* stack_map_data */ nullptr, - /* method_info_data */ nullptr, - /* roots_data */ nullptr, - jni_compiled_method.GetFrameSize(), - jni_compiled_method.GetCoreSpillMask(), - jni_compiled_method.GetFpSpillMask(), + stack_map_data, + roots_data, jni_compiled_method.GetCode().data(), jni_compiled_method.GetCode().size(), - /* data_size */ 0u, + data_size, osr, roots, - /* has_should_deoptimize_flag */ false, + /* has_should_deoptimize_flag= */ false, cha_single_implementation_list); if (code == nullptr) { return false; } - const CompilerOptions& compiler_options = GetCompilerDriver()->GetCompilerOptions(); if (compiler_options.GenerateAnyDebugInfo()) { const auto* method_header = reinterpret_cast<const OatQuickMethodHeader*>(code); const uintptr_t code_address = reinterpret_cast<uintptr_t>(method_header->GetCode()); debug::MethodDebugInfo info = {}; - DCHECK(info.custom_name.empty()); + info.custom_name = "art_jni_trampoline"; info.dex_file = dex_file; info.class_def_index = class_def_idx; info.dex_method_index = method_idx; @@ -1265,6 +1345,7 @@ bool OptimizingCompiler::JitCompile(Thread* self, std::unique_ptr<CodeGenerator> codegen; { + Handle<mirror::Class> compiling_class = handles.NewHandle(method->GetDeclaringClass()); DexCompilationUnit dex_compilation_unit( class_loader, runtime->GetClassLinker(), @@ -1273,8 +1354,9 @@ bool OptimizingCompiler::JitCompile(Thread* 
self, class_def_idx, method_idx, access_flags, - /* verified_method */ nullptr, - dex_cache); + /*verified_method=*/ nullptr, + dex_cache, + compiling_class); // Go to native so that we don't block GC during compilation. ScopedThreadSuspension sts(self, kNative); @@ -1284,6 +1366,7 @@ bool OptimizingCompiler::JitCompile(Thread* self, &code_allocator, dex_compilation_unit, method, + baseline, osr, &handles)); if (codegen.get() == nullptr) { @@ -1291,55 +1374,37 @@ bool OptimizingCompiler::JitCompile(Thread* self, } } - size_t stack_map_size = 0; - size_t method_info_size = 0; - codegen->ComputeStackMapAndMethodInfoSize(&stack_map_size, &method_info_size); + ScopedArenaVector<uint8_t> stack_map = codegen->BuildStackMaps(code_item); size_t number_of_roots = codegen->GetNumberOfJitRoots(); - ClassLinker* class_linker = Runtime::Current()->GetClassLinker(); - // We allocate an object array to ensure the JIT roots that we will collect in EmitJitRoots - // will be visible by the GC between EmitLiterals and CommitCode. Once CommitCode is - // executed, this array is not needed. - Handle<mirror::ObjectArray<mirror::Object>> roots( - hs.NewHandle(mirror::ObjectArray<mirror::Object>::Alloc( - self, class_linker->GetClassRoot(ClassLinker::kObjectArrayClass), number_of_roots))); - if (roots == nullptr) { - // Out of memory, just clear the exception to avoid any Java exception uncaught problems. - MaybeRecordStat(compilation_stats_.get(), MethodCompilationStat::kJitOutOfMemoryForCommit); - DCHECK(self->IsExceptionPending()); - self->ClearException(); - return false; - } uint8_t* stack_map_data = nullptr; - uint8_t* method_info_data = nullptr; uint8_t* roots_data = nullptr; uint32_t data_size = code_cache->ReserveData(self, - stack_map_size, - method_info_size, + stack_map.size(), number_of_roots, method, &stack_map_data, - &method_info_data, &roots_data); if (stack_map_data == nullptr || roots_data == nullptr) { MaybeRecordStat(compilation_stats_.get(), MethodCompilationStat::kJitOutOfMemoryForCommit); return false; } - codegen->BuildStackMaps(MemoryRegion(stack_map_data, stack_map_size), - MemoryRegion(method_info_data, method_info_size), - code_item); - codegen->EmitJitRoots(code_allocator.GetData(), roots, roots_data); + memcpy(stack_map_data, stack_map.data(), stack_map.size()); + std::vector<Handle<mirror::Object>> roots; + codegen->EmitJitRoots(code_allocator.GetData(), roots_data, &roots); + // The root Handle<>s filled by the codegen reference entries in the VariableSizedHandleScope. + DCHECK(std::all_of(roots.begin(), + roots.end(), + [&handles](Handle<mirror::Object> root){ + return handles.Contains(root.GetReference()); + })); const void* code = code_cache->CommitCode( self, method, stack_map_data, - method_info_data, roots_data, - codegen->HasEmptyFrame() ? 
0 : codegen->GetFrameSize(), - codegen->GetCoreSpillMask(), - codegen->GetFpuSpillMask(), code_allocator.GetMemory().data(), - code_allocator.GetSize(), + code_allocator.GetMemory().size(), data_size, osr, roots, @@ -1352,7 +1417,7 @@ bool OptimizingCompiler::JitCompile(Thread* self, return false; } - const CompilerOptions& compiler_options = GetCompilerDriver()->GetCompilerOptions(); + const CompilerOptions& compiler_options = GetCompilerOptions(); if (compiler_options.GenerateAnyDebugInfo()) { const auto* method_header = reinterpret_cast<const OatQuickMethodHeader*>(code); const uintptr_t code_address = reinterpret_cast<uintptr_t>(method_header->GetCode()); @@ -1369,16 +1434,16 @@ bool OptimizingCompiler::JitCompile(Thread* self, info.is_optimized = true; info.is_code_address_text_relative = false; info.code_address = code_address; - info.code_size = code_allocator.GetSize(); + info.code_size = code_allocator.GetMemory().size(); info.frame_size_in_bytes = method_header->GetFrameSizeInBytes(); - info.code_info = stack_map_size == 0 ? nullptr : stack_map_data; + info.code_info = stack_map.size() == 0 ? nullptr : stack_map_data; info.cfi = ArrayRef<const uint8_t>(*codegen->GetAssembler()->cfi().data()); GenerateJitDebugInfo(method, info); } Runtime::Current()->GetJit()->AddMemoryUsage(method, allocator.BytesUsed()); if (jit_logger != nullptr) { - jit_logger->WriteLog(code, code_allocator.GetSize(), method); + jit_logger->WriteLog(code, code_allocator.GetMemory().size(), method); } if (kArenaAllocatorCountAllocations) { @@ -1397,26 +1462,31 @@ bool OptimizingCompiler::JitCompile(Thread* self, return true; } -void OptimizingCompiler::GenerateJitDebugInfo(ArtMethod* method, debug::MethodDebugInfo info) { - const CompilerOptions& compiler_options = GetCompilerDriver()->GetCompilerOptions(); +void OptimizingCompiler::GenerateJitDebugInfo(ArtMethod* method ATTRIBUTE_UNUSED, + const debug::MethodDebugInfo& info) { + const CompilerOptions& compiler_options = GetCompilerOptions(); DCHECK(compiler_options.GenerateAnyDebugInfo()); - - // If both flags are passed, generate full debug info. - const bool mini_debug_info = !compiler_options.GetGenerateDebugInfo(); - - // Create entry for the single method that we just compiled. - std::vector<uint8_t> elf_file = debug::MakeElfFileForJIT( - GetCompilerDriver()->GetInstructionSet(), - GetCompilerDriver()->GetInstructionSetFeatures(), - mini_debug_info, - ArrayRef<const debug::MethodDebugInfo>(&info, 1)); - MutexLock mu(Thread::Current(), *Locks::native_debug_interface_lock_); - AddNativeDebugInfoForJit(reinterpret_cast<const void*>(info.code_address), elf_file); - - VLOG(jit) - << "JIT mini-debug-info added for " << ArtMethod::PrettyMethod(method) - << " size=" << PrettySize(elf_file.size()) - << " total_size=" << PrettySize(GetJitNativeDebugInfoMemUsage()); + TimingLogger logger("Generate JIT debug info logger", true, VLOG_IS_ON(jit)); + { + TimingLogger::ScopedTiming st("Generate JIT debug info", &logger); + + // If both flags are passed, generate full debug info. + const bool mini_debug_info = !compiler_options.GetGenerateDebugInfo(); + + // Create entry for the single method that we just compiled. 
+ std::vector<uint8_t> elf_file = debug::MakeElfFileForJIT( + compiler_options.GetInstructionSet(), + compiler_options.GetInstructionSetFeatures(), + mini_debug_info, + info); + AddNativeDebugInfoForJit(Thread::Current(), + reinterpret_cast<const void*>(info.code_address), + elf_file, + debug::PackElfFileForJIT, + compiler_options.GetInstructionSet(), + compiler_options.GetInstructionSetFeatures()); + } + Runtime::Current()->GetJit()->AddTimingLogger(logger); } } // namespace art diff --git a/compiler/optimizing/optimizing_compiler.h b/compiler/optimizing/optimizing_compiler.h index d8cea30a6b..cd6d684590 100644 --- a/compiler/optimizing/optimizing_compiler.h +++ b/compiler/optimizing/optimizing_compiler.h @@ -17,26 +17,21 @@ #ifndef ART_COMPILER_OPTIMIZING_OPTIMIZING_COMPILER_H_ #define ART_COMPILER_OPTIMIZING_OPTIMIZING_COMPILER_H_ +#include "base/globals.h" #include "base/mutex.h" -#include "globals.h" namespace art { class ArtMethod; class Compiler; -class CompilerDriver; +class CompiledMethodStorage; +class CompilerOptions; class DexFile; -Compiler* CreateOptimizingCompiler(CompilerDriver* driver); - -// Returns whether we are compiling against a "core" image, which -// is an indicative we are running tests. The compiler will use that -// information for checking invariants. -bool IsCompilingWithCoreImage(); +Compiler* CreateOptimizingCompiler(const CompilerOptions& compiler_options, + CompiledMethodStorage* storage); bool EncodeArtMethodInInlineInfo(ArtMethod* method); -bool CanEncodeInlinedMethodInStackMap(const DexFile& caller_dex_file, ArtMethod* callee) - REQUIRES_SHARED(Locks::mutator_lock_); } // namespace art diff --git a/compiler/optimizing/optimizing_compiler_stats.h b/compiler/optimizing/optimizing_compiler_stats.h index 00194ff1fe..ddd57f5f1a 100644 --- a/compiler/optimizing/optimizing_compiler_stats.h +++ b/compiler/optimizing/optimizing_compiler_stats.h @@ -22,9 +22,10 @@ #include <string> #include <type_traits> +#include <android-base/logging.h> + #include "base/atomic.h" #include "base/globals.h" -#include "base/logging.h" // For VLOG_IS_ON. namespace art { @@ -59,6 +60,7 @@ enum class MethodCompilationStat { kNotCompiledUnsupportedIsa, kNotCompiledVerificationError, kNotCompiledVerifyAtRuntime, + kNotCompiledIrreducibleLoopAndStringInit, kInlinedMonomorphicCall, kInlinedPolymorphicCall, kMonomorphicCall, @@ -99,6 +101,7 @@ enum class MethodCompilationStat { kConstructorFenceRemovedLSE, kConstructorFenceRemovedPFRA, kConstructorFenceRemovedCFRE, + kBitstringTypeCheck, kJitOutOfMemoryForCommit, kLastStat }; @@ -124,11 +127,6 @@ class OptimizingCompilerStats { } void Log() const { - if (!kIsDebugBuild && !VLOG_IS_ON(compiler)) { - // Log only in debug builds or if the compiler is verbose. 
- return; - } - uint32_t compiled_intrinsics = GetStat(MethodCompilationStat::kCompiledIntrinsic); uint32_t compiled_native_stubs = GetStat(MethodCompilationStat::kCompiledNativeStub); uint32_t bytecode_attempts = diff --git a/compiler/optimizing/optimizing_unit_test.h b/compiler/optimizing/optimizing_unit_test.h index 6dcbadba6e..e5f694109a 100644 --- a/compiler/optimizing/optimizing_unit_test.h +++ b/compiler/optimizing/optimizing_unit_test.h @@ -20,6 +20,7 @@ #include <memory> #include <vector> +#include "base/malloc_arena_pool.h" #include "base/scoped_arena_allocator.h" #include "builder.h" #include "common_compiler_test.h" @@ -28,6 +29,7 @@ #include "dex/dex_instruction.h" #include "dex/standard_dex_file.h" #include "driver/dex_compilation_unit.h" +#include "graph_checker.h" #include "handle_scope-inl.h" #include "mirror/class_loader.h" #include "mirror/dex_cache.h" @@ -97,7 +99,7 @@ class ArenaPoolAndAllocator { ScopedArenaAllocator* GetScopedAllocator() { return &scoped_allocator_; } private: - ArenaPool pool_; + MallocArenaPool pool_; ArenaAllocator allocator_; ArenaStack arena_stack_; ScopedArenaAllocator scoped_allocator_; @@ -153,7 +155,7 @@ class OptimizingUnitTestHelper { void* aligned_data = GetAllocator()->Alloc(code_item_size); memcpy(aligned_data, &data[0], code_item_size); CHECK_ALIGNED(aligned_data, StandardDexFile::CodeItem::kAlignment); - const DexFile::CodeItem* code_item = reinterpret_cast<const DexFile::CodeItem*>(aligned_data); + const dex::CodeItem* code_item = reinterpret_cast<const dex::CodeItem*>(aligned_data); { ScopedObjectAccess soa(Thread::Current()); @@ -163,13 +165,13 @@ class OptimizingUnitTestHelper { const DexCompilationUnit* dex_compilation_unit = new (graph->GetAllocator()) DexCompilationUnit( handles_->NewHandle<mirror::ClassLoader>(nullptr), - /* class_linker */ nullptr, + /* class_linker= */ nullptr, graph->GetDexFile(), code_item, - /* class_def_index */ DexFile::kDexNoIndex16, - /* method_idx */ dex::kDexNoIndex, - /* access_flags */ 0u, - /* verified_method */ nullptr, + /* class_def_index= */ DexFile::kDexNoIndex16, + /* method_idx= */ dex::kDexNoIndex, + /* access_flags= */ 0u, + /* verified_method= */ nullptr, handles_->NewHandle<mirror::DexCache>(nullptr)); CodeItemDebugInfoAccessor accessor(graph->GetDexFile(), code_item, /*dex_method_idx*/ 0u); HGraphBuilder builder(graph, dex_compilation_unit, accessor, handles_.get(), return_type); @@ -186,6 +188,77 @@ class OptimizingUnitTestHelper { class OptimizingUnitTest : public CommonCompilerTest, public OptimizingUnitTestHelper {}; +// OptimizingUnitTest with some handy functions to ease the graph creation. 
+class ImprovedOptimizingUnitTest : public OptimizingUnitTest { + public: + ImprovedOptimizingUnitTest() : graph_(CreateGraph()), + entry_block_(nullptr), + return_block_(nullptr), + exit_block_(nullptr), + parameter_(nullptr) {} + + virtual ~ImprovedOptimizingUnitTest() {} + + void InitGraph() { + entry_block_ = new (GetAllocator()) HBasicBlock(graph_); + graph_->AddBlock(entry_block_); + graph_->SetEntryBlock(entry_block_); + + return_block_ = new (GetAllocator()) HBasicBlock(graph_); + graph_->AddBlock(return_block_); + + exit_block_ = new (GetAllocator()) HBasicBlock(graph_); + graph_->AddBlock(exit_block_); + graph_->SetExitBlock(exit_block_); + + entry_block_->AddSuccessor(return_block_); + return_block_->AddSuccessor(exit_block_); + + parameter_ = new (GetAllocator()) HParameterValue(graph_->GetDexFile(), + dex::TypeIndex(0), + 0, + DataType::Type::kInt32); + entry_block_->AddInstruction(parameter_); + return_block_->AddInstruction(new (GetAllocator()) HReturnVoid()); + exit_block_->AddInstruction(new (GetAllocator()) HExit()); + } + + bool CheckGraph() { + GraphChecker checker(graph_); + checker.Run(); + if (!checker.IsValid()) { + for (const std::string& error : checker.GetErrors()) { + std::cout << error << std::endl; + } + return false; + } + return true; + } + + HEnvironment* ManuallyBuildEnvFor(HInstruction* instruction, + ArenaVector<HInstruction*>* current_locals) { + HEnvironment* environment = new (GetAllocator()) HEnvironment( + (GetAllocator()), + current_locals->size(), + graph_->GetArtMethod(), + instruction->GetDexPc(), + instruction); + + environment->CopyFrom(ArrayRef<HInstruction* const>(*current_locals)); + instruction->SetRawEnvironment(environment); + return environment; + } + + protected: + HGraph* graph_; + + HBasicBlock* entry_block_; + HBasicBlock* return_block_; + HBasicBlock* exit_block_; + + HInstruction* parameter_; +}; + // Naive string diff data type. typedef std::list<std::pair<std::string, std::string>> diff_t; diff --git a/compiler/optimizing/parallel_move_resolver.h b/compiler/optimizing/parallel_move_resolver.h index e6e069f96e..5fadcab402 100644 --- a/compiler/optimizing/parallel_move_resolver.h +++ b/compiler/optimizing/parallel_move_resolver.h @@ -58,7 +58,7 @@ class ParallelMoveResolverWithSwap : public ParallelMoveResolver { virtual ~ParallelMoveResolverWithSwap() {} // Resolve a set of parallel moves, emitting assembler instructions. - void EmitNativeCode(HParallelMove* parallel_move) OVERRIDE; + void EmitNativeCode(HParallelMove* parallel_move) override; protected: class ScratchRegisterScope : public ValueObject { @@ -133,7 +133,7 @@ class ParallelMoveResolverNoSwap : public ParallelMoveResolver { virtual ~ParallelMoveResolverNoSwap() {} // Resolve a set of parallel moves, emitting assembler instructions. - void EmitNativeCode(HParallelMove* parallel_move) OVERRIDE; + void EmitNativeCode(HParallelMove* parallel_move) override; protected: // Called at the beginning of EmitNativeCode(). 
A subclass may put some architecture dependent diff --git a/compiler/optimizing/parallel_move_test.cc b/compiler/optimizing/parallel_move_test.cc index cb87cabe1c..a8ab6cdd0c 100644 --- a/compiler/optimizing/parallel_move_test.cc +++ b/compiler/optimizing/parallel_move_test.cc @@ -15,6 +15,7 @@ */ #include "base/arena_allocator.h" +#include "base/malloc_arena_pool.h" #include "nodes.h" #include "parallel_move_resolver.h" @@ -55,7 +56,7 @@ class TestParallelMoveResolverWithSwap : public ParallelMoveResolverWithSwap { explicit TestParallelMoveResolverWithSwap(ArenaAllocator* allocator) : ParallelMoveResolverWithSwap(allocator) {} - void EmitMove(size_t index) OVERRIDE { + void EmitMove(size_t index) override { MoveOperands* move = moves_[index]; if (!message_.str().empty()) { message_ << " "; @@ -67,7 +68,7 @@ class TestParallelMoveResolverWithSwap : public ParallelMoveResolverWithSwap { message_ << ")"; } - void EmitSwap(size_t index) OVERRIDE { + void EmitSwap(size_t index) override { MoveOperands* move = moves_[index]; if (!message_.str().empty()) { message_ << " "; @@ -79,8 +80,8 @@ class TestParallelMoveResolverWithSwap : public ParallelMoveResolverWithSwap { message_ << ")"; } - void SpillScratch(int reg ATTRIBUTE_UNUSED) OVERRIDE {} - void RestoreScratch(int reg ATTRIBUTE_UNUSED) OVERRIDE {} + void SpillScratch(int reg ATTRIBUTE_UNUSED) override {} + void RestoreScratch(int reg ATTRIBUTE_UNUSED) override {} std::string GetMessage() const { return message_.str(); @@ -98,13 +99,13 @@ class TestParallelMoveResolverNoSwap : public ParallelMoveResolverNoSwap { explicit TestParallelMoveResolverNoSwap(ArenaAllocator* allocator) : ParallelMoveResolverNoSwap(allocator), scratch_index_(kScratchRegisterStartIndexForTest) {} - void PrepareForEmitNativeCode() OVERRIDE { + void PrepareForEmitNativeCode() override { scratch_index_ = kScratchRegisterStartIndexForTest; } - void FinishEmitNativeCode() OVERRIDE {} + void FinishEmitNativeCode() override {} - Location AllocateScratchLocationFor(Location::Kind kind) OVERRIDE { + Location AllocateScratchLocationFor(Location::Kind kind) override { if (kind == Location::kStackSlot || kind == Location::kFpuRegister || kind == Location::kRegister) { kind = Location::kRegister; @@ -124,9 +125,9 @@ class TestParallelMoveResolverNoSwap : public ParallelMoveResolverNoSwap { return scratch; } - void FreeScratchLocation(Location loc ATTRIBUTE_UNUSED) OVERRIDE {} + void FreeScratchLocation(Location loc ATTRIBUTE_UNUSED) override {} - void EmitMove(size_t index) OVERRIDE { + void EmitMove(size_t index) override { MoveOperands* move = moves_[index]; if (!message_.str().empty()) { message_ << " "; @@ -173,14 +174,14 @@ class ParallelMoveTest : public ::testing::Test { template<> const bool ParallelMoveTest<TestParallelMoveResolverWithSwap>::has_swap = true; template<> const bool ParallelMoveTest<TestParallelMoveResolverNoSwap>::has_swap = false; -typedef ::testing::Types<TestParallelMoveResolverWithSwap, TestParallelMoveResolverNoSwap> - ParallelMoveResolverTestTypes; +using ParallelMoveResolverTestTypes = + ::testing::Types<TestParallelMoveResolverWithSwap, TestParallelMoveResolverNoSwap>; TYPED_TEST_CASE(ParallelMoveTest, ParallelMoveResolverTestTypes); TYPED_TEST(ParallelMoveTest, Dependency) { - ArenaPool pool; + MallocArenaPool pool; ArenaAllocator allocator(&pool); { @@ -207,7 +208,7 @@ TYPED_TEST(ParallelMoveTest, Dependency) { } TYPED_TEST(ParallelMoveTest, Cycle) { - ArenaPool pool; + MallocArenaPool pool; ArenaAllocator allocator(&pool); { @@ -257,7 +258,7 @@ 
TYPED_TEST(ParallelMoveTest, Cycle) { } TYPED_TEST(ParallelMoveTest, ConstantLast) { - ArenaPool pool; + MallocArenaPool pool; ArenaAllocator allocator(&pool); TypeParam resolver(&allocator); HParallelMove* moves = new (&allocator) HParallelMove(&allocator); @@ -276,7 +277,7 @@ TYPED_TEST(ParallelMoveTest, ConstantLast) { } TYPED_TEST(ParallelMoveTest, Pairs) { - ArenaPool pool; + MallocArenaPool pool; ArenaAllocator allocator(&pool); { @@ -453,7 +454,7 @@ TYPED_TEST(ParallelMoveTest, Pairs) { } TYPED_TEST(ParallelMoveTest, MultiCycles) { - ArenaPool pool; + MallocArenaPool pool; ArenaAllocator allocator(&pool); { @@ -551,7 +552,7 @@ TYPED_TEST(ParallelMoveTest, MultiCycles) { // Test that we do 64bits moves before 32bits moves. TYPED_TEST(ParallelMoveTest, CyclesWith64BitsMoves) { - ArenaPool pool; + MallocArenaPool pool; ArenaAllocator allocator(&pool); { @@ -610,7 +611,7 @@ TYPED_TEST(ParallelMoveTest, CyclesWith64BitsMoves) { } TYPED_TEST(ParallelMoveTest, CyclesWith64BitsMoves2) { - ArenaPool pool; + MallocArenaPool pool; ArenaAllocator allocator(&pool); { diff --git a/compiler/optimizing/pc_relative_fixups_mips.cc b/compiler/optimizing/pc_relative_fixups_mips.cc index 9d5358514e..05208ff65c 100644 --- a/compiler/optimizing/pc_relative_fixups_mips.cc +++ b/compiler/optimizing/pc_relative_fixups_mips.cc @@ -58,7 +58,7 @@ class PCRelativeHandlerVisitor : public HGraphVisitor { DCHECK(base_ != nullptr); } - void VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) OVERRIDE { + void VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) override { // If this is an invoke with PC-relative load kind, // we need to add the base as the special input. if (invoke->HasPcRelativeMethodLoadKind() && @@ -70,13 +70,13 @@ class PCRelativeHandlerVisitor : public HGraphVisitor { } } - void VisitLoadClass(HLoadClass* load_class) OVERRIDE { + void VisitLoadClass(HLoadClass* load_class) override { HLoadClass::LoadKind load_kind = load_class->GetLoadKind(); switch (load_kind) { case HLoadClass::LoadKind::kBootImageLinkTimePcRelative: - case HLoadClass::LoadKind::kBootImageAddress: - case HLoadClass::LoadKind::kBootImageClassTable: + case HLoadClass::LoadKind::kBootImageRelRo: case HLoadClass::LoadKind::kBssEntry: + case HLoadClass::LoadKind::kJitBootImageAddress: // Add a base register for PC-relative literals on R2. InitializePCRelativeBasePointer(); load_class->AddSpecialInput(base_); @@ -86,13 +86,13 @@ class PCRelativeHandlerVisitor : public HGraphVisitor { } } - void VisitLoadString(HLoadString* load_string) OVERRIDE { + void VisitLoadString(HLoadString* load_string) override { HLoadString::LoadKind load_kind = load_string->GetLoadKind(); switch (load_kind) { case HLoadString::LoadKind::kBootImageLinkTimePcRelative: - case HLoadString::LoadKind::kBootImageAddress: - case HLoadString::LoadKind::kBootImageInternTable: + case HLoadString::LoadKind::kBootImageRelRo: case HLoadString::LoadKind::kBssEntry: + case HLoadString::LoadKind::kJitBootImageAddress: // Add a base register for PC-relative literals on R2. 
InitializePCRelativeBasePointer(); load_string->AddSpecialInput(base_); @@ -102,7 +102,7 @@ class PCRelativeHandlerVisitor : public HGraphVisitor { } } - void VisitPackedSwitch(HPackedSwitch* switch_insn) OVERRIDE { + void VisitPackedSwitch(HPackedSwitch* switch_insn) override { if (switch_insn->GetNumEntries() <= InstructionCodeGeneratorMIPS::kPackedSwitchJumpTableThreshold) { return; @@ -128,20 +128,21 @@ class PCRelativeHandlerVisitor : public HGraphVisitor { HMipsComputeBaseMethodAddress* base_; }; -void PcRelativeFixups::Run() { +bool PcRelativeFixups::Run() { CodeGeneratorMIPS* mips_codegen = down_cast<CodeGeneratorMIPS*>(codegen_); if (mips_codegen->GetInstructionSetFeatures().IsR6()) { // Do nothing for R6 because it has PC-relative addressing. - return; + return false; } if (graph_->HasIrreducibleLoops()) { // Do not run this optimization, as irreducible loops do not work with an instruction // that can be live-in at the irreducible loop header. - return; + return false; } PCRelativeHandlerVisitor visitor(graph_, codegen_); visitor.VisitInsertionOrder(); visitor.MoveBaseIfNeeded(); + return true; } } // namespace mips diff --git a/compiler/optimizing/pc_relative_fixups_mips.h b/compiler/optimizing/pc_relative_fixups_mips.h index ec2c711f8d..872370bcb7 100644 --- a/compiler/optimizing/pc_relative_fixups_mips.h +++ b/compiler/optimizing/pc_relative_fixups_mips.h @@ -34,7 +34,7 @@ class PcRelativeFixups : public HOptimization { static constexpr const char* kPcRelativeFixupsMipsPassName = "pc_relative_fixups_mips"; - void Run() OVERRIDE; + bool Run() override; private: CodeGenerator* codegen_; diff --git a/compiler/optimizing/pc_relative_fixups_x86.cc b/compiler/optimizing/pc_relative_fixups_x86.cc index f92f4b274a..1d8d1a6e90 100644 --- a/compiler/optimizing/pc_relative_fixups_x86.cc +++ b/compiler/optimizing/pc_relative_fixups_x86.cc @@ -41,60 +41,54 @@ class PCRelativeHandlerVisitor : public HGraphVisitor { } private: - void VisitAdd(HAdd* add) OVERRIDE { + void VisitAdd(HAdd* add) override { BinaryFP(add); } - void VisitSub(HSub* sub) OVERRIDE { + void VisitSub(HSub* sub) override { BinaryFP(sub); } - void VisitMul(HMul* mul) OVERRIDE { + void VisitMul(HMul* mul) override { BinaryFP(mul); } - void VisitDiv(HDiv* div) OVERRIDE { + void VisitDiv(HDiv* div) override { BinaryFP(div); } - void VisitCompare(HCompare* compare) OVERRIDE { + void VisitCompare(HCompare* compare) override { BinaryFP(compare); } - void VisitReturn(HReturn* ret) OVERRIDE { + void VisitReturn(HReturn* ret) override { HConstant* value = ret->InputAt(0)->AsConstant(); if ((value != nullptr && DataType::IsFloatingPointType(value->GetType()))) { ReplaceInput(ret, value, 0, true); } } - void VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) OVERRIDE { + void VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) override { HandleInvoke(invoke); } - void VisitInvokeVirtual(HInvokeVirtual* invoke) OVERRIDE { + void VisitInvokeVirtual(HInvokeVirtual* invoke) override { HandleInvoke(invoke); } - void VisitInvokeInterface(HInvokeInterface* invoke) OVERRIDE { + void VisitInvokeInterface(HInvokeInterface* invoke) override { HandleInvoke(invoke); } - void VisitLoadClass(HLoadClass* load_class) OVERRIDE { - HLoadClass::LoadKind load_kind = load_class->GetLoadKind(); - if (load_kind == HLoadClass::LoadKind::kBootImageLinkTimePcRelative || - load_kind == HLoadClass::LoadKind::kBootImageClassTable || - load_kind == HLoadClass::LoadKind::kBssEntry) { + void VisitLoadClass(HLoadClass* load_class) override { + if 
(load_class->HasPcRelativeLoadKind()) { HX86ComputeBaseMethodAddress* method_address = GetPCRelativeBasePointer(load_class); load_class->AddSpecialInput(method_address); } } - void VisitLoadString(HLoadString* load_string) OVERRIDE { - HLoadString::LoadKind load_kind = load_string->GetLoadKind(); - if (load_kind == HLoadString::LoadKind::kBootImageLinkTimePcRelative || - load_kind == HLoadString::LoadKind::kBootImageInternTable || - load_kind == HLoadString::LoadKind::kBssEntry) { + void VisitLoadString(HLoadString* load_string) override { + if (load_string->HasPcRelativeLoadKind()) { HX86ComputeBaseMethodAddress* method_address = GetPCRelativeBasePointer(load_string); load_string->AddSpecialInput(method_address); } @@ -107,31 +101,31 @@ class PCRelativeHandlerVisitor : public HGraphVisitor { } } - void VisitEqual(HEqual* cond) OVERRIDE { + void VisitEqual(HEqual* cond) override { BinaryFP(cond); } - void VisitNotEqual(HNotEqual* cond) OVERRIDE { + void VisitNotEqual(HNotEqual* cond) override { BinaryFP(cond); } - void VisitLessThan(HLessThan* cond) OVERRIDE { + void VisitLessThan(HLessThan* cond) override { BinaryFP(cond); } - void VisitLessThanOrEqual(HLessThanOrEqual* cond) OVERRIDE { + void VisitLessThanOrEqual(HLessThanOrEqual* cond) override { BinaryFP(cond); } - void VisitGreaterThan(HGreaterThan* cond) OVERRIDE { + void VisitGreaterThan(HGreaterThan* cond) override { BinaryFP(cond); } - void VisitGreaterThanOrEqual(HGreaterThanOrEqual* cond) OVERRIDE { + void VisitGreaterThanOrEqual(HGreaterThanOrEqual* cond) override { BinaryFP(cond); } - void VisitNeg(HNeg* neg) OVERRIDE { + void VisitNeg(HNeg* neg) override { if (DataType::IsFloatingPointType(neg->GetType())) { // We need to replace the HNeg with a HX86FPNeg in order to address the constant area. HX86ComputeBaseMethodAddress* method_address = GetPCRelativeBasePointer(neg); @@ -146,7 +140,7 @@ class PCRelativeHandlerVisitor : public HGraphVisitor { } } - void VisitPackedSwitch(HPackedSwitch* switch_insn) OVERRIDE { + void VisitPackedSwitch(HPackedSwitch* switch_insn) override { if (switch_insn->GetNumEntries() <= InstructionCodeGeneratorX86::kPackedSwitchJumpTableThreshold) { return; @@ -199,18 +193,19 @@ class PCRelativeHandlerVisitor : public HGraphVisitor { } void HandleInvoke(HInvoke* invoke) { - // If this is an invoke-static/-direct with PC-relative dex cache array - // addressing, we need the PC-relative address base. HInvokeStaticOrDirect* invoke_static_or_direct = invoke->AsInvokeStaticOrDirect(); - // We can't add a pointer to the constant area if we already have a current - // method pointer. This may arise when sharpening doesn't remove the current - // method pointer from the invoke. - if (invoke_static_or_direct != nullptr && - invoke_static_or_direct->HasCurrentMethodInput()) { + + // We can't add the method address if we already have a current method pointer. + // This may arise when sharpening doesn't remove the current method pointer from the invoke. + if (invoke_static_or_direct != nullptr && invoke_static_or_direct->HasCurrentMethodInput()) { + // Note: This happens only for recursive calls (including compiling an intrinsic + // by faking a call to itself; we use kRuntimeCall for this case). DCHECK(!invoke_static_or_direct->HasPcRelativeMethodLoadKind()); return; } + // If this is an invoke-static/-direct with PC-relative addressing (within boot image + // or using .bss or .data.bimg.rel.ro), we need the PC-relative address base. 
bool base_added = false; if (invoke_static_or_direct != nullptr && invoke_static_or_direct->HasPcRelativeMethodLoadKind() && @@ -230,15 +225,16 @@ class PCRelativeHandlerVisitor : public HGraphVisitor { } } - // These intrinsics need the constant area. switch (invoke->GetIntrinsic()) { - case Intrinsics::kMathAbsDouble: - case Intrinsics::kMathAbsFloat: - case Intrinsics::kMathMaxDoubleDouble: - case Intrinsics::kMathMaxFloatFloat: - case Intrinsics::kMathMinDoubleDouble: - case Intrinsics::kMathMinFloatFloat: + case Intrinsics::kIntegerValueOf: + // This intrinsic can be call free if it loads the address of the boot image object. + // If we're compiling PIC, we need the address base for loading from .data.bimg.rel.ro. + if (!codegen_->GetCompilerOptions().GetCompilePic()) { + break; + } + FALLTHROUGH_INTENDED; case Intrinsics::kMathRoundFloat: + // This intrinsic needs the constant area. if (!base_added) { DCHECK(invoke_static_or_direct != nullptr); DCHECK(!invoke_static_or_direct->HasCurrentMethodInput()); @@ -259,10 +255,11 @@ class PCRelativeHandlerVisitor : public HGraphVisitor { HX86ComputeBaseMethodAddress* base_; }; -void PcRelativeFixups::Run() { +bool PcRelativeFixups::Run() { PCRelativeHandlerVisitor visitor(graph_, codegen_); visitor.VisitInsertionOrder(); visitor.MoveBaseIfNeeded(); + return true; } } // namespace x86 diff --git a/compiler/optimizing/pc_relative_fixups_x86.h b/compiler/optimizing/pc_relative_fixups_x86.h index 72fa71ea94..3b470a6502 100644 --- a/compiler/optimizing/pc_relative_fixups_x86.h +++ b/compiler/optimizing/pc_relative_fixups_x86.h @@ -34,7 +34,7 @@ class PcRelativeFixups : public HOptimization { static constexpr const char* kPcRelativeFixupsX86PassName = "pc_relative_fixups_x86"; - void Run() OVERRIDE; + bool Run() override; private: CodeGenerator* codegen_; diff --git a/compiler/optimizing/prepare_for_register_allocation.cc b/compiler/optimizing/prepare_for_register_allocation.cc index f843c008d8..fbdbf9d086 100644 --- a/compiler/optimizing/prepare_for_register_allocation.cc +++ b/compiler/optimizing/prepare_for_register_allocation.cc @@ -17,7 +17,8 @@ #include "prepare_for_register_allocation.h" #include "dex/dex_file_types.h" -#include "jni_internal.h" +#include "driver/compiler_options.h" +#include "jni/jni_internal.h" #include "optimizing_compiler_stats.h" #include "well_known_classes.h" @@ -27,15 +28,42 @@ void PrepareForRegisterAllocation::Run() { // Order does not matter. for (HBasicBlock* block : GetGraph()->GetReversePostOrder()) { // No need to visit the phis. - for (HInstructionIterator inst_it(block->GetInstructions()); !inst_it.Done(); + for (HInstructionIteratorHandleChanges inst_it(block->GetInstructions()); !inst_it.Done(); inst_it.Advance()) { inst_it.Current()->Accept(this); } } } +void PrepareForRegisterAllocation::VisitCheckCast(HCheckCast* check_cast) { + // Record only those bitstring type checks that make it to the codegen stage. + if (check_cast->GetTypeCheckKind() == TypeCheckKind::kBitstringCheck) { + MaybeRecordStat(stats_, MethodCompilationStat::kBitstringTypeCheck); + } +} + +void PrepareForRegisterAllocation::VisitInstanceOf(HInstanceOf* instance_of) { + // Record only those bitstring type checks that make it to the codegen stage. 
+ if (instance_of->GetTypeCheckKind() == TypeCheckKind::kBitstringCheck) { + MaybeRecordStat(stats_, MethodCompilationStat::kBitstringTypeCheck); + } +} + void PrepareForRegisterAllocation::VisitNullCheck(HNullCheck* check) { check->ReplaceWith(check->InputAt(0)); + if (compiler_options_.GetImplicitNullChecks()) { + HInstruction* next = check->GetNext(); + + // The `PrepareForRegisterAllocation` pass removes `HBoundType` from the graph, + // so do it ourselves now to not prevent optimizations. + while (next->IsBoundType()) { + next = next->GetNext(); + VisitBoundType(next->GetPrevious()->AsBoundType()); + } + if (next->CanDoImplicitNullCheckOn(check->InputAt(0))) { + check->MarkEmittedAtUseSite(); + } + } } void PrepareForRegisterAllocation::VisitDivZeroCheck(HDivZeroCheck* check) { @@ -59,9 +87,9 @@ void PrepareForRegisterAllocation::VisitBoundsCheck(HBoundsCheck* check) { if (GetGraph()->GetArtMethod() != char_at_method) { ArenaAllocator* allocator = GetGraph()->GetAllocator(); HEnvironment* environment = new (allocator) HEnvironment(allocator, - /* number_of_vregs */ 0u, + /* number_of_vregs= */ 0u, char_at_method, - /* dex_pc */ dex::kDexNoIndex, + /* dex_pc= */ dex::kDexNoIndex, check); check->InsertRawEnvironment(environment); } @@ -136,7 +164,9 @@ void PrepareForRegisterAllocation::VisitClinitCheck(HClinitCheck* check) { if (can_merge_with_load_class && !load_class->HasUses()) { load_class->GetBlock()->RemoveInstruction(load_class); } - } else if (can_merge_with_load_class && !load_class->NeedsAccessCheck()) { + } else if (can_merge_with_load_class && + load_class->GetLoadKind() != HLoadClass::LoadKind::kRuntimeCall) { + DCHECK(!load_class->NeedsAccessCheck()); // Pass the initialization duty to the `HLoadClass` instruction, // and remove the instruction from the graph. DCHECK(load_class->HasEnvironment()); @@ -274,4 +304,13 @@ bool PrepareForRegisterAllocation::CanMoveClinitCheck(HInstruction* input, return true; } +void PrepareForRegisterAllocation::VisitTypeConversion(HTypeConversion* instruction) { + // For simplicity, our code generators don't handle implicit type conversion, so ensure + // there are none before hitting codegen. 
+ if (instruction->IsImplicitConversion()) { + instruction->ReplaceWith(instruction->GetInput()); + instruction->GetBlock()->RemoveInstruction(instruction); + } +} + } // namespace art diff --git a/compiler/optimizing/prepare_for_register_allocation.h b/compiler/optimizing/prepare_for_register_allocation.h index 2c64f016c1..e0bb76eb22 100644 --- a/compiler/optimizing/prepare_for_register_allocation.h +++ b/compiler/optimizing/prepare_for_register_allocation.h @@ -21,6 +21,7 @@ namespace art { +class CompilerOptions; class OptimizingCompilerStats; /** @@ -30,9 +31,11 @@ class OptimizingCompilerStats; */ class PrepareForRegisterAllocation : public HGraphDelegateVisitor { public: - explicit PrepareForRegisterAllocation(HGraph* graph, - OptimizingCompilerStats* stats = nullptr) - : HGraphDelegateVisitor(graph, stats) {} + PrepareForRegisterAllocation(HGraph* graph, + const CompilerOptions& compiler_options, + OptimizingCompilerStats* stats = nullptr) + : HGraphDelegateVisitor(graph, stats), + compiler_options_(compiler_options) {} void Run(); @@ -40,20 +43,25 @@ class PrepareForRegisterAllocation : public HGraphDelegateVisitor { "prepare_for_register_allocation"; private: - void VisitNullCheck(HNullCheck* check) OVERRIDE; - void VisitDivZeroCheck(HDivZeroCheck* check) OVERRIDE; - void VisitBoundsCheck(HBoundsCheck* check) OVERRIDE; - void VisitBoundType(HBoundType* bound_type) OVERRIDE; - void VisitArraySet(HArraySet* instruction) OVERRIDE; - void VisitClinitCheck(HClinitCheck* check) OVERRIDE; - void VisitCondition(HCondition* condition) OVERRIDE; - void VisitConstructorFence(HConstructorFence* constructor_fence) OVERRIDE; - void VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) OVERRIDE; - void VisitDeoptimize(HDeoptimize* deoptimize) OVERRIDE; + void VisitCheckCast(HCheckCast* check_cast) override; + void VisitInstanceOf(HInstanceOf* instance_of) override; + void VisitNullCheck(HNullCheck* check) override; + void VisitDivZeroCheck(HDivZeroCheck* check) override; + void VisitBoundsCheck(HBoundsCheck* check) override; + void VisitBoundType(HBoundType* bound_type) override; + void VisitArraySet(HArraySet* instruction) override; + void VisitClinitCheck(HClinitCheck* check) override; + void VisitCondition(HCondition* condition) override; + void VisitConstructorFence(HConstructorFence* constructor_fence) override; + void VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) override; + void VisitDeoptimize(HDeoptimize* deoptimize) override; + void VisitTypeConversion(HTypeConversion* instruction) override; bool CanMoveClinitCheck(HInstruction* input, HInstruction* user) const; bool CanEmitConditionAt(HCondition* condition, HInstruction* user) const; + const CompilerOptions& compiler_options_; + DISALLOW_COPY_AND_ASSIGN(PrepareForRegisterAllocation); }; diff --git a/compiler/optimizing/pretty_printer.h b/compiler/optimizing/pretty_printer.h index c6579dc5e0..8ef9ce4e8b 100644 --- a/compiler/optimizing/pretty_printer.h +++ b/compiler/optimizing/pretty_printer.h @@ -33,7 +33,7 @@ class HPrettyPrinter : public HGraphVisitor { PrintString(": "); } - void VisitInstruction(HInstruction* instruction) OVERRIDE { + void VisitInstruction(HInstruction* instruction) override { PrintPreInstruction(instruction); PrintString(instruction->DebugName()); PrintPostInstruction(instruction); @@ -70,7 +70,7 @@ class HPrettyPrinter : public HGraphVisitor { PrintNewLine(); } - void VisitBasicBlock(HBasicBlock* block) OVERRIDE { + void VisitBasicBlock(HBasicBlock* block) override { PrintString("BasicBlock "); 
PrintInt(block->GetBlockId()); const ArenaVector<HBasicBlock*>& predecessors = block->GetPredecessors(); @@ -108,15 +108,15 @@ class StringPrettyPrinter : public HPrettyPrinter { explicit StringPrettyPrinter(HGraph* graph) : HPrettyPrinter(graph), str_(""), current_block_(nullptr) { } - void PrintInt(int value) OVERRIDE { + void PrintInt(int value) override { str_ += android::base::StringPrintf("%d", value); } - void PrintString(const char* value) OVERRIDE { + void PrintString(const char* value) override { str_ += value; } - void PrintNewLine() OVERRIDE { + void PrintNewLine() override { str_ += '\n'; } @@ -124,12 +124,12 @@ class StringPrettyPrinter : public HPrettyPrinter { std::string str() const { return str_; } - void VisitBasicBlock(HBasicBlock* block) OVERRIDE { + void VisitBasicBlock(HBasicBlock* block) override { current_block_ = block; HPrettyPrinter::VisitBasicBlock(block); } - void VisitGoto(HGoto* gota) OVERRIDE { + void VisitGoto(HGoto* gota) override { PrintString(" "); PrintInt(gota->GetId()); PrintString(": Goto "); diff --git a/compiler/optimizing/reference_type_propagation.cc b/compiler/optimizing/reference_type_propagation.cc index 67a61fc01d..4929e0a3a1 100644 --- a/compiler/optimizing/reference_type_propagation.cc +++ b/compiler/optimizing/reference_type_propagation.cc @@ -22,6 +22,7 @@ #include "base/scoped_arena_containers.h" #include "base/enums.h" #include "class_linker-inl.h" +#include "class_root.h" #include "handle_scope-inl.h" #include "mirror/class-inl.h" #include "mirror/dex_cache.h" @@ -40,31 +41,40 @@ static inline ObjPtr<mirror::DexCache> FindDexCacheWithHint( } static inline ReferenceTypeInfo::TypeHandle GetRootHandle(VariableSizedHandleScope* handles, - ClassLinker::ClassRoot class_root, + ClassRoot class_root, ReferenceTypeInfo::TypeHandle* cache) { if (!ReferenceTypeInfo::IsValidHandle(*cache)) { // Mutator lock is required for NewHandle. 
- ClassLinker* linker = Runtime::Current()->GetClassLinker(); ScopedObjectAccess soa(Thread::Current()); - *cache = handles->NewHandle(linker->GetClassRoot(class_root)); + *cache = handles->NewHandle(GetClassRoot(class_root)); } return *cache; } ReferenceTypeInfo::TypeHandle ReferenceTypePropagation::HandleCache::GetObjectClassHandle() { - return GetRootHandle(handles_, ClassLinker::kJavaLangObject, &object_class_handle_); + return GetRootHandle(handles_, ClassRoot::kJavaLangObject, &object_class_handle_); } ReferenceTypeInfo::TypeHandle ReferenceTypePropagation::HandleCache::GetClassClassHandle() { - return GetRootHandle(handles_, ClassLinker::kJavaLangClass, &class_class_handle_); + return GetRootHandle(handles_, ClassRoot::kJavaLangClass, &class_class_handle_); +} + +ReferenceTypeInfo::TypeHandle ReferenceTypePropagation::HandleCache::GetMethodHandleClassHandle() { + return GetRootHandle(handles_, + ClassRoot::kJavaLangInvokeMethodHandleImpl, + &method_handle_class_handle_); +} + +ReferenceTypeInfo::TypeHandle ReferenceTypePropagation::HandleCache::GetMethodTypeClassHandle() { + return GetRootHandle(handles_, ClassRoot::kJavaLangInvokeMethodType, &method_type_class_handle_); } ReferenceTypeInfo::TypeHandle ReferenceTypePropagation::HandleCache::GetStringClassHandle() { - return GetRootHandle(handles_, ClassLinker::kJavaLangString, &string_class_handle_); + return GetRootHandle(handles_, ClassRoot::kJavaLangString, &string_class_handle_); } ReferenceTypeInfo::TypeHandle ReferenceTypePropagation::HandleCache::GetThrowableClassHandle() { - return GetRootHandle(handles_, ClassLinker::kJavaLangThrowable, &throwable_class_handle_); + return GetRootHandle(handles_, ClassRoot::kJavaLangThrowable, &throwable_class_handle_); } class ReferenceTypePropagation::RTPVisitor : public HGraphDelegateVisitor { @@ -84,26 +94,29 @@ class ReferenceTypePropagation::RTPVisitor : public HGraphDelegateVisitor { worklist_.reserve(kDefaultWorklistSize); } - void VisitDeoptimize(HDeoptimize* deopt) OVERRIDE; - void VisitNewInstance(HNewInstance* new_instance) OVERRIDE; - void VisitLoadClass(HLoadClass* load_class) OVERRIDE; - void VisitClinitCheck(HClinitCheck* clinit_check) OVERRIDE; - void VisitLoadString(HLoadString* instr) OVERRIDE; - void VisitLoadException(HLoadException* instr) OVERRIDE; - void VisitNewArray(HNewArray* instr) OVERRIDE; - void VisitParameterValue(HParameterValue* instr) OVERRIDE; - void VisitInstanceFieldGet(HInstanceFieldGet* instr) OVERRIDE; - void VisitStaticFieldGet(HStaticFieldGet* instr) OVERRIDE; - void VisitUnresolvedInstanceFieldGet(HUnresolvedInstanceFieldGet* instr) OVERRIDE; - void VisitUnresolvedStaticFieldGet(HUnresolvedStaticFieldGet* instr) OVERRIDE; - void VisitInvoke(HInvoke* instr) OVERRIDE; - void VisitArrayGet(HArrayGet* instr) OVERRIDE; - void VisitCheckCast(HCheckCast* instr) OVERRIDE; - void VisitBoundType(HBoundType* instr) OVERRIDE; - void VisitNullCheck(HNullCheck* instr) OVERRIDE; - void VisitPhi(HPhi* phi); - - void VisitBasicBlock(HBasicBlock* block); + void VisitDeoptimize(HDeoptimize* deopt) override; + void VisitNewInstance(HNewInstance* new_instance) override; + void VisitLoadClass(HLoadClass* load_class) override; + void VisitInstanceOf(HInstanceOf* load_class) override; + void VisitClinitCheck(HClinitCheck* clinit_check) override; + void VisitLoadMethodHandle(HLoadMethodHandle* instr) override; + void VisitLoadMethodType(HLoadMethodType* instr) override; + void VisitLoadString(HLoadString* instr) override; + void VisitLoadException(HLoadException* instr) 
override; + void VisitNewArray(HNewArray* instr) override; + void VisitParameterValue(HParameterValue* instr) override; + void VisitInstanceFieldGet(HInstanceFieldGet* instr) override; + void VisitStaticFieldGet(HStaticFieldGet* instr) override; + void VisitUnresolvedInstanceFieldGet(HUnresolvedInstanceFieldGet* instr) override; + void VisitUnresolvedStaticFieldGet(HUnresolvedStaticFieldGet* instr) override; + void VisitInvoke(HInvoke* instr) override; + void VisitArrayGet(HArrayGet* instr) override; + void VisitCheckCast(HCheckCast* instr) override; + void VisitBoundType(HBoundType* instr) override; + void VisitNullCheck(HNullCheck* instr) override; + void VisitPhi(HPhi* phi) override; + + void VisitBasicBlock(HBasicBlock* block) override; void ProcessWorklist(); private: @@ -171,6 +184,12 @@ void ReferenceTypePropagation::ValidateTypes() { << "NullCheck " << instr->GetReferenceTypeInfo() << "Input(0) " << instr->InputAt(0)->GetReferenceTypeInfo(); } + } else if (instr->IsInstanceOf()) { + HInstanceOf* iof = instr->AsInstanceOf(); + DCHECK(!iof->GetTargetClassRTI().IsValid() || iof->GetTargetClassRTI().IsExact()); + } else if (instr->IsCheckCast()) { + HCheckCast* check = instr->AsCheckCast(); + DCHECK(!check->GetTargetClassRTI().IsValid() || check->GetTargetClassRTI().IsExact()); } } } @@ -259,7 +278,7 @@ static void BoundTypeIn(HInstruction* receiver, if (ShouldCreateBoundType( insert_point, receiver, class_rti, start_instruction, start_block)) { bound_type = new (receiver->GetBlock()->GetGraph()->GetAllocator()) HBoundType(receiver); - bound_type->SetUpperBound(class_rti, /* bound_can_be_null */ false); + bound_type->SetUpperBound(class_rti, /* can_be_null= */ false); start_block->InsertInstructionBefore(bound_type, insert_point); // To comply with the RTP algorithm, don't type the bound type just yet, it will // be handled in RTPVisitor::VisitBoundType. @@ -320,8 +339,7 @@ static void BoundTypeForClassCheck(HInstruction* check) { { ScopedObjectAccess soa(Thread::Current()); - ClassLinker* class_linker = Runtime::Current()->GetClassLinker(); - ArtField* field = class_linker->GetClassRoot(ClassLinker::kJavaLangObject)->GetInstanceField(0); + ArtField* field = GetClassRoot<mirror::Object>()->GetInstanceField(0); DCHECK_EQ(std::string(field->GetName()), "shadow$_klass_"); if (field_get->GetFieldInfo().GetField() != field) { return; @@ -332,7 +350,7 @@ static void BoundTypeForClassCheck(HInstruction* check) { HBasicBlock* trueBlock = compare->IsEqual() ? check->AsIf()->IfTrueSuccessor() : check->AsIf()->IfFalseSuccessor(); - BoundTypeIn(receiver, trueBlock, /* start_instruction */ nullptr, class_rti); + BoundTypeIn(receiver, trueBlock, /* start_instruction= */ nullptr, class_rti); } else { DCHECK(check->IsDeoptimize()); if (compare->IsEqual() && check->AsDeoptimize()->GuardsAnInput()) { @@ -341,7 +359,7 @@ static void BoundTypeForClassCheck(HInstruction* check) { } } -void ReferenceTypePropagation::Run() { +bool ReferenceTypePropagation::Run() { RTPVisitor visitor(graph_, class_loader_, hint_dex_cache_, &handle_cache_, is_first_run_); // To properly propagate type info we need to visit in the dominator-based order. 
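Several passes in this change (PcRelativeFixups above, ReferenceTypePropagation here) switch from void Run() to bool Run(). A minimal sketch of a pass under the new contract, assuming the return value only reports whether the pass actually did any work; the MyPass name is illustrative and not part of ART:

  // Hypothetical pass showing the bool-returning Run() contract.
  class MyPass : public HOptimization {
   public:
    explicit MyPass(HGraph* graph) : HOptimization(graph, "my_pass") {}

    bool Run() override {
      if (graph_->HasIrreducibleLoops()) {
        // Nothing was changed; tell the caller the pass did not run.
        return false;
      }
      // ... transform the graph here ...
      return true;  // The pass ran and may have modified the graph.
    }
  };

This mirrors the pattern visible in PcRelativeFixups::Run above, which returns false on its early-out paths and true once the visitor has been applied.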
@@ -353,6 +371,7 @@ void ReferenceTypePropagation::Run() { visitor.ProcessWorklist(); ValidateTypes(); + return true; } void ReferenceTypePropagation::RTPVisitor::VisitBasicBlock(HBasicBlock* block) { @@ -408,9 +427,9 @@ void ReferenceTypePropagation::RTPVisitor::BoundTypeForIfNotNull(HBasicBlock* bl : ifInstruction->IfFalseSuccessor(); ReferenceTypeInfo object_rti = ReferenceTypeInfo::Create( - handle_cache_->GetObjectClassHandle(), /* is_exact */ false); + handle_cache_->GetObjectClassHandle(), /* is_exact= */ false); - BoundTypeIn(obj, notNullBlock, /* start_instruction */ nullptr, object_rti); + BoundTypeIn(obj, notNullBlock, /* start_instruction= */ nullptr, object_rti); } // Returns true if one of the patterns below has been recognized. If so, the @@ -499,8 +518,7 @@ void ReferenceTypePropagation::RTPVisitor::BoundTypeForIfInstanceOf(HBasicBlock* return; } - HLoadClass* load_class = instanceOf->InputAt(1)->AsLoadClass(); - ReferenceTypeInfo class_rti = load_class->GetLoadedClassRTI(); + ReferenceTypeInfo class_rti = instanceOf->GetTargetClassRTI(); if (!class_rti.IsValid()) { // He have loaded an unresolved class. Don't bother bounding the type. return; @@ -520,10 +538,10 @@ void ReferenceTypePropagation::RTPVisitor::BoundTypeForIfInstanceOf(HBasicBlock* { ScopedObjectAccess soa(Thread::Current()); if (!class_rti.GetTypeHandle()->CannotBeAssignedFromOtherTypes()) { - class_rti = ReferenceTypeInfo::Create(class_rti.GetTypeHandle(), /* is_exact */ false); + class_rti = ReferenceTypeInfo::Create(class_rti.GetTypeHandle(), /* is_exact= */ false); } } - BoundTypeIn(obj, instanceOfTrueBlock, /* start_instruction */ nullptr, class_rti); + BoundTypeIn(obj, instanceOfTrueBlock, /* start_instruction= */ nullptr, class_rti); } void ReferenceTypePropagation::RTPVisitor::SetClassAsTypeInfo(HInstruction* instr, @@ -543,9 +561,9 @@ void ReferenceTypePropagation::RTPVisitor::SetClassAsTypeInfo(HInstruction* inst // Use a null loader, the target method is in a boot classpath dex file. 
Handle<mirror::ClassLoader> loader(hs.NewHandle<mirror::ClassLoader>(nullptr)); ArtMethod* method = cl->ResolveMethod<ClassLinker::ResolveMode::kNoChecks>( - dex_method_index, dex_cache, loader, /* referrer */ nullptr, kDirect); + dex_method_index, dex_cache, loader, /* referrer= */ nullptr, kDirect); DCHECK(method != nullptr); - mirror::Class* declaring_class = method->GetDeclaringClass(); + ObjPtr<mirror::Class> declaring_class = method->GetDeclaringClass(); DCHECK(declaring_class != nullptr); DCHECK(declaring_class->IsStringClass()) << "Expected String class: " << declaring_class->PrettyDescriptor(); @@ -553,8 +571,8 @@ void ReferenceTypePropagation::RTPVisitor::SetClassAsTypeInfo(HInstruction* inst << "Expected String.<init>: " << method->PrettyMethod(); } instr->SetReferenceTypeInfo( - ReferenceTypeInfo::Create(handle_cache_->GetStringClassHandle(), /* is_exact */ true)); - } else if (IsAdmissible(klass.Ptr())) { + ReferenceTypeInfo::Create(handle_cache_->GetStringClassHandle(), /* is_exact= */ true)); + } else if (IsAdmissible(klass)) { ReferenceTypeInfo::TypeHandle handle = handle_cache_->NewHandle(klass); is_exact = is_exact || handle->CannotBeAssignedFromOtherTypes(); instr->SetReferenceTypeInfo(ReferenceTypeInfo::Create(handle, is_exact)); @@ -582,12 +600,12 @@ void ReferenceTypePropagation::RTPVisitor::UpdateReferenceTypeInfo(HInstruction* void ReferenceTypePropagation::RTPVisitor::VisitNewInstance(HNewInstance* instr) { ScopedObjectAccess soa(Thread::Current()); - SetClassAsTypeInfo(instr, instr->GetLoadClass()->GetClass().Get(), /* is_exact */ true); + SetClassAsTypeInfo(instr, instr->GetLoadClass()->GetClass().Get(), /* is_exact= */ true); } void ReferenceTypePropagation::RTPVisitor::VisitNewArray(HNewArray* instr) { ScopedObjectAccess soa(Thread::Current()); - SetClassAsTypeInfo(instr, instr->GetLoadClass()->GetClass().Get(), /* is_exact */ true); + SetClassAsTypeInfo(instr, instr->GetLoadClass()->GetClass().Get(), /* is_exact= */ true); } void ReferenceTypePropagation::RTPVisitor::VisitParameterValue(HParameterValue* instr) { @@ -596,7 +614,7 @@ void ReferenceTypePropagation::RTPVisitor::VisitParameterValue(HParameterValue* UpdateReferenceTypeInfo(instr, instr->GetTypeIndex(), instr->GetDexFile(), - /* is_exact */ false); + /* is_exact= */ false); } } @@ -614,7 +632,7 @@ void ReferenceTypePropagation::RTPVisitor::UpdateFieldAccessTypeInfo(HInstructio klass = info.GetField()->LookupResolvedType(); } - SetClassAsTypeInfo(instr, klass, /* is_exact */ false); + SetClassAsTypeInfo(instr, klass, /* is_exact= */ false); } void ReferenceTypePropagation::RTPVisitor::VisitInstanceFieldGet(HInstanceFieldGet* instr) { @@ -643,36 +661,52 @@ void ReferenceTypePropagation::RTPVisitor::VisitUnresolvedStaticFieldGet( void ReferenceTypePropagation::RTPVisitor::VisitLoadClass(HLoadClass* instr) { ScopedObjectAccess soa(Thread::Current()); - Handle<mirror::Class> resolved_class = instr->GetClass(); - if (IsAdmissible(resolved_class.Get())) { - instr->SetLoadedClassRTI(ReferenceTypeInfo::Create( - resolved_class, /* is_exact */ true)); + if (IsAdmissible(instr->GetClass().Get())) { + instr->SetValidLoadedClassRTI(); } instr->SetReferenceTypeInfo( - ReferenceTypeInfo::Create(handle_cache_->GetClassClassHandle(), /* is_exact */ true)); + ReferenceTypeInfo::Create(handle_cache_->GetClassClassHandle(), /* is_exact= */ true)); +} + +void ReferenceTypePropagation::RTPVisitor::VisitInstanceOf(HInstanceOf* instr) { + ScopedObjectAccess soa(Thread::Current()); + if 
(IsAdmissible(instr->GetClass().Get())) { + instr->SetValidTargetClassRTI(); + } } void ReferenceTypePropagation::RTPVisitor::VisitClinitCheck(HClinitCheck* instr) { instr->SetReferenceTypeInfo(instr->InputAt(0)->GetReferenceTypeInfo()); } +void ReferenceTypePropagation::RTPVisitor::VisitLoadMethodHandle(HLoadMethodHandle* instr) { + instr->SetReferenceTypeInfo(ReferenceTypeInfo::Create( + handle_cache_->GetMethodHandleClassHandle(), + /* is_exact= */ true)); +} + +void ReferenceTypePropagation::RTPVisitor::VisitLoadMethodType(HLoadMethodType* instr) { + instr->SetReferenceTypeInfo( + ReferenceTypeInfo::Create(handle_cache_->GetMethodTypeClassHandle(), /* is_exact= */ true)); +} + void ReferenceTypePropagation::RTPVisitor::VisitLoadString(HLoadString* instr) { instr->SetReferenceTypeInfo( - ReferenceTypeInfo::Create(handle_cache_->GetStringClassHandle(), /* is_exact */ true)); + ReferenceTypeInfo::Create(handle_cache_->GetStringClassHandle(), /* is_exact= */ true)); } void ReferenceTypePropagation::RTPVisitor::VisitLoadException(HLoadException* instr) { DCHECK(instr->GetBlock()->IsCatchBlock()); TryCatchInformation* catch_info = instr->GetBlock()->GetTryCatchInformation(); - if (catch_info->IsCatchAllTypeIndex()) { - instr->SetReferenceTypeInfo( - ReferenceTypeInfo::Create(handle_cache_->GetThrowableClassHandle(), /* is_exact */ false)); - } else { + if (catch_info->IsValidTypeIndex()) { UpdateReferenceTypeInfo(instr, catch_info->GetCatchTypeIndex(), catch_info->GetCatchDexFile(), - /* is_exact */ false); + /* is_exact= */ false); + } else { + instr->SetReferenceTypeInfo( + ReferenceTypeInfo::Create(handle_cache_->GetThrowableClassHandle(), /* is_exact= */ false)); } } @@ -702,7 +736,7 @@ void ReferenceTypePropagation::RTPVisitor::VisitBoundType(HBoundType* instr) { // bound type is dead. To not confuse potential other optimizations, we mark // the bound as non-exact. instr->SetReferenceTypeInfo( - ReferenceTypeInfo::Create(class_rti.GetTypeHandle(), /* is_exact */ false)); + ReferenceTypeInfo::Create(class_rti.GetTypeHandle(), /* is_exact= */ false)); } } else { // Object not typed yet. Leave BoundType untyped for now rather than @@ -719,8 +753,6 @@ void ReferenceTypePropagation::RTPVisitor::VisitBoundType(HBoundType* instr) { } void ReferenceTypePropagation::RTPVisitor::VisitCheckCast(HCheckCast* check_cast) { - HLoadClass* load_class = check_cast->InputAt(1)->AsLoadClass(); - ReferenceTypeInfo class_rti = load_class->GetLoadedClassRTI(); HBoundType* bound_type = check_cast->GetNext()->AsBoundType(); if (bound_type == nullptr || bound_type->GetUpperBound().IsValid()) { // The next instruction is not an uninitialized BoundType. This must be @@ -729,12 +761,14 @@ void ReferenceTypePropagation::RTPVisitor::VisitCheckCast(HCheckCast* check_cast } DCHECK_EQ(bound_type->InputAt(0), check_cast->InputAt(0)); - if (class_rti.IsValid()) { + ScopedObjectAccess soa(Thread::Current()); + Handle<mirror::Class> klass = check_cast->GetClass(); + if (IsAdmissible(klass.Get())) { DCHECK(is_first_run_); - ScopedObjectAccess soa(Thread::Current()); + check_cast->SetValidTargetClassRTI(); // This is the first run of RTP and class is resolved. - bool is_exact = class_rti.GetTypeHandle()->CannotBeAssignedFromOtherTypes(); - bound_type->SetUpperBound(ReferenceTypeInfo::Create(class_rti.GetTypeHandle(), is_exact), + bool is_exact = klass->CannotBeAssignedFromOtherTypes(); + bound_type->SetUpperBound(ReferenceTypeInfo::Create(klass, is_exact), /* CheckCast succeeds for nulls. 
*/ true); } else { // This is the first run of RTP and class is unresolved. Remove the binding. @@ -880,7 +914,7 @@ void ReferenceTypePropagation::RTPVisitor::VisitInvoke(HInvoke* instr) { ScopedObjectAccess soa(Thread::Current()); ArtMethod* method = instr->GetResolvedMethod(); ObjPtr<mirror::Class> klass = (method == nullptr) ? nullptr : method->LookupResolvedReturnType(); - SetClassAsTypeInfo(instr, klass, /* is_exact */ false); + SetClassAsTypeInfo(instr, klass, /* is_exact= */ false); } void ReferenceTypePropagation::RTPVisitor::VisitArrayGet(HArrayGet* instr) { @@ -913,7 +947,7 @@ void ReferenceTypePropagation::RTPVisitor::UpdateBoundType(HBoundType* instr) { // bound type is dead. To not confuse potential other optimizations, we mark // the bound as non-exact. instr->SetReferenceTypeInfo( - ReferenceTypeInfo::Create(upper_bound_rti.GetTypeHandle(), /* is_exact */ false)); + ReferenceTypeInfo::Create(upper_bound_rti.GetTypeHandle(), /* is_exact= */ false)); } } diff --git a/compiler/optimizing/reference_type_propagation.h b/compiler/optimizing/reference_type_propagation.h index fd4dad2b45..7c6a048444 100644 --- a/compiler/optimizing/reference_type_propagation.h +++ b/compiler/optimizing/reference_type_propagation.h @@ -40,7 +40,7 @@ class ReferenceTypePropagation : public HOptimization { // Visit a single instruction. void Visit(HInstruction* instruction); - void Run() OVERRIDE; + bool Run() override; // Returns true if klass is admissible to the propagation: non-null and resolved. // For an array type, we also check if the component type is admissible. @@ -75,6 +75,8 @@ class ReferenceTypePropagation : public HOptimization { ReferenceTypeInfo::TypeHandle GetObjectClassHandle(); ReferenceTypeInfo::TypeHandle GetClassClassHandle(); + ReferenceTypeInfo::TypeHandle GetMethodHandleClassHandle(); + ReferenceTypeInfo::TypeHandle GetMethodTypeClassHandle(); ReferenceTypeInfo::TypeHandle GetStringClassHandle(); ReferenceTypeInfo::TypeHandle GetThrowableClassHandle(); @@ -83,6 +85,8 @@ class ReferenceTypePropagation : public HOptimization { ReferenceTypeInfo::TypeHandle object_class_handle_; ReferenceTypeInfo::TypeHandle class_class_handle_; + ReferenceTypeInfo::TypeHandle method_handle_class_handle_; + ReferenceTypeInfo::TypeHandle method_type_class_handle_; ReferenceTypeInfo::TypeHandle string_class_handle_; ReferenceTypeInfo::TypeHandle throwable_class_handle_; }; diff --git a/compiler/optimizing/register_allocation_resolver.cc b/compiler/optimizing/register_allocation_resolver.cc index 27f9ac3990..b1f0a1add9 100644 --- a/compiler/optimizing/register_allocation_resolver.cc +++ b/compiler/optimizing/register_allocation_resolver.cc @@ -280,16 +280,16 @@ size_t RegisterAllocationResolver::CalculateMaximumSafepointSpillSize( LocationSummary* locations = instruction->GetLocations(); if (locations->OnlyCallsOnSlowPath()) { size_t core_spills = - codegen_->GetNumberOfSlowPathSpills(locations, /* core_registers */ true); + codegen_->GetNumberOfSlowPathSpills(locations, /* core_registers= */ true); size_t fp_spills = - codegen_->GetNumberOfSlowPathSpills(locations, /* core_registers */ false); + codegen_->GetNumberOfSlowPathSpills(locations, /* core_registers= */ false); size_t spill_size = core_register_spill_size * core_spills + fp_register_spill_size * fp_spills; maximum_safepoint_spill_size = std::max(maximum_safepoint_spill_size, spill_size); } else if (locations->CallsOnMainAndSlowPath()) { // Nothing to spill on the slow path if the main path already clobbers caller-saves. 
- DCHECK_EQ(0u, codegen_->GetNumberOfSlowPathSpills(locations, /* core_registers */ true)); - DCHECK_EQ(0u, codegen_->GetNumberOfSlowPathSpills(locations, /* core_registers */ false)); + DCHECK_EQ(0u, codegen_->GetNumberOfSlowPathSpills(locations, /* core_registers= */ true)); + DCHECK_EQ(0u, codegen_->GetNumberOfSlowPathSpills(locations, /* core_registers= */ false)); } } return maximum_safepoint_spill_size; diff --git a/compiler/optimizing/register_allocator_graph_color.cc b/compiler/optimizing/register_allocator_graph_color.cc index fa7ad82316..42e6498148 100644 --- a/compiler/optimizing/register_allocator_graph_color.cc +++ b/compiler/optimizing/register_allocator_graph_color.cc @@ -1183,7 +1183,7 @@ static bool CheckInputOutputCanOverlap(InterferenceNode* in_node, InterferenceNo void ColoringIteration::BuildInterferenceGraph( const ScopedArenaVector<LiveInterval*>& intervals, const ScopedArenaVector<InterferenceNode*>& physical_nodes) { - DCHECK(interval_node_map_.Empty() && prunable_nodes_.empty()); + DCHECK(interval_node_map_.empty() && prunable_nodes_.empty()); // Build the interference graph efficiently by ordering range endpoints // by position and doing a linear sweep to find interferences. (That is, we // jump from endpoint to endpoint, maintaining a set of intervals live at each @@ -1208,7 +1208,7 @@ void ColoringIteration::BuildInterferenceGraph( if (range != nullptr) { InterferenceNode* node = new (allocator_) InterferenceNode(sibling, register_allocator_->liveness_); - interval_node_map_.Insert(std::make_pair(sibling, node)); + interval_node_map_.insert(std::make_pair(sibling, node)); if (sibling->HasRegister()) { // Fixed nodes should alias the canonical node for the corresponding register. @@ -1303,7 +1303,7 @@ void ColoringIteration::FindCoalesceOpportunities() { // Coalesce siblings. 
LiveInterval* next_sibling = interval->GetNextSibling(); if (next_sibling != nullptr && interval->GetEnd() == next_sibling->GetStart()) { - auto it = interval_node_map_.Find(next_sibling); + auto it = interval_node_map_.find(next_sibling); if (it != interval_node_map_.end()) { InterferenceNode* sibling_node = it->second; CreateCoalesceOpportunity(node, @@ -1318,7 +1318,7 @@ void ColoringIteration::FindCoalesceOpportunities() { if (parent->HasRegister() && parent->GetNextSibling() == interval && parent->GetEnd() == interval->GetStart()) { - auto it = interval_node_map_.Find(parent); + auto it = interval_node_map_.find(parent); if (it != interval_node_map_.end()) { InterferenceNode* parent_node = it->second; CreateCoalesceOpportunity(node, @@ -1341,7 +1341,7 @@ void ColoringIteration::FindCoalesceOpportunities() { size_t position = predecessor->GetLifetimeEnd() - 1; LiveInterval* existing = interval->GetParent()->GetSiblingAt(position); if (existing != nullptr) { - auto it = interval_node_map_.Find(existing); + auto it = interval_node_map_.find(existing); if (it != interval_node_map_.end()) { InterferenceNode* existing_node = it->second; CreateCoalesceOpportunity(node, @@ -1364,7 +1364,7 @@ void ColoringIteration::FindCoalesceOpportunities() { size_t position = predecessors[i]->GetLifetimeEnd() - 1; LiveInterval* input_interval = inputs[i]->GetLiveInterval()->GetSiblingAt(position); - auto it = interval_node_map_.Find(input_interval); + auto it = interval_node_map_.find(input_interval); if (it != interval_node_map_.end()) { InterferenceNode* input_node = it->second; CreateCoalesceOpportunity(node, input_node, CoalesceKind::kPhi, position); @@ -1380,7 +1380,7 @@ void ColoringIteration::FindCoalesceOpportunities() { = defined_by->InputAt(0)->GetLiveInterval()->GetSiblingAt(interval->GetStart() - 1); // TODO: Could we consider lifetime holes here? if (input_interval->GetEnd() == interval->GetStart()) { - auto it = interval_node_map_.Find(input_interval); + auto it = interval_node_map_.find(input_interval); if (it != interval_node_map_.end()) { InterferenceNode* input_node = it->second; CreateCoalesceOpportunity(node, @@ -1407,7 +1407,7 @@ void ColoringIteration::FindCoalesceOpportunities() { LiveInterval* input_interval = inputs[i]->GetLiveInterval()->GetSiblingAt(def_point); if (input_interval != nullptr && input_interval->HasHighInterval() == interval->HasHighInterval()) { - auto it = interval_node_map_.Find(input_interval); + auto it = interval_node_map_.find(input_interval); if (it != interval_node_map_.end()) { InterferenceNode* input_node = it->second; CreateCoalesceOpportunity(node, diff --git a/compiler/optimizing/register_allocator_graph_color.h b/compiler/optimizing/register_allocator_graph_color.h index 3072c92e0f..f0e7e55863 100644 --- a/compiler/optimizing/register_allocator_graph_color.h +++ b/compiler/optimizing/register_allocator_graph_color.h @@ -90,11 +90,11 @@ class RegisterAllocatorGraphColor : public RegisterAllocator { CodeGenerator* codegen, const SsaLivenessAnalysis& analysis, bool iterative_move_coalescing = true); - ~RegisterAllocatorGraphColor() OVERRIDE; + ~RegisterAllocatorGraphColor() override; - void AllocateRegisters() OVERRIDE; + void AllocateRegisters() override; - bool Validate(bool log_fatal_on_failure); + bool Validate(bool log_fatal_on_failure) override; private: // Collect all intervals and prepare for register allocation. 
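The OVERRIDE macro is replaced by the C++11 override keyword throughout this change, and Validate in the graph-color allocator gains the keyword it was previously missing. A small self-contained sketch of why that matters; the Base/Derived names are illustrative only:

  struct Base {
    virtual ~Base() = default;
    virtual bool Validate(bool log_fatal_on_failure) = 0;
  };

  struct Derived : Base {
    // With override, a mismatch against the base signature (for example if the
    // base later changes its parameter type) becomes a compile error instead of
    // silently declaring an unrelated virtual function.
    bool Validate(bool log_fatal_on_failure) override {
      return !log_fatal_on_failure;
    }
  };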
diff --git a/compiler/optimizing/register_allocator_linear_scan.cc b/compiler/optimizing/register_allocator_linear_scan.cc index 216fb57a96..0d6c5a3eff 100644 --- a/compiler/optimizing/register_allocator_linear_scan.cc +++ b/compiler/optimizing/register_allocator_linear_scan.cc @@ -252,7 +252,7 @@ void RegisterAllocatorLinearScan::ProcessInstruction(HInstruction* instruction) temp_intervals_.push_back(interval); interval->AddTempUse(instruction, i); if (codegen_->NeedsTwoRegisters(DataType::Type::kFloat64)) { - interval->AddHighInterval(/* is_temp */ true); + interval->AddHighInterval(/* is_temp= */ true); LiveInterval* high = interval->GetHighInterval(); temp_intervals_.push_back(high); unhandled_fp_intervals_.push_back(high); @@ -284,7 +284,7 @@ void RegisterAllocatorLinearScan::ProcessInstruction(HInstruction* instruction) } if (locations->WillCall()) { - BlockRegisters(position, position + 1, /* caller_save_only */ true); + BlockRegisters(position, position + 1, /* caller_save_only= */ true); } for (size_t i = 0; i < locations->GetInputCount(); ++i) { @@ -312,7 +312,7 @@ void RegisterAllocatorLinearScan::ProcessInstruction(HInstruction* instruction) for (size_t safepoint_index = safepoints_.size(); safepoint_index > 0; --safepoint_index) { HInstruction* safepoint = safepoints_[safepoint_index - 1u]; - size_t safepoint_position = safepoint->GetLifetimePosition(); + size_t safepoint_position = SafepointPosition::ComputePosition(safepoint); // Test that safepoints are ordered in the optimal way. DCHECK(safepoint_index == safepoints_.size() || diff --git a/compiler/optimizing/register_allocator_linear_scan.h b/compiler/optimizing/register_allocator_linear_scan.h index 36788b7c3c..4d445c7ff7 100644 --- a/compiler/optimizing/register_allocator_linear_scan.h +++ b/compiler/optimizing/register_allocator_linear_scan.h @@ -42,11 +42,11 @@ class RegisterAllocatorLinearScan : public RegisterAllocator { RegisterAllocatorLinearScan(ScopedArenaAllocator* allocator, CodeGenerator* codegen, const SsaLivenessAnalysis& analysis); - ~RegisterAllocatorLinearScan() OVERRIDE; + ~RegisterAllocatorLinearScan() override; - void AllocateRegisters() OVERRIDE; + void AllocateRegisters() override; - bool Validate(bool log_fatal_on_failure) OVERRIDE { + bool Validate(bool log_fatal_on_failure) override { processing_core_registers_ = true; if (!ValidateInternal(log_fatal_on_failure)) { return false; diff --git a/compiler/optimizing/register_allocator_test.cc b/compiler/optimizing/register_allocator_test.cc index a70b0664dc..79eb082cd7 100644 --- a/compiler/optimizing/register_allocator_test.cc +++ b/compiler/optimizing/register_allocator_test.cc @@ -40,6 +40,12 @@ using Strategy = RegisterAllocator::Strategy; class RegisterAllocatorTest : public OptimizingUnitTest { protected: + void SetUp() override { + // This test is using the x86 ISA. + OverrideInstructionSetFeatures(InstructionSet::kX86, "default"); + OptimizingUnitTest::SetUp(); + } + // These functions need to access private variables of LocationSummary, so we declare it // as a member of RegisterAllocatorTest, which we make a friend class. 
void SameAsFirstInputHint(Strategy strategy); @@ -62,11 +68,11 @@ class RegisterAllocatorTest : public OptimizingUnitTest { bool ValidateIntervals(const ScopedArenaVector<LiveInterval*>& intervals, const CodeGenerator& codegen) { return RegisterAllocator::ValidateIntervals(ArrayRef<LiveInterval* const>(intervals), - /* number_of_spill_slots */ 0u, - /* number_of_out_slots */ 0u, + /* number_of_spill_slots= */ 0u, + /* number_of_out_slots= */ 0u, codegen, - /* processing_core_registers */ true, - /* log_fatal_on_failure */ false); + /* processing_core_registers= */ true, + /* log_fatal_on_failure= */ false); } }; @@ -81,9 +87,7 @@ TEST_F(RegisterAllocatorTest, test_name##_GraphColor) {\ bool RegisterAllocatorTest::Check(const std::vector<uint16_t>& data, Strategy strategy) { HGraph* graph = CreateCFG(data); - std::unique_ptr<const X86InstructionSetFeatures> features_x86( - X86InstructionSetFeatures::FromCppDefines()); - x86::CodeGeneratorX86 codegen(graph, *features_x86.get(), CompilerOptions()); + x86::CodeGeneratorX86 codegen(graph, *compiler_options_); SsaLivenessAnalysis liveness(graph, &codegen, GetScopedAllocator()); liveness.Analyze(); std::unique_ptr<RegisterAllocator> register_allocator = @@ -98,9 +102,7 @@ bool RegisterAllocatorTest::Check(const std::vector<uint16_t>& data, Strategy st */ TEST_F(RegisterAllocatorTest, ValidateIntervals) { HGraph* graph = CreateGraph(); - std::unique_ptr<const X86InstructionSetFeatures> features_x86( - X86InstructionSetFeatures::FromCppDefines()); - x86::CodeGeneratorX86 codegen(graph, *features_x86.get(), CompilerOptions()); + x86::CodeGeneratorX86 codegen(graph, *compiler_options_); ScopedArenaVector<LiveInterval*> intervals(GetScopedAllocator()->Adapter()); // Test with two intervals of the same range. @@ -324,9 +326,7 @@ void RegisterAllocatorTest::Loop3(Strategy strategy) { Instruction::GOTO | 0xF900); HGraph* graph = CreateCFG(data); - std::unique_ptr<const X86InstructionSetFeatures> features_x86( - X86InstructionSetFeatures::FromCppDefines()); - x86::CodeGeneratorX86 codegen(graph, *features_x86.get(), CompilerOptions()); + x86::CodeGeneratorX86 codegen(graph, *compiler_options_); SsaLivenessAnalysis liveness(graph, &codegen, GetScopedAllocator()); liveness.Analyze(); std::unique_ptr<RegisterAllocator> register_allocator = @@ -359,9 +359,7 @@ TEST_F(RegisterAllocatorTest, FirstRegisterUse) { Instruction::RETURN_VOID); HGraph* graph = CreateCFG(data); - std::unique_ptr<const X86InstructionSetFeatures> features_x86( - X86InstructionSetFeatures::FromCppDefines()); - x86::CodeGeneratorX86 codegen(graph, *features_x86.get(), CompilerOptions()); + x86::CodeGeneratorX86 codegen(graph, *compiler_options_); SsaLivenessAnalysis liveness(graph, &codegen, GetScopedAllocator()); liveness.Analyze(); @@ -412,9 +410,7 @@ void RegisterAllocatorTest::DeadPhi(Strategy strategy) { HGraph* graph = CreateCFG(data); SsaDeadPhiElimination(graph).Run(); - std::unique_ptr<const X86InstructionSetFeatures> features_x86( - X86InstructionSetFeatures::FromCppDefines()); - x86::CodeGeneratorX86 codegen(graph, *features_x86.get(), CompilerOptions()); + x86::CodeGeneratorX86 codegen(graph, *compiler_options_); SsaLivenessAnalysis liveness(graph, &codegen, GetScopedAllocator()); liveness.Analyze(); std::unique_ptr<RegisterAllocator> register_allocator = @@ -438,9 +434,7 @@ TEST_F(RegisterAllocatorTest, FreeUntil) { HGraph* graph = CreateCFG(data); SsaDeadPhiElimination(graph).Run(); - std::unique_ptr<const X86InstructionSetFeatures> features_x86( - 
X86InstructionSetFeatures::FromCppDefines()); - x86::CodeGeneratorX86 codegen(graph, *features_x86.get(), CompilerOptions()); + x86::CodeGeneratorX86 codegen(graph, *compiler_options_); SsaLivenessAnalysis liveness(graph, &codegen, GetScopedAllocator()); liveness.Analyze(); RegisterAllocatorLinearScan register_allocator(GetScopedAllocator(), &codegen, liveness); @@ -566,9 +560,7 @@ void RegisterAllocatorTest::PhiHint(Strategy strategy) { { HGraph* graph = BuildIfElseWithPhi(&phi, &input1, &input2); - std::unique_ptr<const X86InstructionSetFeatures> features_x86( - X86InstructionSetFeatures::FromCppDefines()); - x86::CodeGeneratorX86 codegen(graph, *features_x86.get(), CompilerOptions()); + x86::CodeGeneratorX86 codegen(graph, *compiler_options_); SsaLivenessAnalysis liveness(graph, &codegen, GetScopedAllocator()); liveness.Analyze(); @@ -584,9 +576,7 @@ void RegisterAllocatorTest::PhiHint(Strategy strategy) { { HGraph* graph = BuildIfElseWithPhi(&phi, &input1, &input2); - std::unique_ptr<const X86InstructionSetFeatures> features_x86( - X86InstructionSetFeatures::FromCppDefines()); - x86::CodeGeneratorX86 codegen(graph, *features_x86.get(), CompilerOptions()); + x86::CodeGeneratorX86 codegen(graph, *compiler_options_); SsaLivenessAnalysis liveness(graph, &codegen, GetScopedAllocator()); liveness.Analyze(); @@ -604,9 +594,7 @@ void RegisterAllocatorTest::PhiHint(Strategy strategy) { { HGraph* graph = BuildIfElseWithPhi(&phi, &input1, &input2); - std::unique_ptr<const X86InstructionSetFeatures> features_x86( - X86InstructionSetFeatures::FromCppDefines()); - x86::CodeGeneratorX86 codegen(graph, *features_x86.get(), CompilerOptions()); + x86::CodeGeneratorX86 codegen(graph, *compiler_options_); SsaLivenessAnalysis liveness(graph, &codegen, GetScopedAllocator()); liveness.Analyze(); @@ -624,9 +612,7 @@ void RegisterAllocatorTest::PhiHint(Strategy strategy) { { HGraph* graph = BuildIfElseWithPhi(&phi, &input1, &input2); - std::unique_ptr<const X86InstructionSetFeatures> features_x86( - X86InstructionSetFeatures::FromCppDefines()); - x86::CodeGeneratorX86 codegen(graph, *features_x86.get(), CompilerOptions()); + x86::CodeGeneratorX86 codegen(graph, *compiler_options_); SsaLivenessAnalysis liveness(graph, &codegen, GetScopedAllocator()); liveness.Analyze(); @@ -689,9 +675,7 @@ void RegisterAllocatorTest::ExpectedInRegisterHint(Strategy strategy) { { HGraph* graph = BuildFieldReturn(&field, &ret); - std::unique_ptr<const X86InstructionSetFeatures> features_x86( - X86InstructionSetFeatures::FromCppDefines()); - x86::CodeGeneratorX86 codegen(graph, *features_x86.get(), CompilerOptions()); + x86::CodeGeneratorX86 codegen(graph, *compiler_options_); SsaLivenessAnalysis liveness(graph, &codegen, GetScopedAllocator()); liveness.Analyze(); @@ -705,9 +689,7 @@ void RegisterAllocatorTest::ExpectedInRegisterHint(Strategy strategy) { { HGraph* graph = BuildFieldReturn(&field, &ret); - std::unique_ptr<const X86InstructionSetFeatures> features_x86( - X86InstructionSetFeatures::FromCppDefines()); - x86::CodeGeneratorX86 codegen(graph, *features_x86.get(), CompilerOptions()); + x86::CodeGeneratorX86 codegen(graph, *compiler_options_); SsaLivenessAnalysis liveness(graph, &codegen, GetScopedAllocator()); liveness.Analyze(); @@ -761,9 +743,7 @@ void RegisterAllocatorTest::SameAsFirstInputHint(Strategy strategy) { { HGraph* graph = BuildTwoSubs(&first_sub, &second_sub); - std::unique_ptr<const X86InstructionSetFeatures> features_x86( - X86InstructionSetFeatures::FromCppDefines()); - x86::CodeGeneratorX86 
codegen(graph, *features_x86.get(), CompilerOptions()); + x86::CodeGeneratorX86 codegen(graph, *compiler_options_); SsaLivenessAnalysis liveness(graph, &codegen, GetScopedAllocator()); liveness.Analyze(); @@ -778,9 +758,7 @@ void RegisterAllocatorTest::SameAsFirstInputHint(Strategy strategy) { { HGraph* graph = BuildTwoSubs(&first_sub, &second_sub); - std::unique_ptr<const X86InstructionSetFeatures> features_x86( - X86InstructionSetFeatures::FromCppDefines()); - x86::CodeGeneratorX86 codegen(graph, *features_x86.get(), CompilerOptions()); + x86::CodeGeneratorX86 codegen(graph, *compiler_options_); SsaLivenessAnalysis liveness(graph, &codegen, GetScopedAllocator()); liveness.Analyze(); @@ -834,9 +812,7 @@ HGraph* RegisterAllocatorTest::BuildDiv(HInstruction** div) { void RegisterAllocatorTest::ExpectedExactInRegisterAndSameOutputHint(Strategy strategy) { HInstruction *div; HGraph* graph = BuildDiv(&div); - std::unique_ptr<const X86InstructionSetFeatures> features_x86( - X86InstructionSetFeatures::FromCppDefines()); - x86::CodeGeneratorX86 codegen(graph, *features_x86.get(), CompilerOptions()); + x86::CodeGeneratorX86 codegen(graph, *compiler_options_); SsaLivenessAnalysis liveness(graph, &codegen, GetScopedAllocator()); liveness.Analyze(); @@ -896,9 +872,9 @@ TEST_F(RegisterAllocatorTest, SpillInactive) { // Create an interval with lifetime holes. static constexpr size_t ranges1[][2] = {{0, 2}, {4, 6}, {8, 10}}; LiveInterval* first = BuildInterval(ranges1, arraysize(ranges1), GetScopedAllocator(), -1, one); - first->uses_.push_front(*new (GetScopedAllocator()) UsePosition(user, false, 8)); - first->uses_.push_front(*new (GetScopedAllocator()) UsePosition(user, false, 7)); - first->uses_.push_front(*new (GetScopedAllocator()) UsePosition(user, false, 6)); + first->uses_.push_front(*new (GetScopedAllocator()) UsePosition(user, 0u, 8)); + first->uses_.push_front(*new (GetScopedAllocator()) UsePosition(user, 0u, 7)); + first->uses_.push_front(*new (GetScopedAllocator()) UsePosition(user, 0u, 6)); locations = new (GetAllocator()) LocationSummary(first->GetDefinedBy(), LocationSummary::kNoCall); locations->SetOut(Location::RequiresRegister()); @@ -919,9 +895,9 @@ TEST_F(RegisterAllocatorTest, SpillInactive) { // before lifetime position 6 yet. 
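Aside: the SpillInactive test above builds each interval from explicit {start, end} pairs and then pushes raw use positions onto it. As a rough, standalone illustration of that data layout (toy C++ types defined here, not the ART LiveInterval/UsePosition API):

    #include <cstddef>
    #include <utility>
    #include <vector>

    // Toy model of a live interval with lifetime holes: a sorted list of
    // half-open [start, end) ranges plus the positions where the value is used.
    struct ToyInterval {
      std::vector<std::pair<size_t, size_t>> ranges;  // e.g. {{0,2},{4,6},{8,10}}
      std::vector<size_t> uses;                       // e.g. {6,7,8}

      // A position is covered only if it falls inside one of the ranges, so
      // positions 2..3 and 6..7 of {{0,2},{4,6},{8,10}} are lifetime "holes".
      bool Covers(size_t position) const {
        for (const auto& r : ranges) {
          if (position >= r.first && position < r.second) return true;
        }
        return false;
      }
    };

Conceptually, the SplitAt(3) calls in the test cut such a range list at position 3, leaving the later ranges and uses on the new sibling interval.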
static constexpr size_t ranges3[][2] = {{2, 4}, {8, 10}}; LiveInterval* third = BuildInterval(ranges3, arraysize(ranges3), GetScopedAllocator(), -1, three); - third->uses_.push_front(*new (GetScopedAllocator()) UsePosition(user, false, 8)); - third->uses_.push_front(*new (GetScopedAllocator()) UsePosition(user, false, 4)); - third->uses_.push_front(*new (GetScopedAllocator()) UsePosition(user, false, 3)); + third->uses_.push_front(*new (GetScopedAllocator()) UsePosition(user, 0u, 8)); + third->uses_.push_front(*new (GetScopedAllocator()) UsePosition(user, 0u, 4)); + third->uses_.push_front(*new (GetScopedAllocator()) UsePosition(user, 0u, 3)); locations = new (GetAllocator()) LocationSummary(third->GetDefinedBy(), LocationSummary::kNoCall); locations->SetOut(Location::RequiresRegister()); third = third->SplitAt(3); @@ -934,9 +910,7 @@ TEST_F(RegisterAllocatorTest, SpillInactive) { new (GetAllocator()) LocationSummary(fourth->GetDefinedBy(), LocationSummary::kNoCall); locations->SetOut(Location::RequiresRegister()); - std::unique_ptr<const X86InstructionSetFeatures> features_x86( - X86InstructionSetFeatures::FromCppDefines()); - x86::CodeGeneratorX86 codegen(graph, *features_x86.get(), CompilerOptions()); + x86::CodeGeneratorX86 codegen(graph, *compiler_options_); SsaLivenessAnalysis liveness(graph, &codegen, GetScopedAllocator()); // Populate the instructions in the liveness object, to please the register allocator. for (size_t i = 0; i < 32; ++i) { diff --git a/compiler/optimizing/scheduler.cc b/compiler/optimizing/scheduler.cc index bb28d50b56..fdef45ec8b 100644 --- a/compiler/optimizing/scheduler.cc +++ b/compiler/optimizing/scheduler.cc @@ -70,19 +70,19 @@ static bool MayHaveReorderingDependency(SideEffects node, SideEffects other) { return false; } -size_t SchedulingGraph::ArrayAccessHeapLocation(HInstruction* array, HInstruction* index) const { +size_t SchedulingGraph::ArrayAccessHeapLocation(HInstruction* instruction) const { DCHECK(heap_location_collector_ != nullptr); - size_t heap_loc = heap_location_collector_->GetArrayHeapLocation(array, index); + size_t heap_loc = heap_location_collector_->GetArrayHeapLocation(instruction); // This array access should be analyzed and added to HeapLocationCollector before. 
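Aside: the scheduler hunk above changes ArrayAccessHeapLocation to take the access instruction itself and look its location up in the HeapLocationCollector. A standalone sketch of that lookup shape (toy types and a deliberately crude alias rule, not the ART classes):

    #include <cstddef>
    #include <unordered_map>

    struct ToyInstruction {};  // stands in for an array get/set

    // Toy collector: every analyzed access maps to a heap location id; here two
    // locations "may alias" only when the ids are equal.
    class ToyHeapLocations {
     public:
      static constexpr size_t kNotFound = static_cast<size_t>(-1);

      void Record(const ToyInstruction* access, size_t location_id) {
        map_[access] = location_id;
      }

      size_t GetArrayHeapLocation(const ToyInstruction* access) const {
        auto it = map_.find(access);
        if (it == map_.end()) return kNotFound;
        return it->second;
      }

      bool MayAlias(size_t loc1, size_t loc2) const { return loc1 == loc2; }

     private:
      std::unordered_map<const ToyInstruction*, size_t> map_;
    };

The DCHECK in the real code corresponds to requiring that Record() was called for the access during load/store analysis before scheduling starts.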
DCHECK(heap_loc != HeapLocationCollector::kHeapLocationNotFound); return heap_loc; } -bool SchedulingGraph::ArrayAccessMayAlias(const HInstruction* node, - const HInstruction* other) const { +bool SchedulingGraph::ArrayAccessMayAlias(HInstruction* node, + HInstruction* other) const { DCHECK(heap_location_collector_ != nullptr); - size_t node_heap_loc = ArrayAccessHeapLocation(node->InputAt(0), node->InputAt(1)); - size_t other_heap_loc = ArrayAccessHeapLocation(other->InputAt(0), other->InputAt(1)); + size_t node_heap_loc = ArrayAccessHeapLocation(node); + size_t other_heap_loc = ArrayAccessHeapLocation(other); // For example: arr[0] and arr[0] if (node_heap_loc == other_heap_loc) { @@ -194,8 +194,8 @@ bool SchedulingGraph::FieldAccessMayAlias(const HInstruction* node, return true; } -bool SchedulingGraph::HasMemoryDependency(const HInstruction* node, - const HInstruction* other) const { +bool SchedulingGraph::HasMemoryDependency(HInstruction* node, + HInstruction* other) const { if (!MayHaveReorderingDependency(node->GetSideEffects(), other->GetSideEffects())) { return false; } @@ -264,8 +264,8 @@ bool SchedulingGraph::HasExceptionDependency(const HInstruction* node, // Check whether `node` depends on `other`, taking into account `SideEffect` // information and `CanThrow` information. -bool SchedulingGraph::HasSideEffectDependency(const HInstruction* node, - const HInstruction* other) const { +bool SchedulingGraph::HasSideEffectDependency(HInstruction* node, + HInstruction* other) const { if (HasMemoryDependency(node, other)) { return true; } @@ -280,6 +280,23 @@ bool SchedulingGraph::HasSideEffectDependency(const HInstruction* node, return false; } +// Check if the specified instruction is a better candidate which more likely will +// have other instructions depending on it. +static bool IsBetterCandidateWithMoreLikelyDependencies(HInstruction* new_candidate, + HInstruction* old_candidate) { + if (!new_candidate->GetSideEffects().Includes(old_candidate->GetSideEffects())) { + // Weaker side effects. + return false; + } + if (old_candidate->GetSideEffects().Includes(new_candidate->GetSideEffects())) { + // Same side effects, check if `new_candidate` has stronger `CanThrow()`. + return new_candidate->CanThrow() && !old_candidate->CanThrow(); + } else { + // Stronger side effects, check if `new_candidate` has at least as strong `CanThrow()`. + return new_candidate->CanThrow() || !old_candidate->CanThrow(); + } +} + void SchedulingGraph::AddDependencies(HInstruction* instruction, bool is_scheduling_barrier) { SchedulingNode* instruction_node = GetNode(instruction); @@ -331,6 +348,7 @@ void SchedulingGraph::AddDependencies(HInstruction* instruction, bool is_schedul // Side effect dependencies. if (!instruction->GetSideEffects().DoesNothing() || instruction->CanThrow()) { + HInstruction* dep_chain_candidate = nullptr; for (HInstruction* other = instruction->GetNext(); other != nullptr; other = other->GetNext()) { SchedulingNode* other_node = GetNode(other); if (other_node->IsSchedulingBarrier()) { @@ -340,7 +358,18 @@ void SchedulingGraph::AddDependencies(HInstruction* instruction, bool is_schedul break; } if (HasSideEffectDependency(other, instruction)) { - AddOtherDependency(other_node, instruction_node); + if (dep_chain_candidate != nullptr && + HasSideEffectDependency(other, dep_chain_candidate)) { + // Skip an explicit dependency to reduce memory usage, rely on the transitive dependency. 
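Aside: the new IsBetterCandidateWithMoreLikelyDependencies keeps, per scheduled instruction, a single "strongest" later instruction and lets weaker explicit edges be dropped because they are implied transitively. A self-contained rendering of that comparison on a toy side-effects bitmask (stand-ins for SideEffects/CanThrow, not the ART types):

    #include <cstdint>

    struct ToyNode {
      uint32_t side_effects;  // bitmask: a superset means "at least as strong"
      bool can_throw;
    };

    static bool Includes(uint32_t a, uint32_t b) { return (a & b) == b; }

    // Mirrors the heuristic: prefer the candidate whose side effects (and throwing
    // behaviour) are at least as strong, since more later instructions will depend
    // on it, making redundant edges to weaker nodes safe to elide transitively.
    static bool IsBetterCandidate(const ToyNode& new_c, const ToyNode& old_c) {
      if (!Includes(new_c.side_effects, old_c.side_effects)) {
        return false;  // weaker side effects
      }
      if (Includes(old_c.side_effects, new_c.side_effects)) {
        // Same side effects: better only if it throws while the old one does not.
        return new_c.can_throw && !old_c.can_throw;
      }
      // Strictly stronger side effects: require throwing behaviour at least as strong.
      return new_c.can_throw || !old_c.can_throw;
    }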
+ } else { + AddOtherDependency(other_node, instruction_node); + } + // Check if `other` is a better candidate which more likely will have other instructions + // depending on it. + if (dep_chain_candidate == nullptr || + IsBetterCandidateWithMoreLikelyDependencies(other, dep_chain_candidate)) { + dep_chain_candidate = other; + } } } } @@ -545,60 +574,67 @@ SchedulingNode* CriticalPathSchedulingNodeSelector::GetHigherPrioritySchedulingN void HScheduler::Schedule(HGraph* graph) { // We run lsa here instead of in a separate pass to better control whether we // should run the analysis or not. + const HeapLocationCollector* heap_location_collector = nullptr; LoadStoreAnalysis lsa(graph); if (!only_optimize_loop_blocks_ || graph->HasLoops()) { lsa.Run(); - scheduling_graph_.SetHeapLocationCollector(lsa.GetHeapLocationCollector()); + heap_location_collector = &lsa.GetHeapLocationCollector(); } for (HBasicBlock* block : graph->GetReversePostOrder()) { if (IsSchedulable(block)) { - Schedule(block); + Schedule(block, heap_location_collector); } } } -void HScheduler::Schedule(HBasicBlock* block) { - ScopedArenaVector<SchedulingNode*> scheduling_nodes(allocator_->Adapter(kArenaAllocScheduler)); +void HScheduler::Schedule(HBasicBlock* block, + const HeapLocationCollector* heap_location_collector) { + ScopedArenaAllocator allocator(block->GetGraph()->GetArenaStack()); + ScopedArenaVector<SchedulingNode*> scheduling_nodes(allocator.Adapter(kArenaAllocScheduler)); // Build the scheduling graph. - scheduling_graph_.Clear(); + SchedulingGraph scheduling_graph(this, &allocator, heap_location_collector); for (HBackwardInstructionIterator it(block->GetInstructions()); !it.Done(); it.Advance()) { HInstruction* instruction = it.Current(); CHECK_EQ(instruction->GetBlock(), block) << instruction->DebugName() << " is in block " << instruction->GetBlock()->GetBlockId() << ", and expected in block " << block->GetBlockId(); - SchedulingNode* node = scheduling_graph_.AddNode(instruction, IsSchedulingBarrier(instruction)); + SchedulingNode* node = scheduling_graph.AddNode(instruction, IsSchedulingBarrier(instruction)); CalculateLatency(node); scheduling_nodes.push_back(node); } - if (scheduling_graph_.Size() <= 1) { - scheduling_graph_.Clear(); + if (scheduling_graph.Size() <= 1) { return; } cursor_ = block->GetLastInstruction(); + // The list of candidates for scheduling. A node becomes a candidate when all + // its predecessors have been scheduled. + ScopedArenaVector<SchedulingNode*> candidates(allocator.Adapter(kArenaAllocScheduler)); + // Find the initial candidates for scheduling. - candidates_.clear(); for (SchedulingNode* node : scheduling_nodes) { if (!node->HasUnscheduledSuccessors()) { node->MaybeUpdateCriticalPath(node->GetLatency()); - candidates_.push_back(node); + candidates.push_back(node); } } - ScopedArenaVector<SchedulingNode*> initial_candidates(allocator_->Adapter(kArenaAllocScheduler)); + ScopedArenaVector<SchedulingNode*> initial_candidates(allocator.Adapter(kArenaAllocScheduler)); if (kDumpDotSchedulingGraphs) { // Remember the list of initial candidates for debug output purposes. - initial_candidates.assign(candidates_.begin(), candidates_.end()); + initial_candidates.assign(candidates.begin(), candidates.end()); } // Schedule all nodes. 
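Aside: with the candidate list now local to Schedule(block, ...), the core of the pass is a classic list-scheduling loop: seed the worklist with nodes that have no unscheduled successors, repeatedly pop the highest-priority node, and unlock its predecessors. A generic sketch of that loop (simplified types; PopBest stands in for the pluggable critical-path or random selectors, not the ART classes):

    #include <algorithm>
    #include <vector>

    struct SchedNode {
      int priority = 0;                        // e.g. critical-path length
      int unscheduled_successors = 0;
      std::vector<SchedNode*> predecessors;    // nodes that must be scheduled after this pops
    };

    // Pop the candidate with the highest priority.
    static SchedNode* PopBest(std::vector<SchedNode*>* candidates) {
      auto best = std::max_element(
          candidates->begin(), candidates->end(),
          [](const SchedNode* a, const SchedNode* b) { return a->priority < b->priority; });
      SchedNode* node = *best;
      candidates->erase(best);
      return node;
    }

    // A node becomes a candidate once all of its successors are scheduled;
    // scheduling it may in turn unlock its predecessors.
    static std::vector<SchedNode*> ListSchedule(const std::vector<SchedNode*>& nodes) {
      std::vector<SchedNode*> candidates;
      for (SchedNode* n : nodes) {
        if (n->unscheduled_successors == 0) candidates.push_back(n);
      }
      std::vector<SchedNode*> order;
      while (!candidates.empty()) {
        SchedNode* node = PopBest(&candidates);
        order.push_back(node);
        for (SchedNode* pred : node->predecessors) {
          if (--pred->unscheduled_successors == 0) candidates.push_back(pred);
        }
      }
      return order;  // nodes in the order scheduled (bottom-up, as in the backward pass above)
    }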
- while (!candidates_.empty()) { - Schedule(selector_->PopHighestPriorityNode(&candidates_, scheduling_graph_)); + selector_->Reset(); + while (!candidates.empty()) { + SchedulingNode* node = selector_->PopHighestPriorityNode(&candidates, scheduling_graph); + Schedule(node, &candidates); } if (kDumpDotSchedulingGraphs) { @@ -607,11 +643,12 @@ void HScheduler::Schedule(HBasicBlock* block) { std::stringstream description; description << graph->GetDexFile().PrettyMethod(graph->GetMethodIdx()) << " B" << block->GetBlockId(); - scheduling_graph_.DumpAsDotGraph(description.str(), initial_candidates); + scheduling_graph.DumpAsDotGraph(description.str(), initial_candidates); } } -void HScheduler::Schedule(SchedulingNode* scheduling_node) { +void HScheduler::Schedule(SchedulingNode* scheduling_node, + /*inout*/ ScopedArenaVector<SchedulingNode*>* candidates) { // Check whether any of the node's predecessors will be valid candidates after // this node is scheduled. uint32_t path_to_node = scheduling_node->GetCriticalPath(); @@ -620,7 +657,7 @@ void HScheduler::Schedule(SchedulingNode* scheduling_node) { path_to_node + predecessor->GetInternalLatency() + predecessor->GetLatency()); predecessor->DecrementNumberOfUnscheduledSuccessors(); if (!predecessor->HasUnscheduledSuccessors()) { - candidates_.push_back(predecessor); + candidates->push_back(predecessor); } } for (SchedulingNode* predecessor : scheduling_node->GetOtherPredecessors()) { @@ -630,7 +667,7 @@ void HScheduler::Schedule(SchedulingNode* scheduling_node) { // correctness. So we do not use them to compute the critical path. predecessor->DecrementNumberOfUnscheduledSuccessors(); if (!predecessor->HasUnscheduledSuccessors()) { - candidates_.push_back(predecessor); + candidates->push_back(predecessor); } } @@ -643,7 +680,7 @@ static void MoveAfterInBlock(HInstruction* instruction, HInstruction* cursor) { DCHECK_NE(cursor, cursor->GetBlock()->GetLastInstruction()); DCHECK(!instruction->IsControlFlow()); DCHECK(!cursor->IsControlFlow()); - instruction->MoveBefore(cursor->GetNext(), /* do_checks */ false); + instruction->MoveBefore(cursor->GetNext(), /* do_checks= */ false); } void HScheduler::Schedule(HInstruction* instruction) { @@ -667,7 +704,8 @@ bool HScheduler::IsSchedulable(const HInstruction* instruction) const { // HUnaryOperation (or HBinaryOperation), check in debug mode that we have // the exhaustive lists here. if (instruction->IsUnaryOperation()) { - DCHECK(instruction->IsBooleanNot() || + DCHECK(instruction->IsAbs() || + instruction->IsBooleanNot() || instruction->IsNot() || instruction->IsNeg()) << "unexpected instruction " << instruction->DebugName(); return true; @@ -678,6 +716,8 @@ bool HScheduler::IsSchedulable(const HInstruction* instruction) const { instruction->IsCompare() || instruction->IsCondition() || instruction->IsDiv() || + instruction->IsMin() || + instruction->IsMax() || instruction->IsMul() || instruction->IsOr() || instruction->IsRem() || @@ -771,12 +811,11 @@ bool HScheduler::IsSchedulingBarrier(const HInstruction* instr) const { instr->IsSuspendCheck(); } -void HInstructionScheduling::Run(bool only_optimize_loop_blocks, +bool HInstructionScheduling::Run(bool only_optimize_loop_blocks, bool schedule_randomly) { #if defined(ART_ENABLE_CODEGEN_arm64) || defined(ART_ENABLE_CODEGEN_arm) // Phase-local allocator that allocates scheduler internal data structures like // scheduling nodes, internel nodes map, dependencies, etc. 
- ScopedArenaAllocator allocator(graph_->GetArenaStack()); CriticalPathSchedulingNodeSelector critical_path_selector; RandomSchedulingNodeSelector random_selector; SchedulingNodeSelector* selector = schedule_randomly @@ -792,7 +831,7 @@ void HInstructionScheduling::Run(bool only_optimize_loop_blocks, switch (instruction_set_) { #ifdef ART_ENABLE_CODEGEN_arm64 case InstructionSet::kArm64: { - arm64::HSchedulerARM64 scheduler(&allocator, selector); + arm64::HSchedulerARM64 scheduler(selector); scheduler.SetOnlyOptimizeLoopBlocks(only_optimize_loop_blocks); scheduler.Schedule(graph_); break; @@ -802,7 +841,7 @@ void HInstructionScheduling::Run(bool only_optimize_loop_blocks, case InstructionSet::kThumb2: case InstructionSet::kArm: { arm::SchedulingLatencyVisitorARM arm_latency_visitor(codegen_); - arm::HSchedulerARM scheduler(&allocator, selector, &arm_latency_visitor); + arm::HSchedulerARM scheduler(selector, &arm_latency_visitor); scheduler.SetOnlyOptimizeLoopBlocks(only_optimize_loop_blocks); scheduler.Schedule(graph_); break; @@ -811,6 +850,7 @@ void HInstructionScheduling::Run(bool only_optimize_loop_blocks, default: break; } + return true; } } // namespace art diff --git a/compiler/optimizing/scheduler.h b/compiler/optimizing/scheduler.h index dfa077f7de..d2dbeca924 100644 --- a/compiler/optimizing/scheduler.h +++ b/compiler/optimizing/scheduler.h @@ -23,7 +23,6 @@ #include "base/scoped_arena_containers.h" #include "base/time_utils.h" #include "code_generator.h" -#include "driver/compiler_driver.h" #include "load_store_analysis.h" #include "nodes.h" #include "optimization.h" @@ -251,34 +250,27 @@ class SchedulingNode : public DeletableArenaObject<kArenaAllocScheduler> { */ class SchedulingGraph : public ValueObject { public: - SchedulingGraph(const HScheduler* scheduler, ScopedArenaAllocator* allocator) + SchedulingGraph(const HScheduler* scheduler, + ScopedArenaAllocator* allocator, + const HeapLocationCollector* heap_location_collector) : scheduler_(scheduler), allocator_(allocator), contains_scheduling_barrier_(false), nodes_map_(allocator_->Adapter(kArenaAllocScheduler)), - heap_location_collector_(nullptr) {} + heap_location_collector_(heap_location_collector) {} SchedulingNode* AddNode(HInstruction* instr, bool is_scheduling_barrier = false) { std::unique_ptr<SchedulingNode> node( new (allocator_) SchedulingNode(instr, allocator_, is_scheduling_barrier)); SchedulingNode* result = node.get(); - nodes_map_.Insert(std::make_pair(instr, std::move(node))); + nodes_map_.insert(std::make_pair(instr, std::move(node))); contains_scheduling_barrier_ |= is_scheduling_barrier; AddDependencies(instr, is_scheduling_barrier); return result; } - void Clear() { - nodes_map_.Clear(); - contains_scheduling_barrier_ = false; - } - - void SetHeapLocationCollector(const HeapLocationCollector& heap_location_collector) { - heap_location_collector_ = &heap_location_collector; - } - SchedulingNode* GetNode(const HInstruction* instr) const { - auto it = nodes_map_.Find(instr); + auto it = nodes_map_.find(instr); if (it == nodes_map_.end()) { return nullptr; } else { @@ -294,7 +286,7 @@ class SchedulingGraph : public ValueObject { bool HasImmediateOtherDependency(const HInstruction* node, const HInstruction* other) const; size_t Size() const { - return nodes_map_.Size(); + return nodes_map_.size(); } // Dump the scheduling graph, in dot file format, appending it to the file @@ -310,12 +302,12 @@ class SchedulingGraph : public ValueObject { void AddOtherDependency(SchedulingNode* node, SchedulingNode* 
dependency) { AddDependency(node, dependency, /*is_data_dependency*/false); } - bool HasMemoryDependency(const HInstruction* node, const HInstruction* other) const; + bool HasMemoryDependency(HInstruction* node, HInstruction* other) const; bool HasExceptionDependency(const HInstruction* node, const HInstruction* other) const; - bool HasSideEffectDependency(const HInstruction* node, const HInstruction* other) const; - bool ArrayAccessMayAlias(const HInstruction* node, const HInstruction* other) const; + bool HasSideEffectDependency(HInstruction* node, HInstruction* other) const; + bool ArrayAccessMayAlias(HInstruction* node, HInstruction* other) const; bool FieldAccessMayAlias(const HInstruction* node, const HInstruction* other) const; - size_t ArrayAccessHeapLocation(HInstruction* array, HInstruction* index) const; + size_t ArrayAccessHeapLocation(HInstruction* instruction) const; size_t FieldAccessHeapLocation(HInstruction* obj, const FieldInfo* field) const; // Add dependencies nodes for the given `HInstruction`: inputs, environments, and side-effects. @@ -329,7 +321,7 @@ class SchedulingGraph : public ValueObject { ScopedArenaHashMap<const HInstruction*, std::unique_ptr<SchedulingNode>> nodes_map_; - const HeapLocationCollector* heap_location_collector_; + const HeapLocationCollector* const heap_location_collector_; }; /* @@ -346,7 +338,7 @@ class SchedulingLatencyVisitor : public HGraphDelegateVisitor { last_visited_latency_(0), last_visited_internal_latency_(0) {} - void VisitInstruction(HInstruction* instruction) OVERRIDE { + void VisitInstruction(HInstruction* instruction) override { LOG(FATAL) << "Error visiting " << instruction->DebugName() << ". " "Architecture-specific scheduling latency visitors must handle all instructions" " (potentially by overriding the generic `VisitInstruction()`."; @@ -377,6 +369,7 @@ class SchedulingLatencyVisitor : public HGraphDelegateVisitor { class SchedulingNodeSelector : public ArenaObject<kArenaAllocScheduler> { public: + virtual void Reset() {} virtual SchedulingNode* PopHighestPriorityNode(ScopedArenaVector<SchedulingNode*>* nodes, const SchedulingGraph& graph) = 0; virtual ~SchedulingNodeSelector() {} @@ -398,7 +391,7 @@ class RandomSchedulingNodeSelector : public SchedulingNodeSelector { } SchedulingNode* PopHighestPriorityNode(ScopedArenaVector<SchedulingNode*>* nodes, - const SchedulingGraph& graph) OVERRIDE { + const SchedulingGraph& graph) override { UNUSED(graph); DCHECK(!nodes->empty()); size_t select = rand_r(&seed_) % nodes->size(); @@ -418,8 +411,9 @@ class CriticalPathSchedulingNodeSelector : public SchedulingNodeSelector { public: CriticalPathSchedulingNodeSelector() : prev_select_(nullptr) {} + void Reset() override { prev_select_ = nullptr; } SchedulingNode* PopHighestPriorityNode(ScopedArenaVector<SchedulingNode*>* nodes, - const SchedulingGraph& graph) OVERRIDE; + const SchedulingGraph& graph) override; protected: SchedulingNode* GetHigherPrioritySchedulingNode(SchedulingNode* candidate, @@ -434,16 +428,11 @@ class CriticalPathSchedulingNodeSelector : public SchedulingNodeSelector { class HScheduler { public: - HScheduler(ScopedArenaAllocator* allocator, - SchedulingLatencyVisitor* latency_visitor, - SchedulingNodeSelector* selector) - : allocator_(allocator), - latency_visitor_(latency_visitor), + HScheduler(SchedulingLatencyVisitor* latency_visitor, SchedulingNodeSelector* selector) + : latency_visitor_(latency_visitor), selector_(selector), only_optimize_loop_blocks_(true), - scheduling_graph_(this, allocator), - 
cursor_(nullptr), - candidates_(allocator_->Adapter(kArenaAllocScheduler)) {} + cursor_(nullptr) {} virtual ~HScheduler() {} void Schedule(HGraph* graph); @@ -454,8 +443,9 @@ class HScheduler { virtual bool IsSchedulingBarrier(const HInstruction* instruction) const; protected: - void Schedule(HBasicBlock* block); - void Schedule(SchedulingNode* scheduling_node); + void Schedule(HBasicBlock* block, const HeapLocationCollector* heap_location_collector); + void Schedule(SchedulingNode* scheduling_node, + /*inout*/ ScopedArenaVector<SchedulingNode*>* candidates); void Schedule(HInstruction* instruction); // Any instruction returning `false` via this method will prevent its @@ -476,19 +466,12 @@ class HScheduler { node->SetInternalLatency(latency_visitor_->GetLastVisitedInternalLatency()); } - ScopedArenaAllocator* const allocator_; SchedulingLatencyVisitor* const latency_visitor_; SchedulingNodeSelector* const selector_; bool only_optimize_loop_blocks_; - // We instantiate the members below as part of this class to avoid - // instantiating them locally for every chunk scheduled. - SchedulingGraph scheduling_graph_; // A pointer indicating where the next instruction to be scheduled will be inserted. HInstruction* cursor_; - // The list of candidates for scheduling. A node becomes a candidate when all - // its predecessors have been scheduled. - ScopedArenaVector<SchedulingNode*> candidates_; private: DISALLOW_COPY_AND_ASSIGN(HScheduler); @@ -508,10 +491,11 @@ class HInstructionScheduling : public HOptimization { codegen_(cg), instruction_set_(instruction_set) {} - void Run() { - Run(/*only_optimize_loop_blocks*/ true, /*schedule_randomly*/ false); + bool Run() override { + return Run(/*only_optimize_loop_blocks*/ true, /*schedule_randomly*/ false); } - void Run(bool only_optimize_loop_blocks, bool schedule_randomly); + + bool Run(bool only_optimize_loop_blocks, bool schedule_randomly); static constexpr const char* kInstructionSchedulingPassName = "scheduler"; diff --git a/compiler/optimizing/scheduler_arm.cc b/compiler/optimizing/scheduler_arm.cc index 8dcadaad2e..858a555e97 100644 --- a/compiler/optimizing/scheduler_arm.cc +++ b/compiler/optimizing/scheduler_arm.cc @@ -563,7 +563,7 @@ void SchedulingLatencyVisitorARM::HandleGenerateDataProc(HDataProcWithShifterOp* last_visited_internal_latency_ = kArmIntegerOpLatency; last_visited_latency_ = kArmIntegerOpLatency; } else { - HandleGenerateDataProcInstruction(/* internal_latency */ true); + HandleGenerateDataProcInstruction(/* internal_latency= */ true); HandleGenerateDataProcInstruction(); } } @@ -585,8 +585,8 @@ void SchedulingLatencyVisitorARM::HandleGenerateLongDataProc(HDataProcWithShifte DCHECK_LT(shift_value, 32U); if (kind == HInstruction::kOr || kind == HInstruction::kXor) { - HandleGenerateDataProcInstruction(/* internal_latency */ true); - HandleGenerateDataProcInstruction(/* internal_latency */ true); + HandleGenerateDataProcInstruction(/* internal_latency= */ true); + HandleGenerateDataProcInstruction(/* internal_latency= */ true); HandleGenerateDataProcInstruction(); } else { last_visited_internal_latency_ += 2 * kArmIntegerOpLatency; @@ -679,7 +679,7 @@ void SchedulingLatencyVisitorARM::VisitArrayGet(HArrayGet* instruction) { } else { last_visited_internal_latency_ += kArmIntegerOpLatency; } - last_visited_internal_latency_ = kArmMemoryLoadLatency; + last_visited_latency_ = kArmMemoryLoadLatency; } } break; diff --git a/compiler/optimizing/scheduler_arm.h b/compiler/optimizing/scheduler_arm.h index 0cb8684376..4c7a3bb4d6 100644 
--- a/compiler/optimizing/scheduler_arm.h +++ b/compiler/optimizing/scheduler_arm.h @@ -55,7 +55,7 @@ class SchedulingLatencyVisitorARM : public SchedulingLatencyVisitor { : codegen_(down_cast<CodeGeneratorARMType*>(codegen)) {} // Default visitor for instructions not handled specifically below. - void VisitInstruction(HInstruction* ATTRIBUTE_UNUSED) { + void VisitInstruction(HInstruction* ATTRIBUTE_UNUSED) override { last_visited_latency_ = kArmIntegerOpLatency; } @@ -100,7 +100,7 @@ class SchedulingLatencyVisitorARM : public SchedulingLatencyVisitor { M(DataProcWithShifterOp, unused) #define DECLARE_VISIT_INSTRUCTION(type, unused) \ - void Visit##type(H##type* instruction) OVERRIDE; + void Visit##type(H##type* instruction) override; FOR_EACH_SCHEDULED_ARM_INSTRUCTION(DECLARE_VISIT_INSTRUCTION) FOR_EACH_SCHEDULED_SHARED_INSTRUCTION(DECLARE_VISIT_INSTRUCTION) @@ -137,13 +137,12 @@ class SchedulingLatencyVisitorARM : public SchedulingLatencyVisitor { class HSchedulerARM : public HScheduler { public: - HSchedulerARM(ScopedArenaAllocator* allocator, - SchedulingNodeSelector* selector, + HSchedulerARM(SchedulingNodeSelector* selector, SchedulingLatencyVisitorARM* arm_latency_visitor) - : HScheduler(allocator, arm_latency_visitor, selector) {} - ~HSchedulerARM() OVERRIDE {} + : HScheduler(arm_latency_visitor, selector) {} + ~HSchedulerARM() override {} - bool IsSchedulable(const HInstruction* instruction) const OVERRIDE { + bool IsSchedulable(const HInstruction* instruction) const override { #define CASE_INSTRUCTION_KIND(type, unused) case \ HInstruction::InstructionKind::k##type: switch (instruction->GetKind()) { diff --git a/compiler/optimizing/scheduler_arm64.h b/compiler/optimizing/scheduler_arm64.h index f71cb5b784..ba5a743545 100644 --- a/compiler/optimizing/scheduler_arm64.h +++ b/compiler/optimizing/scheduler_arm64.h @@ -58,7 +58,7 @@ static constexpr uint32_t kArm64SIMDTypeConversionInt2FPLatency = 10; class SchedulingLatencyVisitorARM64 : public SchedulingLatencyVisitor { public: // Default visitor for instructions not handled specifically below. 
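Aside: both per-architecture latency visitors follow the same pattern as the classes in these headers: the base class refuses unhandled instructions, and each architecture provides a cheap catch-all plus overrides for the instructions whose latency is modelled specially. A minimal sketch of that shape (toy classes and latency numbers, not the ART visitors):

    #include <cstdint>
    #include <cstdio>
    #include <cstdlib>

    // Base class: architectures must provide at least a catch-all latency.
    class ToyLatencyVisitor {
     public:
      virtual ~ToyLatencyVisitor() {}
      virtual void VisitInstruction() {
        std::fprintf(stderr, "unhandled instruction kind\n");
        std::abort();
      }
      virtual void VisitMul() { VisitInstruction(); }
      virtual void VisitDiv() { VisitInstruction(); }
      uint32_t GetLastVisitedLatency() const { return last_latency_; }
     protected:
      uint32_t last_latency_ = 0;
    };

    // "ARM-like" visitor: a cheap default plus overrides for costlier operations.
    class ToyArmLatencyVisitor : public ToyLatencyVisitor {
     public:
      void VisitInstruction() override { last_latency_ = 1; }   // default integer op
      void VisitMul() override { last_latency_ = 3; }
      void VisitDiv() override { last_latency_ = 10; }
    };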
- void VisitInstruction(HInstruction* ATTRIBUTE_UNUSED) { + void VisitInstruction(HInstruction* ATTRIBUTE_UNUSED) override { last_visited_latency_ = kArm64IntegerOpLatency; } @@ -68,12 +68,10 @@ class SchedulingLatencyVisitorARM64 : public SchedulingLatencyVisitor { M(ArrayGet , unused) \ M(ArrayLength , unused) \ M(ArraySet , unused) \ - M(BinaryOperation , unused) \ M(BoundsCheck , unused) \ M(Div , unused) \ M(InstanceFieldGet , unused) \ M(InstanceOf , unused) \ - M(Invoke , unused) \ M(LoadString , unused) \ M(Mul , unused) \ M(NewArray , unused) \ @@ -108,6 +106,10 @@ class SchedulingLatencyVisitorARM64 : public SchedulingLatencyVisitor { M(VecLoad , unused) \ M(VecStore , unused) +#define FOR_EACH_SCHEDULED_ABSTRACT_INSTRUCTION(M) \ + M(BinaryOperation , unused) \ + M(Invoke , unused) + #define FOR_EACH_SCHEDULED_SHARED_INSTRUCTION(M) \ M(BitwiseNegatedRight, unused) \ M(MultiplyAccumulate, unused) \ @@ -116,9 +118,10 @@ class SchedulingLatencyVisitorARM64 : public SchedulingLatencyVisitor { M(DataProcWithShifterOp, unused) #define DECLARE_VISIT_INSTRUCTION(type, unused) \ - void Visit##type(H##type* instruction) OVERRIDE; + void Visit##type(H##type* instruction) override; FOR_EACH_SCHEDULED_COMMON_INSTRUCTION(DECLARE_VISIT_INSTRUCTION) + FOR_EACH_SCHEDULED_ABSTRACT_INSTRUCTION(DECLARE_VISIT_INSTRUCTION) FOR_EACH_SCHEDULED_SHARED_INSTRUCTION(DECLARE_VISIT_INSTRUCTION) FOR_EACH_CONCRETE_INSTRUCTION_ARM64(DECLARE_VISIT_INSTRUCTION) @@ -131,11 +134,11 @@ class SchedulingLatencyVisitorARM64 : public SchedulingLatencyVisitor { class HSchedulerARM64 : public HScheduler { public: - HSchedulerARM64(ScopedArenaAllocator* allocator, SchedulingNodeSelector* selector) - : HScheduler(allocator, &arm64_latency_visitor_, selector) {} - ~HSchedulerARM64() OVERRIDE {} + explicit HSchedulerARM64(SchedulingNodeSelector* selector) + : HScheduler(&arm64_latency_visitor_, selector) {} + ~HSchedulerARM64() override {} - bool IsSchedulable(const HInstruction* instruction) const OVERRIDE { + bool IsSchedulable(const HInstruction* instruction) const override { #define CASE_INSTRUCTION_KIND(type, unused) case \ HInstruction::InstructionKind::k##type: switch (instruction->GetKind()) { @@ -157,7 +160,7 @@ class HSchedulerARM64 : public HScheduler { // SIMD&FP registers are callee saved) so don't reorder such vector instructions. // // TODO: remove this when a proper support of SIMD registers is introduced to the compiler. - bool IsSchedulingBarrier(const HInstruction* instr) const OVERRIDE { + bool IsSchedulingBarrier(const HInstruction* instr) const override { return HScheduler::IsSchedulingBarrier(instr) || instr->IsVecReduce() || instr->IsVecExtractScalar() || diff --git a/compiler/optimizing/scheduler_test.cc b/compiler/optimizing/scheduler_test.cc index fb15fc8975..e0e265a04c 100644 --- a/compiler/optimizing/scheduler_test.cc +++ b/compiler/optimizing/scheduler_test.cc @@ -146,7 +146,9 @@ class SchedulerTest : public OptimizingUnitTest { environment->SetRawEnvAt(1, mul); mul->AddEnvUseAt(div_check->GetEnvironment(), 1); - SchedulingGraph scheduling_graph(scheduler, GetScopedAllocator()); + SchedulingGraph scheduling_graph(scheduler, + GetScopedAllocator(), + /* heap_location_collector= */ nullptr); // Instructions must be inserted in reverse order into the scheduling graph. 
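Aside: the reverse-order requirement stated in the test comment above exists because AddNode wires up dependencies immediately: when a definition is inserted, every instruction that appears after it in the block is already in the node map. A toy version of building a dependency map backwards over one block (simplified stand-in types, assuming all users live in the same block):

    #include <memory>
    #include <unordered_map>
    #include <vector>

    struct ToyInstr {
      std::vector<ToyInstr*> users;  // instructions later in the block that consume this value
    };

    struct DepNode {
      ToyInstr* instr;
      std::vector<DepNode*> predecessors;  // the definitions this instruction depends on
    };

    // Walking the block backwards guarantees that, when a definition is added,
    // nodes for all of its users already exist, so dependency edges can be added
    // on the spot without a separate fix-up pass.
    static std::unordered_map<ToyInstr*, std::unique_ptr<DepNode>> BuildGraphBackwards(
        const std::vector<ToyInstr*>& block_in_program_order) {
      std::unordered_map<ToyInstr*, std::unique_ptr<DepNode>> nodes;
      for (auto it = block_in_program_order.rbegin(); it != block_in_program_order.rend(); ++it) {
        ToyInstr* instr = *it;
        auto node = std::make_unique<DepNode>(DepNode{instr, {}});
        for (ToyInstr* user : instr->users) {
          // The user appears later in the block, so it was inserted earlier here.
          nodes.at(user)->predecessors.push_back(node.get());
        }
        nodes.emplace(instr, std::move(node));
      }
      return nodes;
    }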
for (HInstruction* instr : ReverseRange(block_instructions)) { scheduling_graph.AddNode(instr); @@ -169,7 +171,9 @@ class SchedulerTest : public OptimizingUnitTest { ASSERT_TRUE(scheduling_graph.HasImmediateOtherDependency(array_set1, array_get1)); ASSERT_TRUE(scheduling_graph.HasImmediateOtherDependency(array_set2, array_get2)); ASSERT_TRUE(scheduling_graph.HasImmediateOtherDependency(array_get2, array_set1)); - ASSERT_TRUE(scheduling_graph.HasImmediateOtherDependency(array_set2, array_set1)); + // Unnecessary dependency is not stored, we rely on transitive dependencies. + // The array_set2 -> array_get2 -> array_set1 dependencies are tested above. + ASSERT_FALSE(scheduling_graph.HasImmediateOtherDependency(array_set2, array_set1)); // Env dependency. ASSERT_TRUE(scheduling_graph.HasImmediateOtherDependency(div_check, mul)); @@ -192,7 +196,9 @@ class SchedulerTest : public OptimizingUnitTest { HInstructionScheduling scheduling(graph, target_config.GetInstructionSet()); scheduling.Run(/*only_optimize_loop_blocks*/ false, /*schedule_randomly*/ true); + OverrideInstructionSetFeatures(target_config.GetInstructionSet(), "default"); RunCode(target_config, + *compiler_options_, graph, [](HGraph* graph_arg) { RemoveSuspendChecks(graph_arg); }, has_result, expected); @@ -274,11 +280,10 @@ class SchedulerTest : public OptimizingUnitTest { entry->AddInstruction(instr); } - SchedulingGraph scheduling_graph(scheduler, GetScopedAllocator()); HeapLocationCollector heap_location_collector(graph_); heap_location_collector.VisitBasicBlock(entry); heap_location_collector.BuildAliasingMatrix(); - scheduling_graph.SetHeapLocationCollector(heap_location_collector); + SchedulingGraph scheduling_graph(scheduler, GetScopedAllocator(), &heap_location_collector); for (HInstruction* instr : ReverseRange(block_instructions)) { // Build scheduling graph with memory access aliasing information @@ -296,47 +301,53 @@ class SchedulerTest : public OptimizingUnitTest { size_t loc2 = HeapLocationCollector::kHeapLocationNotFound; // Test side effect dependency: array[0] and array[1] - loc1 = heap_location_collector.GetArrayHeapLocation(arr, c0); - loc2 = heap_location_collector.GetArrayHeapLocation(arr, c1); + loc1 = heap_location_collector.GetArrayHeapLocation(arr_set_0); + loc2 = heap_location_collector.GetArrayHeapLocation(arr_set_1); ASSERT_FALSE(heap_location_collector.MayAlias(loc1, loc2)); ASSERT_FALSE(scheduling_graph.HasImmediateOtherDependency(arr_set_1, arr_set_0)); // Test side effect dependency based on LSA analysis: array[i] and array[j] - loc1 = heap_location_collector.GetArrayHeapLocation(arr, i); - loc2 = heap_location_collector.GetArrayHeapLocation(arr, j); + loc1 = heap_location_collector.GetArrayHeapLocation(arr_set_i); + loc2 = heap_location_collector.GetArrayHeapLocation(arr_set_j); ASSERT_TRUE(heap_location_collector.MayAlias(loc1, loc2)); - ASSERT_TRUE(scheduling_graph.HasImmediateOtherDependency(arr_set_j, arr_set_i)); + // Unnecessary dependency is not stored, we rely on transitive dependencies. + // The arr_set_j -> arr_set_sub0 -> arr_set_add0 -> arr_set_i dependencies are tested below. 
+ ASSERT_FALSE(scheduling_graph.HasImmediateOtherDependency(arr_set_j, arr_set_i)); // Test side effect dependency based on LSA analysis: array[i] and array[i+0] - loc1 = heap_location_collector.GetArrayHeapLocation(arr, i); - loc2 = heap_location_collector.GetArrayHeapLocation(arr, add0); + loc1 = heap_location_collector.GetArrayHeapLocation(arr_set_i); + loc2 = heap_location_collector.GetArrayHeapLocation(arr_set_add0); ASSERT_TRUE(heap_location_collector.MayAlias(loc1, loc2)); ASSERT_TRUE(scheduling_graph.HasImmediateOtherDependency(arr_set_add0, arr_set_i)); // Test side effect dependency based on LSA analysis: array[i] and array[i-0] - loc1 = heap_location_collector.GetArrayHeapLocation(arr, i); - loc2 = heap_location_collector.GetArrayHeapLocation(arr, sub0); + loc1 = heap_location_collector.GetArrayHeapLocation(arr_set_i); + loc2 = heap_location_collector.GetArrayHeapLocation(arr_set_sub0); ASSERT_TRUE(heap_location_collector.MayAlias(loc1, loc2)); - ASSERT_TRUE(scheduling_graph.HasImmediateOtherDependency(arr_set_sub0, arr_set_i)); + // Unnecessary dependency is not stored, we rely on transitive dependencies. + ASSERT_FALSE(scheduling_graph.HasImmediateOtherDependency(arr_set_sub0, arr_set_i)); + // Instead, we rely on arr_set_sub0 -> arr_set_add0 -> arr_set_i, the latter is tested above. + ASSERT_TRUE(scheduling_graph.HasImmediateOtherDependency(arr_set_sub0, arr_set_add0)); // Test side effect dependency based on LSA analysis: array[i] and array[i+1] - loc1 = heap_location_collector.GetArrayHeapLocation(arr, i); - loc2 = heap_location_collector.GetArrayHeapLocation(arr, add1); + loc1 = heap_location_collector.GetArrayHeapLocation(arr_set_i); + loc2 = heap_location_collector.GetArrayHeapLocation(arr_set_add1); ASSERT_FALSE(heap_location_collector.MayAlias(loc1, loc2)); ASSERT_FALSE(scheduling_graph.HasImmediateOtherDependency(arr_set_add1, arr_set_i)); // Test side effect dependency based on LSA analysis: array[i+1] and array[i-1] - loc1 = heap_location_collector.GetArrayHeapLocation(arr, add1); - loc2 = heap_location_collector.GetArrayHeapLocation(arr, sub1); + loc1 = heap_location_collector.GetArrayHeapLocation(arr_set_add1); + loc2 = heap_location_collector.GetArrayHeapLocation(arr_set_sub1); ASSERT_FALSE(heap_location_collector.MayAlias(loc1, loc2)); ASSERT_FALSE(scheduling_graph.HasImmediateOtherDependency(arr_set_sub1, arr_set_add1)); // Test side effect dependency based on LSA analysis: array[j] and all others array accesses - ASSERT_TRUE(scheduling_graph.HasImmediateOtherDependency(arr_set_j, arr_set_i)); - ASSERT_TRUE(scheduling_graph.HasImmediateOtherDependency(arr_set_j, arr_set_add0)); ASSERT_TRUE(scheduling_graph.HasImmediateOtherDependency(arr_set_j, arr_set_sub0)); ASSERT_TRUE(scheduling_graph.HasImmediateOtherDependency(arr_set_j, arr_set_add1)); ASSERT_TRUE(scheduling_graph.HasImmediateOtherDependency(arr_set_j, arr_set_sub1)); + // Unnecessary dependencies are not stored, we rely on transitive dependencies. 
+ ASSERT_FALSE(scheduling_graph.HasImmediateOtherDependency(arr_set_j, arr_set_i)); + ASSERT_FALSE(scheduling_graph.HasImmediateOtherDependency(arr_set_j, arr_set_add0)); // Test that ArraySet and FieldSet should not have side effect dependency ASSERT_FALSE(scheduling_graph.HasImmediateOtherDependency(arr_set_i, set_field10)); @@ -352,13 +363,13 @@ class SchedulerTest : public OptimizingUnitTest { #if defined(ART_ENABLE_CODEGEN_arm64) TEST_F(SchedulerTest, DependencyGraphAndSchedulerARM64) { CriticalPathSchedulingNodeSelector critical_path_selector; - arm64::HSchedulerARM64 scheduler(GetScopedAllocator(), &critical_path_selector); + arm64::HSchedulerARM64 scheduler(&critical_path_selector); TestBuildDependencyGraphAndSchedule(&scheduler); } TEST_F(SchedulerTest, ArrayAccessAliasingARM64) { CriticalPathSchedulingNodeSelector critical_path_selector; - arm64::HSchedulerARM64 scheduler(GetScopedAllocator(), &critical_path_selector); + arm64::HSchedulerARM64 scheduler(&critical_path_selector); TestDependencyGraphOnAliasingArrayAccesses(&scheduler); } #endif @@ -367,14 +378,14 @@ TEST_F(SchedulerTest, ArrayAccessAliasingARM64) { TEST_F(SchedulerTest, DependencyGraphAndSchedulerARM) { CriticalPathSchedulingNodeSelector critical_path_selector; arm::SchedulingLatencyVisitorARM arm_latency_visitor(/*CodeGenerator*/ nullptr); - arm::HSchedulerARM scheduler(GetScopedAllocator(), &critical_path_selector, &arm_latency_visitor); + arm::HSchedulerARM scheduler(&critical_path_selector, &arm_latency_visitor); TestBuildDependencyGraphAndSchedule(&scheduler); } TEST_F(SchedulerTest, ArrayAccessAliasingARM) { CriticalPathSchedulingNodeSelector critical_path_selector; arm::SchedulingLatencyVisitorARM arm_latency_visitor(/*CodeGenerator*/ nullptr); - arm::HSchedulerARM scheduler(GetScopedAllocator(), &critical_path_selector, &arm_latency_visitor); + arm::HSchedulerARM scheduler(&critical_path_selector, &arm_latency_visitor); TestDependencyGraphOnAliasingArrayAccesses(&scheduler); } #endif diff --git a/compiler/optimizing/select_generator.cc b/compiler/optimizing/select_generator.cc index 66e51421ca..dcc7f77fc2 100644 --- a/compiler/optimizing/select_generator.cc +++ b/compiler/optimizing/select_generator.cc @@ -16,6 +16,7 @@ #include "select_generator.h" +#include "base/scoped_arena_containers.h" #include "reference_type_propagation.h" namespace art { @@ -43,12 +44,18 @@ static bool IsSimpleBlock(HBasicBlock* block) { for (HInstructionIterator it(block->GetInstructions()); !it.Done(); it.Advance()) { HInstruction* instruction = it.Current(); if (instruction->IsControlFlow()) { - if (num_instructions > kMaxInstructionsInBranch) { - return false; - } return instruction->IsGoto() || instruction->IsReturn(); - } else if (instruction->CanBeMoved() && !instruction->HasSideEffects()) { - num_instructions++; + } else if (instruction->CanBeMoved() && + !instruction->HasSideEffects() && + !instruction->CanThrow()) { + if (instruction->IsSelect() && + instruction->AsSelect()->GetCondition()->GetBlock() == block) { + // Count one HCondition and HSelect in the same block as a single instruction. + // This enables finding nested selects. 
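Aside: the updated IsSimpleBlock rejects anything that can throw and counts an HSelect whose HCondition sits in the same block as a single instruction, which is what allows a select produced by an earlier iteration to be nested inside a new one. A toy rendering of that counting rule (flag-based stand-ins for the real instruction queries, not the ART API):

    #include <cstddef>
    #include <vector>

    struct BranchInstr {
      bool is_control_flow = false;
      bool can_be_moved = true;
      bool has_side_effects = false;
      bool can_throw = false;
      bool is_select_with_local_condition = false;  // a select whose condition lives in this block
    };

    // Returns true if the branch block is small and safe enough to be flattened
    // into a select; max_instructions plays the role of kMaxInstructionsInBranch.
    static bool IsSimpleBranchBlock(const std::vector<BranchInstr>& block,
                                    size_t max_instructions) {
      size_t count = 0;
      for (const BranchInstr& instr : block) {
        if (instr.is_control_flow) {
          return true;  // reached the trailing goto/return
        }
        if (!instr.can_be_moved || instr.has_side_effects || instr.can_throw) {
          return false;
        }
        if (instr.is_select_with_local_condition) {
          continue;  // the select and its condition count as a single instruction
        }
        if (++count > max_instructions) {
          return false;  // bail as soon as the block is too large
        }
      }
      return true;
    }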
+ continue; + } else if (++num_instructions > kMaxInstructionsInBranch) { + return false; // bail as soon as we exceed number of allowed instructions + } } else { return false; } @@ -85,10 +92,15 @@ static HPhi* GetSingleChangedPhi(HBasicBlock* block, size_t index1, size_t index return select_phi; } -void HSelectGenerator::Run() { +bool HSelectGenerator::Run() { + bool didSelect = false; + // Select cache with local allocator. + ScopedArenaAllocator allocator(graph_->GetArenaStack()); + ScopedArenaSafeMap<HInstruction*, HSelect*> cache( + std::less<HInstruction*>(), allocator.Adapter(kArenaAllocSelectGenerator)); + // Iterate in post order in the unlikely case that removing one occurrence of // the selection pattern empties a branch block of another occurrence. - // Otherwise the order does not matter. for (HBasicBlock* block : graph_->GetPostOrder()) { if (!block->EndsWithIf()) continue; @@ -97,6 +109,7 @@ void HSelectGenerator::Run() { HBasicBlock* true_block = if_instruction->IfTrueSuccessor(); HBasicBlock* false_block = if_instruction->IfFalseSuccessor(); DCHECK_NE(true_block, false_block); + if (!IsSimpleBlock(true_block) || !IsSimpleBlock(false_block) || !BlocksMergeTogether(true_block, false_block)) { @@ -107,11 +120,15 @@ void HSelectGenerator::Run() { // If the branches are not empty, move instructions in front of the If. // TODO(dbrazdil): This puts an instruction between If and its condition. // Implement moving of conditions to first users if possible. - if (!true_block->IsSingleGoto() && !true_block->IsSingleReturn()) { - true_block->GetFirstInstruction()->MoveBefore(if_instruction); + while (!true_block->IsSingleGoto() && !true_block->IsSingleReturn()) { + HInstruction* instr = true_block->GetFirstInstruction(); + DCHECK(!instr->CanThrow()); + instr->MoveBefore(if_instruction); } - if (!false_block->IsSingleGoto() && !false_block->IsSingleReturn()) { - false_block->GetFirstInstruction()->MoveBefore(if_instruction); + while (!false_block->IsSingleGoto() && !false_block->IsSingleReturn()) { + HInstruction* instr = false_block->GetFirstInstruction(); + DCHECK(!instr->CanThrow()); + instr->MoveBefore(if_instruction); } DCHECK(true_block->IsSingleGoto() || true_block->IsSingleReturn()); DCHECK(false_block->IsSingleGoto() || false_block->IsSingleReturn()); @@ -138,7 +155,8 @@ void HSelectGenerator::Run() { DCHECK(both_successors_return || phi != nullptr); // Create the Select instruction and insert it in front of the If. - HSelect* select = new (graph_->GetAllocator()) HSelect(if_instruction->InputAt(0), + HInstruction* condition = if_instruction->InputAt(0); + HSelect* select = new (graph_->GetAllocator()) HSelect(condition, true_value, false_value, if_instruction->GetDexPc()); @@ -175,12 +193,34 @@ void HSelectGenerator::Run() { MaybeRecordStat(stats_, MethodCompilationStat::kSelectGenerated); + // Very simple way of finding common subexpressions in the generated HSelect statements + // (since this runs after GVN). Lookup by condition, and reuse latest one if possible + // (due to post order, latest select is most likely replacement). If needed, we could + // improve this by e.g. using the operands in the map as well. + auto it = cache.find(condition); + if (it == cache.end()) { + cache.Put(condition, select); + } else { + // Found cached value. See if latest can replace cached in the HIR. 
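Aside: the cache introduced above is a very lightweight common-subexpression pass over the generated selects: lookup is by condition only, and the newest select may replace an older one when the operands match and dominance allows it. A self-contained sketch of that cache update (toy types; the linear position is a crude stand-in for the real StrictlyDominates query):

    #include <map>

    struct ToyCond {};

    struct ToySelect {
      ToyCond* condition;
      int true_value;
      int false_value;
      int position;        // linear position, used as a toy proxy for dominance
      bool removed = false;
    };

    // Toy dominance check: in straight-line code an earlier instruction dominates
    // a later one; the real pass asks HInstruction::StrictlyDominates instead.
    static bool ToyStrictlyDominates(const ToySelect* a, const ToySelect* b) {
      return a->position < b->position;
    }

    // Keep at most one "latest" select per condition; if the cached select computes
    // the same values and is dominated by the new one, rewrite it away.
    static void CacheOrReplace(std::map<ToyCond*, ToySelect*>& cache, ToySelect* select) {
      auto it = cache.find(select->condition);
      if (it == cache.end()) {
        cache.emplace(select->condition, select);
        return;
      }
      ToySelect* cached = it->second;
      if (cached->true_value == select->true_value &&
          cached->false_value == select->false_value &&
          ToyStrictlyDominates(select, cached)) {
        cached->removed = true;  // stands in for ReplaceWith(select) + RemoveInstruction()
      }
      it->second = select;  // always cache the latest select for this condition
    }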
+ HSelect* cached = it->second; + DCHECK_EQ(cached->GetCondition(), select->GetCondition()); + if (cached->GetTrueValue() == select->GetTrueValue() && + cached->GetFalseValue() == select->GetFalseValue() && + select->StrictlyDominates(cached)) { + cached->ReplaceWith(select); + cached->GetBlock()->RemoveInstruction(cached); + } + it->second = select; // always cache latest + } + // No need to update dominance information, as we are simplifying // a simple diamond shape, where the join block is merged with the // entry block. Any following blocks would have had the join block // as a dominator, and `MergeWith` handles changing that to the // entry block. + didSelect = true; } + return didSelect; } } // namespace art diff --git a/compiler/optimizing/select_generator.h b/compiler/optimizing/select_generator.h index bda57fd5c8..2889166f60 100644 --- a/compiler/optimizing/select_generator.h +++ b/compiler/optimizing/select_generator.h @@ -68,7 +68,7 @@ class HSelectGenerator : public HOptimization { OptimizingCompilerStats* stats, const char* name = kSelectGeneratorPassName); - void Run() OVERRIDE; + bool Run() override; static constexpr const char* kSelectGeneratorPassName = "select_generator"; diff --git a/compiler/optimizing/select_generator_test.cc b/compiler/optimizing/select_generator_test.cc new file mode 100644 index 0000000000..6e6549737c --- /dev/null +++ b/compiler/optimizing/select_generator_test.cc @@ -0,0 +1,96 @@ +/* + * Copyright (C) 2018 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "select_generator.h" + +#include "base/arena_allocator.h" +#include "builder.h" +#include "nodes.h" +#include "optimizing_unit_test.h" +#include "side_effects_analysis.h" + +namespace art { + +class SelectGeneratorTest : public ImprovedOptimizingUnitTest { + public: + void ConstructBasicGraphForSelect(HInstruction* instr) { + HBasicBlock* if_block = new (GetAllocator()) HBasicBlock(graph_); + HBasicBlock* then_block = new (GetAllocator()) HBasicBlock(graph_); + HBasicBlock* else_block = new (GetAllocator()) HBasicBlock(graph_); + + graph_->AddBlock(if_block); + graph_->AddBlock(then_block); + graph_->AddBlock(else_block); + + entry_block_->ReplaceSuccessor(return_block_, if_block); + + if_block->AddSuccessor(then_block); + if_block->AddSuccessor(else_block); + then_block->AddSuccessor(return_block_); + else_block->AddSuccessor(return_block_); + + HParameterValue* bool_param = new (GetAllocator()) HParameterValue(graph_->GetDexFile(), + dex::TypeIndex(0), + 1, + DataType::Type::kBool); + entry_block_->AddInstruction(bool_param); + HIntConstant* const1 = graph_->GetIntConstant(1); + + if_block->AddInstruction(new (GetAllocator()) HIf(bool_param)); + + then_block->AddInstruction(instr); + then_block->AddInstruction(new (GetAllocator()) HGoto()); + + else_block->AddInstruction(new (GetAllocator()) HGoto()); + + HPhi* phi = new (GetAllocator()) HPhi(GetAllocator(), 0, 0, DataType::Type::kInt32); + return_block_->AddPhi(phi); + phi->AddInput(instr); + phi->AddInput(const1); + } + + bool CheckGraphAndTrySelectGenerator() { + graph_->BuildDominatorTree(); + EXPECT_TRUE(CheckGraph()); + + SideEffectsAnalysis side_effects(graph_); + side_effects.Run(); + return HSelectGenerator(graph_, /*handles*/ nullptr, /*stats*/ nullptr).Run(); + } +}; + +// HDivZeroCheck might throw and should not be hoisted from the conditional to an unconditional. +TEST_F(SelectGeneratorTest, testZeroCheck) { + InitGraph(); + HDivZeroCheck* instr = new (GetAllocator()) HDivZeroCheck(parameter_, 0); + ConstructBasicGraphForSelect(instr); + + ArenaVector<HInstruction*> current_locals({parameter_, graph_->GetIntConstant(1)}, + GetAllocator()->Adapter(kArenaAllocInstruction)); + ManuallyBuildEnvFor(instr, ¤t_locals); + + EXPECT_FALSE(CheckGraphAndTrySelectGenerator()); +} + +// Test that SelectGenerator succeeds with HAdd. +TEST_F(SelectGeneratorTest, testAdd) { + InitGraph(); + HAdd* instr = new (GetAllocator()) HAdd(DataType::Type::kInt32, parameter_, parameter_, 0); + ConstructBasicGraphForSelect(instr); + EXPECT_TRUE(CheckGraphAndTrySelectGenerator()); +} + +} // namespace art diff --git a/compiler/optimizing/sharpening.cc b/compiler/optimizing/sharpening.cc index 1e49411c72..8637db13ad 100644 --- a/compiler/optimizing/sharpening.cc +++ b/compiler/optimizing/sharpening.cc @@ -21,7 +21,6 @@ #include "base/enums.h" #include "class_linker.h" #include "code_generator.h" -#include "driver/compiler_driver.h" #include "driver/compiler_options.h" #include "driver/dex_compilation_unit.h" #include "gc/heap.h" @@ -36,23 +35,6 @@ namespace art { -void HSharpening::Run() { - // We don't care about the order of the blocks here. 
- for (HBasicBlock* block : graph_->GetReversePostOrder()) { - for (HInstructionIterator it(block->GetInstructions()); !it.Done(); it.Advance()) { - HInstruction* instruction = it.Current(); - if (instruction->IsInvokeStaticOrDirect()) { - SharpenInvokeStaticOrDirect(instruction->AsInvokeStaticOrDirect(), - codegen_, - compiler_driver_); - } - // TODO: Move the sharpening of invoke-virtual/-interface/-super from HGraphBuilder - // here. Rewrite it to avoid the CompilerDriver's reliance on verifier data - // because we know the type better when inlining. - } - } -} - static bool IsInBootImage(ArtMethod* method) { const std::vector<gc::space::ImageSpace*>& image_spaces = Runtime::Current()->GetHeap()->GetBootImageSpaces(); @@ -65,34 +47,23 @@ static bool IsInBootImage(ArtMethod* method) { return false; } -static bool AOTCanEmbedMethod(ArtMethod* method, const CompilerOptions& options) { - return IsInBootImage(method) && !options.GetCompilePic(); -} - -static bool BootImageAOTCanEmbedMethod(ArtMethod* method, CompilerDriver* compiler_driver) { - DCHECK(compiler_driver->GetCompilerOptions().IsBootImage()); - if (!compiler_driver->GetSupportBootImageFixup()) { - return false; - } +static bool BootImageAOTCanEmbedMethod(ArtMethod* method, const CompilerOptions& compiler_options) { + DCHECK(compiler_options.IsBootImage()); ScopedObjectAccess soa(Thread::Current()); ObjPtr<mirror::Class> klass = method->GetDeclaringClass(); DCHECK(klass != nullptr); const DexFile& dex_file = klass->GetDexFile(); - return compiler_driver->IsImageClass(dex_file.StringByTypeIdx(klass->GetDexTypeIndex())); + return compiler_options.IsImageClass(dex_file.StringByTypeIdx(klass->GetDexTypeIndex())); } -void HSharpening::SharpenInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke, - CodeGenerator* codegen, - CompilerDriver* compiler_driver) { - if (invoke->IsStringInit()) { - // Not using the dex cache arrays. But we could still try to use a better dispatch... - // TODO: Use direct_method and direct_code for the appropriate StringFactory method. - return; +HInvokeStaticOrDirect::DispatchInfo HSharpening::SharpenInvokeStaticOrDirect( + ArtMethod* callee, CodeGenerator* codegen) { + if (kIsDebugBuild) { + ScopedObjectAccess soa(Thread::Current()); // Required for GetDeclaringClass below. + DCHECK(callee != nullptr); + DCHECK(!(callee->IsConstructor() && callee->GetDeclaringClass()->IsStringClass())); } - ArtMethod* callee = invoke->GetResolvedMethod(); - DCHECK(callee != nullptr); - HInvokeStaticOrDirect::MethodLoadKind method_load_kind; HInvokeStaticOrDirect::CodePtrLocation code_ptr_location; uint64_t method_load_data = 0u; @@ -110,23 +81,34 @@ void HSharpening::SharpenInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke, // We don't optimize for debuggable as it would prevent us from obsoleting the method in some // situations. + const CompilerOptions& compiler_options = codegen->GetCompilerOptions(); if (callee == codegen->GetGraph()->GetArtMethod() && !codegen->GetGraph()->IsDebuggable()) { // Recursive call. method_load_kind = HInvokeStaticOrDirect::MethodLoadKind::kRecursive; code_ptr_location = HInvokeStaticOrDirect::CodePtrLocation::kCallSelf; - } else if (Runtime::Current()->UseJitCompilation() || - AOTCanEmbedMethod(callee, codegen->GetCompilerOptions())) { + } else if (compiler_options.IsBootImage()) { + if (!compiler_options.GetCompilePic()) { + // Test configuration, do not sharpen. 
+ method_load_kind = HInvokeStaticOrDirect::MethodLoadKind::kRuntimeCall; + } else if (BootImageAOTCanEmbedMethod(callee, compiler_options)) { + method_load_kind = HInvokeStaticOrDirect::MethodLoadKind::kBootImageLinkTimePcRelative; + } else { + // Use PC-relative access to the .bss methods array. + method_load_kind = HInvokeStaticOrDirect::MethodLoadKind::kBssEntry; + } + code_ptr_location = HInvokeStaticOrDirect::CodePtrLocation::kCallArtMethod; + } else if (Runtime::Current()->UseJitCompilation()) { // JIT or on-device AOT compilation referencing a boot image method. // Use the method address directly. - method_load_kind = HInvokeStaticOrDirect::MethodLoadKind::kDirectAddress; + method_load_kind = HInvokeStaticOrDirect::MethodLoadKind::kJitDirectAddress; method_load_data = reinterpret_cast<uintptr_t>(callee); code_ptr_location = HInvokeStaticOrDirect::CodePtrLocation::kCallArtMethod; - } else if (codegen->GetCompilerOptions().IsBootImage() && - BootImageAOTCanEmbedMethod(callee, compiler_driver)) { - method_load_kind = HInvokeStaticOrDirect::MethodLoadKind::kBootImageLinkTimePcRelative; + } else if (IsInBootImage(callee)) { + // Use PC-relative access to the .data.bimg.rel.ro methods array. + method_load_kind = HInvokeStaticOrDirect::MethodLoadKind::kBootImageRelRo; code_ptr_location = HInvokeStaticOrDirect::CodePtrLocation::kCallArtMethod; } else { - // Use PC-relative access to the .bss methods arrays. + // Use PC-relative access to the .bss methods array. method_load_kind = HInvokeStaticOrDirect::MethodLoadKind::kBssEntry; code_ptr_location = HInvokeStaticOrDirect::CodePtrLocation::kCallArtMethod; } @@ -140,15 +122,12 @@ void HSharpening::SharpenInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke, HInvokeStaticOrDirect::DispatchInfo desired_dispatch_info = { method_load_kind, code_ptr_location, method_load_data }; - HInvokeStaticOrDirect::DispatchInfo dispatch_info = - codegen->GetSupportedInvokeStaticOrDirectDispatch(desired_dispatch_info, invoke); - invoke->SetDispatchInfo(dispatch_info); + return codegen->GetSupportedInvokeStaticOrDirectDispatch(desired_dispatch_info, callee); } HLoadClass::LoadKind HSharpening::ComputeLoadClassKind( HLoadClass* load_class, CodeGenerator* codegen, - CompilerDriver* compiler_driver, const DexCompilationUnit& dex_compilation_unit) { Handle<mirror::Class> klass = load_class->GetClass(); DCHECK(load_class->GetLoadKind() == HLoadClass::LoadKind::kRuntimeCall || @@ -172,29 +151,29 @@ HLoadClass::LoadKind HSharpening::ComputeLoadClassKind( bool is_in_boot_image = false; HLoadClass::LoadKind desired_load_kind = HLoadClass::LoadKind::kInvalid; Runtime* runtime = Runtime::Current(); - if (codegen->GetCompilerOptions().IsBootImage()) { + const CompilerOptions& compiler_options = codegen->GetCompilerOptions(); + if (compiler_options.IsBootImage()) { // Compiling boot image. Check if the class is a boot image class. DCHECK(!runtime->UseJitCompilation()); - if (!compiler_driver->GetSupportBootImageFixup()) { - // compiler_driver_test. Do not sharpen. + if (!compiler_options.GetCompilePic()) { + // Test configuration, do not sharpen. desired_load_kind = HLoadClass::LoadKind::kRuntimeCall; } else if ((klass != nullptr) && - compiler_driver->IsImageClass(dex_file.StringByTypeIdx(type_index))) { + compiler_options.IsImageClass(dex_file.StringByTypeIdx(type_index))) { is_in_boot_image = true; desired_load_kind = HLoadClass::LoadKind::kBootImageLinkTimePcRelative; } else { // Not a boot image class. 
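Aside: the rewritten sharpening for invokes and classes follows the same decision ladder: boot-image compilation picks a link-time PC-relative or .bss load, JIT embeds addresses (or uses a JIT table entry when unresolved), AOT app code referencing boot-image entities goes through .data.bimg.rel.ro, and everything else falls back to a .bss entry. A compressed sketch of that ladder (local toy enum; the names only approximate the MethodLoadKind/LoadKind values in the hunks above):

    enum class ToyLoadKind {
      kRuntimeCall,                  // unsharpened fallback (non-PIC test configurations)
      kBootImageLinkTimePcRelative,  // boot image compilation, target is an image class/method
      kBootImageRelRo,               // AOT app code referencing a boot image entity
      kBssEntry,                     // PC-relative .bss slot, resolved at runtime
      kJitDirectAddress,             // JIT: embed the pointer directly
    };

    struct ToyContext {
      bool is_boot_image_compilation;
      bool compile_pic;
      bool target_is_image_entity;   // e.g. IsImageClass(descriptor)
      bool is_jit;
      bool target_in_boot_image_space;
    };

    // Shared shape of the sharpening decisions: prefer the strongest load kind the
    // current compilation mode allows, falling back to a .bss entry.
    static ToyLoadKind ChooseLoadKind(const ToyContext& ctx) {
      if (ctx.is_boot_image_compilation) {
        if (!ctx.compile_pic) return ToyLoadKind::kRuntimeCall;  // test configuration
        return ctx.target_is_image_entity ? ToyLoadKind::kBootImageLinkTimePcRelative
                                          : ToyLoadKind::kBssEntry;
      }
      if (ctx.is_jit) {
        return ToyLoadKind::kJitDirectAddress;  // or a JIT table address if not yet resolved
      }
      if (ctx.target_in_boot_image_space) {
        return ToyLoadKind::kBootImageRelRo;
      }
      return ToyLoadKind::kBssEntry;
    }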
- DCHECK(ContainsElement(compiler_driver->GetDexFilesForOatFile(), &dex_file)); + DCHECK(ContainsElement(compiler_options.GetDexFilesForOatFile(), &dex_file)); desired_load_kind = HLoadClass::LoadKind::kBssEntry; } } else { is_in_boot_image = (klass != nullptr) && runtime->GetHeap()->ObjectIsInBootImageSpace(klass.Get()); if (runtime->UseJitCompilation()) { - DCHECK(!codegen->GetCompilerOptions().GetCompilePic()); + DCHECK(!compiler_options.GetCompilePic()); if (is_in_boot_image) { - // TODO: Use direct pointers for all non-moving spaces, not just boot image. Bug: 29530787 - desired_load_kind = HLoadClass::LoadKind::kBootImageAddress; + desired_load_kind = HLoadClass::LoadKind::kJitBootImageAddress; } else if (klass != nullptr) { desired_load_kind = HLoadClass::LoadKind::kJitTableAddress; } else { @@ -206,11 +185,7 @@ HLoadClass::LoadKind HSharpening::ComputeLoadClassKind( } } else if (is_in_boot_image) { // AOT app compilation, boot image class. - if (codegen->GetCompilerOptions().GetCompilePic()) { - desired_load_kind = HLoadClass::LoadKind::kBootImageClassTable; - } else { - desired_load_kind = HLoadClass::LoadKind::kBootImageAddress; - } + desired_load_kind = HLoadClass::LoadKind::kBootImageRelRo; } else { // Not JIT and the klass is not in boot image. desired_load_kind = HLoadClass::LoadKind::kBssEntry; @@ -236,10 +211,75 @@ HLoadClass::LoadKind HSharpening::ComputeLoadClassKind( return load_kind; } +static inline bool CanUseTypeCheckBitstring(ObjPtr<mirror::Class> klass, CodeGenerator* codegen) + REQUIRES_SHARED(Locks::mutator_lock_) { + DCHECK(!klass->IsProxyClass()); + DCHECK(!klass->IsArrayClass()); + + if (Runtime::Current()->UseJitCompilation()) { + // If we're JITting, try to assign a type check bitstring (fall through). + } else if (codegen->GetCompilerOptions().IsBootImage()) { + const char* descriptor = klass->GetDexFile().StringByTypeIdx(klass->GetDexTypeIndex()); + if (!codegen->GetCompilerOptions().IsImageClass(descriptor)) { + return false; + } + // If the target is a boot image class, try to assign a type check bitstring (fall through). + // (If --force-determinism, this was already done; repeating is OK and yields the same result.) + } else { + // TODO: Use the bitstring also for AOT app compilation if the target class has a bitstring + // already assigned in the boot image. + return false; + } + + // Try to assign a type check bitstring. + MutexLock subtype_check_lock(Thread::Current(), *Locks::subtype_check_lock_); + if ((false) && // FIXME: Inliner does not respect CompilerDriver::ShouldCompileMethod() + // and we're hitting an unassigned bitstring in dex2oat_image_test. 
b/26687569 + kIsDebugBuild && + codegen->GetCompilerOptions().IsBootImage() && + codegen->GetCompilerOptions().IsForceDeterminism()) { + SubtypeCheckInfo::State old_state = SubtypeCheck<ObjPtr<mirror::Class>>::GetState(klass); + CHECK(old_state == SubtypeCheckInfo::kAssigned || old_state == SubtypeCheckInfo::kOverflowed) + << klass->PrettyDescriptor() << "/" << old_state + << " in " << codegen->GetGraph()->PrettyMethod(); + } + SubtypeCheckInfo::State state = SubtypeCheck<ObjPtr<mirror::Class>>::EnsureAssigned(klass); + return state == SubtypeCheckInfo::kAssigned; +} + +TypeCheckKind HSharpening::ComputeTypeCheckKind(ObjPtr<mirror::Class> klass, + CodeGenerator* codegen, + bool needs_access_check) { + if (klass == nullptr) { + return TypeCheckKind::kUnresolvedCheck; + } else if (klass->IsInterface()) { + return TypeCheckKind::kInterfaceCheck; + } else if (klass->IsArrayClass()) { + if (klass->GetComponentType()->IsObjectClass()) { + return TypeCheckKind::kArrayObjectCheck; + } else if (klass->CannotBeAssignedFromOtherTypes()) { + return TypeCheckKind::kExactCheck; + } else { + return TypeCheckKind::kArrayCheck; + } + } else if (klass->IsFinal()) { // TODO: Consider using bitstring for final classes. + return TypeCheckKind::kExactCheck; + } else if (kBitstringSubtypeCheckEnabled && + !needs_access_check && + CanUseTypeCheckBitstring(klass, codegen)) { + // TODO: We should not need the `!needs_access_check` check but getting rid of that + // requires rewriting some optimizations in instruction simplifier. + return TypeCheckKind::kBitstringCheck; + } else if (klass->IsAbstract()) { + return TypeCheckKind::kAbstractClassCheck; + } else { + return TypeCheckKind::kClassHierarchyCheck; + } +} + void HSharpening::ProcessLoadString( HLoadString* load_string, CodeGenerator* codegen, - CompilerDriver* compiler_driver, const DexCompilationUnit& dex_compilation_unit, VariableSizedHandleScope* handles) { DCHECK_EQ(load_string->GetLoadKind(), HLoadString::LoadKind::kRuntimeCall); @@ -258,17 +298,33 @@ void HSharpening::ProcessLoadString( : hs.NewHandle(class_linker->FindDexCache(soa.Self(), dex_file)); ObjPtr<mirror::String> string = nullptr; - if (codegen->GetCompilerOptions().IsBootImage()) { + const CompilerOptions& compiler_options = codegen->GetCompilerOptions(); + if (compiler_options.IsBootImage()) { // Compiling boot image. Resolve the string and allocate it if needed, to ensure // the string will be added to the boot image. DCHECK(!runtime->UseJitCompilation()); - string = class_linker->ResolveString(string_index, dex_cache); - CHECK(string != nullptr); - if (compiler_driver->GetSupportBootImageFixup()) { - DCHECK(ContainsElement(compiler_driver->GetDexFilesForOatFile(), &dex_file)); - desired_load_kind = HLoadString::LoadKind::kBootImageLinkTimePcRelative; + if (compiler_options.GetCompilePic()) { + DCHECK(ContainsElement(compiler_options.GetDexFilesForOatFile(), &dex_file)); + if (compiler_options.IsForceDeterminism()) { + // Strings for methods we're compiling should be pre-resolved but Strings in inlined + // methods may not be if these inlined methods are not in the boot image profile. + // Multiple threads allocating new Strings can cause non-deterministic boot image + // because of the image relying on the order of GC roots we walk. (We could fix that + // by ordering the roots we walk in ImageWriter.) Therefore we avoid allocating these + // strings even if that results in omitting them from the boot image and using the + // sub-optimal load kind kBssEntry. 
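The ComputeTypeCheckKind helper introduced above picks the cheapest check that is still correct for the resolved class. A compact, self-contained restatement of that priority order, with plain booleans standing in for the mirror::Class queries (everything here is illustrative):

enum class CheckKind {
  kUnresolvedCheck, kInterfaceCheck, kArrayObjectCheck, kArrayCheck,
  kExactCheck, kBitstringCheck, kAbstractClassCheck, kClassHierarchyCheck,
};

// `can_use_bitstring` folds together kBitstringSubtypeCheckEnabled,
// !needs_access_check and CanUseTypeCheckBitstring() from the hunk above.
CheckKind ClassifyTypeCheck(bool resolved, bool is_interface, bool is_array,
                            bool component_is_object, bool cannot_have_subtypes,
                            bool is_final, bool can_use_bitstring, bool is_abstract) {
  if (!resolved) return CheckKind::kUnresolvedCheck;
  if (is_interface) return CheckKind::kInterfaceCheck;
  if (is_array) {
    if (component_is_object) return CheckKind::kArrayObjectCheck;
    return cannot_have_subtypes ? CheckKind::kExactCheck : CheckKind::kArrayCheck;
  }
  if (is_final) return CheckKind::kExactCheck;               // no subclasses possible
  if (can_use_bitstring) return CheckKind::kBitstringCheck;  // one load plus a masked compare
  return is_abstract ? CheckKind::kAbstractClassCheck : CheckKind::kClassHierarchyCheck;
}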
+ string = class_linker->LookupString(string_index, dex_cache.Get()); + } else { + string = class_linker->ResolveString(string_index, dex_cache); + CHECK(string != nullptr); + } + if (string != nullptr) { + desired_load_kind = HLoadString::LoadKind::kBootImageLinkTimePcRelative; + } else { + desired_load_kind = HLoadString::LoadKind::kBssEntry; + } } else { - // compiler_driver_test. Do not sharpen. + // Test configuration, do not sharpen. desired_load_kind = HLoadString::LoadKind::kRuntimeCall; } } else if (runtime->UseJitCompilation()) { @@ -276,7 +332,7 @@ void HSharpening::ProcessLoadString( string = class_linker->LookupString(string_index, dex_cache.Get()); if (string != nullptr) { if (runtime->GetHeap()->ObjectIsInBootImageSpace(string)) { - desired_load_kind = HLoadString::LoadKind::kBootImageAddress; + desired_load_kind = HLoadString::LoadKind::kJitBootImageAddress; } else { desired_load_kind = HLoadString::LoadKind::kJitTableAddress; } @@ -287,11 +343,7 @@ void HSharpening::ProcessLoadString( // AOT app compilation. Try to lookup the string without allocating if not found. string = class_linker->LookupString(string_index, dex_cache.Get()); if (string != nullptr && runtime->GetHeap()->ObjectIsInBootImageSpace(string)) { - if (codegen->GetCompilerOptions().GetCompilePic()) { - desired_load_kind = HLoadString::LoadKind::kBootImageInternTable; - } else { - desired_load_kind = HLoadString::LoadKind::kBootImageAddress; - } + desired_load_kind = HLoadString::LoadKind::kBootImageRelRo; } else { desired_load_kind = HLoadString::LoadKind::kBssEntry; } diff --git a/compiler/optimizing/sharpening.h b/compiler/optimizing/sharpening.h index 6df7d6d91e..b81867201f 100644 --- a/compiler/optimizing/sharpening.h +++ b/compiler/optimizing/sharpening.h @@ -23,49 +23,33 @@ namespace art { class CodeGenerator; -class CompilerDriver; class DexCompilationUnit; -// Optimization that tries to improve the way we dispatch methods and access types, -// fields, etc. Besides actual method sharpening based on receiver type (for example -// virtual->direct), this includes selecting the best available dispatch for -// invoke-static/-direct based on code generator support. -class HSharpening : public HOptimization { +// Utility methods that try to improve the way we dispatch methods, and access +// types and strings. +class HSharpening { public: - HSharpening(HGraph* graph, - CodeGenerator* codegen, - CompilerDriver* compiler_driver, - const char* name = kSharpeningPassName) - : HOptimization(graph, name), - codegen_(codegen), - compiler_driver_(compiler_driver) { } - - void Run() OVERRIDE; - - static constexpr const char* kSharpeningPassName = "sharpening"; - - // Used by the builder. - static void ProcessLoadString(HLoadString* load_string, - CodeGenerator* codegen, - CompilerDriver* compiler_driver, - const DexCompilationUnit& dex_compilation_unit, - VariableSizedHandleScope* handles); + // Used by the builder and InstructionSimplifier. + static HInvokeStaticOrDirect::DispatchInfo SharpenInvokeStaticOrDirect( + ArtMethod* callee, CodeGenerator* codegen); // Used by the builder and the inliner. static HLoadClass::LoadKind ComputeLoadClassKind(HLoadClass* load_class, CodeGenerator* codegen, - CompilerDriver* compiler_driver, const DexCompilationUnit& dex_compilation_unit) REQUIRES_SHARED(Locks::mutator_lock_); - // Used by Sharpening and InstructionSimplifier. 
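With HSharpening reduced to static helpers in sharpening.h above, callers compute the dispatch info themselves and store it on the invoke. A hedged sketch of such a call site follows; it is not standalone, the wrapper function and variable names are invented, and only SharpenInvokeStaticOrDirect and SetDispatchInfo come from this change.

// Hypothetical call site, e.g. in the instruction builder or simplifier.
void SharpenDirectCall(HInvokeStaticOrDirect* invoke,
                       ArtMethod* resolved_method,
                       CodeGenerator* codegen) {
  // The helper only computes the dispatch info; the caller decides where it is stored.
  HInvokeStaticOrDirect::DispatchInfo dispatch_info =
      HSharpening::SharpenInvokeStaticOrDirect(resolved_method, codegen);
  invoke->SetDispatchInfo(dispatch_info);
}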
- static void SharpenInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke, - CodeGenerator* codegen, - CompilerDriver* compiler_driver); + // Used by the builder. + static TypeCheckKind ComputeTypeCheckKind(ObjPtr<mirror::Class> klass, + CodeGenerator* codegen, + bool needs_access_check) + REQUIRES_SHARED(Locks::mutator_lock_); - private: - CodeGenerator* codegen_; - CompilerDriver* compiler_driver_; + // Used by the builder. + static void ProcessLoadString(HLoadString* load_string, + CodeGenerator* codegen, + const DexCompilationUnit& dex_compilation_unit, + VariableSizedHandleScope* handles); }; } // namespace art diff --git a/compiler/optimizing/side_effects_analysis.cc b/compiler/optimizing/side_effects_analysis.cc index 6d82e8e06d..ba97b43de9 100644 --- a/compiler/optimizing/side_effects_analysis.cc +++ b/compiler/optimizing/side_effects_analysis.cc @@ -18,7 +18,7 @@ namespace art { -void SideEffectsAnalysis::Run() { +bool SideEffectsAnalysis::Run() { // Inlining might have created more blocks, so we need to increase the size // if needed. block_effects_.resize(graph_->GetBlocks().size()); @@ -69,6 +69,7 @@ void SideEffectsAnalysis::Run() { } } has_run_ = true; + return true; } SideEffects SideEffectsAnalysis::GetLoopEffects(HBasicBlock* block) const { diff --git a/compiler/optimizing/side_effects_analysis.h b/compiler/optimizing/side_effects_analysis.h index c0f81a9c54..56a01e63f1 100644 --- a/compiler/optimizing/side_effects_analysis.h +++ b/compiler/optimizing/side_effects_analysis.h @@ -37,7 +37,7 @@ class SideEffectsAnalysis : public HOptimization { SideEffects GetBlockEffects(HBasicBlock* block) const; // Compute side effects of individual blocks and loops. - void Run(); + bool Run(); bool HasRun() const { return has_run_; } diff --git a/compiler/optimizing/side_effects_test.cc b/compiler/optimizing/side_effects_test.cc index 97317124ef..cf26e79c69 100644 --- a/compiler/optimizing/side_effects_test.cc +++ b/compiler/optimizing/side_effects_test.cc @@ -141,13 +141,13 @@ TEST(SideEffectsTest, NoDependences) { TEST(SideEffectsTest, VolatileDependences) { SideEffects volatile_write = - SideEffects::FieldWriteOfType(DataType::Type::kInt32, /* is_volatile */ true); + SideEffects::FieldWriteOfType(DataType::Type::kInt32, /* is_volatile= */ true); SideEffects any_write = - SideEffects::FieldWriteOfType(DataType::Type::kInt32, /* is_volatile */ false); + SideEffects::FieldWriteOfType(DataType::Type::kInt32, /* is_volatile= */ false); SideEffects volatile_read = - SideEffects::FieldReadOfType(DataType::Type::kInt8, /* is_volatile */ true); + SideEffects::FieldReadOfType(DataType::Type::kInt8, /* is_volatile= */ true); SideEffects any_read = - SideEffects::FieldReadOfType(DataType::Type::kInt8, /* is_volatile */ false); + SideEffects::FieldReadOfType(DataType::Type::kInt8, /* is_volatile= */ false); EXPECT_FALSE(volatile_write.MayDependOn(any_read)); EXPECT_TRUE(any_read.MayDependOn(volatile_write)); @@ -163,15 +163,15 @@ TEST(SideEffectsTest, VolatileDependences) { TEST(SideEffectsTest, SameWidthTypesNoAlias) { // Type I/F. 
testNoWriteAndReadDependence( - SideEffects::FieldWriteOfType(DataType::Type::kInt32, /* is_volatile */ false), - SideEffects::FieldReadOfType(DataType::Type::kFloat32, /* is_volatile */ false)); + SideEffects::FieldWriteOfType(DataType::Type::kInt32, /* is_volatile= */ false), + SideEffects::FieldReadOfType(DataType::Type::kFloat32, /* is_volatile= */ false)); testNoWriteAndReadDependence( SideEffects::ArrayWriteOfType(DataType::Type::kInt32), SideEffects::ArrayReadOfType(DataType::Type::kFloat32)); // Type L/D. testNoWriteAndReadDependence( - SideEffects::FieldWriteOfType(DataType::Type::kInt64, /* is_volatile */ false), - SideEffects::FieldReadOfType(DataType::Type::kFloat64, /* is_volatile */ false)); + SideEffects::FieldWriteOfType(DataType::Type::kInt64, /* is_volatile= */ false), + SideEffects::FieldReadOfType(DataType::Type::kFloat64, /* is_volatile= */ false)); testNoWriteAndReadDependence( SideEffects::ArrayWriteOfType(DataType::Type::kInt64), SideEffects::ArrayReadOfType(DataType::Type::kFloat64)); @@ -181,9 +181,9 @@ TEST(SideEffectsTest, AllWritesAndReads) { SideEffects s = SideEffects::None(); // Keep taking the union of different writes and reads. for (DataType::Type type : kTestTypes) { - s = s.Union(SideEffects::FieldWriteOfType(type, /* is_volatile */ false)); + s = s.Union(SideEffects::FieldWriteOfType(type, /* is_volatile= */ false)); s = s.Union(SideEffects::ArrayWriteOfType(type)); - s = s.Union(SideEffects::FieldReadOfType(type, /* is_volatile */ false)); + s = s.Union(SideEffects::FieldReadOfType(type, /* is_volatile= */ false)); s = s.Union(SideEffects::ArrayReadOfType(type)); } EXPECT_TRUE(s.DoesAllReadWrite()); @@ -202,6 +202,7 @@ TEST(SideEffectsTest, GC) { EXPECT_TRUE(depends_on_gc.MayDependOn(all_changes)); EXPECT_TRUE(depends_on_gc.Union(can_trigger_gc).MayDependOn(all_changes)); EXPECT_FALSE(can_trigger_gc.MayDependOn(all_changes)); + EXPECT_FALSE(can_trigger_gc.MayDependOn(can_trigger_gc)); EXPECT_TRUE(all_changes.Includes(can_trigger_gc)); EXPECT_FALSE(all_changes.Includes(depends_on_gc)); @@ -253,10 +254,10 @@ TEST(SideEffectsTest, BitStrings) { "||I|||||", SideEffects::ArrayReadOfType(DataType::Type::kInt32).ToString().c_str()); SideEffects s = SideEffects::None(); - s = s.Union(SideEffects::FieldWriteOfType(DataType::Type::kUint16, /* is_volatile */ false)); - s = s.Union(SideEffects::FieldWriteOfType(DataType::Type::kInt64, /* is_volatile */ false)); + s = s.Union(SideEffects::FieldWriteOfType(DataType::Type::kUint16, /* is_volatile= */ false)); + s = s.Union(SideEffects::FieldWriteOfType(DataType::Type::kInt64, /* is_volatile= */ false)); s = s.Union(SideEffects::ArrayWriteOfType(DataType::Type::kInt16)); - s = s.Union(SideEffects::FieldReadOfType(DataType::Type::kInt32, /* is_volatile */ false)); + s = s.Union(SideEffects::FieldReadOfType(DataType::Type::kInt32, /* is_volatile= */ false)); s = s.Union(SideEffects::ArrayReadOfType(DataType::Type::kFloat32)); s = s.Union(SideEffects::ArrayReadOfType(DataType::Type::kFloat64)); EXPECT_STREQ("||DF|I||S|JC|", s.ToString().c_str()); diff --git a/compiler/optimizing/ssa_builder.cc b/compiler/optimizing/ssa_builder.cc index dd54468217..0d0e1ecf1f 100644 --- a/compiler/optimizing/ssa_builder.cc +++ b/compiler/optimizing/ssa_builder.cc @@ -16,6 +16,9 @@ #include "ssa_builder.h" +#include "base/arena_bit_vector.h" +#include "base/bit_vector-inl.h" +#include "base/logging.h" #include "data_type-inl.h" #include "dex/bytecode_utils.h" #include "mirror/class-inl.h" @@ -388,7 +391,7 @@ bool 
SsaBuilder::FixAmbiguousArrayOps() { // succeed in code validated by the verifier. HInstruction* equivalent = GetFloatOrDoubleEquivalent(value, array_type); DCHECK(equivalent != nullptr); - aset->ReplaceInput(equivalent, /* input_index */ 2); + aset->ReplaceInput(equivalent, /* index= */ 2); if (equivalent->IsPhi()) { // Returned equivalent is a phi which may not have had its inputs // replaced yet. We need to run primitive type propagation on it. @@ -415,29 +418,36 @@ bool SsaBuilder::FixAmbiguousArrayOps() { return true; } -static bool HasAliasInEnvironments(HInstruction* instruction) { - HEnvironment* last_user = nullptr; +bool SsaBuilder::HasAliasInEnvironments(HInstruction* instruction) { + ScopedArenaHashSet<size_t> seen_users( + local_allocator_->Adapter(kArenaAllocGraphBuilder)); for (const HUseListNode<HEnvironment*>& use : instruction->GetEnvUses()) { DCHECK(use.GetUser() != nullptr); - // Note: The first comparison (== null) always fails. - if (use.GetUser() == last_user) { + size_t id = use.GetUser()->GetHolder()->GetId(); + if (seen_users.find(id) != seen_users.end()) { return true; } - last_user = use.GetUser(); + seen_users.insert(id); } + return false; +} - if (kIsDebugBuild) { - // Do a quadratic search to ensure same environment uses are next - // to each other. - const HUseList<HEnvironment*>& env_uses = instruction->GetEnvUses(); - for (auto current = env_uses.begin(), end = env_uses.end(); current != end; ++current) { - auto next = current; - for (++next; next != end; ++next) { - DCHECK(next->GetUser() != current->GetUser()); - } +bool SsaBuilder::ReplaceUninitializedStringPhis() { + for (HInvoke* invoke : uninitialized_string_phis_) { + HInstruction* str = invoke->InputAt(invoke->InputCount() - 1); + if (str->IsPhi()) { + // If after redundant phi and dead phi elimination, it's still a phi that feeds + // the invoke, then we must be compiling a method with irreducible loops. Just bail. + DCHECK(graph_->HasIrreducibleLoops()); + return false; } + DCHECK(str->IsNewInstance()); + AddUninitializedString(str->AsNewInstance()); + str->ReplaceUsesDominatedBy(invoke, invoke); + str->ReplaceEnvUsesDominatedBy(invoke, invoke); + invoke->RemoveInputAt(invoke->InputCount() - 1); } - return false; + return true; } void SsaBuilder::RemoveRedundantUninitializedStrings() { @@ -452,8 +462,9 @@ void SsaBuilder::RemoveRedundantUninitializedStrings() { DCHECK(new_instance->IsStringAlloc()); // Replace NewInstance of String with NullConstant if not used prior to - // calling StringFactory. In case of deoptimization, the interpreter is - // expected to skip null check on the `this` argument of the StringFactory call. + // calling StringFactory. We check for alias environments in case of deoptimization. + // The interpreter is expected to skip null check on the `this` argument of the + // StringFactory call. if (!new_instance->HasNonEnvironmentUses() && !HasAliasInEnvironments(new_instance)) { new_instance->ReplaceWith(graph_->GetNullConstant()); new_instance->GetBlock()->RemoveInstruction(new_instance); @@ -488,35 +499,35 @@ void SsaBuilder::RemoveRedundantUninitializedStrings() { GraphAnalysisResult SsaBuilder::BuildSsa() { DCHECK(!graph_->IsInSsaForm()); - // 1) Propagate types of phis. At this point, phis are typed void in the general + // Propagate types of phis. At this point, phis are typed void in the general // case, or float/double/reference if we created an equivalent phi. So we need // to propagate the types across phis to give them a correct type. 
If a type // conflict is detected in this stage, the phi is marked dead. RunPrimitiveTypePropagation(); - // 2) Now that the correct primitive types have been assigned, we can get rid + // Now that the correct primitive types have been assigned, we can get rid // of redundant phis. Note that we cannot do this phase before type propagation, // otherwise we could get rid of phi equivalents, whose presence is a requirement // for the type propagation phase. Note that this is to satisfy statement (a) // of the SsaBuilder (see ssa_builder.h). SsaRedundantPhiElimination(graph_).Run(); - // 3) Fix the type for null constants which are part of an equality comparison. + // Fix the type for null constants which are part of an equality comparison. // We need to do this after redundant phi elimination, to ensure the only cases // that we can see are reference comparison against 0. The redundant phi // elimination ensures we do not see a phi taking two 0 constants in a HEqual // or HNotEqual. FixNullConstantType(); - // 4) Compute type of reference type instructions. The pass assumes that + // Compute type of reference type instructions. The pass assumes that // NullConstant has been fixed up. ReferenceTypePropagation(graph_, class_loader_, dex_cache_, handles_, - /* is_first_run */ true).Run(); + /* is_first_run= */ true).Run(); - // 5) HInstructionBuilder duplicated ArrayGet instructions with ambiguous type + // HInstructionBuilder duplicated ArrayGet instructions with ambiguous type // (int/float or long/double) and marked ArraySets with ambiguous input type. // Now that RTP computed the type of the array input, the ambiguity can be // resolved and the correct equivalents kept. @@ -524,13 +535,13 @@ GraphAnalysisResult SsaBuilder::BuildSsa() { return kAnalysisFailAmbiguousArrayOp; } - // 6) Mark dead phis. This will mark phis which are not used by instructions + // Mark dead phis. This will mark phis which are not used by instructions // or other live phis. If compiling as debuggable code, phis will also be kept // live if they have an environment use. SsaDeadPhiElimination dead_phi_elimimation(graph_); dead_phi_elimimation.MarkDeadPhis(); - // 7) Make sure environments use the right phi equivalent: a phi marked dead + // Make sure environments use the right phi equivalent: a phi marked dead // can have a phi equivalent that is not dead. In that case we have to replace // it with the live equivalent because deoptimization and try/catch rely on // environments containing values of all live vregs at that point. Note that @@ -539,14 +550,22 @@ GraphAnalysisResult SsaBuilder::BuildSsa() { // environments to just reference one. FixEnvironmentPhis(); - // 8) Now that the right phis are used for the environments, we can eliminate + // Now that the right phis are used for the environments, we can eliminate // phis we do not need. Regardless of the debuggable status, this phase is /// necessary for statement (b) of the SsaBuilder (see ssa_builder.h), as well // as for the code generation, which does not deal with phis of conflicting // input types. dead_phi_elimimation.EliminateDeadPhis(); - // 9) HInstructionBuidler replaced uses of NewInstances of String with the + // Replace Phis that feed in a String.<init> during instruction building. We + // run this after redundant and dead phi elimination to make sure the phi will have + // been replaced by the actual allocation. Only with an irreducible loop + // a phi can still be the input, in which case we bail. 
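As a side note on the HasAliasInEnvironments rewrite earlier in ssa_builder.cc: the old code assumed that uses by the same environment holder sit next to each other in the use list, while the new code simply records each holder id in a set. A minimal self-contained illustration of that dedup, with plain ints standing in for environment holders:

#include <unordered_set>
#include <vector>

// Each environment use is keyed by the id of the instruction holding the
// environment (GetHolder()->GetId() in the hunk above).
bool HasDuplicateHolder(const std::vector<int>& env_holder_ids) {
  std::unordered_set<int> seen;
  for (int id : env_holder_ids) {
    if (!seen.insert(id).second) {
      return true;  // the same holder refers to the value from two vreg slots
    }
  }
  return false;
}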
+ if (!ReplaceUninitializedStringPhis()) { + return kAnalysisFailIrreducibleLoopAndStringInit; + } + + // HInstructionBuidler replaced uses of NewInstances of String with the // results of their corresponding StringFactory calls. Unless the String // objects are used before they are initialized, they can be replaced with // NullConstant. Note that this optimization is valid only if unsimplified diff --git a/compiler/optimizing/ssa_builder.h b/compiler/optimizing/ssa_builder.h index 60831a9e6a..bb892c9304 100644 --- a/compiler/optimizing/ssa_builder.h +++ b/compiler/optimizing/ssa_builder.h @@ -61,7 +61,8 @@ class SsaBuilder : public ValueObject { local_allocator_(local_allocator), ambiguous_agets_(local_allocator->Adapter(kArenaAllocGraphBuilder)), ambiguous_asets_(local_allocator->Adapter(kArenaAllocGraphBuilder)), - uninitialized_strings_(local_allocator->Adapter(kArenaAllocGraphBuilder)) { + uninitialized_strings_(local_allocator->Adapter(kArenaAllocGraphBuilder)), + uninitialized_string_phis_(local_allocator->Adapter(kArenaAllocGraphBuilder)) { graph_->InitializeInexactObjectRTI(handles); } @@ -96,6 +97,10 @@ class SsaBuilder : public ValueObject { } } + void AddUninitializedStringPhi(HInvoke* invoke) { + uninitialized_string_phis_.push_back(invoke); + } + private: void SetLoopHeaderPhiInputs(); void FixEnvironmentPhis(); @@ -118,6 +123,8 @@ class SsaBuilder : public ValueObject { HArrayGet* GetFloatOrDoubleEquivalentOfArrayGet(HArrayGet* aget); void RemoveRedundantUninitializedStrings(); + bool ReplaceUninitializedStringPhis(); + bool HasAliasInEnvironments(HInstruction* instruction); HGraph* const graph_; Handle<mirror::ClassLoader> class_loader_; @@ -131,6 +138,7 @@ class SsaBuilder : public ValueObject { ScopedArenaVector<HArrayGet*> ambiguous_agets_; ScopedArenaVector<HArraySet*> ambiguous_asets_; ScopedArenaVector<HNewInstance*> uninitialized_strings_; + ScopedArenaVector<HInvoke*> uninitialized_string_phis_; DISALLOW_COPY_AND_ASSIGN(SsaBuilder); }; diff --git a/compiler/optimizing/ssa_liveness_analysis.cc b/compiler/optimizing/ssa_liveness_analysis.cc index f6bd05269e..7b2c3a939c 100644 --- a/compiler/optimizing/ssa_liveness_analysis.cc +++ b/compiler/optimizing/ssa_liveness_analysis.cc @@ -103,9 +103,9 @@ void SsaLivenessAnalysis::ComputeLiveness() { ComputeLiveInAndLiveOutSets(); } -static void RecursivelyProcessInputs(HInstruction* current, - HInstruction* actual_user, - BitVector* live_in) { +void SsaLivenessAnalysis::RecursivelyProcessInputs(HInstruction* current, + HInstruction* actual_user, + BitVector* live_in) { HInputsRef inputs = current->GetInputs(); for (size_t i = 0; i < inputs.size(); ++i) { HInstruction* input = inputs[i]; @@ -120,7 +120,7 @@ static void RecursivelyProcessInputs(HInstruction* current, DCHECK(input->HasSsaIndex()); // `input` generates a result used by `current`. Add use and update // the live-in set. - input->GetLiveInterval()->AddUse(current, /* environment */ nullptr, i, actual_user); + input->GetLiveInterval()->AddUse(current, /* environment= */ nullptr, i, actual_user); live_in->SetBit(input->GetSsaIndex()); } else if (has_out_location) { // `input` generates a result but it is not used by `current`. @@ -131,11 +131,40 @@ static void RecursivelyProcessInputs(HInstruction* current, // Check that the inlined input is not a phi. Recursing on loop phis could // lead to an infinite loop. 
DCHECK(!input->IsPhi()); + DCHECK(!input->HasEnvironment()); RecursivelyProcessInputs(input, actual_user, live_in); } } } +void SsaLivenessAnalysis::ProcessEnvironment(HInstruction* current, + HInstruction* actual_user, + BitVector* live_in) { + for (HEnvironment* environment = current->GetEnvironment(); + environment != nullptr; + environment = environment->GetParent()) { + // Handle environment uses. See statements (b) and (c) of the + // SsaLivenessAnalysis. + for (size_t i = 0, e = environment->Size(); i < e; ++i) { + HInstruction* instruction = environment->GetInstructionAt(i); + if (instruction == nullptr) { + continue; + } + bool should_be_live = ShouldBeLiveForEnvironment(current, instruction); + // If this environment use does not keep the instruction live, it does not + // affect the live range of that instruction. + if (should_be_live) { + CHECK(instruction->HasSsaIndex()) << instruction->DebugName(); + live_in->SetBit(instruction->GetSsaIndex()); + instruction->GetLiveInterval()->AddUse(current, + environment, + i, + actual_user); + } + } + } +} + void SsaLivenessAnalysis::ComputeLiveRanges() { // Do a post order visit, adding inputs of instructions live in the block where // that instruction is defined, and killing instructions that are being visited. @@ -186,27 +215,6 @@ void SsaLivenessAnalysis::ComputeLiveRanges() { current->GetLiveInterval()->SetFrom(current->GetLifetimePosition()); } - // Process the environment first, because we know their uses come after - // or at the same liveness position of inputs. - for (HEnvironment* environment = current->GetEnvironment(); - environment != nullptr; - environment = environment->GetParent()) { - // Handle environment uses. See statements (b) and (c) of the - // SsaLivenessAnalysis. - for (size_t i = 0, e = environment->Size(); i < e; ++i) { - HInstruction* instruction = environment->GetInstructionAt(i); - bool should_be_live = ShouldBeLiveForEnvironment(current, instruction); - if (should_be_live) { - CHECK(instruction->HasSsaIndex()) << instruction->DebugName(); - live_in->SetBit(instruction->GetSsaIndex()); - } - if (instruction != nullptr) { - instruction->GetLiveInterval()->AddUse( - current, environment, i, /* actual_user */ nullptr, should_be_live); - } - } - } - // Process inputs of instructions. if (current->IsEmittedAtUseSite()) { if (kIsDebugBuild) { @@ -219,6 +227,16 @@ void SsaLivenessAnalysis::ComputeLiveRanges() { DCHECK(!current->HasEnvironmentUses()); } } else { + // Process the environment first, because we know their uses come after + // or at the same liveness position of inputs. + ProcessEnvironment(current, current, live_in); + + // Special case implicit null checks. We want their environment uses to be + // emitted at the instruction doing the actual null check. + HNullCheck* check = current->GetImplicitNullCheck(); + if (check != nullptr) { + ProcessEnvironment(check, current, live_in); + } RecursivelyProcessInputs(current, current, live_in); } } diff --git a/compiler/optimizing/ssa_liveness_analysis.h b/compiler/optimizing/ssa_liveness_analysis.h index f83bb52b69..c88390775c 100644 --- a/compiler/optimizing/ssa_liveness_analysis.h +++ b/compiler/optimizing/ssa_liveness_analysis.h @@ -60,7 +60,7 @@ class BlockInfo : public ArenaObject<kArenaAllocSsaLiveness> { * A live range contains the start and end of a range where an instruction or a temporary * is live. 
*/ -class LiveRange FINAL : public ArenaObject<kArenaAllocSsaLiveness> { +class LiveRange final : public ArenaObject<kArenaAllocSsaLiveness> { public: LiveRange(size_t start, size_t end, LiveRange* next) : start_(start), end_(end), next_(next) { DCHECK_LT(start, end); @@ -230,12 +230,25 @@ class SafepointPosition : public ArenaObject<kArenaAllocSsaLiveness> { : instruction_(instruction), next_(nullptr) {} + static size_t ComputePosition(HInstruction* instruction) { + // We special case instructions emitted at use site, as their + // safepoint position needs to be at their use. + if (instruction->IsEmittedAtUseSite()) { + // Currently only applies to implicit null checks, which are emitted + // at the next instruction. + DCHECK(instruction->IsNullCheck()) << instruction->DebugName(); + return instruction->GetLifetimePosition() + 2; + } else { + return instruction->GetLifetimePosition(); + } + } + void SetNext(SafepointPosition* next) { next_ = next; } size_t GetPosition() const { - return instruction_->GetLifetimePosition(); + return ComputePosition(instruction_); } SafepointPosition* GetNext() const { @@ -300,8 +313,7 @@ class LiveInterval : public ArenaObject<kArenaAllocSsaLiveness> { void AddUse(HInstruction* instruction, HEnvironment* environment, size_t input_index, - HInstruction* actual_user = nullptr, - bool keep_alive = false) { + HInstruction* actual_user = nullptr) { bool is_environment = (environment != nullptr); LocationSummary* locations = instruction->GetLocations(); if (actual_user == nullptr) { @@ -359,12 +371,6 @@ class LiveInterval : public ArenaObject<kArenaAllocSsaLiveness> { uses_.push_front(*new_use); } - if (is_environment && !keep_alive) { - // If this environment use does not keep the instruction live, it does not - // affect the live range of that instruction. - return; - } - size_t start_block_position = instruction->GetBlock()->GetLifetimeStart(); if (first_range_ == nullptr) { // First time we see a use of that interval. @@ -929,7 +935,7 @@ class LiveInterval : public ArenaObject<kArenaAllocSsaLiveness> { if (first_safepoint_ == nullptr) { first_safepoint_ = last_safepoint_ = safepoint; } else { - DCHECK_LT(last_safepoint_->GetPosition(), safepoint->GetPosition()); + DCHECK_LE(last_safepoint_->GetPosition(), safepoint->GetPosition()); last_safepoint_->SetNext(safepoint); last_safepoint_ = safepoint; } @@ -1149,16 +1155,20 @@ class LiveInterval : public ArenaObject<kArenaAllocSsaLiveness> { * * (a) Non-environment uses of an instruction always make * the instruction live. - * (b) Environment uses of an instruction whose type is - * object (that is, non-primitive), make the instruction live. - * This is due to having to keep alive objects that have - * finalizers deleting native objects. + * (b) Environment uses of an instruction whose type is object (that is, non-primitive), make the + * instruction live, unless the class has an @DeadReferenceSafe annotation. + * This avoids unexpected premature reference enqueuing or finalization, which could + * result in premature deletion of native objects. In the presence of @DeadReferenceSafe, + * object references are treated like primitive types. * (c) When the graph has the debuggable property, environment uses * of an instruction that has a primitive type make the instruction live. * If the graph does not have the debuggable property, the environment * use has no effect, and may get a 'none' value after register allocation. 
+ * (d) When compiling in OSR mode, all loops in the compiled method may be entered + * from the interpreter via SuspendCheck; such use in SuspendCheck makes the instruction + * live. * - * (b) and (c) are implemented through SsaLivenessAnalysis::ShouldBeLiveForEnvironment. + * (b), (c) and (d) are implemented through SsaLivenessAnalysis::ShouldBeLiveForEnvironment. */ class SsaLivenessAnalysis : public ValueObject { public: @@ -1256,17 +1266,29 @@ class SsaLivenessAnalysis : public ValueObject { // Update the live_out set of the block and returns whether it has changed. bool UpdateLiveOut(const HBasicBlock& block); + static void ProcessEnvironment(HInstruction* instruction, + HInstruction* actual_user, + BitVector* live_in); + static void RecursivelyProcessInputs(HInstruction* instruction, + HInstruction* actual_user, + BitVector* live_in); + // Returns whether `instruction` in an HEnvironment held by `env_holder` // should be kept live by the HEnvironment. static bool ShouldBeLiveForEnvironment(HInstruction* env_holder, HInstruction* instruction) { - if (instruction == nullptr) return false; + DCHECK(instruction != nullptr); // A value that's not live in compiled code may still be needed in interpreter, // due to code motion, etc. if (env_holder->IsDeoptimize()) return true; // A value live at a throwing instruction in a try block may be copied by // the exception handler to its location at the top of the catch block. if (env_holder->CanThrowIntoCatchBlock()) return true; - if (instruction->GetBlock()->GetGraph()->IsDebuggable()) return true; + HGraph* graph = instruction->GetBlock()->GetGraph(); + if (graph->IsDebuggable()) return true; + // When compiling in OSR mode, all loops in the compiled method may be entered + // from the interpreter via SuspendCheck; thus we need to preserve the environment. + if (env_holder->IsSuspendCheck() && graph->IsCompilingOsr()) return true; + if (graph -> IsDeadReferenceSafe()) return false; return instruction->GetType() == DataType::Type::kReference; } diff --git a/compiler/optimizing/ssa_liveness_analysis_test.cc b/compiler/optimizing/ssa_liveness_analysis_test.cc index b9bfbaa173..352c44f63a 100644 --- a/compiler/optimizing/ssa_liveness_analysis_test.cc +++ b/compiler/optimizing/ssa_liveness_analysis_test.cc @@ -28,18 +28,11 @@ namespace art { class SsaLivenessAnalysisTest : public OptimizingUnitTest { - public: - SsaLivenessAnalysisTest() - : graph_(CreateGraph()), - compiler_options_(), - instruction_set_(kRuntimeISA) { - std::string error_msg; - instruction_set_features_ = - InstructionSetFeatures::FromVariant(instruction_set_, "default", &error_msg); - codegen_ = CodeGenerator::Create(graph_, - instruction_set_, - *instruction_set_features_, - compiler_options_); + protected: + void SetUp() override { + OptimizingUnitTest::SetUp(); + graph_ = CreateGraph(); + codegen_ = CodeGenerator::Create(graph_, *compiler_options_); CHECK(codegen_ != nullptr) << instruction_set_ << " is not a supported target architecture."; // Create entry block. 
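The updated ShouldBeLiveForEnvironment above now also covers OSR suspend checks and @DeadReferenceSafe graphs. A condensed, illustrative restatement of the rules, with booleans standing in for the HGraph and HInstruction queries:

// Mirrors the order of the checks in ShouldBeLiveForEnvironment; illustrative only.
bool KeepLiveForEnvironment(bool holder_is_deoptimize,
                            bool holder_can_throw_into_catch,
                            bool graph_is_debuggable,
                            bool holder_is_suspend_check_and_osr,
                            bool graph_is_dead_reference_safe,
                            bool value_is_reference) {
  if (holder_is_deoptimize) return true;             // interpreter may need any value
  if (holder_can_throw_into_catch) return true;      // catch entry copies live vregs
  if (graph_is_debuggable) return true;              // debugger can inspect anything
  if (holder_is_suspend_check_and_osr) return true;  // rule (d): OSR entry from the interpreter
  if (graph_is_dead_reference_safe) return false;    // references treated like primitives
  return value_is_reference;                         // rule (b): keep objects for finalizers
}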
entry_ = new (GetAllocator()) HBasicBlock(graph_); @@ -57,9 +50,6 @@ class SsaLivenessAnalysisTest : public OptimizingUnitTest { } HGraph* graph_; - CompilerOptions compiler_options_; - InstructionSet instruction_set_; - std::unique_ptr<const InstructionSetFeatures> instruction_set_features_; std::unique_ptr<CodeGenerator> codegen_; HBasicBlock* entry_; }; @@ -104,25 +94,25 @@ TEST_F(SsaLivenessAnalysisTest, TestAput) { HInstruction* null_check = new (GetAllocator()) HNullCheck(array, 0); block->AddInstruction(null_check); HEnvironment* null_check_env = new (GetAllocator()) HEnvironment(GetAllocator(), - /* number_of_vregs */ 5, - /* method */ nullptr, - /* dex_pc */ 0u, + /* number_of_vregs= */ 5, + /* method= */ nullptr, + /* dex_pc= */ 0u, null_check); null_check_env->CopyFrom(ArrayRef<HInstruction* const>(args)); null_check->SetRawEnvironment(null_check_env); HInstruction* length = new (GetAllocator()) HArrayLength(array, 0); block->AddInstruction(length); - HInstruction* bounds_check = new (GetAllocator()) HBoundsCheck(index, length, /* dex_pc */ 0u); + HInstruction* bounds_check = new (GetAllocator()) HBoundsCheck(index, length, /* dex_pc= */ 0u); block->AddInstruction(bounds_check); HEnvironment* bounds_check_env = new (GetAllocator()) HEnvironment(GetAllocator(), - /* number_of_vregs */ 5, - /* method */ nullptr, - /* dex_pc */ 0u, + /* number_of_vregs= */ 5, + /* method= */ nullptr, + /* dex_pc= */ 0u, bounds_check); bounds_check_env->CopyFrom(ArrayRef<HInstruction* const>(args)); bounds_check->SetRawEnvironment(bounds_check_env); HInstruction* array_set = - new (GetAllocator()) HArraySet(array, index, value, DataType::Type::kInt32, /* dex_pc */ 0); + new (GetAllocator()) HArraySet(array, index, value, DataType::Type::kInt32, /* dex_pc= */ 0); block->AddInstruction(array_set); graph_->BuildDominatorTree(); @@ -134,12 +124,12 @@ TEST_F(SsaLivenessAnalysisTest, TestAput) { static const char* const expected[] = { "ranges: { [2,21) }, uses: { 15 17 21 }, { 15 19 } is_fixed: 0, is_split: 0 is_low: 0 " "is_high: 0", - "ranges: { [4,21) }, uses: { 19 21 }, { 15 19 } is_fixed: 0, is_split: 0 is_low: 0 " + "ranges: { [4,21) }, uses: { 19 21 }, { } is_fixed: 0, is_split: 0 is_low: 0 " "is_high: 0", - "ranges: { [6,21) }, uses: { 21 }, { 15 19 } is_fixed: 0, is_split: 0 is_low: 0 " + "ranges: { [6,21) }, uses: { 21 }, { } is_fixed: 0, is_split: 0 is_low: 0 " "is_high: 0", // Environment uses do not keep the non-reference argument alive. - "ranges: { [8,10) }, uses: { }, { 15 19 } is_fixed: 0, is_split: 0 is_low: 0 is_high: 0", + "ranges: { [8,10) }, uses: { }, { } is_fixed: 0, is_split: 0 is_low: 0 is_high: 0", // Environment uses keep the reference argument alive. 
"ranges: { [10,19) }, uses: { }, { 15 19 } is_fixed: 0, is_split: 0 is_low: 0 is_high: 0", }; @@ -173,9 +163,9 @@ TEST_F(SsaLivenessAnalysisTest, TestDeoptimize) { HInstruction* null_check = new (GetAllocator()) HNullCheck(array, 0); block->AddInstruction(null_check); HEnvironment* null_check_env = new (GetAllocator()) HEnvironment(GetAllocator(), - /* number_of_vregs */ 5, - /* method */ nullptr, - /* dex_pc */ 0u, + /* number_of_vregs= */ 5, + /* method= */ nullptr, + /* dex_pc= */ 0u, null_check); null_check_env->CopyFrom(ArrayRef<HInstruction* const>(args)); null_check->SetRawEnvironment(null_check_env); @@ -185,17 +175,17 @@ TEST_F(SsaLivenessAnalysisTest, TestDeoptimize) { HInstruction* ae = new (GetAllocator()) HAboveOrEqual(index, length); block->AddInstruction(ae); HInstruction* deoptimize = new(GetAllocator()) HDeoptimize( - GetAllocator(), ae, DeoptimizationKind::kBlockBCE, /* dex_pc */ 0u); + GetAllocator(), ae, DeoptimizationKind::kBlockBCE, /* dex_pc= */ 0u); block->AddInstruction(deoptimize); HEnvironment* deoptimize_env = new (GetAllocator()) HEnvironment(GetAllocator(), - /* number_of_vregs */ 5, - /* method */ nullptr, - /* dex_pc */ 0u, + /* number_of_vregs= */ 5, + /* method= */ nullptr, + /* dex_pc= */ 0u, deoptimize); deoptimize_env->CopyFrom(ArrayRef<HInstruction* const>(args)); deoptimize->SetRawEnvironment(deoptimize_env); HInstruction* array_set = - new (GetAllocator()) HArraySet(array, index, value, DataType::Type::kInt32, /* dex_pc */ 0); + new (GetAllocator()) HArraySet(array, index, value, DataType::Type::kInt32, /* dex_pc= */ 0); block->AddInstruction(array_set); graph_->BuildDominatorTree(); @@ -207,11 +197,11 @@ TEST_F(SsaLivenessAnalysisTest, TestDeoptimize) { static const char* const expected[] = { "ranges: { [2,23) }, uses: { 15 17 23 }, { 15 21 } is_fixed: 0, is_split: 0 is_low: 0 " "is_high: 0", - "ranges: { [4,23) }, uses: { 19 23 }, { 15 21 } is_fixed: 0, is_split: 0 is_low: 0 " + "ranges: { [4,23) }, uses: { 19 23 }, { 21 } is_fixed: 0, is_split: 0 is_low: 0 " "is_high: 0", - "ranges: { [6,23) }, uses: { 23 }, { 15 21 } is_fixed: 0, is_split: 0 is_low: 0 is_high: 0", + "ranges: { [6,23) }, uses: { 23 }, { 21 } is_fixed: 0, is_split: 0 is_low: 0 is_high: 0", // Environment use in HDeoptimize keeps even the non-reference argument alive. - "ranges: { [8,21) }, uses: { }, { 15 21 } is_fixed: 0, is_split: 0 is_low: 0 is_high: 0", + "ranges: { [8,21) }, uses: { }, { 21 } is_fixed: 0, is_split: 0 is_low: 0 is_high: 0", // Environment uses keep the reference argument alive. "ranges: { [10,21) }, uses: { }, { 15 21 } is_fixed: 0, is_split: 0 is_low: 0 is_high: 0", }; diff --git a/compiler/optimizing/ssa_phi_elimination.cc b/compiler/optimizing/ssa_phi_elimination.cc index cb27ded17a..3fcb72e4fb 100644 --- a/compiler/optimizing/ssa_phi_elimination.cc +++ b/compiler/optimizing/ssa_phi_elimination.cc @@ -23,9 +23,10 @@ namespace art { -void SsaDeadPhiElimination::Run() { +bool SsaDeadPhiElimination::Run() { MarkDeadPhis(); EliminateDeadPhis(); + return true; } void SsaDeadPhiElimination::MarkDeadPhis() { @@ -122,7 +123,7 @@ void SsaDeadPhiElimination::EliminateDeadPhis() { } } -void SsaRedundantPhiElimination::Run() { +bool SsaRedundantPhiElimination::Run() { // Use local allocator for allocating memory used by this optimization. 
ScopedArenaAllocator allocator(graph_->GetArenaStack()); @@ -140,7 +141,7 @@ void SsaRedundantPhiElimination::Run() { ArenaBitVector visited_phis_in_cycle(&allocator, graph_->GetCurrentInstructionId(), - /* expandable */ false, + /* expandable= */ false, kArenaAllocSsaPhiElimination); visited_phis_in_cycle.ClearAllBits(); ScopedArenaVector<HPhi*> cycle_worklist(allocator.Adapter(kArenaAllocSsaPhiElimination)); @@ -255,6 +256,7 @@ void SsaRedundantPhiElimination::Run() { current->GetBlock()->RemovePhi(current); } } + return true; } } // namespace art diff --git a/compiler/optimizing/ssa_phi_elimination.h b/compiler/optimizing/ssa_phi_elimination.h index 11d5837eb5..c5cc752ffc 100644 --- a/compiler/optimizing/ssa_phi_elimination.h +++ b/compiler/optimizing/ssa_phi_elimination.h @@ -31,7 +31,7 @@ class SsaDeadPhiElimination : public HOptimization { explicit SsaDeadPhiElimination(HGraph* graph) : HOptimization(graph, kSsaDeadPhiEliminationPassName) {} - void Run() OVERRIDE; + bool Run() override; void MarkDeadPhis(); void EliminateDeadPhis(); @@ -53,7 +53,7 @@ class SsaRedundantPhiElimination : public HOptimization { explicit SsaRedundantPhiElimination(HGraph* graph) : HOptimization(graph, kSsaRedundantPhiEliminationPassName) {} - void Run() OVERRIDE; + bool Run() override; static constexpr const char* kSsaRedundantPhiEliminationPassName = "redundant_phi_elimination"; diff --git a/compiler/optimizing/ssa_test.cc b/compiler/optimizing/ssa_test.cc index 85ed06eb9b..e679893af2 100644 --- a/compiler/optimizing/ssa_test.cc +++ b/compiler/optimizing/ssa_test.cc @@ -38,15 +38,15 @@ class SsaPrettyPrinter : public HPrettyPrinter { public: explicit SsaPrettyPrinter(HGraph* graph) : HPrettyPrinter(graph), str_("") {} - void PrintInt(int value) OVERRIDE { + void PrintInt(int value) override { str_ += android::base::StringPrintf("%d", value); } - void PrintString(const char* value) OVERRIDE { + void PrintString(const char* value) override { str_ += value; } - void PrintNewLine() OVERRIDE { + void PrintNewLine() override { str_ += '\n'; } @@ -54,7 +54,7 @@ class SsaPrettyPrinter : public HPrettyPrinter { std::string str() const { return str_; } - void VisitIntConstant(HIntConstant* constant) OVERRIDE { + void VisitIntConstant(HIntConstant* constant) override { PrintPreInstruction(constant); str_ += constant->DebugName(); str_ += " "; diff --git a/compiler/optimizing/stack_map_stream.cc b/compiler/optimizing/stack_map_stream.cc index 7010e3f380..60ca61c133 100644 --- a/compiler/optimizing/stack_map_stream.cc +++ b/compiler/optimizing/stack_map_stream.cc @@ -16,682 +16,312 @@ #include "stack_map_stream.h" +#include <memory> + #include "art_method-inl.h" #include "base/stl_util.h" #include "dex/dex_file_types.h" #include "optimizing/optimizing_compiler.h" #include "runtime.h" #include "scoped_thread_state_change-inl.h" +#include "stack_map.h" namespace art { -void StackMapStream::BeginStackMapEntry(uint32_t dex_pc, - uint32_t native_pc_offset, - uint32_t register_mask, - BitVector* sp_mask, - uint32_t num_dex_registers, - uint8_t inlining_depth) { - DCHECK_EQ(0u, current_entry_.dex_pc) << "EndStackMapEntry not called after BeginStackMapEntry"; - current_entry_.dex_pc = dex_pc; - current_entry_.native_pc_code_offset = CodeOffset::FromOffset(native_pc_offset, instruction_set_); - current_entry_.register_mask = register_mask; - current_entry_.sp_mask = sp_mask; - current_entry_.inlining_depth = inlining_depth; - current_entry_.inline_infos_start_index = inline_infos_.size(); - current_entry_.stack_mask_index = 
0; - current_entry_.dex_method_index = dex::kDexNoIndex; - current_entry_.dex_register_entry.num_dex_registers = num_dex_registers; - current_entry_.dex_register_entry.locations_start_index = dex_register_locations_.size(); - current_entry_.dex_register_entry.live_dex_registers_mask = nullptr; - if (num_dex_registers != 0u) { - current_entry_.dex_register_entry.live_dex_registers_mask = - ArenaBitVector::Create(allocator_, num_dex_registers, true, kArenaAllocStackMapStream); - current_entry_.dex_register_entry.live_dex_registers_mask->ClearAllBits(); - } - if (sp_mask != nullptr) { - stack_mask_max_ = std::max(stack_mask_max_, sp_mask->GetHighestBitSet()); - } - if (inlining_depth > 0) { - number_of_stack_maps_with_inline_info_++; - } +constexpr static bool kVerifyStackMaps = kIsDebugBuild; - // Note: dex_pc can be kNoDexPc for native method intrinsics. - if (dex_pc != dex::kDexNoIndex && (dex_pc_max_ == dex::kDexNoIndex || dex_pc_max_ < dex_pc)) { - dex_pc_max_ = dex_pc; - } - register_mask_max_ = std::max(register_mask_max_, register_mask); - current_dex_register_ = 0; +uint32_t StackMapStream::GetStackMapNativePcOffset(size_t i) { + return StackMap::UnpackNativePc(stack_maps_[i][StackMap::kPackedNativePc], instruction_set_); } -void StackMapStream::EndStackMapEntry() { - current_entry_.dex_register_map_index = AddDexRegisterMapEntry(current_entry_.dex_register_entry); - stack_maps_.push_back(current_entry_); - current_entry_ = StackMapEntry(); +void StackMapStream::SetStackMapNativePcOffset(size_t i, uint32_t native_pc_offset) { + stack_maps_[i][StackMap::kPackedNativePc] = + StackMap::PackNativePc(native_pc_offset, instruction_set_); } -void StackMapStream::AddDexRegisterEntry(DexRegisterLocation::Kind kind, int32_t value) { - if (kind != DexRegisterLocation::Kind::kNone) { - // Ensure we only use non-compressed location kind at this stage. - DCHECK(DexRegisterLocation::IsShortLocationKind(kind)) << kind; - DexRegisterLocation location(kind, value); - - // Look for Dex register `location` in the location catalog (using the - // companion hash map of locations to indices). Use its index if it - // is already in the location catalog. If not, insert it (in the - // location catalog and the hash map) and use the newly created index. - auto it = location_catalog_entries_indices_.Find(location); - if (it != location_catalog_entries_indices_.end()) { - // Retrieve the index from the hash map. - dex_register_locations_.push_back(it->second); - } else { - // Create a new entry in the location catalog and the hash map. - size_t index = location_catalog_entries_.size(); - location_catalog_entries_.push_back(location); - dex_register_locations_.push_back(index); - location_catalog_entries_indices_.Insert(std::make_pair(location, index)); - } - DexRegisterMapEntry* const entry = in_inline_frame_ - ? 
&current_inline_info_.dex_register_entry - : &current_entry_.dex_register_entry; - DCHECK_LT(current_dex_register_, entry->num_dex_registers); - entry->live_dex_registers_mask->SetBit(current_dex_register_); - entry->hash += (1 << - (current_dex_register_ % (sizeof(DexRegisterMapEntry::hash) * kBitsPerByte))); - entry->hash += static_cast<uint32_t>(value); - entry->hash += static_cast<uint32_t>(kind); - } - current_dex_register_++; +void StackMapStream::BeginMethod(size_t frame_size_in_bytes, + size_t core_spill_mask, + size_t fp_spill_mask, + uint32_t num_dex_registers) { + DCHECK(!in_method_) << "Mismatched Begin/End calls"; + in_method_ = true; + DCHECK_EQ(packed_frame_size_, 0u) << "BeginMethod was already called"; + + DCHECK_ALIGNED(frame_size_in_bytes, kStackAlignment); + packed_frame_size_ = frame_size_in_bytes / kStackAlignment; + core_spill_mask_ = core_spill_mask; + fp_spill_mask_ = fp_spill_mask; + num_dex_registers_ = num_dex_registers; } -void StackMapStream::AddInvoke(InvokeType invoke_type, uint32_t dex_method_index) { - current_entry_.invoke_type = invoke_type; - current_entry_.dex_method_index = dex_method_index; -} +void StackMapStream::EndMethod() { + DCHECK(in_method_) << "Mismatched Begin/End calls"; + in_method_ = false; -void StackMapStream::BeginInlineInfoEntry(ArtMethod* method, - uint32_t dex_pc, - uint32_t num_dex_registers, - const DexFile* outer_dex_file) { - DCHECK(!in_inline_frame_); - in_inline_frame_ = true; - if (EncodeArtMethodInInlineInfo(method)) { - current_inline_info_.method = method; - } else { - if (dex_pc != static_cast<uint32_t>(-1) && kIsDebugBuild) { - ScopedObjectAccess soa(Thread::Current()); - DCHECK(IsSameDexFile(*outer_dex_file, *method->GetDexFile())); + // Read the stack masks now. The compiler might have updated them.
+ for (size_t i = 0; i < lazy_stack_masks_.size(); i++) { + BitVector* stack_mask = lazy_stack_masks_[i]; + if (stack_mask != nullptr && stack_mask->GetNumberOfBits() != 0) { + stack_maps_[i][StackMap::kStackMaskIndex] = + stack_masks_.Dedup(stack_mask->GetRawStorage(), stack_mask->GetNumberOfBits()); } - current_inline_info_.method_index = method->GetDexMethodIndexUnchecked(); - } - current_inline_info_.dex_pc = dex_pc; - current_inline_info_.dex_register_entry.num_dex_registers = num_dex_registers; - current_inline_info_.dex_register_entry.locations_start_index = dex_register_locations_.size(); - current_inline_info_.dex_register_entry.live_dex_registers_mask = nullptr; - if (num_dex_registers != 0) { - current_inline_info_.dex_register_entry.live_dex_registers_mask = - ArenaBitVector::Create(allocator_, num_dex_registers, true, kArenaAllocStackMapStream); - current_inline_info_.dex_register_entry.live_dex_registers_mask->ClearAllBits(); - } - current_dex_register_ = 0; -} - -void StackMapStream::EndInlineInfoEntry() { - current_inline_info_.dex_register_map_index = - AddDexRegisterMapEntry(current_inline_info_.dex_register_entry); - DCHECK(in_inline_frame_); - DCHECK_EQ(current_dex_register_, current_inline_info_.dex_register_entry.num_dex_registers) - << "Inline information contains less registers than expected"; - in_inline_frame_ = false; - inline_infos_.push_back(current_inline_info_); - current_inline_info_ = InlineInfoEntry(); -} - -CodeOffset StackMapStream::ComputeMaxNativePcCodeOffset() const { - CodeOffset max_native_pc_offset; - for (const StackMapEntry& entry : stack_maps_) { - max_native_pc_offset = std::max(max_native_pc_offset, entry.native_pc_code_offset); - } - return max_native_pc_offset; -} - -size_t StackMapStream::PrepareForFillIn() { - CodeInfoEncoding encoding; - encoding.dex_register_map.num_entries = 0; // TODO: Remove this field. - encoding.dex_register_map.num_bytes = ComputeDexRegisterMapsSize(); - encoding.location_catalog.num_entries = location_catalog_entries_.size(); - encoding.location_catalog.num_bytes = ComputeDexRegisterLocationCatalogSize(); - encoding.inline_info.num_entries = inline_infos_.size(); - // Must be done before calling ComputeInlineInfoEncoding since ComputeInlineInfoEncoding requires - // dex_method_index_idx to be filled in. - PrepareMethodIndices(); - ComputeInlineInfoEncoding(&encoding.inline_info.encoding, - encoding.dex_register_map.num_bytes); - CodeOffset max_native_pc_offset = ComputeMaxNativePcCodeOffset(); - // Prepare the CodeInfo variable-sized encoding. - encoding.stack_mask.encoding.num_bits = stack_mask_max_ + 1; // Need room for max element too. - encoding.stack_mask.num_entries = PrepareStackMasks(encoding.stack_mask.encoding.num_bits); - encoding.register_mask.encoding.num_bits = MinimumBitsToStore(register_mask_max_); - encoding.register_mask.num_entries = PrepareRegisterMasks(); - encoding.stack_map.num_entries = stack_maps_.size(); - encoding.stack_map.encoding.SetFromSizes( - // The stack map contains compressed native PC offsets. - max_native_pc_offset.CompressedValue(), - dex_pc_max_, - encoding.dex_register_map.num_bytes, - encoding.inline_info.num_entries, - encoding.register_mask.num_entries, - encoding.stack_mask.num_entries); - ComputeInvokeInfoEncoding(&encoding); - DCHECK_EQ(code_info_encoding_.size(), 0u); - encoding.Compress(&code_info_encoding_); - encoding.ComputeTableOffsets(); - // Compute table offsets so we can get the non header size. 
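The rewritten StackMapStream above is driven by explicit Begin/End pairs and collects rows into BitTable builders instead of the old ad-hoc encodings. Below is a hedged sketch of the call sequence a code generator might follow; the wrapper function and all numeric values are made up, only the StackMapStream methods come from this change, and num_dex_registers is kept at 0 so the sketch does not have to emit dex register entries.

// Illustrative sequence only; the real callers are the code generators.
void EmitMinimalStackMaps(StackMapStream* stream) {
  stream->BeginMethod(/* frame_size_in_bytes= */ 64,
                      /* core_spill_mask= */ 0x4020,
                      /* fp_spill_mask= */ 0u,
                      /* num_dex_registers= */ 0);
  // One entry per safepoint; non-catch entries must have non-decreasing native pc,
  // catch entries are appended at the end.
  stream->BeginStackMapEntry(/* dex_pc= */ 5,
                             /* native_pc_offset= */ 0x20,
                             /* register_mask= */ 0x10,
                             /* stack_mask= */ nullptr,
                             StackMap::Kind::Default);
  stream->EndStackMapEntry();
  stream->EndMethod();
}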
- DCHECK_EQ(encoding.HeaderSize(), code_info_encoding_.size()); - needed_size_ = code_info_encoding_.size() + encoding.NonHeaderSize(); - return needed_size_; -} - -size_t StackMapStream::ComputeDexRegisterLocationCatalogSize() const { - size_t size = DexRegisterLocationCatalog::kFixedSize; - for (const DexRegisterLocation& dex_register_location : location_catalog_entries_) { - size += DexRegisterLocationCatalog::EntrySize(dex_register_location); - } - return size; -} - -size_t StackMapStream::DexRegisterMapEntry::ComputeSize(size_t catalog_size) const { - // For num_dex_registers == 0u live_dex_registers_mask may be null. - if (num_dex_registers == 0u) { - return 0u; // No register map will be emitted. } - DCHECK(live_dex_registers_mask != nullptr); - - // Size of the map in bytes. - size_t size = DexRegisterMap::kFixedSize; - // Add the live bit mask for the Dex register liveness. - size += DexRegisterMap::GetLiveBitMaskSize(num_dex_registers); - // Compute the size of the set of live Dex register entries. - size_t number_of_live_dex_registers = live_dex_registers_mask->NumSetBits(); - size_t map_entries_size_in_bits = - DexRegisterMap::SingleEntrySizeInBits(catalog_size) * number_of_live_dex_registers; - size_t map_entries_size_in_bytes = - RoundUp(map_entries_size_in_bits, kBitsPerByte) / kBitsPerByte; - size += map_entries_size_in_bytes; - return size; } -size_t StackMapStream::ComputeDexRegisterMapsSize() const { - size_t size = 0; - for (const DexRegisterMapEntry& entry : dex_register_entries_) { - size += entry.ComputeSize(location_catalog_entries_.size()); - } - return size; -} - -void StackMapStream::ComputeInvokeInfoEncoding(CodeInfoEncoding* encoding) { - DCHECK(encoding != nullptr); - uint32_t native_pc_max = 0; - uint16_t method_index_max = 0; - size_t invoke_infos_count = 0; - size_t invoke_type_max = 0; - for (const StackMapEntry& entry : stack_maps_) { - if (entry.dex_method_index != dex::kDexNoIndex) { - native_pc_max = std::max(native_pc_max, entry.native_pc_code_offset.CompressedValue()); - method_index_max = std::max(method_index_max, static_cast<uint16_t>(entry.dex_method_index)); - invoke_type_max = std::max(invoke_type_max, static_cast<size_t>(entry.invoke_type)); - ++invoke_infos_count; +void StackMapStream::BeginStackMapEntry(uint32_t dex_pc, + uint32_t native_pc_offset, + uint32_t register_mask, + BitVector* stack_mask, + StackMap::Kind kind) { + DCHECK(in_method_) << "Call BeginMethod first"; + DCHECK(!in_stack_map_) << "Mismatched Begin/End calls"; + in_stack_map_ = true; + + current_stack_map_ = BitTableBuilder<StackMap>::Entry(); + current_stack_map_[StackMap::kKind] = static_cast<uint32_t>(kind); + current_stack_map_[StackMap::kPackedNativePc] = + StackMap::PackNativePc(native_pc_offset, instruction_set_); + current_stack_map_[StackMap::kDexPc] = dex_pc; + if (stack_maps_.size() > 0) { + // Check that non-catch stack maps are sorted by pc. + // Catch stack maps are at the end and may be unordered. 
+ if (stack_maps_.back()[StackMap::kKind] == StackMap::Kind::Catch) { + DCHECK(current_stack_map_[StackMap::kKind] == StackMap::Kind::Catch); + } else if (current_stack_map_[StackMap::kKind] != StackMap::Kind::Catch) { + DCHECK_LE(stack_maps_.back()[StackMap::kPackedNativePc], + current_stack_map_[StackMap::kPackedNativePc]); } } - encoding->invoke_info.num_entries = invoke_infos_count; - encoding->invoke_info.encoding.SetFromSizes(native_pc_max, invoke_type_max, method_index_max); -} - -void StackMapStream::ComputeInlineInfoEncoding(InlineInfoEncoding* encoding, - size_t dex_register_maps_bytes) { - uint32_t method_index_max = 0; - uint32_t dex_pc_max = dex::kDexNoIndex; - uint32_t extra_data_max = 0; - - uint32_t inline_info_index = 0; - for (const StackMapEntry& entry : stack_maps_) { - for (size_t j = 0; j < entry.inlining_depth; ++j) { - InlineInfoEntry inline_entry = inline_infos_[inline_info_index++]; - if (inline_entry.method == nullptr) { - method_index_max = std::max(method_index_max, inline_entry.dex_method_index_idx); - extra_data_max = std::max(extra_data_max, 1u); - } else { - method_index_max = std::max( - method_index_max, High32Bits(reinterpret_cast<uintptr_t>(inline_entry.method))); - extra_data_max = std::max( - extra_data_max, Low32Bits(reinterpret_cast<uintptr_t>(inline_entry.method))); + if (register_mask != 0) { + uint32_t shift = LeastSignificantBit(register_mask); + BitTableBuilder<RegisterMask>::Entry entry; + entry[RegisterMask::kValue] = register_mask >> shift; + entry[RegisterMask::kShift] = shift; + current_stack_map_[StackMap::kRegisterMaskIndex] = register_masks_.Dedup(&entry); + } + // The compiler assumes the bit vector will be read during PrepareForFillIn(), + // and it might modify the data before that. Therefore, just store the pointer. + // See ClearSpillSlotsFromLoopPhisInStackMap in code_generator.h. + lazy_stack_masks_.push_back(stack_mask); + current_inline_infos_.clear(); + current_dex_registers_.clear(); + expected_num_dex_registers_ = num_dex_registers_; + + if (kVerifyStackMaps) { + size_t stack_map_index = stack_maps_.size(); + // Create lambda method, which will be executed at the very end to verify data. + // Parameters and local variables will be captured(stored) by the lambda "[=]". + dchecks_.emplace_back([=](const CodeInfo& code_info) { + if (kind == StackMap::Kind::Default || kind == StackMap::Kind::OSR) { + StackMap stack_map = code_info.GetStackMapForNativePcOffset(native_pc_offset, + instruction_set_); + CHECK_EQ(stack_map.Row(), stack_map_index); + } else if (kind == StackMap::Kind::Catch) { + StackMap stack_map = code_info.GetCatchStackMapForDexPc(dex_pc); + CHECK_EQ(stack_map.Row(), stack_map_index); } - if (inline_entry.dex_pc != dex::kDexNoIndex && - (dex_pc_max == dex::kDexNoIndex || dex_pc_max < inline_entry.dex_pc)) { - dex_pc_max = inline_entry.dex_pc; + StackMap stack_map = code_info.GetStackMapAt(stack_map_index); + CHECK_EQ(stack_map.GetNativePcOffset(instruction_set_), native_pc_offset); + CHECK_EQ(stack_map.GetKind(), static_cast<uint32_t>(kind)); + CHECK_EQ(stack_map.GetDexPc(), dex_pc); + CHECK_EQ(code_info.GetRegisterMaskOf(stack_map), register_mask); + BitMemoryRegion seen_stack_mask = code_info.GetStackMaskOf(stack_map); + CHECK_GE(seen_stack_mask.size_in_bits(), stack_mask ? 
stack_mask->GetNumberOfBits() : 0); + for (size_t b = 0; b < seen_stack_mask.size_in_bits(); b++) { + CHECK_EQ(seen_stack_mask.LoadBit(b), stack_mask != nullptr && stack_mask->IsBitSet(b)); } - } + }); } - DCHECK_EQ(inline_info_index, inline_infos_.size()); - - encoding->SetFromSizes(method_index_max, dex_pc_max, extra_data_max, dex_register_maps_bytes); } -size_t StackMapStream::MaybeCopyDexRegisterMap(DexRegisterMapEntry& entry, - size_t* current_offset, - MemoryRegion dex_register_locations_region) { - DCHECK(current_offset != nullptr); - if ((entry.num_dex_registers == 0) || (entry.live_dex_registers_mask->NumSetBits() == 0)) { - // No dex register map needed. - return StackMap::kNoDexRegisterMap; - } - if (entry.offset == DexRegisterMapEntry::kOffsetUnassigned) { - // Not already copied, need to copy and and assign an offset. - entry.offset = *current_offset; - const size_t entry_size = entry.ComputeSize(location_catalog_entries_.size()); - DexRegisterMap dex_register_map( - dex_register_locations_region.Subregion(entry.offset, entry_size)); - *current_offset += entry_size; - // Fill in the map since it was just added. - FillInDexRegisterMap(dex_register_map, - entry.num_dex_registers, - *entry.live_dex_registers_mask, - entry.locations_start_index); - } - return entry.offset; -} - -void StackMapStream::FillInMethodInfo(MemoryRegion region) { - { - MethodInfo info(region.begin(), method_indices_.size()); - for (size_t i = 0; i < method_indices_.size(); ++i) { - info.SetMethodIndex(i, method_indices_[i]); - } - } - if (kIsDebugBuild) { - // Check the data matches. - MethodInfo info(region.begin()); - const size_t count = info.NumMethodIndices(); - DCHECK_EQ(count, method_indices_.size()); - for (size_t i = 0; i < count; ++i) { - DCHECK_EQ(info.GetMethodIndex(i), method_indices_[i]); - } - } -} - -void StackMapStream::FillInCodeInfo(MemoryRegion region) { - DCHECK_EQ(0u, current_entry_.dex_pc) << "EndStackMapEntry not called after BeginStackMapEntry"; - DCHECK_NE(0u, needed_size_) << "PrepareForFillIn not called before FillIn"; - - DCHECK_EQ(region.size(), needed_size_); - - // Note that the memory region does not have to be zeroed when we JIT code - // because we do not use the arena allocator there. - - // Write the CodeInfo header. - region.CopyFrom(0, MemoryRegion(code_info_encoding_.data(), code_info_encoding_.size())); - - CodeInfo code_info(region); - CodeInfoEncoding encoding = code_info.ExtractEncoding(); - DCHECK_EQ(encoding.stack_map.num_entries, stack_maps_.size()); - - MemoryRegion dex_register_locations_region = region.Subregion( - encoding.dex_register_map.byte_offset, - encoding.dex_register_map.num_bytes); - - // Set the Dex register location catalog. - MemoryRegion dex_register_location_catalog_region = region.Subregion( - encoding.location_catalog.byte_offset, - encoding.location_catalog.num_bytes); - DexRegisterLocationCatalog dex_register_location_catalog(dex_register_location_catalog_region); - // Offset in `dex_register_location_catalog` where to store the next - // register location. - size_t location_catalog_offset = DexRegisterLocationCatalog::kFixedSize; - for (DexRegisterLocation dex_register_location : location_catalog_entries_) { - dex_register_location_catalog.SetRegisterInfo(location_catalog_offset, dex_register_location); - location_catalog_offset += DexRegisterLocationCatalog::EntrySize(dex_register_location); - } - // Ensure we reached the end of the Dex registers location_catalog. 
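
The RegisterMask rows built in BeginStackMapEntry above store each mask as a (value, shift) pair, so a register mask whose set bits start high up costs only a few bits per row. A minimal sketch of the round trip, with hypothetical helper names (__builtin_ctz stands in for the LeastSignificantBit helper used above; the real unpacking happens on the runtime side when the CodeInfo is read back):

    #include <cstdint>

    // Hypothetical helpers mirroring the inline packing above.
    // Assumes register_mask != 0, matching the guard in BeginStackMapEntry.
    static uint32_t PackRegisterMaskValue(uint32_t register_mask, uint32_t* out_shift) {
      uint32_t shift = static_cast<uint32_t>(__builtin_ctz(register_mask));  // lowest set bit
      *out_shift = shift;
      return register_mask >> shift;  // e.g. 0b110000 is stored as value=0b11, shift=4
    }

    static uint32_t UnpackRegisterMask(uint32_t value, uint32_t shift) {
      return value << shift;  // reconstructs the original mask
    }
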
- DCHECK_EQ(location_catalog_offset, dex_register_location_catalog_region.size()); - - ArenaBitVector empty_bitmask(allocator_, 0, /* expandable */ false, kArenaAllocStackMapStream); - uintptr_t next_dex_register_map_offset = 0; - uintptr_t next_inline_info_index = 0; - size_t invoke_info_idx = 0; - for (size_t i = 0, e = stack_maps_.size(); i < e; ++i) { - StackMap stack_map = code_info.GetStackMapAt(i, encoding); - StackMapEntry entry = stack_maps_[i]; - - stack_map.SetDexPc(encoding.stack_map.encoding, entry.dex_pc); - stack_map.SetNativePcCodeOffset(encoding.stack_map.encoding, entry.native_pc_code_offset); - stack_map.SetRegisterMaskIndex(encoding.stack_map.encoding, entry.register_mask_index); - stack_map.SetStackMaskIndex(encoding.stack_map.encoding, entry.stack_mask_index); - - size_t offset = MaybeCopyDexRegisterMap(dex_register_entries_[entry.dex_register_map_index], - &next_dex_register_map_offset, - dex_register_locations_region); - stack_map.SetDexRegisterMapOffset(encoding.stack_map.encoding, offset); - - if (entry.dex_method_index != dex::kDexNoIndex) { - InvokeInfo invoke_info(code_info.GetInvokeInfo(encoding, invoke_info_idx)); - invoke_info.SetNativePcCodeOffset(encoding.invoke_info.encoding, entry.native_pc_code_offset); - invoke_info.SetInvokeType(encoding.invoke_info.encoding, entry.invoke_type); - invoke_info.SetMethodIndexIdx(encoding.invoke_info.encoding, entry.dex_method_index_idx); - ++invoke_info_idx; - } +void StackMapStream::EndStackMapEntry() { + DCHECK(in_stack_map_) << "Mismatched Begin/End calls"; + in_stack_map_ = false; - // Set the inlining info. - if (entry.inlining_depth != 0) { - InlineInfo inline_info = code_info.GetInlineInfo(next_inline_info_index, encoding); - - // Fill in the index. - stack_map.SetInlineInfoIndex(encoding.stack_map.encoding, next_inline_info_index); - DCHECK_EQ(next_inline_info_index, entry.inline_infos_start_index); - next_inline_info_index += entry.inlining_depth; - - inline_info.SetDepth(encoding.inline_info.encoding, entry.inlining_depth); - DCHECK_LE(entry.inline_infos_start_index + entry.inlining_depth, inline_infos_.size()); - - for (size_t depth = 0; depth < entry.inlining_depth; ++depth) { - InlineInfoEntry inline_entry = inline_infos_[depth + entry.inline_infos_start_index]; - if (inline_entry.method != nullptr) { - inline_info.SetMethodIndexIdxAtDepth( - encoding.inline_info.encoding, - depth, - High32Bits(reinterpret_cast<uintptr_t>(inline_entry.method))); - inline_info.SetExtraDataAtDepth( - encoding.inline_info.encoding, - depth, - Low32Bits(reinterpret_cast<uintptr_t>(inline_entry.method))); - } else { - inline_info.SetMethodIndexIdxAtDepth(encoding.inline_info.encoding, - depth, - inline_entry.dex_method_index_idx); - inline_info.SetExtraDataAtDepth(encoding.inline_info.encoding, depth, 1); - } - inline_info.SetDexPcAtDepth(encoding.inline_info.encoding, depth, inline_entry.dex_pc); - size_t dex_register_map_offset = MaybeCopyDexRegisterMap( - dex_register_entries_[inline_entry.dex_register_map_index], - &next_dex_register_map_offset, - dex_register_locations_region); - inline_info.SetDexRegisterMapOffsetAtDepth(encoding.inline_info.encoding, - depth, - dex_register_map_offset); - } - } else if (encoding.stack_map.encoding.GetInlineInfoEncoding().BitSize() > 0) { - stack_map.SetInlineInfoIndex(encoding.stack_map.encoding, StackMap::kNoInlineInfo); - } + // Generate index into the InlineInfo table. 
+ size_t inlining_depth = current_inline_infos_.size(); + if (!current_inline_infos_.empty()) { + current_inline_infos_.back()[InlineInfo::kIsLast] = InlineInfo::kLast; + current_stack_map_[StackMap::kInlineInfoIndex] = + inline_infos_.Dedup(current_inline_infos_.data(), current_inline_infos_.size()); } - // Write stack masks table. - const size_t stack_mask_bits = encoding.stack_mask.encoding.BitSize(); - if (stack_mask_bits > 0) { - size_t stack_mask_bytes = RoundUp(stack_mask_bits, kBitsPerByte) / kBitsPerByte; - for (size_t i = 0; i < encoding.stack_mask.num_entries; ++i) { - MemoryRegion source(&stack_masks_[i * stack_mask_bytes], stack_mask_bytes); - BitMemoryRegion stack_mask = code_info.GetStackMask(i, encoding); - for (size_t bit_index = 0; bit_index < stack_mask_bits; ++bit_index) { - stack_mask.StoreBit(bit_index, source.LoadBit(bit_index)); - } - } + // Generate delta-compressed dex register map. + size_t num_dex_registers = current_dex_registers_.size(); + if (!current_dex_registers_.empty()) { + DCHECK_EQ(expected_num_dex_registers_, current_dex_registers_.size()); + CreateDexRegisterMap(); } - // Write register masks table. - for (size_t i = 0; i < encoding.register_mask.num_entries; ++i) { - BitMemoryRegion register_mask = code_info.GetRegisterMask(i, encoding); - register_mask.StoreBits(0, register_masks_[i], encoding.register_mask.encoding.BitSize()); - } + stack_maps_.Add(current_stack_map_); - // Verify all written data in debug build. - if (kIsDebugBuild) { - CheckCodeInfo(region); + if (kVerifyStackMaps) { + size_t stack_map_index = stack_maps_.size() - 1; + dchecks_.emplace_back([=](const CodeInfo& code_info) { + StackMap stack_map = code_info.GetStackMapAt(stack_map_index); + CHECK_EQ(stack_map.HasDexRegisterMap(), (num_dex_registers != 0)); + CHECK_EQ(stack_map.HasInlineInfo(), (inlining_depth != 0)); + CHECK_EQ(code_info.GetInlineInfosOf(stack_map).size(), inlining_depth); + }); } } -void StackMapStream::FillInDexRegisterMap(DexRegisterMap dex_register_map, +void StackMapStream::BeginInlineInfoEntry(ArtMethod* method, + uint32_t dex_pc, uint32_t num_dex_registers, - const BitVector& live_dex_registers_mask, - uint32_t start_index_in_dex_register_locations) const { - dex_register_map.SetLiveBitMask(num_dex_registers, live_dex_registers_mask); - // Set the dex register location mapping data. 
- size_t number_of_live_dex_registers = live_dex_registers_mask.NumSetBits(); - DCHECK_LE(number_of_live_dex_registers, dex_register_locations_.size()); - DCHECK_LE(start_index_in_dex_register_locations, - dex_register_locations_.size() - number_of_live_dex_registers); - for (size_t index_in_dex_register_locations = 0; - index_in_dex_register_locations != number_of_live_dex_registers; - ++index_in_dex_register_locations) { - size_t location_catalog_entry_index = dex_register_locations_[ - start_index_in_dex_register_locations + index_in_dex_register_locations]; - dex_register_map.SetLocationCatalogEntryIndex( - index_in_dex_register_locations, - location_catalog_entry_index, - num_dex_registers, - location_catalog_entries_.size()); - } -} + const DexFile* outer_dex_file) { + DCHECK(in_stack_map_) << "Call BeginStackMapEntry first"; + DCHECK(!in_inline_info_) << "Mismatched Begin/End calls"; + in_inline_info_ = true; + DCHECK_EQ(expected_num_dex_registers_, current_dex_registers_.size()); + + expected_num_dex_registers_ += num_dex_registers; -size_t StackMapStream::AddDexRegisterMapEntry(const DexRegisterMapEntry& entry) { - const size_t current_entry_index = dex_register_entries_.size(); - auto entries_it = dex_map_hash_to_stack_map_indices_.find(entry.hash); - if (entries_it == dex_map_hash_to_stack_map_indices_.end()) { - // We don't have a perfect hash functions so we need a list to collect all stack maps - // which might have the same dex register map. - ScopedArenaVector<uint32_t> stack_map_indices(allocator_->Adapter(kArenaAllocStackMapStream)); - stack_map_indices.push_back(current_entry_index); - dex_map_hash_to_stack_map_indices_.Put(entry.hash, std::move(stack_map_indices)); + BitTableBuilder<InlineInfo>::Entry entry; + entry[InlineInfo::kIsLast] = InlineInfo::kMore; + entry[InlineInfo::kDexPc] = dex_pc; + entry[InlineInfo::kNumberOfDexRegisters] = static_cast<uint32_t>(expected_num_dex_registers_); + if (EncodeArtMethodInInlineInfo(method)) { + entry[InlineInfo::kArtMethodHi] = High32Bits(reinterpret_cast<uintptr_t>(method)); + entry[InlineInfo::kArtMethodLo] = Low32Bits(reinterpret_cast<uintptr_t>(method)); } else { - // We might have collisions, so we need to check whether or not we really have a match. 
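
BeginInlineInfoEntry above creates each entry with kIsLast set to kMore; when the enclosing stack map is closed, EndStackMapEntry flips the final entry of the run to kLast and stores only the index of the first entry in the StackMap row. A reader can therefore recover all inline frames of a stack map by walking forward until the last flag. A minimal sketch of that layout with illustrative names (the real rows are bit-packed BitTable entries, not structs):

    #include <cstdint>
    #include <vector>

    // Illustrative stand-in for one InlineInfo row.
    struct InlineEntrySketch {
      uint32_t dex_pc;
      bool is_last;
    };

    // Walks the run of inline frames belonging to one stack map, given the
    // index of its first entry, stopping at the entry flagged as last.
    static std::vector<InlineEntrySketch> GetInlineRun(
        const std::vector<InlineEntrySketch>& table, size_t first_index) {
      std::vector<InlineEntrySketch> run;
      for (size_t i = first_index; i < table.size(); ++i) {
        run.push_back(table[i]);
        if (table[i].is_last) {
          break;
        }
      }
      return run;
    }
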
- for (uint32_t test_entry_index : entries_it->second) { - if (DexRegisterMapEntryEquals(dex_register_entries_[test_entry_index], entry)) { - return test_entry_index; - } + if (dex_pc != static_cast<uint32_t>(-1) && kIsDebugBuild) { + ScopedObjectAccess soa(Thread::Current()); + DCHECK(IsSameDexFile(*outer_dex_file, *method->GetDexFile())); } - entries_it->second.push_back(current_entry_index); + uint32_t dex_method_index = method->GetDexMethodIndex(); + entry[InlineInfo::kMethodInfoIndex] = method_infos_.Dedup({dex_method_index}); + } + current_inline_infos_.push_back(entry); + + if (kVerifyStackMaps) { + size_t stack_map_index = stack_maps_.size(); + size_t depth = current_inline_infos_.size() - 1; + dchecks_.emplace_back([=](const CodeInfo& code_info) { + StackMap stack_map = code_info.GetStackMapAt(stack_map_index); + InlineInfo inline_info = code_info.GetInlineInfosOf(stack_map)[depth]; + CHECK_EQ(inline_info.GetDexPc(), dex_pc); + bool encode_art_method = EncodeArtMethodInInlineInfo(method); + CHECK_EQ(inline_info.EncodesArtMethod(), encode_art_method); + if (encode_art_method) { + CHECK_EQ(inline_info.GetArtMethod(), method); + } else { + CHECK_EQ(code_info.GetMethodIndexOf(inline_info), method->GetDexMethodIndex()); + } + }); } - dex_register_entries_.push_back(entry); - return current_entry_index; } -bool StackMapStream::DexRegisterMapEntryEquals(const DexRegisterMapEntry& a, - const DexRegisterMapEntry& b) const { - if ((a.live_dex_registers_mask == nullptr) != (b.live_dex_registers_mask == nullptr)) { - return false; - } - if (a.num_dex_registers != b.num_dex_registers) { - return false; - } - if (a.num_dex_registers != 0u) { - DCHECK(a.live_dex_registers_mask != nullptr); - DCHECK(b.live_dex_registers_mask != nullptr); - if (!a.live_dex_registers_mask->Equal(b.live_dex_registers_mask)) { - return false; - } - size_t number_of_live_dex_registers = a.live_dex_registers_mask->NumSetBits(); - DCHECK_LE(number_of_live_dex_registers, dex_register_locations_.size()); - DCHECK_LE(a.locations_start_index, - dex_register_locations_.size() - number_of_live_dex_registers); - DCHECK_LE(b.locations_start_index, - dex_register_locations_.size() - number_of_live_dex_registers); - auto a_begin = dex_register_locations_.begin() + a.locations_start_index; - auto b_begin = dex_register_locations_.begin() + b.locations_start_index; - if (!std::equal(a_begin, a_begin + number_of_live_dex_registers, b_begin)) { - return false; - } - } - return true; +void StackMapStream::EndInlineInfoEntry() { + DCHECK(in_inline_info_) << "Mismatched Begin/End calls"; + in_inline_info_ = false; + DCHECK_EQ(expected_num_dex_registers_, current_dex_registers_.size()); } -// Helper for CheckCodeInfo - check that register map has the expected content. -void StackMapStream::CheckDexRegisterMap(const CodeInfo& code_info, - const DexRegisterMap& dex_register_map, - size_t num_dex_registers, - BitVector* live_dex_registers_mask, - size_t dex_register_locations_index) const { - CodeInfoEncoding encoding = code_info.ExtractEncoding(); - for (size_t reg = 0; reg < num_dex_registers; reg++) { - // Find the location we tried to encode. - DexRegisterLocation expected = DexRegisterLocation::None(); - if (live_dex_registers_mask->IsBitSet(reg)) { - size_t catalog_index = dex_register_locations_[dex_register_locations_index++]; - expected = location_catalog_entries_[catalog_index]; +// Create delta-compressed dex register map based on the current list of DexRegisterLocations. 
+// All dex registers for a stack map are concatenated - inlined registers are just appended. +void StackMapStream::CreateDexRegisterMap() { + // These are fields rather than local variables so that we can reuse the reserved memory. + temp_dex_register_mask_.ClearAllBits(); + temp_dex_register_map_.clear(); + + // Ensure that the arrays that hold previous state are big enough to be safely indexed below. + if (previous_dex_registers_.size() < current_dex_registers_.size()) { + previous_dex_registers_.resize(current_dex_registers_.size(), DexRegisterLocation::None()); + dex_register_timestamp_.resize(current_dex_registers_.size(), 0u); + } + + // Set bit in the mask for each register that has been changed since the previous stack map. + // Modified registers are stored in the catalogue and the catalogue index added to the list. + for (size_t i = 0; i < current_dex_registers_.size(); i++) { + DexRegisterLocation reg = current_dex_registers_[i]; + // Distance is difference between this index and the index of last modification. + uint32_t distance = stack_maps_.size() - dex_register_timestamp_[i]; + if (previous_dex_registers_[i] != reg || distance > kMaxDexRegisterMapSearchDistance) { + BitTableBuilder<DexRegisterInfo>::Entry entry; + entry[DexRegisterInfo::kKind] = static_cast<uint32_t>(reg.GetKind()); + entry[DexRegisterInfo::kPackedValue] = + DexRegisterInfo::PackValue(reg.GetKind(), reg.GetValue()); + uint32_t index = reg.IsLive() ? dex_register_catalog_.Dedup(&entry) : kNoValue; + temp_dex_register_mask_.SetBit(i); + temp_dex_register_map_.push_back({index}); + previous_dex_registers_[i] = reg; + dex_register_timestamp_[i] = stack_maps_.size(); } - // Compare to the seen location. - if (expected.GetKind() == DexRegisterLocation::Kind::kNone) { - DCHECK(!dex_register_map.IsValid() || !dex_register_map.IsDexRegisterLive(reg)) - << dex_register_map.IsValid() << " " << dex_register_map.IsDexRegisterLive(reg); - } else { - DCHECK(dex_register_map.IsDexRegisterLive(reg)); - DexRegisterLocation seen = dex_register_map.GetDexRegisterLocation( - reg, num_dex_registers, code_info, encoding); - DCHECK_EQ(expected.GetKind(), seen.GetKind()); - DCHECK_EQ(expected.GetValue(), seen.GetValue()); - } - } - if (num_dex_registers == 0) { - DCHECK(!dex_register_map.IsValid()); } -} -size_t StackMapStream::PrepareRegisterMasks() { - register_masks_.resize(stack_maps_.size(), 0u); - ScopedArenaUnorderedMap<uint32_t, size_t> dedupe(allocator_->Adapter(kArenaAllocStackMapStream)); - for (StackMapEntry& stack_map : stack_maps_) { - const size_t index = dedupe.size(); - stack_map.register_mask_index = dedupe.emplace(stack_map.register_mask, index).first->second; - register_masks_[index] = stack_map.register_mask; - } - return dedupe.size(); -} - -void StackMapStream::PrepareMethodIndices() { - CHECK(method_indices_.empty()); - method_indices_.resize(stack_maps_.size() + inline_infos_.size()); - ScopedArenaUnorderedMap<uint32_t, size_t> dedupe(allocator_->Adapter(kArenaAllocStackMapStream)); - for (StackMapEntry& stack_map : stack_maps_) { - const size_t index = dedupe.size(); - const uint32_t method_index = stack_map.dex_method_index; - if (method_index != dex::kDexNoIndex) { - stack_map.dex_method_index_idx = dedupe.emplace(method_index, index).first->second; - method_indices_[index] = method_index; - } - } - for (InlineInfoEntry& inline_info : inline_infos_) { - const size_t index = dedupe.size(); - const uint32_t method_index = inline_info.method_index; - CHECK_NE(method_index, dex::kDexNoIndex); - 
inline_info.dex_method_index_idx = dedupe.emplace(method_index, index).first->second; - method_indices_[index] = method_index; + // Set the mask and map for the current StackMap (which includes inlined registers). + if (temp_dex_register_mask_.GetNumberOfBits() != 0) { + current_stack_map_[StackMap::kDexRegisterMaskIndex] = + dex_register_masks_.Dedup(temp_dex_register_mask_.GetRawStorage(), + temp_dex_register_mask_.GetNumberOfBits()); + } + if (!current_dex_registers_.empty()) { + current_stack_map_[StackMap::kDexRegisterMapIndex] = + dex_register_maps_.Dedup(temp_dex_register_map_.data(), + temp_dex_register_map_.size()); + } + + if (kVerifyStackMaps) { + size_t stack_map_index = stack_maps_.size(); + // We need to make copy of the current registers for later (when the check is run). + auto expected_dex_registers = std::make_shared<dchecked_vector<DexRegisterLocation>>( + current_dex_registers_.begin(), current_dex_registers_.end()); + dchecks_.emplace_back([=](const CodeInfo& code_info) { + StackMap stack_map = code_info.GetStackMapAt(stack_map_index); + uint32_t expected_reg = 0; + for (DexRegisterLocation reg : code_info.GetDexRegisterMapOf(stack_map)) { + CHECK_EQ((*expected_dex_registers)[expected_reg++], reg); + } + for (InlineInfo inline_info : code_info.GetInlineInfosOf(stack_map)) { + DexRegisterMap map = code_info.GetInlineDexRegisterMapOf(stack_map, inline_info); + for (DexRegisterLocation reg : map) { + CHECK_EQ((*expected_dex_registers)[expected_reg++], reg); + } + } + CHECK_EQ(expected_reg, expected_dex_registers->size()); + }); } - method_indices_.resize(dedupe.size()); } - -size_t StackMapStream::PrepareStackMasks(size_t entry_size_in_bits) { - // Preallocate memory since we do not want it to move (the dedup map will point into it). - const size_t byte_entry_size = RoundUp(entry_size_in_bits, kBitsPerByte) / kBitsPerByte; - stack_masks_.resize(byte_entry_size * stack_maps_.size(), 0u); - // For deduplicating we store the stack masks as byte packed for simplicity. We can bit pack later - // when copying out from stack_masks_. - ScopedArenaUnorderedMap<MemoryRegion, - size_t, - FNVHash<MemoryRegion>, - MemoryRegion::ContentEquals> dedup( - stack_maps_.size(), allocator_->Adapter(kArenaAllocStackMapStream)); - for (StackMapEntry& stack_map : stack_maps_) { - size_t index = dedup.size(); - MemoryRegion stack_mask(stack_masks_.data() + index * byte_entry_size, byte_entry_size); - for (size_t i = 0; i < entry_size_in_bits; i++) { - stack_mask.StoreBit(i, stack_map.sp_mask != nullptr && stack_map.sp_mask->IsBitSet(i)); - } - stack_map.stack_mask_index = dedup.emplace(stack_mask, index).first->second; - } - return dedup.size(); +template<typename Writer, typename Builder> +ALWAYS_INLINE static void EncodeTable(Writer& out, const Builder& bit_table) { + out.WriteBit(false); // Is not deduped. + bit_table.Encode(out); } -// Check that all StackMapStream inputs are correctly encoded by trying to read them back. -void StackMapStream::CheckCodeInfo(MemoryRegion region) const { - CodeInfo code_info(region); - CodeInfoEncoding encoding = code_info.ExtractEncoding(); - DCHECK_EQ(code_info.GetNumberOfStackMaps(encoding), stack_maps_.size()); - size_t invoke_info_index = 0; - for (size_t s = 0; s < stack_maps_.size(); ++s) { - const StackMap stack_map = code_info.GetStackMapAt(s, encoding); - const StackMapEncoding& stack_map_encoding = encoding.stack_map.encoding; - StackMapEntry entry = stack_maps_[s]; - - // Check main stack map fields. 
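
CreateDexRegisterMap above re-emits a register only when its location differs from the previous stack map, or when its last emission lies further back than kMaxDexRegisterMapSearchDistance (which bounds how far back a reader has to search for the most recent value). Changed registers are flagged in a bitmask and their locations deduplicated through the catalog. A minimal sketch of the delta scheme under simplifying assumptions (no search-distance refresh, a linear catalog lookup instead of BitTableBuilder deduplication, illustrative names):

    #include <cstdint>
    #include <utility>
    #include <vector>

    // Illustrative stand-in for a DexRegisterLocation.
    struct SketchLocation {
      uint32_t kind = ~0u;  // ~0u means "no location recorded yet"
      int32_t value = 0;
      bool operator==(const SketchLocation& other) const {
        return kind == other.kind && value == other.value;
      }
    };

    class DexRegisterDeltaSketch {
     public:
      // Encodes one stack map: returns the changed-register mask and, for each
      // changed register, the index of its location in the catalog.
      std::pair<std::vector<bool>, std::vector<uint32_t>> EncodeMap(
          const std::vector<SketchLocation>& current) {
        if (previous_.size() < current.size()) {
          previous_.resize(current.size());
        }
        std::vector<bool> changed_mask(current.size(), false);
        std::vector<uint32_t> map;  // one catalog index per changed register
        for (size_t i = 0; i < current.size(); ++i) {
          if (!(previous_[i] == current[i])) {
            changed_mask[i] = true;
            map.push_back(DedupCatalog(current[i]));
            previous_[i] = current[i];
          }
        }
        return {std::move(changed_mask), std::move(map)};
      }

     private:
      uint32_t DedupCatalog(const SketchLocation& location) {
        for (size_t i = 0; i < catalog_.size(); ++i) {
          if (catalog_[i] == location) {
            return static_cast<uint32_t>(i);
          }
        }
        catalog_.push_back(location);
        return static_cast<uint32_t>(catalog_.size() - 1);
      }

      std::vector<SketchLocation> catalog_;   // unique (kind, value) pairs
      std::vector<SketchLocation> previous_;  // last emitted location per register
    };
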
- DCHECK_EQ(stack_map.GetNativePcOffset(stack_map_encoding, instruction_set_), - entry.native_pc_code_offset.Uint32Value(instruction_set_)); - DCHECK_EQ(stack_map.GetDexPc(stack_map_encoding), entry.dex_pc); - DCHECK_EQ(stack_map.GetRegisterMaskIndex(stack_map_encoding), entry.register_mask_index); - DCHECK_EQ(code_info.GetRegisterMaskOf(encoding, stack_map), entry.register_mask); - const size_t num_stack_mask_bits = code_info.GetNumberOfStackMaskBits(encoding); - DCHECK_EQ(stack_map.GetStackMaskIndex(stack_map_encoding), entry.stack_mask_index); - BitMemoryRegion stack_mask = code_info.GetStackMaskOf(encoding, stack_map); - if (entry.sp_mask != nullptr) { - DCHECK_GE(stack_mask.size_in_bits(), entry.sp_mask->GetNumberOfBits()); - for (size_t b = 0; b < num_stack_mask_bits; b++) { - DCHECK_EQ(stack_mask.LoadBit(b), entry.sp_mask->IsBitSet(b)); - } - } else { - for (size_t b = 0; b < num_stack_mask_bits; b++) { - DCHECK_EQ(stack_mask.LoadBit(b), 0u); - } - } - if (entry.dex_method_index != dex::kDexNoIndex) { - InvokeInfo invoke_info = code_info.GetInvokeInfo(encoding, invoke_info_index); - DCHECK_EQ(invoke_info.GetNativePcOffset(encoding.invoke_info.encoding, instruction_set_), - entry.native_pc_code_offset.Uint32Value(instruction_set_)); - DCHECK_EQ(invoke_info.GetInvokeType(encoding.invoke_info.encoding), entry.invoke_type); - DCHECK_EQ(invoke_info.GetMethodIndexIdx(encoding.invoke_info.encoding), - entry.dex_method_index_idx); - invoke_info_index++; - } - CheckDexRegisterMap(code_info, - code_info.GetDexRegisterMapOf( - stack_map, encoding, entry.dex_register_entry.num_dex_registers), - entry.dex_register_entry.num_dex_registers, - entry.dex_register_entry.live_dex_registers_mask, - entry.dex_register_entry.locations_start_index); - - // Check inline info. 
- DCHECK_EQ(stack_map.HasInlineInfo(stack_map_encoding), (entry.inlining_depth != 0)); - if (entry.inlining_depth != 0) { - InlineInfo inline_info = code_info.GetInlineInfoOf(stack_map, encoding); - DCHECK_EQ(inline_info.GetDepth(encoding.inline_info.encoding), entry.inlining_depth); - for (size_t d = 0; d < entry.inlining_depth; ++d) { - size_t inline_info_index = entry.inline_infos_start_index + d; - DCHECK_LT(inline_info_index, inline_infos_.size()); - InlineInfoEntry inline_entry = inline_infos_[inline_info_index]; - DCHECK_EQ(inline_info.GetDexPcAtDepth(encoding.inline_info.encoding, d), - inline_entry.dex_pc); - if (inline_info.EncodesArtMethodAtDepth(encoding.inline_info.encoding, d)) { - DCHECK_EQ(inline_info.GetArtMethodAtDepth(encoding.inline_info.encoding, d), - inline_entry.method); - } else { - const size_t method_index_idx = - inline_info.GetMethodIndexIdxAtDepth(encoding.inline_info.encoding, d); - DCHECK_EQ(method_index_idx, inline_entry.dex_method_index_idx); - DCHECK_EQ(method_indices_[method_index_idx], inline_entry.method_index); - } - - CheckDexRegisterMap(code_info, - code_info.GetDexRegisterMapAtDepth( - d, - inline_info, - encoding, - inline_entry.dex_register_entry.num_dex_registers), - inline_entry.dex_register_entry.num_dex_registers, - inline_entry.dex_register_entry.live_dex_registers_mask, - inline_entry.dex_register_entry.locations_start_index); - } +ScopedArenaVector<uint8_t> StackMapStream::Encode() { + DCHECK(in_stack_map_ == false) << "Mismatched Begin/End calls"; + DCHECK(in_inline_info_ == false) << "Mismatched Begin/End calls"; + + ScopedArenaVector<uint8_t> buffer(allocator_->Adapter(kArenaAllocStackMapStream)); + BitMemoryWriter<ScopedArenaVector<uint8_t>> out(&buffer); + out.WriteVarint(packed_frame_size_); + out.WriteVarint(core_spill_mask_); + out.WriteVarint(fp_spill_mask_); + out.WriteVarint(num_dex_registers_); + EncodeTable(out, stack_maps_); + EncodeTable(out, register_masks_); + EncodeTable(out, stack_masks_); + EncodeTable(out, inline_infos_); + EncodeTable(out, method_infos_); + EncodeTable(out, dex_register_masks_); + EncodeTable(out, dex_register_maps_); + EncodeTable(out, dex_register_catalog_); + + // Verify that we can load the CodeInfo and check some essentials. + CodeInfo code_info(buffer.data()); + CHECK_EQ(code_info.Size(), buffer.size()); + CHECK_EQ(code_info.GetNumberOfStackMaps(), stack_maps_.size()); + + // Verify all written data (usually only in debug builds). 
+ if (kVerifyStackMaps) { + for (const auto& dcheck : dchecks_) { + dcheck(code_info); } } -} -size_t StackMapStream::ComputeMethodInfoSize() const { - DCHECK_NE(0u, needed_size_) << "PrepareForFillIn not called before " << __FUNCTION__; - return MethodInfo::ComputeSize(method_indices_.size()); + return buffer; } } // namespace art diff --git a/compiler/optimizing/stack_map_stream.h b/compiler/optimizing/stack_map_stream.h index 579aabdb5f..01c6bf9e0e 100644 --- a/compiler/optimizing/stack_map_stream.h +++ b/compiler/optimizing/stack_map_stream.h @@ -17,133 +17,63 @@ #ifndef ART_COMPILER_OPTIMIZING_STACK_MAP_STREAM_H_ #define ART_COMPILER_OPTIMIZING_STACK_MAP_STREAM_H_ +#include "base/allocator.h" +#include "base/arena_bit_vector.h" +#include "base/bit_table.h" #include "base/bit_vector-inl.h" -#include "base/hash_map.h" +#include "base/memory_region.h" #include "base/scoped_arena_containers.h" #include "base/value_object.h" -#include "memory_region.h" -#include "method_info.h" +#include "dex_register_location.h" #include "nodes.h" #include "stack_map.h" namespace art { -// Helper to build art::StackMapStream::LocationCatalogEntriesIndices. -class LocationCatalogEntriesIndicesEmptyFn { - public: - void MakeEmpty(std::pair<DexRegisterLocation, size_t>& item) const { - item.first = DexRegisterLocation::None(); - } - bool IsEmpty(const std::pair<DexRegisterLocation, size_t>& item) const { - return item.first == DexRegisterLocation::None(); - } -}; - -// Hash function for art::StackMapStream::LocationCatalogEntriesIndices. -// This hash function does not create collisions. -class DexRegisterLocationHashFn { - public: - size_t operator()(DexRegisterLocation key) const { - // Concatenate `key`s fields to create a 64-bit value to be hashed. - int64_t kind_and_value = - (static_cast<int64_t>(key.kind_) << 32) | static_cast<int64_t>(key.value_); - return inner_hash_fn_(kind_and_value); - } - private: - std::hash<int64_t> inner_hash_fn_; -}; - - /** * Collects and builds stack maps for a method. All the stack maps * for a method are placed in a CodeInfo object. 
*/ -class StackMapStream : public ValueObject { +class StackMapStream : public DeletableArenaObject<kArenaAllocStackMapStream> { public: explicit StackMapStream(ScopedArenaAllocator* allocator, InstructionSet instruction_set) : allocator_(allocator), instruction_set_(instruction_set), - stack_maps_(allocator->Adapter(kArenaAllocStackMapStream)), - location_catalog_entries_(allocator->Adapter(kArenaAllocStackMapStream)), - location_catalog_entries_indices_(allocator->Adapter(kArenaAllocStackMapStream)), - dex_register_locations_(allocator->Adapter(kArenaAllocStackMapStream)), - inline_infos_(allocator->Adapter(kArenaAllocStackMapStream)), - stack_masks_(allocator->Adapter(kArenaAllocStackMapStream)), - register_masks_(allocator->Adapter(kArenaAllocStackMapStream)), - method_indices_(allocator->Adapter(kArenaAllocStackMapStream)), - dex_register_entries_(allocator->Adapter(kArenaAllocStackMapStream)), - stack_mask_max_(-1), - dex_pc_max_(kNoDexPc), - register_mask_max_(0), - number_of_stack_maps_with_inline_info_(0), - dex_map_hash_to_stack_map_indices_(std::less<uint32_t>(), - allocator->Adapter(kArenaAllocStackMapStream)), - current_entry_(), - current_inline_info_(), - code_info_encoding_(allocator->Adapter(kArenaAllocStackMapStream)), - needed_size_(0), - current_dex_register_(0), - in_inline_frame_(false) { - stack_maps_.reserve(10); - location_catalog_entries_.reserve(4); - dex_register_locations_.reserve(10 * 4); - inline_infos_.reserve(2); - code_info_encoding_.reserve(16); + stack_maps_(allocator), + inline_infos_(allocator), + method_infos_(allocator), + register_masks_(allocator), + stack_masks_(allocator), + dex_register_masks_(allocator), + dex_register_maps_(allocator), + dex_register_catalog_(allocator), + lazy_stack_masks_(allocator->Adapter(kArenaAllocStackMapStream)), + current_stack_map_(), + current_inline_infos_(allocator->Adapter(kArenaAllocStackMapStream)), + current_dex_registers_(allocator->Adapter(kArenaAllocStackMapStream)), + previous_dex_registers_(allocator->Adapter(kArenaAllocStackMapStream)), + dex_register_timestamp_(allocator->Adapter(kArenaAllocStackMapStream)), + expected_num_dex_registers_(0u), + temp_dex_register_mask_(allocator, 32, true, kArenaAllocStackMapStream), + temp_dex_register_map_(allocator->Adapter(kArenaAllocStackMapStream)) { } - // A dex register map entry for a single stack map entry, contains what registers are live as - // well as indices into the location catalog. - class DexRegisterMapEntry { - public: - static const size_t kOffsetUnassigned = -1; - - BitVector* live_dex_registers_mask; - uint32_t num_dex_registers; - size_t locations_start_index; - // Computed fields - size_t hash = 0; - size_t offset = kOffsetUnassigned; - - size_t ComputeSize(size_t catalog_size) const; - }; - - // See runtime/stack_map.h to know what these fields contain. - struct StackMapEntry { - uint32_t dex_pc; - CodeOffset native_pc_code_offset; - uint32_t register_mask; - BitVector* sp_mask; - uint8_t inlining_depth; - size_t inline_infos_start_index; - uint32_t stack_mask_index; - uint32_t register_mask_index; - DexRegisterMapEntry dex_register_entry; - size_t dex_register_map_index; - InvokeType invoke_type; - uint32_t dex_method_index; - uint32_t dex_method_index_idx; // Index into dex method index table. - }; - - struct InlineInfoEntry { - uint32_t dex_pc; // dex::kDexNoIndex for intrinsified native methods. 
- ArtMethod* method; - uint32_t method_index; - DexRegisterMapEntry dex_register_entry; - size_t dex_register_map_index; - uint32_t dex_method_index_idx; // Index into the dex method index table. - }; + void BeginMethod(size_t frame_size_in_bytes, + size_t core_spill_mask, + size_t fp_spill_mask, + uint32_t num_dex_registers); + void EndMethod(); void BeginStackMapEntry(uint32_t dex_pc, uint32_t native_pc_offset, - uint32_t register_mask, - BitVector* sp_mask, - uint32_t num_dex_registers, - uint8_t inlining_depth); + uint32_t register_mask = 0, + BitVector* sp_mask = nullptr, + StackMap::Kind kind = StackMap::Kind::Default); void EndStackMapEntry(); - void AddDexRegisterEntry(DexRegisterLocation::Kind kind, int32_t value); - - void AddInvoke(InvokeType type, uint32_t dex_method_index); + void AddDexRegisterEntry(DexRegisterLocation::Kind kind, int32_t value) { + current_dex_registers_.push_back(DexRegisterLocation(kind, value)); + } void BeginInlineInfoEntry(ArtMethod* method, uint32_t dex_pc, @@ -155,109 +85,54 @@ class StackMapStream : public ValueObject { return stack_maps_.size(); } - const StackMapEntry& GetStackMap(size_t i) const { - return stack_maps_[i]; - } - - void SetStackMapNativePcOffset(size_t i, uint32_t native_pc_offset) { - stack_maps_[i].native_pc_code_offset = - CodeOffset::FromOffset(native_pc_offset, instruction_set_); - } - - // Prepares the stream to fill in a memory region. Must be called before FillIn. - // Returns the size (in bytes) needed to store this stream. - size_t PrepareForFillIn(); - void FillInCodeInfo(MemoryRegion region); - void FillInMethodInfo(MemoryRegion region); + uint32_t GetStackMapNativePcOffset(size_t i); + void SetStackMapNativePcOffset(size_t i, uint32_t native_pc_offset); - size_t ComputeMethodInfoSize() const; + // Encode all stack map data. + // The returned vector is allocated using the allocator passed to the StackMapStream. + ScopedArenaVector<uint8_t> Encode(); private: - size_t ComputeDexRegisterLocationCatalogSize() const; - size_t ComputeDexRegisterMapsSize() const; - void ComputeInlineInfoEncoding(InlineInfoEncoding* encoding, - size_t dex_register_maps_bytes); - - CodeOffset ComputeMaxNativePcCodeOffset() const; - - // Returns the number of unique stack masks. - size_t PrepareStackMasks(size_t entry_size_in_bits); - - // Returns the number of unique register masks. - size_t PrepareRegisterMasks(); - - // Prepare and deduplicate method indices. - void PrepareMethodIndices(); - - // Deduplicate entry if possible and return the corresponding index into dex_register_entries_ - // array. If entry is not a duplicate, a new entry is added to dex_register_entries_. - size_t AddDexRegisterMapEntry(const DexRegisterMapEntry& entry); - - // Return true if the two dex register map entries are equal. - bool DexRegisterMapEntryEquals(const DexRegisterMapEntry& a, const DexRegisterMapEntry& b) const; + static constexpr uint32_t kNoValue = -1; - // Fill in the corresponding entries of a register map. - void ComputeInvokeInfoEncoding(CodeInfoEncoding* encoding); + void CreateDexRegisterMap(); - // Returns the index of an entry with the same dex register map as the current_entry, - // or kNoSameDexMapFound if no such entry exists. - size_t FindEntryWithTheSameDexMap(); - bool HaveTheSameDexMaps(const StackMapEntry& a, const StackMapEntry& b) const; - - // Fill in the corresponding entries of a register map. 
- void FillInDexRegisterMap(DexRegisterMap dex_register_map, - uint32_t num_dex_registers, - const BitVector& live_dex_registers_mask, - uint32_t start_index_in_dex_register_locations) const; - - // Returns the offset for the dex register inside of the dex register location region. See FillIn. - // Only copies the dex register map if the offset for the entry is not already assigned. - size_t MaybeCopyDexRegisterMap(DexRegisterMapEntry& entry, - size_t* current_offset, - MemoryRegion dex_register_locations_region); - void CheckDexRegisterMap(const CodeInfo& code_info, - const DexRegisterMap& dex_register_map, - size_t num_dex_registers, - BitVector* live_dex_registers_mask, - size_t dex_register_locations_index) const; - void CheckCodeInfo(MemoryRegion region) const; - - ScopedArenaAllocator* const allocator_; + ScopedArenaAllocator* allocator_; const InstructionSet instruction_set_; - ScopedArenaVector<StackMapEntry> stack_maps_; - - // A catalog of unique [location_kind, register_value] pairs (per method). - ScopedArenaVector<DexRegisterLocation> location_catalog_entries_; - // Map from Dex register location catalog entries to their indices in the - // location catalog. - using LocationCatalogEntriesIndices = ScopedArenaHashMap<DexRegisterLocation, - size_t, - LocationCatalogEntriesIndicesEmptyFn, - DexRegisterLocationHashFn>; - LocationCatalogEntriesIndices location_catalog_entries_indices_; - - // A set of concatenated maps of Dex register locations indices to `location_catalog_entries_`. - ScopedArenaVector<size_t> dex_register_locations_; - ScopedArenaVector<InlineInfoEntry> inline_infos_; - ScopedArenaVector<uint8_t> stack_masks_; - ScopedArenaVector<uint32_t> register_masks_; - ScopedArenaVector<uint32_t> method_indices_; - ScopedArenaVector<DexRegisterMapEntry> dex_register_entries_; - int stack_mask_max_; - uint32_t dex_pc_max_; - uint32_t register_mask_max_; - size_t number_of_stack_maps_with_inline_info_; - - ScopedArenaSafeMap<uint32_t, ScopedArenaVector<uint32_t>> dex_map_hash_to_stack_map_indices_; - - StackMapEntry current_entry_; - InlineInfoEntry current_inline_info_; - ScopedArenaVector<uint8_t> code_info_encoding_; - size_t needed_size_; - uint32_t current_dex_register_; - bool in_inline_frame_; - - static constexpr uint32_t kNoSameDexMapFound = -1; + uint32_t packed_frame_size_ = 0; + uint32_t core_spill_mask_ = 0; + uint32_t fp_spill_mask_ = 0; + uint32_t num_dex_registers_ = 0; + BitTableBuilder<StackMap> stack_maps_; + BitTableBuilder<InlineInfo> inline_infos_; + BitTableBuilder<MethodInfo> method_infos_; + BitTableBuilder<RegisterMask> register_masks_; + BitmapTableBuilder stack_masks_; + BitmapTableBuilder dex_register_masks_; + BitTableBuilder<DexRegisterMapInfo> dex_register_maps_; + BitTableBuilder<DexRegisterInfo> dex_register_catalog_; + + ScopedArenaVector<BitVector*> lazy_stack_masks_; + + // Variables which track the current state between Begin/End calls; + bool in_method_ = false; + bool in_stack_map_ = false; + bool in_inline_info_ = false; + BitTableBuilder<StackMap>::Entry current_stack_map_; + ScopedArenaVector<BitTableBuilder<InlineInfo>::Entry> current_inline_infos_; + ScopedArenaVector<DexRegisterLocation> current_dex_registers_; + ScopedArenaVector<DexRegisterLocation> previous_dex_registers_; + ScopedArenaVector<uint32_t> dex_register_timestamp_; // Stack map index of last change. + size_t expected_num_dex_registers_; + + // Temporary variables used in CreateDexRegisterMap. + // They are here so that we can reuse the reserved memory. 
+ ArenaBitVector temp_dex_register_mask_; + ScopedArenaVector<BitTableBuilder<DexRegisterMapInfo>::Entry> temp_dex_register_map_; + + // A set of lambda functions to be executed at the end to verify + // the encoded data. It is generally only used in debug builds. + std::vector<std::function<void(CodeInfo&)>> dchecks_; DISALLOW_COPY_AND_ASSIGN(StackMapStream); }; diff --git a/compiler/optimizing/stack_map_test.cc b/compiler/optimizing/stack_map_test.cc index 7e517f3485..d28f09fbba 100644 --- a/compiler/optimizing/stack_map_test.cc +++ b/compiler/optimizing/stack_map_test.cc @@ -18,6 +18,7 @@ #include "art_method.h" #include "base/arena_bit_vector.h" +#include "base/malloc_arena_pool.h" #include "stack_map_stream.h" #include "gtest/gtest.h" @@ -28,14 +29,13 @@ namespace art { // to the given bit vector. Returns true if they are same. static bool CheckStackMask( const CodeInfo& code_info, - const CodeInfoEncoding& encoding, const StackMap& stack_map, const BitVector& bit_vector) { - BitMemoryRegion stack_mask = code_info.GetStackMaskOf(encoding, stack_map); - if (bit_vector.GetNumberOfBits() > encoding.stack_mask.encoding.BitSize()) { + BitMemoryRegion stack_mask = code_info.GetStackMaskOf(stack_map); + if (bit_vector.GetNumberOfBits() > stack_mask.size_in_bits()) { return false; } - for (size_t i = 0; i < encoding.stack_mask.encoding.BitSize(); ++i) { + for (size_t i = 0; i < stack_mask.size_in_bits(); ++i) { if (stack_mask.LoadBit(i) != bit_vector.IsBitSet(i)) { return false; } @@ -45,93 +45,68 @@ static bool CheckStackMask( using Kind = DexRegisterLocation::Kind; +constexpr static uint32_t kPcAlign = GetInstructionSetInstructionAlignment(kRuntimeISA); + TEST(StackMapTest, Test1) { - ArenaPool pool; + MallocArenaPool pool; ArenaStack arena_stack(&pool); ScopedArenaAllocator allocator(&arena_stack); StackMapStream stream(&allocator, kRuntimeISA); + stream.BeginMethod(32, 0, 0, 2); ArenaBitVector sp_mask(&allocator, 0, false); size_t number_of_dex_registers = 2; - stream.BeginStackMapEntry(0, 64, 0x3, &sp_mask, number_of_dex_registers, 0); + stream.BeginStackMapEntry(0, 64 * kPcAlign, 0x3, &sp_mask); stream.AddDexRegisterEntry(Kind::kInStack, 0); // Short location. stream.AddDexRegisterEntry(Kind::kConstant, -2); // Short location. stream.EndStackMapEntry(); - size_t size = stream.PrepareForFillIn(); - void* memory = allocator.Alloc(size, kArenaAllocMisc); - MemoryRegion region(memory, size); - stream.FillInCodeInfo(region); + stream.EndMethod(); + ScopedArenaVector<uint8_t> memory = stream.Encode(); - CodeInfo code_info(region); - CodeInfoEncoding encoding = code_info.ExtractEncoding(); - ASSERT_EQ(1u, code_info.GetNumberOfStackMaps(encoding)); + CodeInfo code_info(memory.data()); + ASSERT_EQ(1u, code_info.GetNumberOfStackMaps()); - uint32_t number_of_catalog_entries = code_info.GetNumberOfLocationCatalogEntries(encoding); + uint32_t number_of_catalog_entries = code_info.GetNumberOfLocationCatalogEntries(); ASSERT_EQ(2u, number_of_catalog_entries); - DexRegisterLocationCatalog location_catalog = code_info.GetDexRegisterLocationCatalog(encoding); - // The Dex register location catalog contains: - // - one 1-byte short Dex register location, and - // - one 5-byte large Dex register location. 
- size_t expected_location_catalog_size = 1u + 5u; - ASSERT_EQ(expected_location_catalog_size, location_catalog.Size()); - - StackMap stack_map = code_info.GetStackMapAt(0, encoding); - ASSERT_TRUE(stack_map.Equals(code_info.GetStackMapForDexPc(0, encoding))); - ASSERT_TRUE(stack_map.Equals(code_info.GetStackMapForNativePcOffset(64, encoding))); - ASSERT_EQ(0u, stack_map.GetDexPc(encoding.stack_map.encoding)); - ASSERT_EQ(64u, stack_map.GetNativePcOffset(encoding.stack_map.encoding, kRuntimeISA)); - ASSERT_EQ(0x3u, code_info.GetRegisterMaskOf(encoding, stack_map)); - - ASSERT_TRUE(CheckStackMask(code_info, encoding, stack_map, sp_mask)); - - ASSERT_TRUE(stack_map.HasDexRegisterMap(encoding.stack_map.encoding)); - DexRegisterMap dex_register_map = - code_info.GetDexRegisterMapOf(stack_map, encoding, number_of_dex_registers); - ASSERT_TRUE(dex_register_map.IsDexRegisterLive(0)); - ASSERT_TRUE(dex_register_map.IsDexRegisterLive(1)); - ASSERT_EQ(2u, dex_register_map.GetNumberOfLiveDexRegisters(number_of_dex_registers)); - // The Dex register map contains: - // - one 1-byte live bit mask, and - // - one 1-byte set of location catalog entry indices composed of two 2-bit values. - size_t expected_dex_register_map_size = 1u + 1u; - ASSERT_EQ(expected_dex_register_map_size, dex_register_map.Size()); - - ASSERT_EQ(Kind::kInStack, dex_register_map.GetLocationKind( - 0, number_of_dex_registers, code_info, encoding)); - ASSERT_EQ(Kind::kConstant, dex_register_map.GetLocationKind( - 1, number_of_dex_registers, code_info, encoding)); - ASSERT_EQ(Kind::kInStack, dex_register_map.GetLocationInternalKind( - 0, number_of_dex_registers, code_info, encoding)); - ASSERT_EQ(Kind::kConstantLargeValue, dex_register_map.GetLocationInternalKind( - 1, number_of_dex_registers, code_info, encoding)); - ASSERT_EQ(0, dex_register_map.GetStackOffsetInBytes( - 0, number_of_dex_registers, code_info, encoding)); - ASSERT_EQ(-2, dex_register_map.GetConstant(1, number_of_dex_registers, code_info, encoding)); - - size_t index0 = dex_register_map.GetLocationCatalogEntryIndex( - 0, number_of_dex_registers, number_of_catalog_entries); - size_t index1 = dex_register_map.GetLocationCatalogEntryIndex( - 1, number_of_dex_registers, number_of_catalog_entries); - ASSERT_EQ(0u, index0); - ASSERT_EQ(1u, index1); - DexRegisterLocation location0 = location_catalog.GetDexRegisterLocation(index0); - DexRegisterLocation location1 = location_catalog.GetDexRegisterLocation(index1); + + StackMap stack_map = code_info.GetStackMapAt(0); + ASSERT_TRUE(stack_map.Equals(code_info.GetStackMapForDexPc(0))); + ASSERT_TRUE(stack_map.Equals(code_info.GetStackMapForNativePcOffset(64 * kPcAlign))); + ASSERT_EQ(0u, stack_map.GetDexPc()); + ASSERT_EQ(64u * kPcAlign, stack_map.GetNativePcOffset(kRuntimeISA)); + ASSERT_EQ(0x3u, code_info.GetRegisterMaskOf(stack_map)); + + ASSERT_TRUE(CheckStackMask(code_info, stack_map, sp_mask)); + + ASSERT_TRUE(stack_map.HasDexRegisterMap()); + DexRegisterMap dex_register_map = code_info.GetDexRegisterMapOf(stack_map); + ASSERT_EQ(number_of_dex_registers, dex_register_map.size()); + ASSERT_TRUE(dex_register_map[0].IsLive()); + ASSERT_TRUE(dex_register_map[1].IsLive()); + ASSERT_EQ(2u, dex_register_map.GetNumberOfLiveDexRegisters()); + + ASSERT_EQ(Kind::kInStack, dex_register_map[0].GetKind()); + ASSERT_EQ(Kind::kConstant, dex_register_map[1].GetKind()); + ASSERT_EQ(0, dex_register_map[0].GetStackOffsetInBytes()); + ASSERT_EQ(-2, dex_register_map[1].GetConstant()); + + DexRegisterLocation location0 = 
code_info.GetDexRegisterCatalogEntry(0); + DexRegisterLocation location1 = code_info.GetDexRegisterCatalogEntry(1); ASSERT_EQ(Kind::kInStack, location0.GetKind()); ASSERT_EQ(Kind::kConstant, location1.GetKind()); - ASSERT_EQ(Kind::kInStack, location0.GetInternalKind()); - ASSERT_EQ(Kind::kConstantLargeValue, location1.GetInternalKind()); ASSERT_EQ(0, location0.GetValue()); ASSERT_EQ(-2, location1.GetValue()); - ASSERT_FALSE(stack_map.HasInlineInfo(encoding.stack_map.encoding)); + ASSERT_FALSE(stack_map.HasInlineInfo()); } TEST(StackMapTest, Test2) { - ArenaPool pool; + MallocArenaPool pool; ArenaStack arena_stack(&pool); ScopedArenaAllocator allocator(&arena_stack); StackMapStream stream(&allocator, kRuntimeISA); + stream.BeginMethod(32, 0, 0, 2); ArtMethod art_method; ArenaBitVector sp_mask1(&allocator, 0, true); @@ -139,7 +114,7 @@ TEST(StackMapTest, Test2) { sp_mask1.SetBit(4); size_t number_of_dex_registers = 2; size_t number_of_dex_registers_in_inline_info = 0; - stream.BeginStackMapEntry(0, 64, 0x3, &sp_mask1, number_of_dex_registers, 2); + stream.BeginStackMapEntry(0, 64 * kPcAlign, 0x3, &sp_mask1); stream.AddDexRegisterEntry(Kind::kInStack, 0); // Short location. stream.AddDexRegisterEntry(Kind::kConstant, -2); // Large location. stream.BeginInlineInfoEntry(&art_method, 3, number_of_dex_registers_in_inline_info); @@ -151,7 +126,7 @@ TEST(StackMapTest, Test2) { ArenaBitVector sp_mask2(&allocator, 0, true); sp_mask2.SetBit(3); sp_mask2.SetBit(8); - stream.BeginStackMapEntry(1, 128, 0xFF, &sp_mask2, number_of_dex_registers, 0); + stream.BeginStackMapEntry(1, 128 * kPcAlign, 0xFF, &sp_mask2); stream.AddDexRegisterEntry(Kind::kInRegister, 18); // Short location. stream.AddDexRegisterEntry(Kind::kInFpuRegister, 3); // Short location. stream.EndStackMapEntry(); @@ -159,7 +134,7 @@ TEST(StackMapTest, Test2) { ArenaBitVector sp_mask3(&allocator, 0, true); sp_mask3.SetBit(1); sp_mask3.SetBit(5); - stream.BeginStackMapEntry(2, 192, 0xAB, &sp_mask3, number_of_dex_registers, 0); + stream.BeginStackMapEntry(2, 192 * kPcAlign, 0xAB, &sp_mask3); stream.AddDexRegisterEntry(Kind::kInRegister, 6); // Short location. stream.AddDexRegisterEntry(Kind::kInRegisterHigh, 8); // Short location. stream.EndStackMapEntry(); @@ -167,256 +142,165 @@ TEST(StackMapTest, Test2) { ArenaBitVector sp_mask4(&allocator, 0, true); sp_mask4.SetBit(6); sp_mask4.SetBit(7); - stream.BeginStackMapEntry(3, 256, 0xCD, &sp_mask4, number_of_dex_registers, 0); + stream.BeginStackMapEntry(3, 256 * kPcAlign, 0xCD, &sp_mask4); stream.AddDexRegisterEntry(Kind::kInFpuRegister, 3); // Short location, same in stack map 2. stream.AddDexRegisterEntry(Kind::kInFpuRegisterHigh, 1); // Short location. 
stream.EndStackMapEntry(); - size_t size = stream.PrepareForFillIn(); - void* memory = allocator.Alloc(size, kArenaAllocMisc); - MemoryRegion region(memory, size); - stream.FillInCodeInfo(region); + stream.EndMethod(); + ScopedArenaVector<uint8_t> memory = stream.Encode(); - CodeInfo code_info(region); - CodeInfoEncoding encoding = code_info.ExtractEncoding(); - ASSERT_EQ(4u, code_info.GetNumberOfStackMaps(encoding)); + CodeInfo code_info(memory.data()); + ASSERT_EQ(4u, code_info.GetNumberOfStackMaps()); - uint32_t number_of_catalog_entries = code_info.GetNumberOfLocationCatalogEntries(encoding); + uint32_t number_of_catalog_entries = code_info.GetNumberOfLocationCatalogEntries(); ASSERT_EQ(7u, number_of_catalog_entries); - DexRegisterLocationCatalog location_catalog = code_info.GetDexRegisterLocationCatalog(encoding); - // The Dex register location catalog contains: - // - six 1-byte short Dex register locations, and - // - one 5-byte large Dex register location. - size_t expected_location_catalog_size = 6u * 1u + 5u; - ASSERT_EQ(expected_location_catalog_size, location_catalog.Size()); // First stack map. { - StackMap stack_map = code_info.GetStackMapAt(0, encoding); - ASSERT_TRUE(stack_map.Equals(code_info.GetStackMapForDexPc(0, encoding))); - ASSERT_TRUE(stack_map.Equals(code_info.GetStackMapForNativePcOffset(64, encoding))); - ASSERT_EQ(0u, stack_map.GetDexPc(encoding.stack_map.encoding)); - ASSERT_EQ(64u, stack_map.GetNativePcOffset(encoding.stack_map.encoding, kRuntimeISA)); - ASSERT_EQ(0x3u, code_info.GetRegisterMaskOf(encoding, stack_map)); - - ASSERT_TRUE(CheckStackMask(code_info, encoding, stack_map, sp_mask1)); - - ASSERT_TRUE(stack_map.HasDexRegisterMap(encoding.stack_map.encoding)); - DexRegisterMap dex_register_map = - code_info.GetDexRegisterMapOf(stack_map, encoding, number_of_dex_registers); - ASSERT_TRUE(dex_register_map.IsDexRegisterLive(0)); - ASSERT_TRUE(dex_register_map.IsDexRegisterLive(1)); - ASSERT_EQ(2u, dex_register_map.GetNumberOfLiveDexRegisters(number_of_dex_registers)); - // The Dex register map contains: - // - one 1-byte live bit mask, and - // - one 1-byte set of location catalog entry indices composed of two 2-bit values. 
- size_t expected_dex_register_map_size = 1u + 1u; - ASSERT_EQ(expected_dex_register_map_size, dex_register_map.Size()); - - ASSERT_EQ(Kind::kInStack, dex_register_map.GetLocationKind( - 0, number_of_dex_registers, code_info, encoding)); - ASSERT_EQ(Kind::kConstant, dex_register_map.GetLocationKind( - 1, number_of_dex_registers, code_info, encoding)); - ASSERT_EQ(Kind::kInStack, dex_register_map.GetLocationInternalKind( - 0, number_of_dex_registers, code_info, encoding)); - ASSERT_EQ(Kind::kConstantLargeValue, dex_register_map.GetLocationInternalKind( - 1, number_of_dex_registers, code_info, encoding)); - ASSERT_EQ(0, dex_register_map.GetStackOffsetInBytes( - 0, number_of_dex_registers, code_info, encoding)); - ASSERT_EQ(-2, dex_register_map.GetConstant(1, number_of_dex_registers, code_info, encoding)); - - size_t index0 = dex_register_map.GetLocationCatalogEntryIndex( - 0, number_of_dex_registers, number_of_catalog_entries); - size_t index1 = dex_register_map.GetLocationCatalogEntryIndex( - 1, number_of_dex_registers, number_of_catalog_entries); - ASSERT_EQ(0u, index0); - ASSERT_EQ(1u, index1); - DexRegisterLocation location0 = location_catalog.GetDexRegisterLocation(index0); - DexRegisterLocation location1 = location_catalog.GetDexRegisterLocation(index1); + StackMap stack_map = code_info.GetStackMapAt(0); + ASSERT_TRUE(stack_map.Equals(code_info.GetStackMapForDexPc(0))); + ASSERT_TRUE(stack_map.Equals(code_info.GetStackMapForNativePcOffset(64 * kPcAlign))); + ASSERT_EQ(0u, stack_map.GetDexPc()); + ASSERT_EQ(64u * kPcAlign, stack_map.GetNativePcOffset(kRuntimeISA)); + ASSERT_EQ(0x3u, code_info.GetRegisterMaskOf(stack_map)); + + ASSERT_TRUE(CheckStackMask(code_info, stack_map, sp_mask1)); + + ASSERT_TRUE(stack_map.HasDexRegisterMap()); + DexRegisterMap dex_register_map = code_info.GetDexRegisterMapOf(stack_map); + ASSERT_EQ(number_of_dex_registers, dex_register_map.size()); + ASSERT_TRUE(dex_register_map[0].IsLive()); + ASSERT_TRUE(dex_register_map[1].IsLive()); + ASSERT_EQ(2u, dex_register_map.GetNumberOfLiveDexRegisters()); + + ASSERT_EQ(Kind::kInStack, dex_register_map[0].GetKind()); + ASSERT_EQ(Kind::kConstant, dex_register_map[1].GetKind()); + ASSERT_EQ(0, dex_register_map[0].GetStackOffsetInBytes()); + ASSERT_EQ(-2, dex_register_map[1].GetConstant()); + + DexRegisterLocation location0 = code_info.GetDexRegisterCatalogEntry(0); + DexRegisterLocation location1 = code_info.GetDexRegisterCatalogEntry(1); ASSERT_EQ(Kind::kInStack, location0.GetKind()); ASSERT_EQ(Kind::kConstant, location1.GetKind()); - ASSERT_EQ(Kind::kInStack, location0.GetInternalKind()); - ASSERT_EQ(Kind::kConstantLargeValue, location1.GetInternalKind()); ASSERT_EQ(0, location0.GetValue()); ASSERT_EQ(-2, location1.GetValue()); - ASSERT_TRUE(stack_map.HasInlineInfo(encoding.stack_map.encoding)); - InlineInfo inline_info = code_info.GetInlineInfoOf(stack_map, encoding); - ASSERT_EQ(2u, inline_info.GetDepth(encoding.inline_info.encoding)); - ASSERT_EQ(3u, inline_info.GetDexPcAtDepth(encoding.inline_info.encoding, 0)); - ASSERT_EQ(2u, inline_info.GetDexPcAtDepth(encoding.inline_info.encoding, 1)); - ASSERT_TRUE(inline_info.EncodesArtMethodAtDepth(encoding.inline_info.encoding, 0)); - ASSERT_TRUE(inline_info.EncodesArtMethodAtDepth(encoding.inline_info.encoding, 1)); + ASSERT_TRUE(stack_map.HasInlineInfo()); + auto inline_infos = code_info.GetInlineInfosOf(stack_map); + ASSERT_EQ(2u, inline_infos.size()); + ASSERT_EQ(3u, inline_infos[0].GetDexPc()); + ASSERT_EQ(2u, inline_infos[1].GetDexPc()); + 
ASSERT_TRUE(inline_infos[0].EncodesArtMethod()); + ASSERT_TRUE(inline_infos[1].EncodesArtMethod()); } // Second stack map. { - StackMap stack_map = code_info.GetStackMapAt(1, encoding); - ASSERT_TRUE(stack_map.Equals(code_info.GetStackMapForDexPc(1u, encoding))); - ASSERT_TRUE(stack_map.Equals(code_info.GetStackMapForNativePcOffset(128u, encoding))); - ASSERT_EQ(1u, stack_map.GetDexPc(encoding.stack_map.encoding)); - ASSERT_EQ(128u, stack_map.GetNativePcOffset(encoding.stack_map.encoding, kRuntimeISA)); - ASSERT_EQ(0xFFu, code_info.GetRegisterMaskOf(encoding, stack_map)); - - ASSERT_TRUE(CheckStackMask(code_info, encoding, stack_map, sp_mask2)); - - ASSERT_TRUE(stack_map.HasDexRegisterMap(encoding.stack_map.encoding)); - DexRegisterMap dex_register_map = - code_info.GetDexRegisterMapOf(stack_map, encoding, number_of_dex_registers); - ASSERT_TRUE(dex_register_map.IsDexRegisterLive(0)); - ASSERT_TRUE(dex_register_map.IsDexRegisterLive(1)); - ASSERT_EQ(2u, dex_register_map.GetNumberOfLiveDexRegisters(number_of_dex_registers)); - // The Dex register map contains: - // - one 1-byte live bit mask, and - // - one 1-byte set of location catalog entry indices composed of two 2-bit values. - size_t expected_dex_register_map_size = 1u + 1u; - ASSERT_EQ(expected_dex_register_map_size, dex_register_map.Size()); - - ASSERT_EQ(Kind::kInRegister, dex_register_map.GetLocationKind( - 0, number_of_dex_registers, code_info, encoding)); - ASSERT_EQ(Kind::kInFpuRegister, dex_register_map.GetLocationKind( - 1, number_of_dex_registers, code_info, encoding)); - ASSERT_EQ(Kind::kInRegister, dex_register_map.GetLocationInternalKind( - 0, number_of_dex_registers, code_info, encoding)); - ASSERT_EQ(Kind::kInFpuRegister, dex_register_map.GetLocationInternalKind( - 1, number_of_dex_registers, code_info, encoding)); - ASSERT_EQ(18, dex_register_map.GetMachineRegister( - 0, number_of_dex_registers, code_info, encoding)); - ASSERT_EQ(3, dex_register_map.GetMachineRegister( - 1, number_of_dex_registers, code_info, encoding)); - - size_t index0 = dex_register_map.GetLocationCatalogEntryIndex( - 0, number_of_dex_registers, number_of_catalog_entries); - size_t index1 = dex_register_map.GetLocationCatalogEntryIndex( - 1, number_of_dex_registers, number_of_catalog_entries); - ASSERT_EQ(2u, index0); - ASSERT_EQ(3u, index1); - DexRegisterLocation location0 = location_catalog.GetDexRegisterLocation(index0); - DexRegisterLocation location1 = location_catalog.GetDexRegisterLocation(index1); + StackMap stack_map = code_info.GetStackMapAt(1); + ASSERT_TRUE(stack_map.Equals(code_info.GetStackMapForDexPc(1u))); + ASSERT_TRUE(stack_map.Equals(code_info.GetStackMapForNativePcOffset(128u * kPcAlign))); + ASSERT_EQ(1u, stack_map.GetDexPc()); + ASSERT_EQ(128u * kPcAlign, stack_map.GetNativePcOffset(kRuntimeISA)); + ASSERT_EQ(0xFFu, code_info.GetRegisterMaskOf(stack_map)); + + ASSERT_TRUE(CheckStackMask(code_info, stack_map, sp_mask2)); + + ASSERT_TRUE(stack_map.HasDexRegisterMap()); + DexRegisterMap dex_register_map = code_info.GetDexRegisterMapOf(stack_map); + ASSERT_EQ(number_of_dex_registers, dex_register_map.size()); + ASSERT_TRUE(dex_register_map[0].IsLive()); + ASSERT_TRUE(dex_register_map[1].IsLive()); + ASSERT_EQ(2u, dex_register_map.GetNumberOfLiveDexRegisters()); + + ASSERT_EQ(Kind::kInRegister, dex_register_map[0].GetKind()); + ASSERT_EQ(Kind::kInFpuRegister, dex_register_map[1].GetKind()); + ASSERT_EQ(18, dex_register_map[0].GetMachineRegister()); + ASSERT_EQ(3, dex_register_map[1].GetMachineRegister()); + + DexRegisterLocation 
location0 = code_info.GetDexRegisterCatalogEntry(2); + DexRegisterLocation location1 = code_info.GetDexRegisterCatalogEntry(3); ASSERT_EQ(Kind::kInRegister, location0.GetKind()); ASSERT_EQ(Kind::kInFpuRegister, location1.GetKind()); - ASSERT_EQ(Kind::kInRegister, location0.GetInternalKind()); - ASSERT_EQ(Kind::kInFpuRegister, location1.GetInternalKind()); ASSERT_EQ(18, location0.GetValue()); ASSERT_EQ(3, location1.GetValue()); - ASSERT_FALSE(stack_map.HasInlineInfo(encoding.stack_map.encoding)); + ASSERT_FALSE(stack_map.HasInlineInfo()); } // Third stack map. { - StackMap stack_map = code_info.GetStackMapAt(2, encoding); - ASSERT_TRUE(stack_map.Equals(code_info.GetStackMapForDexPc(2u, encoding))); - ASSERT_TRUE(stack_map.Equals(code_info.GetStackMapForNativePcOffset(192u, encoding))); - ASSERT_EQ(2u, stack_map.GetDexPc(encoding.stack_map.encoding)); - ASSERT_EQ(192u, stack_map.GetNativePcOffset(encoding.stack_map.encoding, kRuntimeISA)); - ASSERT_EQ(0xABu, code_info.GetRegisterMaskOf(encoding, stack_map)); - - ASSERT_TRUE(CheckStackMask(code_info, encoding, stack_map, sp_mask3)); - - ASSERT_TRUE(stack_map.HasDexRegisterMap(encoding.stack_map.encoding)); - DexRegisterMap dex_register_map = - code_info.GetDexRegisterMapOf(stack_map, encoding, number_of_dex_registers); - ASSERT_TRUE(dex_register_map.IsDexRegisterLive(0)); - ASSERT_TRUE(dex_register_map.IsDexRegisterLive(1)); - ASSERT_EQ(2u, dex_register_map.GetNumberOfLiveDexRegisters(number_of_dex_registers)); - // The Dex register map contains: - // - one 1-byte live bit mask, and - // - one 1-byte set of location catalog entry indices composed of two 2-bit values. - size_t expected_dex_register_map_size = 1u + 1u; - ASSERT_EQ(expected_dex_register_map_size, dex_register_map.Size()); - - ASSERT_EQ(Kind::kInRegister, dex_register_map.GetLocationKind( - 0, number_of_dex_registers, code_info, encoding)); - ASSERT_EQ(Kind::kInRegisterHigh, dex_register_map.GetLocationKind( - 1, number_of_dex_registers, code_info, encoding)); - ASSERT_EQ(Kind::kInRegister, dex_register_map.GetLocationInternalKind( - 0, number_of_dex_registers, code_info, encoding)); - ASSERT_EQ(Kind::kInRegisterHigh, dex_register_map.GetLocationInternalKind( - 1, number_of_dex_registers, code_info, encoding)); - ASSERT_EQ(6, dex_register_map.GetMachineRegister( - 0, number_of_dex_registers, code_info, encoding)); - ASSERT_EQ(8, dex_register_map.GetMachineRegister( - 1, number_of_dex_registers, code_info, encoding)); - - size_t index0 = dex_register_map.GetLocationCatalogEntryIndex( - 0, number_of_dex_registers, number_of_catalog_entries); - size_t index1 = dex_register_map.GetLocationCatalogEntryIndex( - 1, number_of_dex_registers, number_of_catalog_entries); - ASSERT_EQ(4u, index0); - ASSERT_EQ(5u, index1); - DexRegisterLocation location0 = location_catalog.GetDexRegisterLocation(index0); - DexRegisterLocation location1 = location_catalog.GetDexRegisterLocation(index1); + StackMap stack_map = code_info.GetStackMapAt(2); + ASSERT_TRUE(stack_map.Equals(code_info.GetStackMapForDexPc(2u))); + ASSERT_TRUE(stack_map.Equals(code_info.GetStackMapForNativePcOffset(192u * kPcAlign))); + ASSERT_EQ(2u, stack_map.GetDexPc()); + ASSERT_EQ(192u * kPcAlign, stack_map.GetNativePcOffset(kRuntimeISA)); + ASSERT_EQ(0xABu, code_info.GetRegisterMaskOf(stack_map)); + + ASSERT_TRUE(CheckStackMask(code_info, stack_map, sp_mask3)); + + ASSERT_TRUE(stack_map.HasDexRegisterMap()); + DexRegisterMap dex_register_map = code_info.GetDexRegisterMapOf(stack_map); + ASSERT_EQ(number_of_dex_registers, 
dex_register_map.size()); + ASSERT_TRUE(dex_register_map[0].IsLive()); + ASSERT_TRUE(dex_register_map[1].IsLive()); + ASSERT_EQ(2u, dex_register_map.GetNumberOfLiveDexRegisters()); + + ASSERT_EQ(Kind::kInRegister, dex_register_map[0].GetKind()); + ASSERT_EQ(Kind::kInRegisterHigh, dex_register_map[1].GetKind()); + ASSERT_EQ(6, dex_register_map[0].GetMachineRegister()); + ASSERT_EQ(8, dex_register_map[1].GetMachineRegister()); + + DexRegisterLocation location0 = code_info.GetDexRegisterCatalogEntry(4); + DexRegisterLocation location1 = code_info.GetDexRegisterCatalogEntry(5); ASSERT_EQ(Kind::kInRegister, location0.GetKind()); ASSERT_EQ(Kind::kInRegisterHigh, location1.GetKind()); - ASSERT_EQ(Kind::kInRegister, location0.GetInternalKind()); - ASSERT_EQ(Kind::kInRegisterHigh, location1.GetInternalKind()); ASSERT_EQ(6, location0.GetValue()); ASSERT_EQ(8, location1.GetValue()); - ASSERT_FALSE(stack_map.HasInlineInfo(encoding.stack_map.encoding)); + ASSERT_FALSE(stack_map.HasInlineInfo()); } // Fourth stack map. { - StackMap stack_map = code_info.GetStackMapAt(3, encoding); - ASSERT_TRUE(stack_map.Equals(code_info.GetStackMapForDexPc(3u, encoding))); - ASSERT_TRUE(stack_map.Equals(code_info.GetStackMapForNativePcOffset(256u, encoding))); - ASSERT_EQ(3u, stack_map.GetDexPc(encoding.stack_map.encoding)); - ASSERT_EQ(256u, stack_map.GetNativePcOffset(encoding.stack_map.encoding, kRuntimeISA)); - ASSERT_EQ(0xCDu, code_info.GetRegisterMaskOf(encoding, stack_map)); - - ASSERT_TRUE(CheckStackMask(code_info, encoding, stack_map, sp_mask4)); - - ASSERT_TRUE(stack_map.HasDexRegisterMap(encoding.stack_map.encoding)); - DexRegisterMap dex_register_map = - code_info.GetDexRegisterMapOf(stack_map, encoding, number_of_dex_registers); - ASSERT_TRUE(dex_register_map.IsDexRegisterLive(0)); - ASSERT_TRUE(dex_register_map.IsDexRegisterLive(1)); - ASSERT_EQ(2u, dex_register_map.GetNumberOfLiveDexRegisters(number_of_dex_registers)); - // The Dex register map contains: - // - one 1-byte live bit mask, and - // - one 1-byte set of location catalog entry indices composed of two 2-bit values. - size_t expected_dex_register_map_size = 1u + 1u; - ASSERT_EQ(expected_dex_register_map_size, dex_register_map.Size()); - - ASSERT_EQ(Kind::kInFpuRegister, dex_register_map.GetLocationKind( - 0, number_of_dex_registers, code_info, encoding)); - ASSERT_EQ(Kind::kInFpuRegisterHigh, dex_register_map.GetLocationKind( - 1, number_of_dex_registers, code_info, encoding)); - ASSERT_EQ(Kind::kInFpuRegister, dex_register_map.GetLocationInternalKind( - 0, number_of_dex_registers, code_info, encoding)); - ASSERT_EQ(Kind::kInFpuRegisterHigh, dex_register_map.GetLocationInternalKind( - 1, number_of_dex_registers, code_info, encoding)); - ASSERT_EQ(3, dex_register_map.GetMachineRegister( - 0, number_of_dex_registers, code_info, encoding)); - ASSERT_EQ(1, dex_register_map.GetMachineRegister( - 1, number_of_dex_registers, code_info, encoding)); - - size_t index0 = dex_register_map.GetLocationCatalogEntryIndex( - 0, number_of_dex_registers, number_of_catalog_entries); - size_t index1 = dex_register_map.GetLocationCatalogEntryIndex( - 1, number_of_dex_registers, number_of_catalog_entries); - ASSERT_EQ(3u, index0); // Shared with second stack map. 
- ASSERT_EQ(6u, index1); - DexRegisterLocation location0 = location_catalog.GetDexRegisterLocation(index0); - DexRegisterLocation location1 = location_catalog.GetDexRegisterLocation(index1); + StackMap stack_map = code_info.GetStackMapAt(3); + ASSERT_TRUE(stack_map.Equals(code_info.GetStackMapForDexPc(3u))); + ASSERT_TRUE(stack_map.Equals(code_info.GetStackMapForNativePcOffset(256u * kPcAlign))); + ASSERT_EQ(3u, stack_map.GetDexPc()); + ASSERT_EQ(256u * kPcAlign, stack_map.GetNativePcOffset(kRuntimeISA)); + ASSERT_EQ(0xCDu, code_info.GetRegisterMaskOf(stack_map)); + + ASSERT_TRUE(CheckStackMask(code_info, stack_map, sp_mask4)); + + ASSERT_TRUE(stack_map.HasDexRegisterMap()); + DexRegisterMap dex_register_map = code_info.GetDexRegisterMapOf(stack_map); + ASSERT_EQ(number_of_dex_registers, dex_register_map.size()); + ASSERT_TRUE(dex_register_map[0].IsLive()); + ASSERT_TRUE(dex_register_map[1].IsLive()); + ASSERT_EQ(2u, dex_register_map.GetNumberOfLiveDexRegisters()); + + ASSERT_EQ(Kind::kInFpuRegister, dex_register_map[0].GetKind()); + ASSERT_EQ(Kind::kInFpuRegisterHigh, dex_register_map[1].GetKind()); + ASSERT_EQ(3, dex_register_map[0].GetMachineRegister()); + ASSERT_EQ(1, dex_register_map[1].GetMachineRegister()); + + DexRegisterLocation location0 = code_info.GetDexRegisterCatalogEntry(3); + DexRegisterLocation location1 = code_info.GetDexRegisterCatalogEntry(6); ASSERT_EQ(Kind::kInFpuRegister, location0.GetKind()); ASSERT_EQ(Kind::kInFpuRegisterHigh, location1.GetKind()); - ASSERT_EQ(Kind::kInFpuRegister, location0.GetInternalKind()); - ASSERT_EQ(Kind::kInFpuRegisterHigh, location1.GetInternalKind()); ASSERT_EQ(3, location0.GetValue()); ASSERT_EQ(1, location1.GetValue()); - ASSERT_FALSE(stack_map.HasInlineInfo(encoding.stack_map.encoding)); + ASSERT_FALSE(stack_map.HasInlineInfo()); } } TEST(StackMapTest, TestDeduplicateInlineInfoDexRegisterMap) { - ArenaPool pool; + MallocArenaPool pool; ArenaStack arena_stack(&pool); ScopedArenaAllocator allocator(&arena_stack); StackMapStream stream(&allocator, kRuntimeISA); + stream.BeginMethod(32, 0, 0, 2); ArtMethod art_method; ArenaBitVector sp_mask1(&allocator, 0, true); @@ -424,7 +308,7 @@ TEST(StackMapTest, TestDeduplicateInlineInfoDexRegisterMap) { sp_mask1.SetBit(4); const size_t number_of_dex_registers = 2; const size_t number_of_dex_registers_in_inline_info = 2; - stream.BeginStackMapEntry(0, 64, 0x3, &sp_mask1, number_of_dex_registers, 1); + stream.BeginStackMapEntry(0, 64 * kPcAlign, 0x3, &sp_mask1); stream.AddDexRegisterEntry(Kind::kInStack, 0); // Short location. stream.AddDexRegisterEntry(Kind::kConstant, -2); // Large location. 
stream.BeginInlineInfoEntry(&art_method, 3, number_of_dex_registers_in_inline_info); @@ -433,338 +317,204 @@ TEST(StackMapTest, TestDeduplicateInlineInfoDexRegisterMap) { stream.EndInlineInfoEntry(); stream.EndStackMapEntry(); - size_t size = stream.PrepareForFillIn(); - void* memory = allocator.Alloc(size, kArenaAllocMisc); - MemoryRegion region(memory, size); - stream.FillInCodeInfo(region); + stream.EndMethod(); + ScopedArenaVector<uint8_t> memory = stream.Encode(); - CodeInfo code_info(region); - CodeInfoEncoding encoding = code_info.ExtractEncoding(); - ASSERT_EQ(1u, code_info.GetNumberOfStackMaps(encoding)); + CodeInfo code_info(memory.data()); + ASSERT_EQ(1u, code_info.GetNumberOfStackMaps()); - uint32_t number_of_catalog_entries = code_info.GetNumberOfLocationCatalogEntries(encoding); + uint32_t number_of_catalog_entries = code_info.GetNumberOfLocationCatalogEntries(); ASSERT_EQ(2u, number_of_catalog_entries); - DexRegisterLocationCatalog location_catalog = code_info.GetDexRegisterLocationCatalog(encoding); - // The Dex register location catalog contains: - // - one 1-byte short Dex register locations, and - // - one 5-byte large Dex register location. - const size_t expected_location_catalog_size = 1u + 5u; - ASSERT_EQ(expected_location_catalog_size, location_catalog.Size()); // First stack map. { - StackMap stack_map = code_info.GetStackMapAt(0, encoding); - ASSERT_TRUE(stack_map.Equals(code_info.GetStackMapForDexPc(0, encoding))); - ASSERT_TRUE(stack_map.Equals(code_info.GetStackMapForNativePcOffset(64, encoding))); - ASSERT_EQ(0u, stack_map.GetDexPc(encoding.stack_map.encoding)); - ASSERT_EQ(64u, stack_map.GetNativePcOffset(encoding.stack_map.encoding, kRuntimeISA)); - ASSERT_EQ(0x3u, code_info.GetRegisterMaskOf(encoding, stack_map)); - - ASSERT_TRUE(CheckStackMask(code_info, encoding, stack_map, sp_mask1)); - - ASSERT_TRUE(stack_map.HasDexRegisterMap(encoding.stack_map.encoding)); - DexRegisterMap map(code_info.GetDexRegisterMapOf(stack_map, encoding, number_of_dex_registers)); - ASSERT_TRUE(map.IsDexRegisterLive(0)); - ASSERT_TRUE(map.IsDexRegisterLive(1)); - ASSERT_EQ(2u, map.GetNumberOfLiveDexRegisters(number_of_dex_registers)); - // The Dex register map contains: - // - one 1-byte live bit mask, and - // - one 1-byte set of location catalog entry indices composed of two 2-bit values. 
- size_t expected_map_size = 1u + 1u; - ASSERT_EQ(expected_map_size, map.Size()); - - ASSERT_EQ(Kind::kInStack, map.GetLocationKind(0, number_of_dex_registers, code_info, encoding)); - ASSERT_EQ(Kind::kConstant, - map.GetLocationKind(1, number_of_dex_registers, code_info, encoding)); - ASSERT_EQ(Kind::kInStack, - map.GetLocationInternalKind(0, number_of_dex_registers, code_info, encoding)); - ASSERT_EQ(Kind::kConstantLargeValue, - map.GetLocationInternalKind(1, number_of_dex_registers, code_info, encoding)); - ASSERT_EQ(0, map.GetStackOffsetInBytes(0, number_of_dex_registers, code_info, encoding)); - ASSERT_EQ(-2, map.GetConstant(1, number_of_dex_registers, code_info, encoding)); - - const size_t index0 = - map.GetLocationCatalogEntryIndex(0, number_of_dex_registers, number_of_catalog_entries); - const size_t index1 = - map.GetLocationCatalogEntryIndex(1, number_of_dex_registers, number_of_catalog_entries); - ASSERT_EQ(0u, index0); - ASSERT_EQ(1u, index1); - DexRegisterLocation location0 = location_catalog.GetDexRegisterLocation(index0); - DexRegisterLocation location1 = location_catalog.GetDexRegisterLocation(index1); + StackMap stack_map = code_info.GetStackMapAt(0); + ASSERT_TRUE(stack_map.Equals(code_info.GetStackMapForDexPc(0))); + ASSERT_TRUE(stack_map.Equals(code_info.GetStackMapForNativePcOffset(64 * kPcAlign))); + ASSERT_EQ(0u, stack_map.GetDexPc()); + ASSERT_EQ(64u * kPcAlign, stack_map.GetNativePcOffset(kRuntimeISA)); + ASSERT_EQ(0x3u, code_info.GetRegisterMaskOf(stack_map)); + + ASSERT_TRUE(CheckStackMask(code_info, stack_map, sp_mask1)); + + ASSERT_TRUE(stack_map.HasDexRegisterMap()); + DexRegisterMap map(code_info.GetDexRegisterMapOf(stack_map)); + ASSERT_EQ(number_of_dex_registers, map.size()); + ASSERT_TRUE(map[0].IsLive()); + ASSERT_TRUE(map[1].IsLive()); + ASSERT_EQ(2u, map.GetNumberOfLiveDexRegisters()); + + ASSERT_EQ(Kind::kInStack, map[0].GetKind()); + ASSERT_EQ(Kind::kConstant, map[1].GetKind()); + ASSERT_EQ(0, map[0].GetStackOffsetInBytes()); + ASSERT_EQ(-2, map[1].GetConstant()); + + DexRegisterLocation location0 = code_info.GetDexRegisterCatalogEntry(0); + DexRegisterLocation location1 = code_info.GetDexRegisterCatalogEntry(1); ASSERT_EQ(Kind::kInStack, location0.GetKind()); ASSERT_EQ(Kind::kConstant, location1.GetKind()); - ASSERT_EQ(Kind::kInStack, location0.GetInternalKind()); - ASSERT_EQ(Kind::kConstantLargeValue, location1.GetInternalKind()); ASSERT_EQ(0, location0.GetValue()); ASSERT_EQ(-2, location1.GetValue()); - - // Test that the inline info dex register map deduplicated to the same offset as the stack map - // one. - ASSERT_TRUE(stack_map.HasInlineInfo(encoding.stack_map.encoding)); - InlineInfo inline_info = code_info.GetInlineInfoOf(stack_map, encoding); - EXPECT_EQ(inline_info.GetDexRegisterMapOffsetAtDepth(encoding.inline_info.encoding, 0), - stack_map.GetDexRegisterMapOffset(encoding.stack_map.encoding)); } } TEST(StackMapTest, TestNonLiveDexRegisters) { - ArenaPool pool; + MallocArenaPool pool; ArenaStack arena_stack(&pool); ScopedArenaAllocator allocator(&arena_stack); StackMapStream stream(&allocator, kRuntimeISA); + stream.BeginMethod(32, 0, 0, 2); ArenaBitVector sp_mask(&allocator, 0, false); uint32_t number_of_dex_registers = 2; - stream.BeginStackMapEntry(0, 64, 0x3, &sp_mask, number_of_dex_registers, 0); + stream.BeginStackMapEntry(0, 64 * kPcAlign, 0x3, &sp_mask); stream.AddDexRegisterEntry(Kind::kNone, 0); // No location. stream.AddDexRegisterEntry(Kind::kConstant, -2); // Large location. 
stream.EndStackMapEntry(); - size_t size = stream.PrepareForFillIn(); - void* memory = allocator.Alloc(size, kArenaAllocMisc); - MemoryRegion region(memory, size); - stream.FillInCodeInfo(region); + stream.EndMethod(); + ScopedArenaVector<uint8_t> memory = stream.Encode(); - CodeInfo code_info(region); - CodeInfoEncoding encoding = code_info.ExtractEncoding(); - ASSERT_EQ(1u, code_info.GetNumberOfStackMaps(encoding)); + CodeInfo code_info(memory.data()); + ASSERT_EQ(1u, code_info.GetNumberOfStackMaps()); - uint32_t number_of_catalog_entries = code_info.GetNumberOfLocationCatalogEntries(encoding); + uint32_t number_of_catalog_entries = code_info.GetNumberOfLocationCatalogEntries(); ASSERT_EQ(1u, number_of_catalog_entries); - DexRegisterLocationCatalog location_catalog = code_info.GetDexRegisterLocationCatalog(encoding); - // The Dex register location catalog contains: - // - one 5-byte large Dex register location. - size_t expected_location_catalog_size = 5u; - ASSERT_EQ(expected_location_catalog_size, location_catalog.Size()); - - StackMap stack_map = code_info.GetStackMapAt(0, encoding); - ASSERT_TRUE(stack_map.Equals(code_info.GetStackMapForDexPc(0, encoding))); - ASSERT_TRUE(stack_map.Equals(code_info.GetStackMapForNativePcOffset(64, encoding))); - ASSERT_EQ(0u, stack_map.GetDexPc(encoding.stack_map.encoding)); - ASSERT_EQ(64u, stack_map.GetNativePcOffset(encoding.stack_map.encoding, kRuntimeISA)); - ASSERT_EQ(0x3u, code_info.GetRegisterMaskOf(encoding, stack_map)); - - ASSERT_TRUE(stack_map.HasDexRegisterMap(encoding.stack_map.encoding)); - DexRegisterMap dex_register_map = - code_info.GetDexRegisterMapOf(stack_map, encoding, number_of_dex_registers); - ASSERT_FALSE(dex_register_map.IsDexRegisterLive(0)); - ASSERT_TRUE(dex_register_map.IsDexRegisterLive(1)); - ASSERT_EQ(1u, dex_register_map.GetNumberOfLiveDexRegisters(number_of_dex_registers)); - // The Dex register map contains: - // - one 1-byte live bit mask. - // No space is allocated for the sole location catalog entry index, as it is useless. 
- size_t expected_dex_register_map_size = 1u + 0u; - ASSERT_EQ(expected_dex_register_map_size, dex_register_map.Size()); - - ASSERT_EQ(Kind::kNone, dex_register_map.GetLocationKind( - 0, number_of_dex_registers, code_info, encoding)); - ASSERT_EQ(Kind::kConstant, dex_register_map.GetLocationKind( - 1, number_of_dex_registers, code_info, encoding)); - ASSERT_EQ(Kind::kNone, dex_register_map.GetLocationInternalKind( - 0, number_of_dex_registers, code_info, encoding)); - ASSERT_EQ(Kind::kConstantLargeValue, dex_register_map.GetLocationInternalKind( - 1, number_of_dex_registers, code_info, encoding)); - ASSERT_EQ(-2, dex_register_map.GetConstant(1, number_of_dex_registers, code_info, encoding)); - - size_t index0 = dex_register_map.GetLocationCatalogEntryIndex( - 0, number_of_dex_registers, number_of_catalog_entries); - size_t index1 = dex_register_map.GetLocationCatalogEntryIndex( - 1, number_of_dex_registers, number_of_catalog_entries); - ASSERT_EQ(DexRegisterLocationCatalog::kNoLocationEntryIndex, index0); - ASSERT_EQ(0u, index1); - DexRegisterLocation location0 = location_catalog.GetDexRegisterLocation(index0); - DexRegisterLocation location1 = location_catalog.GetDexRegisterLocation(index1); - ASSERT_EQ(Kind::kNone, location0.GetKind()); + + StackMap stack_map = code_info.GetStackMapAt(0); + ASSERT_TRUE(stack_map.Equals(code_info.GetStackMapForDexPc(0))); + ASSERT_TRUE(stack_map.Equals(code_info.GetStackMapForNativePcOffset(64 * kPcAlign))); + ASSERT_EQ(0u, stack_map.GetDexPc()); + ASSERT_EQ(64u * kPcAlign, stack_map.GetNativePcOffset(kRuntimeISA)); + ASSERT_EQ(0x3u, code_info.GetRegisterMaskOf(stack_map)); + + ASSERT_TRUE(stack_map.HasDexRegisterMap()); + DexRegisterMap dex_register_map = code_info.GetDexRegisterMapOf(stack_map); + ASSERT_EQ(number_of_dex_registers, dex_register_map.size()); + ASSERT_FALSE(dex_register_map[0].IsLive()); + ASSERT_TRUE(dex_register_map[1].IsLive()); + ASSERT_EQ(1u, dex_register_map.GetNumberOfLiveDexRegisters()); + + ASSERT_EQ(Kind::kNone, dex_register_map[0].GetKind()); + ASSERT_EQ(Kind::kConstant, dex_register_map[1].GetKind()); + ASSERT_EQ(-2, dex_register_map[1].GetConstant()); + + DexRegisterLocation location1 = code_info.GetDexRegisterCatalogEntry(0); ASSERT_EQ(Kind::kConstant, location1.GetKind()); - ASSERT_EQ(Kind::kNone, location0.GetInternalKind()); - ASSERT_EQ(Kind::kConstantLargeValue, location1.GetInternalKind()); - ASSERT_EQ(0, location0.GetValue()); ASSERT_EQ(-2, location1.GetValue()); - ASSERT_FALSE(stack_map.HasInlineInfo(encoding.stack_map.encoding)); -} - -// Generate a stack map whose dex register offset is -// StackMap::kNoDexRegisterMapSmallEncoding, and ensure we do -// not treat it as kNoDexRegisterMap. -TEST(StackMapTest, DexRegisterMapOffsetOverflow) { - ArenaPool pool; - ArenaStack arena_stack(&pool); - ScopedArenaAllocator allocator(&arena_stack); - StackMapStream stream(&allocator, kRuntimeISA); - - ArenaBitVector sp_mask(&allocator, 0, false); - uint32_t number_of_dex_registers = 1024; - // Create the first stack map (and its Dex register map). 
- stream.BeginStackMapEntry(0, 64, 0x3, &sp_mask, number_of_dex_registers, 0); - uint32_t number_of_dex_live_registers_in_dex_register_map_0 = number_of_dex_registers - 8; - for (uint32_t i = 0; i < number_of_dex_live_registers_in_dex_register_map_0; ++i) { - // Use two different Dex register locations to populate this map, - // as using a single value (in the whole CodeInfo object) would - // make this Dex register mapping data empty (see - // art::DexRegisterMap::SingleEntrySizeInBits). - stream.AddDexRegisterEntry(Kind::kConstant, i % 2); // Short location. - } - stream.EndStackMapEntry(); - // Create the second stack map (and its Dex register map). - stream.BeginStackMapEntry(0, 64, 0x3, &sp_mask, number_of_dex_registers, 0); - for (uint32_t i = 0; i < number_of_dex_registers; ++i) { - stream.AddDexRegisterEntry(Kind::kConstant, 0); // Short location. - } - stream.EndStackMapEntry(); - - size_t size = stream.PrepareForFillIn(); - void* memory = allocator.Alloc(size, kArenaAllocMisc); - MemoryRegion region(memory, size); - stream.FillInCodeInfo(region); - - CodeInfo code_info(region); - CodeInfoEncoding encoding = code_info.ExtractEncoding(); - // The location catalog contains two entries (DexRegisterLocation(kConstant, 0) - // and DexRegisterLocation(kConstant, 1)), therefore the location catalog index - // has a size of 1 bit. - uint32_t number_of_catalog_entries = code_info.GetNumberOfLocationCatalogEntries(encoding); - ASSERT_EQ(2u, number_of_catalog_entries); - ASSERT_EQ(1u, DexRegisterMap::SingleEntrySizeInBits(number_of_catalog_entries)); - - // The first Dex register map contains: - // - a live register bit mask for 1024 registers (that is, 128 bytes of - // data); and - // - Dex register mapping information for 1016 1-bit Dex (live) register - // locations (that is, 127 bytes of data). - // Hence it has a size of 255 bytes, and therefore... - ASSERT_EQ(128u, DexRegisterMap::GetLiveBitMaskSize(number_of_dex_registers)); - StackMap stack_map0 = code_info.GetStackMapAt(0, encoding); - DexRegisterMap dex_register_map0 = - code_info.GetDexRegisterMapOf(stack_map0, encoding, number_of_dex_registers); - ASSERT_EQ(127u, dex_register_map0.GetLocationMappingDataSize(number_of_dex_registers, - number_of_catalog_entries)); - ASSERT_EQ(255u, dex_register_map0.Size()); - - StackMap stack_map1 = code_info.GetStackMapAt(1, encoding); - ASSERT_TRUE(stack_map1.HasDexRegisterMap(encoding.stack_map.encoding)); - // ...the offset of the second Dex register map (relative to the - // beginning of the Dex register maps region) is 255 (i.e., - // kNoDexRegisterMapSmallEncoding). - ASSERT_NE(stack_map1.GetDexRegisterMapOffset(encoding.stack_map.encoding), - StackMap::kNoDexRegisterMap); - ASSERT_EQ(stack_map1.GetDexRegisterMapOffset(encoding.stack_map.encoding), 0xFFu); + ASSERT_FALSE(stack_map.HasInlineInfo()); } TEST(StackMapTest, TestShareDexRegisterMap) { - ArenaPool pool; + MallocArenaPool pool; ArenaStack arena_stack(&pool); ScopedArenaAllocator allocator(&arena_stack); StackMapStream stream(&allocator, kRuntimeISA); + stream.BeginMethod(32, 0, 0, 2); ArenaBitVector sp_mask(&allocator, 0, false); uint32_t number_of_dex_registers = 2; // First stack map. - stream.BeginStackMapEntry(0, 64, 0x3, &sp_mask, number_of_dex_registers, 0); + stream.BeginStackMapEntry(0, 64 * kPcAlign, 0x3, &sp_mask); stream.AddDexRegisterEntry(Kind::kInRegister, 0); // Short location. stream.AddDexRegisterEntry(Kind::kConstant, -2); // Large location. 
stream.EndStackMapEntry(); // Second stack map, which should share the same dex register map. - stream.BeginStackMapEntry(0, 64, 0x3, &sp_mask, number_of_dex_registers, 0); + stream.BeginStackMapEntry(0, 65 * kPcAlign, 0x3, &sp_mask); stream.AddDexRegisterEntry(Kind::kInRegister, 0); // Short location. stream.AddDexRegisterEntry(Kind::kConstant, -2); // Large location. stream.EndStackMapEntry(); // Third stack map (doesn't share the dex register map). - stream.BeginStackMapEntry(0, 64, 0x3, &sp_mask, number_of_dex_registers, 0); + stream.BeginStackMapEntry(0, 66 * kPcAlign, 0x3, &sp_mask); stream.AddDexRegisterEntry(Kind::kInRegister, 2); // Short location. stream.AddDexRegisterEntry(Kind::kConstant, -2); // Large location. stream.EndStackMapEntry(); - size_t size = stream.PrepareForFillIn(); - void* memory = allocator.Alloc(size, kArenaAllocMisc); - MemoryRegion region(memory, size); - stream.FillInCodeInfo(region); + stream.EndMethod(); + ScopedArenaVector<uint8_t> memory = stream.Encode(); - CodeInfo ci(region); - CodeInfoEncoding encoding = ci.ExtractEncoding(); + CodeInfo ci(memory.data()); // Verify first stack map. - StackMap sm0 = ci.GetStackMapAt(0, encoding); - DexRegisterMap dex_registers0 = ci.GetDexRegisterMapOf(sm0, encoding, number_of_dex_registers); - ASSERT_EQ(0, dex_registers0.GetMachineRegister(0, number_of_dex_registers, ci, encoding)); - ASSERT_EQ(-2, dex_registers0.GetConstant(1, number_of_dex_registers, ci, encoding)); + StackMap sm0 = ci.GetStackMapAt(0); + DexRegisterMap dex_registers0 = ci.GetDexRegisterMapOf(sm0); + ASSERT_EQ(number_of_dex_registers, dex_registers0.size()); + ASSERT_EQ(0, dex_registers0[0].GetMachineRegister()); + ASSERT_EQ(-2, dex_registers0[1].GetConstant()); // Verify second stack map. - StackMap sm1 = ci.GetStackMapAt(1, encoding); - DexRegisterMap dex_registers1 = ci.GetDexRegisterMapOf(sm1, encoding, number_of_dex_registers); - ASSERT_EQ(0, dex_registers1.GetMachineRegister(0, number_of_dex_registers, ci, encoding)); - ASSERT_EQ(-2, dex_registers1.GetConstant(1, number_of_dex_registers, ci, encoding)); + StackMap sm1 = ci.GetStackMapAt(1); + DexRegisterMap dex_registers1 = ci.GetDexRegisterMapOf(sm1); + ASSERT_EQ(number_of_dex_registers, dex_registers1.size()); + ASSERT_EQ(0, dex_registers1[0].GetMachineRegister()); + ASSERT_EQ(-2, dex_registers1[1].GetConstant()); // Verify third stack map. - StackMap sm2 = ci.GetStackMapAt(2, encoding); - DexRegisterMap dex_registers2 = ci.GetDexRegisterMapOf(sm2, encoding, number_of_dex_registers); - ASSERT_EQ(2, dex_registers2.GetMachineRegister(0, number_of_dex_registers, ci, encoding)); - ASSERT_EQ(-2, dex_registers2.GetConstant(1, number_of_dex_registers, ci, encoding)); - - // Verify dex register map offsets. - ASSERT_EQ(sm0.GetDexRegisterMapOffset(encoding.stack_map.encoding), - sm1.GetDexRegisterMapOffset(encoding.stack_map.encoding)); - ASSERT_NE(sm0.GetDexRegisterMapOffset(encoding.stack_map.encoding), - sm2.GetDexRegisterMapOffset(encoding.stack_map.encoding)); - ASSERT_NE(sm1.GetDexRegisterMapOffset(encoding.stack_map.encoding), - sm2.GetDexRegisterMapOffset(encoding.stack_map.encoding)); + StackMap sm2 = ci.GetStackMapAt(2); + DexRegisterMap dex_registers2 = ci.GetDexRegisterMapOf(sm2); + ASSERT_EQ(number_of_dex_registers, dex_registers2.size()); + ASSERT_EQ(2, dex_registers2[0].GetMachineRegister()); + ASSERT_EQ(-2, dex_registers2[1].GetConstant()); + + // Verify dex register mask offsets. + ASSERT_FALSE(sm1.HasDexRegisterMaskIndex()); // No delta. 
+ ASSERT_TRUE(sm2.HasDexRegisterMaskIndex()); // Has delta. } TEST(StackMapTest, TestNoDexRegisterMap) { - ArenaPool pool; + MallocArenaPool pool; ArenaStack arena_stack(&pool); ScopedArenaAllocator allocator(&arena_stack); StackMapStream stream(&allocator, kRuntimeISA); + stream.BeginMethod(32, 0, 0, 1); ArenaBitVector sp_mask(&allocator, 0, false); uint32_t number_of_dex_registers = 0; - stream.BeginStackMapEntry(0, 64, 0x3, &sp_mask, number_of_dex_registers, 0); + stream.BeginStackMapEntry(0, 64 * kPcAlign, 0x3, &sp_mask); stream.EndStackMapEntry(); number_of_dex_registers = 1; - stream.BeginStackMapEntry(1, 68, 0x4, &sp_mask, number_of_dex_registers, 0); + stream.BeginStackMapEntry(1, 68 * kPcAlign, 0x4, &sp_mask); + stream.AddDexRegisterEntry(Kind::kNone, 0); stream.EndStackMapEntry(); - size_t size = stream.PrepareForFillIn(); - void* memory = allocator.Alloc(size, kArenaAllocMisc); - MemoryRegion region(memory, size); - stream.FillInCodeInfo(region); + stream.EndMethod(); + ScopedArenaVector<uint8_t> memory = stream.Encode(); - CodeInfo code_info(region); - CodeInfoEncoding encoding = code_info.ExtractEncoding(); - ASSERT_EQ(2u, code_info.GetNumberOfStackMaps(encoding)); + CodeInfo code_info(memory.data()); + ASSERT_EQ(2u, code_info.GetNumberOfStackMaps()); - uint32_t number_of_catalog_entries = code_info.GetNumberOfLocationCatalogEntries(encoding); + uint32_t number_of_catalog_entries = code_info.GetNumberOfLocationCatalogEntries(); ASSERT_EQ(0u, number_of_catalog_entries); - DexRegisterLocationCatalog location_catalog = code_info.GetDexRegisterLocationCatalog(encoding); - ASSERT_EQ(0u, location_catalog.Size()); - - StackMap stack_map = code_info.GetStackMapAt(0, encoding); - ASSERT_TRUE(stack_map.Equals(code_info.GetStackMapForDexPc(0, encoding))); - ASSERT_TRUE(stack_map.Equals(code_info.GetStackMapForNativePcOffset(64, encoding))); - ASSERT_EQ(0u, stack_map.GetDexPc(encoding.stack_map.encoding)); - ASSERT_EQ(64u, stack_map.GetNativePcOffset(encoding.stack_map.encoding, kRuntimeISA)); - ASSERT_EQ(0x3u, code_info.GetRegisterMaskOf(encoding, stack_map)); - - ASSERT_FALSE(stack_map.HasDexRegisterMap(encoding.stack_map.encoding)); - ASSERT_FALSE(stack_map.HasInlineInfo(encoding.stack_map.encoding)); - - stack_map = code_info.GetStackMapAt(1, encoding); - ASSERT_TRUE(stack_map.Equals(code_info.GetStackMapForDexPc(1, encoding))); - ASSERT_TRUE(stack_map.Equals(code_info.GetStackMapForNativePcOffset(68, encoding))); - ASSERT_EQ(1u, stack_map.GetDexPc(encoding.stack_map.encoding)); - ASSERT_EQ(68u, stack_map.GetNativePcOffset(encoding.stack_map.encoding, kRuntimeISA)); - ASSERT_EQ(0x4u, code_info.GetRegisterMaskOf(encoding, stack_map)); - - ASSERT_FALSE(stack_map.HasDexRegisterMap(encoding.stack_map.encoding)); - ASSERT_FALSE(stack_map.HasInlineInfo(encoding.stack_map.encoding)); + + StackMap stack_map = code_info.GetStackMapAt(0); + ASSERT_TRUE(stack_map.Equals(code_info.GetStackMapForDexPc(0))); + ASSERT_TRUE(stack_map.Equals(code_info.GetStackMapForNativePcOffset(64 * kPcAlign))); + ASSERT_EQ(0u, stack_map.GetDexPc()); + ASSERT_EQ(64u * kPcAlign, stack_map.GetNativePcOffset(kRuntimeISA)); + ASSERT_EQ(0x3u, code_info.GetRegisterMaskOf(stack_map)); + + ASSERT_FALSE(stack_map.HasDexRegisterMap()); + ASSERT_FALSE(stack_map.HasInlineInfo()); + + stack_map = code_info.GetStackMapAt(1); + ASSERT_TRUE(stack_map.Equals(code_info.GetStackMapForDexPc(1))); + ASSERT_TRUE(stack_map.Equals(code_info.GetStackMapForNativePcOffset(68 * kPcAlign))); + ASSERT_EQ(1u, stack_map.GetDexPc()); + 
ASSERT_EQ(68u * kPcAlign, stack_map.GetNativePcOffset(kRuntimeISA)); + ASSERT_EQ(0x4u, code_info.GetRegisterMaskOf(stack_map)); + + ASSERT_TRUE(stack_map.HasDexRegisterMap()); + ASSERT_FALSE(stack_map.HasInlineInfo()); } TEST(StackMapTest, InlineTest) { - ArenaPool pool; + MallocArenaPool pool; ArenaStack arena_stack(&pool); ScopedArenaAllocator allocator(&arena_stack); StackMapStream stream(&allocator, kRuntimeISA); + stream.BeginMethod(32, 0, 0, 2); ArtMethod art_method; ArenaBitVector sp_mask1(&allocator, 0, true); @@ -772,7 +522,7 @@ TEST(StackMapTest, InlineTest) { sp_mask1.SetBit(4); // First stack map. - stream.BeginStackMapEntry(0, 64, 0x3, &sp_mask1, 2, 2); + stream.BeginStackMapEntry(0, 10 * kPcAlign, 0x3, &sp_mask1); stream.AddDexRegisterEntry(Kind::kInStack, 0); stream.AddDexRegisterEntry(Kind::kConstant, 4); @@ -788,7 +538,7 @@ TEST(StackMapTest, InlineTest) { stream.EndStackMapEntry(); // Second stack map. - stream.BeginStackMapEntry(2, 22, 0x3, &sp_mask1, 2, 3); + stream.BeginStackMapEntry(2, 22 * kPcAlign, 0x3, &sp_mask1); stream.AddDexRegisterEntry(Kind::kInStack, 56); stream.AddDexRegisterEntry(Kind::kConstant, 0); @@ -806,13 +556,13 @@ TEST(StackMapTest, InlineTest) { stream.EndStackMapEntry(); // Third stack map. - stream.BeginStackMapEntry(4, 56, 0x3, &sp_mask1, 2, 0); + stream.BeginStackMapEntry(4, 56 * kPcAlign, 0x3, &sp_mask1); stream.AddDexRegisterEntry(Kind::kNone, 0); stream.AddDexRegisterEntry(Kind::kConstant, 4); stream.EndStackMapEntry(); // Fourth stack map. - stream.BeginStackMapEntry(6, 78, 0x3, &sp_mask1, 2, 3); + stream.BeginStackMapEntry(6, 78 * kPcAlign, 0x3, &sp_mask1); stream.AddDexRegisterEntry(Kind::kInStack, 56); stream.AddDexRegisterEntry(Kind::kConstant, 0); @@ -828,204 +578,202 @@ TEST(StackMapTest, InlineTest) { stream.EndStackMapEntry(); - size_t size = stream.PrepareForFillIn(); - void* memory = allocator.Alloc(size, kArenaAllocMisc); - MemoryRegion region(memory, size); - stream.FillInCodeInfo(region); + stream.EndMethod(); + ScopedArenaVector<uint8_t> memory = stream.Encode(); - CodeInfo ci(region); - CodeInfoEncoding encoding = ci.ExtractEncoding(); + CodeInfo ci(memory.data()); { // Verify first stack map. 
- StackMap sm0 = ci.GetStackMapAt(0, encoding); - - DexRegisterMap dex_registers0 = ci.GetDexRegisterMapOf(sm0, encoding, 2); - ASSERT_EQ(0, dex_registers0.GetStackOffsetInBytes(0, 2, ci, encoding)); - ASSERT_EQ(4, dex_registers0.GetConstant(1, 2, ci, encoding)); - - InlineInfo if0 = ci.GetInlineInfoOf(sm0, encoding); - ASSERT_EQ(2u, if0.GetDepth(encoding.inline_info.encoding)); - ASSERT_EQ(2u, if0.GetDexPcAtDepth(encoding.inline_info.encoding, 0)); - ASSERT_TRUE(if0.EncodesArtMethodAtDepth(encoding.inline_info.encoding, 0)); - ASSERT_EQ(3u, if0.GetDexPcAtDepth(encoding.inline_info.encoding, 1)); - ASSERT_TRUE(if0.EncodesArtMethodAtDepth(encoding.inline_info.encoding, 1)); - - DexRegisterMap dex_registers1 = ci.GetDexRegisterMapAtDepth(0, if0, encoding, 1); - ASSERT_EQ(8, dex_registers1.GetStackOffsetInBytes(0, 1, ci, encoding)); - - DexRegisterMap dex_registers2 = ci.GetDexRegisterMapAtDepth(1, if0, encoding, 3); - ASSERT_EQ(16, dex_registers2.GetStackOffsetInBytes(0, 3, ci, encoding)); - ASSERT_EQ(20, dex_registers2.GetConstant(1, 3, ci, encoding)); - ASSERT_EQ(15, dex_registers2.GetMachineRegister(2, 3, ci, encoding)); + StackMap sm0 = ci.GetStackMapAt(0); + + DexRegisterMap dex_registers0 = ci.GetDexRegisterMapOf(sm0); + ASSERT_EQ(2u, dex_registers0.size()); + ASSERT_EQ(0, dex_registers0[0].GetStackOffsetInBytes()); + ASSERT_EQ(4, dex_registers0[1].GetConstant()); + + auto inline_infos = ci.GetInlineInfosOf(sm0); + ASSERT_EQ(2u, inline_infos.size()); + ASSERT_EQ(2u, inline_infos[0].GetDexPc()); + ASSERT_TRUE(inline_infos[0].EncodesArtMethod()); + ASSERT_EQ(3u, inline_infos[1].GetDexPc()); + ASSERT_TRUE(inline_infos[1].EncodesArtMethod()); + + DexRegisterMap dex_registers1 = ci.GetInlineDexRegisterMapOf(sm0, inline_infos[0]); + ASSERT_EQ(1u, dex_registers1.size()); + ASSERT_EQ(8, dex_registers1[0].GetStackOffsetInBytes()); + + DexRegisterMap dex_registers2 = ci.GetInlineDexRegisterMapOf(sm0, inline_infos[1]); + ASSERT_EQ(3u, dex_registers2.size()); + ASSERT_EQ(16, dex_registers2[0].GetStackOffsetInBytes()); + ASSERT_EQ(20, dex_registers2[1].GetConstant()); + ASSERT_EQ(15, dex_registers2[2].GetMachineRegister()); } { // Verify second stack map. 
- StackMap sm1 = ci.GetStackMapAt(1, encoding); - - DexRegisterMap dex_registers0 = ci.GetDexRegisterMapOf(sm1, encoding, 2); - ASSERT_EQ(56, dex_registers0.GetStackOffsetInBytes(0, 2, ci, encoding)); - ASSERT_EQ(0, dex_registers0.GetConstant(1, 2, ci, encoding)); - - InlineInfo if1 = ci.GetInlineInfoOf(sm1, encoding); - ASSERT_EQ(3u, if1.GetDepth(encoding.inline_info.encoding)); - ASSERT_EQ(2u, if1.GetDexPcAtDepth(encoding.inline_info.encoding, 0)); - ASSERT_TRUE(if1.EncodesArtMethodAtDepth(encoding.inline_info.encoding, 0)); - ASSERT_EQ(3u, if1.GetDexPcAtDepth(encoding.inline_info.encoding, 1)); - ASSERT_TRUE(if1.EncodesArtMethodAtDepth(encoding.inline_info.encoding, 1)); - ASSERT_EQ(5u, if1.GetDexPcAtDepth(encoding.inline_info.encoding, 2)); - ASSERT_TRUE(if1.EncodesArtMethodAtDepth(encoding.inline_info.encoding, 2)); - - DexRegisterMap dex_registers1 = ci.GetDexRegisterMapAtDepth(0, if1, encoding, 1); - ASSERT_EQ(12, dex_registers1.GetStackOffsetInBytes(0, 1, ci, encoding)); - - DexRegisterMap dex_registers2 = ci.GetDexRegisterMapAtDepth(1, if1, encoding, 3); - ASSERT_EQ(80, dex_registers2.GetStackOffsetInBytes(0, 3, ci, encoding)); - ASSERT_EQ(10, dex_registers2.GetConstant(1, 3, ci, encoding)); - ASSERT_EQ(5, dex_registers2.GetMachineRegister(2, 3, ci, encoding)); - - ASSERT_FALSE(if1.HasDexRegisterMapAtDepth(encoding.inline_info.encoding, 2)); + StackMap sm1 = ci.GetStackMapAt(1); + + DexRegisterMap dex_registers0 = ci.GetDexRegisterMapOf(sm1); + ASSERT_EQ(2u, dex_registers0.size()); + ASSERT_EQ(56, dex_registers0[0].GetStackOffsetInBytes()); + ASSERT_EQ(0, dex_registers0[1].GetConstant()); + + auto inline_infos = ci.GetInlineInfosOf(sm1); + ASSERT_EQ(3u, inline_infos.size()); + ASSERT_EQ(2u, inline_infos[0].GetDexPc()); + ASSERT_TRUE(inline_infos[0].EncodesArtMethod()); + ASSERT_EQ(3u, inline_infos[1].GetDexPc()); + ASSERT_TRUE(inline_infos[1].EncodesArtMethod()); + ASSERT_EQ(5u, inline_infos[2].GetDexPc()); + ASSERT_TRUE(inline_infos[2].EncodesArtMethod()); + + DexRegisterMap dex_registers1 = ci.GetInlineDexRegisterMapOf(sm1, inline_infos[0]); + ASSERT_EQ(1u, dex_registers1.size()); + ASSERT_EQ(12, dex_registers1[0].GetStackOffsetInBytes()); + + DexRegisterMap dex_registers2 = ci.GetInlineDexRegisterMapOf(sm1, inline_infos[1]); + ASSERT_EQ(3u, dex_registers2.size()); + ASSERT_EQ(80, dex_registers2[0].GetStackOffsetInBytes()); + ASSERT_EQ(10, dex_registers2[1].GetConstant()); + ASSERT_EQ(5, dex_registers2[2].GetMachineRegister()); } { // Verify third stack map. - StackMap sm2 = ci.GetStackMapAt(2, encoding); + StackMap sm2 = ci.GetStackMapAt(2); - DexRegisterMap dex_registers0 = ci.GetDexRegisterMapOf(sm2, encoding, 2); - ASSERT_FALSE(dex_registers0.IsDexRegisterLive(0)); - ASSERT_EQ(4, dex_registers0.GetConstant(1, 2, ci, encoding)); - ASSERT_FALSE(sm2.HasInlineInfo(encoding.stack_map.encoding)); + DexRegisterMap dex_registers0 = ci.GetDexRegisterMapOf(sm2); + ASSERT_EQ(2u, dex_registers0.size()); + ASSERT_FALSE(dex_registers0[0].IsLive()); + ASSERT_EQ(4, dex_registers0[1].GetConstant()); + ASSERT_FALSE(sm2.HasInlineInfo()); } { // Verify fourth stack map. 
- StackMap sm3 = ci.GetStackMapAt(3, encoding); - - DexRegisterMap dex_registers0 = ci.GetDexRegisterMapOf(sm3, encoding, 2); - ASSERT_EQ(56, dex_registers0.GetStackOffsetInBytes(0, 2, ci, encoding)); - ASSERT_EQ(0, dex_registers0.GetConstant(1, 2, ci, encoding)); - - InlineInfo if2 = ci.GetInlineInfoOf(sm3, encoding); - ASSERT_EQ(3u, if2.GetDepth(encoding.inline_info.encoding)); - ASSERT_EQ(2u, if2.GetDexPcAtDepth(encoding.inline_info.encoding, 0)); - ASSERT_TRUE(if2.EncodesArtMethodAtDepth(encoding.inline_info.encoding, 0)); - ASSERT_EQ(5u, if2.GetDexPcAtDepth(encoding.inline_info.encoding, 1)); - ASSERT_TRUE(if2.EncodesArtMethodAtDepth(encoding.inline_info.encoding, 1)); - ASSERT_EQ(10u, if2.GetDexPcAtDepth(encoding.inline_info.encoding, 2)); - ASSERT_TRUE(if2.EncodesArtMethodAtDepth(encoding.inline_info.encoding, 2)); - - ASSERT_FALSE(if2.HasDexRegisterMapAtDepth(encoding.inline_info.encoding, 0)); - - DexRegisterMap dex_registers1 = ci.GetDexRegisterMapAtDepth(1, if2, encoding, 1); - ASSERT_EQ(2, dex_registers1.GetMachineRegister(0, 1, ci, encoding)); - - DexRegisterMap dex_registers2 = ci.GetDexRegisterMapAtDepth(2, if2, encoding, 2); - ASSERT_FALSE(dex_registers2.IsDexRegisterLive(0)); - ASSERT_EQ(3, dex_registers2.GetMachineRegister(1, 2, ci, encoding)); + StackMap sm3 = ci.GetStackMapAt(3); + + DexRegisterMap dex_registers0 = ci.GetDexRegisterMapOf(sm3); + ASSERT_EQ(2u, dex_registers0.size()); + ASSERT_EQ(56, dex_registers0[0].GetStackOffsetInBytes()); + ASSERT_EQ(0, dex_registers0[1].GetConstant()); + + auto inline_infos = ci.GetInlineInfosOf(sm3); + ASSERT_EQ(3u, inline_infos.size()); + ASSERT_EQ(2u, inline_infos[0].GetDexPc()); + ASSERT_TRUE(inline_infos[0].EncodesArtMethod()); + ASSERT_EQ(5u, inline_infos[1].GetDexPc()); + ASSERT_TRUE(inline_infos[1].EncodesArtMethod()); + ASSERT_EQ(10u, inline_infos[2].GetDexPc()); + ASSERT_TRUE(inline_infos[2].EncodesArtMethod()); + + DexRegisterMap dex_registers1 = ci.GetInlineDexRegisterMapOf(sm3, inline_infos[1]); + ASSERT_EQ(1u, dex_registers1.size()); + ASSERT_EQ(2, dex_registers1[0].GetMachineRegister()); + + DexRegisterMap dex_registers2 = ci.GetInlineDexRegisterMapOf(sm3, inline_infos[2]); + ASSERT_EQ(2u, dex_registers2.size()); + ASSERT_FALSE(dex_registers2[0].IsLive()); + ASSERT_EQ(3, dex_registers2[1].GetMachineRegister()); } } -TEST(StackMapTest, CodeOffsetTest) { - // Test minimum alignments, encoding, and decoding. 
- CodeOffset offset_thumb2 = - CodeOffset::FromOffset(kThumb2InstructionAlignment, InstructionSet::kThumb2); - CodeOffset offset_arm64 = - CodeOffset::FromOffset(kArm64InstructionAlignment, InstructionSet::kArm64); - CodeOffset offset_x86 = - CodeOffset::FromOffset(kX86InstructionAlignment, InstructionSet::kX86); - CodeOffset offset_x86_64 = - CodeOffset::FromOffset(kX86_64InstructionAlignment, InstructionSet::kX86_64); - CodeOffset offset_mips = - CodeOffset::FromOffset(kMipsInstructionAlignment, InstructionSet::kMips); - CodeOffset offset_mips64 = - CodeOffset::FromOffset(kMips64InstructionAlignment, InstructionSet::kMips64); - EXPECT_EQ(offset_thumb2.Uint32Value(InstructionSet::kThumb2), kThumb2InstructionAlignment); - EXPECT_EQ(offset_arm64.Uint32Value(InstructionSet::kArm64), kArm64InstructionAlignment); - EXPECT_EQ(offset_x86.Uint32Value(InstructionSet::kX86), kX86InstructionAlignment); - EXPECT_EQ(offset_x86_64.Uint32Value(InstructionSet::kX86_64), kX86_64InstructionAlignment); - EXPECT_EQ(offset_mips.Uint32Value(InstructionSet::kMips), kMipsInstructionAlignment); - EXPECT_EQ(offset_mips64.Uint32Value(InstructionSet::kMips64), kMips64InstructionAlignment); +TEST(StackMapTest, PackedNativePcTest) { + // Test minimum alignments, and decoding. + uint32_t packed_thumb2 = + StackMap::PackNativePc(kThumb2InstructionAlignment, InstructionSet::kThumb2); + uint32_t packed_arm64 = + StackMap::PackNativePc(kArm64InstructionAlignment, InstructionSet::kArm64); + uint32_t packed_x86 = + StackMap::PackNativePc(kX86InstructionAlignment, InstructionSet::kX86); + uint32_t packed_x86_64 = + StackMap::PackNativePc(kX86_64InstructionAlignment, InstructionSet::kX86_64); + uint32_t packed_mips = + StackMap::PackNativePc(kMipsInstructionAlignment, InstructionSet::kMips); + uint32_t packed_mips64 = + StackMap::PackNativePc(kMips64InstructionAlignment, InstructionSet::kMips64); + EXPECT_EQ(StackMap::UnpackNativePc(packed_thumb2, InstructionSet::kThumb2), + kThumb2InstructionAlignment); + EXPECT_EQ(StackMap::UnpackNativePc(packed_arm64, InstructionSet::kArm64), + kArm64InstructionAlignment); + EXPECT_EQ(StackMap::UnpackNativePc(packed_x86, InstructionSet::kX86), + kX86InstructionAlignment); + EXPECT_EQ(StackMap::UnpackNativePc(packed_x86_64, InstructionSet::kX86_64), + kX86_64InstructionAlignment); + EXPECT_EQ(StackMap::UnpackNativePc(packed_mips, InstructionSet::kMips), + kMipsInstructionAlignment); + EXPECT_EQ(StackMap::UnpackNativePc(packed_mips64, InstructionSet::kMips64), + kMips64InstructionAlignment); } TEST(StackMapTest, TestDeduplicateStackMask) { - ArenaPool pool; + MallocArenaPool pool; ArenaStack arena_stack(&pool); ScopedArenaAllocator allocator(&arena_stack); StackMapStream stream(&allocator, kRuntimeISA); + stream.BeginMethod(32, 0, 0, 0); ArenaBitVector sp_mask(&allocator, 0, true); sp_mask.SetBit(1); sp_mask.SetBit(4); - stream.BeginStackMapEntry(0, 4, 0x3, &sp_mask, 0, 0); + stream.BeginStackMapEntry(0, 4 * kPcAlign, 0x3, &sp_mask); stream.EndStackMapEntry(); - stream.BeginStackMapEntry(0, 8, 0x3, &sp_mask, 0, 0); + stream.BeginStackMapEntry(0, 8 * kPcAlign, 0x3, &sp_mask); stream.EndStackMapEntry(); - size_t size = stream.PrepareForFillIn(); - void* memory = allocator.Alloc(size, kArenaAllocMisc); - MemoryRegion region(memory, size); - stream.FillInCodeInfo(region); + stream.EndMethod(); + ScopedArenaVector<uint8_t> memory = stream.Encode(); - CodeInfo code_info(region); - CodeInfoEncoding encoding = code_info.ExtractEncoding(); - ASSERT_EQ(2u, code_info.GetNumberOfStackMaps(encoding)); + 
CodeInfo code_info(memory.data()); + ASSERT_EQ(2u, code_info.GetNumberOfStackMaps()); - StackMap stack_map1 = code_info.GetStackMapForNativePcOffset(4, encoding); - StackMap stack_map2 = code_info.GetStackMapForNativePcOffset(8, encoding); - EXPECT_EQ(stack_map1.GetStackMaskIndex(encoding.stack_map.encoding), - stack_map2.GetStackMaskIndex(encoding.stack_map.encoding)); + StackMap stack_map1 = code_info.GetStackMapForNativePcOffset(4 * kPcAlign); + StackMap stack_map2 = code_info.GetStackMapForNativePcOffset(8 * kPcAlign); + EXPECT_EQ(stack_map1.GetStackMaskIndex(), + stack_map2.GetStackMaskIndex()); } -TEST(StackMapTest, TestInvokeInfo) { - ArenaPool pool; +TEST(StackMapTest, TestDedupeBitTables) { + MallocArenaPool pool; ArenaStack arena_stack(&pool); ScopedArenaAllocator allocator(&arena_stack); StackMapStream stream(&allocator, kRuntimeISA); + stream.BeginMethod(32, 0, 0, 2); - ArenaBitVector sp_mask(&allocator, 0, true); - sp_mask.SetBit(1); - stream.BeginStackMapEntry(0, 4, 0x3, &sp_mask, 0, 0); - stream.AddInvoke(kSuper, 1); - stream.EndStackMapEntry(); - stream.BeginStackMapEntry(0, 8, 0x3, &sp_mask, 0, 0); - stream.AddInvoke(kStatic, 3); - stream.EndStackMapEntry(); - stream.BeginStackMapEntry(0, 16, 0x3, &sp_mask, 0, 0); - stream.AddInvoke(kDirect, 65535); + stream.BeginStackMapEntry(0, 64 * kPcAlign); + stream.AddDexRegisterEntry(Kind::kInStack, 0); + stream.AddDexRegisterEntry(Kind::kConstant, -2); stream.EndStackMapEntry(); - const size_t code_info_size = stream.PrepareForFillIn(); - MemoryRegion code_info_region(allocator.Alloc(code_info_size, kArenaAllocMisc), code_info_size); - stream.FillInCodeInfo(code_info_region); - - const size_t method_info_size = stream.ComputeMethodInfoSize(); - MemoryRegion method_info_region(allocator.Alloc(method_info_size, kArenaAllocMisc), - method_info_size); - stream.FillInMethodInfo(method_info_region); - - CodeInfo code_info(code_info_region); - MethodInfo method_info(method_info_region.begin()); - CodeInfoEncoding encoding = code_info.ExtractEncoding(); - ASSERT_EQ(3u, code_info.GetNumberOfStackMaps(encoding)); - - InvokeInfo invoke1(code_info.GetInvokeInfoForNativePcOffset(4, encoding)); - InvokeInfo invoke2(code_info.GetInvokeInfoForNativePcOffset(8, encoding)); - InvokeInfo invoke3(code_info.GetInvokeInfoForNativePcOffset(16, encoding)); - InvokeInfo invoke_invalid(code_info.GetInvokeInfoForNativePcOffset(12, encoding)); - EXPECT_FALSE(invoke_invalid.IsValid()); // No entry for that index. 
- EXPECT_TRUE(invoke1.IsValid()); - EXPECT_TRUE(invoke2.IsValid()); - EXPECT_TRUE(invoke3.IsValid()); - EXPECT_EQ(invoke1.GetInvokeType(encoding.invoke_info.encoding), kSuper); - EXPECT_EQ(invoke1.GetMethodIndex(encoding.invoke_info.encoding, method_info), 1u); - EXPECT_EQ(invoke1.GetNativePcOffset(encoding.invoke_info.encoding, kRuntimeISA), 4u); - EXPECT_EQ(invoke2.GetInvokeType(encoding.invoke_info.encoding), kStatic); - EXPECT_EQ(invoke2.GetMethodIndex(encoding.invoke_info.encoding, method_info), 3u); - EXPECT_EQ(invoke2.GetNativePcOffset(encoding.invoke_info.encoding, kRuntimeISA), 8u); - EXPECT_EQ(invoke3.GetInvokeType(encoding.invoke_info.encoding), kDirect); - EXPECT_EQ(invoke3.GetMethodIndex(encoding.invoke_info.encoding, method_info), 65535u); - EXPECT_EQ(invoke3.GetNativePcOffset(encoding.invoke_info.encoding, kRuntimeISA), 16u); + stream.EndMethod(); + ScopedArenaVector<uint8_t> memory = stream.Encode(); + + std::vector<uint8_t> out; + CodeInfo::Deduper deduper(&out); + size_t deduped1 = deduper.Dedupe(memory.data()); + size_t deduped2 = deduper.Dedupe(memory.data()); + + for (size_t deduped : { deduped1, deduped2 }) { + CodeInfo code_info(out.data() + deduped); + ASSERT_EQ(1u, code_info.GetNumberOfStackMaps()); + + StackMap stack_map = code_info.GetStackMapAt(0); + ASSERT_TRUE(stack_map.Equals(code_info.GetStackMapForDexPc(0))); + ASSERT_TRUE(stack_map.Equals(code_info.GetStackMapForNativePcOffset(64 * kPcAlign))); + ASSERT_EQ(0u, stack_map.GetDexPc()); + ASSERT_EQ(64u * kPcAlign, stack_map.GetNativePcOffset(kRuntimeISA)); + + ASSERT_TRUE(stack_map.HasDexRegisterMap()); + DexRegisterMap dex_register_map = code_info.GetDexRegisterMapOf(stack_map); + + ASSERT_EQ(Kind::kInStack, dex_register_map[0].GetKind()); + ASSERT_EQ(Kind::kConstant, dex_register_map[1].GetKind()); + ASSERT_EQ(0, dex_register_map[0].GetStackOffsetInBytes()); + ASSERT_EQ(-2, dex_register_map[1].GetConstant()); + } + + ASSERT_GT(memory.size() * 2, out.size()); } } // namespace art diff --git a/compiler/optimizing/superblock_cloner.cc b/compiler/optimizing/superblock_cloner.cc index a7c23bef7e..dc433feb51 100644 --- a/compiler/optimizing/superblock_cloner.cc +++ b/compiler/optimizing/superblock_cloner.cc @@ -17,6 +17,7 @@ #include "superblock_cloner.h" #include "common_dominator.h" +#include "induction_var_range.h" #include "graph_checker.h" #include <iostream> @@ -70,20 +71,18 @@ static bool ArePhiInputsTheSame(const HPhi* phi) { return true; } -// Returns a common predecessor of loop1 and loop2 in the loop tree or nullptr if it is the whole -// graph. -static HLoopInformation* FindCommonLoop(HLoopInformation* loop1, HLoopInformation* loop2) { - if (loop1 != nullptr || loop2 != nullptr) { - return nullptr; +// Returns whether two Edge sets are equal (ArenaHashSet doesn't have "Equal" method). +static bool EdgeHashSetsEqual(const HEdgeSet* set1, const HEdgeSet* set2) { + if (set1->size() != set2->size()) { + return false; } - if (loop1->IsIn(*loop2)) { - return loop2; - } else if (loop2->IsIn(*loop1)) { - return loop1; + for (auto e : *set1) { + if (set2->find(e) == set2->end()) { + return false; + } } - HBasicBlock* block = CommonDominator::ForPair(loop1->GetHeader(), loop2->GetHeader()); - return block->GetLoopInformation(); + return true; } // Calls HGraph::OrderLoopHeaderPredecessors for each loop in the graph. 
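Aside: the EdgeHashSetsEqual helper added in the hunk above uses the usual size-check-plus-membership pattern for hash-set equality. A minimal stand-alone sketch of that pattern (illustrative only — it substitutes std::unordered_set for ART's ArenaHashSet so it compiles by itself) could look like this:

#include <unordered_set>

// Returns whether two hash sets hold exactly the same elements.
template <typename Set>
bool SetsEqual(const Set& a, const Set& b) {
  if (a.size() != b.size()) {
    return false;  // Different cardinalities can never be equal.
  }
  for (const auto& element : a) {
    if (b.find(element) == b.end()) {
      return false;  // An element of `a` is missing from `b`.
    }
  }
  // Same size and `a` is a subset of `b`, so the sets are equal
  // (both containers store unique elements).
  return true;
}

// Example: SetsEqual(std::unordered_set<int>{1, 2}, std::unordered_set<int>{2, 1}) yields true.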
@@ -95,6 +94,21 @@ static void OrderLoopsHeadersPredecessors(HGraph* graph) { } } +// Performs DFS on the subgraph (specified by 'bb_set') starting from the specified block; while +// traversing the function removes basic blocks from the bb_set (instead of traditional DFS +// 'marking'). So what is left in the 'bb_set' after the traversal is not reachable from the start +// block. +static void TraverseSubgraphForConnectivity(HBasicBlock* block, HBasicBlockSet* bb_set) { + DCHECK(bb_set->IsBitSet(block->GetBlockId())); + bb_set->ClearBit(block->GetBlockId()); + + for (HBasicBlock* succ : block->GetSuccessors()) { + if (bb_set->IsBitSet(succ->GetBlockId())) { + TraverseSubgraphForConnectivity(succ, bb_set); + } + } +} + // // Helpers for CloneBasicBlock. // @@ -268,7 +282,6 @@ void SuperblockCloner::FindBackEdgesLocal(HBasicBlock* entry_block, ArenaBitVect } void SuperblockCloner::RecalculateBackEdgesInfo(ArenaBitVector* outer_loop_bb_set) { - // TODO: DCHECK that after the transformation the graph is connected. HBasicBlock* block_entry = nullptr; if (outer_loop_ == nullptr) { @@ -397,7 +410,7 @@ void SuperblockCloner::ResolvePhi(HPhi* phi) { // Main algorithm methods. // -void SuperblockCloner::SearchForSubgraphExits(ArenaVector<HBasicBlock*>* exits) { +void SuperblockCloner::SearchForSubgraphExits(ArenaVector<HBasicBlock*>* exits) const { DCHECK(exits->empty()); for (uint32_t block_id : orig_bb_set_.Indexes()) { HBasicBlock* block = GetBlockById(block_id); @@ -424,6 +437,11 @@ void SuperblockCloner::FindAndSetLocalAreaForAdjustments() { outer_loop_ = nullptr; break; } + if (outer_loop_ == nullptr) { + // We should not use the initial outer_loop_ value 'nullptr' when finding the most outer + // common loop. + outer_loop_ = loop_exit_loop_info; + } outer_loop_ = FindCommonLoop(outer_loop_, loop_exit_loop_info); } @@ -455,8 +473,8 @@ void SuperblockCloner::RemapEdgesSuccessors() { continue; } - auto orig_redir = remap_orig_internal_->Find(HEdge(orig_block_id, orig_succ_id)); - auto copy_redir = remap_copy_internal_->Find(HEdge(orig_block_id, orig_succ_id)); + auto orig_redir = remap_orig_internal_->find(HEdge(orig_block_id, orig_succ_id)); + auto copy_redir = remap_copy_internal_->find(HEdge(orig_block_id, orig_succ_id)); // Due to construction all successors of copied block were set to original. if (copy_redir != remap_copy_internal_->end()) { @@ -504,9 +522,152 @@ void SuperblockCloner::ResolveDataFlow() { } // +// Helpers for live-outs processing and Subgraph-closed SSA. +// + +bool SuperblockCloner::CollectLiveOutsAndCheckClonable(HInstructionMap* live_outs) const { + DCHECK(live_outs->empty()); + for (uint32_t idx : orig_bb_set_.Indexes()) { + HBasicBlock* block = GetBlockById(idx); + + for (HInstructionIterator it(block->GetPhis()); !it.Done(); it.Advance()) { + HInstruction* instr = it.Current(); + DCHECK(instr->IsClonable()); + + if (IsUsedOutsideRegion(instr, orig_bb_set_)) { + live_outs->FindOrAdd(instr, instr); + } + } + + for (HInstructionIterator it(block->GetInstructions()); !it.Done(); it.Advance()) { + HInstruction* instr = it.Current(); + if (!instr->IsClonable()) { + return false; + } + + if (IsUsedOutsideRegion(instr, orig_bb_set_)) { + // TODO: Investigate why HNewInstance, HCheckCast has a requirement for the input. 
+ if (instr->IsLoadClass()) { + return false; + } + live_outs->FindOrAdd(instr, instr); + } + } + } + return true; +} + +void SuperblockCloner::UpdateInductionRangeInfoOf( + HInstruction* user, HInstruction* old_instruction, HInstruction* replacement) { + if (induction_range_ != nullptr) { + induction_range_->Replace(user, old_instruction, replacement); + } +} + +void SuperblockCloner::ConstructSubgraphClosedSSA() { + if (live_outs_.empty()) { + return; + } + + ArenaVector<HBasicBlock*> exits(arena_->Adapter(kArenaAllocSuperblockCloner)); + SearchForSubgraphExits(&exits); + if (exits.empty()) { + DCHECK(live_outs_.empty()); + return; + } + + DCHECK_EQ(exits.size(), 1u); + HBasicBlock* exit_block = exits[0]; + // There should be no critical edges. + DCHECK_EQ(exit_block->GetPredecessors().size(), 1u); + DCHECK(exit_block->GetPhis().IsEmpty()); + + // For each live-out value insert a phi into the loop exit and replace all the value's uses + // external to the loop with this phi. The phi will have the original value as its only input; + // after copying is done FixSubgraphClosedSSAAfterCloning will add a corresponding copy of the + // original value as the second input thus merging data flow from the original and copy parts of + // the subgraph. Also update the record in the live_outs_ map from (value, value) to + // (value, new_phi). + for (auto live_out_it = live_outs_.begin(); live_out_it != live_outs_.end(); ++live_out_it) { + HInstruction* value = live_out_it->first; + HPhi* phi = new (arena_) HPhi(arena_, kNoRegNumber, 0, value->GetType()); + + if (value->GetType() == DataType::Type::kReference) { + phi->SetReferenceTypeInfo(value->GetReferenceTypeInfo()); + } + + exit_block->AddPhi(phi); + live_out_it->second = phi; + + const HUseList<HInstruction*>& uses = value->GetUses(); + for (auto it = uses.begin(), end = uses.end(); it != end; /* ++it below */) { + HInstruction* user = it->GetUser(); + size_t index = it->GetIndex(); + // Increment `it` now because `*it` may disappear thanks to user->ReplaceInput(). + ++it; + if (!IsInOrigBBSet(user->GetBlock())) { + user->ReplaceInput(phi, index); + UpdateInductionRangeInfoOf(user, value, phi); + } + } + + const HUseList<HEnvironment*>& env_uses = value->GetEnvUses(); + for (auto it = env_uses.begin(), e = env_uses.end(); it != e; /* ++it below */) { + HEnvironment* env = it->GetUser(); + size_t index = it->GetIndex(); + ++it; + if (!IsInOrigBBSet(env->GetHolder()->GetBlock())) { + env->ReplaceInput(phi, index); + } + } + + phi->AddInput(value); + } +} + +void SuperblockCloner::FixSubgraphClosedSSAAfterCloning() { + for (auto it : live_outs_) { + DCHECK(it.first != it.second); + HInstruction* orig_value = it.first; + HPhi* phi = it.second->AsPhi(); + HInstruction* copy_value = GetInstrCopy(orig_value); + // Copy edges are inserted after the original so we can just add new input to the phi. + phi->AddInput(copy_value); + } +} + +// // Debug and logging methods. // +// Debug function to dump graph' BasicBlocks info. 
+void DumpBB(HGraph* graph) { + for (HBasicBlock* bb : graph->GetBlocks()) { + if (bb == nullptr) { + continue; + } + std::cout << bb->GetBlockId(); + std::cout << " <- "; + for (HBasicBlock* pred : bb->GetPredecessors()) { + std::cout << pred->GetBlockId() << " "; + } + std::cout << " -> "; + for (HBasicBlock* succ : bb->GetSuccessors()) { + std::cout << succ->GetBlockId() << " "; + } + + if (bb->GetDominator()) { + std::cout << " dom " << bb->GetDominator()->GetBlockId(); + } + + if (bb->GetLoopInformation()) { + std::cout << "\tloop: " << bb->GetLoopInformation()->GetHeader()->GetBlockId(); + } + + std::cout << std::endl; + } +} + void SuperblockCloner::CheckInstructionInputsRemapping(HInstruction* orig_instr) { DCHECK(!orig_instr->IsPhi()); HInstruction* copy_instr = GetInstrCopy(orig_instr); @@ -542,6 +703,81 @@ void SuperblockCloner::CheckInstructionInputsRemapping(HInstruction* orig_instr) } } +bool SuperblockCloner::CheckRemappingInfoIsValid() { + for (HEdge edge : *remap_orig_internal_) { + if (!IsEdgeValid(edge, graph_) || + !IsInOrigBBSet(edge.GetFrom()) || + !IsInOrigBBSet(edge.GetTo())) { + return false; + } + } + + for (auto edge : *remap_copy_internal_) { + if (!IsEdgeValid(edge, graph_) || + !IsInOrigBBSet(edge.GetFrom()) || + !IsInOrigBBSet(edge.GetTo())) { + return false; + } + } + + for (auto edge : *remap_incoming_) { + if (!IsEdgeValid(edge, graph_) || + IsInOrigBBSet(edge.GetFrom()) || + !IsInOrigBBSet(edge.GetTo())) { + return false; + } + } + + return true; +} + +void SuperblockCloner::VerifyGraph() { + for (auto it : *hir_map_) { + HInstruction* orig_instr = it.first; + HInstruction* copy_instr = it.second; + if (!orig_instr->IsPhi() && !orig_instr->IsSuspendCheck()) { + DCHECK(it.first->GetBlock() != nullptr); + } + if (!copy_instr->IsPhi() && !copy_instr->IsSuspendCheck()) { + DCHECK(it.second->GetBlock() != nullptr); + } + } + + GraphChecker checker(graph_); + checker.Run(); + if (!checker.IsValid()) { + for (const std::string& error : checker.GetErrors()) { + std::cout << error << std::endl; + } + LOG(FATAL) << "GraphChecker failed: superblock cloner\n"; + } +} + +void DumpBBSet(const ArenaBitVector* set) { + for (uint32_t idx : set->Indexes()) { + std::cout << idx << "\n"; + } +} + +void SuperblockCloner::DumpInputSets() { + std::cout << "orig_bb_set:\n"; + for (uint32_t idx : orig_bb_set_.Indexes()) { + std::cout << idx << "\n"; + } + std::cout << "remap_orig_internal:\n"; + for (HEdge e : *remap_orig_internal_) { + std::cout << e << "\n"; + } + std::cout << "remap_copy_internal:\n"; + for (auto e : *remap_copy_internal_) { + std::cout << e << "\n"; + } + std::cout << "remap_incoming:\n"; + for (auto e : *remap_incoming_) { + std::cout << e << "\n"; + } +} + // // Public methods. 
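Aside: the hunk above introduces several ad-hoc dump helpers. The following is a purely hypothetical sketch (the wrapper name DumpStateForDebug is invented for illustration and is not part of this change) of how they could be grouped behind the existing kSuperblockClonerLogging flag from inside a SuperblockCloner member:

// Hypothetical convenience wrapper -- not in the patch; shown only to illustrate how the
// new dump helpers fit together. Assumes it is declared as a SuperblockCloner member so
// that graph_, orig_bb_set_ and DumpInputSets() are accessible.
void SuperblockCloner::DumpStateForDebug() {
  if (kSuperblockClonerLogging) {
    DumpBB(graph_);            // Every block: predecessors, successors, dominator, loop header.
    DumpBBSet(&orig_bb_set_);  // Ids of the blocks selected for cloning.
    DumpInputSets();           // orig_bb_set_ plus the three remap edge sets.
  }
}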
// @@ -549,7 +785,8 @@ void SuperblockCloner::CheckInstructionInputsRemapping(HInstruction* orig_instr) SuperblockCloner::SuperblockCloner(HGraph* graph, const HBasicBlockSet* orig_bb_set, HBasicBlockMap* bb_map, - HInstructionMap* hir_map) + HInstructionMap* hir_map, + InductionVarRange* induction_range) : graph_(graph), arena_(graph->GetAllocator()), orig_bb_set_(arena_, orig_bb_set->GetSizeOf(), true, kArenaAllocSuperblockCloner), @@ -558,8 +795,11 @@ SuperblockCloner::SuperblockCloner(HGraph* graph, remap_incoming_(nullptr), bb_map_(bb_map), hir_map_(hir_map), + induction_range_(induction_range), outer_loop_(nullptr), - outer_loop_bb_set_(arena_, orig_bb_set->GetSizeOf(), true, kArenaAllocSuperblockCloner) { + outer_loop_bb_set_(arena_, orig_bb_set->GetSizeOf(), true, kArenaAllocSuperblockCloner), + live_outs_(std::less<HInstruction*>(), + graph->GetAllocator()->Adapter(kArenaAllocSuperblockCloner)) { orig_bb_set_.Copy(orig_bb_set); } @@ -569,6 +809,7 @@ void SuperblockCloner::SetSuccessorRemappingInfo(const HEdgeSet* remap_orig_inte remap_orig_internal_ = remap_orig_internal; remap_copy_internal_ = remap_copy_internal; remap_incoming_ = remap_incoming; + DCHECK(CheckRemappingInfoIsValid()); } bool SuperblockCloner::IsSubgraphClonable() const { @@ -577,29 +818,79 @@ bool SuperblockCloner::IsSubgraphClonable() const { return false; } - // Check that there are no instructions defined in the subgraph and used outside. - // TODO: Improve this by accepting graph with such uses but only one exit. + HInstructionMap live_outs( + std::less<HInstruction*>(), graph_->GetAllocator()->Adapter(kArenaAllocSuperblockCloner)); + + if (!CollectLiveOutsAndCheckClonable(&live_outs)) { + return false; + } + + ArenaVector<HBasicBlock*> exits(arena_->Adapter(kArenaAllocSuperblockCloner)); + SearchForSubgraphExits(&exits); + + // The only loops with live-outs which are currently supported are loops with a single exit. + if (!live_outs.empty() && exits.size() != 1) { + return false; + } + + return true; +} + +bool SuperblockCloner::IsFastCase() const { + // Check that loop unrolling/loop peeling is being conducted. + // Check that all the basic blocks belong to the same loop. + bool flag = false; + HLoopInformation* common_loop_info = nullptr; for (uint32_t idx : orig_bb_set_.Indexes()) { HBasicBlock* block = GetBlockById(idx); - - for (HInstructionIterator it(block->GetInstructions()); !it.Done(); it.Advance()) { - HInstruction* instr = it.Current(); - if (!instr->IsClonable() || - IsUsedOutsideRegion(instr, orig_bb_set_)) { + HLoopInformation* block_loop_info = block->GetLoopInformation(); + if (!flag) { + common_loop_info = block_loop_info; + } else { + if (block_loop_info != common_loop_info) { return false; } } + } - for (HInstructionIterator it(block->GetPhis()); !it.Done(); it.Advance()) { - HInstruction* instr = it.Current(); - if (!instr->IsClonable() || - IsUsedOutsideRegion(instr, orig_bb_set_)) { - return false; - } - } + // Check that orig_bb_set_ corresponds to loop peeling/unrolling. + if (common_loop_info == nullptr || !orig_bb_set_.SameBitsSet(&common_loop_info->GetBlocks())) { + return false; } - return true; + bool peeling_or_unrolling = false; + HEdgeSet remap_orig_internal(graph_->GetAllocator()->Adapter(kArenaAllocSuperblockCloner)); + HEdgeSet remap_copy_internal(graph_->GetAllocator()->Adapter(kArenaAllocSuperblockCloner)); + HEdgeSet remap_incoming(graph_->GetAllocator()->Adapter(kArenaAllocSuperblockCloner)); + + + // Check whether remapping info corresponds to loop unrolling. 
+ CollectRemappingInfoForPeelUnroll(/* to_unroll*/ true, + common_loop_info, + &remap_orig_internal, + &remap_copy_internal, + &remap_incoming); + + peeling_or_unrolling |= EdgeHashSetsEqual(&remap_orig_internal, remap_orig_internal_) && + EdgeHashSetsEqual(&remap_copy_internal, remap_copy_internal_) && + EdgeHashSetsEqual(&remap_incoming, remap_incoming_); + + remap_orig_internal.clear(); + remap_copy_internal.clear(); + remap_incoming.clear(); + + // Check whether remapping info corresponds to loop peeling. + CollectRemappingInfoForPeelUnroll(/* to_unroll*/ false, + common_loop_info, + &remap_orig_internal, + &remap_copy_internal, + &remap_incoming); + + peeling_or_unrolling |= EdgeHashSetsEqual(&remap_orig_internal, remap_orig_internal_) && + EdgeHashSetsEqual(&remap_copy_internal, remap_copy_internal_) && + EdgeHashSetsEqual(&remap_incoming, remap_incoming_); + + return peeling_or_unrolling; } void SuperblockCloner::Run() { @@ -609,19 +900,40 @@ void SuperblockCloner::Run() { remap_copy_internal_ != nullptr && remap_incoming_ != nullptr); DCHECK(IsSubgraphClonable()); + DCHECK(IsFastCase()); + if (kSuperblockClonerLogging) { + DumpInputSets(); + } + + CollectLiveOutsAndCheckClonable(&live_outs_); // Find an area in the graph for which control flow information should be adjusted. FindAndSetLocalAreaForAdjustments(); + ConstructSubgraphClosedSSA(); // Clone the basic blocks from the orig_bb_set_; data flow is invalid after the call and is to be // adjusted. CloneBasicBlocks(); // Connect the blocks together/remap successors and fix phis which are directly affected my the // remapping. RemapEdgesSuccessors(); + + // Check that the subgraph is connected. + if (kIsDebugBuild) { + HBasicBlockSet work_set(arena_, orig_bb_set_.GetSizeOf(), true, kArenaAllocSuperblockCloner); + + // Add original and copy blocks of the subgraph to the work set. + for (auto iter : *bb_map_) { + work_set.SetBit(iter.first->GetBlockId()); // Original block. + work_set.SetBit(iter.second->GetBlockId()); // Copy block. + } + CHECK(IsSubgraphConnected(&work_set, graph_)); + } + // Recalculate dominance and backedge information which is required by the next stage. AdjustControlFlowInfo(); // Fix data flow of the graph. ResolveDataFlow(); + FixSubgraphClosedSSAAfterCloning(); } void SuperblockCloner::CleanUp() { @@ -650,6 +962,10 @@ void SuperblockCloner::CleanUp() { } } } + + if (kIsDebugBuild) { + VerifyGraph(); + } } HBasicBlock* SuperblockCloner::CloneBasicBlock(const HBasicBlock* orig_block) { @@ -701,4 +1017,135 @@ void SuperblockCloner::CloneBasicBlocks() { } } +// +// Stand-alone methods. +// + +void CollectRemappingInfoForPeelUnroll(bool to_unroll, + HLoopInformation* loop_info, + HEdgeSet* remap_orig_internal, + HEdgeSet* remap_copy_internal, + HEdgeSet* remap_incoming) { + DCHECK(loop_info != nullptr); + HBasicBlock* loop_header = loop_info->GetHeader(); + // Set up remap_orig_internal edges set - set is empty. + // Set up remap_copy_internal edges set. + for (HBasicBlock* back_edge_block : loop_info->GetBackEdges()) { + HEdge e = HEdge(back_edge_block, loop_header); + if (to_unroll) { + remap_orig_internal->insert(e); + remap_copy_internal->insert(e); + } else { + remap_copy_internal->insert(e); + } + } + + // Set up remap_incoming edges set. 
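For a loop with a single back edge, like the one built by the tests later in this change (pre-header 1, header 2, body 3 in the diagrams; the concrete ids are only for illustration), the three sets come out as in this standalone sketch that uses plain pairs in place of HEdge:

#include <iostream>
#include <set>
#include <utility>

using Edge = std::pair<int, int>;  // (from block id, to block id)

int main() {
  const int kPreHeader = 1, kHeader = 2, kBody = 3;
  const Edge back_edge{kBody, kHeader};
  const Edge incoming{kPreHeader, kHeader};

  // Unrolling: the back edge lands in both internal remap sets and nothing is
  // remapped on entry to the loop.
  std::set<Edge> unroll_orig_internal{back_edge};
  std::set<Edge> unroll_copy_internal{back_edge};
  std::set<Edge> unroll_incoming{};

  // Peeling: only the copy's back edge is remapped, and the incoming
  // pre-header edge is redirected so the peeled copy runs first.
  std::set<Edge> peel_orig_internal{};
  std::set<Edge> peel_copy_internal{back_edge};
  std::set<Edge> peel_incoming{incoming};

  std::cout << unroll_orig_internal.size() << unroll_copy_internal.size()
            << unroll_incoming.size() << "\n";  // 110
  std::cout << peel_orig_internal.size() << peel_copy_internal.size()
            << peel_incoming.size() << "\n";    // 011
  return 0;
}

The statement that follows inserts that pre-header edge in the peeling case only.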
+ if (!to_unroll) { + remap_incoming->insert(HEdge(loop_info->GetPreHeader(), loop_header)); + } +} + +bool IsSubgraphConnected(SuperblockCloner::HBasicBlockSet* work_set, HGraph* graph) { + ArenaVector<HBasicBlock*> entry_blocks( + graph->GetAllocator()->Adapter(kArenaAllocSuperblockCloner)); + + // Find subgraph entry blocks. + for (uint32_t orig_block_id : work_set->Indexes()) { + HBasicBlock* block = graph->GetBlocks()[orig_block_id]; + for (HBasicBlock* pred : block->GetPredecessors()) { + if (!work_set->IsBitSet(pred->GetBlockId())) { + entry_blocks.push_back(block); + break; + } + } + } + + for (HBasicBlock* entry_block : entry_blocks) { + if (work_set->IsBitSet(entry_block->GetBlockId())) { + TraverseSubgraphForConnectivity(entry_block, work_set); + } + } + + // Return whether there are unvisited - unreachable - blocks. + return work_set->NumSetBits() == 0; +} + +HLoopInformation* FindCommonLoop(HLoopInformation* loop1, HLoopInformation* loop2) { + if (loop1 == nullptr || loop2 == nullptr) { + return nullptr; + } + + if (loop1->IsIn(*loop2)) { + return loop2; + } + + HLoopInformation* current = loop1; + while (current != nullptr && !loop2->IsIn(*current)) { + current = current->GetPreHeader()->GetLoopInformation(); + } + + return current; +} + +bool PeelUnrollHelper::IsLoopClonable(HLoopInformation* loop_info) { + PeelUnrollHelper helper( + loop_info, /* bb_map= */ nullptr, /* hir_map= */ nullptr, /* induction_range= */ nullptr); + return helper.IsLoopClonable(); +} + +HBasicBlock* PeelUnrollHelper::DoPeelUnrollImpl(bool to_unroll) { + // For now do peeling only for natural loops. + DCHECK(!loop_info_->IsIrreducible()); + + HBasicBlock* loop_header = loop_info_->GetHeader(); + // Check that loop info is up-to-date. + DCHECK(loop_info_ == loop_header->GetLoopInformation()); + HGraph* graph = loop_header->GetGraph(); + + if (kSuperblockClonerLogging) { + std::cout << "Method: " << graph->PrettyMethod() << std::endl; + std::cout << "Scalar loop " << (to_unroll ? "unrolling" : "peeling") << + " was applied to the loop <" << loop_header->GetBlockId() << ">." << std::endl; + } + + ArenaAllocator allocator(graph->GetAllocator()->GetArenaPool()); + + HEdgeSet remap_orig_internal(graph->GetAllocator()->Adapter(kArenaAllocSuperblockCloner)); + HEdgeSet remap_copy_internal(graph->GetAllocator()->Adapter(kArenaAllocSuperblockCloner)); + HEdgeSet remap_incoming(graph->GetAllocator()->Adapter(kArenaAllocSuperblockCloner)); + + CollectRemappingInfoForPeelUnroll(to_unroll, + loop_info_, + &remap_orig_internal, + &remap_copy_internal, + &remap_incoming); + + cloner_.SetSuccessorRemappingInfo(&remap_orig_internal, &remap_copy_internal, &remap_incoming); + cloner_.Run(); + cloner_.CleanUp(); + + // Check that loop info is preserved. 
+ DCHECK(loop_info_ == loop_header->GetLoopInformation()); + + return loop_header; +} + +PeelUnrollSimpleHelper::PeelUnrollSimpleHelper(HLoopInformation* info, + InductionVarRange* induction_range) + : bb_map_(std::less<HBasicBlock*>(), + info->GetHeader()->GetGraph()->GetAllocator()->Adapter(kArenaAllocSuperblockCloner)), + hir_map_(std::less<HInstruction*>(), + info->GetHeader()->GetGraph()->GetAllocator()->Adapter(kArenaAllocSuperblockCloner)), + helper_(info, &bb_map_, &hir_map_, induction_range) {} + } // namespace art + +namespace std { + +ostream& operator<<(ostream& os, const art::HEdge& e) { + e.Dump(os); + return os; +} + +} // namespace std diff --git a/compiler/optimizing/superblock_cloner.h b/compiler/optimizing/superblock_cloner.h index 23de692673..ece0914ddb 100644 --- a/compiler/optimizing/superblock_cloner.h +++ b/compiler/optimizing/superblock_cloner.h @@ -24,8 +24,9 @@ namespace art { +class InductionVarRange; + static const bool kSuperblockClonerLogging = false; -static const bool kSuperblockClonerVerify = false; // Represents an edge between two HBasicBlocks. // @@ -141,7 +142,8 @@ class SuperblockCloner : public ValueObject { SuperblockCloner(HGraph* graph, const HBasicBlockSet* orig_bb_set, HBasicBlockMap* bb_map, - HInstructionMap* hir_map); + HInstructionMap* hir_map, + InductionVarRange* induction_range); // Sets edge successor remapping info specified by corresponding edge sets. void SetSuccessorRemappingInfo(const HEdgeSet* remap_orig_internal, @@ -152,6 +154,15 @@ class SuperblockCloner : public ValueObject { // TODO: Start from small range of graph patterns then extend it. bool IsSubgraphClonable() const; + // Returns whether selected subgraph satisfies the criteria for fast data flow resolution + // when iterative DF algorithm is not required and dominators/instructions inputs can be + // trivially adjusted. + // + // TODO: formally describe the criteria. + // + // Loop peeling and unrolling satisfy the criteria. + bool IsFastCase() const; + // Runs the copy algorithm according to the description. void Run(); @@ -202,11 +213,17 @@ class SuperblockCloner : public ValueObject { return IsInOrigBBSet(block->GetBlockId()); } + // Returns the area (the most outer loop) in the graph for which control flow (back edges, loops, + // dominators) needs to be adjusted. + HLoopInformation* GetRegionToBeAdjusted() const { + return outer_loop_; + } + private: // Fills the 'exits' vector with the subgraph exits. - void SearchForSubgraphExits(ArenaVector<HBasicBlock*>* exits); + void SearchForSubgraphExits(ArenaVector<HBasicBlock*>* exits) const; - // Finds and records information about the area in the graph for which control-flow (back edges, + // Finds and records information about the area in the graph for which control flow (back edges, // loops, dominators) needs to be adjusted. void FindAndSetLocalAreaForAdjustments(); @@ -217,7 +234,7 @@ class SuperblockCloner : public ValueObject { // phis' nor instructions' inputs values are resolved. void RemapEdgesSuccessors(); - // Adjusts control-flow (back edges, loops, dominators) for the local area defined by + // Adjusts control flow (back edges, loops, dominators) for the local area defined by // FindAndSetLocalAreaForAdjustments. void AdjustControlFlowInfo(); @@ -226,6 +243,33 @@ class SuperblockCloner : public ValueObject { void ResolveDataFlow(); // + // Helpers for live-outs processing and Subgraph-closed SSA. + // + // - live-outs - values which are defined inside the subgraph and have uses outside. 
+ // - Subgraph-closed SSA - SSA form for which all the values defined inside the subgraph + // have no outside uses except for the phi-nodes in the subgraph exits. + // + // Note: now if the subgraph has live-outs it is only clonable if it has a single exit; this + // makes the subgraph-closed SSA form construction much easier. + // + // TODO: Support subgraphs with live-outs and multiple exits. + // + + // For each live-out value 'val' in the region puts a record <val, val> into the map. + // Returns whether all of the instructions in the subgraph are clonable. + bool CollectLiveOutsAndCheckClonable(HInstructionMap* live_outs_) const; + + // Constructs Subgraph-closed SSA; precondition - a subgraph has a single exit. + // + // For each live-out 'val' in 'live_outs_' map inserts a HPhi 'phi' into the exit node, updates + // the record in the map to <val, phi> and replaces all outside uses with this phi. + void ConstructSubgraphClosedSSA(); + + // Fixes the data flow for the live-out 'val' by adding a 'copy_val' input to the corresponding + // (<val, phi>) phi after the cloning is done. + void FixSubgraphClosedSSAAfterCloning(); + + // // Helpers for CloneBasicBlock. // @@ -268,10 +312,17 @@ class SuperblockCloner : public ValueObject { // Resolves the inputs of the phi. void ResolvePhi(HPhi* phi); + // Update induction range after when fixing SSA. + void UpdateInductionRangeInfoOf( + HInstruction* user, HInstruction* old_instruction, HInstruction* replacement); + // // Debug and logging methods. // void CheckInstructionInputsRemapping(HInstruction* orig_instr); + bool CheckRemappingInfoIsValid(); + void VerifyGraph(); + void DumpInputSets(); HBasicBlock* GetBlockById(uint32_t block_id) const { DCHECK(block_id < graph_->GetBlocks().size()); @@ -295,15 +346,103 @@ class SuperblockCloner : public ValueObject { HBasicBlockMap* bb_map_; // Correspondence map for instructions: (original HInstruction, copy HInstruction). HInstructionMap* hir_map_; - // Area in the graph for which control-flow (back edges, loops, dominators) needs to be adjusted. + // As a result of cloning, the induction range analysis information can be invalidated + // and must be updated. If not null, the cloner updates it for changed instructions. + InductionVarRange* induction_range_; + // Area in the graph for which control flow (back edges, loops, dominators) needs to be adjusted. HLoopInformation* outer_loop_; HBasicBlockSet outer_loop_bb_set_; + HInstructionMap live_outs_; + ART_FRIEND_TEST(SuperblockClonerTest, AdjustControlFlowInfo); + ART_FRIEND_TEST(SuperblockClonerTest, IsGraphConnected); DISALLOW_COPY_AND_ASSIGN(SuperblockCloner); }; +// Helper class to perform loop peeling/unrolling. +// +// This helper should be used when correspondence map between original and copied +// basic blocks/instructions are demanded. +class PeelUnrollHelper : public ValueObject { + public: + PeelUnrollHelper(HLoopInformation* info, + SuperblockCloner::HBasicBlockMap* bb_map, + SuperblockCloner::HInstructionMap* hir_map, + InductionVarRange* induction_range) : + loop_info_(info), + cloner_(info->GetHeader()->GetGraph(), &info->GetBlocks(), bb_map, hir_map, induction_range) { + // For now do peeling/unrolling only for natural loops. + DCHECK(!info->IsIrreducible()); + } + + // Returns whether the loop can be peeled/unrolled (static function). + static bool IsLoopClonable(HLoopInformation* loop_info); + + // Returns whether the loop can be peeled/unrolled. 
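The clonability check documented above, together with the DoPeeling()/DoUnrolling() entry points, is typically driven through PeelUnrollSimpleHelper, declared a little further down. A hedged sketch of a call site, where the enclosing function and its policy are assumptions and only the helper API comes from this change:

// Illustrative call site only; a real pass would add its own profitability checks.
static bool TryPeelLoop(HLoopInformation* loop_info, InductionVarRange* induction_range) {
  PeelUnrollSimpleHelper helper(loop_info, induction_range);
  if (!helper.IsLoopClonable()) {
    return false;
  }
  helper.DoPeeling();
  // If the caller needs to re-run analyses, the affected outer region is
  // available through helper.GetRegionToBeAdjusted().
  return true;
}

The instance method declared next is the check that this sketch relies on.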
+ bool IsLoopClonable() const { return cloner_.IsSubgraphClonable(); } + + HBasicBlock* DoPeeling() { return DoPeelUnrollImpl(/* to_unroll= */ false); } + HBasicBlock* DoUnrolling() { return DoPeelUnrollImpl(/* to_unroll= */ true); } + HLoopInformation* GetRegionToBeAdjusted() const { return cloner_.GetRegionToBeAdjusted(); } + + protected: + // Applies loop peeling/unrolling for the loop specified by 'loop_info'. + // + // Depending on 'do_unroll' either unrolls loop by 2 or peels one iteration from it. + HBasicBlock* DoPeelUnrollImpl(bool to_unroll); + + private: + HLoopInformation* loop_info_; + SuperblockCloner cloner_; + + DISALLOW_COPY_AND_ASSIGN(PeelUnrollHelper); +}; + +// Helper class to perform loop peeling/unrolling. +// +// This helper should be used when there is no need to get correspondence information between +// original and copied basic blocks/instructions. +class PeelUnrollSimpleHelper : public ValueObject { + public: + PeelUnrollSimpleHelper(HLoopInformation* info, InductionVarRange* induction_range); + bool IsLoopClonable() const { return helper_.IsLoopClonable(); } + HBasicBlock* DoPeeling() { return helper_.DoPeeling(); } + HBasicBlock* DoUnrolling() { return helper_.DoUnrolling(); } + HLoopInformation* GetRegionToBeAdjusted() const { return helper_.GetRegionToBeAdjusted(); } + + const SuperblockCloner::HBasicBlockMap* GetBasicBlockMap() const { return &bb_map_; } + const SuperblockCloner::HInstructionMap* GetInstructionMap() const { return &hir_map_; } + + private: + SuperblockCloner::HBasicBlockMap bb_map_; + SuperblockCloner::HInstructionMap hir_map_; + PeelUnrollHelper helper_; + + DISALLOW_COPY_AND_ASSIGN(PeelUnrollSimpleHelper); +}; + +// Collects edge remapping info for loop peeling/unrolling for the loop specified by loop info. +void CollectRemappingInfoForPeelUnroll(bool to_unroll, + HLoopInformation* loop_info, + SuperblockCloner::HEdgeSet* remap_orig_internal, + SuperblockCloner::HEdgeSet* remap_copy_internal, + SuperblockCloner::HEdgeSet* remap_incoming); + +// Returns whether blocks from 'work_set' are reachable from the rest of the graph. +// +// Returns whether such a set 'outer_entries' of basic blocks exists that: +// - each block from 'outer_entries' is not from 'work_set'. +// - each block from 'work_set' is reachable from at least one block from 'outer_entries'. +// +// After the function returns work_set contains only blocks from the original 'work_set' +// which are unreachable from the rest of the graph. +bool IsSubgraphConnected(SuperblockCloner::HBasicBlockSet* work_set, HGraph* graph); + +// Returns a common predecessor of loop1 and loop2 in the loop tree or nullptr if it is the whole +// graph. +HLoopInformation* FindCommonLoop(HLoopInformation* loop1, HLoopInformation* loop2); } // namespace art namespace std { @@ -312,11 +451,12 @@ template <> struct hash<art::HEdge> { size_t operator()(art::HEdge const& x) const noexcept { // Use Cantor pairing function as the hash function. 
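A quick standalone illustration of the pairing function named in the comment above, and of why the hunk that follows widens the operands from uint32_t to size_t (the 32-bit intermediate product can wrap for very large block ids):

#include <cstddef>
#include <iostream>

// Cantor pairing: maps an ordered pair of naturals to a unique natural.
size_t CantorPair(size_t a, size_t b) {
  return (a + b) * (a + b + 1) / 2 + b;
}

int main() {
  std::cout << CantorPair(2, 3) << "\n";  // 18: the pair is ordered, so...
  std::cout << CantorPair(3, 2) << "\n";  // 17: ...(2, 3) and (3, 2) differ.
  // With 32-bit operands the product (a + b) * (a + b + 1) wraps once a + b
  // reaches 65536; computing in size_t avoids that for edge endpoints taken
  // from the block ids of very large graphs.
  std::cout << CantorPair(70000, 1) << "\n";
  return 0;
}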
- uint32_t a = x.GetFrom(); - uint32_t b = x.GetTo(); + size_t a = x.GetFrom(); + size_t b = x.GetTo(); return (a + b) * (a + b + 1) / 2 + b; } }; +ostream& operator<<(ostream& os, const art::HEdge& e); } // namespace std diff --git a/compiler/optimizing/superblock_cloner_test.cc b/compiler/optimizing/superblock_cloner_test.cc index f1b7bffdf5..aa19de683f 100644 --- a/compiler/optimizing/superblock_cloner_test.cc +++ b/compiler/optimizing/superblock_cloner_test.cc @@ -25,52 +25,35 @@ namespace art { using HBasicBlockMap = SuperblockCloner::HBasicBlockMap; using HInstructionMap = SuperblockCloner::HInstructionMap; +using HBasicBlockSet = SuperblockCloner::HBasicBlockSet; +using HEdgeSet = SuperblockCloner::HEdgeSet; // This class provides methods and helpers for testing various cloning and copying routines: // individual instruction cloning and cloning of the more coarse-grain structures. -class SuperblockClonerTest : public OptimizingUnitTest { +class SuperblockClonerTest : public ImprovedOptimizingUnitTest { public: - SuperblockClonerTest() - : graph_(CreateGraph()), entry_block_(nullptr), exit_block_(nullptr), parameter_(nullptr) {} - - void CreateBasicLoopControlFlow(/* out */ HBasicBlock** header_p, + void CreateBasicLoopControlFlow(HBasicBlock* position, + HBasicBlock* successor, + /* out */ HBasicBlock** header_p, /* out */ HBasicBlock** body_p) { - entry_block_ = new (GetAllocator()) HBasicBlock(graph_); - graph_->AddBlock(entry_block_); - graph_->SetEntryBlock(entry_block_); - HBasicBlock* loop_preheader = new (GetAllocator()) HBasicBlock(graph_); HBasicBlock* loop_header = new (GetAllocator()) HBasicBlock(graph_); HBasicBlock* loop_body = new (GetAllocator()) HBasicBlock(graph_); - HBasicBlock* loop_exit = new (GetAllocator()) HBasicBlock(graph_); graph_->AddBlock(loop_preheader); graph_->AddBlock(loop_header); graph_->AddBlock(loop_body); - graph_->AddBlock(loop_exit); - exit_block_ = new (GetAllocator()) HBasicBlock(graph_); - graph_->AddBlock(exit_block_); - graph_->SetExitBlock(exit_block_); + position->ReplaceSuccessor(successor, loop_preheader); - entry_block_->AddSuccessor(loop_preheader); loop_preheader->AddSuccessor(loop_header); // Loop exit first to have a proper exit condition/target for HIf. - loop_header->AddSuccessor(loop_exit); + loop_header->AddSuccessor(successor); loop_header->AddSuccessor(loop_body); loop_body->AddSuccessor(loop_header); - loop_exit->AddSuccessor(exit_block_); *header_p = loop_header; *body_p = loop_body; - - parameter_ = new (GetAllocator()) HParameterValue(graph_->GetDexFile(), - dex::TypeIndex(0), - 0, - DataType::Type::kInt32); - entry_block_->AddInstruction(parameter_); - loop_exit->AddInstruction(new (GetAllocator()) HReturnVoid()); - exit_block_->AddInstruction(new (GetAllocator()) HExit()); } void CreateBasicLoopDataFlow(HBasicBlock* loop_header, HBasicBlock* loop_body) { @@ -84,11 +67,12 @@ class SuperblockClonerTest : public OptimizingUnitTest { // Header block. 
HPhi* phi = new (GetAllocator()) HPhi(GetAllocator(), 0, 0, DataType::Type::kInt32); HInstruction* suspend_check = new (GetAllocator()) HSuspendCheck(); + HInstruction* loop_check = new (GetAllocator()) HGreaterThanOrEqual(phi, const_128); loop_header->AddPhi(phi); loop_header->AddInstruction(suspend_check); - loop_header->AddInstruction(new (GetAllocator()) HGreaterThanOrEqual(phi, const_128)); - loop_header->AddInstruction(new (GetAllocator()) HIf(parameter_)); + loop_header->AddInstruction(loop_check); + loop_header->AddInstruction(new (GetAllocator()) HIf(loop_check)); // Loop body block. HInstruction* null_check = new (GetAllocator()) HNullCheck(parameter_, dex_pc); @@ -97,8 +81,8 @@ class SuperblockClonerTest : public OptimizingUnitTest { HInstruction* array_get = new (GetAllocator()) HArrayGet(null_check, bounds_check, DataType::Type::kInt32, dex_pc); HInstruction* add = new (GetAllocator()) HAdd(DataType::Type::kInt32, array_get, const_1); - HInstruction* array_set = - new (GetAllocator()) HArraySet(null_check, bounds_check, add, DataType::Type::kInt32, dex_pc); + HInstruction* array_set = new (GetAllocator()) HArraySet( + null_check, bounds_check, add, DataType::Type::kInt32, dex_pc); HInstruction* induction_inc = new (GetAllocator()) HAdd(DataType::Type::kInt32, phi, const_1); loop_body->AddInstruction(null_check); @@ -123,49 +107,17 @@ class SuperblockClonerTest : public OptimizingUnitTest { null_check->CopyEnvironmentFrom(env); bounds_check->CopyEnvironmentFrom(env); } - - HEnvironment* ManuallyBuildEnvFor(HInstruction* instruction, - ArenaVector<HInstruction*>* current_locals) { - HEnvironment* environment = new (GetAllocator()) HEnvironment( - (GetAllocator()), - current_locals->size(), - graph_->GetArtMethod(), - instruction->GetDexPc(), - instruction); - - environment->CopyFrom(ArrayRef<HInstruction* const>(*current_locals)); - instruction->SetRawEnvironment(environment); - return environment; - } - - bool CheckGraph() { - GraphChecker checker(graph_); - checker.Run(); - if (!checker.IsValid()) { - for (const std::string& error : checker.GetErrors()) { - std::cout << error << std::endl; - } - return false; - } - return true; - } - - HGraph* graph_; - - HBasicBlock* entry_block_; - HBasicBlock* exit_block_; - - HInstruction* parameter_; }; TEST_F(SuperblockClonerTest, IndividualInstrCloner) { HBasicBlock* header = nullptr; HBasicBlock* loop_body = nullptr; - CreateBasicLoopControlFlow(&header, &loop_body); + InitGraph(); + CreateBasicLoopControlFlow(entry_block_, return_block_, &header, &loop_body); CreateBasicLoopDataFlow(header, loop_body); graph_->BuildDominatorTree(); - ASSERT_TRUE(CheckGraph()); + EXPECT_TRUE(CheckGraph()); HSuspendCheck* old_suspend_check = header->GetLoopInformation()->GetSuspendCheck(); CloneAndReplaceInstructionVisitor visitor(graph_); @@ -193,7 +145,8 @@ TEST_F(SuperblockClonerTest, CloneBasicBlocks) { HBasicBlock* loop_body = nullptr; ArenaAllocator* arena = graph_->GetAllocator(); - CreateBasicLoopControlFlow(&header, &loop_body); + InitGraph(); + CreateBasicLoopControlFlow(entry_block_, return_block_, &header, &loop_body); CreateBasicLoopDataFlow(header, loop_body); graph_->BuildDominatorTree(); ASSERT_TRUE(CheckGraph()); @@ -209,7 +162,8 @@ TEST_F(SuperblockClonerTest, CloneBasicBlocks) { SuperblockCloner cloner(graph_, &orig_bb_set, &bb_map, - &hir_map); + &hir_map, + /* induction_range= */ nullptr); EXPECT_TRUE(cloner.IsSubgraphClonable()); cloner.CloneBasicBlocks(); @@ -272,7 +226,8 @@ TEST_F(SuperblockClonerTest, AdjustControlFlowInfo) { 
HBasicBlock* loop_body = nullptr; ArenaAllocator* arena = graph_->GetAllocator(); - CreateBasicLoopControlFlow(&header, &loop_body); + InitGraph(); + CreateBasicLoopControlFlow(entry_block_, return_block_, &header, &loop_body); CreateBasicLoopDataFlow(header, loop_body); graph_->BuildDominatorTree(); ASSERT_TRUE(CheckGraph()); @@ -285,8 +240,9 @@ TEST_F(SuperblockClonerTest, AdjustControlFlowInfo) { SuperblockCloner cloner(graph_, &orig_bb_set, - nullptr, - nullptr); + /* bb_map= */ nullptr, + /* hir_map= */ nullptr, + /* induction_range= */ nullptr); EXPECT_TRUE(cloner.IsSubgraphClonable()); cloner.FindAndSetLocalAreaForAdjustments(); @@ -303,4 +259,488 @@ TEST_F(SuperblockClonerTest, AdjustControlFlowInfo) { EXPECT_TRUE(loop_info->IsBackEdge(*loop_body)); } +// Tests IsSubgraphConnected function for negative case. +TEST_F(SuperblockClonerTest, IsGraphConnected) { + HBasicBlock* header = nullptr; + HBasicBlock* loop_body = nullptr; + ArenaAllocator* arena = graph_->GetAllocator(); + + InitGraph(); + CreateBasicLoopControlFlow(entry_block_, return_block_, &header, &loop_body); + CreateBasicLoopDataFlow(header, loop_body); + HBasicBlock* unreachable_block = new (GetAllocator()) HBasicBlock(graph_); + graph_->AddBlock(unreachable_block); + + HBasicBlockSet bb_set( + arena, graph_->GetBlocks().size(), false, kArenaAllocSuperblockCloner); + bb_set.SetBit(header->GetBlockId()); + bb_set.SetBit(loop_body->GetBlockId()); + bb_set.SetBit(unreachable_block->GetBlockId()); + + EXPECT_FALSE(IsSubgraphConnected(&bb_set, graph_)); + EXPECT_EQ(bb_set.NumSetBits(), 1u); + EXPECT_TRUE(bb_set.IsBitSet(unreachable_block->GetBlockId())); +} + +// Tests SuperblockCloner for loop peeling case. +// +// Control Flow of the example (ignoring critical edges splitting). +// +// Before After +// +// |B| |B| +// | | +// v v +// |1| |1| +// | | +// v v +// |2|<-\ (6) |2A| +// / \ / / \ +// v v/ / v +// |4| |3| / |3A| (7) +// | / / +// v | v +// |E| \ |2|<-\ +// \ / \ / +// v v / +// |4| |3| +// | +// v +// |E| +TEST_F(SuperblockClonerTest, LoopPeeling) { + HBasicBlock* header = nullptr; + HBasicBlock* loop_body = nullptr; + + InitGraph(); + CreateBasicLoopControlFlow(entry_block_, return_block_, &header, &loop_body); + CreateBasicLoopDataFlow(header, loop_body); + graph_->BuildDominatorTree(); + EXPECT_TRUE(CheckGraph()); + + HBasicBlockMap bb_map( + std::less<HBasicBlock*>(), graph_->GetAllocator()->Adapter(kArenaAllocSuperblockCloner)); + HInstructionMap hir_map( + std::less<HInstruction*>(), graph_->GetAllocator()->Adapter(kArenaAllocSuperblockCloner)); + + HLoopInformation* loop_info = header->GetLoopInformation(); + PeelUnrollHelper helper(loop_info, &bb_map, &hir_map, /* induction_range= */ nullptr); + EXPECT_TRUE(helper.IsLoopClonable()); + HBasicBlock* new_header = helper.DoPeeling(); + HLoopInformation* new_loop_info = new_header->GetLoopInformation(); + + EXPECT_TRUE(CheckGraph()); + + // Check loop body successors. + EXPECT_EQ(loop_body->GetSingleSuccessor(), header); + EXPECT_EQ(bb_map.Get(loop_body)->GetSingleSuccessor(), header); + + // Check loop structure. + EXPECT_EQ(header, new_header); + EXPECT_EQ(new_loop_info->GetHeader(), header); + EXPECT_EQ(new_loop_info->GetBackEdges().size(), 1u); + EXPECT_EQ(new_loop_info->GetBackEdges()[0], loop_body); +} + +// Tests SuperblockCloner for loop unrolling case. +// +// Control Flow of the example (ignoring critical edges splitting). 
+// +// Before After +// +// |B| |B| +// | | +// v v +// |1| |1| +// | | +// v v +// |2|<-\ (6) |2A|<-\ +// / \ / / \ \ +// v v/ / v \ +// |4| |3| /(7)|3A| | +// | / / / +// v | v / +// |E| \ |2| / +// \ / \ / +// v v/ +// |4| |3| +// | +// v +// |E| +TEST_F(SuperblockClonerTest, LoopUnrolling) { + HBasicBlock* header = nullptr; + HBasicBlock* loop_body = nullptr; + + InitGraph(); + CreateBasicLoopControlFlow(entry_block_, return_block_, &header, &loop_body); + CreateBasicLoopDataFlow(header, loop_body); + graph_->BuildDominatorTree(); + EXPECT_TRUE(CheckGraph()); + + HBasicBlockMap bb_map( + std::less<HBasicBlock*>(), graph_->GetAllocator()->Adapter(kArenaAllocSuperblockCloner)); + HInstructionMap hir_map( + std::less<HInstruction*>(), graph_->GetAllocator()->Adapter(kArenaAllocSuperblockCloner)); + + HLoopInformation* loop_info = header->GetLoopInformation(); + PeelUnrollHelper helper(loop_info, &bb_map, &hir_map, /* induction_range= */ nullptr); + EXPECT_TRUE(helper.IsLoopClonable()); + HBasicBlock* new_header = helper.DoUnrolling(); + + EXPECT_TRUE(CheckGraph()); + + // Check loop body successors. + EXPECT_EQ(loop_body->GetSingleSuccessor(), bb_map.Get(header)); + EXPECT_EQ(bb_map.Get(loop_body)->GetSingleSuccessor(), header); + + // Check loop structure. + EXPECT_EQ(header, new_header); + EXPECT_EQ(loop_info, new_header->GetLoopInformation()); + EXPECT_EQ(loop_info->GetHeader(), new_header); + EXPECT_EQ(loop_info->GetBackEdges().size(), 1u); + EXPECT_EQ(loop_info->GetBackEdges()[0], bb_map.Get(loop_body)); +} + +// Checks that loop unrolling works fine for a loop with multiple back edges. Tests that after +// the transformation the loop has a single preheader. +TEST_F(SuperblockClonerTest, LoopPeelingMultipleBackEdges) { + HBasicBlock* header = nullptr; + HBasicBlock* loop_body = nullptr; + + InitGraph(); + CreateBasicLoopControlFlow(entry_block_, return_block_, &header, &loop_body); + CreateBasicLoopDataFlow(header, loop_body); + + // Transform a basic loop to have multiple back edges. 
+ HBasicBlock* latch = header->GetSuccessors()[1]; + HBasicBlock* if_block = new (GetAllocator()) HBasicBlock(graph_); + HBasicBlock* temp1 = new (GetAllocator()) HBasicBlock(graph_); + graph_->AddBlock(if_block); + graph_->AddBlock(temp1); + header->ReplaceSuccessor(latch, if_block); + if_block->AddSuccessor(latch); + if_block->AddSuccessor(temp1); + temp1->AddSuccessor(header); + + if_block->AddInstruction(new (GetAllocator()) HIf(parameter_)); + + HInstructionIterator it(header->GetPhis()); + DCHECK(!it.Done()); + HPhi* loop_phi = it.Current()->AsPhi(); + HInstruction* temp_add = new (GetAllocator()) HAdd(DataType::Type::kInt32, + loop_phi, + graph_->GetIntConstant(2)); + temp1->AddInstruction(temp_add); + temp1->AddInstruction(new (GetAllocator()) HGoto()); + loop_phi->AddInput(temp_add); + + graph_->BuildDominatorTree(); + EXPECT_TRUE(CheckGraph()); + + HLoopInformation* loop_info = header->GetLoopInformation(); + PeelUnrollSimpleHelper helper(loop_info, /* induction_range= */ nullptr); + HBasicBlock* new_header = helper.DoPeeling(); + EXPECT_EQ(header, new_header); + + EXPECT_TRUE(CheckGraph()); + EXPECT_EQ(header->GetPredecessors().size(), 3u); +} + +static void CheckLoopStructureForLoopPeelingNested(HBasicBlock* loop1_header, + HBasicBlock* loop2_header, + HBasicBlock* loop3_header) { + EXPECT_EQ(loop1_header->GetLoopInformation()->GetHeader(), loop1_header); + EXPECT_EQ(loop2_header->GetLoopInformation()->GetHeader(), loop2_header); + EXPECT_EQ(loop3_header->GetLoopInformation()->GetHeader(), loop3_header); + EXPECT_EQ(loop1_header->GetLoopInformation()->GetPreHeader()->GetLoopInformation(), nullptr); + EXPECT_EQ(loop2_header->GetLoopInformation()->GetPreHeader()->GetLoopInformation(), nullptr); + EXPECT_EQ(loop3_header->GetLoopInformation()->GetPreHeader()->GetLoopInformation()->GetHeader(), + loop2_header); +} + +TEST_F(SuperblockClonerTest, LoopPeelingNested) { + HBasicBlock* header = nullptr; + HBasicBlock* loop_body = nullptr; + + InitGraph(); + + // Create the following nested structure of loops + // Headers: 1 2 3 + // [ ], [ [ ] ] + CreateBasicLoopControlFlow(entry_block_, return_block_, &header, &loop_body); + CreateBasicLoopDataFlow(header, loop_body); + HBasicBlock* loop1_header = header; + + CreateBasicLoopControlFlow(header, return_block_, &header, &loop_body); + CreateBasicLoopDataFlow(header, loop_body); + HBasicBlock* loop2_header = header; + + CreateBasicLoopControlFlow(header, header->GetSuccessors()[1], &header, &loop_body); + CreateBasicLoopDataFlow(header, loop_body); + HBasicBlock* loop3_header = header; + + graph_->BuildDominatorTree(); + EXPECT_TRUE(CheckGraph()); + + HLoopInformation* loop2_info_before = loop2_header->GetLoopInformation(); + HLoopInformation* loop3_info_before = loop3_header->GetLoopInformation(); + + // Check nested loops structure. + CheckLoopStructureForLoopPeelingNested(loop1_header, loop2_header, loop3_header); + PeelUnrollSimpleHelper helper(loop1_header->GetLoopInformation(), /* induction_range= */ nullptr); + helper.DoPeeling(); + // Check that nested loops structure has not changed after the transformation. + CheckLoopStructureForLoopPeelingNested(loop1_header, loop2_header, loop3_header); + + // Test that the loop info is preserved. 
+ EXPECT_EQ(loop2_info_before, loop2_header->GetLoopInformation()); + EXPECT_EQ(loop3_info_before, loop3_header->GetLoopInformation()); + + EXPECT_EQ(loop3_info_before->GetPreHeader()->GetLoopInformation(), loop2_info_before); + EXPECT_EQ(loop2_info_before->GetPreHeader()->GetLoopInformation(), nullptr); + + EXPECT_EQ(helper.GetRegionToBeAdjusted(), nullptr); + + EXPECT_TRUE(CheckGraph()); +} + +// Checks that the loop population is correctly propagated after an inner loop is peeled. +TEST_F(SuperblockClonerTest, OuterLoopPopulationAfterInnerPeeled) { + HBasicBlock* header = nullptr; + HBasicBlock* loop_body = nullptr; + + InitGraph(); + + // Create the following nested structure of loops + // Headers: 1 2 3 4 + // [ [ [ ] ] ], [ ] + CreateBasicLoopControlFlow(entry_block_, return_block_, &header, &loop_body); + CreateBasicLoopDataFlow(header, loop_body); + HBasicBlock* loop1_header = header; + + CreateBasicLoopControlFlow(header, header->GetSuccessors()[1], &header, &loop_body); + CreateBasicLoopDataFlow(header, loop_body); + HBasicBlock* loop2_header = header; + + CreateBasicLoopControlFlow(header, header->GetSuccessors()[1], &header, &loop_body); + CreateBasicLoopDataFlow(header, loop_body); + HBasicBlock* loop3_header = header; + + CreateBasicLoopControlFlow(loop1_header, return_block_, &header, &loop_body); + CreateBasicLoopDataFlow(header, loop_body); + HBasicBlock* loop4_header = header; + + graph_->BuildDominatorTree(); + EXPECT_TRUE(CheckGraph()); + + PeelUnrollSimpleHelper helper(loop3_header->GetLoopInformation(), /* induction_range= */ nullptr); + helper.DoPeeling(); + HLoopInformation* loop1 = loop1_header->GetLoopInformation(); + HLoopInformation* loop2 = loop2_header->GetLoopInformation(); + HLoopInformation* loop3 = loop3_header->GetLoopInformation(); + HLoopInformation* loop4 = loop4_header->GetLoopInformation(); + + EXPECT_TRUE(loop1->Contains(*loop2_header)); + EXPECT_TRUE(loop1->Contains(*loop3_header)); + EXPECT_TRUE(loop1->Contains(*loop3_header->GetLoopInformation()->GetPreHeader())); + + // Check that loop4 info has not been touched after local run of AnalyzeLoops. + EXPECT_EQ(loop4, loop4_header->GetLoopInformation()); + + EXPECT_TRUE(loop1->IsIn(*loop1)); + EXPECT_TRUE(loop2->IsIn(*loop1)); + EXPECT_TRUE(loop3->IsIn(*loop1)); + EXPECT_TRUE(loop3->IsIn(*loop2)); + EXPECT_TRUE(!loop4->IsIn(*loop1)); + + EXPECT_EQ(loop4->GetPreHeader()->GetLoopInformation(), nullptr); + + EXPECT_EQ(helper.GetRegionToBeAdjusted(), loop2); + + EXPECT_TRUE(CheckGraph()); +} + +// Checks the case when inner loop have an exit not to its immediate outer_loop but some other loop +// in the hierarchy. Loop population information must be valid after loop peeling. +TEST_F(SuperblockClonerTest, NestedCaseExitToOutermost) { + HBasicBlock* header = nullptr; + HBasicBlock* loop_body = nullptr; + + InitGraph(); + + // Create the following nested structure of loops then peel loop3. 
+ // Headers: 1 2 3 + // [ [ [ ] ] ] + CreateBasicLoopControlFlow(entry_block_, return_block_, &header, &loop_body); + CreateBasicLoopDataFlow(header, loop_body); + HBasicBlock* loop1_header = header; + HBasicBlock* loop_body1 = loop_body; + + CreateBasicLoopControlFlow(header, header->GetSuccessors()[1], &header, &loop_body); + CreateBasicLoopDataFlow(header, loop_body); + + CreateBasicLoopControlFlow(header, header->GetSuccessors()[1], &header, &loop_body); + CreateBasicLoopDataFlow(header, loop_body); + HBasicBlock* loop3_header = header; + HBasicBlock* loop_body3 = loop_body; + + // Change the loop3 - insert an exit which leads to loop1. + HBasicBlock* loop3_extra_if_block = new (GetAllocator()) HBasicBlock(graph_); + graph_->AddBlock(loop3_extra_if_block); + loop3_extra_if_block->AddInstruction(new (GetAllocator()) HIf(parameter_)); + + loop3_header->ReplaceSuccessor(loop_body3, loop3_extra_if_block); + loop3_extra_if_block->AddSuccessor(loop_body1); // Long exit. + loop3_extra_if_block->AddSuccessor(loop_body3); + + graph_->BuildDominatorTree(); + EXPECT_TRUE(CheckGraph()); + + HBasicBlock* loop3_long_exit = loop3_extra_if_block->GetSuccessors()[0]; + EXPECT_TRUE(loop1_header->GetLoopInformation()->Contains(*loop3_long_exit)); + + PeelUnrollSimpleHelper helper(loop3_header->GetLoopInformation(), /* induction_range= */ nullptr); + helper.DoPeeling(); + + HLoopInformation* loop1 = loop1_header->GetLoopInformation(); + // Check that after the transformation the local area for CF adjustments has been chosen + // correctly and loop population has been updated. + loop3_long_exit = loop3_extra_if_block->GetSuccessors()[0]; + EXPECT_TRUE(loop1->Contains(*loop3_long_exit)); + + EXPECT_EQ(helper.GetRegionToBeAdjusted(), loop1); + + EXPECT_TRUE(loop1->Contains(*loop3_header)); + EXPECT_TRUE(loop1->Contains(*loop3_header->GetLoopInformation()->GetPreHeader())); + + EXPECT_TRUE(CheckGraph()); +} + +TEST_F(SuperblockClonerTest, FastCaseCheck) { + HBasicBlock* header = nullptr; + HBasicBlock* loop_body = nullptr; + ArenaAllocator* arena = graph_->GetAllocator(); + + InitGraph(); + CreateBasicLoopControlFlow(entry_block_, return_block_, &header, &loop_body); + CreateBasicLoopDataFlow(header, loop_body); + graph_->BuildDominatorTree(); + + HLoopInformation* loop_info = header->GetLoopInformation(); + + ArenaBitVector orig_bb_set( + arena, graph_->GetBlocks().size(), false, kArenaAllocSuperblockCloner); + orig_bb_set.Union(&loop_info->GetBlocks()); + + HEdgeSet remap_orig_internal(graph_->GetAllocator()->Adapter(kArenaAllocSuperblockCloner)); + HEdgeSet remap_copy_internal(graph_->GetAllocator()->Adapter(kArenaAllocSuperblockCloner)); + HEdgeSet remap_incoming(graph_->GetAllocator()->Adapter(kArenaAllocSuperblockCloner)); + + CollectRemappingInfoForPeelUnroll(true, + loop_info, + &remap_orig_internal, + &remap_copy_internal, + &remap_incoming); + + // Insert some extra nodes and edges. + HBasicBlock* preheader = loop_info->GetPreHeader(); + orig_bb_set.SetBit(preheader->GetBlockId()); + + // Adjust incoming edges. 
+ remap_incoming.clear(); + remap_incoming.insert(HEdge(preheader->GetSinglePredecessor(), preheader)); + + HBasicBlockMap bb_map(std::less<HBasicBlock*>(), arena->Adapter(kArenaAllocSuperblockCloner)); + HInstructionMap hir_map(std::less<HInstruction*>(), arena->Adapter(kArenaAllocSuperblockCloner)); + + SuperblockCloner cloner(graph_, + &orig_bb_set, + &bb_map, + &hir_map, + /* induction_range= */ nullptr); + cloner.SetSuccessorRemappingInfo(&remap_orig_internal, &remap_copy_internal, &remap_incoming); + + EXPECT_FALSE(cloner.IsFastCase()); +} + +// Helper for FindCommonLoop which also check that FindCommonLoop is symmetric. +static HLoopInformation* FindCommonLoopCheck(HLoopInformation* loop1, HLoopInformation* loop2) { + HLoopInformation* common_loop12 = FindCommonLoop(loop1, loop2); + HLoopInformation* common_loop21 = FindCommonLoop(loop2, loop1); + EXPECT_EQ(common_loop21, common_loop12); + return common_loop12; +} + +// Tests FindCommonLoop function on a loop nest. +TEST_F(SuperblockClonerTest, FindCommonLoop) { + HBasicBlock* header = nullptr; + HBasicBlock* loop_body = nullptr; + + InitGraph(); + + // Create the following nested structure of loops + // Headers: 1 2 3 4 5 + // [ [ [ ] ], [ ] ], [ ] + CreateBasicLoopControlFlow(entry_block_, return_block_, &header, &loop_body); + CreateBasicLoopDataFlow(header, loop_body); + HBasicBlock* loop1_header = header; + + CreateBasicLoopControlFlow(header, header->GetSuccessors()[1], &header, &loop_body); + CreateBasicLoopDataFlow(header, loop_body); + HBasicBlock* loop2_header = header; + + CreateBasicLoopControlFlow(header, header->GetSuccessors()[1], &header, &loop_body); + CreateBasicLoopDataFlow(header, loop_body); + HBasicBlock* loop3_header = header; + + CreateBasicLoopControlFlow(loop2_header, loop2_header->GetSuccessors()[0], &header, &loop_body); + CreateBasicLoopDataFlow(header, loop_body); + HBasicBlock* loop4_header = header; + + CreateBasicLoopControlFlow(loop1_header, return_block_, &header, &loop_body); + CreateBasicLoopDataFlow(header, loop_body); + HBasicBlock* loop5_header = header; + + graph_->BuildDominatorTree(); + EXPECT_TRUE(CheckGraph()); + + HLoopInformation* loop1 = loop1_header->GetLoopInformation(); + HLoopInformation* loop2 = loop2_header->GetLoopInformation(); + HLoopInformation* loop3 = loop3_header->GetLoopInformation(); + HLoopInformation* loop4 = loop4_header->GetLoopInformation(); + HLoopInformation* loop5 = loop5_header->GetLoopInformation(); + + EXPECT_TRUE(loop1->IsIn(*loop1)); + EXPECT_TRUE(loop2->IsIn(*loop1)); + EXPECT_TRUE(loop3->IsIn(*loop1)); + EXPECT_TRUE(loop3->IsIn(*loop2)); + EXPECT_TRUE(loop4->IsIn(*loop1)); + + EXPECT_FALSE(loop5->IsIn(*loop1)); + EXPECT_FALSE(loop4->IsIn(*loop2)); + EXPECT_FALSE(loop4->IsIn(*loop3)); + + EXPECT_EQ(loop1->GetPreHeader()->GetLoopInformation(), nullptr); + EXPECT_EQ(loop4->GetPreHeader()->GetLoopInformation(), loop1); + + EXPECT_EQ(FindCommonLoopCheck(nullptr, nullptr), nullptr); + EXPECT_EQ(FindCommonLoopCheck(loop2, nullptr), nullptr); + + EXPECT_EQ(FindCommonLoopCheck(loop1, loop1), loop1); + EXPECT_EQ(FindCommonLoopCheck(loop1, loop2), loop1); + EXPECT_EQ(FindCommonLoopCheck(loop1, loop3), loop1); + EXPECT_EQ(FindCommonLoopCheck(loop1, loop4), loop1); + EXPECT_EQ(FindCommonLoopCheck(loop1, loop5), nullptr); + + EXPECT_EQ(FindCommonLoopCheck(loop2, loop3), loop2); + EXPECT_EQ(FindCommonLoopCheck(loop2, loop4), loop1); + EXPECT_EQ(FindCommonLoopCheck(loop2, loop5), nullptr); + + EXPECT_EQ(FindCommonLoopCheck(loop3, loop4), loop1); + 
EXPECT_EQ(FindCommonLoopCheck(loop3, loop5), nullptr); + + EXPECT_EQ(FindCommonLoopCheck(loop4, loop5), nullptr); + + EXPECT_EQ(FindCommonLoopCheck(loop5, loop5), loop5); +} + } // namespace art diff --git a/compiler/optimizing/x86_memory_gen.cc b/compiler/optimizing/x86_memory_gen.cc index 0271850f29..b1abcf6747 100644 --- a/compiler/optimizing/x86_memory_gen.cc +++ b/compiler/optimizing/x86_memory_gen.cc @@ -31,7 +31,7 @@ class MemoryOperandVisitor : public HGraphVisitor { do_implicit_null_checks_(do_implicit_null_checks) {} private: - void VisitBoundsCheck(HBoundsCheck* check) OVERRIDE { + void VisitBoundsCheck(HBoundsCheck* check) override { // Replace the length by the array itself, so that we can do compares to memory. HArrayLength* array_len = check->InputAt(1)->AsArrayLength(); @@ -76,9 +76,10 @@ X86MemoryOperandGeneration::X86MemoryOperandGeneration(HGraph* graph, do_implicit_null_checks_(codegen->GetCompilerOptions().GetImplicitNullChecks()) { } -void X86MemoryOperandGeneration::Run() { +bool X86MemoryOperandGeneration::Run() { MemoryOperandVisitor visitor(graph_, do_implicit_null_checks_); visitor.VisitInsertionOrder(); + return true; } } // namespace x86 diff --git a/compiler/optimizing/x86_memory_gen.h b/compiler/optimizing/x86_memory_gen.h index 5f15d9f1e6..3f4178d58a 100644 --- a/compiler/optimizing/x86_memory_gen.h +++ b/compiler/optimizing/x86_memory_gen.h @@ -31,7 +31,7 @@ class X86MemoryOperandGeneration : public HOptimization { CodeGenerator* codegen, OptimizingCompilerStats* stats); - void Run() OVERRIDE; + bool Run() override; static constexpr const char* kX86MemoryOperandGenerationPassName = "x86_memory_operand_generation"; |
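The x86_memory_gen hunks above also show the updated pass interface: Run() now returns bool rather than void, apparently reporting whether the pass changed the graph, and X86MemoryOperandGeneration conservatively answers true. A standalone sketch of that pattern with stand-in types, none of which are ART classes:

#include <iostream>

// Stand-in types, for illustration only.
struct Graph {
  int instruction_count = 10;
};

class Optimization {
 public:
  explicit Optimization(Graph* graph) : graph_(graph) {}
  virtual ~Optimization() {}
  // Returns whether the pass modified the graph.
  virtual bool Run() = 0;

 protected:
  Graph* graph_;
};

class RemoveOneInstruction : public Optimization {
 public:
  using Optimization::Optimization;
  bool Run() override {
    if (graph_->instruction_count == 0) {
      return false;  // Nothing to do: report "no change".
    }
    graph_->instruction_count -= 1;
    return true;
  }
};

int main() {
  Graph graph;
  RemoveOneInstruction pass(&graph);
  std::cout << std::boolalpha << pass.Run() << "\n";  // true
  graph.instruction_count = 0;
  std::cout << std::boolalpha << pass.Run() << "\n";  // false
  return 0;
}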