Diffstat (limited to 'compiler/optimizing')
29 files changed, 1454 insertions, 628 deletions
diff --git a/compiler/optimizing/bounds_check_elimination.cc b/compiler/optimizing/bounds_check_elimination.cc index da2f9cbed5..eee6116098 100644 --- a/compiler/optimizing/bounds_check_elimination.cc +++ b/compiler/optimizing/bounds_check_elimination.cc @@ -1142,7 +1142,7 @@ class BCEVisitor : public HGraphVisitor { loop->IsDefinedOutOfTheLoop(array_get->InputAt(1))) { SideEffects loop_effects = side_effects_.GetLoopEffects(loop->GetHeader()); if (!array_get->GetSideEffects().MayDependOn(loop_effects)) { - HoistToPreheaderOrDeoptBlock(loop, array_get); + HoistToPreHeaderOrDeoptBlock(loop, array_get); } } } @@ -1280,7 +1280,8 @@ class BCEVisitor : public HGraphVisitor { // as runtime test. By restricting dynamic bce to unit strides (with a maximum of 32-bit // iterations) and by not combining access (e.g. a[i], a[i-3], a[i+5] etc.), these tests // correctly guard against any possible OOB (including arithmetic wrap-around cases). - HBasicBlock* block = TransformLoopForDeoptimizationIfNeeded(loop, needs_taken_test); + TransformLoopForDeoptimizationIfNeeded(loop, needs_taken_test); + HBasicBlock* block = GetPreHeader(loop, instruction); induction_range_.GenerateRangeCode(instruction, index, GetGraph(), block, &lower, &upper); if (lower != nullptr) { InsertDeopt(loop, block, new (GetGraph()->GetArena()) HAbove(lower, upper)); @@ -1358,7 +1359,7 @@ class BCEVisitor : public HGraphVisitor { return true; } else if (length->IsArrayLength() && length->GetBlock()->GetLoopInformation() == loop) { if (CanHandleNullCheck(loop, length->InputAt(0), needs_taken_test)) { - HoistToPreheaderOrDeoptBlock(loop, length); + HoistToPreHeaderOrDeoptBlock(loop, length); return true; } } @@ -1376,7 +1377,8 @@ class BCEVisitor : public HGraphVisitor { HInstruction* array = check->InputAt(0); if (loop->IsDefinedOutOfTheLoop(array)) { // Generate: if (array == null) deoptimize; - HBasicBlock* block = TransformLoopForDeoptimizationIfNeeded(loop, needs_taken_test); + TransformLoopForDeoptimizationIfNeeded(loop, needs_taken_test); + HBasicBlock* block = GetPreHeader(loop, check); HInstruction* cond = new (GetGraph()->GetArena()) HEqual(array, GetGraph()->GetNullConstant()); InsertDeopt(loop, block, cond); @@ -1423,6 +1425,28 @@ class BCEVisitor : public HGraphVisitor { return true; } + /** + * Returns appropriate preheader for the loop, depending on whether the + * instruction appears in the loop header or proper loop-body. + */ + HBasicBlock* GetPreHeader(HLoopInformation* loop, HInstruction* instruction) { + // Use preheader unless there is an earlier generated deoptimization block since + // hoisted expressions may depend on and/or used by the deoptimization tests. + HBasicBlock* header = loop->GetHeader(); + const uint32_t loop_id = header->GetBlockId(); + auto it = taken_test_loop_.find(loop_id); + if (it != taken_test_loop_.end()) { + HBasicBlock* block = it->second; + // If always taken, keep it that way by returning the original preheader, + // which can be found by following the predecessor of the true-block twice. + if (instruction->GetBlock() == header) { + return block->GetSinglePredecessor()->GetSinglePredecessor(); + } + return block; + } + return loop->GetPreHeader(); + } + /** Inserts a deoptimization test. */ void InsertDeopt(HLoopInformation* loop, HBasicBlock* block, HInstruction* condition) { HInstruction* suspend = loop->GetSuspendCheck(); @@ -1437,28 +1461,17 @@ class BCEVisitor : public HGraphVisitor { } /** Hoists instruction out of the loop to preheader or deoptimization block. 
*/ - void HoistToPreheaderOrDeoptBlock(HLoopInformation* loop, HInstruction* instruction) { - // Use preheader unless there is an earlier generated deoptimization block since - // hoisted expressions may depend on and/or used by the deoptimization tests. - const uint32_t loop_id = loop->GetHeader()->GetBlockId(); - HBasicBlock* preheader = loop->GetPreHeader(); - HBasicBlock* block = preheader; - auto it = taken_test_loop_.find(loop_id); - if (it != taken_test_loop_.end()) { - block = it->second; - } - // Hoist the instruction. + void HoistToPreHeaderOrDeoptBlock(HLoopInformation* loop, HInstruction* instruction) { + HBasicBlock* block = GetPreHeader(loop, instruction); DCHECK(!instruction->HasEnvironment()); instruction->MoveBefore(block->GetLastInstruction()); } /** - * Adds a new taken-test structure to a loop if needed (and not already done). + * Adds a new taken-test structure to a loop if needed and not already done. * The taken-test protects range analysis evaluation code to avoid any * deoptimization caused by incorrect trip-count evaluation in non-taken loops. * - * Returns block in which deoptimizations/invariants can be put. - * * old_preheader * | * if_block <- taken-test protects deoptimization block @@ -1490,16 +1503,11 @@ class BCEVisitor : public HGraphVisitor { * array[i] = 0; * } */ - HBasicBlock* TransformLoopForDeoptimizationIfNeeded(HLoopInformation* loop, bool needs_taken_test) { - // Not needed (can use preheader), or already done (can reuse)? + void TransformLoopForDeoptimizationIfNeeded(HLoopInformation* loop, bool needs_taken_test) { + // Not needed (can use preheader) or already done (can reuse)? const uint32_t loop_id = loop->GetHeader()->GetBlockId(); - if (!needs_taken_test) { - return loop->GetPreHeader(); - } else { - auto it = taken_test_loop_.find(loop_id); - if (it != taken_test_loop_.end()) { - return it->second; - } + if (!needs_taken_test || taken_test_loop_.find(loop_id) != taken_test_loop_.end()) { + return; } // Generate top test structure. @@ -1528,7 +1536,6 @@ class BCEVisitor : public HGraphVisitor { if_block->AddInstruction(new (GetGraph()->GetArena()) HIf(condition)); taken_test_loop_.Put(loop_id, true_block); - return true_block; } /** @@ -1543,7 +1550,7 @@ class BCEVisitor : public HGraphVisitor { * \ / * x_1 = phi(x_0, null) <- synthetic phi * | - * header + * new_preheader */ void InsertPhiNodes() { // Scan all new deoptimization blocks. diff --git a/compiler/optimizing/code_generator.cc b/compiler/optimizing/code_generator.cc index 53d3615a41..ea0b9eca9a 100644 --- a/compiler/optimizing/code_generator.cc +++ b/compiler/optimizing/code_generator.cc @@ -997,6 +997,12 @@ void CodeGenerator::RecordPcInfo(HInstruction* instruction, stack_map_stream_.EndStackMapEntry(); } +bool CodeGenerator::HasStackMapAtCurrentPc() { + uint32_t pc = GetAssembler()->CodeSize(); + size_t count = stack_map_stream_.GetNumberOfStackMaps(); + return count > 0 && stack_map_stream_.GetStackMap(count - 1).native_pc_offset == pc; +} + void CodeGenerator::RecordCatchBlockInfo() { ArenaAllocator* arena = graph_->GetArena(); @@ -1320,12 +1326,6 @@ void CodeGenerator::ValidateInvokeRuntime(HInstruction* instruction, SlowPathCod << "instruction->DebugName()=" << instruction->DebugName() << " slow_path->GetDescription()=" << slow_path->GetDescription(); DCHECK(instruction->GetSideEffects().Includes(SideEffects::CanTriggerGC()) || - // Control flow would not come back into the code if a fatal slow - // path is taken, so we do not care if it triggers GC. 
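For orientation, the HasStackMapAtCurrentPc() helper introduced in the code_generator.cc hunk above exists so that native-PC-to-stack-map lookups stay unambiguous: a back-end pads with a nop whenever the previous instruction already recorded a map at the current offset. A condensed sketch of the pattern, assuming the StackMapStream accessors used in the hunk:

    // In the shared CodeGenerator: only the most recently emitted stack map
    // can collide with the current PC, since maps are appended in code order.
    bool CodeGenerator::HasStackMapAtCurrentPc() {
      uint32_t pc = GetAssembler()->CodeSize();
      size_t count = stack_map_stream_.GetNumberOfStackMaps();
      return count > 0 && stack_map_stream_.GetStackMap(count - 1).native_pc_offset == pc;
    }

    // In an architecture back-end (see the ARM hunk further down): advance the
    // PC before recording a second map at the same offset.
    void InstructionCodeGeneratorARM::VisitNativeDebugInfo(HNativeDebugInfo* info) {
      if (codegen_->HasStackMapAtCurrentPc()) {
        __ nop();  // Ensure the two stack maps end up at distinct native PCs.
      }
      codegen_->RecordPcInfo(info, info->GetDexPc());
    }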
- slow_path->IsFatal() || - // HDeoptimize is a special case: we know we are not coming back from - // it into the code. - instruction->IsDeoptimize() || // When read barriers are enabled, some instructions use a // slow path to emit a read barrier, which does not trigger // GC, is not fatal, nor is emitted by HDeoptimize diff --git a/compiler/optimizing/code_generator.h b/compiler/optimizing/code_generator.h index eade05d7b6..5958cd89bc 100644 --- a/compiler/optimizing/code_generator.h +++ b/compiler/optimizing/code_generator.h @@ -269,6 +269,8 @@ class CodeGenerator { // Record native to dex mapping for a suspend point. Required by runtime. void RecordPcInfo(HInstruction* instruction, uint32_t dex_pc, SlowPathCode* slow_path = nullptr); + // Check whether we have already recorded mapping at this PC. + bool HasStackMapAtCurrentPc(); bool CanMoveNullCheckToUser(HNullCheck* null_check); void MaybeRecordImplicitNullCheck(HInstruction* instruction); @@ -611,7 +613,7 @@ class CodeGenerator { ArenaVector<SlowPathCode*> slow_paths_; - // The current slow path that we're generating code for. + // The current slow-path that we're generating code for. SlowPathCode* current_slow_path_; // The current block index in `block_order_` of the block @@ -672,6 +674,122 @@ class CallingConvention { DISALLOW_COPY_AND_ASSIGN(CallingConvention); }; +/** + * A templated class SlowPathGenerator with a templated method NewSlowPath() + * that can be used by any code generator to share equivalent slow-paths with + * the objective of reducing generated code size. + * + * InstructionType: instruction that requires SlowPathCodeType + * SlowPathCodeType: subclass of SlowPathCode, with constructor SlowPathCodeType(InstructionType *) + */ +template <typename InstructionType> +class SlowPathGenerator { + static_assert(std::is_base_of<HInstruction, InstructionType>::value, + "InstructionType is not a subclass of art::HInstruction"); + + public: + SlowPathGenerator(HGraph* graph, CodeGenerator* codegen) + : graph_(graph), + codegen_(codegen), + slow_path_map_(std::less<uint32_t>(), graph->GetArena()->Adapter(kArenaAllocSlowPaths)) {} + + // Creates and adds a new slow-path, if needed, or returns existing one otherwise. + // Templating the method (rather than the whole class) on the slow-path type enables + // keeping this code at a generic, non architecture-specific place. + // + // NOTE: This approach assumes each InstructionType only generates one SlowPathCodeType. + // To relax this requirement, we would need some RTTI on the stored slow-paths, + // or template the class as a whole on SlowPathType. + template <typename SlowPathCodeType> + SlowPathCodeType* NewSlowPath(InstructionType* instruction) { + static_assert(std::is_base_of<SlowPathCode, SlowPathCodeType>::value, + "SlowPathCodeType is not a subclass of art::SlowPathCode"); + static_assert(std::is_constructible<SlowPathCodeType, InstructionType*>::value, + "SlowPathCodeType is not constructible from InstructionType*"); + // Iterate over potential candidates for sharing. Currently, only same-typed + // slow-paths with exactly the same dex-pc are viable candidates. + // TODO: pass dex-pc/slow-path-type to run-time to allow even more sharing? 
+ const uint32_t dex_pc = instruction->GetDexPc(); + auto iter = slow_path_map_.find(dex_pc); + if (iter != slow_path_map_.end()) { + auto candidates = iter->second; + for (const auto& it : candidates) { + InstructionType* other_instruction = it.first; + SlowPathCodeType* other_slow_path = down_cast<SlowPathCodeType*>(it.second); + // Determine if the instructions allow for slow-path sharing. + if (HaveSameLiveRegisters(instruction, other_instruction) && + HaveSameStackMap(instruction, other_instruction)) { + // Can share: reuse existing one. + return other_slow_path; + } + } + } else { + // First time this dex-pc is seen. + iter = slow_path_map_.Put(dex_pc, {{}, {graph_->GetArena()->Adapter(kArenaAllocSlowPaths)}}); + } + // Cannot share: create and add new slow-path for this particular dex-pc. + SlowPathCodeType* slow_path = new (graph_->GetArena()) SlowPathCodeType(instruction); + iter->second.emplace_back(std::make_pair(instruction, slow_path)); + codegen_->AddSlowPath(slow_path); + return slow_path; + } + + private: + // Tests if both instructions have same set of live physical registers. This ensures + // the slow-path has exactly the same preamble on saving these registers to stack. + bool HaveSameLiveRegisters(const InstructionType* i1, const InstructionType* i2) const { + const uint32_t core_spill = ~codegen_->GetCoreSpillMask(); + const uint32_t fpu_spill = ~codegen_->GetFpuSpillMask(); + RegisterSet* live1 = i1->GetLocations()->GetLiveRegisters(); + RegisterSet* live2 = i2->GetLocations()->GetLiveRegisters(); + return (((live1->GetCoreRegisters() & core_spill) == + (live2->GetCoreRegisters() & core_spill)) && + ((live1->GetFloatingPointRegisters() & fpu_spill) == + (live2->GetFloatingPointRegisters() & fpu_spill))); + } + + // Tests if both instructions have the same stack map. This ensures the interpreter + // will find exactly the same dex-registers at the same entries. + bool HaveSameStackMap(const InstructionType* i1, const InstructionType* i2) const { + DCHECK(i1->HasEnvironment()); + DCHECK(i2->HasEnvironment()); + // We conservatively test if the two instructions find exactly the same instructions + // and location in each dex-register. This guarantees they will have the same stack map. + HEnvironment* e1 = i1->GetEnvironment(); + HEnvironment* e2 = i2->GetEnvironment(); + if (e1->GetParent() != e2->GetParent() || e1->Size() != e2->Size()) { + return false; + } + for (size_t i = 0, sz = e1->Size(); i < sz; ++i) { + if (e1->GetInstructionAt(i) != e2->GetInstructionAt(i) || + !e1->GetLocationAt(i).Equals(e2->GetLocationAt(i))) { + return false; + } + } + return true; + } + + HGraph* const graph_; + CodeGenerator* const codegen_; + + // Map from dex-pc to vector of already existing instruction/slow-path pairs. + ArenaSafeMap<uint32_t, ArenaVector<std::pair<InstructionType*, SlowPathCode*>>> slow_path_map_; + + DISALLOW_COPY_AND_ASSIGN(SlowPathGenerator); +}; + +class InstructionCodeGenerator : public HGraphVisitor { + public: + InstructionCodeGenerator(HGraph* graph, CodeGenerator* codegen) + : HGraphVisitor(graph), + deopt_slow_paths_(graph, codegen) {} + + protected: + // Add slow-path generator for each instruction/slow-path combination that desires sharing. + // TODO: under current regime, only deopt sharing make sense; extend later. 
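With the deopt_slow_paths_ member in place, a back-end requests a deoptimization slow path through the generator instead of allocating a fresh one per instruction. The ARM visitor in the hunk further down then reduces to the following (shown here for orientation; DeoptimizationSlowPathARM is the ARM-specific SlowPathCode subclass):

    void InstructionCodeGeneratorARM::VisitDeoptimize(HDeoptimize* deoptimize) {
      // NewSlowPath() returns an existing slow path recorded at the same dex pc
      // with identical live registers and environment, or creates one and
      // registers it via codegen_->AddSlowPath().
      SlowPathCode* slow_path =
          deopt_slow_paths_.NewSlowPath<DeoptimizationSlowPathARM>(deoptimize);
      GenerateTestAndBranch(deoptimize,
                            /* condition_input_index */ 0,
                            slow_path->GetEntryLabel(),
                            /* false_target */ nullptr);
    }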
+ SlowPathGenerator<HDeoptimize> deopt_slow_paths_; +}; + } // namespace art #endif // ART_COMPILER_OPTIMIZING_CODE_GENERATOR_H_ diff --git a/compiler/optimizing/code_generator_arm.cc b/compiler/optimizing/code_generator_arm.cc index 9a1f2b8717..d64b8784e1 100644 --- a/compiler/optimizing/code_generator_arm.cc +++ b/compiler/optimizing/code_generator_arm.cc @@ -350,24 +350,24 @@ class TypeCheckSlowPathARM : public SlowPathCode { class DeoptimizationSlowPathARM : public SlowPathCode { public: - explicit DeoptimizationSlowPathARM(HInstruction* instruction) + explicit DeoptimizationSlowPathARM(HDeoptimize* instruction) : instruction_(instruction) {} void EmitNativeCode(CodeGenerator* codegen) OVERRIDE { + CodeGeneratorARM* arm_codegen = down_cast<CodeGeneratorARM*>(codegen); __ Bind(GetEntryLabel()); SaveLiveRegisters(codegen, instruction_->GetLocations()); - DCHECK(instruction_->IsDeoptimize()); - HDeoptimize* deoptimize = instruction_->AsDeoptimize(); - uint32_t dex_pc = deoptimize->GetDexPc(); - CodeGeneratorARM* arm_codegen = down_cast<CodeGeneratorARM*>(codegen); - arm_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pDeoptimize), instruction_, dex_pc, this); + arm_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pDeoptimize), + instruction_, + instruction_->GetDexPc(), + this); CheckEntrypointTypes<kQuickDeoptimize, void, void>(); } const char* GetDescription() const OVERRIDE { return "DeoptimizationSlowPathARM"; } private: - HInstruction* const instruction_; + HDeoptimize* const instruction_; DISALLOW_COPY_AND_ASSIGN(DeoptimizationSlowPathARM); }; @@ -417,6 +417,56 @@ class ArraySetSlowPathARM : public SlowPathCode { DISALLOW_COPY_AND_ASSIGN(ArraySetSlowPathARM); }; +// Slow path marking an object during a read barrier. +class ReadBarrierMarkSlowPathARM : public SlowPathCode { + public: + ReadBarrierMarkSlowPathARM(HInstruction* instruction, Location out, Location obj) + : instruction_(instruction), out_(out), obj_(obj) { + DCHECK(kEmitCompilerReadBarrier); + } + + const char* GetDescription() const OVERRIDE { return "ReadBarrierMarkSlowPathARM"; } + + void EmitNativeCode(CodeGenerator* codegen) OVERRIDE { + LocationSummary* locations = instruction_->GetLocations(); + Register reg_out = out_.AsRegister<Register>(); + DCHECK(locations->CanCall()); + DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(reg_out)); + DCHECK(instruction_->IsInstanceFieldGet() || + instruction_->IsStaticFieldGet() || + instruction_->IsArrayGet() || + instruction_->IsLoadClass() || + instruction_->IsLoadString() || + instruction_->IsInstanceOf() || + instruction_->IsCheckCast()) + << "Unexpected instruction in read barrier marking slow path: " + << instruction_->DebugName(); + + __ Bind(GetEntryLabel()); + SaveLiveRegisters(codegen, locations); + + InvokeRuntimeCallingConvention calling_convention; + CodeGeneratorARM* arm_codegen = down_cast<CodeGeneratorARM*>(codegen); + arm_codegen->Move32(Location::RegisterLocation(calling_convention.GetRegisterAt(0)), obj_); + arm_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pReadBarrierMark), + instruction_, + instruction_->GetDexPc(), + this); + CheckEntrypointTypes<kQuickReadBarrierMark, mirror::Object*, mirror::Object*>(); + arm_codegen->Move32(out_, Location::RegisterLocation(R0)); + + RestoreLiveRegisters(codegen, locations); + __ b(GetExitLabel()); + } + + private: + HInstruction* const instruction_; + const Location out_; + const Location obj_; + + DISALLOW_COPY_AND_ASSIGN(ReadBarrierMarkSlowPathARM); +}; + // Slow path generating a read barrier for a heap reference. 
class ReadBarrierForHeapReferenceSlowPathARM : public SlowPathCode { public: @@ -438,7 +488,7 @@ class ReadBarrierForHeapReferenceSlowPathARM : public SlowPathCode { // to be instrumented, e.g.: // // __ LoadFromOffset(kLoadWord, out, out, offset); - // codegen_->GenerateReadBarrier(instruction, out_loc, out_loc, out_loc, offset); + // codegen_->GenerateReadBarrierSlow(instruction, out_loc, out_loc, out_loc, offset); // // In that case, we have lost the information about the original // object, and the emitted read barrier cannot work properly. @@ -454,7 +504,9 @@ class ReadBarrierForHeapReferenceSlowPathARM : public SlowPathCode { DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(reg_out)); DCHECK(!instruction_->IsInvoke() || (instruction_->IsInvokeStaticOrDirect() && - instruction_->GetLocations()->Intrinsified())); + instruction_->GetLocations()->Intrinsified())) + << "Unexpected instruction in read barrier for heap reference slow path: " + << instruction_->DebugName(); __ Bind(GetEntryLabel()); SaveLiveRegisters(codegen, locations); @@ -596,14 +648,18 @@ class ReadBarrierForHeapReferenceSlowPathARM : public SlowPathCode { class ReadBarrierForRootSlowPathARM : public SlowPathCode { public: ReadBarrierForRootSlowPathARM(HInstruction* instruction, Location out, Location root) - : instruction_(instruction), out_(out), root_(root) {} + : instruction_(instruction), out_(out), root_(root) { + DCHECK(kEmitCompilerReadBarrier); + } void EmitNativeCode(CodeGenerator* codegen) OVERRIDE { LocationSummary* locations = instruction_->GetLocations(); Register reg_out = out_.AsRegister<Register>(); DCHECK(locations->CanCall()); DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(reg_out)); - DCHECK(instruction_->IsLoadClass() || instruction_->IsLoadString()); + DCHECK(instruction_->IsLoadClass() || instruction_->IsLoadString()) + << "Unexpected instruction in read barrier for GC root slow path: " + << instruction_->DebugName(); __ Bind(GetEntryLabel()); SaveLiveRegisters(codegen, locations); @@ -857,7 +913,7 @@ void CodeGeneratorARM::UpdateBlockedPairRegisters() const { } InstructionCodeGeneratorARM::InstructionCodeGeneratorARM(HGraph* graph, CodeGeneratorARM* codegen) - : HGraphVisitor(graph), + : InstructionCodeGenerator(graph, codegen), assembler_(codegen->GetAssembler()), codegen_(codegen) {} @@ -1358,17 +1414,6 @@ void LocationsBuilderARM::VisitExit(HExit* exit) { void InstructionCodeGeneratorARM::VisitExit(HExit* exit ATTRIBUTE_UNUSED) { } -void InstructionCodeGeneratorARM::GenerateCompareWithImmediate(Register left, int32_t right) { - ShifterOperand operand; - if (GetAssembler()->ShifterOperandCanHold(R0, left, CMP, right, &operand)) { - __ cmp(left, operand); - } else { - Register temp = IP; - __ LoadImmediate(temp, right); - __ cmp(left, ShifterOperand(temp)); - } -} - void InstructionCodeGeneratorARM::GenerateFPJumps(HCondition* cond, Label* true_label, Label* false_label) { @@ -1434,7 +1479,7 @@ void InstructionCodeGeneratorARM::GenerateLongComparesAndJumps(HCondition* cond, int32_t val_low = Low32Bits(value); int32_t val_high = High32Bits(value); - GenerateCompareWithImmediate(left_high, val_high); + __ CmpConstant(left_high, val_high); if (if_cond == kCondNE) { __ b(true_label, ARMCondition(true_high_cond)); } else if (if_cond == kCondEQ) { @@ -1444,7 +1489,7 @@ void InstructionCodeGeneratorARM::GenerateLongComparesAndJumps(HCondition* cond, __ b(false_label, ARMCondition(false_high_cond)); } // Must be equal high, so compare the lows. 
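The GenerateCompareWithImmediate() helper deleted above is subsumed by the assembler-level CmpConstant(), which performs the same operand selection. A sketch of what that logic does, based on the removed helper (the name CmpConstantSketch is illustrative, not the assembler's actual implementation):

    void CmpConstantSketch(ArmAssembler* assembler, Register left, int32_t right) {
      ShifterOperand operand;
      if (assembler->ShifterOperandCanHold(R0, left, CMP, right, &operand)) {
        // The constant can be encoded directly as a flexible shifter operand.
        assembler->cmp(left, operand);
      } else {
        // Otherwise materialize it in the scratch register IP first and
        // compare register against register.
        assembler->LoadImmediate(IP, right);
        assembler->cmp(left, ShifterOperand(IP));
      }
    }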
- GenerateCompareWithImmediate(left_low, val_low); + __ CmpConstant(left_low, val_low); } else { Register right_high = right.AsRegisterPairHigh<Register>(); Register right_low = right.AsRegisterPairLow<Register>(); @@ -1568,7 +1613,7 @@ void InstructionCodeGeneratorARM::GenerateTestAndBranch(HInstruction* instructio __ cmp(left, ShifterOperand(right.AsRegister<Register>())); } else { DCHECK(right.IsConstant()); - GenerateCompareWithImmediate(left, CodeGenerator::GetInt32ValueOf(right.GetConstant())); + __ CmpConstant(left, CodeGenerator::GetInt32ValueOf(right.GetConstant())); } if (true_target == nullptr) { __ b(false_target, ARMCondition(condition->GetOppositeCondition())); @@ -1610,8 +1655,7 @@ void LocationsBuilderARM::VisitDeoptimize(HDeoptimize* deoptimize) { } void InstructionCodeGeneratorARM::VisitDeoptimize(HDeoptimize* deoptimize) { - SlowPathCode* slow_path = new (GetGraph()->GetArena()) DeoptimizationSlowPathARM(deoptimize); - codegen_->AddSlowPath(slow_path); + SlowPathCode* slow_path = deopt_slow_paths_.NewSlowPath<DeoptimizationSlowPathARM>(deoptimize); GenerateTestAndBranch(deoptimize, /* condition_input_index */ 0, slow_path->GetEntryLabel(), @@ -1623,6 +1667,10 @@ void LocationsBuilderARM::VisitNativeDebugInfo(HNativeDebugInfo* info) { } void InstructionCodeGeneratorARM::VisitNativeDebugInfo(HNativeDebugInfo* info) { + if (codegen_->HasStackMapAtCurrentPc()) { + // Ensure that we do not collide with the stack map of the previous instruction. + __ nop(); + } codegen_->RecordPcInfo(info, info->GetDexPc()); } @@ -1675,8 +1723,8 @@ void InstructionCodeGeneratorARM::HandleCondition(HCondition* cond) { __ cmp(left.AsRegister<Register>(), ShifterOperand(right.AsRegister<Register>())); } else { DCHECK(right.IsConstant()); - GenerateCompareWithImmediate(left.AsRegister<Register>(), - CodeGenerator::GetInt32ValueOf(right.GetConstant())); + __ CmpConstant(left.AsRegister<Register>(), + CodeGenerator::GetInt32ValueOf(right.GetConstant())); } __ it(ARMCondition(cond->GetCondition()), kItElse); __ mov(locations->Out().AsRegister<Register>(), ShifterOperand(1), @@ -1891,7 +1939,7 @@ void LocationsBuilderARM::VisitMemoryBarrier(HMemoryBarrier* memory_barrier) { } void InstructionCodeGeneratorARM::VisitMemoryBarrier(HMemoryBarrier* memory_barrier) { - GenerateMemoryBarrier(memory_barrier->GetBarrierKind()); + codegen_->GenerateMemoryBarrier(memory_barrier->GetBarrierKind()); } void LocationsBuilderARM::VisitReturnVoid(HReturnVoid* ret) { @@ -2846,8 +2894,7 @@ void InstructionCodeGeneratorARM::DivRemByPowerOfTwo(HBinaryOperation* instructi Register dividend = locations->InAt(0).AsRegister<Register>(); Register temp = locations->GetTemp(0).AsRegister<Register>(); int32_t imm = second.GetConstant()->AsIntConstant()->GetValue(); - uint32_t abs_imm = static_cast<uint32_t>(std::abs(imm)); - DCHECK(IsPowerOfTwo(abs_imm)); + uint32_t abs_imm = static_cast<uint32_t>(AbsOrMin(imm)); int ctz_imm = CTZ(abs_imm); if (ctz_imm == 1) { @@ -2923,7 +2970,7 @@ void InstructionCodeGeneratorARM::GenerateDivRemConstantIntegral(HBinaryOperatio // Do not generate anything. DivZeroCheck would prevent any code to be executed. 
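The switch from std::abs() to AbsOrMin() in the division hunks above closes an undefined-behavior hole: std::abs(INT32_MIN) overflows, while the minimum's magnitude is still a power of two once viewed as an unsigned value. A self-contained sketch of the assumed contract (AbsOrMinSketch is a stand-in for the real helper elsewhere in the tree):

    #include <cstdlib>
    #include <limits>

    // Returns |value|, except for INT32_MIN, which is returned unchanged;
    // static_cast<uint32_t>(INT32_MIN) == 0x80000000u, so IsPowerOfTwo() and
    // CTZ() still see the intended power of two 2^31.
    static int32_t AbsOrMinSketch(int32_t value) {
      return value == std::numeric_limits<int32_t>::min() ? value : std::abs(value);
    }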
} else if (imm == 1 || imm == -1) { DivRemOneOrMinusOne(instruction); - } else if (IsPowerOfTwo(std::abs(imm))) { + } else if (IsPowerOfTwo(AbsOrMin(imm))) { DivRemByPowerOfTwo(instruction); } else { DCHECK(imm <= -2 || imm >= 2); @@ -2952,12 +2999,12 @@ void LocationsBuilderARM::VisitDiv(HDiv* div) { locations->SetInAt(0, Location::RequiresRegister()); locations->SetInAt(1, Location::ConstantLocation(div->InputAt(1)->AsConstant())); locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); - int32_t abs_imm = std::abs(div->InputAt(1)->AsIntConstant()->GetValue()); - if (abs_imm <= 1) { + int32_t value = div->InputAt(1)->AsIntConstant()->GetValue(); + if (value == 1 || value == 0 || value == -1) { // No temp register required. } else { locations->AddTemp(Location::RequiresRegister()); - if (!IsPowerOfTwo(abs_imm)) { + if (!IsPowerOfTwo(AbsOrMin(value))) { locations->AddTemp(Location::RequiresRegister()); } } @@ -3078,12 +3125,12 @@ void LocationsBuilderARM::VisitRem(HRem* rem) { locations->SetInAt(0, Location::RequiresRegister()); locations->SetInAt(1, Location::ConstantLocation(rem->InputAt(1)->AsConstant())); locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); - int32_t abs_imm = std::abs(rem->InputAt(1)->AsIntConstant()->GetValue()); - if (abs_imm <= 1) { + int32_t value = rem->InputAt(1)->AsIntConstant()->GetValue(); + if (value == 1 || value == 0 || value == -1) { // No temp register required. } else { locations->AddTemp(Location::RequiresRegister()); - if (!IsPowerOfTwo(abs_imm)) { + if (!IsPowerOfTwo(AbsOrMin(value))) { locations->AddTemp(Location::RequiresRegister()); } } @@ -3437,7 +3484,7 @@ void InstructionCodeGeneratorARM::HandleShift(HBinaryOperation* op) { Register first_reg = first.AsRegister<Register>(); if (second.IsRegister()) { Register second_reg = second.AsRegister<Register>(); - // Arm doesn't mask the shift count so we need to do it ourselves. + // ARM doesn't mask the shift count so we need to do it ourselves. __ and_(out_reg, second_reg, ShifterOperand(kMaxIntShiftValue)); if (op->IsShl()) { __ Lsl(out_reg, first_reg, out_reg); @@ -3449,7 +3496,7 @@ void InstructionCodeGeneratorARM::HandleShift(HBinaryOperation* op) { } else { int32_t cst = second.GetConstant()->AsIntConstant()->GetValue(); uint32_t shift_value = static_cast<uint32_t>(cst & kMaxIntShiftValue); - if (shift_value == 0) { // arm does not support shifting with 0 immediate. + if (shift_value == 0) { // ARM does not support shifting with 0 immediate. __ Mov(out_reg, first_reg); } else if (op->IsShl()) { __ Lsl(out_reg, first_reg, shift_value); @@ -3796,9 +3843,9 @@ void InstructionCodeGeneratorARM::VisitPhi(HPhi* instruction ATTRIBUTE_UNUSED) { LOG(FATAL) << "Unreachable"; } -void InstructionCodeGeneratorARM::GenerateMemoryBarrier(MemBarrierKind kind) { - // TODO (ported from quick): revisit Arm barrier kinds - DmbOptions flavor = DmbOptions::ISH; // quiet c++ warnings +void CodeGeneratorARM::GenerateMemoryBarrier(MemBarrierKind kind) { + // TODO (ported from quick): revisit ARM barrier kinds. + DmbOptions flavor = DmbOptions::ISH; // Quiet C++ warnings. switch (kind) { case MemBarrierKind::kAnyStore: case MemBarrierKind::kLoadAny: @@ -3879,11 +3926,11 @@ void LocationsBuilderARM::HandleFieldSet(HInstruction* instruction, const FieldI locations->AddTemp(Location::RequiresRegister()); // Possibly used for reference poisoning too. 
locations->AddTemp(Location::RequiresRegister()); } else if (generate_volatile) { - // Arm encoding have some additional constraints for ldrexd/strexd: + // ARM encoding have some additional constraints for ldrexd/strexd: // - registers need to be consecutive // - the first register should be even but not R14. - // We don't test for Arm yet, and the assertion makes sure that we revisit this if we ever - // enable Arm encoding. + // We don't test for ARM yet, and the assertion makes sure that we + // revisit this if we ever enable ARM encoding. DCHECK_EQ(InstructionSet::kThumb2, codegen_->GetInstructionSet()); locations->AddTemp(Location::RequiresRegister()); @@ -3913,7 +3960,7 @@ void InstructionCodeGeneratorARM::HandleFieldSet(HInstruction* instruction, CodeGenerator::StoreNeedsWriteBarrier(field_type, instruction->InputAt(1)); if (is_volatile) { - GenerateMemoryBarrier(MemBarrierKind::kAnyStore); + codegen_->GenerateMemoryBarrier(MemBarrierKind::kAnyStore); } switch (field_type) { @@ -4005,7 +4052,7 @@ void InstructionCodeGeneratorARM::HandleFieldSet(HInstruction* instruction, } if (is_volatile) { - GenerateMemoryBarrier(MemBarrierKind::kAnyAny); + codegen_->GenerateMemoryBarrier(MemBarrierKind::kAnyAny); } } @@ -4039,14 +4086,18 @@ void LocationsBuilderARM::HandleFieldGet(HInstruction* instruction, const FieldI (overlap ? Location::kOutputOverlap : Location::kNoOutputOverlap)); } if (volatile_for_double) { - // Arm encoding have some additional constraints for ldrexd/strexd: + // ARM encoding have some additional constraints for ldrexd/strexd: // - registers need to be consecutive // - the first register should be even but not R14. - // We don't test for Arm yet, and the assertion makes sure that we revisit this if we ever - // enable Arm encoding. + // We don't test for ARM yet, and the assertion makes sure that we + // revisit this if we ever enable ARM encoding. DCHECK_EQ(InstructionSet::kThumb2, codegen_->GetInstructionSet()); locations->AddTemp(Location::RequiresRegister()); locations->AddTemp(Location::RequiresRegister()); + } else if (object_field_get_with_read_barrier && kUseBakerReadBarrier) { + // We need a temporary register for the read barrier marking slow + // path in CodeGeneratorARM::GenerateFieldLoadWithBakerReadBarrier. 
+ locations->AddTemp(Location::RequiresRegister()); } } @@ -4105,33 +4156,52 @@ void InstructionCodeGeneratorARM::HandleFieldGet(HInstruction* instruction, uint32_t offset = field_info.GetFieldOffset().Uint32Value(); switch (field_type) { - case Primitive::kPrimBoolean: { + case Primitive::kPrimBoolean: __ LoadFromOffset(kLoadUnsignedByte, out.AsRegister<Register>(), base, offset); break; - } - case Primitive::kPrimByte: { + case Primitive::kPrimByte: __ LoadFromOffset(kLoadSignedByte, out.AsRegister<Register>(), base, offset); break; - } - case Primitive::kPrimShort: { + case Primitive::kPrimShort: __ LoadFromOffset(kLoadSignedHalfword, out.AsRegister<Register>(), base, offset); break; - } - case Primitive::kPrimChar: { + case Primitive::kPrimChar: __ LoadFromOffset(kLoadUnsignedHalfword, out.AsRegister<Register>(), base, offset); break; - } case Primitive::kPrimInt: - case Primitive::kPrimNot: { __ LoadFromOffset(kLoadWord, out.AsRegister<Register>(), base, offset); break; + + case Primitive::kPrimNot: { + // /* HeapReference<Object> */ out = *(base + offset) + if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) { + Location temp_loc = locations->GetTemp(0); + // Note that a potential implicit null check is handled in this + // CodeGeneratorARM::GenerateFieldLoadWithBakerReadBarrier call. + codegen_->GenerateFieldLoadWithBakerReadBarrier( + instruction, out, base, offset, temp_loc, /* needs_null_check */ true); + if (is_volatile) { + codegen_->GenerateMemoryBarrier(MemBarrierKind::kLoadAny); + } + } else { + __ LoadFromOffset(kLoadWord, out.AsRegister<Register>(), base, offset); + codegen_->MaybeRecordImplicitNullCheck(instruction); + if (is_volatile) { + codegen_->GenerateMemoryBarrier(MemBarrierKind::kLoadAny); + } + // If read barriers are enabled, emit read barriers other than + // Baker's using a slow path (and also unpoison the loaded + // reference, if heap poisoning is enabled). + codegen_->MaybeGenerateReadBarrierSlow(instruction, out, out, base_loc, offset); + } + break; } - case Primitive::kPrimLong: { + case Primitive::kPrimLong: if (is_volatile && !atomic_ldrd_strd) { GenerateWideAtomicLoad(base, offset, out.AsRegisterPairLow<Register>(), @@ -4140,12 +4210,10 @@ void InstructionCodeGeneratorARM::HandleFieldGet(HInstruction* instruction, __ LoadFromOffset(kLoadWordPair, out.AsRegisterPairLow<Register>(), base, offset); } break; - } - case Primitive::kPrimFloat: { + case Primitive::kPrimFloat: __ LoadSFromOffset(out.AsFpuRegister<SRegister>(), base, offset); break; - } case Primitive::kPrimDouble: { DRegister out_reg = FromLowSToD(out.AsFpuRegisterPairLow<SRegister>()); @@ -4167,17 +4235,20 @@ void InstructionCodeGeneratorARM::HandleFieldGet(HInstruction* instruction, UNREACHABLE(); } - // Doubles are handled in the switch. - if (field_type != Primitive::kPrimDouble) { + if (field_type == Primitive::kPrimNot || field_type == Primitive::kPrimDouble) { + // Potential implicit null checks, in the case of reference or + // double fields, are handled in the previous switch statement. + } else { codegen_->MaybeRecordImplicitNullCheck(instruction); } if (is_volatile) { - GenerateMemoryBarrier(MemBarrierKind::kLoadAny); - } - - if (field_type == Primitive::kPrimNot) { - codegen_->MaybeGenerateReadBarrier(instruction, out, out, base_loc, offset); + if (field_type == Primitive::kPrimNot) { + // Memory barriers, in the case of references, are also handled + // in the previous switch statement. 
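Condensing the kPrimNot branch of HandleFieldGet above: the reference load now takes one of two shapes depending on the read-barrier configuration, and the implicit null check and kLoadAny barrier move inside the branch (a sketch only; the real calls are exactly as in the hunk):

    // /* HeapReference<Object> */ out = *(base + offset)
    if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
      // Baker: load, gray-object check and implicit null check are folded
      // into the helper; only the volatile barrier is emitted separately.
      codegen_->GenerateFieldLoadWithBakerReadBarrier(
          instruction, out, base, offset, temp_loc, /* needs_null_check */ true);
      if (is_volatile) {
        codegen_->GenerateMemoryBarrier(MemBarrierKind::kLoadAny);
      }
    } else {
      // Plain load, followed (for non-Baker barriers) by a slow-path read
      // barrier that also unpoisons the reference under heap poisoning.
      __ LoadFromOffset(kLoadWord, out.AsRegister<Register>(), base, offset);
      codegen_->MaybeRecordImplicitNullCheck(instruction);
      if (is_volatile) {
        codegen_->GenerateMemoryBarrier(MemBarrierKind::kLoadAny);
      }
      codegen_->MaybeGenerateReadBarrierSlow(instruction, out, out, base_loc, offset);
    }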
+ } else { + codegen_->GenerateMemoryBarrier(MemBarrierKind::kLoadAny); + } } } @@ -4340,6 +4411,11 @@ void LocationsBuilderARM::VisitArrayGet(HArrayGet* instruction) { Location::RequiresRegister(), object_array_get_with_read_barrier ? Location::kOutputOverlap : Location::kNoOutputOverlap); } + // We need a temporary register for the read barrier marking slow + // path in CodeGeneratorARM::GenerateArrayLoadWithBakerReadBarrier. + if (object_array_get_with_read_barrier && kUseBakerReadBarrier) { + locations->AddTemp(Location::RequiresRegister()); + } } void InstructionCodeGeneratorARM::VisitArrayGet(HArrayGet* instruction) { @@ -4347,12 +4423,13 @@ void InstructionCodeGeneratorARM::VisitArrayGet(HArrayGet* instruction) { Location obj_loc = locations->InAt(0); Register obj = obj_loc.AsRegister<Register>(); Location index = locations->InAt(1); - Primitive::Type type = instruction->GetType(); + Location out_loc = locations->Out(); + Primitive::Type type = instruction->GetType(); switch (type) { case Primitive::kPrimBoolean: { uint32_t data_offset = mirror::Array::DataOffset(sizeof(uint8_t)).Uint32Value(); - Register out = locations->Out().AsRegister<Register>(); + Register out = out_loc.AsRegister<Register>(); if (index.IsConstant()) { size_t offset = (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_1) + data_offset; @@ -4366,7 +4443,7 @@ void InstructionCodeGeneratorARM::VisitArrayGet(HArrayGet* instruction) { case Primitive::kPrimByte: { uint32_t data_offset = mirror::Array::DataOffset(sizeof(int8_t)).Uint32Value(); - Register out = locations->Out().AsRegister<Register>(); + Register out = out_loc.AsRegister<Register>(); if (index.IsConstant()) { size_t offset = (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_1) + data_offset; @@ -4380,7 +4457,7 @@ void InstructionCodeGeneratorARM::VisitArrayGet(HArrayGet* instruction) { case Primitive::kPrimShort: { uint32_t data_offset = mirror::Array::DataOffset(sizeof(int16_t)).Uint32Value(); - Register out = locations->Out().AsRegister<Register>(); + Register out = out_loc.AsRegister<Register>(); if (index.IsConstant()) { size_t offset = (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_2) + data_offset; @@ -4394,7 +4471,7 @@ void InstructionCodeGeneratorARM::VisitArrayGet(HArrayGet* instruction) { case Primitive::kPrimChar: { uint32_t data_offset = mirror::Array::DataOffset(sizeof(uint16_t)).Uint32Value(); - Register out = locations->Out().AsRegister<Register>(); + Register out = out_loc.AsRegister<Register>(); if (index.IsConstant()) { size_t offset = (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_2) + data_offset; @@ -4406,13 +4483,9 @@ void InstructionCodeGeneratorARM::VisitArrayGet(HArrayGet* instruction) { break; } - case Primitive::kPrimInt: - case Primitive::kPrimNot: { - static_assert( - sizeof(mirror::HeapReference<mirror::Object>) == sizeof(int32_t), - "art::mirror::HeapReference<mirror::Object> and int32_t have different sizes."); + case Primitive::kPrimInt: { uint32_t data_offset = mirror::Array::DataOffset(sizeof(int32_t)).Uint32Value(); - Register out = locations->Out().AsRegister<Register>(); + Register out = out_loc.AsRegister<Register>(); if (index.IsConstant()) { size_t offset = (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_4) + data_offset; @@ -4424,44 +4497,79 @@ void InstructionCodeGeneratorARM::VisitArrayGet(HArrayGet* instruction) { break; } + case Primitive::kPrimNot: { + static_assert( + sizeof(mirror::HeapReference<mirror::Object>) == sizeof(int32_t), + 
"art::mirror::HeapReference<art::mirror::Object> and int32_t have different sizes."); + uint32_t data_offset = mirror::Array::DataOffset(sizeof(int32_t)).Uint32Value(); + // /* HeapReference<Object> */ out = + // *(obj + data_offset + index * sizeof(HeapReference<Object>)) + if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) { + Location temp = locations->GetTemp(0); + // Note that a potential implicit null check is handled in this + // CodeGeneratorARM::GenerateArrayLoadWithBakerReadBarrier call. + codegen_->GenerateArrayLoadWithBakerReadBarrier( + instruction, out_loc, obj, data_offset, index, temp, /* needs_null_check */ true); + } else { + Register out = out_loc.AsRegister<Register>(); + if (index.IsConstant()) { + size_t offset = + (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_4) + data_offset; + __ LoadFromOffset(kLoadWord, out, obj, offset); + codegen_->MaybeRecordImplicitNullCheck(instruction); + // If read barriers are enabled, emit read barriers other than + // Baker's using a slow path (and also unpoison the loaded + // reference, if heap poisoning is enabled). + codegen_->MaybeGenerateReadBarrierSlow(instruction, out_loc, out_loc, obj_loc, offset); + } else { + __ add(IP, obj, ShifterOperand(index.AsRegister<Register>(), LSL, TIMES_4)); + __ LoadFromOffset(kLoadWord, out, IP, data_offset); + codegen_->MaybeRecordImplicitNullCheck(instruction); + // If read barriers are enabled, emit read barriers other than + // Baker's using a slow path (and also unpoison the loaded + // reference, if heap poisoning is enabled). + codegen_->MaybeGenerateReadBarrierSlow( + instruction, out_loc, out_loc, obj_loc, data_offset, index); + } + } + break; + } + case Primitive::kPrimLong: { uint32_t data_offset = mirror::Array::DataOffset(sizeof(int64_t)).Uint32Value(); - Location out = locations->Out(); if (index.IsConstant()) { size_t offset = (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_8) + data_offset; - __ LoadFromOffset(kLoadWordPair, out.AsRegisterPairLow<Register>(), obj, offset); + __ LoadFromOffset(kLoadWordPair, out_loc.AsRegisterPairLow<Register>(), obj, offset); } else { __ add(IP, obj, ShifterOperand(index.AsRegister<Register>(), LSL, TIMES_8)); - __ LoadFromOffset(kLoadWordPair, out.AsRegisterPairLow<Register>(), IP, data_offset); + __ LoadFromOffset(kLoadWordPair, out_loc.AsRegisterPairLow<Register>(), IP, data_offset); } break; } case Primitive::kPrimFloat: { uint32_t data_offset = mirror::Array::DataOffset(sizeof(float)).Uint32Value(); - Location out = locations->Out(); - DCHECK(out.IsFpuRegister()); + SRegister out = out_loc.AsFpuRegister<SRegister>(); if (index.IsConstant()) { size_t offset = (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_4) + data_offset; - __ LoadSFromOffset(out.AsFpuRegister<SRegister>(), obj, offset); + __ LoadSFromOffset(out, obj, offset); } else { __ add(IP, obj, ShifterOperand(index.AsRegister<Register>(), LSL, TIMES_4)); - __ LoadSFromOffset(out.AsFpuRegister<SRegister>(), IP, data_offset); + __ LoadSFromOffset(out, IP, data_offset); } break; } case Primitive::kPrimDouble: { uint32_t data_offset = mirror::Array::DataOffset(sizeof(double)).Uint32Value(); - Location out = locations->Out(); - DCHECK(out.IsFpuRegisterPair()); + SRegister out = out_loc.AsFpuRegisterPairLow<SRegister>(); if (index.IsConstant()) { size_t offset = (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_8) + data_offset; - __ LoadDFromOffset(FromLowSToD(out.AsFpuRegisterPairLow<SRegister>()), obj, offset); + __ 
LoadDFromOffset(FromLowSToD(out), obj, offset); } else { __ add(IP, obj, ShifterOperand(index.AsRegister<Register>(), LSL, TIMES_8)); - __ LoadDFromOffset(FromLowSToD(out.AsFpuRegisterPairLow<SRegister>()), IP, data_offset); + __ LoadDFromOffset(FromLowSToD(out), IP, data_offset); } break; } @@ -4470,20 +4578,12 @@ void InstructionCodeGeneratorARM::VisitArrayGet(HArrayGet* instruction) { LOG(FATAL) << "Unreachable type " << type; UNREACHABLE(); } - codegen_->MaybeRecordImplicitNullCheck(instruction); if (type == Primitive::kPrimNot) { - static_assert( - sizeof(mirror::HeapReference<mirror::Object>) == sizeof(int32_t), - "art::mirror::HeapReference<art::mirror::Object> and int32_t have different sizes."); - uint32_t data_offset = mirror::Array::DataOffset(sizeof(int32_t)).Uint32Value(); - Location out = locations->Out(); - if (index.IsConstant()) { - uint32_t offset = (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_4) + data_offset; - codegen_->MaybeGenerateReadBarrier(instruction, out, out, obj_loc, offset); - } else { - codegen_->MaybeGenerateReadBarrier(instruction, out, out, obj_loc, data_offset, index); - } + // Potential implicit null checks, in the case of reference + // arrays, are handled in the previous switch statement. + } else { + codegen_->MaybeRecordImplicitNullCheck(instruction); } } @@ -4574,6 +4674,7 @@ void InstructionCodeGeneratorARM::VisitArraySet(HArraySet* instruction) { __ add(IP, array, ShifterOperand(index.AsRegister<Register>(), LSL, TIMES_4)); __ StoreToOffset(kStoreWord, source, IP, data_offset); } + codegen_->MaybeRecordImplicitNullCheck(instruction); DCHECK(!needs_write_barrier); DCHECK(!may_need_runtime_call_for_type_check); break; @@ -4615,12 +4716,12 @@ void InstructionCodeGeneratorARM::VisitArraySet(HArraySet* instruction) { // __ Mov(temp2, temp1); // // /* HeapReference<Class> */ temp1 = temp1->component_type_ // __ LoadFromOffset(kLoadWord, temp1, temp1, component_offset); - // codegen_->GenerateReadBarrier( + // codegen_->GenerateReadBarrierSlow( // instruction, temp1_loc, temp1_loc, temp2_loc, component_offset); // // // /* HeapReference<Class> */ temp2 = value->klass_ // __ LoadFromOffset(kLoadWord, temp2, value, class_offset); - // codegen_->GenerateReadBarrier( + // codegen_->GenerateReadBarrierSlow( // instruction, temp2_loc, temp2_loc, value_loc, class_offset, temp1_loc); // // __ cmp(temp1, ShifterOperand(temp2)); @@ -4717,8 +4818,6 @@ void InstructionCodeGeneratorARM::VisitArraySet(HArraySet* instruction) { __ add(IP, array, ShifterOperand(index.AsRegister<Register>(), LSL, TIMES_4)); __ StoreToOffset(kStoreWord, value, IP, data_offset); } - - codegen_->MaybeRecordImplicitNullCheck(instruction); break; } @@ -4770,8 +4869,8 @@ void InstructionCodeGeneratorARM::VisitArraySet(HArraySet* instruction) { UNREACHABLE(); } - // Ints and objects are handled in the switch. - if (value_type != Primitive::kPrimInt && value_type != Primitive::kPrimNot) { + // Objects are handled in the switch. 
+ if (value_type != Primitive::kPrimNot) { codegen_->MaybeRecordImplicitNullCheck(instruction); } } @@ -5140,16 +5239,9 @@ void InstructionCodeGeneratorARM::VisitLoadClass(HLoadClass* cls) { if (cls->IsReferrersClass()) { DCHECK(!cls->CanCallRuntime()); DCHECK(!cls->MustGenerateClinitCheck()); - uint32_t declaring_class_offset = ArtMethod::DeclaringClassOffset().Int32Value(); - if (kEmitCompilerReadBarrier) { - // /* GcRoot<mirror::Class>* */ out = &(current_method->declaring_class_) - __ AddConstant(out, current_method, declaring_class_offset); - // /* mirror::Class* */ out = out->Read() - codegen_->GenerateReadBarrierForRoot(cls, out_loc, out_loc); - } else { - // /* GcRoot<mirror::Class> */ out = current_method->declaring_class_ - __ LoadFromOffset(kLoadWord, out, current_method, declaring_class_offset); - } + // /* GcRoot<mirror::Class> */ out = current_method->declaring_class_ + GenerateGcRootFieldLoad( + cls, out_loc, current_method, ArtMethod::DeclaringClassOffset().Int32Value()); } else { // /* GcRoot<mirror::Class>[] */ out = // current_method.ptr_sized_fields_->dex_cache_resolved_types_ @@ -5157,17 +5249,8 @@ void InstructionCodeGeneratorARM::VisitLoadClass(HLoadClass* cls) { out, current_method, ArtMethod::DexCacheResolvedTypesOffset(kArmPointerSize).Int32Value()); - - size_t cache_offset = CodeGenerator::GetCacheOffset(cls->GetTypeIndex()); - if (kEmitCompilerReadBarrier) { - // /* GcRoot<mirror::Class>* */ out = &out[type_index] - __ AddConstant(out, out, cache_offset); - // /* mirror::Class* */ out = out->Read() - codegen_->GenerateReadBarrierForRoot(cls, out_loc, out_loc); - } else { - // /* GcRoot<mirror::Class> */ out = out[type_index] - __ LoadFromOffset(kLoadWord, out, out, cache_offset); - } + // /* GcRoot<mirror::Class> */ out = out[type_index] + GenerateGcRootFieldLoad(cls, out_loc, out, CodeGenerator::GetCacheOffset(cls->GetTypeIndex())); if (!cls->IsInDexCache() || cls->MustGenerateClinitCheck()) { DCHECK(cls->CanCallRuntime()); @@ -5230,30 +5313,14 @@ void InstructionCodeGeneratorARM::VisitLoadString(HLoadString* load) { Register out = out_loc.AsRegister<Register>(); Register current_method = locations->InAt(0).AsRegister<Register>(); - uint32_t declaring_class_offset = ArtMethod::DeclaringClassOffset().Int32Value(); - if (kEmitCompilerReadBarrier) { - // /* GcRoot<mirror::Class>* */ out = &(current_method->declaring_class_) - __ AddConstant(out, current_method, declaring_class_offset); - // /* mirror::Class* */ out = out->Read() - codegen_->GenerateReadBarrierForRoot(load, out_loc, out_loc); - } else { - // /* GcRoot<mirror::Class> */ out = current_method->declaring_class_ - __ LoadFromOffset(kLoadWord, out, current_method, declaring_class_offset); - } - + // /* GcRoot<mirror::Class> */ out = current_method->declaring_class_ + GenerateGcRootFieldLoad( + load, out_loc, current_method, ArtMethod::DeclaringClassOffset().Int32Value()); // /* GcRoot<mirror::String>[] */ out = out->dex_cache_strings_ __ LoadFromOffset(kLoadWord, out, out, mirror::Class::DexCacheStringsOffset().Int32Value()); - - size_t cache_offset = CodeGenerator::GetCacheOffset(load->GetStringIndex()); - if (kEmitCompilerReadBarrier) { - // /* GcRoot<mirror::String>* */ out = &out[string_index] - __ AddConstant(out, out, cache_offset); - // /* mirror::String* */ out = out->Read() - codegen_->GenerateReadBarrierForRoot(load, out_loc, out_loc); - } else { - // /* GcRoot<mirror::String> */ out = out[string_index] - __ LoadFromOffset(kLoadWord, out, out, cache_offset); - } + // /* GcRoot<mirror::String> 
*/ out = out[string_index] + GenerateGcRootFieldLoad( + load, out_loc, out, CodeGenerator::GetCacheOffset(load->GetStringIndex())); if (!load->IsInDexCache()) { SlowPathCode* slow_path = new (GetGraph()->GetArena()) LoadStringSlowPathARM(load); @@ -5300,6 +5367,14 @@ void InstructionCodeGeneratorARM::VisitThrow(HThrow* instruction) { CheckEntrypointTypes<kQuickDeliverException, void, mirror::Object*>(); } +static bool TypeCheckNeedsATemporary(TypeCheckKind type_check_kind) { + return kEmitCompilerReadBarrier && + (kUseBakerReadBarrier || + type_check_kind == TypeCheckKind::kAbstractClassCheck || + type_check_kind == TypeCheckKind::kClassHierarchyCheck || + type_check_kind == TypeCheckKind::kArrayObjectCheck); +} + void LocationsBuilderARM::VisitInstanceOf(HInstanceOf* instruction) { LocationSummary::CallKind call_kind = LocationSummary::kNoCall; TypeCheckKind type_check_kind = instruction->GetTypeCheckKind(); @@ -5326,21 +5401,22 @@ void LocationsBuilderARM::VisitInstanceOf(HInstanceOf* instruction) { locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap); // When read barriers are enabled, we need a temporary register for // some cases. - if (kEmitCompilerReadBarrier && - (type_check_kind == TypeCheckKind::kAbstractClassCheck || - type_check_kind == TypeCheckKind::kClassHierarchyCheck || - type_check_kind == TypeCheckKind::kArrayObjectCheck)) { + if (TypeCheckNeedsATemporary(type_check_kind)) { locations->AddTemp(Location::RequiresRegister()); } } void InstructionCodeGeneratorARM::VisitInstanceOf(HInstanceOf* instruction) { + TypeCheckKind type_check_kind = instruction->GetTypeCheckKind(); LocationSummary* locations = instruction->GetLocations(); Location obj_loc = locations->InAt(0); Register obj = obj_loc.AsRegister<Register>(); Register cls = locations->InAt(1).AsRegister<Register>(); Location out_loc = locations->Out(); Register out = out_loc.AsRegister<Register>(); + Location temp_loc = TypeCheckNeedsATemporary(type_check_kind) ? + locations->GetTemp(0) : + Location::NoLocation(); uint32_t class_offset = mirror::Object::ClassOffset().Int32Value(); uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value(); uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value(); @@ -5355,10 +5431,9 @@ void InstructionCodeGeneratorARM::VisitInstanceOf(HInstanceOf* instruction) { } // /* HeapReference<Class> */ out = obj->klass_ - __ LoadFromOffset(kLoadWord, out, obj, class_offset); - codegen_->MaybeGenerateReadBarrier(instruction, out_loc, out_loc, obj_loc, class_offset); + GenerateReferenceLoadTwoRegisters(instruction, out_loc, obj_loc, class_offset, temp_loc); - switch (instruction->GetTypeCheckKind()) { + switch (type_check_kind) { case TypeCheckKind::kExactCheck: { __ cmp(out, ShifterOperand(cls)); // Classes must be equal for the instanceof to succeed. @@ -5373,17 +5448,8 @@ void InstructionCodeGeneratorARM::VisitInstanceOf(HInstanceOf* instruction) { // object to avoid doing a comparison we know will fail. Label loop; __ Bind(&loop); - Location temp_loc = kEmitCompilerReadBarrier ? locations->GetTemp(0) : Location::NoLocation(); - if (kEmitCompilerReadBarrier) { - // Save the value of `out` into `temp` before overwriting it - // in the following move operation, as we will need it for the - // read barrier below. 
- Register temp = temp_loc.AsRegister<Register>(); - __ Mov(temp, out); - } // /* HeapReference<Class> */ out = out->super_class_ - __ LoadFromOffset(kLoadWord, out, out, super_offset); - codegen_->MaybeGenerateReadBarrier(instruction, out_loc, out_loc, temp_loc, super_offset); + GenerateReferenceLoadOneRegister(instruction, out_loc, super_offset, temp_loc); // If `out` is null, we use it for the result, and jump to `done`. __ CompareAndBranchIfZero(out, &done); __ cmp(out, ShifterOperand(cls)); @@ -5401,17 +5467,8 @@ void InstructionCodeGeneratorARM::VisitInstanceOf(HInstanceOf* instruction) { __ Bind(&loop); __ cmp(out, ShifterOperand(cls)); __ b(&success, EQ); - Location temp_loc = kEmitCompilerReadBarrier ? locations->GetTemp(0) : Location::NoLocation(); - if (kEmitCompilerReadBarrier) { - // Save the value of `out` into `temp` before overwriting it - // in the following move operation, as we will need it for the - // read barrier below. - Register temp = temp_loc.AsRegister<Register>(); - __ Mov(temp, out); - } // /* HeapReference<Class> */ out = out->super_class_ - __ LoadFromOffset(kLoadWord, out, out, super_offset); - codegen_->MaybeGenerateReadBarrier(instruction, out_loc, out_loc, temp_loc, super_offset); + GenerateReferenceLoadOneRegister(instruction, out_loc, super_offset, temp_loc); __ CompareAndBranchIfNonZero(out, &loop); // If `out` is null, we use it for the result, and jump to `done`. __ b(&done); @@ -5429,17 +5486,8 @@ void InstructionCodeGeneratorARM::VisitInstanceOf(HInstanceOf* instruction) { __ cmp(out, ShifterOperand(cls)); __ b(&exact_check, EQ); // Otherwise, we need to check that the object's class is a non-primitive array. - Location temp_loc = kEmitCompilerReadBarrier ? locations->GetTemp(0) : Location::NoLocation(); - if (kEmitCompilerReadBarrier) { - // Save the value of `out` into `temp` before overwriting it - // in the following move operation, as we will need it for the - // read barrier below. - Register temp = temp_loc.AsRegister<Register>(); - __ Mov(temp, out); - } // /* HeapReference<Class> */ out = out->component_type_ - __ LoadFromOffset(kLoadWord, out, out, component_offset); - codegen_->MaybeGenerateReadBarrier(instruction, out_loc, out_loc, temp_loc, component_offset); + GenerateReferenceLoadOneRegister(instruction, out_loc, component_offset, temp_loc); // If `out` is null, we use it for the result, and jump to `done`. __ CompareAndBranchIfZero(out, &done); __ LoadFromOffset(kLoadUnsignedHalfword, out, out, primitive_offset); @@ -5478,6 +5526,13 @@ void InstructionCodeGeneratorARM::VisitInstanceOf(HInstanceOf* instruction) { // HInstanceOf instruction (following the runtime calling // convention), which might be cluttered by the potential first // read barrier emission at the beginning of this method. + // + // TODO: Introduce a new runtime entry point taking the object + // to test (instead of its class) as argument, and let it deal + // with the read barrier issues. This will let us refactor this + // case of the `switch` code as it was previously (with a direct + // call to the runtime not using a type checking slow path). + // This should also be beneficial for the other cases above. 
DCHECK(locations->OnlyCallsOnSlowPath()); slow_path = new (GetGraph()->GetArena()) TypeCheckSlowPathARM(instruction, /* is_fatal */ false); @@ -5532,27 +5587,27 @@ void LocationsBuilderARM::VisitCheckCast(HCheckCast* instruction) { locations->AddTemp(Location::RequiresRegister()); // When read barriers are enabled, we need an additional temporary // register for some cases. - if (kEmitCompilerReadBarrier && - (type_check_kind == TypeCheckKind::kAbstractClassCheck || - type_check_kind == TypeCheckKind::kClassHierarchyCheck || - type_check_kind == TypeCheckKind::kArrayObjectCheck)) { + if (TypeCheckNeedsATemporary(type_check_kind)) { locations->AddTemp(Location::RequiresRegister()); } } void InstructionCodeGeneratorARM::VisitCheckCast(HCheckCast* instruction) { + TypeCheckKind type_check_kind = instruction->GetTypeCheckKind(); LocationSummary* locations = instruction->GetLocations(); Location obj_loc = locations->InAt(0); Register obj = obj_loc.AsRegister<Register>(); Register cls = locations->InAt(1).AsRegister<Register>(); Location temp_loc = locations->GetTemp(0); Register temp = temp_loc.AsRegister<Register>(); + Location temp2_loc = TypeCheckNeedsATemporary(type_check_kind) ? + locations->GetTemp(1) : + Location::NoLocation(); uint32_t class_offset = mirror::Object::ClassOffset().Int32Value(); uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value(); uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value(); uint32_t primitive_offset = mirror::Class::PrimitiveTypeOffset().Int32Value(); - TypeCheckKind type_check_kind = instruction->GetTypeCheckKind(); bool is_type_check_slow_path_fatal = (type_check_kind == TypeCheckKind::kExactCheck || type_check_kind == TypeCheckKind::kAbstractClassCheck || @@ -5571,8 +5626,7 @@ void InstructionCodeGeneratorARM::VisitCheckCast(HCheckCast* instruction) { } // /* HeapReference<Class> */ temp = obj->klass_ - __ LoadFromOffset(kLoadWord, temp, obj, class_offset); - codegen_->MaybeGenerateReadBarrier(instruction, temp_loc, temp_loc, obj_loc, class_offset); + GenerateReferenceLoadTwoRegisters(instruction, temp_loc, obj_loc, class_offset, temp2_loc); switch (type_check_kind) { case TypeCheckKind::kExactCheck: @@ -5589,18 +5643,8 @@ void InstructionCodeGeneratorARM::VisitCheckCast(HCheckCast* instruction) { // object to avoid doing a comparison we know will fail. Label loop, compare_classes; __ Bind(&loop); - Location temp2_loc = - kEmitCompilerReadBarrier ? locations->GetTemp(1) : Location::NoLocation(); - if (kEmitCompilerReadBarrier) { - // Save the value of `temp` into `temp2` before overwriting it - // in the following move operation, as we will need it for the - // read barrier below. - Register temp2 = temp2_loc.AsRegister<Register>(); - __ Mov(temp2, temp); - } // /* HeapReference<Class> */ temp = temp->super_class_ - __ LoadFromOffset(kLoadWord, temp, temp, super_offset); - codegen_->MaybeGenerateReadBarrier(instruction, temp_loc, temp_loc, temp2_loc, super_offset); + GenerateReferenceLoadOneRegister(instruction, temp_loc, super_offset, temp2_loc); // If the class reference currently in `temp` is not null, jump // to the `compare_classes` label to compare it with the checked @@ -5612,8 +5656,7 @@ void InstructionCodeGeneratorARM::VisitCheckCast(HCheckCast* instruction) { // going into the slow path, as it has been overwritten in the // meantime. 
// /* HeapReference<Class> */ temp = obj->klass_ - __ LoadFromOffset(kLoadWord, temp, obj, class_offset); - codegen_->MaybeGenerateReadBarrier(instruction, temp_loc, temp_loc, obj_loc, class_offset); + GenerateReferenceLoadTwoRegisters(instruction, temp_loc, obj_loc, class_offset, temp2_loc); __ b(type_check_slow_path->GetEntryLabel()); __ Bind(&compare_classes); @@ -5629,18 +5672,8 @@ void InstructionCodeGeneratorARM::VisitCheckCast(HCheckCast* instruction) { __ cmp(temp, ShifterOperand(cls)); __ b(&done, EQ); - Location temp2_loc = - kEmitCompilerReadBarrier ? locations->GetTemp(1) : Location::NoLocation(); - if (kEmitCompilerReadBarrier) { - // Save the value of `temp` into `temp2` before overwriting it - // in the following move operation, as we will need it for the - // read barrier below. - Register temp2 = temp2_loc.AsRegister<Register>(); - __ Mov(temp2, temp); - } // /* HeapReference<Class> */ temp = temp->super_class_ - __ LoadFromOffset(kLoadWord, temp, temp, super_offset); - codegen_->MaybeGenerateReadBarrier(instruction, temp_loc, temp_loc, temp2_loc, super_offset); + GenerateReferenceLoadOneRegister(instruction, temp_loc, super_offset, temp2_loc); // If the class reference currently in `temp` is not null, jump // back at the beginning of the loop. @@ -5651,8 +5684,7 @@ void InstructionCodeGeneratorARM::VisitCheckCast(HCheckCast* instruction) { // going into the slow path, as it has been overwritten in the // meantime. // /* HeapReference<Class> */ temp = obj->klass_ - __ LoadFromOffset(kLoadWord, temp, obj, class_offset); - codegen_->MaybeGenerateReadBarrier(instruction, temp_loc, temp_loc, obj_loc, class_offset); + GenerateReferenceLoadTwoRegisters(instruction, temp_loc, obj_loc, class_offset, temp2_loc); __ b(type_check_slow_path->GetEntryLabel()); break; } @@ -5664,19 +5696,8 @@ void InstructionCodeGeneratorARM::VisitCheckCast(HCheckCast* instruction) { __ b(&done, EQ); // Otherwise, we need to check that the object's class is a non-primitive array. - Location temp2_loc = - kEmitCompilerReadBarrier ? locations->GetTemp(1) : Location::NoLocation(); - if (kEmitCompilerReadBarrier) { - // Save the value of `temp` into `temp2` before overwriting it - // in the following move operation, as we will need it for the - // read barrier below. - Register temp2 = temp2_loc.AsRegister<Register>(); - __ Mov(temp2, temp); - } // /* HeapReference<Class> */ temp = temp->component_type_ - __ LoadFromOffset(kLoadWord, temp, temp, component_offset); - codegen_->MaybeGenerateReadBarrier( - instruction, temp_loc, temp_loc, temp2_loc, component_offset); + GenerateReferenceLoadOneRegister(instruction, temp_loc, component_offset, temp2_loc); // If the component type is not null (i.e. the object is indeed // an array), jump to label `check_non_primitive_component_type` @@ -5689,8 +5710,7 @@ void InstructionCodeGeneratorARM::VisitCheckCast(HCheckCast* instruction) { // going into the slow path, as it has been overwritten in the // meantime. 
// /* HeapReference<Class> */ temp = obj->klass_ - __ LoadFromOffset(kLoadWord, temp, obj, class_offset); - codegen_->MaybeGenerateReadBarrier(instruction, temp_loc, temp_loc, obj_loc, class_offset); + GenerateReferenceLoadTwoRegisters(instruction, temp_loc, obj_loc, class_offset, temp2_loc); __ b(type_check_slow_path->GetEntryLabel()); __ Bind(&check_non_primitive_component_type); @@ -5699,8 +5719,7 @@ void InstructionCodeGeneratorARM::VisitCheckCast(HCheckCast* instruction) { __ CompareAndBranchIfZero(temp, &done); // Same comment as above regarding `temp` and the slow path. // /* HeapReference<Class> */ temp = obj->klass_ - __ LoadFromOffset(kLoadWord, temp, obj, class_offset); - codegen_->MaybeGenerateReadBarrier(instruction, temp_loc, temp_loc, obj_loc, class_offset); + GenerateReferenceLoadTwoRegisters(instruction, temp_loc, obj_loc, class_offset, temp2_loc); __ b(type_check_slow_path->GetEntryLabel()); break; } @@ -5717,6 +5736,13 @@ void InstructionCodeGeneratorARM::VisitCheckCast(HCheckCast* instruction) { // instruction (following the runtime calling convention), which // might be cluttered by the potential first read barrier // emission at the beginning of this method. + // + // TODO: Introduce a new runtime entry point taking the object + // to test (instead of its class) as argument, and let it deal + // with the read barrier issues. This will let us refactor this + // case of the `switch` code as it was previously (with a direct + // call to the runtime not using a type checking slow path). + // This should also be beneficial for the other cases above. __ b(type_check_slow_path->GetEntryLabel()); break; } @@ -5901,14 +5927,249 @@ void InstructionCodeGeneratorARM::HandleBitwiseOperation(HBinaryOperation* instr } } -void CodeGeneratorARM::GenerateReadBarrier(HInstruction* instruction, - Location out, - Location ref, - Location obj, - uint32_t offset, - Location index) { +void InstructionCodeGeneratorARM::GenerateReferenceLoadOneRegister(HInstruction* instruction, + Location out, + uint32_t offset, + Location temp) { + Register out_reg = out.AsRegister<Register>(); + if (kEmitCompilerReadBarrier) { + if (kUseBakerReadBarrier) { + // Load with fast path based Baker's read barrier. + // /* HeapReference<Object> */ out = *(out + offset) + codegen_->GenerateFieldLoadWithBakerReadBarrier( + instruction, out, out_reg, offset, temp, /* needs_null_check */ false); + } else { + // Load with slow path based read barrier. + // Save the value of `out` into `temp` before overwriting it + // in the following move operation, as we will need it for the + // read barrier below. + __ Mov(temp.AsRegister<Register>(), out_reg); + // /* HeapReference<Object> */ out = *(out + offset) + __ LoadFromOffset(kLoadWord, out_reg, out_reg, offset); + codegen_->GenerateReadBarrierSlow(instruction, out, out, temp, offset); + } + } else { + // Plain load with no read barrier. + // /* HeapReference<Object> */ out = *(out + offset) + __ LoadFromOffset(kLoadWord, out_reg, out_reg, offset); + __ MaybeUnpoisonHeapReference(out_reg); + } +} + +void InstructionCodeGeneratorARM::GenerateReferenceLoadTwoRegisters(HInstruction* instruction, + Location out, + Location obj, + uint32_t offset, + Location temp) { + Register out_reg = out.AsRegister<Register>(); + Register obj_reg = obj.AsRegister<Register>(); + if (kEmitCompilerReadBarrier) { + if (kUseBakerReadBarrier) { + // Load with fast path based Baker's read barrier. 
+ // /* HeapReference<Object> */ out = *(obj + offset) + codegen_->GenerateFieldLoadWithBakerReadBarrier( + instruction, out, obj_reg, offset, temp, /* needs_null_check */ false); + } else { + // Load with slow path based read barrier. + // /* HeapReference<Object> */ out = *(obj + offset) + __ LoadFromOffset(kLoadWord, out_reg, obj_reg, offset); + codegen_->GenerateReadBarrierSlow(instruction, out, out, obj, offset); + } + } else { + // Plain load with no read barrier. + // /* HeapReference<Object> */ out = *(obj + offset) + __ LoadFromOffset(kLoadWord, out_reg, obj_reg, offset); + __ MaybeUnpoisonHeapReference(out_reg); + } +} + +void InstructionCodeGeneratorARM::GenerateGcRootFieldLoad(HInstruction* instruction, + Location root, + Register obj, + uint32_t offset) { + Register root_reg = root.AsRegister<Register>(); + if (kEmitCompilerReadBarrier) { + if (kUseBakerReadBarrier) { + // Fast path implementation of art::ReadBarrier::BarrierForRoot when + // Baker's read barriers are used: + // + // root = obj.field; + // if (Thread::Current()->GetIsGcMarking()) { + // root = ReadBarrier::Mark(root) + // } + + // /* GcRoot<mirror::Object> */ root = *(obj + offset) + __ LoadFromOffset(kLoadWord, root_reg, obj, offset); + static_assert( + sizeof(mirror::CompressedReference<mirror::Object>) == sizeof(GcRoot<mirror::Object>), + "art::mirror::CompressedReference<mirror::Object> and art::GcRoot<mirror::Object> " + "have different sizes."); + static_assert(sizeof(mirror::CompressedReference<mirror::Object>) == sizeof(int32_t), + "art::mirror::CompressedReference<mirror::Object> and int32_t " + "have different sizes."); + + // Slow path used to mark the GC root `root`. + SlowPathCode* slow_path = + new (GetGraph()->GetArena()) ReadBarrierMarkSlowPathARM(instruction, root, root); + codegen_->AddSlowPath(slow_path); + + __ LoadFromOffset( + kLoadWord, IP, TR, Thread::IsGcMarkingOffset<kArmWordSize>().Int32Value()); + __ CompareAndBranchIfNonZero(IP, slow_path->GetEntryLabel()); + __ Bind(slow_path->GetExitLabel()); + } else { + // GC root loaded through a slow path for read barriers other + // than Baker's. + // /* GcRoot<mirror::Object>* */ root = obj + offset + __ AddConstant(root_reg, obj, offset); + // /* mirror::Object* */ root = root->Read() + codegen_->GenerateReadBarrierForRootSlow(instruction, root, root); + } + } else { + // Plain GC root load with no read barrier. + // /* GcRoot<mirror::Object> */ root = *(obj + offset) + __ LoadFromOffset(kLoadWord, root_reg, obj, offset); + // Note that GC roots are not affected by heap poisoning, thus we + // do not have to unpoison `root_reg` here.
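The pseudocode in the comment above corresponds to the following C++-level model (a sketch only; `Object` and `Mark` are illustrative stand-ins, not ART APIs):

#include <cstdint>

struct Object;               // stand-in for mirror::Object
Object* Mark(Object* ref);   // stand-in for the ReadBarrier::Mark entry point

// Baker fast path for a GC root load: do the plain load, then call out
// to the mark slow path only while the GC is marking.
Object* LoadGcRoot(Object** root_addr, bool is_gc_marking) {
  Object* root = *root_addr;  // plain root load
  if (is_gc_marking) {        // Thread::Current()->GetIsGcMarking()
    root = Mark(root);        // ReadBarrierMarkSlowPathARM
  }
  return root;
}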
+ } +} + +void CodeGeneratorARM::GenerateFieldLoadWithBakerReadBarrier(HInstruction* instruction, + Location ref, + Register obj, + uint32_t offset, + Location temp, + bool needs_null_check) { + DCHECK(kEmitCompilerReadBarrier); + DCHECK(kUseBakerReadBarrier); + + // /* HeapReference<Object> */ ref = *(obj + offset) + Location no_index = Location::NoLocation(); + GenerateReferenceLoadWithBakerReadBarrier( + instruction, ref, obj, offset, no_index, temp, needs_null_check); +} + +void CodeGeneratorARM::GenerateArrayLoadWithBakerReadBarrier(HInstruction* instruction, + Location ref, + Register obj, + uint32_t data_offset, + Location index, + Location temp, + bool needs_null_check) { + DCHECK(kEmitCompilerReadBarrier); + DCHECK(kUseBakerReadBarrier); + + // /* HeapReference<Object> */ ref = + // *(obj + data_offset + index * sizeof(HeapReference<Object>)) + GenerateReferenceLoadWithBakerReadBarrier( + instruction, ref, obj, data_offset, index, temp, needs_null_check); +} + +void CodeGeneratorARM::GenerateReferenceLoadWithBakerReadBarrier(HInstruction* instruction, + Location ref, + Register obj, + uint32_t offset, + Location index, + Location temp, + bool needs_null_check) { + DCHECK(kEmitCompilerReadBarrier); + DCHECK(kUseBakerReadBarrier); + + // In slow path based read barriers, the read barrier call is + // inserted after the original load. However, in fast path based + // Baker's read barriers, we need to perform the load of + // mirror::Object::monitor_ *before* the original reference load. + // This load-load ordering is required by the read barrier. + // The fast path/slow path (for Baker's algorithm) should look like: + // + // uint32_t rb_state = Lockword(obj->monitor_).ReadBarrierState(); + // lfence; // Load fence or artificial data dependency to prevent load-load reordering + // HeapReference<Object> ref = *src; // Original reference load. + // bool is_gray = (rb_state == ReadBarrier::gray_ptr_); + // if (is_gray) { + // ref = ReadBarrier::Mark(ref); // Performed by runtime entrypoint slow path. + // } + // + // Note: the original implementation in ReadBarrier::Barrier is + // slightly more complex as: + // - it implements the load-load fence using a data dependency on + // the high-bits of rb_state, which are expected to be all zeroes; + // - it performs additional checks that we do not do here for + // performance reasons. + + Register ref_reg = ref.AsRegister<Register>(); + Register temp_reg = temp.AsRegister<Register>(); + uint32_t monitor_offset = mirror::Object::MonitorOffset().Int32Value(); + + // /* int32_t */ monitor = obj->monitor_ + __ LoadFromOffset(kLoadWord, temp_reg, obj, monitor_offset); + if (needs_null_check) { + MaybeRecordImplicitNullCheck(instruction); + } + // /* LockWord */ lock_word = LockWord(monitor) + static_assert(sizeof(LockWord) == sizeof(int32_t), + "art::LockWord and int32_t have different sizes."); + // /* uint32_t */ rb_state = lock_word.ReadBarrierState() + __ Lsr(temp_reg, temp_reg, LockWord::kReadBarrierStateShift); + __ and_(temp_reg, temp_reg, ShifterOperand(LockWord::kReadBarrierStateMask)); + static_assert( + LockWord::kReadBarrierStateMask == ReadBarrier::rb_ptr_mask_, + "art::LockWord::kReadBarrierStateMask is not equal to art::ReadBarrier::rb_ptr_mask_."); + + // Introduce a dependency on the high bits of rb_state, which shall + // be all zeroes, to prevent load-load reordering, and without using + // a memory barrier (which would be more expensive). 
+ // IP = rb_state & ~LockWord::kReadBarrierStateMask = 0 + __ bic(IP, temp_reg, ShifterOperand(LockWord::kReadBarrierStateMask)); + // obj is unchanged by this operation, but its value now depends on + // IP, which depends on temp_reg. + __ add(obj, obj, ShifterOperand(IP)); + + // The actual reference load. + if (index.IsValid()) { + static_assert( + sizeof(mirror::HeapReference<mirror::Object>) == sizeof(int32_t), + "art::mirror::HeapReference<art::mirror::Object> and int32_t have different sizes."); + // /* HeapReference<Object> */ ref = + // *(obj + offset + index * sizeof(HeapReference<Object>)) + if (index.IsConstant()) { + size_t computed_offset = + (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_4) + offset; + __ LoadFromOffset(kLoadWord, ref_reg, obj, computed_offset); + } else { + __ add(IP, obj, ShifterOperand(index.AsRegister<Register>(), LSL, TIMES_4)); + __ LoadFromOffset(kLoadWord, ref_reg, IP, offset); + } + } else { + // /* HeapReference<Object> */ ref = *(obj + offset) + __ LoadFromOffset(kLoadWord, ref_reg, obj, offset); + } + + // Object* ref = ref_addr->AsMirrorPtr() + __ MaybeUnpoisonHeapReference(ref_reg); + + // Slow path used to mark the object `ref` when it is gray. + SlowPathCode* slow_path = + new (GetGraph()->GetArena()) ReadBarrierMarkSlowPathARM(instruction, ref, ref); + AddSlowPath(slow_path); + + // if (rb_state == ReadBarrier::gray_ptr_) + // ref = ReadBarrier::Mark(ref); + __ cmp(temp_reg, ShifterOperand(ReadBarrier::gray_ptr_)); + __ b(slow_path->GetEntryLabel(), EQ); + __ Bind(slow_path->GetExitLabel()); +} + +void CodeGeneratorARM::GenerateReadBarrierSlow(HInstruction* instruction, + Location out, + Location ref, + Location obj, + uint32_t offset, + Location index) { DCHECK(kEmitCompilerReadBarrier); + // Insert a slow path based read barrier *after* the reference load. + // // If heap poisoning is enabled, the unpoisoning of the loaded // reference will be carried out by the runtime within the slow // path. @@ -5922,57 +6183,41 @@ void CodeGeneratorARM::GenerateReadBarrier(HInstruction* instruction, ReadBarrierForHeapReferenceSlowPathARM(instruction, out, ref, obj, offset, index); AddSlowPath(slow_path); - // TODO: When read barrier has a fast path, add it here. - /* Currently the read barrier call is inserted after the original load. - * However, if we have a fast path, we need to perform the load of obj.LockWord *before* the - * original load. This load-load ordering is required by the read barrier. - * The fast path/slow path (for Baker's algorithm) should look like: - * - * bool isGray = obj.LockWord & kReadBarrierMask; - * lfence; // load fence or artificial data dependence to prevent load-load reordering - * ref = obj.field; // this is the original load - * if (isGray) { - * ref = Mark(ref); // ideally the slow path just does Mark(ref) - * } - */ - __ b(slow_path->GetEntryLabel()); __ Bind(slow_path->GetExitLabel()); } -void CodeGeneratorARM::MaybeGenerateReadBarrier(HInstruction* instruction, - Location out, - Location ref, - Location obj, - uint32_t offset, - Location index) { +void CodeGeneratorARM::MaybeGenerateReadBarrierSlow(HInstruction* instruction, + Location out, + Location ref, + Location obj, + uint32_t offset, + Location index) { if (kEmitCompilerReadBarrier) { + // Baker's read barriers shall be handled by the fast path + // (CodeGeneratorARM::GenerateReferenceLoadWithBakerReadBarrier). 
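Taken together, the sequence above behaves like this C++ model (a sketch with illustrative names; the lock word is passed in here, whereas the emitted code loads it from obj->monitor_ first, which is what establishes the required load-load order):

#include <cstdint>

struct Object;               // stand-in for mirror::Object
Object* Mark(Object* ref);   // stand-in for the mark runtime entry point

Object* BakerReferenceLoad(char* obj, uint32_t offset, uint32_t monitor,
                           uint32_t shift, uint32_t mask, uint32_t gray_state) {
  uint32_t rb_state = (monitor >> shift) & mask;  // Lsr + and_
  uintptr_t dep = rb_state & ~mask;               // bic: always 0...
  obj += dep;                                     // ...but add makes the load depend on it
  Object* ref = *reinterpret_cast<Object**>(obj + offset);  // original reference load
  if (rb_state == gray_state) {                   // cmp + b(EQ) into the slow path
    ref = Mark(ref);
  }
  return ref;
}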
+ DCHECK(!kUseBakerReadBarrier); // If heap poisoning is enabled, unpoisoning will be taken care of // by the runtime within the slow path. - GenerateReadBarrier(instruction, out, ref, obj, offset, index); + GenerateReadBarrierSlow(instruction, out, ref, obj, offset, index); } else if (kPoisonHeapReferences) { __ UnpoisonHeapReference(out.AsRegister<Register>()); } } -void CodeGeneratorARM::GenerateReadBarrierForRoot(HInstruction* instruction, - Location out, - Location root) { +void CodeGeneratorARM::GenerateReadBarrierForRootSlow(HInstruction* instruction, + Location out, + Location root) { DCHECK(kEmitCompilerReadBarrier); + // Insert a slow path based read barrier *after* the GC root load. + // // Note that GC roots are not affected by heap poisoning, so we do // not need to do anything special for this here. SlowPathCode* slow_path = new (GetGraph()->GetArena()) ReadBarrierForRootSlowPathARM(instruction, out, root); AddSlowPath(slow_path); - // TODO: Implement a fast path for ReadBarrierForRoot, performing - // the following operation (for Baker's algorithm): - // - // if (thread.tls32_.is_gc_marking) { - // root = Mark(root); - // } - __ b(slow_path->GetEntryLabel()); __ Bind(slow_path->GetExitLabel()); } @@ -6304,7 +6549,7 @@ void InstructionCodeGeneratorARM::VisitPackedSwitch(HPackedSwitch* switch_instr) } if (num_entries - last_index == 2) { // The last missing case_value. - GenerateCompareWithImmediate(temp_reg, 1); + __ CmpConstant(temp_reg, 1); __ b(codegen_->GetLabelOf(successors[last_index + 1]), EQ); } @@ -6364,7 +6609,7 @@ void InstructionCodeGeneratorARM::VisitArmDexCacheArraysBase(HArmDexCacheArraysB void CodeGeneratorARM::MoveFromReturnRegister(Location trg, Primitive::Type type) { if (!trg.IsValid()) { - DCHECK(type == Primitive::kPrimVoid); + DCHECK_EQ(type, Primitive::kPrimVoid); return; } diff --git a/compiler/optimizing/code_generator_arm.h b/compiler/optimizing/code_generator_arm.h index b7c58e1248..26d6d63b31 100644 --- a/compiler/optimizing/code_generator_arm.h +++ b/compiler/optimizing/code_generator_arm.h @@ -188,7 +188,7 @@ class LocationsBuilderARM : public HGraphVisitor { DISALLOW_COPY_AND_ASSIGN(LocationsBuilderARM); }; -class InstructionCodeGeneratorARM : public HGraphVisitor { +class InstructionCodeGeneratorARM : public InstructionCodeGenerator { public: InstructionCodeGeneratorARM(HGraph* graph, CodeGeneratorARM* codegen); @@ -222,24 +222,57 @@ class InstructionCodeGeneratorARM : public HGraphVisitor { void HandleLongRotate(LocationSummary* locations); void HandleRotate(HRor* ror); void HandleShift(HBinaryOperation* operation); - void GenerateMemoryBarrier(MemBarrierKind kind); + void GenerateWideAtomicStore(Register addr, uint32_t offset, Register value_lo, Register value_hi, Register temp1, Register temp2, HInstruction* instruction); void GenerateWideAtomicLoad(Register addr, uint32_t offset, Register out_lo, Register out_hi); + void HandleFieldSet(HInstruction* instruction, const FieldInfo& field_info, bool value_can_be_null); void HandleFieldGet(HInstruction* instruction, const FieldInfo& field_info); + + // Generate a heap reference load using one register `out`: + // + // out <- *(out + offset) + // + // while honoring heap poisoning and/or read barriers (if any). + // Register `temp` is used when generating a read barrier. 
+ void GenerateReferenceLoadOneRegister(HInstruction* instruction, + Location out, + uint32_t offset, + Location temp); + // Generate a heap reference load using two different registers + // `out` and `obj`: + // + // out <- *(obj + offset) + // + // while honoring heap poisoning and/or read barriers (if any). + // Register `temp` is used when generating a Baker's read barrier. + void GenerateReferenceLoadTwoRegisters(HInstruction* instruction, + Location out, + Location obj, + uint32_t offset, + Location temp); + // Generate a GC root reference load: + // + // root <- *(obj + offset) + // + // while honoring read barriers (if any). + void GenerateGcRootFieldLoad(HInstruction* instruction, + Location root, + Register obj, + uint32_t offset); + void GenerateImplicitNullCheck(HNullCheck* instruction); void GenerateExplicitNullCheck(HNullCheck* instruction); void GenerateTestAndBranch(HInstruction* instruction, size_t condition_input_index, Label* true_target, Label* false_target); - void GenerateCompareWithImmediate(Register left, int32_t right); void GenerateCompareTestAndBranch(HCondition* condition, Label* true_target, Label* false_target); @@ -346,6 +379,8 @@ class CodeGeneratorARM : public CodeGenerator { // Emit a write barrier. void MarkGCCard(Register temp, Register card, Register object, Register value, bool can_be_null); + void GenerateMemoryBarrier(MemBarrierKind kind); + Label* GetLabelOf(HBasicBlock* block) const { return CommonGetLabelOf<Label>(block_labels_, block); } @@ -406,7 +441,26 @@ class CodeGeneratorARM : public CodeGenerator { return &it->second; } - // Generate a read barrier for a heap reference within `instruction`. + // Fast path implementation of ReadBarrier::Barrier for a heap + // reference field load when Baker's read barriers are used. + void GenerateFieldLoadWithBakerReadBarrier(HInstruction* instruction, + Location out, + Register obj, + uint32_t offset, + Location temp, + bool needs_null_check); + // Fast path implementation of ReadBarrier::Barrier for a heap + // reference array load when Baker's read barriers are used. + void GenerateArrayLoadWithBakerReadBarrier(HInstruction* instruction, + Location out, + Register obj, + uint32_t data_offset, + Location index, + Location temp, + bool needs_null_check); + + // Generate a read barrier for a heap reference within `instruction` + // using a slow path. // // A read barrier for an object reference read from the heap is // implemented as a call to the artReadBarrierSlow runtime entry @@ -423,23 +477,25 @@ class CodeGeneratorARM : public CodeGenerator { // When `index` is provided (i.e. for array accesses), the offset // value passed to artReadBarrierSlow is adjusted to take `index` // into account. - void GenerateReadBarrier(HInstruction* instruction, - Location out, - Location ref, - Location obj, - uint32_t offset, - Location index = Location::NoLocation()); - - // If read barriers are enabled, generate a read barrier for a heap reference. - // If heap poisoning is enabled, also unpoison the reference in `out`. - void MaybeGenerateReadBarrier(HInstruction* instruction, - Location out, - Location ref, - Location obj, - uint32_t offset, - Location index = Location::NoLocation()); - - // Generate a read barrier for a GC root within `instruction`. 
+ void GenerateReadBarrierSlow(HInstruction* instruction, + Location out, + Location ref, + Location obj, + uint32_t offset, + Location index = Location::NoLocation()); + + // If read barriers are enabled, generate a read barrier for a heap + // reference using a slow path. If heap poisoning is enabled, also + // unpoison the reference in `out`. + void MaybeGenerateReadBarrierSlow(HInstruction* instruction, + Location out, + Location ref, + Location obj, + uint32_t offset, + Location index = Location::NoLocation()); + + // Generate a read barrier for a GC root within `instruction` using + // a slow path. // // A read barrier for an object reference GC root is implemented as // a call to the artReadBarrierForRootSlow runtime entry point, @@ -449,9 +505,19 @@ class CodeGeneratorARM : public CodeGenerator { // // The `out` location contains the value returned by // artReadBarrierForRootSlow. - void GenerateReadBarrierForRoot(HInstruction* instruction, Location out, Location root); + void GenerateReadBarrierForRootSlow(HInstruction* instruction, Location out, Location root); private: + // Factored implementation of GenerateFieldLoadWithBakerReadBarrier + // and GenerateArrayLoadWithBakerReadBarrier. + void GenerateReferenceLoadWithBakerReadBarrier(HInstruction* instruction, + Location ref, + Register obj, + uint32_t offset, + Location index, + Location temp, + bool needs_null_check); + Register GetInvokeStaticOrDirectExtraParameter(HInvokeStaticOrDirect* invoke, Register temp); using MethodToLiteralMap = ArenaSafeMap<MethodReference, Literal*, MethodReferenceComparator>; diff --git a/compiler/optimizing/code_generator_arm64.cc b/compiler/optimizing/code_generator_arm64.cc index b49f42b6c8..a3150d3d22 100644 --- a/compiler/optimizing/code_generator_arm64.cc +++ b/compiler/optimizing/code_generator_arm64.cc @@ -477,24 +477,24 @@ class TypeCheckSlowPathARM64 : public SlowPathCodeARM64 { class DeoptimizationSlowPathARM64 : public SlowPathCodeARM64 { public: - explicit DeoptimizationSlowPathARM64(HInstruction* instruction) + explicit DeoptimizationSlowPathARM64(HDeoptimize* instruction) : instruction_(instruction) {} void EmitNativeCode(CodeGenerator* codegen) OVERRIDE { + CodeGeneratorARM64* arm64_codegen = down_cast<CodeGeneratorARM64*>(codegen); __ Bind(GetEntryLabel()); SaveLiveRegisters(codegen, instruction_->GetLocations()); - DCHECK(instruction_->IsDeoptimize()); - HDeoptimize* deoptimize = instruction_->AsDeoptimize(); - uint32_t dex_pc = deoptimize->GetDexPc(); - CodeGeneratorARM64* arm64_codegen = down_cast<CodeGeneratorARM64*>(codegen); - arm64_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pDeoptimize), instruction_, dex_pc, this); + arm64_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pDeoptimize), + instruction_, + instruction_->GetDexPc(), + this); CheckEntrypointTypes<kQuickDeoptimize, void, void>(); } const char* GetDescription() const OVERRIDE { return "DeoptimizationSlowPathARM64"; } private: - HInstruction* const instruction_; + HDeoptimize* const instruction_; DISALLOW_COPY_AND_ASSIGN(DeoptimizationSlowPathARM64); }; @@ -1605,7 +1605,7 @@ void InstructionCodeGeneratorARM64::GenerateSuspendCheck(HSuspendCheck* instruct InstructionCodeGeneratorARM64::InstructionCodeGeneratorARM64(HGraph* graph, CodeGeneratorARM64* codegen) - : HGraphVisitor(graph), + : InstructionCodeGenerator(graph, codegen), assembler_(codegen->GetAssembler()), codegen_(codegen) {} @@ -2534,8 +2534,7 @@ void InstructionCodeGeneratorARM64::DivRemByPowerOfTwo(HBinaryOperation* instruc Register out = 
OutputRegister(instruction); Register dividend = InputRegisterAt(instruction, 0); int64_t imm = Int64FromConstant(second.GetConstant()); - uint64_t abs_imm = static_cast<uint64_t>(std::abs(imm)); - DCHECK(IsPowerOfTwo(abs_imm)); + uint64_t abs_imm = static_cast<uint64_t>(AbsOrMin(imm)); int ctz_imm = CTZ(abs_imm); UseScratchRegisterScope temps(GetVIXLAssembler()); @@ -2627,7 +2626,7 @@ void InstructionCodeGeneratorARM64::GenerateDivRemIntegral(HBinaryOperation* ins // Do not generate anything. DivZeroCheck would prevent any code to be executed. } else if (imm == 1 || imm == -1) { DivRemOneOrMinusOne(instruction); - } else if (IsPowerOfTwo(std::abs(imm))) { + } else if (IsPowerOfTwo(AbsOrMin(imm))) { DivRemByPowerOfTwo(instruction); } else { DCHECK(imm <= -2 || imm >= 2); @@ -2940,9 +2939,8 @@ void LocationsBuilderARM64::VisitDeoptimize(HDeoptimize* deoptimize) { } void InstructionCodeGeneratorARM64::VisitDeoptimize(HDeoptimize* deoptimize) { - SlowPathCodeARM64* slow_path = new (GetGraph()->GetArena()) - DeoptimizationSlowPathARM64(deoptimize); - codegen_->AddSlowPath(slow_path); + SlowPathCodeARM64* slow_path = + deopt_slow_paths_.NewSlowPath<DeoptimizationSlowPathARM64>(deoptimize); GenerateTestAndBranch(deoptimize, /* condition_input_index */ 0, slow_path->GetEntryLabel(), @@ -2954,6 +2952,10 @@ void LocationsBuilderARM64::VisitNativeDebugInfo(HNativeDebugInfo* info) { } void InstructionCodeGeneratorARM64::VisitNativeDebugInfo(HNativeDebugInfo* info) { + if (codegen_->HasStackMapAtCurrentPc()) { + // Ensure that we do not collide with the stack map of the previous instruction. + __ Nop(); + } codegen_->RecordPcInfo(info, info->GetDexPc()); } diff --git a/compiler/optimizing/code_generator_arm64.h b/compiler/optimizing/code_generator_arm64.h index 0e90ac6345..f2ff89488e 100644 --- a/compiler/optimizing/code_generator_arm64.h +++ b/compiler/optimizing/code_generator_arm64.h @@ -186,7 +186,7 @@ class FieldAccessCallingConventionARM64 : public FieldAccessCallingConvention { DISALLOW_COPY_AND_ASSIGN(FieldAccessCallingConventionARM64); }; -class InstructionCodeGeneratorARM64 : public HGraphVisitor { +class InstructionCodeGeneratorARM64 : public InstructionCodeGenerator { public: InstructionCodeGeneratorARM64(HGraph* graph, CodeGeneratorARM64* codegen); diff --git a/compiler/optimizing/code_generator_mips.cc b/compiler/optimizing/code_generator_mips.cc index 4648606da8..322912976e 100644 --- a/compiler/optimizing/code_generator_mips.cc +++ b/compiler/optimizing/code_generator_mips.cc @@ -444,19 +444,16 @@ class TypeCheckSlowPathMIPS : public SlowPathCodeMIPS { class DeoptimizationSlowPathMIPS : public SlowPathCodeMIPS { public: - explicit DeoptimizationSlowPathMIPS(HInstruction* instruction) + explicit DeoptimizationSlowPathMIPS(HDeoptimize* instruction) : instruction_(instruction) {} void EmitNativeCode(CodeGenerator* codegen) OVERRIDE { + CodeGeneratorMIPS* mips_codegen = down_cast<CodeGeneratorMIPS*>(codegen); __ Bind(GetEntryLabel()); SaveLiveRegisters(codegen, instruction_->GetLocations()); - DCHECK(instruction_->IsDeoptimize()); - HDeoptimize* deoptimize = instruction_->AsDeoptimize(); - uint32_t dex_pc = deoptimize->GetDexPc(); - CodeGeneratorMIPS* mips_codegen = down_cast<CodeGeneratorMIPS*>(codegen); mips_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pDeoptimize), instruction_, - dex_pc, + instruction_->GetDexPc(), this, IsDirectEntrypoint(kQuickDeoptimize)); CheckEntrypointTypes<kQuickDeoptimize, void, void>(); @@ -465,7 +462,7 @@ class DeoptimizationSlowPathMIPS : public SlowPathCodeMIPS 
{ const char* GetDescription() const OVERRIDE { return "DeoptimizationSlowPathMIPS"; } private: - HInstruction* const instruction_; + HDeoptimize* const instruction_; DISALLOW_COPY_AND_ASSIGN(DeoptimizationSlowPathMIPS); }; @@ -608,9 +605,9 @@ void ParallelMoveResolverMIPS::EmitSwap(size_t index) { // then swap the high 32 bits of the same FPR. mtc1 makes the high 32 bits of an FPR // unpredictable and the following mfch1 will fail. __ Mfc1(TMP, f1); - __ Mfhc1(AT, f1); + __ MoveFromFpuHigh(AT, f1); __ Mtc1(r2_l, f1); - __ Mthc1(r2_h, f1); + __ MoveToFpuHigh(r2_h, f1); __ Move(r2_l, TMP); __ Move(r2_h, AT); } else if (loc1.IsStackSlot() && loc2.IsStackSlot()) { @@ -862,7 +859,7 @@ void CodeGeneratorMIPS::Move64(Location destination, Location source) { Register dst_low = destination.AsRegisterPairLow<Register>(); FRegister src = source.AsFpuRegister<FRegister>(); __ Mfc1(dst_low, src); - __ Mfhc1(dst_high, src); + __ MoveFromFpuHigh(dst_high, src); } else { DCHECK(source.IsDoubleStackSlot()) << "Cannot move from " << source << " to " << destination; int32_t off = source.GetStackIndex(); @@ -875,7 +872,7 @@ void CodeGeneratorMIPS::Move64(Location destination, Location source) { Register src_high = source.AsRegisterPairHigh<Register>(); Register src_low = source.AsRegisterPairLow<Register>(); __ Mtc1(src_low, dst); - __ Mthc1(src_high, dst); + __ MoveToFpuHigh(src_high, dst); } else if (source.IsFpuRegister()) { __ MovD(destination.AsFpuRegister<FRegister>(), source.AsFpuRegister<FRegister>()); } else { @@ -1241,7 +1238,7 @@ void InstructionCodeGeneratorMIPS::GenerateSuspendCheck(HSuspendCheck* instructi InstructionCodeGeneratorMIPS::InstructionCodeGeneratorMIPS(HGraph* graph, CodeGeneratorMIPS* codegen) - : HGraphVisitor(graph), + : InstructionCodeGenerator(graph, codegen), assembler_(codegen->GetAssembler()), codegen_(codegen) {} @@ -1511,7 +1508,7 @@ void InstructionCodeGeneratorMIPS::HandleBinaryOp(HBinaryOperation* instruction) } void LocationsBuilderMIPS::HandleShift(HBinaryOperation* instr) { - DCHECK(instr->IsShl() || instr->IsShr() || instr->IsUShr()); + DCHECK(instr->IsShl() || instr->IsShr() || instr->IsUShr() || instr->IsRor()); LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instr); Primitive::Type type = instr->GetResultType(); @@ -1534,7 +1531,7 @@ void LocationsBuilderMIPS::HandleShift(HBinaryOperation* instr) { static constexpr size_t kMipsBitsPerWord = kMipsWordSize * kBitsPerByte; void InstructionCodeGeneratorMIPS::HandleShift(HBinaryOperation* instr) { - DCHECK(instr->IsShl() || instr->IsShr() || instr->IsUShr()); + DCHECK(instr->IsShl() || instr->IsShr() || instr->IsUShr() || instr->IsRor()); LocationSummary* locations = instr->GetLocations(); Primitive::Type type = instr->GetType(); @@ -1542,30 +1539,58 @@ void InstructionCodeGeneratorMIPS::HandleShift(HBinaryOperation* instr) { bool use_imm = rhs_location.IsConstant(); Register rhs_reg = use_imm ? ZERO : rhs_location.AsRegister<Register>(); int64_t rhs_imm = use_imm ? CodeGenerator::GetInt64ValueOf(rhs_location.GetConstant()) : 0; - uint32_t shift_mask = (type == Primitive::kPrimInt) ? kMaxIntShiftValue : kMaxLongShiftValue; - uint32_t shift_value = rhs_imm & shift_mask; - // Is the INS (Insert Bit Field) instruction supported? - bool has_ins = codegen_->GetInstructionSetFeatures().IsMipsIsaRevGreaterThanEqual2(); + const uint32_t shift_mask = (type == Primitive::kPrimInt) + ? 
kMaxIntShiftValue + : kMaxLongShiftValue; + const uint32_t shift_value = rhs_imm & shift_mask; + // Are the INS (Insert Bit Field) and ROTR instructions supported? + bool has_ins_rotr = codegen_->GetInstructionSetFeatures().IsMipsIsaRevGreaterThanEqual2(); switch (type) { case Primitive::kPrimInt: { Register dst = locations->Out().AsRegister<Register>(); Register lhs = locations->InAt(0).AsRegister<Register>(); if (use_imm) { - if (instr->IsShl()) { + if (shift_value == 0) { + if (dst != lhs) { + __ Move(dst, lhs); + } + } else if (instr->IsShl()) { __ Sll(dst, lhs, shift_value); } else if (instr->IsShr()) { __ Sra(dst, lhs, shift_value); - } else { + } else if (instr->IsUShr()) { __ Srl(dst, lhs, shift_value); + } else { + if (has_ins_rotr) { + __ Rotr(dst, lhs, shift_value); + } else { + __ Sll(TMP, lhs, (kMipsBitsPerWord - shift_value) & shift_mask); + __ Srl(dst, lhs, shift_value); + __ Or(dst, dst, TMP); + } } } else { if (instr->IsShl()) { __ Sllv(dst, lhs, rhs_reg); } else if (instr->IsShr()) { __ Srav(dst, lhs, rhs_reg); - } else { + } else if (instr->IsUShr()) { __ Srlv(dst, lhs, rhs_reg); + } else { + if (has_ins_rotr) { + __ Rotrv(dst, lhs, rhs_reg); + } else { + __ Subu(TMP, ZERO, rhs_reg); + // 32-bit shift instructions use the 5 least significant bits of the shift count, so + // shifting by `-rhs_reg` is equivalent to shifting by `(32 - rhs_reg) & 31`. The case + // when `rhs_reg & 31 == 0` is OK even though we don't shift `lhs` left all the way out + // by 32, because the result in this case is computed as `(lhs >> 0) | (lhs << 0)`, + // IOW, the OR'd values are equal. + __ Sllv(TMP, lhs, TMP); + __ Srlv(dst, lhs, rhs_reg); + __ Or(dst, dst, TMP); + } } } break; @@ -1580,7 +1605,7 @@ void InstructionCodeGeneratorMIPS::HandleShift(HBinaryOperation* instr) { if (shift_value == 0) { codegen_->Move64(locations->Out(), locations->InAt(0)); } else if (shift_value < kMipsBitsPerWord) { - if (has_ins) { + if (has_ins_rotr) { if (instr->IsShl()) { __ Srl(dst_high, lhs_low, kMipsBitsPerWord - shift_value); __ Ins(dst_high, lhs_high, shift_value, kMipsBitsPerWord - shift_value); @@ -1589,10 +1614,15 @@ void InstructionCodeGeneratorMIPS::HandleShift(HBinaryOperation* instr) { __ Srl(dst_low, lhs_low, shift_value); __ Ins(dst_low, lhs_high, kMipsBitsPerWord - shift_value, shift_value); __ Sra(dst_high, lhs_high, shift_value); + } else if (instr->IsUShr()) { + __ Srl(dst_low, lhs_low, shift_value); + __ Ins(dst_low, lhs_high, kMipsBitsPerWord - shift_value, shift_value); + __ Srl(dst_high, lhs_high, shift_value); } else { __ Srl(dst_low, lhs_low, shift_value); __ Ins(dst_low, lhs_high, kMipsBitsPerWord - shift_value, shift_value); __ Srl(dst_high, lhs_high, shift_value); + __ Ins(dst_high, lhs_low, kMipsBitsPerWord - shift_value, shift_value); } } else { if (instr->IsShl()) { @@ -1605,24 +1635,51 @@ void InstructionCodeGeneratorMIPS::HandleShift(HBinaryOperation* instr) { __ Sll(TMP, lhs_high, kMipsBitsPerWord - shift_value); __ Srl(dst_low, lhs_low, shift_value); __ Or(dst_low, dst_low, TMP); - } else { + } else if (instr->IsUShr()) { __ Srl(dst_high, lhs_high, shift_value); __ Sll(TMP, lhs_high, kMipsBitsPerWord - shift_value); __ Srl(dst_low, lhs_low, shift_value); __ Or(dst_low, dst_low, TMP); + } else { + __ Srl(TMP, lhs_low, shift_value); + __ Sll(dst_low, lhs_high, kMipsBitsPerWord - shift_value); + __ Or(dst_low, dst_low, TMP); + __ Srl(TMP, lhs_high, shift_value); + __ Sll(dst_high, lhs_low, kMipsBitsPerWord - shift_value); + __ Or(dst_high, dst_high, TMP); } } } else { - 
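The rotate emulation above relies on a standard shift identity; as a self-contained C++ sketch:

#include <cstdint>

// 32-bit rotate right built from shifts and OR, mirroring the
// Subu/Sllv/Srlv/Or sequence above. MIPS shift instructions use only the
// low 5 bits of the count, so the n == 0 case degenerates to
// (x >> 0) | (x << 0) == x, exactly as the comment above argues.
uint32_t RotateRight32(uint32_t x, uint32_t n) {
  n &= 31;
  return (x >> n) | (x << ((32 - n) & 31));
}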
shift_value -= kMipsBitsPerWord; + const uint32_t shift_value_high = shift_value - kMipsBitsPerWord; if (instr->IsShl()) { - __ Sll(dst_high, lhs_low, shift_value); + __ Sll(dst_high, lhs_low, shift_value_high); __ Move(dst_low, ZERO); } else if (instr->IsShr()) { - __ Sra(dst_low, lhs_high, shift_value); + __ Sra(dst_low, lhs_high, shift_value_high); __ Sra(dst_high, dst_low, kMipsBitsPerWord - 1); - } else { - __ Srl(dst_low, lhs_high, shift_value); + } else if (instr->IsUShr()) { + __ Srl(dst_low, lhs_high, shift_value_high); __ Move(dst_high, ZERO); + } else { + if (shift_value == kMipsBitsPerWord) { + // 64-bit rotation by 32 is just a swap. + __ Move(dst_low, lhs_high); + __ Move(dst_high, lhs_low); + } else { + if (has_ins_rotr) { + __ Srl(dst_low, lhs_high, shift_value_high); + __ Ins(dst_low, lhs_low, kMipsBitsPerWord - shift_value_high, shift_value_high); + __ Srl(dst_high, lhs_low, shift_value_high); + __ Ins(dst_high, lhs_high, kMipsBitsPerWord - shift_value_high, shift_value_high); + } else { + __ Sll(TMP, lhs_low, kMipsBitsPerWord - shift_value_high); + __ Srl(dst_low, lhs_high, shift_value_high); + __ Or(dst_low, dst_low, TMP); + __ Sll(TMP, lhs_high, kMipsBitsPerWord - shift_value_high); + __ Srl(dst_high, lhs_low, shift_value_high); + __ Or(dst_high, dst_high, TMP); + } + } } } } else { @@ -1649,7 +1706,7 @@ void InstructionCodeGeneratorMIPS::HandleShift(HBinaryOperation* instr) { __ Beqz(TMP, &done); __ Move(dst_low, dst_high); __ Sra(dst_high, dst_high, 31); - } else { + } else if (instr->IsUShr()) { __ Srlv(dst_high, lhs_high, rhs_reg); __ Nor(AT, ZERO, rhs_reg); __ Sll(TMP, lhs_high, 1); @@ -1660,6 +1717,21 @@ void InstructionCodeGeneratorMIPS::HandleShift(HBinaryOperation* instr) { __ Beqz(TMP, &done); __ Move(dst_low, dst_high); __ Move(dst_high, ZERO); + } else { + __ Nor(AT, ZERO, rhs_reg); + __ Srlv(TMP, lhs_low, rhs_reg); + __ Sll(dst_low, lhs_high, 1); + __ Sllv(dst_low, dst_low, AT); + __ Or(dst_low, dst_low, TMP); + __ Srlv(TMP, lhs_high, rhs_reg); + __ Sll(dst_high, lhs_low, 1); + __ Sllv(dst_high, dst_high, AT); + __ Or(dst_high, dst_high, TMP); + __ Andi(TMP, rhs_reg, kMipsBitsPerWord); + __ Beqz(TMP, &done); + __ Move(TMP, dst_high); + __ Move(dst_high, dst_low); + __ Move(dst_low, TMP); } __ Bind(&done); } @@ -2314,8 +2386,7 @@ void InstructionCodeGeneratorMIPS::DivRemByPowerOfTwo(HBinaryOperation* instruct Register out = locations->Out().AsRegister<Register>(); Register dividend = locations->InAt(0).AsRegister<Register>(); int32_t imm = second.GetConstant()->AsIntConstant()->GetValue(); - uint32_t abs_imm = static_cast<uint32_t>(std::abs(imm)); - DCHECK(IsPowerOfTwo(abs_imm)); + uint32_t abs_imm = static_cast<uint32_t>(AbsOrMin(imm)); int ctz_imm = CTZ(abs_imm); if (instruction->IsDiv()) { @@ -2418,7 +2489,7 @@ void InstructionCodeGeneratorMIPS::GenerateDivRemIntegral(HBinaryOperation* inst // Do not generate anything. DivZeroCheck would prevent any code to be executed. 
} else if (imm == 1 || imm == -1) { DivRemOneOrMinusOne(instruction); - } else if (IsPowerOfTwo(std::abs(imm))) { + } else if (IsPowerOfTwo(AbsOrMin(imm))) { DivRemByPowerOfTwo(instruction); } else { DCHECK(imm <= -2 || imm >= 2); @@ -3358,8 +3429,8 @@ void LocationsBuilderMIPS::VisitDeoptimize(HDeoptimize* deoptimize) { } void InstructionCodeGeneratorMIPS::VisitDeoptimize(HDeoptimize* deoptimize) { - SlowPathCodeMIPS* slow_path = new (GetGraph()->GetArena()) DeoptimizationSlowPathMIPS(deoptimize); - codegen_->AddSlowPath(slow_path); + SlowPathCodeMIPS* slow_path = + deopt_slow_paths_.NewSlowPath<DeoptimizationSlowPathMIPS>(deoptimize); GenerateTestAndBranch(deoptimize, /* condition_input_index */ 0, slow_path->GetEntryLabel(), @@ -3371,6 +3442,10 @@ void LocationsBuilderMIPS::VisitNativeDebugInfo(HNativeDebugInfo* info) { } void InstructionCodeGeneratorMIPS::VisitNativeDebugInfo(HNativeDebugInfo* info) { + if (codegen_->HasStackMapAtCurrentPc()) { + // Ensure that we do not collide with the stack map of the previous instruction. + __ Nop(); + } codegen_->RecordPcInfo(info, info->GetDexPc()); } @@ -3457,8 +3532,8 @@ void InstructionCodeGeneratorMIPS::HandleFieldGet(HInstruction* instruction, // Need to move to FP regs since FP results are returned in core registers. __ Mtc1(locations->GetTemp(1).AsRegister<Register>(), locations->Out().AsFpuRegister<FRegister>()); - __ Mthc1(locations->GetTemp(2).AsRegister<Register>(), - locations->Out().AsFpuRegister<FRegister>()); + __ MoveToFpuHigh(locations->GetTemp(2).AsRegister<Register>(), + locations->Out().AsFpuRegister<FRegister>()); } } else { if (!Primitive::IsFloatingPointType(type)) { @@ -3578,8 +3653,8 @@ void InstructionCodeGeneratorMIPS::HandleFieldSet(HInstruction* instruction, // Pass FP parameters in core registers. 
__ Mfc1(locations->GetTemp(1).AsRegister<Register>(), locations->InAt(1).AsFpuRegister<FRegister>()); - __ Mfhc1(locations->GetTemp(2).AsRegister<Register>(), - locations->InAt(1).AsFpuRegister<FRegister>()); + __ MoveFromFpuHigh(locations->GetTemp(2).AsRegister<Register>(), + locations->InAt(1).AsFpuRegister<FRegister>()); } codegen_->InvokeRuntime(QUICK_ENTRY_POINT(pA64Store), instruction, @@ -4536,14 +4611,12 @@ void InstructionCodeGeneratorMIPS::VisitReturnVoid(HReturnVoid* ret ATTRIBUTE_UN codegen_->GenerateFrameExit(); } -void LocationsBuilderMIPS::VisitRor(HRor* ror ATTRIBUTE_UNUSED) { - LOG(FATAL) << "Unreachable"; - UNREACHABLE(); +void LocationsBuilderMIPS::VisitRor(HRor* ror) { + HandleShift(ror); } -void InstructionCodeGeneratorMIPS::VisitRor(HRor* ror ATTRIBUTE_UNUSED) { - LOG(FATAL) << "Unreachable"; - UNREACHABLE(); +void InstructionCodeGeneratorMIPS::VisitRor(HRor* ror) { + HandleShift(ror); } void LocationsBuilderMIPS::VisitShl(HShl* shl) { @@ -4731,6 +4804,7 @@ void LocationsBuilderMIPS::VisitTypeConversion(HTypeConversion* conversion) { Primitive::Type input_type = conversion->GetInputType(); Primitive::Type result_type = conversion->GetResultType(); DCHECK_NE(input_type, result_type); + bool isR6 = codegen_->GetInstructionSetFeatures().IsR6(); if ((input_type == Primitive::kPrimNot) || (input_type == Primitive::kPrimVoid) || (result_type == Primitive::kPrimNot) || (result_type == Primitive::kPrimVoid)) { @@ -4738,8 +4812,9 @@ void LocationsBuilderMIPS::VisitTypeConversion(HTypeConversion* conversion) { } LocationSummary::CallKind call_kind = LocationSummary::kNoCall; - if ((Primitive::IsFloatingPointType(result_type) && input_type == Primitive::kPrimLong) || - (Primitive::IsIntegralType(result_type) && Primitive::IsFloatingPointType(input_type))) { + if (!isR6 && + ((Primitive::IsFloatingPointType(result_type) && input_type == Primitive::kPrimLong) || + (result_type == Primitive::kPrimLong && Primitive::IsFloatingPointType(input_type)))) { call_kind = LocationSummary::kCall; } @@ -4777,6 +4852,8 @@ void InstructionCodeGeneratorMIPS::VisitTypeConversion(HTypeConversion* conversi Primitive::Type result_type = conversion->GetResultType(); Primitive::Type input_type = conversion->GetInputType(); bool has_sign_extension = codegen_->GetInstructionSetFeatures().IsMipsIsaRevGreaterThanEqual2(); + bool isR6 = codegen_->GetInstructionSetFeatures().IsR6(); + bool fpu_32bit = codegen_->GetInstructionSetFeatures().Is32BitFloatingPoint(); DCHECK_NE(input_type, result_type); @@ -4822,7 +4899,37 @@ void InstructionCodeGeneratorMIPS::VisitTypeConversion(HTypeConversion* conversi << " to " << result_type; } } else if (Primitive::IsFloatingPointType(result_type) && Primitive::IsIntegralType(input_type)) { - if (input_type != Primitive::kPrimLong) { + if (input_type == Primitive::kPrimLong) { + if (isR6) { + // cvt.s.l/cvt.d.l requires MIPSR2+ with FR=1. MIPS32R6 is implemented as a secondary + // architecture on top of MIPS64R6, which has FR=1, and therefore can use the instruction. + Register src_high = locations->InAt(0).AsRegisterPairHigh<Register>(); + Register src_low = locations->InAt(0).AsRegisterPairLow<Register>(); + FRegister dst = locations->Out().AsFpuRegister<FRegister>(); + __ Mtc1(src_low, FTMP); + __ Mthc1(src_high, FTMP); + if (result_type == Primitive::kPrimFloat) { + __ Cvtsl(dst, FTMP); + } else { + __ Cvtdl(dst, FTMP); + } + } else { + int32_t entry_offset = (result_type == Primitive::kPrimFloat) ? 
QUICK_ENTRY_POINT(pL2f) + : QUICK_ENTRY_POINT(pL2d); + bool direct = (result_type == Primitive::kPrimFloat) ? IsDirectEntrypoint(kQuickL2f) + : IsDirectEntrypoint(kQuickL2d); + codegen_->InvokeRuntime(entry_offset, + conversion, + conversion->GetDexPc(), + nullptr, + direct); + if (result_type == Primitive::kPrimFloat) { + CheckEntrypointTypes<kQuickL2f, float, int64_t>(); + } else { + CheckEntrypointTypes<kQuickL2d, double, int64_t>(); + } + } + } else { Register src = locations->InAt(0).AsRegister<Register>(); FRegister dst = locations->Out().AsFpuRegister<FRegister>(); __ Mtc1(src, FTMP); @@ -4831,54 +4938,168 @@ void InstructionCodeGeneratorMIPS::VisitTypeConversion(HTypeConversion* conversi } else { __ Cvtdw(dst, FTMP); } - } else { - int32_t entry_offset = (result_type == Primitive::kPrimFloat) ? QUICK_ENTRY_POINT(pL2f) - : QUICK_ENTRY_POINT(pL2d); - bool direct = (result_type == Primitive::kPrimFloat) ? IsDirectEntrypoint(kQuickL2f) - : IsDirectEntrypoint(kQuickL2d); - codegen_->InvokeRuntime(entry_offset, - conversion, - conversion->GetDexPc(), - nullptr, - direct); - if (result_type == Primitive::kPrimFloat) { - CheckEntrypointTypes<kQuickL2f, float, int64_t>(); - } else { - CheckEntrypointTypes<kQuickL2d, double, int64_t>(); - } } } else if (Primitive::IsIntegralType(result_type) && Primitive::IsFloatingPointType(input_type)) { CHECK(result_type == Primitive::kPrimInt || result_type == Primitive::kPrimLong); - int32_t entry_offset; - bool direct; - if (result_type != Primitive::kPrimLong) { - entry_offset = (input_type == Primitive::kPrimFloat) ? QUICK_ENTRY_POINT(pF2iz) - : QUICK_ENTRY_POINT(pD2iz); - direct = (result_type == Primitive::kPrimFloat) ? IsDirectEntrypoint(kQuickF2iz) - : IsDirectEntrypoint(kQuickD2iz); + if (result_type == Primitive::kPrimLong) { + if (isR6) { + // trunc.l.s/trunc.l.d requires MIPSR2+ with FR=1. MIPS32R6 is implemented as a secondary + // architecture on top of MIPS64R6, which has FR=1, and therefore can use the instruction. + FRegister src = locations->InAt(0).AsFpuRegister<FRegister>(); + Register dst_high = locations->Out().AsRegisterPairHigh<Register>(); + Register dst_low = locations->Out().AsRegisterPairLow<Register>(); + MipsLabel truncate; + MipsLabel done; + + // When NAN2008=0 (R2 and before), the truncate instruction produces the maximum positive + // value when the input is either a NaN or is outside of the range of the output type + // after the truncation. IOW, the three special cases (NaN, too small, too big) produce + // the same result. + // + // When NAN2008=1 (R6), the truncate instruction caps the output at the minimum/maximum + // value of the output type if the input is outside of the range after the truncation or + // produces 0 when the input is a NaN. IOW, the three special cases produce three distinct + // results. This matches the desired float/double-to-int/long conversion exactly. + // + // So, NAN2008 affects handling of negative values and NaNs by the truncate instruction. + // + // The following code supports both NAN2008=0 and NAN2008=1 behaviors of the truncate + // instruction, the reason being that the emulator implements NAN2008=0 on MIPS64R6, + // even though it must be NAN2008=1 on R6. + // + // The code takes care of the different behaviors by first comparing the input to the + // minimum output value (-2**63 for truncating to long, -2**31 for truncating to int).
+ // If the input is greater than or equal to the minimum, it proceeds to the truncate + // instruction, which will handle such an input the same way irrespective of NAN2008. + // Otherwise the input is compared to itself to determine whether it is a NaN or not + // in order to return either zero or the minimum value. + // + // TODO: simplify this when the emulator correctly implements NAN2008=1 behavior of the + // truncate instruction for MIPS64R6. + if (input_type == Primitive::kPrimFloat) { + uint32_t min_val = bit_cast<uint32_t, float>(std::numeric_limits<int64_t>::min()); + __ LoadConst32(TMP, min_val); + __ Mtc1(TMP, FTMP); + __ CmpLeS(FTMP, FTMP, src); + } else { + uint64_t min_val = bit_cast<uint64_t, double>(std::numeric_limits<int64_t>::min()); + __ LoadConst32(TMP, High32Bits(min_val)); + __ Mtc1(ZERO, FTMP); + __ Mthc1(TMP, FTMP); + __ CmpLeD(FTMP, FTMP, src); + } + + __ Bc1nez(FTMP, &truncate); + + if (input_type == Primitive::kPrimFloat) { + __ CmpEqS(FTMP, src, src); + } else { + __ CmpEqD(FTMP, src, src); + } + __ Move(dst_low, ZERO); + __ LoadConst32(dst_high, std::numeric_limits<int32_t>::min()); + __ Mfc1(TMP, FTMP); + __ And(dst_high, dst_high, TMP); + + __ B(&done); + + __ Bind(&truncate); + + if (input_type == Primitive::kPrimFloat) { + __ TruncLS(FTMP, src); + } else { + __ TruncLD(FTMP, src); + } + __ Mfc1(dst_low, FTMP); + __ Mfhc1(dst_high, FTMP); + + __ Bind(&done); + } else { + int32_t entry_offset = (input_type == Primitive::kPrimFloat) ? QUICK_ENTRY_POINT(pF2l) + : QUICK_ENTRY_POINT(pD2l); + bool direct = (input_type == Primitive::kPrimFloat) ? IsDirectEntrypoint(kQuickF2l) + : IsDirectEntrypoint(kQuickD2l); + codegen_->InvokeRuntime(entry_offset, conversion, conversion->GetDexPc(), nullptr, direct); + if (input_type == Primitive::kPrimFloat) { + CheckEntrypointTypes<kQuickF2l, int64_t, float>(); + } else { + CheckEntrypointTypes<kQuickD2l, int64_t, double>(); + } + } } else { - entry_offset = (input_type == Primitive::kPrimFloat) ? QUICK_ENTRY_POINT(pF2l) - : QUICK_ENTRY_POINT(pD2l); - direct = (result_type == Primitive::kPrimFloat) ? IsDirectEntrypoint(kQuickF2l) - : IsDirectEntrypoint(kQuickD2l); - } - codegen_->InvokeRuntime(entry_offset, - conversion, - conversion->GetDexPc(), - nullptr, - direct); - if (result_type != Primitive::kPrimLong) { + FRegister src = locations->InAt(0).AsFpuRegister<FRegister>(); + Register dst = locations->Out().AsRegister<Register>(); + MipsLabel truncate; + MipsLabel done; + + // The following code supports both NAN2008=0 and NAN2008=1 behaviors of the truncate + // instruction, the reason being that the emulator implements NAN2008=0 on MIPS64R6, + // even though it must be NAN2008=1 on R6. + // + // For details see the large comment above for the truncation of float/double to long on R6. + // + // TODO: simplify this when the emulator correctly implements NAN2008=1 behavior of the + // truncate instruction for MIPS64R6.
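The intended end-to-end semantics of the compare-then-truncate scheme, modeled in C++ for the float-to-long case (a sketch of the behavior being implemented, not of the emitted instructions):

#include <cstdint>
#include <limits>

int64_t FloatToLong(float in) {
  const int64_t kMin = std::numeric_limits<int64_t>::min();
  const int64_t kMax = std::numeric_limits<int64_t>::max();
  // CmpLeS(FTMP, min, src) + Bc1nez: take the truncate path only when
  // in >= -2**63; a NaN fails the comparison and falls through.
  if (static_cast<float>(kMin) <= in) {
    // TruncLS: truncate toward zero; inputs beyond the positive range cap
    // at the maximum under both NAN2008 modes.
    return (in >= static_cast<float>(kMax)) ? kMax : static_cast<int64_t>(in);
  }
  // Otherwise the input is a NaN or below the minimum. CmpEqS(src, src)
  // yields all-zeroes for NaN and all-ones otherwise; ANDing that mask
  // with kMin produces 0 for NaN and kMin for out-of-range negatives.
  return (in == in) ? kMin : 0;
}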
if (input_type == Primitive::kPrimFloat) { - CheckEntrypointTypes<kQuickF2iz, int32_t, float>(); + uint32_t min_val = bit_cast<uint32_t, float>(std::numeric_limits<int32_t>::min()); + __ LoadConst32(TMP, min_val); + __ Mtc1(TMP, FTMP); } else { - CheckEntrypointTypes<kQuickD2iz, int32_t, double>(); + uint64_t min_val = bit_cast<uint64_t, double>(std::numeric_limits<int32_t>::min()); + __ LoadConst32(TMP, High32Bits(min_val)); + __ Mtc1(ZERO, FTMP); + if (fpu_32bit) { + __ Mtc1(TMP, static_cast<FRegister>(FTMP + 1)); + } else { + __ Mthc1(TMP, FTMP); + } } - } else { + + if (isR6) { + if (input_type == Primitive::kPrimFloat) { + __ CmpLeS(FTMP, FTMP, src); + } else { + __ CmpLeD(FTMP, FTMP, src); + } + __ Bc1nez(FTMP, &truncate); + + if (input_type == Primitive::kPrimFloat) { + __ CmpEqS(FTMP, src, src); + } else { + __ CmpEqD(FTMP, src, src); + } + __ LoadConst32(dst, std::numeric_limits<int32_t>::min()); + __ Mfc1(TMP, FTMP); + __ And(dst, dst, TMP); + } else { + if (input_type == Primitive::kPrimFloat) { + __ ColeS(0, FTMP, src); + } else { + __ ColeD(0, FTMP, src); + } + __ Bc1t(0, &truncate); + + if (input_type == Primitive::kPrimFloat) { + __ CeqS(0, src, src); + } else { + __ CeqD(0, src, src); + } + __ LoadConst32(dst, std::numeric_limits<int32_t>::min()); + __ Movf(dst, ZERO, 0); + } + + __ B(&done); + + __ Bind(&truncate); + if (input_type == Primitive::kPrimFloat) { - CheckEntrypointTypes<kQuickF2l, int64_t, float>(); + __ TruncWS(FTMP, src); } else { - CheckEntrypointTypes<kQuickD2l, int64_t, double>(); + __ TruncWD(FTMP, src); } + __ Mfc1(dst, FTMP); + + __ Bind(&done); } } else if (Primitive::IsFloatingPointType(result_type) && Primitive::IsFloatingPointType(input_type)) { diff --git a/compiler/optimizing/code_generator_mips.h b/compiler/optimizing/code_generator_mips.h index 38302ad315..c3d4851ee9 100644 --- a/compiler/optimizing/code_generator_mips.h +++ b/compiler/optimizing/code_generator_mips.h @@ -197,7 +197,7 @@ class LocationsBuilderMIPS : public HGraphVisitor { DISALLOW_COPY_AND_ASSIGN(LocationsBuilderMIPS); }; -class InstructionCodeGeneratorMIPS : public HGraphVisitor { +class InstructionCodeGeneratorMIPS : public InstructionCodeGenerator { public: InstructionCodeGeneratorMIPS(HGraph* graph, CodeGeneratorMIPS* codegen); diff --git a/compiler/optimizing/code_generator_mips64.cc b/compiler/optimizing/code_generator_mips64.cc index 05834ff063..38c32cad06 100644 --- a/compiler/optimizing/code_generator_mips64.cc +++ b/compiler/optimizing/code_generator_mips64.cc @@ -391,24 +391,24 @@ class TypeCheckSlowPathMIPS64 : public SlowPathCodeMIPS64 { class DeoptimizationSlowPathMIPS64 : public SlowPathCodeMIPS64 { public: - explicit DeoptimizationSlowPathMIPS64(HInstruction* instruction) + explicit DeoptimizationSlowPathMIPS64(HDeoptimize* instruction) : instruction_(instruction) {} void EmitNativeCode(CodeGenerator* codegen) OVERRIDE { + CodeGeneratorMIPS64* mips64_codegen = down_cast<CodeGeneratorMIPS64*>(codegen); __ Bind(GetEntryLabel()); SaveLiveRegisters(codegen, instruction_->GetLocations()); - DCHECK(instruction_->IsDeoptimize()); - HDeoptimize* deoptimize = instruction_->AsDeoptimize(); - uint32_t dex_pc = deoptimize->GetDexPc(); - CodeGeneratorMIPS64* mips64_codegen = down_cast<CodeGeneratorMIPS64*>(codegen); - mips64_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pDeoptimize), instruction_, dex_pc, this); + mips64_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pDeoptimize), + instruction_, + instruction_->GetDexPc(), + this); CheckEntrypointTypes<kQuickDeoptimize, void, void>(); 
} const char* GetDescription() const OVERRIDE { return "DeoptimizationSlowPathMIPS64"; } private: - HInstruction* const instruction_; + HDeoptimize* const instruction_; DISALLOW_COPY_AND_ASSIGN(DeoptimizationSlowPathMIPS64); }; @@ -1113,7 +1113,7 @@ void InstructionCodeGeneratorMIPS64::GenerateSuspendCheck(HSuspendCheck* instruc InstructionCodeGeneratorMIPS64::InstructionCodeGeneratorMIPS64(HGraph* graph, CodeGeneratorMIPS64* codegen) - : HGraphVisitor(graph), + : InstructionCodeGenerator(graph, codegen), assembler_(codegen->GetAssembler()), codegen_(codegen) {} @@ -1247,7 +1247,7 @@ void InstructionCodeGeneratorMIPS64::HandleBinaryOp(HBinaryOperation* instructio } void LocationsBuilderMIPS64::HandleShift(HBinaryOperation* instr) { - DCHECK(instr->IsShl() || instr->IsShr() || instr->IsUShr()); + DCHECK(instr->IsShl() || instr->IsShr() || instr->IsUShr() || instr->IsRor()); LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instr); Primitive::Type type = instr->GetResultType(); @@ -1265,7 +1265,7 @@ void LocationsBuilderMIPS64::HandleShift(HBinaryOperation* instr) { } void InstructionCodeGeneratorMIPS64::HandleShift(HBinaryOperation* instr) { - DCHECK(instr->IsShl() || instr->IsShr() || instr->IsUShr()); + DCHECK(instr->IsShl() || instr->IsShr() || instr->IsUShr() || instr->IsRor()); LocationSummary* locations = instr->GetLocations(); Primitive::Type type = instr->GetType(); @@ -1290,13 +1290,19 @@ void InstructionCodeGeneratorMIPS64::HandleShift(HBinaryOperation* instr) { ? static_cast<uint32_t>(rhs_imm & kMaxIntShiftValue) : static_cast<uint32_t>(rhs_imm & kMaxLongShiftValue); - if (type == Primitive::kPrimInt) { + if (shift_value == 0) { + if (dst != lhs) { + __ Move(dst, lhs); + } + } else if (type == Primitive::kPrimInt) { if (instr->IsShl()) { __ Sll(dst, lhs, shift_value); } else if (instr->IsShr()) { __ Sra(dst, lhs, shift_value); - } else { + } else if (instr->IsUShr()) { __ Srl(dst, lhs, shift_value); + } else { + __ Rotr(dst, lhs, shift_value); } } else { if (shift_value < 32) { @@ -1304,8 +1310,10 @@ void InstructionCodeGeneratorMIPS64::HandleShift(HBinaryOperation* instr) { __ Dsll(dst, lhs, shift_value); } else if (instr->IsShr()) { __ Dsra(dst, lhs, shift_value); - } else { + } else if (instr->IsUShr()) { __ Dsrl(dst, lhs, shift_value); + } else { + __ Drotr(dst, lhs, shift_value); } } else { shift_value -= 32; @@ -1313,8 +1321,10 @@ void InstructionCodeGeneratorMIPS64::HandleShift(HBinaryOperation* instr) { __ Dsll32(dst, lhs, shift_value); } else if (instr->IsShr()) { __ Dsra32(dst, lhs, shift_value); - } else { + } else if (instr->IsUShr()) { __ Dsrl32(dst, lhs, shift_value); + } else { + __ Drotr32(dst, lhs, shift_value); } } } @@ -1324,16 +1334,20 @@ void InstructionCodeGeneratorMIPS64::HandleShift(HBinaryOperation* instr) { __ Sllv(dst, lhs, rhs_reg); } else if (instr->IsShr()) { __ Srav(dst, lhs, rhs_reg); - } else { + } else if (instr->IsUShr()) { __ Srlv(dst, lhs, rhs_reg); + } else { + __ Rotrv(dst, lhs, rhs_reg); } } else { if (instr->IsShl()) { __ Dsllv(dst, lhs, rhs_reg); } else if (instr->IsShr()) { __ Dsrav(dst, lhs, rhs_reg); - } else { + } else if (instr->IsUShr()) { __ Dsrlv(dst, lhs, rhs_reg); + } else { + __ Drotrv(dst, lhs, rhs_reg); } } } @@ -1955,8 +1969,7 @@ void InstructionCodeGeneratorMIPS64::DivRemByPowerOfTwo(HBinaryOperation* instru GpuRegister out = locations->Out().AsRegister<GpuRegister>(); GpuRegister dividend = locations->InAt(0).AsRegister<GpuRegister>(); int64_t imm = Int64FromConstant(second.GetConstant()); - 
uint64_t abs_imm = static_cast<uint64_t>(std::abs(imm)); - DCHECK(IsPowerOfTwo(abs_imm)); + uint64_t abs_imm = static_cast<uint64_t>(AbsOrMin(imm)); int ctz_imm = CTZ(abs_imm); if (instruction->IsDiv()) { @@ -2138,7 +2151,7 @@ void InstructionCodeGeneratorMIPS64::GenerateDivRemIntegral(HBinaryOperation* in // Do not generate anything. DivZeroCheck would prevent any code to be executed. } else if (imm == 1 || imm == -1) { DivRemOneOrMinusOne(instruction); - } else if (IsPowerOfTwo(std::abs(imm))) { + } else if (IsPowerOfTwo(AbsOrMin(imm))) { DivRemByPowerOfTwo(instruction); } else { DCHECK(imm <= -2 || imm >= 2); @@ -2736,9 +2749,8 @@ void LocationsBuilderMIPS64::VisitDeoptimize(HDeoptimize* deoptimize) { } void InstructionCodeGeneratorMIPS64::VisitDeoptimize(HDeoptimize* deoptimize) { - SlowPathCodeMIPS64* slow_path = new (GetGraph()->GetArena()) - DeoptimizationSlowPathMIPS64(deoptimize); - codegen_->AddSlowPath(slow_path); + SlowPathCodeMIPS64* slow_path = + deopt_slow_paths_.NewSlowPath<DeoptimizationSlowPathMIPS64>(deoptimize); GenerateTestAndBranch(deoptimize, /* condition_input_index */ 0, slow_path->GetEntryLabel(), @@ -2750,6 +2762,10 @@ void LocationsBuilderMIPS64::VisitNativeDebugInfo(HNativeDebugInfo* info) { } void InstructionCodeGeneratorMIPS64::VisitNativeDebugInfo(HNativeDebugInfo* info) { + if (codegen_->HasStackMapAtCurrentPc()) { + // Ensure that we do not collide with the stack map of the previous instruction. + __ Nop(); + } codegen_->RecordPcInfo(info, info->GetDexPc()); } @@ -3722,14 +3738,12 @@ void InstructionCodeGeneratorMIPS64::VisitReturnVoid(HReturnVoid* ret ATTRIBUTE_ codegen_->GenerateFrameExit(); } -void LocationsBuilderMIPS64::VisitRor(HRor* ror ATTRIBUTE_UNUSED) { - LOG(FATAL) << "Unreachable"; - UNREACHABLE(); +void LocationsBuilderMIPS64::VisitRor(HRor* ror) { + HandleShift(ror); } -void InstructionCodeGeneratorMIPS64::VisitRor(HRor* ror ATTRIBUTE_UNUSED) { - LOG(FATAL) << "Unreachable"; - UNREACHABLE(); +void InstructionCodeGeneratorMIPS64::VisitRor(HRor* ror) { + HandleShift(ror); } void LocationsBuilderMIPS64::VisitShl(HShl* shl) { @@ -3918,36 +3932,18 @@ void LocationsBuilderMIPS64::VisitTypeConversion(HTypeConversion* conversion) { LOG(FATAL) << "Unexpected type conversion from " << input_type << " to " << result_type; } - LocationSummary::CallKind call_kind = LocationSummary::kNoCall; - if ((Primitive::IsFloatingPointType(result_type) && input_type == Primitive::kPrimLong) || - (Primitive::IsIntegralType(result_type) && Primitive::IsFloatingPointType(input_type))) { - call_kind = LocationSummary::kCall; - } - - LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(conversion, call_kind); + LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(conversion); - if (call_kind == LocationSummary::kNoCall) { - if (Primitive::IsFloatingPointType(input_type)) { - locations->SetInAt(0, Location::RequiresFpuRegister()); - } else { - locations->SetInAt(0, Location::RequiresRegister()); - } - - if (Primitive::IsFloatingPointType(result_type)) { - locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap); - } else { - locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); - } + if (Primitive::IsFloatingPointType(input_type)) { + locations->SetInAt(0, Location::RequiresFpuRegister()); } else { - InvokeRuntimeCallingConvention calling_convention; - - if (Primitive::IsFloatingPointType(input_type)) { - locations->SetInAt(0, 
Location::FpuRegisterLocation(calling_convention.GetFpuRegisterAt(0))); - } else { - locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0))); - } + locations->SetInAt(0, Location::RequiresRegister()); + } - locations->SetOut(calling_convention.GetReturnLocation(result_type)); + if (Primitive::IsFloatingPointType(result_type)) { + locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap); + } else { + locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); } } @@ -3992,55 +3988,107 @@ void InstructionCodeGeneratorMIPS64::VisitTypeConversion(HTypeConversion* conver << " to " << result_type; } } else if (Primitive::IsFloatingPointType(result_type) && Primitive::IsIntegralType(input_type)) { - if (input_type != Primitive::kPrimLong) { - FpuRegister dst = locations->Out().AsFpuRegister<FpuRegister>(); - GpuRegister src = locations->InAt(0).AsRegister<GpuRegister>(); - __ Mtc1(src, FTMP); + FpuRegister dst = locations->Out().AsFpuRegister<FpuRegister>(); + GpuRegister src = locations->InAt(0).AsRegister<GpuRegister>(); + if (input_type == Primitive::kPrimLong) { + __ Dmtc1(src, FTMP); if (result_type == Primitive::kPrimFloat) { - __ Cvtsw(dst, FTMP); + __ Cvtsl(dst, FTMP); } else { - __ Cvtdw(dst, FTMP); + __ Cvtdl(dst, FTMP); } } else { - int32_t entry_offset = (result_type == Primitive::kPrimFloat) ? QUICK_ENTRY_POINT(pL2f) - : QUICK_ENTRY_POINT(pL2d); - codegen_->InvokeRuntime(entry_offset, - conversion, - conversion->GetDexPc(), - nullptr); + __ Mtc1(src, FTMP); if (result_type == Primitive::kPrimFloat) { - CheckEntrypointTypes<kQuickL2f, float, int64_t>(); + __ Cvtsw(dst, FTMP); } else { - CheckEntrypointTypes<kQuickL2d, double, int64_t>(); + __ Cvtdw(dst, FTMP); } } } else if (Primitive::IsIntegralType(result_type) && Primitive::IsFloatingPointType(input_type)) { CHECK(result_type == Primitive::kPrimInt || result_type == Primitive::kPrimLong); - int32_t entry_offset; - if (result_type != Primitive::kPrimLong) { - entry_offset = (input_type == Primitive::kPrimFloat) ? QUICK_ENTRY_POINT(pF2iz) - : QUICK_ENTRY_POINT(pD2iz); + GpuRegister dst = locations->Out().AsRegister<GpuRegister>(); + FpuRegister src = locations->InAt(0).AsFpuRegister<FpuRegister>(); + Mips64Label truncate; + Mips64Label done; + + // When NAN2008=0 (R2 and before), the truncate instruction produces the maximum positive + // value when the input is either a NaN or is outside of the range of the output type + // after the truncation. IOW, the three special cases (NaN, too small, too big) produce + // the same result. + // + // When NAN2008=1 (R6), the truncate instruction caps the output at the minimum/maximum + // value of the output type if the input is outside of the range after the truncation or + // produces 0 when the input is a NaN. IOW, the three special cases produce three distinct + // results. This matches the desired float/double-to-int/long conversion exactly. + // + // So, NAN2008 affects handling of negative values and NaNs by the truncate instruction. + // + // The following code supports both NAN2008=0 and NAN2008=1 behaviors of the truncate + // instruction, the reason being that the emulator implements NAN2008=0 on MIPS64R6, + // even though it must be NAN2008=1 on R6. + // + // The code takes care of the different behaviors by first comparing the input to the + // minimum output value (-2**63 for truncating to long, -2**31 for truncating to int).
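// [Editor's aside: an illustrative sketch, not part of the original change;
// the function name JavaD2L is hypothetical.] For reference, the Java-level
// semantics that the truncation block below must reproduce, in portable C++:
#include <cmath>
#include <cstdint>
#include <limits>
int64_t JavaD2L(double in) {
  if (std::isnan(in)) {
    return 0;  // NaN converts to zero.
  }
  if (in <= static_cast<double>(std::numeric_limits<int64_t>::min())) {
    return std::numeric_limits<int64_t>::min();  // Too small: clamp to min.
  }
  if (in >= static_cast<double>(std::numeric_limits<int64_t>::max())) {
    return std::numeric_limits<int64_t>::max();  // Too big: clamp to max.
  }
  return static_cast<int64_t>(in);  // In range: truncate toward zero.
}
// A NAN2008=1 truncate implements this directly; the comparisons emitted below
// recover the NaN and too-small cases when running under NAN2008=0.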
+ // If the input is greater than or equal to the minimum, it proceeds to the truncate + // instruction, which will handle such an input the same way irrespective of NAN2008. + // Otherwise the input is compared to itself to determine whether it is a NaN or not + // in order to return either zero or the minimum value. + // + // TODO: simplify this when the emulator correctly implements NAN2008=1 behavior of the + // truncate instruction for MIPS64R6. + if (input_type == Primitive::kPrimFloat) { + uint32_t min_val = (result_type == Primitive::kPrimLong) + ? bit_cast<uint32_t, float>(std::numeric_limits<int64_t>::min()) + : bit_cast<uint32_t, float>(std::numeric_limits<int32_t>::min()); + __ LoadConst32(TMP, min_val); + __ Mtc1(TMP, FTMP); + __ CmpLeS(FTMP, FTMP, src); } else { - entry_offset = (input_type == Primitive::kPrimFloat) ? QUICK_ENTRY_POINT(pF2l) - : QUICK_ENTRY_POINT(pD2l); + uint64_t min_val = (result_type == Primitive::kPrimLong) + ? bit_cast<uint64_t, double>(std::numeric_limits<int64_t>::min()) + : bit_cast<uint64_t, double>(std::numeric_limits<int32_t>::min()); + __ LoadConst64(TMP, min_val); + __ Dmtc1(TMP, FTMP); + __ CmpLeD(FTMP, FTMP, src); } - codegen_->InvokeRuntime(entry_offset, - conversion, - conversion->GetDexPc(), - nullptr); - if (result_type != Primitive::kPrimLong) { + + __ Bc1nez(FTMP, &truncate); + + if (input_type == Primitive::kPrimFloat) { + __ CmpEqS(FTMP, src, src); + } else { + __ CmpEqD(FTMP, src, src); + } + if (result_type == Primitive::kPrimLong) { + __ LoadConst64(dst, std::numeric_limits<int64_t>::min()); + } else { + __ LoadConst32(dst, std::numeric_limits<int32_t>::min()); + } + __ Mfc1(TMP, FTMP); + __ And(dst, dst, TMP); + + __ Bc(&done); + + __ Bind(&truncate); + + if (result_type == Primitive::kPrimLong) { if (input_type == Primitive::kPrimFloat) { - CheckEntrypointTypes<kQuickF2iz, int32_t, float>(); + __ TruncLS(FTMP, src); } else { - CheckEntrypointTypes<kQuickD2iz, int32_t, double>(); + __ TruncLD(FTMP, src); } + __ Dmfc1(dst, FTMP); } else { if (input_type == Primitive::kPrimFloat) { - CheckEntrypointTypes<kQuickF2l, int64_t, float>(); + __ TruncWS(FTMP, src); } else { - CheckEntrypointTypes<kQuickD2l, int64_t, double>(); + __ TruncWD(FTMP, src); } + __ Mfc1(dst, FTMP); } + + __ Bind(&done); } else if (Primitive::IsFloatingPointType(result_type) && Primitive::IsFloatingPointType(input_type)) { FpuRegister dst = locations->Out().AsFpuRegister<FpuRegister>(); diff --git a/compiler/optimizing/code_generator_mips64.h b/compiler/optimizing/code_generator_mips64.h index 60ff96dc43..7182e8e987 100644 --- a/compiler/optimizing/code_generator_mips64.h +++ b/compiler/optimizing/code_generator_mips64.h @@ -201,7 +201,7 @@ class LocationsBuilderMIPS64 : public HGraphVisitor { DISALLOW_COPY_AND_ASSIGN(LocationsBuilderMIPS64); }; -class InstructionCodeGeneratorMIPS64 : public HGraphVisitor { +class InstructionCodeGeneratorMIPS64 : public InstructionCodeGenerator { public: InstructionCodeGeneratorMIPS64(HGraph* graph, CodeGeneratorMIPS64* codegen); diff --git a/compiler/optimizing/code_generator_x86.cc b/compiler/optimizing/code_generator_x86.cc index 86327fb741..6ab3aaff4b 100644 --- a/compiler/optimizing/code_generator_x86.cc +++ b/compiler/optimizing/code_generator_x86.cc @@ -365,11 +365,10 @@ class TypeCheckSlowPathX86 : public SlowPathCode { class DeoptimizationSlowPathX86 : public SlowPathCode { public: - explicit DeoptimizationSlowPathX86(HInstruction* instruction) + explicit DeoptimizationSlowPathX86(HDeoptimize* instruction) :
instruction_(instruction) {} void EmitNativeCode(CodeGenerator* codegen) OVERRIDE { - DCHECK(instruction_->IsDeoptimize()); CodeGeneratorX86* x86_codegen = down_cast<CodeGeneratorX86*>(codegen); __ Bind(GetEntryLabel()); SaveLiveRegisters(codegen, instruction_->GetLocations()); @@ -383,7 +382,7 @@ class DeoptimizationSlowPathX86 : public SlowPathCode { const char* GetDescription() const OVERRIDE { return "DeoptimizationSlowPathX86"; } private: - HInstruction* const instruction_; + HDeoptimize* const instruction_; DISALLOW_COPY_AND_ASSIGN(DeoptimizationSlowPathX86); }; @@ -892,7 +891,7 @@ void CodeGeneratorX86::UpdateBlockedPairRegisters() const { } InstructionCodeGeneratorX86::InstructionCodeGeneratorX86(HGraph* graph, CodeGeneratorX86* codegen) - : HGraphVisitor(graph), + : InstructionCodeGenerator(graph, codegen), assembler_(codegen->GetAssembler()), codegen_(codegen) {} @@ -1611,9 +1610,7 @@ void LocationsBuilderX86::VisitDeoptimize(HDeoptimize* deoptimize) { } void InstructionCodeGeneratorX86::VisitDeoptimize(HDeoptimize* deoptimize) { - SlowPathCode* slow_path = new (GetGraph()->GetArena()) - DeoptimizationSlowPathX86(deoptimize); - codegen_->AddSlowPath(slow_path); + SlowPathCode* slow_path = deopt_slow_paths_.NewSlowPath<DeoptimizationSlowPathX86>(deoptimize); GenerateTestAndBranch(deoptimize, /* condition_input_index */ 0, slow_path->GetEntryLabel(), @@ -1625,6 +1622,10 @@ void LocationsBuilderX86::VisitNativeDebugInfo(HNativeDebugInfo* info) { } void InstructionCodeGeneratorX86::VisitNativeDebugInfo(HNativeDebugInfo* info) { + if (codegen_->HasStackMapAtCurrentPc()) { + // Ensure that we do not collide with the stack map of the previous instruction. + __ nop(); + } codegen_->RecordPcInfo(info, info->GetDexPc()); } @@ -3223,11 +3224,12 @@ void InstructionCodeGeneratorX86::DivByPowerOfTwo(HDiv* instruction) { Register out_register = locations->Out().AsRegister<Register>(); Register input_register = locations->InAt(0).AsRegister<Register>(); int32_t imm = locations->InAt(1).GetConstant()->AsIntConstant()->GetValue(); + DCHECK(IsPowerOfTwo(AbsOrMin(imm))); + uint32_t abs_imm = static_cast<uint32_t>(AbsOrMin(imm)); - DCHECK(IsPowerOfTwo(std::abs(imm))); Register num = locations->GetTemp(0).AsRegister<Register>(); - __ leal(num, Address(input_register, std::abs(imm) - 1)); + __ leal(num, Address(input_register, abs_imm - 1)); __ testl(input_register, input_register); __ cmovl(kGreaterEqual, num, input_register); int shift = CTZ(imm); @@ -3340,7 +3342,7 @@ void InstructionCodeGeneratorX86::GenerateDivRemIntegral(HBinaryOperation* instr // Do not generate anything for 0. DivZeroCheck would forbid any generated code. 
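// [Editor's aside: an illustrative sketch, not part of the original change;
// AbsOrMin32 spells out the semantics assumed for the AbsOrMin helper used
// above.] std::abs(INT_MIN) is undefined behavior, yet INT_MIN is a valid
// power-of-two divisor (|INT_MIN| == 2^31 once viewed as unsigned), which is
// why these checks switched from std::abs to AbsOrMin:
#include <cstdint>
#include <cstdlib>
#include <limits>
int32_t AbsOrMin32(int32_t v) {
  return v == std::numeric_limits<int32_t>::min()
      ? v               // Keep INT_MIN as-is instead of invoking UB.
      : std::abs(v);
}
// static_cast<uint32_t>(AbsOrMin32(INT32_MIN)) == 0x80000000u, so it still
// passes IsPowerOfTwo() and yields CTZ() == 31 for the shift sequence.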
} else if (imm == 1 || imm == -1) { DivRemOneOrMinusOne(instruction); - } else if (is_div && IsPowerOfTwo(std::abs(imm))) { + } else if (is_div && IsPowerOfTwo(AbsOrMin(imm))) { DivByPowerOfTwo(instruction->AsDiv()); } else { DCHECK(imm <= -2 || imm >= 2); diff --git a/compiler/optimizing/code_generator_x86.h b/compiler/optimizing/code_generator_x86.h index df7347658b..c65c423eae 100644 --- a/compiler/optimizing/code_generator_x86.h +++ b/compiler/optimizing/code_generator_x86.h @@ -178,7 +178,7 @@ class LocationsBuilderX86 : public HGraphVisitor { DISALLOW_COPY_AND_ASSIGN(LocationsBuilderX86); }; -class InstructionCodeGeneratorX86 : public HGraphVisitor { +class InstructionCodeGeneratorX86 : public InstructionCodeGenerator { public: InstructionCodeGeneratorX86(HGraph* graph, CodeGeneratorX86* codegen); diff --git a/compiler/optimizing/code_generator_x86_64.cc b/compiler/optimizing/code_generator_x86_64.cc index 76a4ce2e93..294b40e3d4 100644 --- a/compiler/optimizing/code_generator_x86_64.cc +++ b/compiler/optimizing/code_generator_x86_64.cc @@ -387,18 +387,16 @@ class TypeCheckSlowPathX86_64 : public SlowPathCode { class DeoptimizationSlowPathX86_64 : public SlowPathCode { public: - explicit DeoptimizationSlowPathX86_64(HInstruction* instruction) + explicit DeoptimizationSlowPathX86_64(HDeoptimize* instruction) : instruction_(instruction) {} void EmitNativeCode(CodeGenerator* codegen) OVERRIDE { CodeGeneratorX86_64* x86_64_codegen = down_cast<CodeGeneratorX86_64*>(codegen); __ Bind(GetEntryLabel()); SaveLiveRegisters(codegen, instruction_->GetLocations()); - DCHECK(instruction_->IsDeoptimize()); - HDeoptimize* deoptimize = instruction_->AsDeoptimize(); x86_64_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pDeoptimize), - deoptimize, - deoptimize->GetDexPc(), + instruction_, + instruction_->GetDexPc(), this); CheckEntrypointTypes<kQuickDeoptimize, void, void>(); } @@ -406,7 +404,7 @@ class DeoptimizationSlowPathX86_64 : public SlowPathCode { const char* GetDescription() const OVERRIDE { return "DeoptimizationSlowPathX86_64"; } private: - HInstruction* const instruction_; + HDeoptimize* const instruction_; DISALLOW_COPY_AND_ASSIGN(DeoptimizationSlowPathX86_64); }; @@ -1000,7 +998,7 @@ CodeGeneratorX86_64::CodeGeneratorX86_64(HGraph* graph, InstructionCodeGeneratorX86_64::InstructionCodeGeneratorX86_64(HGraph* graph, CodeGeneratorX86_64* codegen) - : HGraphVisitor(graph), + : InstructionCodeGenerator(graph, codegen), assembler_(codegen->GetAssembler()), codegen_(codegen) {} @@ -1594,9 +1592,7 @@ void LocationsBuilderX86_64::VisitDeoptimize(HDeoptimize* deoptimize) { } void InstructionCodeGeneratorX86_64::VisitDeoptimize(HDeoptimize* deoptimize) { - SlowPathCode* slow_path = new (GetGraph()->GetArena()) - DeoptimizationSlowPathX86_64(deoptimize); - codegen_->AddSlowPath(slow_path); + SlowPathCode* slow_path = deopt_slow_paths_.NewSlowPath<DeoptimizationSlowPathX86_64>(deoptimize); GenerateTestAndBranch(deoptimize, /* condition_input_index */ 0, slow_path->GetEntryLabel(), @@ -1608,6 +1604,10 @@ void LocationsBuilderX86_64::VisitNativeDebugInfo(HNativeDebugInfo* info) { } void InstructionCodeGeneratorX86_64::VisitNativeDebugInfo(HNativeDebugInfo* info) { + if (codegen_->HasStackMapAtCurrentPc()) { + // Ensure that we do not collide with the stack map of the previous instruction. 
+ __ nop(); + } codegen_->RecordPcInfo(info, info->GetDexPc()); } @@ -3350,13 +3350,13 @@ void InstructionCodeGeneratorX86_64::DivByPowerOfTwo(HDiv* instruction) { CpuRegister numerator = locations->InAt(0).AsRegister<CpuRegister>(); int64_t imm = Int64FromConstant(second.GetConstant()); - - DCHECK(IsPowerOfTwo(std::abs(imm))); + DCHECK(IsPowerOfTwo(AbsOrMin(imm))); + uint64_t abs_imm = AbsOrMin(imm); CpuRegister tmp = locations->GetTemp(0).AsRegister<CpuRegister>(); if (instruction->GetResultType() == Primitive::kPrimInt) { - __ leal(tmp, Address(numerator, std::abs(imm) - 1)); + __ leal(tmp, Address(numerator, abs_imm - 1)); __ testl(numerator, numerator); __ cmov(kGreaterEqual, tmp, numerator); int shift = CTZ(imm); @@ -3371,7 +3371,7 @@ void InstructionCodeGeneratorX86_64::DivByPowerOfTwo(HDiv* instruction) { DCHECK_EQ(instruction->GetResultType(), Primitive::kPrimLong); CpuRegister rdx = locations->GetTemp(0).AsRegister<CpuRegister>(); - codegen_->Load64BitValue(rdx, std::abs(imm) - 1); + codegen_->Load64BitValue(rdx, abs_imm - 1); __ addq(rdx, numerator); __ testq(numerator, numerator); __ cmov(kGreaterEqual, rdx, numerator); @@ -3529,7 +3529,7 @@ void InstructionCodeGeneratorX86_64::GenerateDivRemIntegral(HBinaryOperation* in // Do not generate anything. DivZeroCheck would prevent any code to be executed. } else if (imm == 1 || imm == -1) { DivRemOneOrMinusOne(instruction); - } else if (instruction->IsDiv() && IsPowerOfTwo(std::abs(imm))) { + } else if (instruction->IsDiv() && IsPowerOfTwo(AbsOrMin(imm))) { DivByPowerOfTwo(instruction->AsDiv()); } else { DCHECK(imm <= -2 || imm >= 2); diff --git a/compiler/optimizing/code_generator_x86_64.h b/compiler/optimizing/code_generator_x86_64.h index c5e8a04da6..505c9dcdad 100644 --- a/compiler/optimizing/code_generator_x86_64.h +++ b/compiler/optimizing/code_generator_x86_64.h @@ -183,7 +183,7 @@ class LocationsBuilderX86_64 : public HGraphVisitor { DISALLOW_COPY_AND_ASSIGN(LocationsBuilderX86_64); }; -class InstructionCodeGeneratorX86_64 : public HGraphVisitor { +class InstructionCodeGeneratorX86_64 : public InstructionCodeGenerator { public: InstructionCodeGeneratorX86_64(HGraph* graph, CodeGeneratorX86_64* codegen); diff --git a/compiler/optimizing/instruction_simplifier.cc b/compiler/optimizing/instruction_simplifier.cc index c504ded54c..b90afb1d73 100644 --- a/compiler/optimizing/instruction_simplifier.cc +++ b/compiler/optimizing/instruction_simplifier.cc @@ -211,19 +211,6 @@ bool InstructionSimplifierVisitor::ReplaceRotateWithRor(HBinaryOperation* op, // Try to replace a binary operation flanked by one UShr and one Shl with a bitfield rotation. bool InstructionSimplifierVisitor::TryReplaceWithRotate(HBinaryOperation* op) { - // This simplification is currently supported on x86, x86_64, ARM and ARM64. - // TODO: Implement it for MIPS/64. 
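// [Editor's aside: an illustrative sketch, not part of the original change;
// RotateRight32 is a hypothetical name.] The per-ISA whitelist being deleted
// around this point gated TryReplaceWithRotate, which rewrites an Or/Add/Xor
// over a matching UShr/Shl pair into a single HRor. The identity it relies on:
#include <cstdint>
uint32_t RotateRight32(uint32_t x, uint32_t d) {
  d &= 31;  // Rotation distances are taken modulo the register width.
  return (x >> d) | (x << ((32 - d) & 31));  // == (x ushr d) | (x shl (32 - d))
}
// With the MIPS64 backend now emitting Rotr/Drotr (see HandleShift above), the
// simplification is valid on every supported ISA and the gate can go.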
- const InstructionSet instruction_set = GetGraph()->GetInstructionSet(); - switch (instruction_set) { - case kArm: - case kArm64: - case kThumb2: - case kX86: - case kX86_64: - break; - default: - return false; - } DCHECK(op->IsAdd() || op->IsXor() || op->IsOr()); HInstruction* left = op->GetLeft(); HInstruction* right = op->GetRight(); @@ -1261,19 +1248,6 @@ void InstructionSimplifierVisitor::SimplifyStringEquals(HInvoke* instruction) { void InstructionSimplifierVisitor::SimplifyRotate(HInvoke* invoke, bool is_left) { DCHECK(invoke->IsInvokeStaticOrDirect()); DCHECK_EQ(invoke->GetOriginalInvokeType(), InvokeType::kStatic); - // This simplification is currently supported on x86, x86_64, ARM and ARM64. - // TODO: Implement it for MIPS/64. - const InstructionSet instruction_set = GetGraph()->GetInstructionSet(); - switch (instruction_set) { - case kArm: - case kArm64: - case kThumb2: - case kX86: - case kX86_64: - break; - default: - return; - } HInstruction* value = invoke->InputAt(0); HInstruction* distance = invoke->InputAt(1); // Replace the invoke with an HRor. diff --git a/compiler/optimizing/intrinsics_arm.cc b/compiler/optimizing/intrinsics_arm.cc index 4683aee603..b1fbf28204 100644 --- a/compiler/optimizing/intrinsics_arm.cc +++ b/compiler/optimizing/intrinsics_arm.cc @@ -502,9 +502,6 @@ static void GenUnsafeGet(HInvoke* invoke, bool is_volatile, CodeGeneratorARM* codegen) { LocationSummary* locations = invoke->GetLocations(); - DCHECK((type == Primitive::kPrimInt) || - (type == Primitive::kPrimLong) || - (type == Primitive::kPrimNot)); ArmAssembler* assembler = codegen->GetAssembler(); Location base_loc = locations->InAt(1); Register base = base_loc.AsRegister<Register>(); // Object pointer. @@ -512,30 +509,67 @@ static void GenUnsafeGet(HInvoke* invoke, Register offset = offset_loc.AsRegisterPairLow<Register>(); // Long offset, lo part only. 
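// [Editor's aside: an illustrative sketch, not part of the original change;
// VolatileGet32 is a hypothetical name.] Every volatile path in the rewritten
// GenUnsafeGet below keeps the same shape: a plain load followed by a barrier.
// An approximation of that ordering contract in C++11 atomics:
#include <atomic>
#include <cstdint>
int32_t VolatileGet32(const std::atomic<int32_t>* addr) {
  int32_t value = addr->load(std::memory_order_relaxed);  // __ ldr(trg, ...)
  // DMB ISH is a full barrier; an acquire fence is the minimum that the Java
  // volatile-read semantics require at this point.
  std::atomic_thread_fence(std::memory_order_acquire);    // __ dmb(ISH)
  return value;
}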
Location trg_loc = locations->Out(); - if (type == Primitive::kPrimLong) { - Register trg_lo = trg_loc.AsRegisterPairLow<Register>(); - __ add(IP, base, ShifterOperand(offset)); - if (is_volatile && !codegen->GetInstructionSetFeatures().HasAtomicLdrdAndStrd()) { - Register trg_hi = trg_loc.AsRegisterPairHigh<Register>(); - __ ldrexd(trg_lo, trg_hi, IP); - } else { - __ ldrd(trg_lo, Address(IP)); + switch (type) { + case Primitive::kPrimInt: { + Register trg = trg_loc.AsRegister<Register>(); + __ ldr(trg, Address(base, offset)); + if (is_volatile) { + __ dmb(ISH); + } + break; } - } else { - Register trg = trg_loc.AsRegister<Register>(); - __ ldr(trg, Address(base, offset)); - } - if (is_volatile) { - __ dmb(ISH); - } + case Primitive::kPrimNot: { + Register trg = trg_loc.AsRegister<Register>(); + if (kEmitCompilerReadBarrier) { + if (kUseBakerReadBarrier) { + Location temp = locations->GetTemp(0); + codegen->GenerateArrayLoadWithBakerReadBarrier( + invoke, trg_loc, base, 0U, offset_loc, temp, /* needs_null_check */ false); + if (is_volatile) { + __ dmb(ISH); + } + } else { + __ ldr(trg, Address(base, offset)); + if (is_volatile) { + __ dmb(ISH); + } + codegen->GenerateReadBarrierSlow(invoke, trg_loc, trg_loc, base_loc, 0U, offset_loc); + } + } else { + __ ldr(trg, Address(base, offset)); + if (is_volatile) { + __ dmb(ISH); + } + __ MaybeUnpoisonHeapReference(trg); + } + break; + } - if (type == Primitive::kPrimNot) { - codegen->MaybeGenerateReadBarrier(invoke, trg_loc, trg_loc, base_loc, 0U, offset_loc); + case Primitive::kPrimLong: { + Register trg_lo = trg_loc.AsRegisterPairLow<Register>(); + __ add(IP, base, ShifterOperand(offset)); + if (is_volatile && !codegen->GetInstructionSetFeatures().HasAtomicLdrdAndStrd()) { + Register trg_hi = trg_loc.AsRegisterPairHigh<Register>(); + __ ldrexd(trg_lo, trg_hi, IP); + } else { + __ ldrd(trg_lo, Address(IP)); + } + if (is_volatile) { + __ dmb(ISH); + } + break; + } + + default: + LOG(FATAL) << "Unexpected type " << type; + UNREACHABLE(); } } -static void CreateIntIntIntToIntLocations(ArenaAllocator* arena, HInvoke* invoke) { +static void CreateIntIntIntToIntLocations(ArenaAllocator* arena, + HInvoke* invoke, + Primitive::Type type) { bool can_call = kEmitCompilerReadBarrier && (invoke->GetIntrinsic() == Intrinsics::kUnsafeGetObject || invoke->GetIntrinsic() == Intrinsics::kUnsafeGetObjectVolatile); @@ -548,25 +582,30 @@ static void CreateIntIntIntToIntLocations(ArenaAllocator* arena, HInvoke* invoke locations->SetInAt(1, Location::RequiresRegister()); locations->SetInAt(2, Location::RequiresRegister()); locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); + if (type == Primitive::kPrimNot && kEmitCompilerReadBarrier && kUseBakerReadBarrier) { + // We need a temporary register for the read barrier marking slow + // path in CodeGeneratorARM::GenerateArrayLoadWithBakerReadBarrier.
+ locations->AddTemp(Location::RequiresRegister()); + } } void IntrinsicLocationsBuilderARM::VisitUnsafeGet(HInvoke* invoke) { - CreateIntIntIntToIntLocations(arena_, invoke); + CreateIntIntIntToIntLocations(arena_, invoke, Primitive::kPrimInt); } void IntrinsicLocationsBuilderARM::VisitUnsafeGetVolatile(HInvoke* invoke) { - CreateIntIntIntToIntLocations(arena_, invoke); + CreateIntIntIntToIntLocations(arena_, invoke, Primitive::kPrimInt); } void IntrinsicLocationsBuilderARM::VisitUnsafeGetLong(HInvoke* invoke) { - CreateIntIntIntToIntLocations(arena_, invoke); + CreateIntIntIntToIntLocations(arena_, invoke, Primitive::kPrimLong); } void IntrinsicLocationsBuilderARM::VisitUnsafeGetLongVolatile(HInvoke* invoke) { - CreateIntIntIntToIntLocations(arena_, invoke); + CreateIntIntIntToIntLocations(arena_, invoke, Primitive::kPrimLong); } void IntrinsicLocationsBuilderARM::VisitUnsafeGetObject(HInvoke* invoke) { - CreateIntIntIntToIntLocations(arena_, invoke); + CreateIntIntIntToIntLocations(arena_, invoke, Primitive::kPrimNot); } void IntrinsicLocationsBuilderARM::VisitUnsafeGetObjectVolatile(HInvoke* invoke) { - CreateIntIntIntToIntLocations(arena_, invoke); + CreateIntIntIntToIntLocations(arena_, invoke, Primitive::kPrimNot); } void IntrinsicCodeGeneratorARM::VisitUnsafeGet(HInvoke* invoke) { @@ -808,6 +847,9 @@ static void GenCas(LocationSummary* locations, Primitive::Type type, CodeGenerat } // Prevent reordering with prior memory operations. + // Emit a DMB ISH instruction instead of a DMB ISHST one, as the + // latter allows a preceding load to be delayed past the STXR + // instruction below. __ dmb(ISH); __ add(tmp_ptr, base, ShifterOperand(offset)); diff --git a/compiler/optimizing/intrinsics_arm64.cc b/compiler/optimizing/intrinsics_arm64.cc index f723940444..81cab86c83 100644 --- a/compiler/optimizing/intrinsics_arm64.cc +++ b/compiler/optimizing/intrinsics_arm64.cc @@ -1035,7 +1035,11 @@ static void GenCas(LocationSummary* locations, Primitive::Type type, CodeGenerat __ Stlxr(tmp_32, value, MemOperand(tmp_ptr)); __ Cbnz(tmp_32, &loop_head); } else { - __ Dmb(InnerShareable, BarrierWrites); + // Emit a `Dmb(InnerShareable, BarrierAll)` (DMB ISH) instruction + // instead of a `Dmb(InnerShareable, BarrierWrites)` (DMB ISHST) + // one, as the latter allows a preceding load to be delayed past + // the STXR instruction below.
+ __ Dmb(InnerShareable, BarrierAll); __ Bind(&loop_head); // TODO: When `type == Primitive::kPrimNot`, add a read barrier for // the reference stored in the object before attempting the CAS, diff --git a/compiler/optimizing/intrinsics_mips.cc b/compiler/optimizing/intrinsics_mips.cc index 06fab616ad..bc126a2716 100644 --- a/compiler/optimizing/intrinsics_mips.cc +++ b/compiler/optimizing/intrinsics_mips.cc @@ -43,14 +43,18 @@ ArenaAllocator* IntrinsicCodeGeneratorMIPS::GetAllocator() { return codegen_->GetGraph()->GetArena(); } -inline bool IntrinsicCodeGeneratorMIPS::IsR2OrNewer() { +inline bool IntrinsicCodeGeneratorMIPS::IsR2OrNewer() const { return codegen_->GetInstructionSetFeatures().IsMipsIsaRevGreaterThanEqual2(); } -inline bool IntrinsicCodeGeneratorMIPS::IsR6() { +inline bool IntrinsicCodeGeneratorMIPS::IsR6() const { return codegen_->GetInstructionSetFeatures().IsR6(); } +inline bool IntrinsicCodeGeneratorMIPS::Is32BitFPU() const { + return codegen_->GetInstructionSetFeatures().Is32BitFloatingPoint(); +} + #define __ codegen->GetAssembler()-> static void MoveFromReturnRegister(Location trg, @@ -162,7 +166,7 @@ static void MoveFPToInt(LocationSummary* locations, bool is64bit, MipsAssembler* Register out_hi = locations->Out().AsRegisterPairHigh<Register>(); __ Mfc1(out_lo, in); - __ Mfhc1(out_hi, in); + __ MoveFromFpuHigh(out_hi, in); } else { Register out = locations->Out().AsRegister<Register>(); @@ -204,7 +208,7 @@ static void MoveIntToFP(LocationSummary* locations, bool is64bit, MipsAssembler* Register in_hi = locations->InAt(0).AsRegisterPairHigh<Register>(); __ Mtc1(in_lo, out); - __ Mthc1(in_hi, out); + __ MoveToFpuHigh(in_hi, out); } else { Register in = locations->InAt(0).AsRegister<Register>(); diff --git a/compiler/optimizing/intrinsics_mips.h b/compiler/optimizing/intrinsics_mips.h index f86b0efe4a..575a7d0a23 100644 --- a/compiler/optimizing/intrinsics_mips.h +++ b/compiler/optimizing/intrinsics_mips.h @@ -67,8 +67,9 @@ INTRINSICS_LIST(OPTIMIZING_INTRINSICS) #undef INTRINSICS_LIST #undef OPTIMIZING_INTRINSICS - bool IsR2OrNewer(void); - bool IsR6(void); + bool IsR2OrNewer() const; + bool IsR6() const; + bool Is32BitFPU() const; private: MipsAssembler* GetAssembler(); diff --git a/compiler/optimizing/nodes.cc b/compiler/optimizing/nodes.cc index f5a7048b01..b80c6bde82 100644 --- a/compiler/optimizing/nodes.cc +++ b/compiler/optimizing/nodes.cc @@ -2183,10 +2183,7 @@ void HInvoke::SetIntrinsic(Intrinsics intrinsic, IntrinsicExceptions exceptions) { intrinsic_ = intrinsic; IntrinsicOptimizations opt(this); - if (needs_env_or_cache == kNoEnvironmentOrCache) { - opt.SetDoesNotNeedDexCache(); - opt.SetDoesNotNeedEnvironment(); - } + // Adjust method's side effects from intrinsic table. switch (side_effects) { case kNoSideEffects: SetSideEffects(SideEffects::None()); break; @@ -2194,6 +2191,14 @@ void HInvoke::SetIntrinsic(Intrinsics intrinsic, case kWriteSideEffects: SetSideEffects(SideEffects::AllWrites()); break; case kAllSideEffects: SetSideEffects(SideEffects::AllExceptGCDependency()); break; } + + if (needs_env_or_cache == kNoEnvironmentOrCache) { + opt.SetDoesNotNeedDexCache(); + opt.SetDoesNotNeedEnvironment(); + } else { + // If we need an environment, that means there will be a call, which can trigger GC. + SetSideEffects(GetSideEffects().Union(SideEffects::CanTriggerGC())); + } // Adjust method's exception status from intrinsic table. 
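// [Editor's aside: an illustrative sketch, not part of the original change;
// kCanTriggerGC and WidenIntrinsicEffects are stand-ins, not ART names.] The
// reordering in SetIntrinsic above matters: the table's side effects are
// installed first, then widened when an environment is kept, because keeping
// an environment implies a possible runtime call, and a runtime call can
// allocate, i.e. trigger GC:
#include <cstdint>
enum : uint64_t { kCanTriggerGC = 1u << 0 };
uint64_t WidenIntrinsicEffects(bool needs_env_or_cache, uint64_t table_effects) {
  // Intrinsics that keep their environment must be modeled as GC points.
  return needs_env_or_cache ? (table_effects | kCanTriggerGC) : table_effects;
}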
switch (exceptions) { case kNoThrow: SetCanThrow(false); break; @@ -2325,4 +2330,19 @@ HInstruction* HGraph::InsertOppositeCondition(HInstruction* cond, HInstruction* } } +std::ostream& operator<<(std::ostream& os, const MoveOperands& rhs) { + os << "[" + << " source=" << rhs.GetSource() + << " destination=" << rhs.GetDestination() + << " type=" << rhs.GetType() + << " instruction="; + if (rhs.GetInstruction() != nullptr) { + os << rhs.GetInstruction()->DebugName() << ' ' << rhs.GetInstruction()->GetId(); + } else { + os << "null"; + } + os << " ]"; + return os; +} + } // namespace art diff --git a/compiler/optimizing/nodes.h b/compiler/optimizing/nodes.h index 59c07690b1..23132308f0 100644 --- a/compiler/optimizing/nodes.h +++ b/compiler/optimizing/nodes.h @@ -1881,6 +1881,10 @@ class HInstruction : public ArenaObject<kArenaAllocInstruction> { return false; } + virtual bool IsActualObject() const { + return GetType() == Primitive::kPrimNot; + } + void SetReferenceTypeInfo(ReferenceTypeInfo rti); ReferenceTypeInfo GetReferenceTypeInfo() const { @@ -2500,8 +2504,10 @@ class HTryBoundary : public HTemplateInstruction<0> { // Deoptimize to interpreter, upon checking a condition. class HDeoptimize : public HTemplateInstruction<1> { public: + // We set CanTriggerGC to prevent any intermediate address to be live + // at the point of the `HDeoptimize`. HDeoptimize(HInstruction* cond, uint32_t dex_pc) - : HTemplateInstruction(SideEffects::None(), dex_pc) { + : HTemplateInstruction(SideEffects::CanTriggerGC(), dex_pc) { SetRawInputAt(0, cond); } @@ -4017,8 +4023,10 @@ class HRem : public HBinaryOperation { class HDivZeroCheck : public HExpression<1> { public: + // `HDivZeroCheck` can trigger GC, as it may call the `ArithmeticException` + // constructor. HDivZeroCheck(HInstruction* value, uint32_t dex_pc) - : HExpression(value->GetType(), SideEffects::None(), dex_pc) { + : HExpression(value->GetType(), SideEffects::CanTriggerGC(), dex_pc) { SetRawInputAt(0, value); } @@ -4539,8 +4547,10 @@ class HPhi : public HInstruction { class HNullCheck : public HExpression<1> { public: + // `HNullCheck` can trigger GC, as it may call the `NullPointerException` + // constructor. HNullCheck(HInstruction* value, uint32_t dex_pc) - : HExpression(value->GetType(), SideEffects::None(), dex_pc) { + : HExpression(value->GetType(), SideEffects::CanTriggerGC(), dex_pc) { SetRawInputAt(0, value); } @@ -4861,8 +4871,10 @@ class HArrayLength : public HExpression<1> { class HBoundsCheck : public HExpression<2> { public: + // `HBoundsCheck` can trigger GC, as it may call the `IndexOutOfBoundsException` + // constructor. HBoundsCheck(HInstruction* index, HInstruction* length, uint32_t dex_pc) - : HExpression(index->GetType(), SideEffects::None(), dex_pc) { + : HExpression(index->GetType(), SideEffects::CanTriggerGC(), dex_pc) { DCHECK(index->GetType() == Primitive::kPrimInt); SetRawInputAt(0, index); SetRawInputAt(1, length); @@ -5626,8 +5638,8 @@ class MoveOperands : public ArenaObject<kArenaAllocMoveOperands> { } bool IsPending() const { - DCHECK(!source_.IsInvalid() || destination_.IsInvalid()); - return destination_.IsInvalid() && !source_.IsInvalid(); + DCHECK(source_.IsValid() || destination_.IsInvalid()); + return destination_.IsInvalid() && source_.IsValid(); } // True if this blocks a move from the given location. 
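// [Editor's aside: an illustrative sketch, not part of the original change;
// IsPendingSketch models the predicate on plain bools.] The IsPending rewrite
// above is behavior-preserving: since Location::IsValid() == !IsInvalid(), the
// old form (!source invalid || destination invalid) and the new form
// (source valid || destination invalid) assert the same invariant, namely that
// an invalid source implies an invalid destination (a fully eliminated move):
#include <cassert>
bool IsPendingSketch(bool source_valid, bool destination_valid) {
  assert(source_valid || !destination_valid);  // The DCHECKed invariant.
  return !destination_valid && source_valid;   // Pending: dst claimed, src live.
}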
@@ -5671,6 +5683,8 @@ class MoveOperands : public ArenaObject<kArenaAllocMoveOperands> { HInstruction* instruction_; }; +std::ostream& operator<<(std::ostream& os, const MoveOperands& rhs); + static constexpr size_t kDefaultNumberOfMoves = 4; class HParallelMove : public HTemplateInstruction<0> { diff --git a/compiler/optimizing/nodes_arm64.h b/compiler/optimizing/nodes_arm64.h index 18405f2623..445cdab191 100644 --- a/compiler/optimizing/nodes_arm64.h +++ b/compiler/optimizing/nodes_arm64.h @@ -107,6 +107,7 @@ class HArm64IntermediateAddress : public HExpression<2> { bool CanBeMoved() const OVERRIDE { return true; } bool InstructionDataEquals(HInstruction* other ATTRIBUTE_UNUSED) const OVERRIDE { return true; } + bool IsActualObject() const OVERRIDE { return false; } HInstruction* GetBaseAddress() const { return InputAt(0); } HInstruction* GetOffset() const { return InputAt(1); } diff --git a/compiler/optimizing/optimizing_compiler.cc b/compiler/optimizing/optimizing_compiler.cc index cafc6c5440..bb840eabdd 100644 --- a/compiler/optimizing/optimizing_compiler.cc +++ b/compiler/optimizing/optimizing_compiler.cc @@ -17,6 +17,7 @@ #include "optimizing_compiler.h" #include <fstream> +#include <memory> #include <stdint.h> #ifdef ART_ENABLE_CODEGEN_arm64 @@ -52,6 +53,8 @@ #include "driver/compiler_driver-inl.h" #include "driver/compiler_options.h" #include "driver/dex_compilation_unit.h" +#include "dwarf/method_debug_info.h" +#include "elf_writer_debug.h" #include "elf_writer_quick.h" #include "graph_checker.h" #include "graph_visualizer.h" @@ -60,6 +63,7 @@ #include "inliner.h" #include "instruction_simplifier.h" #include "intrinsics.h" +#include "jit/debugger_interface.h" #include "jit/jit_code_cache.h" #include "licm.h" #include "jni/quick/jni_compiler.h" @@ -68,6 +72,7 @@ #include "prepare_for_register_allocation.h" #include "reference_type_propagation.h" #include "register_allocator.h" +#include "oat_quick_method_header.h" #include "sharpening.h" #include "side_effects_analysis.h" #include "ssa_builder.h" @@ -965,6 +970,39 @@ bool OptimizingCompiler::JitCompile(Thread* self, return false; } + if (GetCompilerDriver()->GetCompilerOptions().GetGenerateDebugInfo()) { + const auto* method_header = reinterpret_cast<const OatQuickMethodHeader*>(code); + const uintptr_t code_address = reinterpret_cast<uintptr_t>(method_header->GetCode()); + CompiledMethod compiled_method( + GetCompilerDriver(), + codegen->GetInstructionSet(), + ArrayRef<const uint8_t>(code_allocator.GetMemory()), + codegen->HasEmptyFrame() ? 0 : codegen->GetFrameSize(), + codegen->GetCoreSpillMask(), + codegen->GetFpuSpillMask(), + ArrayRef<const SrcMapElem>(), + ArrayRef<const uint8_t>(), // mapping_table. + ArrayRef<const uint8_t>(stack_map_data, stack_map_size), + ArrayRef<const uint8_t>(), // native_gc_map. + ArrayRef<const uint8_t>(*codegen->GetAssembler()->cfi().data()), + ArrayRef<const LinkerPatch>()); + dwarf::MethodDebugInfo method_debug_info { + dex_file, + class_def_idx, + method_idx, + access_flags, + code_item, + false, // deduped. 
+ code_address, + code_address + code_allocator.GetSize(), + &compiled_method + }; + ArrayRef<const uint8_t> elf_file = dwarf::WriteDebugElfFileForMethod(method_debug_info); + CreateJITCodeEntryForAddress(code_address, + std::unique_ptr<const uint8_t[]>(elf_file.data()), + elf_file.size()); + } + return true; } diff --git a/compiler/optimizing/parallel_move_resolver.cc b/compiler/optimizing/parallel_move_resolver.cc index 176c50ce21..9d136f3ae6 100644 --- a/compiler/optimizing/parallel_move_resolver.cc +++ b/compiler/optimizing/parallel_move_resolver.cc @@ -13,7 +13,6 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -#include <iostream> #include "parallel_move_resolver.h" @@ -172,7 +171,7 @@ MoveOperands* ParallelMoveResolverWithSwap::PerformMove(size_t index) { i = -1; } else if (required_swap != nullptr) { // A move is required to swap. We walk back the cycle to find the - // move by just returning from this `PerforrmMove`. + // move by just returning from this `PerformMove`. moves_[index]->ClearPending(destination); return required_swap; } @@ -201,7 +200,7 @@ MoveOperands* ParallelMoveResolverWithSwap::PerformMove(size_t index) { } else { for (MoveOperands* other_move : moves_) { if (other_move->Blocks(destination)) { - DCHECK(other_move->IsPending()); + DCHECK(other_move->IsPending()) << "move=" << *move << " other_move=" << *other_move; if (!move->Is64BitMove() && other_move->Is64BitMove()) { // We swap 64bits moves before swapping 32bits moves. Go back from the // cycle by returning the move that must be swapped. diff --git a/compiler/optimizing/prepare_for_register_allocation.cc b/compiler/optimizing/prepare_for_register_allocation.cc index d1770b75ab..63ef600756 100644 --- a/compiler/optimizing/prepare_for_register_allocation.cc +++ b/compiler/optimizing/prepare_for_register_allocation.cc @@ -96,7 +96,7 @@ void PrepareForRegisterAllocation::VisitClinitCheck(HClinitCheck* check) { if (can_merge_with_load_class && !load_class->HasUses()) { load_class->GetBlock()->RemoveInstruction(load_class); } - } else if (can_merge_with_load_class) { + } else if (can_merge_with_load_class && !load_class->NeedsAccessCheck()) { // Pass the initialization duty to the `HLoadClass` instruction, // and remove the instruction from the graph. 
load_class->SetMustGenerateClinitCheck(true); diff --git a/compiler/optimizing/register_allocator.cc b/compiler/optimizing/register_allocator.cc index 5ab4547e22..2bae4bc5c8 100644 --- a/compiler/optimizing/register_allocator.cc +++ b/compiler/optimizing/register_allocator.cc @@ -1679,6 +1679,9 @@ void RegisterAllocator::ConnectSiblings(LiveInterval* interval) { LocationSummary* locations = safepoint_position->GetLocations(); if ((current->GetType() == Primitive::kPrimNot) && current->GetParent()->HasSpillSlot()) { + DCHECK(interval->GetDefinedBy()->IsActualObject()) + << interval->GetDefinedBy()->DebugName() + << "@" << safepoint_position->GetInstruction()->DebugName(); locations->SetStackBit(current->GetParent()->GetSpillSlot() / kVRegSize); } @@ -1691,6 +1694,9 @@ void RegisterAllocator::ConnectSiblings(LiveInterval* interval) { maximum_number_of_live_fp_registers_); } if (current->GetType() == Primitive::kPrimNot) { + DCHECK(interval->GetDefinedBy()->IsActualObject()) + << interval->GetDefinedBy()->DebugName() + << "@" << safepoint_position->GetInstruction()->DebugName(); locations->SetRegisterBit(source.reg()); } break; diff --git a/compiler/optimizing/stack_map_stream.cc b/compiler/optimizing/stack_map_stream.cc index c60a4eacaa..4784de1380 100644 --- a/compiler/optimizing/stack_map_stream.cc +++ b/compiler/optimizing/stack_map_stream.cc @@ -270,7 +270,7 @@ void StackMapStream::FillIn(MemoryRegion region) { stack_map.SetStackMask(stack_map_encoding_, *entry.sp_mask); } - if (entry.num_dex_registers == 0) { + if (entry.num_dex_registers == 0 || (entry.live_dex_registers_mask->NumSetBits() == 0)) { // No dex map available. stack_map.SetDexRegisterMapOffset(stack_map_encoding_, StackMap::kNoDexRegisterMap); } else { diff --git a/compiler/optimizing/stack_map_test.cc b/compiler/optimizing/stack_map_test.cc index 560502fde6..604787fd92 100644 --- a/compiler/optimizing/stack_map_test.cc +++ b/compiler/optimizing/stack_map_test.cc @@ -614,6 +614,10 @@ TEST(StackMapTest, TestNoDexRegisterMap) { stream.BeginStackMapEntry(0, 64, 0x3, &sp_mask, number_of_dex_registers, 0); stream.EndStackMapEntry(); + number_of_dex_registers = 1; + stream.BeginStackMapEntry(1, 67, 0x4, &sp_mask, number_of_dex_registers, 0); + stream.EndStackMapEntry(); + size_t size = stream.PrepareForFillIn(); void* memory = arena.Alloc(size, kArenaAllocMisc); MemoryRegion region(memory, size); @@ -622,7 +626,7 @@ TEST(StackMapTest, TestNoDexRegisterMap) { CodeInfo code_info(region); StackMapEncoding encoding = code_info.ExtractEncoding(); ASSERT_EQ(0u, encoding.NumberOfBytesForStackMask()); - ASSERT_EQ(1u, code_info.GetNumberOfStackMaps()); + ASSERT_EQ(2u, code_info.GetNumberOfStackMaps()); uint32_t number_of_location_catalog_entries = code_info.GetNumberOfLocationCatalogEntries(); ASSERT_EQ(0u, number_of_location_catalog_entries); @@ -638,6 +642,16 @@ TEST(StackMapTest, TestNoDexRegisterMap) { ASSERT_FALSE(stack_map.HasDexRegisterMap(encoding)); ASSERT_FALSE(stack_map.HasInlineInfo(encoding)); + + stack_map = code_info.GetStackMapAt(1, encoding); + ASSERT_TRUE(stack_map.Equals(code_info.GetStackMapForDexPc(1, encoding))); + ASSERT_TRUE(stack_map.Equals(code_info.GetStackMapForNativePcOffset(67, encoding))); + ASSERT_EQ(1u, stack_map.GetDexPc(encoding)); + ASSERT_EQ(67u, stack_map.GetNativePcOffset(encoding)); + ASSERT_EQ(0x4u, stack_map.GetRegisterMask(encoding)); + + ASSERT_FALSE(stack_map.HasDexRegisterMap(encoding)); + ASSERT_FALSE(stack_map.HasInlineInfo(encoding)); } TEST(StackMapTest, InlineTest) { |