Diffstat (limited to 'compiler/optimizing')
39 files changed, 3812 insertions, 883 deletions
diff --git a/compiler/optimizing/boolean_simplifier.cc b/compiler/optimizing/boolean_simplifier.cc index f985745e7a..f0cafc847f 100644 --- a/compiler/optimizing/boolean_simplifier.cc +++ b/compiler/optimizing/boolean_simplifier.cc @@ -61,40 +61,6 @@ static bool NegatesCondition(HInstruction* input_true, HInstruction* input_false && input_false->IsIntConstant() && input_false->AsIntConstant()->IsOne(); } -// Returns an instruction with the opposite boolean value from 'cond'. -static HInstruction* GetOppositeCondition(HInstruction* cond) { - HGraph* graph = cond->GetBlock()->GetGraph(); - ArenaAllocator* allocator = graph->GetArena(); - - if (cond->IsCondition()) { - HInstruction* lhs = cond->InputAt(0); - HInstruction* rhs = cond->InputAt(1); - switch (cond->AsCondition()->GetOppositeCondition()) { // get *opposite* - case kCondEQ: return new (allocator) HEqual(lhs, rhs); - case kCondNE: return new (allocator) HNotEqual(lhs, rhs); - case kCondLT: return new (allocator) HLessThan(lhs, rhs); - case kCondLE: return new (allocator) HLessThanOrEqual(lhs, rhs); - case kCondGT: return new (allocator) HGreaterThan(lhs, rhs); - case kCondGE: return new (allocator) HGreaterThanOrEqual(lhs, rhs); - case kCondB: return new (allocator) HBelow(lhs, rhs); - case kCondBE: return new (allocator) HBelowOrEqual(lhs, rhs); - case kCondA: return new (allocator) HAbove(lhs, rhs); - case kCondAE: return new (allocator) HAboveOrEqual(lhs, rhs); - } - } else if (cond->IsIntConstant()) { - HIntConstant* int_const = cond->AsIntConstant(); - if (int_const->IsZero()) { - return graph->GetIntConstant(1); - } else { - DCHECK(int_const->IsOne()); - return graph->GetIntConstant(0); - } - } - // General case when 'cond' is another instruction of type boolean, - // as verified by SSAChecker. - return new (allocator) HBooleanNot(cond); -} - void HBooleanSimplifier::TryRemovingBooleanSelection(HBasicBlock* block) { DCHECK(block->EndsWithIf()); @@ -126,10 +92,7 @@ void HBooleanSimplifier::TryRemovingBooleanSelection(HBasicBlock* block) { HInstruction* replacement; if (NegatesCondition(true_value, false_value)) { - replacement = GetOppositeCondition(if_condition); - if (replacement->GetBlock() == nullptr) { - block->InsertInstructionBefore(replacement, if_instruction); - } + replacement = graph_->InsertOppositeCondition(if_condition, if_instruction); } else if (PreservesCondition(true_value, false_value)) { replacement = if_condition; } else { diff --git a/compiler/optimizing/builder.cc b/compiler/optimizing/builder.cc index 3257de1858..d7754e8ea9 100644 --- a/compiler/optimizing/builder.cc +++ b/compiler/optimizing/builder.cc @@ -876,12 +876,96 @@ bool HGraphBuilder::BuildInvoke(const Instruction& instruction, clinit_check); } +bool HGraphBuilder::BuildNewInstance(uint16_t type_index, uint32_t dex_pc) { + bool finalizable; + bool can_throw = NeedsAccessCheck(type_index, &finalizable); + + // Only the non-resolved entrypoint handles the finalizable class case. If we + // need access checks, then we haven't resolved the method and the class may + // again be finalizable. + QuickEntrypointEnum entrypoint = (finalizable || can_throw) + ? 
kQuickAllocObject + : kQuickAllocObjectInitialized; + + ScopedObjectAccess soa(Thread::Current()); + StackHandleScope<3> hs(soa.Self()); + Handle<mirror::DexCache> dex_cache(hs.NewHandle( + dex_compilation_unit_->GetClassLinker()->FindDexCache( + soa.Self(), *dex_compilation_unit_->GetDexFile()))); + Handle<mirror::Class> resolved_class(hs.NewHandle(dex_cache->GetResolvedType(type_index))); + const DexFile& outer_dex_file = *outer_compilation_unit_->GetDexFile(); + Handle<mirror::DexCache> outer_dex_cache(hs.NewHandle( + outer_compilation_unit_->GetClassLinker()->FindDexCache(soa.Self(), outer_dex_file))); + + if (outer_dex_cache.Get() != dex_cache.Get()) { + // We currently do not support inlining allocations across dex files. + return false; + } + + HLoadClass* load_class = new (arena_) HLoadClass( + graph_->GetCurrentMethod(), + type_index, + outer_dex_file, + IsOutermostCompilingClass(type_index), + dex_pc, + /*needs_access_check*/ can_throw, + compiler_driver_->CanAssumeTypeIsPresentInDexCache(outer_dex_file, type_index)); + + current_block_->AddInstruction(load_class); + HInstruction* cls = load_class; + if (!IsInitialized(resolved_class)) { + cls = new (arena_) HClinitCheck(load_class, dex_pc); + current_block_->AddInstruction(cls); + } + + current_block_->AddInstruction(new (arena_) HNewInstance( + cls, + graph_->GetCurrentMethod(), + dex_pc, + type_index, + *dex_compilation_unit_->GetDexFile(), + can_throw, + finalizable, + entrypoint)); + return true; +} + +static bool IsSubClass(mirror::Class* to_test, mirror::Class* super_class) + SHARED_REQUIRES(Locks::mutator_lock_) { + return to_test != nullptr && !to_test->IsInterface() && to_test->IsSubClass(super_class); +} + +bool HGraphBuilder::IsInitialized(Handle<mirror::Class> cls) const { + if (cls.Get() == nullptr) { + return false; + } + + // `CanAssumeClassIsLoaded` will return true if we're JITting, or will + // check whether the class is in an image for the AOT compilation. + if (cls->IsInitialized() && + compiler_driver_->CanAssumeClassIsLoaded(cls.Get())) { + return true; + } + + if (IsSubClass(GetOutermostCompilingClass(), cls.Get())) { + return true; + } + + // TODO: We should walk over the inlined methods, but we don't pass + // that information to the builder. + if (IsSubClass(GetCompilingClass(), cls.Get())) { + return true; + } + + return false; +} + HClinitCheck* HGraphBuilder::ProcessClinitCheckForInvoke( uint32_t dex_pc, uint32_t method_idx, HInvokeStaticOrDirect::ClinitCheckRequirement* clinit_check_requirement) { ScopedObjectAccess soa(Thread::Current()); - StackHandleScope<4> hs(soa.Self()); + StackHandleScope<5> hs(soa.Self()); Handle<mirror::DexCache> dex_cache(hs.NewHandle( dex_compilation_unit_->GetClassLinker()->FindDexCache( soa.Self(), *dex_compilation_unit_->GetDexFile()))); @@ -896,6 +980,7 @@ HClinitCheck* HGraphBuilder::ProcessClinitCheckForInvoke( Handle<mirror::DexCache> outer_dex_cache(hs.NewHandle( outer_compilation_unit_->GetClassLinker()->FindDexCache(soa.Self(), outer_dex_file))); Handle<mirror::Class> outer_class(hs.NewHandle(GetOutermostCompilingClass())); + Handle<mirror::Class> resolved_method_class(hs.NewHandle(resolved_method->GetDeclaringClass())); // The index at which the method's class is stored in the DexCache's type array. 
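The IsInitialized helper added above is what lets the builder skip an explicit HClinitCheck: either the class is already initialized and can be assumed loaded at runtime, or the compiling class is a subclass of it, in which case its <clinit> must already have run. A minimal, self-contained sketch of that decision, using simplified stand-in types rather than ART's mirror::Class:

struct ClassInfo {
  bool initialized;        // cls->IsInitialized()
  bool assume_loaded;      // CompilerDriver::CanAssumeClassIsLoaded(cls)
  bool is_interface;
  const ClassInfo* super;  // super-class chain, null-terminated
};

// Mirrors the IsSubClass helper above: interfaces never qualify.
static bool IsSubClassOf(const ClassInfo* to_test, const ClassInfo* super_class) {
  if (to_test == nullptr || to_test->is_interface) return false;
  for (const ClassInfo* c = to_test; c != nullptr; c = c->super) {
    if (c == super_class) return true;
  }
  return false;
}

// True when no explicit class-initialization check needs to be emitted.
static bool CanAssumeInitialized(const ClassInfo* cls,
                                 const ClassInfo* outermost_compiling_class,
                                 const ClassInfo* compiling_class) {
  if (cls == nullptr) return false;
  if (cls->initialized && cls->assume_loaded) return true;
  return IsSubClassOf(outermost_compiling_class, cls) || IsSubClassOf(compiling_class, cls);
}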
uint32_t storage_index = DexFile::kDexNoIndex; @@ -913,41 +998,21 @@ HClinitCheck* HGraphBuilder::ProcessClinitCheckForInvoke( HClinitCheck* clinit_check = nullptr; - if (!outer_class->IsInterface() - && outer_class->IsSubClass(resolved_method->GetDeclaringClass())) { - // If the outer class is the declaring class or a subclass - // of the declaring class, no class initialization is needed - // before the static method call. - // Note that in case of inlining, we do not need to add clinit checks - // to calls that satisfy this subclass check with any inlined methods. This - // will be detected by the optimization passes. + if (IsInitialized(resolved_method_class)) { *clinit_check_requirement = HInvokeStaticOrDirect::ClinitCheckRequirement::kNone; } else if (storage_index != DexFile::kDexNoIndex) { - // If the method's class type index is available, check - // whether we should add an explicit class initialization - // check for its declaring class before the static method call. - - // TODO: find out why this check is needed. - bool is_in_dex_cache = compiler_driver_->CanAssumeTypeIsPresentInDexCache( - *outer_compilation_unit_->GetDexFile(), storage_index); - bool is_initialized = - resolved_method->GetDeclaringClass()->IsInitialized() && is_in_dex_cache; - - if (is_initialized) { - *clinit_check_requirement = HInvokeStaticOrDirect::ClinitCheckRequirement::kNone; - } else { - *clinit_check_requirement = HInvokeStaticOrDirect::ClinitCheckRequirement::kExplicit; - HLoadClass* load_class = new (arena_) HLoadClass( - graph_->GetCurrentMethod(), - storage_index, - *dex_compilation_unit_->GetDexFile(), - is_outer_class, - dex_pc, - /*needs_access_check*/ false); - current_block_->AddInstruction(load_class); - clinit_check = new (arena_) HClinitCheck(load_class, dex_pc); - current_block_->AddInstruction(clinit_check); - } + *clinit_check_requirement = HInvokeStaticOrDirect::ClinitCheckRequirement::kExplicit; + HLoadClass* load_class = new (arena_) HLoadClass( + graph_->GetCurrentMethod(), + storage_index, + outer_dex_file, + is_outer_class, + dex_pc, + /*needs_access_check*/ false, + compiler_driver_->CanAssumeTypeIsPresentInDexCache(outer_dex_file, storage_index)); + current_block_->AddInstruction(load_class); + clinit_check = new (arena_) HClinitCheck(load_class, dex_pc); + current_block_->AddInstruction(clinit_check); } return clinit_check; } @@ -1272,7 +1337,7 @@ bool HGraphBuilder::BuildStaticFieldAccess(const Instruction& instruction, uint16_t field_index = instruction.VRegB_21c(); ScopedObjectAccess soa(Thread::Current()); - StackHandleScope<4> hs(soa.Self()); + StackHandleScope<5> hs(soa.Self()); Handle<mirror::DexCache> dex_cache(hs.NewHandle( dex_compilation_unit_->GetClassLinker()->FindDexCache( soa.Self(), *dex_compilation_unit_->GetDexFile()))); @@ -1318,26 +1383,26 @@ bool HGraphBuilder::BuildStaticFieldAccess(const Instruction& instruction, } } - // TODO: find out why this check is needed. 
- bool is_in_dex_cache = compiler_driver_->CanAssumeTypeIsPresentInDexCache( - *outer_compilation_unit_->GetDexFile(), storage_index); - bool is_initialized = resolved_field->GetDeclaringClass()->IsInitialized() && is_in_dex_cache; - + bool is_in_cache = + compiler_driver_->CanAssumeTypeIsPresentInDexCache(outer_dex_file, storage_index); HLoadClass* constant = new (arena_) HLoadClass(graph_->GetCurrentMethod(), storage_index, - *dex_compilation_unit_->GetDexFile(), + outer_dex_file, is_outer_class, dex_pc, - /*needs_access_check*/ false); + /*needs_access_check*/ false, + is_in_cache); current_block_->AddInstruction(constant); HInstruction* cls = constant; - if (!is_initialized && !is_outer_class) { + + Handle<mirror::Class> klass(hs.NewHandle(resolved_field->GetDeclaringClass())); + if (!IsInitialized(klass)) { cls = new (arena_) HClinitCheck(constant, dex_pc); current_block_->AddInstruction(cls); } - uint16_t class_def_index = resolved_field->GetDeclaringClass()->GetDexClassDefIndex(); + uint16_t class_def_index = klass->GetDexClassDefIndex(); if (is_put) { // We need to keep the class alive before loading the value. Temporaries temps(graph_); @@ -1601,19 +1666,20 @@ void HGraphBuilder::BuildTypeCheck(const Instruction& instruction, ScopedObjectAccess soa(Thread::Current()); StackHandleScope<2> hs(soa.Self()); + const DexFile& dex_file = *dex_compilation_unit_->GetDexFile(); Handle<mirror::DexCache> dex_cache(hs.NewHandle( - dex_compilation_unit_->GetClassLinker()->FindDexCache( - soa.Self(), *dex_compilation_unit_->GetDexFile()))); + dex_compilation_unit_->GetClassLinker()->FindDexCache(soa.Self(), dex_file))); Handle<mirror::Class> resolved_class(hs.NewHandle(dex_cache->GetResolvedType(type_index))); HInstruction* object = LoadLocal(reference, Primitive::kPrimNot, dex_pc); HLoadClass* cls = new (arena_) HLoadClass( graph_->GetCurrentMethod(), type_index, - *dex_compilation_unit_->GetDexFile(), + dex_file, IsOutermostCompilingClass(type_index), dex_pc, - !can_access); + !can_access, + compiler_driver_->CanAssumeTypeIsPresentInDexCache(dex_file, type_index)); current_block_->AddInstruction(cls); // The class needs a temporary before being used by the type check. @@ -2509,20 +2575,9 @@ bool HGraphBuilder::AnalyzeDexInstruction(const Instruction& instruction, uint32 current_block_->AddInstruction(fake_string); UpdateLocal(register_index, fake_string, dex_pc); } else { - bool finalizable; - bool can_throw = NeedsAccessCheck(type_index, &finalizable); - QuickEntrypointEnum entrypoint = can_throw - ? 
kQuickAllocObjectWithAccessCheck - : kQuickAllocObject; - - current_block_->AddInstruction(new (arena_) HNewInstance( - graph_->GetCurrentMethod(), - dex_pc, - type_index, - *dex_compilation_unit_->GetDexFile(), - can_throw, - finalizable, - entrypoint)); + if (!BuildNewInstance(type_index, dex_pc)) { + return false; + } UpdateLocal(instruction.VRegA(), current_block_->GetLastInstruction(), dex_pc); } break; @@ -2750,10 +2805,11 @@ bool HGraphBuilder::AnalyzeDexInstruction(const Instruction& instruction, uint32 current_block_->AddInstruction(new (arena_) HLoadClass( graph_->GetCurrentMethod(), type_index, - *dex_compilation_unit_->GetDexFile(), + *dex_file_, IsOutermostCompilingClass(type_index), dex_pc, - !can_access)); + !can_access, + compiler_driver_->CanAssumeTypeIsPresentInDexCache(*dex_file_, type_index))); UpdateLocal(instruction.VRegA_21c(), current_block_->GetLastInstruction(), dex_pc); break; } diff --git a/compiler/optimizing/builder.h b/compiler/optimizing/builder.h index f857ef0e12..5ada93f684 100644 --- a/compiler/optimizing/builder.h +++ b/compiler/optimizing/builder.h @@ -308,6 +308,13 @@ class HGraphBuilder : public ValueObject { uint32_t method_idx, HInvokeStaticOrDirect::ClinitCheckRequirement* clinit_check_requirement); + // Build a HNewInstance instruction. + bool BuildNewInstance(uint16_t type_index, uint32_t dex_pc); + + // Return whether the compiler can assume `cls` is initialized. + bool IsInitialized(Handle<mirror::Class> cls) const + SHARED_REQUIRES(Locks::mutator_lock_); + ArenaAllocator* const arena_; // A list of the size of the dex code holding block information for diff --git a/compiler/optimizing/code_generator.cc b/compiler/optimizing/code_generator.cc index 77d53fcd8f..0baa0e30dc 100644 --- a/compiler/optimizing/code_generator.cc +++ b/compiler/optimizing/code_generator.cc @@ -383,11 +383,11 @@ void CodeGenerator::CreateCommonInvokeLocationSummary( HInvokeStaticOrDirect* call = invoke->AsInvokeStaticOrDirect(); switch (call->GetMethodLoadKind()) { case HInvokeStaticOrDirect::MethodLoadKind::kRecursive: - locations->SetInAt(call->GetCurrentMethodInputIndex(), visitor->GetMethodLocation()); + locations->SetInAt(call->GetSpecialInputIndex(), visitor->GetMethodLocation()); break; case HInvokeStaticOrDirect::MethodLoadKind::kDexCacheViaMethod: locations->AddTemp(visitor->GetMethodLocation()); - locations->SetInAt(call->GetCurrentMethodInputIndex(), Location::RequiresRegister()); + locations->SetInAt(call->GetSpecialInputIndex(), Location::RequiresRegister()); break; default: locations->AddTemp(visitor->GetMethodLocation()); diff --git a/compiler/optimizing/code_generator_arm.cc b/compiler/optimizing/code_generator_arm.cc index 655bbb8a8e..a98d9c68b7 100644 --- a/compiler/optimizing/code_generator_arm.cc +++ b/compiler/optimizing/code_generator_arm.cc @@ -34,6 +34,9 @@ namespace art { +template<class MirrorType> +class GcRoot; + namespace arm { static bool ExpectedPairLayout(Location location) { @@ -74,6 +77,7 @@ class NullCheckSlowPathARM : public SlowPathCode { } arm_codegen->InvokeRuntime( QUICK_ENTRY_POINT(pThrowNullPointer), instruction_, instruction_->GetDexPc(), this); + CheckEntrypointTypes<kQuickThrowNullPointer, void, void>(); } bool IsFatal() const OVERRIDE { return true; } @@ -98,6 +102,7 @@ class DivZeroCheckSlowPathARM : public SlowPathCode { } arm_codegen->InvokeRuntime( QUICK_ENTRY_POINT(pThrowDivZero), instruction_, instruction_->GetDexPc(), this); + CheckEntrypointTypes<kQuickThrowDivZero, void, void>(); } bool IsFatal() const OVERRIDE { 
return true; } @@ -120,6 +125,7 @@ class SuspendCheckSlowPathARM : public SlowPathCode { SaveLiveRegisters(codegen, instruction_->GetLocations()); arm_codegen->InvokeRuntime( QUICK_ENTRY_POINT(pTestSuspend), instruction_, instruction_->GetDexPc(), this); + CheckEntrypointTypes<kQuickTestSuspend, void, void>(); RestoreLiveRegisters(codegen, instruction_->GetLocations()); if (successor_ == nullptr) { __ b(GetReturnLabel()); @@ -176,6 +182,7 @@ class BoundsCheckSlowPathARM : public SlowPathCode { Primitive::kPrimInt); arm_codegen->InvokeRuntime( QUICK_ENTRY_POINT(pThrowArrayBounds), instruction_, instruction_->GetDexPc(), this); + CheckEntrypointTypes<kQuickThrowArrayBounds, void, int32_t, int32_t>(); } bool IsFatal() const OVERRIDE { return true; } @@ -211,6 +218,11 @@ class LoadClassSlowPathARM : public SlowPathCode { ? QUICK_ENTRY_POINT(pInitializeStaticStorage) : QUICK_ENTRY_POINT(pInitializeType); arm_codegen->InvokeRuntime(entry_point_offset, at_, dex_pc_, this); + if (do_clinit_) { + CheckEntrypointTypes<kQuickInitializeStaticStorage, void*, uint32_t>(); + } else { + CheckEntrypointTypes<kQuickInitializeType, void*, uint32_t>(); + } // Move the class to the desired location. Location out = locations->Out(); @@ -257,6 +269,7 @@ class LoadStringSlowPathARM : public SlowPathCode { __ LoadImmediate(calling_convention.GetRegisterAt(0), instruction_->GetStringIndex()); arm_codegen->InvokeRuntime( QUICK_ENTRY_POINT(pResolveString), instruction_, instruction_->GetDexPc(), this); + CheckEntrypointTypes<kQuickResolveString, void*, uint32_t>(); arm_codegen->Move32(locations->Out(), Location::RegisterLocation(R0)); RestoreLiveRegisters(codegen, locations); @@ -286,15 +299,6 @@ class TypeCheckSlowPathARM : public SlowPathCode { CodeGeneratorARM* arm_codegen = down_cast<CodeGeneratorARM*>(codegen); __ Bind(GetEntryLabel()); - if (instruction_->IsCheckCast()) { - // The codegen for the instruction overwrites `temp`, so put it back in place. 
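Many hunks in this file add a CheckEntrypointTypes<...>() call right after InvokeRuntime. Conceptually this is a compile-time assertion that the signature the code generator assumes for a quick entrypoint matches a central definition, so a mismatch is caught at build time rather than at run time. A rough, self-contained sketch of such a mechanism (the entrypoint names and table below are illustrative, not ART's actual declarations):

#include <cstdint>
#include <type_traits>

enum QuickEntrypoint { kThrowDivZero, kIdivmod };

// Central table mapping an entrypoint to its C-level signature.
template <QuickEntrypoint kEntry> struct EntrypointSignature;
template <> struct EntrypointSignature<kThrowDivZero> { using type = void(); };
template <> struct EntrypointSignature<kIdivmod>      { using type = int32_t(int32_t, int32_t); };

// Call sites state the signature they rely on; any mismatch fails to compile.
template <QuickEntrypoint kEntry, typename Ret, typename... Args>
void CheckEntrypointTypes() {
  static_assert(std::is_same<typename EntrypointSignature<kEntry>::type, Ret(Args...)>::value,
                "entrypoint signature mismatch at call site");
}

// Usage, mirroring the diff:
//   CheckEntrypointTypes<kIdivmod, int32_t, int32_t, int32_t>();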
- Register obj = locations->InAt(0).AsRegister<Register>(); - Register temp = locations->GetTemp(0).AsRegister<Register>(); - uint32_t class_offset = mirror::Object::ClassOffset().Int32Value(); - __ LoadFromOffset(kLoadWord, temp, obj, class_offset); - __ MaybeUnpoisonHeapReference(temp); - } - if (!is_fatal_) { SaveLiveRegisters(codegen, locations); } @@ -315,6 +319,8 @@ class TypeCheckSlowPathARM : public SlowPathCode { instruction_, instruction_->GetDexPc(), this); + CheckEntrypointTypes< + kQuickInstanceofNonTrivial, uint32_t, const mirror::Class*, const mirror::Class*>(); arm_codegen->Move32(locations->Out(), Location::RegisterLocation(R0)); } else { DCHECK(instruction_->IsCheckCast()); @@ -322,6 +328,7 @@ class TypeCheckSlowPathARM : public SlowPathCode { instruction_, instruction_->GetDexPc(), this); + CheckEntrypointTypes<kQuickCheckCast, void, const mirror::Class*, const mirror::Class*>(); } if (!is_fatal_) { @@ -354,6 +361,7 @@ class DeoptimizationSlowPathARM : public SlowPathCode { uint32_t dex_pc = deoptimize->GetDexPc(); CodeGeneratorARM* arm_codegen = down_cast<CodeGeneratorARM*>(codegen); arm_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pDeoptimize), instruction_, dex_pc, this); + CheckEntrypointTypes<kQuickDeoptimize, void, void>(); } const char* GetDescription() const OVERRIDE { return "DeoptimizationSlowPathARM"; } @@ -396,6 +404,7 @@ class ArraySetSlowPathARM : public SlowPathCode { instruction_, instruction_->GetDexPc(), this); + CheckEntrypointTypes<kQuickAputObject, void, mirror::Array*, int32_t, mirror::Object*>(); RestoreLiveRegisters(codegen, locations); __ b(GetExitLabel()); } @@ -408,6 +417,221 @@ class ArraySetSlowPathARM : public SlowPathCode { DISALLOW_COPY_AND_ASSIGN(ArraySetSlowPathARM); }; +// Slow path generating a read barrier for a heap reference. +class ReadBarrierForHeapReferenceSlowPathARM : public SlowPathCode { + public: + ReadBarrierForHeapReferenceSlowPathARM(HInstruction* instruction, + Location out, + Location ref, + Location obj, + uint32_t offset, + Location index) + : instruction_(instruction), + out_(out), + ref_(ref), + obj_(obj), + offset_(offset), + index_(index) { + DCHECK(kEmitCompilerReadBarrier); + // If `obj` is equal to `out` or `ref`, it means the initial object + // has been overwritten by (or after) the heap object reference load + // to be instrumented, e.g.: + // + // __ LoadFromOffset(kLoadWord, out, out, offset); + // codegen_->GenerateReadBarrier(instruction, out_loc, out_loc, out_loc, offset); + // + // In that case, we have lost the information about the original + // object, and the emitted read barrier cannot work properly. + DCHECK(!obj.Equals(out)) << "obj=" << obj << " out=" << out; + DCHECK(!obj.Equals(ref)) << "obj=" << obj << " ref=" << ref; + } + + void EmitNativeCode(CodeGenerator* codegen) OVERRIDE { + CodeGeneratorARM* arm_codegen = down_cast<CodeGeneratorARM*>(codegen); + LocationSummary* locations = instruction_->GetLocations(); + Register reg_out = out_.AsRegister<Register>(); + DCHECK(locations->CanCall()); + DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(reg_out)); + DCHECK(!instruction_->IsInvoke() || + (instruction_->IsInvokeStaticOrDirect() && + instruction_->GetLocations()->Intrinsified())); + + __ Bind(GetEntryLabel()); + SaveLiveRegisters(codegen, locations); + + // We may have to change the index's value, but as `index_` is a + // constant member (like other "inputs" of this slow path), + // introduce a copy of it, `index`. 
+ Location index = index_; + if (index_.IsValid()) { + // Handle `index_` for HArrayGet and intrinsic UnsafeGetObject. + if (instruction_->IsArrayGet()) { + // Compute the actual memory offset and store it in `index`. + Register index_reg = index_.AsRegister<Register>(); + DCHECK(locations->GetLiveRegisters()->ContainsCoreRegister(index_reg)); + if (codegen->IsCoreCalleeSaveRegister(index_reg)) { + // We are about to change the value of `index_reg` (see the + // calls to art::arm::Thumb2Assembler::Lsl and + // art::arm::Thumb2Assembler::AddConstant below), but it has + // not been saved by the previous call to + // art::SlowPathCode::SaveLiveRegisters, as it is a + // callee-save register -- + // art::SlowPathCode::SaveLiveRegisters does not consider + // callee-save registers, as it has been designed with the + // assumption that callee-save registers are supposed to be + // handled by the called function. So, as a callee-save + // register, `index_reg` _would_ eventually be saved onto + // the stack, but it would be too late: we would have + // changed its value earlier. Therefore, we manually save + // it here into another freely available register, + // `free_reg`, chosen of course among the caller-save + // registers (as a callee-save `free_reg` register would + // exhibit the same problem). + // + // Note we could have requested a temporary register from + // the register allocator instead; but we prefer not to, as + // this is a slow path, and we know we can find a + // caller-save register that is available. + Register free_reg = FindAvailableCallerSaveRegister(codegen); + __ Mov(free_reg, index_reg); + index_reg = free_reg; + index = Location::RegisterLocation(index_reg); + } else { + // The initial register stored in `index_` has already been + // saved in the call to art::SlowPathCode::SaveLiveRegisters + // (as it is not a callee-save register), so we can freely + // use it. + } + // Shifting the index value contained in `index_reg` by the scale + // factor (2) cannot overflow in practice, as the runtime is + // unable to allocate object arrays with a size larger than + // 2^26 - 1 (that is, 2^28 - 4 bytes). + __ Lsl(index_reg, index_reg, TIMES_4); + static_assert( + sizeof(mirror::HeapReference<mirror::Object>) == sizeof(int32_t), + "art::mirror::HeapReference<art::mirror::Object> and int32_t have different sizes."); + __ AddConstant(index_reg, index_reg, offset_); + } else { + DCHECK(instruction_->IsInvoke()); + DCHECK(instruction_->GetLocations()->Intrinsified()); + DCHECK((instruction_->AsInvoke()->GetIntrinsic() == Intrinsics::kUnsafeGetObject) || + (instruction_->AsInvoke()->GetIntrinsic() == Intrinsics::kUnsafeGetObjectVolatile)) + << instruction_->AsInvoke()->GetIntrinsic(); + DCHECK_EQ(offset_, 0U); + DCHECK(index_.IsRegisterPair()); + // UnsafeGet's offset location is a register pair, the low + // part contains the correct offset. + index = index_.ToLow(); + } + } + + // We're moving two or three locations to locations that could + // overlap, so we need a parallel move resolver. 
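Earlier in this slow path, the HArrayGet case recomputes the loaded element's byte offset before the runtime call: heap references are 4 bytes (the size of int32_t, as the static_assert checks), so the offset is (index << 2) + data_offset, which is exactly what the Lsl/AddConstant pair produces. In plain C++:

#include <cstdint>

// 4-byte heap references, matching the static_assert in the slow path.
constexpr uint32_t kHeapReferenceSize = sizeof(int32_t);

constexpr uint32_t ElementByteOffset(uint32_t data_offset, uint32_t index) {
  return data_offset + (index << 2);  // index * kHeapReferenceSize, i.e. Lsl #2 then add
}

// Example: element 3 of an object array whose data starts 12 bytes in.
static_assert(ElementByteOffset(12, 3) == 12 + 3 * kHeapReferenceSize, "scaling by TIMES_4");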
+ InvokeRuntimeCallingConvention calling_convention; + HParallelMove parallel_move(codegen->GetGraph()->GetArena()); + parallel_move.AddMove(ref_, + Location::RegisterLocation(calling_convention.GetRegisterAt(0)), + Primitive::kPrimNot, + nullptr); + parallel_move.AddMove(obj_, + Location::RegisterLocation(calling_convention.GetRegisterAt(1)), + Primitive::kPrimNot, + nullptr); + if (index.IsValid()) { + parallel_move.AddMove(index, + Location::RegisterLocation(calling_convention.GetRegisterAt(2)), + Primitive::kPrimInt, + nullptr); + codegen->GetMoveResolver()->EmitNativeCode(¶llel_move); + } else { + codegen->GetMoveResolver()->EmitNativeCode(¶llel_move); + __ LoadImmediate(calling_convention.GetRegisterAt(2), offset_); + } + arm_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pReadBarrierSlow), + instruction_, + instruction_->GetDexPc(), + this); + CheckEntrypointTypes< + kQuickReadBarrierSlow, mirror::Object*, mirror::Object*, mirror::Object*, uint32_t>(); + arm_codegen->Move32(out_, Location::RegisterLocation(R0)); + + RestoreLiveRegisters(codegen, locations); + __ b(GetExitLabel()); + } + + const char* GetDescription() const OVERRIDE { return "ReadBarrierForHeapReferenceSlowPathARM"; } + + private: + Register FindAvailableCallerSaveRegister(CodeGenerator* codegen) { + size_t ref = static_cast<int>(ref_.AsRegister<Register>()); + size_t obj = static_cast<int>(obj_.AsRegister<Register>()); + for (size_t i = 0, e = codegen->GetNumberOfCoreRegisters(); i < e; ++i) { + if (i != ref && i != obj && !codegen->IsCoreCalleeSaveRegister(i)) { + return static_cast<Register>(i); + } + } + // We shall never fail to find a free caller-save register, as + // there are more than two core caller-save registers on ARM + // (meaning it is possible to find one which is different from + // `ref` and `obj`). + DCHECK_GT(codegen->GetNumberOfCoreCallerSaveRegisters(), 2u); + LOG(FATAL) << "Could not find a free caller-save register"; + UNREACHABLE(); + } + + HInstruction* const instruction_; + const Location out_; + const Location ref_; + const Location obj_; + const uint32_t offset_; + // An additional location containing an index to an array. + // Only used for HArrayGet and the UnsafeGetObject & + // UnsafeGetObjectVolatile intrinsics. + const Location index_; + + DISALLOW_COPY_AND_ASSIGN(ReadBarrierForHeapReferenceSlowPathARM); +}; + +// Slow path generating a read barrier for a GC root. 
+class ReadBarrierForRootSlowPathARM : public SlowPathCode { + public: + ReadBarrierForRootSlowPathARM(HInstruction* instruction, Location out, Location root) + : instruction_(instruction), out_(out), root_(root) {} + + void EmitNativeCode(CodeGenerator* codegen) OVERRIDE { + LocationSummary* locations = instruction_->GetLocations(); + Register reg_out = out_.AsRegister<Register>(); + DCHECK(locations->CanCall()); + DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(reg_out)); + DCHECK(instruction_->IsLoadClass() || instruction_->IsLoadString()); + + __ Bind(GetEntryLabel()); + SaveLiveRegisters(codegen, locations); + + InvokeRuntimeCallingConvention calling_convention; + CodeGeneratorARM* arm_codegen = down_cast<CodeGeneratorARM*>(codegen); + arm_codegen->Move32(Location::RegisterLocation(calling_convention.GetRegisterAt(0)), root_); + arm_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pReadBarrierForRootSlow), + instruction_, + instruction_->GetDexPc(), + this); + CheckEntrypointTypes<kQuickReadBarrierForRootSlow, mirror::Object*, GcRoot<mirror::Object>*>(); + arm_codegen->Move32(out_, Location::RegisterLocation(R0)); + + RestoreLiveRegisters(codegen, locations); + __ b(GetExitLabel()); + } + + const char* GetDescription() const OVERRIDE { return "ReadBarrierForRootSlowPathARM"; } + + private: + HInstruction* const instruction_; + const Location out_; + const Location root_; + + DISALLOW_COPY_AND_ASSIGN(ReadBarrierForRootSlowPathARM); +}; + #undef __ #define __ down_cast<ArmAssembler*>(GetAssembler())-> @@ -581,7 +805,7 @@ Location CodeGeneratorARM::AllocateFreeRegister(Primitive::Type type) const { LOG(FATAL) << "Unreachable type " << type; } - return Location(); + return Location::NoLocation(); } void CodeGeneratorARM::SetupBlockedRegisters(bool is_baseline) const { @@ -820,7 +1044,7 @@ Location InvokeDexCallingConventionVisitorARM::GetNextLocation(Primitive::Type t LOG(FATAL) << "Unexpected parameter type " << type; break; } - return Location(); + return Location::NoLocation(); } Location InvokeDexCallingConventionVisitorARM::GetReturnLocation(Primitive::Type type) const { @@ -847,7 +1071,7 @@ Location InvokeDexCallingConventionVisitorARM::GetReturnLocation(Primitive::Type } case Primitive::kPrimVoid: - return Location(); + return Location::NoLocation(); } UNREACHABLE(); @@ -1762,29 +1986,39 @@ void LocationsBuilderARM::VisitInvokeInterface(HInvokeInterface* invoke) { void InstructionCodeGeneratorARM::VisitInvokeInterface(HInvokeInterface* invoke) { // TODO: b/18116999, our IMTs can miss an IncompatibleClassChangeError. - Register temp = invoke->GetLocations()->GetTemp(0).AsRegister<Register>(); + LocationSummary* locations = invoke->GetLocations(); + Register temp = locations->GetTemp(0).AsRegister<Register>(); + Register hidden_reg = locations->GetTemp(1).AsRegister<Register>(); uint32_t method_offset = mirror::Class::EmbeddedImTableEntryOffset( invoke->GetImtIndex() % mirror::Class::kImtSize, kArmPointerSize).Uint32Value(); - LocationSummary* locations = invoke->GetLocations(); Location receiver = locations->InAt(0); uint32_t class_offset = mirror::Object::ClassOffset().Int32Value(); - // Set the hidden argument. - __ LoadImmediate(invoke->GetLocations()->GetTemp(1).AsRegister<Register>(), - invoke->GetDexMethodIndex()); + // Set the hidden argument. This is safe to do this here, as R12 + // won't be modified thereafter, before the `blx` (call) instruction. 
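Written out as pseudo-C++, the interface dispatch sequence in VisitInvokeInterface loads the receiver's class, indexes its embedded interface method table, and jumps through the entry point, while the hidden register (R12) carries the dex method index so an IMT conflict stub can resolve collisions. Field names and the table size below are simplified stand-ins, not ART's exact layout:

#include <cstddef>
#include <cstdint>

constexpr size_t kImtSize = 64;  // illustrative; ART configures mirror::Class::kImtSize

using EntryPoint = void (*)();

struct ArtMethodLike { EntryPoint entry_point; };
struct ClassLike     { ArtMethodLike* imt[kImtSize]; };
struct ObjectLike    { ClassLike* klass; };

void InvokeInterface(ObjectLike* receiver, uint32_t dex_method_index) {
  uint32_t hidden_arg = dex_method_index;                      // LoadImmediate(hidden_reg, ...)
  ClassLike* klass = receiver->klass;                          // temp = receiver->klass_
  ArtMethodLike* target = klass->imt[hidden_arg % kImtSize];   // temp = temp->GetImtEntryAt(...)
  target->entry_point();                                       // LR = temp->entry_point_; blx LR
}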
+ DCHECK_EQ(R12, hidden_reg); + __ LoadImmediate(hidden_reg, invoke->GetDexMethodIndex()); - // temp = object->GetClass(); if (receiver.IsStackSlot()) { __ LoadFromOffset(kLoadWord, temp, SP, receiver.GetStackIndex()); + // /* HeapReference<Class> */ temp = temp->klass_ __ LoadFromOffset(kLoadWord, temp, temp, class_offset); } else { + // /* HeapReference<Class> */ temp = receiver->klass_ __ LoadFromOffset(kLoadWord, temp, receiver.AsRegister<Register>(), class_offset); } codegen_->MaybeRecordImplicitNullCheck(invoke); + // Instead of simply (possibly) unpoisoning `temp` here, we should + // emit a read barrier for the previous class reference load. + // However this is not required in practice, as this is an + // intermediate/temporary reference and because the current + // concurrent copying collector keeps the from-space memory + // intact/accessible until the end of the marking phase (the + // concurrent copying collector may not in the future). __ MaybeUnpoisonHeapReference(temp); // temp = temp->GetImtEntryAt(method_offset); - uint32_t entry_point = ArtMethod::EntryPointFromQuickCompiledCodeOffset( - kArmWordSize).Int32Value(); + uint32_t entry_point = + ArtMethod::EntryPointFromQuickCompiledCodeOffset(kArmWordSize).Int32Value(); __ LoadFromOffset(kLoadWord, temp, temp, method_offset); // LR = temp->GetEntryPoint(); __ LoadFromOffset(kLoadWord, LR, temp, entry_point); @@ -2188,6 +2422,7 @@ void InstructionCodeGeneratorARM::VisitTypeConversion(HTypeConversion* conversio conversion, conversion->GetDexPc(), nullptr); + CheckEntrypointTypes<kQuickF2l, int64_t, float>(); break; case Primitive::kPrimDouble: @@ -2196,6 +2431,7 @@ void InstructionCodeGeneratorARM::VisitTypeConversion(HTypeConversion* conversio conversion, conversion->GetDexPc(), nullptr); + CheckEntrypointTypes<kQuickD2l, int64_t, double>(); break; default: @@ -2241,6 +2477,7 @@ void InstructionCodeGeneratorARM::VisitTypeConversion(HTypeConversion* conversio conversion, conversion->GetDexPc(), nullptr); + CheckEntrypointTypes<kQuickL2f, float, int64_t>(); break; case Primitive::kPrimDouble: @@ -2763,6 +3000,7 @@ void InstructionCodeGeneratorARM::VisitDiv(HDiv* div) { DCHECK_EQ(R0, out.AsRegister<Register>()); codegen_->InvokeRuntime(QUICK_ENTRY_POINT(pIdivmod), div, div->GetDexPc(), nullptr); + CheckEntrypointTypes<kQuickIdivmod, int32_t, int32_t, int32_t>(); } break; } @@ -2777,6 +3015,7 @@ void InstructionCodeGeneratorARM::VisitDiv(HDiv* div) { DCHECK_EQ(R1, out.AsRegisterPairHigh<Register>()); codegen_->InvokeRuntime(QUICK_ENTRY_POINT(pLdiv), div, div->GetDexPc(), nullptr); + CheckEntrypointTypes<kQuickLdiv, int64_t, int64_t, int64_t>(); break; } @@ -2905,22 +3144,26 @@ void InstructionCodeGeneratorARM::VisitRem(HRem* rem) { DCHECK_EQ(R1, out.AsRegister<Register>()); codegen_->InvokeRuntime(QUICK_ENTRY_POINT(pIdivmod), rem, rem->GetDexPc(), nullptr); + CheckEntrypointTypes<kQuickIdivmod, int32_t, int32_t, int32_t>(); } break; } case Primitive::kPrimLong: { codegen_->InvokeRuntime(QUICK_ENTRY_POINT(pLmod), rem, rem->GetDexPc(), nullptr); + CheckEntrypointTypes<kQuickLmod, int64_t, int64_t, int64_t>(); break; } case Primitive::kPrimFloat: { codegen_->InvokeRuntime(QUICK_ENTRY_POINT(pFmodf), rem, rem->GetDexPc(), nullptr); + CheckEntrypointTypes<kQuickFmodf, float, float, float>(); break; } case Primitive::kPrimDouble: { codegen_->InvokeRuntime(QUICK_ENTRY_POINT(pFmod), rem, rem->GetDexPc(), nullptr); + CheckEntrypointTypes<kQuickFmod, double, double, double>(); break; } @@ -3139,7 +3382,19 @@ void 
InstructionCodeGeneratorARM::HandleShift(HBinaryOperation* op) { __ mov(o_l, ShifterOperand(high)); __ LoadImmediate(o_h, 0); } - } else { // shift_value < 32 + } else if (shift_value == 1) { + if (op->IsShl()) { + __ Lsls(o_l, low, 1); + __ adc(o_h, high, ShifterOperand(high)); + } else if (op->IsShr()) { + __ Asrs(o_h, high, 1); + __ Rrx(o_l, low); + } else { + __ Lsrs(o_h, high, 1); + __ Rrx(o_l, low); + } + } else { + DCHECK(2 <= shift_value && shift_value < 32) << shift_value; if (op->IsShl()) { __ Lsl(o_h, high, shift_value); __ orr(o_h, o_h, ShifterOperand(low, LSR, 32 - shift_value)); @@ -3191,20 +3446,19 @@ void LocationsBuilderARM::VisitNewInstance(HNewInstance* instruction) { LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kCall); InvokeRuntimeCallingConvention calling_convention; - locations->AddTemp(Location::RegisterLocation(calling_convention.GetRegisterAt(0))); - locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(1))); + locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0))); + locations->SetInAt(1, Location::RegisterLocation(calling_convention.GetRegisterAt(1))); locations->SetOut(Location::RegisterLocation(R0)); } void InstructionCodeGeneratorARM::VisitNewInstance(HNewInstance* instruction) { - InvokeRuntimeCallingConvention calling_convention; - __ LoadImmediate(calling_convention.GetRegisterAt(0), instruction->GetTypeIndex()); // Note: if heap poisoning is enabled, the entry point takes cares // of poisoning the reference. codegen_->InvokeRuntime(instruction->GetEntrypoint(), instruction, instruction->GetDexPc(), nullptr); + CheckEntrypointTypes<kQuickAllocObjectWithAccessCheck, void*, uint32_t, ArtMethod*>(); } void LocationsBuilderARM::VisitNewArray(HNewArray* instruction) { @@ -3226,6 +3480,7 @@ void InstructionCodeGeneratorARM::VisitNewArray(HNewArray* instruction) { instruction, instruction->GetDexPc(), nullptr); + CheckEntrypointTypes<kQuickAllocArrayWithAccessCheck, void*, uint32_t, int32_t, ArtMethod*>(); } void LocationsBuilderARM::VisitParameterValue(HParameterValue* instruction) { @@ -3407,6 +3662,9 @@ void InstructionCodeGeneratorARM::GenerateWideAtomicLoad(Register addr, Register out_lo, Register out_hi) { if (offset != 0) { + // Ensure `out_lo` is different from `addr`, so that loading + // `offset` into `out_lo` does not clutter `addr`. + DCHECK_NE(out_lo, addr); __ LoadImmediate(out_lo, offset); __ add(IP, addr, ShifterOperand(out_lo)); addr = IP; @@ -3594,14 +3852,26 @@ void InstructionCodeGeneratorARM::HandleFieldSet(HInstruction* instruction, void LocationsBuilderARM::HandleFieldGet(HInstruction* instruction, const FieldInfo& field_info) { DCHECK(instruction->IsInstanceFieldGet() || instruction->IsStaticFieldGet()); + + bool object_field_get_with_read_barrier = + kEmitCompilerReadBarrier && (field_info.GetFieldType() == Primitive::kPrimNot); LocationSummary* locations = - new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall); + new (GetGraph()->GetArena()) LocationSummary(instruction, + object_field_get_with_read_barrier ? 
+ LocationSummary::kCallOnSlowPath : + LocationSummary::kNoCall); locations->SetInAt(0, Location::RequiresRegister()); bool volatile_for_double = field_info.IsVolatile() && (field_info.GetFieldType() == Primitive::kPrimDouble) && !codegen_->GetInstructionSetFeatures().HasAtomicLdrdAndStrd(); - bool overlap = field_info.IsVolatile() && (field_info.GetFieldType() == Primitive::kPrimLong); + // The output overlaps in case of volatile long: we don't want the + // code generated by GenerateWideAtomicLoad to overwrite the + // object's location. Likewise, in the case of an object field get + // with read barriers enabled, we do not want the load to overwrite + // the object's location, as we need it to emit the read barrier. + bool overlap = (field_info.IsVolatile() && (field_info.GetFieldType() == Primitive::kPrimLong)) || + object_field_get_with_read_barrier; if (Primitive::IsFloatingPointType(instruction->GetType())) { locations->SetOut(Location::RequiresFpuRegister()); @@ -3667,7 +3937,8 @@ void InstructionCodeGeneratorARM::HandleFieldGet(HInstruction* instruction, DCHECK(instruction->IsInstanceFieldGet() || instruction->IsStaticFieldGet()); LocationSummary* locations = instruction->GetLocations(); - Register base = locations->InAt(0).AsRegister<Register>(); + Location base_loc = locations->InAt(0); + Register base = base_loc.AsRegister<Register>(); Location out = locations->Out(); bool is_volatile = field_info.IsVolatile(); bool atomic_ldrd_strd = codegen_->GetInstructionSetFeatures().HasAtomicLdrdAndStrd(); @@ -3747,7 +4018,7 @@ void InstructionCodeGeneratorARM::HandleFieldGet(HInstruction* instruction, } if (field_type == Primitive::kPrimNot) { - __ MaybeUnpoisonHeapReference(out.AsRegister<Register>()); + codegen_->MaybeGenerateReadBarrier(instruction, out, out, base_loc, offset); } } @@ -3891,20 +4162,31 @@ void InstructionCodeGeneratorARM::VisitNullCheck(HNullCheck* instruction) { } void LocationsBuilderARM::VisitArrayGet(HArrayGet* instruction) { + bool object_array_get_with_read_barrier = + kEmitCompilerReadBarrier && (instruction->GetType() == Primitive::kPrimNot); LocationSummary* locations = - new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall); + new (GetGraph()->GetArena()) LocationSummary(instruction, + object_array_get_with_read_barrier ? + LocationSummary::kCallOnSlowPath : + LocationSummary::kNoCall); locations->SetInAt(0, Location::RequiresRegister()); locations->SetInAt(1, Location::RegisterOrConstant(instruction->InputAt(1))); if (Primitive::IsFloatingPointType(instruction->GetType())) { locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap); } else { - locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); + // The output overlaps in the case of an object array get with + // read barriers enabled: we do not want the move to overwrite the + // array's location, as we need it to emit the read barrier. + locations->SetOut( + Location::RequiresRegister(), + object_array_get_with_read_barrier ? 
Location::kOutputOverlap : Location::kNoOutputOverlap); } } void InstructionCodeGeneratorARM::VisitArrayGet(HArrayGet* instruction) { LocationSummary* locations = instruction->GetLocations(); - Register obj = locations->InAt(0).AsRegister<Register>(); + Location obj_loc = locations->InAt(0); + Register obj = obj_loc.AsRegister<Register>(); Location index = locations->InAt(1); Primitive::Type type = instruction->GetType(); @@ -3967,8 +4249,9 @@ void InstructionCodeGeneratorARM::VisitArrayGet(HArrayGet* instruction) { case Primitive::kPrimInt: case Primitive::kPrimNot: { - static_assert(sizeof(mirror::HeapReference<mirror::Object>) == sizeof(int32_t), - "art::mirror::HeapReference<mirror::Object> and int32_t have different sizes."); + static_assert( + sizeof(mirror::HeapReference<mirror::Object>) == sizeof(int32_t), + "art::mirror::HeapReference<mirror::Object> and int32_t have different sizes."); uint32_t data_offset = mirror::Array::DataOffset(sizeof(int32_t)).Uint32Value(); Register out = locations->Out().AsRegister<Register>(); if (index.IsConstant()) { @@ -4031,8 +4314,17 @@ void InstructionCodeGeneratorARM::VisitArrayGet(HArrayGet* instruction) { codegen_->MaybeRecordImplicitNullCheck(instruction); if (type == Primitive::kPrimNot) { - Register out = locations->Out().AsRegister<Register>(); - __ MaybeUnpoisonHeapReference(out); + static_assert( + sizeof(mirror::HeapReference<mirror::Object>) == sizeof(int32_t), + "art::mirror::HeapReference<art::mirror::Object> and int32_t have different sizes."); + uint32_t data_offset = mirror::Array::DataOffset(sizeof(int32_t)).Uint32Value(); + Location out = locations->Out(); + if (index.IsConstant()) { + uint32_t offset = (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_4) + data_offset; + codegen_->MaybeGenerateReadBarrier(instruction, out, out, obj_loc, offset); + } else { + codegen_->MaybeGenerateReadBarrier(instruction, out, out, obj_loc, data_offset, index); + } } } @@ -4041,11 +4333,16 @@ void LocationsBuilderARM::VisitArraySet(HArraySet* instruction) { bool needs_write_barrier = CodeGenerator::StoreNeedsWriteBarrier(value_type, instruction->GetValue()); - bool may_need_runtime_call = instruction->NeedsTypeCheck(); + bool may_need_runtime_call_for_type_check = instruction->NeedsTypeCheck(); + bool object_array_set_with_read_barrier = + kEmitCompilerReadBarrier && (value_type == Primitive::kPrimNot); LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary( instruction, - may_need_runtime_call ? LocationSummary::kCallOnSlowPath : LocationSummary::kNoCall); + (may_need_runtime_call_for_type_check || object_array_set_with_read_barrier) ? + LocationSummary::kCallOnSlowPath : + LocationSummary::kNoCall); + locations->SetInAt(0, Location::RequiresRegister()); locations->SetInAt(1, Location::RegisterOrConstant(instruction->InputAt(1))); if (Primitive::IsFloatingPointType(value_type)) { @@ -4053,7 +4350,6 @@ void LocationsBuilderARM::VisitArraySet(HArraySet* instruction) { } else { locations->SetInAt(2, Location::RequiresRegister()); } - if (needs_write_barrier) { // Temporary registers for the write barrier. locations->AddTemp(Location::RequiresRegister()); // Possibly used for ref. poisoning too. 
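The HandleShift hunk above adds a dedicated sequence for 64-bit shifts by exactly one bit: Lsls/adc for shl, Asrs/Rrx for shr and Lsrs/Rrx for ushr, using the carry flag to move the bit that crosses the word boundary. The identity those pairs rely on can be checked in portable C++ (assuming arithmetic right shift of signed values, as on the ARM targets here):

#include <cassert>
#include <cstdint>

// The (low, high) register pair the ARM code operates on.
struct Pair { uint32_t lo; uint32_t hi; };

Pair ShlBy1(Pair v) {                 // Lsls o_l, low, 1 ; adc o_h, high, high
  uint32_t carry = v.lo >> 31;        // bit shifted out of the low word
  return { v.lo << 1, (v.hi << 1) | carry };
}

Pair AsrBy1(Pair v) {                 // Asrs o_h, high, 1 ; Rrx o_l, low
  uint32_t carry = v.hi & 1u;         // bit shifted out of the high word
  return { (v.lo >> 1) | (carry << 31),
           static_cast<uint32_t>(static_cast<int32_t>(v.hi) >> 1) };
}

int main() {
  uint64_t x = 0x8000000180000001ULL;
  Pair p{static_cast<uint32_t>(x), static_cast<uint32_t>(x >> 32)};

  Pair l = ShlBy1(p);
  assert(((static_cast<uint64_t>(l.hi) << 32) | l.lo) == (x << 1));

  Pair r = AsrBy1(p);
  assert(((static_cast<uint64_t>(r.hi) << 32) | r.lo) ==
         static_cast<uint64_t>(static_cast<int64_t>(x) >> 1));
  return 0;
}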
@@ -4063,10 +4359,11 @@ void LocationsBuilderARM::VisitArraySet(HArraySet* instruction) { void InstructionCodeGeneratorARM::VisitArraySet(HArraySet* instruction) { LocationSummary* locations = instruction->GetLocations(); - Register array = locations->InAt(0).AsRegister<Register>(); + Location array_loc = locations->InAt(0); + Register array = array_loc.AsRegister<Register>(); Location index = locations->InAt(1); Primitive::Type value_type = instruction->GetComponentType(); - bool may_need_runtime_call = locations->CanCall(); + bool may_need_runtime_call_for_type_check = instruction->NeedsTypeCheck(); bool needs_write_barrier = CodeGenerator::StoreNeedsWriteBarrier(value_type, instruction->GetValue()); @@ -4103,7 +4400,8 @@ void InstructionCodeGeneratorARM::VisitArraySet(HArraySet* instruction) { case Primitive::kPrimNot: { uint32_t data_offset = mirror::Array::DataOffset(sizeof(int32_t)).Uint32Value(); - Register value = locations->InAt(2).AsRegister<Register>(); + Location value_loc = locations->InAt(2); + Register value = value_loc.AsRegister<Register>(); Register source = value; if (instruction->InputAt(2)->IsNullConstant()) { @@ -4117,6 +4415,8 @@ void InstructionCodeGeneratorARM::VisitArraySet(HArraySet* instruction) { __ add(IP, array, ShifterOperand(index.AsRegister<Register>(), LSL, TIMES_4)); __ StoreToOffset(kStoreWord, source, IP, data_offset); } + DCHECK(!needs_write_barrier); + DCHECK(!may_need_runtime_call_for_type_check); break; } @@ -4129,7 +4429,7 @@ void InstructionCodeGeneratorARM::VisitArraySet(HArraySet* instruction) { Label done; SlowPathCode* slow_path = nullptr; - if (may_need_runtime_call) { + if (may_need_runtime_call_for_type_check) { slow_path = new (GetGraph()->GetArena()) ArraySetSlowPathARM(instruction); codegen_->AddSlowPath(slow_path); if (instruction->GetValueCanBeNull()) { @@ -4149,23 +4449,63 @@ void InstructionCodeGeneratorARM::VisitArraySet(HArraySet* instruction) { __ Bind(&non_zero); } - __ LoadFromOffset(kLoadWord, temp1, array, class_offset); - codegen_->MaybeRecordImplicitNullCheck(instruction); - __ MaybeUnpoisonHeapReference(temp1); - __ LoadFromOffset(kLoadWord, temp1, temp1, component_offset); - __ LoadFromOffset(kLoadWord, temp2, value, class_offset); - // No need to poison/unpoison, we're comparing two poisoined references. - __ cmp(temp1, ShifterOperand(temp2)); - if (instruction->StaticTypeOfArrayIsObjectArray()) { - Label do_put; - __ b(&do_put, EQ); - __ MaybeUnpoisonHeapReference(temp1); - __ LoadFromOffset(kLoadWord, temp1, temp1, super_offset); - // No need to poison/unpoison, we're comparing against null. 
- __ CompareAndBranchIfNonZero(temp1, slow_path->GetEntryLabel()); - __ Bind(&do_put); + if (kEmitCompilerReadBarrier) { + // When read barriers are enabled, the type checking + // instrumentation requires two read barriers: + // + // __ Mov(temp2, temp1); + // // /* HeapReference<Class> */ temp1 = temp1->component_type_ + // __ LoadFromOffset(kLoadWord, temp1, temp1, component_offset); + // codegen_->GenerateReadBarrier( + // instruction, temp1_loc, temp1_loc, temp2_loc, component_offset); + // + // // /* HeapReference<Class> */ temp2 = value->klass_ + // __ LoadFromOffset(kLoadWord, temp2, value, class_offset); + // codegen_->GenerateReadBarrier( + // instruction, temp2_loc, temp2_loc, value_loc, class_offset, temp1_loc); + // + // __ cmp(temp1, ShifterOperand(temp2)); + // + // However, the second read barrier may trash `temp`, as it + // is a temporary register, and as such would not be saved + // along with live registers before calling the runtime (nor + // restored afterwards). So in this case, we bail out and + // delegate the work to the array set slow path. + // + // TODO: Extend the register allocator to support a new + // "(locally) live temp" location so as to avoid always + // going into the slow path when read barriers are enabled. + __ b(slow_path->GetEntryLabel()); } else { - __ b(slow_path->GetEntryLabel(), NE); + // /* HeapReference<Class> */ temp1 = array->klass_ + __ LoadFromOffset(kLoadWord, temp1, array, class_offset); + codegen_->MaybeRecordImplicitNullCheck(instruction); + __ MaybeUnpoisonHeapReference(temp1); + + // /* HeapReference<Class> */ temp1 = temp1->component_type_ + __ LoadFromOffset(kLoadWord, temp1, temp1, component_offset); + // /* HeapReference<Class> */ temp2 = value->klass_ + __ LoadFromOffset(kLoadWord, temp2, value, class_offset); + // If heap poisoning is enabled, no need to unpoison `temp1` + // nor `temp2`, as we are comparing two poisoned references. + __ cmp(temp1, ShifterOperand(temp2)); + + if (instruction->StaticTypeOfArrayIsObjectArray()) { + Label do_put; + __ b(&do_put, EQ); + // If heap poisoning is enabled, the `temp1` reference has + // not been unpoisoned yet; unpoison it now. + __ MaybeUnpoisonHeapReference(temp1); + + // /* HeapReference<Class> */ temp1 = temp1->super_class_ + __ LoadFromOffset(kLoadWord, temp1, temp1, super_offset); + // If heap poisoning is enabled, no need to unpoison + // `temp1`, as we are comparing against null below. 
+ __ CompareAndBranchIfNonZero(temp1, slow_path->GetEntryLabel()); + __ Bind(&do_put); + } else { + __ b(slow_path->GetEntryLabel(), NE); + } } } @@ -4189,7 +4529,7 @@ void InstructionCodeGeneratorARM::VisitArraySet(HArraySet* instruction) { __ StoreToOffset(kStoreWord, source, IP, data_offset); } - if (!may_need_runtime_call) { + if (!may_need_runtime_call_for_type_check) { codegen_->MaybeRecordImplicitNullCheck(instruction); } @@ -4618,7 +4958,8 @@ void LocationsBuilderARM::VisitLoadClass(HLoadClass* cls) { CodeGenerator::CreateLoadClassLocationSummary( cls, Location::RegisterLocation(calling_convention.GetRegisterAt(0)), - Location::RegisterLocation(R0)); + Location::RegisterLocation(R0), + /* code_generator_supports_read_barrier */ true); } void InstructionCodeGeneratorARM::VisitLoadClass(HLoadClass* cls) { @@ -4629,33 +4970,59 @@ void InstructionCodeGeneratorARM::VisitLoadClass(HLoadClass* cls) { cls, cls->GetDexPc(), nullptr); + CheckEntrypointTypes<kQuickInitializeTypeAndVerifyAccess, void*, uint32_t>(); return; } - Register out = locations->Out().AsRegister<Register>(); + Location out_loc = locations->Out(); + Register out = out_loc.AsRegister<Register>(); Register current_method = locations->InAt(0).AsRegister<Register>(); + if (cls->IsReferrersClass()) { DCHECK(!cls->CanCallRuntime()); DCHECK(!cls->MustGenerateClinitCheck()); - __ LoadFromOffset( - kLoadWord, out, current_method, ArtMethod::DeclaringClassOffset().Int32Value()); + uint32_t declaring_class_offset = ArtMethod::DeclaringClassOffset().Int32Value(); + if (kEmitCompilerReadBarrier) { + // /* GcRoot<mirror::Class>* */ out = &(current_method->declaring_class_) + __ AddConstant(out, current_method, declaring_class_offset); + // /* mirror::Class* */ out = out->Read() + codegen_->GenerateReadBarrierForRoot(cls, out_loc, out_loc); + } else { + // /* GcRoot<mirror::Class> */ out = current_method->declaring_class_ + __ LoadFromOffset(kLoadWord, out, current_method, declaring_class_offset); + } } else { - DCHECK(cls->CanCallRuntime()); + // /* GcRoot<mirror::Class>[] */ out = + // current_method.ptr_sized_fields_->dex_cache_resolved_types_ __ LoadFromOffset(kLoadWord, out, current_method, ArtMethod::DexCacheResolvedTypesOffset(kArmPointerSize).Int32Value()); - __ LoadFromOffset(kLoadWord, out, out, CodeGenerator::GetCacheOffset(cls->GetTypeIndex())); - // TODO: We will need a read barrier here. 
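The rewritten aput-object path in VisitArraySet above (the non-read-barrier branch) reduces to a class comparison with one escape hatch: when the static type of the array is Object[], a value whose class differs from the component type is still stored without the runtime call if that component type has no super class, i.e. is java.lang.Object itself. As plain C++:

struct Klass { Klass* super_class; Klass* component_type; };
struct Obj   { Klass* klass; };

enum class StoreDecision { kFastStore, kSlowPath };

StoreDecision CheckArrayStore(Obj* array, Obj* value, bool static_type_is_object_array) {
  if (value == nullptr) return StoreDecision::kFastStore;   // null needs no type check
  Klass* component = array->klass->component_type;          // temp1
  Klass* value_class = value->klass;                        // temp2
  if (component == value_class) return StoreDecision::kFastStore;
  if (static_type_is_object_array && component->super_class == nullptr) {
    return StoreDecision::kFastStore;                       // component type is Object
  }
  return StoreDecision::kSlowPath;                          // full check via pAputObject
}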
- SlowPathCode* slow_path = new (GetGraph()->GetArena()) LoadClassSlowPathARM( - cls, cls, cls->GetDexPc(), cls->MustGenerateClinitCheck()); - codegen_->AddSlowPath(slow_path); - __ CompareAndBranchIfZero(out, slow_path->GetEntryLabel()); - if (cls->MustGenerateClinitCheck()) { - GenerateClassInitializationCheck(slow_path, out); + size_t cache_offset = CodeGenerator::GetCacheOffset(cls->GetTypeIndex()); + if (kEmitCompilerReadBarrier) { + // /* GcRoot<mirror::Class>* */ out = &out[type_index] + __ AddConstant(out, out, cache_offset); + // /* mirror::Class* */ out = out->Read() + codegen_->GenerateReadBarrierForRoot(cls, out_loc, out_loc); } else { - __ Bind(slow_path->GetExitLabel()); + // /* GcRoot<mirror::Class> */ out = out[type_index] + __ LoadFromOffset(kLoadWord, out, out, cache_offset); + } + + if (!cls->IsInDexCache() || cls->MustGenerateClinitCheck()) { + DCHECK(cls->CanCallRuntime()); + SlowPathCode* slow_path = new (GetGraph()->GetArena()) LoadClassSlowPathARM( + cls, cls, cls->GetDexPc(), cls->MustGenerateClinitCheck()); + codegen_->AddSlowPath(slow_path); + if (!cls->IsInDexCache()) { + __ CompareAndBranchIfZero(out, slow_path->GetEntryLabel()); + } + if (cls->MustGenerateClinitCheck()) { + GenerateClassInitializationCheck(slow_path, out); + } else { + __ Bind(slow_path->GetExitLabel()); + } } } } @@ -4701,13 +5068,35 @@ void InstructionCodeGeneratorARM::VisitLoadString(HLoadString* load) { codegen_->AddSlowPath(slow_path); LocationSummary* locations = load->GetLocations(); - Register out = locations->Out().AsRegister<Register>(); + Location out_loc = locations->Out(); + Register out = out_loc.AsRegister<Register>(); Register current_method = locations->InAt(0).AsRegister<Register>(); - __ LoadFromOffset( - kLoadWord, out, current_method, ArtMethod::DeclaringClassOffset().Int32Value()); + + uint32_t declaring_class_offset = ArtMethod::DeclaringClassOffset().Int32Value(); + if (kEmitCompilerReadBarrier) { + // /* GcRoot<mirror::Class>* */ out = &(current_method->declaring_class_) + __ AddConstant(out, current_method, declaring_class_offset); + // /* mirror::Class* */ out = out->Read() + codegen_->GenerateReadBarrierForRoot(load, out_loc, out_loc); + } else { + // /* GcRoot<mirror::Class> */ out = current_method->declaring_class_ + __ LoadFromOffset(kLoadWord, out, current_method, declaring_class_offset); + } + + // /* GcRoot<mirror::String>[] */ out = out->dex_cache_strings_ __ LoadFromOffset(kLoadWord, out, out, mirror::Class::DexCacheStringsOffset().Int32Value()); - __ LoadFromOffset(kLoadWord, out, out, CodeGenerator::GetCacheOffset(load->GetStringIndex())); - // TODO: We will need a read barrier here. 
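With read barriers enabled, VisitLoadClass and VisitLoadString no longer load the GcRoot slot directly; they materialize the address of the slot and let the root read barrier return the up-to-date reference, which replaces the removed "TODO: We will need a read barrier here" comments. The two code shapes, sketched as C++ with stand-in types (ReadBarrierForRoot stands in for the pReadBarrierForRootSlow entrypoint, here reduced to a plain load):

#include <cstddef>

struct Class;
struct GcRootSlot { Class* reference; };

// Stand-in for the runtime root read barrier; the real one may return a
// to-space copy of the reference rather than the stored value.
static Class* ReadBarrierForRoot(GcRootSlot* root) { return root->reference; }

static Class* LoadClassFromDexCache(GcRootSlot* resolved_types, size_t type_index,
                                    bool emit_read_barrier) {
  GcRootSlot* slot = &resolved_types[type_index];  // AddConstant(out, out, cache_offset)
  if (emit_read_barrier) {
    return ReadBarrierForRoot(slot);               // GenerateReadBarrierForRoot(cls, out, out)
  }
  return slot->reference;                          // LoadFromOffset(kLoadWord, out, out, cache_offset)
}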
+ + size_t cache_offset = CodeGenerator::GetCacheOffset(load->GetStringIndex()); + if (kEmitCompilerReadBarrier) { + // /* GcRoot<mirror::String>* */ out = &out[string_index] + __ AddConstant(out, out, cache_offset); + // /* mirror::String* */ out = out->Read() + codegen_->GenerateReadBarrierForRoot(load, out_loc, out_loc); + } else { + // /* GcRoot<mirror::String> */ out = out[string_index] + __ LoadFromOffset(kLoadWord, out, out, cache_offset); + } + __ CompareAndBranchIfZero(out, slow_path->GetEntryLabel()); __ Bind(slow_path->GetExitLabel()); } @@ -4746,45 +5135,50 @@ void LocationsBuilderARM::VisitThrow(HThrow* instruction) { void InstructionCodeGeneratorARM::VisitThrow(HThrow* instruction) { codegen_->InvokeRuntime( QUICK_ENTRY_POINT(pDeliverException), instruction, instruction->GetDexPc(), nullptr); + CheckEntrypointTypes<kQuickDeliverException, void, mirror::Object*>(); } void LocationsBuilderARM::VisitInstanceOf(HInstanceOf* instruction) { LocationSummary::CallKind call_kind = LocationSummary::kNoCall; - switch (instruction->GetTypeCheckKind()) { + TypeCheckKind type_check_kind = instruction->GetTypeCheckKind(); + switch (type_check_kind) { case TypeCheckKind::kExactCheck: case TypeCheckKind::kAbstractClassCheck: case TypeCheckKind::kClassHierarchyCheck: case TypeCheckKind::kArrayObjectCheck: - call_kind = LocationSummary::kNoCall; + call_kind = + kEmitCompilerReadBarrier ? LocationSummary::kCallOnSlowPath : LocationSummary::kNoCall; break; + case TypeCheckKind::kArrayCheck: case TypeCheckKind::kUnresolvedCheck: case TypeCheckKind::kInterfaceCheck: - call_kind = LocationSummary::kCall; - break; - case TypeCheckKind::kArrayCheck: call_kind = LocationSummary::kCallOnSlowPath; break; } + LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction, call_kind); - if (call_kind != LocationSummary::kCall) { - locations->SetInAt(0, Location::RequiresRegister()); - locations->SetInAt(1, Location::RequiresRegister()); - // The out register is used as a temporary, so it overlaps with the inputs. - // Note that TypeCheckSlowPathARM uses this register too. - locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap); - } else { - InvokeRuntimeCallingConvention calling_convention; - locations->SetInAt(1, Location::RegisterLocation(calling_convention.GetRegisterAt(0))); - locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(1))); - locations->SetOut(Location::RegisterLocation(R0)); + locations->SetInAt(0, Location::RequiresRegister()); + locations->SetInAt(1, Location::RequiresRegister()); + // The "out" register is used as a temporary, so it overlaps with the inputs. + // Note that TypeCheckSlowPathARM uses this register too. + locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap); + // When read barriers are enabled, we need a temporary register for + // some cases. 
+ if (kEmitCompilerReadBarrier && + (type_check_kind == TypeCheckKind::kAbstractClassCheck || + type_check_kind == TypeCheckKind::kClassHierarchyCheck || + type_check_kind == TypeCheckKind::kArrayObjectCheck)) { + locations->AddTemp(Location::RequiresRegister()); } } void InstructionCodeGeneratorARM::VisitInstanceOf(HInstanceOf* instruction) { LocationSummary* locations = instruction->GetLocations(); - Register obj = locations->InAt(0).AsRegister<Register>(); + Location obj_loc = locations->InAt(0); + Register obj = obj_loc.AsRegister<Register>(); Register cls = locations->InAt(1).AsRegister<Register>(); - Register out = locations->Out().AsRegister<Register>(); + Location out_loc = locations->Out(); + Register out = out_loc.AsRegister<Register>(); uint32_t class_offset = mirror::Object::ClassOffset().Int32Value(); uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value(); uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value(); @@ -4798,15 +5192,9 @@ void InstructionCodeGeneratorARM::VisitInstanceOf(HInstanceOf* instruction) { __ CompareAndBranchIfZero(obj, &zero); } - // In case of an interface/unresolved check, we put the object class into the object register. - // This is safe, as the register is caller-save, and the object must be in another - // register if it survives the runtime call. - Register target = (instruction->GetTypeCheckKind() == TypeCheckKind::kInterfaceCheck) || - (instruction->GetTypeCheckKind() == TypeCheckKind::kUnresolvedCheck) - ? obj - : out; - __ LoadFromOffset(kLoadWord, target, obj, class_offset); - __ MaybeUnpoisonHeapReference(target); + // /* HeapReference<Class> */ out = obj->klass_ + __ LoadFromOffset(kLoadWord, out, obj, class_offset); + codegen_->MaybeGenerateReadBarrier(instruction, out_loc, out_loc, obj_loc, class_offset); switch (instruction->GetTypeCheckKind()) { case TypeCheckKind::kExactCheck: { @@ -4817,13 +5205,23 @@ void InstructionCodeGeneratorARM::VisitInstanceOf(HInstanceOf* instruction) { __ b(&done); break; } + case TypeCheckKind::kAbstractClassCheck: { // If the class is abstract, we eagerly fetch the super class of the // object to avoid doing a comparison we know will fail. Label loop; __ Bind(&loop); + Location temp_loc = kEmitCompilerReadBarrier ? locations->GetTemp(0) : Location::NoLocation(); + if (kEmitCompilerReadBarrier) { + // Save the value of `out` into `temp` before overwriting it + // in the following move operation, as we will need it for the + // read barrier below. + Register temp = temp_loc.AsRegister<Register>(); + __ Mov(temp, out); + } + // /* HeapReference<Class> */ out = out->super_class_ __ LoadFromOffset(kLoadWord, out, out, super_offset); - __ MaybeUnpoisonHeapReference(out); + codegen_->MaybeGenerateReadBarrier(instruction, out_loc, out_loc, temp_loc, super_offset); // If `out` is null, we use it for the result, and jump to `done`. __ CompareAndBranchIfZero(out, &done); __ cmp(out, ShifterOperand(cls)); @@ -4834,14 +5232,24 @@ void InstructionCodeGeneratorARM::VisitInstanceOf(HInstanceOf* instruction) { } break; } + case TypeCheckKind::kClassHierarchyCheck: { // Walk over the class hierarchy to find a match. Label loop, success; __ Bind(&loop); __ cmp(out, ShifterOperand(cls)); __ b(&success, EQ); + Location temp_loc = kEmitCompilerReadBarrier ? locations->GetTemp(0) : Location::NoLocation(); + if (kEmitCompilerReadBarrier) { + // Save the value of `out` into `temp` before overwriting it + // in the following move operation, as we will need it for the + // read barrier below. 
+ Register temp = temp_loc.AsRegister<Register>(); + __ Mov(temp, out); + } + // /* HeapReference<Class> */ out = out->super_class_ __ LoadFromOffset(kLoadWord, out, out, super_offset); - __ MaybeUnpoisonHeapReference(out); + codegen_->MaybeGenerateReadBarrier(instruction, out_loc, out_loc, temp_loc, super_offset); __ CompareAndBranchIfNonZero(out, &loop); // If `out` is null, we use it for the result, and jump to `done`. __ b(&done); @@ -4852,14 +5260,24 @@ void InstructionCodeGeneratorARM::VisitInstanceOf(HInstanceOf* instruction) { } break; } + case TypeCheckKind::kArrayObjectCheck: { // Do an exact check. Label exact_check; __ cmp(out, ShifterOperand(cls)); __ b(&exact_check, EQ); - // Otherwise, we need to check that the object's class is a non primitive array. + // Otherwise, we need to check that the object's class is a non-primitive array. + Location temp_loc = kEmitCompilerReadBarrier ? locations->GetTemp(0) : Location::NoLocation(); + if (kEmitCompilerReadBarrier) { + // Save the value of `out` into `temp` before overwriting it + // in the following move operation, as we will need it for the + // read barrier below. + Register temp = temp_loc.AsRegister<Register>(); + __ Mov(temp, out); + } + // /* HeapReference<Class> */ out = out->component_type_ __ LoadFromOffset(kLoadWord, out, out, component_offset); - __ MaybeUnpoisonHeapReference(out); + codegen_->MaybeGenerateReadBarrier(instruction, out_loc, out_loc, temp_loc, component_offset); // If `out` is null, we use it for the result, and jump to `done`. __ CompareAndBranchIfZero(out, &done); __ LoadFromOffset(kLoadUnsignedHalfword, out, out, primitive_offset); @@ -4870,11 +5288,12 @@ void InstructionCodeGeneratorARM::VisitInstanceOf(HInstanceOf* instruction) { __ b(&done); break; } + case TypeCheckKind::kArrayCheck: { __ cmp(out, ShifterOperand(cls)); DCHECK(locations->OnlyCallsOnSlowPath()); - slow_path = new (GetGraph()->GetArena()) TypeCheckSlowPathARM( - instruction, /* is_fatal */ false); + slow_path = new (GetGraph()->GetArena()) TypeCheckSlowPathARM(instruction, + /* is_fatal */ false); codegen_->AddSlowPath(slow_path); __ b(slow_path->GetEntryLabel(), NE); __ LoadImmediate(out, 1); @@ -4883,13 +5302,25 @@ void InstructionCodeGeneratorARM::VisitInstanceOf(HInstanceOf* instruction) { } break; } + case TypeCheckKind::kUnresolvedCheck: - case TypeCheckKind::kInterfaceCheck: - default: { - codegen_->InvokeRuntime(QUICK_ENTRY_POINT(pInstanceofNonTrivial), - instruction, - instruction->GetDexPc(), - nullptr); + case TypeCheckKind::kInterfaceCheck: { + // Note that we indeed only call on slow path, but we always go + // into the slow path for the unresolved & interface check + // cases. + // + // We cannot directly call the InstanceofNonTrivial runtime + // entry point without resorting to a type checking slow path + // here (i.e. by calling InvokeRuntime directly), as it would + // require to assign fixed registers for the inputs of this + // HInstanceOf instruction (following the runtime calling + // convention), which might be cluttered by the potential first + // read barrier emission at the beginning of this method. 
+ DCHECK(locations->OnlyCallsOnSlowPath()); + slow_path = new (GetGraph()->GetArena()) TypeCheckSlowPathARM(instruction, + /* is_fatal */ false); + codegen_->AddSlowPath(slow_path); + __ b(slow_path->GetEntryLabel()); if (zero.IsLinked()) { __ b(&done); } @@ -4915,57 +5346,61 @@ void LocationsBuilderARM::VisitCheckCast(HCheckCast* instruction) { LocationSummary::CallKind call_kind = LocationSummary::kNoCall; bool throws_into_catch = instruction->CanThrowIntoCatchBlock(); - switch (instruction->GetTypeCheckKind()) { + TypeCheckKind type_check_kind = instruction->GetTypeCheckKind(); + switch (type_check_kind) { case TypeCheckKind::kExactCheck: case TypeCheckKind::kAbstractClassCheck: case TypeCheckKind::kClassHierarchyCheck: case TypeCheckKind::kArrayObjectCheck: - call_kind = throws_into_catch - ? LocationSummary::kCallOnSlowPath - : LocationSummary::kNoCall; + call_kind = (throws_into_catch || kEmitCompilerReadBarrier) ? + LocationSummary::kCallOnSlowPath : + LocationSummary::kNoCall; // In fact, call on a fatal (non-returning) slow path. break; + case TypeCheckKind::kArrayCheck: case TypeCheckKind::kUnresolvedCheck: case TypeCheckKind::kInterfaceCheck: - call_kind = LocationSummary::kCall; - break; - case TypeCheckKind::kArrayCheck: call_kind = LocationSummary::kCallOnSlowPath; break; } - LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary( - instruction, call_kind); - if (call_kind != LocationSummary::kCall) { - locations->SetInAt(0, Location::RequiresRegister()); - locations->SetInAt(1, Location::RequiresRegister()); - // Note that TypeCheckSlowPathARM uses this register too. + LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction, call_kind); + locations->SetInAt(0, Location::RequiresRegister()); + locations->SetInAt(1, Location::RequiresRegister()); + // Note that TypeCheckSlowPathARM uses this "temp" register too. + locations->AddTemp(Location::RequiresRegister()); + // When read barriers are enabled, we need an additional temporary + // register for some cases. + if (kEmitCompilerReadBarrier && + (type_check_kind == TypeCheckKind::kAbstractClassCheck || + type_check_kind == TypeCheckKind::kClassHierarchyCheck || + type_check_kind == TypeCheckKind::kArrayObjectCheck)) { locations->AddTemp(Location::RequiresRegister()); - } else { - InvokeRuntimeCallingConvention calling_convention; - locations->SetInAt(1, Location::RegisterLocation(calling_convention.GetRegisterAt(0))); - locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(1))); } } void InstructionCodeGeneratorARM::VisitCheckCast(HCheckCast* instruction) { LocationSummary* locations = instruction->GetLocations(); - Register obj = locations->InAt(0).AsRegister<Register>(); + Location obj_loc = locations->InAt(0); + Register obj = obj_loc.AsRegister<Register>(); Register cls = locations->InAt(1).AsRegister<Register>(); - Register temp = locations->WillCall() - ? 
Register(kNoRegister) - : locations->GetTemp(0).AsRegister<Register>(); - + Location temp_loc = locations->GetTemp(0); + Register temp = temp_loc.AsRegister<Register>(); uint32_t class_offset = mirror::Object::ClassOffset().Int32Value(); uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value(); uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value(); uint32_t primitive_offset = mirror::Class::PrimitiveTypeOffset().Int32Value(); - SlowPathCode* slow_path = nullptr; - if (!locations->WillCall()) { - slow_path = new (GetGraph()->GetArena()) TypeCheckSlowPathARM( - instruction, !locations->CanCall()); - codegen_->AddSlowPath(slow_path); - } + TypeCheckKind type_check_kind = instruction->GetTypeCheckKind(); + bool is_type_check_slow_path_fatal = + (type_check_kind == TypeCheckKind::kExactCheck || + type_check_kind == TypeCheckKind::kAbstractClassCheck || + type_check_kind == TypeCheckKind::kClassHierarchyCheck || + type_check_kind == TypeCheckKind::kArrayObjectCheck) && + !instruction->CanThrowIntoCatchBlock(); + SlowPathCode* type_check_slow_path = + new (GetGraph()->GetArena()) TypeCheckSlowPathARM(instruction, + is_type_check_slow_path_fatal); + codegen_->AddSlowPath(type_check_slow_path); Label done; // Avoid null check if we know obj is not null. @@ -4973,76 +5408,159 @@ void InstructionCodeGeneratorARM::VisitCheckCast(HCheckCast* instruction) { __ CompareAndBranchIfZero(obj, &done); } - if (locations->WillCall()) { - __ LoadFromOffset(kLoadWord, obj, obj, class_offset); - __ MaybeUnpoisonHeapReference(obj); - } else { - __ LoadFromOffset(kLoadWord, temp, obj, class_offset); - __ MaybeUnpoisonHeapReference(temp); - } + // /* HeapReference<Class> */ temp = obj->klass_ + __ LoadFromOffset(kLoadWord, temp, obj, class_offset); + codegen_->MaybeGenerateReadBarrier(instruction, temp_loc, temp_loc, obj_loc, class_offset); - switch (instruction->GetTypeCheckKind()) { + switch (type_check_kind) { case TypeCheckKind::kExactCheck: case TypeCheckKind::kArrayCheck: { __ cmp(temp, ShifterOperand(cls)); // Jump to slow path for throwing the exception or doing a // more involved array check. - __ b(slow_path->GetEntryLabel(), NE); + __ b(type_check_slow_path->GetEntryLabel(), NE); break; } + case TypeCheckKind::kAbstractClassCheck: { // If the class is abstract, we eagerly fetch the super class of the // object to avoid doing a comparison we know will fail. - Label loop; + Label loop, compare_classes; __ Bind(&loop); + Location temp2_loc = + kEmitCompilerReadBarrier ? locations->GetTemp(1) : Location::NoLocation(); + if (kEmitCompilerReadBarrier) { + // Save the value of `temp` into `temp2` before overwriting it + // in the following move operation, as we will need it for the + // read barrier below. + Register temp2 = temp2_loc.AsRegister<Register>(); + __ Mov(temp2, temp); + } + // /* HeapReference<Class> */ temp = temp->super_class_ __ LoadFromOffset(kLoadWord, temp, temp, super_offset); - __ MaybeUnpoisonHeapReference(temp); - // Jump to the slow path to throw the exception. - __ CompareAndBranchIfZero(temp, slow_path->GetEntryLabel()); + codegen_->MaybeGenerateReadBarrier(instruction, temp_loc, temp_loc, temp2_loc, super_offset); + + // If the class reference currently in `temp` is not null, jump + // to the `compare_classes` label to compare it with the checked + // class. + __ CompareAndBranchIfNonZero(temp, &compare_classes); + // Otherwise, jump to the slow path to throw the exception. 
+ // + // But before, move back the object's class into `temp` before + // going into the slow path, as it has been overwritten in the + // meantime. + // /* HeapReference<Class> */ temp = obj->klass_ + __ LoadFromOffset(kLoadWord, temp, obj, class_offset); + codegen_->MaybeGenerateReadBarrier(instruction, temp_loc, temp_loc, obj_loc, class_offset); + __ b(type_check_slow_path->GetEntryLabel()); + + __ Bind(&compare_classes); __ cmp(temp, ShifterOperand(cls)); __ b(&loop, NE); break; } + case TypeCheckKind::kClassHierarchyCheck: { // Walk over the class hierarchy to find a match. Label loop; __ Bind(&loop); __ cmp(temp, ShifterOperand(cls)); __ b(&done, EQ); + + Location temp2_loc = + kEmitCompilerReadBarrier ? locations->GetTemp(1) : Location::NoLocation(); + if (kEmitCompilerReadBarrier) { + // Save the value of `temp` into `temp2` before overwriting it + // in the following move operation, as we will need it for the + // read barrier below. + Register temp2 = temp2_loc.AsRegister<Register>(); + __ Mov(temp2, temp); + } + // /* HeapReference<Class> */ temp = temp->super_class_ __ LoadFromOffset(kLoadWord, temp, temp, super_offset); - __ MaybeUnpoisonHeapReference(temp); + codegen_->MaybeGenerateReadBarrier(instruction, temp_loc, temp_loc, temp2_loc, super_offset); + + // If the class reference currently in `temp` is not null, jump + // back at the beginning of the loop. __ CompareAndBranchIfNonZero(temp, &loop); - // Jump to the slow path to throw the exception. - __ b(slow_path->GetEntryLabel()); + // Otherwise, jump to the slow path to throw the exception. + // + // But before, move back the object's class into `temp` before + // going into the slow path, as it has been overwritten in the + // meantime. + // /* HeapReference<Class> */ temp = obj->klass_ + __ LoadFromOffset(kLoadWord, temp, obj, class_offset); + codegen_->MaybeGenerateReadBarrier(instruction, temp_loc, temp_loc, obj_loc, class_offset); + __ b(type_check_slow_path->GetEntryLabel()); break; } + case TypeCheckKind::kArrayObjectCheck: { // Do an exact check. + Label check_non_primitive_component_type; __ cmp(temp, ShifterOperand(cls)); __ b(&done, EQ); - // Otherwise, we need to check that the object's class is a non primitive array. + + // Otherwise, we need to check that the object's class is a non-primitive array. + Location temp2_loc = + kEmitCompilerReadBarrier ? locations->GetTemp(1) : Location::NoLocation(); + if (kEmitCompilerReadBarrier) { + // Save the value of `temp` into `temp2` before overwriting it + // in the following move operation, as we will need it for the + // read barrier below. + Register temp2 = temp2_loc.AsRegister<Register>(); + __ Mov(temp2, temp); + } + // /* HeapReference<Class> */ temp = temp->component_type_ __ LoadFromOffset(kLoadWord, temp, temp, component_offset); - __ MaybeUnpoisonHeapReference(temp); - __ CompareAndBranchIfZero(temp, slow_path->GetEntryLabel()); + codegen_->MaybeGenerateReadBarrier( + instruction, temp_loc, temp_loc, temp2_loc, component_offset); + + // If the component type is not null (i.e. the object is indeed + // an array), jump to label `check_non_primitive_component_type` + // to further check that this component type is not a primitive + // type. + __ CompareAndBranchIfNonZero(temp, &check_non_primitive_component_type); + // Otherwise, jump to the slow path to throw the exception. + // + // But before, move back the object's class into `temp` before + // going into the slow path, as it has been overwritten in the + // meantime. 
+ // /* HeapReference<Class> */ temp = obj->klass_ + __ LoadFromOffset(kLoadWord, temp, obj, class_offset); + codegen_->MaybeGenerateReadBarrier(instruction, temp_loc, temp_loc, obj_loc, class_offset); + __ b(type_check_slow_path->GetEntryLabel()); + + __ Bind(&check_non_primitive_component_type); __ LoadFromOffset(kLoadUnsignedHalfword, temp, temp, primitive_offset); - static_assert(Primitive::kPrimNot == 0, "Expected 0 for kPrimNot"); - __ CompareAndBranchIfNonZero(temp, slow_path->GetEntryLabel()); + static_assert(Primitive::kPrimNot == 0, "Expected 0 for art::Primitive::kPrimNot"); + __ CompareAndBranchIfZero(temp, &done); + // Same comment as above regarding `temp` and the slow path. + // /* HeapReference<Class> */ temp = obj->klass_ + __ LoadFromOffset(kLoadWord, temp, obj, class_offset); + codegen_->MaybeGenerateReadBarrier(instruction, temp_loc, temp_loc, obj_loc, class_offset); + __ b(type_check_slow_path->GetEntryLabel()); break; } + case TypeCheckKind::kUnresolvedCheck: case TypeCheckKind::kInterfaceCheck: - default: - codegen_->InvokeRuntime(QUICK_ENTRY_POINT(pCheckCast), - instruction, - instruction->GetDexPc(), - nullptr); + // We always go into the type check slow path for the unresolved & + // interface check cases. + // + // We cannot directly call the CheckCast runtime entry point + // without resorting to a type checking slow path here (i.e. by + // calling InvokeRuntime directly), as it would require to + // assign fixed registers for the inputs of this HInstanceOf + // instruction (following the runtime calling convention), which + // might be cluttered by the potential first read barrier + // emission at the beginning of this method. + __ b(type_check_slow_path->GetEntryLabel()); break; } __ Bind(&done); - if (slow_path != nullptr) { - __ Bind(slow_path->GetExitLabel()); - } + __ Bind(type_check_slow_path->GetExitLabel()); } void LocationsBuilderARM::VisitMonitorOperation(HMonitorOperation* instruction) { @@ -5058,6 +5576,11 @@ void InstructionCodeGeneratorARM::VisitMonitorOperation(HMonitorOperation* instr instruction, instruction->GetDexPc(), nullptr); + if (instruction->IsEnter()) { + CheckEntrypointTypes<kQuickLockObject, void, mirror::Object*>(); + } else { + CheckEntrypointTypes<kQuickUnlockObject, void, mirror::Object*>(); + } } void LocationsBuilderARM::VisitAnd(HAnd* instruction) { HandleBitwiseOperation(instruction, AND); } @@ -5216,6 +5739,82 @@ void InstructionCodeGeneratorARM::HandleBitwiseOperation(HBinaryOperation* instr } } +void CodeGeneratorARM::GenerateReadBarrier(HInstruction* instruction, + Location out, + Location ref, + Location obj, + uint32_t offset, + Location index) { + DCHECK(kEmitCompilerReadBarrier); + + // If heap poisoning is enabled, the unpoisoning of the loaded + // reference will be carried out by the runtime within the slow + // path. + // + // Note that `ref` currently does not get unpoisoned (when heap + // poisoning is enabled), which is alright as the `ref` argument is + // not used by the artReadBarrierSlow entry point. + // + // TODO: Unpoison `ref` when it is used by artReadBarrierSlow. + SlowPathCode* slow_path = new (GetGraph()->GetArena()) + ReadBarrierForHeapReferenceSlowPathARM(instruction, out, ref, obj, offset, index); + AddSlowPath(slow_path); + + // TODO: When read barrier has a fast path, add it here. + /* Currently the read barrier call is inserted after the original load. + * However, if we have a fast path, we need to perform the load of obj.LockWord *before* the + * original load. 
This load-load ordering is required by the read barrier. + * The fast path/slow path (for Baker's algorithm) should look like: + * + * bool isGray = obj.LockWord & kReadBarrierMask; + * lfence; // load fence or artificial data dependence to prevent load-load reordering + * ref = obj.field; // this is the original load + * if (isGray) { + * ref = Mark(ref); // ideally the slow path just does Mark(ref) + * } + */ + + __ b(slow_path->GetEntryLabel()); + __ Bind(slow_path->GetExitLabel()); +} + +void CodeGeneratorARM::MaybeGenerateReadBarrier(HInstruction* instruction, + Location out, + Location ref, + Location obj, + uint32_t offset, + Location index) { + if (kEmitCompilerReadBarrier) { + // If heap poisoning is enabled, unpoisoning will be taken care of + // by the runtime within the slow path. + GenerateReadBarrier(instruction, out, ref, obj, offset, index); + } else if (kPoisonHeapReferences) { + __ UnpoisonHeapReference(out.AsRegister<Register>()); + } +} + +void CodeGeneratorARM::GenerateReadBarrierForRoot(HInstruction* instruction, + Location out, + Location root) { + DCHECK(kEmitCompilerReadBarrier); + + // Note that GC roots are not affected by heap poisoning, so we do + // not need to do anything special for this here. + SlowPathCode* slow_path = + new (GetGraph()->GetArena()) ReadBarrierForRootSlowPathARM(instruction, out, root); + AddSlowPath(slow_path); + + // TODO: Implement a fast path for ReadBarrierForRoot, performing + // the following operation (for Baker's algorithm): + // + // if (thread.tls32_.is_gc_marking) { + // root = Mark(root); + // } + + __ b(slow_path->GetEntryLabel()); + __ Bind(slow_path->GetExitLabel()); +} + HInvokeStaticOrDirect::DispatchInfo CodeGeneratorARM::GetSupportedInvokeStaticOrDirectDispatch( const HInvokeStaticOrDirect::DispatchInfo& desired_dispatch_info, MethodReference target_method) { @@ -5273,7 +5872,7 @@ void CodeGeneratorARM::GenerateStaticOrDirectCall(HInvokeStaticOrDirect* invoke, __ LoadFromOffset(kLoadWord, temp.AsRegister<Register>(), TR, invoke->GetStringInitOffset()); break; case HInvokeStaticOrDirect::MethodLoadKind::kRecursive: - callee_method = invoke->GetLocations()->InAt(invoke->GetCurrentMethodInputIndex()); + callee_method = invoke->GetLocations()->InAt(invoke->GetSpecialInputIndex()); break; case HInvokeStaticOrDirect::MethodLoadKind::kDirectAddress: __ LoadImmediate(temp.AsRegister<Register>(), invoke->GetMethodAddress()); @@ -5288,7 +5887,7 @@ void CodeGeneratorARM::GenerateStaticOrDirectCall(HInvokeStaticOrDirect* invoke, LOG(FATAL) << "Unsupported"; UNREACHABLE(); case HInvokeStaticOrDirect::MethodLoadKind::kDexCacheViaMethod: { - Location current_method = invoke->GetLocations()->InAt(invoke->GetCurrentMethodInputIndex()); + Location current_method = invoke->GetLocations()->InAt(invoke->GetSpecialInputIndex()); Register method_reg; Register reg = temp.AsRegister<Register>(); if (current_method.IsRegister()) { @@ -5299,10 +5898,11 @@ void CodeGeneratorARM::GenerateStaticOrDirectCall(HInvokeStaticOrDirect* invoke, method_reg = reg; __ LoadFromOffset(kLoadWord, reg, SP, kCurrentMethodStackOffset); } - // temp = current_method->dex_cache_resolved_methods_; - __ LoadFromOffset( - kLoadWord, reg, method_reg, ArtMethod::DexCacheResolvedMethodsOffset( - kArmPointerSize).Int32Value()); + // /* ArtMethod*[] */ temp = temp.ptr_sized_fields_->dex_cache_resolved_methods_; + __ LoadFromOffset(kLoadWord, + reg, + method_reg, + ArtMethod::DexCacheResolvedMethodsOffset(kArmPointerSize).Int32Value()); // temp = temp[index_in_cache] uint32_t 
index_in_cache = invoke->GetTargetMethod().dex_method_index; __ LoadFromOffset(kLoadWord, reg, reg, CodeGenerator::GetCachePointerOffset(index_in_cache)); @@ -5346,10 +5946,17 @@ void CodeGeneratorARM::GenerateVirtualCall(HInvokeVirtual* invoke, Location temp LocationSummary* locations = invoke->GetLocations(); Location receiver = locations->InAt(0); uint32_t class_offset = mirror::Object::ClassOffset().Int32Value(); - // temp = object->GetClass(); DCHECK(receiver.IsRegister()); + // /* HeapReference<Class> */ temp = receiver->klass_ __ LoadFromOffset(kLoadWord, temp, receiver.AsRegister<Register>(), class_offset); MaybeRecordImplicitNullCheck(invoke); + // Instead of simply (possibly) unpoisoning `temp` here, we should + // emit a read barrier for the previous class reference load. + // However this is not required in practice, as this is an + // intermediate/temporary reference and because the current + // concurrent copying collector keeps the from-space memory + // intact/accessible until the end of the marking phase (the + // concurrent copying collector may not in the future). __ MaybeUnpoisonHeapReference(temp); // temp = temp->GetMethodAt(method_offset); uint32_t entry_point = ArtMethod::EntryPointFromQuickCompiledCodeOffset( diff --git a/compiler/optimizing/code_generator_arm.h b/compiler/optimizing/code_generator_arm.h index 32bfe0f0be..89de4f801d 100644 --- a/compiler/optimizing/code_generator_arm.h +++ b/compiler/optimizing/code_generator_arm.h @@ -373,6 +373,51 @@ class CodeGeneratorARM : public CodeGenerator { void EmitLinkerPatches(ArenaVector<LinkerPatch>* linker_patches) OVERRIDE; + // Generate a read barrier for a heap reference within `instruction`. + // + // A read barrier for an object reference read from the heap is + // implemented as a call to the artReadBarrierSlow runtime entry + // point, which is passed the values in locations `ref`, `obj`, and + // `offset`: + // + // mirror::Object* artReadBarrierSlow(mirror::Object* ref, + // mirror::Object* obj, + // uint32_t offset); + // + // The `out` location contains the value returned by + // artReadBarrierSlow. + // + // When `index` is provided (i.e. for array accesses), the offset + // value passed to artReadBarrierSlow is adjusted to take `index` + // into account. + void GenerateReadBarrier(HInstruction* instruction, + Location out, + Location ref, + Location obj, + uint32_t offset, + Location index = Location::NoLocation()); + + // If read barriers are enabled, generate a read barrier for a heap reference. + // If heap poisoning is enabled, also unpoison the reference in `out`. + void MaybeGenerateReadBarrier(HInstruction* instruction, + Location out, + Location ref, + Location obj, + uint32_t offset, + Location index = Location::NoLocation()); + + // Generate a read barrier for a GC root within `instruction`. + // + // A read barrier for an object reference GC root is implemented as + // a call to the artReadBarrierForRootSlow runtime entry point, + // which is passed the value in location `root`: + // + // mirror::Object* artReadBarrierForRootSlow(GcRoot<mirror::Object>* root); + // + // The `out` location contains the value returned by + // artReadBarrierForRootSlow. 
+ void GenerateReadBarrierForRoot(HInstruction* instruction, Location out, Location root); + private: using MethodToLiteralMap = ArenaSafeMap<MethodReference, Literal*, MethodReferenceComparator>; diff --git a/compiler/optimizing/code_generator_arm64.cc b/compiler/optimizing/code_generator_arm64.cc index d1bddf673a..ac16268834 100644 --- a/compiler/optimizing/code_generator_arm64.cc +++ b/compiler/optimizing/code_generator_arm64.cc @@ -42,6 +42,9 @@ using namespace vixl; // NOLINT(build/namespaces) namespace art { +template<class MirrorType> +class GcRoot; + namespace arm64 { using helpers::CPURegisterFrom; @@ -431,15 +434,6 @@ class TypeCheckSlowPathARM64 : public SlowPathCodeARM64 { __ Bind(GetEntryLabel()); - if (instruction_->IsCheckCast()) { - // The codegen for the instruction overwrites `temp`, so put it back in place. - Register obj = InputRegisterAt(instruction_, 0); - Register temp = WRegisterFrom(locations->GetTemp(0)); - uint32_t class_offset = mirror::Object::ClassOffset().Int32Value(); - __ Ldr(temp, HeapOperand(obj, class_offset)); - arm64_codegen->GetAssembler()->MaybeUnpoisonHeapReference(temp); - } - if (!is_fatal_) { SaveLiveRegisters(codegen, locations); } @@ -454,11 +448,11 @@ class TypeCheckSlowPathARM64 : public SlowPathCodeARM64 { if (instruction_->IsInstanceOf()) { arm64_codegen->InvokeRuntime( QUICK_ENTRY_POINT(pInstanceofNonTrivial), instruction_, dex_pc, this); + CheckEntrypointTypes<kQuickInstanceofNonTrivial, uint32_t, + const mirror::Class*, const mirror::Class*>(); Primitive::Type ret_type = instruction_->GetType(); Location ret_loc = calling_convention.GetReturnLocation(ret_type); arm64_codegen->MoveLocation(locations->Out(), ret_loc, ret_type); - CheckEntrypointTypes<kQuickInstanceofNonTrivial, uint32_t, - const mirror::Class*, const mirror::Class*>(); } else { DCHECK(instruction_->IsCheckCast()); arm64_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pCheckCast), instruction_, dex_pc, this); @@ -494,6 +488,7 @@ class DeoptimizationSlowPathARM64 : public SlowPathCodeARM64 { uint32_t dex_pc = deoptimize->GetDexPc(); CodeGeneratorARM64* arm64_codegen = down_cast<CodeGeneratorARM64*>(codegen); arm64_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pDeoptimize), instruction_, dex_pc, this); + CheckEntrypointTypes<kQuickDeoptimize, void, void>(); } const char* GetDescription() const OVERRIDE { return "DeoptimizationSlowPathARM64"; } @@ -571,6 +566,271 @@ void JumpTableARM64::EmitTable(CodeGeneratorARM64* codegen) { } } +// Slow path generating a read barrier for a heap reference. +class ReadBarrierForHeapReferenceSlowPathARM64 : public SlowPathCodeARM64 { + public: + ReadBarrierForHeapReferenceSlowPathARM64(HInstruction* instruction, + Location out, + Location ref, + Location obj, + uint32_t offset, + Location index) + : instruction_(instruction), + out_(out), + ref_(ref), + obj_(obj), + offset_(offset), + index_(index) { + DCHECK(kEmitCompilerReadBarrier); + // If `obj` is equal to `out` or `ref`, it means the initial object + // has been overwritten by (or after) the heap object reference load + // to be instrumented, e.g.: + // + // __ Ldr(out, HeapOperand(out, class_offset); + // codegen_->GenerateReadBarrier(instruction, out_loc, out_loc, out_loc, offset); + // + // In that case, we have lost the information about the original + // object, and the emitted read barrier cannot work properly. 
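The DCHECKs that follow guard against an aliasing hazard: if the holder register were reused as the destination of the instrumented load, the slow path would no longer know which object the reference came from. A plain-C++ illustration (placeholder Obj type and read_barrier callback, not ART code):

  struct Obj { Obj* field; };

  // `read_barrier` stands in for the slow-path call, which needs both the
  // loaded reference and the object it was loaded from.
  Obj* LoadThenBarrier(Obj* obj, Obj* (*read_barrier)(Obj* ref, Obj* holder)) {
    Obj* out = obj->field;          // the instrumented load
    return read_barrier(out, obj);  // fine: `obj` still names the holder
    // Had the load been written as `obj = obj->field;`, the holder would be
    // gone and the barrier could not be issued correctly.
  }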
+ DCHECK(!obj.Equals(out)) << "obj=" << obj << " out=" << out; + DCHECK(!obj.Equals(ref)) << "obj=" << obj << " ref=" << ref; + } + + void EmitNativeCode(CodeGenerator* codegen) OVERRIDE { + CodeGeneratorARM64* arm64_codegen = down_cast<CodeGeneratorARM64*>(codegen); + LocationSummary* locations = instruction_->GetLocations(); + Primitive::Type type = Primitive::kPrimNot; + DCHECK(locations->CanCall()); + DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(out_.reg())); + DCHECK(!instruction_->IsInvoke() || + (instruction_->IsInvokeStaticOrDirect() && + instruction_->GetLocations()->Intrinsified())); + + __ Bind(GetEntryLabel()); + + // Note: In the case of a HArrayGet instruction, when the base + // address is a HArm64IntermediateAddress instruction, it does not + // point to the array object itself, but to an offset within this + // object. However, the read barrier entry point needs the array + // object address to be passed as first argument. So we + // temporarily set back `obj_` to that address, and restore its + // initial value later. + if (instruction_->IsArrayGet() && + instruction_->AsArrayGet()->GetArray()->IsArm64IntermediateAddress()) { + if (kIsDebugBuild) { + HArm64IntermediateAddress* intermediate_address = + instruction_->AsArrayGet()->GetArray()->AsArm64IntermediateAddress(); + uint32_t intermediate_address_offset = + intermediate_address->GetOffset()->AsIntConstant()->GetValueAsUint64(); + DCHECK_EQ(intermediate_address_offset, offset_); + DCHECK_EQ(mirror::Array::DataOffset(Primitive::ComponentSize(type)).Uint32Value(), offset_); + } + Register obj_reg = RegisterFrom(obj_, Primitive::kPrimInt); + __ Sub(obj_reg, obj_reg, offset_); + } + + SaveLiveRegisters(codegen, locations); + + // We may have to change the index's value, but as `index_` is a + // constant member (like other "inputs" of this slow path), + // introduce a copy of it, `index`. + Location index = index_; + if (index_.IsValid()) { + // Handle `index_` for HArrayGet and intrinsic UnsafeGetObject. + if (instruction_->IsArrayGet()) { + // Compute the actual memory offset and store it in `index`. + Register index_reg = RegisterFrom(index_, Primitive::kPrimInt); + DCHECK(locations->GetLiveRegisters()->ContainsCoreRegister(index_.reg())); + if (codegen->IsCoreCalleeSaveRegister(index_.reg())) { + // We are about to change the value of `index_reg` (see the + // calls to vixl::MacroAssembler::Lsl and + // vixl::MacroAssembler::Mov below), but it has + // not been saved by the previous call to + // art::SlowPathCode::SaveLiveRegisters, as it is a + // callee-save register -- + // art::SlowPathCode::SaveLiveRegisters does not consider + // callee-save registers, as it has been designed with the + // assumption that callee-save registers are supposed to be + // handled by the called function. So, as a callee-save + // register, `index_reg` _would_ eventually be saved onto + // the stack, but it would be too late: we would have + // changed its value earlier. Therefore, we manually save + // it here into another freely available register, + // `free_reg`, chosen of course among the caller-save + // registers (as a callee-save `free_reg` register would + // exhibit the same problem). + // + // Note we could have requested a temporary register from + // the register allocator instead; but we prefer not to, as + // this is a slow path, and we know we can find a + // caller-save register that is available. 
+ Register free_reg = FindAvailableCallerSaveRegister(codegen); + __ Mov(free_reg.W(), index_reg); + index_reg = free_reg; + index = LocationFrom(index_reg); + } else { + // The initial register stored in `index_` has already been + // saved in the call to art::SlowPathCode::SaveLiveRegisters + // (as it is not a callee-save register), so we can freely + // use it. + } + // Shifting the index value contained in `index_reg` by the scale + // factor (2) cannot overflow in practice, as the runtime is + // unable to allocate object arrays with a size larger than + // 2^26 - 1 (that is, 2^28 - 4 bytes). + __ Lsl(index_reg, index_reg, Primitive::ComponentSizeShift(type)); + static_assert( + sizeof(mirror::HeapReference<mirror::Object>) == sizeof(int32_t), + "art::mirror::HeapReference<art::mirror::Object> and int32_t have different sizes."); + __ Add(index_reg, index_reg, Operand(offset_)); + } else { + DCHECK(instruction_->IsInvoke()); + DCHECK(instruction_->GetLocations()->Intrinsified()); + DCHECK((instruction_->AsInvoke()->GetIntrinsic() == Intrinsics::kUnsafeGetObject) || + (instruction_->AsInvoke()->GetIntrinsic() == Intrinsics::kUnsafeGetObjectVolatile)) + << instruction_->AsInvoke()->GetIntrinsic(); + DCHECK_EQ(offset_, 0U); + DCHECK(index_.IsRegisterPair()); + // UnsafeGet's offset location is a register pair, the low + // part contains the correct offset. + index = index_.ToLow(); + } + } + + // We're moving two or three locations to locations that could + // overlap, so we need a parallel move resolver. + InvokeRuntimeCallingConvention calling_convention; + HParallelMove parallel_move(codegen->GetGraph()->GetArena()); + parallel_move.AddMove(ref_, + LocationFrom(calling_convention.GetRegisterAt(0)), + type, + nullptr); + parallel_move.AddMove(obj_, + LocationFrom(calling_convention.GetRegisterAt(1)), + type, + nullptr); + if (index.IsValid()) { + parallel_move.AddMove(index, + LocationFrom(calling_convention.GetRegisterAt(2)), + Primitive::kPrimInt, + nullptr); + codegen->GetMoveResolver()->EmitNativeCode(&parallel_move); + } else { + codegen->GetMoveResolver()->EmitNativeCode(&parallel_move); + arm64_codegen->MoveConstant(LocationFrom(calling_convention.GetRegisterAt(2)), offset_); + } + arm64_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pReadBarrierSlow), + instruction_, + instruction_->GetDexPc(), + this); + CheckEntrypointTypes< + kQuickReadBarrierSlow, mirror::Object*, mirror::Object*, mirror::Object*, uint32_t>(); + arm64_codegen->MoveLocation(out_, calling_convention.GetReturnLocation(type), type); + + RestoreLiveRegisters(codegen, locations); + + // Restore the value of `obj_` when it corresponds to a + // HArm64IntermediateAddress instruction.
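For array elements, the Lsl/Add sequence above is plain fixed-point arithmetic on the offset that artReadBarrierSlow eventually receives. A sketch, assuming 4-byte compressed references as the static_assert in the patch requires (the 12-byte data offset in the example is purely illustrative):

  #include <cstdint>

  uint32_t ArrayElementOffset(uint32_t data_offset, uint32_t index) {
    constexpr uint32_t kReferenceSizeShift = 2;  // log2(sizeof(HeapReference<Object>)) == log2(4)
    return data_offset + (index << kReferenceSizeShift);
  }

  // Example: with a 12-byte array header, element 3 sits at 12 + (3 << 2) = 24.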
+ if (instruction_->IsArrayGet() && + instruction_->AsArrayGet()->GetArray()->IsArm64IntermediateAddress()) { + if (kIsDebugBuild) { + HArm64IntermediateAddress* intermediate_address = + instruction_->AsArrayGet()->GetArray()->AsArm64IntermediateAddress(); + uint32_t intermediate_address_offset = + intermediate_address->GetOffset()->AsIntConstant()->GetValueAsUint64(); + DCHECK_EQ(intermediate_address_offset, offset_); + DCHECK_EQ(mirror::Array::DataOffset(Primitive::ComponentSize(type)).Uint32Value(), offset_); + } + Register obj_reg = RegisterFrom(obj_, Primitive::kPrimInt); + __ Add(obj_reg, obj_reg, offset_); + } + + __ B(GetExitLabel()); + } + + const char* GetDescription() const OVERRIDE { return "ReadBarrierForHeapReferenceSlowPathARM64"; } + + private: + Register FindAvailableCallerSaveRegister(CodeGenerator* codegen) { + size_t ref = static_cast<int>(XRegisterFrom(ref_).code()); + size_t obj = static_cast<int>(XRegisterFrom(obj_).code()); + for (size_t i = 0, e = codegen->GetNumberOfCoreRegisters(); i < e; ++i) { + if (i != ref && i != obj && !codegen->IsCoreCalleeSaveRegister(i)) { + return Register(VIXLRegCodeFromART(i), kXRegSize); + } + } + // We shall never fail to find a free caller-save register, as + // there are more than two core caller-save registers on ARM64 + // (meaning it is possible to find one which is different from + // `ref` and `obj`). + DCHECK_GT(codegen->GetNumberOfCoreCallerSaveRegisters(), 2u); + LOG(FATAL) << "Could not find a free register"; + UNREACHABLE(); + } + + HInstruction* const instruction_; + const Location out_; + const Location ref_; + const Location obj_; + const uint32_t offset_; + // An additional location containing an index to an array. + // Only used for HArrayGet and the UnsafeGetObject & + // UnsafeGetObjectVolatile intrinsics. + const Location index_; + + DISALLOW_COPY_AND_ASSIGN(ReadBarrierForHeapReferenceSlowPathARM64); +}; + +// Slow path generating a read barrier for a GC root. +class ReadBarrierForRootSlowPathARM64 : public SlowPathCodeARM64 { + public: + ReadBarrierForRootSlowPathARM64(HInstruction* instruction, Location out, Location root) + : instruction_(instruction), out_(out), root_(root) {} + + void EmitNativeCode(CodeGenerator* codegen) OVERRIDE { + LocationSummary* locations = instruction_->GetLocations(); + Primitive::Type type = Primitive::kPrimNot; + DCHECK(locations->CanCall()); + DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(out_.reg())); + DCHECK(instruction_->IsLoadClass() || instruction_->IsLoadString()); + + __ Bind(GetEntryLabel()); + SaveLiveRegisters(codegen, locations); + + InvokeRuntimeCallingConvention calling_convention; + CodeGeneratorARM64* arm64_codegen = down_cast<CodeGeneratorARM64*>(codegen); + // The argument of the ReadBarrierForRootSlow is not a managed + // reference (`mirror::Object*`), but a `GcRoot<mirror::Object>*`; + // thus we need a 64-bit move here, and we cannot use + // + // arm64_codegen->MoveLocation( + // LocationFrom(calling_convention.GetRegisterAt(0)), + // root_, + // type); + // + // which would emit a 32-bit move, as `type` is a (32-bit wide) + // reference type (`Primitive::kPrimNot`). 
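To make the 32-bit versus 64-bit distinction in the comment above concrete, here is a tiny standalone program using placeholder type aliases rather than ART's GcRoot and HeapReference:

  #include <cstdint>
  #include <cstdio>

  using CompressedReference = uint32_t;   // stand-in for a 32-bit heap reference
  using RootSlot = CompressedReference*;  // stand-in for GcRoot<mirror::Object>*

  int main() {
    // On arm64 the slot is an 8-byte native pointer while the reference it
    // holds is 4 bytes, hence the explicit 64-bit register move in the slow path.
    std::printf("reference: %zu bytes, root slot: %zu bytes\n",
                sizeof(CompressedReference), sizeof(RootSlot));
    return 0;
  }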
+ __ Mov(calling_convention.GetRegisterAt(0), XRegisterFrom(out_)); + arm64_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pReadBarrierForRootSlow), + instruction_, + instruction_->GetDexPc(), + this); + CheckEntrypointTypes<kQuickReadBarrierForRootSlow, mirror::Object*, GcRoot<mirror::Object>*>(); + arm64_codegen->MoveLocation(out_, calling_convention.GetReturnLocation(type), type); + + RestoreLiveRegisters(codegen, locations); + __ B(GetExitLabel()); + } + + const char* GetDescription() const OVERRIDE { return "ReadBarrierForRootSlowPathARM64"; } + + private: + HInstruction* const instruction_; + const Location out_; + const Location root_; + + DISALLOW_COPY_AND_ASSIGN(ReadBarrierForRootSlowPathARM64); +}; + #undef __ Location InvokeDexCallingConventionVisitorARM64::GetNextLocation(Primitive::Type type) { @@ -1401,13 +1661,25 @@ void LocationsBuilderARM64::HandleBinaryOp(HBinaryOperation* instr) { } void LocationsBuilderARM64::HandleFieldGet(HInstruction* instruction) { + DCHECK(instruction->IsInstanceFieldGet() || instruction->IsStaticFieldGet()); + + bool object_field_get_with_read_barrier = + kEmitCompilerReadBarrier && (instruction->GetType() == Primitive::kPrimNot); LocationSummary* locations = - new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall); + new (GetGraph()->GetArena()) LocationSummary(instruction, + object_field_get_with_read_barrier ? + LocationSummary::kCallOnSlowPath : + LocationSummary::kNoCall); locations->SetInAt(0, Location::RequiresRegister()); if (Primitive::IsFloatingPointType(instruction->GetType())) { locations->SetOut(Location::RequiresFpuRegister()); } else { - locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); + // The output overlaps for an object field get when read barriers + // are enabled: we do not want the load to overwrite the object's + // location, as we need it to emit the read barrier. + locations->SetOut( + Location::RequiresRegister(), + object_field_get_with_read_barrier ? 
Location::kOutputOverlap : Location::kNoOutputOverlap); } } @@ -1436,7 +1708,11 @@ void InstructionCodeGeneratorARM64::HandleFieldGet(HInstruction* instruction, } if (field_type == Primitive::kPrimNot) { - GetAssembler()->MaybeUnpoisonHeapReference(OutputCPURegister(instruction).W()); + LocationSummary* locations = instruction->GetLocations(); + Location base = locations->InAt(0); + Location out = locations->Out(); + uint32_t offset = field_info.GetFieldOffset().Uint32Value(); + codegen_->MaybeGenerateReadBarrier(instruction, out, out, base, offset); } } @@ -1613,6 +1889,82 @@ void InstructionCodeGeneratorARM64::VisitAnd(HAnd* instruction) { HandleBinaryOp(instruction); } +void LocationsBuilderARM64::VisitArm64DataProcWithShifterOp( + HArm64DataProcWithShifterOp* instruction) { + DCHECK(instruction->GetType() == Primitive::kPrimInt || + instruction->GetType() == Primitive::kPrimLong); + LocationSummary* locations = + new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall); + if (instruction->GetInstrKind() == HInstruction::kNeg) { + locations->SetInAt(0, Location::ConstantLocation(instruction->InputAt(0)->AsConstant())); + } else { + locations->SetInAt(0, Location::RequiresRegister()); + } + locations->SetInAt(1, Location::RequiresRegister()); + locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); +} + +void InstructionCodeGeneratorARM64::VisitArm64DataProcWithShifterOp( + HArm64DataProcWithShifterOp* instruction) { + Primitive::Type type = instruction->GetType(); + HInstruction::InstructionKind kind = instruction->GetInstrKind(); + DCHECK(type == Primitive::kPrimInt || type == Primitive::kPrimLong); + Register out = OutputRegister(instruction); + Register left; + if (kind != HInstruction::kNeg) { + left = InputRegisterAt(instruction, 0); + } + // If this `HArm64DataProcWithShifterOp` was created by merging a type conversion as the + // shifter operand operation, the IR generating `right_reg` (input to the type + // conversion) can have a different type from the current instruction's type, + // so we manually indicate the type. + Register right_reg = RegisterFrom(instruction->GetLocations()->InAt(1), type); + int64_t shift_amount = (type == Primitive::kPrimInt) + ? static_cast<uint32_t>(instruction->GetShiftAmount() & kMaxIntShiftValue) + : static_cast<uint32_t>(instruction->GetShiftAmount() & kMaxLongShiftValue); + + Operand right_operand(0); + + HArm64DataProcWithShifterOp::OpKind op_kind = instruction->GetOpKind(); + if (HArm64DataProcWithShifterOp::IsExtensionOp(op_kind)) { + right_operand = Operand(right_reg, helpers::ExtendFromOpKind(op_kind)); + } else { + right_operand = Operand(right_reg, helpers::ShiftFromOpKind(op_kind), shift_amount); + } + + // Logical binary operations do not support extension operations in the + // operand. Note that VIXL would still manage if it was passed by generating + // the extension as a separate instruction. + // `HNeg` also does not support extension. See comments in `ShifterOperandSupportsExtension()`. 
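In scalar terms, the merge that HArm64DataProcWithShifterOp performs folds the shift of the right-hand operand into the arithmetic or logical instruction itself, e.g. add x0, x1, x2, LSL #3. A minimal model (the 0-63 masking mirrors the kMaxLongShiftValue masking above):

  #include <cstdint>

  uint64_t AddWithShiftedOperand(uint64_t left, uint64_t right, unsigned shift_amount) {
    // One ADD with a shifted-register operand instead of a separate shift + add.
    return left + (right << (shift_amount & 63u));
  }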
+ DCHECK(!right_operand.IsExtendedRegister() || + (kind != HInstruction::kAnd && kind != HInstruction::kOr && kind != HInstruction::kXor && + kind != HInstruction::kNeg)); + switch (kind) { + case HInstruction::kAdd: + __ Add(out, left, right_operand); + break; + case HInstruction::kAnd: + __ And(out, left, right_operand); + break; + case HInstruction::kNeg: + DCHECK(instruction->InputAt(0)->AsConstant()->IsZero()); + __ Neg(out, right_operand); + break; + case HInstruction::kOr: + __ Orr(out, left, right_operand); + break; + case HInstruction::kSub: + __ Sub(out, left, right_operand); + break; + case HInstruction::kXor: + __ Eor(out, left, right_operand); + break; + default: + LOG(FATAL) << "Unexpected operation kind: " << kind; + UNREACHABLE(); + } +} + void LocationsBuilderARM64::VisitArm64IntermediateAddress(HArm64IntermediateAddress* instruction) { LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall); @@ -1628,23 +1980,75 @@ void InstructionCodeGeneratorARM64::VisitArm64IntermediateAddress( Operand(InputOperandAt(instruction, 1))); } +void LocationsBuilderARM64::VisitArm64MultiplyAccumulate(HArm64MultiplyAccumulate* instr) { + LocationSummary* locations = + new (GetGraph()->GetArena()) LocationSummary(instr, LocationSummary::kNoCall); + locations->SetInAt(HArm64MultiplyAccumulate::kInputAccumulatorIndex, + Location::RequiresRegister()); + locations->SetInAt(HArm64MultiplyAccumulate::kInputMulLeftIndex, Location::RequiresRegister()); + locations->SetInAt(HArm64MultiplyAccumulate::kInputMulRightIndex, Location::RequiresRegister()); + locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); +} + +void InstructionCodeGeneratorARM64::VisitArm64MultiplyAccumulate(HArm64MultiplyAccumulate* instr) { + Register res = OutputRegister(instr); + Register accumulator = InputRegisterAt(instr, HArm64MultiplyAccumulate::kInputAccumulatorIndex); + Register mul_left = InputRegisterAt(instr, HArm64MultiplyAccumulate::kInputMulLeftIndex); + Register mul_right = InputRegisterAt(instr, HArm64MultiplyAccumulate::kInputMulRightIndex); + + // Avoid emitting code that could trigger Cortex A53's erratum 835769. + // This fixup should be carried out for all multiply-accumulate instructions: + // madd, msub, smaddl, smsubl, umaddl and umsubl. + if (instr->GetType() == Primitive::kPrimLong && + codegen_->GetInstructionSetFeatures().NeedFixCortexA53_835769()) { + MacroAssembler* masm = down_cast<CodeGeneratorARM64*>(codegen_)->GetVIXLAssembler(); + vixl::Instruction* prev = masm->GetCursorAddress<vixl::Instruction*>() - vixl::kInstructionSize; + if (prev->IsLoadOrStore()) { + // Make sure we emit only exactly one nop. + vixl::CodeBufferCheckScope scope(masm, + vixl::kInstructionSize, + vixl::CodeBufferCheckScope::kCheck, + vixl::CodeBufferCheckScope::kExactSize); + __ nop(); + } + } + + if (instr->GetOpKind() == HInstruction::kAdd) { + __ Madd(res, mul_left, mul_right, accumulator); + } else { + DCHECK(instr->GetOpKind() == HInstruction::kSub); + __ Msub(res, mul_left, mul_right, accumulator); + } +} + void LocationsBuilderARM64::VisitArrayGet(HArrayGet* instruction) { + bool object_array_get_with_read_barrier = + kEmitCompilerReadBarrier && (instruction->GetType() == Primitive::kPrimNot); LocationSummary* locations = - new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall); + new (GetGraph()->GetArena()) LocationSummary(instruction, + object_array_get_with_read_barrier ? 
+ LocationSummary::kCallOnSlowPath : + LocationSummary::kNoCall); locations->SetInAt(0, Location::RequiresRegister()); locations->SetInAt(1, Location::RegisterOrConstant(instruction->InputAt(1))); if (Primitive::IsFloatingPointType(instruction->GetType())) { locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap); } else { - locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); + // The output overlaps in the case of an object array get with + // read barriers enabled: we do not want the move to overwrite the + // array's location, as we need it to emit the read barrier. + locations->SetOut( + Location::RequiresRegister(), + object_array_get_with_read_barrier ? Location::kOutputOverlap : Location::kNoOutputOverlap); } } void InstructionCodeGeneratorARM64::VisitArrayGet(HArrayGet* instruction) { Primitive::Type type = instruction->GetType(); Register obj = InputRegisterAt(instruction, 0); - Location index = instruction->GetLocations()->InAt(1); - size_t offset = mirror::Array::DataOffset(Primitive::ComponentSize(type)).Uint32Value(); + LocationSummary* locations = instruction->GetLocations(); + Location index = locations->InAt(1); + uint32_t offset = mirror::Array::DataOffset(Primitive::ComponentSize(type)).Uint32Value(); MemOperand source = HeapOperand(obj); CPURegister dest = OutputCPURegister(instruction); @@ -1676,8 +2080,22 @@ void InstructionCodeGeneratorARM64::VisitArrayGet(HArrayGet* instruction) { codegen_->Load(type, dest, source); codegen_->MaybeRecordImplicitNullCheck(instruction); - if (instruction->GetType() == Primitive::kPrimNot) { - GetAssembler()->MaybeUnpoisonHeapReference(dest.W()); + if (type == Primitive::kPrimNot) { + static_assert( + sizeof(mirror::HeapReference<mirror::Object>) == sizeof(int32_t), + "art::mirror::HeapReference<art::mirror::Object> and int32_t have different sizes."); + Location obj_loc = locations->InAt(0); + Location out = locations->Out(); + if (index.IsConstant()) { + codegen_->MaybeGenerateReadBarrier(instruction, out, out, obj_loc, offset); + } else { + // Note: when `obj_loc` is a HArm64IntermediateAddress, it does + // not contain the base address of the array object, which is + // needed by the read barrier entry point. So the read barrier + // slow path will temporarily set back `obj_loc` to the right + // address (see ReadBarrierForHeapReferenceSlowPathARM64::EmitNativeCode). + codegen_->MaybeGenerateReadBarrier(instruction, out, out, obj_loc, offset, index); + } } } @@ -1695,12 +2113,19 @@ void InstructionCodeGeneratorARM64::VisitArrayLength(HArrayLength* instruction) } void LocationsBuilderARM64::VisitArraySet(HArraySet* instruction) { + Primitive::Type value_type = instruction->GetComponentType(); + + bool may_need_runtime_call_for_type_check = instruction->NeedsTypeCheck(); + bool object_array_set_with_read_barrier = + kEmitCompilerReadBarrier && (value_type == Primitive::kPrimNot); LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary( instruction, - instruction->NeedsTypeCheck() ? LocationSummary::kCallOnSlowPath : LocationSummary::kNoCall); + (may_need_runtime_call_for_type_check || object_array_set_with_read_barrier) ? 
+ LocationSummary::kCallOnSlowPath : + LocationSummary::kNoCall); locations->SetInAt(0, Location::RequiresRegister()); locations->SetInAt(1, Location::RegisterOrConstant(instruction->InputAt(1))); - if (Primitive::IsFloatingPointType(instruction->InputAt(2)->GetType())) { + if (Primitive::IsFloatingPointType(value_type)) { locations->SetInAt(2, Location::RequiresFpuRegister()); } else { locations->SetInAt(2, Location::RequiresRegister()); @@ -1710,7 +2135,7 @@ void LocationsBuilderARM64::VisitArraySet(HArraySet* instruction) { void InstructionCodeGeneratorARM64::VisitArraySet(HArraySet* instruction) { Primitive::Type value_type = instruction->GetComponentType(); LocationSummary* locations = instruction->GetLocations(); - bool may_need_runtime_call = locations->CanCall(); + bool may_need_runtime_call_for_type_check = instruction->NeedsTypeCheck(); bool needs_write_barrier = CodeGenerator::StoreNeedsWriteBarrier(value_type, instruction->GetValue()); @@ -1724,7 +2149,7 @@ void InstructionCodeGeneratorARM64::VisitArraySet(HArraySet* instruction) { BlockPoolsScope block_pools(masm); if (!needs_write_barrier) { - DCHECK(!may_need_runtime_call); + DCHECK(!may_need_runtime_call_for_type_check); if (index.IsConstant()) { offset += Int64ConstantFrom(index) << Primitive::ComponentSizeShift(value_type); destination = HeapOperand(array, offset); @@ -1774,7 +2199,7 @@ void InstructionCodeGeneratorARM64::VisitArraySet(HArraySet* instruction) { uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value(); uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value(); - if (may_need_runtime_call) { + if (may_need_runtime_call_for_type_check) { slow_path = new (GetGraph()->GetArena()) ArraySetSlowPathARM64(instruction); codegen_->AddSlowPath(slow_path); if (instruction->GetValueCanBeNull()) { @@ -1789,26 +2214,66 @@ void InstructionCodeGeneratorARM64::VisitArraySet(HArraySet* instruction) { __ Bind(&non_zero); } - Register temp2 = temps.AcquireSameSizeAs(array); - __ Ldr(temp, HeapOperand(array, class_offset)); - codegen_->MaybeRecordImplicitNullCheck(instruction); - GetAssembler()->MaybeUnpoisonHeapReference(temp); - __ Ldr(temp, HeapOperand(temp, component_offset)); - __ Ldr(temp2, HeapOperand(Register(value), class_offset)); - // No need to poison/unpoison, we're comparing two poisoned references. - __ Cmp(temp, temp2); - if (instruction->StaticTypeOfArrayIsObjectArray()) { - vixl::Label do_put; - __ B(eq, &do_put); - GetAssembler()->MaybeUnpoisonHeapReference(temp); - __ Ldr(temp, HeapOperand(temp, super_offset)); - // No need to unpoison, we're comparing against null. 
- __ Cbnz(temp, slow_path->GetEntryLabel()); - __ Bind(&do_put); + if (kEmitCompilerReadBarrier) { + // When read barriers are enabled, the type checking + // instrumentation requires two read barriers: + // + // __ Mov(temp2, temp); + // // /* HeapReference<Class> */ temp = temp->component_type_ + // __ Ldr(temp, HeapOperand(temp, component_offset)); + // codegen_->GenerateReadBarrier( + // instruction, temp_loc, temp_loc, temp2_loc, component_offset); + // + // // /* HeapReference<Class> */ temp2 = value->klass_ + // __ Ldr(temp2, HeapOperand(Register(value), class_offset)); + // codegen_->GenerateReadBarrier( + // instruction, temp2_loc, temp2_loc, value_loc, class_offset, temp_loc); + // + // __ Cmp(temp, temp2); + // + // However, the second read barrier may trash `temp`, as it + // is a temporary register, and as such would not be saved + // along with live registers before calling the runtime (nor + // restored afterwards). So in this case, we bail out and + // delegate the work to the array set slow path. + // + // TODO: Extend the register allocator to support a new + // "(locally) live temp" location so as to avoid always + // going into the slow path when read barriers are enabled. + __ B(slow_path->GetEntryLabel()); } else { - __ B(ne, slow_path->GetEntryLabel()); + Register temp2 = temps.AcquireSameSizeAs(array); + // /* HeapReference<Class> */ temp = array->klass_ + __ Ldr(temp, HeapOperand(array, class_offset)); + codegen_->MaybeRecordImplicitNullCheck(instruction); + GetAssembler()->MaybeUnpoisonHeapReference(temp); + + // /* HeapReference<Class> */ temp = temp->component_type_ + __ Ldr(temp, HeapOperand(temp, component_offset)); + // /* HeapReference<Class> */ temp2 = value->klass_ + __ Ldr(temp2, HeapOperand(Register(value), class_offset)); + // If heap poisoning is enabled, no need to unpoison `temp` + // nor `temp2`, as we are comparing two poisoned references. + __ Cmp(temp, temp2); + + if (instruction->StaticTypeOfArrayIsObjectArray()) { + vixl::Label do_put; + __ B(eq, &do_put); + // If heap poisoning is enabled, the `temp` reference has + // not been unpoisoned yet; unpoison it now. + GetAssembler()->MaybeUnpoisonHeapReference(temp); + + // /* HeapReference<Class> */ temp = temp->super_class_ + __ Ldr(temp, HeapOperand(temp, super_offset)); + // If heap poisoning is enabled, no need to unpoison + // `temp`, as we are comparing against null below. + __ Cbnz(temp, slow_path->GetEntryLabel()); + __ Bind(&do_put); + } else { + __ B(ne, slow_path->GetEntryLabel()); + } + temps.Release(temp2); } - temps.Release(temp2); } if (kPoisonHeapReferences) { @@ -1824,7 +2289,7 @@ void InstructionCodeGeneratorARM64::VisitArraySet(HArraySet* instruction) { } __ Str(source, destination); - if (!may_need_runtime_call) { + if (!may_need_runtime_call_for_type_check) { codegen_->MaybeRecordImplicitNullCheck(instruction); } } @@ -2491,40 +2956,44 @@ void InstructionCodeGeneratorARM64::VisitInstanceFieldSet(HInstanceFieldSet* ins void LocationsBuilderARM64::VisitInstanceOf(HInstanceOf* instruction) { LocationSummary::CallKind call_kind = LocationSummary::kNoCall; - switch (instruction->GetTypeCheckKind()) { + TypeCheckKind type_check_kind = instruction->GetTypeCheckKind(); + switch (type_check_kind) { case TypeCheckKind::kExactCheck: case TypeCheckKind::kAbstractClassCheck: case TypeCheckKind::kClassHierarchyCheck: case TypeCheckKind::kArrayObjectCheck: - call_kind = LocationSummary::kNoCall; + call_kind = + kEmitCompilerReadBarrier ? 
LocationSummary::kCallOnSlowPath : LocationSummary::kNoCall; break; + case TypeCheckKind::kArrayCheck: case TypeCheckKind::kUnresolvedCheck: case TypeCheckKind::kInterfaceCheck: - call_kind = LocationSummary::kCall; - break; - case TypeCheckKind::kArrayCheck: call_kind = LocationSummary::kCallOnSlowPath; break; } + LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction, call_kind); - if (call_kind != LocationSummary::kCall) { - locations->SetInAt(0, Location::RequiresRegister()); - locations->SetInAt(1, Location::RequiresRegister()); - // The out register is used as a temporary, so it overlaps with the inputs. - // Note that TypeCheckSlowPathARM64 uses this register too. - locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap); - } else { - InvokeRuntimeCallingConvention calling_convention; - locations->SetInAt(1, LocationFrom(calling_convention.GetRegisterAt(0))); - locations->SetInAt(0, LocationFrom(calling_convention.GetRegisterAt(1))); - locations->SetOut(calling_convention.GetReturnLocation(Primitive::kPrimInt)); + locations->SetInAt(0, Location::RequiresRegister()); + locations->SetInAt(1, Location::RequiresRegister()); + // The "out" register is used as a temporary, so it overlaps with the inputs. + // Note that TypeCheckSlowPathARM64 uses this register too. + locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap); + // When read barriers are enabled, we need a temporary register for + // some cases. + if (kEmitCompilerReadBarrier && + (type_check_kind == TypeCheckKind::kAbstractClassCheck || + type_check_kind == TypeCheckKind::kClassHierarchyCheck || + type_check_kind == TypeCheckKind::kArrayObjectCheck)) { + locations->AddTemp(Location::RequiresRegister()); } } void InstructionCodeGeneratorARM64::VisitInstanceOf(HInstanceOf* instruction) { LocationSummary* locations = instruction->GetLocations(); + Location obj_loc = locations->InAt(0); Register obj = InputRegisterAt(instruction, 0); Register cls = InputRegisterAt(instruction, 1); + Location out_loc = locations->Out(); Register out = OutputRegister(instruction); uint32_t class_offset = mirror::Object::ClassOffset().Int32Value(); uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value(); @@ -2540,15 +3009,9 @@ void InstructionCodeGeneratorARM64::VisitInstanceOf(HInstanceOf* instruction) { __ Cbz(obj, &zero); } - // In case of an interface/unresolved check, we put the object class into the object register. - // This is safe, as the register is caller-save, and the object must be in another - // register if it survives the runtime call. - Register target = (instruction->GetTypeCheckKind() == TypeCheckKind::kInterfaceCheck) || - (instruction->GetTypeCheckKind() == TypeCheckKind::kUnresolvedCheck) - ? obj - : out; - __ Ldr(target, HeapOperand(obj.W(), class_offset)); - GetAssembler()->MaybeUnpoisonHeapReference(target); + // /* HeapReference<Class> */ out = obj->klass_ + __ Ldr(out, HeapOperand(obj.W(), class_offset)); + codegen_->MaybeGenerateReadBarrier(instruction, out_loc, out_loc, obj_loc, class_offset); switch (instruction->GetTypeCheckKind()) { case TypeCheckKind::kExactCheck: { @@ -2559,13 +3022,23 @@ void InstructionCodeGeneratorARM64::VisitInstanceOf(HInstanceOf* instruction) { } break; } + case TypeCheckKind::kAbstractClassCheck: { // If the class is abstract, we eagerly fetch the super class of the // object to avoid doing a comparison we know will fail. 
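The loop emitted below is a plain class-hierarchy walk; in the abstract-class variant the first comparison is skipped, because an object can never be an exact instance of an abstract class. A reference model with a placeholder Class type:

  struct Class { const Class* super_class; };

  bool IsSubclassOf(const Class* klass, const Class* target) {
    while (klass != nullptr) {
      if (klass == target) return true;
      klass = klass->super_class;  // each such load is what the read barrier instruments
    }
    return false;
  }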
vixl::Label loop, success; __ Bind(&loop); + Location temp_loc = kEmitCompilerReadBarrier ? locations->GetTemp(0) : Location::NoLocation(); + if (kEmitCompilerReadBarrier) { + // Save the value of `out` into `temp` before overwriting it + // in the following move operation, as we will need it for the + // read barrier below. + Register temp = WRegisterFrom(temp_loc); + __ Mov(temp, out); + } + // /* HeapReference<Class> */ out = out->super_class_ __ Ldr(out, HeapOperand(out, super_offset)); - GetAssembler()->MaybeUnpoisonHeapReference(out); + codegen_->MaybeGenerateReadBarrier(instruction, out_loc, out_loc, temp_loc, super_offset); // If `out` is null, we use it for the result, and jump to `done`. __ Cbz(out, &done); __ Cmp(out, cls); @@ -2576,14 +3049,24 @@ void InstructionCodeGeneratorARM64::VisitInstanceOf(HInstanceOf* instruction) { } break; } + case TypeCheckKind::kClassHierarchyCheck: { // Walk over the class hierarchy to find a match. vixl::Label loop, success; __ Bind(&loop); __ Cmp(out, cls); __ B(eq, &success); + Location temp_loc = kEmitCompilerReadBarrier ? locations->GetTemp(0) : Location::NoLocation(); + if (kEmitCompilerReadBarrier) { + // Save the value of `out` into `temp` before overwriting it + // in the following move operation, as we will need it for the + // read barrier below. + Register temp = WRegisterFrom(temp_loc); + __ Mov(temp, out); + } + // /* HeapReference<Class> */ out = out->super_class_ __ Ldr(out, HeapOperand(out, super_offset)); - GetAssembler()->MaybeUnpoisonHeapReference(out); + codegen_->MaybeGenerateReadBarrier(instruction, out_loc, out_loc, temp_loc, super_offset); __ Cbnz(out, &loop); // If `out` is null, we use it for the result, and jump to `done`. __ B(&done); @@ -2594,14 +3077,24 @@ void InstructionCodeGeneratorARM64::VisitInstanceOf(HInstanceOf* instruction) { } break; } + case TypeCheckKind::kArrayObjectCheck: { // Do an exact check. vixl::Label exact_check; __ Cmp(out, cls); __ B(eq, &exact_check); - // Otherwise, we need to check that the object's class is a non primitive array. + // Otherwise, we need to check that the object's class is a non-primitive array. + Location temp_loc = kEmitCompilerReadBarrier ? locations->GetTemp(0) : Location::NoLocation(); + if (kEmitCompilerReadBarrier) { + // Save the value of `out` into `temp` before overwriting it + // in the following move operation, as we will need it for the + // read barrier below. + Register temp = WRegisterFrom(temp_loc); + __ Mov(temp, out); + } + // /* HeapReference<Class> */ out = out->component_type_ __ Ldr(out, HeapOperand(out, component_offset)); - GetAssembler()->MaybeUnpoisonHeapReference(out); + codegen_->MaybeGenerateReadBarrier(instruction, out_loc, out_loc, temp_loc, component_offset); // If `out` is null, we use it for the result, and jump to `done`. 
__ Cbz(out, &done); __ Ldrh(out, HeapOperand(out, primitive_offset)); @@ -2612,11 +3105,12 @@ void InstructionCodeGeneratorARM64::VisitInstanceOf(HInstanceOf* instruction) { __ B(&done); break; } + case TypeCheckKind::kArrayCheck: { __ Cmp(out, cls); DCHECK(locations->OnlyCallsOnSlowPath()); - slow_path = new (GetGraph()->GetArena()) TypeCheckSlowPathARM64( - instruction, /* is_fatal */ false); + slow_path = new (GetGraph()->GetArena()) TypeCheckSlowPathARM64(instruction, + /* is_fatal */ false); codegen_->AddSlowPath(slow_path); __ B(ne, slow_path->GetEntryLabel()); __ Mov(out, 1); @@ -2625,13 +3119,25 @@ void InstructionCodeGeneratorARM64::VisitInstanceOf(HInstanceOf* instruction) { } break; } + case TypeCheckKind::kUnresolvedCheck: - case TypeCheckKind::kInterfaceCheck: - default: { - codegen_->InvokeRuntime(QUICK_ENTRY_POINT(pInstanceofNonTrivial), - instruction, - instruction->GetDexPc(), - nullptr); + case TypeCheckKind::kInterfaceCheck: { + // Note that we indeed only call on slow path, but we always go + // into the slow path for the unresolved and interface check + // cases. + // + // We cannot directly call the InstanceofNonTrivial runtime + // entry point without resorting to a type checking slow path + // here (i.e. by calling InvokeRuntime directly), as it would + // require to assign fixed registers for the inputs of this + // HInstanceOf instruction (following the runtime calling + // convention), which might be cluttered by the potential first + // read barrier emission at the beginning of this method. + DCHECK(locations->OnlyCallsOnSlowPath()); + slow_path = new (GetGraph()->GetArena()) TypeCheckSlowPathARM64(instruction, + /* is_fatal */ false); + codegen_->AddSlowPath(slow_path); + __ B(slow_path->GetEntryLabel()); if (zero.IsLinked()) { __ B(&done); } @@ -2657,58 +3163,62 @@ void LocationsBuilderARM64::VisitCheckCast(HCheckCast* instruction) { LocationSummary::CallKind call_kind = LocationSummary::kNoCall; bool throws_into_catch = instruction->CanThrowIntoCatchBlock(); - switch (instruction->GetTypeCheckKind()) { + TypeCheckKind type_check_kind = instruction->GetTypeCheckKind(); + switch (type_check_kind) { case TypeCheckKind::kExactCheck: case TypeCheckKind::kAbstractClassCheck: case TypeCheckKind::kClassHierarchyCheck: case TypeCheckKind::kArrayObjectCheck: - call_kind = throws_into_catch - ? LocationSummary::kCallOnSlowPath - : LocationSummary::kNoCall; + call_kind = (throws_into_catch || kEmitCompilerReadBarrier) ? + LocationSummary::kCallOnSlowPath : + LocationSummary::kNoCall; // In fact, call on a fatal (non-returning) slow path. break; + case TypeCheckKind::kArrayCheck: case TypeCheckKind::kUnresolvedCheck: case TypeCheckKind::kInterfaceCheck: - call_kind = LocationSummary::kCall; - break; - case TypeCheckKind::kArrayCheck: call_kind = LocationSummary::kCallOnSlowPath; break; } - LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary( - instruction, call_kind); - if (call_kind != LocationSummary::kCall) { - locations->SetInAt(0, Location::RequiresRegister()); - locations->SetInAt(1, Location::RequiresRegister()); - // Note that TypeCheckSlowPathARM64 uses this register too. 
- locations->AddTemp(Location::RequiresRegister()); - } else { - InvokeRuntimeCallingConvention calling_convention; - locations->SetInAt(1, LocationFrom(calling_convention.GetRegisterAt(0))); - locations->SetInAt(0, LocationFrom(calling_convention.GetRegisterAt(1))); + LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction, call_kind); + locations->SetInAt(0, Location::RequiresRegister()); + locations->SetInAt(1, Location::RequiresRegister()); + // Note that TypeCheckSlowPathARM64 uses this "temp" register too. + locations->AddTemp(Location::RequiresRegister()); + locations->AddTemp(Location::RequiresRegister()); + // When read barriers are enabled, we need an additional temporary + // register for some cases. + if (kEmitCompilerReadBarrier && + (type_check_kind == TypeCheckKind::kAbstractClassCheck || + type_check_kind == TypeCheckKind::kClassHierarchyCheck || + type_check_kind == TypeCheckKind::kArrayObjectCheck)) { + locations->AddTemp(Location::RequiresRegister()); } } void InstructionCodeGeneratorARM64::VisitCheckCast(HCheckCast* instruction) { LocationSummary* locations = instruction->GetLocations(); + Location obj_loc = locations->InAt(0); Register obj = InputRegisterAt(instruction, 0); Register cls = InputRegisterAt(instruction, 1); - Register temp; - if (!locations->WillCall()) { - temp = WRegisterFrom(instruction->GetLocations()->GetTemp(0)); - } - + Location temp_loc = locations->GetTemp(0); + Register temp = WRegisterFrom(temp_loc); uint32_t class_offset = mirror::Object::ClassOffset().Int32Value(); uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value(); uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value(); uint32_t primitive_offset = mirror::Class::PrimitiveTypeOffset().Int32Value(); - SlowPathCodeARM64* slow_path = nullptr; - if (!locations->WillCall()) { - slow_path = new (GetGraph()->GetArena()) TypeCheckSlowPathARM64( - instruction, !locations->CanCall()); - codegen_->AddSlowPath(slow_path); - } + TypeCheckKind type_check_kind = instruction->GetTypeCheckKind(); + bool is_type_check_slow_path_fatal = + (type_check_kind == TypeCheckKind::kExactCheck || + type_check_kind == TypeCheckKind::kAbstractClassCheck || + type_check_kind == TypeCheckKind::kClassHierarchyCheck || + type_check_kind == TypeCheckKind::kArrayObjectCheck) && + !instruction->CanThrowIntoCatchBlock(); + SlowPathCodeARM64* type_check_slow_path = + new (GetGraph()->GetArena()) TypeCheckSlowPathARM64(instruction, + is_type_check_slow_path_fatal); + codegen_->AddSlowPath(type_check_slow_path); vixl::Label done; // Avoid null check if we know obj is not null. @@ -2716,76 +3226,159 @@ void InstructionCodeGeneratorARM64::VisitCheckCast(HCheckCast* instruction) { __ Cbz(obj, &done); } - if (locations->WillCall()) { - __ Ldr(obj, HeapOperand(obj, class_offset)); - GetAssembler()->MaybeUnpoisonHeapReference(obj); - } else { - __ Ldr(temp, HeapOperand(obj, class_offset)); - GetAssembler()->MaybeUnpoisonHeapReference(temp); - } + // /* HeapReference<Class> */ temp = obj->klass_ + __ Ldr(temp, HeapOperand(obj, class_offset)); + codegen_->MaybeGenerateReadBarrier(instruction, temp_loc, temp_loc, obj_loc, class_offset); - switch (instruction->GetTypeCheckKind()) { + switch (type_check_kind) { case TypeCheckKind::kExactCheck: case TypeCheckKind::kArrayCheck: { __ Cmp(temp, cls); // Jump to slow path for throwing the exception or doing a // more involved array check. 
- __ B(ne, slow_path->GetEntryLabel()); + __ B(ne, type_check_slow_path->GetEntryLabel()); break; } + case TypeCheckKind::kAbstractClassCheck: { // If the class is abstract, we eagerly fetch the super class of the // object to avoid doing a comparison we know will fail. - vixl::Label loop; + vixl::Label loop, compare_classes; __ Bind(&loop); + Location temp2_loc = + kEmitCompilerReadBarrier ? locations->GetTemp(1) : Location::NoLocation(); + if (kEmitCompilerReadBarrier) { + // Save the value of `temp` into `temp2` before overwriting it + // in the following move operation, as we will need it for the + // read barrier below. + Register temp2 = WRegisterFrom(temp2_loc); + __ Mov(temp2, temp); + } + // /* HeapReference<Class> */ temp = temp->super_class_ __ Ldr(temp, HeapOperand(temp, super_offset)); - GetAssembler()->MaybeUnpoisonHeapReference(temp); - // Jump to the slow path to throw the exception. - __ Cbz(temp, slow_path->GetEntryLabel()); + codegen_->MaybeGenerateReadBarrier(instruction, temp_loc, temp_loc, temp2_loc, super_offset); + + // If the class reference currently in `temp` is not null, jump + // to the `compare_classes` label to compare it with the checked + // class. + __ Cbnz(temp, &compare_classes); + // Otherwise, jump to the slow path to throw the exception. + // + // But before, move back the object's class into `temp` before + // going into the slow path, as it has been overwritten in the + // meantime. + // /* HeapReference<Class> */ temp = obj->klass_ + __ Ldr(temp, HeapOperand(obj, class_offset)); + codegen_->MaybeGenerateReadBarrier(instruction, temp_loc, temp_loc, obj_loc, class_offset); + __ B(type_check_slow_path->GetEntryLabel()); + + __ Bind(&compare_classes); __ Cmp(temp, cls); __ B(ne, &loop); break; } + case TypeCheckKind::kClassHierarchyCheck: { // Walk over the class hierarchy to find a match. vixl::Label loop; __ Bind(&loop); __ Cmp(temp, cls); __ B(eq, &done); + + Location temp2_loc = + kEmitCompilerReadBarrier ? locations->GetTemp(1) : Location::NoLocation(); + if (kEmitCompilerReadBarrier) { + // Save the value of `temp` into `temp2` before overwriting it + // in the following move operation, as we will need it for the + // read barrier below. + Register temp2 = WRegisterFrom(temp2_loc); + __ Mov(temp2, temp); + } + // /* HeapReference<Class> */ temp = temp->super_class_ __ Ldr(temp, HeapOperand(temp, super_offset)); - GetAssembler()->MaybeUnpoisonHeapReference(temp); + codegen_->MaybeGenerateReadBarrier(instruction, temp_loc, temp_loc, temp2_loc, super_offset); + + // If the class reference currently in `temp` is not null, jump + // back at the beginning of the loop. __ Cbnz(temp, &loop); - // Jump to the slow path to throw the exception. - __ B(slow_path->GetEntryLabel()); + // Otherwise, jump to the slow path to throw the exception. + // + // But before, move back the object's class into `temp` before + // going into the slow path, as it has been overwritten in the + // meantime. + // /* HeapReference<Class> */ temp = obj->klass_ + __ Ldr(temp, HeapOperand(obj, class_offset)); + codegen_->MaybeGenerateReadBarrier(instruction, temp_loc, temp_loc, obj_loc, class_offset); + __ B(type_check_slow_path->GetEntryLabel()); break; } + case TypeCheckKind::kArrayObjectCheck: { // Do an exact check. + vixl::Label check_non_primitive_component_type; __ Cmp(temp, cls); __ B(eq, &done); - // Otherwise, we need to check that the object's class is a non primitive array. + + // Otherwise, we need to check that the object's class is a non-primitive array. 
+ Location temp2_loc = + kEmitCompilerReadBarrier ? locations->GetTemp(1) : Location::NoLocation(); + if (kEmitCompilerReadBarrier) { + // Save the value of `temp` into `temp2` before overwriting it + // in the following move operation, as we will need it for the + // read barrier below. + Register temp2 = WRegisterFrom(temp2_loc); + __ Mov(temp2, temp); + } + // /* HeapReference<Class> */ temp = temp->component_type_ __ Ldr(temp, HeapOperand(temp, component_offset)); - GetAssembler()->MaybeUnpoisonHeapReference(temp); - __ Cbz(temp, slow_path->GetEntryLabel()); + codegen_->MaybeGenerateReadBarrier( + instruction, temp_loc, temp_loc, temp2_loc, component_offset); + + // If the component type is not null (i.e. the object is indeed + // an array), jump to label `check_non_primitive_component_type` + // to further check that this component type is not a primitive + // type. + __ Cbnz(temp, &check_non_primitive_component_type); + // Otherwise, jump to the slow path to throw the exception. + // + // But before, move back the object's class into `temp` before + // going into the slow path, as it has been overwritten in the + // meantime. + // /* HeapReference<Class> */ temp = obj->klass_ + __ Ldr(temp, HeapOperand(obj, class_offset)); + codegen_->MaybeGenerateReadBarrier(instruction, temp_loc, temp_loc, obj_loc, class_offset); + __ B(type_check_slow_path->GetEntryLabel()); + + __ Bind(&check_non_primitive_component_type); __ Ldrh(temp, HeapOperand(temp, primitive_offset)); static_assert(Primitive::kPrimNot == 0, "Expected 0 for kPrimNot"); - __ Cbnz(temp, slow_path->GetEntryLabel()); + __ Cbz(temp, &done); + // Same comment as above regarding `temp` and the slow path. + // /* HeapReference<Class> */ temp = obj->klass_ + __ Ldr(temp, HeapOperand(obj, class_offset)); + codegen_->MaybeGenerateReadBarrier(instruction, temp_loc, temp_loc, obj_loc, class_offset); + __ B(type_check_slow_path->GetEntryLabel()); break; } + case TypeCheckKind::kUnresolvedCheck: case TypeCheckKind::kInterfaceCheck: - default: - codegen_->InvokeRuntime(QUICK_ENTRY_POINT(pCheckCast), - instruction, - instruction->GetDexPc(), - nullptr); + // We always go into the type check slow path for the unresolved + // and interface check cases. + // + // We cannot directly call the CheckCast runtime entry point + // without resorting to a type checking slow path here (i.e. by + // calling InvokeRuntime directly), as it would require to + // assign fixed registers for the inputs of this HInstanceOf + // instruction (following the runtime calling convention), which + // might be cluttered by the potential first read barrier + // emission at the beginning of this method. + __ B(type_check_slow_path->GetEntryLabel()); break; } __ Bind(&done); - if (slow_path != nullptr) { - __ Bind(slow_path->GetExitLabel()); - } + __ Bind(type_check_slow_path->GetExitLabel()); } void LocationsBuilderARM64::VisitIntConstant(HIntConstant* constant) { @@ -2828,10 +3421,11 @@ void LocationsBuilderARM64::VisitInvokeInterface(HInvokeInterface* invoke) { void InstructionCodeGeneratorARM64::VisitInvokeInterface(HInvokeInterface* invoke) { // TODO: b/18116999, our IMTs can miss an IncompatibleClassChangeError. 
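Annotation: the HInstanceOf and HCheckCast sequences above compile down to a few class-metadata walks. The standalone C++ sketch below summarizes what the inlined TypeCheckKind cases compute (the struct and field names are simplified stand-ins, not ART's mirror::Class layout, and SlowPathInstanceOf is a placeholder for the InstanceofNonTrivial runtime entry point reached through TypeCheckSlowPathARM64):

enum class TypeCheckKind { kExactCheck, kAbstractClassCheck, kClassHierarchyCheck,
                           kArrayObjectCheck, kArrayCheck, kUnresolvedCheck, kInterfaceCheck };

// Simplified class model: only the fields the inlined checks read.
struct Class {
  const Class* super_class;     // mirror::Class::super_class_
  const Class* component_type;  // mirror::Class::component_type_ (null if not an array)
  bool is_primitive;            // derived from mirror::Class::primitive_type_ != kPrimNot
};

// Stub standing in for the runtime fallback; the real work happens in the slow path.
bool SlowPathInstanceOf(const Class* obj_klass, const Class* cls) { return false; }

bool InstanceOf(const Class* obj_klass, const Class* cls, TypeCheckKind kind) {
  // A null object was already filtered out by the Cbz(obj, &zero) above.
  switch (kind) {
    case TypeCheckKind::kExactCheck:
      return obj_klass == cls;
    case TypeCheckKind::kAbstractClassCheck:   // cls is abstract, so the first compare can
    case TypeCheckKind::kClassHierarchyCheck:  // never match; the emitted loop starts at super_class_
      for (const Class* k = obj_klass; k != nullptr; k = k->super_class) {
        if (k == cls) return true;
      }
      return false;
    case TypeCheckKind::kArrayObjectCheck:
      // Exact match, or an array whose component type is a reference type.
      return obj_klass == cls ||
             (obj_klass->component_type != nullptr && !obj_klass->component_type->is_primitive);
    case TypeCheckKind::kArrayCheck:
      // Exact compare inline, everything else on the slow path.
      return obj_klass == cls || SlowPathInstanceOf(obj_klass, cls);
    case TypeCheckKind::kUnresolvedCheck:
    case TypeCheckKind::kInterfaceCheck:
      return SlowPathInstanceOf(obj_klass, cls);
  }
  return false;  // unreachable
}

HCheckCast emits the same walks, but instead of materializing a boolean it branches to the (possibly fatal) TypeCheckSlowPathARM64, reloading the object's class into `temp` first when a read barrier may have clobbered it, as the comments in the hunk explain.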
- Register temp = XRegisterFrom(invoke->GetLocations()->GetTemp(0)); + LocationSummary* locations = invoke->GetLocations(); + Register temp = XRegisterFrom(locations->GetTemp(0)); uint32_t method_offset = mirror::Class::EmbeddedImTableEntryOffset( invoke->GetImtIndex() % mirror::Class::kImtSize, kArm64PointerSize).Uint32Value(); - Location receiver = invoke->GetLocations()->InAt(0); + Location receiver = locations->InAt(0); Offset class_offset = mirror::Object::ClassOffset(); Offset entry_point = ArtMethod::EntryPointFromQuickCompiledCodeOffset(kArm64WordSize); @@ -2843,14 +3437,22 @@ void InstructionCodeGeneratorARM64::VisitInvokeInterface(HInvokeInterface* invok scratch_scope.Exclude(ip1); __ Mov(ip1, invoke->GetDexMethodIndex()); - // temp = object->GetClass(); if (receiver.IsStackSlot()) { __ Ldr(temp.W(), StackOperandFrom(receiver)); + // /* HeapReference<Class> */ temp = temp->klass_ __ Ldr(temp.W(), HeapOperand(temp.W(), class_offset)); } else { + // /* HeapReference<Class> */ temp = receiver->klass_ __ Ldr(temp.W(), HeapOperandFrom(receiver, class_offset)); } codegen_->MaybeRecordImplicitNullCheck(invoke); + // Instead of simply (possibly) unpoisoning `temp` here, we should + // emit a read barrier for the previous class reference load. + // However this is not required in practice, as this is an + // intermediate/temporary reference and because the current + // concurrent copying collector keeps the from-space memory + // intact/accessible until the end of the marking phase (the + // concurrent copying collector may not in the future). GetAssembler()->MaybeUnpoisonHeapReference(temp.W()); // temp = temp->GetImtEntryAt(method_offset); __ Ldr(temp, MemOperand(temp, method_offset)); @@ -2926,7 +3528,7 @@ void CodeGeneratorARM64::GenerateStaticOrDirectCall(HInvokeStaticOrDirect* invok __ Ldr(XRegisterFrom(temp), MemOperand(tr, invoke->GetStringInitOffset())); break; case HInvokeStaticOrDirect::MethodLoadKind::kRecursive: - callee_method = invoke->GetLocations()->InAt(invoke->GetCurrentMethodInputIndex()); + callee_method = invoke->GetLocations()->InAt(invoke->GetSpecialInputIndex()); break; case HInvokeStaticOrDirect::MethodLoadKind::kDirectAddress: // Load method address from literal pool. 
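Annotation: the comments above ("no need to unpoison `temp` nor `temp2`, as we are comparing two poisoned references" in the array-set path, and the MaybeUnpoisonHeapReference kept in VisitInvokeInterface) rely on how heap reference poisoning behaves. A rough scalar model, assuming the current scheme where a poisoned compressed reference is the two's-complement negation of the real one (the assembler versions negate a W register in place):

#include <cstdint>

// Not ART code: a scalar model of PoisonHeapReference / MaybeUnpoisonHeapReference.
static constexpr bool kPoisonHeapReferences = true;  // build-time flag

inline uint32_t PoisonReference(uint32_t ref) {
  return kPoisonHeapReferences ? (0u - ref) : ref;  // two's-complement negation
}

inline uint32_t MaybeUnpoisonReference(uint32_t stored) {
  // Negation is its own inverse, so the same operation decodes the value.
  return kPoisonHeapReferences ? (0u - stored) : stored;
}

Because poisoning is a bijection, comparing two still-poisoned class references (as the array-set and type-check code above does) gives the same answer as comparing the unpoisoned values; unpoisoning only becomes necessary once the reference is about to be dereferenced.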
@@ -2960,7 +3562,7 @@ void CodeGeneratorARM64::GenerateStaticOrDirectCall(HInvokeStaticOrDirect* invok break; } case HInvokeStaticOrDirect::MethodLoadKind::kDexCacheViaMethod: { - Location current_method = invoke->GetLocations()->InAt(invoke->GetCurrentMethodInputIndex()); + Location current_method = invoke->GetLocations()->InAt(invoke->GetSpecialInputIndex()); Register reg = XRegisterFrom(temp); Register method_reg; if (current_method.IsRegister()) { @@ -2972,7 +3574,7 @@ void CodeGeneratorARM64::GenerateStaticOrDirectCall(HInvokeStaticOrDirect* invok __ Ldr(reg.X(), MemOperand(sp, kCurrentMethodStackOffset)); } - // temp = current_method->dex_cache_resolved_methods_; + // /* ArtMethod*[] */ temp = temp.ptr_sized_fields_->dex_cache_resolved_methods_; __ Ldr(reg.X(), MemOperand(method_reg.X(), ArtMethod::DexCacheResolvedMethodsOffset(kArm64WordSize).Int32Value())); @@ -3027,8 +3629,16 @@ void CodeGeneratorARM64::GenerateVirtualCall(HInvokeVirtual* invoke, Location te BlockPoolsScope block_pools(GetVIXLAssembler()); DCHECK(receiver.IsRegister()); + // /* HeapReference<Class> */ temp = receiver->klass_ __ Ldr(temp.W(), HeapOperandFrom(receiver, class_offset)); MaybeRecordImplicitNullCheck(invoke); + // Instead of simply (possibly) unpoisoning `temp` here, we should + // emit a read barrier for the previous class reference load. + // However this is not required in practice, as this is an + // intermediate/temporary reference and because the current + // concurrent copying collector keeps the from-space memory + // intact/accessible until the end of the marking phase (the + // concurrent copying collector may not in the future). GetAssembler()->MaybeUnpoisonHeapReference(temp.W()); // temp = temp->GetMethodAt(method_offset); __ Ldr(temp, MemOperand(temp, method_offset)); @@ -3141,7 +3751,8 @@ void LocationsBuilderARM64::VisitLoadClass(HLoadClass* cls) { CodeGenerator::CreateLoadClassLocationSummary( cls, LocationFrom(calling_convention.GetRegisterAt(0)), - LocationFrom(vixl::x0)); + LocationFrom(vixl::x0), + /* code_generator_supports_read_barrier */ true); } void InstructionCodeGeneratorARM64::VisitLoadClass(HLoadClass* cls) { @@ -3151,30 +3762,56 @@ void InstructionCodeGeneratorARM64::VisitLoadClass(HLoadClass* cls) { cls, cls->GetDexPc(), nullptr); + CheckEntrypointTypes<kQuickInitializeTypeAndVerifyAccess, void*, uint32_t>(); return; } + Location out_loc = cls->GetLocations()->Out(); Register out = OutputRegister(cls); Register current_method = InputRegisterAt(cls, 0); if (cls->IsReferrersClass()) { DCHECK(!cls->CanCallRuntime()); DCHECK(!cls->MustGenerateClinitCheck()); - __ Ldr(out, MemOperand(current_method, ArtMethod::DeclaringClassOffset().Int32Value())); + uint32_t declaring_class_offset = ArtMethod::DeclaringClassOffset().Int32Value(); + if (kEmitCompilerReadBarrier) { + // /* GcRoot<mirror::Class>* */ out = &(current_method->declaring_class_) + __ Add(out.X(), current_method.X(), declaring_class_offset); + // /* mirror::Class* */ out = out->Read() + codegen_->GenerateReadBarrierForRoot(cls, out_loc, out_loc); + } else { + // /* GcRoot<mirror::Class> */ out = current_method->declaring_class_ + __ Ldr(out, MemOperand(current_method, declaring_class_offset)); + } } else { - DCHECK(cls->CanCallRuntime()); MemberOffset resolved_types_offset = ArtMethod::DexCacheResolvedTypesOffset(kArm64PointerSize); + // /* GcRoot<mirror::Class>[] */ out = + // current_method.ptr_sized_fields_->dex_cache_resolved_types_ __ Ldr(out.X(), MemOperand(current_method, resolved_types_offset.Int32Value())); - 
__ Ldr(out, MemOperand(out.X(), CodeGenerator::GetCacheOffset(cls->GetTypeIndex()))); - // TODO: We will need a read barrier here. - SlowPathCodeARM64* slow_path = new (GetGraph()->GetArena()) LoadClassSlowPathARM64( - cls, cls, cls->GetDexPc(), cls->MustGenerateClinitCheck()); - codegen_->AddSlowPath(slow_path); - __ Cbz(out, slow_path->GetEntryLabel()); - if (cls->MustGenerateClinitCheck()) { - GenerateClassInitializationCheck(slow_path, out); + size_t cache_offset = CodeGenerator::GetCacheOffset(cls->GetTypeIndex()); + if (kEmitCompilerReadBarrier) { + // /* GcRoot<mirror::Class>* */ out = &out[type_index] + __ Add(out.X(), out.X(), cache_offset); + // /* mirror::Class* */ out = out->Read() + codegen_->GenerateReadBarrierForRoot(cls, out_loc, out_loc); } else { - __ Bind(slow_path->GetExitLabel()); + // /* GcRoot<mirror::Class> */ out = out[type_index] + __ Ldr(out, MemOperand(out.X(), cache_offset)); + } + + if (!cls->IsInDexCache() || cls->MustGenerateClinitCheck()) { + DCHECK(cls->CanCallRuntime()); + SlowPathCodeARM64* slow_path = new (GetGraph()->GetArena()) LoadClassSlowPathARM64( + cls, cls, cls->GetDexPc(), cls->MustGenerateClinitCheck()); + codegen_->AddSlowPath(slow_path); + if (!cls->IsInDexCache()) { + __ Cbz(out, slow_path->GetEntryLabel()); + } + if (cls->MustGenerateClinitCheck()) { + GenerateClassInitializationCheck(slow_path, out); + } else { + __ Bind(slow_path->GetExitLabel()); + } } } } @@ -3220,12 +3857,35 @@ void InstructionCodeGeneratorARM64::VisitLoadString(HLoadString* load) { SlowPathCodeARM64* slow_path = new (GetGraph()->GetArena()) LoadStringSlowPathARM64(load); codegen_->AddSlowPath(slow_path); + Location out_loc = load->GetLocations()->Out(); Register out = OutputRegister(load); Register current_method = InputRegisterAt(load, 0); - __ Ldr(out, MemOperand(current_method, ArtMethod::DeclaringClassOffset().Int32Value())); - __ Ldr(out.X(), HeapOperand(out, mirror::Class::DexCacheStringsOffset())); - __ Ldr(out, MemOperand(out.X(), CodeGenerator::GetCacheOffset(load->GetStringIndex()))); - // TODO: We will need a read barrier here. 
+ + uint32_t declaring_class_offset = ArtMethod::DeclaringClassOffset().Int32Value(); + if (kEmitCompilerReadBarrier) { + // /* GcRoot<mirror::Class>* */ out = &(current_method->declaring_class_) + __ Add(out.X(), current_method.X(), declaring_class_offset); + // /* mirror::Class* */ out = out->Read() + codegen_->GenerateReadBarrierForRoot(load, out_loc, out_loc); + } else { + // /* GcRoot<mirror::Class> */ out = current_method->declaring_class_ + __ Ldr(out, MemOperand(current_method, declaring_class_offset)); + } + + // /* GcRoot<mirror::String>[] */ out = out->dex_cache_strings_ + __ Ldr(out.X(), HeapOperand(out, mirror::Class::DexCacheStringsOffset().Uint32Value())); + + size_t cache_offset = CodeGenerator::GetCacheOffset(load->GetStringIndex()); + if (kEmitCompilerReadBarrier) { + // /* GcRoot<mirror::String>* */ out = &out[string_index] + __ Add(out.X(), out.X(), cache_offset); + // /* mirror::String* */ out = out->Read() + codegen_->GenerateReadBarrierForRoot(load, out_loc, out_loc); + } else { + // /* GcRoot<mirror::String> */ out = out[string_index] + __ Ldr(out, MemOperand(out.X(), cache_offset)); + } + __ Cbz(out, slow_path->GetEntryLabel()); __ Bind(slow_path->GetExitLabel()); } @@ -3260,7 +3920,11 @@ void InstructionCodeGeneratorARM64::VisitMonitorOperation(HMonitorOperation* ins instruction, instruction->GetDexPc(), nullptr); - CheckEntrypointTypes<kQuickLockObject, void, mirror::Object*>(); + if (instruction->IsEnter()) { + CheckEntrypointTypes<kQuickLockObject, void, mirror::Object*>(); + } else { + CheckEntrypointTypes<kQuickUnlockObject, void, mirror::Object*>(); + } } void LocationsBuilderARM64::VisitMul(HMul* mul) { @@ -3349,8 +4013,6 @@ void LocationsBuilderARM64::VisitNewArray(HNewArray* instruction) { locations->SetOut(LocationFrom(x0)); locations->SetInAt(0, LocationFrom(calling_convention.GetRegisterAt(1))); locations->SetInAt(1, LocationFrom(calling_convention.GetRegisterAt(2))); - CheckEntrypointTypes<kQuickAllocArrayWithAccessCheck, - void*, uint32_t, int32_t, ArtMethod*>(); } void InstructionCodeGeneratorARM64::VisitNewArray(HNewArray* instruction) { @@ -3372,17 +4034,12 @@ void LocationsBuilderARM64::VisitNewInstance(HNewInstance* instruction) { LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kCall); InvokeRuntimeCallingConvention calling_convention; - locations->AddTemp(LocationFrom(calling_convention.GetRegisterAt(0))); - locations->SetInAt(0, LocationFrom(calling_convention.GetRegisterAt(1))); + locations->SetInAt(0, LocationFrom(calling_convention.GetRegisterAt(0))); + locations->SetInAt(1, LocationFrom(calling_convention.GetRegisterAt(1))); locations->SetOut(calling_convention.GetReturnLocation(Primitive::kPrimNot)); - CheckEntrypointTypes<kQuickAllocObjectWithAccessCheck, void*, uint32_t, ArtMethod*>(); } void InstructionCodeGeneratorARM64::VisitNewInstance(HNewInstance* instruction) { - LocationSummary* locations = instruction->GetLocations(); - Register type_index = RegisterFrom(locations->GetTemp(0), Primitive::kPrimInt); - DCHECK(type_index.Is(w0)); - __ Mov(type_index, instruction->GetTypeIndex()); // Note: if heap poisoning is enabled, the entry point takes cares // of poisoning the reference. codegen_->InvokeRuntime(instruction->GetEntrypoint(), @@ -3559,6 +4216,11 @@ void InstructionCodeGeneratorARM64::VisitRem(HRem* rem) { int32_t entry_offset = (type == Primitive::kPrimFloat) ? 
QUICK_ENTRY_POINT(pFmodf) : QUICK_ENTRY_POINT(pFmod); codegen_->InvokeRuntime(entry_offset, rem, rem->GetDexPc(), nullptr); + if (type == Primitive::kPrimFloat) { + CheckEntrypointTypes<kQuickFmodf, float, float, float>(); + } else { + CheckEntrypointTypes<kQuickFmod, double, double, double>(); + } break; } @@ -3803,9 +4465,7 @@ void InstructionCodeGeneratorARM64::VisitTypeConversion(HTypeConversion* convers int min_size = std::min(result_size, input_size); Register output = OutputRegister(conversion); Register source = InputRegisterAt(conversion, 0); - if ((result_type == Primitive::kPrimChar) && (input_size < result_size)) { - __ Ubfx(output, source, 0, result_size * kBitsPerByte); - } else if (result_type == Primitive::kPrimInt && input_type == Primitive::kPrimLong) { + if (result_type == Primitive::kPrimInt && input_type == Primitive::kPrimLong) { // 'int' values are used directly as W registers, discarding the top // bits, so we don't need to sign-extend and can just perform a move. // We do not pass the `kDiscardForSameWReg` argument to force clearing the @@ -3814,9 +4474,11 @@ void InstructionCodeGeneratorARM64::VisitTypeConversion(HTypeConversion* convers // 32bit input value as a 64bit value assuming that the top 32 bits are // zero. __ Mov(output.W(), source.W()); - } else if ((result_type == Primitive::kPrimChar) || - ((input_type == Primitive::kPrimChar) && (result_size > input_size))) { - __ Ubfx(output, output.IsX() ? source.X() : source.W(), 0, min_size * kBitsPerByte); + } else if (result_type == Primitive::kPrimChar || + (input_type == Primitive::kPrimChar && input_size < result_size)) { + __ Ubfx(output, + output.IsX() ? source.X() : source.W(), + 0, Primitive::ComponentSize(Primitive::kPrimChar) * kBitsPerByte); } else { __ Sbfx(output, output.IsX() ? source.X() : source.W(), 0, min_size * kBitsPerByte); } @@ -3951,6 +4613,82 @@ void InstructionCodeGeneratorARM64::VisitPackedSwitch(HPackedSwitch* switch_inst } } +void CodeGeneratorARM64::GenerateReadBarrier(HInstruction* instruction, + Location out, + Location ref, + Location obj, + uint32_t offset, + Location index) { + DCHECK(kEmitCompilerReadBarrier); + + // If heap poisoning is enabled, the unpoisoning of the loaded + // reference will be carried out by the runtime within the slow + // path. + // + // Note that `ref` currently does not get unpoisoned (when heap + // poisoning is enabled), which is alright as the `ref` argument is + // not used by the artReadBarrierSlow entry point. + // + // TODO: Unpoison `ref` when it is used by artReadBarrierSlow. + SlowPathCodeARM64* slow_path = new (GetGraph()->GetArena()) + ReadBarrierForHeapReferenceSlowPathARM64(instruction, out, ref, obj, offset, index); + AddSlowPath(slow_path); + + // TODO: When read barrier has a fast path, add it here. + /* Currently the read barrier call is inserted after the original load. + * However, if we have a fast path, we need to perform the load of obj.LockWord *before* the + * original load. This load-load ordering is required by the read barrier. 
+ * The fast path/slow path (for Baker's algorithm) should look like: + * + * bool isGray = obj.LockWord & kReadBarrierMask; + * lfence; // load fence or artificial data dependence to prevent load-load reordering + * ref = obj.field; // this is the original load + * if (isGray) { + * ref = Mark(ref); // ideally the slow path just does Mark(ref) + * } + */ + + __ B(slow_path->GetEntryLabel()); + __ Bind(slow_path->GetExitLabel()); +} + +void CodeGeneratorARM64::MaybeGenerateReadBarrier(HInstruction* instruction, + Location out, + Location ref, + Location obj, + uint32_t offset, + Location index) { + if (kEmitCompilerReadBarrier) { + // If heap poisoning is enabled, unpoisoning will be taken care of + // by the runtime within the slow path. + GenerateReadBarrier(instruction, out, ref, obj, offset, index); + } else if (kPoisonHeapReferences) { + GetAssembler()->UnpoisonHeapReference(WRegisterFrom(out)); + } +} + +void CodeGeneratorARM64::GenerateReadBarrierForRoot(HInstruction* instruction, + Location out, + Location root) { + DCHECK(kEmitCompilerReadBarrier); + + // Note that GC roots are not affected by heap poisoning, so we do + // not need to do anything special for this here. + SlowPathCodeARM64* slow_path = + new (GetGraph()->GetArena()) ReadBarrierForRootSlowPathARM64(instruction, out, root); + AddSlowPath(slow_path); + + // TODO: Implement a fast path for ReadBarrierForRoot, performing + // the following operation (for Baker's algorithm): + // + // if (thread.tls32_.is_gc_marking) { + // root = Mark(root); + // } + + __ B(slow_path->GetEntryLabel()); + __ Bind(slow_path->GetExitLabel()); +} + #undef __ #undef QUICK_ENTRY_POINT diff --git a/compiler/optimizing/code_generator_arm64.h b/compiler/optimizing/code_generator_arm64.h index 881afcc123..7950f078ad 100644 --- a/compiler/optimizing/code_generator_arm64.h +++ b/compiler/optimizing/code_generator_arm64.h @@ -424,6 +424,51 @@ class CodeGeneratorARM64 : public CodeGenerator { void EmitLinkerPatches(ArenaVector<LinkerPatch>* linker_patches) OVERRIDE; + // Generate a read barrier for a heap reference within `instruction`. + // + // A read barrier for an object reference read from the heap is + // implemented as a call to the artReadBarrierSlow runtime entry + // point, which is passed the values in locations `ref`, `obj`, and + // `offset`: + // + // mirror::Object* artReadBarrierSlow(mirror::Object* ref, + // mirror::Object* obj, + // uint32_t offset); + // + // The `out` location contains the value returned by + // artReadBarrierSlow. + // + // When `index` is provided (i.e. for array accesses), the offset + // value passed to artReadBarrierSlow is adjusted to take `index` + // into account. + void GenerateReadBarrier(HInstruction* instruction, + Location out, + Location ref, + Location obj, + uint32_t offset, + Location index = Location::NoLocation()); + + // If read barriers are enabled, generate a read barrier for a heap reference. + // If heap poisoning is enabled, also unpoison the reference in `out`. + void MaybeGenerateReadBarrier(HInstruction* instruction, + Location out, + Location ref, + Location obj, + uint32_t offset, + Location index = Location::NoLocation()); + + // Generate a read barrier for a GC root within `instruction`. 
+ // + // A read barrier for an object reference GC root is implemented as + // a call to the artReadBarrierForRootSlow runtime entry point, + // which is passed the value in location `root`: + // + // mirror::Object* artReadBarrierForRootSlow(GcRoot<mirror::Object>* root); + // + // The `out` location contains the value returned by + // artReadBarrierForRootSlow. + void GenerateReadBarrierForRoot(HInstruction* instruction, Location out, Location root); + private: using Uint64ToLiteralMap = ArenaSafeMap<uint64_t, vixl::Literal<uint64_t>*>; using MethodToLiteralMap = ArenaSafeMap<MethodReference, diff --git a/compiler/optimizing/code_generator_mips.cc b/compiler/optimizing/code_generator_mips.cc index 919ed2db78..9dc9167824 100644 --- a/compiler/optimizing/code_generator_mips.cc +++ b/compiler/optimizing/code_generator_mips.cc @@ -415,13 +415,11 @@ class TypeCheckSlowPathMIPS : public SlowPathCodeMIPS { dex_pc, this, IsDirectEntrypoint(kQuickInstanceofNonTrivial)); + CheckEntrypointTypes< + kQuickInstanceofNonTrivial, uint32_t, const mirror::Class*, const mirror::Class*>(); Primitive::Type ret_type = instruction_->GetType(); Location ret_loc = calling_convention.GetReturnLocation(ret_type); mips_codegen->MoveLocation(locations->Out(), ret_loc, ret_type); - CheckEntrypointTypes<kQuickInstanceofNonTrivial, - uint32_t, - const mirror::Class*, - const mirror::Class*>(); } else { DCHECK(instruction_->IsCheckCast()); mips_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pCheckCast), @@ -461,6 +459,7 @@ class DeoptimizationSlowPathMIPS : public SlowPathCodeMIPS { dex_pc, this, IsDirectEntrypoint(kQuickDeoptimize)); + CheckEntrypointTypes<kQuickDeoptimize, void, void>(); } const char* GetDescription() const OVERRIDE { return "DeoptimizationSlowPathMIPS"; } @@ -2638,6 +2637,7 @@ void InstructionCodeGeneratorMIPS::HandleFieldGet(HInstruction* instruction, Register obj = locations->InAt(0).AsRegister<Register>(); LoadOperandType load_type = kLoadUnsignedByte; bool is_volatile = field_info.IsVolatile(); + uint32_t offset = field_info.GetFieldOffset().Uint32Value(); switch (type) { case Primitive::kPrimBoolean: @@ -2668,8 +2668,7 @@ void InstructionCodeGeneratorMIPS::HandleFieldGet(HInstruction* instruction, if (is_volatile && load_type == kLoadDoubleword) { InvokeRuntimeCallingConvention calling_convention; - __ Addiu32(locations->GetTemp(0).AsRegister<Register>(), - obj, field_info.GetFieldOffset().Uint32Value()); + __ Addiu32(locations->GetTemp(0).AsRegister<Register>(), obj, offset); // Do implicit Null check __ Lw(ZERO, locations->GetTemp(0).AsRegister<Register>(), 0); codegen_->RecordPcInfo(instruction, instruction->GetDexPc()); @@ -2692,21 +2691,34 @@ void InstructionCodeGeneratorMIPS::HandleFieldGet(HInstruction* instruction, if (type == Primitive::kPrimLong) { DCHECK(locations->Out().IsRegisterPair()); dst = locations->Out().AsRegisterPairLow<Register>(); + Register dst_high = locations->Out().AsRegisterPairHigh<Register>(); + if (obj == dst) { + __ LoadFromOffset(kLoadWord, dst_high, obj, offset + kMipsWordSize); + codegen_->MaybeRecordImplicitNullCheck(instruction); + __ LoadFromOffset(kLoadWord, dst, obj, offset); + } else { + __ LoadFromOffset(kLoadWord, dst, obj, offset); + codegen_->MaybeRecordImplicitNullCheck(instruction); + __ LoadFromOffset(kLoadWord, dst_high, obj, offset + kMipsWordSize); + } } else { DCHECK(locations->Out().IsRegister()); dst = locations->Out().AsRegister<Register>(); + __ LoadFromOffset(load_type, dst, obj, offset); } - __ LoadFromOffset(load_type, dst, obj, 
field_info.GetFieldOffset().Uint32Value()); } else { DCHECK(locations->Out().IsFpuRegister()); FRegister dst = locations->Out().AsFpuRegister<FRegister>(); if (type == Primitive::kPrimFloat) { - __ LoadSFromOffset(dst, obj, field_info.GetFieldOffset().Uint32Value()); + __ LoadSFromOffset(dst, obj, offset); } else { - __ LoadDFromOffset(dst, obj, field_info.GetFieldOffset().Uint32Value()); + __ LoadDFromOffset(dst, obj, offset); } } - codegen_->MaybeRecordImplicitNullCheck(instruction); + // Longs are handled earlier. + if (type != Primitive::kPrimLong) { + codegen_->MaybeRecordImplicitNullCheck(instruction); + } } if (is_volatile) { @@ -2752,6 +2764,7 @@ void InstructionCodeGeneratorMIPS::HandleFieldSet(HInstruction* instruction, Register obj = locations->InAt(0).AsRegister<Register>(); StoreOperandType store_type = kStoreByte; bool is_volatile = field_info.IsVolatile(); + uint32_t offset = field_info.GetFieldOffset().Uint32Value(); switch (type) { case Primitive::kPrimBoolean: @@ -2782,8 +2795,7 @@ void InstructionCodeGeneratorMIPS::HandleFieldSet(HInstruction* instruction, if (is_volatile && store_type == kStoreDoubleword) { InvokeRuntimeCallingConvention calling_convention; - __ Addiu32(locations->GetTemp(0).AsRegister<Register>(), - obj, field_info.GetFieldOffset().Uint32Value()); + __ Addiu32(locations->GetTemp(0).AsRegister<Register>(), obj, offset); // Do implicit Null check. __ Lw(ZERO, locations->GetTemp(0).AsRegister<Register>(), 0); codegen_->RecordPcInfo(instruction, instruction->GetDexPc()); @@ -2806,21 +2818,28 @@ void InstructionCodeGeneratorMIPS::HandleFieldSet(HInstruction* instruction, if (type == Primitive::kPrimLong) { DCHECK(locations->InAt(1).IsRegisterPair()); src = locations->InAt(1).AsRegisterPairLow<Register>(); + Register src_high = locations->InAt(1).AsRegisterPairHigh<Register>(); + __ StoreToOffset(kStoreWord, src, obj, offset); + codegen_->MaybeRecordImplicitNullCheck(instruction); + __ StoreToOffset(kStoreWord, src_high, obj, offset + kMipsWordSize); } else { DCHECK(locations->InAt(1).IsRegister()); src = locations->InAt(1).AsRegister<Register>(); + __ StoreToOffset(store_type, src, obj, offset); } - __ StoreToOffset(store_type, src, obj, field_info.GetFieldOffset().Uint32Value()); } else { DCHECK(locations->InAt(1).IsFpuRegister()); FRegister src = locations->InAt(1).AsFpuRegister<FRegister>(); if (type == Primitive::kPrimFloat) { - __ StoreSToOffset(src, obj, field_info.GetFieldOffset().Uint32Value()); + __ StoreSToOffset(src, obj, offset); } else { - __ StoreDToOffset(src, obj, field_info.GetFieldOffset().Uint32Value()); + __ StoreDToOffset(src, obj, offset); } } - codegen_->MaybeRecordImplicitNullCheck(instruction); + // Longs are handled earlier. + if (type != Primitive::kPrimLong) { + codegen_->MaybeRecordImplicitNullCheck(instruction); + } } // TODO: memory barriers? 
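Annotation: the HandleFieldGet/HandleFieldSet hunks above split a 64-bit field access into two 32-bit word accesses on MIPS32. Two details matter: the implicit null check must be attached to the first of the two accesses (that is the instruction that can fault), and when the low half of the destination pair aliases the object register, the high word has to be loaded first so the base address is not clobbered before the second load. A minimal sketch of that ordering rule, with hypothetical LoadWord/RecordImplicitNullCheck helpers standing in for the assembler and code generator:

#include <cstdint>

struct Reg { int id; };  // stand-in for a MIPS core register

// Placeholder emitters (assumed names, not the ART assembler API).
void LoadWord(Reg dst, Reg base, uint32_t offset) {}
void RecordImplicitNullCheck() {}

constexpr uint32_t kWordSize = 4;  // kMipsWordSize

void EmitLoadLongField(Reg dst_lo, Reg dst_hi, Reg obj, uint32_t offset) {
  if (obj.id == dst_lo.id) {
    // dst_lo would overwrite the base register, so read the high word first.
    LoadWord(dst_hi, obj, offset + kWordSize);
    RecordImplicitNullCheck();         // the first access is the one that faults on null
    LoadWord(dst_lo, obj, offset);     // safe: this is the last use of `obj`
  } else {
    LoadWord(dst_lo, obj, offset);
    RecordImplicitNullCheck();
    LoadWord(dst_hi, obj, offset + kWordSize);
  }
}

The store path records the null check after the first word store for the same reason, and the "// Longs are handled earlier." guard then keeps the generic MaybeRecordImplicitNullCheck at the end of the function from firing a second time.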
@@ -3031,7 +3050,7 @@ void CodeGeneratorMIPS::GenerateStaticOrDirectCall(HInvokeStaticOrDirect* invoke invoke->GetStringInitOffset()); break; case HInvokeStaticOrDirect::MethodLoadKind::kRecursive: - callee_method = invoke->GetLocations()->InAt(invoke->GetCurrentMethodInputIndex()); + callee_method = invoke->GetLocations()->InAt(invoke->GetSpecialInputIndex()); break; case HInvokeStaticOrDirect::MethodLoadKind::kDirectAddress: __ LoadConst32(temp.AsRegister<Register>(), invoke->GetMethodAddress()); @@ -3043,7 +3062,7 @@ void CodeGeneratorMIPS::GenerateStaticOrDirectCall(HInvokeStaticOrDirect* invoke LOG(FATAL) << "Unsupported"; UNREACHABLE(); case HInvokeStaticOrDirect::MethodLoadKind::kDexCacheViaMethod: { - Location current_method = invoke->GetLocations()->InAt(invoke->GetCurrentMethodInputIndex()); + Location current_method = invoke->GetLocations()->InAt(invoke->GetSpecialInputIndex()); Register reg = temp.AsRegister<Register>(); Register method_reg; if (current_method.IsRegister()) { @@ -3170,6 +3189,7 @@ void InstructionCodeGeneratorMIPS::VisitLoadClass(HLoadClass* cls) { cls->GetDexPc(), nullptr, IsDirectEntrypoint(kQuickInitializeTypeAndVerifyAccess)); + CheckEntrypointTypes<kQuickInitializeTypeAndVerifyAccess, void*, uint32_t>(); return; } @@ -3181,21 +3201,26 @@ void InstructionCodeGeneratorMIPS::VisitLoadClass(HLoadClass* cls) { __ LoadFromOffset(kLoadWord, out, current_method, ArtMethod::DeclaringClassOffset().Int32Value()); } else { - DCHECK(cls->CanCallRuntime()); __ LoadFromOffset(kLoadWord, out, current_method, ArtMethod::DexCacheResolvedTypesOffset(kMipsPointerSize).Int32Value()); __ LoadFromOffset(kLoadWord, out, out, CodeGenerator::GetCacheOffset(cls->GetTypeIndex())); - SlowPathCodeMIPS* slow_path = new (GetGraph()->GetArena()) LoadClassSlowPathMIPS( - cls, - cls, - cls->GetDexPc(), - cls->MustGenerateClinitCheck()); - codegen_->AddSlowPath(slow_path); - __ Beqz(out, slow_path->GetEntryLabel()); - if (cls->MustGenerateClinitCheck()) { - GenerateClassInitializationCheck(slow_path, out); - } else { - __ Bind(slow_path->GetExitLabel()); + + if (!cls->IsInDexCache() || cls->MustGenerateClinitCheck()) { + DCHECK(cls->CanCallRuntime()); + SlowPathCodeMIPS* slow_path = new (GetGraph()->GetArena()) LoadClassSlowPathMIPS( + cls, + cls, + cls->GetDexPc(), + cls->MustGenerateClinitCheck()); + codegen_->AddSlowPath(slow_path); + if (!cls->IsInDexCache()) { + __ Beqz(out, slow_path->GetEntryLabel()); + } + if (cls->MustGenerateClinitCheck()) { + GenerateClassInitializationCheck(slow_path, out); + } else { + __ Bind(slow_path->GetExitLabel()); + } } } } @@ -3478,17 +3503,12 @@ void LocationsBuilderMIPS::VisitNewInstance(HNewInstance* instruction) { LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kCall); InvokeRuntimeCallingConvention calling_convention; - locations->AddTemp(Location::RegisterLocation(calling_convention.GetRegisterAt(0))); - locations->AddTemp(Location::RegisterLocation(calling_convention.GetRegisterAt(1))); + locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0))); + locations->SetInAt(1, Location::RegisterLocation(calling_convention.GetRegisterAt(1))); locations->SetOut(calling_convention.GetReturnLocation(Primitive::kPrimNot)); } void InstructionCodeGeneratorMIPS::VisitNewInstance(HNewInstance* instruction) { - InvokeRuntimeCallingConvention calling_convention; - Register current_method_register = calling_convention.GetRegisterAt(1); - __ Lw(current_method_register, SP, 
kCurrentMethodStackOffset); - // Move an uint16_t value to a register. - __ LoadConst32(calling_convention.GetRegisterAt(0), instruction->GetTypeIndex()); codegen_->InvokeRuntime( GetThreadOffset<kMipsWordSize>(instruction->GetEntrypoint()).Int32Value(), instruction, @@ -3705,7 +3725,7 @@ void InstructionCodeGeneratorMIPS::VisitRem(HRem* instruction) { instruction, instruction->GetDexPc(), nullptr, IsDirectEntrypoint(kQuickFmodf)); - CheckEntrypointTypes<kQuickL2f, float, int64_t>(); + CheckEntrypointTypes<kQuickFmodf, float, float, float>(); break; } case Primitive::kPrimDouble: { @@ -3713,7 +3733,7 @@ void InstructionCodeGeneratorMIPS::VisitRem(HRem* instruction) { instruction, instruction->GetDexPc(), nullptr, IsDirectEntrypoint(kQuickFmod)); - CheckEntrypointTypes<kQuickL2d, double, int64_t>(); + CheckEntrypointTypes<kQuickFmod, double, double, double>(); break; } default: diff --git a/compiler/optimizing/code_generator_mips64.cc b/compiler/optimizing/code_generator_mips64.cc index 5864660890..934f24bfb0 100644 --- a/compiler/optimizing/code_generator_mips64.cc +++ b/compiler/optimizing/code_generator_mips64.cc @@ -27,8 +27,8 @@ #include "mirror/class-inl.h" #include "offsets.h" #include "thread.h" -#include "utils/mips64/assembler_mips64.h" #include "utils/assembler.h" +#include "utils/mips64/assembler_mips64.h" #include "utils/stack_checks.h" namespace art { @@ -210,7 +210,7 @@ class LoadClassSlowPathMIPS64 : public SlowPathCodeMIPS64 { } RestoreLiveRegisters(codegen, locations); - __ B(GetExitLabel()); + __ Bc(GetExitLabel()); } const char* GetDescription() const OVERRIDE { return "LoadClassSlowPathMIPS64"; } @@ -257,7 +257,7 @@ class LoadStringSlowPathMIPS64 : public SlowPathCodeMIPS64 { type); RestoreLiveRegisters(codegen, locations); - __ B(GetExitLabel()); + __ Bc(GetExitLabel()); } const char* GetDescription() const OVERRIDE { return "LoadStringSlowPathMIPS64"; } @@ -312,13 +312,13 @@ class SuspendCheckSlowPathMIPS64 : public SlowPathCodeMIPS64 { CheckEntrypointTypes<kQuickTestSuspend, void, void>(); RestoreLiveRegisters(codegen, instruction_->GetLocations()); if (successor_ == nullptr) { - __ B(GetReturnLabel()); + __ Bc(GetReturnLabel()); } else { - __ B(mips64_codegen->GetLabelOf(successor_)); + __ Bc(mips64_codegen->GetLabelOf(successor_)); } } - Label* GetReturnLabel() { + Mips64Label* GetReturnLabel() { DCHECK(successor_ == nullptr); return &return_label_; } @@ -331,7 +331,7 @@ class SuspendCheckSlowPathMIPS64 : public SlowPathCodeMIPS64 { HBasicBlock* const successor_; // If `successor_` is null, the label to branch to after the suspend check. 
- Label return_label_; + Mips64Label return_label_; DISALLOW_COPY_AND_ASSIGN(SuspendCheckSlowPathMIPS64); }; @@ -366,13 +366,11 @@ class TypeCheckSlowPathMIPS64 : public SlowPathCodeMIPS64 { instruction_, dex_pc, this); + CheckEntrypointTypes< + kQuickInstanceofNonTrivial, uint32_t, const mirror::Class*, const mirror::Class*>(); Primitive::Type ret_type = instruction_->GetType(); Location ret_loc = calling_convention.GetReturnLocation(ret_type); mips64_codegen->MoveLocation(locations->Out(), ret_loc, ret_type); - CheckEntrypointTypes<kQuickInstanceofNonTrivial, - uint32_t, - const mirror::Class*, - const mirror::Class*>(); } else { DCHECK(instruction_->IsCheckCast()); mips64_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pCheckCast), instruction_, dex_pc, this); @@ -380,7 +378,7 @@ class TypeCheckSlowPathMIPS64 : public SlowPathCodeMIPS64 { } RestoreLiveRegisters(codegen, locations); - __ B(GetExitLabel()); + __ Bc(GetExitLabel()); } const char* GetDescription() const OVERRIDE { return "TypeCheckSlowPathMIPS64"; } @@ -404,6 +402,7 @@ class DeoptimizationSlowPathMIPS64 : public SlowPathCodeMIPS64 { uint32_t dex_pc = deoptimize->GetDexPc(); CodeGeneratorMIPS64* mips64_codegen = down_cast<CodeGeneratorMIPS64*>(codegen); mips64_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pDeoptimize), instruction_, dex_pc, this); + CheckEntrypointTypes<kQuickDeoptimize, void, void>(); } const char* GetDescription() const OVERRIDE { return "DeoptimizationSlowPathMIPS64"; } @@ -441,6 +440,32 @@ CodeGeneratorMIPS64::CodeGeneratorMIPS64(HGraph* graph, #define QUICK_ENTRY_POINT(x) QUICK_ENTRYPOINT_OFFSET(kMips64WordSize, x).Int32Value() void CodeGeneratorMIPS64::Finalize(CodeAllocator* allocator) { + // Ensure that we fix up branches. + __ FinalizeCode(); + + // Adjust native pc offsets in stack maps. + for (size_t i = 0, num = stack_map_stream_.GetNumberOfStackMaps(); i != num; ++i) { + uint32_t old_position = stack_map_stream_.GetStackMap(i).native_pc_offset; + uint32_t new_position = __ GetAdjustedPosition(old_position); + DCHECK_GE(new_position, old_position); + stack_map_stream_.SetStackMapNativePcOffset(i, new_position); + } + + // Adjust pc offsets for the disassembly information. + if (disasm_info_ != nullptr) { + GeneratedCodeInterval* frame_entry_interval = disasm_info_->GetFrameEntryInterval(); + frame_entry_interval->start = __ GetAdjustedPosition(frame_entry_interval->start); + frame_entry_interval->end = __ GetAdjustedPosition(frame_entry_interval->end); + for (auto& it : *disasm_info_->GetInstructionIntervals()) { + it.second.start = __ GetAdjustedPosition(it.second.start); + it.second.end = __ GetAdjustedPosition(it.second.end); + } + for (auto& it : *disasm_info_->GetSlowPathIntervals()) { + it.code_interval.start = __ GetAdjustedPosition(it.code_interval.start); + it.code_interval.end = __ GetAdjustedPosition(it.code_interval.end); + } + } + CodeGenerator::Finalize(allocator); } @@ -603,6 +628,7 @@ void CodeGeneratorMIPS64::GenerateFrameExit() { } __ Jr(RA); + __ Nop(); __ cfi().RestoreState(); __ cfi().DefCFAOffset(GetFrameSize()); @@ -939,7 +965,7 @@ Location CodeGeneratorMIPS64::GetStackLocation(HLoadLocal* load) const { } void CodeGeneratorMIPS64::MarkGCCard(GpuRegister object, GpuRegister value) { - Label done; + Mips64Label done; GpuRegister card = AT; GpuRegister temp = TMP; __ Beqzc(value, &done); @@ -1048,6 +1074,7 @@ void CodeGeneratorMIPS64::InvokeRuntime(int32_t entry_point_offset, // TODO: anything related to T9/GP/GOT/PIC/.so's? 
__ LoadFromOffset(kLoadDoubleword, T9, TR, entry_point_offset); __ Jalr(T9); + __ Nop(); RecordPcInfo(instruction, dex_pc, slow_path); } @@ -1079,7 +1106,7 @@ void InstructionCodeGeneratorMIPS64::GenerateSuspendCheck(HSuspendCheck* instruc __ Bind(slow_path->GetReturnLabel()); } else { __ Beqzc(TMP, codegen_->GetLabelOf(successor)); - __ B(slow_path->GetEntryLabel()); + __ Bc(slow_path->GetEntryLabel()); // slow_path will return to GetLabelOf(successor). } } @@ -1583,6 +1610,7 @@ void InstructionCodeGeneratorMIPS64::VisitArraySet(HArraySet* instruction) { instruction, instruction->GetDexPc(), nullptr); + CheckEntrypointTypes<kQuickAputObject, void, mirror::Array*, int32_t, mirror::Object*>(); } break; } @@ -1669,12 +1697,7 @@ void InstructionCodeGeneratorMIPS64::VisitBoundsCheck(HBoundsCheck* instruction) // length is limited by the maximum positive signed 32-bit integer. // Unsigned comparison of length and index checks for index < 0 // and for length <= index simultaneously. - // Mips R6 requires lhs != rhs for compact branches. - if (index == length) { - __ B(slow_path->GetEntryLabel()); - } else { - __ Bgeuc(index, length, slow_path->GetEntryLabel()); - } + __ Bgeuc(index, length, slow_path->GetEntryLabel()); } void LocationsBuilderMIPS64::VisitCheckCast(HCheckCast* instruction) { @@ -1796,6 +1819,19 @@ void InstructionCodeGeneratorMIPS64::VisitCompare(HCompare* instruction) { : QUICK_ENTRY_POINT(pCmplDouble); } codegen_->InvokeRuntime(entry_point_offset, instruction, instruction->GetDexPc(), nullptr); + if (in_type == Primitive::kPrimFloat) { + if (instruction->IsGtBias()) { + CheckEntrypointTypes<kQuickCmpgFloat, int32_t, float, float>(); + } else { + CheckEntrypointTypes<kQuickCmplFloat, int32_t, float, float>(); + } + } else { + if (instruction->IsGtBias()) { + CheckEntrypointTypes<kQuickCmpgDouble, int32_t, double, double>(); + } else { + CheckEntrypointTypes<kQuickCmplDouble, int32_t, double, double>(); + } + } break; } @@ -2264,7 +2300,7 @@ void InstructionCodeGeneratorMIPS64::VisitDivZeroCheck(HDivZeroCheck* instructio if (value.IsConstant()) { int64_t divisor = codegen_->GetInt64ValueOf(value.GetConstant()->AsConstant()); if (divisor == 0) { - __ B(slow_path->GetEntryLabel()); + __ Bc(slow_path->GetEntryLabel()); } else { // A division by a non-null constant is valid. We don't need to perform // any check, so simply fall through. @@ -2316,7 +2352,7 @@ void InstructionCodeGeneratorMIPS64::HandleGoto(HInstruction* got, HBasicBlock* GenerateSuspendCheck(previous->AsSuspendCheck(), nullptr); } if (!codegen_->GoesToNextBlock(block, successor)) { - __ B(codegen_->GetLabelOf(successor)); + __ Bc(codegen_->GetLabelOf(successor)); } } @@ -2341,8 +2377,8 @@ void InstructionCodeGeneratorMIPS64::VisitTryBoundary(HTryBoundary* try_boundary void InstructionCodeGeneratorMIPS64::GenerateTestAndBranch(HInstruction* instruction, size_t condition_input_index, - Label* true_target, - Label* false_target) { + Mips64Label* true_target, + Mips64Label* false_target) { HInstruction* cond = instruction->InputAt(condition_input_index); if (true_target == nullptr && false_target == nullptr) { @@ -2352,12 +2388,12 @@ void InstructionCodeGeneratorMIPS64::GenerateTestAndBranch(HInstruction* instruc // Constant condition, statically compared against 1. 
if (cond->AsIntConstant()->IsOne()) { if (true_target != nullptr) { - __ B(true_target); + __ Bc(true_target); } } else { DCHECK(cond->AsIntConstant()->IsZero()); if (false_target != nullptr) { - __ B(false_target); + __ Bc(false_target); } } return; @@ -2397,7 +2433,7 @@ void InstructionCodeGeneratorMIPS64::GenerateTestAndBranch(HInstruction* instruc } IfCondition if_cond; - Label* non_fallthrough_target; + Mips64Label* non_fallthrough_target; if (true_target == nullptr) { if_cond = condition->GetOppositeCondition(); non_fallthrough_target = false_target; @@ -2435,7 +2471,7 @@ void InstructionCodeGeneratorMIPS64::GenerateTestAndBranch(HInstruction* instruc __ Bnezc(lhs, non_fallthrough_target); // > 0 if non-zero break; case kCondAE: - __ B(non_fallthrough_target); // always true + __ Bc(non_fallthrough_target); // always true break; } } else { @@ -2443,60 +2479,37 @@ void InstructionCodeGeneratorMIPS64::GenerateTestAndBranch(HInstruction* instruc rhs_reg = TMP; __ LoadConst32(rhs_reg, rhs_imm); } - // It looks like we can get here with lhs == rhs. Should that be possible at all? - // Mips R6 requires lhs != rhs for compact branches. - if (lhs == rhs_reg) { - DCHECK(!use_imm); - switch (if_cond) { - case kCondEQ: - case kCondGE: - case kCondLE: - case kCondBE: - case kCondAE: - // if lhs == rhs for a positive condition, then it is a branch - __ B(non_fallthrough_target); - break; - case kCondNE: - case kCondLT: - case kCondGT: - case kCondB: - case kCondA: - // if lhs == rhs for a negative condition, then it is a NOP - break; - } - } else { - switch (if_cond) { - case kCondEQ: - __ Beqc(lhs, rhs_reg, non_fallthrough_target); - break; - case kCondNE: - __ Bnec(lhs, rhs_reg, non_fallthrough_target); - break; - case kCondLT: - __ Bltc(lhs, rhs_reg, non_fallthrough_target); - break; - case kCondGE: - __ Bgec(lhs, rhs_reg, non_fallthrough_target); - break; - case kCondLE: - __ Bgec(rhs_reg, lhs, non_fallthrough_target); - break; - case kCondGT: - __ Bltc(rhs_reg, lhs, non_fallthrough_target); - break; - case kCondB: - __ Bltuc(lhs, rhs_reg, non_fallthrough_target); - break; - case kCondAE: - __ Bgeuc(lhs, rhs_reg, non_fallthrough_target); - break; - case kCondBE: - __ Bgeuc(rhs_reg, lhs, non_fallthrough_target); - break; - case kCondA: - __ Bltuc(rhs_reg, lhs, non_fallthrough_target); - break; - } + switch (if_cond) { + case kCondEQ: + __ Beqc(lhs, rhs_reg, non_fallthrough_target); + break; + case kCondNE: + __ Bnec(lhs, rhs_reg, non_fallthrough_target); + break; + case kCondLT: + __ Bltc(lhs, rhs_reg, non_fallthrough_target); + break; + case kCondGE: + __ Bgec(lhs, rhs_reg, non_fallthrough_target); + break; + case kCondLE: + __ Bgec(rhs_reg, lhs, non_fallthrough_target); + break; + case kCondGT: + __ Bltc(rhs_reg, lhs, non_fallthrough_target); + break; + case kCondB: + __ Bltuc(lhs, rhs_reg, non_fallthrough_target); + break; + case kCondAE: + __ Bgeuc(lhs, rhs_reg, non_fallthrough_target); + break; + case kCondBE: + __ Bgeuc(rhs_reg, lhs, non_fallthrough_target); + break; + case kCondA: + __ Bltuc(rhs_reg, lhs, non_fallthrough_target); + break; } } } @@ -2504,7 +2517,7 @@ void InstructionCodeGeneratorMIPS64::GenerateTestAndBranch(HInstruction* instruc // If neither branch falls through (case 3), the conditional branch to `true_target` // was already emitted (case 2) and we need to emit a jump to `false_target`. 
if (true_target != nullptr && false_target != nullptr) { - __ B(false_target); + __ Bc(false_target); } } @@ -2518,9 +2531,9 @@ void LocationsBuilderMIPS64::VisitIf(HIf* if_instr) { void InstructionCodeGeneratorMIPS64::VisitIf(HIf* if_instr) { HBasicBlock* true_successor = if_instr->IfTrueSuccessor(); HBasicBlock* false_successor = if_instr->IfFalseSuccessor(); - Label* true_target = codegen_->GoesToNextBlock(if_instr->GetBlock(), true_successor) ? + Mips64Label* true_target = codegen_->GoesToNextBlock(if_instr->GetBlock(), true_successor) ? nullptr : codegen_->GetLabelOf(true_successor); - Label* false_target = codegen_->GoesToNextBlock(if_instr->GetBlock(), false_successor) ? + Mips64Label* false_target = codegen_->GoesToNextBlock(if_instr->GetBlock(), false_successor) ? nullptr : codegen_->GetLabelOf(false_successor); GenerateTestAndBranch(if_instr, /* condition_input_index */ 0, true_target, false_target); } @@ -2695,7 +2708,7 @@ void InstructionCodeGeneratorMIPS64::VisitInstanceOf(HInstanceOf* instruction) { GpuRegister cls = locations->InAt(1).AsRegister<GpuRegister>(); GpuRegister out = locations->Out().AsRegister<GpuRegister>(); - Label done; + Mips64Label done; // Return 0 if `obj` is null. // TODO: Avoid this check if we know `obj` is not null. @@ -2790,6 +2803,7 @@ void InstructionCodeGeneratorMIPS64::VisitInvokeInterface(HInvokeInterface* invo __ LoadFromOffset(kLoadDoubleword, T9, temp, entry_point.Int32Value()); // T9(); __ Jalr(T9); + __ Nop(); DCHECK(!codegen_->IsLeafMethod()); codegen_->RecordPcInfo(invoke, invoke->GetDexPc()); } @@ -2822,9 +2836,9 @@ void LocationsBuilderMIPS64::VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* in // sorted out. if (invoke->HasCurrentMethodInput()) { LocationSummary* locations = invoke->GetLocations(); - Location location = locations->InAt(invoke->GetCurrentMethodInputIndex()); + Location location = locations->InAt(invoke->GetSpecialInputIndex()); if (location.IsUnallocated() && location.GetPolicy() == Location::kRequiresRegister) { - locations->SetInAt(invoke->GetCurrentMethodInputIndex(), Location::NoLocation()); + locations->SetInAt(invoke->GetSpecialInputIndex(), Location::NoLocation()); } } } @@ -2882,7 +2896,7 @@ void CodeGeneratorMIPS64::GenerateStaticOrDirectCall(HInvokeStaticOrDirect* invo invoke->GetStringInitOffset()); break; case HInvokeStaticOrDirect::MethodLoadKind::kRecursive: - callee_method = invoke->GetLocations()->InAt(invoke->GetCurrentMethodInputIndex()); + callee_method = invoke->GetLocations()->InAt(invoke->GetSpecialInputIndex()); break; case HInvokeStaticOrDirect::MethodLoadKind::kDirectAddress: __ LoadConst64(temp.AsRegister<GpuRegister>(), invoke->GetMethodAddress()); @@ -2894,7 +2908,7 @@ void CodeGeneratorMIPS64::GenerateStaticOrDirectCall(HInvokeStaticOrDirect* invo LOG(FATAL) << "Unsupported"; UNREACHABLE(); case HInvokeStaticOrDirect::MethodLoadKind::kDexCacheViaMethod: { - Location current_method = invoke->GetLocations()->InAt(invoke->GetCurrentMethodInputIndex()); + Location current_method = invoke->GetLocations()->InAt(invoke->GetSpecialInputIndex()); GpuRegister reg = temp.AsRegister<GpuRegister>(); GpuRegister method_reg; if (current_method.IsRegister()) { @@ -2924,13 +2938,14 @@ void CodeGeneratorMIPS64::GenerateStaticOrDirectCall(HInvokeStaticOrDirect* invo switch (invoke->GetCodePtrLocation()) { case HInvokeStaticOrDirect::CodePtrLocation::kCallSelf: - __ Jalr(&frame_entry_label_, T9); + __ Jialc(&frame_entry_label_, T9); break; case HInvokeStaticOrDirect::CodePtrLocation::kCallDirect: // LR = 
invoke->GetDirectCodePtr(); __ LoadConst64(T9, invoke->GetDirectCodePtr()); // LR() __ Jalr(T9); + __ Nop(); break; case HInvokeStaticOrDirect::CodePtrLocation::kCallDirectWithFixup: case HInvokeStaticOrDirect::CodePtrLocation::kCallPCRelative: @@ -2947,6 +2962,7 @@ void CodeGeneratorMIPS64::GenerateStaticOrDirectCall(HInvokeStaticOrDirect* invo kMips64WordSize).Int32Value()); // T9() __ Jalr(T9); + __ Nop(); break; } DCHECK(!IsLeafMethod()); @@ -2988,6 +3004,7 @@ void CodeGeneratorMIPS64::GenerateVirtualCall(HInvokeVirtual* invoke, Location t __ LoadFromOffset(kLoadDoubleword, T9, temp, entry_point.Int32Value()); // T9(); __ Jalr(T9); + __ Nop(); } void InstructionCodeGeneratorMIPS64::VisitInvokeVirtual(HInvokeVirtual* invoke) { @@ -3016,6 +3033,7 @@ void InstructionCodeGeneratorMIPS64::VisitLoadClass(HLoadClass* cls) { cls, cls->GetDexPc(), nullptr); + CheckEntrypointTypes<kQuickInitializeTypeAndVerifyAccess, void*, uint32_t>(); return; } @@ -3027,22 +3045,26 @@ void InstructionCodeGeneratorMIPS64::VisitLoadClass(HLoadClass* cls) { __ LoadFromOffset(kLoadUnsignedWord, out, current_method, ArtMethod::DeclaringClassOffset().Int32Value()); } else { - DCHECK(cls->CanCallRuntime()); __ LoadFromOffset(kLoadDoubleword, out, current_method, ArtMethod::DexCacheResolvedTypesOffset(kMips64PointerSize).Int32Value()); __ LoadFromOffset(kLoadUnsignedWord, out, out, CodeGenerator::GetCacheOffset(cls->GetTypeIndex())); // TODO: We will need a read barrier here. - SlowPathCodeMIPS64* slow_path = new (GetGraph()->GetArena()) LoadClassSlowPathMIPS64( - cls, - cls, - cls->GetDexPc(), - cls->MustGenerateClinitCheck()); - codegen_->AddSlowPath(slow_path); - __ Beqzc(out, slow_path->GetEntryLabel()); - if (cls->MustGenerateClinitCheck()) { - GenerateClassInitializationCheck(slow_path, out); - } else { - __ Bind(slow_path->GetExitLabel()); + if (!cls->IsInDexCache() || cls->MustGenerateClinitCheck()) { + DCHECK(cls->CanCallRuntime()); + SlowPathCodeMIPS64* slow_path = new (GetGraph()->GetArena()) LoadClassSlowPathMIPS64( + cls, + cls, + cls->GetDexPc(), + cls->MustGenerateClinitCheck()); + codegen_->AddSlowPath(slow_path); + if (!cls->IsInDexCache()) { + __ Beqzc(out, slow_path->GetEntryLabel()); + } + if (cls->MustGenerateClinitCheck()) { + GenerateClassInitializationCheck(slow_path, out); + } else { + __ Bind(slow_path->GetExitLabel()); + } } } } @@ -3132,7 +3154,11 @@ void InstructionCodeGeneratorMIPS64::VisitMonitorOperation(HMonitorOperation* in instruction, instruction->GetDexPc(), nullptr); - CheckEntrypointTypes<kQuickLockObject, void, mirror::Object*>(); + if (instruction->IsEnter()) { + CheckEntrypointTypes<kQuickLockObject, void, mirror::Object*>(); + } else { + CheckEntrypointTypes<kQuickUnlockObject, void, mirror::Object*>(); + } } void LocationsBuilderMIPS64::VisitMul(HMul* mul) { @@ -3266,15 +3292,12 @@ void LocationsBuilderMIPS64::VisitNewInstance(HNewInstance* instruction) { LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kCall); InvokeRuntimeCallingConvention calling_convention; - locations->AddTemp(Location::RegisterLocation(calling_convention.GetRegisterAt(0))); - locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(1))); + locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0))); + locations->SetInAt(1, Location::RegisterLocation(calling_convention.GetRegisterAt(1))); locations->SetOut(calling_convention.GetReturnLocation(Primitive::kPrimNot)); } void 
InstructionCodeGeneratorMIPS64::VisitNewInstance(HNewInstance* instruction) { - LocationSummary* locations = instruction->GetLocations(); - // Move an uint16_t value to a register. - __ LoadConst32(locations->GetTemp(0).AsRegister<GpuRegister>(), instruction->GetTypeIndex()); codegen_->InvokeRuntime(instruction->GetEntrypoint(), instruction, instruction->GetDexPc(), @@ -3454,6 +3477,11 @@ void InstructionCodeGeneratorMIPS64::VisitRem(HRem* instruction) { int32_t entry_offset = (type == Primitive::kPrimFloat) ? QUICK_ENTRY_POINT(pFmodf) : QUICK_ENTRY_POINT(pFmod); codegen_->InvokeRuntime(entry_offset, instruction, instruction->GetDexPc(), nullptr); + if (type == Primitive::kPrimFloat) { + CheckEntrypointTypes<kQuickFmodf, float, float, float>(); + } else { + CheckEntrypointTypes<kQuickFmod, double, double, double>(); + } break; } default: @@ -3763,6 +3791,11 @@ void InstructionCodeGeneratorMIPS64::VisitTypeConversion(HTypeConversion* conver conversion, conversion->GetDexPc(), nullptr); + if (result_type == Primitive::kPrimFloat) { + CheckEntrypointTypes<kQuickL2f, float, int64_t>(); + } else { + CheckEntrypointTypes<kQuickL2d, double, int64_t>(); + } } } else if (Primitive::IsIntegralType(result_type) && Primitive::IsFloatingPointType(input_type)) { CHECK(result_type == Primitive::kPrimInt || result_type == Primitive::kPrimLong); @@ -3778,6 +3811,19 @@ void InstructionCodeGeneratorMIPS64::VisitTypeConversion(HTypeConversion* conver conversion, conversion->GetDexPc(), nullptr); + if (result_type != Primitive::kPrimLong) { + if (input_type == Primitive::kPrimFloat) { + CheckEntrypointTypes<kQuickF2iz, int32_t, float>(); + } else { + CheckEntrypointTypes<kQuickD2iz, int32_t, double>(); + } + } else { + if (input_type == Primitive::kPrimFloat) { + CheckEntrypointTypes<kQuickF2l, int64_t, float>(); + } else { + CheckEntrypointTypes<kQuickD2l, int64_t, double>(); + } + } } else if (Primitive::IsFloatingPointType(result_type) && Primitive::IsFloatingPointType(input_type)) { FpuRegister dst = locations->Out().AsFpuRegister<FpuRegister>(); @@ -3929,7 +3975,7 @@ void InstructionCodeGeneratorMIPS64::VisitPackedSwitch(HPackedSwitch* switch_ins const ArenaVector<HBasicBlock*>& successors = switch_instr->GetBlock()->GetSuccessors(); for (int32_t i = 0; i < num_entries; i++) { int32_t case_value = lower_bound + i; - Label* succ = codegen_->GetLabelOf(successors[i]); + Mips64Label* succ = codegen_->GetLabelOf(successors[i]); if (case_value == 0) { __ Beqzc(value_reg, succ); } else { @@ -3940,7 +3986,7 @@ void InstructionCodeGeneratorMIPS64::VisitPackedSwitch(HPackedSwitch* switch_ins // And the default for any other value. 
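The CheckEntrypointTypes<...>() calls added throughout these hunks assert, at compile time, that each InvokeRuntime call site agrees with the return and argument types of the quick entrypoint it targets. A minimal standalone sketch of the idea, with an invented two-entry signature table (the real helper checks against the runtime's own entrypoint definitions):

#include <cstdint>
#include <type_traits>

// Invented stand-ins for two quick entrypoints used nearby (pFmod, pL2f).
enum QuickEntrypoint { kQuickFmod, kQuickL2f };

template <QuickEntrypoint E> struct EntrypointSignature;
template <> struct EntrypointSignature<kQuickFmod> { using type = double(double, double); };
template <> struct EntrypointSignature<kQuickL2f>  { using type = float(int64_t); };

// Compilation fails if the caller's expectation disagrees with the table.
template <QuickEntrypoint E, typename Ret, typename... Args>
void CheckEntrypointTypes() {
  static_assert(std::is_same<typename EntrypointSignature<E>::type, Ret(Args...)>::value,
                "runtime entrypoint called with an unexpected signature");
}

void Example() {
  CheckEntrypointTypes<kQuickFmod, double, double, double>();  // matches
  CheckEntrypointTypes<kQuickL2f, float, int64_t>();           // matches
  // CheckEntrypointTypes<kQuickL2f, float, int32_t>();        // would fail to compile
}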
if (!codegen_->GoesToNextBlock(switch_instr->GetBlock(), default_block)) { - __ B(codegen_->GetLabelOf(default_block)); + __ Bc(codegen_->GetLabelOf(default_block)); } } diff --git a/compiler/optimizing/code_generator_mips64.h b/compiler/optimizing/code_generator_mips64.h index a078dd1819..85e3a4a3ce 100644 --- a/compiler/optimizing/code_generator_mips64.h +++ b/compiler/optimizing/code_generator_mips64.h @@ -158,12 +158,12 @@ class SlowPathCodeMIPS64 : public SlowPathCode { public: SlowPathCodeMIPS64() : entry_label_(), exit_label_() {} - Label* GetEntryLabel() { return &entry_label_; } - Label* GetExitLabel() { return &exit_label_; } + Mips64Label* GetEntryLabel() { return &entry_label_; } + Mips64Label* GetExitLabel() { return &exit_label_; } private: - Label entry_label_; - Label exit_label_; + Mips64Label entry_label_; + Mips64Label exit_label_; DISALLOW_COPY_AND_ASSIGN(SlowPathCodeMIPS64); }; @@ -231,8 +231,8 @@ class InstructionCodeGeneratorMIPS64 : public HGraphVisitor { void GenerateExplicitNullCheck(HNullCheck* instruction); void GenerateTestAndBranch(HInstruction* instruction, size_t condition_input_index, - Label* true_target, - Label* false_target); + Mips64Label* true_target, + Mips64Label* false_target); void DivRemOneOrMinusOne(HBinaryOperation* instruction); void DivRemByPowerOfTwo(HBinaryOperation* instruction); void GenerateDivRemWithAnyConstant(HBinaryOperation* instruction); @@ -265,7 +265,7 @@ class CodeGeneratorMIPS64 : public CodeGenerator { size_t GetFloatingPointSpillSlotSize() const OVERRIDE { return kMips64WordSize; } uintptr_t GetAddressOf(HBasicBlock* block) const OVERRIDE { - return GetLabelOf(block)->Position(); + return assembler_.GetLabelLocation(GetLabelOf(block)); } HGraphVisitor* GetLocationBuilder() OVERRIDE { return &location_builder_; } @@ -298,12 +298,12 @@ class CodeGeneratorMIPS64 : public CodeGenerator { return isa_features_; } - Label* GetLabelOf(HBasicBlock* block) const { - return CommonGetLabelOf<Label>(block_labels_, block); + Mips64Label* GetLabelOf(HBasicBlock* block) const { + return CommonGetLabelOf<Mips64Label>(block_labels_, block); } void Initialize() OVERRIDE { - block_labels_ = CommonInitializeLabels<Label>(); + block_labels_ = CommonInitializeLabels<Mips64Label>(); } void Finalize(CodeAllocator* allocator) OVERRIDE; @@ -349,8 +349,8 @@ class CodeGeneratorMIPS64 : public CodeGenerator { private: // Labels for each block that will be compiled. - Label* block_labels_; // Indexed by block id. - Label frame_entry_label_; + Mips64Label* block_labels_; // Indexed by block id. 
+ Mips64Label frame_entry_label_; LocationsBuilderMIPS64 location_builder_; InstructionCodeGeneratorMIPS64 instruction_visitor_; ParallelMoveResolverMIPS64 move_resolver_; diff --git a/compiler/optimizing/code_generator_x86.cc b/compiler/optimizing/code_generator_x86.cc index 999306c34b..1fc09a81bc 100644 --- a/compiler/optimizing/code_generator_x86.cc +++ b/compiler/optimizing/code_generator_x86.cc @@ -67,6 +67,7 @@ class NullCheckSlowPathX86 : public SlowPathCode { instruction_, instruction_->GetDexPc(), this); + CheckEntrypointTypes<kQuickThrowNullPointer, void, void>(); } bool IsFatal() const OVERRIDE { return true; } @@ -93,6 +94,7 @@ class DivZeroCheckSlowPathX86 : public SlowPathCode { instruction_, instruction_->GetDexPc(), this); + CheckEntrypointTypes<kQuickThrowDivZero, void, void>(); } bool IsFatal() const OVERRIDE { return true; } @@ -152,6 +154,7 @@ class BoundsCheckSlowPathX86 : public SlowPathCode { instruction_, instruction_->GetDexPc(), this); + CheckEntrypointTypes<kQuickThrowArrayBounds, void, int32_t, int32_t>(); } bool IsFatal() const OVERRIDE { return true; } @@ -177,6 +180,7 @@ class SuspendCheckSlowPathX86 : public SlowPathCode { instruction_, instruction_->GetDexPc(), this); + CheckEntrypointTypes<kQuickTestSuspend, void, void>(); RestoreLiveRegisters(codegen, instruction_->GetLocations()); if (successor_ == nullptr) { __ jmp(GetReturnLabel()); @@ -222,6 +226,7 @@ class LoadStringSlowPathX86 : public SlowPathCode { instruction_, instruction_->GetDexPc(), this); + CheckEntrypointTypes<kQuickResolveString, void*, uint32_t>(); x86_codegen->Move32(locations->Out(), Location::RegisterLocation(EAX)); RestoreLiveRegisters(codegen, locations); @@ -257,6 +262,11 @@ class LoadClassSlowPathX86 : public SlowPathCode { x86_codegen->InvokeRuntime(do_clinit_ ? QUICK_ENTRY_POINT(pInitializeStaticStorage) : QUICK_ENTRY_POINT(pInitializeType), at_, dex_pc_, this); + if (do_clinit_) { + CheckEntrypointTypes<kQuickInitializeStaticStorage, void*, uint32_t>(); + } else { + CheckEntrypointTypes<kQuickInitializeType, void*, uint32_t>(); + } // Move the class to the desired location. Location out = locations->Out(); @@ -368,6 +378,7 @@ class DeoptimizationSlowPathX86 : public SlowPathCode { instruction_, instruction_->GetDexPc(), this); + CheckEntrypointTypes<kQuickDeoptimize, void, void>(); } const char* GetDescription() const OVERRIDE { return "DeoptimizationSlowPathX86"; } @@ -410,6 +421,7 @@ class ArraySetSlowPathX86 : public SlowPathCode { instruction_, instruction_->GetDexPc(), this); + CheckEntrypointTypes<kQuickAputObject, void, mirror::Array*, int32_t, mirror::Object*>(); RestoreLiveRegisters(codegen, locations); __ jmp(GetExitLabel()); } @@ -1908,7 +1920,7 @@ void LocationsBuilderX86::VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invok IntrinsicLocationsBuilderX86 intrinsic(codegen_); if (intrinsic.TryDispatch(invoke)) { if (invoke->GetLocations()->CanCall() && invoke->HasPcRelativeDexCache()) { - invoke->GetLocations()->SetInAt(invoke->GetCurrentMethodInputIndex(), Location::Any()); + invoke->GetLocations()->SetInAt(invoke->GetSpecialInputIndex(), Location::Any()); } return; } @@ -1917,7 +1929,7 @@ void LocationsBuilderX86::VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invok // For PC-relative dex cache the invoke has an extra input, the PC-relative address base. 
if (invoke->HasPcRelativeDexCache()) { - invoke->GetLocations()->SetInAt(invoke->GetCurrentMethodInputIndex(), + invoke->GetLocations()->SetInAt(invoke->GetSpecialInputIndex(), Location::RequiresRegister()); } @@ -1926,9 +1938,9 @@ void LocationsBuilderX86::VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invok // needs a register. We therefore do not require a register for it, and let // the code generation of the invoke handle it. LocationSummary* locations = invoke->GetLocations(); - Location location = locations->InAt(invoke->GetCurrentMethodInputIndex()); + Location location = locations->InAt(invoke->GetSpecialInputIndex()); if (location.IsUnallocated() && location.GetPolicy() == Location::kRequiresRegister) { - locations->SetInAt(invoke->GetCurrentMethodInputIndex(), Location::NoLocation()); + locations->SetInAt(invoke->GetSpecialInputIndex(), Location::NoLocation()); } } } @@ -2460,6 +2472,7 @@ void InstructionCodeGeneratorX86::VisitTypeConversion(HTypeConversion* conversio conversion, conversion->GetDexPc(), nullptr); + CheckEntrypointTypes<kQuickF2l, int64_t, float>(); break; case Primitive::kPrimDouble: @@ -2468,6 +2481,7 @@ void InstructionCodeGeneratorX86::VisitTypeConversion(HTypeConversion* conversio conversion, conversion->GetDexPc(), nullptr); + CheckEntrypointTypes<kQuickD2l, int64_t, double>(); break; default: @@ -3298,11 +3312,13 @@ void InstructionCodeGeneratorX86::GenerateDivRemIntegral(HBinaryOperation* instr instruction, instruction->GetDexPc(), nullptr); + CheckEntrypointTypes<kQuickLdiv, int64_t, int64_t, int64_t>(); } else { codegen_->InvokeRuntime(QUICK_ENTRY_POINT(pLmod), instruction, instruction->GetDexPc(), nullptr); + CheckEntrypointTypes<kQuickLmod, int64_t, int64_t, int64_t>(); } break; } @@ -3769,19 +3785,18 @@ void LocationsBuilderX86::VisitNewInstance(HNewInstance* instruction) { new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kCall); locations->SetOut(Location::RegisterLocation(EAX)); InvokeRuntimeCallingConvention calling_convention; - locations->AddTemp(Location::RegisterLocation(calling_convention.GetRegisterAt(0))); - locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(1))); + locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0))); + locations->SetInAt(1, Location::RegisterLocation(calling_convention.GetRegisterAt(1))); } void InstructionCodeGeneratorX86::VisitNewInstance(HNewInstance* instruction) { - InvokeRuntimeCallingConvention calling_convention; - __ movl(calling_convention.GetRegisterAt(0), Immediate(instruction->GetTypeIndex())); // Note: if heap poisoning is enabled, the entry point takes cares // of poisoning the reference. codegen_->InvokeRuntime(instruction->GetEntrypoint(), instruction, instruction->GetDexPc(), nullptr); + CheckEntrypointTypes<kQuickAllocObjectWithAccessCheck, void*, uint32_t, ArtMethod*>(); DCHECK(!codegen_->IsLeafMethod()); } @@ -3798,13 +3813,13 @@ void LocationsBuilderX86::VisitNewArray(HNewArray* instruction) { void InstructionCodeGeneratorX86::VisitNewArray(HNewArray* instruction) { InvokeRuntimeCallingConvention calling_convention; __ movl(calling_convention.GetRegisterAt(0), Immediate(instruction->GetTypeIndex())); - // Note: if heap poisoning is enabled, the entry point takes cares // of poisoning the reference. 
codegen_->InvokeRuntime(instruction->GetEntrypoint(), instruction, instruction->GetDexPc(), nullptr); + CheckEntrypointTypes<kQuickAllocArrayWithAccessCheck, void*, uint32_t, int32_t, ArtMethod*>(); DCHECK(!codegen_->IsLeafMethod()); } @@ -4032,7 +4047,7 @@ HInvokeStaticOrDirect::DispatchInfo CodeGeneratorX86::GetSupportedInvokeStaticOr Register CodeGeneratorX86::GetInvokeStaticOrDirectExtraParameter(HInvokeStaticOrDirect* invoke, Register temp) { DCHECK_EQ(invoke->InputCount(), invoke->GetNumberOfArguments() + 1u); - Location location = invoke->GetLocations()->InAt(invoke->GetCurrentMethodInputIndex()); + Location location = invoke->GetLocations()->InAt(invoke->GetSpecialInputIndex()); if (!invoke->GetLocations()->Intrinsified()) { return location.AsRegister<Register>(); } @@ -4063,7 +4078,7 @@ void CodeGeneratorX86::GenerateStaticOrDirectCall(HInvokeStaticOrDirect* invoke, __ fs()->movl(temp.AsRegister<Register>(), Address::Absolute(invoke->GetStringInitOffset())); break; case HInvokeStaticOrDirect::MethodLoadKind::kRecursive: - callee_method = invoke->GetLocations()->InAt(invoke->GetCurrentMethodInputIndex()); + callee_method = invoke->GetLocations()->InAt(invoke->GetSpecialInputIndex()); break; case HInvokeStaticOrDirect::MethodLoadKind::kDirectAddress: __ movl(temp.AsRegister<Register>(), Immediate(invoke->GetMethodAddress())); @@ -4084,7 +4099,7 @@ void CodeGeneratorX86::GenerateStaticOrDirectCall(HInvokeStaticOrDirect* invoke, break; } case HInvokeStaticOrDirect::MethodLoadKind::kDexCacheViaMethod: { - Location current_method = invoke->GetLocations()->InAt(invoke->GetCurrentMethodInputIndex()); + Location current_method = invoke->GetLocations()->InAt(invoke->GetSpecialInputIndex()); Register method_reg; Register reg = temp.AsRegister<Register>(); if (current_method.IsRegister()) { @@ -4856,7 +4871,7 @@ void LocationsBuilderX86::VisitArraySet(HArraySet* instruction) { // Temporary registers for the write barrier. locations->AddTemp(Location::RequiresRegister()); // Possibly used for ref. poisoning too. // Ensure the card is in a byte register. - locations->AddTemp(Location::RegisterLocation(ECX)); // Possibly used for read barrier too. 
+ locations->AddTemp(Location::RegisterLocation(ECX)); } } @@ -5503,6 +5518,7 @@ void InstructionCodeGeneratorX86::VisitLoadClass(HLoadClass* cls) { cls, cls->GetDexPc(), nullptr); + CheckEntrypointTypes<kQuickInitializeTypeAndVerifyAccess, void*, uint32_t>(); return; } @@ -5524,7 +5540,6 @@ void InstructionCodeGeneratorX86::VisitLoadClass(HLoadClass* cls) { __ movl(out, Address(current_method, declaring_class_offset)); } } else { - DCHECK(cls->CanCallRuntime()); // /* GcRoot<mirror::Class>[] */ out = // current_method.ptr_sized_fields_->dex_cache_resolved_types_ __ movl(out, Address(current_method, @@ -5541,15 +5556,22 @@ void InstructionCodeGeneratorX86::VisitLoadClass(HLoadClass* cls) { __ movl(out, Address(out, cache_offset)); } - SlowPathCode* slow_path = new (GetGraph()->GetArena()) LoadClassSlowPathX86( - cls, cls, cls->GetDexPc(), cls->MustGenerateClinitCheck()); - codegen_->AddSlowPath(slow_path); - __ testl(out, out); - __ j(kEqual, slow_path->GetEntryLabel()); - if (cls->MustGenerateClinitCheck()) { - GenerateClassInitializationCheck(slow_path, out); - } else { - __ Bind(slow_path->GetExitLabel()); + if (!cls->IsInDexCache() || cls->MustGenerateClinitCheck()) { + DCHECK(cls->CanCallRuntime()); + SlowPathCode* slow_path = new (GetGraph()->GetArena()) LoadClassSlowPathX86( + cls, cls, cls->GetDexPc(), cls->MustGenerateClinitCheck()); + codegen_->AddSlowPath(slow_path); + + if (!cls->IsInDexCache()) { + __ testl(out, out); + __ j(kEqual, slow_path->GetEntryLabel()); + } + + if (cls->MustGenerateClinitCheck()) { + GenerateClassInitializationCheck(slow_path, out); + } else { + __ Bind(slow_path->GetExitLabel()); + } } } } @@ -5661,6 +5683,7 @@ void InstructionCodeGeneratorX86::VisitThrow(HThrow* instruction) { instruction, instruction->GetDexPc(), nullptr); + CheckEntrypointTypes<kQuickDeliverException, void, mirror::Object*>(); } void LocationsBuilderX86::VisitInstanceOf(HInstanceOf* instruction) { @@ -6150,6 +6173,11 @@ void InstructionCodeGeneratorX86::VisitMonitorOperation(HMonitorOperation* instr instruction, instruction->GetDexPc(), nullptr); + if (instruction->IsEnter()) { + CheckEntrypointTypes<kQuickLockObject, void, mirror::Object*>(); + } else { + CheckEntrypointTypes<kQuickUnlockObject, void, mirror::Object*>(); + } } void LocationsBuilderX86::VisitAnd(HAnd* instruction) { HandleBitwiseOperation(instruction); } diff --git a/compiler/optimizing/code_generator_x86_64.cc b/compiler/optimizing/code_generator_x86_64.cc index 4088160b3f..534ee1c5ab 100644 --- a/compiler/optimizing/code_generator_x86_64.cc +++ b/compiler/optimizing/code_generator_x86_64.cc @@ -65,6 +65,7 @@ class NullCheckSlowPathX86_64 : public SlowPathCode { instruction_, instruction_->GetDexPc(), this); + CheckEntrypointTypes<kQuickThrowNullPointer, void, void>(); } bool IsFatal() const OVERRIDE { return true; } @@ -91,6 +92,7 @@ class DivZeroCheckSlowPathX86_64 : public SlowPathCode { instruction_, instruction_->GetDexPc(), this); + CheckEntrypointTypes<kQuickThrowDivZero, void, void>(); } bool IsFatal() const OVERRIDE { return true; } @@ -149,6 +151,7 @@ class SuspendCheckSlowPathX86_64 : public SlowPathCode { instruction_, instruction_->GetDexPc(), this); + CheckEntrypointTypes<kQuickTestSuspend, void, void>(); RestoreLiveRegisters(codegen, instruction_->GetLocations()); if (successor_ == nullptr) { __ jmp(GetReturnLabel()); @@ -203,6 +206,7 @@ class BoundsCheckSlowPathX86_64 : public SlowPathCode { instruction_, instruction_->GetDexPc(), this); + CheckEntrypointTypes<kQuickThrowArrayBounds, void, int32_t, 
int32_t>(); } bool IsFatal() const OVERRIDE { return true; } @@ -240,6 +244,11 @@ class LoadClassSlowPathX86_64 : public SlowPathCode { at_, dex_pc_, this); + if (do_clinit_) { + CheckEntrypointTypes<kQuickInitializeStaticStorage, void*, uint32_t>(); + } else { + CheckEntrypointTypes<kQuickInitializeType, void*, uint32_t>(); + } Location out = locations->Out(); // Move the class to the desired location. @@ -290,6 +299,7 @@ class LoadStringSlowPathX86_64 : public SlowPathCode { instruction_, instruction_->GetDexPc(), this); + CheckEntrypointTypes<kQuickResolveString, void*, uint32_t>(); x86_64_codegen->Move(locations->Out(), Location::RegisterLocation(RAX)); RestoreLiveRegisters(codegen, locations); __ jmp(GetExitLabel()); @@ -386,6 +396,7 @@ class DeoptimizationSlowPathX86_64 : public SlowPathCode { deoptimize, deoptimize->GetDexPc(), this); + CheckEntrypointTypes<kQuickDeoptimize, void, void>(); } const char* GetDescription() const OVERRIDE { return "DeoptimizationSlowPathX86_64"; } @@ -428,6 +439,7 @@ class ArraySetSlowPathX86_64 : public SlowPathCode { instruction_, instruction_->GetDexPc(), this); + CheckEntrypointTypes<kQuickAputObject, void, mirror::Array*, int32_t, mirror::Object*>(); RestoreLiveRegisters(codegen, locations); __ jmp(GetExitLabel()); } @@ -718,7 +730,7 @@ void CodeGeneratorX86_64::GenerateStaticOrDirectCall(HInvokeStaticOrDirect* invo Address::Absolute(invoke->GetStringInitOffset(), true)); break; case HInvokeStaticOrDirect::MethodLoadKind::kRecursive: - callee_method = invoke->GetLocations()->InAt(invoke->GetCurrentMethodInputIndex()); + callee_method = invoke->GetLocations()->InAt(invoke->GetSpecialInputIndex()); break; case HInvokeStaticOrDirect::MethodLoadKind::kDirectAddress: __ movq(temp.AsRegister<CpuRegister>(), Immediate(invoke->GetMethodAddress())); @@ -737,7 +749,7 @@ void CodeGeneratorX86_64::GenerateStaticOrDirectCall(HInvokeStaticOrDirect* invo __ Bind(&pc_relative_dex_cache_patches_.back().label); break; case HInvokeStaticOrDirect::MethodLoadKind::kDexCacheViaMethod: { - Location current_method = invoke->GetLocations()->InAt(invoke->GetCurrentMethodInputIndex()); + Location current_method = invoke->GetLocations()->InAt(invoke->GetSpecialInputIndex()); Register method_reg; CpuRegister reg = temp.AsRegister<CpuRegister>(); if (current_method.IsRegister()) { @@ -3765,22 +3777,19 @@ void LocationsBuilderX86_64::VisitNewInstance(HNewInstance* instruction) { LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kCall); InvokeRuntimeCallingConvention calling_convention; - locations->AddTemp(Location::RegisterLocation(calling_convention.GetRegisterAt(0))); - locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(1))); + locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0))); + locations->SetInAt(1, Location::RegisterLocation(calling_convention.GetRegisterAt(1))); locations->SetOut(Location::RegisterLocation(RAX)); } void InstructionCodeGeneratorX86_64::VisitNewInstance(HNewInstance* instruction) { - InvokeRuntimeCallingConvention calling_convention; - codegen_->Load64BitValue(CpuRegister(calling_convention.GetRegisterAt(0)), - instruction->GetTypeIndex()); // Note: if heap poisoning is enabled, the entry point takes cares // of poisoning the reference. 
- codegen_->InvokeRuntime(instruction->GetEntrypoint(), instruction, instruction->GetDexPc(), nullptr); + CheckEntrypointTypes<kQuickAllocObjectWithAccessCheck, void*, uint32_t, ArtMethod*>(); DCHECK(!codegen_->IsLeafMethod()); } @@ -3799,13 +3808,13 @@ void InstructionCodeGeneratorX86_64::VisitNewArray(HNewArray* instruction) { InvokeRuntimeCallingConvention calling_convention; codegen_->Load64BitValue(CpuRegister(calling_convention.GetRegisterAt(0)), instruction->GetTypeIndex()); - // Note: if heap poisoning is enabled, the entry point takes cares // of poisoning the reference. codegen_->InvokeRuntime(instruction->GetEntrypoint(), instruction, instruction->GetDexPc(), nullptr); + CheckEntrypointTypes<kQuickAllocArrayWithAccessCheck, void*, uint32_t, int32_t, ArtMethod*>(); DCHECK(!codegen_->IsLeafMethod()); } @@ -4500,8 +4509,6 @@ void LocationsBuilderX86_64::VisitArraySet(HArraySet* instruction) { // This first temporary register is possibly used for heap // reference poisoning and/or read barrier emission too. locations->AddTemp(Location::RequiresRegister()); - // This second temporary register is possibly used for read - // barrier emission too. locations->AddTemp(Location::RequiresRegister()); } } @@ -5129,6 +5136,7 @@ void InstructionCodeGeneratorX86_64::VisitLoadClass(HLoadClass* cls) { cls, cls->GetDexPc(), nullptr); + CheckEntrypointTypes<kQuickInitializeTypeAndVerifyAccess, void*, uint32_t>(); return; } @@ -5150,7 +5158,6 @@ void InstructionCodeGeneratorX86_64::VisitLoadClass(HLoadClass* cls) { __ movl(out, Address(current_method, declaring_class_offset)); } } else { - DCHECK(cls->CanCallRuntime()); // /* GcRoot<mirror::Class>[] */ out = // current_method.ptr_sized_fields_->dex_cache_resolved_types_ __ movq(out, Address(current_method, @@ -5167,15 +5174,20 @@ void InstructionCodeGeneratorX86_64::VisitLoadClass(HLoadClass* cls) { __ movl(out, Address(out, cache_offset)); } - SlowPathCode* slow_path = new (GetGraph()->GetArena()) LoadClassSlowPathX86_64( - cls, cls, cls->GetDexPc(), cls->MustGenerateClinitCheck()); - codegen_->AddSlowPath(slow_path); - __ testl(out, out); - __ j(kEqual, slow_path->GetEntryLabel()); - if (cls->MustGenerateClinitCheck()) { - GenerateClassInitializationCheck(slow_path, out); - } else { - __ Bind(slow_path->GetExitLabel()); + if (!cls->IsInDexCache() || cls->MustGenerateClinitCheck()) { + DCHECK(cls->CanCallRuntime()); + SlowPathCode* slow_path = new (GetGraph()->GetArena()) LoadClassSlowPathX86_64( + cls, cls, cls->GetDexPc(), cls->MustGenerateClinitCheck()); + codegen_->AddSlowPath(slow_path); + if (!cls->IsInDexCache()) { + __ testl(out, out); + __ j(kEqual, slow_path->GetEntryLabel()); + } + if (cls->MustGenerateClinitCheck()) { + GenerateClassInitializationCheck(slow_path, out); + } else { + __ Bind(slow_path->GetExitLabel()); + } } } } @@ -5278,6 +5290,7 @@ void InstructionCodeGeneratorX86_64::VisitThrow(HThrow* instruction) { instruction, instruction->GetDexPc(), nullptr); + CheckEntrypointTypes<kQuickDeliverException, void, mirror::Object*>(); } void LocationsBuilderX86_64::VisitInstanceOf(HInstanceOf* instruction) { @@ -5772,6 +5785,11 @@ void InstructionCodeGeneratorX86_64::VisitMonitorOperation(HMonitorOperation* in instruction, instruction->GetDexPc(), nullptr); + if (instruction->IsEnter()) { + CheckEntrypointTypes<kQuickLockObject, void, mirror::Object*>(); + } else { + CheckEntrypointTypes<kQuickUnlockObject, void, mirror::Object*>(); + } } void LocationsBuilderX86_64::VisitAnd(HAnd* instruction) { HandleBitwiseOperation(instruction); } 
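The VisitLoadClass changes in the MIPS64, x86 and x86-64 backends all follow the same shape: the slow path is created only when the class may be absent from the dex cache or a clinit check is required, and the null test on the cached class is emitted only in the former case. A small standalone model of that decision, with invented names:

#include <cassert>

struct LoadClassInfo {
  bool is_in_dex_cache;
  bool must_generate_clinit_check;
  bool can_call_runtime;
};

struct Plan {
  bool create_slow_path;
  bool emit_null_test;     // branch to the slow path if the cached class is null
  bool emit_clinit_check;  // branch to the slow path if the class is not initialized
};

Plan PlanLoadClass(const LoadClassInfo& cls) {
  Plan p{false, false, false};
  if (!cls.is_in_dex_cache || cls.must_generate_clinit_check) {
    assert(cls.can_call_runtime);  // mirrors the DCHECK in the hunks above
    p.create_slow_path = true;
    p.emit_null_test = !cls.is_in_dex_cache;
    p.emit_clinit_check = cls.must_generate_clinit_check;
  }
  return p;
}

int main() {
  // A class known to be in the dex cache with no clinit check needs no slow path at all.
  Plan p = PlanLoadClass({/*is_in_dex_cache=*/true,
                          /*must_generate_clinit_check=*/false,
                          /*can_call_runtime=*/false});
  assert(!p.create_slow_path && !p.emit_null_test && !p.emit_clinit_check);
  return 0;
}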
diff --git a/compiler/optimizing/common_arm64.h b/compiler/optimizing/common_arm64.h index e1a8c9cc0f..af8b8b562a 100644 --- a/compiler/optimizing/common_arm64.h +++ b/compiler/optimizing/common_arm64.h @@ -17,6 +17,7 @@ #ifndef ART_COMPILER_OPTIMIZING_COMMON_ARM64_H_ #define ART_COMPILER_OPTIMIZING_COMMON_ARM64_H_ +#include "code_generator.h" #include "locations.h" #include "nodes.h" #include "utils/arm64/assembler_arm64.h" @@ -255,6 +256,67 @@ static inline bool ArtVixlRegCodeCoherentForRegSet(uint32_t art_core_registers, return true; } +static inline vixl::Shift ShiftFromOpKind(HArm64DataProcWithShifterOp::OpKind op_kind) { + switch (op_kind) { + case HArm64DataProcWithShifterOp::kASR: return vixl::ASR; + case HArm64DataProcWithShifterOp::kLSL: return vixl::LSL; + case HArm64DataProcWithShifterOp::kLSR: return vixl::LSR; + default: + LOG(FATAL) << "Unexpected op kind " << op_kind; + UNREACHABLE(); + return vixl::NO_SHIFT; + } +} + +static inline vixl::Extend ExtendFromOpKind(HArm64DataProcWithShifterOp::OpKind op_kind) { + switch (op_kind) { + case HArm64DataProcWithShifterOp::kUXTB: return vixl::UXTB; + case HArm64DataProcWithShifterOp::kUXTH: return vixl::UXTH; + case HArm64DataProcWithShifterOp::kUXTW: return vixl::UXTW; + case HArm64DataProcWithShifterOp::kSXTB: return vixl::SXTB; + case HArm64DataProcWithShifterOp::kSXTH: return vixl::SXTH; + case HArm64DataProcWithShifterOp::kSXTW: return vixl::SXTW; + default: + LOG(FATAL) << "Unexpected op kind " << op_kind; + UNREACHABLE(); + return vixl::NO_EXTEND; + } +} + +static inline bool CanFitInShifterOperand(HInstruction* instruction) { + if (instruction->IsTypeConversion()) { + HTypeConversion* conversion = instruction->AsTypeConversion(); + Primitive::Type result_type = conversion->GetResultType(); + Primitive::Type input_type = conversion->GetInputType(); + // We don't expect to see the same type as input and result. + return Primitive::IsIntegralType(result_type) && Primitive::IsIntegralType(input_type) && + (result_type != input_type); + } else { + return (instruction->IsShl() && instruction->AsShl()->InputAt(1)->IsIntConstant()) || + (instruction->IsShr() && instruction->AsShr()->InputAt(1)->IsIntConstant()) || + (instruction->IsUShr() && instruction->AsUShr()->InputAt(1)->IsIntConstant()); + } +} + +static inline bool HasShifterOperand(HInstruction* instr) { + // `neg` instructions are an alias of `sub` using the zero register as the + // first register input. + bool res = instr->IsAdd() || instr->IsAnd() || instr->IsNeg() || + instr->IsOr() || instr->IsSub() || instr->IsXor(); + return res; +} + +static inline bool ShifterOperandSupportsExtension(HInstruction* instruction) { + DCHECK(HasShifterOperand(instruction)); + // Although the `neg` instruction is an alias of the `sub` instruction, `HNeg` + // does *not* support extension. This is because the `extended register` form + // of the `sub` instruction interprets the left register with code 31 as the + // stack pointer and not the zero register. (So does the `immediate` form.) In + // the other form `shifted register, the register with code 31 is interpreted + // as the zero register. 
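The helpers in this hunk classify which HIR shapes can be folded into an ARM64 shifter operand: constant shifts, and integral type conversions (as extensions), the latter only for add/sub since register 31 reads as SP in the extended-register encodings, and HNeg is a sub from the zero register. Roughly, at the source level (the assembly in the comments is hand-written for illustration, not compiler output):

#include <cstdint>

// Source-level shapes the merge targets. Shifts of possibly-negative values are
// written on the unsigned bit pattern only to keep this C++ sketch well defined;
// the compiler IR shifts the two's-complement value directly.
int64_t AddShifted(int64_t a, int64_t b) {
  return a + static_cast<int64_t>(static_cast<uint64_t>(b) << 3);  // add x0, x1, x2, LSL #3
}

int64_t AddExtended(int64_t a, int32_t b) {
  return a + static_cast<int64_t>(b);                              // add x0, x1, w2, SXTW
}

int64_t SubExtended(int64_t a, int32_t b) {
  return a - static_cast<int64_t>(b);                              // sub x0, x1, w2, SXTW
}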
+ return instruction->IsAdd() || instruction->IsSub(); +} + } // namespace helpers } // namespace arm64 } // namespace art diff --git a/compiler/optimizing/graph_checker.cc b/compiler/optimizing/graph_checker.cc index 5814d7556f..b3b09d2155 100644 --- a/compiler/optimizing/graph_checker.cc +++ b/compiler/optimizing/graph_checker.cc @@ -735,26 +735,29 @@ void SSAChecker::VisitPhi(HPhi* phi) { } } - // Test phi equivalents. There should not be two of the same type and they - // should only be created for constants which were untyped in DEX. - for (HInstructionIterator phi_it(phi->GetBlock()->GetPhis()); !phi_it.Done(); phi_it.Advance()) { - HPhi* other_phi = phi_it.Current()->AsPhi(); - if (phi != other_phi && phi->GetRegNumber() == other_phi->GetRegNumber()) { - if (phi->GetType() == other_phi->GetType()) { - std::stringstream type_str; - type_str << phi->GetType(); - AddError(StringPrintf("Equivalent phi (%d) found for VReg %d with type: %s.", - phi->GetId(), - phi->GetRegNumber(), - type_str.str().c_str())); - } else { - ArenaBitVector visited(GetGraph()->GetArena(), 0, /* expandable */ true); - if (!IsConstantEquivalent(phi, other_phi, &visited)) { - AddError(StringPrintf("Two phis (%d and %d) found for VReg %d but they " - "are not equivalents of constants.", + // Test phi equivalents. There should not be two of the same type and they should only be + // created for constants which were untyped in DEX. Note that this test can be skipped for + // a synthetic phi (indicated by lack of a virtual register). + if (phi->GetRegNumber() != kNoRegNumber) { + for (HInstructionIterator phi_it(phi->GetBlock()->GetPhis()); !phi_it.Done(); phi_it.Advance()) { + HPhi* other_phi = phi_it.Current()->AsPhi(); + if (phi != other_phi && phi->GetRegNumber() == other_phi->GetRegNumber()) { + if (phi->GetType() == other_phi->GetType()) { + std::stringstream type_str; + type_str << phi->GetType(); + AddError(StringPrintf("Equivalent phi (%d) found for VReg %d with type: %s.", phi->GetId(), - other_phi->GetId(), - phi->GetRegNumber())); + phi->GetRegNumber(), + type_str.str().c_str())); + } else { + ArenaBitVector visited(GetGraph()->GetArena(), 0, /* expandable */ true); + if (!IsConstantEquivalent(phi, other_phi, &visited)) { + AddError(StringPrintf("Two phis (%d and %d) found for VReg %d but they " + "are not equivalents of constants.", + phi->GetId(), + other_phi->GetId(), + phi->GetRegNumber())); + } } } } diff --git a/compiler/optimizing/graph_visualizer.cc b/compiler/optimizing/graph_visualizer.cc index 2b7790184a..48bcd10b10 100644 --- a/compiler/optimizing/graph_visualizer.cc +++ b/compiler/optimizing/graph_visualizer.cc @@ -397,6 +397,9 @@ class HGraphVisualizerPrinter : public HGraphDelegateVisitor { << invoke->IsRecursive() << std::noboolalpha; StartAttributeStream("intrinsic") << invoke->GetIntrinsic(); + if (invoke->IsStatic()) { + StartAttributeStream("clinit_check") << invoke->GetClinitCheckRequirement(); + } } void VisitUnresolvedInstanceFieldGet(HUnresolvedInstanceFieldGet* field_access) OVERRIDE { @@ -419,6 +422,19 @@ class HGraphVisualizerPrinter : public HGraphDelegateVisitor { StartAttributeStream("kind") << (try_boundary->IsEntry() ? 
"entry" : "exit"); } +#ifdef ART_ENABLE_CODEGEN_arm64 + void VisitArm64DataProcWithShifterOp(HArm64DataProcWithShifterOp* instruction) OVERRIDE { + StartAttributeStream("kind") << instruction->GetInstrKind() << "+" << instruction->GetOpKind(); + if (HArm64DataProcWithShifterOp::IsShiftOp(instruction->GetOpKind())) { + StartAttributeStream("shift") << instruction->GetShiftAmount(); + } + } + + void VisitArm64MultiplyAccumulate(HArm64MultiplyAccumulate* instruction) OVERRIDE { + StartAttributeStream("kind") << instruction->GetOpKind(); + } +#endif + bool IsPass(const char* name) { return strcmp(pass_name_, name) == 0; } @@ -500,6 +516,18 @@ class HGraphVisualizerPrinter : public HGraphDelegateVisitor { StartAttributeStream("exact") << std::boolalpha << info.IsExact() << std::noboolalpha; } else if (instruction->IsLoadClass()) { StartAttributeStream("klass") << "unresolved"; + } else if (instruction->IsNullConstant()) { + // The NullConstant may be added to the graph during other passes that happen between + // ReferenceTypePropagation and Inliner (e.g. InstructionSimplifier). If the inliner + // doesn't run or doesn't inline anything, the NullConstant remains untyped. + // So we should check NullConstants for validity only after reference type propagation. + // + // Note: The infrastructure to properly type NullConstants everywhere is to complex to add + // for the benefits. + StartAttributeStream("klass") << "not_set"; + DCHECK(!is_after_pass_ + || !IsPass(ReferenceTypePropagation::kReferenceTypePropagationPassName)) + << " Expected a valid rti after reference type propagation"; } else { DCHECK(!is_after_pass_) << "Expected a valid rti after reference type propagation"; diff --git a/compiler/optimizing/gvn.cc b/compiler/optimizing/gvn.cc index c36de84064..4af111b784 100644 --- a/compiler/optimizing/gvn.cc +++ b/compiler/optimizing/gvn.cc @@ -377,9 +377,10 @@ void GlobalValueNumberer::VisitBasicBlock(HBasicBlock* block) { HInstruction* current = block->GetFirstInstruction(); while (current != nullptr) { - set->Kill(current->GetSideEffects()); // Save the next instruction in case `current` is removed from the graph. HInstruction* next = current->GetNext(); + // Do not kill the set with the side effects of the instruction just now: if + // the instruction is GVN'ed, we don't need to kill. 
if (current->CanBeMoved()) { if (current->IsBinaryOperation() && current->AsBinaryOperation()->IsCommutative()) { // For commutative ops, (x op y) will be treated the same as (y op x) @@ -395,8 +396,11 @@ void GlobalValueNumberer::VisitBasicBlock(HBasicBlock* block) { current->ReplaceWith(existing); current->GetBlock()->RemoveInstruction(current); } else { + set->Kill(current->GetSideEffects()); set->Add(current); } + } else { + set->Kill(current->GetSideEffects()); } current = next; } diff --git a/compiler/optimizing/instruction_simplifier.cc b/compiler/optimizing/instruction_simplifier.cc index b97dc1a511..2f3df7fc68 100644 --- a/compiler/optimizing/instruction_simplifier.cc +++ b/compiler/optimizing/instruction_simplifier.cc @@ -169,16 +169,6 @@ void InstructionSimplifierVisitor::VisitShift(HBinaryOperation* instruction) { // src instruction->ReplaceWith(input_other); instruction->GetBlock()->RemoveInstruction(instruction); - } else if (instruction->IsShl() && input_cst->IsOne()) { - // Replace Shl looking like - // SHL dst, src, 1 - // with - // ADD dst, src, src - HAdd *add = new(GetGraph()->GetArena()) HAdd(instruction->GetType(), - input_other, - input_other); - instruction->GetBlock()->ReplaceAndRemoveInstructionWith(instruction, add); - RecordSimplification(); } } } @@ -372,9 +362,8 @@ void InstructionSimplifierVisitor::VisitEqual(HEqual* equal) { block->RemoveInstruction(equal); RecordSimplification(); } else if (input_const->AsIntConstant()->IsZero()) { - // Replace (bool_value == false) with !bool_value - block->ReplaceAndRemoveInstructionWith( - equal, new (block->GetGraph()->GetArena()) HBooleanNot(input_value)); + equal->ReplaceWith(GetGraph()->InsertOppositeCondition(input_value, equal)); + block->RemoveInstruction(equal); RecordSimplification(); } else { // Replace (bool_value == integer_not_zero_nor_one_constant) with false @@ -399,9 +388,8 @@ void InstructionSimplifierVisitor::VisitNotEqual(HNotEqual* not_equal) { // We are comparing the boolean to a constant which is of type int and can // be any constant. 
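These VisitEqual / VisitNotEqual rewrites now route through HGraph::InsertOppositeCondition instead of materializing an HBooleanNot, so that when the boolean operand is itself a condition the comparison folds into the negated condition. An illustrative sketch of the effect, assuming the operand is an HLessThan:

//   cond = LessThan(a, b)
//   eq   = Equal(cond, 0)             // or: ne = NotEqual(cond, 1)
// becomes
//   repl = GreaterThanOrEqual(a, b)   // inserted before `eq`, whose uses move to `repl`
//   (the Equal/NotEqual itself is then removed)
// whereas the previous code always produced repl = BooleanNot(cond).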
if (input_const->AsIntConstant()->IsOne()) { - // Replace (bool_value != true) with !bool_value - block->ReplaceAndRemoveInstructionWith( - not_equal, new (block->GetGraph()->GetArena()) HBooleanNot(input_value)); + not_equal->ReplaceWith(GetGraph()->InsertOppositeCondition(input_value, not_equal)); + block->RemoveInstruction(not_equal); RecordSimplification(); } else if (input_const->AsIntConstant()->IsZero()) { // Replace (bool_value != false) with bool_value @@ -796,6 +784,34 @@ void InstructionSimplifierVisitor::VisitMul(HMul* instruction) { HShl* shl = new(allocator) HShl(type, input_other, shift); block->ReplaceAndRemoveInstructionWith(instruction, shl); RecordSimplification(); + } else if (IsPowerOfTwo(factor - 1)) { + // Transform code looking like + // MUL dst, src, (2^n + 1) + // into + // SHL tmp, src, n + // ADD dst, src, tmp + HShl* shl = new (allocator) HShl(type, + input_other, + GetGraph()->GetIntConstant(WhichPowerOf2(factor - 1))); + HAdd* add = new (allocator) HAdd(type, input_other, shl); + + block->InsertInstructionBefore(shl, instruction); + block->ReplaceAndRemoveInstructionWith(instruction, add); + RecordSimplification(); + } else if (IsPowerOfTwo(factor + 1)) { + // Transform code looking like + // MUL dst, src, (2^n - 1) + // into + // SHL tmp, src, n + // SUB dst, tmp, src + HShl* shl = new (allocator) HShl(type, + input_other, + GetGraph()->GetIntConstant(WhichPowerOf2(factor + 1))); + HSub* sub = new (allocator) HSub(type, shl, input_other); + + block->InsertInstructionBefore(shl, instruction); + block->ReplaceAndRemoveInstructionWith(instruction, sub); + RecordSimplification(); } } } diff --git a/compiler/optimizing/instruction_simplifier_arm64.cc b/compiler/optimizing/instruction_simplifier_arm64.cc index eb79f469eb..6a34b13320 100644 --- a/compiler/optimizing/instruction_simplifier_arm64.cc +++ b/compiler/optimizing/instruction_simplifier_arm64.cc @@ -16,11 +16,16 @@ #include "instruction_simplifier_arm64.h" +#include "common_arm64.h" #include "mirror/array-inl.h" namespace art { namespace arm64 { +using helpers::CanFitInShifterOperand; +using helpers::HasShifterOperand; +using helpers::ShifterOperandSupportsExtension; + void InstructionSimplifierArm64Visitor::TryExtractArrayAccessAddress(HInstruction* access, HInstruction* array, HInstruction* index, @@ -62,6 +67,169 @@ void InstructionSimplifierArm64Visitor::TryExtractArrayAccessAddress(HInstructio RecordSimplification(); } +bool InstructionSimplifierArm64Visitor::TryMergeIntoShifterOperand(HInstruction* use, + HInstruction* bitfield_op, + bool do_merge) { + DCHECK(HasShifterOperand(use)); + DCHECK(use->IsBinaryOperation() || use->IsNeg()); + DCHECK(CanFitInShifterOperand(bitfield_op)); + DCHECK(!bitfield_op->HasEnvironmentUses()); + + Primitive::Type type = use->GetType(); + if (type != Primitive::kPrimInt && type != Primitive::kPrimLong) { + return false; + } + + HInstruction* left; + HInstruction* right; + if (use->IsBinaryOperation()) { + left = use->InputAt(0); + right = use->InputAt(1); + } else { + DCHECK(use->IsNeg()); + right = use->AsNeg()->InputAt(0); + left = GetGraph()->GetConstant(right->GetType(), 0); + } + DCHECK(left == bitfield_op || right == bitfield_op); + + if (left == right) { + // TODO: Handle special transformations in this situation? + // For example should we transform `(x << 1) + (x << 1)` into `(x << 2)`? + // Or should this be part of a separate transformation logic? 
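For the new IsPowerOfTwo(factor - 1) and IsPowerOfTwo(factor + 1) cases added to VisitMul in instruction_simplifier.cc above, two worked instances, with a quick standalone check of the identities (shifts are done on the unsigned pattern only to keep the C++ well defined):

#include <cassert>
#include <cstdint>

int main() {
  for (int32_t x = -1000; x <= 1000; ++x) {
    int32_t x_shl3 = static_cast<int32_t>(static_cast<uint32_t>(x) << 3);
    assert(x * 9 == x + x_shl3);   // 9 = 2^3 + 1  ->  SHL tmp, x, 3; ADD dst, x, tmp
    assert(x * 7 == x_shl3 - x);   // 7 = 2^3 - 1  ->  SHL tmp, x, 3; SUB dst, tmp, x
  }
  return 0;
}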
+ return false; + } + + bool is_commutative = use->IsBinaryOperation() && use->AsBinaryOperation()->IsCommutative(); + HInstruction* other_input; + if (bitfield_op == right) { + other_input = left; + } else { + if (is_commutative) { + other_input = right; + } else { + return false; + } + } + + HArm64DataProcWithShifterOp::OpKind op_kind; + int shift_amount = 0; + HArm64DataProcWithShifterOp::GetOpInfoFromInstruction(bitfield_op, &op_kind, &shift_amount); + + if (HArm64DataProcWithShifterOp::IsExtensionOp(op_kind) && + !ShifterOperandSupportsExtension(use)) { + return false; + } + + if (do_merge) { + HArm64DataProcWithShifterOp* alu_with_op = + new (GetGraph()->GetArena()) HArm64DataProcWithShifterOp(use, + other_input, + bitfield_op->InputAt(0), + op_kind, + shift_amount, + use->GetDexPc()); + use->GetBlock()->ReplaceAndRemoveInstructionWith(use, alu_with_op); + if (bitfield_op->GetUses().IsEmpty()) { + bitfield_op->GetBlock()->RemoveInstruction(bitfield_op); + } + RecordSimplification(); + } + + return true; +} + +// Merge a bitfield move instruction into its uses if it can be merged in all of them. +bool InstructionSimplifierArm64Visitor::TryMergeIntoUsersShifterOperand(HInstruction* bitfield_op) { + DCHECK(CanFitInShifterOperand(bitfield_op)); + + if (bitfield_op->HasEnvironmentUses()) { + return false; + } + + const HUseList<HInstruction*>& uses = bitfield_op->GetUses(); + + // Check whether we can merge the instruction in all its users' shifter operand. + for (HUseIterator<HInstruction*> it_use(uses); !it_use.Done(); it_use.Advance()) { + HInstruction* use = it_use.Current()->GetUser(); + if (!HasShifterOperand(use)) { + return false; + } + if (!CanMergeIntoShifterOperand(use, bitfield_op)) { + return false; + } + } + + // Merge the instruction into its uses. + for (HUseIterator<HInstruction*> it_use(uses); !it_use.Done(); it_use.Advance()) { + HInstruction* use = it_use.Current()->GetUser(); + bool merged = MergeIntoShifterOperand(use, bitfield_op); + DCHECK(merged); + } + + return true; +} + +bool InstructionSimplifierArm64Visitor::TrySimpleMultiplyAccumulatePatterns( + HMul* mul, HBinaryOperation* input_binop, HInstruction* input_other) { + DCHECK(Primitive::IsIntOrLongType(mul->GetType())); + DCHECK(input_binop->IsAdd() || input_binop->IsSub()); + DCHECK_NE(input_binop, input_other); + if (!input_binop->HasOnlyOneNonEnvironmentUse()) { + return false; + } + + // Try to interpret patterns like + // a * (b <+/-> 1) + // as + // (a * b) <+/-> a + HInstruction* input_a = input_other; + HInstruction* input_b = nullptr; // Set to a non-null value if we found a pattern to optimize. + HInstruction::InstructionKind op_kind; + + if (input_binop->IsAdd()) { + if ((input_binop->GetConstantRight() != nullptr) && input_binop->GetConstantRight()->IsOne()) { + // Interpret + // a * (b + 1) + // as + // (a * b) + a + input_b = input_binop->GetLeastConstantLeft(); + op_kind = HInstruction::kAdd; + } + } else { + DCHECK(input_binop->IsSub()); + if (input_binop->GetRight()->IsConstant() && + input_binop->GetRight()->AsConstant()->IsMinusOne()) { + // Interpret + // a * (b - (-1)) + // as + // a + (a * b) + input_b = input_binop->GetLeft(); + op_kind = HInstruction::kAdd; + } else if (input_binop->GetLeft()->IsConstant() && + input_binop->GetLeft()->AsConstant()->IsOne()) { + // Interpret + // a * (1 - b) + // as + // a - (a * b) + input_b = input_binop->GetRight(); + op_kind = HInstruction::kSub; + } + } + + if (input_b == nullptr) { + // We did not find a pattern we can optimize. 
+ return false; + } + + HArm64MultiplyAccumulate* mulacc = new(GetGraph()->GetArena()) HArm64MultiplyAccumulate( + mul->GetType(), op_kind, input_a, input_a, input_b, mul->GetDexPc()); + + mul->GetBlock()->ReplaceAndRemoveInstructionWith(mul, mulacc); + input_binop->GetBlock()->RemoveInstruction(input_binop); + + return false; +} + void InstructionSimplifierArm64Visitor::VisitArrayGet(HArrayGet* instruction) { TryExtractArrayAccessAddress(instruction, instruction->GetArray(), @@ -76,5 +244,110 @@ void InstructionSimplifierArm64Visitor::VisitArraySet(HArraySet* instruction) { Primitive::ComponentSize(instruction->GetComponentType())); } +void InstructionSimplifierArm64Visitor::VisitMul(HMul* instruction) { + Primitive::Type type = instruction->GetType(); + if (!Primitive::IsIntOrLongType(type)) { + return; + } + + HInstruction* use = instruction->HasNonEnvironmentUses() + ? instruction->GetUses().GetFirst()->GetUser() + : nullptr; + + if (instruction->HasOnlyOneNonEnvironmentUse() && (use->IsAdd() || use->IsSub())) { + // Replace code looking like + // MUL tmp, x, y + // SUB dst, acc, tmp + // with + // MULSUB dst, acc, x, y + // Note that we do not want to (unconditionally) perform the merge when the + // multiplication has multiple uses and it can be merged in all of them. + // Multiple uses could happen on the same control-flow path, and we would + // then increase the amount of work. In the future we could try to evaluate + // whether all uses are on different control-flow paths (using dominance and + // reverse-dominance information) and only perform the merge when they are. + HInstruction* accumulator = nullptr; + HBinaryOperation* binop = use->AsBinaryOperation(); + HInstruction* binop_left = binop->GetLeft(); + HInstruction* binop_right = binop->GetRight(); + // Be careful after GVN. This should not happen since the `HMul` has only + // one use. + DCHECK_NE(binop_left, binop_right); + if (binop_right == instruction) { + accumulator = binop_left; + } else if (use->IsAdd()) { + DCHECK_EQ(binop_left, instruction); + accumulator = binop_right; + } + + if (accumulator != nullptr) { + HArm64MultiplyAccumulate* mulacc = + new (GetGraph()->GetArena()) HArm64MultiplyAccumulate(type, + binop->GetKind(), + accumulator, + instruction->GetLeft(), + instruction->GetRight()); + + binop->GetBlock()->ReplaceAndRemoveInstructionWith(binop, mulacc); + DCHECK(!instruction->HasUses()); + instruction->GetBlock()->RemoveInstruction(instruction); + RecordSimplification(); + return; + } + } + + // Use multiply accumulate instruction for a few simple patterns. + // We prefer not applying the following transformations if the left and + // right inputs perform the same operation. + // We rely on GVN having squashed the inputs if appropriate. However the + // results are still correct even if that did not happen. 
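VisitMul below merges a multiply into its single HAdd/HSub user, and TrySimpleMultiplyAccumulatePatterns above additionally recognizes a * (b ± 1) shapes; on ARM64 the merged HArm64MultiplyAccumulate can then be emitted as a single multiply-accumulate instruction (madd/msub in the comments is the usual encoding, mentioned only for illustration). A standalone check of the identities behind the "simple patterns":

#include <cassert>
#include <cstdint>

// acc + x * y and acc - x * y: the shapes the merged node represents (madd / msub).
int64_t MulAdd(int64_t acc, int64_t x, int64_t y) { return acc + x * y; }
int64_t MulSub(int64_t acc, int64_t x, int64_t y) { return acc - x * y; }

int main() {
  for (int64_t a = -5; a <= 5; ++a) {
    for (int64_t b = -5; b <= 5; ++b) {
      assert(a * (b + 1) == MulAdd(a, a, b));  // a * (b + 1)  ->  (a * b) + a
      assert(a * (1 - b) == MulSub(a, a, b));  // a * (1 - b)  ->  a - (a * b)
    }
  }
  return 0;
}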
+ if (instruction->GetLeft() == instruction->GetRight()) { + return; + } + + HInstruction* left = instruction->GetLeft(); + HInstruction* right = instruction->GetRight(); + if ((right->IsAdd() || right->IsSub()) && + TrySimpleMultiplyAccumulatePatterns(instruction, right->AsBinaryOperation(), left)) { + return; + } + if ((left->IsAdd() || left->IsSub()) && + TrySimpleMultiplyAccumulatePatterns(instruction, left->AsBinaryOperation(), right)) { + return; + } +} + +void InstructionSimplifierArm64Visitor::VisitShl(HShl* instruction) { + if (instruction->InputAt(1)->IsConstant()) { + TryMergeIntoUsersShifterOperand(instruction); + } +} + +void InstructionSimplifierArm64Visitor::VisitShr(HShr* instruction) { + if (instruction->InputAt(1)->IsConstant()) { + TryMergeIntoUsersShifterOperand(instruction); + } +} + +void InstructionSimplifierArm64Visitor::VisitTypeConversion(HTypeConversion* instruction) { + Primitive::Type result_type = instruction->GetResultType(); + Primitive::Type input_type = instruction->GetInputType(); + + if (input_type == result_type) { + // We let the arch-independent code handle this. + return; + } + + if (Primitive::IsIntegralType(result_type) && Primitive::IsIntegralType(input_type)) { + TryMergeIntoUsersShifterOperand(instruction); + } +} + +void InstructionSimplifierArm64Visitor::VisitUShr(HUShr* instruction) { + if (instruction->InputAt(1)->IsConstant()) { + TryMergeIntoUsersShifterOperand(instruction); + } +} + } // namespace arm64 } // namespace art diff --git a/compiler/optimizing/instruction_simplifier_arm64.h b/compiler/optimizing/instruction_simplifier_arm64.h index 4b697dba0e..b7f490bb8c 100644 --- a/compiler/optimizing/instruction_simplifier_arm64.h +++ b/compiler/optimizing/instruction_simplifier_arm64.h @@ -39,9 +39,30 @@ class InstructionSimplifierArm64Visitor : public HGraphVisitor { HInstruction* array, HInstruction* index, int access_size); + bool TryMergeIntoUsersShifterOperand(HInstruction* instruction); + bool TryMergeIntoShifterOperand(HInstruction* use, + HInstruction* bitfield_op, + bool do_merge); + bool CanMergeIntoShifterOperand(HInstruction* use, HInstruction* bitfield_op) { + return TryMergeIntoShifterOperand(use, bitfield_op, false); + } + bool MergeIntoShifterOperand(HInstruction* use, HInstruction* bitfield_op) { + DCHECK(CanMergeIntoShifterOperand(use, bitfield_op)); + return TryMergeIntoShifterOperand(use, bitfield_op, true); + } + + bool TrySimpleMultiplyAccumulatePatterns(HMul* mul, + HBinaryOperation* input_binop, + HInstruction* input_other); + // HInstruction visitors, sorted alphabetically. 
void VisitArrayGet(HArrayGet* instruction) OVERRIDE; void VisitArraySet(HArraySet* instruction) OVERRIDE; + void VisitMul(HMul* instruction) OVERRIDE; + void VisitShl(HShl* instruction) OVERRIDE; + void VisitShr(HShr* instruction) OVERRIDE; + void VisitTypeConversion(HTypeConversion* instruction) OVERRIDE; + void VisitUShr(HUShr* instruction) OVERRIDE; OptimizingCompilerStats* stats_; }; diff --git a/compiler/optimizing/intrinsics_arm.cc b/compiler/optimizing/intrinsics_arm.cc index 0a5acc3e64..d2017da221 100644 --- a/compiler/optimizing/intrinsics_arm.cc +++ b/compiler/optimizing/intrinsics_arm.cc @@ -44,7 +44,23 @@ using IntrinsicSlowPathARM = IntrinsicSlowPath<InvokeDexCallingConventionVisitor bool IntrinsicLocationsBuilderARM::TryDispatch(HInvoke* invoke) { Dispatch(invoke); LocationSummary* res = invoke->GetLocations(); - return res != nullptr && res->Intrinsified(); + if (res == nullptr) { + return false; + } + if (kEmitCompilerReadBarrier && res->CanCall()) { + // Generating an intrinsic for this HInvoke may produce an + // IntrinsicSlowPathARM slow path. Currently this approach + // does not work when using read barriers, as the emitted + // calling sequence will make use of another slow path + // (ReadBarrierForRootSlowPathARM for HInvokeStaticOrDirect, + // ReadBarrierSlowPathARM for HInvokeVirtual). So we bail + // out in this case. + // + // TODO: Find a way to have intrinsics work with read barriers. + invoke->SetLocations(nullptr); + return false; + } + return res->Intrinsified(); } #define __ assembler-> @@ -662,20 +678,23 @@ static void GenUnsafeGet(HInvoke* invoke, (type == Primitive::kPrimLong) || (type == Primitive::kPrimNot)); ArmAssembler* assembler = codegen->GetAssembler(); - Register base = locations->InAt(1).AsRegister<Register>(); // Object pointer. - Register offset = locations->InAt(2).AsRegisterPairLow<Register>(); // Long offset, lo part only. + Location base_loc = locations->InAt(1); + Register base = base_loc.AsRegister<Register>(); // Object pointer. + Location offset_loc = locations->InAt(2); + Register offset = offset_loc.AsRegisterPairLow<Register>(); // Long offset, lo part only. + Location trg_loc = locations->Out(); if (type == Primitive::kPrimLong) { - Register trg_lo = locations->Out().AsRegisterPairLow<Register>(); + Register trg_lo = trg_loc.AsRegisterPairLow<Register>(); __ add(IP, base, ShifterOperand(offset)); if (is_volatile && !codegen->GetInstructionSetFeatures().HasAtomicLdrdAndStrd()) { - Register trg_hi = locations->Out().AsRegisterPairHigh<Register>(); + Register trg_hi = trg_loc.AsRegisterPairHigh<Register>(); __ ldrexd(trg_lo, trg_hi, IP); } else { __ ldrd(trg_lo, Address(IP)); } } else { - Register trg = locations->Out().AsRegister<Register>(); + Register trg = trg_loc.AsRegister<Register>(); __ ldr(trg, Address(base, offset)); } @@ -684,14 +703,18 @@ static void GenUnsafeGet(HInvoke* invoke, } if (type == Primitive::kPrimNot) { - Register trg = locations->Out().AsRegister<Register>(); - __ MaybeUnpoisonHeapReference(trg); + codegen->MaybeGenerateReadBarrier(invoke, trg_loc, trg_loc, base_loc, 0U, offset_loc); } } static void CreateIntIntIntToIntLocations(ArenaAllocator* arena, HInvoke* invoke) { + bool can_call = kEmitCompilerReadBarrier && + (invoke->GetIntrinsic() == Intrinsics::kUnsafeGetObject || + invoke->GetIntrinsic() == Intrinsics::kUnsafeGetObjectVolatile); LocationSummary* locations = new (arena) LocationSummary(invoke, - LocationSummary::kNoCall, + can_call ? 
+ LocationSummary::kCallOnSlowPath : + LocationSummary::kNoCall, kIntrinsified); locations->SetInAt(0, Location::NoLocation()); // Unused receiver. locations->SetInAt(1, Location::RequiresRegister()); @@ -936,6 +959,7 @@ static void GenCas(LocationSummary* locations, Primitive::Type type, CodeGenerat __ Bind(&loop_head); __ ldrex(tmp_lo, tmp_ptr); + // TODO: Do we need a read barrier here when `type == Primitive::kPrimNot`? __ subs(tmp_lo, tmp_lo, ShifterOperand(expected_lo)); @@ -964,7 +988,11 @@ void IntrinsicLocationsBuilderARM::VisitUnsafeCASObject(HInvoke* invoke) { // The UnsafeCASObject intrinsic does not always work when heap // poisoning is enabled (it breaks run-test 004-UnsafeTest); turn it // off temporarily as a quick fix. + // // TODO(rpl): Fix it and turn it back on. + // + // TODO(rpl): Also, we should investigate whether we need a read + // barrier in the generated code. if (kPoisonHeapReferences) { return; } @@ -1400,6 +1428,10 @@ static void CheckPosition(ArmAssembler* assembler, } } +// TODO: Implement read barriers in the SystemArrayCopy intrinsic. +// Note that this code path is not used (yet) because we do not +// intrinsify methods that can go into the IntrinsicSlowPathARM +// slow path. void IntrinsicCodeGeneratorARM::VisitSystemArrayCopy(HInvoke* invoke) { ArmAssembler* assembler = GetAssembler(); LocationSummary* locations = invoke->GetLocations(); diff --git a/compiler/optimizing/intrinsics_arm64.cc b/compiler/optimizing/intrinsics_arm64.cc index 059abf090d..b04dcceb05 100644 --- a/compiler/optimizing/intrinsics_arm64.cc +++ b/compiler/optimizing/intrinsics_arm64.cc @@ -143,7 +143,23 @@ class IntrinsicSlowPathARM64 : public SlowPathCodeARM64 { bool IntrinsicLocationsBuilderARM64::TryDispatch(HInvoke* invoke) { Dispatch(invoke); LocationSummary* res = invoke->GetLocations(); - return res != nullptr && res->Intrinsified(); + if (res == nullptr) { + return false; + } + if (kEmitCompilerReadBarrier && res->CanCall()) { + // Generating an intrinsic for this HInvoke may produce an + // IntrinsicSlowPathARM64 slow path. Currently this approach + // does not work when using read barriers, as the emitted + // calling sequence will make use of another slow path + // (ReadBarrierForRootSlowPathARM64 for HInvokeStaticOrDirect, + // ReadBarrierSlowPathARM64 for HInvokeVirtual). So we bail + // out in this case. + // + // TODO: Find a way to have intrinsics work with read barriers. + invoke->SetLocations(nullptr); + return false; + } + return res->Intrinsified(); } #define __ masm-> @@ -818,9 +834,12 @@ static void GenUnsafeGet(HInvoke* invoke, (type == Primitive::kPrimLong) || (type == Primitive::kPrimNot)); vixl::MacroAssembler* masm = codegen->GetAssembler()->vixl_masm_; - Register base = WRegisterFrom(locations->InAt(1)); // Object pointer. - Register offset = XRegisterFrom(locations->InAt(2)); // Long offset. - Register trg = RegisterFrom(locations->Out(), type); + Location base_loc = locations->InAt(1); + Register base = WRegisterFrom(base_loc); // Object pointer. + Location offset_loc = locations->InAt(2); + Register offset = XRegisterFrom(offset_loc); // Long offset. 
+ Location trg_loc = locations->Out(); + Register trg = RegisterFrom(trg_loc, type); bool use_acquire_release = codegen->GetInstructionSetFeatures().PreferAcquireRelease(); MemOperand mem_op(base.X(), offset); @@ -837,13 +856,18 @@ static void GenUnsafeGet(HInvoke* invoke, if (type == Primitive::kPrimNot) { DCHECK(trg.IsW()); - codegen->GetAssembler()->MaybeUnpoisonHeapReference(trg); + codegen->MaybeGenerateReadBarrier(invoke, trg_loc, trg_loc, base_loc, 0U, offset_loc); } } static void CreateIntIntIntToIntLocations(ArenaAllocator* arena, HInvoke* invoke) { + bool can_call = kEmitCompilerReadBarrier && + (invoke->GetIntrinsic() == Intrinsics::kUnsafeGetObject || + invoke->GetIntrinsic() == Intrinsics::kUnsafeGetObjectVolatile); LocationSummary* locations = new (arena) LocationSummary(invoke, - LocationSummary::kNoCall, + can_call ? + LocationSummary::kCallOnSlowPath : + LocationSummary::kNoCall, kIntrinsified); locations->SetInAt(0, Location::NoLocation()); // Unused receiver. locations->SetInAt(1, Location::RequiresRegister()); @@ -1057,6 +1081,9 @@ static void GenCas(LocationSummary* locations, Primitive::Type type, CodeGenerat if (use_acquire_release) { __ Bind(&loop_head); __ Ldaxr(tmp_value, MemOperand(tmp_ptr)); + // TODO: Do we need a read barrier here when `type == Primitive::kPrimNot`? + // Note that this code is not (yet) used when read barriers are + // enabled (see IntrinsicLocationsBuilderARM64::VisitUnsafeCASObject). __ Cmp(tmp_value, expected); __ B(&exit_loop, ne); __ Stlxr(tmp_32, value, MemOperand(tmp_ptr)); @@ -1065,6 +1092,9 @@ static void GenCas(LocationSummary* locations, Primitive::Type type, CodeGenerat __ Dmb(InnerShareable, BarrierWrites); __ Bind(&loop_head); __ Ldxr(tmp_value, MemOperand(tmp_ptr)); + // TODO: Do we need a read barrier here when `type == Primitive::kPrimNot`? + // Note that this code is not (yet) used when read barriers are + // enabled (see IntrinsicLocationsBuilderARM64::VisitUnsafeCASObject). __ Cmp(tmp_value, expected); __ B(&exit_loop, ne); __ Stxr(tmp_32, value, MemOperand(tmp_ptr)); @@ -1090,7 +1120,11 @@ void IntrinsicLocationsBuilderARM64::VisitUnsafeCASObject(HInvoke* invoke) { // The UnsafeCASObject intrinsic does not always work when heap // poisoning is enabled (it breaks run-test 004-UnsafeTest); turn it // off temporarily as a quick fix. + // // TODO(rpl): Fix it and turn it back on. + // + // TODO(rpl): Also, we should investigate whether we need a read + // barrier in the generated code. 
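  // Editor's note, not part of this change: the ldrex/strex and ldaxr/stlxr
  // loops emitted by GenCas above implement the usual compare-and-swap
  // contract -- the word in memory is replaced with the new value only if it
  // still equals `expected`, and the sequence reports whether that swap
  // happened (the same contract as std::atomic<T>::compare_exchange_strong in
  // portable C++). The TODOs above ask whether, for kPrimNot, the reference
  // loaded inside that loop additionally needs a read barrier before the
  // comparison when read barriers are enabled.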
if (kPoisonHeapReferences) { return; } diff --git a/compiler/optimizing/intrinsics_mips.cc b/compiler/optimizing/intrinsics_mips.cc index a94e3a8c23..326844526e 100644 --- a/compiler/optimizing/intrinsics_mips.cc +++ b/compiler/optimizing/intrinsics_mips.cc @@ -138,6 +138,221 @@ bool IntrinsicLocationsBuilderMIPS::TryDispatch(HInvoke* invoke) { #define __ assembler-> +static void CreateFPToIntLocations(ArenaAllocator* arena, HInvoke* invoke) { + LocationSummary* locations = new (arena) LocationSummary(invoke, + LocationSummary::kNoCall, + kIntrinsified); + locations->SetInAt(0, Location::RequiresFpuRegister()); + locations->SetOut(Location::RequiresRegister()); +} + +static void MoveFPToInt(LocationSummary* locations, bool is64bit, MipsAssembler* assembler) { + FRegister in = locations->InAt(0).AsFpuRegister<FRegister>(); + + if (is64bit) { + Register out_lo = locations->Out().AsRegisterPairLow<Register>(); + Register out_hi = locations->Out().AsRegisterPairHigh<Register>(); + + __ Mfc1(out_lo, in); + __ Mfhc1(out_hi, in); + } else { + Register out = locations->Out().AsRegister<Register>(); + + __ Mfc1(out, in); + } +} + +// long java.lang.Double.doubleToRawLongBits(double) +void IntrinsicLocationsBuilderMIPS::VisitDoubleDoubleToRawLongBits(HInvoke* invoke) { + CreateFPToIntLocations(arena_, invoke); +} + +void IntrinsicCodeGeneratorMIPS::VisitDoubleDoubleToRawLongBits(HInvoke* invoke) { + MoveFPToInt(invoke->GetLocations(), true, GetAssembler()); +} + +// int java.lang.Float.floatToRawIntBits(float) +void IntrinsicLocationsBuilderMIPS::VisitFloatFloatToRawIntBits(HInvoke* invoke) { + CreateFPToIntLocations(arena_, invoke); +} + +void IntrinsicCodeGeneratorMIPS::VisitFloatFloatToRawIntBits(HInvoke* invoke) { + MoveFPToInt(invoke->GetLocations(), false, GetAssembler()); +} + +static void CreateIntToFPLocations(ArenaAllocator* arena, HInvoke* invoke) { + LocationSummary* locations = new (arena) LocationSummary(invoke, + LocationSummary::kNoCall, + kIntrinsified); + locations->SetInAt(0, Location::RequiresRegister()); + locations->SetOut(Location::RequiresFpuRegister()); +} + +static void MoveIntToFP(LocationSummary* locations, bool is64bit, MipsAssembler* assembler) { + FRegister out = locations->Out().AsFpuRegister<FRegister>(); + + if (is64bit) { + Register in_lo = locations->InAt(0).AsRegisterPairLow<Register>(); + Register in_hi = locations->InAt(0).AsRegisterPairHigh<Register>(); + + __ Mtc1(in_lo, out); + __ Mthc1(in_hi, out); + } else { + Register in = locations->InAt(0).AsRegister<Register>(); + + __ Mtc1(in, out); + } +} + +// double java.lang.Double.longBitsToDouble(long) +void IntrinsicLocationsBuilderMIPS::VisitDoubleLongBitsToDouble(HInvoke* invoke) { + CreateIntToFPLocations(arena_, invoke); +} + +void IntrinsicCodeGeneratorMIPS::VisitDoubleLongBitsToDouble(HInvoke* invoke) { + MoveIntToFP(invoke->GetLocations(), true, GetAssembler()); +} + +// float java.lang.Float.intBitsToFloat(int) +void IntrinsicLocationsBuilderMIPS::VisitFloatIntBitsToFloat(HInvoke* invoke) { + CreateIntToFPLocations(arena_, invoke); +} + +void IntrinsicCodeGeneratorMIPS::VisitFloatIntBitsToFloat(HInvoke* invoke) { + MoveIntToFP(invoke->GetLocations(), false, GetAssembler()); +} + +static void CreateIntToIntLocations(ArenaAllocator* arena, HInvoke* invoke) { + LocationSummary* locations = new (arena) LocationSummary(invoke, + LocationSummary::kNoCall, + kIntrinsified); + locations->SetInAt(0, Location::RequiresRegister()); + locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); +} + 
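Editorial aside (an illustrative sketch, not ART code): on MIPS32r1 the Wsbh/Seh/Rotr instructions used by the R2+ path are unavailable, so GenReverseBytes below assembles byte reversal out of shifts, masks and ors. The same bit manipulation, written as a self-contained portable C++ reference:

#include <cstdint>

// Mirrors the R1 int path: rotate by 16 (swap halfwords), then swap the bytes
// inside each halfword with the 0x00FF00FF mask (the Wsbh emulation).
static uint32_t ReverseBytes32(uint32_t x) {
  x = (x << 16) | (x >> 16);              // Rotr(x, 16)
  uint32_t lo = x & 0x00FF00FFu;          // bytes that move up by 8
  uint32_t hi = (x >> 8) & 0x00FF00FFu;   // bytes that move down by 8
  return (lo << 8) | hi;                  // e.g. 0x11223344 -> 0x44332211
}

// Mirrors the long path: byte-reverse each 32-bit half and swap the halves,
// which is why out_lo is built from in_hi and out_hi from in_lo.
static uint64_t ReverseBytes64(uint64_t x) {
  uint64_t lo = ReverseBytes32(static_cast<uint32_t>(x));
  uint64_t hi = ReverseBytes32(static_cast<uint32_t>(x >> 32));
  return (lo << 32) | hi;
}

// Mirrors the short path: swap the two low bytes, then sign-extend (Wsbh + Seh).
static int16_t ReverseBytes16(int16_t x) {
  uint16_t u = static_cast<uint16_t>(x);
  return static_cast<int16_t>(static_cast<uint16_t>((u << 8) | (u >> 8)));
}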
+static void GenReverseBytes(LocationSummary* locations, + Primitive::Type type, + MipsAssembler* assembler, + bool isR2OrNewer) { + DCHECK(type == Primitive::kPrimShort || + type == Primitive::kPrimInt || + type == Primitive::kPrimLong); + + if (type == Primitive::kPrimShort) { + Register in = locations->InAt(0).AsRegister<Register>(); + Register out = locations->Out().AsRegister<Register>(); + + if (isR2OrNewer) { + __ Wsbh(out, in); + __ Seh(out, out); + } else { + __ Sll(TMP, in, 24); + __ Sra(TMP, TMP, 16); + __ Sll(out, in, 16); + __ Srl(out, out, 24); + __ Or(out, out, TMP); + } + } else if (type == Primitive::kPrimInt) { + Register in = locations->InAt(0).AsRegister<Register>(); + Register out = locations->Out().AsRegister<Register>(); + + if (isR2OrNewer) { + __ Rotr(out, in, 16); + __ Wsbh(out, out); + } else { + // MIPS32r1 + // __ Rotr(out, in, 16); + __ Sll(TMP, in, 16); + __ Srl(out, in, 16); + __ Or(out, out, TMP); + // __ Wsbh(out, out); + __ LoadConst32(AT, 0x00FF00FF); + __ And(TMP, out, AT); + __ Sll(TMP, TMP, 8); + __ Srl(out, out, 8); + __ And(out, out, AT); + __ Or(out, out, TMP); + } + } else if (type == Primitive::kPrimLong) { + Register in_lo = locations->InAt(0).AsRegisterPairLow<Register>(); + Register in_hi = locations->InAt(0).AsRegisterPairHigh<Register>(); + Register out_lo = locations->Out().AsRegisterPairLow<Register>(); + Register out_hi = locations->Out().AsRegisterPairHigh<Register>(); + + if (isR2OrNewer) { + __ Rotr(AT, in_hi, 16); + __ Rotr(TMP, in_lo, 16); + __ Wsbh(out_lo, AT); + __ Wsbh(out_hi, TMP); + } else { + // When calling CreateIntToIntLocations() we promised that the + // use of the out_lo/out_hi wouldn't overlap with the use of + // in_lo/in_hi. Be very careful not to write to out_lo/out_hi + // until we're completely done reading from in_lo/in_hi. + // __ Rotr(TMP, in_lo, 16); + __ Sll(TMP, in_lo, 16); + __ Srl(AT, in_lo, 16); + __ Or(TMP, TMP, AT); // Hold in TMP until it's safe + // to write to out_hi. + // __ Rotr(out_lo, in_hi, 16); + __ Sll(AT, in_hi, 16); + __ Srl(out_lo, in_hi, 16); // Here we are finally done reading + // from in_lo/in_hi so it's okay to + // write to out_lo/out_hi. 
+ __ Or(out_lo, out_lo, AT); + // __ Wsbh(out_hi, out_hi); + __ LoadConst32(AT, 0x00FF00FF); + __ And(out_hi, TMP, AT); + __ Sll(out_hi, out_hi, 8); + __ Srl(TMP, TMP, 8); + __ And(TMP, TMP, AT); + __ Or(out_hi, out_hi, TMP); + // __ Wsbh(out_lo, out_lo); + __ And(TMP, out_lo, AT); // AT already holds the correct mask value + __ Sll(TMP, TMP, 8); + __ Srl(out_lo, out_lo, 8); + __ And(out_lo, out_lo, AT); + __ Or(out_lo, out_lo, TMP); + } + } +} + +// int java.lang.Integer.reverseBytes(int) +void IntrinsicLocationsBuilderMIPS::VisitIntegerReverseBytes(HInvoke* invoke) { + CreateIntToIntLocations(arena_, invoke); +} + +void IntrinsicCodeGeneratorMIPS::VisitIntegerReverseBytes(HInvoke* invoke) { + GenReverseBytes(invoke->GetLocations(), + Primitive::kPrimInt, + GetAssembler(), + codegen_->GetInstructionSetFeatures().IsMipsIsaRevGreaterThanEqual2()); +} + +// long java.lang.Long.reverseBytes(long) +void IntrinsicLocationsBuilderMIPS::VisitLongReverseBytes(HInvoke* invoke) { + CreateIntToIntLocations(arena_, invoke); +} + +void IntrinsicCodeGeneratorMIPS::VisitLongReverseBytes(HInvoke* invoke) { + GenReverseBytes(invoke->GetLocations(), + Primitive::kPrimLong, + GetAssembler(), + codegen_->GetInstructionSetFeatures().IsMipsIsaRevGreaterThanEqual2()); +} + +// short java.lang.Short.reverseBytes(short) +void IntrinsicLocationsBuilderMIPS::VisitShortReverseBytes(HInvoke* invoke) { + CreateIntToIntLocations(arena_, invoke); +} + +void IntrinsicCodeGeneratorMIPS::VisitShortReverseBytes(HInvoke* invoke) { + GenReverseBytes(invoke->GetLocations(), + Primitive::kPrimShort, + GetAssembler(), + codegen_->GetInstructionSetFeatures().IsMipsIsaRevGreaterThanEqual2()); +} + // boolean java.lang.String.equals(Object anObject) void IntrinsicLocationsBuilderMIPS::VisitStringEquals(HInvoke* invoke) { LocationSummary* locations = new (arena_) LocationSummary(invoke, @@ -250,15 +465,8 @@ void IntrinsicCodeGeneratorMIPS::Visit ## Name(HInvoke* invoke ATTRIBUTE_UNUSED) UNIMPLEMENTED_INTRINSIC(IntegerReverse) UNIMPLEMENTED_INTRINSIC(LongReverse) -UNIMPLEMENTED_INTRINSIC(ShortReverseBytes) -UNIMPLEMENTED_INTRINSIC(IntegerReverseBytes) -UNIMPLEMENTED_INTRINSIC(LongReverseBytes) UNIMPLEMENTED_INTRINSIC(LongNumberOfLeadingZeros) UNIMPLEMENTED_INTRINSIC(IntegerNumberOfLeadingZeros) -UNIMPLEMENTED_INTRINSIC(FloatIntBitsToFloat) -UNIMPLEMENTED_INTRINSIC(DoubleLongBitsToDouble) -UNIMPLEMENTED_INTRINSIC(FloatFloatToRawIntBits) -UNIMPLEMENTED_INTRINSIC(DoubleDoubleToRawLongBits) UNIMPLEMENTED_INTRINSIC(MathAbsDouble) UNIMPLEMENTED_INTRINSIC(MathAbsFloat) UNIMPLEMENTED_INTRINSIC(MathAbsInt) diff --git a/compiler/optimizing/intrinsics_mips64.cc b/compiler/optimizing/intrinsics_mips64.cc index ff843ebb1e..ecee11dea6 100644 --- a/compiler/optimizing/intrinsics_mips64.cc +++ b/compiler/optimizing/intrinsics_mips64.cc @@ -115,7 +115,7 @@ class IntrinsicSlowPathMIPS64 : public SlowPathCodeMIPS64 { } RestoreLiveRegisters(codegen, invoke_->GetLocations()); - __ B(GetExitLabel()); + __ Bc(GetExitLabel()); } const char* GetDescription() const OVERRIDE { return "IntrinsicSlowPathMIPS64"; } @@ -806,7 +806,7 @@ static void GenRoundingMode(LocationSummary* locations, DCHECK_NE(in, out); - Label done; + Mips64Label done; // double floor/ceil(double in) { // if in.isNaN || in.isInfinite || in.isZero { @@ -1256,7 +1256,7 @@ static void GenCas(LocationSummary* locations, Primitive::Type type, CodeGenerat // } while (tmp_value == 0 && failure([tmp_ptr] <- r_new_value)); // result = tmp_value != 0; - Label loop_head, exit_loop; + Mips64Label 
loop_head, exit_loop; __ Daddu(TMP, base, offset); __ Sync(0); __ Bind(&loop_head); @@ -1391,6 +1391,108 @@ void IntrinsicCodeGeneratorMIPS64::VisitStringCompareTo(HInvoke* invoke) { __ Bind(slow_path->GetExitLabel()); } +// boolean java.lang.String.equals(Object anObject) +void IntrinsicLocationsBuilderMIPS64::VisitStringEquals(HInvoke* invoke) { + LocationSummary* locations = new (arena_) LocationSummary(invoke, + LocationSummary::kNoCall, + kIntrinsified); + locations->SetInAt(0, Location::RequiresRegister()); + locations->SetInAt(1, Location::RequiresRegister()); + locations->SetOut(Location::RequiresRegister()); + + // Temporary registers to store lengths of strings and for calculations. + locations->AddTemp(Location::RequiresRegister()); + locations->AddTemp(Location::RequiresRegister()); + locations->AddTemp(Location::RequiresRegister()); +} + +void IntrinsicCodeGeneratorMIPS64::VisitStringEquals(HInvoke* invoke) { + Mips64Assembler* assembler = GetAssembler(); + LocationSummary* locations = invoke->GetLocations(); + + GpuRegister str = locations->InAt(0).AsRegister<GpuRegister>(); + GpuRegister arg = locations->InAt(1).AsRegister<GpuRegister>(); + GpuRegister out = locations->Out().AsRegister<GpuRegister>(); + + GpuRegister temp1 = locations->GetTemp(0).AsRegister<GpuRegister>(); + GpuRegister temp2 = locations->GetTemp(1).AsRegister<GpuRegister>(); + GpuRegister temp3 = locations->GetTemp(2).AsRegister<GpuRegister>(); + + Mips64Label loop; + Mips64Label end; + Mips64Label return_true; + Mips64Label return_false; + + // Get offsets of count, value, and class fields within a string object. + const int32_t count_offset = mirror::String::CountOffset().Int32Value(); + const int32_t value_offset = mirror::String::ValueOffset().Int32Value(); + const int32_t class_offset = mirror::Object::ClassOffset().Int32Value(); + + // Note that the null check must have been done earlier. + DCHECK(!invoke->CanDoImplicitNullCheckOn(invoke->InputAt(0))); + + // If the register containing the pointer to "this", and the register + // containing the pointer to "anObject" are the same register then + // "this", and "anObject" are the same object and we can + // short-circuit the logic to a true result. + if (str == arg) { + __ LoadConst64(out, 1); + return; + } + + // Check if input is null, return false if it is. + __ Beqzc(arg, &return_false); + + // Reference equality check, return true if same reference. + __ Beqc(str, arg, &return_true); + + // Instanceof check for the argument by comparing class fields. + // All string objects must have the same type since String cannot be subclassed. + // Receiver must be a string object, so its class field is equal to all strings' class fields. + // If the argument is a string object, its class field must be equal to receiver's class field. + __ Lw(temp1, str, class_offset); + __ Lw(temp2, arg, class_offset); + __ Bnec(temp1, temp2, &return_false); + + // Load lengths of this and argument strings. + __ Lw(temp1, str, count_offset); + __ Lw(temp2, arg, count_offset); + // Check if lengths are equal, return false if they're not. + __ Bnec(temp1, temp2, &return_false); + // Return true if both strings are empty. + __ Beqzc(temp1, &return_true); + + // Don't overwrite input registers + __ Move(TMP, str); + __ Move(temp3, arg); + + // Assertions that must hold in order to compare strings 4 characters at a time. 
+ DCHECK_ALIGNED(value_offset, 8); + static_assert(IsAligned<8>(kObjectAlignment), "String of odd length is not zero padded"); + + // Loop to compare strings 4 characters at a time starting at the beginning of the string. + // Ok to do this because strings are zero-padded to be 8-byte aligned. + __ Bind(&loop); + __ Ld(out, TMP, value_offset); + __ Ld(temp2, temp3, value_offset); + __ Bnec(out, temp2, &return_false); + __ Daddiu(TMP, TMP, 8); + __ Daddiu(temp3, temp3, 8); + __ Addiu(temp1, temp1, -4); + __ Bgtzc(temp1, &loop); + + // Return true and exit the function. + // If loop does not result in returning false, we return true. + __ Bind(&return_true); + __ LoadConst64(out, 1); + __ Bc(&end); + + // Return false and exit the function. + __ Bind(&return_false); + __ LoadConst64(out, 0); + __ Bind(&end); +} + static void GenerateStringIndexOf(HInvoke* invoke, Mips64Assembler* assembler, CodeGeneratorMIPS64* codegen, @@ -1412,7 +1514,7 @@ static void GenerateStringIndexOf(HInvoke* invoke, // full slow-path down and branch unconditionally. slow_path = new (allocator) IntrinsicSlowPathMIPS64(invoke); codegen->AddSlowPath(slow_path); - __ B(slow_path->GetEntryLabel()); + __ Bc(slow_path->GetEntryLabel()); __ Bind(slow_path->GetExitLabel()); return; } @@ -1586,8 +1688,6 @@ void IntrinsicCodeGeneratorMIPS64::Visit ## Name(HInvoke* invoke ATTRIBUTE_UNUSE UNIMPLEMENTED_INTRINSIC(MathRoundDouble) UNIMPLEMENTED_INTRINSIC(MathRoundFloat) -UNIMPLEMENTED_INTRINSIC(StringEquals) - UNIMPLEMENTED_INTRINSIC(ReferenceGetReferent) UNIMPLEMENTED_INTRINSIC(StringGetCharsNoCheck) UNIMPLEMENTED_INTRINSIC(SystemArrayCopyChar) diff --git a/compiler/optimizing/load_store_elimination.cc b/compiler/optimizing/load_store_elimination.cc index 068d5db69c..5b89cfef5a 100644 --- a/compiler/optimizing/load_store_elimination.cc +++ b/compiler/optimizing/load_store_elimination.cc @@ -738,8 +738,6 @@ class LSEVisitor : public HGraphVisitor { } if (same_value || possibly_redundant) { possibly_removed_stores_.push_back(instruction); - // Same-value/singleton-field store shouldn't have a null check. - DCHECK(!ref->InputAt(0)->IsNullCheck()); } if (!same_value) { diff --git a/compiler/optimizing/nodes.cc b/compiler/optimizing/nodes.cc index 73a44ee2cb..b5ac773505 100644 --- a/compiler/optimizing/nodes.cc +++ b/compiler/optimizing/nodes.cc @@ -1890,7 +1890,7 @@ HInstruction* HGraph::InlineInto(HGraph* outer_graph, HInvoke* invoke) { * | * if_block * / \ - * dummy_block deopt_block + * true_block false_block * \ / * new_pre_header * | @@ -1898,62 +1898,73 @@ HInstruction* HGraph::InlineInto(HGraph* outer_graph, HInvoke* invoke) { */ void HGraph::TransformLoopHeaderForBCE(HBasicBlock* header) { DCHECK(header->IsLoopHeader()); - HBasicBlock* pre_header = header->GetDominator(); + HBasicBlock* old_pre_header = header->GetDominator(); - // Need this to avoid critical edge. + // Need extra block to avoid critical edge. HBasicBlock* if_block = new (arena_) HBasicBlock(this, header->GetDexPc()); - // Need this to avoid critical edge. 
- HBasicBlock* dummy_block = new (arena_) HBasicBlock(this, header->GetDexPc()); - HBasicBlock* deopt_block = new (arena_) HBasicBlock(this, header->GetDexPc()); + HBasicBlock* true_block = new (arena_) HBasicBlock(this, header->GetDexPc()); + HBasicBlock* false_block = new (arena_) HBasicBlock(this, header->GetDexPc()); HBasicBlock* new_pre_header = new (arena_) HBasicBlock(this, header->GetDexPc()); AddBlock(if_block); - AddBlock(dummy_block); - AddBlock(deopt_block); + AddBlock(true_block); + AddBlock(false_block); AddBlock(new_pre_header); - header->ReplacePredecessor(pre_header, new_pre_header); - pre_header->successors_.clear(); - pre_header->dominated_blocks_.clear(); - - pre_header->AddSuccessor(if_block); - if_block->AddSuccessor(dummy_block); // True successor - if_block->AddSuccessor(deopt_block); // False successor - dummy_block->AddSuccessor(new_pre_header); - deopt_block->AddSuccessor(new_pre_header); - - pre_header->dominated_blocks_.push_back(if_block); - if_block->SetDominator(pre_header); - if_block->dominated_blocks_.push_back(dummy_block); - dummy_block->SetDominator(if_block); - if_block->dominated_blocks_.push_back(deopt_block); - deopt_block->SetDominator(if_block); + header->ReplacePredecessor(old_pre_header, new_pre_header); + old_pre_header->successors_.clear(); + old_pre_header->dominated_blocks_.clear(); + + old_pre_header->AddSuccessor(if_block); + if_block->AddSuccessor(true_block); // True successor + if_block->AddSuccessor(false_block); // False successor + true_block->AddSuccessor(new_pre_header); + false_block->AddSuccessor(new_pre_header); + + old_pre_header->dominated_blocks_.push_back(if_block); + if_block->SetDominator(old_pre_header); + if_block->dominated_blocks_.push_back(true_block); + true_block->SetDominator(if_block); + if_block->dominated_blocks_.push_back(false_block); + false_block->SetDominator(if_block); if_block->dominated_blocks_.push_back(new_pre_header); new_pre_header->SetDominator(if_block); new_pre_header->dominated_blocks_.push_back(header); header->SetDominator(new_pre_header); + // Fix reverse post order. size_t index_of_header = IndexOfElement(reverse_post_order_, header); MakeRoomFor(&reverse_post_order_, 4, index_of_header - 1); reverse_post_order_[index_of_header++] = if_block; - reverse_post_order_[index_of_header++] = dummy_block; - reverse_post_order_[index_of_header++] = deopt_block; + reverse_post_order_[index_of_header++] = true_block; + reverse_post_order_[index_of_header++] = false_block; reverse_post_order_[index_of_header++] = new_pre_header; - HLoopInformation* info = pre_header->GetLoopInformation(); - if (info != nullptr) { - if_block->SetLoopInformation(info); - dummy_block->SetLoopInformation(info); - deopt_block->SetLoopInformation(info); - new_pre_header->SetLoopInformation(info); - for (HLoopInformationOutwardIterator loop_it(*pre_header); + // Fix loop information. + HLoopInformation* loop_info = old_pre_header->GetLoopInformation(); + if (loop_info != nullptr) { + if_block->SetLoopInformation(loop_info); + true_block->SetLoopInformation(loop_info); + false_block->SetLoopInformation(loop_info); + new_pre_header->SetLoopInformation(loop_info); + // Add blocks to all enveloping loops. 
+ for (HLoopInformationOutwardIterator loop_it(*old_pre_header); !loop_it.Done(); loop_it.Advance()) { loop_it.Current()->Add(if_block); - loop_it.Current()->Add(dummy_block); - loop_it.Current()->Add(deopt_block); + loop_it.Current()->Add(true_block); + loop_it.Current()->Add(false_block); loop_it.Current()->Add(new_pre_header); } } + + // Fix try/catch information. + TryCatchInformation* try_catch_info = old_pre_header->IsTryBlock() + ? old_pre_header->GetTryCatchInformation() + : nullptr; + if_block->SetTryCatchInformation(try_catch_info); + true_block->SetTryCatchInformation(try_catch_info); + false_block->SetTryCatchInformation(try_catch_info); + new_pre_header->SetTryCatchInformation(try_catch_info); } void HInstruction::SetReferenceTypeInfo(ReferenceTypeInfo rti) { @@ -2068,6 +2079,19 @@ void HInvokeStaticOrDirect::RemoveInputAt(size_t index) { } } +std::ostream& operator<<(std::ostream& os, HInvokeStaticOrDirect::ClinitCheckRequirement rhs) { + switch (rhs) { + case HInvokeStaticOrDirect::ClinitCheckRequirement::kExplicit: + return os << "explicit"; + case HInvokeStaticOrDirect::ClinitCheckRequirement::kImplicit: + return os << "implicit"; + case HInvokeStaticOrDirect::ClinitCheckRequirement::kNone: + return os << "none"; + default: + return os << "unknown:" << static_cast<int>(rhs); + } +} + void HInstruction::RemoveEnvironmentUsers() { for (HUseIterator<HEnvironment*> use_it(GetEnvUses()); !use_it.Done(); use_it.Advance()) { HUseListNode<HEnvironment*>* user_node = use_it.Current(); @@ -2077,4 +2101,46 @@ void HInstruction::RemoveEnvironmentUsers() { env_uses_.Clear(); } +// Returns an instruction with the opposite boolean value from 'cond'. +HInstruction* HGraph::InsertOppositeCondition(HInstruction* cond, HInstruction* cursor) { + ArenaAllocator* allocator = GetArena(); + + if (cond->IsCondition() && + !Primitive::IsFloatingPointType(cond->InputAt(0)->GetType())) { + // Can't reverse floating point conditions. We have to use HBooleanNot in that case. 
+ HInstruction* lhs = cond->InputAt(0); + HInstruction* rhs = cond->InputAt(1); + HInstruction* replacement = nullptr; + switch (cond->AsCondition()->GetOppositeCondition()) { // get *opposite* + case kCondEQ: replacement = new (allocator) HEqual(lhs, rhs); break; + case kCondNE: replacement = new (allocator) HNotEqual(lhs, rhs); break; + case kCondLT: replacement = new (allocator) HLessThan(lhs, rhs); break; + case kCondLE: replacement = new (allocator) HLessThanOrEqual(lhs, rhs); break; + case kCondGT: replacement = new (allocator) HGreaterThan(lhs, rhs); break; + case kCondGE: replacement = new (allocator) HGreaterThanOrEqual(lhs, rhs); break; + case kCondB: replacement = new (allocator) HBelow(lhs, rhs); break; + case kCondBE: replacement = new (allocator) HBelowOrEqual(lhs, rhs); break; + case kCondA: replacement = new (allocator) HAbove(lhs, rhs); break; + case kCondAE: replacement = new (allocator) HAboveOrEqual(lhs, rhs); break; + default: + LOG(FATAL) << "Unexpected condition"; + UNREACHABLE(); + } + cursor->GetBlock()->InsertInstructionBefore(replacement, cursor); + return replacement; + } else if (cond->IsIntConstant()) { + HIntConstant* int_const = cond->AsIntConstant(); + if (int_const->IsZero()) { + return GetIntConstant(1); + } else { + DCHECK(int_const->IsOne()); + return GetIntConstant(0); + } + } else { + HInstruction* replacement = new (allocator) HBooleanNot(cond); + cursor->GetBlock()->InsertInstructionBefore(replacement, cursor); + return replacement; + } +} + } // namespace art diff --git a/compiler/optimizing/nodes.h b/compiler/optimizing/nodes.h index e3c810e6b1..d5110a7172 100644 --- a/compiler/optimizing/nodes.h +++ b/compiler/optimizing/nodes.h @@ -371,6 +371,11 @@ class HGraph : public ArenaObject<kArenaAllocGraph> { bool HasTryCatch() const { return has_try_catch_; } void SetHasTryCatch(bool value) { has_try_catch_ = value; } + // Returns an instruction with the opposite boolean value from 'cond'. + // The instruction has been inserted into the graph, either as a constant, or + // before cursor. + HInstruction* InsertOppositeCondition(HInstruction* cond, HInstruction* cursor); + private: void FindBackEdges(ArenaBitVector* visited); void RemoveInstructionsAsUsersFromDeadBlocks(const ArenaBitVector& visited) const; @@ -1096,7 +1101,9 @@ class HLoopInformationOutwardIterator : public ValueObject { #define FOR_EACH_CONCRETE_INSTRUCTION_ARM64(M) #else #define FOR_EACH_CONCRETE_INSTRUCTION_ARM64(M) \ - M(Arm64IntermediateAddress, Instruction) + M(Arm64DataProcWithShifterOp, Instruction) \ + M(Arm64IntermediateAddress, Instruction) \ + M(Arm64MultiplyAccumulate, Instruction) #endif #define FOR_EACH_CONCRETE_INSTRUCTION_MIPS(M) @@ -1626,6 +1633,11 @@ class HEnvironment : public ArenaObject<kArenaAllocEnvironment> { return holder_; } + + bool IsFromInlinedInvoke() const { + return GetParent() != nullptr; + } + private: // Record instructions' use entries of this environment for constant-time removal. // It should only be called by HInstruction when a new environment use is added. 
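Editorial aside on the floating-point guard in HGraph::InsertOppositeCondition above (a standalone illustration, not ART code): flipping a comparison is only sound for integral operands. Every ordered comparison against NaN evaluates to false, so !(a < b) is not equivalent to (a >= b), which is why the code falls back to HBooleanNot for floating-point inputs instead of building the opposite HCondition:

#include <cassert>
#include <cmath>

int main() {
  double a = std::nan(""), b = 1.0;
  assert(!(a < b));    // A comparison against NaN is false under <...
  assert(!(a >= b));   // ...and also false under the "opposite" >=, so
                       // rewriting !(a < b) as (a >= b) would change the result.
  // For integral operands the rewrite is exact: !(x < y) == (x >= y) for all x, y.
  return 0;
}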
@@ -3238,7 +3250,7 @@ class HInvoke : public HInstruction { void SetIntrinsic(Intrinsics intrinsic, IntrinsicNeedsEnvironmentOrCache needs_env_or_cache); bool IsFromInlinedInvoke() const { - return GetEnvironment()->GetParent() != nullptr; + return GetEnvironment()->IsFromInlinedInvoke(); } bool CanThrow() const OVERRIDE { return true; } @@ -3434,14 +3446,19 @@ class HInvokeStaticOrDirect : public HInvoke { DCHECK(had_current_method_input || !needs_current_method_input); if (had_current_method_input && !needs_current_method_input) { - DCHECK_EQ(InputAt(GetCurrentMethodInputIndex()), GetBlock()->GetGraph()->GetCurrentMethod()); - RemoveInputAt(GetCurrentMethodInputIndex()); + DCHECK_EQ(InputAt(GetSpecialInputIndex()), GetBlock()->GetGraph()->GetCurrentMethod()); + RemoveInputAt(GetSpecialInputIndex()); } dispatch_info_ = dispatch_info; } - void InsertInputAt(size_t index, HInstruction* input); - void RemoveInputAt(size_t index); + void AddSpecialInput(HInstruction* input) { + // We allow only one special input. + DCHECK(!IsStringInit() && !HasCurrentMethodInput()); + DCHECK(InputCount() == GetSpecialInputIndex() || + (InputCount() == GetSpecialInputIndex() + 1 && IsStaticWithExplicitClinitCheck())); + InsertInputAt(GetSpecialInputIndex(), input); + } bool CanDoImplicitNullCheckOn(HInstruction* obj ATTRIBUTE_UNUSED) const OVERRIDE { // We access the method via the dex cache so we can't do an implicit null check. @@ -3453,13 +3470,20 @@ class HInvokeStaticOrDirect : public HInvoke { return return_type_ == Primitive::kPrimNot && !IsStringInit(); } + // Get the index of the special input, if any. + // + // If the invoke IsStringInit(), it initially has a HFakeString special argument + // which is removed by the instruction simplifier; if the invoke HasCurrentMethodInput(), + // the "special input" is the current method pointer; otherwise there may be one + // platform-specific special input, such as PC-relative addressing base. + uint32_t GetSpecialInputIndex() const { return GetNumberOfArguments(); } + InvokeType GetInvokeType() const { return invoke_type_; } MethodLoadKind GetMethodLoadKind() const { return dispatch_info_.method_load_kind; } CodePtrLocation GetCodePtrLocation() const { return dispatch_info_.code_ptr_location; } bool IsRecursive() const { return GetMethodLoadKind() == MethodLoadKind::kRecursive; } bool NeedsDexCacheOfDeclaringClass() const OVERRIDE; bool IsStringInit() const { return GetMethodLoadKind() == MethodLoadKind::kStringInit; } - uint32_t GetCurrentMethodInputIndex() const { return GetNumberOfArguments(); } bool HasMethodAddress() const { return GetMethodLoadKind() == MethodLoadKind::kDirectAddress; } bool HasPcRelativeDexCache() const { return GetMethodLoadKind() == MethodLoadKind::kDexCachePcRelative; @@ -3467,11 +3491,11 @@ class HInvokeStaticOrDirect : public HInvoke { bool HasCurrentMethodInput() const { // This function can be called only after the invoke has been fully initialized by the builder. 
if (NeedsCurrentMethodInput(GetMethodLoadKind())) { - DCHECK(InputAt(GetCurrentMethodInputIndex())->IsCurrentMethod()); + DCHECK(InputAt(GetSpecialInputIndex())->IsCurrentMethod()); return true; } else { - DCHECK(InputCount() == GetCurrentMethodInputIndex() || - !InputAt(GetCurrentMethodInputIndex())->IsCurrentMethod()); + DCHECK(InputCount() == GetSpecialInputIndex() || + !InputAt(GetSpecialInputIndex())->IsCurrentMethod()); return false; } } @@ -3505,20 +3529,19 @@ class HInvokeStaticOrDirect : public HInvoke { return GetInvokeType() == kStatic; } - // Remove the art::HLoadClass instruction set as last input by - // art::PrepareForRegisterAllocation::VisitClinitCheck in lieu of - // the initial art::HClinitCheck instruction (only relevant for - // static calls with explicit clinit check). - void RemoveLoadClassAsLastInput() { + // Remove the HClinitCheck or the replacement HLoadClass (set as last input by + // PrepareForRegisterAllocation::VisitClinitCheck() in lieu of the initial HClinitCheck) + // instruction; only relevant for static calls with explicit clinit check. + void RemoveExplicitClinitCheck(ClinitCheckRequirement new_requirement) { DCHECK(IsStaticWithExplicitClinitCheck()); size_t last_input_index = InputCount() - 1; HInstruction* last_input = InputAt(last_input_index); DCHECK(last_input != nullptr); - DCHECK(last_input->IsLoadClass()) << last_input->DebugName(); + DCHECK(last_input->IsLoadClass() || last_input->IsClinitCheck()) << last_input->DebugName(); RemoveAsUserOfInput(last_input_index); inputs_.pop_back(); - clinit_check_requirement_ = ClinitCheckRequirement::kImplicit; - DCHECK(IsStaticWithImplicitClinitCheck()); + clinit_check_requirement_ = new_requirement; + DCHECK(!IsStaticWithExplicitClinitCheck()); } bool IsStringFactoryFor(HFakeString* str) const { @@ -3539,7 +3562,7 @@ class HInvokeStaticOrDirect : public HInvoke { } // Is this a call to a static method whose declaring class has an - // explicit intialization check in the graph? + // explicit initialization check in the graph? 
bool IsStaticWithExplicitClinitCheck() const { return IsStatic() && (clinit_check_requirement_ == ClinitCheckRequirement::kExplicit); } @@ -3572,6 +3595,9 @@ class HInvokeStaticOrDirect : public HInvoke { return input_record; } + void InsertInputAt(size_t index, HInstruction* input); + void RemoveInputAt(size_t index); + private: const InvokeType invoke_type_; ClinitCheckRequirement clinit_check_requirement_; @@ -3583,6 +3609,7 @@ class HInvokeStaticOrDirect : public HInvoke { DISALLOW_COPY_AND_ASSIGN(HInvokeStaticOrDirect); }; +std::ostream& operator<<(std::ostream& os, HInvokeStaticOrDirect::ClinitCheckRequirement rhs); class HInvokeVirtual : public HInvoke { public: @@ -3637,9 +3664,10 @@ class HInvokeInterface : public HInvoke { DISALLOW_COPY_AND_ASSIGN(HInvokeInterface); }; -class HNewInstance : public HExpression<1> { +class HNewInstance : public HExpression<2> { public: - HNewInstance(HCurrentMethod* current_method, + HNewInstance(HInstruction* cls, + HCurrentMethod* current_method, uint32_t dex_pc, uint16_t type_index, const DexFile& dex_file, @@ -3652,7 +3680,8 @@ class HNewInstance : public HExpression<1> { can_throw_(can_throw), finalizable_(finalizable), entrypoint_(entrypoint) { - SetRawInputAt(0, current_method); + SetRawInputAt(0, cls); + SetRawInputAt(1, current_method); } uint16_t GetTypeIndex() const { return type_index_; } @@ -3672,6 +3701,10 @@ class HNewInstance : public HExpression<1> { QuickEntrypointEnum GetEntrypoint() const { return entrypoint_; } + void SetEntrypoint(QuickEntrypointEnum entrypoint) { + entrypoint_ = entrypoint; + } + DECLARE_INSTRUCTION(NewInstance); private: @@ -3679,7 +3712,7 @@ class HNewInstance : public HExpression<1> { const DexFile& dex_file_; const bool can_throw_; const bool finalizable_; - const QuickEntrypointEnum entrypoint_; + QuickEntrypointEnum entrypoint_; DISALLOW_COPY_AND_ASSIGN(HNewInstance); }; @@ -4287,9 +4320,13 @@ class HPhi : public HInstruction { : HInstruction(SideEffects::None(), dex_pc), inputs_(number_of_inputs, arena->Adapter(kArenaAllocPhiInputs)), reg_number_(reg_number), - type_(type), - is_live_(false), + type_(ToPhiType(type)), + // Phis are constructed live and marked dead if conflicting or unused. + // Individual steps of SsaBuilder should assume that if a phi has been + // marked dead, it can be ignored and will be removed by SsaPhiElimination. + is_live_(true), can_be_null_(true) { + DCHECK_NE(type_, Primitive::kPrimVoid); } // Returns a type equivalent to the given `type`, but that a `HPhi` can hold. @@ -4760,13 +4797,15 @@ class HLoadClass : public HExpression<1> { const DexFile& dex_file, bool is_referrers_class, uint32_t dex_pc, - bool needs_access_check) + bool needs_access_check, + bool is_in_dex_cache) : HExpression(Primitive::kPrimNot, SideEffectsForArchRuntimeCalls(), dex_pc), type_index_(type_index), dex_file_(dex_file), is_referrers_class_(is_referrers_class), generate_clinit_check_(false), needs_access_check_(needs_access_check), + is_in_dex_cache_(is_in_dex_cache), loaded_class_rti_(ReferenceTypeInfo::CreateInvalid()) { // Referrers class should not need access check. We never inline unverified // methods so we can't possibly end up in this situation. @@ -4791,14 +4830,13 @@ class HLoadClass : public HExpression<1> { bool CanBeNull() const OVERRIDE { return false; } bool NeedsEnvironment() const OVERRIDE { - // Will call runtime and load the class if the class is not loaded yet. - // TODO: finer grain decision. 
- return !is_referrers_class_; + return CanCallRuntime(); } bool MustGenerateClinitCheck() const { return generate_clinit_check_; } + void SetMustGenerateClinitCheck(bool generate_clinit_check) { // The entrypoint the code generator is going to call does not do // clinit of the class. @@ -4807,7 +4845,9 @@ class HLoadClass : public HExpression<1> { } bool CanCallRuntime() const { - return MustGenerateClinitCheck() || !is_referrers_class_ || needs_access_check_; + return MustGenerateClinitCheck() || + (!is_referrers_class_ && !is_in_dex_cache_) || + needs_access_check_; } bool NeedsAccessCheck() const { @@ -4815,8 +4855,6 @@ class HLoadClass : public HExpression<1> { } bool CanThrow() const OVERRIDE { - // May call runtime and and therefore can throw. - // TODO: finer grain decision. return CanCallRuntime(); } @@ -4838,6 +4876,8 @@ class HLoadClass : public HExpression<1> { return SideEffects::CanTriggerGC(); } + bool IsInDexCache() const { return is_in_dex_cache_; } + DECLARE_INSTRUCTION(LoadClass); private: @@ -4847,7 +4887,8 @@ class HLoadClass : public HExpression<1> { // Whether this instruction must generate the initialization check. // Used for code generation. bool generate_clinit_check_; - bool needs_access_check_; + const bool needs_access_check_; + const bool is_in_dex_cache_; ReferenceTypeInfo loaded_class_rti_; @@ -4912,6 +4953,7 @@ class HClinitCheck : public HExpression<1> { return true; } + bool CanThrow() const OVERRIDE { return true; } HLoadClass* GetLoadClass() const { return InputAt(0)->AsLoadClass(); } diff --git a/compiler/optimizing/nodes_arm64.cc b/compiler/optimizing/nodes_arm64.cc new file mode 100644 index 0000000000..ac2f093847 --- /dev/null +++ b/compiler/optimizing/nodes_arm64.cc @@ -0,0 +1,84 @@ +/* + * Copyright (C) 2015 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "common_arm64.h" +#include "nodes.h" + +namespace art { + +using arm64::helpers::CanFitInShifterOperand; + +void HArm64DataProcWithShifterOp::GetOpInfoFromInstruction(HInstruction* instruction, + /*out*/OpKind* op_kind, + /*out*/int* shift_amount) { + DCHECK(CanFitInShifterOperand(instruction)); + if (instruction->IsShl()) { + *op_kind = kLSL; + *shift_amount = instruction->AsShl()->GetRight()->AsIntConstant()->GetValue(); + } else if (instruction->IsShr()) { + *op_kind = kASR; + *shift_amount = instruction->AsShr()->GetRight()->AsIntConstant()->GetValue(); + } else if (instruction->IsUShr()) { + *op_kind = kLSR; + *shift_amount = instruction->AsUShr()->GetRight()->AsIntConstant()->GetValue(); + } else { + DCHECK(instruction->IsTypeConversion()); + Primitive::Type result_type = instruction->AsTypeConversion()->GetResultType(); + Primitive::Type input_type = instruction->AsTypeConversion()->GetInputType(); + int result_size = Primitive::ComponentSize(result_type); + int input_size = Primitive::ComponentSize(input_type); + int min_size = std::min(result_size, input_size); + // This follows the logic in + // `InstructionCodeGeneratorARM64::VisitTypeConversion()`. + if (result_type == Primitive::kPrimInt && input_type == Primitive::kPrimLong) { + // There is actually nothing to do. The register will be used as a W + // register, discarding the top bits. This is represented by the default + // encoding 'LSL 0'. + *op_kind = kLSL; + *shift_amount = 0; + } else if (result_type == Primitive::kPrimChar || + (input_type == Primitive::kPrimChar && input_size < result_size)) { + *op_kind = kUXTH; + } else { + switch (min_size) { + case 1: *op_kind = kSXTB; break; + case 2: *op_kind = kSXTH; break; + case 4: *op_kind = kSXTW; break; + default: + LOG(FATAL) << "Unexpected min size " << min_size; + } + } + } +} + +std::ostream& operator<<(std::ostream& os, const HArm64DataProcWithShifterOp::OpKind op) { + switch (op) { + case HArm64DataProcWithShifterOp::kLSL: return os << "LSL"; + case HArm64DataProcWithShifterOp::kLSR: return os << "LSR"; + case HArm64DataProcWithShifterOp::kASR: return os << "ASR"; + case HArm64DataProcWithShifterOp::kUXTB: return os << "UXTB"; + case HArm64DataProcWithShifterOp::kUXTH: return os << "UXTH"; + case HArm64DataProcWithShifterOp::kUXTW: return os << "UXTW"; + case HArm64DataProcWithShifterOp::kSXTB: return os << "SXTB"; + case HArm64DataProcWithShifterOp::kSXTH: return os << "SXTH"; + case HArm64DataProcWithShifterOp::kSXTW: return os << "SXTW"; + default: + LOG(FATAL) << "Invalid OpKind " << static_cast<int>(op); + UNREACHABLE(); + } +} + +} // namespace art diff --git a/compiler/optimizing/nodes_arm64.h b/compiler/optimizing/nodes_arm64.h index 885d3a29ee..e8439354af 100644 --- a/compiler/optimizing/nodes_arm64.h +++ b/compiler/optimizing/nodes_arm64.h @@ -19,6 +19,79 @@ namespace art { +class HArm64DataProcWithShifterOp : public HExpression<2> { + public: + enum OpKind { + kLSL, // Logical shift left. + kLSR, // Logical shift right. + kASR, // Arithmetic shift right. + kUXTB, // Unsigned extend byte. + kUXTH, // Unsigned extend half-word. + kUXTW, // Unsigned extend word. + kSXTB, // Signed extend byte. + kSXTH, // Signed extend half-word. + kSXTW, // Signed extend word. + + // Aliases. 
+ kFirstShiftOp = kLSL, + kLastShiftOp = kASR, + kFirstExtensionOp = kUXTB, + kLastExtensionOp = kSXTW + }; + HArm64DataProcWithShifterOp(HInstruction* instr, + HInstruction* left, + HInstruction* right, + OpKind op, + // The shift argument is unused if the operation + // is an extension. + int shift = 0, + uint32_t dex_pc = kNoDexPc) + : HExpression(instr->GetType(), SideEffects::None(), dex_pc), + instr_kind_(instr->GetKind()), op_kind_(op), shift_amount_(shift) { + DCHECK(!instr->HasSideEffects()); + SetRawInputAt(0, left); + SetRawInputAt(1, right); + } + + bool CanBeMoved() const OVERRIDE { return true; } + bool InstructionDataEquals(HInstruction* other_instr) const OVERRIDE { + HArm64DataProcWithShifterOp* other = other_instr->AsArm64DataProcWithShifterOp(); + return instr_kind_ == other->instr_kind_ && + op_kind_ == other->op_kind_ && + shift_amount_ == other->shift_amount_; + } + + static bool IsShiftOp(OpKind op_kind) { + return kFirstShiftOp <= op_kind && op_kind <= kLastShiftOp; + } + + static bool IsExtensionOp(OpKind op_kind) { + return kFirstExtensionOp <= op_kind && op_kind <= kLastExtensionOp; + } + + // Find the operation kind and shift amount from a bitfield move instruction. + static void GetOpInfoFromInstruction(HInstruction* bitfield_op, + /*out*/OpKind* op_kind, + /*out*/int* shift_amount); + + InstructionKind GetInstrKind() const { return instr_kind_; } + OpKind GetOpKind() const { return op_kind_; } + int GetShiftAmount() const { return shift_amount_; } + + DECLARE_INSTRUCTION(Arm64DataProcWithShifterOp); + + private: + InstructionKind instr_kind_; + OpKind op_kind_; + int shift_amount_; + + friend std::ostream& operator<<(std::ostream& os, OpKind op); + + DISALLOW_COPY_AND_ASSIGN(HArm64DataProcWithShifterOp); +}; + +std::ostream& operator<<(std::ostream& os, const HArm64DataProcWithShifterOp::OpKind op); + // This instruction computes an intermediate address pointing in the 'middle' of an object. The // result pointer cannot be handled by GC, so extra care is taken to make sure that this value is // never used across anything that can trigger GC. @@ -42,6 +115,40 @@ class HArm64IntermediateAddress : public HExpression<2> { DISALLOW_COPY_AND_ASSIGN(HArm64IntermediateAddress); }; +class HArm64MultiplyAccumulate : public HExpression<3> { + public: + HArm64MultiplyAccumulate(Primitive::Type type, + InstructionKind op, + HInstruction* accumulator, + HInstruction* mul_left, + HInstruction* mul_right, + uint32_t dex_pc = kNoDexPc) + : HExpression(type, SideEffects::None(), dex_pc), op_kind_(op) { + SetRawInputAt(kInputAccumulatorIndex, accumulator); + SetRawInputAt(kInputMulLeftIndex, mul_left); + SetRawInputAt(kInputMulRightIndex, mul_right); + } + + static constexpr int kInputAccumulatorIndex = 0; + static constexpr int kInputMulLeftIndex = 1; + static constexpr int kInputMulRightIndex = 2; + + bool CanBeMoved() const OVERRIDE { return true; } + bool InstructionDataEquals(HInstruction* other) const OVERRIDE { + return op_kind_ == other->AsArm64MultiplyAccumulate()->op_kind_; + } + + InstructionKind GetOpKind() const { return op_kind_; } + + DECLARE_INSTRUCTION(Arm64MultiplyAccumulate); + + private: + // Indicates if this is a MADD or MSUB. 
+ InstructionKind op_kind_; + + DISALLOW_COPY_AND_ASSIGN(HArm64MultiplyAccumulate); +}; + } // namespace art #endif // ART_COMPILER_OPTIMIZING_NODES_ARM64_H_ diff --git a/compiler/optimizing/optimizing_cfi_test.cc b/compiler/optimizing/optimizing_cfi_test.cc index 34f1fe5949..2b0d522b31 100644 --- a/compiler/optimizing/optimizing_cfi_test.cc +++ b/compiler/optimizing/optimizing_cfi_test.cc @@ -25,6 +25,7 @@ #include "utils/assembler.h" #include "utils/arm/assembler_thumb2.h" #include "utils/mips/assembler_mips.h" +#include "utils/mips64/assembler_mips64.h" #include "optimizing/optimizing_cfi_test_expected.inc" @@ -212,6 +213,34 @@ TEST_F(OptimizingCFITest, kMipsAdjust) { Check(kMips, "kMips_adjust", expected_asm, expected_cfi); } +TEST_F(OptimizingCFITest, kMips64Adjust) { + // One NOP in forbidden slot, 1 << 15 NOPS have size 1 << 17 which exceeds 18-bit signed maximum. + static constexpr size_t kNumNops = 1u + (1u << 15); + std::vector<uint8_t> expected_asm( + expected_asm_kMips64_adjust_head, + expected_asm_kMips64_adjust_head + arraysize(expected_asm_kMips64_adjust_head)); + expected_asm.resize(expected_asm.size() + kNumNops * 4u, 0u); + expected_asm.insert( + expected_asm.end(), + expected_asm_kMips64_adjust_tail, + expected_asm_kMips64_adjust_tail + arraysize(expected_asm_kMips64_adjust_tail)); + std::vector<uint8_t> expected_cfi( + expected_cfi_kMips64_adjust, + expected_cfi_kMips64_adjust + arraysize(expected_cfi_kMips64_adjust)); + SetUpFrame(kMips64); +#define __ down_cast<mips64::Mips64Assembler*>(GetCodeGenerator()->GetAssembler())-> + mips64::Mips64Label target; + __ Beqc(mips64::A1, mips64::A2, &target); + // Push the target out of range of BEQC. + for (size_t i = 0; i != kNumNops; ++i) { + __ Nop(); + } + __ Bind(&target); +#undef __ + Finish(); + Check(kMips64, "kMips64_adjust", expected_asm, expected_cfi); +} + #endif // __ANDROID__ } // namespace art diff --git a/compiler/optimizing/optimizing_cfi_test_expected.inc b/compiler/optimizing/optimizing_cfi_test_expected.inc index 4571ebf2d4..de857295c7 100644 --- a/compiler/optimizing/optimizing_cfi_test_expected.inc +++ b/compiler/optimizing/optimizing_cfi_test_expected.inc @@ -413,3 +413,57 @@ static constexpr uint8_t expected_cfi_kMips_adjust[] = { // 0x0002007c: nop // 0x00020080: .cfi_restore_state // 0x00020080: .cfi_def_cfa_offset: 64 + +static constexpr uint8_t expected_asm_kMips64_adjust_head[] = { + 0xD8, 0xFF, 0xBD, 0x67, 0x20, 0x00, 0xBF, 0xFF, 0x18, 0x00, 0xB1, 0xFF, + 0x10, 0x00, 0xB0, 0xFF, 0x08, 0x00, 0xB9, 0xF7, 0x00, 0x00, 0xB8, 0xF7, + 0xE8, 0xFF, 0xBD, 0x67, 0x00, 0x00, 0xA4, 0xFF, 0x02, 0x00, 0xA6, 0x60, + 0x02, 0x00, 0x3E, 0xEC, 0x0C, 0x00, 0x01, 0xD8, +}; +static constexpr uint8_t expected_asm_kMips64_adjust_tail[] = { + 0x18, 0x00, 0xBD, 0x67, 0x00, 0x00, 0xB8, 0xD7, 0x08, 0x00, 0xB9, 0xD7, + 0x10, 0x00, 0xB0, 0xDF, 0x18, 0x00, 0xB1, 0xDF, 0x20, 0x00, 0xBF, 0xDF, + 0x28, 0x00, 0xBD, 0x67, 0x09, 0x00, 0xE0, 0x03, 0x00, 0x00, 0x00, 0x00, +}; +static constexpr uint8_t expected_cfi_kMips64_adjust[] = { + 0x44, 0x0E, 0x28, 0x44, 0x9F, 0x02, 0x44, 0x91, 0x04, 0x44, 0x90, 0x06, + 0x4C, 0x0E, 0x40, 0x04, 0x14, 0x00, 0x02, 0x00, 0x0A, 0x44, 0x0E, 0x28, + 0x4C, 0xD0, 0x44, 0xD1, 0x44, 0xDF, 0x44, 0x0E, 0x00, 0x48, 0x0B, 0x0E, + 0x40, +}; +// 0x00000000: daddiu r29, r29, -40 +// 0x00000004: .cfi_def_cfa_offset: 40 +// 0x00000004: sd r31, +32(r29) +// 0x00000008: .cfi_offset: r31 at cfa-8 +// 0x00000008: sd r17, +24(r29) +// 0x0000000c: .cfi_offset: r17 at cfa-16 +// 0x0000000c: sd r16, +16(r29) +// 0x00000010: 
.cfi_offset: r16 at cfa-24 +// 0x00000010: sdc1 f25, +8(r29) +// 0x00000014: sdc1 f24, +0(r29) +// 0x00000018: daddiu r29, r29, -24 +// 0x0000001c: .cfi_def_cfa_offset: 64 +// 0x0000001c: sd r4, +0(r29) +// 0x00000020: bnec r5, r6, 0x0000002c ; +12 +// 0x00000024: auipc r1, 2 +// 0x00000028: jic r1, 12 ; b 0x00020030 ; +131080 +// 0x0000002c: nop +// ... +// 0x0002002c: nop +// 0x00020030: .cfi_remember_state +// 0x00020030: daddiu r29, r29, 24 +// 0x00020034: .cfi_def_cfa_offset: 40 +// 0x00020034: ldc1 f24, +0(r29) +// 0x00020038: ldc1 f25, +8(r29) +// 0x0002003c: ld r16, +16(r29) +// 0x00020040: .cfi_restore: r16 +// 0x00020040: ld r17, +24(r29) +// 0x00020044: .cfi_restore: r17 +// 0x00020044: ld r31, +32(r29) +// 0x00020048: .cfi_restore: r31 +// 0x00020048: daddiu r29, r29, 40 +// 0x0002004c: .cfi_def_cfa_offset: 0 +// 0x0002004c: jr r31 +// 0x00020050: nop +// 0x00020054: .cfi_restore_state +// 0x00020054: .cfi_def_cfa_offset: 64 diff --git a/compiler/optimizing/optimizing_compiler.cc b/compiler/optimizing/optimizing_compiler.cc index 27ee47296c..8440813a87 100644 --- a/compiler/optimizing/optimizing_compiler.cc +++ b/compiler/optimizing/optimizing_compiler.cc @@ -110,24 +110,23 @@ class PassScope; class PassObserver : public ValueObject { public: PassObserver(HGraph* graph, - const char* method_name, CodeGenerator* codegen, std::ostream* visualizer_output, CompilerDriver* compiler_driver) : graph_(graph), - method_name_(method_name), + cached_method_name_(), timing_logger_enabled_(compiler_driver->GetDumpPasses()), - timing_logger_(method_name, true, true), + timing_logger_(timing_logger_enabled_ ? GetMethodName() : "", true, true), disasm_info_(graph->GetArena()), visualizer_enabled_(!compiler_driver->GetDumpCfgFileName().empty()), visualizer_(visualizer_output, graph, *codegen), graph_in_bad_state_(false) { if (timing_logger_enabled_ || visualizer_enabled_) { - if (!IsVerboseMethod(compiler_driver, method_name)) { + if (!IsVerboseMethod(compiler_driver, GetMethodName())) { timing_logger_enabled_ = visualizer_enabled_ = false; } if (visualizer_enabled_) { - visualizer_.PrintHeader(method_name_); + visualizer_.PrintHeader(GetMethodName()); codegen->SetDisassemblyInformation(&disasm_info_); } } @@ -135,7 +134,7 @@ class PassObserver : public ValueObject { ~PassObserver() { if (timing_logger_enabled_) { - LOG(INFO) << "TIMINGS " << method_name_; + LOG(INFO) << "TIMINGS " << GetMethodName(); LOG(INFO) << Dumpable<TimingLogger>(timing_logger_); } } @@ -148,6 +147,14 @@ class PassObserver : public ValueObject { void SetGraphInBadState() { graph_in_bad_state_ = true; } + const char* GetMethodName() { + // PrettyMethod() is expensive, so we delay calling it until we actually have to. + if (cached_method_name_.empty()) { + cached_method_name_ = PrettyMethod(graph_->GetMethodIdx(), graph_->GetDexFile()); + } + return cached_method_name_.c_str(); + } + private: void StartPass(const char* pass_name) { // Dump graph first, then start timer. @@ -206,7 +213,8 @@ class PassObserver : public ValueObject { } HGraph* const graph_; - const char* method_name_; + + std::string cached_method_name_; bool timing_logger_enabled_; TimingLogger timing_logger_; @@ -383,10 +391,12 @@ static bool IsInstructionSetSupported(InstructionSet instruction_set) { || instruction_set == kX86_64; } -// Read barrier are supported only on x86 and x86-64 at the moment. +// Read barrier are supported on ARM, ARM64, x86 and x86-64 at the moment. 
// TODO: Add support for other architectures and remove this function static bool InstructionSetSupportsReadBarrier(InstructionSet instruction_set) { - return instruction_set == kX86 + return instruction_set == kArm64 + || instruction_set == kThumb2 + || instruction_set == kX86 || instruction_set == kX86_64; } @@ -663,13 +673,12 @@ CodeGenerator* OptimizingCompiler::TryCompile(ArenaAllocator* arena, jobject class_loader, const DexFile& dex_file, Handle<mirror::DexCache> dex_cache) const { - std::string method_name = PrettyMethod(method_idx, dex_file); MaybeRecordStat(MethodCompilationStat::kAttemptCompilation); CompilerDriver* compiler_driver = GetCompilerDriver(); InstructionSet instruction_set = compiler_driver->GetInstructionSet(); - // Always use the thumb2 assembler: some runtime functionality (like implicit stack - // overflow checks) assume thumb2. + // Always use the Thumb-2 assembler: some runtime functionality + // (like implicit stack overflow checks) assume Thumb-2. if (instruction_set == kArm) { instruction_set = kThumb2; } @@ -727,7 +736,6 @@ CodeGenerator* OptimizingCompiler::TryCompile(ArenaAllocator* arena, compiler_driver->GetCompilerOptions().GetGenerateDebugInfo()); PassObserver pass_observer(graph, - method_name.c_str(), codegen.get(), visualizer_output_.get(), compiler_driver); @@ -755,7 +763,7 @@ CodeGenerator* OptimizingCompiler::TryCompile(ArenaAllocator* arena, interpreter_metadata, dex_cache); - VLOG(compiler) << "Building " << method_name; + VLOG(compiler) << "Building " << pass_observer.GetMethodName(); { PassScope scope(HGraphBuilder::kBuilderPassName, &pass_observer); @@ -765,13 +773,14 @@ CodeGenerator* OptimizingCompiler::TryCompile(ArenaAllocator* arena, } } - VLOG(compiler) << "Optimizing " << method_name; + VLOG(compiler) << "Optimizing " << pass_observer.GetMethodName(); if (run_optimizations_) { { PassScope scope(SsaBuilder::kSsaBuilderPassName, &pass_observer); if (!graph->TryBuildingSsa()) { // We could not transform the graph to SSA, bailout. - LOG(INFO) << "Skipping compilation of " << method_name << ": it contains a non natural loop"; + LOG(INFO) << "Skipping compilation of " << pass_observer.GetMethodName() + << ": it contains a non natural loop"; MaybeRecordStat(MethodCompilationStat::kNotCompiledCannotBuildSSA); pass_observer.SetGraphInBadState(); return nullptr; diff --git a/compiler/optimizing/pc_relative_fixups_x86.cc b/compiler/optimizing/pc_relative_fixups_x86.cc index c2894c7338..808a1dc6c2 100644 --- a/compiler/optimizing/pc_relative_fixups_x86.cc +++ b/compiler/optimizing/pc_relative_fixups_x86.cc @@ -113,9 +113,8 @@ class PCRelativeHandlerVisitor : public HGraphVisitor { if (invoke_static_or_direct != nullptr && invoke_static_or_direct->HasPcRelativeDexCache()) { InitializePCRelativeBasePointer(invoke); // Add the extra parameter base_. - uint32_t index = invoke_static_or_direct->GetCurrentMethodInputIndex(); DCHECK(!invoke_static_or_direct->HasCurrentMethodInput()); - invoke_static_or_direct->InsertInputAt(index, base_); + invoke_static_or_direct->AddSpecialInput(base_); } // Ensure that we can load FP arguments from the constant area. 
for (size_t i = 0, e = invoke->InputCount(); i < e; i++) { diff --git a/compiler/optimizing/prepare_for_register_allocation.cc b/compiler/optimizing/prepare_for_register_allocation.cc index ca928ae0f2..d1770b75ab 100644 --- a/compiler/optimizing/prepare_for_register_allocation.cc +++ b/compiler/optimizing/prepare_for_register_allocation.cc @@ -48,16 +48,85 @@ void PrepareForRegisterAllocation::VisitBoundType(HBoundType* bound_type) { } void PrepareForRegisterAllocation::VisitClinitCheck(HClinitCheck* check) { - HLoadClass* cls = check->GetLoadClass(); - check->ReplaceWith(cls); - if (check->GetPrevious() == cls) { + // Try to find a static invoke or a new-instance from which this check originated. + HInstruction* implicit_clinit = nullptr; + for (HUseIterator<HInstruction*> it(check->GetUses()); !it.Done(); it.Advance()) { + HInstruction* user = it.Current()->GetUser(); + if ((user->IsInvokeStaticOrDirect() || user->IsNewInstance()) && + CanMoveClinitCheck(check, user)) { + implicit_clinit = user; + if (user->IsInvokeStaticOrDirect()) { + DCHECK(user->AsInvokeStaticOrDirect()->IsStaticWithExplicitClinitCheck()); + user->AsInvokeStaticOrDirect()->RemoveExplicitClinitCheck( + HInvokeStaticOrDirect::ClinitCheckRequirement::kImplicit); + } else { + DCHECK(user->IsNewInstance()); + // We delegate the initialization duty to the allocation. + if (user->AsNewInstance()->GetEntrypoint() == kQuickAllocObjectInitialized) { + user->AsNewInstance()->SetEntrypoint(kQuickAllocObjectResolved); + } + } + break; + } + } + // If we found a static invoke or new-instance for merging, remove the check + // from dominated static invokes. + if (implicit_clinit != nullptr) { + for (HUseIterator<HInstruction*> it(check->GetUses()); !it.Done(); ) { + HInstruction* user = it.Current()->GetUser(); + // All other uses must be dominated. + DCHECK(implicit_clinit->StrictlyDominates(user) || (implicit_clinit == user)); + it.Advance(); // Advance before we remove the node, reference to the next node is preserved. + if (user->IsInvokeStaticOrDirect()) { + user->AsInvokeStaticOrDirect()->RemoveExplicitClinitCheck( + HInvokeStaticOrDirect::ClinitCheckRequirement::kNone); + } + } + } + + HLoadClass* load_class = check->GetLoadClass(); + bool can_merge_with_load_class = CanMoveClinitCheck(load_class, check); + + check->ReplaceWith(load_class); + + if (implicit_clinit != nullptr) { + // Remove the check from the graph. It has been merged into the invoke or new-instance. + check->GetBlock()->RemoveInstruction(check); + // Check if we can merge the load class as well. + if (can_merge_with_load_class && !load_class->HasUses()) { + load_class->GetBlock()->RemoveInstruction(load_class); + } + } else if (can_merge_with_load_class) { // Pass the initialization duty to the `HLoadClass` instruction, // and remove the instruction from the graph. - cls->SetMustGenerateClinitCheck(true); + load_class->SetMustGenerateClinitCheck(true); check->GetBlock()->RemoveInstruction(check); } } +void PrepareForRegisterAllocation::VisitNewInstance(HNewInstance* instruction) { + HLoadClass* load_class = instruction->InputAt(0)->AsLoadClass(); + bool has_only_one_use = load_class->HasOnlyOneNonEnvironmentUse(); + // Change the entrypoint to kQuickAllocObject if either: + // - the class is finalizable (only kQuickAllocObject handles finalizable classes), + // - the class needs access checks (we do not know if it's finalizable), + // - or the load class has only one use. 
+ if (instruction->IsFinalizable() || has_only_one_use || load_class->NeedsAccessCheck()) { + instruction->SetEntrypoint(kQuickAllocObject); + instruction->ReplaceInput(GetGraph()->GetIntConstant(load_class->GetTypeIndex()), 0); + // The allocation entry point that deals with access checks does not work with inlined + // methods, so we need to check whether this allocation comes from an inlined method. + if (has_only_one_use && !instruction->GetEnvironment()->IsFromInlinedInvoke()) { + // We can remove the load class from the graph. If it needed access checks, we delegate + // the access check to the allocation. + if (load_class->NeedsAccessCheck()) { + instruction->SetEntrypoint(kQuickAllocObjectWithAccessCheck); + } + load_class->GetBlock()->RemoveInstruction(load_class); + } + } +} + void PrepareForRegisterAllocation::VisitCondition(HCondition* condition) { bool needs_materialization = false; if (!condition->GetUses().HasOnlyOneUse() || !condition->GetEnvUses().IsEmpty()) { @@ -86,30 +155,60 @@ void PrepareForRegisterAllocation::VisitInvokeStaticOrDirect(HInvokeStaticOrDire DCHECK(last_input != nullptr) << "Last input is not HLoadClass. It is " << last_input->DebugName(); - // Remove a load class instruction as last input of a static - // invoke, which has been added (along with a clinit check, - // removed by PrepareForRegisterAllocation::VisitClinitCheck - // previously) by the graph builder during the creation of the - // static invoke instruction, but is no longer required at this - // stage (i.e., after inlining has been performed). - invoke->RemoveLoadClassAsLastInput(); - - // The static call will initialize the class so there's no need for a clinit check if - // it's the first user. - // There is one special case where we still need the clinit check, when inlining. Because - // currently the callee is responsible for reporting parameters to the GC, the code - // that walks the stack during `artQuickResolutionTrampoline` cannot be interrupted for GC. - // Therefore we cannot allocate any object in that code, including loading a new class. - if (last_input == invoke->GetPrevious() && !invoke->IsFromInlinedInvoke()) { - last_input->SetMustGenerateClinitCheck(false); - - // If the load class instruction is no longer used, remove it from - // the graph. - if (!last_input->HasUses()) { - last_input->GetBlock()->RemoveInstruction(last_input); - } + // Detach the explicit class initialization check from the invoke. + // Keeping track of the initializing instruction is no longer required + // at this stage (i.e., after inlining has been performed). + invoke->RemoveExplicitClinitCheck(HInvokeStaticOrDirect::ClinitCheckRequirement::kNone); + + // Merging with load class should have happened in VisitClinitCheck(). + DCHECK(!CanMoveClinitCheck(last_input, invoke)); + } +} + +bool PrepareForRegisterAllocation::CanMoveClinitCheck(HInstruction* input, HInstruction* user) { + // Determine if input and user come from the same dex instruction, so that we can move + // the clinit check responsibility from one to the other, i.e. from HClinitCheck (user) + // to HLoadClass (input), or from HClinitCheck (input) to HInvokeStaticOrDirect (user). + + // Start with a quick dex pc check. + if (user->GetDexPc() != input->GetDexPc()) { + return false; + } + + // Now do a thorough environment check that this is really coming from the same instruction in + // the same inlined graph. Unfortunately, we have to go through the whole environment chain. 
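The branching in the VisitNewInstance hunk above is compact, so as a reading aid here is a hypothetical condensation of the decision it makes. The boolean parameters mirror the predicates used in that hunk, the strings merely name the entrypoints, and none of this is the real ART API.

// Which quick entrypoint the allocation ends up with, and whether its HLoadClass survives.
struct AllocationPlan {
  const char* entrypoint;
  bool load_class_removed;
};

AllocationPlan PlanNewInstance(bool finalizable,
                               bool needs_access_check,
                               bool load_class_has_single_use,
                               bool from_inlined_invoke) {
  if (!finalizable && !needs_access_check && !load_class_has_single_use) {
    // Keep the faster resolved/initialized entrypoint chosen earlier.
    return {"unchanged", false};
  }
  // The generic entrypoint resolves the class itself, so the HLoadClass is only
  // kept for other users or for allocations sitting in an inlined frame.
  bool remove_load_class = load_class_has_single_use && !from_inlined_invoke;
  const char* entrypoint = (remove_load_class && needs_access_check)
      ? "kQuickAllocObjectWithAccessCheck"
      : "kQuickAllocObject";
  return {entrypoint, remove_load_class};
}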
+ HEnvironment* user_environment = user->GetEnvironment(); + HEnvironment* input_environment = input->GetEnvironment(); + while (user_environment != nullptr || input_environment != nullptr) { + if (user_environment == nullptr || input_environment == nullptr) { + // Different environment chain length. This happens when a method is called + // once directly and once indirectly through another inlined method. + return false; + } + if (user_environment->GetDexPc() != input_environment->GetDexPc() || + user_environment->GetMethodIdx() != input_environment->GetMethodIdx() || + !IsSameDexFile(user_environment->GetDexFile(), input_environment->GetDexFile())) { + return false; + } + user_environment = user_environment->GetParent(); + input_environment = input_environment->GetParent(); + } + + // Check for code motion taking the input to a different block. + if (user->GetBlock() != input->GetBlock()) { + return false; + } + + // In debug mode, check that we have not inserted a throwing instruction + // or an instruction with side effects between input and user. + if (kIsDebugBuild) { + for (HInstruction* between = input->GetNext(); between != user; between = between->GetNext()) { + CHECK(between != nullptr); // User must be after input in the same block. + CHECK(!between->CanThrow()); + CHECK(!between->HasSideEffects()); } } + return true; } } // namespace art diff --git a/compiler/optimizing/prepare_for_register_allocation.h b/compiler/optimizing/prepare_for_register_allocation.h index d7f277fa0d..9b2434250d 100644 --- a/compiler/optimizing/prepare_for_register_allocation.h +++ b/compiler/optimizing/prepare_for_register_allocation.h @@ -40,6 +40,9 @@ class PrepareForRegisterAllocation : public HGraphDelegateVisitor { void VisitClinitCheck(HClinitCheck* check) OVERRIDE; void VisitCondition(HCondition* condition) OVERRIDE; void VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) OVERRIDE; + void VisitNewInstance(HNewInstance* instruction) OVERRIDE; + + bool CanMoveClinitCheck(HInstruction* input, HInstruction* user); DISALLOW_COPY_AND_ASSIGN(PrepareForRegisterAllocation); }; diff --git a/compiler/optimizing/primitive_type_propagation.cc b/compiler/optimizing/primitive_type_propagation.cc index c98f43e461..bde54ee977 100644 --- a/compiler/optimizing/primitive_type_propagation.cc +++ b/compiler/optimizing/primitive_type_propagation.cc @@ -63,7 +63,6 @@ bool PrimitiveTypePropagation::UpdateType(HPhi* phi) { : SsaBuilder::GetFloatOrDoubleEquivalent(phi, input, new_type); phi->ReplaceInput(equivalent, i); if (equivalent->IsPhi()) { - equivalent->AsPhi()->SetLive(); AddToWorklist(equivalent->AsPhi()); } else if (equivalent == input) { // The input has changed its type. It can be an input of other phis, diff --git a/compiler/optimizing/ssa_builder.cc b/compiler/optimizing/ssa_builder.cc index 5190eb3b26..9e6cfbe653 100644 --- a/compiler/optimizing/ssa_builder.cc +++ b/compiler/optimizing/ssa_builder.cc @@ -22,6 +22,13 @@ namespace art { +// Returns whether this is a loop header phi which was eagerly created but later +// found inconsistent due to the vreg being undefined in one of its predecessors. +// Such phi is marked dead and should be ignored until its removal in SsaPhiElimination. +static bool IsUndefinedLoopHeaderPhi(HPhi* phi) { + return phi->IsLoopHeaderPhi() && phi->InputCount() != phi->GetBlock()->GetPredecessors().size(); +} + /** * A debuggable application may require to reviving phis, to ensure their * associated DEX register is available to a debugger. 
This class implements @@ -165,17 +172,15 @@ bool DeadPhiHandling::UpdateType(HPhi* phi) { void DeadPhiHandling::VisitBasicBlock(HBasicBlock* block) { for (HInstructionIterator it(block->GetPhis()); !it.Done(); it.Advance()) { HPhi* phi = it.Current()->AsPhi(); + if (IsUndefinedLoopHeaderPhi(phi)) { + DCHECK(phi->IsDead()); + continue; + } if (phi->IsDead() && phi->HasEnvironmentUses()) { phi->SetLive(); if (block->IsLoopHeader()) { - // Give a type to the loop phi to guarantee convergence of the algorithm. - // Note that the dead phi may already have a type if it is an equivalent - // generated for a typed LoadLocal. In that case we do not change the - // type because it could lead to an unsupported PrimNot/Float/Double -> - // PrimInt/Long transition and create same type equivalents. - if (phi->GetType() == Primitive::kPrimVoid) { - phi->SetType(phi->InputAt(0)->GetType()); - } + // Loop phis must have a type to guarantee convergence of the algorithm. + DCHECK_NE(phi->GetType(), Primitive::kPrimVoid); AddToWorklist(phi); } else { // Because we are doing a reverse post order visit, all inputs of @@ -220,6 +225,27 @@ void DeadPhiHandling::Run() { ProcessWorklist(); } +void SsaBuilder::SetLoopHeaderPhiInputs() { + for (size_t i = loop_headers_.size(); i > 0; --i) { + HBasicBlock* block = loop_headers_[i - 1]; + for (HInstructionIterator it(block->GetPhis()); !it.Done(); it.Advance()) { + HPhi* phi = it.Current()->AsPhi(); + size_t vreg = phi->GetRegNumber(); + for (HBasicBlock* predecessor : block->GetPredecessors()) { + HInstruction* value = ValueOfLocal(predecessor, vreg); + if (value == nullptr) { + // Vreg is undefined at this predecessor. Mark it dead and leave with + // fewer inputs than predecessors. SsaChecker will fail if not removed. + phi->SetDead(); + break; + } else { + phi->AddInput(value); + } + } + } + } +} + void SsaBuilder::FixNullConstantType() { // The order doesn't matter here. for (HReversePostOrderIterator itb(*GetGraph()); !itb.Done(); itb.Advance()) { @@ -283,15 +309,7 @@ void SsaBuilder::BuildSsa() { } // 2) Set inputs of loop phis. - for (HBasicBlock* block : loop_headers_) { - for (HInstructionIterator it(block->GetPhis()); !it.Done(); it.Advance()) { - HPhi* phi = it.Current()->AsPhi(); - for (HBasicBlock* predecessor : block->GetPredecessors()) { - HInstruction* input = ValueOfLocal(predecessor, phi->GetRegNumber()); - phi->AddInput(input); - } - } - } + SetLoopHeaderPhiInputs(); // 3) Mark dead phis. This will mark phis that are only used by environments: // at the DEX level, the type of these phis does not need to be consistent, but @@ -403,8 +421,13 @@ ArenaVector<HInstruction*>* SsaBuilder::GetLocalsFor(HBasicBlock* block) { for (size_t i = 0; i < vregs; ++i) { // No point in creating the catch phi if it is already undefined at // the first throwing instruction. 
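As context for the loop-header phi handling introduced above: a header phi is created while the vreg still has a value in the pre-header, but if that vreg later becomes undefined inside the loop (for instance when an overlapping wide store clobbers it, see VisitStoreLocal further down), the back-edge predecessor contributes no value. A throwaway model of that bookkeeping, with hypothetical types rather than the real HPhi API:

#include <cstddef>
#include <vector>

struct MiniPhi {
  std::vector<int*> inputs;       // values flowing in from predecessors
  std::size_t predecessor_count = 0;
  bool is_dead = false;
};

// Mirrors SetLoopHeaderPhiInputs: stop as soon as one predecessor has no value,
// leaving the phi with fewer inputs than predecessors and marking it dead.
void SetInputs(MiniPhi* phi, const std::vector<int*>& values_at_predecessors) {
  phi->predecessor_count = values_at_predecessors.size();
  for (int* value : values_at_predecessors) {
    if (value == nullptr) {       // vreg undefined at this predecessor
      phi->is_dead = true;        // SsaPhiElimination removes it later
      return;
    }
    phi->inputs.push_back(value);
  }
}

bool IsUndefinedLoopHeaderPhi(const MiniPhi& phi) {
  return phi.inputs.size() != phi.predecessor_count;
}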
- if ((*current_locals_)[i] != nullptr) { - HPhi* phi = new (arena) HPhi(arena, i, 0, Primitive::kPrimVoid); + HInstruction* current_local_value = (*current_locals_)[i]; + if (current_local_value != nullptr) { + HPhi* phi = new (arena) HPhi( + arena, + i, + 0, + current_local_value->GetType()); block->AddPhi(phi); (*locals)[i] = phi; } @@ -451,7 +474,10 @@ void SsaBuilder::VisitBasicBlock(HBasicBlock* block) { HInstruction* incoming = ValueOfLocal(block->GetLoopInformation()->GetPreHeader(), local); if (incoming != nullptr) { HPhi* phi = new (GetGraph()->GetArena()) HPhi( - GetGraph()->GetArena(), local, 0, Primitive::kPrimVoid); + GetGraph()->GetArena(), + local, + 0, + incoming->GetType()); block->AddPhi(phi); (*current_locals_)[local] = phi; } @@ -484,8 +510,12 @@ void SsaBuilder::VisitBasicBlock(HBasicBlock* block) { } if (is_different) { + HInstruction* first_input = ValueOfLocal(block->GetPredecessors()[0], local); HPhi* phi = new (GetGraph()->GetArena()) HPhi( - GetGraph()->GetArena(), local, block->GetPredecessors().size(), Primitive::kPrimVoid); + GetGraph()->GetArena(), + local, + block->GetPredecessors().size(), + first_input->GetType()); for (size_t i = 0; i < block->GetPredecessors().size(); i++) { HInstruction* pred_value = ValueOfLocal(block->GetPredecessors()[i], local); phi->SetRawInputAt(i, pred_value); @@ -583,8 +613,16 @@ HPhi* SsaBuilder::GetFloatDoubleOrReferenceEquivalentOfPhi(HPhi* phi, Primitive: phi->GetBlock()->InsertPhiAfter(new_phi, phi); return new_phi; } else { - DCHECK_EQ(next->GetType(), type); - return next->AsPhi(); + HPhi* next_phi = next->AsPhi(); + DCHECK_EQ(next_phi->GetType(), type); + if (next_phi->IsDead()) { + // TODO(dbrazdil): Remove this SetLive (we should not need to revive phis) + // once we stop running MarkDeadPhis before PrimitiveTypePropagation. This + // cannot revive undefined loop header phis because they cannot have uses. + DCHECK(!IsUndefinedLoopHeaderPhi(next_phi)); + next_phi->SetLive(); + } + return next_phi; } } @@ -638,7 +676,36 @@ void SsaBuilder::VisitLoadLocal(HLoadLocal* load) { } void SsaBuilder::VisitStoreLocal(HStoreLocal* store) { - (*current_locals_)[store->GetLocal()->GetRegNumber()] = store->InputAt(1); + uint32_t reg_number = store->GetLocal()->GetRegNumber(); + HInstruction* stored_value = store->InputAt(1); + Primitive::Type stored_type = stored_value->GetType(); + DCHECK_NE(stored_type, Primitive::kPrimVoid); + + // Storing into vreg `reg_number` may implicitly invalidate the surrounding + // registers. Consider the following cases: + // (1) Storing a wide value must overwrite previous values in both `reg_number` + // and `reg_number+1`. We store `nullptr` in `reg_number+1`. + // (2) If vreg `reg_number-1` holds a wide value, writing into `reg_number` + // must invalidate it. We store `nullptr` in `reg_number-1`. + // Consequently, storing a wide value into the high vreg of another wide value + // will invalidate both `reg_number-1` and `reg_number+1`. + + if (reg_number != 0) { + HInstruction* local_low = (*current_locals_)[reg_number - 1]; + if (local_low != nullptr && Primitive::Is64BitType(local_low->GetType())) { + // The vreg we are storing into was previously the high vreg of a pair. + // We need to invalidate its low vreg. + DCHECK((*current_locals_)[reg_number] == nullptr); + (*current_locals_)[reg_number - 1] = nullptr; + } + } + + (*current_locals_)[reg_number] = stored_value; + if (Primitive::Is64BitType(stored_type)) { + // We are storing a pair. Invalidate the instruction in the high vreg. 
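The two invalidation rules spelled out in the VisitStoreLocal comment above are easy to mix up, so here is a self-contained model with a worked example. The vreg map below is a hypothetical stand-in, not the builder's real current_locals_ array.

#include <array>
#include <cassert>
#include <cstddef>

struct Slot {
  const void* value;  // nullptr == vreg undefined
  bool is_wide;
};

struct VRegFile {
  std::array<Slot, 8> regs{};  // zero-initialized: every vreg starts undefined

  void Store(std::size_t reg, const void* value, bool is_wide) {
    // (2) If reg-1 holds a wide value, its pair overlaps reg: invalidate its low half.
    if (reg != 0 && regs[reg - 1].value != nullptr && regs[reg - 1].is_wide) {
      regs[reg - 1] = {nullptr, false};
    }
    regs[reg] = {value, is_wide};
    // (1) A wide value also claims reg+1, so whatever was there is invalidated.
    if (is_wide) {
      regs[reg + 1] = {nullptr, false};
    }
  }
};

// Worked example: v0/v1 hold a long pair; storing an int into v1 also kills v0.
void Example() {
  VRegFile f;
  long wide = 42;
  int narrow = 7;
  f.Store(0, &wide, /*is_wide=*/true);
  f.Store(1, &narrow, /*is_wide=*/false);
  assert(f.regs[0].value == nullptr);  // low half of the old pair was invalidated
  assert(f.regs[1].value == &narrow);
}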
+ (*current_locals_)[reg_number + 1] = nullptr; + } + store->GetBlock()->RemoveInstruction(store); } diff --git a/compiler/optimizing/ssa_builder.h b/compiler/optimizing/ssa_builder.h index 79f1a28ac8..dcce5e4c2c 100644 --- a/compiler/optimizing/ssa_builder.h +++ b/compiler/optimizing/ssa_builder.h @@ -81,6 +81,7 @@ class SsaBuilder : public HGraphVisitor { static constexpr const char* kSsaBuilderPassName = "ssa_builder"; private: + void SetLoopHeaderPhiInputs(); void FixNullConstantType(); void EquivalentPhisCleanup(); diff --git a/compiler/optimizing/ssa_phi_elimination.cc b/compiler/optimizing/ssa_phi_elimination.cc index 72f9ddd506..a3219dcc38 100644 --- a/compiler/optimizing/ssa_phi_elimination.cc +++ b/compiler/optimizing/ssa_phi_elimination.cc @@ -16,6 +16,8 @@ #include "ssa_phi_elimination.h" +#include "base/arena_containers.h" + namespace art { void SsaDeadPhiElimination::Run() { @@ -24,22 +26,36 @@ void SsaDeadPhiElimination::Run() { } void SsaDeadPhiElimination::MarkDeadPhis() { + // Phis are constructed live and should not be revived if previously marked + // dead. This algorithm temporarily breaks that invariant but we DCHECK that + // only phis which were initially live are revived. + ArenaSet<HPhi*> initially_live(graph_->GetArena()->Adapter()); + // Add to the worklist phis referenced by non-phi instructions. for (HReversePostOrderIterator it(*graph_); !it.Done(); it.Advance()) { HBasicBlock* block = it.Current(); for (HInstructionIterator inst_it(block->GetPhis()); !inst_it.Done(); inst_it.Advance()) { HPhi* phi = inst_it.Current()->AsPhi(); - // Set dead ahead of running through uses. The phi may have no use. - phi->SetDead(); + if (phi->IsDead()) { + continue; + } + + bool has_non_phi_use = false; for (HUseIterator<HInstruction*> use_it(phi->GetUses()); !use_it.Done(); use_it.Advance()) { - HUseListNode<HInstruction*>* current = use_it.Current(); - HInstruction* user = current->GetUser(); - if (!user->IsPhi()) { - worklist_.push_back(phi); - phi->SetLive(); + if (!use_it.Current()->GetUser()->IsPhi()) { + has_non_phi_use = true; break; } } + + if (has_non_phi_use) { + worklist_.push_back(phi); + } else { + phi->SetDead(); + if (kIsDebugBuild) { + initially_live.insert(phi); + } + } } } @@ -48,10 +64,13 @@ void SsaDeadPhiElimination::MarkDeadPhis() { HPhi* phi = worklist_.back(); worklist_.pop_back(); for (HInputIterator it(phi); !it.Done(); it.Advance()) { - HInstruction* input = it.Current(); - if (input->IsPhi() && input->AsPhi()->IsDead()) { - worklist_.push_back(input->AsPhi()); - input->AsPhi()->SetLive(); + HPhi* input = it.Current()->AsPhi(); + if (input != nullptr && input->IsDead()) { + // Input is a dead phi. Revive it and add to the worklist. We make sure + // that the phi was not dead initially (see definition of `initially_live`). + DCHECK(ContainsElement(initially_live, input)); + input->SetLive(); + worklist_.push_back(input); } } } @@ -118,7 +137,6 @@ void SsaRedundantPhiElimination::Run() { } if (phi->InputCount() == 0) { - DCHECK(phi->IsCatchPhi()); DCHECK(phi->IsDead()); continue; } |
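Finally, the reworked MarkDeadPhis in the ssa_phi_elimination.cc hunk above is easiest to check against a toy model. The sketch below uses simplified stand-in types, with the debug-only ArenaSet replaced by a plain std::set, and captures the new invariant that only phis tentatively killed by this very pass can later be revived through the worklist.

#include <cassert>
#include <set>
#include <vector>

struct MiniPhi {
  bool is_dead = false;              // true before this pass only for undefined loop header phis
  bool has_non_phi_use = false;
  std::vector<MiniPhi*> phi_inputs;  // inputs that are themselves phis
};

void MarkDeadPhis(const std::vector<MiniPhi*>& phis) {
  std::vector<MiniPhi*> worklist;
  std::set<MiniPhi*> initially_live;  // phis that were live on entry but got tentatively killed

  for (MiniPhi* phi : phis) {
    if (phi->is_dead) {
      continue;  // already dead, e.g. an undefined loop header phi; never revived
    }
    if (phi->has_non_phi_use) {
      worklist.push_back(phi);        // definitely live; liveness propagates below
    } else {
      phi->is_dead = true;            // tentative: may be revived through a live user
      initially_live.insert(phi);
    }
  }

  while (!worklist.empty()) {
    MiniPhi* live = worklist.back();
    worklist.pop_back();
    for (MiniPhi* input : live->phi_inputs) {
      if (input->is_dead) {
        assert(initially_live.count(input) != 0);  // only this pass's own kills are revived
        input->is_dead = false;
        worklist.push_back(input);
      }
    }
  }
}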