Diffstat (limited to 'compiler/optimizing')
22 files changed, 915 insertions, 741 deletions
diff --git a/compiler/optimizing/bounds_check_elimination.h b/compiler/optimizing/bounds_check_elimination.h index 6dc53207ea..b9df686ffd 100644 --- a/compiler/optimizing/bounds_check_elimination.h +++ b/compiler/optimizing/bounds_check_elimination.h @@ -29,13 +29,13 @@ class BoundsCheckElimination : public HOptimization { BoundsCheckElimination(HGraph* graph, const SideEffectsAnalysis& side_effects, HInductionVarAnalysis* induction_analysis) - : HOptimization(graph, kBoundsCheckEliminiationPassName), + : HOptimization(graph, kBoundsCheckEliminationPassName), side_effects_(side_effects), induction_analysis_(induction_analysis) {} void Run() OVERRIDE; - static constexpr const char* kBoundsCheckEliminiationPassName = "BCE"; + static constexpr const char* kBoundsCheckEliminationPassName = "BCE"; private: const SideEffectsAnalysis& side_effects_; diff --git a/compiler/optimizing/code_generator_arm.cc b/compiler/optimizing/code_generator_arm.cc index f265a0c7d3..272579219f 100644 --- a/compiler/optimizing/code_generator_arm.cc +++ b/compiler/optimizing/code_generator_arm.cc @@ -6449,33 +6449,6 @@ Literal* CodeGeneratorARM::DeduplicateMethodCodeLiteral(MethodReference target_m return DeduplicateMethodLiteral(target_method, &call_patches_); } -void LocationsBuilderARM::VisitMultiplyAccumulate(HMultiplyAccumulate* instr) { - LocationSummary* locations = - new (GetGraph()->GetArena()) LocationSummary(instr, LocationSummary::kNoCall); - locations->SetInAt(HMultiplyAccumulate::kInputAccumulatorIndex, - Location::RequiresRegister()); - locations->SetInAt(HMultiplyAccumulate::kInputMulLeftIndex, Location::RequiresRegister()); - locations->SetInAt(HMultiplyAccumulate::kInputMulRightIndex, Location::RequiresRegister()); - locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); -} - -void InstructionCodeGeneratorARM::VisitMultiplyAccumulate(HMultiplyAccumulate* instr) { - LocationSummary* locations = instr->GetLocations(); - Register res = locations->Out().AsRegister<Register>(); - Register accumulator = locations->InAt(HMultiplyAccumulate::kInputAccumulatorIndex) - .AsRegister<Register>(); - Register mul_left = locations->InAt(HMultiplyAccumulate::kInputMulLeftIndex) - .AsRegister<Register>(); - Register mul_right = locations->InAt(HMultiplyAccumulate::kInputMulRightIndex) - .AsRegister<Register>(); - - if (instr->GetOpKind() == HInstruction::kAdd) { - __ mla(res, mul_left, mul_right, accumulator); - } else { - __ mls(res, mul_left, mul_right, accumulator); - } -} - void LocationsBuilderARM::VisitBoundType(HBoundType* instruction ATTRIBUTE_UNUSED) { // Nothing to do, this should be removed during prepare for register allocator.
LOG(FATAL) << "Unreachable"; diff --git a/compiler/optimizing/code_generator_arm.h b/compiler/optimizing/code_generator_arm.h index df2126c653..d45ea973f9 100644 --- a/compiler/optimizing/code_generator_arm.h +++ b/compiler/optimizing/code_generator_arm.h @@ -159,7 +159,6 @@ class LocationsBuilderARM : public HGraphVisitor { FOR_EACH_CONCRETE_INSTRUCTION_COMMON(DECLARE_VISIT_INSTRUCTION) FOR_EACH_CONCRETE_INSTRUCTION_ARM(DECLARE_VISIT_INSTRUCTION) - FOR_EACH_CONCRETE_INSTRUCTION_SHARED(DECLARE_VISIT_INSTRUCTION) #undef DECLARE_VISIT_INSTRUCTION @@ -198,7 +197,6 @@ class InstructionCodeGeneratorARM : public InstructionCodeGenerator { FOR_EACH_CONCRETE_INSTRUCTION_COMMON(DECLARE_VISIT_INSTRUCTION) FOR_EACH_CONCRETE_INSTRUCTION_ARM(DECLARE_VISIT_INSTRUCTION) - FOR_EACH_CONCRETE_INSTRUCTION_SHARED(DECLARE_VISIT_INSTRUCTION) #undef DECLARE_VISIT_INSTRUCTION diff --git a/compiler/optimizing/code_generator_arm64.cc b/compiler/optimizing/code_generator_arm64.cc index 3fdd7186d1..c0e3959933 100644 --- a/compiler/optimizing/code_generator_arm64.cc +++ b/compiler/optimizing/code_generator_arm64.cc @@ -584,6 +584,56 @@ void JumpTableARM64::EmitTable(CodeGeneratorARM64* codegen) { } } +// Slow path marking an object during a read barrier. +class ReadBarrierMarkSlowPathARM64 : public SlowPathCodeARM64 { + public: + ReadBarrierMarkSlowPathARM64(HInstruction* instruction, Location out, Location obj) + : instruction_(instruction), out_(out), obj_(obj) { + DCHECK(kEmitCompilerReadBarrier); + } + + const char* GetDescription() const OVERRIDE { return "ReadBarrierMarkSlowPathARM64"; } + + void EmitNativeCode(CodeGenerator* codegen) OVERRIDE { + LocationSummary* locations = instruction_->GetLocations(); + Primitive::Type type = Primitive::kPrimNot; + DCHECK(locations->CanCall()); + DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(out_.reg())); + DCHECK(instruction_->IsInstanceFieldGet() || + instruction_->IsStaticFieldGet() || + instruction_->IsArrayGet() || + instruction_->IsLoadClass() || + instruction_->IsLoadString() || + instruction_->IsInstanceOf() || + instruction_->IsCheckCast()) + << "Unexpected instruction in read barrier marking slow path: " + << instruction_->DebugName(); + + __ Bind(GetEntryLabel()); + SaveLiveRegisters(codegen, locations); + + InvokeRuntimeCallingConvention calling_convention; + CodeGeneratorARM64* arm64_codegen = down_cast<CodeGeneratorARM64*>(codegen); + arm64_codegen->MoveLocation(LocationFrom(calling_convention.GetRegisterAt(0)), obj_, type); + arm64_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pReadBarrierMark), + instruction_, + instruction_->GetDexPc(), + this); + CheckEntrypointTypes<kQuickReadBarrierMark, mirror::Object*, mirror::Object*>(); + arm64_codegen->MoveLocation(out_, calling_convention.GetReturnLocation(type), type); + + RestoreLiveRegisters(codegen, locations); + __ B(GetExitLabel()); + } + + private: + HInstruction* const instruction_; + const Location out_; + const Location obj_; + + DISALLOW_COPY_AND_ASSIGN(ReadBarrierMarkSlowPathARM64); +}; + // Slow path generating a read barrier for a heap reference. 
class ReadBarrierForHeapReferenceSlowPathARM64 : public SlowPathCodeARM64 { public: @@ -605,7 +655,7 @@ class ReadBarrierForHeapReferenceSlowPathARM64 : public SlowPathCodeARM64 { // to be instrumented, e.g.: // // __ Ldr(out, HeapOperand(out, class_offset)); - // codegen_->GenerateReadBarrier(instruction, out_loc, out_loc, out_loc, offset); + // codegen_->GenerateReadBarrierSlow(instruction, out_loc, out_loc, out_loc, offset); // // In that case, we have lost the information about the original // object, and the emitted read barrier cannot work properly. @@ -621,7 +671,9 @@ class ReadBarrierForHeapReferenceSlowPathARM64 : public SlowPathCodeARM64 { DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(out_.reg())); DCHECK(!instruction_->IsInvoke() || (instruction_->IsInvokeStaticOrDirect() && - instruction_->GetLocations()->Intrinsified())); + instruction_->GetLocations()->Intrinsified())) + << "Unexpected instruction in read barrier for heap reference slow path: " + << instruction_->DebugName(); // The read barrier instrumentation does not support the // HArm64IntermediateAddress instruction yet. DCHECK(!(instruction_->IsArrayGet() && @@ -769,14 +821,18 @@ class ReadBarrierForHeapReferenceSlowPathARM64 : public SlowPathCodeARM64 { class ReadBarrierForRootSlowPathARM64 : public SlowPathCodeARM64 { public: ReadBarrierForRootSlowPathARM64(HInstruction* instruction, Location out, Location root) - : instruction_(instruction), out_(out), root_(root) {} + : instruction_(instruction), out_(out), root_(root) { + DCHECK(kEmitCompilerReadBarrier); + } void EmitNativeCode(CodeGenerator* codegen) OVERRIDE { LocationSummary* locations = instruction_->GetLocations(); Primitive::Type type = Primitive::kPrimNot; DCHECK(locations->CanCall()); DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(out_.reg())); - DCHECK(instruction_->IsLoadClass() || instruction_->IsLoadString()); + DCHECK(instruction_->IsLoadClass() || instruction_->IsLoadString()) + << "Unexpected instruction in read barrier for GC root slow path: " + << instruction_->DebugName(); __ Bind(GetEntryLabel()); SaveLiveRegisters(codegen, locations); @@ -1338,7 +1394,8 @@ void CodeGeneratorARM64::Load(Primitive::Type type, void CodeGeneratorARM64::LoadAcquire(HInstruction* instruction, CPURegister dst, - const MemOperand& src) { + const MemOperand& src, + bool needs_null_check) { MacroAssembler* masm = GetVIXLAssembler(); BlockPoolsScope block_pools(masm); UseScratchRegisterScope temps(masm); @@ -1354,20 +1411,28 @@ void CodeGeneratorARM64::LoadAcquire(HInstruction* instruction, switch (type) { case Primitive::kPrimBoolean: __ Ldarb(Register(dst), base); - MaybeRecordImplicitNullCheck(instruction); + if (needs_null_check) { + MaybeRecordImplicitNullCheck(instruction); + } break; case Primitive::kPrimByte: __ Ldarb(Register(dst), base); - MaybeRecordImplicitNullCheck(instruction); + if (needs_null_check) { + MaybeRecordImplicitNullCheck(instruction); + } __ Sbfx(Register(dst), Register(dst), 0, Primitive::ComponentSize(type) * kBitsPerByte); break; case Primitive::kPrimChar: __ Ldarh(Register(dst), base); - MaybeRecordImplicitNullCheck(instruction); + if (needs_null_check) { + MaybeRecordImplicitNullCheck(instruction); + } break; case Primitive::kPrimShort: __ Ldarh(Register(dst), base); - MaybeRecordImplicitNullCheck(instruction); + if (needs_null_check) { + MaybeRecordImplicitNullCheck(instruction); + } __ Sbfx(Register(dst), Register(dst), 0, Primitive::ComponentSize(type) * kBitsPerByte); break; case Primitive::kPrimInt: @@
-1375,7 +1440,9 @@ void CodeGeneratorARM64::LoadAcquire(HInstruction* instruction, case Primitive::kPrimLong: DCHECK_EQ(dst.Is64Bits(), Primitive::Is64BitType(type)); __ Ldar(Register(dst), base); - MaybeRecordImplicitNullCheck(instruction); + if (needs_null_check) { + MaybeRecordImplicitNullCheck(instruction); + } break; case Primitive::kPrimFloat: case Primitive::kPrimDouble: { @@ -1384,7 +1451,9 @@ void CodeGeneratorARM64::LoadAcquire(HInstruction* instruction, Register temp = dst.Is64Bits() ? temps.AcquireX() : temps.AcquireW(); __ Ldar(temp, base); - MaybeRecordImplicitNullCheck(instruction); + if (needs_null_check) { + MaybeRecordImplicitNullCheck(instruction); + } __ Fmov(FPRegister(dst), temp); break; } @@ -1505,7 +1574,7 @@ void InstructionCodeGeneratorARM64::GenerateClassInitializationCheck(SlowPathCod __ Bind(slow_path->GetExitLabel()); } -void InstructionCodeGeneratorARM64::GenerateMemoryBarrier(MemBarrierKind kind) { +void CodeGeneratorARM64::GenerateMemoryBarrier(MemBarrierKind kind) { BarrierType type = BarrierAll; switch (kind) { @@ -1641,33 +1710,62 @@ void LocationsBuilderARM64::HandleFieldGet(HInstruction* instruction) { void InstructionCodeGeneratorARM64::HandleFieldGet(HInstruction* instruction, const FieldInfo& field_info) { DCHECK(instruction->IsInstanceFieldGet() || instruction->IsStaticFieldGet()); + LocationSummary* locations = instruction->GetLocations(); + Location base_loc = locations->InAt(0); + Location out = locations->Out(); + uint32_t offset = field_info.GetFieldOffset().Uint32Value(); Primitive::Type field_type = field_info.GetFieldType(); BlockPoolsScope block_pools(GetVIXLAssembler()); MemOperand field = HeapOperand(InputRegisterAt(instruction, 0), field_info.GetFieldOffset()); bool use_acquire_release = codegen_->GetInstructionSetFeatures().PreferAcquireRelease(); - if (field_info.IsVolatile()) { - if (use_acquire_release) { - // NB: LoadAcquire will record the pc info if needed. - codegen_->LoadAcquire(instruction, OutputCPURegister(instruction), field); + if (field_type == Primitive::kPrimNot && kEmitCompilerReadBarrier && kUseBakerReadBarrier) { + // Object FieldGet with Baker's read barrier case. + MacroAssembler* masm = GetVIXLAssembler(); + UseScratchRegisterScope temps(masm); + // /* HeapReference<Object> */ out = *(base + offset) + Register base = RegisterFrom(base_loc, Primitive::kPrimNot); + Register temp = temps.AcquireW(); + // Note that potential implicit null checks are handled in this + // CodeGeneratorARM64::GenerateFieldLoadWithBakerReadBarrier call. + codegen_->GenerateFieldLoadWithBakerReadBarrier( + instruction, + out, + base, + offset, + temp, + /* needs_null_check */ true, + field_info.IsVolatile() && use_acquire_release); + if (field_info.IsVolatile() && !use_acquire_release) { + // For IRIW sequential consistency kLoadAny is not sufficient. + codegen_->GenerateMemoryBarrier(MemBarrierKind::kAnyAny); + } + } else { + // General case. + if (field_info.IsVolatile()) { + if (use_acquire_release) { + // Note that a potential implicit null check is handled in this + // CodeGeneratorARM64::LoadAcquire call. + // NB: LoadAcquire will record the pc info if needed. + codegen_->LoadAcquire( + instruction, OutputCPURegister(instruction), field, /* needs_null_check */ true); + } else { + codegen_->Load(field_type, OutputCPURegister(instruction), field); + codegen_->MaybeRecordImplicitNullCheck(instruction); + // For IRIW sequential consistency kLoadAny is not sufficient. 
+ codegen_->GenerateMemoryBarrier(MemBarrierKind::kAnyAny); + } } else { codegen_->Load(field_type, OutputCPURegister(instruction), field); codegen_->MaybeRecordImplicitNullCheck(instruction); - // For IRIW sequential consistency kLoadAny is not sufficient. - GenerateMemoryBarrier(MemBarrierKind::kAnyAny); } - } else { - codegen_->Load(field_type, OutputCPURegister(instruction), field); - codegen_->MaybeRecordImplicitNullCheck(instruction); - } - - if (field_type == Primitive::kPrimNot) { - LocationSummary* locations = instruction->GetLocations(); - Location base = locations->InAt(0); - Location out = locations->Out(); - uint32_t offset = field_info.GetFieldOffset().Uint32Value(); - codegen_->MaybeGenerateReadBarrier(instruction, out, out, base, offset); + if (field_type == Primitive::kPrimNot) { + // If read barriers are enabled, emit read barriers other than + // Baker's using a slow path (and also unpoison the loaded + // reference, if heap poisoning is enabled). + codegen_->MaybeGenerateReadBarrierSlow(instruction, out, out, base_loc, offset); + } } } @@ -1713,10 +1811,10 @@ void InstructionCodeGeneratorARM64::HandleFieldSet(HInstruction* instruction, codegen_->StoreRelease(field_type, source, HeapOperand(obj, offset)); codegen_->MaybeRecordImplicitNullCheck(instruction); } else { - GenerateMemoryBarrier(MemBarrierKind::kAnyStore); + codegen_->GenerateMemoryBarrier(MemBarrierKind::kAnyStore); codegen_->Store(field_type, source, HeapOperand(obj, offset)); codegen_->MaybeRecordImplicitNullCheck(instruction); - GenerateMemoryBarrier(MemBarrierKind::kAnyAny); + codegen_->GenerateMemoryBarrier(MemBarrierKind::kAnyAny); } } else { codegen_->Store(field_type, source, HeapOperand(obj, offset)); @@ -1952,27 +2050,21 @@ void InstructionCodeGeneratorARM64::VisitArm64IntermediateAddress( Operand(InputOperandAt(instruction, 1))); } -void LocationsBuilderARM64::VisitMultiplyAccumulate(HMultiplyAccumulate* instr) { +void LocationsBuilderARM64::VisitArm64MultiplyAccumulate(HArm64MultiplyAccumulate* instr) { LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instr, LocationSummary::kNoCall); - HInstruction* accumulator = instr->InputAt(HMultiplyAccumulate::kInputAccumulatorIndex); - if (instr->GetOpKind() == HInstruction::kSub && - accumulator->IsConstant() && - accumulator->AsConstant()->IsZero()) { - // Don't allocate register for Mneg instruction. 
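The field-access hunks above guard volatile accesses with kAnyStore/kAnyAny barriers when Load-Acquire/Store-Release instructions are not preferred; the IRIW comments refer to the independent-reads-of-independent-writes litmus test, which plain acquire loads do not satisfy. A rough C++ analogy, assuming kAnyStore maps to a release fence and kAnyAny to a full fence (the generated code emits DMB barriers via GenerateMemoryBarrier, not C++ fences):

  #include <atomic>

  std::atomic<int> payload{0};

  void StoreVolatileAnalogy(int value) {
    std::atomic_thread_fence(std::memory_order_release);  // kAnyStore before the store
    payload.store(value, std::memory_order_relaxed);      // the store itself
    std::atomic_thread_fence(std::memory_order_seq_cst);  // kAnyAny: full barrier after it
  }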
- } else { - locations->SetInAt(HMultiplyAccumulate::kInputAccumulatorIndex, - Location::RequiresRegister()); - } - locations->SetInAt(HMultiplyAccumulate::kInputMulLeftIndex, Location::RequiresRegister()); - locations->SetInAt(HMultiplyAccumulate::kInputMulRightIndex, Location::RequiresRegister()); + locations->SetInAt(HArm64MultiplyAccumulate::kInputAccumulatorIndex, + Location::RequiresRegister()); + locations->SetInAt(HArm64MultiplyAccumulate::kInputMulLeftIndex, Location::RequiresRegister()); + locations->SetInAt(HArm64MultiplyAccumulate::kInputMulRightIndex, Location::RequiresRegister()); locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); } -void InstructionCodeGeneratorARM64::VisitMultiplyAccumulate(HMultiplyAccumulate* instr) { +void InstructionCodeGeneratorARM64::VisitArm64MultiplyAccumulate(HArm64MultiplyAccumulate* instr) { Register res = OutputRegister(instr); - Register mul_left = InputRegisterAt(instr, HMultiplyAccumulate::kInputMulLeftIndex); - Register mul_right = InputRegisterAt(instr, HMultiplyAccumulate::kInputMulRightIndex); + Register accumulator = InputRegisterAt(instr, HArm64MultiplyAccumulate::kInputAccumulatorIndex); + Register mul_left = InputRegisterAt(instr, HArm64MultiplyAccumulate::kInputMulLeftIndex); + Register mul_right = InputRegisterAt(instr, HArm64MultiplyAccumulate::kInputMulRightIndex); // Avoid emitting code that could trigger Cortex A53's erratum 835769. // This fixup should be carried out for all multiply-accumulate instructions: @@ -1992,18 +2084,10 @@ void InstructionCodeGeneratorARM64::VisitMultiplyAccumulate(HMultiplyAccumulate* } if (instr->GetOpKind() == HInstruction::kAdd) { - Register accumulator = InputRegisterAt(instr, HMultiplyAccumulate::kInputAccumulatorIndex); __ Madd(res, mul_left, mul_right, accumulator); } else { DCHECK(instr->GetOpKind() == HInstruction::kSub); - HInstruction* accum_instr = instr->InputAt(HMultiplyAccumulate::kInputAccumulatorIndex); - if (accum_instr->IsConstant() && accum_instr->AsConstant()->IsZero()) { - __ Mneg(res, mul_left, mul_right); - } else { - Register accumulator = InputRegisterAt(instr, - HMultiplyAccumulate::kInputAccumulatorIndex); - __ Msub(res, mul_left, mul_right, accumulator); - } + __ Msub(res, mul_left, mul_right, accumulator); } } @@ -2035,50 +2119,62 @@ void InstructionCodeGeneratorARM64::VisitArrayGet(HArrayGet* instruction) { LocationSummary* locations = instruction->GetLocations(); Location index = locations->InAt(1); uint32_t offset = mirror::Array::DataOffset(Primitive::ComponentSize(type)).Uint32Value(); - MemOperand source = HeapOperand(obj); - CPURegister dest = OutputCPURegister(instruction); + Location out = locations->Out(); MacroAssembler* masm = GetVIXLAssembler(); UseScratchRegisterScope temps(masm); // Block pools between `Load` and `MaybeRecordImplicitNullCheck`. BlockPoolsScope block_pools(masm); - if (index.IsConstant()) { - offset += Int64ConstantFrom(index) << Primitive::ComponentSizeShift(type); - source = HeapOperand(obj, offset); + if (type == Primitive::kPrimNot && kEmitCompilerReadBarrier && kUseBakerReadBarrier) { + // Object ArrayGet with Baker's read barrier case. + Register temp = temps.AcquireW(); + // The read barrier instrumentation does not support the + // HArm64IntermediateAddress instruction yet. + DCHECK(!instruction->GetArray()->IsArm64IntermediateAddress()); + // Note that a potential implicit null check is handled in the + // CodeGeneratorARM64::GenerateArrayLoadWithBakerReadBarrier call. 
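The multiply-accumulate hunk above returns to the ARM64-specific HArm64MultiplyAccumulate and selects MADD or MSUB; the Cortex-A53 erratum 835769 fixup it references inserts a nop between a 64-bit memory operation and a following multiply-accumulate. A scalar sketch of the selected instructions' semantics (standard A64 behavior, not ART code):

  #include <cstdint>

  // MADD: res = accumulator + mul_left * mul_right (AArch32 counterpart: MLA)
  int64_t Madd(int64_t acc, int64_t left, int64_t right) { return acc + left * right; }

  // MSUB: res = accumulator - mul_left * mul_right (AArch32 counterpart: MLS)
  int64_t Msub(int64_t acc, int64_t left, int64_t right) { return acc - left * right; }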
+ codegen_->GenerateArrayLoadWithBakerReadBarrier( + instruction, out, obj.W(), offset, index, temp, /* needs_null_check */ true); } else { - Register temp = temps.AcquireSameSizeAs(obj); - if (instruction->GetArray()->IsArm64IntermediateAddress()) { - // The read barrier instrumentation does not support the - // HArm64IntermediateAddress instruction yet. - DCHECK(!kEmitCompilerReadBarrier); - // We do not need to compute the intermediate address from the array: the - // input instruction has done it already. See the comment in - // `InstructionSimplifierArm64::TryExtractArrayAccessAddress()`. - if (kIsDebugBuild) { - HArm64IntermediateAddress* tmp = instruction->GetArray()->AsArm64IntermediateAddress(); - DCHECK(tmp->GetOffset()->AsIntConstant()->GetValueAsUint64() == offset); - } - temp = obj; + // General case. + MemOperand source = HeapOperand(obj); + if (index.IsConstant()) { + offset += Int64ConstantFrom(index) << Primitive::ComponentSizeShift(type); + source = HeapOperand(obj, offset); } else { - __ Add(temp, obj, offset); + Register temp = temps.AcquireSameSizeAs(obj); + if (instruction->GetArray()->IsArm64IntermediateAddress()) { + // The read barrier instrumentation does not support the + // HArm64IntermediateAddress instruction yet. + DCHECK(!kEmitCompilerReadBarrier); + // We do not need to compute the intermediate address from the array: the + // input instruction has done it already. See the comment in + // `InstructionSimplifierArm64::TryExtractArrayAccessAddress()`. + if (kIsDebugBuild) { + HArm64IntermediateAddress* tmp = instruction->GetArray()->AsArm64IntermediateAddress(); + DCHECK_EQ(tmp->GetOffset()->AsIntConstant()->GetValueAsUint64(), offset); + } + temp = obj; + } else { + __ Add(temp, obj, offset); + } + source = HeapOperand(temp, XRegisterFrom(index), LSL, Primitive::ComponentSizeShift(type)); } - source = HeapOperand(temp, XRegisterFrom(index), LSL, Primitive::ComponentSizeShift(type)); - } - codegen_->Load(type, dest, source); - codegen_->MaybeRecordImplicitNullCheck(instruction); + codegen_->Load(type, OutputCPURegister(instruction), source); + codegen_->MaybeRecordImplicitNullCheck(instruction); - if (type == Primitive::kPrimNot) { - static_assert( - sizeof(mirror::HeapReference<mirror::Object>) == sizeof(int32_t), - "art::mirror::HeapReference<art::mirror::Object> and int32_t have different sizes."); - Location obj_loc = locations->InAt(0); - Location out = locations->Out(); - if (index.IsConstant()) { - codegen_->MaybeGenerateReadBarrier(instruction, out, out, obj_loc, offset); - } else { - codegen_->MaybeGenerateReadBarrier(instruction, out, out, obj_loc, offset, index); + if (type == Primitive::kPrimNot) { + static_assert( + sizeof(mirror::HeapReference<mirror::Object>) == sizeof(int32_t), + "art::mirror::HeapReference<art::mirror::Object> and int32_t have different sizes."); + Location obj_loc = locations->InAt(0); + if (index.IsConstant()) { + codegen_->MaybeGenerateReadBarrierSlow(instruction, out, out, obj_loc, offset); + } else { + codegen_->MaybeGenerateReadBarrierSlow(instruction, out, out, obj_loc, offset, index); + } } } } @@ -2208,12 +2304,12 @@ void InstructionCodeGeneratorARM64::VisitArraySet(HArraySet* instruction) { // __ Mov(temp2, temp); // // /* HeapReference<Class> */ temp = temp->component_type_ // __ Ldr(temp, HeapOperand(temp, component_offset)); - // codegen_->GenerateReadBarrier( + // codegen_->GenerateReadBarrierSlow( // instruction, temp_loc, temp_loc, temp2_loc, component_offset); // // // /* HeapReference<Class> */ temp2 = 
value->klass_ // __ Ldr(temp2, HeapOperand(Register(value), class_offset)); - // codegen_->GenerateReadBarrier( + // codegen_->GenerateReadBarrierSlow( // instruction, temp2_loc, temp2_loc, value_loc, class_offset, temp_loc); // // __ Cmp(temp, temp2); @@ -2936,6 +3032,14 @@ void InstructionCodeGeneratorARM64::VisitInstanceFieldSet(HInstanceFieldSet* ins HandleFieldSet(instruction, instruction->GetFieldInfo(), instruction->GetValueCanBeNull()); } +static bool TypeCheckNeedsATemporary(TypeCheckKind type_check_kind) { + return kEmitCompilerReadBarrier && + (kUseBakerReadBarrier || + type_check_kind == TypeCheckKind::kAbstractClassCheck || + type_check_kind == TypeCheckKind::kClassHierarchyCheck || + type_check_kind == TypeCheckKind::kArrayObjectCheck); +} + void LocationsBuilderARM64::VisitInstanceOf(HInstanceOf* instruction) { LocationSummary::CallKind call_kind = LocationSummary::kNoCall; TypeCheckKind type_check_kind = instruction->GetTypeCheckKind(); @@ -2962,21 +3066,22 @@ void LocationsBuilderARM64::VisitInstanceOf(HInstanceOf* instruction) { locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap); // When read barriers are enabled, we need a temporary register for // some cases. - if (kEmitCompilerReadBarrier && - (type_check_kind == TypeCheckKind::kAbstractClassCheck || - type_check_kind == TypeCheckKind::kClassHierarchyCheck || - type_check_kind == TypeCheckKind::kArrayObjectCheck)) { + if (TypeCheckNeedsATemporary(type_check_kind)) { locations->AddTemp(Location::RequiresRegister()); } } void InstructionCodeGeneratorARM64::VisitInstanceOf(HInstanceOf* instruction) { + TypeCheckKind type_check_kind = instruction->GetTypeCheckKind(); LocationSummary* locations = instruction->GetLocations(); Location obj_loc = locations->InAt(0); Register obj = InputRegisterAt(instruction, 0); Register cls = InputRegisterAt(instruction, 1); Location out_loc = locations->Out(); Register out = OutputRegister(instruction); + Location maybe_temp_loc = TypeCheckNeedsATemporary(type_check_kind) ? + locations->GetTemp(0) : + Location::NoLocation(); uint32_t class_offset = mirror::Object::ClassOffset().Int32Value(); uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value(); uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value(); @@ -2992,10 +3097,9 @@ void InstructionCodeGeneratorARM64::VisitInstanceOf(HInstanceOf* instruction) { } // /* HeapReference<Class> */ out = obj->klass_ - __ Ldr(out, HeapOperand(obj.W(), class_offset)); - codegen_->MaybeGenerateReadBarrier(instruction, out_loc, out_loc, obj_loc, class_offset); + GenerateReferenceLoadTwoRegisters(instruction, out_loc, obj_loc, class_offset, maybe_temp_loc); - switch (instruction->GetTypeCheckKind()) { + switch (type_check_kind) { case TypeCheckKind::kExactCheck: { __ Cmp(out, cls); __ Cset(out, eq); @@ -3010,17 +3114,8 @@ void InstructionCodeGeneratorARM64::VisitInstanceOf(HInstanceOf* instruction) { // object to avoid doing a comparison we know will fail. vixl::Label loop, success; __ Bind(&loop); - Location temp_loc = kEmitCompilerReadBarrier ? locations->GetTemp(0) : Location::NoLocation(); - if (kEmitCompilerReadBarrier) { - // Save the value of `out` into `temp` before overwriting it - // in the following move operation, as we will need it for the - // read barrier below. 
- Register temp = WRegisterFrom(temp_loc); - __ Mov(temp, out); - } // /* HeapReference<Class> */ out = out->super_class_ - __ Ldr(out, HeapOperand(out, super_offset)); - codegen_->MaybeGenerateReadBarrier(instruction, out_loc, out_loc, temp_loc, super_offset); + GenerateReferenceLoadOneRegister(instruction, out_loc, super_offset, maybe_temp_loc); // If `out` is null, we use it for the result, and jump to `done`. __ Cbz(out, &done); __ Cmp(out, cls); @@ -3038,17 +3133,8 @@ void InstructionCodeGeneratorARM64::VisitInstanceOf(HInstanceOf* instruction) { __ Bind(&loop); __ Cmp(out, cls); __ B(eq, &success); - Location temp_loc = kEmitCompilerReadBarrier ? locations->GetTemp(0) : Location::NoLocation(); - if (kEmitCompilerReadBarrier) { - // Save the value of `out` into `temp` before overwriting it - // in the following move operation, as we will need it for the - // read barrier below. - Register temp = WRegisterFrom(temp_loc); - __ Mov(temp, out); - } // /* HeapReference<Class> */ out = out->super_class_ - __ Ldr(out, HeapOperand(out, super_offset)); - codegen_->MaybeGenerateReadBarrier(instruction, out_loc, out_loc, temp_loc, super_offset); + GenerateReferenceLoadOneRegister(instruction, out_loc, super_offset, maybe_temp_loc); __ Cbnz(out, &loop); // If `out` is null, we use it for the result, and jump to `done`. __ B(&done); @@ -3066,17 +3152,8 @@ void InstructionCodeGeneratorARM64::VisitInstanceOf(HInstanceOf* instruction) { __ Cmp(out, cls); __ B(eq, &exact_check); // Otherwise, we need to check that the object's class is a non-primitive array. - Location temp_loc = kEmitCompilerReadBarrier ? locations->GetTemp(0) : Location::NoLocation(); - if (kEmitCompilerReadBarrier) { - // Save the value of `out` into `temp` before overwriting it - // in the following move operation, as we will need it for the - // read barrier below. - Register temp = WRegisterFrom(temp_loc); - __ Mov(temp, out); - } // /* HeapReference<Class> */ out = out->component_type_ - __ Ldr(out, HeapOperand(out, component_offset)); - codegen_->MaybeGenerateReadBarrier(instruction, out_loc, out_loc, temp_loc, component_offset); + GenerateReferenceLoadOneRegister(instruction, out_loc, component_offset, maybe_temp_loc); // If `out` is null, we use it for the result, and jump to `done`. __ Cbz(out, &done); __ Ldrh(out, HeapOperand(out, primitive_offset)); @@ -3115,6 +3192,13 @@ void InstructionCodeGeneratorARM64::VisitInstanceOf(HInstanceOf* instruction) { // HInstanceOf instruction (following the runtime calling // convention), which might be cluttered by the potential first // read barrier emission at the beginning of this method. + // + // TODO: Introduce a new runtime entry point taking the object + // to test (instead of its class) as argument, and let it deal + // with the read barrier issues. This will let us refactor this + // case of the `switch` code as it was previously (with a direct + // call to the runtime not using a type checking slow path). + // This should also be beneficial for the other cases above. DCHECK(locations->OnlyCallsOnSlowPath()); slow_path = new (GetGraph()->GetArena()) TypeCheckSlowPathARM64(instruction, /* is_fatal */ false); @@ -3167,30 +3251,29 @@ void LocationsBuilderARM64::VisitCheckCast(HCheckCast* instruction) { locations->SetInAt(1, Location::RequiresRegister()); // Note that TypeCheckSlowPathARM64 uses this "temp" register too. 
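The TODO above proposes a runtime entry point that takes the object rather than its class, so the runtime can handle read barriers itself. A hypothetical shape for such an entry point (it does not exist in the runtime; the name and signature are invented for illustration):

  // Hypothetical only: would let the runtime follow the object's class
  // pointer itself, read barrier included.
  extern "C" uint32_t artInstanceOfFromObject(mirror::Object* obj, mirror::Class* klass) {
    return (obj != nullptr && obj->InstanceOf(klass)) ? 1u : 0u;
  }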
locations->AddTemp(Location::RequiresRegister()); - locations->AddTemp(Location::RequiresRegister()); // When read barriers are enabled, we need an additional temporary // register for some cases. - if (kEmitCompilerReadBarrier && - (type_check_kind == TypeCheckKind::kAbstractClassCheck || - type_check_kind == TypeCheckKind::kClassHierarchyCheck || - type_check_kind == TypeCheckKind::kArrayObjectCheck)) { - locations->AddTemp(Location::RequiresRegister()); + if (TypeCheckNeedsATemporary(type_check_kind)) { + locations->AddTemp(Location::RequiresRegister()); } } void InstructionCodeGeneratorARM64::VisitCheckCast(HCheckCast* instruction) { + TypeCheckKind type_check_kind = instruction->GetTypeCheckKind(); LocationSummary* locations = instruction->GetLocations(); Location obj_loc = locations->InAt(0); Register obj = InputRegisterAt(instruction, 0); Register cls = InputRegisterAt(instruction, 1); Location temp_loc = locations->GetTemp(0); + Location maybe_temp2_loc = TypeCheckNeedsATemporary(type_check_kind) ? + locations->GetTemp(1) : + Location::NoLocation(); Register temp = WRegisterFrom(temp_loc); uint32_t class_offset = mirror::Object::ClassOffset().Int32Value(); uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value(); uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value(); uint32_t primitive_offset = mirror::Class::PrimitiveTypeOffset().Int32Value(); - TypeCheckKind type_check_kind = instruction->GetTypeCheckKind(); bool is_type_check_slow_path_fatal = (type_check_kind == TypeCheckKind::kExactCheck || type_check_kind == TypeCheckKind::kAbstractClassCheck || @@ -3209,8 +3292,7 @@ void InstructionCodeGeneratorARM64::VisitCheckCast(HCheckCast* instruction) { } // /* HeapReference<Class> */ temp = obj->klass_ - __ Ldr(temp, HeapOperand(obj, class_offset)); - codegen_->MaybeGenerateReadBarrier(instruction, temp_loc, temp_loc, obj_loc, class_offset); + GenerateReferenceLoadTwoRegisters(instruction, temp_loc, obj_loc, class_offset, maybe_temp2_loc); switch (type_check_kind) { case TypeCheckKind::kExactCheck: @@ -3227,18 +3309,8 @@ void InstructionCodeGeneratorARM64::VisitCheckCast(HCheckCast* instruction) { // object to avoid doing a comparison we know will fail. vixl::Label loop, compare_classes; __ Bind(&loop); - Location temp2_loc = - kEmitCompilerReadBarrier ? locations->GetTemp(1) : Location::NoLocation(); - if (kEmitCompilerReadBarrier) { - // Save the value of `temp` into `temp2` before overwriting it - // in the following move operation, as we will need it for the - // read barrier below. - Register temp2 = WRegisterFrom(temp2_loc); - __ Mov(temp2, temp); - } // /* HeapReference<Class> */ temp = temp->super_class_ - __ Ldr(temp, HeapOperand(temp, super_offset)); - codegen_->MaybeGenerateReadBarrier(instruction, temp_loc, temp_loc, temp2_loc, super_offset); + GenerateReferenceLoadOneRegister(instruction, temp_loc, super_offset, maybe_temp2_loc); // If the class reference currently in `temp` is not null, jump // to the `compare_classes` label to compare it with the checked @@ -3250,8 +3322,8 @@ void InstructionCodeGeneratorARM64::VisitCheckCast(HCheckCast* instruction) { // going into the slow path, as it has been overwritten in the // meantime. 
// /* HeapReference<Class> */ temp = obj->klass_ - __ Ldr(temp, HeapOperand(obj, class_offset)); - codegen_->MaybeGenerateReadBarrier(instruction, temp_loc, temp_loc, obj_loc, class_offset); + GenerateReferenceLoadTwoRegisters( + instruction, temp_loc, obj_loc, class_offset, maybe_temp2_loc); __ B(type_check_slow_path->GetEntryLabel()); __ Bind(&compare_classes); @@ -3267,18 +3339,8 @@ void InstructionCodeGeneratorARM64::VisitCheckCast(HCheckCast* instruction) { __ Cmp(temp, cls); __ B(eq, &done); - Location temp2_loc = - kEmitCompilerReadBarrier ? locations->GetTemp(1) : Location::NoLocation(); - if (kEmitCompilerReadBarrier) { - // Save the value of `temp` into `temp2` before overwriting it - // in the following move operation, as we will need it for the - // read barrier below. - Register temp2 = WRegisterFrom(temp2_loc); - __ Mov(temp2, temp); - } // /* HeapReference<Class> */ temp = temp->super_class_ - __ Ldr(temp, HeapOperand(temp, super_offset)); - codegen_->MaybeGenerateReadBarrier(instruction, temp_loc, temp_loc, temp2_loc, super_offset); + GenerateReferenceLoadOneRegister(instruction, temp_loc, super_offset, maybe_temp2_loc); // If the class reference currently in `temp` is not null, jump // back at the beginning of the loop. @@ -3289,8 +3351,8 @@ void InstructionCodeGeneratorARM64::VisitCheckCast(HCheckCast* instruction) { // going into the slow path, as it has been overwritten in the // meantime. // /* HeapReference<Class> */ temp = obj->klass_ - __ Ldr(temp, HeapOperand(obj, class_offset)); - codegen_->MaybeGenerateReadBarrier(instruction, temp_loc, temp_loc, obj_loc, class_offset); + GenerateReferenceLoadTwoRegisters( + instruction, temp_loc, obj_loc, class_offset, maybe_temp2_loc); __ B(type_check_slow_path->GetEntryLabel()); break; } @@ -3302,19 +3364,8 @@ void InstructionCodeGeneratorARM64::VisitCheckCast(HCheckCast* instruction) { __ B(eq, &done); // Otherwise, we need to check that the object's class is a non-primitive array. - Location temp2_loc = - kEmitCompilerReadBarrier ? locations->GetTemp(1) : Location::NoLocation(); - if (kEmitCompilerReadBarrier) { - // Save the value of `temp` into `temp2` before overwriting it - // in the following move operation, as we will need it for the - // read barrier below. - Register temp2 = WRegisterFrom(temp2_loc); - __ Mov(temp2, temp); - } // /* HeapReference<Class> */ temp = temp->component_type_ - __ Ldr(temp, HeapOperand(temp, component_offset)); - codegen_->MaybeGenerateReadBarrier( - instruction, temp_loc, temp_loc, temp2_loc, component_offset); + GenerateReferenceLoadOneRegister(instruction, temp_loc, component_offset, maybe_temp2_loc); // If the component type is not null (i.e. the object is indeed // an array), jump to label `check_non_primitive_component_type` @@ -3327,8 +3378,8 @@ void InstructionCodeGeneratorARM64::VisitCheckCast(HCheckCast* instruction) { // going into the slow path, as it has been overwritten in the // meantime. // /* HeapReference<Class> */ temp = obj->klass_ - __ Ldr(temp, HeapOperand(obj, class_offset)); - codegen_->MaybeGenerateReadBarrier(instruction, temp_loc, temp_loc, obj_loc, class_offset); + GenerateReferenceLoadTwoRegisters( + instruction, temp_loc, obj_loc, class_offset, maybe_temp2_loc); __ B(type_check_slow_path->GetEntryLabel()); __ Bind(&check_non_primitive_component_type); @@ -3337,8 +3388,8 @@ void InstructionCodeGeneratorARM64::VisitCheckCast(HCheckCast* instruction) { __ Cbz(temp, &done); // Same comment as above regarding `temp` and the slow path. 
// /* HeapReference<Class> */ temp = obj->klass_ - __ Ldr(temp, HeapOperand(obj, class_offset)); - codegen_->MaybeGenerateReadBarrier(instruction, temp_loc, temp_loc, obj_loc, class_offset); + GenerateReferenceLoadTwoRegisters( + instruction, temp_loc, obj_loc, class_offset, maybe_temp2_loc); __ B(type_check_slow_path->GetEntryLabel()); break; } @@ -3355,6 +3406,13 @@ void InstructionCodeGeneratorARM64::VisitCheckCast(HCheckCast* instruction) { // instruction (following the runtime calling convention), which // might be cluttered by the potential first read barrier // emission at the beginning of this method. + // + // TODO: Introduce a new runtime entry point taking the object + // to test (instead of its class) as argument, and let it deal + // with the read barrier issues. This will let us refactor this + // case of the `switch` code as it was previously (with a direct + // call to the runtime not using a type checking slow path). + // This should also be beneficial for the other cases above. __ B(type_check_slow_path->GetEntryLabel()); break; } @@ -3480,7 +3538,7 @@ static bool TryGenerateIntrinsicCode(HInvoke* invoke, CodeGeneratorARM64* codege HInvokeStaticOrDirect::DispatchInfo CodeGeneratorARM64::GetSupportedInvokeStaticOrDirectDispatch( const HInvokeStaticOrDirect::DispatchInfo& desired_dispatch_info, MethodReference target_method ATTRIBUTE_UNUSED) { - // On arm64 we support all dispatch types. + // On ARM64 we support all dispatch types. return desired_dispatch_info; } @@ -3757,32 +3815,17 @@ void InstructionCodeGeneratorARM64::VisitLoadClass(HLoadClass* cls) { if (cls->IsReferrersClass()) { DCHECK(!cls->CanCallRuntime()); DCHECK(!cls->MustGenerateClinitCheck()); - uint32_t declaring_class_offset = ArtMethod::DeclaringClassOffset().Int32Value(); - if (kEmitCompilerReadBarrier) { - // /* GcRoot<mirror::Class>* */ out = &(current_method->declaring_class_) - __ Add(out.X(), current_method.X(), declaring_class_offset); - // /* mirror::Class* */ out = out->Read() - codegen_->GenerateReadBarrierForRoot(cls, out_loc, out_loc); - } else { - // /* GcRoot<mirror::Class> */ out = current_method->declaring_class_ - __ Ldr(out, MemOperand(current_method, declaring_class_offset)); - } + // /* GcRoot<mirror::Class> */ out = current_method->declaring_class_ + GenerateGcRootFieldLoad( + cls, out_loc, current_method, ArtMethod::DeclaringClassOffset().Int32Value()); } else { MemberOffset resolved_types_offset = ArtMethod::DexCacheResolvedTypesOffset(kArm64PointerSize); // /* GcRoot<mirror::Class>[] */ out = // current_method.ptr_sized_fields_->dex_cache_resolved_types_ __ Ldr(out.X(), MemOperand(current_method, resolved_types_offset.Int32Value())); - - size_t cache_offset = CodeGenerator::GetCacheOffset(cls->GetTypeIndex()); - if (kEmitCompilerReadBarrier) { - // /* GcRoot<mirror::Class>* */ out = &out[type_index] - __ Add(out.X(), out.X(), cache_offset); - // /* mirror::Class* */ out = out->Read() - codegen_->GenerateReadBarrierForRoot(cls, out_loc, out_loc); - } else { - // /* GcRoot<mirror::Class> */ out = out[type_index] - __ Ldr(out, MemOperand(out.X(), cache_offset)); - } + // /* GcRoot<mirror::Class> */ out = out[type_index] + GenerateGcRootFieldLoad( + cls, out_loc, out.X(), CodeGenerator::GetCacheOffset(cls->GetTypeIndex())); if (!cls->IsInDexCache() || cls->MustGenerateClinitCheck()) { DCHECK(cls->CanCallRuntime()); @@ -3845,30 +3888,14 @@ void InstructionCodeGeneratorARM64::VisitLoadString(HLoadString* load) { Register out = OutputRegister(load); Register current_method = 
InputRegisterAt(load, 0); - uint32_t declaring_class_offset = ArtMethod::DeclaringClassOffset().Int32Value(); - if (kEmitCompilerReadBarrier) { - // /* GcRoot<mirror::Class>* */ out = &(current_method->declaring_class_) - __ Add(out.X(), current_method.X(), declaring_class_offset); - // /* mirror::Class* */ out = out->Read() - codegen_->GenerateReadBarrierForRoot(load, out_loc, out_loc); - } else { - // /* GcRoot<mirror::Class> */ out = current_method->declaring_class_ - __ Ldr(out, MemOperand(current_method, declaring_class_offset)); - } - + // /* GcRoot<mirror::Class> */ out = current_method->declaring_class_ + GenerateGcRootFieldLoad( + load, out_loc, current_method, ArtMethod::DeclaringClassOffset().Int32Value()); // /* GcRoot<mirror::String>[] */ out = out->dex_cache_strings_ __ Ldr(out.X(), HeapOperand(out, mirror::Class::DexCacheStringsOffset().Uint32Value())); - - size_t cache_offset = CodeGenerator::GetCacheOffset(load->GetStringIndex()); - if (kEmitCompilerReadBarrier) { - // /* GcRoot<mirror::String>* */ out = &out[string_index] - __ Add(out.X(), out.X(), cache_offset); - // /* mirror::String* */ out = out->Read() - codegen_->GenerateReadBarrierForRoot(load, out_loc, out_loc); - } else { - // /* GcRoot<mirror::String> */ out = out[string_index] - __ Ldr(out, MemOperand(out.X(), cache_offset)); - } + // /* GcRoot<mirror::String> */ out = out[string_index] + GenerateGcRootFieldLoad( + load, out_loc, out.X(), CodeGenerator::GetCacheOffset(load->GetStringIndex())); if (!load->IsInDexCache()) { SlowPathCodeARM64* slow_path = new (GetGraph()->GetArena()) LoadStringSlowPathARM64(load); @@ -4237,7 +4264,7 @@ void LocationsBuilderARM64::VisitMemoryBarrier(HMemoryBarrier* memory_barrier) { } void InstructionCodeGeneratorARM64::VisitMemoryBarrier(HMemoryBarrier* memory_barrier) { - GenerateMemoryBarrier(memory_barrier->GetBarrierKind()); + codegen_->GenerateMemoryBarrier(memory_barrier->GetBarrierKind()); } void LocationsBuilderARM64::VisitReturn(HReturn* instruction) { @@ -4622,14 +4649,288 @@ void InstructionCodeGeneratorARM64::VisitPackedSwitch(HPackedSwitch* switch_inst } } -void CodeGeneratorARM64::GenerateReadBarrier(HInstruction* instruction, - Location out, - Location ref, - Location obj, - uint32_t offset, - Location index) { +void InstructionCodeGeneratorARM64::GenerateReferenceLoadOneRegister(HInstruction* instruction, + Location out, + uint32_t offset, + Location maybe_temp) { + Primitive::Type type = Primitive::kPrimNot; + Register out_reg = RegisterFrom(out, type); + if (kEmitCompilerReadBarrier) { + Register temp_reg = RegisterFrom(maybe_temp, type); + if (kUseBakerReadBarrier) { + // Load with fast path based Baker's read barrier. + // /* HeapReference<Object> */ out = *(out + offset) + codegen_->GenerateFieldLoadWithBakerReadBarrier(instruction, + out, + out_reg, + offset, + temp_reg, + /* needs_null_check */ false, + /* use_load_acquire */ false); + } else { + // Load with slow path based read barrier. + // Save the value of `out` into `maybe_temp` before overwriting it + // in the following move operation, as we will need it for the + // read barrier below. + __ Mov(temp_reg, out_reg); + // /* HeapReference<Object> */ out = *(out + offset) + __ Ldr(out_reg, HeapOperand(out_reg, offset)); + codegen_->GenerateReadBarrierSlow(instruction, out, out, maybe_temp, offset); + } + } else { + // Plain load with no read barrier. 
+ // /* HeapReference<Object> */ out = *(out + offset) + __ Ldr(out_reg, HeapOperand(out_reg, offset)); + GetAssembler()->MaybeUnpoisonHeapReference(out_reg); + } +} + +void InstructionCodeGeneratorARM64::GenerateReferenceLoadTwoRegisters(HInstruction* instruction, + Location out, + Location obj, + uint32_t offset, + Location maybe_temp) { + Primitive::Type type = Primitive::kPrimNot; + Register out_reg = RegisterFrom(out, type); + Register obj_reg = RegisterFrom(obj, type); + if (kEmitCompilerReadBarrier) { + if (kUseBakerReadBarrier) { + // Load with fast path based Baker's read barrier. + Register temp_reg = RegisterFrom(maybe_temp, type); + // /* HeapReference<Object> */ out = *(obj + offset) + codegen_->GenerateFieldLoadWithBakerReadBarrier(instruction, + out, + obj_reg, + offset, + temp_reg, + /* needs_null_check */ false, + /* use_load_acquire */ false); + } else { + // Load with slow path based read barrier. + // /* HeapReference<Object> */ out = *(obj + offset) + __ Ldr(out_reg, HeapOperand(obj_reg, offset)); + codegen_->GenerateReadBarrierSlow(instruction, out, out, obj, offset); + } + } else { + // Plain load with no read barrier. + // /* HeapReference<Object> */ out = *(obj + offset) + __ Ldr(out_reg, HeapOperand(obj_reg, offset)); + GetAssembler()->MaybeUnpoisonHeapReference(out_reg); + } +} + +void InstructionCodeGeneratorARM64::GenerateGcRootFieldLoad(HInstruction* instruction, + Location root, + vixl::Register obj, + uint32_t offset) { + Register root_reg = RegisterFrom(root, Primitive::kPrimNot); + if (kEmitCompilerReadBarrier) { + if (kUseBakerReadBarrier) { + // Fast path implementation of art::ReadBarrier::BarrierForRoot when + // Baker's read barriers are used: + // + // root = obj.field; + // if (Thread::Current()->GetIsGcMarking()) { + // root = ReadBarrier::Mark(root); + // } + + // /* GcRoot<mirror::Object> */ root = *(obj + offset) + __ Ldr(root_reg, MemOperand(obj, offset)); + static_assert( + sizeof(mirror::CompressedReference<mirror::Object>) == sizeof(GcRoot<mirror::Object>), + "art::mirror::CompressedReference<mirror::Object> and art::GcRoot<mirror::Object> " + "have different sizes."); + static_assert(sizeof(mirror::CompressedReference<mirror::Object>) == sizeof(int32_t), + "art::mirror::CompressedReference<mirror::Object> and int32_t " + "have different sizes."); + + // Slow path used to mark the GC root `root`. + SlowPathCodeARM64* slow_path = + new (GetGraph()->GetArena()) ReadBarrierMarkSlowPathARM64(instruction, root, root); + codegen_->AddSlowPath(slow_path); + + MacroAssembler* masm = GetVIXLAssembler(); + UseScratchRegisterScope temps(masm); + Register temp = temps.AcquireW(); + // temp = Thread::Current()->GetIsGcMarking() + __ Ldr(temp, MemOperand(tr, Thread::IsGcMarkingOffset<kArm64WordSize>().Int32Value())); + __ Cbnz(temp, slow_path->GetEntryLabel()); + __ Bind(slow_path->GetExitLabel()); + } else { + // GC root loaded through a slow path for read barriers other + // than Baker's. + // /* GcRoot<mirror::Object>* */ root = obj + offset + __ Add(root_reg.X(), obj.X(), offset); + // /* mirror::Object* */ root = root->Read() + codegen_->GenerateReadBarrierForRootSlow(instruction, root, root); + } + } else { + // Plain GC root load with no read barrier. + // /* GcRoot<mirror::Object> */ root = *(obj + offset) + __ Ldr(root_reg, MemOperand(obj, offset)); + // Note that GC roots are not affected by heap poisoning, thus we + // do not have to unpoison `root_reg` here.
+ } +} + +void CodeGeneratorARM64::GenerateFieldLoadWithBakerReadBarrier(HInstruction* instruction, + Location ref, + vixl::Register obj, + uint32_t offset, + Register temp, + bool needs_null_check, + bool use_load_acquire) { + DCHECK(kEmitCompilerReadBarrier); + DCHECK(kUseBakerReadBarrier); + + // /* HeapReference<Object> */ ref = *(obj + offset) + Location no_index = Location::NoLocation(); + GenerateReferenceLoadWithBakerReadBarrier( + instruction, ref, obj, offset, no_index, temp, needs_null_check, use_load_acquire); +} + +void CodeGeneratorARM64::GenerateArrayLoadWithBakerReadBarrier(HInstruction* instruction, + Location ref, + vixl::Register obj, + uint32_t data_offset, + Location index, + Register temp, + bool needs_null_check) { + DCHECK(kEmitCompilerReadBarrier); + DCHECK(kUseBakerReadBarrier); + + // Array cells are never volatile variables, therefore array loads + // never use Load-Acquire instructions on ARM64. + const bool use_load_acquire = false; + + // /* HeapReference<Object> */ ref = + // *(obj + data_offset + index * sizeof(HeapReference<Object>)) + GenerateReferenceLoadWithBakerReadBarrier( + instruction, ref, obj, data_offset, index, temp, needs_null_check, use_load_acquire); +} + +void CodeGeneratorARM64::GenerateReferenceLoadWithBakerReadBarrier(HInstruction* instruction, + Location ref, + vixl::Register obj, + uint32_t offset, + Location index, + Register temp, + bool needs_null_check, + bool use_load_acquire) { DCHECK(kEmitCompilerReadBarrier); + DCHECK(kUseBakerReadBarrier); + // If `index` is a valid location, then we are emitting an array + // load, so we shouldn't be using a Load Acquire instruction. + // In other words: `index.IsValid()` => `!use_load_acquire`. + DCHECK(!index.IsValid() || !use_load_acquire); + + MacroAssembler* masm = GetVIXLAssembler(); + UseScratchRegisterScope temps(masm); + // In slow path based read barriers, the read barrier call is + // inserted after the original load. However, in fast path based + // Baker's read barriers, we need to perform the load of + // mirror::Object::monitor_ *before* the original reference load. + // This load-load ordering is required by the read barrier. + // The fast path/slow path (for Baker's algorithm) should look like: + // + // uint32_t rb_state = Lockword(obj->monitor_).ReadBarrierState(); + // lfence; // Load fence or artificial data dependency to prevent load-load reordering + // HeapReference<Object> ref = *src; // Original reference load. + // bool is_gray = (rb_state == ReadBarrier::gray_ptr_); + // if (is_gray) { + // ref = ReadBarrier::Mark(ref); // Performed by runtime entrypoint slow path. + // } + // + // Note: the original implementation in ReadBarrier::Barrier is + // slightly more complex as it performs additional checks that we do + // not do here for performance reasons. 
+ + Primitive::Type type = Primitive::kPrimNot; + Register ref_reg = RegisterFrom(ref, type); + DCHECK(obj.IsW()); + uint32_t monitor_offset = mirror::Object::MonitorOffset().Int32Value(); + + // /* int32_t */ monitor = obj->monitor_ + __ Ldr(temp, HeapOperand(obj, monitor_offset)); + if (needs_null_check) { + MaybeRecordImplicitNullCheck(instruction); + } + // /* LockWord */ lock_word = LockWord(monitor) + static_assert(sizeof(LockWord) == sizeof(int32_t), + "art::LockWord and int32_t have different sizes."); + // /* uint32_t */ rb_state = lock_word.ReadBarrierState() + __ Lsr(temp, temp, LockWord::kReadBarrierStateShift); + __ And(temp, temp, Operand(LockWord::kReadBarrierStateMask)); + static_assert( + LockWord::kReadBarrierStateMask == ReadBarrier::rb_ptr_mask_, + "art::LockWord::kReadBarrierStateMask is not equal to art::ReadBarrier::rb_ptr_mask_."); + + // Introduce a dependency on the high bits of rb_state, which shall + // be all zeroes, to prevent load-load reordering, and without using + // a memory barrier (which would be more expensive). + // temp2 = rb_state & ~LockWord::kReadBarrierStateMask = 0 + Register temp2 = temps.AcquireW(); + __ Bic(temp2, temp, Operand(LockWord::kReadBarrierStateMask)); + // obj is unchanged by this operation, but its value now depends on + // temp2, which depends on temp. + __ Add(obj, obj, Operand(temp2)); + temps.Release(temp2); + + // The actual reference load. + if (index.IsValid()) { + static_assert( + sizeof(mirror::HeapReference<mirror::Object>) == sizeof(int32_t), + "art::mirror::HeapReference<art::mirror::Object> and int32_t have different sizes."); + temp2 = temps.AcquireW(); + // /* HeapReference<Object> */ ref = + // *(obj + offset + index * sizeof(HeapReference<Object>)) + MemOperand source = HeapOperand(obj); + if (index.IsConstant()) { + uint32_t computed_offset = + offset + (Int64ConstantFrom(index) << Primitive::ComponentSizeShift(type)); + source = HeapOperand(obj, computed_offset); + } else { + __ Add(temp2, obj, offset); + source = HeapOperand(temp2, XRegisterFrom(index), LSL, Primitive::ComponentSizeShift(type)); + } + Load(type, ref_reg, source); + temps.Release(temp2); + } else { + // /* HeapReference<Object> */ ref = *(obj + offset) + MemOperand field = HeapOperand(obj, offset); + if (use_load_acquire) { + LoadAcquire(instruction, ref_reg, field, /* needs_null_check */ false); + } else { + Load(type, ref_reg, field); + } + } + + // Object* ref = ref_addr->AsMirrorPtr() + GetAssembler()->MaybeUnpoisonHeapReference(ref_reg); + + // Slow path used to mark the object `ref` when it is gray. + SlowPathCodeARM64* slow_path = + new (GetGraph()->GetArena()) ReadBarrierMarkSlowPathARM64(instruction, ref, ref); + AddSlowPath(slow_path); + + // if (rb_state == ReadBarrier::gray_ptr_) + // ref = ReadBarrier::Mark(ref); + __ Cmp(temp, ReadBarrier::gray_ptr_); + __ B(eq, slow_path->GetEntryLabel()); + __ Bind(slow_path->GetExitLabel()); +} + +void CodeGeneratorARM64::GenerateReadBarrierSlow(HInstruction* instruction, + Location out, + Location ref, + Location obj, + uint32_t offset, + Location index) { + DCHECK(kEmitCompilerReadBarrier); + + // Insert a slow path based read barrier *after* the reference load. + // // If heap poisoning is enabled, the unpoisoning of the loaded // reference will be carried out by the runtime within the slow // path. 
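In the sequence above, the Lsr/And pair extracts the read-barrier state from the lock word, and the Bic/Add pair fabricates a value that is always zero yet data-dependent on the monitor load, so the reference load cannot be hoisted above it without a memory barrier. The extraction in plain C++ (LockWord constants as used in this file):

  #include <cstdint>

  // Mirrors the emitted Lsr + And pair.
  uint32_t ReadBarrierStateOf(uint32_t monitor) {
    return (monitor >> LockWord::kReadBarrierStateShift) & LockWord::kReadBarrierStateMask;
  }

  // temp2 = rb_state & ~kReadBarrierStateMask is always zero, so adding it
  // to the base register leaves the address unchanged while making the
  // subsequent load depend on the monitor load.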
@@ -4643,57 +4944,41 @@ void CodeGeneratorARM64::GenerateReadBarrier(HInstruction* instruction, ReadBarrierForHeapReferenceSlowPathARM64(instruction, out, ref, obj, offset, index); AddSlowPath(slow_path); - // TODO: When read barrier has a fast path, add it here. - /* Currently the read barrier call is inserted after the original load. - * However, if we have a fast path, we need to perform the load of obj.LockWord *before* the - * original load. This load-load ordering is required by the read barrier. - * The fast path/slow path (for Baker's algorithm) should look like: - * - * bool isGray = obj.LockWord & kReadBarrierMask; - * lfence; // load fence or artificial data dependence to prevent load-load reordering - * ref = obj.field; // this is the original load - * if (isGray) { - * ref = Mark(ref); // ideally the slow path just does Mark(ref) - * } - */ - __ B(slow_path->GetEntryLabel()); __ Bind(slow_path->GetExitLabel()); } -void CodeGeneratorARM64::MaybeGenerateReadBarrier(HInstruction* instruction, - Location out, - Location ref, - Location obj, - uint32_t offset, - Location index) { +void CodeGeneratorARM64::MaybeGenerateReadBarrierSlow(HInstruction* instruction, + Location out, + Location ref, + Location obj, + uint32_t offset, + Location index) { if (kEmitCompilerReadBarrier) { + // Baker's read barriers shall be handled by the fast path + // (CodeGeneratorARM64::GenerateReferenceLoadWithBakerReadBarrier). + DCHECK(!kUseBakerReadBarrier); // If heap poisoning is enabled, unpoisoning will be taken care of // by the runtime within the slow path. - GenerateReadBarrier(instruction, out, ref, obj, offset, index); + GenerateReadBarrierSlow(instruction, out, ref, obj, offset, index); } else if (kPoisonHeapReferences) { GetAssembler()->UnpoisonHeapReference(WRegisterFrom(out)); } } -void CodeGeneratorARM64::GenerateReadBarrierForRoot(HInstruction* instruction, - Location out, - Location root) { +void CodeGeneratorARM64::GenerateReadBarrierForRootSlow(HInstruction* instruction, + Location out, + Location root) { DCHECK(kEmitCompilerReadBarrier); + // Insert a slow path based read barrier *after* the GC root load. + // // Note that GC roots are not affected by heap poisoning, so we do // not need to do anything special for this here. 
SlowPathCodeARM64* slow_path = new (GetGraph()->GetArena()) ReadBarrierForRootSlowPathARM64(instruction, out, root); AddSlowPath(slow_path); - // TODO: Implement a fast path for ReadBarrierForRoot, performing - // the following operation (for Baker's algorithm): - // - // if (thread.tls32_.is_gc_marking) { - // root = Mark(root); - // } - __ B(slow_path->GetEntryLabel()); __ Bind(slow_path->GetExitLabel()); } diff --git a/compiler/optimizing/code_generator_arm64.h b/compiler/optimizing/code_generator_arm64.h index 98303f67ad..a9d1bbde98 100644 --- a/compiler/optimizing/code_generator_arm64.h +++ b/compiler/optimizing/code_generator_arm64.h @@ -195,7 +195,6 @@ class InstructionCodeGeneratorARM64 : public InstructionCodeGenerator { FOR_EACH_CONCRETE_INSTRUCTION_COMMON(DECLARE_VISIT_INSTRUCTION) FOR_EACH_CONCRETE_INSTRUCTION_ARM64(DECLARE_VISIT_INSTRUCTION) - FOR_EACH_CONCRETE_INSTRUCTION_SHARED(DECLARE_VISIT_INSTRUCTION) #undef DECLARE_VISIT_INSTRUCTION @@ -209,14 +208,53 @@ class InstructionCodeGeneratorARM64 : public InstructionCodeGenerator { private: void GenerateClassInitializationCheck(SlowPathCodeARM64* slow_path, vixl::Register class_reg); - void GenerateMemoryBarrier(MemBarrierKind kind); void GenerateSuspendCheck(HSuspendCheck* instruction, HBasicBlock* successor); void HandleBinaryOp(HBinaryOperation* instr); + void HandleFieldSet(HInstruction* instruction, const FieldInfo& field_info, bool value_can_be_null); void HandleFieldGet(HInstruction* instruction, const FieldInfo& field_info); void HandleCondition(HCondition* instruction); + + // Generate a heap reference load using one register `out`: + // + // out <- *(out + offset) + // + // while honoring heap poisoning and/or read barriers (if any). + // + // Location `maybe_temp` is used when generating a read barrier and + // shall be a register in that case; it may be an invalid location + // otherwise. + void GenerateReferenceLoadOneRegister(HInstruction* instruction, + Location out, + uint32_t offset, + Location maybe_temp); + // Generate a heap reference load using two different registers + // `out` and `obj`: + // + // out <- *(obj + offset) + // + // while honoring heap poisoning and/or read barriers (if any). + // + // Location `maybe_temp` is used when generating a Baker's (fast + // path) read barrier and shall be a register in that case; it may + // be an invalid location otherwise. + void GenerateReferenceLoadTwoRegisters(HInstruction* instruction, + Location out, + Location obj, + uint32_t offset, + Location maybe_temp); + // Generate a GC root reference load: + // + // root <- *(obj + offset) + // + // while honoring read barriers (if any). + void GenerateGcRootFieldLoad(HInstruction* instruction, + Location root, + vixl::Register obj, + uint32_t offset); + void HandleShift(HBinaryOperation* instr); void GenerateImplicitNullCheck(HNullCheck* instruction); void GenerateExplicitNullCheck(HNullCheck* instruction); @@ -246,7 +284,6 @@ class LocationsBuilderARM64 : public HGraphVisitor { FOR_EACH_CONCRETE_INSTRUCTION_COMMON(DECLARE_VISIT_INSTRUCTION) FOR_EACH_CONCRETE_INSTRUCTION_ARM64(DECLARE_VISIT_INSTRUCTION) - FOR_EACH_CONCRETE_INSTRUCTION_SHARED(DECLARE_VISIT_INSTRUCTION) #undef DECLARE_VISIT_INSTRUCTION @@ -339,6 +376,8 @@ class CodeGeneratorARM64 : public CodeGenerator { // Emit a write barrier. void MarkGCCard(vixl::Register object, vixl::Register value, bool value_can_be_null); + void GenerateMemoryBarrier(MemBarrierKind kind); + // Register allocation. 
void SetupBlockedRegisters() const OVERRIDE; @@ -388,9 +427,12 @@ class CodeGeneratorARM64 : public CodeGenerator { void AddLocationAsTemp(Location location, LocationSummary* locations) OVERRIDE; void Load(Primitive::Type type, vixl::CPURegister dst, const vixl::MemOperand& src); - void Store(Primitive::Type type, vixl::CPURegister rt, const vixl::MemOperand& dst); - void LoadAcquire(HInstruction* instruction, vixl::CPURegister dst, const vixl::MemOperand& src); - void StoreRelease(Primitive::Type type, vixl::CPURegister rt, const vixl::MemOperand& dst); + void Store(Primitive::Type type, vixl::CPURegister src, const vixl::MemOperand& dst); + void LoadAcquire(HInstruction* instruction, + vixl::CPURegister dst, + const vixl::MemOperand& src, + bool needs_null_check); + void StoreRelease(Primitive::Type type, vixl::CPURegister src, const vixl::MemOperand& dst); // Generate code to invoke a runtime entry point. void InvokeRuntime(QuickEntrypointEnum entrypoint, @@ -425,7 +467,27 @@ class CodeGeneratorARM64 : public CodeGenerator { void EmitLinkerPatches(ArenaVector<LinkerPatch>* linker_patches) OVERRIDE; - // Generate a read barrier for a heap reference within `instruction`. + // Fast path implementation of ReadBarrier::Barrier for a heap + // reference field load when Baker's read barriers are used. + void GenerateFieldLoadWithBakerReadBarrier(HInstruction* instruction, + Location ref, + vixl::Register obj, + uint32_t offset, + vixl::Register temp, + bool needs_null_check, + bool use_load_acquire); + // Fast path implementation of ReadBarrier::Barrier for a heap + // reference array load when Baker's read barriers are used. + void GenerateArrayLoadWithBakerReadBarrier(HInstruction* instruction, + Location ref, + vixl::Register obj, + uint32_t data_offset, + Location index, + vixl::Register temp, + bool needs_null_check); + + // Generate a read barrier for a heap reference within `instruction` + // using a slow path. // // A read barrier for an object reference read from the heap is // implemented as a call to the artReadBarrierSlow runtime entry @@ -442,23 +504,25 @@ class CodeGeneratorARM64 : public CodeGenerator { // When `index` is provided (i.e. for array accesses), the offset // value passed to artReadBarrierSlow is adjusted to take `index` // into account. - void GenerateReadBarrier(HInstruction* instruction, - Location out, - Location ref, - Location obj, - uint32_t offset, - Location index = Location::NoLocation()); - - // If read barriers are enabled, generate a read barrier for a heap reference. - // If heap poisoning is enabled, also unpoison the reference in `out`. - void MaybeGenerateReadBarrier(HInstruction* instruction, - Location out, - Location ref, - Location obj, - uint32_t offset, - Location index = Location::NoLocation()); - - // Generate a read barrier for a GC root within `instruction`. + void GenerateReadBarrierSlow(HInstruction* instruction, + Location out, + Location ref, + Location obj, + uint32_t offset, + Location index = Location::NoLocation()); + + // If read barriers are enabled, generate a read barrier for a heap + // reference using a slow path. If heap poisoning is enabled, also + // unpoison the reference in `out`. + void MaybeGenerateReadBarrierSlow(HInstruction* instruction, + Location out, + Location ref, + Location obj, + uint32_t offset, + Location index = Location::NoLocation()); + + // Generate a read barrier for a GC root within `instruction` using + // a slow path. 
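+ // For illustration, a caller first emits the GC root load itself and then + // invokes this helper with `out` and `root` describing the loaded value.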
// // A read barrier for an object reference GC root is implemented as // a call to the artReadBarrierForRootSlow runtime entry point, @@ -468,9 +532,20 @@ class CodeGeneratorARM64 : public CodeGenerator { // // The `out` location contains the value returned by // artReadBarrierForRootSlow. - void GenerateReadBarrierForRoot(HInstruction* instruction, Location out, Location root); + void GenerateReadBarrierForRootSlow(HInstruction* instruction, Location out, Location root); private: + // Factored implementation of GenerateFieldLoadWithBakerReadBarrier + // and GenerateArrayLoadWithBakerReadBarrier. + void GenerateReferenceLoadWithBakerReadBarrier(HInstruction* instruction, + Location ref, + vixl::Register obj, + uint32_t offset, + Location index, + vixl::Register temp, + bool needs_null_check, + bool use_load_acquire); + using Uint64ToLiteralMap = ArenaSafeMap<uint64_t, vixl::Literal<uint64_t>*>; using MethodToLiteralMap = ArenaSafeMap<MethodReference, vixl::Literal<uint64_t>*, diff --git a/compiler/optimizing/dominator_test.cc b/compiler/optimizing/dominator_test.cc index 91e4a997fd..feb8b2092a 100644 --- a/compiler/optimizing/dominator_test.cc +++ b/compiler/optimizing/dominator_test.cc @@ -133,8 +133,9 @@ TEST(OptimizerTest, CFG4) { const uint32_t dominators[] = { kInvalidBlockId, - 0, - kInvalidBlockId + 3, + kInvalidBlockId, + 0 }; TestCode(data1, dominators, sizeof(dominators) / sizeof(int)); diff --git a/compiler/optimizing/graph_test.cc b/compiler/optimizing/graph_test.cc index d4b9b71952..d5305646a8 100644 --- a/compiler/optimizing/graph_test.cc +++ b/compiler/optimizing/graph_test.cc @@ -164,7 +164,7 @@ TEST(GraphTest, IfSuccessorMultipleBackEdges1) { // Ensure there is only one back edge. ASSERT_EQ(if_block->GetPredecessors().size(), 2u); - ASSERT_EQ(if_block->GetPredecessors()[0], entry_block); + ASSERT_EQ(if_block->GetPredecessors()[0], entry_block->GetSingleSuccessor()); ASSERT_NE(if_block->GetPredecessors()[1], if_block); // Ensure the new block is the back edge. @@ -199,7 +199,7 @@ TEST(GraphTest, IfSuccessorMultipleBackEdges2) { // Ensure there is only one back edge. ASSERT_EQ(if_block->GetPredecessors().size(), 2u); - ASSERT_EQ(if_block->GetPredecessors()[0], entry_block); + ASSERT_EQ(if_block->GetPredecessors()[0], entry_block->GetSingleSuccessor()); ASSERT_NE(if_block->GetPredecessors()[1], if_block); // Ensure the new block is the back edge. diff --git a/compiler/optimizing/graph_visualizer.cc b/compiler/optimizing/graph_visualizer.cc index 6b8f61a5b4..32c3a925e0 100644 --- a/compiler/optimizing/graph_visualizer.cc +++ b/compiler/optimizing/graph_visualizer.cc @@ -21,7 +21,6 @@ #include <cctype> #include <sstream> -#include "bounds_check_elimination.h" #include "code_generator.h" #include "dead_code_elimination.h" #include "disassembler.h" @@ -427,12 +426,6 @@ class HGraphVisualizerPrinter : public HGraphDelegateVisitor { StartAttributeStream("kind") << (try_boundary->IsEntry() ? 
"entry" : "exit"); } -#if defined(ART_ENABLE_CODEGEN_arm) || defined(ART_ENABLE_CODEGEN_arm64) - void VisitMultiplyAccumulate(HMultiplyAccumulate* instruction) OVERRIDE { - StartAttributeStream("kind") << instruction->GetOpKind(); - } -#endif - #ifdef ART_ENABLE_CODEGEN_arm64 void VisitArm64DataProcWithShifterOp(HArm64DataProcWithShifterOp* instruction) OVERRIDE { StartAttributeStream("kind") << instruction->GetInstrKind() << "+" << instruction->GetOpKind(); @@ -440,6 +433,10 @@ class HGraphVisualizerPrinter : public HGraphDelegateVisitor { StartAttributeStream("shift") << instruction->GetShiftAmount(); } } + + void VisitArm64MultiplyAccumulate(HArm64MultiplyAccumulate* instruction) OVERRIDE { + StartAttributeStream("kind") << instruction->GetOpKind(); + } #endif bool IsPass(const char* name) { @@ -508,7 +505,6 @@ class HGraphVisualizerPrinter : public HGraphDelegateVisitor { if (IsPass(LICM::kLoopInvariantCodeMotionPassName) || IsPass(HDeadCodeElimination::kFinalDeadCodeEliminationPassName) || IsPass(HDeadCodeElimination::kInitialDeadCodeEliminationPassName) - || IsPass(BoundsCheckElimination::kBoundsCheckEliminationPassName) || IsPass(SsaBuilder::kSsaBuilderPassName)) { HLoopInformation* info = instruction->GetBlock()->GetLoopInformation(); if (info == nullptr) { diff --git a/compiler/optimizing/inliner.cc b/compiler/optimizing/inliner.cc index 20c4f1f698..2e79df1b84 100644 --- a/compiler/optimizing/inliner.cc +++ b/compiler/optimizing/inliner.cc @@ -419,7 +419,10 @@ bool HInliner::TryInline(HInvoke* invoke_instruction, ArtMethod* method, bool do size_t inline_max_code_units = compiler_driver_->GetCompilerOptions().GetInlineMaxCodeUnits(); if (code_item->insns_size_in_code_units_ > inline_max_code_units) { VLOG(compiler) << "Method " << PrettyMethod(method) - << " is too big to inline"; + << " is too big to inline: " + << code_item->insns_size_in_code_units_ + << " > " + << inline_max_code_units; return false; } @@ -639,9 +642,12 @@ bool HInliner::TryBuildAndInline(ArtMethod* resolved_method, for (; !it.Done(); it.Advance()) { HBasicBlock* block = it.Current(); - if (block->IsLoopHeader()) { + + if (block->IsLoopHeader() && block->GetLoopInformation()->IsIrreducible()) { + // Don't inline methods with irreducible loops, they could prevent some + // optimizations to run. VLOG(compiler) << "Method " << PrettyMethod(method_index, callee_dex_file) - << " could not be inlined because it contains a loop"; + << " could not be inlined because it contains an irreducible loop"; return false; } diff --git a/compiler/optimizing/instruction_simplifier_arm.cc b/compiler/optimizing/instruction_simplifier_arm.cc deleted file mode 100644 index db1f9a79aa..0000000000 --- a/compiler/optimizing/instruction_simplifier_arm.cc +++ /dev/null @@ -1,30 +0,0 @@ -/* - * Copyright (C) 2015 The Android Open Source Project - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "instruction_simplifier_arm.h" -#include "instruction_simplifier_shared.h" - -namespace art { -namespace arm { - -void InstructionSimplifierArmVisitor::VisitMul(HMul* instruction) { - if (TryCombineMultiplyAccumulate(instruction, kArm)) { - RecordSimplification(); - } -} - -} // namespace arm -} // namespace art diff --git a/compiler/optimizing/instruction_simplifier_arm.h b/compiler/optimizing/instruction_simplifier_arm.h deleted file mode 100644 index 379b95d6ae..0000000000 --- a/compiler/optimizing/instruction_simplifier_arm.h +++ /dev/null @@ -1,58 +0,0 @@ -/* - * Copyright (C) 2015 The Android Open Source Project - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef ART_COMPILER_OPTIMIZING_INSTRUCTION_SIMPLIFIER_ARM_H_ -#define ART_COMPILER_OPTIMIZING_INSTRUCTION_SIMPLIFIER_ARM_H_ - -#include "nodes.h" -#include "optimization.h" - -namespace art { -namespace arm { - -class InstructionSimplifierArmVisitor : public HGraphVisitor { - public: - InstructionSimplifierArmVisitor(HGraph* graph, OptimizingCompilerStats* stats) - : HGraphVisitor(graph), stats_(stats) {} - - private: - void RecordSimplification() { - if (stats_ != nullptr) { - stats_->RecordStat(kInstructionSimplificationsArch); - } - } - - void VisitMul(HMul* instruction) OVERRIDE; - - OptimizingCompilerStats* stats_; -}; - - -class InstructionSimplifierArm : public HOptimization { - public: - InstructionSimplifierArm(HGraph* graph, OptimizingCompilerStats* stats) - : HOptimization(graph, "instruction_simplifier_arm", stats) {} - - void Run() OVERRIDE { - InstructionSimplifierArmVisitor visitor(graph_, stats_); - visitor.VisitReversePostOrder(); - } -}; - -} // namespace arm -} // namespace art - -#endif // ART_COMPILER_OPTIMIZING_INSTRUCTION_SIMPLIFIER_ARM_H_ diff --git a/compiler/optimizing/instruction_simplifier_arm64.cc b/compiler/optimizing/instruction_simplifier_arm64.cc index 83126a5c4d..4bcfc54791 100644 --- a/compiler/optimizing/instruction_simplifier_arm64.cc +++ b/compiler/optimizing/instruction_simplifier_arm64.cc @@ -17,7 +17,6 @@ #include "instruction_simplifier_arm64.h" #include "common_arm64.h" -#include "instruction_simplifier_shared.h" #include "mirror/array-inl.h" namespace art { @@ -180,6 +179,67 @@ bool InstructionSimplifierArm64Visitor::TryMergeIntoUsersShifterOperand(HInstruc return true; } +bool InstructionSimplifierArm64Visitor::TrySimpleMultiplyAccumulatePatterns( + HMul* mul, HBinaryOperation* input_binop, HInstruction* input_other) { + DCHECK(Primitive::IsIntOrLongType(mul->GetType())); + DCHECK(input_binop->IsAdd() || input_binop->IsSub()); + DCHECK_NE(input_binop, input_other); + if (!input_binop->HasOnlyOneNonEnvironmentUse()) { + return false; + } + + // Try to interpret patterns like + // a * (b <+/-> 1) + // as + // (a * b) <+/-> a + HInstruction* input_a = input_other; + HInstruction* input_b = nullptr; // Set to a non-null value if we found a pattern to optimize. 
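+ // For example (a sketch for the int case), + // a * (b + 1) + // becomes + // HArm64MultiplyAccumulate(kAdd, /* acc */ a, /* mul_left */ a, /* mul_right */ b), + // which the code generator can emit as a single madd res, a, b, a.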
+ HInstruction::InstructionKind op_kind; + + if (input_binop->IsAdd()) { + if ((input_binop->GetConstantRight() != nullptr) && input_binop->GetConstantRight()->IsOne()) { + // Interpret + // a * (b + 1) + // as + // (a * b) + a + input_b = input_binop->GetLeastConstantLeft(); + op_kind = HInstruction::kAdd; + } + } else { + DCHECK(input_binop->IsSub()); + if (input_binop->GetRight()->IsConstant() && + input_binop->GetRight()->AsConstant()->IsMinusOne()) { + // Interpret + // a * (b - (-1)) + // as + // a + (a * b) + input_b = input_binop->GetLeft(); + op_kind = HInstruction::kAdd; + } else if (input_binop->GetLeft()->IsConstant() && + input_binop->GetLeft()->AsConstant()->IsOne()) { + // Interpret + // a * (1 - b) + // as + // a - (a * b) + input_b = input_binop->GetRight(); + op_kind = HInstruction::kSub; + } + } + + if (input_b == nullptr) { + // We did not find a pattern we can optimize. + return false; + } + + HArm64MultiplyAccumulate* mulacc = new(GetGraph()->GetArena()) HArm64MultiplyAccumulate( + mul->GetType(), op_kind, input_a, input_a, input_b, mul->GetDexPc()); + + mul->GetBlock()->ReplaceAndRemoveInstructionWith(mul, mulacc); + input_binop->GetBlock()->RemoveInstruction(input_binop); + + return true; +} + void InstructionSimplifierArm64Visitor::VisitArrayGet(HArrayGet* instruction) { TryExtractArrayAccessAddress(instruction, instruction->GetArray(), @@ -195,8 +255,75 @@ void InstructionSimplifierArm64Visitor::VisitArraySet(HArraySet* instruction) { } void InstructionSimplifierArm64Visitor::VisitMul(HMul* instruction) { - if (TryCombineMultiplyAccumulate(instruction, kArm64)) { - RecordSimplification(); + Primitive::Type type = instruction->GetType(); + if (!Primitive::IsIntOrLongType(type)) { + return; + } + + HInstruction* use = instruction->HasNonEnvironmentUses() + ? instruction->GetUses().GetFirst()->GetUser() + : nullptr; + + if (instruction->HasOnlyOneNonEnvironmentUse() && (use->IsAdd() || use->IsSub())) { + // Replace code looking like + // MUL tmp, x, y + // SUB dst, acc, tmp + // with + // MULSUB dst, acc, x, y + // Note that we do not want to (unconditionally) perform the merge when the + // multiplication has multiple uses and it can be merged in all of them. + // Multiple uses could happen on the same control-flow path, and we would + // then increase the amount of work. In the future we could try to evaluate + // whether all uses are on different control-flow paths (using dominance and + // reverse-dominance information) and only perform the merge when they are. + HInstruction* accumulator = nullptr; + HBinaryOperation* binop = use->AsBinaryOperation(); + HInstruction* binop_left = binop->GetLeft(); + HInstruction* binop_right = binop->GetRight(); + // Be careful after GVN. This should not happen since the `HMul` has only + // one use. + DCHECK_NE(binop_left, binop_right); + if (binop_right == instruction) { + accumulator = binop_left; + } else if (use->IsAdd()) { + DCHECK_EQ(binop_left, instruction); + accumulator = binop_right; + } + + if (accumulator != nullptr) { + HArm64MultiplyAccumulate* mulacc = + new (GetGraph()->GetArena()) HArm64MultiplyAccumulate(type, + binop->GetKind(), + accumulator, + instruction->GetLeft(), + instruction->GetRight()); + + binop->GetBlock()->ReplaceAndRemoveInstructionWith(binop, mulacc); + DCHECK(!instruction->HasUses()); + instruction->GetBlock()->RemoveInstruction(instruction); + RecordSimplification(); + return; + } + } + + // Use multiply accumulate instruction for a few simple patterns.
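+ // The patterns handled below are a * (b + 1), a * (b - (-1)) and a * (1 - b); + // see TrySimpleMultiplyAccumulatePatterns above.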
+ // We prefer not applying the following transformations if the left and + // right inputs perform the same operation. + // We rely on GVN having squashed the inputs if appropriate. However the + // results are still correct even if that did not happen. + if (instruction->GetLeft() == instruction->GetRight()) { + return; + } + + HInstruction* left = instruction->GetLeft(); + HInstruction* right = instruction->GetRight(); + if ((right->IsAdd() || right->IsSub()) && + TrySimpleMultiplyAccumulatePatterns(instruction, right->AsBinaryOperation(), left)) { + return; + } + if ((left->IsAdd() || left->IsSub()) && + TrySimpleMultiplyAccumulatePatterns(instruction, left->AsBinaryOperation(), right)) { + return; } } diff --git a/compiler/optimizing/instruction_simplifier_arm64.h b/compiler/optimizing/instruction_simplifier_arm64.h index 37a34c0373..b7f490bb8c 100644 --- a/compiler/optimizing/instruction_simplifier_arm64.h +++ b/compiler/optimizing/instruction_simplifier_arm64.h @@ -51,6 +51,10 @@ class InstructionSimplifierArm64Visitor : public HGraphVisitor { return TryMergeIntoShifterOperand(use, bitfield_op, true); } + bool TrySimpleMultiplyAccumulatePatterns(HMul* mul, + HBinaryOperation* input_binop, + HInstruction* input_other); + // HInstruction visitors, sorted alphabetically. void VisitArrayGet(HArrayGet* instruction) OVERRIDE; void VisitArraySet(HArraySet* instruction) OVERRIDE; diff --git a/compiler/optimizing/instruction_simplifier_shared.cc b/compiler/optimizing/instruction_simplifier_shared.cc deleted file mode 100644 index 45d196fa6d..0000000000 --- a/compiler/optimizing/instruction_simplifier_shared.cc +++ /dev/null @@ -1,189 +0,0 @@ -/* - * Copyright (C) 2015 The Android Open Source Project - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "instruction_simplifier_shared.h" - -namespace art { - -namespace { - -bool TrySimpleMultiplyAccumulatePatterns(HMul* mul, - HBinaryOperation* input_binop, - HInstruction* input_other) { - DCHECK(Primitive::IsIntOrLongType(mul->GetType())); - DCHECK(input_binop->IsAdd() || input_binop->IsSub()); - DCHECK_NE(input_binop, input_other); - if (!input_binop->HasOnlyOneNonEnvironmentUse()) { - return false; - } - - // Try to interpret patterns like - // a * (b <+/-> 1) - // as - // (a * b) <+/-> a - HInstruction* input_a = input_other; - HInstruction* input_b = nullptr; // Set to a non-null value if we found a pattern to optimize. 
- HInstruction::InstructionKind op_kind; - - if (input_binop->IsAdd()) { - if ((input_binop->GetConstantRight() != nullptr) && input_binop->GetConstantRight()->IsOne()) { - // Interpret - // a * (b + 1) - // as - // (a * b) + a - input_b = input_binop->GetLeastConstantLeft(); - op_kind = HInstruction::kAdd; - } - } else { - DCHECK(input_binop->IsSub()); - if (input_binop->GetRight()->IsConstant() && - input_binop->GetRight()->AsConstant()->IsMinusOne()) { - // Interpret - // a * (b - (-1)) - // as - // a + (a * b) - input_b = input_binop->GetLeft(); - op_kind = HInstruction::kAdd; - } else if (input_binop->GetLeft()->IsConstant() && - input_binop->GetLeft()->AsConstant()->IsOne()) { - // Interpret - // a * (1 - b) - // as - // a - (a * b) - input_b = input_binop->GetRight(); - op_kind = HInstruction::kSub; - } - } - - if (input_b == nullptr) { - // We did not find a pattern we can optimize. - return false; - } - - ArenaAllocator* arena = mul->GetBlock()->GetGraph()->GetArena(); - HMultiplyAccumulate* mulacc = new(arena) HMultiplyAccumulate( - mul->GetType(), op_kind, input_a, input_a, input_b, mul->GetDexPc()); - - mul->GetBlock()->ReplaceAndRemoveInstructionWith(mul, mulacc); - input_binop->GetBlock()->RemoveInstruction(input_binop); - - return true; -} - -} // namespace - -bool TryCombineMultiplyAccumulate(HMul* mul, InstructionSet isa) { - Primitive::Type type = mul->GetType(); - switch (isa) { - case kArm: - case kThumb2: - if (type != Primitive::kPrimInt) { - return false; - } - break; - case kArm64: - if (!Primitive::IsIntOrLongType(type)) { - return false; - } - break; - default: - return false; - } - - HInstruction* use = mul->HasNonEnvironmentUses() - ? mul->GetUses().GetFirst()->GetUser() - : nullptr; - - ArenaAllocator* arena = mul->GetBlock()->GetGraph()->GetArena(); - - if (mul->HasOnlyOneNonEnvironmentUse()) { - if (use->IsAdd() || use->IsSub()) { - // Replace code looking like - // MUL tmp, x, y - // SUB dst, acc, tmp - // with - // MULSUB dst, acc, x, y - // Note that we do not want to (unconditionally) perform the merge when the - // multiplication has multiple uses and it can be merged in all of them. - // Multiple uses could happen on the same control-flow path, and we would - // then increase the amount of work. In the future we could try to evaluate - // whether all uses are on different control-flow paths (using dominance and - // reverse-dominance information) and only perform the merge when they are. - HInstruction* accumulator = nullptr; - HBinaryOperation* binop = use->AsBinaryOperation(); - HInstruction* binop_left = binop->GetLeft(); - HInstruction* binop_right = binop->GetRight(); - // Be careful after GVN. This should not happen since the `HMul` has only - // one use. 
- DCHECK_NE(binop_left, binop_right); - if (binop_right == mul) { - accumulator = binop_left; - } else if (use->IsAdd()) { - DCHECK_EQ(binop_left, mul); - accumulator = binop_right; - } - - if (accumulator != nullptr) { - HMultiplyAccumulate* mulacc = - new (arena) HMultiplyAccumulate(type, - binop->GetKind(), - accumulator, - mul->GetLeft(), - mul->GetRight()); - - binop->GetBlock()->ReplaceAndRemoveInstructionWith(binop, mulacc); - DCHECK(!mul->HasUses()); - mul->GetBlock()->RemoveInstruction(mul); - return true; - } - } else if (use->IsNeg() && isa != kArm) { - HMultiplyAccumulate* mulacc = - new (arena) HMultiplyAccumulate(type, - HInstruction::kSub, - mul->GetBlock()->GetGraph()->GetConstant(type, 0), - mul->GetLeft(), - mul->GetRight()); - - use->GetBlock()->ReplaceAndRemoveInstructionWith(use, mulacc); - DCHECK(!mul->HasUses()); - mul->GetBlock()->RemoveInstruction(mul); - return true; - } - } - - // Use multiply accumulate instruction for a few simple patterns. - // We prefer not applying the following transformations if the left and - // right inputs perform the same operation. - // We rely on GVN having squashed the inputs if appropriate. However the - // results are still correct even if that did not happen. - if (mul->GetLeft() == mul->GetRight()) { - return false; - } - - HInstruction* left = mul->GetLeft(); - HInstruction* right = mul->GetRight(); - if ((right->IsAdd() || right->IsSub()) && - TrySimpleMultiplyAccumulatePatterns(mul, right->AsBinaryOperation(), left)) { - return true; - } - if ((left->IsAdd() || left->IsSub()) && - TrySimpleMultiplyAccumulatePatterns(mul, left->AsBinaryOperation(), right)) { - return true; - } - return false; -} - -} // namespace art diff --git a/compiler/optimizing/instruction_simplifier_shared.h b/compiler/optimizing/instruction_simplifier_shared.h deleted file mode 100644 index 9832ecc058..0000000000 --- a/compiler/optimizing/instruction_simplifier_shared.h +++ /dev/null @@ -1,28 +0,0 @@ -/* - * Copyright (C) 2015 The Android Open Source Project - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#ifndef ART_COMPILER_OPTIMIZING_INSTRUCTION_SIMPLIFIER_SHARED_H_ -#define ART_COMPILER_OPTIMIZING_INSTRUCTION_SIMPLIFIER_SHARED_H_ - -#include "nodes.h" - -namespace art { - -bool TryCombineMultiplyAccumulate(HMul* mul, InstructionSet isa); - -} // namespace art - -#endif // ART_COMPILER_OPTIMIZING_INSTRUCTION_SIMPLIFIER_SHARED_H_ diff --git a/compiler/optimizing/intrinsics_arm64.cc b/compiler/optimizing/intrinsics_arm64.cc index c5688a3ea2..8cf2d4f393 100644 --- a/compiler/optimizing/intrinsics_arm64.cc +++ b/compiler/optimizing/intrinsics_arm64.cc @@ -752,21 +752,33 @@ static void GenUnsafeGet(HInvoke* invoke, Register trg = RegisterFrom(trg_loc, type); bool use_acquire_release = codegen->GetInstructionSetFeatures().PreferAcquireRelease(); - MemOperand mem_op(base.X(), offset); - if (is_volatile) { - if (use_acquire_release) { - codegen->LoadAcquire(invoke, trg, mem_op); - } else { - codegen->Load(type, trg, mem_op); + if (type == Primitive::kPrimNot && kEmitCompilerReadBarrier && kUseBakerReadBarrier) { + // UnsafeGetObject/UnsafeGetObjectVolatile with Baker's read barrier case. + UseScratchRegisterScope temps(masm); + Register temp = temps.AcquireW(); + codegen->GenerateArrayLoadWithBakerReadBarrier( + invoke, trg_loc, base, 0U, offset_loc, temp, /* needs_null_check */ false); + if (is_volatile && !use_acquire_release) { __ Dmb(InnerShareable, BarrierReads); } } else { - codegen->Load(type, trg, mem_op); - } + // Other cases. + MemOperand mem_op(base.X(), offset); + if (is_volatile) { + if (use_acquire_release) { + codegen->LoadAcquire(invoke, trg, mem_op, /* needs_null_check */ true); + } else { + codegen->Load(type, trg, mem_op); + __ Dmb(InnerShareable, BarrierReads); + } + } else { + codegen->Load(type, trg, mem_op); + } - if (type == Primitive::kPrimNot) { - DCHECK(trg.IsW()); - codegen->MaybeGenerateReadBarrier(invoke, trg_loc, trg_loc, base_loc, 0U, offset_loc); + if (type == Primitive::kPrimNot) { + DCHECK(trg.IsW()); + codegen->MaybeGenerateReadBarrierSlow(invoke, trg_loc, trg_loc, base_loc, 0U, offset_loc); + } } } @@ -1026,10 +1038,15 @@ static void GenCas(LocationSummary* locations, Primitive::Type type, CodeGenerat vixl::Label loop_head, exit_loop; if (use_acquire_release) { __ Bind(&loop_head); - __ Ldaxr(tmp_value, MemOperand(tmp_ptr)); - // TODO: Do we need a read barrier here when `type == Primitive::kPrimNot`? + // TODO: When `type == Primitive::kPrimNot`, add a read barrier for + // the reference stored in the object before attempting the CAS, + // similar to the one in the art::Unsafe_compareAndSwapObject JNI + // implementation. + // // Note that this code is not (yet) used when read barriers are // enabled (see IntrinsicLocationsBuilderARM64::VisitUnsafeCASObject). + DCHECK(!(type == Primitive::kPrimNot && kEmitCompilerReadBarrier)); + __ Ldaxr(tmp_value, MemOperand(tmp_ptr)); __ Cmp(tmp_value, expected); __ B(&exit_loop, ne); __ Stlxr(tmp_32, value, MemOperand(tmp_ptr)); diff --git a/compiler/optimizing/nodes.cc b/compiler/optimizing/nodes.cc index cb7bc58b0c..adf8734214 100644 --- a/compiler/optimizing/nodes.cc +++ b/compiler/optimizing/nodes.cc @@ -288,9 +288,10 @@ void HGraph::SimplifyLoop(HBasicBlock* header) { // Make sure the loop has only one pre header. This simplifies SSA building by having // to just look at the pre header to know which locals are initialized at entry of the - // loop. + // loop. Also, don't allow the entry block to be a pre header: this simplifies inlining + // this graph. 
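+ // For instance, a method whose code starts directly in a loop goes from + // entry -> header (loop) + // to + // entry -> pre_header -> header (loop).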
size_t number_of_incomings = header->GetPredecessors().size() - info->NumberOfBackEdges(); - if (number_of_incomings != 1) { + if (number_of_incomings != 1 || (GetEntryBlock()->GetSingleSuccessor() == header)) { HBasicBlock* pre_header = new (arena_) HBasicBlock(this, header->GetDexPc()); AddBlock(pre_header); pre_header->AddInstruction(new (arena_) HGoto(header->GetDexPc())); @@ -1837,6 +1838,7 @@ HInstruction* HGraph::InlineInto(HGraph* outer_graph, HInvoke* invoke) { DCHECK(GetBlocks()[0]->IsEntryBlock()); DCHECK(GetBlocks()[2]->IsExitBlock()); DCHECK(!body->IsExitBlock()); + DCHECK(!body->IsInLoop()); HInstruction* last = body->GetLastInstruction(); invoke->GetBlock()->instructions_.AddAfter(invoke, body->GetInstructions()); @@ -1895,7 +1897,7 @@ HInstruction* HGraph::InlineInto(HGraph* outer_graph, HInvoke* invoke) { // Update the meta information surrounding blocks: // (1) the graph they are now in, // (2) the reverse post order of that graph, - // (3) the potential loop information they are now in, + // (3) their potential loop information, inner and outer, // (4) try block membership. // Note that we do not need to update catch phi inputs because they // correspond to the register file of the outer method which the inlinee @@ -1924,15 +1926,24 @@ HInstruction* HGraph::InlineInto(HGraph* outer_graph, HInvoke* invoke) { for (HReversePostOrderIterator it(*this); !it.Done(); it.Advance()) { HBasicBlock* current = it.Current(); if (current != exit_block_ && current != entry_block_ && current != first) { - DCHECK(!current->IsInLoop()); DCHECK(current->GetTryCatchInformation() == nullptr); DCHECK(current->GetGraph() == this); current->SetGraph(outer_graph); outer_graph->AddBlock(current); outer_graph->reverse_post_order_[++index_of_at] = current; - if (loop_info != nullptr) { + if (!current->IsInLoop()) { current->SetLoopInformation(loop_info); - for (HLoopInformationOutwardIterator loop_it(*at); !loop_it.Done(); loop_it.Advance()) { + } else if (current->IsLoopHeader()) { + // Clear the information of which blocks are contained in that loop. Since the + // information is stored as a bit vector based on block ids, we have to update + // it, as those block ids were specific to the callee graph and we are now adding + // these blocks to the caller graph. + current->GetLoopInformation()->ClearAllBlocks(); + } + if (current->IsInLoop()) { + for (HLoopInformationOutwardIterator loop_it(*current); + !loop_it.Done(); + loop_it.Advance()) { loop_it.Current()->Add(current); } } @@ -1945,7 +1956,9 @@ HInstruction* HGraph::InlineInto(HGraph* outer_graph, HInvoke* invoke) { outer_graph->AddBlock(to); outer_graph->reverse_post_order_[++index_of_at] = to; if (loop_info != nullptr) { - to->SetLoopInformation(loop_info); + if (!to->IsInLoop()) { + to->SetLoopInformation(loop_info); + } for (HLoopInformationOutwardIterator loop_it(*at); !loop_it.Done(); loop_it.Advance()) { loop_it.Current()->Add(to); } @@ -2208,7 +2221,10 @@ void HInvoke::SetIntrinsic(Intrinsics intrinsic, SetSideEffects(GetSideEffects().Union(SideEffects::CanTriggerGC())); } // Adjust method's exception status from intrinsic table. 
- SetCanThrow(exceptions == kCanThrow); + switch (exceptions) { + case kNoThrow: SetCanThrow(false); break; + case kCanThrow: SetCanThrow(true); break; + } } bool HNewInstance::IsStringAlloc() const { diff --git a/compiler/optimizing/nodes.h b/compiler/optimizing/nodes.h index 57fa558129..5246fd1f05 100644 --- a/compiler/optimizing/nodes.h +++ b/compiler/optimizing/nodes.h @@ -689,6 +689,10 @@ class HLoopInformation : public ArenaObject<kArenaAllocLoopInfo> { void Add(HBasicBlock* block); void Remove(HBasicBlock* block); + void ClearAllBlocks() { + blocks_.ClearAllBits(); + } + private: // Internal recursive implementation of `Populate`. void PopulateRecursive(HBasicBlock* block); @@ -1226,16 +1230,6 @@ class HLoopInformationOutwardIterator : public ValueObject { M(UShr, BinaryOperation) \ M(Xor, BinaryOperation) \ -/* - * Instructions, shared across several (not all) architectures. - */ -#if !defined(ART_ENABLE_CODEGEN_arm) && !defined(ART_ENABLE_CODEGEN_arm64) -#define FOR_EACH_CONCRETE_INSTRUCTION_SHARED(M) -#else -#define FOR_EACH_CONCRETE_INSTRUCTION_SHARED(M) \ - M(MultiplyAccumulate, Instruction) -#endif - #ifndef ART_ENABLE_CODEGEN_arm #define FOR_EACH_CONCRETE_INSTRUCTION_ARM(M) #else @@ -1248,7 +1242,8 @@ class HLoopInformationOutwardIterator : public ValueObject { #else #define FOR_EACH_CONCRETE_INSTRUCTION_ARM64(M) \ M(Arm64DataProcWithShifterOp, Instruction) \ - M(Arm64IntermediateAddress, Instruction) + M(Arm64IntermediateAddress, Instruction) \ + M(Arm64MultiplyAccumulate, Instruction) #endif #define FOR_EACH_CONCRETE_INSTRUCTION_MIPS(M) @@ -1268,7 +1263,6 @@ class HLoopInformationOutwardIterator : public ValueObject { #define FOR_EACH_CONCRETE_INSTRUCTION(M) \ FOR_EACH_CONCRETE_INSTRUCTION_COMMON(M) \ - FOR_EACH_CONCRETE_INSTRUCTION_SHARED(M) \ FOR_EACH_CONCRETE_INSTRUCTION_ARM(M) \ FOR_EACH_CONCRETE_INSTRUCTION_ARM64(M) \ FOR_EACH_CONCRETE_INSTRUCTION_MIPS(M) \ @@ -5728,9 +5722,6 @@ class HParallelMove : public HTemplateInstruction<0> { } // namespace art -#if defined(ART_ENABLE_CODEGEN_arm) || defined(ART_ENABLE_CODEGEN_arm64) -#include "nodes_shared.h" -#endif #ifdef ART_ENABLE_CODEGEN_arm #include "nodes_arm.h" #endif diff --git a/compiler/optimizing/nodes_arm64.h b/compiler/optimizing/nodes_arm64.h index 173852a55d..445cdab191 100644 --- a/compiler/optimizing/nodes_arm64.h +++ b/compiler/optimizing/nodes_arm64.h @@ -118,6 +118,40 @@ class HArm64IntermediateAddress : public HExpression<2> { DISALLOW_COPY_AND_ASSIGN(HArm64IntermediateAddress); }; +class HArm64MultiplyAccumulate : public HExpression<3> { + public: + HArm64MultiplyAccumulate(Primitive::Type type, + InstructionKind op, + HInstruction* accumulator, + HInstruction* mul_left, + HInstruction* mul_right, + uint32_t dex_pc = kNoDexPc) + : HExpression(type, SideEffects::None(), dex_pc), op_kind_(op) { + SetRawInputAt(kInputAccumulatorIndex, accumulator); + SetRawInputAt(kInputMulLeftIndex, mul_left); + SetRawInputAt(kInputMulRightIndex, mul_right); + } + + static constexpr int kInputAccumulatorIndex = 0; + static constexpr int kInputMulLeftIndex = 1; + static constexpr int kInputMulRightIndex = 2; + + bool CanBeMoved() const OVERRIDE { return true; } + bool InstructionDataEquals(HInstruction* other) const OVERRIDE { + return op_kind_ == other->AsArm64MultiplyAccumulate()->op_kind_; + } + + InstructionKind GetOpKind() const { return op_kind_; } + + DECLARE_INSTRUCTION(Arm64MultiplyAccumulate); + + private: + // Indicates if this is a MADD or MSUB. 
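+ // (HInstruction::kAdd selects MADD, HInstruction::kSub selects MSUB.)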
+ InstructionKind op_kind_; + + DISALLOW_COPY_AND_ASSIGN(HArm64MultiplyAccumulate); +}; + } // namespace art #endif // ART_COMPILER_OPTIMIZING_NODES_ARM64_H_ diff --git a/compiler/optimizing/nodes_shared.h b/compiler/optimizing/nodes_shared.h deleted file mode 100644 index b04b622838..0000000000 --- a/compiler/optimizing/nodes_shared.h +++ /dev/null @@ -1,58 +0,0 @@ -/* - * Copyright (C) 2015 The Android Open Source Project - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef ART_COMPILER_OPTIMIZING_NODES_SHARED_H_ -#define ART_COMPILER_OPTIMIZING_NODES_SHARED_H_ - -namespace art { - -class HMultiplyAccumulate : public HExpression<3> { - public: - HMultiplyAccumulate(Primitive::Type type, - InstructionKind op, - HInstruction* accumulator, - HInstruction* mul_left, - HInstruction* mul_right, - uint32_t dex_pc = kNoDexPc) - : HExpression(type, SideEffects::None(), dex_pc), op_kind_(op) { - SetRawInputAt(kInputAccumulatorIndex, accumulator); - SetRawInputAt(kInputMulLeftIndex, mul_left); - SetRawInputAt(kInputMulRightIndex, mul_right); - } - - static constexpr int kInputAccumulatorIndex = 0; - static constexpr int kInputMulLeftIndex = 1; - static constexpr int kInputMulRightIndex = 2; - - bool CanBeMoved() const OVERRIDE { return true; } - bool InstructionDataEquals(HInstruction* other) const OVERRIDE { - return op_kind_ == other->AsMultiplyAccumulate()->op_kind_; - } - - InstructionKind GetOpKind() const { return op_kind_; } - - DECLARE_INSTRUCTION(MultiplyAccumulate); - - private: - // Indicates if this is a MADD or MSUB. 
- const InstructionKind op_kind_; - - DISALLOW_COPY_AND_ASSIGN(HMultiplyAccumulate); -}; - -} // namespace art - -#endif // ART_COMPILER_OPTIMIZING_NODES_SHARED_H_ diff --git a/compiler/optimizing/optimizing_compiler.cc b/compiler/optimizing/optimizing_compiler.cc index 4da48bdfc3..fffd00535c 100644 --- a/compiler/optimizing/optimizing_compiler.cc +++ b/compiler/optimizing/optimizing_compiler.cc @@ -62,7 +62,6 @@ #include "induction_var_analysis.h" #include "inliner.h" #include "instruction_simplifier.h" -#include "instruction_simplifier_arm.h" #include "intrinsics.h" #include "jit/debugger_interface.h" #include "jit/jit_code_cache.h" @@ -446,11 +445,8 @@ static void RunArchOptimizations(InstructionSet instruction_set, case kThumb2: case kArm: { arm::DexCacheArrayFixups* fixups = new (arena) arm::DexCacheArrayFixups(graph, stats); - arm::InstructionSimplifierArm* simplifier = - new (arena) arm::InstructionSimplifierArm(graph, stats); HOptimization* arm_optimizations[] = { - fixups, - simplifier + fixups }; RunOptimizations(arm_optimizations, arraysize(arm_optimizations), pass_observer); break; diff --git a/compiler/optimizing/register_allocator.cc b/compiler/optimizing/register_allocator.cc index a966b62b4f..d77639d608 100644 --- a/compiler/optimizing/register_allocator.cc +++ b/compiler/optimizing/register_allocator.cc @@ -1734,6 +1734,12 @@ void RegisterAllocator::ConnectSiblings(LiveInterval* interval) { } } +static bool IsMaterializableEntryBlockInstructionOfGraphWithIrreducibleLoop( + HInstruction* instruction) { + return instruction->GetBlock()->GetGraph()->HasIrreducibleLoops() && + (instruction->IsConstant() || instruction->IsCurrentMethod()); +} + void RegisterAllocator::ConnectSplitSiblings(LiveInterval* interval, HBasicBlock* from, HBasicBlock* to) const { @@ -1750,7 +1756,19 @@ void RegisterAllocator::ConnectSplitSiblings(LiveInterval* interval, // Interval was not split. return; } - DCHECK(destination != nullptr && source != nullptr); + + LiveInterval* parent = interval->GetParent(); + HInstruction* defined_by = parent->GetDefinedBy(); + if (destination == nullptr) { + // Our live_in fixed point calculation has found that the instruction is live + // in the `to` block because it will eventually enter an irreducible loop. Our + // live interval computation however does not compute a fixed point, and + // therefore will not have a location for that instruction for `to`. + // Because the instruction is a constant or the ArtMethod, we don't need to + // do anything: it will be materialized in the irreducible loop. + DCHECK(IsMaterializableEntryBlockInstructionOfGraphWithIrreducibleLoop(defined_by)); + return; + } if (!destination->HasRegister()) { // Values are eagerly spilled. Spill slot already contains appropriate value. @@ -1761,13 +1779,13 @@ void RegisterAllocator::ConnectSplitSiblings(LiveInterval* interval, // we need to put the moves at the entry of `to`. if (from->GetNormalSuccessors().size() == 1) { InsertParallelMoveAtExitOf(from, - interval->GetParent()->GetDefinedBy(), + defined_by, source->ToLocation(), destination->ToLocation()); } else { DCHECK_EQ(to->GetPredecessors().size(), 1u); InsertParallelMoveAtEntryOf(to, - interval->GetParent()->GetDefinedBy(), + defined_by, source->ToLocation(), destination->ToLocation()); }