 compiler/optimizing/code_generator_arm64.cc | 771
 compiler/optimizing/code_generator_arm64.h  | 123
 compiler/optimizing/intrinsics_arm64.cc     |  43
 3 files changed, 665 insertions(+), 272 deletions(-)
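The substance of the change is an ARM64 fast path for Baker read barriers: heap-reference loads are routed through the new CodeGeneratorARM64::GenerateReferenceLoadWithBakerReadBarrier helper, which inspects the object's lock-word read-barrier state and only calls the marking entry point when the object is gray. As a rough, non-authoritative sketch (lifted from the in-code comment in that helper below; `src` stands for the loaded field address and ReadBarrier::Mark for the pReadBarrierMark runtime entry point reached through the new ReadBarrierMarkSlowPathARM64), the emitted sequence behaves like:

  uint32_t rb_state = LockWord(obj->monitor_).ReadBarrierState();
  // Load fence or artificial data dependency to prevent load-load reordering.
  HeapReference<Object> ref = *src;  // Original reference load.
  bool is_gray = (rb_state == ReadBarrier::gray_ptr_);
  if (is_gray) {
    ref = ReadBarrier::Mark(ref);  // Performed by runtime entry point slow path.
  }
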
diff --git a/compiler/optimizing/code_generator_arm64.cc b/compiler/optimizing/code_generator_arm64.cc index 3d65e9c53c..c0e3959933 100644 --- a/compiler/optimizing/code_generator_arm64.cc +++ b/compiler/optimizing/code_generator_arm64.cc @@ -584,6 +584,56 @@ void JumpTableARM64::EmitTable(CodeGeneratorARM64* codegen) { } } +// Slow path marking an object during a read barrier. +class ReadBarrierMarkSlowPathARM64 : public SlowPathCodeARM64 { + public: + ReadBarrierMarkSlowPathARM64(HInstruction* instruction, Location out, Location obj) + : instruction_(instruction), out_(out), obj_(obj) { + DCHECK(kEmitCompilerReadBarrier); + } + + const char* GetDescription() const OVERRIDE { return "ReadBarrierMarkSlowPathARM64"; } + + void EmitNativeCode(CodeGenerator* codegen) OVERRIDE { + LocationSummary* locations = instruction_->GetLocations(); + Primitive::Type type = Primitive::kPrimNot; + DCHECK(locations->CanCall()); + DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(out_.reg())); + DCHECK(instruction_->IsInstanceFieldGet() || + instruction_->IsStaticFieldGet() || + instruction_->IsArrayGet() || + instruction_->IsLoadClass() || + instruction_->IsLoadString() || + instruction_->IsInstanceOf() || + instruction_->IsCheckCast()) + << "Unexpected instruction in read barrier marking slow path: " + << instruction_->DebugName(); + + __ Bind(GetEntryLabel()); + SaveLiveRegisters(codegen, locations); + + InvokeRuntimeCallingConvention calling_convention; + CodeGeneratorARM64* arm64_codegen = down_cast<CodeGeneratorARM64*>(codegen); + arm64_codegen->MoveLocation(LocationFrom(calling_convention.GetRegisterAt(0)), obj_, type); + arm64_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pReadBarrierMark), + instruction_, + instruction_->GetDexPc(), + this); + CheckEntrypointTypes<kQuickReadBarrierMark, mirror::Object*, mirror::Object*>(); + arm64_codegen->MoveLocation(out_, calling_convention.GetReturnLocation(type), type); + + RestoreLiveRegisters(codegen, locations); + __ B(GetExitLabel()); + } + + private: + HInstruction* const instruction_; + const Location out_; + const Location obj_; + + DISALLOW_COPY_AND_ASSIGN(ReadBarrierMarkSlowPathARM64); +}; + // Slow path generating a read barrier for a heap reference. class ReadBarrierForHeapReferenceSlowPathARM64 : public SlowPathCodeARM64 { public: @@ -605,7 +655,7 @@ class ReadBarrierForHeapReferenceSlowPathARM64 : public SlowPathCodeARM64 { // to be instrumented, e.g.: // // __ Ldr(out, HeapOperand(out, class_offset); - // codegen_->GenerateReadBarrier(instruction, out_loc, out_loc, out_loc, offset); + // codegen_->GenerateReadBarrierSlow(instruction, out_loc, out_loc, out_loc, offset); // // In that case, we have lost the information about the original // object, and the emitted read barrier cannot work properly. @@ -621,7 +671,9 @@ class ReadBarrierForHeapReferenceSlowPathARM64 : public SlowPathCodeARM64 { DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(out_.reg())); DCHECK(!instruction_->IsInvoke() || (instruction_->IsInvokeStaticOrDirect() && - instruction_->GetLocations()->Intrinsified())); + instruction_->GetLocations()->Intrinsified())) + << "Unexpected instruction in read barrier for heap reference slow path: " + << instruction_->DebugName(); // The read barrier instrumentation does not support the // HArm64IntermediateAddress instruction yet. 
DCHECK(!(instruction_->IsArrayGet() && @@ -769,14 +821,18 @@ class ReadBarrierForHeapReferenceSlowPathARM64 : public SlowPathCodeARM64 { class ReadBarrierForRootSlowPathARM64 : public SlowPathCodeARM64 { public: ReadBarrierForRootSlowPathARM64(HInstruction* instruction, Location out, Location root) - : instruction_(instruction), out_(out), root_(root) {} + : instruction_(instruction), out_(out), root_(root) { + DCHECK(kEmitCompilerReadBarrier); + } void EmitNativeCode(CodeGenerator* codegen) OVERRIDE { LocationSummary* locations = instruction_->GetLocations(); Primitive::Type type = Primitive::kPrimNot; DCHECK(locations->CanCall()); DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(out_.reg())); - DCHECK(instruction_->IsLoadClass() || instruction_->IsLoadString()); + DCHECK(instruction_->IsLoadClass() || instruction_->IsLoadString()) + << "Unexpected instruction in read barrier for GC root slow path: " + << instruction_->DebugName(); __ Bind(GetEntryLabel()); SaveLiveRegisters(codegen, locations); @@ -1338,7 +1394,8 @@ void CodeGeneratorARM64::Load(Primitive::Type type, void CodeGeneratorARM64::LoadAcquire(HInstruction* instruction, CPURegister dst, - const MemOperand& src) { + const MemOperand& src, + bool needs_null_check) { MacroAssembler* masm = GetVIXLAssembler(); BlockPoolsScope block_pools(masm); UseScratchRegisterScope temps(masm); @@ -1354,20 +1411,28 @@ void CodeGeneratorARM64::LoadAcquire(HInstruction* instruction, switch (type) { case Primitive::kPrimBoolean: __ Ldarb(Register(dst), base); - MaybeRecordImplicitNullCheck(instruction); + if (needs_null_check) { + MaybeRecordImplicitNullCheck(instruction); + } break; case Primitive::kPrimByte: __ Ldarb(Register(dst), base); - MaybeRecordImplicitNullCheck(instruction); + if (needs_null_check) { + MaybeRecordImplicitNullCheck(instruction); + } __ Sbfx(Register(dst), Register(dst), 0, Primitive::ComponentSize(type) * kBitsPerByte); break; case Primitive::kPrimChar: __ Ldarh(Register(dst), base); - MaybeRecordImplicitNullCheck(instruction); + if (needs_null_check) { + MaybeRecordImplicitNullCheck(instruction); + } break; case Primitive::kPrimShort: __ Ldarh(Register(dst), base); - MaybeRecordImplicitNullCheck(instruction); + if (needs_null_check) { + MaybeRecordImplicitNullCheck(instruction); + } __ Sbfx(Register(dst), Register(dst), 0, Primitive::ComponentSize(type) * kBitsPerByte); break; case Primitive::kPrimInt: @@ -1375,7 +1440,9 @@ void CodeGeneratorARM64::LoadAcquire(HInstruction* instruction, case Primitive::kPrimLong: DCHECK_EQ(dst.Is64Bits(), Primitive::Is64BitType(type)); __ Ldar(Register(dst), base); - MaybeRecordImplicitNullCheck(instruction); + if (needs_null_check) { + MaybeRecordImplicitNullCheck(instruction); + } break; case Primitive::kPrimFloat: case Primitive::kPrimDouble: { @@ -1384,7 +1451,9 @@ void CodeGeneratorARM64::LoadAcquire(HInstruction* instruction, Register temp = dst.Is64Bits() ? 
temps.AcquireX() : temps.AcquireW(); __ Ldar(temp, base); - MaybeRecordImplicitNullCheck(instruction); + if (needs_null_check) { + MaybeRecordImplicitNullCheck(instruction); + } __ Fmov(FPRegister(dst), temp); break; } @@ -1505,7 +1574,7 @@ void InstructionCodeGeneratorARM64::GenerateClassInitializationCheck(SlowPathCod __ Bind(slow_path->GetExitLabel()); } -void InstructionCodeGeneratorARM64::GenerateMemoryBarrier(MemBarrierKind kind) { +void CodeGeneratorARM64::GenerateMemoryBarrier(MemBarrierKind kind) { BarrierType type = BarrierAll; switch (kind) { @@ -1641,33 +1710,62 @@ void LocationsBuilderARM64::HandleFieldGet(HInstruction* instruction) { void InstructionCodeGeneratorARM64::HandleFieldGet(HInstruction* instruction, const FieldInfo& field_info) { DCHECK(instruction->IsInstanceFieldGet() || instruction->IsStaticFieldGet()); + LocationSummary* locations = instruction->GetLocations(); + Location base_loc = locations->InAt(0); + Location out = locations->Out(); + uint32_t offset = field_info.GetFieldOffset().Uint32Value(); Primitive::Type field_type = field_info.GetFieldType(); BlockPoolsScope block_pools(GetVIXLAssembler()); MemOperand field = HeapOperand(InputRegisterAt(instruction, 0), field_info.GetFieldOffset()); bool use_acquire_release = codegen_->GetInstructionSetFeatures().PreferAcquireRelease(); - if (field_info.IsVolatile()) { - if (use_acquire_release) { - // NB: LoadAcquire will record the pc info if needed. - codegen_->LoadAcquire(instruction, OutputCPURegister(instruction), field); + if (field_type == Primitive::kPrimNot && kEmitCompilerReadBarrier && kUseBakerReadBarrier) { + // Object FieldGet with Baker's read barrier case. + MacroAssembler* masm = GetVIXLAssembler(); + UseScratchRegisterScope temps(masm); + // /* HeapReference<Object> */ out = *(base + offset) + Register base = RegisterFrom(base_loc, Primitive::kPrimNot); + Register temp = temps.AcquireW(); + // Note that potential implicit null checks are handled in this + // CodeGeneratorARM64::GenerateFieldLoadWithBakerReadBarrier call. + codegen_->GenerateFieldLoadWithBakerReadBarrier( + instruction, + out, + base, + offset, + temp, + /* needs_null_check */ true, + field_info.IsVolatile() && use_acquire_release); + if (field_info.IsVolatile() && !use_acquire_release) { + // For IRIW sequential consistency kLoadAny is not sufficient. + codegen_->GenerateMemoryBarrier(MemBarrierKind::kAnyAny); + } + } else { + // General case. + if (field_info.IsVolatile()) { + if (use_acquire_release) { + // Note that a potential implicit null check is handled in this + // CodeGeneratorARM64::LoadAcquire call. + // NB: LoadAcquire will record the pc info if needed. + codegen_->LoadAcquire( + instruction, OutputCPURegister(instruction), field, /* needs_null_check */ true); + } else { + codegen_->Load(field_type, OutputCPURegister(instruction), field); + codegen_->MaybeRecordImplicitNullCheck(instruction); + // For IRIW sequential consistency kLoadAny is not sufficient. + codegen_->GenerateMemoryBarrier(MemBarrierKind::kAnyAny); + } } else { codegen_->Load(field_type, OutputCPURegister(instruction), field); codegen_->MaybeRecordImplicitNullCheck(instruction); - // For IRIW sequential consistency kLoadAny is not sufficient. 
- GenerateMemoryBarrier(MemBarrierKind::kAnyAny); } - } else { - codegen_->Load(field_type, OutputCPURegister(instruction), field); - codegen_->MaybeRecordImplicitNullCheck(instruction); - } - - if (field_type == Primitive::kPrimNot) { - LocationSummary* locations = instruction->GetLocations(); - Location base = locations->InAt(0); - Location out = locations->Out(); - uint32_t offset = field_info.GetFieldOffset().Uint32Value(); - codegen_->MaybeGenerateReadBarrier(instruction, out, out, base, offset); + if (field_type == Primitive::kPrimNot) { + // If read barriers are enabled, emit read barriers other than + // Baker's using a slow path (and also unpoison the loaded + // reference, if heap poisoning is enabled). + codegen_->MaybeGenerateReadBarrierSlow(instruction, out, out, base_loc, offset); + } } } @@ -1713,10 +1811,10 @@ void InstructionCodeGeneratorARM64::HandleFieldSet(HInstruction* instruction, codegen_->StoreRelease(field_type, source, HeapOperand(obj, offset)); codegen_->MaybeRecordImplicitNullCheck(instruction); } else { - GenerateMemoryBarrier(MemBarrierKind::kAnyStore); + codegen_->GenerateMemoryBarrier(MemBarrierKind::kAnyStore); codegen_->Store(field_type, source, HeapOperand(obj, offset)); codegen_->MaybeRecordImplicitNullCheck(instruction); - GenerateMemoryBarrier(MemBarrierKind::kAnyAny); + codegen_->GenerateMemoryBarrier(MemBarrierKind::kAnyAny); } } else { codegen_->Store(field_type, source, HeapOperand(obj, offset)); @@ -2021,50 +2119,62 @@ void InstructionCodeGeneratorARM64::VisitArrayGet(HArrayGet* instruction) { LocationSummary* locations = instruction->GetLocations(); Location index = locations->InAt(1); uint32_t offset = mirror::Array::DataOffset(Primitive::ComponentSize(type)).Uint32Value(); - MemOperand source = HeapOperand(obj); - CPURegister dest = OutputCPURegister(instruction); + Location out = locations->Out(); MacroAssembler* masm = GetVIXLAssembler(); UseScratchRegisterScope temps(masm); // Block pools between `Load` and `MaybeRecordImplicitNullCheck`. BlockPoolsScope block_pools(masm); - if (index.IsConstant()) { - offset += Int64ConstantFrom(index) << Primitive::ComponentSizeShift(type); - source = HeapOperand(obj, offset); + if (type == Primitive::kPrimNot && kEmitCompilerReadBarrier && kUseBakerReadBarrier) { + // Object ArrayGet with Baker's read barrier case. + Register temp = temps.AcquireW(); + // The read barrier instrumentation does not support the + // HArm64IntermediateAddress instruction yet. + DCHECK(!instruction->GetArray()->IsArm64IntermediateAddress()); + // Note that a potential implicit null check is handled in the + // CodeGeneratorARM64::GenerateArrayLoadWithBakerReadBarrier call. + codegen_->GenerateArrayLoadWithBakerReadBarrier( + instruction, out, obj.W(), offset, index, temp, /* needs_null_check */ true); } else { - Register temp = temps.AcquireSameSizeAs(obj); - if (instruction->GetArray()->IsArm64IntermediateAddress()) { - // The read barrier instrumentation does not support the - // HArm64IntermediateAddress instruction yet. - DCHECK(!kEmitCompilerReadBarrier); - // We do not need to compute the intermediate address from the array: the - // input instruction has done it already. See the comment in - // `InstructionSimplifierArm64::TryExtractArrayAccessAddress()`. - if (kIsDebugBuild) { - HArm64IntermediateAddress* tmp = instruction->GetArray()->AsArm64IntermediateAddress(); - DCHECK(tmp->GetOffset()->AsIntConstant()->GetValueAsUint64() == offset); - } - temp = obj; + // General case. 
+ MemOperand source = HeapOperand(obj); + if (index.IsConstant()) { + offset += Int64ConstantFrom(index) << Primitive::ComponentSizeShift(type); + source = HeapOperand(obj, offset); } else { - __ Add(temp, obj, offset); + Register temp = temps.AcquireSameSizeAs(obj); + if (instruction->GetArray()->IsArm64IntermediateAddress()) { + // The read barrier instrumentation does not support the + // HArm64IntermediateAddress instruction yet. + DCHECK(!kEmitCompilerReadBarrier); + // We do not need to compute the intermediate address from the array: the + // input instruction has done it already. See the comment in + // `InstructionSimplifierArm64::TryExtractArrayAccessAddress()`. + if (kIsDebugBuild) { + HArm64IntermediateAddress* tmp = instruction->GetArray()->AsArm64IntermediateAddress(); + DCHECK_EQ(tmp->GetOffset()->AsIntConstant()->GetValueAsUint64(), offset); + } + temp = obj; + } else { + __ Add(temp, obj, offset); + } + source = HeapOperand(temp, XRegisterFrom(index), LSL, Primitive::ComponentSizeShift(type)); } - source = HeapOperand(temp, XRegisterFrom(index), LSL, Primitive::ComponentSizeShift(type)); - } - codegen_->Load(type, dest, source); - codegen_->MaybeRecordImplicitNullCheck(instruction); + codegen_->Load(type, OutputCPURegister(instruction), source); + codegen_->MaybeRecordImplicitNullCheck(instruction); - if (type == Primitive::kPrimNot) { - static_assert( - sizeof(mirror::HeapReference<mirror::Object>) == sizeof(int32_t), - "art::mirror::HeapReference<art::mirror::Object> and int32_t have different sizes."); - Location obj_loc = locations->InAt(0); - Location out = locations->Out(); - if (index.IsConstant()) { - codegen_->MaybeGenerateReadBarrier(instruction, out, out, obj_loc, offset); - } else { - codegen_->MaybeGenerateReadBarrier(instruction, out, out, obj_loc, offset, index); + if (type == Primitive::kPrimNot) { + static_assert( + sizeof(mirror::HeapReference<mirror::Object>) == sizeof(int32_t), + "art::mirror::HeapReference<art::mirror::Object> and int32_t have different sizes."); + Location obj_loc = locations->InAt(0); + if (index.IsConstant()) { + codegen_->MaybeGenerateReadBarrierSlow(instruction, out, out, obj_loc, offset); + } else { + codegen_->MaybeGenerateReadBarrierSlow(instruction, out, out, obj_loc, offset, index); + } } } } @@ -2194,12 +2304,12 @@ void InstructionCodeGeneratorARM64::VisitArraySet(HArraySet* instruction) { // __ Mov(temp2, temp); // // /* HeapReference<Class> */ temp = temp->component_type_ // __ Ldr(temp, HeapOperand(temp, component_offset)); - // codegen_->GenerateReadBarrier( + // codegen_->GenerateReadBarrierSlow( // instruction, temp_loc, temp_loc, temp2_loc, component_offset); // // // /* HeapReference<Class> */ temp2 = value->klass_ // __ Ldr(temp2, HeapOperand(Register(value), class_offset)); - // codegen_->GenerateReadBarrier( + // codegen_->GenerateReadBarrierSlow( // instruction, temp2_loc, temp2_loc, value_loc, class_offset, temp_loc); // // __ Cmp(temp, temp2); @@ -2922,6 +3032,14 @@ void InstructionCodeGeneratorARM64::VisitInstanceFieldSet(HInstanceFieldSet* ins HandleFieldSet(instruction, instruction->GetFieldInfo(), instruction->GetValueCanBeNull()); } +static bool TypeCheckNeedsATemporary(TypeCheckKind type_check_kind) { + return kEmitCompilerReadBarrier && + (kUseBakerReadBarrier || + type_check_kind == TypeCheckKind::kAbstractClassCheck || + type_check_kind == TypeCheckKind::kClassHierarchyCheck || + type_check_kind == TypeCheckKind::kArrayObjectCheck); +} + void LocationsBuilderARM64::VisitInstanceOf(HInstanceOf* 
instruction) { LocationSummary::CallKind call_kind = LocationSummary::kNoCall; TypeCheckKind type_check_kind = instruction->GetTypeCheckKind(); @@ -2948,21 +3066,22 @@ void LocationsBuilderARM64::VisitInstanceOf(HInstanceOf* instruction) { locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap); // When read barriers are enabled, we need a temporary register for // some cases. - if (kEmitCompilerReadBarrier && - (type_check_kind == TypeCheckKind::kAbstractClassCheck || - type_check_kind == TypeCheckKind::kClassHierarchyCheck || - type_check_kind == TypeCheckKind::kArrayObjectCheck)) { + if (TypeCheckNeedsATemporary(type_check_kind)) { locations->AddTemp(Location::RequiresRegister()); } } void InstructionCodeGeneratorARM64::VisitInstanceOf(HInstanceOf* instruction) { + TypeCheckKind type_check_kind = instruction->GetTypeCheckKind(); LocationSummary* locations = instruction->GetLocations(); Location obj_loc = locations->InAt(0); Register obj = InputRegisterAt(instruction, 0); Register cls = InputRegisterAt(instruction, 1); Location out_loc = locations->Out(); Register out = OutputRegister(instruction); + Location maybe_temp_loc = TypeCheckNeedsATemporary(type_check_kind) ? + locations->GetTemp(0) : + Location::NoLocation(); uint32_t class_offset = mirror::Object::ClassOffset().Int32Value(); uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value(); uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value(); @@ -2978,10 +3097,9 @@ void InstructionCodeGeneratorARM64::VisitInstanceOf(HInstanceOf* instruction) { } // /* HeapReference<Class> */ out = obj->klass_ - __ Ldr(out, HeapOperand(obj.W(), class_offset)); - codegen_->MaybeGenerateReadBarrier(instruction, out_loc, out_loc, obj_loc, class_offset); + GenerateReferenceLoadTwoRegisters(instruction, out_loc, obj_loc, class_offset, maybe_temp_loc); - switch (instruction->GetTypeCheckKind()) { + switch (type_check_kind) { case TypeCheckKind::kExactCheck: { __ Cmp(out, cls); __ Cset(out, eq); @@ -2996,17 +3114,8 @@ void InstructionCodeGeneratorARM64::VisitInstanceOf(HInstanceOf* instruction) { // object to avoid doing a comparison we know will fail. vixl::Label loop, success; __ Bind(&loop); - Location temp_loc = kEmitCompilerReadBarrier ? locations->GetTemp(0) : Location::NoLocation(); - if (kEmitCompilerReadBarrier) { - // Save the value of `out` into `temp` before overwriting it - // in the following move operation, as we will need it for the - // read barrier below. - Register temp = WRegisterFrom(temp_loc); - __ Mov(temp, out); - } // /* HeapReference<Class> */ out = out->super_class_ - __ Ldr(out, HeapOperand(out, super_offset)); - codegen_->MaybeGenerateReadBarrier(instruction, out_loc, out_loc, temp_loc, super_offset); + GenerateReferenceLoadOneRegister(instruction, out_loc, super_offset, maybe_temp_loc); // If `out` is null, we use it for the result, and jump to `done`. __ Cbz(out, &done); __ Cmp(out, cls); @@ -3024,17 +3133,8 @@ void InstructionCodeGeneratorARM64::VisitInstanceOf(HInstanceOf* instruction) { __ Bind(&loop); __ Cmp(out, cls); __ B(eq, &success); - Location temp_loc = kEmitCompilerReadBarrier ? locations->GetTemp(0) : Location::NoLocation(); - if (kEmitCompilerReadBarrier) { - // Save the value of `out` into `temp` before overwriting it - // in the following move operation, as we will need it for the - // read barrier below. 
- Register temp = WRegisterFrom(temp_loc); - __ Mov(temp, out); - } // /* HeapReference<Class> */ out = out->super_class_ - __ Ldr(out, HeapOperand(out, super_offset)); - codegen_->MaybeGenerateReadBarrier(instruction, out_loc, out_loc, temp_loc, super_offset); + GenerateReferenceLoadOneRegister(instruction, out_loc, super_offset, maybe_temp_loc); __ Cbnz(out, &loop); // If `out` is null, we use it for the result, and jump to `done`. __ B(&done); @@ -3052,17 +3152,8 @@ void InstructionCodeGeneratorARM64::VisitInstanceOf(HInstanceOf* instruction) { __ Cmp(out, cls); __ B(eq, &exact_check); // Otherwise, we need to check that the object's class is a non-primitive array. - Location temp_loc = kEmitCompilerReadBarrier ? locations->GetTemp(0) : Location::NoLocation(); - if (kEmitCompilerReadBarrier) { - // Save the value of `out` into `temp` before overwriting it - // in the following move operation, as we will need it for the - // read barrier below. - Register temp = WRegisterFrom(temp_loc); - __ Mov(temp, out); - } // /* HeapReference<Class> */ out = out->component_type_ - __ Ldr(out, HeapOperand(out, component_offset)); - codegen_->MaybeGenerateReadBarrier(instruction, out_loc, out_loc, temp_loc, component_offset); + GenerateReferenceLoadOneRegister(instruction, out_loc, component_offset, maybe_temp_loc); // If `out` is null, we use it for the result, and jump to `done`. __ Cbz(out, &done); __ Ldrh(out, HeapOperand(out, primitive_offset)); @@ -3101,6 +3192,13 @@ void InstructionCodeGeneratorARM64::VisitInstanceOf(HInstanceOf* instruction) { // HInstanceOf instruction (following the runtime calling // convention), which might be cluttered by the potential first // read barrier emission at the beginning of this method. + // + // TODO: Introduce a new runtime entry point taking the object + // to test (instead of its class) as argument, and let it deal + // with the read barrier issues. This will let us refactor this + // case of the `switch` code as it was previously (with a direct + // call to the runtime not using a type checking slow path). + // This should also be beneficial for the other cases above. DCHECK(locations->OnlyCallsOnSlowPath()); slow_path = new (GetGraph()->GetArena()) TypeCheckSlowPathARM64(instruction, /* is_fatal */ false); @@ -3153,30 +3251,29 @@ void LocationsBuilderARM64::VisitCheckCast(HCheckCast* instruction) { locations->SetInAt(1, Location::RequiresRegister()); // Note that TypeCheckSlowPathARM64 uses this "temp" register too. locations->AddTemp(Location::RequiresRegister()); - locations->AddTemp(Location::RequiresRegister()); // When read barriers are enabled, we need an additional temporary // register for some cases. - if (kEmitCompilerReadBarrier && - (type_check_kind == TypeCheckKind::kAbstractClassCheck || - type_check_kind == TypeCheckKind::kClassHierarchyCheck || - type_check_kind == TypeCheckKind::kArrayObjectCheck)) { - locations->AddTemp(Location::RequiresRegister()); + if (TypeCheckNeedsATemporary(type_check_kind)) { + locations->AddTemp(Location::RequiresRegister()); } } void InstructionCodeGeneratorARM64::VisitCheckCast(HCheckCast* instruction) { + TypeCheckKind type_check_kind = instruction->GetTypeCheckKind(); LocationSummary* locations = instruction->GetLocations(); Location obj_loc = locations->InAt(0); Register obj = InputRegisterAt(instruction, 0); Register cls = InputRegisterAt(instruction, 1); Location temp_loc = locations->GetTemp(0); + Location maybe_temp2_loc = TypeCheckNeedsATemporary(type_check_kind) ? 
+ locations->GetTemp(1) : + Location::NoLocation(); Register temp = WRegisterFrom(temp_loc); uint32_t class_offset = mirror::Object::ClassOffset().Int32Value(); uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value(); uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value(); uint32_t primitive_offset = mirror::Class::PrimitiveTypeOffset().Int32Value(); - TypeCheckKind type_check_kind = instruction->GetTypeCheckKind(); bool is_type_check_slow_path_fatal = (type_check_kind == TypeCheckKind::kExactCheck || type_check_kind == TypeCheckKind::kAbstractClassCheck || @@ -3195,8 +3292,7 @@ void InstructionCodeGeneratorARM64::VisitCheckCast(HCheckCast* instruction) { } // /* HeapReference<Class> */ temp = obj->klass_ - __ Ldr(temp, HeapOperand(obj, class_offset)); - codegen_->MaybeGenerateReadBarrier(instruction, temp_loc, temp_loc, obj_loc, class_offset); + GenerateReferenceLoadTwoRegisters(instruction, temp_loc, obj_loc, class_offset, maybe_temp2_loc); switch (type_check_kind) { case TypeCheckKind::kExactCheck: @@ -3213,18 +3309,8 @@ void InstructionCodeGeneratorARM64::VisitCheckCast(HCheckCast* instruction) { // object to avoid doing a comparison we know will fail. vixl::Label loop, compare_classes; __ Bind(&loop); - Location temp2_loc = - kEmitCompilerReadBarrier ? locations->GetTemp(1) : Location::NoLocation(); - if (kEmitCompilerReadBarrier) { - // Save the value of `temp` into `temp2` before overwriting it - // in the following move operation, as we will need it for the - // read barrier below. - Register temp2 = WRegisterFrom(temp2_loc); - __ Mov(temp2, temp); - } // /* HeapReference<Class> */ temp = temp->super_class_ - __ Ldr(temp, HeapOperand(temp, super_offset)); - codegen_->MaybeGenerateReadBarrier(instruction, temp_loc, temp_loc, temp2_loc, super_offset); + GenerateReferenceLoadOneRegister(instruction, temp_loc, super_offset, maybe_temp2_loc); // If the class reference currently in `temp` is not null, jump // to the `compare_classes` label to compare it with the checked @@ -3236,8 +3322,8 @@ void InstructionCodeGeneratorARM64::VisitCheckCast(HCheckCast* instruction) { // going into the slow path, as it has been overwritten in the // meantime. // /* HeapReference<Class> */ temp = obj->klass_ - __ Ldr(temp, HeapOperand(obj, class_offset)); - codegen_->MaybeGenerateReadBarrier(instruction, temp_loc, temp_loc, obj_loc, class_offset); + GenerateReferenceLoadTwoRegisters( + instruction, temp_loc, obj_loc, class_offset, maybe_temp2_loc); __ B(type_check_slow_path->GetEntryLabel()); __ Bind(&compare_classes); @@ -3253,18 +3339,8 @@ void InstructionCodeGeneratorARM64::VisitCheckCast(HCheckCast* instruction) { __ Cmp(temp, cls); __ B(eq, &done); - Location temp2_loc = - kEmitCompilerReadBarrier ? locations->GetTemp(1) : Location::NoLocation(); - if (kEmitCompilerReadBarrier) { - // Save the value of `temp` into `temp2` before overwriting it - // in the following move operation, as we will need it for the - // read barrier below. - Register temp2 = WRegisterFrom(temp2_loc); - __ Mov(temp2, temp); - } // /* HeapReference<Class> */ temp = temp->super_class_ - __ Ldr(temp, HeapOperand(temp, super_offset)); - codegen_->MaybeGenerateReadBarrier(instruction, temp_loc, temp_loc, temp2_loc, super_offset); + GenerateReferenceLoadOneRegister(instruction, temp_loc, super_offset, maybe_temp2_loc); // If the class reference currently in `temp` is not null, jump // back at the beginning of the loop. 
@@ -3275,8 +3351,8 @@ void InstructionCodeGeneratorARM64::VisitCheckCast(HCheckCast* instruction) { // going into the slow path, as it has been overwritten in the // meantime. // /* HeapReference<Class> */ temp = obj->klass_ - __ Ldr(temp, HeapOperand(obj, class_offset)); - codegen_->MaybeGenerateReadBarrier(instruction, temp_loc, temp_loc, obj_loc, class_offset); + GenerateReferenceLoadTwoRegisters( + instruction, temp_loc, obj_loc, class_offset, maybe_temp2_loc); __ B(type_check_slow_path->GetEntryLabel()); break; } @@ -3288,19 +3364,8 @@ void InstructionCodeGeneratorARM64::VisitCheckCast(HCheckCast* instruction) { __ B(eq, &done); // Otherwise, we need to check that the object's class is a non-primitive array. - Location temp2_loc = - kEmitCompilerReadBarrier ? locations->GetTemp(1) : Location::NoLocation(); - if (kEmitCompilerReadBarrier) { - // Save the value of `temp` into `temp2` before overwriting it - // in the following move operation, as we will need it for the - // read barrier below. - Register temp2 = WRegisterFrom(temp2_loc); - __ Mov(temp2, temp); - } // /* HeapReference<Class> */ temp = temp->component_type_ - __ Ldr(temp, HeapOperand(temp, component_offset)); - codegen_->MaybeGenerateReadBarrier( - instruction, temp_loc, temp_loc, temp2_loc, component_offset); + GenerateReferenceLoadOneRegister(instruction, temp_loc, component_offset, maybe_temp2_loc); // If the component type is not null (i.e. the object is indeed // an array), jump to label `check_non_primitive_component_type` @@ -3313,8 +3378,8 @@ void InstructionCodeGeneratorARM64::VisitCheckCast(HCheckCast* instruction) { // going into the slow path, as it has been overwritten in the // meantime. // /* HeapReference<Class> */ temp = obj->klass_ - __ Ldr(temp, HeapOperand(obj, class_offset)); - codegen_->MaybeGenerateReadBarrier(instruction, temp_loc, temp_loc, obj_loc, class_offset); + GenerateReferenceLoadTwoRegisters( + instruction, temp_loc, obj_loc, class_offset, maybe_temp2_loc); __ B(type_check_slow_path->GetEntryLabel()); __ Bind(&check_non_primitive_component_type); @@ -3323,8 +3388,8 @@ void InstructionCodeGeneratorARM64::VisitCheckCast(HCheckCast* instruction) { __ Cbz(temp, &done); // Same comment as above regarding `temp` and the slow path. // /* HeapReference<Class> */ temp = obj->klass_ - __ Ldr(temp, HeapOperand(obj, class_offset)); - codegen_->MaybeGenerateReadBarrier(instruction, temp_loc, temp_loc, obj_loc, class_offset); + GenerateReferenceLoadTwoRegisters( + instruction, temp_loc, obj_loc, class_offset, maybe_temp2_loc); __ B(type_check_slow_path->GetEntryLabel()); break; } @@ -3341,6 +3406,13 @@ void InstructionCodeGeneratorARM64::VisitCheckCast(HCheckCast* instruction) { // instruction (following the runtime calling convention), which // might be cluttered by the potential first read barrier // emission at the beginning of this method. + // + // TODO: Introduce a new runtime entry point taking the object + // to test (instead of its class) as argument, and let it deal + // with the read barrier issues. This will let us refactor this + // case of the `switch` code as it was previously (with a direct + // call to the runtime not using a type checking slow path). + // This should also be beneficial for the other cases above. 
__ B(type_check_slow_path->GetEntryLabel()); break; } @@ -3466,7 +3538,7 @@ static bool TryGenerateIntrinsicCode(HInvoke* invoke, CodeGeneratorARM64* codege HInvokeStaticOrDirect::DispatchInfo CodeGeneratorARM64::GetSupportedInvokeStaticOrDirectDispatch( const HInvokeStaticOrDirect::DispatchInfo& desired_dispatch_info, MethodReference target_method ATTRIBUTE_UNUSED) { - // On arm64 we support all dispatch types. + // On ARM64 we support all dispatch types. return desired_dispatch_info; } @@ -3743,32 +3815,17 @@ void InstructionCodeGeneratorARM64::VisitLoadClass(HLoadClass* cls) { if (cls->IsReferrersClass()) { DCHECK(!cls->CanCallRuntime()); DCHECK(!cls->MustGenerateClinitCheck()); - uint32_t declaring_class_offset = ArtMethod::DeclaringClassOffset().Int32Value(); - if (kEmitCompilerReadBarrier) { - // /* GcRoot<mirror::Class>* */ out = &(current_method->declaring_class_) - __ Add(out.X(), current_method.X(), declaring_class_offset); - // /* mirror::Class* */ out = out->Read() - codegen_->GenerateReadBarrierForRoot(cls, out_loc, out_loc); - } else { - // /* GcRoot<mirror::Class> */ out = current_method->declaring_class_ - __ Ldr(out, MemOperand(current_method, declaring_class_offset)); - } + // /* GcRoot<mirror::Class> */ out = current_method->declaring_class_ + GenerateGcRootFieldLoad( + cls, out_loc, current_method, ArtMethod::DeclaringClassOffset().Int32Value()); } else { MemberOffset resolved_types_offset = ArtMethod::DexCacheResolvedTypesOffset(kArm64PointerSize); // /* GcRoot<mirror::Class>[] */ out = // current_method.ptr_sized_fields_->dex_cache_resolved_types_ __ Ldr(out.X(), MemOperand(current_method, resolved_types_offset.Int32Value())); - - size_t cache_offset = CodeGenerator::GetCacheOffset(cls->GetTypeIndex()); - if (kEmitCompilerReadBarrier) { - // /* GcRoot<mirror::Class>* */ out = &out[type_index] - __ Add(out.X(), out.X(), cache_offset); - // /* mirror::Class* */ out = out->Read() - codegen_->GenerateReadBarrierForRoot(cls, out_loc, out_loc); - } else { - // /* GcRoot<mirror::Class> */ out = out[type_index] - __ Ldr(out, MemOperand(out.X(), cache_offset)); - } + // /* GcRoot<mirror::Class> */ out = out[type_index] + GenerateGcRootFieldLoad( + cls, out_loc, out.X(), CodeGenerator::GetCacheOffset(cls->GetTypeIndex())); if (!cls->IsInDexCache() || cls->MustGenerateClinitCheck()) { DCHECK(cls->CanCallRuntime()); @@ -3831,30 +3888,14 @@ void InstructionCodeGeneratorARM64::VisitLoadString(HLoadString* load) { Register out = OutputRegister(load); Register current_method = InputRegisterAt(load, 0); - uint32_t declaring_class_offset = ArtMethod::DeclaringClassOffset().Int32Value(); - if (kEmitCompilerReadBarrier) { - // /* GcRoot<mirror::Class>* */ out = &(current_method->declaring_class_) - __ Add(out.X(), current_method.X(), declaring_class_offset); - // /* mirror::Class* */ out = out->Read() - codegen_->GenerateReadBarrierForRoot(load, out_loc, out_loc); - } else { - // /* GcRoot<mirror::Class> */ out = current_method->declaring_class_ - __ Ldr(out, MemOperand(current_method, declaring_class_offset)); - } - + // /* GcRoot<mirror::Class> */ out = current_method->declaring_class_ + GenerateGcRootFieldLoad( + load, out_loc, current_method, ArtMethod::DeclaringClassOffset().Int32Value()); // /* GcRoot<mirror::String>[] */ out = out->dex_cache_strings_ __ Ldr(out.X(), HeapOperand(out, mirror::Class::DexCacheStringsOffset().Uint32Value())); - - size_t cache_offset = CodeGenerator::GetCacheOffset(load->GetStringIndex()); - if (kEmitCompilerReadBarrier) { - // /* 
GcRoot<mirror::String>* */ out = &out[string_index] - __ Add(out.X(), out.X(), cache_offset); - // /* mirror::String* */ out = out->Read() - codegen_->GenerateReadBarrierForRoot(load, out_loc, out_loc); - } else { - // /* GcRoot<mirror::String> */ out = out[string_index] - __ Ldr(out, MemOperand(out.X(), cache_offset)); - } + // /* GcRoot<mirror::String> */ out = out[string_index] + GenerateGcRootFieldLoad( + load, out_loc, out.X(), CodeGenerator::GetCacheOffset(load->GetStringIndex())); if (!load->IsInDexCache()) { SlowPathCodeARM64* slow_path = new (GetGraph()->GetArena()) LoadStringSlowPathARM64(load); @@ -4223,7 +4264,7 @@ void LocationsBuilderARM64::VisitMemoryBarrier(HMemoryBarrier* memory_barrier) { } void InstructionCodeGeneratorARM64::VisitMemoryBarrier(HMemoryBarrier* memory_barrier) { - GenerateMemoryBarrier(memory_barrier->GetBarrierKind()); + codegen_->GenerateMemoryBarrier(memory_barrier->GetBarrierKind()); } void LocationsBuilderARM64::VisitReturn(HReturn* instruction) { @@ -4608,14 +4649,288 @@ void InstructionCodeGeneratorARM64::VisitPackedSwitch(HPackedSwitch* switch_inst } } -void CodeGeneratorARM64::GenerateReadBarrier(HInstruction* instruction, - Location out, - Location ref, - Location obj, - uint32_t offset, - Location index) { +void InstructionCodeGeneratorARM64::GenerateReferenceLoadOneRegister(HInstruction* instruction, + Location out, + uint32_t offset, + Location maybe_temp) { + Primitive::Type type = Primitive::kPrimNot; + Register out_reg = RegisterFrom(out, type); + if (kEmitCompilerReadBarrier) { + Register temp_reg = RegisterFrom(maybe_temp, type); + if (kUseBakerReadBarrier) { + // Load with fast path based Baker's read barrier. + // /* HeapReference<Object> */ out = *(out + offset) + codegen_->GenerateFieldLoadWithBakerReadBarrier(instruction, + out, + out_reg, + offset, + temp_reg, + /* needs_null_check */ false, + /* use_load_acquire */ false); + } else { + // Load with slow path based read barrier. + // Save the value of `out` into `maybe_temp` before overwriting it + // in the following move operation, as we will need it for the + // read barrier below. + __ Mov(temp_reg, out_reg); + // /* HeapReference<Object> */ out = *(out + offset) + __ Ldr(out_reg, HeapOperand(out_reg, offset)); + codegen_->GenerateReadBarrierSlow(instruction, out, out, maybe_temp, offset); + } + } else { + // Plain load with no read barrier. + // /* HeapReference<Object> */ out = *(out + offset) + __ Ldr(out_reg, HeapOperand(out_reg, offset)); + GetAssembler()->MaybeUnpoisonHeapReference(out_reg); + } +} + +void InstructionCodeGeneratorARM64::GenerateReferenceLoadTwoRegisters(HInstruction* instruction, + Location out, + Location obj, + uint32_t offset, + Location maybe_temp) { + Primitive::Type type = Primitive::kPrimNot; + Register out_reg = RegisterFrom(out, type); + Register obj_reg = RegisterFrom(obj, type); + if (kEmitCompilerReadBarrier) { + if (kUseBakerReadBarrier) { + // Load with fast path based Baker's read barrier. + Register temp_reg = RegisterFrom(maybe_temp, type); + // /* HeapReference<Object> */ out = *(obj + offset) + codegen_->GenerateFieldLoadWithBakerReadBarrier(instruction, + out, + obj_reg, + offset, + temp_reg, + /* needs_null_check */ false, + /* use_load_acquire */ false); + } else { + // Load with slow path based read barrier. + // /* HeapReference<Object> */ out = *(obj + offset) + __ Ldr(out_reg, HeapOperand(obj_reg, offset)); + codegen_->GenerateReadBarrierSlow(instruction, out, out, obj, offset); + } + } else { + // Plain load with no read barrier. 
+ // /* HeapReference<Object> */ out = *(obj + offset) + __ Ldr(out_reg, HeapOperand(obj_reg, offset)); + GetAssembler()->MaybeUnpoisonHeapReference(out_reg); + } +} + +void InstructionCodeGeneratorARM64::GenerateGcRootFieldLoad(HInstruction* instruction, + Location root, + vixl::Register obj, + uint32_t offset) { + Register root_reg = RegisterFrom(root, Primitive::kPrimNot); + if (kEmitCompilerReadBarrier) { + if (kUseBakerReadBarrier) { + // Fast path implementation of art::ReadBarrier::BarrierForRoot when + // Baker's read barrier are used: + // + // root = obj.field; + // if (Thread::Current()->GetIsGcMarking()) { + // root = ReadBarrier::Mark(root) + // } + + // /* GcRoot<mirror::Object> */ root = *(obj + offset) + __ Ldr(root_reg, MemOperand(obj, offset)); + static_assert( + sizeof(mirror::CompressedReference<mirror::Object>) == sizeof(GcRoot<mirror::Object>), + "art::mirror::CompressedReference<mirror::Object> and art::GcRoot<mirror::Object> " + "have different sizes."); + static_assert(sizeof(mirror::CompressedReference<mirror::Object>) == sizeof(int32_t), + "art::mirror::CompressedReference<mirror::Object> and int32_t " + "have different sizes."); + + // Slow path used to mark the GC root `root`. + SlowPathCodeARM64* slow_path = + new (GetGraph()->GetArena()) ReadBarrierMarkSlowPathARM64(instruction, root, root); + codegen_->AddSlowPath(slow_path); + + MacroAssembler* masm = GetVIXLAssembler(); + UseScratchRegisterScope temps(masm); + Register temp = temps.AcquireW(); + // temp = Thread::Current()->GetIsGcMarking() + __ Ldr(temp, MemOperand(tr, Thread::IsGcMarkingOffset<kArm64WordSize>().Int32Value())); + __ Cbnz(temp, slow_path->GetEntryLabel()); + __ Bind(slow_path->GetExitLabel()); + } else { + // GC root loaded through a slow path for read barriers other + // than Baker's. + // /* GcRoot<mirror::Object>* */ root = obj + offset + __ Add(root_reg.X(), obj.X(), offset); + // /* mirror::Object* */ root = root->Read() + codegen_->GenerateReadBarrierForRootSlow(instruction, root, root); + } + } else { + // Plain GC root load with no read barrier. + // /* GcRoot<mirror::Object> */ root = *(obj + offset) + __ Ldr(root_reg, MemOperand(obj, offset)); + // Note that GC roots are not affected by heap poisoning, thus we + // do not have to unpoison `root_reg` here. + } +} + +void CodeGeneratorARM64::GenerateFieldLoadWithBakerReadBarrier(HInstruction* instruction, + Location ref, + vixl::Register obj, + uint32_t offset, + Register temp, + bool needs_null_check, + bool use_load_acquire) { + DCHECK(kEmitCompilerReadBarrier); + DCHECK(kUseBakerReadBarrier); + + // /* HeapReference<Object> */ ref = *(obj + offset) + Location no_index = Location::NoLocation(); + GenerateReferenceLoadWithBakerReadBarrier( + instruction, ref, obj, offset, no_index, temp, needs_null_check, use_load_acquire); +} + +void CodeGeneratorARM64::GenerateArrayLoadWithBakerReadBarrier(HInstruction* instruction, + Location ref, + vixl::Register obj, + uint32_t data_offset, + Location index, + Register temp, + bool needs_null_check) { + DCHECK(kEmitCompilerReadBarrier); + DCHECK(kUseBakerReadBarrier); + + // Array cells are never volatile variables, therefore array loads + // never use Load-Acquire instructions on ARM64. 
+ const bool use_load_acquire = false; + + // /* HeapReference<Object> */ ref = + // *(obj + data_offset + index * sizeof(HeapReference<Object>)) + GenerateReferenceLoadWithBakerReadBarrier( + instruction, ref, obj, data_offset, index, temp, needs_null_check, use_load_acquire); +} + +void CodeGeneratorARM64::GenerateReferenceLoadWithBakerReadBarrier(HInstruction* instruction, + Location ref, + vixl::Register obj, + uint32_t offset, + Location index, + Register temp, + bool needs_null_check, + bool use_load_acquire) { + DCHECK(kEmitCompilerReadBarrier); + DCHECK(kUseBakerReadBarrier); + // If `index` is a valid location, then we are emitting an array + // load, so we shouldn't be using a Load Acquire instruction. + // In other words: `index.IsValid()` => `!use_load_acquire`. + DCHECK(!index.IsValid() || !use_load_acquire); + + MacroAssembler* masm = GetVIXLAssembler(); + UseScratchRegisterScope temps(masm); + + // In slow path based read barriers, the read barrier call is + // inserted after the original load. However, in fast path based + // Baker's read barriers, we need to perform the load of + // mirror::Object::monitor_ *before* the original reference load. + // This load-load ordering is required by the read barrier. + // The fast path/slow path (for Baker's algorithm) should look like: + // + // uint32_t rb_state = Lockword(obj->monitor_).ReadBarrierState(); + // lfence; // Load fence or artificial data dependency to prevent load-load reordering + // HeapReference<Object> ref = *src; // Original reference load. + // bool is_gray = (rb_state == ReadBarrier::gray_ptr_); + // if (is_gray) { + // ref = ReadBarrier::Mark(ref); // Performed by runtime entrypoint slow path. + // } + // + // Note: the original implementation in ReadBarrier::Barrier is + // slightly more complex as it performs additional checks that we do + // not do here for performance reasons. + + Primitive::Type type = Primitive::kPrimNot; + Register ref_reg = RegisterFrom(ref, type); + DCHECK(obj.IsW()); + uint32_t monitor_offset = mirror::Object::MonitorOffset().Int32Value(); + + // /* int32_t */ monitor = obj->monitor_ + __ Ldr(temp, HeapOperand(obj, monitor_offset)); + if (needs_null_check) { + MaybeRecordImplicitNullCheck(instruction); + } + // /* LockWord */ lock_word = LockWord(monitor) + static_assert(sizeof(LockWord) == sizeof(int32_t), + "art::LockWord and int32_t have different sizes."); + // /* uint32_t */ rb_state = lock_word.ReadBarrierState() + __ Lsr(temp, temp, LockWord::kReadBarrierStateShift); + __ And(temp, temp, Operand(LockWord::kReadBarrierStateMask)); + static_assert( + LockWord::kReadBarrierStateMask == ReadBarrier::rb_ptr_mask_, + "art::LockWord::kReadBarrierStateMask is not equal to art::ReadBarrier::rb_ptr_mask_."); + + // Introduce a dependency on the high bits of rb_state, which shall + // be all zeroes, to prevent load-load reordering, and without using + // a memory barrier (which would be more expensive). + // temp2 = rb_state & ~LockWord::kReadBarrierStateMask = 0 + Register temp2 = temps.AcquireW(); + __ Bic(temp2, temp, Operand(LockWord::kReadBarrierStateMask)); + // obj is unchanged by this operation, but its value now depends on + // temp2, which depends on temp. + __ Add(obj, obj, Operand(temp2)); + temps.Release(temp2); + + // The actual reference load. 
+ if (index.IsValid()) { + static_assert( + sizeof(mirror::HeapReference<mirror::Object>) == sizeof(int32_t), + "art::mirror::HeapReference<art::mirror::Object> and int32_t have different sizes."); + temp2 = temps.AcquireW(); + // /* HeapReference<Object> */ ref = + // *(obj + offset + index * sizeof(HeapReference<Object>)) + MemOperand source = HeapOperand(obj); + if (index.IsConstant()) { + uint32_t computed_offset = + offset + (Int64ConstantFrom(index) << Primitive::ComponentSizeShift(type)); + source = HeapOperand(obj, computed_offset); + } else { + __ Add(temp2, obj, offset); + source = HeapOperand(temp2, XRegisterFrom(index), LSL, Primitive::ComponentSizeShift(type)); + } + Load(type, ref_reg, source); + temps.Release(temp2); + } else { + // /* HeapReference<Object> */ ref = *(obj + offset) + MemOperand field = HeapOperand(obj, offset); + if (use_load_acquire) { + LoadAcquire(instruction, ref_reg, field, /* needs_null_check */ false); + } else { + Load(type, ref_reg, field); + } + } + + // Object* ref = ref_addr->AsMirrorPtr() + GetAssembler()->MaybeUnpoisonHeapReference(ref_reg); + + // Slow path used to mark the object `ref` when it is gray. + SlowPathCodeARM64* slow_path = + new (GetGraph()->GetArena()) ReadBarrierMarkSlowPathARM64(instruction, ref, ref); + AddSlowPath(slow_path); + + // if (rb_state == ReadBarrier::gray_ptr_) + // ref = ReadBarrier::Mark(ref); + __ Cmp(temp, ReadBarrier::gray_ptr_); + __ B(eq, slow_path->GetEntryLabel()); + __ Bind(slow_path->GetExitLabel()); +} + +void CodeGeneratorARM64::GenerateReadBarrierSlow(HInstruction* instruction, + Location out, + Location ref, + Location obj, + uint32_t offset, + Location index) { DCHECK(kEmitCompilerReadBarrier); + // Insert a slow path based read barrier *after* the reference load. + // // If heap poisoning is enabled, the unpoisoning of the loaded // reference will be carried out by the runtime within the slow // path. @@ -4629,57 +4944,41 @@ void CodeGeneratorARM64::GenerateReadBarrier(HInstruction* instruction, ReadBarrierForHeapReferenceSlowPathARM64(instruction, out, ref, obj, offset, index); AddSlowPath(slow_path); - // TODO: When read barrier has a fast path, add it here. - /* Currently the read barrier call is inserted after the original load. - * However, if we have a fast path, we need to perform the load of obj.LockWord *before* the - * original load. This load-load ordering is required by the read barrier. - * The fast path/slow path (for Baker's algorithm) should look like: - * - * bool isGray = obj.LockWord & kReadBarrierMask; - * lfence; // load fence or artificial data dependence to prevent load-load reordering - * ref = obj.field; // this is the original load - * if (isGray) { - * ref = Mark(ref); // ideally the slow path just does Mark(ref) - * } - */ - __ B(slow_path->GetEntryLabel()); __ Bind(slow_path->GetExitLabel()); } -void CodeGeneratorARM64::MaybeGenerateReadBarrier(HInstruction* instruction, - Location out, - Location ref, - Location obj, - uint32_t offset, - Location index) { +void CodeGeneratorARM64::MaybeGenerateReadBarrierSlow(HInstruction* instruction, + Location out, + Location ref, + Location obj, + uint32_t offset, + Location index) { if (kEmitCompilerReadBarrier) { + // Baker's read barriers shall be handled by the fast path + // (CodeGeneratorARM64::GenerateReferenceLoadWithBakerReadBarrier). + DCHECK(!kUseBakerReadBarrier); // If heap poisoning is enabled, unpoisoning will be taken care of // by the runtime within the slow path. 
- GenerateReadBarrier(instruction, out, ref, obj, offset, index); + GenerateReadBarrierSlow(instruction, out, ref, obj, offset, index); } else if (kPoisonHeapReferences) { GetAssembler()->UnpoisonHeapReference(WRegisterFrom(out)); } } -void CodeGeneratorARM64::GenerateReadBarrierForRoot(HInstruction* instruction, - Location out, - Location root) { +void CodeGeneratorARM64::GenerateReadBarrierForRootSlow(HInstruction* instruction, + Location out, + Location root) { DCHECK(kEmitCompilerReadBarrier); + // Insert a slow path based read barrier *after* the GC root load. + // // Note that GC roots are not affected by heap poisoning, so we do // not need to do anything special for this here. SlowPathCodeARM64* slow_path = new (GetGraph()->GetArena()) ReadBarrierForRootSlowPathARM64(instruction, out, root); AddSlowPath(slow_path); - // TODO: Implement a fast path for ReadBarrierForRoot, performing - // the following operation (for Baker's algorithm): - // - // if (thread.tls32_.is_gc_marking) { - // root = Mark(root); - // } - __ B(slow_path->GetEntryLabel()); __ Bind(slow_path->GetExitLabel()); } diff --git a/compiler/optimizing/code_generator_arm64.h b/compiler/optimizing/code_generator_arm64.h index 8eb9fcc558..a9d1bbde98 100644 --- a/compiler/optimizing/code_generator_arm64.h +++ b/compiler/optimizing/code_generator_arm64.h @@ -208,14 +208,53 @@ class InstructionCodeGeneratorARM64 : public InstructionCodeGenerator { private: void GenerateClassInitializationCheck(SlowPathCodeARM64* slow_path, vixl::Register class_reg); - void GenerateMemoryBarrier(MemBarrierKind kind); void GenerateSuspendCheck(HSuspendCheck* instruction, HBasicBlock* successor); void HandleBinaryOp(HBinaryOperation* instr); + void HandleFieldSet(HInstruction* instruction, const FieldInfo& field_info, bool value_can_be_null); void HandleFieldGet(HInstruction* instruction, const FieldInfo& field_info); void HandleCondition(HCondition* instruction); + + // Generate a heap reference load using one register `out`: + // + // out <- *(out + offset) + // + // while honoring heap poisoning and/or read barriers (if any). + // + // Location `maybe_temp` is used when generating a read barrier and + // shall be a register in that case; it may be an invalid location + // otherwise. + void GenerateReferenceLoadOneRegister(HInstruction* instruction, + Location out, + uint32_t offset, + Location maybe_temp); + // Generate a heap reference load using two different registers + // `out` and `obj`: + // + // out <- *(obj + offset) + // + // while honoring heap poisoning and/or read barriers (if any). + // + // Location `maybe_temp` is used when generating a Baker's (fast + // path) read barrier and shall be a register in that case; it may + // be an invalid location otherwise. + void GenerateReferenceLoadTwoRegisters(HInstruction* instruction, + Location out, + Location obj, + uint32_t offset, + Location maybe_temp); + // Generate a GC root reference load: + // + // root <- *(obj + offset) + // + // while honoring read barriers (if any). + void GenerateGcRootFieldLoad(HInstruction* instruction, + Location root, + vixl::Register obj, + uint32_t offset); + void HandleShift(HBinaryOperation* instr); void GenerateImplicitNullCheck(HNullCheck* instruction); void GenerateExplicitNullCheck(HNullCheck* instruction); @@ -337,6 +376,8 @@ class CodeGeneratorARM64 : public CodeGenerator { // Emit a write barrier. 
void MarkGCCard(vixl::Register object, vixl::Register value, bool value_can_be_null); + void GenerateMemoryBarrier(MemBarrierKind kind); + // Register allocation. void SetupBlockedRegisters() const OVERRIDE; @@ -386,9 +427,12 @@ class CodeGeneratorARM64 : public CodeGenerator { void AddLocationAsTemp(Location location, LocationSummary* locations) OVERRIDE; void Load(Primitive::Type type, vixl::CPURegister dst, const vixl::MemOperand& src); - void Store(Primitive::Type type, vixl::CPURegister rt, const vixl::MemOperand& dst); - void LoadAcquire(HInstruction* instruction, vixl::CPURegister dst, const vixl::MemOperand& src); - void StoreRelease(Primitive::Type type, vixl::CPURegister rt, const vixl::MemOperand& dst); + void Store(Primitive::Type type, vixl::CPURegister src, const vixl::MemOperand& dst); + void LoadAcquire(HInstruction* instruction, + vixl::CPURegister dst, + const vixl::MemOperand& src, + bool needs_null_check); + void StoreRelease(Primitive::Type type, vixl::CPURegister src, const vixl::MemOperand& dst); // Generate code to invoke a runtime entry point. void InvokeRuntime(QuickEntrypointEnum entrypoint, @@ -423,7 +467,27 @@ class CodeGeneratorARM64 : public CodeGenerator { void EmitLinkerPatches(ArenaVector<LinkerPatch>* linker_patches) OVERRIDE; - // Generate a read barrier for a heap reference within `instruction`. + // Fast path implementation of ReadBarrier::Barrier for a heap + // reference field load when Baker's read barriers are used. + void GenerateFieldLoadWithBakerReadBarrier(HInstruction* instruction, + Location ref, + vixl::Register obj, + uint32_t offset, + vixl::Register temp, + bool needs_null_check, + bool use_load_acquire); + // Fast path implementation of ReadBarrier::Barrier for a heap + // reference array load when Baker's read barriers are used. + void GenerateArrayLoadWithBakerReadBarrier(HInstruction* instruction, + Location ref, + vixl::Register obj, + uint32_t data_offset, + Location index, + vixl::Register temp, + bool needs_null_check); + + // Generate a read barrier for a heap reference within `instruction` + // using a slow path. // // A read barrier for an object reference read from the heap is // implemented as a call to the artReadBarrierSlow runtime entry @@ -440,23 +504,25 @@ class CodeGeneratorARM64 : public CodeGenerator { // When `index` is provided (i.e. for array accesses), the offset // value passed to artReadBarrierSlow is adjusted to take `index` // into account. - void GenerateReadBarrier(HInstruction* instruction, - Location out, - Location ref, - Location obj, - uint32_t offset, - Location index = Location::NoLocation()); - - // If read barriers are enabled, generate a read barrier for a heap reference. - // If heap poisoning is enabled, also unpoison the reference in `out`. - void MaybeGenerateReadBarrier(HInstruction* instruction, - Location out, - Location ref, - Location obj, - uint32_t offset, - Location index = Location::NoLocation()); - - // Generate a read barrier for a GC root within `instruction`. + void GenerateReadBarrierSlow(HInstruction* instruction, + Location out, + Location ref, + Location obj, + uint32_t offset, + Location index = Location::NoLocation()); + + // If read barriers are enabled, generate a read barrier for a heap + // reference using a slow path. If heap poisoning is enabled, also + // unpoison the reference in `out`. 
+ void MaybeGenerateReadBarrierSlow(HInstruction* instruction, + Location out, + Location ref, + Location obj, + uint32_t offset, + Location index = Location::NoLocation()); + + // Generate a read barrier for a GC root within `instruction` using + // a slow path. // // A read barrier for an object reference GC root is implemented as // a call to the artReadBarrierForRootSlow runtime entry point, @@ -466,9 +532,20 @@ class CodeGeneratorARM64 : public CodeGenerator { // // The `out` location contains the value returned by // artReadBarrierForRootSlow. - void GenerateReadBarrierForRoot(HInstruction* instruction, Location out, Location root); + void GenerateReadBarrierForRootSlow(HInstruction* instruction, Location out, Location root); private: + // Factored implementation of GenerateFieldLoadWithBakerReadBarrier + // and GenerateArrayLoadWithBakerReadBarrier. + void GenerateReferenceLoadWithBakerReadBarrier(HInstruction* instruction, + Location ref, + vixl::Register obj, + uint32_t offset, + Location index, + vixl::Register temp, + bool needs_null_check, + bool use_load_acquire); + using Uint64ToLiteralMap = ArenaSafeMap<uint64_t, vixl::Literal<uint64_t>*>; using MethodToLiteralMap = ArenaSafeMap<MethodReference, vixl::Literal<uint64_t>*, diff --git a/compiler/optimizing/intrinsics_arm64.cc b/compiler/optimizing/intrinsics_arm64.cc index c5688a3ea2..8cf2d4f393 100644 --- a/compiler/optimizing/intrinsics_arm64.cc +++ b/compiler/optimizing/intrinsics_arm64.cc @@ -752,21 +752,33 @@ static void GenUnsafeGet(HInvoke* invoke, Register trg = RegisterFrom(trg_loc, type); bool use_acquire_release = codegen->GetInstructionSetFeatures().PreferAcquireRelease(); - MemOperand mem_op(base.X(), offset); - if (is_volatile) { - if (use_acquire_release) { - codegen->LoadAcquire(invoke, trg, mem_op); - } else { - codegen->Load(type, trg, mem_op); + if (type == Primitive::kPrimNot && kEmitCompilerReadBarrier && kUseBakerReadBarrier) { + // UnsafeGetObject/UnsafeGetObjectVolatile with Baker's read barrier case. + UseScratchRegisterScope temps(masm); + Register temp = temps.AcquireW(); + codegen->GenerateArrayLoadWithBakerReadBarrier( + invoke, trg_loc, base, 0U, offset_loc, temp, /* needs_null_check */ false); + if (is_volatile && !use_acquire_release) { __ Dmb(InnerShareable, BarrierReads); } } else { - codegen->Load(type, trg, mem_op); - } + // Other cases. + MemOperand mem_op(base.X(), offset); + if (is_volatile) { + if (use_acquire_release) { + codegen->LoadAcquire(invoke, trg, mem_op, /* needs_null_check */ true); + } else { + codegen->Load(type, trg, mem_op); + __ Dmb(InnerShareable, BarrierReads); + } + } else { + codegen->Load(type, trg, mem_op); + } - if (type == Primitive::kPrimNot) { - DCHECK(trg.IsW()); - codegen->MaybeGenerateReadBarrier(invoke, trg_loc, trg_loc, base_loc, 0U, offset_loc); + if (type == Primitive::kPrimNot) { + DCHECK(trg.IsW()); + codegen->MaybeGenerateReadBarrierSlow(invoke, trg_loc, trg_loc, base_loc, 0U, offset_loc); + } } } @@ -1026,10 +1038,15 @@ static void GenCas(LocationSummary* locations, Primitive::Type type, CodeGenerat vixl::Label loop_head, exit_loop; if (use_acquire_release) { __ Bind(&loop_head); - __ Ldaxr(tmp_value, MemOperand(tmp_ptr)); - // TODO: Do we need a read barrier here when `type == Primitive::kPrimNot`? + // TODO: When `type == Primitive::kPrimNot`, add a read barrier for + // the reference stored in the object before attempting the CAS, + // similar to the one in the art::Unsafe_compareAndSwapObject JNI + // implementation. 
+      //
       // Note that this code is not (yet) used when read barriers are
       // enabled (see IntrinsicLocationsBuilderARM64::VisitUnsafeCASObject).
+      DCHECK(!(type == Primitive::kPrimNot && kEmitCompilerReadBarrier));
+      __ Ldaxr(tmp_value, MemOperand(tmp_ptr));
       __ Cmp(tmp_value, expected);
       __ B(&exit_loop, ne);
       __ Stlxr(tmp_32, value, MemOperand(tmp_ptr));
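For GC roots the patch uses a different guard: the new InstructionCodeGeneratorARM64::GenerateGcRootFieldLoad (in code_generator_arm64.cc above) emits the mark call only when the thread-local GC-marking flag is set, as its comment describes. A minimal pseudo-code sketch of that behaviour (again, ReadBarrier::Mark stands for the pReadBarrierMark entry point reached through ReadBarrierMarkSlowPathARM64):

  root = obj.field;
  if (Thread::Current()->GetIsGcMarking()) {
    root = ReadBarrier::Mark(root);
  }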