Diffstat (limited to 'compiler/optimizing')
-rw-r--r-- | compiler/optimizing/code_generator_mips.cc | 1493
-rw-r--r-- | compiler/optimizing/code_generator_mips.h | 130
-rw-r--r-- | compiler/optimizing/code_generator_mips64.cc | 1477
-rw-r--r-- | compiler/optimizing/code_generator_mips64.h | 129
-rw-r--r-- | compiler/optimizing/intrinsics_mips.cc | 242
-rw-r--r-- | compiler/optimizing/intrinsics_mips64.cc | 173
-rw-r--r-- | compiler/optimizing/optimizing_compiler.cc | 2
7 files changed, 3209 insertions, 437 deletions
diff --git a/compiler/optimizing/code_generator_mips.cc b/compiler/optimizing/code_generator_mips.cc index 5f02a52417..287891feae 100644 --- a/compiler/optimizing/code_generator_mips.cc +++ b/compiler/optimizing/code_generator_mips.cc @@ -461,6 +461,536 @@ class DeoptimizationSlowPathMIPS : public SlowPathCodeMIPS { DISALLOW_COPY_AND_ASSIGN(DeoptimizationSlowPathMIPS); }; +class ArraySetSlowPathMIPS : public SlowPathCodeMIPS { + public: + explicit ArraySetSlowPathMIPS(HInstruction* instruction) : SlowPathCodeMIPS(instruction) {} + + void EmitNativeCode(CodeGenerator* codegen) OVERRIDE { + LocationSummary* locations = instruction_->GetLocations(); + __ Bind(GetEntryLabel()); + SaveLiveRegisters(codegen, locations); + + InvokeRuntimeCallingConvention calling_convention; + HParallelMove parallel_move(codegen->GetGraph()->GetArena()); + parallel_move.AddMove( + locations->InAt(0), + Location::RegisterLocation(calling_convention.GetRegisterAt(0)), + Primitive::kPrimNot, + nullptr); + parallel_move.AddMove( + locations->InAt(1), + Location::RegisterLocation(calling_convention.GetRegisterAt(1)), + Primitive::kPrimInt, + nullptr); + parallel_move.AddMove( + locations->InAt(2), + Location::RegisterLocation(calling_convention.GetRegisterAt(2)), + Primitive::kPrimNot, + nullptr); + codegen->GetMoveResolver()->EmitNativeCode(¶llel_move); + + CodeGeneratorMIPS* mips_codegen = down_cast<CodeGeneratorMIPS*>(codegen); + mips_codegen->InvokeRuntime(kQuickAputObject, instruction_, instruction_->GetDexPc(), this); + CheckEntrypointTypes<kQuickAputObject, void, mirror::Array*, int32_t, mirror::Object*>(); + RestoreLiveRegisters(codegen, locations); + __ B(GetExitLabel()); + } + + const char* GetDescription() const OVERRIDE { return "ArraySetSlowPathMIPS"; } + + private: + DISALLOW_COPY_AND_ASSIGN(ArraySetSlowPathMIPS); +}; + +// Slow path marking an object reference `ref` during a read +// barrier. The field `obj.field` in the object `obj` holding this +// reference does not get updated by this slow path after marking (see +// ReadBarrierMarkAndUpdateFieldSlowPathMIPS below for that). +// +// This means that after the execution of this slow path, `ref` will +// always be up-to-date, but `obj.field` may not; i.e., after the +// flip, `ref` will be a to-space reference, but `obj.field` will +// probably still be a from-space reference (unless it gets updated by +// another thread, or if another thread installed another object +// reference (different from `ref`) in `obj.field`). +// +// If `entrypoint` is a valid location it is assumed to already be +// holding the entrypoint. The case where the entrypoint is passed in +// is for the GcRoot read barrier. 
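As a plain C++ sketch of the contract described in the comment above (illustrative only; `Object` and `Mark` are stand-ins, not ART types or entrypoints): after this slow path runs, the reference in hand is a to-space pointer, but the holder's field is left alone.

    struct Object;               // stand-in for mirror::Object
    Object* Mark(Object* ref);   // stand-in for the ReadBarrierMark entrypoint

    Object* LoadWithMarkOnly(Object** field_addr) {
      Object* ref = *field_addr; // may be a from-space pointer after the flip
      ref = Mark(ref);           // slow path: ref is now a to-space pointer
      // *field_addr is deliberately NOT rewritten here; that is what
      // ReadBarrierMarkAndUpdateFieldSlowPathMIPS (below) adds.
      return ref;
    }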
+class ReadBarrierMarkSlowPathMIPS : public SlowPathCodeMIPS { + public: + ReadBarrierMarkSlowPathMIPS(HInstruction* instruction, + Location ref, + Location entrypoint = Location::NoLocation()) + : SlowPathCodeMIPS(instruction), ref_(ref), entrypoint_(entrypoint) { + DCHECK(kEmitCompilerReadBarrier); + } + + const char* GetDescription() const OVERRIDE { return "ReadBarrierMarkSlowPathMIPS"; } + + void EmitNativeCode(CodeGenerator* codegen) OVERRIDE { + LocationSummary* locations = instruction_->GetLocations(); + Register ref_reg = ref_.AsRegister<Register>(); + DCHECK(locations->CanCall()); + DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(ref_reg)) << ref_reg; + DCHECK(instruction_->IsInstanceFieldGet() || + instruction_->IsStaticFieldGet() || + instruction_->IsArrayGet() || + instruction_->IsArraySet() || + instruction_->IsLoadClass() || + instruction_->IsLoadString() || + instruction_->IsInstanceOf() || + instruction_->IsCheckCast() || + (instruction_->IsInvokeVirtual() && instruction_->GetLocations()->Intrinsified()) || + (instruction_->IsInvokeStaticOrDirect() && instruction_->GetLocations()->Intrinsified())) + << "Unexpected instruction in read barrier marking slow path: " + << instruction_->DebugName(); + + __ Bind(GetEntryLabel()); + // No need to save live registers; it's taken care of by the + // entrypoint. Also, there is no need to update the stack mask, + // as this runtime call will not trigger a garbage collection. + CodeGeneratorMIPS* mips_codegen = down_cast<CodeGeneratorMIPS*>(codegen); + DCHECK((V0 <= ref_reg && ref_reg <= T7) || + (S2 <= ref_reg && ref_reg <= S7) || + (ref_reg == FP)) << ref_reg; + // "Compact" slow path, saving two moves. + // + // Instead of using the standard runtime calling convention (input + // and output in A0 and V0 respectively): + // + // A0 <- ref + // V0 <- ReadBarrierMark(A0) + // ref <- V0 + // + // we just use rX (the register containing `ref`) as input and output + // of a dedicated entrypoint: + // + // rX <- ReadBarrierMarkRegX(rX) + // + if (entrypoint_.IsValid()) { + mips_codegen->ValidateInvokeRuntimeWithoutRecordingPcInfo(instruction_, this); + DCHECK_EQ(entrypoint_.AsRegister<Register>(), T9); + __ Jalr(entrypoint_.AsRegister<Register>()); + __ NopIfNoReordering(); + } else { + int32_t entry_point_offset = + CodeGenerator::GetReadBarrierMarkEntryPointsOffset<kMipsPointerSize>(ref_reg - 1); + // This runtime call does not require a stack map. + mips_codegen->InvokeRuntimeWithoutRecordingPcInfo(entry_point_offset, + instruction_, + this, + /* direct */ false); + } + __ B(GetExitLabel()); + } + + private: + // The location (register) of the marked object reference. + const Location ref_; + + // The location of the entrypoint if already loaded. + const Location entrypoint_; + + DISALLOW_COPY_AND_ASSIGN(ReadBarrierMarkSlowPathMIPS); +}; + +// Slow path marking an object reference `ref` during a read barrier, +// and if needed, atomically updating the field `obj.field` in the +// object `obj` holding this reference after marking (contrary to +// ReadBarrierMarkSlowPathMIPS above, which never tries to update +// `obj.field`). +// +// This means that after the execution of this slow path, both `ref` +// and `obj.field` will be up-to-date; i.e., after the flip, both will +// hold the same to-space reference (unless another thread installed +// another object reference (different from `ref`) in `obj.field`). 
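The "compact" convention used by ReadBarrierMarkSlowPathMIPS above saves the two moves around the call; schematically, in C++ (both entrypoint declarations are stand-ins, not real runtime signatures):

    uint32_t ReadBarrierMark(uint32_t ref);      // standard A0 -> V0 entrypoint (stand-in)
    uint32_t ReadBarrierMarkRegX(uint32_t ref);  // dedicated per-register entrypoint (stand-in)

    uint32_t StandardCall(uint32_t ref) {
      uint32_t a0 = ref;                  // A0 <- ref
      uint32_t v0 = ReadBarrierMark(a0);  // V0 <- ReadBarrierMark(A0)
      return v0;                          // ref <- V0
    }

    uint32_t CompactCall(uint32_t r_x) {
      return ReadBarrierMarkRegX(r_x);    // rX <- ReadBarrierMarkRegX(rX), in place
    }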
+class ReadBarrierMarkAndUpdateFieldSlowPathMIPS : public SlowPathCodeMIPS { + public: + ReadBarrierMarkAndUpdateFieldSlowPathMIPS(HInstruction* instruction, + Location ref, + Register obj, + Location field_offset, + Register temp1) + : SlowPathCodeMIPS(instruction), + ref_(ref), + obj_(obj), + field_offset_(field_offset), + temp1_(temp1) { + DCHECK(kEmitCompilerReadBarrier); + } + + const char* GetDescription() const OVERRIDE { + return "ReadBarrierMarkAndUpdateFieldSlowPathMIPS"; + } + + void EmitNativeCode(CodeGenerator* codegen) OVERRIDE { + LocationSummary* locations = instruction_->GetLocations(); + Register ref_reg = ref_.AsRegister<Register>(); + DCHECK(locations->CanCall()); + DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(ref_reg)) << ref_reg; + // This slow path is only used by the UnsafeCASObject intrinsic. + DCHECK((instruction_->IsInvokeVirtual() && instruction_->GetLocations()->Intrinsified())) + << "Unexpected instruction in read barrier marking and field updating slow path: " + << instruction_->DebugName(); + DCHECK(instruction_->GetLocations()->Intrinsified()); + DCHECK_EQ(instruction_->AsInvoke()->GetIntrinsic(), Intrinsics::kUnsafeCASObject); + DCHECK(field_offset_.IsRegisterPair()) << field_offset_; + + __ Bind(GetEntryLabel()); + + // Save the old reference. + // Note that we cannot use AT or TMP to save the old reference, as those + // are used by the code that follows, but we need the old reference after + // the call to the ReadBarrierMarkRegX entry point. + DCHECK_NE(temp1_, AT); + DCHECK_NE(temp1_, TMP); + __ Move(temp1_, ref_reg); + + // No need to save live registers; it's taken care of by the + // entrypoint. Also, there is no need to update the stack mask, + // as this runtime call will not trigger a garbage collection. + CodeGeneratorMIPS* mips_codegen = down_cast<CodeGeneratorMIPS*>(codegen); + DCHECK((V0 <= ref_reg && ref_reg <= T7) || + (S2 <= ref_reg && ref_reg <= S7) || + (ref_reg == FP)) << ref_reg; + // "Compact" slow path, saving two moves. + // + // Instead of using the standard runtime calling convention (input + // and output in A0 and V0 respectively): + // + // A0 <- ref + // V0 <- ReadBarrierMark(A0) + // ref <- V0 + // + // we just use rX (the register containing `ref`) as input and output + // of a dedicated entrypoint: + // + // rX <- ReadBarrierMarkRegX(rX) + // + int32_t entry_point_offset = + CodeGenerator::GetReadBarrierMarkEntryPointsOffset<kMipsPointerSize>(ref_reg - 1); + // This runtime call does not require a stack map. + mips_codegen->InvokeRuntimeWithoutRecordingPcInfo(entry_point_offset, + instruction_, + this, + /* direct */ false); + + // If the new reference is different from the old reference, + // update the field in the holder (`*(obj_ + field_offset_)`). + // + // Note that this field could also hold a different object, if + // another thread had concurrently changed it. In that case, the + // compare-and-set (CAS) loop below would abort, leaving the + // field as-is. + MipsLabel done; + __ Beq(temp1_, ref_reg, &done); + + // Update the holder's field atomically. This may fail if the + // mutator updates it before us, but it's OK. This is achieved + // using a strong compare-and-set (CAS) operation with relaxed + // memory synchronization ordering, where the expected value is + // the old reference and the desired value is the new reference. + + // Convenience aliases.
+ Register base = obj_; + // The UnsafeCASObject intrinsic uses a register pair as field + // offset ("long offset"), of which only the low part contains + // data. + Register offset = field_offset_.AsRegisterPairLow<Register>(); + Register expected = temp1_; + Register value = ref_reg; + Register tmp_ptr = TMP; // Pointer to actual memory. + Register tmp = AT; // Value in memory. + + __ Addu(tmp_ptr, base, offset); + + if (kPoisonHeapReferences) { + __ PoisonHeapReference(expected); + // Do not poison `value` if it is the same register as + // `expected`, which has just been poisoned. + if (value != expected) { + __ PoisonHeapReference(value); + } + } + + // do { + // tmp = [r_ptr] - expected; + // } while (tmp == 0 && failure([r_ptr] <- r_new_value)); + + bool is_r6 = mips_codegen->GetInstructionSetFeatures().IsR6(); + MipsLabel loop_head, exit_loop; + __ Bind(&loop_head); + if (is_r6) { + __ LlR6(tmp, tmp_ptr); + } else { + __ LlR2(tmp, tmp_ptr); + } + __ Bne(tmp, expected, &exit_loop); + __ Move(tmp, value); + if (is_r6) { + __ ScR6(tmp, tmp_ptr); + } else { + __ ScR2(tmp, tmp_ptr); + } + __ Beqz(tmp, &loop_head); + __ Bind(&exit_loop); + + if (kPoisonHeapReferences) { + __ UnpoisonHeapReference(expected); + // Do not unpoison `value` if it is the same register as + // `expected`, which has just been unpoisoned. + if (value != expected) { + __ UnpoisonHeapReference(value); + } + } + + __ Bind(&done); + __ B(GetExitLabel()); + } + + private: + // The location (register) of the marked object reference. + const Location ref_; + // The register containing the object holding the marked object reference field. + const Register obj_; + // The location of the offset of the marked reference field within `obj_`. + Location field_offset_; + + const Register temp1_; + + DISALLOW_COPY_AND_ASSIGN(ReadBarrierMarkAndUpdateFieldSlowPathMIPS); +}; + +// Slow path generating a read barrier for a heap reference. +class ReadBarrierForHeapReferenceSlowPathMIPS : public SlowPathCodeMIPS { + public: + ReadBarrierForHeapReferenceSlowPathMIPS(HInstruction* instruction, + Location out, + Location ref, + Location obj, + uint32_t offset, + Location index) + : SlowPathCodeMIPS(instruction), + out_(out), + ref_(ref), + obj_(obj), + offset_(offset), + index_(index) { + DCHECK(kEmitCompilerReadBarrier); + // If `obj` is equal to `out` or `ref`, it means the initial object + // has been overwritten by (or after) the heap object reference load + // to be instrumented, e.g.: + // + // __ LoadFromOffset(kLoadWord, out, out, offset); + // codegen_->GenerateReadBarrierSlow(instruction, out_loc, out_loc, out_loc, offset); + // + // In that case, we have lost the information about the original + // object, and the emitted read barrier cannot work properly. 
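The Ll/Sc loop emitted by ReadBarrierMarkAndUpdateFieldSlowPathMIPS above behaves like a strong compare-and-set with relaxed memory ordering; a portable C++ sketch of the update step (not ART's actual helper):

    #include <atomic>
    #include <cstdint>

    // Replace the field only if it still holds the pre-mark reference.
    // A failed exchange means another thread already stored a different
    // object there, and the field is left as-is (the Bne/exit_loop path).
    void UpdateFieldAfterMark(std::atomic<uint32_t>* field,
                              uint32_t old_ref,    // value saved in temp1_
                              uint32_t new_ref) {  // marked value in ref_reg
      uint32_t expected = old_ref;
      field->compare_exchange_strong(expected, new_ref,
                                     std::memory_order_relaxed);
    }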
+ DCHECK(!obj.Equals(out)) << "obj=" << obj << " out=" << out; + DCHECK(!obj.Equals(ref)) << "obj=" << obj << " ref=" << ref; + } + + void EmitNativeCode(CodeGenerator* codegen) OVERRIDE { + CodeGeneratorMIPS* mips_codegen = down_cast<CodeGeneratorMIPS*>(codegen); + LocationSummary* locations = instruction_->GetLocations(); + Register reg_out = out_.AsRegister<Register>(); + DCHECK(locations->CanCall()); + DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(reg_out)); + DCHECK(instruction_->IsInstanceFieldGet() || + instruction_->IsStaticFieldGet() || + instruction_->IsArrayGet() || + instruction_->IsInstanceOf() || + instruction_->IsCheckCast() || + (instruction_->IsInvokeVirtual() && instruction_->GetLocations()->Intrinsified())) + << "Unexpected instruction in read barrier for heap reference slow path: " + << instruction_->DebugName(); + + __ Bind(GetEntryLabel()); + SaveLiveRegisters(codegen, locations); + + // We may have to change the index's value, but as `index_` is a + // constant member (like other "inputs" of this slow path), + // introduce a copy of it, `index`. + Location index = index_; + if (index_.IsValid()) { + // Handle `index_` for HArrayGet and UnsafeGetObject/UnsafeGetObjectVolatile intrinsics. + if (instruction_->IsArrayGet()) { + // Compute the actual memory offset and store it in `index`. + Register index_reg = index_.AsRegister<Register>(); + DCHECK(locations->GetLiveRegisters()->ContainsCoreRegister(index_reg)); + if (codegen->IsCoreCalleeSaveRegister(index_reg)) { + // We are about to change the value of `index_reg` (see the + // calls to art::mips::MipsAssembler::Sll and + // art::mips::MipsAssembler::Addiu32 below), but it has + // not been saved by the previous call to + // art::SlowPathCode::SaveLiveRegisters, as it is a + // callee-save register -- + // art::SlowPathCode::SaveLiveRegisters does not consider + // callee-save registers, as it has been designed with the + // assumption that callee-save registers are supposed to be + // handled by the called function. So, as a callee-save + // register, `index_reg` _would_ eventually be saved onto + // the stack, but it would be too late: we would have + // changed its value earlier. Therefore, we manually save + // it here into another freely available register, + // `free_reg`, chosen of course among the caller-save + // registers (as a callee-save `free_reg` register would + // exhibit the same problem). + // + // Note we could have requested a temporary register from + // the register allocator instead; but we prefer not to, as + // this is a slow path, and we know we can find a + // caller-save register that is available. + Register free_reg = FindAvailableCallerSaveRegister(codegen); + __ Move(free_reg, index_reg); + index_reg = free_reg; + index = Location::RegisterLocation(index_reg); + } else { + // The initial register stored in `index_` has already been + // saved in the call to art::SlowPathCode::SaveLiveRegisters + // (as it is not a callee-save register), so we can freely + // use it. + } + // Shifting the index value contained in `index_reg` by the scale + // factor (2) cannot overflow in practice, as the runtime is + // unable to allocate object arrays with a size larger than + // 2^26 - 1 (that is, 2^28 - 4 bytes). 
+ __ Sll(index_reg, index_reg, TIMES_4); + static_assert( + sizeof(mirror::HeapReference<mirror::Object>) == sizeof(int32_t), + "art::mirror::HeapReference<art::mirror::Object> and int32_t have different sizes."); + __ Addiu32(index_reg, index_reg, offset_); + } else { + // In the case of the UnsafeGetObject/UnsafeGetObjectVolatile + // intrinsics, `index_` is not shifted by a scale factor of 2 + // (as in the case of ArrayGet), as it is actually an offset + // to an object field within an object. + DCHECK(instruction_->IsInvoke()) << instruction_->DebugName(); + DCHECK(instruction_->GetLocations()->Intrinsified()); + DCHECK((instruction_->AsInvoke()->GetIntrinsic() == Intrinsics::kUnsafeGetObject) || + (instruction_->AsInvoke()->GetIntrinsic() == Intrinsics::kUnsafeGetObjectVolatile)) + << instruction_->AsInvoke()->GetIntrinsic(); + DCHECK_EQ(offset_, 0U); + DCHECK(index_.IsRegisterPair()); + // UnsafeGet's offset location is a register pair, the low + // part contains the correct offset. + index = index_.ToLow(); + } + } + + // We're moving two or three locations to locations that could + // overlap, so we need a parallel move resolver. + InvokeRuntimeCallingConvention calling_convention; + HParallelMove parallel_move(codegen->GetGraph()->GetArena()); + parallel_move.AddMove(ref_, + Location::RegisterLocation(calling_convention.GetRegisterAt(0)), + Primitive::kPrimNot, + nullptr); + parallel_move.AddMove(obj_, + Location::RegisterLocation(calling_convention.GetRegisterAt(1)), + Primitive::kPrimNot, + nullptr); + if (index.IsValid()) { + parallel_move.AddMove(index, + Location::RegisterLocation(calling_convention.GetRegisterAt(2)), + Primitive::kPrimInt, + nullptr); + codegen->GetMoveResolver()->EmitNativeCode(¶llel_move); + } else { + codegen->GetMoveResolver()->EmitNativeCode(¶llel_move); + __ LoadConst32(calling_convention.GetRegisterAt(2), offset_); + } + mips_codegen->InvokeRuntime(kQuickReadBarrierSlow, + instruction_, + instruction_->GetDexPc(), + this); + CheckEntrypointTypes< + kQuickReadBarrierSlow, mirror::Object*, mirror::Object*, mirror::Object*, uint32_t>(); + mips_codegen->Move32(out_, calling_convention.GetReturnLocation(Primitive::kPrimNot)); + + RestoreLiveRegisters(codegen, locations); + __ B(GetExitLabel()); + } + + const char* GetDescription() const OVERRIDE { return "ReadBarrierForHeapReferenceSlowPathMIPS"; } + + private: + Register FindAvailableCallerSaveRegister(CodeGenerator* codegen) { + size_t ref = static_cast<int>(ref_.AsRegister<Register>()); + size_t obj = static_cast<int>(obj_.AsRegister<Register>()); + for (size_t i = 0, e = codegen->GetNumberOfCoreRegisters(); i < e; ++i) { + if (i != ref && + i != obj && + !codegen->IsCoreCalleeSaveRegister(i) && + !codegen->IsBlockedCoreRegister(i)) { + return static_cast<Register>(i); + } + } + // We shall never fail to find a free caller-save register, as + // there are more than two core caller-save registers on MIPS + // (meaning it is possible to find one which is different from + // `ref` and `obj`). + DCHECK_GT(codegen->GetNumberOfCoreCallerSaveRegisters(), 2u); + LOG(FATAL) << "Could not find a free caller-save register"; + UNREACHABLE(); + } + + const Location out_; + const Location ref_; + const Location obj_; + const uint32_t offset_; + // An additional location containing an index to an array. + // Only used for HArrayGet and the UnsafeGetObject & + // UnsafeGetObjectVolatile intrinsics. 
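For HArrayGet, the Sll/Addiu32 pair above folds the element index and the data offset into a single byte offset; as a small sketch (hypothetical helper; TIMES_4 is a shift amount of 2):

    #include <cstdint>

    // index * sizeof(HeapReference<Object>) + data offset, with
    // sizeof(HeapReference<Object>) == 4 as the static_assert above checks.
    uint32_t ElementByteOffset(uint32_t index, uint32_t data_offset) {
      return (index << 2) + data_offset;
    }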
+ const Location index_; + + DISALLOW_COPY_AND_ASSIGN(ReadBarrierForHeapReferenceSlowPathMIPS); +}; + +// Slow path generating a read barrier for a GC root. +class ReadBarrierForRootSlowPathMIPS : public SlowPathCodeMIPS { + public: + ReadBarrierForRootSlowPathMIPS(HInstruction* instruction, Location out, Location root) + : SlowPathCodeMIPS(instruction), out_(out), root_(root) { + DCHECK(kEmitCompilerReadBarrier); + } + + void EmitNativeCode(CodeGenerator* codegen) OVERRIDE { + LocationSummary* locations = instruction_->GetLocations(); + Register reg_out = out_.AsRegister<Register>(); + DCHECK(locations->CanCall()); + DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(reg_out)); + DCHECK(instruction_->IsLoadClass() || instruction_->IsLoadString()) + << "Unexpected instruction in read barrier for GC root slow path: " + << instruction_->DebugName(); + + __ Bind(GetEntryLabel()); + SaveLiveRegisters(codegen, locations); + + InvokeRuntimeCallingConvention calling_convention; + CodeGeneratorMIPS* mips_codegen = down_cast<CodeGeneratorMIPS*>(codegen); + mips_codegen->Move32(Location::RegisterLocation(calling_convention.GetRegisterAt(0)), root_); + mips_codegen->InvokeRuntime(kQuickReadBarrierForRootSlow, + instruction_, + instruction_->GetDexPc(), + this); + CheckEntrypointTypes<kQuickReadBarrierForRootSlow, mirror::Object*, GcRoot<mirror::Object>*>(); + mips_codegen->Move32(out_, calling_convention.GetReturnLocation(Primitive::kPrimNot)); + + RestoreLiveRegisters(codegen, locations); + __ B(GetExitLabel()); + } + + const char* GetDescription() const OVERRIDE { return "ReadBarrierForRootSlowPathMIPS"; } + + private: + const Location out_; + const Location root_; + + DISALLOW_COPY_AND_ASSIGN(ReadBarrierForRootSlowPathMIPS); +}; + CodeGeneratorMIPS::CodeGeneratorMIPS(HGraph* graph, const MipsInstructionSetFeatures& isa_features, const CompilerOptions& compiler_options, @@ -1310,10 +1840,26 @@ void CodeGeneratorMIPS::InvokeRuntime(QuickEntrypointEnum entrypoint, uint32_t dex_pc, SlowPathCode* slow_path) { ValidateInvokeRuntime(entrypoint, instruction, slow_path); + GenerateInvokeRuntime(GetThreadOffset<kMipsPointerSize>(entrypoint).Int32Value(), + IsDirectEntrypoint(entrypoint)); + if (EntrypointRequiresStackMap(entrypoint)) { + RecordPcInfo(instruction, dex_pc, slow_path); + } +} + +void CodeGeneratorMIPS::InvokeRuntimeWithoutRecordingPcInfo(int32_t entry_point_offset, + HInstruction* instruction, + SlowPathCode* slow_path, + bool direct) { + ValidateInvokeRuntimeWithoutRecordingPcInfo(instruction, slow_path); + GenerateInvokeRuntime(entry_point_offset, direct); +} + +void CodeGeneratorMIPS::GenerateInvokeRuntime(int32_t entry_point_offset, bool direct) { bool reordering = __ SetReorder(false); - __ LoadFromOffset(kLoadWord, T9, TR, GetThreadOffset<kMipsPointerSize>(entrypoint).Int32Value()); + __ LoadFromOffset(kLoadWord, T9, TR, entry_point_offset); __ Jalr(T9); - if (IsDirectEntrypoint(entrypoint)) { + if (direct) { // Reserve argument space on stack (for $a0-$a3) for // entrypoints that directly reference native implementations. // Called function may use this space to store $a0-$a3 regs. @@ -1323,9 +1869,6 @@ void CodeGeneratorMIPS::InvokeRuntime(QuickEntrypointEnum entrypoint, __ Nop(); // In delay slot. 
} __ SetReorder(reordering); - if (EntrypointRequiresStackMap(entrypoint)) { - RecordPcInfo(instruction, dex_pc, slow_path); - } } void InstructionCodeGeneratorMIPS::GenerateClassInitializationCheck(SlowPathCodeMIPS* slow_path, @@ -1885,14 +2428,31 @@ void InstructionCodeGeneratorMIPS::VisitAnd(HAnd* instruction) { } void LocationsBuilderMIPS::VisitArrayGet(HArrayGet* instruction) { + Primitive::Type type = instruction->GetType(); + bool object_array_get_with_read_barrier = + kEmitCompilerReadBarrier && (type == Primitive::kPrimNot); LocationSummary* locations = - new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall); + new (GetGraph()->GetArena()) LocationSummary(instruction, + object_array_get_with_read_barrier + ? LocationSummary::kCallOnSlowPath + : LocationSummary::kNoCall); locations->SetInAt(0, Location::RequiresRegister()); locations->SetInAt(1, Location::RegisterOrConstant(instruction->InputAt(1))); - if (Primitive::IsFloatingPointType(instruction->GetType())) { + if (Primitive::IsFloatingPointType(type)) { locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap); } else { - locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); + // The output overlaps in the case of an object array get with + // read barriers enabled: we do not want the move to overwrite the + // array's location, as we need it to emit the read barrier. + locations->SetOut(Location::RequiresRegister(), + object_array_get_with_read_barrier + ? Location::kOutputOverlap + : Location::kNoOutputOverlap); + } + // We need a temporary register for the read barrier marking slow + // path in CodeGeneratorMIPS::GenerateArrayLoadWithBakerReadBarrier. + if (object_array_get_with_read_barrier && kUseBakerReadBarrier) { + locations->AddTemp(Location::RequiresRegister()); } } @@ -1905,7 +2465,9 @@ static auto GetImplicitNullChecker(HInstruction* instruction, CodeGeneratorMIPS* void InstructionCodeGeneratorMIPS::VisitArrayGet(HArrayGet* instruction) { LocationSummary* locations = instruction->GetLocations(); - Register obj = locations->InAt(0).AsRegister<Register>(); + Location obj_loc = locations->InAt(0); + Register obj = obj_loc.AsRegister<Register>(); + Location out_loc = locations->Out(); Location index = locations->InAt(1); uint32_t data_offset = CodeGenerator::GetArrayDataOffset(instruction); auto null_checker = GetImplicitNullChecker(instruction, codegen_); @@ -1915,7 +2477,7 @@ void InstructionCodeGeneratorMIPS::VisitArrayGet(HArrayGet* instruction) { instruction->IsStringCharAt(); switch (type) { case Primitive::kPrimBoolean: { - Register out = locations->Out().AsRegister<Register>(); + Register out = out_loc.AsRegister<Register>(); if (index.IsConstant()) { size_t offset = (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_1) + data_offset; @@ -1928,7 +2490,7 @@ void InstructionCodeGeneratorMIPS::VisitArrayGet(HArrayGet* instruction) { } case Primitive::kPrimByte: { - Register out = locations->Out().AsRegister<Register>(); + Register out = out_loc.AsRegister<Register>(); if (index.IsConstant()) { size_t offset = (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_1) + data_offset; @@ -1941,7 +2503,7 @@ void InstructionCodeGeneratorMIPS::VisitArrayGet(HArrayGet* instruction) { } case Primitive::kPrimShort: { - Register out = locations->Out().AsRegister<Register>(); + Register out = out_loc.AsRegister<Register>(); if (index.IsConstant()) { size_t offset = (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_2) + data_offset; 
@@ -1955,7 +2517,7 @@ void InstructionCodeGeneratorMIPS::VisitArrayGet(HArrayGet* instruction) { } case Primitive::kPrimChar: { - Register out = locations->Out().AsRegister<Register>(); + Register out = out_loc.AsRegister<Register>(); if (maybe_compressed_char_at) { uint32_t count_offset = mirror::String::CountOffset().Uint32Value(); __ LoadFromOffset(kLoadWord, TMP, obj, count_offset, null_checker); @@ -2008,10 +2570,9 @@ void InstructionCodeGeneratorMIPS::VisitArrayGet(HArrayGet* instruction) { break; } - case Primitive::kPrimInt: - case Primitive::kPrimNot: { + case Primitive::kPrimInt: { DCHECK_EQ(sizeof(mirror::HeapReference<mirror::Object>), sizeof(int32_t)); - Register out = locations->Out().AsRegister<Register>(); + Register out = out_loc.AsRegister<Register>(); if (index.IsConstant()) { size_t offset = (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_4) + data_offset; @@ -2024,8 +2585,53 @@ void InstructionCodeGeneratorMIPS::VisitArrayGet(HArrayGet* instruction) { break; } + case Primitive::kPrimNot: { + static_assert( + sizeof(mirror::HeapReference<mirror::Object>) == sizeof(int32_t), + "art::mirror::HeapReference<art::mirror::Object> and int32_t have different sizes."); + // /* HeapReference<Object> */ out = + // *(obj + data_offset + index * sizeof(HeapReference<Object>)) + if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) { + Location temp = locations->GetTemp(0); + // Note that a potential implicit null check is handled in this + // CodeGeneratorMIPS::GenerateArrayLoadWithBakerReadBarrier call. + codegen_->GenerateArrayLoadWithBakerReadBarrier(instruction, + out_loc, + obj, + data_offset, + index, + temp, + /* needs_null_check */ true); + } else { + Register out = out_loc.AsRegister<Register>(); + if (index.IsConstant()) { + size_t offset = + (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_4) + data_offset; + __ LoadFromOffset(kLoadWord, out, obj, offset, null_checker); + // If read barriers are enabled, emit read barriers other than + // Baker's using a slow path (and also unpoison the loaded + // reference, if heap poisoning is enabled). + codegen_->MaybeGenerateReadBarrierSlow(instruction, out_loc, out_loc, obj_loc, offset); + } else { + __ Sll(TMP, index.AsRegister<Register>(), TIMES_4); + __ Addu(TMP, obj, TMP); + __ LoadFromOffset(kLoadWord, out, TMP, data_offset, null_checker); + // If read barriers are enabled, emit read barriers other than + // Baker's using a slow path (and also unpoison the loaded + // reference, if heap poisoning is enabled). 
+ codegen_->MaybeGenerateReadBarrierSlow(instruction, + out_loc, + out_loc, + obj_loc, + data_offset, + index); + } + } + break; + } + case Primitive::kPrimLong: { - Register out = locations->Out().AsRegisterPairLow<Register>(); + Register out = out_loc.AsRegisterPairLow<Register>(); if (index.IsConstant()) { size_t offset = (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_8) + data_offset; @@ -2039,7 +2645,7 @@ void InstructionCodeGeneratorMIPS::VisitArrayGet(HArrayGet* instruction) { } case Primitive::kPrimFloat: { - FRegister out = locations->Out().AsFpuRegister<FRegister>(); + FRegister out = out_loc.AsFpuRegister<FRegister>(); if (index.IsConstant()) { size_t offset = (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_4) + data_offset; @@ -2053,7 +2659,7 @@ void InstructionCodeGeneratorMIPS::VisitArrayGet(HArrayGet* instruction) { } case Primitive::kPrimDouble: { - FRegister out = locations->Out().AsFpuRegister<FRegister>(); + FRegister out = out_loc.AsFpuRegister<FRegister>(); if (index.IsConstant()) { size_t offset = (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_8) + data_offset; @@ -2070,11 +2676,6 @@ void InstructionCodeGeneratorMIPS::VisitArrayGet(HArrayGet* instruction) { LOG(FATAL) << "Unreachable type " << instruction->GetType(); UNREACHABLE(); } - - if (type == Primitive::kPrimNot) { - Register out = locations->Out().AsRegister<Register>(); - __ MaybeUnpoisonHeapReference(out); - } } void LocationsBuilderMIPS::VisitArrayLength(HArrayLength* instruction) { @@ -2116,23 +2717,28 @@ Location LocationsBuilderMIPS::FpuRegisterOrConstantForStore(HInstruction* instr } void LocationsBuilderMIPS::VisitArraySet(HArraySet* instruction) { - bool needs_runtime_call = instruction->NeedsTypeCheck(); + Primitive::Type value_type = instruction->GetComponentType(); + + bool needs_write_barrier = + CodeGenerator::StoreNeedsWriteBarrier(value_type, instruction->GetValue()); + bool may_need_runtime_call_for_type_check = instruction->NeedsTypeCheck(); + LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary( instruction, - needs_runtime_call ? LocationSummary::kCallOnMainOnly : LocationSummary::kNoCall); - if (needs_runtime_call) { - InvokeRuntimeCallingConvention calling_convention; - locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0))); - locations->SetInAt(1, Location::RegisterLocation(calling_convention.GetRegisterAt(1))); - locations->SetInAt(2, Location::RegisterLocation(calling_convention.GetRegisterAt(2))); + may_need_runtime_call_for_type_check ? + LocationSummary::kCallOnSlowPath : + LocationSummary::kNoCall); + + locations->SetInAt(0, Location::RequiresRegister()); + locations->SetInAt(1, Location::RegisterOrConstant(instruction->InputAt(1))); + if (Primitive::IsFloatingPointType(instruction->InputAt(2)->GetType())) { + locations->SetInAt(2, FpuRegisterOrConstantForStore(instruction->InputAt(2))); } else { - locations->SetInAt(0, Location::RequiresRegister()); - locations->SetInAt(1, Location::RegisterOrConstant(instruction->InputAt(1))); - if (Primitive::IsFloatingPointType(instruction->InputAt(2)->GetType())) { - locations->SetInAt(2, FpuRegisterOrConstantForStore(instruction->InputAt(2))); - } else { - locations->SetInAt(2, RegisterOrZeroConstant(instruction->InputAt(2))); - } + locations->SetInAt(2, RegisterOrZeroConstant(instruction->InputAt(2))); + } + if (needs_write_barrier) { + // Temporary register for the write barrier. 
+ locations->AddTemp(Location::RequiresRegister()); // Possibly used for ref. poisoning too. } } @@ -2142,7 +2748,7 @@ void InstructionCodeGeneratorMIPS::VisitArraySet(HArraySet* instruction) { Location index = locations->InAt(1); Location value_location = locations->InAt(2); Primitive::Type value_type = instruction->GetComponentType(); - bool needs_runtime_call = locations->WillCall(); + bool may_need_runtime_call_for_type_check = instruction->NeedsTypeCheck(); bool needs_write_barrier = CodeGenerator::StoreNeedsWriteBarrier(value_type, instruction->GetValue()); auto null_checker = GetImplicitNullChecker(instruction, codegen_); @@ -2186,9 +2792,27 @@ void InstructionCodeGeneratorMIPS::VisitArraySet(HArraySet* instruction) { break; } - case Primitive::kPrimInt: + case Primitive::kPrimInt: { + uint32_t data_offset = mirror::Array::DataOffset(sizeof(int32_t)).Uint32Value(); + if (index.IsConstant()) { + data_offset += index.GetConstant()->AsIntConstant()->GetValue() << TIMES_4; + } else { + __ Sll(base_reg, index.AsRegister<Register>(), TIMES_4); + __ Addu(base_reg, obj, base_reg); + } + if (value_location.IsConstant()) { + int32_t value = CodeGenerator::GetInt32ValueOf(value_location.GetConstant()); + __ StoreConstToOffset(kStoreWord, value, base_reg, data_offset, TMP, null_checker); + } else { + Register value = value_location.AsRegister<Register>(); + __ StoreToOffset(kStoreWord, value, base_reg, data_offset, null_checker); + } + break; + } + case Primitive::kPrimNot: { - if (!needs_runtime_call) { + if (value_location.IsConstant()) { + // Just setting null. uint32_t data_offset = mirror::Array::DataOffset(sizeof(int32_t)).Uint32Value(); if (index.IsConstant()) { data_offset += index.GetConstant()->AsIntConstant()->GetValue() << TIMES_4; @@ -2196,48 +2820,110 @@ void InstructionCodeGeneratorMIPS::VisitArraySet(HArraySet* instruction) { __ Sll(base_reg, index.AsRegister<Register>(), TIMES_4); __ Addu(base_reg, obj, base_reg); } - if (value_location.IsConstant()) { - int32_t value = CodeGenerator::GetInt32ValueOf(value_location.GetConstant()); - __ StoreConstToOffset(kStoreWord, value, base_reg, data_offset, TMP, null_checker); - DCHECK(!needs_write_barrier); - } else { - Register value = value_location.AsRegister<Register>(); - if (kPoisonHeapReferences && needs_write_barrier) { - // Note that in the case where `value` is a null reference, - // we do not enter this block, as a null reference does not - // need poisoning. - DCHECK_EQ(value_type, Primitive::kPrimNot); - // Use Sw() instead of StoreToOffset() in order to be able to - // hold the poisoned reference in AT and thus avoid allocating - // yet another temporary register. 
- if (index.IsConstant()) { - if (!IsInt<16>(static_cast<int32_t>(data_offset))) { - int16_t low = Low16Bits(data_offset); - uint32_t high = data_offset - low; - __ Addiu32(TMP, obj, high); - base_reg = TMP; - data_offset = low; - } - } else { - DCHECK(IsInt<16>(static_cast<int32_t>(data_offset))); - } - __ PoisonHeapReference(AT, value); - __ Sw(AT, base_reg, data_offset); - null_checker(); + int32_t value = CodeGenerator::GetInt32ValueOf(value_location.GetConstant()); + DCHECK_EQ(value, 0); + __ StoreConstToOffset(kStoreWord, value, base_reg, data_offset, TMP, null_checker); + DCHECK(!needs_write_barrier); + DCHECK(!may_need_runtime_call_for_type_check); + break; + } + + DCHECK(needs_write_barrier); + Register value = value_location.AsRegister<Register>(); + Register temp1 = locations->GetTemp(0).AsRegister<Register>(); + Register temp2 = TMP; // Doesn't need to survive slow path. + uint32_t class_offset = mirror::Object::ClassOffset().Int32Value(); + uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value(); + uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value(); + MipsLabel done; + SlowPathCodeMIPS* slow_path = nullptr; + + if (may_need_runtime_call_for_type_check) { + slow_path = new (GetGraph()->GetArena()) ArraySetSlowPathMIPS(instruction); + codegen_->AddSlowPath(slow_path); + if (instruction->GetValueCanBeNull()) { + MipsLabel non_zero; + __ Bnez(value, &non_zero); + uint32_t data_offset = mirror::Array::DataOffset(sizeof(int32_t)).Uint32Value(); + if (index.IsConstant()) { + data_offset += index.GetConstant()->AsIntConstant()->GetValue() << TIMES_4; } else { - __ StoreToOffset(kStoreWord, value, base_reg, data_offset, null_checker); - } - if (needs_write_barrier) { - DCHECK_EQ(value_type, Primitive::kPrimNot); - codegen_->MarkGCCard(obj, value, instruction->GetValueCanBeNull()); + __ Sll(base_reg, index.AsRegister<Register>(), TIMES_4); + __ Addu(base_reg, obj, base_reg); } + __ StoreToOffset(kStoreWord, value, base_reg, data_offset, null_checker); + __ B(&done); + __ Bind(&non_zero); } + + // Note that when read barriers are enabled, the type checks + // are performed without read barriers. This is fine, even in + // the case where a class object is in the from-space after + // the flip, as a comparison involving such a type would not + // produce a false positive; it may of course produce a false + // negative, in which case we would take the ArraySet slow + // path. + + // /* HeapReference<Class> */ temp1 = obj->klass_ + __ LoadFromOffset(kLoadWord, temp1, obj, class_offset, null_checker); + __ MaybeUnpoisonHeapReference(temp1); + + // /* HeapReference<Class> */ temp1 = temp1->component_type_ + __ LoadFromOffset(kLoadWord, temp1, temp1, component_offset); + // /* HeapReference<Class> */ temp2 = value->klass_ + __ LoadFromOffset(kLoadWord, temp2, value, class_offset); + // If heap poisoning is enabled, no need to unpoison `temp1` + // nor `temp2`, as we are comparing two poisoned references. + + if (instruction->StaticTypeOfArrayIsObjectArray()) { + MipsLabel do_put; + __ Beq(temp1, temp2, &do_put); + // If heap poisoning is enabled, the `temp1` reference has + // not been unpoisoned yet; unpoison it now. + __ MaybeUnpoisonHeapReference(temp1); + + // /* HeapReference<Class> */ temp1 = temp1->super_class_ + __ LoadFromOffset(kLoadWord, temp1, temp1, super_offset); + // If heap poisoning is enabled, no need to unpoison + // `temp1`, as we are comparing against null below. 
+ __ Bnez(temp1, slow_path->GetEntryLabel()); + __ Bind(&do_put); + } else { + __ Bne(temp1, temp2, slow_path->GetEntryLabel()); + } + } + + Register source = value; + if (kPoisonHeapReferences) { + // Note that in the case where `value` is a null reference, + // we do not enter this block, as a null reference does not + // need poisoning. + __ Move(temp1, value); + __ PoisonHeapReference(temp1); + source = temp1; + } + + uint32_t data_offset = mirror::Array::DataOffset(sizeof(int32_t)).Uint32Value(); + if (index.IsConstant()) { + data_offset += index.GetConstant()->AsIntConstant()->GetValue() << TIMES_4; } else { - DCHECK_EQ(value_type, Primitive::kPrimNot); - // Note: if heap poisoning is enabled, pAputObject takes care - // of poisoning the reference. - codegen_->InvokeRuntime(kQuickAputObject, instruction, instruction->GetDexPc()); - CheckEntrypointTypes<kQuickAputObject, void, mirror::Array*, int32_t, mirror::Object*>(); + __ Sll(base_reg, index.AsRegister<Register>(), TIMES_4); + __ Addu(base_reg, obj, base_reg); + } + __ StoreToOffset(kStoreWord, source, base_reg, data_offset); + + if (!may_need_runtime_call_for_type_check) { + codegen_->MaybeRecordImplicitNullCheck(instruction); + } + + codegen_->MarkGCCard(obj, value, instruction->GetValueCanBeNull()); + + if (done.IsLinked()) { + __ Bind(&done); + } + + if (slow_path != nullptr) { + __ Bind(slow_path->GetExitLabel()); } break; } @@ -2327,6 +3013,23 @@ void InstructionCodeGeneratorMIPS::VisitBoundsCheck(HBoundsCheck* instruction) { __ Bgeu(index, length, slow_path->GetEntryLabel()); } +// Temp is used for read barrier. +static size_t NumberOfInstanceOfTemps(TypeCheckKind type_check_kind) { + if (kEmitCompilerReadBarrier && + (kUseBakerReadBarrier || + type_check_kind == TypeCheckKind::kAbstractClassCheck || + type_check_kind == TypeCheckKind::kClassHierarchyCheck || + type_check_kind == TypeCheckKind::kArrayObjectCheck)) { + return 1; + } + return 0; +} + +// Extra temp is used for read barrier. +static size_t NumberOfCheckCastTemps(TypeCheckKind type_check_kind) { + return 1 + NumberOfInstanceOfTemps(type_check_kind); +} + void LocationsBuilderMIPS::VisitCheckCast(HCheckCast* instruction) { LocationSummary::CallKind call_kind = LocationSummary::kNoCall; bool throws_into_catch = instruction->CanThrowIntoCatchBlock(); @@ -2337,7 +3040,7 @@ void LocationsBuilderMIPS::VisitCheckCast(HCheckCast* instruction) { case TypeCheckKind::kAbstractClassCheck: case TypeCheckKind::kClassHierarchyCheck: case TypeCheckKind::kArrayObjectCheck: - call_kind = throws_into_catch + call_kind = (throws_into_catch || kEmitCompilerReadBarrier) ? LocationSummary::kCallOnSlowPath : LocationSummary::kNoCall; // In fact, call on a fatal (non-returning) slow path. 
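The array-store type check emitted above in VisitArraySet can be summarized in C++; the struct below is an illustrative stand-in for mirror::Class, mimicking the component_offset/super_offset loads, not ART's real API:

    // Illustrative stand-in for mirror::Class.
    struct Class {
      Class* component_type;
      Class* super_class;
    };

    // True when the fast path may store directly; false when the
    // ArraySetSlowPathMIPS entry (kQuickAputObject) must be taken.
    bool FastPathStoreOk(const Class* array_klass, const Class* value_klass,
                         bool static_type_is_object_array) {
      const Class* component = array_klass->component_type;
      if (component == value_klass) {
        return true;  // exact component match: Beq(temp1, temp2, &do_put)
      }
      // A component whose super class is null is java.lang.Object, so any
      // reference may be stored (the Bnez(temp1, slow_path) check above).
      return static_type_is_object_array && component->super_class == nullptr;
    }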
break; @@ -2351,15 +3054,20 @@ void LocationsBuilderMIPS::VisitCheckCast(HCheckCast* instruction) { LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction, call_kind); locations->SetInAt(0, Location::RequiresRegister()); locations->SetInAt(1, Location::RequiresRegister()); - locations->AddTemp(Location::RequiresRegister()); + locations->AddRegisterTemps(NumberOfCheckCastTemps(type_check_kind)); } void InstructionCodeGeneratorMIPS::VisitCheckCast(HCheckCast* instruction) { TypeCheckKind type_check_kind = instruction->GetTypeCheckKind(); LocationSummary* locations = instruction->GetLocations(); - Register obj = locations->InAt(0).AsRegister<Register>(); + Location obj_loc = locations->InAt(0); + Register obj = obj_loc.AsRegister<Register>(); Register cls = locations->InAt(1).AsRegister<Register>(); - Register temp = locations->GetTemp(0).AsRegister<Register>(); + Location temp_loc = locations->GetTemp(0); + Register temp = temp_loc.AsRegister<Register>(); + const size_t num_temps = NumberOfCheckCastTemps(type_check_kind); + DCHECK_LE(num_temps, 2u); + Location maybe_temp2_loc = (num_temps >= 2) ? locations->GetTemp(1) : Location::NoLocation(); const uint32_t class_offset = mirror::Object::ClassOffset().Int32Value(); const uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value(); const uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value(); @@ -2396,8 +3104,12 @@ void InstructionCodeGeneratorMIPS::VisitCheckCast(HCheckCast* instruction) { case TypeCheckKind::kExactCheck: case TypeCheckKind::kArrayCheck: { // /* HeapReference<Class> */ temp = obj->klass_ - __ LoadFromOffset(kLoadWord, temp, obj, class_offset); - __ MaybeUnpoisonHeapReference(temp); + GenerateReferenceLoadTwoRegisters(instruction, + temp_loc, + obj_loc, + class_offset, + maybe_temp2_loc, + kWithoutReadBarrier); // Jump to slow path for throwing the exception or doing a // more involved array check. __ Bne(temp, cls, slow_path->GetEntryLabel()); @@ -2406,15 +3118,22 @@ void InstructionCodeGeneratorMIPS::VisitCheckCast(HCheckCast* instruction) { case TypeCheckKind::kAbstractClassCheck: { // /* HeapReference<Class> */ temp = obj->klass_ - __ LoadFromOffset(kLoadWord, temp, obj, class_offset); - __ MaybeUnpoisonHeapReference(temp); + GenerateReferenceLoadTwoRegisters(instruction, + temp_loc, + obj_loc, + class_offset, + maybe_temp2_loc, + kWithoutReadBarrier); // If the class is abstract, we eagerly fetch the super class of the // object to avoid doing a comparison we know will fail. MipsLabel loop; __ Bind(&loop); // /* HeapReference<Class> */ temp = temp->super_class_ - __ LoadFromOffset(kLoadWord, temp, temp, super_offset); - __ MaybeUnpoisonHeapReference(temp); + GenerateReferenceLoadOneRegister(instruction, + temp_loc, + super_offset, + maybe_temp2_loc, + kWithoutReadBarrier); // If the class reference currently in `temp` is null, jump to the slow path to throw the // exception. __ Beqz(temp, slow_path->GetEntryLabel()); @@ -2425,15 +3144,22 @@ void InstructionCodeGeneratorMIPS::VisitCheckCast(HCheckCast* instruction) { case TypeCheckKind::kClassHierarchyCheck: { // /* HeapReference<Class> */ temp = obj->klass_ - __ LoadFromOffset(kLoadWord, temp, obj, class_offset); - __ MaybeUnpoisonHeapReference(temp); + GenerateReferenceLoadTwoRegisters(instruction, + temp_loc, + obj_loc, + class_offset, + maybe_temp2_loc, + kWithoutReadBarrier); // Walk over the class hierarchy to find a match. 
MipsLabel loop; __ Bind(&loop); __ Beq(temp, cls, &done); // /* HeapReference<Class> */ temp = temp->super_class_ - __ LoadFromOffset(kLoadWord, temp, temp, super_offset); - __ MaybeUnpoisonHeapReference(temp); + GenerateReferenceLoadOneRegister(instruction, + temp_loc, + super_offset, + maybe_temp2_loc, + kWithoutReadBarrier); // If the class reference currently in `temp` is null, jump to the slow path to throw the // exception. Otherwise, jump to the beginning of the loop. __ Bnez(temp, &loop); @@ -2443,14 +3169,21 @@ void InstructionCodeGeneratorMIPS::VisitCheckCast(HCheckCast* instruction) { case TypeCheckKind::kArrayObjectCheck: { // /* HeapReference<Class> */ temp = obj->klass_ - __ LoadFromOffset(kLoadWord, temp, obj, class_offset); - __ MaybeUnpoisonHeapReference(temp); + GenerateReferenceLoadTwoRegisters(instruction, + temp_loc, + obj_loc, + class_offset, + maybe_temp2_loc, + kWithoutReadBarrier); // Do an exact check. __ Beq(temp, cls, &done); // Otherwise, we need to check that the object's class is a non-primitive array. // /* HeapReference<Class> */ temp = temp->component_type_ - __ LoadFromOffset(kLoadWord, temp, temp, component_offset); - __ MaybeUnpoisonHeapReference(temp); + GenerateReferenceLoadOneRegister(instruction, + temp_loc, + component_offset, + maybe_temp2_loc, + kWithoutReadBarrier); // If the component type is null, jump to the slow path to throw the exception. __ Beqz(temp, slow_path->GetEntryLabel()); // Otherwise, the object is indeed an array, further check that this component @@ -2477,11 +3210,19 @@ void InstructionCodeGeneratorMIPS::VisitCheckCast(HCheckCast* instruction) { // Avoid read barriers to improve performance of the fast path. We can not get false // positives by doing this. // /* HeapReference<Class> */ temp = obj->klass_ - __ LoadFromOffset(kLoadWord, temp, obj, class_offset); - __ MaybeUnpoisonHeapReference(temp); + GenerateReferenceLoadTwoRegisters(instruction, + temp_loc, + obj_loc, + class_offset, + maybe_temp2_loc, + kWithoutReadBarrier); // /* HeapReference<Class> */ temp = temp->iftable_ - __ LoadFromOffset(kLoadWord, temp, temp, iftable_offset); - __ MaybeUnpoisonHeapReference(temp); + GenerateReferenceLoadTwoRegisters(instruction, + temp_loc, + temp_loc, + iftable_offset, + maybe_temp2_loc, + kWithoutReadBarrier); // Iftable is never null. __ Lw(TMP, temp, array_length_offset); // Loop through the iftable and check if any class matches. @@ -5032,8 +5773,15 @@ void LocationsBuilderMIPS::HandleFieldGet(HInstruction* instruction, const Field Primitive::Type field_type = field_info.GetFieldType(); bool is_wide = (field_type == Primitive::kPrimLong) || (field_type == Primitive::kPrimDouble); bool generate_volatile = field_info.IsVolatile() && is_wide; + bool object_field_get_with_read_barrier = + kEmitCompilerReadBarrier && (field_type == Primitive::kPrimNot); LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary( - instruction, generate_volatile ? LocationSummary::kCallOnMainOnly : LocationSummary::kNoCall); + instruction, + generate_volatile + ? LocationSummary::kCallOnMainOnly + : (object_field_get_with_read_barrier + ? 
LocationSummary::kCallOnSlowPath + : LocationSummary::kNoCall)); locations->SetInAt(0, Location::RequiresRegister()); if (generate_volatile) { @@ -5054,7 +5802,18 @@ void LocationsBuilderMIPS::HandleFieldGet(HInstruction* instruction, const Field if (Primitive::IsFloatingPointType(instruction->GetType())) { locations->SetOut(Location::RequiresFpuRegister()); } else { - locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); + // The output overlaps in the case of an object field get with + // read barriers enabled: we do not want the move to overwrite the + // object's location, as we need it to emit the read barrier. + locations->SetOut(Location::RequiresRegister(), + object_field_get_with_read_barrier + ? Location::kOutputOverlap + : Location::kNoOutputOverlap); + } + if (object_field_get_with_read_barrier && kUseBakerReadBarrier) { + // We need a temporary register for the read barrier marking slow + // path in CodeGeneratorMIPS::GenerateFieldLoadWithBakerReadBarrier. + locations->AddTemp(Location::RequiresRegister()); } } } @@ -5064,7 +5823,9 @@ void InstructionCodeGeneratorMIPS::HandleFieldGet(HInstruction* instruction, uint32_t dex_pc) { Primitive::Type type = field_info.GetFieldType(); LocationSummary* locations = instruction->GetLocations(); - Register obj = locations->InAt(0).AsRegister<Register>(); + Location obj_loc = locations->InAt(0); + Register obj = obj_loc.AsRegister<Register>(); + Location dst_loc = locations->Out(); LoadOperandType load_type = kLoadUnsignedByte; bool is_volatile = field_info.IsVolatile(); uint32_t offset = field_info.GetFieldOffset().Uint32Value(); @@ -5107,40 +5868,61 @@ void InstructionCodeGeneratorMIPS::HandleFieldGet(HInstruction* instruction, CheckEntrypointTypes<kQuickA64Load, int64_t, volatile const int64_t*>(); if (type == Primitive::kPrimDouble) { // FP results are returned in core registers. Need to move them. - Location out = locations->Out(); - if (out.IsFpuRegister()) { - __ Mtc1(locations->GetTemp(1).AsRegister<Register>(), out.AsFpuRegister<FRegister>()); + if (dst_loc.IsFpuRegister()) { + __ Mtc1(locations->GetTemp(1).AsRegister<Register>(), dst_loc.AsFpuRegister<FRegister>()); __ MoveToFpuHigh(locations->GetTemp(2).AsRegister<Register>(), - out.AsFpuRegister<FRegister>()); + dst_loc.AsFpuRegister<FRegister>()); } else { - DCHECK(out.IsDoubleStackSlot()); + DCHECK(dst_loc.IsDoubleStackSlot()); __ StoreToOffset(kStoreWord, locations->GetTemp(1).AsRegister<Register>(), SP, - out.GetStackIndex()); + dst_loc.GetStackIndex()); __ StoreToOffset(kStoreWord, locations->GetTemp(2).AsRegister<Register>(), SP, - out.GetStackIndex() + 4); + dst_loc.GetStackIndex() + 4); } } } else { - if (!Primitive::IsFloatingPointType(type)) { + if (type == Primitive::kPrimNot) { + // /* HeapReference<Object> */ dst = *(obj + offset) + if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) { + Location temp_loc = locations->GetTemp(0); + // Note that a potential implicit null check is handled in this + // CodeGeneratorMIPS::GenerateFieldLoadWithBakerReadBarrier call. 
+ codegen_->GenerateFieldLoadWithBakerReadBarrier(instruction, + dst_loc, + obj, + offset, + temp_loc, + /* needs_null_check */ true); + if (is_volatile) { + GenerateMemoryBarrier(MemBarrierKind::kLoadAny); + } + } else { + __ LoadFromOffset(kLoadWord, dst_loc.AsRegister<Register>(), obj, offset, null_checker); + if (is_volatile) { + GenerateMemoryBarrier(MemBarrierKind::kLoadAny); + } + // If read barriers are enabled, emit read barriers other than + // Baker's using a slow path (and also unpoison the loaded + // reference, if heap poisoning is enabled). + codegen_->MaybeGenerateReadBarrierSlow(instruction, dst_loc, dst_loc, obj_loc, offset); + } + } else if (!Primitive::IsFloatingPointType(type)) { Register dst; if (type == Primitive::kPrimLong) { - DCHECK(locations->Out().IsRegisterPair()); - dst = locations->Out().AsRegisterPairLow<Register>(); + DCHECK(dst_loc.IsRegisterPair()); + dst = dst_loc.AsRegisterPairLow<Register>(); } else { - DCHECK(locations->Out().IsRegister()); - dst = locations->Out().AsRegister<Register>(); + DCHECK(dst_loc.IsRegister()); + dst = dst_loc.AsRegister<Register>(); } __ LoadFromOffset(load_type, dst, obj, offset, null_checker); - if (type == Primitive::kPrimNot) { - __ MaybeUnpoisonHeapReference(dst); - } } else { - DCHECK(locations->Out().IsFpuRegister()); - FRegister dst = locations->Out().AsFpuRegister<FRegister>(); + DCHECK(dst_loc.IsFpuRegister()); + FRegister dst = dst_loc.AsFpuRegister<FRegister>(); if (type == Primitive::kPrimFloat) { __ LoadSFromOffset(dst, obj, offset, null_checker); } else { @@ -5149,7 +5931,9 @@ void InstructionCodeGeneratorMIPS::HandleFieldGet(HInstruction* instruction, } } - if (is_volatile) { + // Memory barriers, in the case of references, are handled in the + // previous switch statement. + if (is_volatile && (type != Primitive::kPrimNot)) { GenerateMemoryBarrier(MemBarrierKind::kLoadAny); } } @@ -5290,7 +6074,6 @@ void InstructionCodeGeneratorMIPS::HandleFieldSet(HInstruction* instruction, } } - // TODO: memory barriers? if (needs_write_barrier) { Register src = value_location.AsRegister<Register>(); codegen_->MarkGCCard(obj, src, value_can_be_null); @@ -5320,14 +6103,133 @@ void InstructionCodeGeneratorMIPS::VisitInstanceFieldSet(HInstanceFieldSet* inst instruction->GetValueCanBeNull()); } -void InstructionCodeGeneratorMIPS::GenerateGcRootFieldLoad( - HInstruction* instruction ATTRIBUTE_UNUSED, - Location root, - Register obj, - uint32_t offset) { +void InstructionCodeGeneratorMIPS::GenerateReferenceLoadOneRegister( + HInstruction* instruction, + Location out, + uint32_t offset, + Location maybe_temp, + ReadBarrierOption read_barrier_option) { + Register out_reg = out.AsRegister<Register>(); + if (read_barrier_option == kWithReadBarrier) { + CHECK(kEmitCompilerReadBarrier); + DCHECK(maybe_temp.IsRegister()) << maybe_temp; + if (kUseBakerReadBarrier) { + // Load with fast path based Baker's read barrier. + // /* HeapReference<Object> */ out = *(out + offset) + codegen_->GenerateFieldLoadWithBakerReadBarrier(instruction, + out, + out_reg, + offset, + maybe_temp, + /* needs_null_check */ false); + } else { + // Load with slow path based read barrier. + // Save the value of `out` into `maybe_temp` before overwriting it + // in the following move operation, as we will need it for the + // read barrier below. 
+ __ Move(maybe_temp.AsRegister<Register>(), out_reg); + // /* HeapReference<Object> */ out = *(out + offset) + __ LoadFromOffset(kLoadWord, out_reg, out_reg, offset); + codegen_->GenerateReadBarrierSlow(instruction, out, out, maybe_temp, offset); + } + } else { + // Plain load with no read barrier. + // /* HeapReference<Object> */ out = *(out + offset) + __ LoadFromOffset(kLoadWord, out_reg, out_reg, offset); + __ MaybeUnpoisonHeapReference(out_reg); + } +} + +void InstructionCodeGeneratorMIPS::GenerateReferenceLoadTwoRegisters( + HInstruction* instruction, + Location out, + Location obj, + uint32_t offset, + Location maybe_temp, + ReadBarrierOption read_barrier_option) { + Register out_reg = out.AsRegister<Register>(); + Register obj_reg = obj.AsRegister<Register>(); + if (read_barrier_option == kWithReadBarrier) { + CHECK(kEmitCompilerReadBarrier); + if (kUseBakerReadBarrier) { + DCHECK(maybe_temp.IsRegister()) << maybe_temp; + // Load with fast path based Baker's read barrier. + // /* HeapReference<Object> */ out = *(obj + offset) + codegen_->GenerateFieldLoadWithBakerReadBarrier(instruction, + out, + obj_reg, + offset, + maybe_temp, + /* needs_null_check */ false); + } else { + // Load with slow path based read barrier. + // /* HeapReference<Object> */ out = *(obj + offset) + __ LoadFromOffset(kLoadWord, out_reg, obj_reg, offset); + codegen_->GenerateReadBarrierSlow(instruction, out, out, obj, offset); + } + } else { + // Plain load with no read barrier. + // /* HeapReference<Object> */ out = *(obj + offset) + __ LoadFromOffset(kLoadWord, out_reg, obj_reg, offset); + __ MaybeUnpoisonHeapReference(out_reg); + } +} + +void InstructionCodeGeneratorMIPS::GenerateGcRootFieldLoad(HInstruction* instruction, + Location root, + Register obj, + uint32_t offset, + ReadBarrierOption read_barrier_option) { Register root_reg = root.AsRegister<Register>(); - if (kEmitCompilerReadBarrier) { - UNIMPLEMENTED(FATAL) << "for read barrier"; + if (read_barrier_option == kWithReadBarrier) { + DCHECK(kEmitCompilerReadBarrier); + if (kUseBakerReadBarrier) { + // Fast path implementation of art::ReadBarrier::BarrierForRoot when + // Baker's read barrier are used: + // + // root = obj.field; + // temp = Thread::Current()->pReadBarrierMarkReg ## root.reg() + // if (temp != null) { + // root = temp(root) + // } + + // /* GcRoot<mirror::Object> */ root = *(obj + offset) + __ LoadFromOffset(kLoadWord, root_reg, obj, offset); + static_assert( + sizeof(mirror::CompressedReference<mirror::Object>) == sizeof(GcRoot<mirror::Object>), + "art::mirror::CompressedReference<mirror::Object> and art::GcRoot<mirror::Object> " + "have different sizes."); + static_assert(sizeof(mirror::CompressedReference<mirror::Object>) == sizeof(int32_t), + "art::mirror::CompressedReference<mirror::Object> and int32_t " + "have different sizes."); + + // Slow path marking the GC root `root`. + Location temp = Location::RegisterLocation(T9); + SlowPathCodeMIPS* slow_path = + new (GetGraph()->GetArena()) ReadBarrierMarkSlowPathMIPS( + instruction, + root, + /*entrypoint*/ temp); + codegen_->AddSlowPath(slow_path); + + // temp = Thread::Current()->pReadBarrierMarkReg ## root.reg() + const int32_t entry_point_offset = + CodeGenerator::GetReadBarrierMarkEntryPointsOffset<kMipsPointerSize>(root.reg() - 1); + // Loading the entrypoint does not require a load acquire since it is only changed when + // threads are suspended or running a checkpoint. 
+ __ LoadFromOffset(kLoadWord, temp.AsRegister<Register>(), TR, entry_point_offset); + // The entrypoint is null when the GC is not marking; this saves one load compared to + // checking GetIsGcMarking. + __ Bnez(temp.AsRegister<Register>(), slow_path->GetEntryLabel()); + __ Bind(slow_path->GetExitLabel()); + } else { + // GC root loaded through a slow path for read barriers other + // than Baker's. + // /* GcRoot<mirror::Object>* */ root = obj + offset + __ Addiu32(root_reg, obj, offset); + // /* mirror::Object* */ root = root->Read() + codegen_->GenerateReadBarrierForRootSlow(instruction, root, root); + } } else { // Plain GC root load with no read barrier. // /* GcRoot<mirror::Object> */ root = *(obj + offset) @@ -5337,6 +6239,226 @@ void InstructionCodeGeneratorMIPS::GenerateGcRootFieldLoad( } } +void CodeGeneratorMIPS::GenerateFieldLoadWithBakerReadBarrier(HInstruction* instruction, + Location ref, + Register obj, + uint32_t offset, + Location temp, + bool needs_null_check) { + DCHECK(kEmitCompilerReadBarrier); + DCHECK(kUseBakerReadBarrier); + + // /* HeapReference<Object> */ ref = *(obj + offset) + Location no_index = Location::NoLocation(); + ScaleFactor no_scale_factor = TIMES_1; + GenerateReferenceLoadWithBakerReadBarrier(instruction, + ref, + obj, + offset, + no_index, + no_scale_factor, + temp, + needs_null_check); +} + +void CodeGeneratorMIPS::GenerateArrayLoadWithBakerReadBarrier(HInstruction* instruction, + Location ref, + Register obj, + uint32_t data_offset, + Location index, + Location temp, + bool needs_null_check) { + DCHECK(kEmitCompilerReadBarrier); + DCHECK(kUseBakerReadBarrier); + + static_assert( + sizeof(mirror::HeapReference<mirror::Object>) == sizeof(int32_t), + "art::mirror::HeapReference<art::mirror::Object> and int32_t have different sizes."); + // /* HeapReference<Object> */ ref = + // *(obj + data_offset + index * sizeof(HeapReference<Object>)) + ScaleFactor scale_factor = TIMES_4; + GenerateReferenceLoadWithBakerReadBarrier(instruction, + ref, + obj, + data_offset, + index, + scale_factor, + temp, + needs_null_check); +} + +void CodeGeneratorMIPS::GenerateReferenceLoadWithBakerReadBarrier(HInstruction* instruction, + Location ref, + Register obj, + uint32_t offset, + Location index, + ScaleFactor scale_factor, + Location temp, + bool needs_null_check, + bool always_update_field) { + DCHECK(kEmitCompilerReadBarrier); + DCHECK(kUseBakerReadBarrier); + + // In slow path based read barriers, the read barrier call is + // inserted after the original load. However, in fast path based + // Baker's read barriers, we need to perform the load of + // mirror::Object::monitor_ *before* the original reference load. + // This load-load ordering is required by the read barrier. + // The fast path/slow path (for Baker's algorithm) should look like: + // + // uint32_t rb_state = LockWord(obj->monitor_).ReadBarrierState(); + // lfence; // Load fence or artificial data dependency to prevent load-load reordering + // HeapReference<Object> ref = *src; // Original reference load. + // bool is_gray = (rb_state == ReadBarrier::GrayState()); + // if (is_gray) { + // ref = ReadBarrier::Mark(ref); // Performed by runtime entrypoint slow path. + // } + // + // Note: the original implementation in ReadBarrier::Barrier is + // slightly more complex as it performs additional checks that we do + // not do here for performance reasons. 
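To make the protocol just described concrete, here is a minimal C++ sketch of the same fast path. The bit position kRbStateShift, the `mark` callback, and the acquire load standing in for the emitted Sync(0) are illustrative assumptions, not the runtime's exact definitions:

    #include <atomic>
    #include <cstdint>

    constexpr uint32_t kRbStateShift = 28;  // assumed position of the 1-bit rb_state
    constexpr uint32_t kGrayState = 1;      // gray == 1, white == 0 (see the static_asserts below)

    template <typename T>
    T* BakerLoadSketch(const std::atomic<uint32_t>& monitor,  // obj->monitor_
                       const std::atomic<T*>& field,          // the reference being read
                       T* (*mark)(T*)) {                      // ReadBarrierMarkRegX stub
      // The lock word must be read *before* the reference; the acquire load
      // stands in for the load-load ordering that Sync(0) enforces in the
      // emitted code.
      uint32_t rb_state = (monitor.load(std::memory_order_acquire) >> kRbStateShift) & 1u;
      T* ref = field.load(std::memory_order_relaxed);     // Original reference load.
      return (rb_state == kGrayState) ? mark(ref) : ref;  // Only gray objects need marking.
    }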
+ + Register ref_reg = ref.AsRegister<Register>(); + Register temp_reg = temp.AsRegister<Register>(); + uint32_t monitor_offset = mirror::Object::MonitorOffset().Int32Value(); + + // /* int32_t */ monitor = obj->monitor_ + __ LoadFromOffset(kLoadWord, temp_reg, obj, monitor_offset); + if (needs_null_check) { + MaybeRecordImplicitNullCheck(instruction); + } + // /* LockWord */ lock_word = LockWord(monitor) + static_assert(sizeof(LockWord) == sizeof(int32_t), + "art::LockWord and int32_t have different sizes."); + + __ Sync(0); // Barrier to prevent load-load reordering. + + // The actual reference load. + if (index.IsValid()) { + // Load types involving an "index": ArrayGet, + // UnsafeGetObject/UnsafeGetObjectVolatile and UnsafeCASObject + // intrinsics. + // /* HeapReference<Object> */ ref = *(obj + offset + (index << scale_factor)) + if (index.IsConstant()) { + size_t computed_offset = + (index.GetConstant()->AsIntConstant()->GetValue() << scale_factor) + offset; + __ LoadFromOffset(kLoadWord, ref_reg, obj, computed_offset); + } else { + // Handle the special case of the + // UnsafeGetObject/UnsafeGetObjectVolatile and UnsafeCASObject + // intrinsics, which use a register pair as index ("long + // offset"), of which only the low part contains data. + Register index_reg = index.IsRegisterPair() + ? index.AsRegisterPairLow<Register>() + : index.AsRegister<Register>(); + __ Sll(TMP, index_reg, scale_factor); + __ Addu(TMP, obj, TMP); + __ LoadFromOffset(kLoadWord, ref_reg, TMP, offset); + } + } else { + // /* HeapReference<Object> */ ref = *(obj + offset) + __ LoadFromOffset(kLoadWord, ref_reg, obj, offset); + } + + // Object* ref = ref_addr->AsMirrorPtr() + __ MaybeUnpoisonHeapReference(ref_reg); + + // Slow path marking the object `ref` when it is gray. + SlowPathCodeMIPS* slow_path; + if (always_update_field) { + // ReadBarrierMarkAndUpdateFieldSlowPathMIPS only supports address + // of the form `obj + field_offset`, where `obj` is a register and + // `field_offset` is a register pair (of which only the lower half + // is used). Thus `offset` above is expected to be zero and + // `scale_factor` to be TIMES_1 in this code path. + DCHECK_EQ(offset, 0u); + DCHECK_EQ(scale_factor, ScaleFactor::TIMES_1); + slow_path = new (GetGraph()->GetArena()) + ReadBarrierMarkAndUpdateFieldSlowPathMIPS(instruction, + ref, + obj, + /* field_offset */ index, + temp_reg); + } else { + slow_path = new (GetGraph()->GetArena()) ReadBarrierMarkSlowPathMIPS(instruction, ref); + } + AddSlowPath(slow_path); + + // if (rb_state == ReadBarrier::GrayState()) + // ref = ReadBarrier::Mark(ref); + // Given the numeric representation, it's enough to check the low bit of the + // rb_state. We do that by shifting the bit into the sign bit (31) and + // performing a branch on less than zero. + static_assert(ReadBarrier::WhiteState() == 0, "Expecting white to have value 0"); + static_assert(ReadBarrier::GrayState() == 1, "Expecting gray to have value 1"); + static_assert(LockWord::kReadBarrierStateSize == 1, "Expecting 1-bit read barrier state size"); + __ Sll(temp_reg, temp_reg, 31 - LockWord::kReadBarrierStateShift); + __ Bltz(temp_reg, slow_path->GetEntryLabel()); + __ Bind(slow_path->GetExitLabel()); +} + +void CodeGeneratorMIPS::GenerateReadBarrierSlow(HInstruction* instruction, + Location out, + Location ref, + Location obj, + uint32_t offset, + Location index) { + DCHECK(kEmitCompilerReadBarrier); + + // Insert a slow path based read barrier *after* the reference load. 
+ // + // If heap poisoning is enabled, the unpoisoning of the loaded + // reference will be carried out by the runtime within the slow + // path. + // + // Note that `ref` currently does not get unpoisoned (when heap + // poisoning is enabled), which is alright as the `ref` argument is + // not used by the artReadBarrierSlow entry point. + // + // TODO: Unpoison `ref` when it is used by artReadBarrierSlow. + SlowPathCodeMIPS* slow_path = new (GetGraph()->GetArena()) + ReadBarrierForHeapReferenceSlowPathMIPS(instruction, out, ref, obj, offset, index); + AddSlowPath(slow_path); + + __ B(slow_path->GetEntryLabel()); + __ Bind(slow_path->GetExitLabel()); +} + +void CodeGeneratorMIPS::MaybeGenerateReadBarrierSlow(HInstruction* instruction, + Location out, + Location ref, + Location obj, + uint32_t offset, + Location index) { + if (kEmitCompilerReadBarrier) { + // Baker's read barriers shall be handled by the fast path + // (CodeGeneratorMIPS::GenerateReferenceLoadWithBakerReadBarrier). + DCHECK(!kUseBakerReadBarrier); + // If heap poisoning is enabled, unpoisoning will be taken care of + // by the runtime within the slow path. + GenerateReadBarrierSlow(instruction, out, ref, obj, offset, index); + } else if (kPoisonHeapReferences) { + __ UnpoisonHeapReference(out.AsRegister<Register>()); + } +} + +void CodeGeneratorMIPS::GenerateReadBarrierForRootSlow(HInstruction* instruction, + Location out, + Location root) { + DCHECK(kEmitCompilerReadBarrier); + + // Insert a slow path based read barrier *after* the GC root load. + // + // Note that GC roots are not affected by heap poisoning, so we do + // not need to do anything special for this here. + SlowPathCodeMIPS* slow_path = + new (GetGraph()->GetArena()) ReadBarrierForRootSlowPathMIPS(instruction, out, root); + AddSlowPath(slow_path); + + __ B(slow_path->GetEntryLabel()); + __ Bind(slow_path->GetExitLabel()); +} + void LocationsBuilderMIPS::VisitInstanceOf(HInstanceOf* instruction) { LocationSummary::CallKind call_kind = LocationSummary::kNoCall; TypeCheckKind type_check_kind = instruction->GetTypeCheckKind(); @@ -5345,7 +6467,8 @@ void LocationsBuilderMIPS::VisitInstanceOf(HInstanceOf* instruction) { case TypeCheckKind::kAbstractClassCheck: case TypeCheckKind::kClassHierarchyCheck: case TypeCheckKind::kArrayObjectCheck: - call_kind = LocationSummary::kNoCall; + call_kind = + kEmitCompilerReadBarrier ? LocationSummary::kCallOnSlowPath : LocationSummary::kNoCall; break; case TypeCheckKind::kArrayCheck: case TypeCheckKind::kUnresolvedCheck: @@ -5360,14 +6483,20 @@ void LocationsBuilderMIPS::VisitInstanceOf(HInstanceOf* instruction) { // The output does overlap inputs. // Note that TypeCheckSlowPathMIPS uses this register too. 
locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap); + locations->AddRegisterTemps(NumberOfInstanceOfTemps(type_check_kind)); } void InstructionCodeGeneratorMIPS::VisitInstanceOf(HInstanceOf* instruction) { TypeCheckKind type_check_kind = instruction->GetTypeCheckKind(); LocationSummary* locations = instruction->GetLocations(); - Register obj = locations->InAt(0).AsRegister<Register>(); + Location obj_loc = locations->InAt(0); + Register obj = obj_loc.AsRegister<Register>(); Register cls = locations->InAt(1).AsRegister<Register>(); - Register out = locations->Out().AsRegister<Register>(); + Location out_loc = locations->Out(); + Register out = out_loc.AsRegister<Register>(); + const size_t num_temps = NumberOfInstanceOfTemps(type_check_kind); + DCHECK_LE(num_temps, 1u); + Location maybe_temp_loc = (num_temps >= 1) ? locations->GetTemp(0) : Location::NoLocation(); uint32_t class_offset = mirror::Object::ClassOffset().Int32Value(); uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value(); uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value(); @@ -5385,8 +6514,12 @@ void InstructionCodeGeneratorMIPS::VisitInstanceOf(HInstanceOf* instruction) { switch (type_check_kind) { case TypeCheckKind::kExactCheck: { // /* HeapReference<Class> */ out = obj->klass_ - __ LoadFromOffset(kLoadWord, out, obj, class_offset); - __ MaybeUnpoisonHeapReference(out); + GenerateReferenceLoadTwoRegisters(instruction, + out_loc, + obj_loc, + class_offset, + maybe_temp_loc, + kCompilerReadBarrierOption); // Classes must be equal for the instanceof to succeed. __ Xor(out, out, cls); __ Sltiu(out, out, 1); @@ -5395,15 +6528,22 @@ void InstructionCodeGeneratorMIPS::VisitInstanceOf(HInstanceOf* instruction) { case TypeCheckKind::kAbstractClassCheck: { // /* HeapReference<Class> */ out = obj->klass_ - __ LoadFromOffset(kLoadWord, out, obj, class_offset); - __ MaybeUnpoisonHeapReference(out); + GenerateReferenceLoadTwoRegisters(instruction, + out_loc, + obj_loc, + class_offset, + maybe_temp_loc, + kCompilerReadBarrierOption); // If the class is abstract, we eagerly fetch the super class of the // object to avoid doing a comparison we know will fail. MipsLabel loop; __ Bind(&loop); // /* HeapReference<Class> */ out = out->super_class_ - __ LoadFromOffset(kLoadWord, out, out, super_offset); - __ MaybeUnpoisonHeapReference(out); + GenerateReferenceLoadOneRegister(instruction, + out_loc, + super_offset, + maybe_temp_loc, + kCompilerReadBarrierOption); // If `out` is null, we use it for the result, and jump to `done`. __ Beqz(out, &done); __ Bne(out, cls, &loop); @@ -5413,15 +6553,22 @@ void InstructionCodeGeneratorMIPS::VisitInstanceOf(HInstanceOf* instruction) { case TypeCheckKind::kClassHierarchyCheck: { // /* HeapReference<Class> */ out = obj->klass_ - __ LoadFromOffset(kLoadWord, out, obj, class_offset); - __ MaybeUnpoisonHeapReference(out); + GenerateReferenceLoadTwoRegisters(instruction, + out_loc, + obj_loc, + class_offset, + maybe_temp_loc, + kCompilerReadBarrierOption); // Walk over the class hierarchy to find a match. MipsLabel loop, success; __ Bind(&loop); __ Beq(out, cls, &success); // /* HeapReference<Class> */ out = out->super_class_ - __ LoadFromOffset(kLoadWord, out, out, super_offset); - __ MaybeUnpoisonHeapReference(out); + GenerateReferenceLoadOneRegister(instruction, + out_loc, + super_offset, + maybe_temp_loc, + kCompilerReadBarrierOption); __ Bnez(out, &loop); // If `out` is null, we use it for the result, and jump to `done`. 
__ B(&done); @@ -5432,15 +6579,22 @@ void InstructionCodeGeneratorMIPS::VisitInstanceOf(HInstanceOf* instruction) { case TypeCheckKind::kArrayObjectCheck: { // /* HeapReference<Class> */ out = obj->klass_ - __ LoadFromOffset(kLoadWord, out, obj, class_offset); - __ MaybeUnpoisonHeapReference(out); + GenerateReferenceLoadTwoRegisters(instruction, + out_loc, + obj_loc, + class_offset, + maybe_temp_loc, + kCompilerReadBarrierOption); // Do an exact check. MipsLabel success; __ Beq(out, cls, &success); // Otherwise, we need to check that the object's class is a non-primitive array. // /* HeapReference<Class> */ out = out->component_type_ - __ LoadFromOffset(kLoadWord, out, out, component_offset); - __ MaybeUnpoisonHeapReference(out); + GenerateReferenceLoadOneRegister(instruction, + out_loc, + component_offset, + maybe_temp_loc, + kCompilerReadBarrierOption); // If `out` is null, we use it for the result, and jump to `done`. __ Beqz(out, &done); __ LoadFromOffset(kLoadUnsignedHalfword, out, out, primitive_offset); @@ -5455,8 +6609,12 @@ void InstructionCodeGeneratorMIPS::VisitInstanceOf(HInstanceOf* instruction) { case TypeCheckKind::kArrayCheck: { // No read barrier since the slow path will retry upon failure. // /* HeapReference<Class> */ out = obj->klass_ - __ LoadFromOffset(kLoadWord, out, obj, class_offset); - __ MaybeUnpoisonHeapReference(out); + GenerateReferenceLoadTwoRegisters(instruction, + out_loc, + obj_loc, + class_offset, + maybe_temp_loc, + kWithoutReadBarrier); DCHECK(locations->OnlyCallsOnSlowPath()); slow_path = new (GetGraph()->GetArena()) TypeCheckSlowPathMIPS(instruction, /* is_fatal */ false); @@ -5627,9 +6785,6 @@ static bool TryGenerateIntrinsicCode(HInvoke* invoke, CodeGeneratorMIPS* codegen HLoadString::LoadKind CodeGeneratorMIPS::GetSupportedLoadStringKind( HLoadString::LoadKind desired_string_load_kind) { - if (kEmitCompilerReadBarrier) { - UNIMPLEMENTED(FATAL) << "for read barrier"; - } // We disable PC-relative load on pre-R6 when there is an irreducible loop, as the optimization // is incompatible with it. // TODO: Create as many MipsDexCacheArraysBase instructions as needed for methods @@ -5665,9 +6820,6 @@ HLoadString::LoadKind CodeGeneratorMIPS::GetSupportedLoadStringKind( HLoadClass::LoadKind CodeGeneratorMIPS::GetSupportedLoadClassKind( HLoadClass::LoadKind desired_class_load_kind) { - if (kEmitCompilerReadBarrier) { - UNIMPLEMENTED(FATAL) << "for read barrier"; - } // We disable PC-relative load on pre-R6 when there is an irreducible loop, as the optimization // is incompatible with it. bool has_irreducible_loops = GetGraph()->HasIrreducibleLoops(); @@ -5916,12 +7068,13 @@ void LocationsBuilderMIPS::VisitLoadClass(HLoadClass* cls) { CodeGenerator::CreateLoadClassRuntimeCallLocationSummary( cls, Location::RegisterLocation(calling_convention.GetRegisterAt(0)), - Location::RegisterLocation(V0)); + calling_convention.GetReturnLocation(Primitive::kPrimNot)); return; } DCHECK(!cls->NeedsAccessCheck()); - LocationSummary::CallKind call_kind = (cls->NeedsEnvironment() || kEmitCompilerReadBarrier) + const bool requires_read_barrier = kEmitCompilerReadBarrier && !cls->IsInBootImage(); + LocationSummary::CallKind call_kind = (cls->NeedsEnvironment() || requires_read_barrier) ? 
LocationSummary::kCallOnSlowPath : LocationSummary::kNoCall; LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(cls, call_kind); @@ -5976,6 +7129,9 @@ void InstructionCodeGeneratorMIPS::VisitLoadClass(HLoadClass* cls) NO_THREAD_SAF break; } + const ReadBarrierOption read_barrier_option = cls->IsInBootImage() + ? kWithoutReadBarrier + : kCompilerReadBarrierOption; bool generate_null_check = false; switch (load_kind) { case HLoadClass::LoadKind::kReferrersClass: { @@ -5985,11 +7141,13 @@ void InstructionCodeGeneratorMIPS::VisitLoadClass(HLoadClass* cls) NO_THREAD_SAF GenerateGcRootFieldLoad(cls, out_loc, base_or_current_method_reg, - ArtMethod::DeclaringClassOffset().Int32Value()); + ArtMethod::DeclaringClassOffset().Int32Value(), + read_barrier_option); break; } case HLoadClass::LoadKind::kBootImageLinkTimeAddress: DCHECK(codegen_->GetCompilerOptions().IsBootImage()); + DCHECK_EQ(read_barrier_option, kWithoutReadBarrier); __ LoadLiteral(out, base_or_current_method_reg, codegen_->DeduplicateBootImageTypeLiteral(cls->GetDexFile(), @@ -5997,6 +7155,7 @@ void InstructionCodeGeneratorMIPS::VisitLoadClass(HLoadClass* cls) NO_THREAD_SAF break; case HLoadClass::LoadKind::kBootImageLinkTimePcRelative: { DCHECK(codegen_->GetCompilerOptions().IsBootImage()); + DCHECK_EQ(read_barrier_option, kWithoutReadBarrier); CodeGeneratorMIPS::PcRelativePatchInfo* info = codegen_->NewPcRelativeTypePatch(cls->GetDexFile(), cls->GetTypeIndex()); bool reordering = __ SetReorder(false); @@ -6006,7 +7165,7 @@ void InstructionCodeGeneratorMIPS::VisitLoadClass(HLoadClass* cls) NO_THREAD_SAF break; } case HLoadClass::LoadKind::kBootImageAddress: { - DCHECK(!kEmitCompilerReadBarrier); + DCHECK_EQ(read_barrier_option, kWithoutReadBarrier); uint32_t address = dchecked_integral_cast<uint32_t>( reinterpret_cast<uintptr_t>(cls->GetClass().Get())); DCHECK_NE(address, 0u); @@ -6020,7 +7179,7 @@ void InstructionCodeGeneratorMIPS::VisitLoadClass(HLoadClass* cls) NO_THREAD_SAF codegen_->NewTypeBssEntryPatch(cls->GetDexFile(), cls->GetTypeIndex()); bool reordering = __ SetReorder(false); codegen_->EmitPcRelativeAddressPlaceholderHigh(info, out, base_or_current_method_reg); - GenerateGcRootFieldLoad(cls, out_loc, out, /* placeholder */ 0x5678); + GenerateGcRootFieldLoad(cls, out_loc, out, /* placeholder */ 0x5678, read_barrier_option); __ SetReorder(reordering); generate_null_check = true; break; @@ -6032,7 +7191,7 @@ void InstructionCodeGeneratorMIPS::VisitLoadClass(HLoadClass* cls) NO_THREAD_SAF bool reordering = __ SetReorder(false); __ Bind(&info->high_label); __ Lui(out, /* placeholder */ 0x1234); - GenerateGcRootFieldLoad(cls, out_loc, out, /* placeholder */ 0x5678); + GenerateGcRootFieldLoad(cls, out_loc, out, /* placeholder */ 0x5678, read_barrier_option); __ SetReorder(reordering); break; } @@ -6165,7 +7324,11 @@ void InstructionCodeGeneratorMIPS::VisitLoadString(HLoadString* load) NO_THREAD_ codegen_->NewPcRelativeStringPatch(load->GetDexFile(), load->GetStringIndex()); bool reordering = __ SetReorder(false); codegen_->EmitPcRelativeAddressPlaceholderHigh(info, out, base_or_current_method_reg); - GenerateGcRootFieldLoad(load, out_loc, out, /* placeholder */ 0x5678); + GenerateGcRootFieldLoad(load, + out_loc, + out, + /* placeholder */ 0x5678, + kCompilerReadBarrierOption); __ SetReorder(reordering); SlowPathCodeMIPS* slow_path = new (GetGraph()->GetArena()) LoadStringSlowPathMIPS(load); codegen_->AddSlowPath(slow_path); @@ -6181,7 +7344,11 @@ void 
InstructionCodeGeneratorMIPS::VisitLoadString(HLoadString* load) NO_THREAD_ bool reordering = __ SetReorder(false); __ Bind(&info->high_label); __ Lui(out, /* placeholder */ 0x1234); - GenerateGcRootFieldLoad(load, out_loc, out, /* placeholder */ 0x5678); + GenerateGcRootFieldLoad(load, + out_loc, + out, + /* placeholder */ 0x5678, + kCompilerReadBarrierOption); __ SetReorder(reordering); return; } diff --git a/compiler/optimizing/code_generator_mips.h b/compiler/optimizing/code_generator_mips.h index 98fee24a74..3875c4bdba 100644 --- a/compiler/optimizing/code_generator_mips.h +++ b/compiler/optimizing/code_generator_mips.h @@ -241,6 +241,38 @@ class InstructionCodeGeneratorMIPS : public InstructionCodeGenerator { uint32_t dex_pc, bool value_can_be_null); void HandleFieldGet(HInstruction* instruction, const FieldInfo& field_info, uint32_t dex_pc); + + // Generate a heap reference load using one register `out`: + // + // out <- *(out + offset) + // + // while honoring heap poisoning and/or read barriers (if any). + // + // Location `maybe_temp` is used when generating a read barrier and + // shall be a register in that case; it may be an invalid location + // otherwise. + void GenerateReferenceLoadOneRegister(HInstruction* instruction, + Location out, + uint32_t offset, + Location maybe_temp, + ReadBarrierOption read_barrier_option); + // Generate a heap reference load using two different registers + // `out` and `obj`: + // + // out <- *(obj + offset) + // + // while honoring heap poisoning and/or read barriers (if any). + // + // Location `maybe_temp` is used when generating a Baker's (fast + // path) read barrier and shall be a register in that case; it may + // be an invalid location otherwise. + void GenerateReferenceLoadTwoRegisters(HInstruction* instruction, + Location out, + Location obj, + uint32_t offset, + Location maybe_temp, + ReadBarrierOption read_barrier_option); + // Generate a GC root reference load: // // root <- *(obj + offset) @@ -249,7 +281,9 @@ class InstructionCodeGeneratorMIPS : public InstructionCodeGenerator { void GenerateGcRootFieldLoad(HInstruction* instruction, Location root, Register obj, - uint32_t offset); + uint32_t offset, + ReadBarrierOption read_barrier_option); + void GenerateIntCompare(IfCondition cond, LocationSummary* locations); // When the function returns `false` it means that the condition holds if `dst` is non-zero // and doesn't hold if `dst` is zero. If it returns `true`, the roles of zero and non-zero @@ -353,6 +387,91 @@ class CodeGeneratorMIPS : public CodeGenerator { void EmitLinkerPatches(ArenaVector<LinkerPatch>* linker_patches) OVERRIDE; void EmitJitRootPatches(uint8_t* code, const uint8_t* roots_data) OVERRIDE; + // Fast path implementation of ReadBarrier::Barrier for a heap + // reference field load when Baker's read barriers are used. + void GenerateFieldLoadWithBakerReadBarrier(HInstruction* instruction, + Location ref, + Register obj, + uint32_t offset, + Location temp, + bool needs_null_check); + // Fast path implementation of ReadBarrier::Barrier for a heap + // reference array load when Baker's read barriers are used. + void GenerateArrayLoadWithBakerReadBarrier(HInstruction* instruction, + Location ref, + Register obj, + uint32_t data_offset, + Location index, + Location temp, + bool needs_null_check); + + // Factored implementation, used by GenerateFieldLoadWithBakerReadBarrier, + // GenerateArrayLoadWithBakerReadBarrier and some intrinsics. 
+ // + // Load the object reference located at the address + // `obj + offset + (index << scale_factor)`, held by object `obj`, into + // `ref`, and mark it if needed. + // + // If `always_update_field` is true, the value of the reference is + // atomically updated in the holder (`obj`). + void GenerateReferenceLoadWithBakerReadBarrier(HInstruction* instruction, + Location ref, + Register obj, + uint32_t offset, + Location index, + ScaleFactor scale_factor, + Location temp, + bool needs_null_check, + bool always_update_field = false); + + // Generate a read barrier for a heap reference within `instruction` + // using a slow path. + // + // A read barrier for an object reference read from the heap is + // implemented as a call to the artReadBarrierSlow runtime entry + // point, which is passed the values in locations `ref`, `obj`, and + // `offset`: + // + // mirror::Object* artReadBarrierSlow(mirror::Object* ref, + // mirror::Object* obj, + // uint32_t offset); + // + // The `out` location contains the value returned by + // artReadBarrierSlow. + // + // When `index` is provided (i.e. for array accesses), the offset + // value passed to artReadBarrierSlow is adjusted to take `index` + // into account. + void GenerateReadBarrierSlow(HInstruction* instruction, + Location out, + Location ref, + Location obj, + uint32_t offset, + Location index = Location::NoLocation()); + + // If read barriers are enabled, generate a read barrier for a heap + // reference using a slow path. If heap poisoning is enabled, also + // unpoison the reference in `out`. + void MaybeGenerateReadBarrierSlow(HInstruction* instruction, + Location out, + Location ref, + Location obj, + uint32_t offset, + Location index = Location::NoLocation()); + + // Generate a read barrier for a GC root within `instruction` using + // a slow path. + // + // A read barrier for an object reference GC root is implemented as + // a call to the artReadBarrierForRootSlow runtime entry point, + // which is passed the value in location `root`: + // + // mirror::Object* artReadBarrierForRootSlow(GcRoot<mirror::Object>* root); + // + // The `out` location contains the value returned by + // artReadBarrierForRootSlow. + void GenerateReadBarrierForRootSlow(HInstruction* instruction, Location out, Location root); + void MarkGCCard(Register object, Register value, bool value_can_be_null); // Register allocation. @@ -400,6 +519,15 @@ class CodeGeneratorMIPS : public CodeGenerator { uint32_t dex_pc, SlowPathCode* slow_path = nullptr) OVERRIDE; + // Generate code to invoke a runtime entry point, but do not record + // PC-related information in a stack map. 
+ void InvokeRuntimeWithoutRecordingPcInfo(int32_t entry_point_offset, + HInstruction* instruction, + SlowPathCode* slow_path, + bool direct); + + void GenerateInvokeRuntime(int32_t entry_point_offset, bool direct); + ParallelMoveResolver* GetMoveResolver() OVERRIDE { return &move_resolver_; } bool NeedsTwoRegisters(Primitive::Type type) const OVERRIDE { diff --git a/compiler/optimizing/code_generator_mips64.cc b/compiler/optimizing/code_generator_mips64.cc index c82533bc7d..78b31e9e86 100644 --- a/compiler/optimizing/code_generator_mips64.cc +++ b/compiler/optimizing/code_generator_mips64.cc @@ -407,6 +407,528 @@ class DeoptimizationSlowPathMIPS64 : public SlowPathCodeMIPS64 { DISALLOW_COPY_AND_ASSIGN(DeoptimizationSlowPathMIPS64); }; +class ArraySetSlowPathMIPS64 : public SlowPathCodeMIPS64 { + public: + explicit ArraySetSlowPathMIPS64(HInstruction* instruction) : SlowPathCodeMIPS64(instruction) {} + + void EmitNativeCode(CodeGenerator* codegen) OVERRIDE { + LocationSummary* locations = instruction_->GetLocations(); + __ Bind(GetEntryLabel()); + SaveLiveRegisters(codegen, locations); + + InvokeRuntimeCallingConvention calling_convention; + HParallelMove parallel_move(codegen->GetGraph()->GetArena()); + parallel_move.AddMove( + locations->InAt(0), + Location::RegisterLocation(calling_convention.GetRegisterAt(0)), + Primitive::kPrimNot, + nullptr); + parallel_move.AddMove( + locations->InAt(1), + Location::RegisterLocation(calling_convention.GetRegisterAt(1)), + Primitive::kPrimInt, + nullptr); + parallel_move.AddMove( + locations->InAt(2), + Location::RegisterLocation(calling_convention.GetRegisterAt(2)), + Primitive::kPrimNot, + nullptr); + codegen->GetMoveResolver()->EmitNativeCode(&parallel_move); + + CodeGeneratorMIPS64* mips64_codegen = down_cast<CodeGeneratorMIPS64*>(codegen); + mips64_codegen->InvokeRuntime(kQuickAputObject, instruction_, instruction_->GetDexPc(), this); + CheckEntrypointTypes<kQuickAputObject, void, mirror::Array*, int32_t, mirror::Object*>(); + RestoreLiveRegisters(codegen, locations); + __ Bc(GetExitLabel()); + } + + const char* GetDescription() const OVERRIDE { return "ArraySetSlowPathMIPS64"; } + + private: + DISALLOW_COPY_AND_ASSIGN(ArraySetSlowPathMIPS64); +}; + +// Slow path marking an object reference `ref` during a read +// barrier. The field `obj.field` in the object `obj` holding this +// reference does not get updated by this slow path after marking (see +// ReadBarrierMarkAndUpdateFieldSlowPathMIPS64 below for that). +// +// This means that after the execution of this slow path, `ref` will +// always be up-to-date, but `obj.field` may not; i.e., after the +// flip, `ref` will be a to-space reference, but `obj.field` will +// probably still be a from-space reference (unless it gets updated by +// another thread, or if another thread installed another object +// reference (different from `ref`) in `obj.field`). +// +// If `entrypoint` is a valid location it is assumed to already be +// holding the entrypoint. The case where the entrypoint is passed in +// is for the GcRoot read barrier. 
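The marking slow paths below rely on one ReadBarrierMarkRegX stub per core register, held in a thread-local table, so `ref` never has to be shuffled through A0/V0. A sketch of the slot arithmetic implied by the `ref_reg - 1` operands in the code that follows; the base offset is an assumption for illustration, and the `- 1` presumably reflects that the hardware zero register needs no stub:

    #include <cstdint>

    constexpr int32_t kMarkEntrypointsBaseOffset = 0x200;  // assumed offset of the stub table in Thread
    constexpr int32_t kPointerSizeBytes = 8;               // 64-bit entrypoints on MIPS64

    // Mirrors GetReadBarrierMarkEntryPointsOffset<kMips64PointerSize>(ref_reg - 1):
    // core register N is served by table slot N - 1.
    int32_t MarkEntrypointOffset(int ref_reg) {
      return kMarkEntrypointsBaseOffset + (ref_reg - 1) * kPointerSizeBytes;
    }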
+class ReadBarrierMarkSlowPathMIPS64 : public SlowPathCodeMIPS64 { + public: + ReadBarrierMarkSlowPathMIPS64(HInstruction* instruction, + Location ref, + Location entrypoint = Location::NoLocation()) + : SlowPathCodeMIPS64(instruction), ref_(ref), entrypoint_(entrypoint) { + DCHECK(kEmitCompilerReadBarrier); + } + + const char* GetDescription() const OVERRIDE { return "ReadBarrierMarkSlowPathMIPS64"; } + + void EmitNativeCode(CodeGenerator* codegen) OVERRIDE { + LocationSummary* locations = instruction_->GetLocations(); + GpuRegister ref_reg = ref_.AsRegister<GpuRegister>(); + DCHECK(locations->CanCall()); + DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(ref_reg)) << ref_reg; + DCHECK(instruction_->IsInstanceFieldGet() || + instruction_->IsStaticFieldGet() || + instruction_->IsArrayGet() || + instruction_->IsArraySet() || + instruction_->IsLoadClass() || + instruction_->IsLoadString() || + instruction_->IsInstanceOf() || + instruction_->IsCheckCast() || + (instruction_->IsInvokeVirtual() && instruction_->GetLocations()->Intrinsified()) || + (instruction_->IsInvokeStaticOrDirect() && instruction_->GetLocations()->Intrinsified())) + << "Unexpected instruction in read barrier marking slow path: " + << instruction_->DebugName(); + + __ Bind(GetEntryLabel()); + // No need to save live registers; it's taken care of by the + // entrypoint. Also, there is no need to update the stack mask, + // as this runtime call will not trigger a garbage collection. + CodeGeneratorMIPS64* mips64_codegen = down_cast<CodeGeneratorMIPS64*>(codegen); + DCHECK((V0 <= ref_reg && ref_reg <= T2) || + (S2 <= ref_reg && ref_reg <= S7) || + (ref_reg == S8)) << ref_reg; + // "Compact" slow path, saving two moves. + // + // Instead of using the standard runtime calling convention (input + // and output in A0 and V0 respectively): + // + // A0 <- ref + // V0 <- ReadBarrierMark(A0) + // ref <- V0 + // + // we just use rX (the register containing `ref`) as input and output + // of a dedicated entrypoint: + // + // rX <- ReadBarrierMarkRegX(rX) + // + if (entrypoint_.IsValid()) { + mips64_codegen->ValidateInvokeRuntimeWithoutRecordingPcInfo(instruction_, this); + DCHECK_EQ(entrypoint_.AsRegister<GpuRegister>(), T9); + __ Jalr(entrypoint_.AsRegister<GpuRegister>()); + __ Nop(); + } else { + int32_t entry_point_offset = + CodeGenerator::GetReadBarrierMarkEntryPointsOffset<kMips64PointerSize>(ref_reg - 1); + // This runtime call does not require a stack map. + mips64_codegen->InvokeRuntimeWithoutRecordingPcInfo(entry_point_offset, + instruction_, + this); + } + __ Bc(GetExitLabel()); + } + + private: + // The location (register) of the marked object reference. + const Location ref_; + + // The location of the entrypoint if already loaded. + const Location entrypoint_; + + DISALLOW_COPY_AND_ASSIGN(ReadBarrierMarkSlowPathMIPS64); +}; + +// Slow path marking an object reference `ref` during a read barrier, +// and if needed, atomically updating the field `obj.field` in the +// object `obj` holding this reference after marking (contrary to +// ReadBarrierMarkSlowPathMIPS64 above, which never tries to update +// `obj.field`). +// +// This means that after the execution of this slow path, both `ref` +// and `obj.field` will be up-to-date; i.e., after the flip, both will +// hold the same to-space reference (unless another thread installed +// another object reference (different from `ref`) in `obj.field`). 
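In plain C++, the contract just described boils down to the sketch below; the LL/SC loop this slow path emits is the hand-rolled equivalent of the relaxed compare-and-set used here (the types and the `mark` callback are illustrative stand-ins, not the runtime's API):

    #include <atomic>

    template <typename T>
    void MarkAndUpdateField(std::atomic<T*>& field,  // obj.field
                            T*& ref,                 // register holding the reference
                            T* (*mark)(T*)) {        // ReadBarrierMarkRegX stub
      T* old_ref = ref;  // Save the from-space reference before marking.
      ref = mark(ref);   // From here on, `ref` is a to-space reference.
      if (ref != old_ref) {
        // Install the to-space reference only if the field still holds the old
        // one; if another thread wrote a different object in the meantime, the
        // CAS fails and the field is deliberately left as-is.
        field.compare_exchange_strong(old_ref, ref, std::memory_order_relaxed);
      }
    }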
+class ReadBarrierMarkAndUpdateFieldSlowPathMIPS64 : public SlowPathCodeMIPS64 { + public: + ReadBarrierMarkAndUpdateFieldSlowPathMIPS64(HInstruction* instruction, + Location ref, + GpuRegister obj, + Location field_offset, + GpuRegister temp1) + : SlowPathCodeMIPS64(instruction), + ref_(ref), + obj_(obj), + field_offset_(field_offset), + temp1_(temp1) { + DCHECK(kEmitCompilerReadBarrier); + } + + const char* GetDescription() const OVERRIDE { + return "ReadBarrierMarkAndUpdateFieldSlowPathMIPS64"; + } + + void EmitNativeCode(CodeGenerator* codegen) OVERRIDE { + LocationSummary* locations = instruction_->GetLocations(); + GpuRegister ref_reg = ref_.AsRegister<GpuRegister>(); + DCHECK(locations->CanCall()); + DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(ref_reg)) << ref_reg; + // This slow path is only used by the UnsafeCASObject intrinsic. + DCHECK((instruction_->IsInvokeVirtual() && instruction_->GetLocations()->Intrinsified())) + << "Unexpected instruction in read barrier marking and field updating slow path: " + << instruction_->DebugName(); + DCHECK(instruction_->GetLocations()->Intrinsified()); + DCHECK_EQ(instruction_->AsInvoke()->GetIntrinsic(), Intrinsics::kUnsafeCASObject); + DCHECK(field_offset_.IsRegister()) << field_offset_; + + __ Bind(GetEntryLabel()); + + // Save the old reference. + // Note that we cannot use AT or TMP to save the old reference, as those + // are used by the code that follows, but we need the old reference after + // the call to the ReadBarrierMarkRegX entry point. + DCHECK_NE(temp1_, AT); + DCHECK_NE(temp1_, TMP); + __ Move(temp1_, ref_reg); + + // No need to save live registers; it's taken care of by the + // entrypoint. Also, there is no need to update the stack mask, + // as this runtime call will not trigger a garbage collection. + CodeGeneratorMIPS64* mips64_codegen = down_cast<CodeGeneratorMIPS64*>(codegen); + DCHECK((V0 <= ref_reg && ref_reg <= T2) || + (S2 <= ref_reg && ref_reg <= S7) || + (ref_reg == S8)) << ref_reg; + // "Compact" slow path, saving two moves. + // + // Instead of using the standard runtime calling convention (input + // and output in A0 and V0 respectively): + // + // A0 <- ref + // V0 <- ReadBarrierMark(A0) + // ref <- V0 + // + // we just use rX (the register containing `ref`) as input and output + // of a dedicated entrypoint: + // + // rX <- ReadBarrierMarkRegX(rX) + // + int32_t entry_point_offset = + CodeGenerator::GetReadBarrierMarkEntryPointsOffset<kMips64PointerSize>(ref_reg - 1); + // This runtime call does not require a stack map. + mips64_codegen->InvokeRuntimeWithoutRecordingPcInfo(entry_point_offset, + instruction_, + this); + + // If the new reference is different from the old reference, + // update the field in the holder (`*(obj_ + field_offset_)`). + // + // Note that this field could also hold a different object, if + // another thread had concurrently changed it. In that case, the + // compare-and-set (CAS) loop below would abort, leaving the + // field as-is. + Mips64Label done; + __ Beqc(temp1_, ref_reg, &done); + + // Update the holder's field atomically. This may fail if + // another thread updates it first, but that is OK. This is achieved + // using a strong compare-and-set (CAS) operation with relaxed + // memory synchronization ordering, where the expected value is + // the old reference and the desired value is the new reference. + + // Convenience aliases. 
+ GpuRegister base = obj_; + GpuRegister offset = field_offset_.AsRegister<GpuRegister>(); + GpuRegister expected = temp1_; + GpuRegister value = ref_reg; + GpuRegister tmp_ptr = TMP; // Pointer to actual memory. + GpuRegister tmp = AT; // Value in memory. + + __ Daddu(tmp_ptr, base, offset); + + if (kPoisonHeapReferences) { + __ PoisonHeapReference(expected); + // Do not poison `value` if it is the same register as + // `expected`, which has just been poisoned. + if (value != expected) { + __ PoisonHeapReference(value); + } + } + + // do { + // tmp = [r_ptr] - expected; + // } while (tmp == 0 && failure([r_ptr] <- r_new_value)); + + Mips64Label loop_head, exit_loop; + __ Bind(&loop_head); + __ Ll(tmp, tmp_ptr); + // The LL instruction sign-extends the 32-bit value, but + // 32-bit references must be zero-extended. Zero-extend `tmp`. + __ Dext(tmp, tmp, 0, 32); + __ Bnec(tmp, expected, &exit_loop); + __ Move(tmp, value); + __ Sc(tmp, tmp_ptr); + __ Beqzc(tmp, &loop_head); + __ Bind(&exit_loop); + + if (kPoisonHeapReferences) { + __ UnpoisonHeapReference(expected); + // Do not unpoison `value` if it is the same register as + // `expected`, which has just been unpoisoned. + if (value != expected) { + __ UnpoisonHeapReference(value); + } + } + + __ Bind(&done); + __ Bc(GetExitLabel()); + } + + private: + // The location (register) of the marked object reference. + const Location ref_; + // The register containing the object holding the marked object reference field. + const GpuRegister obj_; + // The location of the offset of the marked reference field within `obj_`. + Location field_offset_; + + const GpuRegister temp1_; + + DISALLOW_COPY_AND_ASSIGN(ReadBarrierMarkAndUpdateFieldSlowPathMIPS64); +}; + +// Slow path generating a read barrier for a heap reference. +class ReadBarrierForHeapReferenceSlowPathMIPS64 : public SlowPathCodeMIPS64 { + public: + ReadBarrierForHeapReferenceSlowPathMIPS64(HInstruction* instruction, + Location out, + Location ref, + Location obj, + uint32_t offset, + Location index) + : SlowPathCodeMIPS64(instruction), + out_(out), + ref_(ref), + obj_(obj), + offset_(offset), + index_(index) { + DCHECK(kEmitCompilerReadBarrier); + // If `obj` is equal to `out` or `ref`, it means the initial object + // has been overwritten by (or after) the heap object reference load + // to be instrumented, e.g.: + // + // __ LoadFromOffset(kLoadWord, out, out, offset); + // codegen_->GenerateReadBarrierSlow(instruction, out_loc, out_loc, out_loc, offset); + // + // In that case, we have lost the information about the original + // object, and the emitted read barrier cannot work properly. 
+ DCHECK(!obj.Equals(out)) << "obj=" << obj << " out=" << out; + DCHECK(!obj.Equals(ref)) << "obj=" << obj << " ref=" << ref; + } + + void EmitNativeCode(CodeGenerator* codegen) OVERRIDE { + CodeGeneratorMIPS64* mips64_codegen = down_cast<CodeGeneratorMIPS64*>(codegen); + LocationSummary* locations = instruction_->GetLocations(); + Primitive::Type type = Primitive::kPrimNot; + GpuRegister reg_out = out_.AsRegister<GpuRegister>(); + DCHECK(locations->CanCall()); + DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(reg_out)); + DCHECK(instruction_->IsInstanceFieldGet() || + instruction_->IsStaticFieldGet() || + instruction_->IsArrayGet() || + instruction_->IsInstanceOf() || + instruction_->IsCheckCast() || + (instruction_->IsInvokeVirtual() && instruction_->GetLocations()->Intrinsified())) + << "Unexpected instruction in read barrier for heap reference slow path: " + << instruction_->DebugName(); + + __ Bind(GetEntryLabel()); + SaveLiveRegisters(codegen, locations); + + // We may have to change the index's value, but as `index_` is a + // constant member (like other "inputs" of this slow path), + // introduce a copy of it, `index`. + Location index = index_; + if (index_.IsValid()) { + // Handle `index_` for HArrayGet and UnsafeGetObject/UnsafeGetObjectVolatile intrinsics. + if (instruction_->IsArrayGet()) { + // Compute the actual memory offset and store it in `index`. + GpuRegister index_reg = index_.AsRegister<GpuRegister>(); + DCHECK(locations->GetLiveRegisters()->ContainsCoreRegister(index_reg)); + if (codegen->IsCoreCalleeSaveRegister(index_reg)) { + // We are about to change the value of `index_reg` (see the + // calls to art::mips64::Mips64Assembler::Sll and + // art::mips64::Mips64Assembler::Addiu32 below), but it has + // not been saved by the previous call to + // art::SlowPathCode::SaveLiveRegisters, as it is a + // callee-save register -- + // art::SlowPathCode::SaveLiveRegisters does not consider + // callee-save registers, as it has been designed with the + // assumption that callee-save registers are supposed to be + // handled by the called function. So, as a callee-save + // register, `index_reg` _would_ eventually be saved onto + // the stack, but it would be too late: we would have + // changed its value earlier. Therefore, we manually save + // it here into another freely available register, + // `free_reg`, chosen of course among the caller-save + // registers (as a callee-save `free_reg` register would + // exhibit the same problem). + // + // Note we could have requested a temporary register from + // the register allocator instead; but we prefer not to, as + // this is a slow path, and we know we can find a + // caller-save register that is available. + GpuRegister free_reg = FindAvailableCallerSaveRegister(codegen); + __ Move(free_reg, index_reg); + index_reg = free_reg; + index = Location::RegisterLocation(index_reg); + } else { + // The initial register stored in `index_` has already been + // saved in the call to art::SlowPathCode::SaveLiveRegisters + // (as it is not a callee-save register), so we can freely + // use it. + } + // Shifting the index value contained in `index_reg` by the scale + // factor (2) cannot overflow in practice, as the runtime is + // unable to allocate object arrays with a size larger than + // 2^26 - 1 (that is, 2^28 - 4 bytes). 
+ __ Sll(index_reg, index_reg, TIMES_4); + static_assert( + sizeof(mirror::HeapReference<mirror::Object>) == sizeof(int32_t), + "art::mirror::HeapReference<art::mirror::Object> and int32_t have different sizes."); + __ Addiu32(index_reg, index_reg, offset_); + } else { + // In the case of the UnsafeGetObject/UnsafeGetObjectVolatile + // intrinsics, `index_` is not shifted by a scale factor of 2 + // (as in the case of ArrayGet), as it is actually an offset + // to an object field within an object. + DCHECK(instruction_->IsInvoke()) << instruction_->DebugName(); + DCHECK(instruction_->GetLocations()->Intrinsified()); + DCHECK((instruction_->AsInvoke()->GetIntrinsic() == Intrinsics::kUnsafeGetObject) || + (instruction_->AsInvoke()->GetIntrinsic() == Intrinsics::kUnsafeGetObjectVolatile)) + << instruction_->AsInvoke()->GetIntrinsic(); + DCHECK_EQ(offset_, 0U); + DCHECK(index_.IsRegister()); + } + } + + // We're moving two or three locations to locations that could + // overlap, so we need a parallel move resolver. + InvokeRuntimeCallingConvention calling_convention; + HParallelMove parallel_move(codegen->GetGraph()->GetArena()); + parallel_move.AddMove(ref_, + Location::RegisterLocation(calling_convention.GetRegisterAt(0)), + Primitive::kPrimNot, + nullptr); + parallel_move.AddMove(obj_, + Location::RegisterLocation(calling_convention.GetRegisterAt(1)), + Primitive::kPrimNot, + nullptr); + if (index.IsValid()) { + parallel_move.AddMove(index, + Location::RegisterLocation(calling_convention.GetRegisterAt(2)), + Primitive::kPrimInt, + nullptr); + codegen->GetMoveResolver()->EmitNativeCode(&parallel_move); + } else { + codegen->GetMoveResolver()->EmitNativeCode(&parallel_move); + __ LoadConst32(calling_convention.GetRegisterAt(2), offset_); + } + mips64_codegen->InvokeRuntime(kQuickReadBarrierSlow, + instruction_, + instruction_->GetDexPc(), + this); + CheckEntrypointTypes< + kQuickReadBarrierSlow, mirror::Object*, mirror::Object*, mirror::Object*, uint32_t>(); + mips64_codegen->MoveLocation(out_, calling_convention.GetReturnLocation(type), type); + + RestoreLiveRegisters(codegen, locations); + __ Bc(GetExitLabel()); + } + + const char* GetDescription() const OVERRIDE { + return "ReadBarrierForHeapReferenceSlowPathMIPS64"; + } + + private: + GpuRegister FindAvailableCallerSaveRegister(CodeGenerator* codegen) { + size_t ref = static_cast<int>(ref_.AsRegister<GpuRegister>()); + size_t obj = static_cast<int>(obj_.AsRegister<GpuRegister>()); + for (size_t i = 0, e = codegen->GetNumberOfCoreRegisters(); i < e; ++i) { + if (i != ref && + i != obj && + !codegen->IsCoreCalleeSaveRegister(i) && + !codegen->IsBlockedCoreRegister(i)) { + return static_cast<GpuRegister>(i); + } + } + // We shall never fail to find a free caller-save register, as + // there are more than two core caller-save registers on MIPS64 + // (meaning it is possible to find one which is different from + // `ref` and `obj`). + DCHECK_GT(codegen->GetNumberOfCoreCallerSaveRegisters(), 2u); + LOG(FATAL) << "Could not find a free caller-save register"; + UNREACHABLE(); + } + + const Location out_; + const Location ref_; + const Location obj_; + const uint32_t offset_; + // An additional location containing an index to an array. + // Only used for HArrayGet and the UnsafeGetObject & + // UnsafeGetObjectVolatile intrinsics. + const Location index_; + + DISALLOW_COPY_AND_ASSIGN(ReadBarrierForHeapReferenceSlowPathMIPS64); +}; + +// Slow path generating a read barrier for a GC root. 
+class ReadBarrierForRootSlowPathMIPS64 : public SlowPathCodeMIPS64 { + public: + ReadBarrierForRootSlowPathMIPS64(HInstruction* instruction, Location out, Location root) + : SlowPathCodeMIPS64(instruction), out_(out), root_(root) { + DCHECK(kEmitCompilerReadBarrier); + } + + void EmitNativeCode(CodeGenerator* codegen) OVERRIDE { + LocationSummary* locations = instruction_->GetLocations(); + Primitive::Type type = Primitive::kPrimNot; + GpuRegister reg_out = out_.AsRegister<GpuRegister>(); + DCHECK(locations->CanCall()); + DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(reg_out)); + DCHECK(instruction_->IsLoadClass() || instruction_->IsLoadString()) + << "Unexpected instruction in read barrier for GC root slow path: " + << instruction_->DebugName(); + + __ Bind(GetEntryLabel()); + SaveLiveRegisters(codegen, locations); + + InvokeRuntimeCallingConvention calling_convention; + CodeGeneratorMIPS64* mips64_codegen = down_cast<CodeGeneratorMIPS64*>(codegen); + mips64_codegen->MoveLocation(Location::RegisterLocation(calling_convention.GetRegisterAt(0)), + root_, + Primitive::kPrimNot); + mips64_codegen->InvokeRuntime(kQuickReadBarrierForRootSlow, + instruction_, + instruction_->GetDexPc(), + this); + CheckEntrypointTypes<kQuickReadBarrierForRootSlow, mirror::Object*, GcRoot<mirror::Object>*>(); + mips64_codegen->MoveLocation(out_, calling_convention.GetReturnLocation(type), type); + + RestoreLiveRegisters(codegen, locations); + __ Bc(GetExitLabel()); + } + + const char* GetDescription() const OVERRIDE { return "ReadBarrierForRootSlowPathMIPS64"; } + + private: + const Location out_; + const Location root_; + + DISALLOW_COPY_AND_ASSIGN(ReadBarrierForRootSlowPathMIPS64); +}; + CodeGeneratorMIPS64::CodeGeneratorMIPS64(HGraph* graph, const Mips64InstructionSetFeatures& isa_features, const CompilerOptions& compiler_options, @@ -1140,23 +1662,32 @@ void CodeGeneratorMIPS64::InvokeRuntime(QuickEntrypointEnum entrypoint, uint32_t dex_pc, SlowPathCode* slow_path) { ValidateInvokeRuntime(entrypoint, instruction, slow_path); - __ LoadFromOffset(kLoadDoubleword, - T9, - TR, - GetThreadOffset<kMips64PointerSize>(entrypoint).Int32Value()); - __ Jalr(T9); - __ Nop(); + GenerateInvokeRuntime(GetThreadOffset<kMips64PointerSize>(entrypoint).Int32Value()); if (EntrypointRequiresStackMap(entrypoint)) { RecordPcInfo(instruction, dex_pc, slow_path); } } +void CodeGeneratorMIPS64::InvokeRuntimeWithoutRecordingPcInfo(int32_t entry_point_offset, + HInstruction* instruction, + SlowPathCode* slow_path) { + ValidateInvokeRuntimeWithoutRecordingPcInfo(instruction, slow_path); + GenerateInvokeRuntime(entry_point_offset); +} + +void CodeGeneratorMIPS64::GenerateInvokeRuntime(int32_t entry_point_offset) { + __ LoadFromOffset(kLoadDoubleword, T9, TR, entry_point_offset); + __ Jalr(T9); + __ Nop(); +} + void InstructionCodeGeneratorMIPS64::GenerateClassInitializationCheck(SlowPathCodeMIPS64* slow_path, GpuRegister class_reg) { __ LoadFromOffset(kLoadWord, TMP, class_reg, mirror::Class::StatusOffset().Int32Value()); __ LoadConst32(AT, mirror::Class::kStatusInitialized); __ Bltc(TMP, AT, slow_path->GetEntryLabel()); - // TODO: barrier needed? + // Even if the initialized flag is set, we need to ensure consistent memory ordering. 
+ __ Sync(0); __ Bind(slow_path->GetExitLabel()); } @@ -1447,14 +1978,31 @@ void InstructionCodeGeneratorMIPS64::VisitAnd(HAnd* instruction) { } void LocationsBuilderMIPS64::VisitArrayGet(HArrayGet* instruction) { + Primitive::Type type = instruction->GetType(); + bool object_array_get_with_read_barrier = + kEmitCompilerReadBarrier && (type == Primitive::kPrimNot); LocationSummary* locations = - new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall); + new (GetGraph()->GetArena()) LocationSummary(instruction, + object_array_get_with_read_barrier + ? LocationSummary::kCallOnSlowPath + : LocationSummary::kNoCall); locations->SetInAt(0, Location::RequiresRegister()); locations->SetInAt(1, Location::RegisterOrConstant(instruction->InputAt(1))); - if (Primitive::IsFloatingPointType(instruction->GetType())) { + if (Primitive::IsFloatingPointType(type)) { locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap); } else { - locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); + // The output overlaps in the case of an object array get with + // read barriers enabled: we do not want the move to overwrite the + // array's location, as we need it to emit the read barrier. + locations->SetOut(Location::RequiresRegister(), + object_array_get_with_read_barrier + ? Location::kOutputOverlap + : Location::kNoOutputOverlap); + } + // We need a temporary register for the read barrier marking slow + // path in CodeGeneratorMIPS64::GenerateArrayLoadWithBakerReadBarrier. + if (object_array_get_with_read_barrier && kUseBakerReadBarrier) { + locations->AddTemp(Location::RequiresRegister()); } } @@ -1467,7 +2015,9 @@ static auto GetImplicitNullChecker(HInstruction* instruction, CodeGeneratorMIPS6 void InstructionCodeGeneratorMIPS64::VisitArrayGet(HArrayGet* instruction) { LocationSummary* locations = instruction->GetLocations(); - GpuRegister obj = locations->InAt(0).AsRegister<GpuRegister>(); + Location obj_loc = locations->InAt(0); + GpuRegister obj = obj_loc.AsRegister<GpuRegister>(); + Location out_loc = locations->Out(); Location index = locations->InAt(1); uint32_t data_offset = CodeGenerator::GetArrayDataOffset(instruction); auto null_checker = GetImplicitNullChecker(instruction, codegen_); @@ -1477,7 +2027,7 @@ void InstructionCodeGeneratorMIPS64::VisitArrayGet(HArrayGet* instruction) { instruction->IsStringCharAt(); switch (type) { case Primitive::kPrimBoolean: { - GpuRegister out = locations->Out().AsRegister<GpuRegister>(); + GpuRegister out = out_loc.AsRegister<GpuRegister>(); if (index.IsConstant()) { size_t offset = (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_1) + data_offset; @@ -1490,7 +2040,7 @@ void InstructionCodeGeneratorMIPS64::VisitArrayGet(HArrayGet* instruction) { } case Primitive::kPrimByte: { - GpuRegister out = locations->Out().AsRegister<GpuRegister>(); + GpuRegister out = out_loc.AsRegister<GpuRegister>(); if (index.IsConstant()) { size_t offset = (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_1) + data_offset; @@ -1503,7 +2053,7 @@ void InstructionCodeGeneratorMIPS64::VisitArrayGet(HArrayGet* instruction) { } case Primitive::kPrimShort: { - GpuRegister out = locations->Out().AsRegister<GpuRegister>(); + GpuRegister out = out_loc.AsRegister<GpuRegister>(); if (index.IsConstant()) { size_t offset = (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_2) + data_offset; @@ -1517,7 +2067,7 @@ void InstructionCodeGeneratorMIPS64::VisitArrayGet(HArrayGet* instruction) { } case 
Primitive::kPrimChar: { - GpuRegister out = locations->Out().AsRegister<GpuRegister>(); + GpuRegister out = out_loc.AsRegister<GpuRegister>(); if (maybe_compressed_char_at) { uint32_t count_offset = mirror::String::CountOffset().Uint32Value(); __ LoadFromOffset(kLoadWord, TMP, obj, count_offset, null_checker); @@ -1570,10 +2120,9 @@ void InstructionCodeGeneratorMIPS64::VisitArrayGet(HArrayGet* instruction) { break; } - case Primitive::kPrimInt: - case Primitive::kPrimNot: { + case Primitive::kPrimInt: { DCHECK_EQ(sizeof(mirror::HeapReference<mirror::Object>), sizeof(int32_t)); - GpuRegister out = locations->Out().AsRegister<GpuRegister>(); + GpuRegister out = out_loc.AsRegister<GpuRegister>(); LoadOperandType load_type = (type == Primitive::kPrimNot) ? kLoadUnsignedWord : kLoadWord; if (index.IsConstant()) { size_t offset = @@ -1587,8 +2136,53 @@ void InstructionCodeGeneratorMIPS64::VisitArrayGet(HArrayGet* instruction) { break; } + case Primitive::kPrimNot: { + static_assert( + sizeof(mirror::HeapReference<mirror::Object>) == sizeof(int32_t), + "art::mirror::HeapReference<art::mirror::Object> and int32_t have different sizes."); + // /* HeapReference<Object> */ out = + // *(obj + data_offset + index * sizeof(HeapReference<Object>)) + if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) { + Location temp = locations->GetTemp(0); + // Note that a potential implicit null check is handled in this + // CodeGeneratorMIPS64::GenerateArrayLoadWithBakerReadBarrier call. + codegen_->GenerateArrayLoadWithBakerReadBarrier(instruction, + out_loc, + obj, + data_offset, + index, + temp, + /* needs_null_check */ true); + } else { + GpuRegister out = out_loc.AsRegister<GpuRegister>(); + if (index.IsConstant()) { + size_t offset = + (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_4) + data_offset; + __ LoadFromOffset(kLoadUnsignedWord, out, obj, offset, null_checker); + // If read barriers are enabled, emit read barriers other than + // Baker's using a slow path (and also unpoison the loaded + // reference, if heap poisoning is enabled). + codegen_->MaybeGenerateReadBarrierSlow(instruction, out_loc, out_loc, obj_loc, offset); + } else { + __ Sll(TMP, index.AsRegister<GpuRegister>(), TIMES_4); + __ Addu(TMP, obj, TMP); + __ LoadFromOffset(kLoadUnsignedWord, out, TMP, data_offset, null_checker); + // If read barriers are enabled, emit read barriers other than + // Baker's using a slow path (and also unpoison the loaded + // reference, if heap poisoning is enabled). 
+ codegen_->MaybeGenerateReadBarrierSlow(instruction, + out_loc, + out_loc, + obj_loc, + data_offset, + index); + } + } + break; + } + case Primitive::kPrimLong: { - GpuRegister out = locations->Out().AsRegister<GpuRegister>(); + GpuRegister out = out_loc.AsRegister<GpuRegister>(); if (index.IsConstant()) { size_t offset = (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_8) + data_offset; @@ -1602,7 +2196,7 @@ void InstructionCodeGeneratorMIPS64::VisitArrayGet(HArrayGet* instruction) { } case Primitive::kPrimFloat: { - FpuRegister out = locations->Out().AsFpuRegister<FpuRegister>(); + FpuRegister out = out_loc.AsFpuRegister<FpuRegister>(); if (index.IsConstant()) { size_t offset = (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_4) + data_offset; @@ -1616,7 +2210,7 @@ void InstructionCodeGeneratorMIPS64::VisitArrayGet(HArrayGet* instruction) { } case Primitive::kPrimDouble: { - FpuRegister out = locations->Out().AsFpuRegister<FpuRegister>(); + FpuRegister out = out_loc.AsFpuRegister<FpuRegister>(); if (index.IsConstant()) { size_t offset = (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_8) + data_offset; @@ -1633,11 +2227,6 @@ void InstructionCodeGeneratorMIPS64::VisitArrayGet(HArrayGet* instruction) { LOG(FATAL) << "Unreachable type " << instruction->GetType(); UNREACHABLE(); } - - if (type == Primitive::kPrimNot) { - GpuRegister out = locations->Out().AsRegister<GpuRegister>(); - __ MaybeUnpoisonHeapReference(out); - } } void LocationsBuilderMIPS64::VisitArrayLength(HArrayLength* instruction) { @@ -1679,23 +2268,28 @@ Location LocationsBuilderMIPS64::FpuRegisterOrConstantForStore(HInstruction* ins } void LocationsBuilderMIPS64::VisitArraySet(HArraySet* instruction) { - bool needs_runtime_call = instruction->NeedsTypeCheck(); + Primitive::Type value_type = instruction->GetComponentType(); + + bool needs_write_barrier = + CodeGenerator::StoreNeedsWriteBarrier(value_type, instruction->GetValue()); + bool may_need_runtime_call_for_type_check = instruction->NeedsTypeCheck(); + LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary( instruction, - needs_runtime_call ? LocationSummary::kCallOnMainOnly : LocationSummary::kNoCall); - if (needs_runtime_call) { - InvokeRuntimeCallingConvention calling_convention; - locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0))); - locations->SetInAt(1, Location::RegisterLocation(calling_convention.GetRegisterAt(1))); - locations->SetInAt(2, Location::RegisterLocation(calling_convention.GetRegisterAt(2))); + may_need_runtime_call_for_type_check ? + LocationSummary::kCallOnSlowPath : + LocationSummary::kNoCall); + + locations->SetInAt(0, Location::RequiresRegister()); + locations->SetInAt(1, Location::RegisterOrConstant(instruction->InputAt(1))); + if (Primitive::IsFloatingPointType(instruction->InputAt(2)->GetType())) { + locations->SetInAt(2, FpuRegisterOrConstantForStore(instruction->InputAt(2))); } else { - locations->SetInAt(0, Location::RequiresRegister()); - locations->SetInAt(1, Location::RegisterOrConstant(instruction->InputAt(1))); - if (Primitive::IsFloatingPointType(instruction->InputAt(2)->GetType())) { - locations->SetInAt(2, FpuRegisterOrConstantForStore(instruction->InputAt(2))); - } else { - locations->SetInAt(2, RegisterOrZeroConstant(instruction->InputAt(2))); - } + locations->SetInAt(2, RegisterOrZeroConstant(instruction->InputAt(2))); + } + if (needs_write_barrier) { + // Temporary register for the write barrier. 
 void LocationsBuilderMIPS64::VisitArrayLength(HArrayLength* instruction) {
@@ -1679,23 +2268,28 @@ Location LocationsBuilderMIPS64::FpuRegisterOrConstantForStore(HInstruction* ins
 }
 
 void LocationsBuilderMIPS64::VisitArraySet(HArraySet* instruction) {
-  bool needs_runtime_call = instruction->NeedsTypeCheck();
+  Primitive::Type value_type = instruction->GetComponentType();
+
+  bool needs_write_barrier =
+      CodeGenerator::StoreNeedsWriteBarrier(value_type, instruction->GetValue());
+  bool may_need_runtime_call_for_type_check = instruction->NeedsTypeCheck();
+
   LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(
       instruction,
-      needs_runtime_call ? LocationSummary::kCallOnMainOnly : LocationSummary::kNoCall);
-  if (needs_runtime_call) {
-    InvokeRuntimeCallingConvention calling_convention;
-    locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
-    locations->SetInAt(1, Location::RegisterLocation(calling_convention.GetRegisterAt(1)));
-    locations->SetInAt(2, Location::RegisterLocation(calling_convention.GetRegisterAt(2)));
+      may_need_runtime_call_for_type_check
+          ? LocationSummary::kCallOnSlowPath
+          : LocationSummary::kNoCall);
+
+  locations->SetInAt(0, Location::RequiresRegister());
+  locations->SetInAt(1, Location::RegisterOrConstant(instruction->InputAt(1)));
+  if (Primitive::IsFloatingPointType(instruction->InputAt(2)->GetType())) {
+    locations->SetInAt(2, FpuRegisterOrConstantForStore(instruction->InputAt(2)));
   } else {
-    locations->SetInAt(0, Location::RequiresRegister());
-    locations->SetInAt(1, Location::RegisterOrConstant(instruction->InputAt(1)));
-    if (Primitive::IsFloatingPointType(instruction->InputAt(2)->GetType())) {
-      locations->SetInAt(2, FpuRegisterOrConstantForStore(instruction->InputAt(2)));
-    } else {
-      locations->SetInAt(2, RegisterOrZeroConstant(instruction->InputAt(2)));
-    }
+    locations->SetInAt(2, RegisterOrZeroConstant(instruction->InputAt(2)));
+  }
+  if (needs_write_barrier) {
+    // Temporary register for the write barrier.
+    locations->AddTemp(Location::RequiresRegister());  // Possibly used for ref. poisoning too.
   }
 }
 
@@ -1705,7 +2299,7 @@ void InstructionCodeGeneratorMIPS64::VisitArraySet(HArraySet* instruction) {
   Location index = locations->InAt(1);
   Location value_location = locations->InAt(2);
   Primitive::Type value_type = instruction->GetComponentType();
-  bool needs_runtime_call = locations->WillCall();
+  bool may_need_runtime_call_for_type_check = instruction->NeedsTypeCheck();
   bool needs_write_barrier =
       CodeGenerator::StoreNeedsWriteBarrier(value_type, instruction->GetValue());
   auto null_checker = GetImplicitNullChecker(instruction, codegen_);
@@ -1749,68 +2343,138 @@ void InstructionCodeGeneratorMIPS64::VisitArraySet(HArraySet* instruction) {
       break;
     }
 
-    case Primitive::kPrimInt:
+    case Primitive::kPrimInt: {
+      uint32_t data_offset = mirror::Array::DataOffset(sizeof(int32_t)).Uint32Value();
+      if (index.IsConstant()) {
+        data_offset += index.GetConstant()->AsIntConstant()->GetValue() << TIMES_4;
+      } else {
+        __ Dsll(base_reg, index.AsRegister<GpuRegister>(), TIMES_4);
+        __ Daddu(base_reg, obj, base_reg);
+      }
+      if (value_location.IsConstant()) {
+        int32_t value = CodeGenerator::GetInt32ValueOf(value_location.GetConstant());
+        __ StoreConstToOffset(kStoreWord, value, base_reg, data_offset, TMP, null_checker);
+      } else {
+        GpuRegister value = value_location.AsRegister<GpuRegister>();
+        __ StoreToOffset(kStoreWord, value, base_reg, data_offset, null_checker);
+      }
+      break;
+    }
+
     case Primitive::kPrimNot: {
-      if (!needs_runtime_call) {
+      if (value_location.IsConstant()) {
+        // Just setting null.
         uint32_t data_offset = mirror::Array::DataOffset(sizeof(int32_t)).Uint32Value();
         if (index.IsConstant()) {
           data_offset += index.GetConstant()->AsIntConstant()->GetValue() << TIMES_4;
         } else {
-          DCHECK(index.IsRegister()) << index;
           __ Dsll(base_reg, index.AsRegister<GpuRegister>(), TIMES_4);
           __ Daddu(base_reg, obj, base_reg);
         }
-        if (value_location.IsConstant()) {
-          int32_t value = CodeGenerator::GetInt32ValueOf(value_location.GetConstant());
-          __ StoreConstToOffset(kStoreWord, value, base_reg, data_offset, TMP, null_checker);
-          DCHECK(!needs_write_barrier);
-        } else {
-          GpuRegister value = value_location.AsRegister<GpuRegister>();
-          if (kPoisonHeapReferences && needs_write_barrier) {
-            // Note that in the case where `value` is a null reference,
-            // we do not enter this block, as a null reference does not
-            // need poisoning.
-            DCHECK_EQ(value_type, Primitive::kPrimNot);
-            // Use Sw() instead of StoreToOffset() in order to be able to
-            // hold the poisoned reference in AT and thus avoid allocating
-            // yet another temporary register.
-            if (index.IsConstant()) {
-              if (!IsInt<16>(static_cast<int32_t>(data_offset))) {
-                int16_t low16 = Low16Bits(data_offset);
-                // For consistency with StoreToOffset() and such treat data_offset as int32_t.
-                uint64_t high48 = static_cast<uint64_t>(static_cast<int32_t>(data_offset)) - low16;
-                int16_t upper16 = High16Bits(high48);
-                // Allow the full [-2GB,+2GB) range in case `low16` is negative and needs a
-                // compensatory 64KB added, which may push `high48` above 2GB and require
-                // the dahi instruction.
-                int16_t higher16 = High32Bits(high48) + ((upper16 < 0) ? 1 : 0);
-                __ Daui(TMP, obj, upper16);
-                if (higher16 != 0) {
-                  __ Dahi(TMP, higher16);
-                }
-                base_reg = TMP;
-                data_offset = low16;
-              }
-            } else {
-              DCHECK(IsInt<16>(static_cast<int32_t>(data_offset)));
-            }
-            __ PoisonHeapReference(AT, value);
-            __ Sw(AT, base_reg, data_offset);
-            null_checker();
+        int32_t value = CodeGenerator::GetInt32ValueOf(value_location.GetConstant());
+        DCHECK_EQ(value, 0);
+        __ StoreConstToOffset(kStoreWord, value, base_reg, data_offset, TMP, null_checker);
+        DCHECK(!needs_write_barrier);
+        DCHECK(!may_need_runtime_call_for_type_check);
+        break;
+      }
+
+      DCHECK(needs_write_barrier);
+      GpuRegister value = value_location.AsRegister<GpuRegister>();
+      GpuRegister temp1 = locations->GetTemp(0).AsRegister<GpuRegister>();
+      GpuRegister temp2 = TMP;  // Doesn't need to survive slow path.
+      uint32_t class_offset = mirror::Object::ClassOffset().Int32Value();
+      uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value();
+      uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value();
+      Mips64Label done;
+      SlowPathCodeMIPS64* slow_path = nullptr;
+
+      if (may_need_runtime_call_for_type_check) {
+        slow_path = new (GetGraph()->GetArena()) ArraySetSlowPathMIPS64(instruction);
+        codegen_->AddSlowPath(slow_path);
+        if (instruction->GetValueCanBeNull()) {
+          Mips64Label non_zero;
+          __ Bnezc(value, &non_zero);
+          uint32_t data_offset = mirror::Array::DataOffset(sizeof(int32_t)).Uint32Value();
+          if (index.IsConstant()) {
+            data_offset += index.GetConstant()->AsIntConstant()->GetValue() << TIMES_4;
           } else {
-            __ StoreToOffset(kStoreWord, value, base_reg, data_offset, null_checker);
-          }
-          if (needs_write_barrier) {
-            DCHECK_EQ(value_type, Primitive::kPrimNot);
-            codegen_->MarkGCCard(obj, value, instruction->GetValueCanBeNull());
+            __ Dsll(base_reg, index.AsRegister<GpuRegister>(), TIMES_4);
+            __ Daddu(base_reg, obj, base_reg);
           }
+          __ StoreToOffset(kStoreWord, value, base_reg, data_offset, null_checker);
+          __ Bc(&done);
+          __ Bind(&non_zero);
+        }
+
+        // Note that when read barriers are enabled, the type checks
+        // are performed without read barriers.  This is fine, even in
+        // the case where a class object is in the from-space after
+        // the flip, as a comparison involving such a type would not
+        // produce a false positive; it may of course produce a false
+        // negative, in which case we would take the ArraySet slow
+        // path.
+
+        // /* HeapReference<Class> */ temp1 = obj->klass_
+        __ LoadFromOffset(kLoadUnsignedWord, temp1, obj, class_offset, null_checker);
+        __ MaybeUnpoisonHeapReference(temp1);
+
+        // /* HeapReference<Class> */ temp1 = temp1->component_type_
+        __ LoadFromOffset(kLoadUnsignedWord, temp1, temp1, component_offset);
+        // /* HeapReference<Class> */ temp2 = value->klass_
+        __ LoadFromOffset(kLoadUnsignedWord, temp2, value, class_offset);
+        // If heap poisoning is enabled, no need to unpoison `temp1`
+        // nor `temp2`, as we are comparing two poisoned references.
+
+        if (instruction->StaticTypeOfArrayIsObjectArray()) {
+          Mips64Label do_put;
+          __ Beqc(temp1, temp2, &do_put);
+          // If heap poisoning is enabled, the `temp1` reference has
+          // not been unpoisoned yet; unpoison it now.
+          __ MaybeUnpoisonHeapReference(temp1);
+
+          // /* HeapReference<Class> */ temp1 = temp1->super_class_
+          __ LoadFromOffset(kLoadUnsignedWord, temp1, temp1, super_offset);
+          // If heap poisoning is enabled, no need to unpoison
+          // `temp1`, as we are comparing against null below.
+          __ Bnezc(temp1, slow_path->GetEntryLabel());
+          __ Bind(&do_put);
+        } else {
+          __ Bnec(temp1, temp2, slow_path->GetEntryLabel());
         }
+      }
+
+      GpuRegister source = value;
+      if (kPoisonHeapReferences) {
+        // Note that in the case where `value` is a null reference,
+        // we do not enter this block, as a null reference does not
+        // need poisoning.
+        __ Move(temp1, value);
+        __ PoisonHeapReference(temp1);
+        source = temp1;
+      }
+
+      uint32_t data_offset = mirror::Array::DataOffset(sizeof(int32_t)).Uint32Value();
+      if (index.IsConstant()) {
+        data_offset += index.GetConstant()->AsIntConstant()->GetValue() << TIMES_4;
       } else {
-        DCHECK_EQ(value_type, Primitive::kPrimNot);
-        // Note: if heap poisoning is enabled, pAputObject takes care
-        // of poisoning the reference.
-        codegen_->InvokeRuntime(kQuickAputObject, instruction, instruction->GetDexPc());
-        CheckEntrypointTypes<kQuickAputObject, void, mirror::Array*, int32_t, mirror::Object*>();
+        __ Dsll(base_reg, index.AsRegister<GpuRegister>(), TIMES_4);
+        __ Daddu(base_reg, obj, base_reg);
+      }
+      __ StoreToOffset(kStoreWord, source, base_reg, data_offset);
+
+      if (!may_need_runtime_call_for_type_check) {
+        codegen_->MaybeRecordImplicitNullCheck(instruction);
+      }
+
+      codegen_->MarkGCCard(obj, value, instruction->GetValueCanBeNull());
+
+      if (done.IsLinked()) {
+        __ Bind(&done);
+      }
+
+      if (slow_path != nullptr) {
+        __ Bind(slow_path->GetExitLabel());
      }
       break;
     }
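The inline type check emitted above can be read as the following C++ model (illustrative only; the Class layout is simplified and the names are not ART API):

    // Sketch of the fast-path assignability test before an object array store.
    bool CanStoreFast(Class* array_klass, Class* value_klass,
                      bool static_type_is_object_array) {
      Class* component = array_klass->component_type;   // temp1
      if (component == value_klass) {                   // Beqc -> do_put
        return true;
      }
      if (static_type_is_object_array) {
        // If the component type has no superclass it is java.lang.Object,
        // so any reference may be stored; otherwise defer to the runtime.
        return component->super_class == nullptr;       // Bnezc -> slow path
      }
      return false;                                     // Bnec -> slow path
    }

A false "cannot store fast" answer is harmless: the ArraySet slow path simply calls the aput-object entrypoint, which redoes the check with full precision.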
@@ -1900,6 +2564,23 @@ void InstructionCodeGeneratorMIPS64::VisitBoundsCheck(HBoundsCheck* instruction)
   __ Bgeuc(index, length, slow_path->GetEntryLabel());
 }
 
+// Temp is used for read barrier.
+static size_t NumberOfInstanceOfTemps(TypeCheckKind type_check_kind) {
+  if (kEmitCompilerReadBarrier &&
+      (kUseBakerReadBarrier ||
+       type_check_kind == TypeCheckKind::kAbstractClassCheck ||
+       type_check_kind == TypeCheckKind::kClassHierarchyCheck ||
+       type_check_kind == TypeCheckKind::kArrayObjectCheck)) {
+    return 1;
+  }
+  return 0;
+}
+
+// Extra temp is used for read barrier.
+static size_t NumberOfCheckCastTemps(TypeCheckKind type_check_kind) {
+  return 1 + NumberOfInstanceOfTemps(type_check_kind);
+}
+
 void LocationsBuilderMIPS64::VisitCheckCast(HCheckCast* instruction) {
   LocationSummary::CallKind call_kind = LocationSummary::kNoCall;
   bool throws_into_catch = instruction->CanThrowIntoCatchBlock();
@@ -1910,7 +2591,7 @@ void LocationsBuilderMIPS64::VisitCheckCast(HCheckCast* instruction) {
     case TypeCheckKind::kAbstractClassCheck:
     case TypeCheckKind::kClassHierarchyCheck:
    case TypeCheckKind::kArrayObjectCheck:
-      call_kind = throws_into_catch
+      call_kind = (throws_into_catch || kEmitCompilerReadBarrier)
          ? LocationSummary::kCallOnSlowPath
          : LocationSummary::kNoCall;  // In fact, call on a fatal (non-returning) slow path.
      break;
@@ -1924,15 +2605,20 @@ void LocationsBuilderMIPS64::VisitCheckCast(HCheckCast* instruction) {
   LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction, call_kind);
   locations->SetInAt(0, Location::RequiresRegister());
   locations->SetInAt(1, Location::RequiresRegister());
-  locations->AddTemp(Location::RequiresRegister());
+  locations->AddRegisterTemps(NumberOfCheckCastTemps(type_check_kind));
 }
 
 void InstructionCodeGeneratorMIPS64::VisitCheckCast(HCheckCast* instruction) {
   TypeCheckKind type_check_kind = instruction->GetTypeCheckKind();
   LocationSummary* locations = instruction->GetLocations();
-  GpuRegister obj = locations->InAt(0).AsRegister<GpuRegister>();
+  Location obj_loc = locations->InAt(0);
+  GpuRegister obj = obj_loc.AsRegister<GpuRegister>();
   GpuRegister cls = locations->InAt(1).AsRegister<GpuRegister>();
-  GpuRegister temp = locations->GetTemp(0).AsRegister<GpuRegister>();
+  Location temp_loc = locations->GetTemp(0);
+  GpuRegister temp = temp_loc.AsRegister<GpuRegister>();
+  const size_t num_temps = NumberOfCheckCastTemps(type_check_kind);
+  DCHECK_LE(num_temps, 2u);
+  Location maybe_temp2_loc = (num_temps >= 2) ? locations->GetTemp(1) : Location::NoLocation();
   const uint32_t class_offset = mirror::Object::ClassOffset().Int32Value();
   const uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value();
   const uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value();
@@ -1969,8 +2655,12 @@ void InstructionCodeGeneratorMIPS64::VisitCheckCast(HCheckCast* instruction) {
     case TypeCheckKind::kExactCheck:
     case TypeCheckKind::kArrayCheck: {
       // /* HeapReference<Class> */ temp = obj->klass_
-      __ LoadFromOffset(kLoadUnsignedWord, temp, obj, class_offset);
-      __ MaybeUnpoisonHeapReference(temp);
+      GenerateReferenceLoadTwoRegisters(instruction,
+                                        temp_loc,
+                                        obj_loc,
+                                        class_offset,
+                                        maybe_temp2_loc,
+                                        kWithoutReadBarrier);
       // Jump to slow path for throwing the exception or doing a
       // more involved array check.
       __ Bnec(temp, cls, slow_path->GetEntryLabel());
       break;
     }
 
@@ -1979,15 +2669,22 @@ void InstructionCodeGeneratorMIPS64::VisitCheckCast(HCheckCast* instruction) {
     case TypeCheckKind::kAbstractClassCheck: {
       // /* HeapReference<Class> */ temp = obj->klass_
-      __ LoadFromOffset(kLoadUnsignedWord, temp, obj, class_offset);
-      __ MaybeUnpoisonHeapReference(temp);
+      GenerateReferenceLoadTwoRegisters(instruction,
+                                        temp_loc,
+                                        obj_loc,
+                                        class_offset,
+                                        maybe_temp2_loc,
+                                        kWithoutReadBarrier);
       // If the class is abstract, we eagerly fetch the super class of the
       // object to avoid doing a comparison we know will fail.
       Mips64Label loop;
       __ Bind(&loop);
       // /* HeapReference<Class> */ temp = temp->super_class_
-      __ LoadFromOffset(kLoadUnsignedWord, temp, temp, super_offset);
-      __ MaybeUnpoisonHeapReference(temp);
+      GenerateReferenceLoadOneRegister(instruction,
+                                       temp_loc,
+                                       super_offset,
+                                       maybe_temp2_loc,
+                                       kWithoutReadBarrier);
       // If the class reference currently in `temp` is null, jump to the slow path to throw the
       // exception.
       __ Beqzc(temp, slow_path->GetEntryLabel());
@@ -1998,15 +2695,22 @@ void InstructionCodeGeneratorMIPS64::VisitCheckCast(HCheckCast* instruction) {
 
     case TypeCheckKind::kClassHierarchyCheck: {
       // /* HeapReference<Class> */ temp = obj->klass_
-      __ LoadFromOffset(kLoadUnsignedWord, temp, obj, class_offset);
-      __ MaybeUnpoisonHeapReference(temp);
+      GenerateReferenceLoadTwoRegisters(instruction,
+                                        temp_loc,
+                                        obj_loc,
+                                        class_offset,
+                                        maybe_temp2_loc,
+                                        kWithoutReadBarrier);
       // Walk over the class hierarchy to find a match.
       Mips64Label loop;
       __ Bind(&loop);
       __ Beqc(temp, cls, &done);
       // /* HeapReference<Class> */ temp = temp->super_class_
-      __ LoadFromOffset(kLoadUnsignedWord, temp, temp, super_offset);
-      __ MaybeUnpoisonHeapReference(temp);
+      GenerateReferenceLoadOneRegister(instruction,
+                                       temp_loc,
+                                       super_offset,
+                                       maybe_temp2_loc,
+                                       kWithoutReadBarrier);
       // If the class reference currently in `temp` is null, jump to the slow path to throw the
       // exception. Otherwise, jump to the beginning of the loop.
       __ Bnezc(temp, &loop);
@@ -2016,14 +2720,21 @@ void InstructionCodeGeneratorMIPS64::VisitCheckCast(HCheckCast* instruction) {
 
     case TypeCheckKind::kArrayObjectCheck: {
       // /* HeapReference<Class> */ temp = obj->klass_
-      __ LoadFromOffset(kLoadUnsignedWord, temp, obj, class_offset);
-      __ MaybeUnpoisonHeapReference(temp);
+      GenerateReferenceLoadTwoRegisters(instruction,
+                                        temp_loc,
+                                        obj_loc,
+                                        class_offset,
+                                        maybe_temp2_loc,
+                                        kWithoutReadBarrier);
       // Do an exact check.
       __ Beqc(temp, cls, &done);
       // Otherwise, we need to check that the object's class is a non-primitive array.
       // /* HeapReference<Class> */ temp = temp->component_type_
-      __ LoadFromOffset(kLoadUnsignedWord, temp, temp, component_offset);
-      __ MaybeUnpoisonHeapReference(temp);
+      GenerateReferenceLoadOneRegister(instruction,
+                                       temp_loc,
+                                       component_offset,
+                                       maybe_temp2_loc,
+                                       kWithoutReadBarrier);
       // If the component type is null, jump to the slow path to throw the exception.
       __ Beqzc(temp, slow_path->GetEntryLabel());
       // Otherwise, the object is indeed an array, further check that this component
@@ -2050,11 +2761,19 @@ void InstructionCodeGeneratorMIPS64::VisitCheckCast(HCheckCast* instruction) {
       // Avoid read barriers to improve performance of the fast path. We can not get false
       // positives by doing this.
       // /* HeapReference<Class> */ temp = obj->klass_
-      __ LoadFromOffset(kLoadUnsignedWord, temp, obj, class_offset);
-      __ MaybeUnpoisonHeapReference(temp);
+      GenerateReferenceLoadTwoRegisters(instruction,
+                                        temp_loc,
+                                        obj_loc,
+                                        class_offset,
+                                        maybe_temp2_loc,
+                                        kWithoutReadBarrier);
       // /* HeapReference<Class> */ temp = temp->iftable_
-      __ LoadFromOffset(kLoadUnsignedWord, temp, temp, iftable_offset);
-      __ MaybeUnpoisonHeapReference(temp);
+      GenerateReferenceLoadTwoRegisters(instruction,
+                                        temp_loc,
+                                        temp_loc,
+                                        iftable_offset,
+                                        maybe_temp2_loc,
+                                        kWithoutReadBarrier);
       // Iftable is never null.
       __ Lw(TMP, temp, array_length_offset);
       // Loop through the iftable and check if any class matches.
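The interface check that the comment above refers to is a linear scan over the iftable. A rough C++ model of that scan, under the assumption that iftable entries come in (interface, method array) pairs, hence the stride of two (illustrative layout, not ART API):

    // Sketch of the emitted iftable scan for the interface check.
    bool ImplementsInterface(IfTable* iftable, Class* interface) {
      int32_t count = iftable->count;            // Lw(TMP, temp, array_length_offset)
      for (int32_t i = 0; i < count; i += 2) {   // entries assumed paired
        if (iftable->entries[i] == interface) {
          return true;                           // match: cast succeeds
        }
      }
      return false;                              // no match: throw on the slow path
    }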
@@ -3270,14 +3989,31 @@ void CodeGeneratorMIPS64::GenerateNop() {
 }
 
 void LocationsBuilderMIPS64::HandleFieldGet(HInstruction* instruction,
-                                            const FieldInfo& field_info ATTRIBUTE_UNUSED) {
-  LocationSummary* locations =
-      new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall);
+                                            const FieldInfo& field_info) {
+  Primitive::Type field_type = field_info.GetFieldType();
+  bool object_field_get_with_read_barrier =
+      kEmitCompilerReadBarrier && (field_type == Primitive::kPrimNot);
+  LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(
+      instruction,
+      object_field_get_with_read_barrier
+          ? LocationSummary::kCallOnSlowPath
+          : LocationSummary::kNoCall);
   locations->SetInAt(0, Location::RequiresRegister());
   if (Primitive::IsFloatingPointType(instruction->GetType())) {
     locations->SetOut(Location::RequiresFpuRegister());
   } else {
-    locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
+    // The output overlaps in the case of an object field get with
+    // read barriers enabled: we do not want the move to overwrite the
+    // object's location, as we need it to emit the read barrier.
+    locations->SetOut(Location::RequiresRegister(),
+                      object_field_get_with_read_barrier
+                          ? Location::kOutputOverlap
+                          : Location::kNoOutputOverlap);
+  }
+  if (object_field_get_with_read_barrier && kUseBakerReadBarrier) {
+    // We need a temporary register for the read barrier marking slow
+    // path in CodeGeneratorMIPS64::GenerateFieldLoadWithBakerReadBarrier.
+    locations->AddTemp(Location::RequiresRegister());
   }
 }
 
@@ -3285,8 +4021,11 @@ void InstructionCodeGeneratorMIPS64::HandleFieldGet(HInstruction* instruction,
                                                     const FieldInfo& field_info) {
   Primitive::Type type = field_info.GetFieldType();
   LocationSummary* locations = instruction->GetLocations();
-  GpuRegister obj = locations->InAt(0).AsRegister<GpuRegister>();
+  Location obj_loc = locations->InAt(0);
+  GpuRegister obj = obj_loc.AsRegister<GpuRegister>();
+  Location dst_loc = locations->Out();
   LoadOperandType load_type = kLoadUnsignedByte;
+  bool is_volatile = field_info.IsVolatile();
   uint32_t offset = field_info.GetFieldOffset().Uint32Value();
   auto null_checker = GetImplicitNullChecker(instruction, codegen_);
 
@@ -3319,19 +4058,46 @@ void InstructionCodeGeneratorMIPS64::HandleFieldGet(HInstruction* instruction,
       UNREACHABLE();
   }
   if (!Primitive::IsFloatingPointType(type)) {
-    DCHECK(locations->Out().IsRegister());
-    GpuRegister dst = locations->Out().AsRegister<GpuRegister>();
-    __ LoadFromOffset(load_type, dst, obj, offset, null_checker);
+    DCHECK(dst_loc.IsRegister());
+    GpuRegister dst = dst_loc.AsRegister<GpuRegister>();
+    if (type == Primitive::kPrimNot) {
+      // /* HeapReference<Object> */ dst = *(obj + offset)
+      if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
+        Location temp_loc = locations->GetTemp(0);
+        // Note that a potential implicit null check is handled in this
+        // CodeGeneratorMIPS64::GenerateFieldLoadWithBakerReadBarrier call.
+        codegen_->GenerateFieldLoadWithBakerReadBarrier(instruction,
+                                                        dst_loc,
+                                                        obj,
+                                                        offset,
+                                                        temp_loc,
+                                                        /* needs_null_check */ true);
+        if (is_volatile) {
+          GenerateMemoryBarrier(MemBarrierKind::kLoadAny);
+        }
+      } else {
+        __ LoadFromOffset(kLoadUnsignedWord, dst, obj, offset, null_checker);
+        if (is_volatile) {
+          GenerateMemoryBarrier(MemBarrierKind::kLoadAny);
+        }
+        // If read barriers are enabled, emit read barriers other than
+        // Baker's using a slow path (and also unpoison the loaded
+        // reference, if heap poisoning is enabled).
+        codegen_->MaybeGenerateReadBarrierSlow(instruction, dst_loc, dst_loc, obj_loc, offset);
+      }
+    } else {
+      __ LoadFromOffset(load_type, dst, obj, offset, null_checker);
+    }
   } else {
-    DCHECK(locations->Out().IsFpuRegister());
-    FpuRegister dst = locations->Out().AsFpuRegister<FpuRegister>();
+    DCHECK(dst_loc.IsFpuRegister());
+    FpuRegister dst = dst_loc.AsFpuRegister<FpuRegister>();
     __ LoadFpuFromOffset(load_type, dst, obj, offset, null_checker);
   }
-  // TODO: memory barrier?
-  if (type == Primitive::kPrimNot) {
-    GpuRegister dst = locations->Out().AsRegister<GpuRegister>();
-    __ MaybeUnpoisonHeapReference(dst);
+
+  // Memory barriers, in the case of references, are handled in the
+  // previous switch statement.
+  if (is_volatile && (type != Primitive::kPrimNot)) {
+    GenerateMemoryBarrier(MemBarrierKind::kLoadAny);
   }
 }
 
@@ -3355,6 +4121,7 @@ void InstructionCodeGeneratorMIPS64::HandleFieldSet(HInstruction* instruction,
   GpuRegister obj = locations->InAt(0).AsRegister<GpuRegister>();
   Location value_location = locations->InAt(1);
   StoreOperandType store_type = kStoreByte;
+  bool is_volatile = field_info.IsVolatile();
   uint32_t offset = field_info.GetFieldOffset().Uint32Value();
   bool needs_write_barrier = CodeGenerator::StoreNeedsWriteBarrier(type, instruction->InputAt(1));
   auto null_checker = GetImplicitNullChecker(instruction, codegen_);
@@ -3382,6 +4149,10 @@ void InstructionCodeGeneratorMIPS64::HandleFieldSet(HInstruction* instruction,
       UNREACHABLE();
   }
 
+  if (is_volatile) {
+    GenerateMemoryBarrier(MemBarrierKind::kAnyStore);
+  }
+
   if (value_location.IsConstant()) {
     int64_t value = CodeGenerator::GetInt64ValueOf(value_location.GetConstant());
     __ StoreConstToOffset(store_type, value, obj, offset, TMP, null_checker);
@@ -3405,12 +4176,16 @@ void InstructionCodeGeneratorMIPS64::HandleFieldSet(HInstruction* instruction,
       __ StoreFpuToOffset(store_type, src, obj, offset, null_checker);
     }
   }
-  // TODO: memory barriers?
+
   if (needs_write_barrier) {
     DCHECK(value_location.IsRegister());
     GpuRegister src = value_location.AsRegister<GpuRegister>();
     codegen_->MarkGCCard(obj, src, value_can_be_null);
   }
+
+  if (is_volatile) {
+    GenerateMemoryBarrier(MemBarrierKind::kAnyAny);
+  }
 }
 
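The barrier placement introduced above follows the usual acquire/release discipline: kLoadAny after a volatile load, kAnyStore before a volatile store, kAnyAny after it. A C++11 analogue of that contract, offered only as a rough sketch of the intended ordering semantics:

    #include <atomic>
    #include <cstdint>

    // Volatile field get: load, then a LoadAny (acquire) barrier.
    int32_t VolatileGet(const std::atomic<int32_t>& field) {
      return field.load(std::memory_order_acquire);
    }

    // Volatile field set: AnyStore barrier before the store, AnyAny after;
    // seq_cst approximates that release-store-plus-full-fence pairing.
    void VolatileSet(std::atomic<int32_t>& field, int32_t value) {
      field.store(value, std::memory_order_seq_cst);
    }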
 void LocationsBuilderMIPS64::VisitInstanceFieldGet(HInstanceFieldGet* instruction) {
@@ -3429,14 +4204,134 @@ void InstructionCodeGeneratorMIPS64::VisitInstanceFieldSet(HInstanceFieldSet* in
   HandleFieldSet(instruction, instruction->GetFieldInfo(), instruction->GetValueCanBeNull());
 }
 
+void InstructionCodeGeneratorMIPS64::GenerateReferenceLoadOneRegister(
+    HInstruction* instruction,
+    Location out,
+    uint32_t offset,
+    Location maybe_temp,
+    ReadBarrierOption read_barrier_option) {
+  GpuRegister out_reg = out.AsRegister<GpuRegister>();
+  if (read_barrier_option == kWithReadBarrier) {
+    CHECK(kEmitCompilerReadBarrier);
+    DCHECK(maybe_temp.IsRegister()) << maybe_temp;
+    if (kUseBakerReadBarrier) {
+      // Load with fast path based Baker's read barrier.
+      // /* HeapReference<Object> */ out = *(out + offset)
+      codegen_->GenerateFieldLoadWithBakerReadBarrier(instruction,
+                                                      out,
+                                                      out_reg,
+                                                      offset,
+                                                      maybe_temp,
+                                                      /* needs_null_check */ false);
+    } else {
+      // Load with slow path based read barrier.
+      // Save the value of `out` into `maybe_temp` before overwriting it
+      // in the following move operation, as we will need it for the
+      // read barrier below.
+      __ Move(maybe_temp.AsRegister<GpuRegister>(), out_reg);
+      // /* HeapReference<Object> */ out = *(out + offset)
+      __ LoadFromOffset(kLoadUnsignedWord, out_reg, out_reg, offset);
+      codegen_->GenerateReadBarrierSlow(instruction, out, out, maybe_temp, offset);
+    }
+  } else {
+    // Plain load with no read barrier.
+    // /* HeapReference<Object> */ out = *(out + offset)
+    __ LoadFromOffset(kLoadUnsignedWord, out_reg, out_reg, offset);
+    __ MaybeUnpoisonHeapReference(out_reg);
+  }
+}
+
+void InstructionCodeGeneratorMIPS64::GenerateReferenceLoadTwoRegisters(
+    HInstruction* instruction,
+    Location out,
+    Location obj,
+    uint32_t offset,
+    Location maybe_temp,
+    ReadBarrierOption read_barrier_option) {
+  GpuRegister out_reg = out.AsRegister<GpuRegister>();
+  GpuRegister obj_reg = obj.AsRegister<GpuRegister>();
+  if (read_barrier_option == kWithReadBarrier) {
+    CHECK(kEmitCompilerReadBarrier);
+    if (kUseBakerReadBarrier) {
+      DCHECK(maybe_temp.IsRegister()) << maybe_temp;
+      // Load with fast path based Baker's read barrier.
+      // /* HeapReference<Object> */ out = *(obj + offset)
+      codegen_->GenerateFieldLoadWithBakerReadBarrier(instruction,
+                                                      out,
+                                                      obj_reg,
+                                                      offset,
+                                                      maybe_temp,
+                                                      /* needs_null_check */ false);
+    } else {
+      // Load with slow path based read barrier.
+      // /* HeapReference<Object> */ out = *(obj + offset)
+      __ LoadFromOffset(kLoadUnsignedWord, out_reg, obj_reg, offset);
+      codegen_->GenerateReadBarrierSlow(instruction, out, out, obj, offset);
+    }
+  } else {
+    // Plain load with no read barrier.
+    // /* HeapReference<Object> */ out = *(obj + offset)
+    __ LoadFromOffset(kLoadUnsignedWord, out_reg, obj_reg, offset);
+    __ MaybeUnpoisonHeapReference(out_reg);
+  }
+}
+
 void InstructionCodeGeneratorMIPS64::GenerateGcRootFieldLoad(
-    HInstruction* instruction ATTRIBUTE_UNUSED,
+    HInstruction* instruction,
     Location root,
     GpuRegister obj,
-    uint32_t offset) {
+    uint32_t offset,
+    ReadBarrierOption read_barrier_option) {
   GpuRegister root_reg = root.AsRegister<GpuRegister>();
-  if (kEmitCompilerReadBarrier) {
-    UNIMPLEMENTED(FATAL) << "for read barrier";
+  if (read_barrier_option == kWithReadBarrier) {
+    DCHECK(kEmitCompilerReadBarrier);
+    if (kUseBakerReadBarrier) {
+      // Fast path implementation of art::ReadBarrier::BarrierForRoot when
+      // Baker's read barriers are used:
+      //
+      //   root = obj.field;
+      //   temp = Thread::Current()->pReadBarrierMarkReg ## root.reg()
+      //   if (temp != null) {
+      //     root = temp(root)
+      //   }
+
+      // /* GcRoot<mirror::Object> */ root = *(obj + offset)
+      __ LoadFromOffset(kLoadUnsignedWord, root_reg, obj, offset);
+      static_assert(
+          sizeof(mirror::CompressedReference<mirror::Object>) == sizeof(GcRoot<mirror::Object>),
+          "art::mirror::CompressedReference<mirror::Object> and art::GcRoot<mirror::Object> "
+          "have different sizes.");
+      static_assert(sizeof(mirror::CompressedReference<mirror::Object>) == sizeof(int32_t),
+                    "art::mirror::CompressedReference<mirror::Object> and int32_t "
+                    "have different sizes.");
+
+      // Slow path marking the GC root `root`.
+      Location temp = Location::RegisterLocation(T9);
+      SlowPathCodeMIPS64* slow_path =
+          new (GetGraph()->GetArena()) ReadBarrierMarkSlowPathMIPS64(
+              instruction,
+              root,
+              /*entrypoint*/ temp);
+      codegen_->AddSlowPath(slow_path);
+
+      // temp = Thread::Current()->pReadBarrierMarkReg ## root.reg()
+      const int32_t entry_point_offset =
+          CodeGenerator::GetReadBarrierMarkEntryPointsOffset<kMips64PointerSize>(root.reg() - 1);
+      // Loading the entrypoint does not require a load acquire since it is only changed when
+      // threads are suspended or running a checkpoint.
+      __ LoadFromOffset(kLoadDoubleword, temp.AsRegister<GpuRegister>(), TR, entry_point_offset);
+      // The entrypoint is null when the GC is not marking, this prevents one load compared to
+      // checking GetIsGcMarking.
+      __ Bnezc(temp.AsRegister<GpuRegister>(), slow_path->GetEntryLabel());
+      __ Bind(slow_path->GetExitLabel());
+    } else {
+      // GC root loaded through a slow path for read barriers other
+      // than Baker's.
+      // /* GcRoot<mirror::Object>* */ root = obj + offset
+      __ Daddiu64(root_reg, obj, static_cast<int32_t>(offset));
+      // /* mirror::Object* */ root = root->Read()
+      codegen_->GenerateReadBarrierForRootSlow(instruction, root, root);
+    }
   } else {
     // Plain GC root load with no read barrier.
     // /* GcRoot<mirror::Object> */ root = *(obj + offset)
@@ -3446,6 +4341,219 @@ void InstructionCodeGeneratorMIPS64::GenerateGcRootFieldLoad(
   }
 }
 
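The root fast path above leans on the per-register mark entrypoints being null while the GC is not marking, which saves an explicit GetIsGcMarking check. A minimal C++ sketch of the emitted sequence (names illustrative; mark_entrypoint stands for the thread-local pReadBarrierMarkRegXX slot loaded from TR):

    using MarkFn = Object* (*)(Object*);

    // Sketch of the Baker GC-root load; not an ART function.
    Object* LoadGcRoot(Object** slot, MarkFn mark_entrypoint) {
      Object* root = *slot;              // kLoadUnsignedWord
      if (mark_entrypoint != nullptr) {  // Bnezc -> mark slow path
        root = mark_entrypoint(root);    // Jalr through T9 in the slow path
      }
      return root;                       // GC not marking: just the plain load
    }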
+void CodeGeneratorMIPS64::GenerateFieldLoadWithBakerReadBarrier(HInstruction* instruction,
+                                                                Location ref,
+                                                                GpuRegister obj,
+                                                                uint32_t offset,
+                                                                Location temp,
+                                                                bool needs_null_check) {
+  DCHECK(kEmitCompilerReadBarrier);
+  DCHECK(kUseBakerReadBarrier);
+
+  // /* HeapReference<Object> */ ref = *(obj + offset)
+  Location no_index = Location::NoLocation();
+  ScaleFactor no_scale_factor = TIMES_1;
+  GenerateReferenceLoadWithBakerReadBarrier(instruction,
+                                            ref,
+                                            obj,
+                                            offset,
+                                            no_index,
+                                            no_scale_factor,
+                                            temp,
+                                            needs_null_check);
+}
+
+void CodeGeneratorMIPS64::GenerateArrayLoadWithBakerReadBarrier(HInstruction* instruction,
+                                                                Location ref,
+                                                                GpuRegister obj,
+                                                                uint32_t data_offset,
+                                                                Location index,
+                                                                Location temp,
+                                                                bool needs_null_check) {
+  DCHECK(kEmitCompilerReadBarrier);
+  DCHECK(kUseBakerReadBarrier);
+
+  static_assert(
+      sizeof(mirror::HeapReference<mirror::Object>) == sizeof(int32_t),
+      "art::mirror::HeapReference<art::mirror::Object> and int32_t have different sizes.");
+  // /* HeapReference<Object> */ ref =
+  //     *(obj + data_offset + index * sizeof(HeapReference<Object>))
+  ScaleFactor scale_factor = TIMES_4;
+  GenerateReferenceLoadWithBakerReadBarrier(instruction,
+                                            ref,
+                                            obj,
+                                            data_offset,
+                                            index,
+                                            scale_factor,
+                                            temp,
+                                            needs_null_check);
+}
+
+void CodeGeneratorMIPS64::GenerateReferenceLoadWithBakerReadBarrier(HInstruction* instruction,
+                                                                    Location ref,
+                                                                    GpuRegister obj,
+                                                                    uint32_t offset,
+                                                                    Location index,
+                                                                    ScaleFactor scale_factor,
+                                                                    Location temp,
+                                                                    bool needs_null_check,
+                                                                    bool always_update_field) {
+  DCHECK(kEmitCompilerReadBarrier);
+  DCHECK(kUseBakerReadBarrier);
+
+  // In slow path based read barriers, the read barrier call is
+  // inserted after the original load. However, in fast path based
+  // Baker's read barriers, we need to perform the load of
+  // mirror::Object::monitor_ *before* the original reference load.
+  // This load-load ordering is required by the read barrier.
+  // The fast path/slow path (for Baker's algorithm) should look like:
+  //
+  //   uint32_t rb_state = Lockword(obj->monitor_).ReadBarrierState();
+  //   lfence;  // Load fence or artificial data dependency to prevent load-load reordering
+  //   HeapReference<Object> ref = *src;  // Original reference load.
+  //   bool is_gray = (rb_state == ReadBarrier::GrayState());
+  //   if (is_gray) {
+  //     ref = ReadBarrier::Mark(ref);  // Performed by runtime entrypoint slow path.
+  //   }
+  //
+  // Note: the original implementation in ReadBarrier::Barrier is
+  // slightly more complex as it performs additional checks that we do
+  // not do here for performance reasons.
+
+  GpuRegister ref_reg = ref.AsRegister<GpuRegister>();
+  GpuRegister temp_reg = temp.AsRegister<GpuRegister>();
+  uint32_t monitor_offset = mirror::Object::MonitorOffset().Int32Value();
+
+  // /* int32_t */ monitor = obj->monitor_
+  __ LoadFromOffset(kLoadWord, temp_reg, obj, monitor_offset);
+  if (needs_null_check) {
+    MaybeRecordImplicitNullCheck(instruction);
+  }
+  // /* LockWord */ lock_word = LockWord(monitor)
+  static_assert(sizeof(LockWord) == sizeof(int32_t),
+                "art::LockWord and int32_t have different sizes.");
+
+  __ Sync(0);  // Barrier to prevent load-load reordering.
+
+  // The actual reference load.
+  if (index.IsValid()) {
+    // Load types involving an "index": ArrayGet,
+    // UnsafeGetObject/UnsafeGetObjectVolatile and UnsafeCASObject
+    // intrinsics.
+    // /* HeapReference<Object> */ ref = *(obj + offset + (index << scale_factor))
+    if (index.IsConstant()) {
+      size_t computed_offset =
+          (index.GetConstant()->AsIntConstant()->GetValue() << scale_factor) + offset;
+      __ LoadFromOffset(kLoadUnsignedWord, ref_reg, obj, computed_offset);
+    } else {
+      GpuRegister index_reg = index.AsRegister<GpuRegister>();
+      __ Dsll(TMP, index_reg, scale_factor);
+      __ Daddu(TMP, obj, TMP);
+      __ LoadFromOffset(kLoadUnsignedWord, ref_reg, TMP, offset);
+    }
+  } else {
+    // /* HeapReference<Object> */ ref = *(obj + offset)
+    __ LoadFromOffset(kLoadUnsignedWord, ref_reg, obj, offset);
+  }
+
+  // Object* ref = ref_addr->AsMirrorPtr()
+  __ MaybeUnpoisonHeapReference(ref_reg);
+
+  // Slow path marking the object `ref` when it is gray.
+  SlowPathCodeMIPS64* slow_path;
+  if (always_update_field) {
+    // ReadBarrierMarkAndUpdateFieldSlowPathMIPS64 only supports address
+    // of the form `obj + field_offset`, where `obj` is a register and
+    // `field_offset` is a register. Thus `offset` and `scale_factor`
+    // above are expected to be null in this code path.
+    DCHECK_EQ(offset, 0u);
+    DCHECK_EQ(scale_factor, ScaleFactor::TIMES_1);
+    slow_path = new (GetGraph()->GetArena())
+        ReadBarrierMarkAndUpdateFieldSlowPathMIPS64(instruction,
+                                                    ref,
+                                                    obj,
+                                                    /* field_offset */ index,
+                                                    temp_reg);
+  } else {
+    slow_path = new (GetGraph()->GetArena()) ReadBarrierMarkSlowPathMIPS64(instruction, ref);
+  }
+  AddSlowPath(slow_path);
+
+  // if (rb_state == ReadBarrier::GrayState())
+  //   ref = ReadBarrier::Mark(ref);
+  // Given the numeric representation, it's enough to check the low bit of the
+  // rb_state. We do that by shifting the bit into the sign bit (31) and
+  // performing a branch on less than zero.
+  static_assert(ReadBarrier::WhiteState() == 0, "Expecting white to have value 0");
+  static_assert(ReadBarrier::GrayState() == 1, "Expecting gray to have value 1");
+  static_assert(LockWord::kReadBarrierStateSize == 1, "Expecting 1-bit read barrier state size");
+  __ Sll(temp_reg, temp_reg, 31 - LockWord::kReadBarrierStateShift);
+  __ Bltzc(temp_reg, slow_path->GetEntryLabel());
+  __ Bind(slow_path->GetExitLabel());
+}
+
+void CodeGeneratorMIPS64::GenerateReadBarrierSlow(HInstruction* instruction,
+                                                  Location out,
+                                                  Location ref,
+                                                  Location obj,
+                                                  uint32_t offset,
+                                                  Location index) {
+  DCHECK(kEmitCompilerReadBarrier);
+
+  // Insert a slow path based read barrier *after* the reference load.
+  //
+  // If heap poisoning is enabled, the unpoisoning of the loaded
+  // reference will be carried out by the runtime within the slow
+  // path.
+  //
+  // Note that `ref` currently does not get unpoisoned (when heap
+  // poisoning is enabled), which is alright as the `ref` argument is
+  // not used by the artReadBarrierSlow entry point.
+  //
+  // TODO: Unpoison `ref` when it is used by artReadBarrierSlow.
+  SlowPathCodeMIPS64* slow_path = new (GetGraph()->GetArena())
+      ReadBarrierForHeapReferenceSlowPathMIPS64(instruction, out, ref, obj, offset, index);
+  AddSlowPath(slow_path);
+
+  __ Bc(slow_path->GetEntryLabel());
+  __ Bind(slow_path->GetExitLabel());
+}
+
+void CodeGeneratorMIPS64::MaybeGenerateReadBarrierSlow(HInstruction* instruction,
+                                                       Location out,
+                                                       Location ref,
+                                                       Location obj,
+                                                       uint32_t offset,
+                                                       Location index) {
+  if (kEmitCompilerReadBarrier) {
+    // Baker's read barriers shall be handled by the fast path
+    // (CodeGeneratorMIPS64::GenerateReferenceLoadWithBakerReadBarrier).
+    DCHECK(!kUseBakerReadBarrier);
+    // If heap poisoning is enabled, unpoisoning will be taken care of
+    // by the runtime within the slow path.
+    GenerateReadBarrierSlow(instruction, out, ref, obj, offset, index);
+  } else if (kPoisonHeapReferences) {
+    __ UnpoisonHeapReference(out.AsRegister<GpuRegister>());
+  }
+}
+
+void CodeGeneratorMIPS64::GenerateReadBarrierForRootSlow(HInstruction* instruction,
+                                                         Location out,
+                                                         Location root) {
+  DCHECK(kEmitCompilerReadBarrier);
+
+  // Insert a slow path based read barrier *after* the GC root load.
+  //
+  // Note that GC roots are not affected by heap poisoning, so we do
+  // not need to do anything special for this here.
+  SlowPathCodeMIPS64* slow_path =
+      new (GetGraph()->GetArena()) ReadBarrierForRootSlowPathMIPS64(instruction, out, root);
+  AddSlowPath(slow_path);
+
+  __ Bc(slow_path->GetEntryLabel());
+  __ Bind(slow_path->GetExitLabel());
+}
+
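GenerateReferenceLoadWithBakerReadBarrier above tests the gray state by shifting the single rb_state bit into the sign position and branching on negative. A standalone C++ illustration of that trick; the shift position is an assumption here, not quoted from lock_word.h:

    #include <cstdint>

    constexpr int kAssumedReadBarrierStateShift = 28;  // assumed bit position

    // Models the Sll + Bltzc pair: shift the 1-bit rb_state into bit 31,
    // then a sign test decides whether to take the mark slow path.
    bool IsGray(uint32_t lock_word) {
      int32_t shifted =
          static_cast<int32_t>(lock_word << (31 - kAssumedReadBarrierStateShift));
      return shifted < 0;  // gray (1) -> negative -> mark via entrypoint
    }

This avoids materializing a mask constant: one shift plus the compact branch is all the fast path pays when the object is white.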
 void LocationsBuilderMIPS64::VisitInstanceOf(HInstanceOf* instruction) {
   LocationSummary::CallKind call_kind = LocationSummary::kNoCall;
   TypeCheckKind type_check_kind = instruction->GetTypeCheckKind();
@@ -3454,7 +4562,8 @@ void LocationsBuilderMIPS64::VisitInstanceOf(HInstanceOf* instruction) {
     case TypeCheckKind::kAbstractClassCheck:
     case TypeCheckKind::kClassHierarchyCheck:
     case TypeCheckKind::kArrayObjectCheck:
-      call_kind = LocationSummary::kNoCall;
+      call_kind =
+          kEmitCompilerReadBarrier ? LocationSummary::kCallOnSlowPath : LocationSummary::kNoCall;
       break;
     case TypeCheckKind::kArrayCheck:
     case TypeCheckKind::kUnresolvedCheck:
@@ -3469,14 +4578,20 @@ void LocationsBuilderMIPS64::VisitInstanceOf(HInstanceOf* instruction) {
   // The output does overlap inputs.
   // Note that TypeCheckSlowPathMIPS64 uses this register too.
   locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap);
+  locations->AddRegisterTemps(NumberOfInstanceOfTemps(type_check_kind));
 }
 
 void InstructionCodeGeneratorMIPS64::VisitInstanceOf(HInstanceOf* instruction) {
   TypeCheckKind type_check_kind = instruction->GetTypeCheckKind();
   LocationSummary* locations = instruction->GetLocations();
-  GpuRegister obj = locations->InAt(0).AsRegister<GpuRegister>();
+  Location obj_loc = locations->InAt(0);
+  GpuRegister obj = obj_loc.AsRegister<GpuRegister>();
   GpuRegister cls = locations->InAt(1).AsRegister<GpuRegister>();
-  GpuRegister out = locations->Out().AsRegister<GpuRegister>();
+  Location out_loc = locations->Out();
+  GpuRegister out = out_loc.AsRegister<GpuRegister>();
+  const size_t num_temps = NumberOfInstanceOfTemps(type_check_kind);
+  DCHECK_LE(num_temps, 1u);
+  Location maybe_temp_loc = (num_temps >= 1) ? locations->GetTemp(0) : Location::NoLocation();
   uint32_t class_offset = mirror::Object::ClassOffset().Int32Value();
   uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value();
   uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value();
@@ -3494,8 +4609,12 @@ void InstructionCodeGeneratorMIPS64::VisitInstanceOf(HInstanceOf* instruction) {
   switch (type_check_kind) {
     case TypeCheckKind::kExactCheck: {
       // /* HeapReference<Class> */ out = obj->klass_
-      __ LoadFromOffset(kLoadUnsignedWord, out, obj, class_offset);
-      __ MaybeUnpoisonHeapReference(out);
+      GenerateReferenceLoadTwoRegisters(instruction,
+                                        out_loc,
+                                        obj_loc,
+                                        class_offset,
+                                        maybe_temp_loc,
+                                        kCompilerReadBarrierOption);
       // Classes must be equal for the instanceof to succeed.
       __ Xor(out, out, cls);
       __ Sltiu(out, out, 1);
@@ -3504,15 +4623,22 @@ void InstructionCodeGeneratorMIPS64::VisitInstanceOf(HInstanceOf* instruction) {
 
     case TypeCheckKind::kAbstractClassCheck: {
       // /* HeapReference<Class> */ out = obj->klass_
-      __ LoadFromOffset(kLoadUnsignedWord, out, obj, class_offset);
-      __ MaybeUnpoisonHeapReference(out);
+      GenerateReferenceLoadTwoRegisters(instruction,
+                                        out_loc,
+                                        obj_loc,
+                                        class_offset,
+                                        maybe_temp_loc,
+                                        kCompilerReadBarrierOption);
       // If the class is abstract, we eagerly fetch the super class of the
       // object to avoid doing a comparison we know will fail.
       Mips64Label loop;
       __ Bind(&loop);
       // /* HeapReference<Class> */ out = out->super_class_
-      __ LoadFromOffset(kLoadUnsignedWord, out, out, super_offset);
-      __ MaybeUnpoisonHeapReference(out);
+      GenerateReferenceLoadOneRegister(instruction,
+                                       out_loc,
+                                       super_offset,
+                                       maybe_temp_loc,
+                                       kCompilerReadBarrierOption);
       // If `out` is null, we use it for the result, and jump to `done`.
       __ Beqzc(out, &done);
       __ Bnec(out, cls, &loop);
@@ -3522,15 +4648,22 @@ void InstructionCodeGeneratorMIPS64::VisitInstanceOf(HInstanceOf* instruction) {
 
     case TypeCheckKind::kClassHierarchyCheck: {
       // /* HeapReference<Class> */ out = obj->klass_
-      __ LoadFromOffset(kLoadUnsignedWord, out, obj, class_offset);
-      __ MaybeUnpoisonHeapReference(out);
+      GenerateReferenceLoadTwoRegisters(instruction,
+                                        out_loc,
+                                        obj_loc,
+                                        class_offset,
+                                        maybe_temp_loc,
+                                        kCompilerReadBarrierOption);
       // Walk over the class hierarchy to find a match.
       Mips64Label loop, success;
       __ Bind(&loop);
       __ Beqc(out, cls, &success);
       // /* HeapReference<Class> */ out = out->super_class_
-      __ LoadFromOffset(kLoadUnsignedWord, out, out, super_offset);
-      __ MaybeUnpoisonHeapReference(out);
+      GenerateReferenceLoadOneRegister(instruction,
+                                       out_loc,
+                                       super_offset,
+                                       maybe_temp_loc,
+                                       kCompilerReadBarrierOption);
       __ Bnezc(out, &loop);
       // If `out` is null, we use it for the result, and jump to `done`.
       __ Bc(&done);
@@ -3541,15 +4674,22 @@ void InstructionCodeGeneratorMIPS64::VisitInstanceOf(HInstanceOf* instruction) {
 
     case TypeCheckKind::kArrayObjectCheck: {
       // /* HeapReference<Class> */ out = obj->klass_
-      __ LoadFromOffset(kLoadUnsignedWord, out, obj, class_offset);
-      __ MaybeUnpoisonHeapReference(out);
+      GenerateReferenceLoadTwoRegisters(instruction,
+                                        out_loc,
+                                        obj_loc,
+                                        class_offset,
+                                        maybe_temp_loc,
+                                        kCompilerReadBarrierOption);
       // Do an exact check.
       Mips64Label success;
       __ Beqc(out, cls, &success);
       // Otherwise, we need to check that the object's class is a non-primitive array.
       // /* HeapReference<Class> */ out = out->component_type_
-      __ LoadFromOffset(kLoadUnsignedWord, out, out, component_offset);
-      __ MaybeUnpoisonHeapReference(out);
+      GenerateReferenceLoadOneRegister(instruction,
+                                       out_loc,
+                                       component_offset,
+                                       maybe_temp_loc,
+                                       kCompilerReadBarrierOption);
       // If `out` is null, we use it for the result, and jump to `done`.
       __ Beqzc(out, &done);
       __ LoadFromOffset(kLoadUnsignedHalfword, out, out, primitive_offset);
@@ -3564,8 +4704,12 @@ void InstructionCodeGeneratorMIPS64::VisitInstanceOf(HInstanceOf* instruction) {
     case TypeCheckKind::kArrayCheck: {
       // No read barrier since the slow path will retry upon failure.
       // /* HeapReference<Class> */ out = obj->klass_
-      __ LoadFromOffset(kLoadUnsignedWord, out, obj, class_offset);
-      __ MaybeUnpoisonHeapReference(out);
+      GenerateReferenceLoadTwoRegisters(instruction,
+                                        out_loc,
+                                        obj_loc,
+                                        class_offset,
+                                        maybe_temp_loc,
+                                        kWithoutReadBarrier);
       DCHECK(locations->OnlyCallsOnSlowPath());
       slow_path = new (GetGraph()->GetArena()) TypeCheckSlowPathMIPS64(instruction,
                                                                        /* is_fatal */ false);
@@ -3735,9 +4879,6 @@ static bool TryGenerateIntrinsicCode(HInvoke* invoke, CodeGeneratorMIPS64* codeg
 
 HLoadString::LoadKind CodeGeneratorMIPS64::GetSupportedLoadStringKind(
     HLoadString::LoadKind desired_string_load_kind) {
-  if (kEmitCompilerReadBarrier) {
-    UNIMPLEMENTED(FATAL) << "for read barrier";
-  }
   bool fallback_load = false;
   switch (desired_string_load_kind) {
     case HLoadString::LoadKind::kBootImageLinkTimeAddress:
@@ -3765,9 +4906,6 @@ HLoadString::LoadKind CodeGeneratorMIPS64::GetSupportedLoadStringKind(
 
 HLoadClass::LoadKind CodeGeneratorMIPS64::GetSupportedLoadClassKind(
     HLoadClass::LoadKind desired_class_load_kind) {
-  if (kEmitCompilerReadBarrier) {
-    UNIMPLEMENTED(FATAL) << "for read barrier";
-  }
   bool fallback_load = false;
   switch (desired_class_load_kind) {
     case HLoadClass::LoadKind::kInvalid:
@@ -3960,7 +5098,8 @@ void LocationsBuilderMIPS64::VisitLoadClass(HLoadClass* cls) {
   }
   DCHECK(!cls->NeedsAccessCheck());
 
-  LocationSummary::CallKind call_kind = (cls->NeedsEnvironment() || kEmitCompilerReadBarrier)
+  const bool requires_read_barrier = kEmitCompilerReadBarrier && !cls->IsInBootImage();
+  LocationSummary::CallKind call_kind = (cls->NeedsEnvironment() || requires_read_barrier)
       ? LocationSummary::kCallOnSlowPath
       : LocationSummary::kNoCall;
   LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(cls, call_kind);
@@ -3989,6 +5128,9 @@ void InstructionCodeGeneratorMIPS64::VisitLoadClass(HLoadClass* cls) NO_THREAD_S
     current_method_reg = locations->InAt(0).AsRegister<GpuRegister>();
   }
 
+  const ReadBarrierOption read_barrier_option = cls->IsInBootImage()
+      ? kWithoutReadBarrier
+      : kCompilerReadBarrierOption;
   bool generate_null_check = false;
   switch (load_kind) {
     case HLoadClass::LoadKind::kReferrersClass:
@@ -3998,10 +5140,12 @@ void InstructionCodeGeneratorMIPS64::VisitLoadClass(HLoadClass* cls) NO_THREAD_S
       GenerateGcRootFieldLoad(cls,
                               out_loc,
                               current_method_reg,
-                              ArtMethod::DeclaringClassOffset().Int32Value());
+                              ArtMethod::DeclaringClassOffset().Int32Value(),
+                              read_barrier_option);
       break;
     case HLoadClass::LoadKind::kBootImageLinkTimeAddress:
       DCHECK(codegen_->GetCompilerOptions().IsBootImage());
+      DCHECK_EQ(read_barrier_option, kWithoutReadBarrier);
       __ LoadLiteral(out,
                      kLoadUnsignedWord,
                      codegen_->DeduplicateBootImageTypeLiteral(cls->GetDexFile(),
                                                                cls->GetTypeIndex()));
       break;
     case HLoadClass::LoadKind::kBootImageLinkTimePcRelative: {
       DCHECK(codegen_->GetCompilerOptions().IsBootImage());
+      DCHECK_EQ(read_barrier_option, kWithoutReadBarrier);
       CodeGeneratorMIPS64::PcRelativePatchInfo* info =
           codegen_->NewPcRelativeTypePatch(cls->GetDexFile(), cls->GetTypeIndex());
       codegen_->EmitPcRelativeAddressPlaceholderHigh(info, AT);
@@ -4016,7 +5161,7 @@ void InstructionCodeGeneratorMIPS64::VisitLoadClass(HLoadClass* cls) NO_THREAD_S
       break;
     }
     case HLoadClass::LoadKind::kBootImageAddress: {
-      DCHECK(!kEmitCompilerReadBarrier);
+      DCHECK_EQ(read_barrier_option, kWithoutReadBarrier);
       uint32_t address = dchecked_integral_cast<uint32_t>(
           reinterpret_cast<uintptr_t>(cls->GetClass().Get()));
       DCHECK_NE(address, 0u);
@@ -4029,7 +5174,7 @@ void InstructionCodeGeneratorMIPS64::VisitLoadClass(HLoadClass* cls) NO_THREAD_S
       CodeGeneratorMIPS64::PcRelativePatchInfo* info =
          codegen_->NewTypeBssEntryPatch(cls->GetDexFile(), cls->GetTypeIndex());
       codegen_->EmitPcRelativeAddressPlaceholderHigh(info, out);
-      GenerateGcRootFieldLoad(cls, out_loc, out, /* placeholder */ 0x5678);
+      GenerateGcRootFieldLoad(cls, out_loc, out, /* placeholder */ 0x5678, read_barrier_option);
       generate_null_check = true;
       break;
     }
@@ -4039,7 +5184,7 @@ void InstructionCodeGeneratorMIPS64::VisitLoadClass(HLoadClass* cls) NO_THREAD_S
                      codegen_->DeduplicateJitClassLiteral(cls->GetDexFile(),
                                                           cls->GetTypeIndex(),
                                                           cls->GetClass()));
-      GenerateGcRootFieldLoad(cls, out_loc, out, 0);
+      GenerateGcRootFieldLoad(cls, out_loc, out, 0, read_barrier_option);
      break;
     case HLoadClass::LoadKind::kDexCacheViaMethod:
     case HLoadClass::LoadKind::kInvalid:
@@ -4136,7 +5281,11 @@ void InstructionCodeGeneratorMIPS64::VisitLoadString(HLoadString* load) NO_THREA
      CodeGeneratorMIPS64::PcRelativePatchInfo* info =
          codegen_->NewPcRelativeStringPatch(load->GetDexFile(), load->GetStringIndex());
      codegen_->EmitPcRelativeAddressPlaceholderHigh(info, out);
-      GenerateGcRootFieldLoad(load, out_loc, out, /* placeholder */ 0x5678);
+      GenerateGcRootFieldLoad(load,
+                              out_loc,
+                              out,
+                              /* placeholder */ 0x5678,
+                              kCompilerReadBarrierOption);
      SlowPathCodeMIPS64* slow_path = new (GetGraph()->GetArena()) LoadStringSlowPathMIPS64(load);
      codegen_->AddSlowPath(slow_path);
      __ Beqzc(out, slow_path->GetEntryLabel());
@@ -4149,7 +5298,7 @@ void InstructionCodeGeneratorMIPS64::VisitLoadString(HLoadString* load) NO_THREA
                      codegen_->DeduplicateJitStringLiteral(load->GetDexFile(),
                                                            load->GetStringIndex(),
                                                            load->GetString()));
-      GenerateGcRootFieldLoad(load, out_loc, out, 0);
+      GenerateGcRootFieldLoad(load, out_loc, out, 0, kCompilerReadBarrierOption);
      return;
    default:
      break;
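The class-load change above boils down to one decision: boot-image classes live in a space the concurrent copying collector does not move, so (per the DCHECKs) their loads may skip the read barrier. A condensed sketch of that choice, mirroring the diff's logic rather than quoting any ART helper:

    // Illustrative only.
    ReadBarrierOption OptionForLoadClass(bool is_in_boot_image) {
      return is_in_boot_image ? kWithoutReadBarrier : kCompilerReadBarrierOption;
    }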
diff --git a/compiler/optimizing/code_generator_mips64.h b/compiler/optimizing/code_generator_mips64.h
index 6040dc9492..fd1a174608 100644
--- a/compiler/optimizing/code_generator_mips64.h
+++ b/compiler/optimizing/code_generator_mips64.h
@@ -237,6 +237,38 @@ class InstructionCodeGeneratorMIPS64 : public InstructionCodeGenerator {
                       const FieldInfo& field_info,
                       bool value_can_be_null);
   void HandleFieldGet(HInstruction* instruction, const FieldInfo& field_info);
+
+  // Generate a heap reference load using one register `out`:
+  //
+  //   out <- *(out + offset)
+  //
+  // while honoring heap poisoning and/or read barriers (if any).
+  //
+  // Location `maybe_temp` is used when generating a read barrier and
+  // shall be a register in that case; it may be an invalid location
+  // otherwise.
+  void GenerateReferenceLoadOneRegister(HInstruction* instruction,
+                                        Location out,
+                                        uint32_t offset,
+                                        Location maybe_temp,
+                                        ReadBarrierOption read_barrier_option);
+  // Generate a heap reference load using two different registers
+  // `out` and `obj`:
+  //
+  //   out <- *(obj + offset)
+  //
+  // while honoring heap poisoning and/or read barriers (if any).
+  //
+  // Location `maybe_temp` is used when generating a Baker's (fast
+  // path) read barrier and shall be a register in that case; it may
+  // be an invalid location otherwise.
+  void GenerateReferenceLoadTwoRegisters(HInstruction* instruction,
+                                         Location out,
+                                         Location obj,
+                                         uint32_t offset,
+                                         Location maybe_temp,
+                                         ReadBarrierOption read_barrier_option);
+
   // Generate a GC root reference load:
   //
   //   root <- *(obj + offset)
@@ -245,7 +277,9 @@ class InstructionCodeGeneratorMIPS64 : public InstructionCodeGenerator {
   void GenerateGcRootFieldLoad(HInstruction* instruction,
                                Location root,
                                GpuRegister obj,
-                               uint32_t offset);
+                               uint32_t offset,
+                               ReadBarrierOption read_barrier_option);
+
   void GenerateTestAndBranch(HInstruction* instruction,
                              size_t condition_input_index,
                              Mips64Label* true_target,
@@ -316,6 +350,91 @@ class CodeGeneratorMIPS64 : public CodeGenerator {
   void EmitLinkerPatches(ArenaVector<LinkerPatch>* linker_patches) OVERRIDE;
   void EmitJitRootPatches(uint8_t* code, const uint8_t* roots_data) OVERRIDE;
 
+  // Fast path implementation of ReadBarrier::Barrier for a heap
+  // reference field load when Baker's read barriers are used.
+  void GenerateFieldLoadWithBakerReadBarrier(HInstruction* instruction,
+                                             Location ref,
+                                             GpuRegister obj,
+                                             uint32_t offset,
+                                             Location temp,
+                                             bool needs_null_check);
+  // Fast path implementation of ReadBarrier::Barrier for a heap
+  // reference array load when Baker's read barriers are used.
+  void GenerateArrayLoadWithBakerReadBarrier(HInstruction* instruction,
+                                             Location ref,
+                                             GpuRegister obj,
+                                             uint32_t data_offset,
+                                             Location index,
+                                             Location temp,
+                                             bool needs_null_check);
+
+  // Factored implementation, used by GenerateFieldLoadWithBakerReadBarrier,
+  // GenerateArrayLoadWithBakerReadBarrier and some intrinsics.
+  //
+  // Load the object reference located at the address
+  // `obj + offset + (index << scale_factor)`, held by object `obj`, into
+  // `ref`, and mark it if needed.
+  //
+  // If `always_update_field` is true, the value of the reference is
+  // atomically updated in the holder (`obj`).
+  void GenerateReferenceLoadWithBakerReadBarrier(HInstruction* instruction,
+                                                 Location ref,
+                                                 GpuRegister obj,
+                                                 uint32_t offset,
+                                                 Location index,
+                                                 ScaleFactor scale_factor,
+                                                 Location temp,
+                                                 bool needs_null_check,
+                                                 bool always_update_field = false);
+
+  // Generate a read barrier for a heap reference within `instruction`
+  // using a slow path.
+  //
+  // A read barrier for an object reference read from the heap is
+  // implemented as a call to the artReadBarrierSlow runtime entry
+  // point, which is passed the values in locations `ref`, `obj`, and
+  // `offset`:
+  //
+  //   mirror::Object* artReadBarrierSlow(mirror::Object* ref,
+  //                                      mirror::Object* obj,
+  //                                      uint32_t offset);
+  //
+  // The `out` location contains the value returned by
+  // artReadBarrierSlow.
+  //
+  // When `index` is provided (i.e. for array accesses), the offset
+  // value passed to artReadBarrierSlow is adjusted to take `index`
+  // into account.
+  void GenerateReadBarrierSlow(HInstruction* instruction,
+                               Location out,
+                               Location ref,
+                               Location obj,
+                               uint32_t offset,
+                               Location index = Location::NoLocation());
+
+  // If read barriers are enabled, generate a read barrier for a heap
+  // reference using a slow path. If heap poisoning is enabled, also
+  // unpoison the reference in `out`.
+  void MaybeGenerateReadBarrierSlow(HInstruction* instruction,
+                                    Location out,
+                                    Location ref,
+                                    Location obj,
+                                    uint32_t offset,
+                                    Location index = Location::NoLocation());
+
+  // Generate a read barrier for a GC root within `instruction` using
+  // a slow path.
+  //
+  // A read barrier for an object reference GC root is implemented as
+  // a call to the artReadBarrierForRootSlow runtime entry point,
+  // which is passed the value in location `root`:
+  //
+  //   mirror::Object* artReadBarrierForRootSlow(GcRoot<mirror::Object>* root);
+  //
+  // The `out` location contains the value returned by
+  // artReadBarrierForRootSlow.
+  void GenerateReadBarrierForRootSlow(HInstruction* instruction, Location out, Location root);
+
   void MarkGCCard(GpuRegister object, GpuRegister value, bool value_can_be_null);
 
   // Register allocation.
@@ -366,6 +485,14 @@ class CodeGeneratorMIPS64 : public CodeGenerator {
                      uint32_t dex_pc,
                      SlowPathCode* slow_path = nullptr) OVERRIDE;
 
+  // Generate code to invoke a runtime entry point, but do not record
+  // PC-related information in a stack map.
+  void InvokeRuntimeWithoutRecordingPcInfo(int32_t entry_point_offset,
+                                           HInstruction* instruction,
+                                           SlowPathCode* slow_path);
+
+  void GenerateInvokeRuntime(int32_t entry_point_offset);
+
   ParallelMoveResolver* GetMoveResolver() OVERRIDE { return &move_resolver_; }
 
   bool NeedsTwoRegisters(Primitive::Type type ATTRIBUTE_UNUSED) const OVERRIDE { return false; }
diff --git a/compiler/optimizing/intrinsics_mips.cc b/compiler/optimizing/intrinsics_mips.cc
index bf85b1989e..b67793c4ed 100644
--- a/compiler/optimizing/intrinsics_mips.cc
+++ b/compiler/optimizing/intrinsics_mips.cc
@@ -1514,21 +1514,31 @@ void IntrinsicCodeGeneratorMIPS::VisitThreadCurrentThread(HInvoke* invoke) {
                     Thread::PeerOffset<kMipsPointerSize>().Int32Value());
 }
 
-static void CreateIntIntIntToIntLocations(ArenaAllocator* arena, HInvoke* invoke) {
-  bool can_call =
-      invoke->GetIntrinsic() == Intrinsics::kUnsafeGetObject ||
-      invoke->GetIntrinsic() == Intrinsics::kUnsafeGetObjectVolatile;
+static void CreateIntIntIntToIntLocations(ArenaAllocator* arena,
+                                          HInvoke* invoke,
+                                          Primitive::Type type) {
+  bool can_call = kEmitCompilerReadBarrier &&
+      (invoke->GetIntrinsic() == Intrinsics::kUnsafeGetObject ||
+       invoke->GetIntrinsic() == Intrinsics::kUnsafeGetObjectVolatile);
   LocationSummary* locations = new (arena) LocationSummary(invoke,
-                                                           can_call ?
-                                                               LocationSummary::kCallOnSlowPath :
-                                                               LocationSummary::kNoCall,
+                                                           (can_call
+                                                                ? LocationSummary::kCallOnSlowPath
+                                                                : LocationSummary::kNoCall),
                                                            kIntrinsified);
   locations->SetInAt(0, Location::NoLocation());        // Unused receiver.
   locations->SetInAt(1, Location::RequiresRegister());
   locations->SetInAt(2, Location::RequiresRegister());
-  locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
+  locations->SetOut(Location::RequiresRegister(),
+                    (can_call ? Location::kOutputOverlap : Location::kNoOutputOverlap));
+  if (type == Primitive::kPrimNot && kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
+    // We need a temporary register for the read barrier marking slow
+    // path in InstructionCodeGeneratorMIPS::GenerateReferenceLoadWithBakerReadBarrier.
+    locations->AddTemp(Location::RequiresRegister());
+  }
 }
 
+// Note that the caller must supply a properly aligned memory address.
+// If they do not, the behavior is undefined (atomicity not guaranteed, exception may occur).
 static void GenUnsafeGet(HInvoke* invoke,
                          Primitive::Type type,
                          bool is_volatile,
@@ -1539,49 +1549,109 @@ static void GenUnsafeGet(HInvoke* invoke,
          (type == Primitive::kPrimLong) ||
          (type == Primitive::kPrimNot)) << type;
   MipsAssembler* assembler = codegen->GetAssembler();
+  // Target register.
+  Location trg_loc = locations->Out();
   // Object pointer.
-  Register base = locations->InAt(1).AsRegister<Register>();
+  Location base_loc = locations->InAt(1);
+  Register base = base_loc.AsRegister<Register>();
   // The "offset" argument is passed as a "long". Since this code is for
   // a 32-bit processor, we can only use 32-bit addresses, so we only
   // need the low 32-bits of offset.
-  Register offset_lo = invoke->GetLocations()->InAt(2).AsRegisterPairLow<Register>();
+  Location offset_loc = locations->InAt(2);
+  Register offset_lo = offset_loc.AsRegisterPairLow<Register>();
 
-  __ Addu(TMP, base, offset_lo);
-  if (is_volatile) {
-    __ Sync(0);
+  if (!(kEmitCompilerReadBarrier && kUseBakerReadBarrier && (type == Primitive::kPrimNot))) {
+    __ Addu(TMP, base, offset_lo);
   }
-  if (type == Primitive::kPrimLong) {
-    Register trg_lo = locations->Out().AsRegisterPairLow<Register>();
-    Register trg_hi = locations->Out().AsRegisterPairHigh<Register>();
-    if (is_R6) {
-      __ Lw(trg_lo, TMP, 0);
-      __ Lw(trg_hi, TMP, 4);
-    } else {
-      __ Lwr(trg_lo, TMP, 0);
-      __ Lwl(trg_lo, TMP, 3);
-      __ Lwr(trg_hi, TMP, 4);
-      __ Lwl(trg_hi, TMP, 7);
+
+  switch (type) {
+    case Primitive::kPrimLong: {
+      Register trg_lo = trg_loc.AsRegisterPairLow<Register>();
+      Register trg_hi = trg_loc.AsRegisterPairHigh<Register>();
+      CHECK(!is_volatile);  // TODO: support atomic 8-byte volatile loads.
+      if (is_R6) {
+        __ Lw(trg_lo, TMP, 0);
+        __ Lw(trg_hi, TMP, 4);
+      } else {
+        __ Lwr(trg_lo, TMP, 0);
+        __ Lwl(trg_lo, TMP, 3);
+        __ Lwr(trg_hi, TMP, 4);
+        __ Lwl(trg_hi, TMP, 7);
+      }
+      break;
     }
-  } else {
-    Register trg = locations->Out().AsRegister<Register>();
-    if (is_R6) {
-      __ Lw(trg, TMP, 0);
-    } else {
-      __ Lwr(trg, TMP, 0);
-      __ Lwl(trg, TMP, 3);
+
+    case Primitive::kPrimInt: {
+      Register trg = trg_loc.AsRegister<Register>();
+      if (is_R6) {
+        __ Lw(trg, TMP, 0);
+      } else {
+        __ Lwr(trg, TMP, 0);
+        __ Lwl(trg, TMP, 3);
+      }
+      if (is_volatile) {
+        __ Sync(0);
+      }
+      break;
     }
-    if (type == Primitive::kPrimNot) {
-      __ MaybeUnpoisonHeapReference(trg);
+
+    case Primitive::kPrimNot: {
+      Register trg = trg_loc.AsRegister<Register>();
+      if (kEmitCompilerReadBarrier) {
+        if (kUseBakerReadBarrier) {
+          Location temp = locations->GetTemp(0);
+          codegen->GenerateReferenceLoadWithBakerReadBarrier(invoke,
+                                                             trg_loc,
+                                                             base,
+                                                             /* offset */ 0U,
+                                                             /* index */ offset_loc,
+                                                             TIMES_1,
+                                                             temp,
+                                                             /* needs_null_check */ false);
+          if (is_volatile) {
+            __ Sync(0);
+          }
+        } else {
+          if (is_R6) {
+            __ Lw(trg, TMP, 0);
+          } else {
+            __ Lwr(trg, TMP, 0);
+            __ Lwl(trg, TMP, 3);
+          }
+          if (is_volatile) {
+            __ Sync(0);
+          }
+          codegen->GenerateReadBarrierSlow(invoke,
+                                           trg_loc,
+                                           trg_loc,
+                                           base_loc,
+                                           /* offset */ 0U,
+                                           /* index */ offset_loc);
+        }
+      } else {
+        if (is_R6) {
+          __ Lw(trg, TMP, 0);
+        } else {
+          __ Lwr(trg, TMP, 0);
+          __ Lwl(trg, TMP, 3);
+        }
+        if (is_volatile) {
+          __ Sync(0);
+        }
+        __ MaybeUnpoisonHeapReference(trg);
+      }
+      break;
     }
+
+    default:
+      LOG(FATAL) << "Unexpected type " << type;
+      UNREACHABLE();
   }
 }
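As the comment in GenUnsafeGet notes, the Java-level offset is a jlong while the MIPS32 address space is 32-bit, so only the low word of the offset ever reaches the Addu. A minimal C++ model of that address formation (illustrative, not ART API):

    #include <cstdint>

    // Only the low 32 bits of the 64-bit Unsafe offset contribute to the
    // effective address on a 32-bit core; mirrors Addu(TMP, base, offset_lo).
    uintptr_t UnsafeAddress(uintptr_t base, int64_t offset) {
      return base + static_cast<uint32_t>(offset);
    }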
@@ -1590,7 +1660,7 @@ void IntrinsicCodeGeneratorMIPS::VisitUnsafeGet(HInvoke* invoke) {
 
 // int sun.misc.Unsafe.getIntVolatile(Object o, long offset)
 void IntrinsicLocationsBuilderMIPS::VisitUnsafeGetVolatile(HInvoke* invoke) {
-  CreateIntIntIntToIntLocations(arena_, invoke);
+  CreateIntIntIntToIntLocations(arena_, invoke, Primitive::kPrimInt);
 }
 
 void IntrinsicCodeGeneratorMIPS::VisitUnsafeGetVolatile(HInvoke* invoke) {
@@ -1599,25 +1669,16 @@ void IntrinsicCodeGeneratorMIPS::VisitUnsafeGetVolatile(HInvoke* invoke) {
 
 // long sun.misc.Unsafe.getLong(Object o, long offset)
 void IntrinsicLocationsBuilderMIPS::VisitUnsafeGetLong(HInvoke* invoke) {
-  CreateIntIntIntToIntLocations(arena_, invoke);
+  CreateIntIntIntToIntLocations(arena_, invoke, Primitive::kPrimLong);
 }
 
 void IntrinsicCodeGeneratorMIPS::VisitUnsafeGetLong(HInvoke* invoke) {
   GenUnsafeGet(invoke, Primitive::kPrimLong, /* is_volatile */ false, IsR6(), codegen_);
 }
 
-// long sun.misc.Unsafe.getLongVolatile(Object o, long offset)
-void IntrinsicLocationsBuilderMIPS::VisitUnsafeGetLongVolatile(HInvoke* invoke) {
-  CreateIntIntIntToIntLocations(arena_, invoke);
-}
-
-void IntrinsicCodeGeneratorMIPS::VisitUnsafeGetLongVolatile(HInvoke* invoke) {
-  GenUnsafeGet(invoke, Primitive::kPrimLong, /* is_volatile */ true, IsR6(), codegen_);
-}
-
 // Object sun.misc.Unsafe.getObject(Object o, long offset)
 void IntrinsicLocationsBuilderMIPS::VisitUnsafeGetObject(HInvoke* invoke) {
-  CreateIntIntIntToIntLocations(arena_, invoke);
+  CreateIntIntIntToIntLocations(arena_, invoke, Primitive::kPrimNot);
 }
 
 void IntrinsicCodeGeneratorMIPS::VisitUnsafeGetObject(HInvoke* invoke) {
@@ -1626,7 +1687,7 @@ void IntrinsicCodeGeneratorMIPS::VisitUnsafeGetObject(HInvoke* invoke) {
 
 // Object sun.misc.Unsafe.getObjectVolatile(Object o, long offset)
 void IntrinsicLocationsBuilderMIPS::VisitUnsafeGetObjectVolatile(HInvoke* invoke) {
-  CreateIntIntIntToIntLocations(arena_, invoke);
+  CreateIntIntIntToIntLocations(arena_, invoke, Primitive::kPrimNot);
 }
 
 void IntrinsicCodeGeneratorMIPS::VisitUnsafeGetObjectVolatile(HInvoke* invoke) {
@@ -1643,6 +1704,8 @@ static void CreateIntIntIntIntToVoidLocations(ArenaAllocator* arena, HInvoke* in
   locations->SetInAt(3, Location::RequiresRegister());
 }
 
+// Note that the caller must supply a properly aligned memory address.
+// If they do not, the behavior is undefined (atomicity not guaranteed, exception may occur).
 static void GenUnsafePut(LocationSummary* locations,
                          Primitive::Type type,
                          bool is_volatile,
@@ -1681,7 +1744,7 @@ static void GenUnsafePut(LocationSummary* locations,
   } else {
     Register value_lo = locations->InAt(3).AsRegisterPairLow<Register>();
     Register value_hi = locations->InAt(3).AsRegisterPairHigh<Register>();
-
+    CHECK(!is_volatile);  // TODO: support atomic 8-byte volatile stores.
     if (is_R6) {
       __ Sw(value_lo, TMP, 0);
       __ Sw(value_hi, TMP, 4);
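Two things are worth making explicit at this point. First, Sync(0) is a full MIPS memory barrier; emitting it after a volatile load and around a volatile store is the conventional mapping of Java volatile semantics onto sync. As a rough analogy in portable terms (a parallel, not the generated code):

    #include <atomic>
    #include <cstdint>

    std::atomic<int32_t> field{0};

    // Volatile get: load, then barrier ("lw ...; sync").
    int32_t GetVolatileAnalogy() {
      return field.load(std::memory_order_acquire);
    }

    // Volatile put: barrier, store, barrier ("sync; sw ...; sync").
    void PutVolatileAnalogy(int32_t v) {
      field.store(v, std::memory_order_seq_cst);
    }

Second, the CHECK(!is_volatile) added to the 64-bit store path above is honest about what a pair of 32-bit stores cannot provide: atomicity. Long volatile accesses therefore stay out of the 32-bit intrinsics (see the UNIMPLEMENTED_INTRINSIC entries for UnsafeGetLongVolatile/UnsafePutLongVolatile further down) and take the regular runtime path instead.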
@@ -1815,50 +1878,71 @@ void IntrinsicCodeGeneratorMIPS::VisitUnsafePutLongOrdered(HInvoke* invoke) {
                codegen_);
 }
 
-// void sun.misc.Unsafe.putLongVolatile(Object o, long offset, long x)
-void IntrinsicLocationsBuilderMIPS::VisitUnsafePutLongVolatile(HInvoke* invoke) {
-  CreateIntIntIntIntToVoidLocations(arena_, invoke);
-}
-
-void IntrinsicCodeGeneratorMIPS::VisitUnsafePutLongVolatile(HInvoke* invoke) {
-  GenUnsafePut(invoke->GetLocations(),
-               Primitive::kPrimLong,
-               /* is_volatile */ true,
-               /* is_ordered */ false,
-               IsR6(),
-               codegen_);
-}
-
-static void CreateIntIntIntIntIntToIntLocations(ArenaAllocator* arena, HInvoke* invoke) {
+static void CreateIntIntIntIntIntToIntPlusTemps(ArenaAllocator* arena, HInvoke* invoke) {
+  bool can_call = kEmitCompilerReadBarrier &&
+      kUseBakerReadBarrier &&
+      (invoke->GetIntrinsic() == Intrinsics::kUnsafeCASObject);
   LocationSummary* locations = new (arena) LocationSummary(invoke,
-                                                           LocationSummary::kNoCall,
+                                                           (can_call
+                                                                ? LocationSummary::kCallOnSlowPath
+                                                                : LocationSummary::kNoCall),
                                                            kIntrinsified);
   locations->SetInAt(0, Location::NoLocation());        // Unused receiver.
   locations->SetInAt(1, Location::RequiresRegister());
   locations->SetInAt(2, Location::RequiresRegister());
   locations->SetInAt(3, Location::RequiresRegister());
   locations->SetInAt(4, Location::RequiresRegister());
   locations->SetOut(Location::RequiresRegister());
+
+  // Temporary register used in CAS by (Baker) read barrier.
+  if (can_call) {
+    locations->AddTemp(Location::RequiresRegister());
+  }
 }
 
-static void GenCas(LocationSummary* locations, Primitive::Type type, CodeGeneratorMIPS* codegen) {
+// Note that the caller must supply a properly aligned memory address.
+// If they do not, the behavior is undefined (atomicity not guaranteed, exception may occur).
+static void GenCas(HInvoke* invoke, Primitive::Type type, CodeGeneratorMIPS* codegen) {
   MipsAssembler* assembler = codegen->GetAssembler();
+  LocationSummary* locations = invoke->GetLocations();
   bool isR6 = codegen->GetInstructionSetFeatures().IsR6();
   Register base = locations->InAt(1).AsRegister<Register>();
-  Register offset_lo = locations->InAt(2).AsRegisterPairLow<Register>();
+  Location offset_loc = locations->InAt(2);
+  Register offset_lo = offset_loc.AsRegisterPairLow<Register>();
   Register expected = locations->InAt(3).AsRegister<Register>();
   Register value = locations->InAt(4).AsRegister<Register>();
-  Register out = locations->Out().AsRegister<Register>();
+  Location out_loc = locations->Out();
+  Register out = out_loc.AsRegister<Register>();
 
   DCHECK_NE(base, out);
   DCHECK_NE(offset_lo, out);
   DCHECK_NE(expected, out);
 
   if (type == Primitive::kPrimNot) {
-    // Mark card for object assuming new value is stored.
+    // The only read barrier implementation supporting the
+    // UnsafeCASObject intrinsic is the Baker-style read barriers.
+    DCHECK(!kEmitCompilerReadBarrier || kUseBakerReadBarrier);
+
+    // Mark card for object assuming new value is stored. Worst case we will mark an unchanged
+    // object and scan the receiver at the next GC for nothing.
     bool value_can_be_null = true;  // TODO: Worth finding out this information?
     codegen->MarkGCCard(base, value, value_can_be_null);
+
+    if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
+      Location temp = locations->GetTemp(0);
+      // Need to make sure the reference stored in the field is a to-space
+      // one before attempting the CAS or the CAS could fail incorrectly.
+      codegen->GenerateReferenceLoadWithBakerReadBarrier(
+          invoke,
+          out_loc,  // Unused, used only as a "temporary" within the read barrier.
+          base,
+          /* offset */ 0u,
+          /* index */ offset_loc,
+          ScaleFactor::TIMES_1,
+          temp,
+          /* needs_null_check */ false,
+          /* always_update_field */ true);
+    }
   }
 
   MipsLabel loop_head, exit_loop;
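The body of the loop between loop_head and exit_loop falls outside this hunk, but it is the standard MIPS ll/sc retry loop. Schematically (register choreography simplified; not the literal emitted code):

    #include <atomic>
    #include <cstdint>

    // Approximate shape of the emitted loop:
    //
    //   loop_head:  ll    tmp, 0(addr)       # load-linked current value
    //               bne   tmp, expected, exit_loop
    //               move  scratch, value
    //               sc    scratch, 0(addr)   # store-conditional
    //               beqz  scratch, loop_head # link broken: retry
    //   exit_loop:                           # out = (tmp == expected)
    //
    // The contract it implements, expressed with std::atomic:
    bool CasInt32(std::atomic<int32_t>* addr, int32_t expected, int32_t value) {
      return addr->compare_exchange_strong(expected, value);
    }

The sc retry makes this a strong CAS: it fails only on a genuine value mismatch, never spuriously. The GenerateReferenceLoadWithBakerReadBarrier call just above is what keeps that comparison meaningful for references: with a concurrent copying collector, the field may still hold a from-space pointer while `expected` is a to-space one, so the field is healed to to-space first or the CAS would fail incorrectly.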
@@ -1926,20 +2010,30 @@ static void GenCas(LocationSummary* locations, Primitive::Type type, CodeGenerat
 
 // boolean sun.misc.Unsafe.compareAndSwapInt(Object o, long offset, int expected, int x)
 void IntrinsicLocationsBuilderMIPS::VisitUnsafeCASInt(HInvoke* invoke) {
-  CreateIntIntIntIntIntToIntLocations(arena_, invoke);
+  CreateIntIntIntIntIntToIntPlusTemps(arena_, invoke);
 }
 
 void IntrinsicCodeGeneratorMIPS::VisitUnsafeCASInt(HInvoke* invoke) {
-  GenCas(invoke->GetLocations(), Primitive::kPrimInt, codegen_);
+  GenCas(invoke, Primitive::kPrimInt, codegen_);
 }
 
 // boolean sun.misc.Unsafe.compareAndSwapObject(Object o, long offset, Object expected, Object x)
 void IntrinsicLocationsBuilderMIPS::VisitUnsafeCASObject(HInvoke* invoke) {
-  CreateIntIntIntIntIntToIntLocations(arena_, invoke);
+  // The only read barrier implementation supporting the
+  // UnsafeCASObject intrinsic is the Baker-style read barriers.
+  if (kEmitCompilerReadBarrier && !kUseBakerReadBarrier) {
+    return;
+  }
+
+  CreateIntIntIntIntIntToIntPlusTemps(arena_, invoke);
 }
 
 void IntrinsicCodeGeneratorMIPS::VisitUnsafeCASObject(HInvoke* invoke) {
-  GenCas(invoke->GetLocations(), Primitive::kPrimNot, codegen_);
+  // The only read barrier implementation supporting the
+  // UnsafeCASObject intrinsic is the Baker-style read barriers.
+  DCHECK(!kEmitCompilerReadBarrier || kUseBakerReadBarrier);
+
+  GenCas(invoke, Primitive::kPrimNot, codegen_);
 }
 
 // int java.lang.String.compareTo(String anotherString)
@@ -2664,6 +2758,8 @@ UNIMPLEMENTED_INTRINSIC(MIPS, MathCeil)
 UNIMPLEMENTED_INTRINSIC(MIPS, MathFloor)
 UNIMPLEMENTED_INTRINSIC(MIPS, MathRint)
 UNIMPLEMENTED_INTRINSIC(MIPS, MathRoundDouble)
+UNIMPLEMENTED_INTRINSIC(MIPS, UnsafeGetLongVolatile);
+UNIMPLEMENTED_INTRINSIC(MIPS, UnsafePutLongVolatile);
 UNIMPLEMENTED_INTRINSIC(MIPS, UnsafeCASLong)
 
 UNIMPLEMENTED_INTRINSIC(MIPS, ReferenceGetReferent)
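Note the fallback contract at work in the UnsafeCASObject builder above and in these UNIMPLEMENTED_INTRINSIC entries: a locations builder that returns without creating a LocationSummary, like an intrinsic that is never recognized, simply leaves the call on the normal runtime path - correct, just slower. A toy illustration of that shape (all names invented; not ART code):

    #include <cstdio>

    struct Invoke { bool intrinsified = false; };

    // Mirrors VisitUnsafeCASObject above: decline when the configured
    // read-barrier flavor is unsupported, intrinsify otherwise.
    void BuildCasObjectLocations(Invoke* invoke, bool baker_read_barriers) {
      if (!baker_read_barriers) {
        return;  // leave invoke->intrinsified == false
      }
      invoke->intrinsified = true;
    }

    int main() {
      Invoke cas;
      BuildCasObjectLocations(&cas, /* baker_read_barriers= */ false);
      std::printf("%s\n", cas.intrinsified ? "intrinsic code" : "runtime call");
      return 0;
    }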
diff --git a/compiler/optimizing/intrinsics_mips64.cc b/compiler/optimizing/intrinsics_mips64.cc
index 1ee89cf127..6098767aae 100644
--- a/compiler/optimizing/intrinsics_mips64.cc
+++ b/compiler/optimizing/intrinsics_mips64.cc
@@ -1151,16 +1151,31 @@ void IntrinsicCodeGeneratorMIPS64::VisitThreadCurrentThread(HInvoke* invoke) {
                     Thread::PeerOffset<kMips64PointerSize>().Int32Value());
 }
 
-static void CreateIntIntIntToIntLocations(ArenaAllocator* arena, HInvoke* invoke) {
+static void CreateIntIntIntToIntLocations(ArenaAllocator* arena,
+                                          HInvoke* invoke,
+                                          Primitive::Type type) {
+  bool can_call = kEmitCompilerReadBarrier &&
+      (invoke->GetIntrinsic() == Intrinsics::kUnsafeGetObject ||
+       invoke->GetIntrinsic() == Intrinsics::kUnsafeGetObjectVolatile);
   LocationSummary* locations = new (arena) LocationSummary(invoke,
-                                                           LocationSummary::kNoCall,
+                                                           (can_call
+                                                                ? LocationSummary::kCallOnSlowPath
+                                                                : LocationSummary::kNoCall),
                                                            kIntrinsified);
   locations->SetInAt(0, Location::NoLocation());        // Unused receiver.
   locations->SetInAt(1, Location::RequiresRegister());
   locations->SetInAt(2, Location::RequiresRegister());
-  locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
+  locations->SetOut(Location::RequiresRegister(),
+                    (can_call ? Location::kOutputOverlap : Location::kNoOutputOverlap));
+  if (type == Primitive::kPrimNot && kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
+    // We need a temporary register for the read barrier marking slow
+    // path in InstructionCodeGeneratorMIPS64::GenerateReferenceLoadWithBakerReadBarrier.
+    locations->AddTemp(Location::RequiresRegister());
+  }
 }
 
+// Note that the caller must supply a properly aligned memory address.
+// If they do not, the behavior is undefined (atomicity not guaranteed, exception may occur).
 static void GenUnsafeGet(HInvoke* invoke,
                          Primitive::Type type,
                          bool is_volatile,
@@ -1168,30 +1183,71 @@ static void GenUnsafeGet(HInvoke* invoke,
   LocationSummary* locations = invoke->GetLocations();
   DCHECK((type == Primitive::kPrimInt) ||
          (type == Primitive::kPrimLong) ||
-         (type == Primitive::kPrimNot));
+         (type == Primitive::kPrimNot)) << type;
   Mips64Assembler* assembler = codegen->GetAssembler();
+  // Target register.
+  Location trg_loc = locations->Out();
+  GpuRegister trg = trg_loc.AsRegister<GpuRegister>();
   // Object pointer.
-  GpuRegister base = locations->InAt(1).AsRegister<GpuRegister>();
+  Location base_loc = locations->InAt(1);
+  GpuRegister base = base_loc.AsRegister<GpuRegister>();
   // Long offset.
-  GpuRegister offset = locations->InAt(2).AsRegister<GpuRegister>();
-  GpuRegister trg = locations->Out().AsRegister<GpuRegister>();
+  Location offset_loc = locations->InAt(2);
+  GpuRegister offset = offset_loc.AsRegister<GpuRegister>();
 
-  __ Daddu(TMP, base, offset);
-  if (is_volatile) {
-    __ Sync(0);
+  if (!(kEmitCompilerReadBarrier && kUseBakerReadBarrier && (type == Primitive::kPrimNot))) {
+    __ Daddu(TMP, base, offset);
   }
+
   switch (type) {
+    case Primitive::kPrimLong:
+      __ Ld(trg, TMP, 0);
+      if (is_volatile) {
+        __ Sync(0);
+      }
+      break;
+
     case Primitive::kPrimInt:
       __ Lw(trg, TMP, 0);
+      if (is_volatile) {
+        __ Sync(0);
+      }
       break;
 
     case Primitive::kPrimNot:
-      __ Lwu(trg, TMP, 0);
-      __ MaybeUnpoisonHeapReference(trg);
-      break;
-
-    case Primitive::kPrimLong:
-      __ Ld(trg, TMP, 0);
+      if (kEmitCompilerReadBarrier) {
+        if (kUseBakerReadBarrier) {
+          Location temp = locations->GetTemp(0);
+          codegen->GenerateReferenceLoadWithBakerReadBarrier(invoke,
+                                                             trg_loc,
+                                                             base,
+                                                             /* offset */ 0U,
+                                                             /* index */ offset_loc,
+                                                             TIMES_1,
+                                                             temp,
+                                                             /* needs_null_check */ false);
+          if (is_volatile) {
+            __ Sync(0);
+          }
+        } else {
+          __ Lwu(trg, TMP, 0);
+          if (is_volatile) {
+            __ Sync(0);
+          }
+          codegen->GenerateReadBarrierSlow(invoke,
+                                           trg_loc,
+                                           trg_loc,
+                                           base_loc,
+                                           /* offset */ 0U,
+                                           /* index */ offset_loc);
+        }
+      } else {
+        __ Lwu(trg, TMP, 0);
+        if (is_volatile) {
+          __ Sync(0);
+        }
+        __ MaybeUnpoisonHeapReference(trg);
+      }
       break;
 
     default:
@@ -1202,7 +1258,7 @@ static void GenUnsafeGet(HInvoke* invoke,
 
 // int sun.misc.Unsafe.getInt(Object o, long offset)
 void IntrinsicLocationsBuilderMIPS64::VisitUnsafeGet(HInvoke* invoke) {
-  CreateIntIntIntToIntLocations(arena_, invoke);
+  CreateIntIntIntToIntLocations(arena_, invoke, Primitive::kPrimInt);
 }
 
 void IntrinsicCodeGeneratorMIPS64::VisitUnsafeGet(HInvoke* invoke) {
@@ -1211,7 +1267,7 @@ void IntrinsicCodeGeneratorMIPS64::VisitUnsafeGet(HInvoke* invoke) {
 
 // int sun.misc.Unsafe.getIntVolatile(Object o, long offset)
 void IntrinsicLocationsBuilderMIPS64::VisitUnsafeGetVolatile(HInvoke* invoke) {
-  CreateIntIntIntToIntLocations(arena_, invoke);
+  CreateIntIntIntToIntLocations(arena_, invoke, Primitive::kPrimInt);
 }
 
 void IntrinsicCodeGeneratorMIPS64::VisitUnsafeGetVolatile(HInvoke* invoke) {
@@ -1220,7 +1276,7 @@ void IntrinsicCodeGeneratorMIPS64::VisitUnsafeGetVolatile(HInvoke* invoke) {
 
 // long sun.misc.Unsafe.getLong(Object o, long offset)
 void IntrinsicLocationsBuilderMIPS64::VisitUnsafeGetLong(HInvoke* invoke) {
-  CreateIntIntIntToIntLocations(arena_, invoke);
+  CreateIntIntIntToIntLocations(arena_, invoke, Primitive::kPrimLong);
 }
 
 void IntrinsicCodeGeneratorMIPS64::VisitUnsafeGetLong(HInvoke* invoke) {
@@ -1229,7 +1285,7 @@ void IntrinsicCodeGeneratorMIPS64::VisitUnsafeGetLong(HInvoke* invoke) {
 
 // long sun.misc.Unsafe.getLongVolatile(Object o, long offset)
 void IntrinsicLocationsBuilderMIPS64::VisitUnsafeGetLongVolatile(HInvoke* invoke) {
-  CreateIntIntIntToIntLocations(arena_, invoke);
+  CreateIntIntIntToIntLocations(arena_, invoke, Primitive::kPrimLong);
 }
 
 void IntrinsicCodeGeneratorMIPS64::VisitUnsafeGetLongVolatile(HInvoke* invoke) {
@@ -1238,7 +1294,7 @@ void IntrinsicCodeGeneratorMIPS64::VisitUnsafeGetLongVolatile(HInvoke* invoke) {
 
 // Object sun.misc.Unsafe.getObject(Object o, long offset)
 void IntrinsicLocationsBuilderMIPS64::VisitUnsafeGetObject(HInvoke* invoke) {
-  CreateIntIntIntToIntLocations(arena_, invoke);
+  CreateIntIntIntToIntLocations(arena_, invoke, Primitive::kPrimNot);
 }
 
 void IntrinsicCodeGeneratorMIPS64::VisitUnsafeGetObject(HInvoke* invoke) {
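One mips64-specific detail above: heap references are 32-bit even on 64-bit targets, so the non-Baker paths load them with Lwu (zero-extending) rather than Lw (sign-extending) - a reference with bit 31 set must not be smeared across the upper word. The difference in portable terms:

    #include <cstdint>
    #include <cstdio>

    int main() {
      uint32_t ref_bits = 0x80000010u;  // 32-bit value with bit 31 set

      // Lw sign-extends bit 31 into the upper 32 bits...
      int64_t lw_result = static_cast<int32_t>(ref_bits);
      // ...while Lwu zero-extends, preserving the value as an address.
      uint64_t lwu_result = ref_bits;

      std::printf("lw:  %016llx\n", (unsigned long long)lw_result);
      std::printf("lwu: %016llx\n", (unsigned long long)lwu_result);
      return 0;
    }

(Prints ffffffff80000010 versus 0000000080000010.)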
@@ -1247,7 +1303,7 @@ void IntrinsicCodeGeneratorMIPS64::VisitUnsafeGetObject(HInvoke* invoke) {
 
 // Object sun.misc.Unsafe.getObjectVolatile(Object o, long offset)
 void IntrinsicLocationsBuilderMIPS64::VisitUnsafeGetObjectVolatile(HInvoke* invoke) {
-  CreateIntIntIntToIntLocations(arena_, invoke);
+  CreateIntIntIntToIntLocations(arena_, invoke, Primitive::kPrimNot);
 }
 
 void IntrinsicCodeGeneratorMIPS64::VisitUnsafeGetObjectVolatile(HInvoke* invoke) {
@@ -1264,6 +1320,8 @@ static void CreateIntIntIntIntToVoid(ArenaAllocator* arena, HInvoke* invoke) {
   locations->SetInAt(3, Location::RequiresRegister());
 }
 
+// Note that the caller must supply a properly aligned memory address.
+// If they do not, the behavior is undefined (atomicity not guaranteed, exception may occur).
 static void GenUnsafePut(LocationSummary* locations,
                          Primitive::Type type,
                          bool is_volatile,
@@ -1429,35 +1487,70 @@ void IntrinsicCodeGeneratorMIPS64::VisitUnsafePutLongVolatile(HInvoke* invoke) {
                codegen_);
 }
 
-static void CreateIntIntIntIntIntToInt(ArenaAllocator* arena, HInvoke* invoke) {
+static void CreateIntIntIntIntIntToIntPlusTemps(ArenaAllocator* arena, HInvoke* invoke) {
+  bool can_call = kEmitCompilerReadBarrier &&
+      kUseBakerReadBarrier &&
+      (invoke->GetIntrinsic() == Intrinsics::kUnsafeCASObject);
   LocationSummary* locations = new (arena) LocationSummary(invoke,
-                                                           LocationSummary::kNoCall,
+                                                           (can_call
+                                                                ? LocationSummary::kCallOnSlowPath
+                                                                : LocationSummary::kNoCall),
                                                            kIntrinsified);
   locations->SetInAt(0, Location::NoLocation());        // Unused receiver.
   locations->SetInAt(1, Location::RequiresRegister());
   locations->SetInAt(2, Location::RequiresRegister());
   locations->SetInAt(3, Location::RequiresRegister());
   locations->SetInAt(4, Location::RequiresRegister());
   locations->SetOut(Location::RequiresRegister());
+
+  // Temporary register used in CAS by (Baker) read barrier.
+  if (can_call) {
+    locations->AddTemp(Location::RequiresRegister());
+  }
 }
 
-static void GenCas(LocationSummary* locations, Primitive::Type type, CodeGeneratorMIPS64* codegen) {
+// Note that the caller must supply a properly aligned memory address.
+// If they do not, the behavior is undefined (atomicity not guaranteed, exception may occur).
+static void GenCas(HInvoke* invoke, Primitive::Type type, CodeGeneratorMIPS64* codegen) {
   Mips64Assembler* assembler = codegen->GetAssembler();
+  LocationSummary* locations = invoke->GetLocations();
   GpuRegister base = locations->InAt(1).AsRegister<GpuRegister>();
-  GpuRegister offset = locations->InAt(2).AsRegister<GpuRegister>();
+  Location offset_loc = locations->InAt(2);
+  GpuRegister offset = offset_loc.AsRegister<GpuRegister>();
   GpuRegister expected = locations->InAt(3).AsRegister<GpuRegister>();
   GpuRegister value = locations->InAt(4).AsRegister<GpuRegister>();
-  GpuRegister out = locations->Out().AsRegister<GpuRegister>();
+  Location out_loc = locations->Out();
+  GpuRegister out = out_loc.AsRegister<GpuRegister>();
 
   DCHECK_NE(base, out);
   DCHECK_NE(offset, out);
   DCHECK_NE(expected, out);
 
   if (type == Primitive::kPrimNot) {
-    // Mark card for object assuming new value is stored.
+    // The only read barrier implementation supporting the
+    // UnsafeCASObject intrinsic is the Baker-style read barriers.
+    DCHECK(!kEmitCompilerReadBarrier || kUseBakerReadBarrier);
+
+    // Mark card for object assuming new value is stored. Worst case we will mark an unchanged
+    // object and scan the receiver at the next GC for nothing.
    bool value_can_be_null = true;  // TODO: Worth finding out this information?
     codegen->MarkGCCard(base, value, value_can_be_null);
+
+    if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
+      Location temp = locations->GetTemp(0);
+      // Need to make sure the reference stored in the field is a to-space
+      // one before attempting the CAS or the CAS could fail incorrectly.
+      codegen->GenerateReferenceLoadWithBakerReadBarrier(
+          invoke,
+          out_loc,  // Unused, used only as a "temporary" within the read barrier.
+          base,
+          /* offset */ 0u,
+          /* index */ offset_loc,
+          ScaleFactor::TIMES_1,
+          temp,
+          /* needs_null_check */ false,
+          /* always_update_field */ true);
+    }
   }
 
   Mips64Label loop_head, exit_loop;
@@ -1521,29 +1614,39 @@ static void GenCas(LocationSummary* locations, Primitive::Type type, CodeGenerat
 
 // boolean sun.misc.Unsafe.compareAndSwapInt(Object o, long offset, int expected, int x)
 void IntrinsicLocationsBuilderMIPS64::VisitUnsafeCASInt(HInvoke* invoke) {
-  CreateIntIntIntIntIntToInt(arena_, invoke);
+  CreateIntIntIntIntIntToIntPlusTemps(arena_, invoke);
 }
 
 void IntrinsicCodeGeneratorMIPS64::VisitUnsafeCASInt(HInvoke* invoke) {
-  GenCas(invoke->GetLocations(), Primitive::kPrimInt, codegen_);
+  GenCas(invoke, Primitive::kPrimInt, codegen_);
 }
 
 // boolean sun.misc.Unsafe.compareAndSwapLong(Object o, long offset, long expected, long x)
 void IntrinsicLocationsBuilderMIPS64::VisitUnsafeCASLong(HInvoke* invoke) {
-  CreateIntIntIntIntIntToInt(arena_, invoke);
+  CreateIntIntIntIntIntToIntPlusTemps(arena_, invoke);
 }
 
 void IntrinsicCodeGeneratorMIPS64::VisitUnsafeCASLong(HInvoke* invoke) {
-  GenCas(invoke->GetLocations(), Primitive::kPrimLong, codegen_);
+  GenCas(invoke, Primitive::kPrimLong, codegen_);
 }
 
 // boolean sun.misc.Unsafe.compareAndSwapObject(Object o, long offset, Object expected, Object x)
 void IntrinsicLocationsBuilderMIPS64::VisitUnsafeCASObject(HInvoke* invoke) {
-  CreateIntIntIntIntIntToInt(arena_, invoke);
+  // The only read barrier implementation supporting the
+  // UnsafeCASObject intrinsic is the Baker-style read barriers.
+  if (kEmitCompilerReadBarrier && !kUseBakerReadBarrier) {
+    return;
+  }
+
+  CreateIntIntIntIntIntToIntPlusTemps(arena_, invoke);
 }
 
 void IntrinsicCodeGeneratorMIPS64::VisitUnsafeCASObject(HInvoke* invoke) {
-  GenCas(invoke->GetLocations(), Primitive::kPrimNot, codegen_);
+  // The only read barrier implementation supporting the
+  // UnsafeCASObject intrinsic is the Baker-style read barriers.
+  DCHECK(!kEmitCompilerReadBarrier || kUseBakerReadBarrier);
+
+  GenCas(invoke, Primitive::kPrimNot, codegen_);
 }
 
 // int java.lang.String.compareTo(String anotherString)
diff --git a/compiler/optimizing/optimizing_compiler.cc b/compiler/optimizing/optimizing_compiler.cc
index 3c6d2d64a9..eb88fdee84 100644
--- a/compiler/optimizing/optimizing_compiler.cc
+++ b/compiler/optimizing/optimizing_compiler.cc
@@ -454,6 +454,8 @@ static bool IsInstructionSetSupported(InstructionSet instruction_set) {
 static bool InstructionSetSupportsReadBarrier(InstructionSet instruction_set) {
   return instruction_set == kArm64
       || instruction_set == kThumb2
+      || instruction_set == kMips
+      || instruction_set == kMips64
       || instruction_set == kX86
       || instruction_set == kX86_64;
 }
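Finally, the optimizing_compiler.cc hunk is the switch that turns all of the above on: with MIPS and MIPS64 added to InstructionSetSupportsReadBarrier, a read-barrier (concurrent copying collector) build no longer rejects these targets. A simplified sketch of how such a predicate gates compilation (the driver logic is paraphrased here, not quoted; kEmitCompilerReadBarrier is a build-time flag in ART, stubbed so the sketch is self-contained):

    enum InstructionSet { kArm64, kThumb2, kMips, kMips64, kX86, kX86_64, kNone };

    constexpr bool kEmitCompilerReadBarrier = true;  // stub for the ART build flag

    static bool InstructionSetSupportsReadBarrier(InstructionSet instruction_set) {
      return instruction_set == kArm64
          || instruction_set == kThumb2
          || instruction_set == kMips
          || instruction_set == kMips64
          || instruction_set == kX86
          || instruction_set == kX86_64;
    }

    bool CanCompileWithOptimizing(InstructionSet isa) {
      // With read barriers enabled, back ends lacking codegen support must
      // bail out - which, before this change, included both MIPS targets.
      return !kEmitCompilerReadBarrier || InstructionSetSupportsReadBarrier(isa);
    }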