commit     4147fcc43c2ee019a06e55384985e3eaf82dcb8c
tree       11ec92efbfddf7736bbc74ed35fcfb3756bfcfb0
parent     e8f48da635c4d07bbe431e5819da8e1fad91a8ef
author     2017-06-17 19:57:27 -0700
committer  2017-07-19 15:03:10 -0700
MIPS: Reduce Baker read barrier code size overhead
Test: booted MIPS64 (with 2nd arch MIPS32R6) in QEMU
Test: test-art-target-gtest
Test: testrunner.py --target --optimizing
Test: same tests as above on CI20
Test: booted MIPS32 and MIPS64 in QEMU with poisoning
in configurations:
- with Baker read barrier thunks
- without Baker read barrier thunks
- ART_READ_BARRIER_TYPE=TABLELOOKUP
Change-Id: I79f320bf8862a04215c76cfeff3118ebc87f7ef2
 compiler/optimizing/code_generator_mips.cc      | 447
 compiler/optimizing/code_generator_mips.h       |   8
 compiler/optimizing/code_generator_mips64.cc    | 360
 compiler/optimizing/code_generator_mips64.h     |   5
 compiler/utils/mips/assembler_mips.cc           |  16
 compiler/utils/mips/assembler_mips.h            |   1
 compiler/utils/mips64/assembler_mips64.cc       |   4
 compiler/utils/mips64/assembler_mips64.h        |   1
 runtime/arch/arch_test.cc                       |   8
 runtime/arch/mips/asm_support_mips.S            |   7
 runtime/arch/mips/asm_support_mips.h            |  20
 runtime/arch/mips/entrypoints_init_mips.cc      |  25
 runtime/arch/mips/quick_entrypoints_mips.S      | 379
 runtime/arch/mips64/asm_support_mips64.S        |   7
 runtime/arch/mips64/asm_support_mips64.h        |  20
 runtime/arch/mips64/entrypoints_init_mips64.cc  |  21
 runtime/arch/mips64/quick_entrypoints_mips64.S  | 371
 runtime/oat.h                                   |   4
 18 files changed, 1556 insertions(+), 148 deletions(-)
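The mechanism the following hunks build on, per the comments added throughout this change: the generated fast path loads pReadBarrierMarkReg00 (an alias of art_quick_read_barrier_mark_introspection), which the runtime sets to null while the GC is not marking, and calls a small per-register thunk only when it is non-null. A minimal sketch of that control flow, with invented C++ stand-ins (Object, Thread, and BakerFieldLoad are illustrations, not ART's types; the real fast path is emitted MIPS code):

#include <cstddef>
#include <cstdint>

// Hypothetical stand-ins for ART types; illustration only.
struct Object { Object* field; };
struct Thread {
  // Null while the GC is not marking, which is what lets the generated code
  // test one loaded value instead of calling GetIsGcMarking().
  void (*pReadBarrierMarkReg00)(Object** holder);
};

// Logic of the emitted fast path for a field load through a Baker read
// barrier thunk (the real thing is emitted MIPS code, not a C++ function).
Object* BakerFieldLoad(Thread* self, Object* obj, size_t offset) {
  if (self->pReadBarrierMarkReg00 != nullptr) {  // GC is marking.
    // The per-register thunk checks the holder's lock-word "gray" bit; if the
    // holder is not gray it issues a load-load barrier and falls through to
    // the load below, otherwise it loads and marks the reference itself.
    Object* holder = obj;
    self->pReadBarrierMarkReg00(&holder);
  }
  // Original reference load ("not_gray_return_address:" in the comments).
  return *reinterpret_cast<Object**>(reinterpret_cast<uint8_t*>(obj) + offset);
}

int main() {
  Object referent{nullptr};
  Object obj{&referent};
  Thread self{nullptr};  // Not marking: one null check, no thunk call.
  return BakerFieldLoad(&self, &obj, offsetof(Object, field)) == &referent ? 0 : 1;
}

Keeping the gray-bit test and the marking call in shared thunks, rather than expanding them at every load site, is what yields the code size reduction in the commit title.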
diff --git a/compiler/optimizing/code_generator_mips.cc b/compiler/optimizing/code_generator_mips.cc
index 23d188d630..b6eb5c1d1d 100644
--- a/compiler/optimizing/code_generator_mips.cc
+++ b/compiler/optimizing/code_generator_mips.cc
@@ -16,6 +16,7 @@
 
 #include "code_generator_mips.h"
 
+#include "arch/mips/asm_support_mips.h"
 #include "arch/mips/entrypoints_direct_mips.h"
 #include "arch/mips/instruction_set_features_mips.h"
 #include "art_method.h"
@@ -40,6 +41,11 @@ namespace mips {
 static constexpr int kCurrentMethodStackOffset = 0;
 static constexpr Register kMethodRegisterArgument = A0;
 
+// Flags controlling the use of thunks for Baker read barriers.
+constexpr bool kBakerReadBarrierThunksEnableForFields = true;
+constexpr bool kBakerReadBarrierThunksEnableForArrays = true;
+constexpr bool kBakerReadBarrierThunksEnableForGcRoots = true;
+
 Location MipsReturnLocation(Primitive::Type return_type) {
   switch (return_type) {
     case Primitive::kPrimBoolean:
@@ -1486,7 +1492,8 @@ void CodeGeneratorMIPS::MoveLocation(Location destination,
       __ Mfc1(dst_low, src);
       __ MoveFromFpuHigh(dst_high, src);
     } else {
-      DCHECK(source.IsDoubleStackSlot()) << "Cannot move from " << source << " to " << destination;
+      DCHECK(source.IsDoubleStackSlot())
+          << "Cannot move from " << source << " to " << destination;
       int32_t off = source.GetStackIndex();
       Register r = destination.AsRegisterPairLow<Register>();
       __ LoadFromOffset(kLoadDoubleword, r, SP, off);
@@ -1539,7 +1546,8 @@ void CodeGeneratorMIPS::MoveLocation(Location destination,
     } else if (source.IsFpuRegister()) {
       __ StoreDToOffset(source.AsFpuRegister<FRegister>(), SP, dst_offset);
     } else {
-      DCHECK(source.IsDoubleStackSlot()) << "Cannot move from " << source << " to " << destination;
+      DCHECK(source.IsDoubleStackSlot())
+          << "Cannot move from " << source << " to " << destination;
       __ LoadFromOffset(kLoadWord, TMP, SP, source.GetStackIndex());
       __ StoreToOffset(kStoreWord, TMP, SP, dst_offset);
       __ LoadFromOffset(kLoadWord, TMP, SP, source.GetStackIndex() + 4);
@@ -1763,8 +1771,10 @@ void CodeGeneratorMIPS::EmitPcRelativeAddressPlaceholderHigh(PcRelativePatchInfo
   }
   // A following instruction will add the sign-extended low half of the 32-bit
   // offset to `out` (e.g. lw, jialc, addiu).
-  DCHECK_EQ(info_low->patch_info_high, info_high);
-  __ Bind(&info_low->label);
+  if (info_low != nullptr) {
+    DCHECK_EQ(info_low->patch_info_high, info_high);
+    __ Bind(&info_low->label);
+  }
 }
 
 CodeGeneratorMIPS::JitPatchInfo* CodeGeneratorMIPS::NewJitRootStringPatch(
@@ -1791,25 +1801,26 @@ void CodeGeneratorMIPS::PatchJitRootUse(uint8_t* code,
                                         const uint8_t* roots_data,
                                         const CodeGeneratorMIPS::JitPatchInfo& info,
                                         uint64_t index_in_table) const {
-  uint32_t literal_offset = GetAssembler().GetLabelLocation(&info.high_label);
+  uint32_t high_literal_offset = GetAssembler().GetLabelLocation(&info.high_label);
+  uint32_t low_literal_offset = GetAssembler().GetLabelLocation(&info.low_label);
   uintptr_t address =
      reinterpret_cast<uintptr_t>(roots_data) + index_in_table * sizeof(GcRoot<mirror::Object>);
   uint32_t addr32 = dchecked_integral_cast<uint32_t>(address);
   // lui reg, addr32_high
-  DCHECK_EQ(code[literal_offset + 0], 0x34);
-  DCHECK_EQ(code[literal_offset + 1], 0x12);
-  DCHECK_EQ((code[literal_offset + 2] & 0xE0), 0x00);
-  DCHECK_EQ(code[literal_offset + 3], 0x3C);
+  DCHECK_EQ(code[high_literal_offset + 0], 0x34);
+  DCHECK_EQ(code[high_literal_offset + 1], 0x12);
+  DCHECK_EQ((code[high_literal_offset + 2] & 0xE0), 0x00);
+  DCHECK_EQ(code[high_literal_offset + 3], 0x3C);
   // instr reg, reg, addr32_low
-  DCHECK_EQ(code[literal_offset + 4], 0x78);
-  DCHECK_EQ(code[literal_offset + 5], 0x56);
+  DCHECK_EQ(code[low_literal_offset + 0], 0x78);
+  DCHECK_EQ(code[low_literal_offset + 1], 0x56);
   addr32 += (addr32 & 0x8000) << 1;  // Account for sign extension in "instr reg, reg, addr32_low".
   // lui reg, addr32_high
-  code[literal_offset + 0] = static_cast<uint8_t>(addr32 >> 16);
-  code[literal_offset + 1] = static_cast<uint8_t>(addr32 >> 24);
+  code[high_literal_offset + 0] = static_cast<uint8_t>(addr32 >> 16);
+  code[high_literal_offset + 1] = static_cast<uint8_t>(addr32 >> 24);
   // instr reg, reg, addr32_low
-  code[literal_offset + 4] = static_cast<uint8_t>(addr32 >> 0);
-  code[literal_offset + 5] = static_cast<uint8_t>(addr32 >> 8);
+  code[low_literal_offset + 0] = static_cast<uint8_t>(addr32 >> 0);
+  code[low_literal_offset + 1] = static_cast<uint8_t>(addr32 >> 8);
 }
 
 void CodeGeneratorMIPS::EmitJitRootPatches(uint8_t* code, const uint8_t* roots_data) {
@@ -2545,7 +2556,12 @@ void LocationsBuilderMIPS::VisitArrayGet(HArrayGet* instruction) {
   // We need a temporary register for the read barrier marking slow
   // path in CodeGeneratorMIPS::GenerateArrayLoadWithBakerReadBarrier.
   if (object_array_get_with_read_barrier && kUseBakerReadBarrier) {
-    locations->AddTemp(Location::RequiresRegister());
+    bool temp_needed = instruction->GetIndex()->IsConstant()
+        ? !kBakerReadBarrierThunksEnableForFields
+        : !kBakerReadBarrierThunksEnableForArrays;
+    if (temp_needed) {
+      locations->AddTemp(Location::RequiresRegister());
+    }
   }
 }
 
@@ -2681,16 +2697,32 @@ void InstructionCodeGeneratorMIPS::VisitArrayGet(HArrayGet* instruction) {
     // /* HeapReference<Object> */ out =
     //     *(obj + data_offset + index * sizeof(HeapReference<Object>))
     if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
-      Location temp = locations->GetTemp(0);
+      bool temp_needed = index.IsConstant()
+          ? !kBakerReadBarrierThunksEnableForFields
+          : !kBakerReadBarrierThunksEnableForArrays;
+      Location temp = temp_needed ? locations->GetTemp(0) : Location::NoLocation();
       // Note that a potential implicit null check is handled in this
       // CodeGeneratorMIPS::GenerateArrayLoadWithBakerReadBarrier call.
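An aside on the `addr32 += (addr32 & 0x8000) << 1` adjustment in PatchJitRootUse above: the low-half instruction (addiu/lw) sign-extends its 16-bit immediate, so when bit 15 of the address is set, the high half written into the lui must be incremented by one to compensate. A small self-contained check (the example address is assumed, matching the 0x1234/0x5678 placeholder pattern):

#include <cassert>
#include <cstdint>

int main() {
  // Placeholder code is lui reg, 0x1234 / instr reg, reg, 0x5678; patching
  // rewrites both immediates from the real address.
  uint32_t addr32 = 0x1234A678u;  // Example address with bit 15 set.
  uint32_t patched = addr32 + ((addr32 & 0x8000u) << 1);
  uint16_t high = static_cast<uint16_t>(patched >> 16);  // Written into lui.
  int16_t low = static_cast<int16_t>(addr32 & 0xFFFFu);  // Written into addiu/lw.
  // lui places high << 16; the next instruction adds the sign-extended low.
  uint32_t rebuilt = (static_cast<uint32_t>(high) << 16) +
                     static_cast<uint32_t>(static_cast<int32_t>(low));
  assert(rebuilt == addr32);  // 0x1235_0000 + 0xFFFF_A678 == 0x1234_A678.
  return 0;
}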
- codegen_->GenerateArrayLoadWithBakerReadBarrier(instruction, - out_loc, - obj, - data_offset, - index, - temp, - /* needs_null_check */ true); + DCHECK(!instruction->CanDoImplicitNullCheckOn(instruction->InputAt(0))); + if (index.IsConstant()) { + // Array load with a constant index can be treated as a field load. + size_t offset = + (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_4) + data_offset; + codegen_->GenerateFieldLoadWithBakerReadBarrier(instruction, + out_loc, + obj, + offset, + temp, + /* needs_null_check */ false); + } else { + codegen_->GenerateArrayLoadWithBakerReadBarrier(instruction, + out_loc, + obj, + data_offset, + index, + temp, + /* needs_null_check */ false); + } } else { Register out = out_loc.AsRegister<Register>(); if (index.IsConstant()) { @@ -3093,6 +3125,7 @@ void InstructionCodeGeneratorMIPS::VisitBoundsCheck(HBoundsCheck* instruction) { // Temp is used for read barrier. static size_t NumberOfInstanceOfTemps(TypeCheckKind type_check_kind) { if (kEmitCompilerReadBarrier && + !(kUseBakerReadBarrier && kBakerReadBarrierThunksEnableForFields) && (kUseBakerReadBarrier || type_check_kind == TypeCheckKind::kAbstractClassCheck || type_check_kind == TypeCheckKind::kClassHierarchyCheck || @@ -6096,7 +6129,9 @@ void LocationsBuilderMIPS::HandleFieldGet(HInstruction* instruction, const Field if (object_field_get_with_read_barrier && kUseBakerReadBarrier) { // We need a temporary register for the read barrier marking slow // path in CodeGeneratorMIPS::GenerateFieldLoadWithBakerReadBarrier. - locations->AddTemp(Location::RequiresRegister()); + if (!kBakerReadBarrierThunksEnableForFields) { + locations->AddTemp(Location::RequiresRegister()); + } } } } @@ -6171,7 +6206,8 @@ void InstructionCodeGeneratorMIPS::HandleFieldGet(HInstruction* instruction, if (type == Primitive::kPrimNot) { // /* HeapReference<Object> */ dst = *(obj + offset) if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) { - Location temp_loc = locations->GetTemp(0); + Location temp_loc = + kBakerReadBarrierThunksEnableForFields ? Location::NoLocation() : locations->GetTemp(0); // Note that a potential implicit null check is handled in this // CodeGeneratorMIPS::GenerateFieldLoadWithBakerReadBarrier call. codegen_->GenerateFieldLoadWithBakerReadBarrier(instruction, @@ -6395,7 +6431,9 @@ void InstructionCodeGeneratorMIPS::GenerateReferenceLoadOneRegister( Register out_reg = out.AsRegister<Register>(); if (read_barrier_option == kWithReadBarrier) { CHECK(kEmitCompilerReadBarrier); - DCHECK(maybe_temp.IsRegister()) << maybe_temp; + if (!kUseBakerReadBarrier || !kBakerReadBarrierThunksEnableForFields) { + DCHECK(maybe_temp.IsRegister()) << maybe_temp; + } if (kUseBakerReadBarrier) { // Load with fast path based Baker's read barrier. // /* HeapReference<Object> */ out = *(out + offset) @@ -6435,7 +6473,9 @@ void InstructionCodeGeneratorMIPS::GenerateReferenceLoadTwoRegisters( if (read_barrier_option == kWithReadBarrier) { CHECK(kEmitCompilerReadBarrier); if (kUseBakerReadBarrier) { - DCHECK(maybe_temp.IsRegister()) << maybe_temp; + if (!kBakerReadBarrierThunksEnableForFields) { + DCHECK(maybe_temp.IsRegister()) << maybe_temp; + } // Load with fast path based Baker's read barrier. 
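The constant-index case above rests on simple address arithmetic: the element lives at a fixed offset from the array object, so the field-load path (and its cheaper thunk) can be reused and no temp register is needed. A tiny sketch with assumed values (TIMES_4 = 2 is the scale for 4-byte heap references; the 12-byte data offset is illustrative, not taken from this patch):

#include <cstddef>

int main() {
  // Mirrors `(index << TIMES_4) + data_offset` from the hunk above.
  constexpr size_t kTimes4 = 2;       // log2 of the 4-byte reference size.
  constexpr size_t kDataOffset = 12;  // Assumed array data offset (header + length).
  constexpr size_t kIndex = 5;
  static_assert((kIndex << kTimes4) + kDataOffset == 32,
                "element 5 sits at the fixed offset obj + 32");
  return 0;
}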
// /* HeapReference<Object> */ out = *(obj + offset) codegen_->GenerateFieldLoadWithBakerReadBarrier(instruction, @@ -6458,67 +6498,172 @@ void InstructionCodeGeneratorMIPS::GenerateReferenceLoadTwoRegisters( } } +static inline int GetBakerMarkThunkNumber(Register reg) { + static_assert(BAKER_MARK_INTROSPECTION_REGISTER_COUNT == 21, "Expecting equal"); + if (reg >= V0 && reg <= T7) { // 14 consequtive regs. + return reg - V0; + } else if (reg >= S2 && reg <= S7) { // 6 consequtive regs. + return 14 + (reg - S2); + } else if (reg == FP) { // One more. + return 20; + } + LOG(FATAL) << "Unexpected register " << reg; + UNREACHABLE(); +} + +static inline int GetBakerMarkFieldArrayThunkDisplacement(Register reg, bool short_offset) { + int num = GetBakerMarkThunkNumber(reg) + + (short_offset ? BAKER_MARK_INTROSPECTION_REGISTER_COUNT : 0); + return num * BAKER_MARK_INTROSPECTION_FIELD_ARRAY_ENTRY_SIZE; +} + +static inline int GetBakerMarkGcRootThunkDisplacement(Register reg) { + return GetBakerMarkThunkNumber(reg) * BAKER_MARK_INTROSPECTION_GC_ROOT_ENTRY_SIZE + + BAKER_MARK_INTROSPECTION_GC_ROOT_ENTRIES_OFFSET; +} + void InstructionCodeGeneratorMIPS::GenerateGcRootFieldLoad(HInstruction* instruction, Location root, Register obj, uint32_t offset, - ReadBarrierOption read_barrier_option) { + ReadBarrierOption read_barrier_option, + MipsLabel* label_low) { + bool reordering; + if (label_low != nullptr) { + DCHECK_EQ(offset, 0x5678u); + } Register root_reg = root.AsRegister<Register>(); if (read_barrier_option == kWithReadBarrier) { DCHECK(kEmitCompilerReadBarrier); if (kUseBakerReadBarrier) { // Fast path implementation of art::ReadBarrier::BarrierForRoot when // Baker's read barrier are used: - // - // root = obj.field; - // temp = Thread::Current()->pReadBarrierMarkReg ## root.reg() - // if (temp != null) { - // root = temp(root) - // } - - // /* GcRoot<mirror::Object> */ root = *(obj + offset) - __ LoadFromOffset(kLoadWord, root_reg, obj, offset); - static_assert( - sizeof(mirror::CompressedReference<mirror::Object>) == sizeof(GcRoot<mirror::Object>), - "art::mirror::CompressedReference<mirror::Object> and art::GcRoot<mirror::Object> " - "have different sizes."); - static_assert(sizeof(mirror::CompressedReference<mirror::Object>) == sizeof(int32_t), - "art::mirror::CompressedReference<mirror::Object> and int32_t " - "have different sizes."); - - // Slow path marking the GC root `root`. - Location temp = Location::RegisterLocation(T9); - SlowPathCodeMIPS* slow_path = - new (GetGraph()->GetArena()) ReadBarrierMarkSlowPathMIPS( - instruction, - root, - /*entrypoint*/ temp); - codegen_->AddSlowPath(slow_path); + if (kBakerReadBarrierThunksEnableForGcRoots) { + // Note that we do not actually check the value of `GetIsGcMarking()` + // to decide whether to mark the loaded GC root or not. Instead, we + // load into `temp` (T9) the read barrier mark introspection entrypoint. + // If `temp` is null, it means that `GetIsGcMarking()` is false, and + // vice versa. + // + // We use thunks for the slow path. That thunk checks the reference + // and jumps to the entrypoint if needed. + // + // temp = Thread::Current()->pReadBarrierMarkReg00 + // // AKA &art_quick_read_barrier_mark_introspection. + // GcRoot<mirror::Object> root = *(obj+offset); // Original reference load. 
+ // if (temp != nullptr) { + // temp = &gc_root_thunk<root_reg> + // root = temp(root) + // } + + bool isR6 = codegen_->GetInstructionSetFeatures().IsR6(); + const int32_t entry_point_offset = + Thread::ReadBarrierMarkEntryPointsOffset<kMipsPointerSize>(0); + const int thunk_disp = GetBakerMarkGcRootThunkDisplacement(root_reg); + int16_t offset_low = Low16Bits(offset); + int16_t offset_high = High16Bits(offset - offset_low); // Accounts for sign + // extension in lw. + bool short_offset = IsInt<16>(static_cast<int32_t>(offset)); + Register base = short_offset ? obj : TMP; + // Loading the entrypoint does not require a load acquire since it is only changed when + // threads are suspended or running a checkpoint. + __ LoadFromOffset(kLoadWord, T9, TR, entry_point_offset); + reordering = __ SetReorder(false); + if (!short_offset) { + DCHECK(!label_low); + __ AddUpper(base, obj, offset_high); + } + __ Beqz(T9, (isR6 ? 2 : 4)); // Skip jialc / addiu+jalr+nop. + if (label_low != nullptr) { + DCHECK(short_offset); + __ Bind(label_low); + } + // /* GcRoot<mirror::Object> */ root = *(obj + offset) + __ LoadFromOffset(kLoadWord, root_reg, base, offset_low); // Single instruction + // in delay slot. + if (isR6) { + __ Jialc(T9, thunk_disp); + } else { + __ Addiu(T9, T9, thunk_disp); + __ Jalr(T9); + __ Nop(); + } + __ SetReorder(reordering); + } else { + // Note that we do not actually check the value of `GetIsGcMarking()` + // to decide whether to mark the loaded GC root or not. Instead, we + // load into `temp` (T9) the read barrier mark entry point corresponding + // to register `root`. If `temp` is null, it means that `GetIsGcMarking()` + // is false, and vice versa. + // + // GcRoot<mirror::Object> root = *(obj+offset); // Original reference load. + // temp = Thread::Current()->pReadBarrierMarkReg ## root.reg() + // if (temp != null) { + // root = temp(root) + // } + + if (label_low != nullptr) { + reordering = __ SetReorder(false); + __ Bind(label_low); + } + // /* GcRoot<mirror::Object> */ root = *(obj + offset) + __ LoadFromOffset(kLoadWord, root_reg, obj, offset); + if (label_low != nullptr) { + __ SetReorder(reordering); + } + static_assert( + sizeof(mirror::CompressedReference<mirror::Object>) == sizeof(GcRoot<mirror::Object>), + "art::mirror::CompressedReference<mirror::Object> and art::GcRoot<mirror::Object> " + "have different sizes."); + static_assert(sizeof(mirror::CompressedReference<mirror::Object>) == sizeof(int32_t), + "art::mirror::CompressedReference<mirror::Object> and int32_t " + "have different sizes."); + + // Slow path marking the GC root `root`. + Location temp = Location::RegisterLocation(T9); + SlowPathCodeMIPS* slow_path = + new (GetGraph()->GetArena()) ReadBarrierMarkSlowPathMIPS( + instruction, + root, + /*entrypoint*/ temp); + codegen_->AddSlowPath(slow_path); - // temp = Thread::Current()->pReadBarrierMarkReg ## root.reg() - const int32_t entry_point_offset = - Thread::ReadBarrierMarkEntryPointsOffset<kMipsPointerSize>(root.reg() - 1); - // Loading the entrypoint does not require a load acquire since it is only changed when - // threads are suspended or running a checkpoint. - __ LoadFromOffset(kLoadWord, temp.AsRegister<Register>(), TR, entry_point_offset); - // The entrypoint is null when the GC is not marking, this prevents one load compared to - // checking GetIsGcMarking. 
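The displacement helpers introduced above encode a fixed layout of the introspection entrypoint's thunk table: two blocks of field/array entries (long-offset entries first, then short-offset entries), followed by a block of GC-root entries. A sketch of the arithmetic using the MIPS32 constants that asm_support_mips.h defines later in this patch:

#include <cstdint>

// Values from runtime/arch/mips/asm_support_mips.h (MIPS32).
constexpr int kRegCount = 21;                // BAKER_MARK_INTROSPECTION_REGISTER_COUNT
constexpr int kFieldArrayEntrySize = 8 * 4;  // 8 instructions per field/array entry.
constexpr int kGcRootEntrySize = 4 * 4;      // 4 instructions per GC root entry.
constexpr int kGcRootEntriesOffset = 2 * kRegCount * kFieldArrayEntrySize;

// Long-offset/array thunks occupy the first kRegCount slots, short-offset
// thunks the next kRegCount, and GC-root thunks follow both blocks.
constexpr int FieldArrayThunkDisp(int thunk_number, bool short_offset) {
  return (thunk_number + (short_offset ? kRegCount : 0)) * kFieldArrayEntrySize;
}
constexpr int GcRootThunkDisp(int thunk_number) {
  return kGcRootEntriesOffset + thunk_number * kGcRootEntrySize;
}

// E.g. the short-offset thunk of the last slot (FP, thunk #20), and the start
// of the GC-root block; both fit the 16-bit displacement of jialc/addiu.
static_assert(FieldArrayThunkDisp(20, /* short_offset */ true) == (21 + 20) * 32, "");
static_assert(GcRootThunkDisp(0) == 1344, "GC-root block starts after 2 * 21 entries");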
- __ Bnez(temp.AsRegister<Register>(), slow_path->GetEntryLabel()); - __ Bind(slow_path->GetExitLabel()); + const int32_t entry_point_offset = + Thread::ReadBarrierMarkEntryPointsOffset<kMipsPointerSize>(root.reg() - 1); + // Loading the entrypoint does not require a load acquire since it is only changed when + // threads are suspended or running a checkpoint. + __ LoadFromOffset(kLoadWord, temp.AsRegister<Register>(), TR, entry_point_offset); + __ Bnez(temp.AsRegister<Register>(), slow_path->GetEntryLabel()); + __ Bind(slow_path->GetExitLabel()); + } } else { + if (label_low != nullptr) { + reordering = __ SetReorder(false); + __ Bind(label_low); + } // GC root loaded through a slow path for read barriers other // than Baker's. // /* GcRoot<mirror::Object>* */ root = obj + offset __ Addiu32(root_reg, obj, offset); + if (label_low != nullptr) { + __ SetReorder(reordering); + } // /* mirror::Object* */ root = root->Read() codegen_->GenerateReadBarrierForRootSlow(instruction, root, root); } } else { + if (label_low != nullptr) { + reordering = __ SetReorder(false); + __ Bind(label_low); + } // Plain GC root load with no read barrier. // /* GcRoot<mirror::Object> */ root = *(obj + offset) __ LoadFromOffset(kLoadWord, root_reg, obj, offset); // Note that GC roots are not affected by heap poisoning, thus we // do not have to unpoison `root_reg` here. + if (label_low != nullptr) { + __ SetReorder(reordering); + } } } @@ -6531,6 +6676,88 @@ void CodeGeneratorMIPS::GenerateFieldLoadWithBakerReadBarrier(HInstruction* inst DCHECK(kEmitCompilerReadBarrier); DCHECK(kUseBakerReadBarrier); + if (kBakerReadBarrierThunksEnableForFields) { + // Note that we do not actually check the value of `GetIsGcMarking()` + // to decide whether to mark the loaded reference or not. Instead, we + // load into `temp` (T9) the read barrier mark introspection entrypoint. + // If `temp` is null, it means that `GetIsGcMarking()` is false, and + // vice versa. + // + // We use thunks for the slow path. That thunk checks the reference + // and jumps to the entrypoint if needed. If the holder is not gray, + // it issues a load-load memory barrier and returns to the original + // reference load. + // + // temp = Thread::Current()->pReadBarrierMarkReg00 + // // AKA &art_quick_read_barrier_mark_introspection. + // if (temp != nullptr) { + // temp = &field_array_thunk<holder_reg> + // temp() + // } + // not_gray_return_address: + // // If the offset is too large to fit into the lw instruction, we + // // use an adjusted base register (TMP) here. This register + // // receives bits 16 ... 31 of the offset before the thunk invocation + // // and the thunk benefits from it. + // HeapReference<mirror::Object> reference = *(obj+offset); // Original reference load. + // gray_return_address: + + DCHECK(temp.IsInvalid()); + bool isR6 = GetInstructionSetFeatures().IsR6(); + int16_t offset_low = Low16Bits(offset); + int16_t offset_high = High16Bits(offset - offset_low); // Accounts for sign extension in lw. + bool short_offset = IsInt<16>(static_cast<int32_t>(offset)); + bool reordering = __ SetReorder(false); + const int32_t entry_point_offset = + Thread::ReadBarrierMarkEntryPointsOffset<kMipsPointerSize>(0); + // There may have or may have not been a null check if the field offset is smaller than + // the page size. + // There must've been a null check in case it's actually a load from an array. + // We will, however, perform an explicit null check in the thunk as it's easier to + // do it than not. 
+ if (instruction->IsArrayGet()) { + DCHECK(!needs_null_check); + } + const int thunk_disp = GetBakerMarkFieldArrayThunkDisplacement(obj, short_offset); + // Loading the entrypoint does not require a load acquire since it is only changed when + // threads are suspended or running a checkpoint. + __ LoadFromOffset(kLoadWord, T9, TR, entry_point_offset); + Register ref_reg = ref.AsRegister<Register>(); + Register base = short_offset ? obj : TMP; + if (short_offset) { + if (isR6) { + __ Beqzc(T9, 2); // Skip jialc. + __ Nop(); // In forbidden slot. + __ Jialc(T9, thunk_disp); + } else { + __ Beqz(T9, 3); // Skip jalr+nop. + __ Addiu(T9, T9, thunk_disp); // In delay slot. + __ Jalr(T9); + __ Nop(); // In delay slot. + } + } else { + if (isR6) { + __ Beqz(T9, 2); // Skip jialc. + __ Aui(base, obj, offset_high); // In delay slot. + __ Jialc(T9, thunk_disp); + } else { + __ Lui(base, offset_high); + __ Beqz(T9, 2); // Skip jalr. + __ Addiu(T9, T9, thunk_disp); // In delay slot. + __ Jalr(T9); + __ Addu(base, base, obj); // In delay slot. + } + } + // /* HeapReference<Object> */ ref = *(obj + offset) + __ LoadFromOffset(kLoadWord, ref_reg, base, offset_low); // Single instruction. + if (needs_null_check) { + MaybeRecordImplicitNullCheck(instruction); + } + __ MaybeUnpoisonHeapReference(ref_reg); + __ SetReorder(reordering); + return; + } + // /* HeapReference<Object> */ ref = *(obj + offset) Location no_index = Location::NoLocation(); ScaleFactor no_scale_factor = TIMES_1; @@ -6557,9 +6784,69 @@ void CodeGeneratorMIPS::GenerateArrayLoadWithBakerReadBarrier(HInstruction* inst static_assert( sizeof(mirror::HeapReference<mirror::Object>) == sizeof(int32_t), "art::mirror::HeapReference<art::mirror::Object> and int32_t have different sizes."); + ScaleFactor scale_factor = TIMES_4; + + if (kBakerReadBarrierThunksEnableForArrays) { + // Note that we do not actually check the value of `GetIsGcMarking()` + // to decide whether to mark the loaded reference or not. Instead, we + // load into `temp` (T9) the read barrier mark introspection entrypoint. + // If `temp` is null, it means that `GetIsGcMarking()` is false, and + // vice versa. + // + // We use thunks for the slow path. That thunk checks the reference + // and jumps to the entrypoint if needed. If the holder is not gray, + // it issues a load-load memory barrier and returns to the original + // reference load. + // + // temp = Thread::Current()->pReadBarrierMarkReg00 + // // AKA &art_quick_read_barrier_mark_introspection. + // if (temp != nullptr) { + // temp = &field_array_thunk<holder_reg> + // temp() + // } + // not_gray_return_address: + // // The element address is pre-calculated in the TMP register before the + // // thunk invocation and the thunk benefits from it. + // HeapReference<mirror::Object> reference = data[index]; // Original reference load. + // gray_return_address: + + DCHECK(temp.IsInvalid()); + DCHECK(index.IsValid()); + bool reordering = __ SetReorder(false); + const int32_t entry_point_offset = + Thread::ReadBarrierMarkEntryPointsOffset<kMipsPointerSize>(0); + // We will not do the explicit null check in the thunk as some form of a null check + // must've been done earlier. + DCHECK(!needs_null_check); + const int thunk_disp = GetBakerMarkFieldArrayThunkDisplacement(obj, /* short_offset */ false); + // Loading the entrypoint does not require a load acquire since it is only changed when + // threads are suspended or running a checkpoint. 
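The offset_low/offset_high split used in these emission paths is the same sign-extension compensation seen in the JIT patching code, applied at compile time: the high half is High16Bits(offset - offset_low) rather than High16Bits(offset), so that base + sign-extended offset_low lands exactly on obj + offset. A standalone check (the example offset is assumed):

#include <cassert>
#include <cstdint>

int16_t Low16Bits(uint32_t v) { return static_cast<int16_t>(v & 0xFFFFu); }
int16_t High16Bits(uint32_t v) { return static_cast<int16_t>(v >> 16); }

int main() {
  // For an offset outside lw's signed 16-bit range the code builds
  // base = obj + (offset_high << 16) (aui, or lui+addu) and loads at
  // offset_low(base).
  uint32_t offset = 0x0001A678u;           // Bit 15 of the low half is set.
  int16_t offset_low = Low16Bits(offset);  // -0x5988 after sign extension.
  int16_t offset_high = High16Bits(
      offset - static_cast<uint32_t>(static_cast<int32_t>(offset_low)));  // 2, not 1.
  uint32_t base_delta = static_cast<uint32_t>(offset_high) << 16;
  assert(base_delta + static_cast<uint32_t>(static_cast<int32_t>(offset_low)) == offset);
  return 0;
}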
+ __ LoadFromOffset(kLoadWord, T9, TR, entry_point_offset); + Register ref_reg = ref.AsRegister<Register>(); + Register index_reg = index.IsRegisterPair() + ? index.AsRegisterPairLow<Register>() + : index.AsRegister<Register>(); + if (GetInstructionSetFeatures().IsR6()) { + __ Beqz(T9, 2); // Skip jialc. + __ Lsa(TMP, index_reg, obj, scale_factor); // In delay slot. + __ Jialc(T9, thunk_disp); + } else { + __ Sll(TMP, index_reg, scale_factor); + __ Beqz(T9, 2); // Skip jalr. + __ Addiu(T9, T9, thunk_disp); // In delay slot. + __ Jalr(T9); + __ Addu(TMP, TMP, obj); // In delay slot. + } + // /* HeapReference<Object> */ ref = *(obj + data_offset + (index << scale_factor)) + DCHECK(IsInt<16>(static_cast<int32_t>(data_offset))) << data_offset; + __ LoadFromOffset(kLoadWord, ref_reg, TMP, data_offset); // Single instruction. + __ MaybeUnpoisonHeapReference(ref_reg); + __ SetReorder(reordering); + return; + } + // /* HeapReference<Object> */ ref = // *(obj + data_offset + index * sizeof(HeapReference<Object>)) - ScaleFactor scale_factor = TIMES_4; GenerateReferenceLoadWithBakerReadBarrier(instruction, ref, obj, @@ -7461,10 +7748,14 @@ void InstructionCodeGeneratorMIPS::VisitLoadClass(HLoadClass* cls) NO_THREAD_SAF bool reordering = __ SetReorder(false); codegen_->EmitPcRelativeAddressPlaceholderHigh(bss_info_high, temp, - base_or_current_method_reg, - info_low); - GenerateGcRootFieldLoad(cls, out_loc, temp, /* placeholder */ 0x5678, read_barrier_option); + base_or_current_method_reg); __ SetReorder(reordering); + GenerateGcRootFieldLoad(cls, + out_loc, + temp, + /* placeholder */ 0x5678, + read_barrier_option, + &info_low->label); generate_null_check = true; break; } @@ -7475,8 +7766,13 @@ void InstructionCodeGeneratorMIPS::VisitLoadClass(HLoadClass* cls) NO_THREAD_SAF bool reordering = __ SetReorder(false); __ Bind(&info->high_label); __ Lui(out, /* placeholder */ 0x1234); - GenerateGcRootFieldLoad(cls, out_loc, out, /* placeholder */ 0x5678, read_barrier_option); __ SetReorder(reordering); + GenerateGcRootFieldLoad(cls, + out_loc, + out, + /* placeholder */ 0x5678, + read_barrier_option, + &info->low_label); break; } case HLoadClass::LoadKind::kRuntimeCall: @@ -7623,14 +7919,14 @@ void InstructionCodeGeneratorMIPS::VisitLoadString(HLoadString* load) NO_THREAD_ bool reordering = __ SetReorder(false); codegen_->EmitPcRelativeAddressPlaceholderHigh(info_high, temp, - base_or_current_method_reg, - info_low); + base_or_current_method_reg); + __ SetReorder(reordering); GenerateGcRootFieldLoad(load, out_loc, temp, /* placeholder */ 0x5678, - kCompilerReadBarrierOption); - __ SetReorder(reordering); + kCompilerReadBarrierOption, + &info_low->label); SlowPathCodeMIPS* slow_path = new (GetGraph()->GetArena()) LoadStringSlowPathMIPS(load, info_high); codegen_->AddSlowPath(slow_path); @@ -7646,12 +7942,13 @@ void InstructionCodeGeneratorMIPS::VisitLoadString(HLoadString* load) NO_THREAD_ bool reordering = __ SetReorder(false); __ Bind(&info->high_label); __ Lui(out, /* placeholder */ 0x1234); + __ SetReorder(reordering); GenerateGcRootFieldLoad(load, out_loc, out, /* placeholder */ 0x5678, - kCompilerReadBarrierOption); - __ SetReorder(reordering); + kCompilerReadBarrierOption, + &info->low_label); return; } default: diff --git a/compiler/optimizing/code_generator_mips.h b/compiler/optimizing/code_generator_mips.h index 52ee852269..7195b9d89d 100644 --- a/compiler/optimizing/code_generator_mips.h +++ b/compiler/optimizing/code_generator_mips.h @@ -285,7 +285,8 @@ class InstructionCodeGeneratorMIPS : 
public InstructionCodeGenerator { Location root, Register obj, uint32_t offset, - ReadBarrierOption read_barrier_option); + ReadBarrierOption read_barrier_option, + MipsLabel* label_low = nullptr); void GenerateIntCompare(IfCondition cond, LocationSummary* locations); // When the function returns `false` it means that the condition holds if `dst` is non-zero @@ -637,7 +638,7 @@ class CodeGeneratorMIPS : public CodeGenerator { void EmitPcRelativeAddressPlaceholderHigh(PcRelativePatchInfo* info_high, Register out, Register base, - PcRelativePatchInfo* info_low); + PcRelativePatchInfo* info_low = nullptr); // The JitPatchInfo is used for JIT string and class loads. struct JitPatchInfo { @@ -649,8 +650,9 @@ class CodeGeneratorMIPS : public CodeGenerator { // String/type index. uint64_t index; // Label for the instruction loading the most significant half of the address. - // The least significant half is loaded with the instruction that follows immediately. MipsLabel high_label; + // Label for the instruction supplying the least significant half of the address. + MipsLabel low_label; }; void PatchJitRootUse(uint8_t* code, diff --git a/compiler/optimizing/code_generator_mips64.cc b/compiler/optimizing/code_generator_mips64.cc index 454a2ddc14..3e79f474b6 100644 --- a/compiler/optimizing/code_generator_mips64.cc +++ b/compiler/optimizing/code_generator_mips64.cc @@ -16,6 +16,7 @@ #include "code_generator_mips64.h" +#include "arch/mips64/asm_support_mips64.h" #include "art_method.h" #include "code_generator_utils.h" #include "compiled_method.h" @@ -38,6 +39,11 @@ namespace mips64 { static constexpr int kCurrentMethodStackOffset = 0; static constexpr GpuRegister kMethodRegisterArgument = A0; +// Flags controlling the use of thunks for Baker read barriers. +constexpr bool kBakerReadBarrierThunksEnableForFields = true; +constexpr bool kBakerReadBarrierThunksEnableForArrays = true; +constexpr bool kBakerReadBarrierThunksEnableForGcRoots = true; + Location Mips64ReturnLocation(Primitive::Type return_type) { switch (return_type) { case Primitive::kPrimBoolean: @@ -1649,8 +1655,10 @@ void CodeGeneratorMIPS64::EmitPcRelativeAddressPlaceholderHigh(PcRelativePatchIn __ Auipc(out, /* placeholder */ 0x1234); // A following instruction will add the sign-extended low half of the 32-bit // offset to `out` (e.g. ld, jialc, daddiu). - DCHECK_EQ(info_low->patch_info_high, info_high); - __ Bind(&info_low->label); + if (info_low != nullptr) { + DCHECK_EQ(info_low->patch_info_high, info_high); + __ Bind(&info_low->label); + } } Literal* CodeGeneratorMIPS64::DeduplicateJitStringLiteral(const DexFile& dex_file, @@ -2117,7 +2125,12 @@ void LocationsBuilderMIPS64::VisitArrayGet(HArrayGet* instruction) { // We need a temporary register for the read barrier marking slow // path in CodeGeneratorMIPS64::GenerateArrayLoadWithBakerReadBarrier. if (object_array_get_with_read_barrier && kUseBakerReadBarrier) { - locations->AddTemp(Location::RequiresRegister()); + bool temp_needed = instruction->GetIndex()->IsConstant() + ? !kBakerReadBarrierThunksEnableForFields + : !kBakerReadBarrierThunksEnableForArrays; + if (temp_needed) { + locations->AddTemp(Location::RequiresRegister()); + } } } @@ -2254,16 +2267,32 @@ void InstructionCodeGeneratorMIPS64::VisitArrayGet(HArrayGet* instruction) { // /* HeapReference<Object> */ out = // *(obj + data_offset + index * sizeof(HeapReference<Object>)) if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) { - Location temp = locations->GetTemp(0); + bool temp_needed = index.IsConstant() + ? 
!kBakerReadBarrierThunksEnableForFields + : !kBakerReadBarrierThunksEnableForArrays; + Location temp = temp_needed ? locations->GetTemp(0) : Location::NoLocation(); // Note that a potential implicit null check is handled in this // CodeGeneratorMIPS64::GenerateArrayLoadWithBakerReadBarrier call. - codegen_->GenerateArrayLoadWithBakerReadBarrier(instruction, - out_loc, - obj, - data_offset, - index, - temp, - /* needs_null_check */ true); + DCHECK(!instruction->CanDoImplicitNullCheckOn(instruction->InputAt(0))); + if (index.IsConstant()) { + // Array load with a constant index can be treated as a field load. + size_t offset = + (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_4) + data_offset; + codegen_->GenerateFieldLoadWithBakerReadBarrier(instruction, + out_loc, + obj, + offset, + temp, + /* needs_null_check */ false); + } else { + codegen_->GenerateArrayLoadWithBakerReadBarrier(instruction, + out_loc, + obj, + data_offset, + index, + temp, + /* needs_null_check */ false); + } } else { GpuRegister out = out_loc.AsRegister<GpuRegister>(); if (index.IsConstant()) { @@ -2666,6 +2695,7 @@ void InstructionCodeGeneratorMIPS64::VisitBoundsCheck(HBoundsCheck* instruction) // Temp is used for read barrier. static size_t NumberOfInstanceOfTemps(TypeCheckKind type_check_kind) { if (kEmitCompilerReadBarrier && + !(kUseBakerReadBarrier && kBakerReadBarrierThunksEnableForFields) && (kUseBakerReadBarrier || type_check_kind == TypeCheckKind::kAbstractClassCheck || type_check_kind == TypeCheckKind::kClassHierarchyCheck || @@ -4118,7 +4148,9 @@ void LocationsBuilderMIPS64::HandleFieldGet(HInstruction* instruction, if (object_field_get_with_read_barrier && kUseBakerReadBarrier) { // We need a temporary register for the read barrier marking slow // path in CodeGeneratorMIPS64::GenerateFieldLoadWithBakerReadBarrier. - locations->AddTemp(Location::RequiresRegister()); + if (!kBakerReadBarrierThunksEnableForFields) { + locations->AddTemp(Location::RequiresRegister()); + } } } @@ -4168,7 +4200,8 @@ void InstructionCodeGeneratorMIPS64::HandleFieldGet(HInstruction* instruction, if (type == Primitive::kPrimNot) { // /* HeapReference<Object> */ dst = *(obj + offset) if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) { - Location temp_loc = locations->GetTemp(0); + Location temp_loc = + kBakerReadBarrierThunksEnableForFields ? Location::NoLocation() : locations->GetTemp(0); // Note that a potential implicit null check is handled in this // CodeGeneratorMIPS64::GenerateFieldLoadWithBakerReadBarrier call. codegen_->GenerateFieldLoadWithBakerReadBarrier(instruction, @@ -4318,7 +4351,9 @@ void InstructionCodeGeneratorMIPS64::GenerateReferenceLoadOneRegister( GpuRegister out_reg = out.AsRegister<GpuRegister>(); if (read_barrier_option == kWithReadBarrier) { CHECK(kEmitCompilerReadBarrier); - DCHECK(maybe_temp.IsRegister()) << maybe_temp; + if (!kUseBakerReadBarrier || !kBakerReadBarrierThunksEnableForFields) { + DCHECK(maybe_temp.IsRegister()) << maybe_temp; + } if (kUseBakerReadBarrier) { // Load with fast path based Baker's read barrier. // /* HeapReference<Object> */ out = *(out + offset) @@ -4358,7 +4393,9 @@ void InstructionCodeGeneratorMIPS64::GenerateReferenceLoadTwoRegisters( if (read_barrier_option == kWithReadBarrier) { CHECK(kEmitCompilerReadBarrier); if (kUseBakerReadBarrier) { - DCHECK(maybe_temp.IsRegister()) << maybe_temp; + if (!kBakerReadBarrierThunksEnableForFields) { + DCHECK(maybe_temp.IsRegister()) << maybe_temp; + } // Load with fast path based Baker's read barrier. 
// /* HeapReference<Object> */ out = *(obj + offset) codegen_->GenerateFieldLoadWithBakerReadBarrier(instruction, @@ -4381,55 +4418,134 @@ void InstructionCodeGeneratorMIPS64::GenerateReferenceLoadTwoRegisters( } } -void InstructionCodeGeneratorMIPS64::GenerateGcRootFieldLoad( - HInstruction* instruction, - Location root, - GpuRegister obj, - uint32_t offset, - ReadBarrierOption read_barrier_option) { +static inline int GetBakerMarkThunkNumber(GpuRegister reg) { + static_assert(BAKER_MARK_INTROSPECTION_REGISTER_COUNT == 20, "Expecting equal"); + if (reg >= V0 && reg <= T2) { // 13 consequtive regs. + return reg - V0; + } else if (reg >= S2 && reg <= S7) { // 6 consequtive regs. + return 13 + (reg - S2); + } else if (reg == S8) { // One more. + return 19; + } + LOG(FATAL) << "Unexpected register " << reg; + UNREACHABLE(); +} + +static inline int GetBakerMarkFieldArrayThunkDisplacement(GpuRegister reg, bool short_offset) { + int num = GetBakerMarkThunkNumber(reg) + + (short_offset ? BAKER_MARK_INTROSPECTION_REGISTER_COUNT : 0); + return num * BAKER_MARK_INTROSPECTION_FIELD_ARRAY_ENTRY_SIZE; +} + +static inline int GetBakerMarkGcRootThunkDisplacement(GpuRegister reg) { + return GetBakerMarkThunkNumber(reg) * BAKER_MARK_INTROSPECTION_GC_ROOT_ENTRY_SIZE + + BAKER_MARK_INTROSPECTION_GC_ROOT_ENTRIES_OFFSET; +} + +void InstructionCodeGeneratorMIPS64::GenerateGcRootFieldLoad(HInstruction* instruction, + Location root, + GpuRegister obj, + uint32_t offset, + ReadBarrierOption read_barrier_option, + Mips64Label* label_low) { + if (label_low != nullptr) { + DCHECK_EQ(offset, 0x5678u); + } GpuRegister root_reg = root.AsRegister<GpuRegister>(); if (read_barrier_option == kWithReadBarrier) { DCHECK(kEmitCompilerReadBarrier); if (kUseBakerReadBarrier) { // Fast path implementation of art::ReadBarrier::BarrierForRoot when // Baker's read barrier are used: - // - // root = obj.field; - // temp = Thread::Current()->pReadBarrierMarkReg ## root.reg() - // if (temp != null) { - // root = temp(root) - // } - - // /* GcRoot<mirror::Object> */ root = *(obj + offset) - __ LoadFromOffset(kLoadUnsignedWord, root_reg, obj, offset); - static_assert( - sizeof(mirror::CompressedReference<mirror::Object>) == sizeof(GcRoot<mirror::Object>), - "art::mirror::CompressedReference<mirror::Object> and art::GcRoot<mirror::Object> " - "have different sizes."); - static_assert(sizeof(mirror::CompressedReference<mirror::Object>) == sizeof(int32_t), - "art::mirror::CompressedReference<mirror::Object> and int32_t " - "have different sizes."); - - // Slow path marking the GC root `root`. - Location temp = Location::RegisterLocation(T9); - SlowPathCodeMIPS64* slow_path = - new (GetGraph()->GetArena()) ReadBarrierMarkSlowPathMIPS64( - instruction, - root, - /*entrypoint*/ temp); - codegen_->AddSlowPath(slow_path); + if (kBakerReadBarrierThunksEnableForGcRoots) { + // Note that we do not actually check the value of `GetIsGcMarking()` + // to decide whether to mark the loaded GC root or not. Instead, we + // load into `temp` (T9) the read barrier mark introspection entrypoint. + // If `temp` is null, it means that `GetIsGcMarking()` is false, and + // vice versa. + // + // We use thunks for the slow path. That thunk checks the reference + // and jumps to the entrypoint if needed. + // + // temp = Thread::Current()->pReadBarrierMarkReg00 + // // AKA &art_quick_read_barrier_mark_introspection. + // GcRoot<mirror::Object> root = *(obj+offset); // Original reference load. 
+ // if (temp != nullptr) { + // temp = &gc_root_thunk<root_reg> + // root = temp(root) + // } + + const int32_t entry_point_offset = + Thread::ReadBarrierMarkEntryPointsOffset<kMips64PointerSize>(0); + const int thunk_disp = GetBakerMarkGcRootThunkDisplacement(root_reg); + int16_t offset_low = Low16Bits(offset); + int16_t offset_high = High16Bits(offset - offset_low); // Accounts for sign + // extension in lwu. + bool short_offset = IsInt<16>(static_cast<int32_t>(offset)); + GpuRegister base = short_offset ? obj : TMP; + // Loading the entrypoint does not require a load acquire since it is only changed when + // threads are suspended or running a checkpoint. + __ LoadFromOffset(kLoadDoubleword, T9, TR, entry_point_offset); + if (!short_offset) { + DCHECK(!label_low); + __ Daui(base, obj, offset_high); + } + __ Beqz(T9, 2); // Skip jialc. + if (label_low != nullptr) { + DCHECK(short_offset); + __ Bind(label_low); + } + // /* GcRoot<mirror::Object> */ root = *(obj + offset) + __ LoadFromOffset(kLoadUnsignedWord, root_reg, base, offset_low); // Single instruction + // in delay slot. + __ Jialc(T9, thunk_disp); + } else { + // Note that we do not actually check the value of `GetIsGcMarking()` + // to decide whether to mark the loaded GC root or not. Instead, we + // load into `temp` (T9) the read barrier mark entry point corresponding + // to register `root`. If `temp` is null, it means that `GetIsGcMarking()` + // is false, and vice versa. + // + // GcRoot<mirror::Object> root = *(obj+offset); // Original reference load. + // temp = Thread::Current()->pReadBarrierMarkReg ## root.reg() + // if (temp != null) { + // root = temp(root) + // } + + if (label_low != nullptr) { + __ Bind(label_low); + } + // /* GcRoot<mirror::Object> */ root = *(obj + offset) + __ LoadFromOffset(kLoadUnsignedWord, root_reg, obj, offset); + static_assert( + sizeof(mirror::CompressedReference<mirror::Object>) == sizeof(GcRoot<mirror::Object>), + "art::mirror::CompressedReference<mirror::Object> and art::GcRoot<mirror::Object> " + "have different sizes."); + static_assert(sizeof(mirror::CompressedReference<mirror::Object>) == sizeof(int32_t), + "art::mirror::CompressedReference<mirror::Object> and int32_t " + "have different sizes."); + + // Slow path marking the GC root `root`. + Location temp = Location::RegisterLocation(T9); + SlowPathCodeMIPS64* slow_path = + new (GetGraph()->GetArena()) ReadBarrierMarkSlowPathMIPS64( + instruction, + root, + /*entrypoint*/ temp); + codegen_->AddSlowPath(slow_path); - // temp = Thread::Current()->pReadBarrierMarkReg ## root.reg() - const int32_t entry_point_offset = - Thread::ReadBarrierMarkEntryPointsOffset<kMips64PointerSize>(root.reg() - 1); - // Loading the entrypoint does not require a load acquire since it is only changed when - // threads are suspended or running a checkpoint. - __ LoadFromOffset(kLoadDoubleword, temp.AsRegister<GpuRegister>(), TR, entry_point_offset); - // The entrypoint is null when the GC is not marking, this prevents one load compared to - // checking GetIsGcMarking. - __ Bnezc(temp.AsRegister<GpuRegister>(), slow_path->GetEntryLabel()); - __ Bind(slow_path->GetExitLabel()); + const int32_t entry_point_offset = + Thread::ReadBarrierMarkEntryPointsOffset<kMips64PointerSize>(root.reg() - 1); + // Loading the entrypoint does not require a load acquire since it is only changed when + // threads are suspended or running a checkpoint. 
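For MIPS64 the register-to-thunk-number mapping covers 20 slots instead of the 21 on MIPS32 (V0-T2, S2-S7, and S8, per the static_assert above). A sketch of that mapping using the standard MIPS hardware register numbers (the numeric encodings are general MIPS knowledge, not taken from this patch):

#include <cstdint>

// Standard MIPS register numbers under the 64-bit ABI naming.
constexpr int kV0 = 2, kT2 = 14, kS2 = 18, kS7 = 23, kS8 = 30;

constexpr int Mips64ThunkNumber(int reg) {
  return (reg >= kV0 && reg <= kT2) ? reg - kV0         // 13 consecutive regs.
       : (reg >= kS2 && reg <= kS7) ? 13 + (reg - kS2)  // 6 consecutive regs.
       : 19;                                            // S8, the last slot.
}

static_assert(Mips64ThunkNumber(kV0) == 0, "first slot");
static_assert(Mips64ThunkNumber(kS8) == 19,
              "matches BAKER_MARK_INTROSPECTION_REGISTER_COUNT == 20");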
+ __ LoadFromOffset(kLoadDoubleword, temp.AsRegister<GpuRegister>(), TR, entry_point_offset); + __ Bnezc(temp.AsRegister<GpuRegister>(), slow_path->GetEntryLabel()); + __ Bind(slow_path->GetExitLabel()); + } } else { + if (label_low != nullptr) { + __ Bind(label_low); + } // GC root loaded through a slow path for read barriers other // than Baker's. // /* GcRoot<mirror::Object>* */ root = obj + offset @@ -4438,6 +4554,9 @@ void InstructionCodeGeneratorMIPS64::GenerateGcRootFieldLoad( codegen_->GenerateReadBarrierForRootSlow(instruction, root, root); } } else { + if (label_low != nullptr) { + __ Bind(label_low); + } // Plain GC root load with no read barrier. // /* GcRoot<mirror::Object> */ root = *(obj + offset) __ LoadFromOffset(kLoadUnsignedWord, root_reg, obj, offset); @@ -4455,6 +4574,71 @@ void CodeGeneratorMIPS64::GenerateFieldLoadWithBakerReadBarrier(HInstruction* in DCHECK(kEmitCompilerReadBarrier); DCHECK(kUseBakerReadBarrier); + if (kBakerReadBarrierThunksEnableForFields) { + // Note that we do not actually check the value of `GetIsGcMarking()` + // to decide whether to mark the loaded reference or not. Instead, we + // load into `temp` (T9) the read barrier mark introspection entrypoint. + // If `temp` is null, it means that `GetIsGcMarking()` is false, and + // vice versa. + // + // We use thunks for the slow path. That thunk checks the reference + // and jumps to the entrypoint if needed. If the holder is not gray, + // it issues a load-load memory barrier and returns to the original + // reference load. + // + // temp = Thread::Current()->pReadBarrierMarkReg00 + // // AKA &art_quick_read_barrier_mark_introspection. + // if (temp != nullptr) { + // temp = &field_array_thunk<holder_reg> + // temp() + // } + // not_gray_return_address: + // // If the offset is too large to fit into the lw instruction, we + // // use an adjusted base register (TMP) here. This register + // // receives bits 16 ... 31 of the offset before the thunk invocation + // // and the thunk benefits from it. + // HeapReference<mirror::Object> reference = *(obj+offset); // Original reference load. + // gray_return_address: + + DCHECK(temp.IsInvalid()); + bool short_offset = IsInt<16>(static_cast<int32_t>(offset)); + const int32_t entry_point_offset = + Thread::ReadBarrierMarkEntryPointsOffset<kMips64PointerSize>(0); + // There may have or may have not been a null check if the field offset is smaller than + // the page size. + // There must've been a null check in case it's actually a load from an array. + // We will, however, perform an explicit null check in the thunk as it's easier to + // do it than not. + if (instruction->IsArrayGet()) { + DCHECK(!needs_null_check); + } + const int thunk_disp = GetBakerMarkFieldArrayThunkDisplacement(obj, short_offset); + // Loading the entrypoint does not require a load acquire since it is only changed when + // threads are suspended or running a checkpoint. + __ LoadFromOffset(kLoadDoubleword, T9, TR, entry_point_offset); + GpuRegister ref_reg = ref.AsRegister<GpuRegister>(); + if (short_offset) { + __ Beqzc(T9, 2); // Skip jialc. + __ Nop(); // In forbidden slot. + __ Jialc(T9, thunk_disp); + // /* HeapReference<Object> */ ref = *(obj + offset) + __ LoadFromOffset(kLoadUnsignedWord, ref_reg, obj, offset); // Single instruction. + } else { + int16_t offset_low = Low16Bits(offset); + int16_t offset_high = High16Bits(offset - offset_low); // Accounts for sign extension in lwu. + __ Beqz(T9, 2); // Skip jialc. + __ Daui(TMP, obj, offset_high); // In delay slot. 
+ __ Jialc(T9, thunk_disp); + // /* HeapReference<Object> */ ref = *(obj + offset) + __ LoadFromOffset(kLoadUnsignedWord, ref_reg, TMP, offset_low); // Single instruction. + } + if (needs_null_check) { + MaybeRecordImplicitNullCheck(instruction); + } + __ MaybeUnpoisonHeapReference(ref_reg); + return; + } + // /* HeapReference<Object> */ ref = *(obj + offset) Location no_index = Location::NoLocation(); ScaleFactor no_scale_factor = TIMES_1; @@ -4481,9 +4665,57 @@ void CodeGeneratorMIPS64::GenerateArrayLoadWithBakerReadBarrier(HInstruction* in static_assert( sizeof(mirror::HeapReference<mirror::Object>) == sizeof(int32_t), "art::mirror::HeapReference<art::mirror::Object> and int32_t have different sizes."); + ScaleFactor scale_factor = TIMES_4; + + if (kBakerReadBarrierThunksEnableForArrays) { + // Note that we do not actually check the value of `GetIsGcMarking()` + // to decide whether to mark the loaded reference or not. Instead, we + // load into `temp` (T9) the read barrier mark introspection entrypoint. + // If `temp` is null, it means that `GetIsGcMarking()` is false, and + // vice versa. + // + // We use thunks for the slow path. That thunk checks the reference + // and jumps to the entrypoint if needed. If the holder is not gray, + // it issues a load-load memory barrier and returns to the original + // reference load. + // + // temp = Thread::Current()->pReadBarrierMarkReg00 + // // AKA &art_quick_read_barrier_mark_introspection. + // if (temp != nullptr) { + // temp = &field_array_thunk<holder_reg> + // temp() + // } + // not_gray_return_address: + // // The element address is pre-calculated in the TMP register before the + // // thunk invocation and the thunk benefits from it. + // HeapReference<mirror::Object> reference = data[index]; // Original reference load. + // gray_return_address: + + DCHECK(temp.IsInvalid()); + DCHECK(index.IsValid()); + const int32_t entry_point_offset = + Thread::ReadBarrierMarkEntryPointsOffset<kMips64PointerSize>(0); + // We will not do the explicit null check in the thunk as some form of a null check + // must've been done earlier. + DCHECK(!needs_null_check); + const int thunk_disp = GetBakerMarkFieldArrayThunkDisplacement(obj, /* short_offset */ false); + // Loading the entrypoint does not require a load acquire since it is only changed when + // threads are suspended or running a checkpoint. + __ LoadFromOffset(kLoadDoubleword, T9, TR, entry_point_offset); + __ Beqz(T9, 2); // Skip jialc. + GpuRegister ref_reg = ref.AsRegister<GpuRegister>(); + GpuRegister index_reg = index.AsRegister<GpuRegister>(); + __ Dlsa(TMP, index_reg, obj, scale_factor); // In delay slot. + __ Jialc(T9, thunk_disp); + // /* HeapReference<Object> */ ref = *(obj + data_offset + (index << scale_factor)) + DCHECK(IsInt<16>(static_cast<int32_t>(data_offset))) << data_offset; + __ LoadFromOffset(kLoadUnsignedWord, ref_reg, TMP, data_offset); // Single instruction. + __ MaybeUnpoisonHeapReference(ref_reg); + return; + } + // /* HeapReference<Object> */ ref = // *(obj + data_offset + index * sizeof(HeapReference<Object>)) - ScaleFactor scale_factor = TIMES_4; GenerateReferenceLoadWithBakerReadBarrier(instruction, ref, obj, @@ -5278,8 +5510,13 @@ void InstructionCodeGeneratorMIPS64::VisitLoadClass(HLoadClass* cls) NO_THREAD_S GpuRegister temp = non_baker_read_barrier ? 
out : locations->GetTemp(0).AsRegister<GpuRegister>(); - codegen_->EmitPcRelativeAddressPlaceholderHigh(bss_info_high, temp, info_low); - GenerateGcRootFieldLoad(cls, out_loc, temp, /* placeholder */ 0x5678, read_barrier_option); + codegen_->EmitPcRelativeAddressPlaceholderHigh(bss_info_high, temp); + GenerateGcRootFieldLoad(cls, + out_loc, + temp, + /* placeholder */ 0x5678, + read_barrier_option, + &info_low->label); generate_null_check = true; break; } @@ -5399,12 +5636,13 @@ void InstructionCodeGeneratorMIPS64::VisitLoadString(HLoadString* load) NO_THREA GpuRegister temp = non_baker_read_barrier ? out : locations->GetTemp(0).AsRegister<GpuRegister>(); - codegen_->EmitPcRelativeAddressPlaceholderHigh(info_high, temp, info_low); + codegen_->EmitPcRelativeAddressPlaceholderHigh(info_high, temp); GenerateGcRootFieldLoad(load, out_loc, temp, /* placeholder */ 0x5678, - kCompilerReadBarrierOption); + kCompilerReadBarrierOption, + &info_low->label); SlowPathCodeMIPS64* slow_path = new (GetGraph()->GetArena()) LoadStringSlowPathMIPS64(load, info_high); codegen_->AddSlowPath(slow_path); diff --git a/compiler/optimizing/code_generator_mips64.h b/compiler/optimizing/code_generator_mips64.h index c94cc93dad..d03a9eabd4 100644 --- a/compiler/optimizing/code_generator_mips64.h +++ b/compiler/optimizing/code_generator_mips64.h @@ -281,7 +281,8 @@ class InstructionCodeGeneratorMIPS64 : public InstructionCodeGenerator { Location root, GpuRegister obj, uint32_t offset, - ReadBarrierOption read_barrier_option); + ReadBarrierOption read_barrier_option, + Mips64Label* label_low = nullptr); void GenerateTestAndBranch(HInstruction* instruction, size_t condition_input_index, @@ -592,7 +593,7 @@ class CodeGeneratorMIPS64 : public CodeGenerator { void EmitPcRelativeAddressPlaceholderHigh(PcRelativePatchInfo* info_high, GpuRegister out, - PcRelativePatchInfo* info_low); + PcRelativePatchInfo* info_low = nullptr); void PatchJitRootUse(uint8_t* code, const uint8_t* roots_data, diff --git a/compiler/utils/mips/assembler_mips.cc b/compiler/utils/mips/assembler_mips.cc index c581f1c58f..24e34508d1 100644 --- a/compiler/utils/mips/assembler_mips.cc +++ b/compiler/utils/mips/assembler_mips.cc @@ -828,6 +828,22 @@ void MipsAssembler::Aui(Register rt, Register rs, uint16_t imm16) { DsFsmInstrRrr(EmitI(0xf, rs, rt, imm16), rt, rt, rs); } +void MipsAssembler::AddUpper(Register rt, Register rs, uint16_t imm16, Register tmp) { + bool increment = (rs == rt); + if (increment) { + CHECK_NE(rs, tmp); + } + if (IsR6()) { + Aui(rt, rs, imm16); + } else if (increment) { + Lui(tmp, imm16); + Addu(rt, rs, tmp); + } else { + Lui(rt, imm16); + Addu(rt, rs, rt); + } +} + void MipsAssembler::Sync(uint32_t stype) { DsFsmInstrNop(EmitR(0, ZERO, ZERO, ZERO, stype & 0x1f, 0xf)); } diff --git a/compiler/utils/mips/assembler_mips.h b/compiler/utils/mips/assembler_mips.h index 33803bb576..e42bb3fa3d 100644 --- a/compiler/utils/mips/assembler_mips.h +++ b/compiler/utils/mips/assembler_mips.h @@ -280,6 +280,7 @@ class MipsAssembler FINAL : public Assembler, public JNIMacroAssembler<PointerSi void Lwpc(Register rs, uint32_t imm19); // R6 void Lui(Register rt, uint16_t imm16); void Aui(Register rt, Register rs, uint16_t imm16); // R6 + void AddUpper(Register rt, Register rs, uint16_t imm16, Register tmp = AT); void Sync(uint32_t stype); void Mfhi(Register rd); // R2 void Mflo(Register rd); // R2 diff --git a/compiler/utils/mips64/assembler_mips64.cc b/compiler/utils/mips64/assembler_mips64.cc index 24900a7f10..90398540f8 100644 --- 
a/compiler/utils/mips64/assembler_mips64.cc +++ b/compiler/utils/mips64/assembler_mips64.cc @@ -795,6 +795,10 @@ void Mips64Assembler::Bc1nez(FpuRegister ft, uint16_t imm16) { EmitFI(0x11, 0xD, ft, imm16); } +void Mips64Assembler::Beqz(GpuRegister rt, uint16_t imm16) { + EmitI(0x4, ZERO, rt, imm16); +} + void Mips64Assembler::EmitBcondc(BranchCondition cond, GpuRegister rs, GpuRegister rt, diff --git a/compiler/utils/mips64/assembler_mips64.h b/compiler/utils/mips64/assembler_mips64.h index 773db9b208..5e88033743 100644 --- a/compiler/utils/mips64/assembler_mips64.h +++ b/compiler/utils/mips64/assembler_mips64.h @@ -563,6 +563,7 @@ class Mips64Assembler FINAL : public Assembler, public JNIMacroAssembler<Pointer void Bnezc(GpuRegister rs, uint32_t imm21); void Bc1eqz(FpuRegister ft, uint16_t imm16); void Bc1nez(FpuRegister ft, uint16_t imm16); + void Beqz(GpuRegister rt, uint16_t imm16); void AddS(FpuRegister fd, FpuRegister fs, FpuRegister ft); void SubS(FpuRegister fd, FpuRegister fs, FpuRegister ft); diff --git a/runtime/arch/arch_test.cc b/runtime/arch/arch_test.cc index 838ae40838..dd98f5132f 100644 --- a/runtime/arch/arch_test.cc +++ b/runtime/arch/arch_test.cc @@ -129,6 +129,10 @@ static constexpr size_t kFrameSizeSaveRefsAndArgs = FRAME_SIZE_SAVE_REFS_AND_ARG #undef FRAME_SIZE_SAVE_REFS_AND_ARGS static constexpr size_t kFrameSizeSaveEverything = FRAME_SIZE_SAVE_EVERYTHING; #undef FRAME_SIZE_SAVE_EVERYTHING +#undef BAKER_MARK_INTROSPECTION_REGISTER_COUNT +#undef BAKER_MARK_INTROSPECTION_FIELD_ARRAY_ENTRY_SIZE +#undef BAKER_MARK_INTROSPECTION_GC_ROOT_ENTRIES_OFFSET +#undef BAKER_MARK_INTROSPECTION_GC_ROOT_ENTRY_SIZE } // namespace mips namespace mips64 { @@ -141,6 +145,10 @@ static constexpr size_t kFrameSizeSaveRefsAndArgs = FRAME_SIZE_SAVE_REFS_AND_ARG #undef FRAME_SIZE_SAVE_REFS_AND_ARGS static constexpr size_t kFrameSizeSaveEverything = FRAME_SIZE_SAVE_EVERYTHING; #undef FRAME_SIZE_SAVE_EVERYTHING +#undef BAKER_MARK_INTROSPECTION_REGISTER_COUNT +#undef BAKER_MARK_INTROSPECTION_FIELD_ARRAY_ENTRY_SIZE +#undef BAKER_MARK_INTROSPECTION_GC_ROOT_ENTRIES_OFFSET +#undef BAKER_MARK_INTROSPECTION_GC_ROOT_ENTRY_SIZE } // namespace mips64 namespace x86 { diff --git a/runtime/arch/mips/asm_support_mips.S b/runtime/arch/mips/asm_support_mips.S index 948b06ce61..50095ae77e 100644 --- a/runtime/arch/mips/asm_support_mips.S +++ b/runtime/arch/mips/asm_support_mips.S @@ -127,6 +127,13 @@ #endif // USE_HEAP_POISONING .endm +// Byte size of the instructions (un)poisoning heap references. 
+#ifdef USE_HEAP_POISONING +#define HEAP_POISON_INSTR_SIZE 4 +#else +#define HEAP_POISON_INSTR_SIZE 0 +#endif // USE_HEAP_POISONING + // Based on contents of creg select the minimum integer // At the end of the macro the original value of creg is lost .macro MINint dreg,rreg,sreg,creg diff --git a/runtime/arch/mips/asm_support_mips.h b/runtime/arch/mips/asm_support_mips.h index 7437774c13..9d8572ffb5 100644 --- a/runtime/arch/mips/asm_support_mips.h +++ b/runtime/arch/mips/asm_support_mips.h @@ -24,4 +24,24 @@ #define FRAME_SIZE_SAVE_REFS_AND_ARGS 112 #define FRAME_SIZE_SAVE_EVERYTHING 256 +// &art_quick_read_barrier_mark_introspection is the first of many entry points: +// 21 entry points for long field offsets, large array indices and variable array indices +// (see macro BRB_FIELD_LONG_OFFSET_ENTRY) +// 21 entry points for short field offsets and small array indices +// (see macro BRB_FIELD_SHORT_OFFSET_ENTRY) +// 21 entry points for GC roots +// (see macro BRB_GC_ROOT_ENTRY) + +// There are as many entry points of each kind as there are registers that +// can hold a reference: V0-V1, A0-A3, T0-T7, S2-S8. +#define BAKER_MARK_INTROSPECTION_REGISTER_COUNT 21 + +#define BAKER_MARK_INTROSPECTION_FIELD_ARRAY_ENTRY_SIZE (8 * 4) // 8 instructions in + // BRB_FIELD_*_OFFSET_ENTRY. + +#define BAKER_MARK_INTROSPECTION_GC_ROOT_ENTRIES_OFFSET \ + (2 * BAKER_MARK_INTROSPECTION_REGISTER_COUNT * BAKER_MARK_INTROSPECTION_FIELD_ARRAY_ENTRY_SIZE) + +#define BAKER_MARK_INTROSPECTION_GC_ROOT_ENTRY_SIZE (4 * 4) // 4 instructions in BRB_GC_ROOT_ENTRY. + #endif // ART_RUNTIME_ARCH_MIPS_ASM_SUPPORT_MIPS_H_ diff --git a/runtime/arch/mips/entrypoints_init_mips.cc b/runtime/arch/mips/entrypoints_init_mips.cc index 9978da5f74..3010246ac3 100644 --- a/runtime/arch/mips/entrypoints_init_mips.cc +++ b/runtime/arch/mips/entrypoints_init_mips.cc @@ -16,6 +16,7 @@ #include <string.h> +#include "arch/mips/asm_support_mips.h" #include "atomic.h" #include "entrypoints/jni/jni_entrypoints.h" #include "entrypoints/quick/quick_alloc_entrypoints.h" @@ -59,6 +60,10 @@ extern "C" mirror::Object* art_quick_read_barrier_mark_reg21(mirror::Object*); extern "C" mirror::Object* art_quick_read_barrier_mark_reg22(mirror::Object*); extern "C" mirror::Object* art_quick_read_barrier_mark_reg29(mirror::Object*); +extern "C" mirror::Object* art_quick_read_barrier_mark_introspection(mirror::Object*); +extern "C" mirror::Object* art_quick_read_barrier_mark_introspection_gc_roots(mirror::Object*); +extern "C" void art_quick_read_barrier_mark_introspection_end_of_entries(void); + // Math entrypoints. 
extern int32_t CmpgDouble(double a, double b); extern int32_t CmplDouble(double a, double b); @@ -87,6 +92,23 @@ extern "C" int64_t __divdi3(int64_t, int64_t); extern "C" int64_t __moddi3(int64_t, int64_t); void UpdateReadBarrierEntrypoints(QuickEntryPoints* qpoints, bool is_active) { + intptr_t introspection_field_array_entries_size = + reinterpret_cast<intptr_t>(&art_quick_read_barrier_mark_introspection_gc_roots) - + reinterpret_cast<intptr_t>(&art_quick_read_barrier_mark_introspection); + static_assert( + BAKER_MARK_INTROSPECTION_GC_ROOT_ENTRIES_OFFSET == 2 * + BAKER_MARK_INTROSPECTION_REGISTER_COUNT * BAKER_MARK_INTROSPECTION_FIELD_ARRAY_ENTRY_SIZE, + "Expecting equal"); + DCHECK_EQ(introspection_field_array_entries_size, + BAKER_MARK_INTROSPECTION_GC_ROOT_ENTRIES_OFFSET); + intptr_t introspection_gc_root_entries_size = + reinterpret_cast<intptr_t>(&art_quick_read_barrier_mark_introspection_end_of_entries) - + reinterpret_cast<intptr_t>(&art_quick_read_barrier_mark_introspection_gc_roots); + DCHECK_EQ(introspection_gc_root_entries_size, + BAKER_MARK_INTROSPECTION_REGISTER_COUNT * BAKER_MARK_INTROSPECTION_GC_ROOT_ENTRY_SIZE); + qpoints->pReadBarrierMarkReg00 = is_active ? art_quick_read_barrier_mark_introspection : nullptr; + static_assert(!IsDirectEntrypoint(kQuickReadBarrierMarkReg00), + "Non-direct C stub marked direct."); qpoints->pReadBarrierMarkReg01 = is_active ? art_quick_read_barrier_mark_reg01 : nullptr; static_assert(!IsDirectEntrypoint(kQuickReadBarrierMarkReg01), "Non-direct C stub marked direct."); @@ -416,9 +438,6 @@ void InitEntryPoints(JniEntryPoints* jpoints, QuickEntryPoints* qpoints) { // Cannot use the following registers to pass arguments: // 0(ZERO), 1(AT), 16(S0), 17(S1), 24(T8), 25(T9), 26(K0), 27(K1), 28(GP), 29(SP), 31(RA). // Note that there are 30 entry points only: 00 for register 1(AT), ..., 29 for register 30(S8). - qpoints->pReadBarrierMarkReg00 = nullptr; - static_assert(!IsDirectEntrypoint(kQuickReadBarrierMarkReg00), - "Non-direct C stub marked direct."); qpoints->pReadBarrierMarkReg15 = nullptr; static_assert(!IsDirectEntrypoint(kQuickReadBarrierMarkReg15), "Non-direct C stub marked direct."); diff --git a/runtime/arch/mips/quick_entrypoints_mips.S b/runtime/arch/mips/quick_entrypoints_mips.S index 00e3d67207..d9abaa05ec 100644 --- a/runtime/arch/mips/quick_entrypoints_mips.S +++ b/runtime/arch/mips/quick_entrypoints_mips.S @@ -2721,6 +2721,385 @@ READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg22, $s7 READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg29, $s8 // RA (register 31) is reserved. +// Caller code: +// Short constant offset/index: +// R2: | R6: +// lw $t9, pReadBarrierMarkReg00 +// beqz $t9, skip_call | beqzc $t9, skip_call +// addiu $t9, $t9, thunk_disp | nop +// jalr $t9 | jialc $t9, thunk_disp +// nop | +// skip_call: | skip_call: +// lw `out`, ofs(`obj`) | lw `out`, ofs(`obj`) +// [subu `out`, $zero, `out`] | [subu `out`, $zero, `out`] # Unpoison reference. +.macro BRB_FIELD_SHORT_OFFSET_ENTRY obj +1: + # Explicit null check. May be redundant (for array elements or when the field + # offset is larger than the page size, 4KB). + # $ra will be adjusted to point to lw's stack map when throwing NPE. + beqz \obj, .Lintrospection_throw_npe +#if defined(_MIPS_ARCH_MIPS32R6) + lapc $gp, .Lintrospection_exits # $gp = address of .Lintrospection_exits. +#else + addiu $gp, $t9, (.Lintrospection_exits - 1b) # $gp = address of .Lintrospection_exits. 
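+    # (Pre-R6 lacks lapc; the caller reaches this entry via "jalr $t9" with
+    # $t9 holding the entry point address, i.e. label 1b, so the address of
+    # .Lintrospection_exits is computed $t9-relative instead.)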
+#endif + .set push + .set noat + lw $at, MIRROR_OBJECT_LOCK_WORD_OFFSET(\obj) + sll $at, $at, 31 - LOCK_WORD_READ_BARRIER_STATE_SHIFT # Move barrier state bit + # to sign bit. + bltz $at, .Lintrospection_field_array # If gray, load reference, mark. + move $t8, \obj # Move `obj` to $t8 for common code. + .set pop + jalr $zero, $ra # Otherwise, load-load barrier and return. + sync +.endm + +// Caller code (R2): +// Long constant offset/index: | Variable index: +// lw $t9, pReadBarrierMarkReg00 +// lui $t8, ofs_hi | sll $t8, `index`, 2 +// beqz $t9, skip_call | beqz $t9, skip_call +// addiu $t9, $t9, thunk_disp | addiu $t9, $t9, thunk_disp +// jalr $t9 | jalr $t9 +// skip_call: | skip_call: +// addu $t8, $t8, `obj` | addu $t8, $t8, `obj` +// lw `out`, ofs_lo($t8) | lw `out`, ofs($t8) +// [subu `out`, $zero, `out`] | [subu `out`, $zero, `out`] # Unpoison reference. +// +// Caller code (R6): +// Long constant offset/index: | Variable index: +// lw $t9, pReadBarrierMarkReg00 +// beqz $t9, skip_call | beqz $t9, skip_call +// aui $t8, `obj`, ofs_hi | lsa $t8, `index`, `obj`, 2 +// jialc $t9, thunk_disp | jialc $t9, thunk_disp +// skip_call: | skip_call: +// lw `out`, ofs_lo($t8) | lw `out`, ofs($t8) +// [subu `out`, $zero, `out`] | [subu `out`, $zero, `out`] # Unpoison reference. +.macro BRB_FIELD_LONG_OFFSET_ENTRY obj +1: + # No explicit null check for variable indices or large constant indices/offsets + # as it must have been done earlier. +#if defined(_MIPS_ARCH_MIPS32R6) + lapc $gp, .Lintrospection_exits # $gp = address of .Lintrospection_exits. +#else + addiu $gp, $t9, (.Lintrospection_exits - 1b) # $gp = address of .Lintrospection_exits. +#endif + .set push + .set noat + lw $at, MIRROR_OBJECT_LOCK_WORD_OFFSET(\obj) + sll $at, $at, 31 - LOCK_WORD_READ_BARRIER_STATE_SHIFT # Move barrier state bit + # to sign bit. + bltz $at, .Lintrospection_field_array # If gray, load reference, mark. + nop + .set pop + jalr $zero, $ra # Otherwise, load-load barrier and return. + sync + break # Padding to 8 instructions. +.endm + +.macro BRB_GC_ROOT_ENTRY root +1: +#if defined(_MIPS_ARCH_MIPS32R6) + lapc $gp, .Lintrospection_exit_\root # $gp = exit point address. +#else + addiu $gp, $t9, (.Lintrospection_exit_\root - 1b) # $gp = exit point address. +#endif + bnez \root, .Lintrospection_common + move $t8, \root # Move reference to $t8 for common code. + jalr $zero, $ra # Return if null. + # The next instruction (from the following BRB_GC_ROOT_ENTRY) fills the delay slot. + # This instruction has no effect (actual NOP for the last entry; otherwise changes $gp, + # which is unused after that anyway). +.endm + +.macro BRB_FIELD_EXIT out +.Lintrospection_exit_\out: + jalr $zero, $ra + move \out, $t8 # Return reference in expected register. +.endm + +.macro BRB_FIELD_EXIT_BREAK + break + break +.endm + +ENTRY_NO_GP art_quick_read_barrier_mark_introspection + # Entry points for offsets/indices not fitting into int16_t and for variable indices. 
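+    # All three groups below list the 21 possible destination registers in
+    # the same order, and each field/array entry is padded to exactly
+    # 8 instructions (BAKER_MARK_INTROSPECTION_FIELD_ARRAY_ENTRY_SIZE), so
+    # the compiler can reach any entry at a fixed displacement from
+    # art_quick_read_barrier_mark_introspection.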
+ BRB_FIELD_LONG_OFFSET_ENTRY $v0 + BRB_FIELD_LONG_OFFSET_ENTRY $v1 + BRB_FIELD_LONG_OFFSET_ENTRY $a0 + BRB_FIELD_LONG_OFFSET_ENTRY $a1 + BRB_FIELD_LONG_OFFSET_ENTRY $a2 + BRB_FIELD_LONG_OFFSET_ENTRY $a3 + BRB_FIELD_LONG_OFFSET_ENTRY $t0 + BRB_FIELD_LONG_OFFSET_ENTRY $t1 + BRB_FIELD_LONG_OFFSET_ENTRY $t2 + BRB_FIELD_LONG_OFFSET_ENTRY $t3 + BRB_FIELD_LONG_OFFSET_ENTRY $t4 + BRB_FIELD_LONG_OFFSET_ENTRY $t5 + BRB_FIELD_LONG_OFFSET_ENTRY $t6 + BRB_FIELD_LONG_OFFSET_ENTRY $t7 + BRB_FIELD_LONG_OFFSET_ENTRY $s2 + BRB_FIELD_LONG_OFFSET_ENTRY $s3 + BRB_FIELD_LONG_OFFSET_ENTRY $s4 + BRB_FIELD_LONG_OFFSET_ENTRY $s5 + BRB_FIELD_LONG_OFFSET_ENTRY $s6 + BRB_FIELD_LONG_OFFSET_ENTRY $s7 + BRB_FIELD_LONG_OFFSET_ENTRY $s8 + + # Entry points for offsets/indices fitting into int16_t. + BRB_FIELD_SHORT_OFFSET_ENTRY $v0 + BRB_FIELD_SHORT_OFFSET_ENTRY $v1 + BRB_FIELD_SHORT_OFFSET_ENTRY $a0 + BRB_FIELD_SHORT_OFFSET_ENTRY $a1 + BRB_FIELD_SHORT_OFFSET_ENTRY $a2 + BRB_FIELD_SHORT_OFFSET_ENTRY $a3 + BRB_FIELD_SHORT_OFFSET_ENTRY $t0 + BRB_FIELD_SHORT_OFFSET_ENTRY $t1 + BRB_FIELD_SHORT_OFFSET_ENTRY $t2 + BRB_FIELD_SHORT_OFFSET_ENTRY $t3 + BRB_FIELD_SHORT_OFFSET_ENTRY $t4 + BRB_FIELD_SHORT_OFFSET_ENTRY $t5 + BRB_FIELD_SHORT_OFFSET_ENTRY $t6 + BRB_FIELD_SHORT_OFFSET_ENTRY $t7 + BRB_FIELD_SHORT_OFFSET_ENTRY $s2 + BRB_FIELD_SHORT_OFFSET_ENTRY $s3 + BRB_FIELD_SHORT_OFFSET_ENTRY $s4 + BRB_FIELD_SHORT_OFFSET_ENTRY $s5 + BRB_FIELD_SHORT_OFFSET_ENTRY $s6 + BRB_FIELD_SHORT_OFFSET_ENTRY $s7 + BRB_FIELD_SHORT_OFFSET_ENTRY $s8 + + .global art_quick_read_barrier_mark_introspection_gc_roots +art_quick_read_barrier_mark_introspection_gc_roots: + # Entry points for GC roots. + BRB_GC_ROOT_ENTRY $v0 + BRB_GC_ROOT_ENTRY $v1 + BRB_GC_ROOT_ENTRY $a0 + BRB_GC_ROOT_ENTRY $a1 + BRB_GC_ROOT_ENTRY $a2 + BRB_GC_ROOT_ENTRY $a3 + BRB_GC_ROOT_ENTRY $t0 + BRB_GC_ROOT_ENTRY $t1 + BRB_GC_ROOT_ENTRY $t2 + BRB_GC_ROOT_ENTRY $t3 + BRB_GC_ROOT_ENTRY $t4 + BRB_GC_ROOT_ENTRY $t5 + BRB_GC_ROOT_ENTRY $t6 + BRB_GC_ROOT_ENTRY $t7 + BRB_GC_ROOT_ENTRY $s2 + BRB_GC_ROOT_ENTRY $s3 + BRB_GC_ROOT_ENTRY $s4 + BRB_GC_ROOT_ENTRY $s5 + BRB_GC_ROOT_ENTRY $s6 + BRB_GC_ROOT_ENTRY $s7 + BRB_GC_ROOT_ENTRY $s8 + .global art_quick_read_barrier_mark_introspection_end_of_entries +art_quick_read_barrier_mark_introspection_end_of_entries: + nop # Fill the delay slot of the last BRB_GC_ROOT_ENTRY. + +.Lintrospection_throw_npe: + b art_quick_throw_null_pointer_exception + addiu $ra, $ra, 4 # Skip lw, make $ra point to lw's stack map. + + .set push + .set noat + + // Fields and array elements. + +.Lintrospection_field_array: + // Get the field/element address using $t8 and the offset from the lw instruction. + lh $at, 0($ra) # $ra points to lw: $at = field/element offset. + addiu $ra, $ra, 4 + HEAP_POISON_INSTR_SIZE # Skip lw(+subu). + addu $t8, $t8, $at # $t8 = field/element address. + + // Calculate the address of the exit point, store it in $gp and load the reference into $t8. + lb $at, (-HEAP_POISON_INSTR_SIZE - 2)($ra) # $ra-HEAP_POISON_INSTR_SIZE-4 points to + # "lw `out`, ...". + andi $at, $at, 31 # Extract `out` from lw. + sll $at, $at, 3 # Multiply `out` by the exit point size (BRB_FIELD_EXIT* macros). + + lw $t8, 0($t8) # $t8 = reference. + UNPOISON_HEAP_REF $t8 + + // Return if null reference. + bnez $t8, .Lintrospection_common + addu $gp, $gp, $at # $gp = address of the exit point. + + // Early return through the exit point. +.Lintrospection_return_early: + jalr $zero, $gp # Move $t8 to `out` and return. + nop + + // Code common for GC roots, fields and array elements. 
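+// On entry here $t8 holds the non-null, unpoisoned reference and $gp holds
+// the address of the matching per-register exit stub; every path below
+// returns the final reference through that stub, which moves $t8 into the
+// expected `out` register.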
+ +.Lintrospection_common: + // Check lock word for mark bit, if marked return. + lw $t9, MIRROR_OBJECT_LOCK_WORD_OFFSET($t8) + sll $at, $t9, 31 - LOCK_WORD_MARK_BIT_SHIFT # Move mark bit to sign bit. + bltz $at, .Lintrospection_return_early +#if (LOCK_WORD_STATE_SHIFT != 30) || (LOCK_WORD_STATE_FORWARDING_ADDRESS != 3) + // The below code depends on the lock word state being in the highest bits + // and the "forwarding address" state having all bits set. +#error "Unexpected lock word state shift or forwarding address state value." +#endif + // Test that both the forwarding state bits are 1. + sll $at, $t9, 1 + and $at, $at, $t9 # Sign bit = 1 IFF both bits are 1. + bgez $at, .Lintrospection_mark + nop + + .set pop + + // Shift left by the forwarding address shift. This clears out the state bits since they are + // in the top 2 bits of the lock word. + jalr $zero, $gp # Move $t8 to `out` and return. + sll $t8, $t9, LOCK_WORD_STATE_FORWARDING_ADDRESS_SHIFT + +.Lintrospection_mark: + // Partially set up the stack frame preserving only $ra. + addiu $sp, $sp, -160 # Includes 16 bytes of space for argument registers $a0-$a3. + .cfi_adjust_cfa_offset 160 + sw $ra, 156($sp) + .cfi_rel_offset 31, 156 + + // Set up $gp, clobbering $ra and using the branch delay slot for a useful instruction. + bal 1f + sw $gp, 152($sp) # Preserve the exit point address. +1: + .cpload $ra + + // Finalize the stack frame and call. + sw $t7, 148($sp) + .cfi_rel_offset 15, 148 + sw $t6, 144($sp) + .cfi_rel_offset 14, 144 + sw $t5, 140($sp) + .cfi_rel_offset 13, 140 + sw $t4, 136($sp) + .cfi_rel_offset 12, 136 + sw $t3, 132($sp) + .cfi_rel_offset 11, 132 + sw $t2, 128($sp) + .cfi_rel_offset 10, 128 + sw $t1, 124($sp) + .cfi_rel_offset 9, 124 + sw $t0, 120($sp) + .cfi_rel_offset 8, 120 + sw $a3, 116($sp) + .cfi_rel_offset 7, 116 + sw $a2, 112($sp) + .cfi_rel_offset 6, 112 + sw $a1, 108($sp) + .cfi_rel_offset 5, 108 + sw $a0, 104($sp) + .cfi_rel_offset 4, 104 + sw $v1, 100($sp) + .cfi_rel_offset 3, 100 + sw $v0, 96($sp) + .cfi_rel_offset 2, 96 + + la $t9, artReadBarrierMark + + sdc1 $f18, 88($sp) + sdc1 $f16, 80($sp) + sdc1 $f14, 72($sp) + sdc1 $f12, 64($sp) + sdc1 $f10, 56($sp) + sdc1 $f8, 48($sp) + sdc1 $f6, 40($sp) + sdc1 $f4, 32($sp) + sdc1 $f2, 24($sp) + sdc1 $f0, 16($sp) + + jalr $t9 # $v0 <- artReadBarrierMark(reference) + move $a0, $t8 # Pass reference in $a0. + move $t8, $v0 + + lw $ra, 156($sp) + .cfi_restore 31 + lw $gp, 152($sp) # $gp = address of the exit point. + lw $t7, 148($sp) + .cfi_restore 15 + lw $t6, 144($sp) + .cfi_restore 14 + lw $t5, 140($sp) + .cfi_restore 13 + lw $t4, 136($sp) + .cfi_restore 12 + lw $t3, 132($sp) + .cfi_restore 11 + lw $t2, 128($sp) + .cfi_restore 10 + lw $t1, 124($sp) + .cfi_restore 9 + lw $t0, 120($sp) + .cfi_restore 8 + lw $a3, 116($sp) + .cfi_restore 7 + lw $a2, 112($sp) + .cfi_restore 6 + lw $a1, 108($sp) + .cfi_restore 5 + lw $a0, 104($sp) + .cfi_restore 4 + lw $v1, 100($sp) + .cfi_restore 3 + lw $v0, 96($sp) + .cfi_restore 2 + + ldc1 $f18, 88($sp) + ldc1 $f16, 80($sp) + ldc1 $f14, 72($sp) + ldc1 $f12, 64($sp) + ldc1 $f10, 56($sp) + ldc1 $f8, 48($sp) + ldc1 $f6, 40($sp) + ldc1 $f4, 32($sp) + ldc1 $f2, 24($sp) + ldc1 $f0, 16($sp) + + // Return through the exit point. + jalr $zero, $gp # Move $t8 to `out` and return. 
+ addiu $sp, $sp, 160 + .cfi_adjust_cfa_offset -160 + +.Lintrospection_exits: + BRB_FIELD_EXIT_BREAK + BRB_FIELD_EXIT_BREAK + BRB_FIELD_EXIT $v0 + BRB_FIELD_EXIT $v1 + BRB_FIELD_EXIT $a0 + BRB_FIELD_EXIT $a1 + BRB_FIELD_EXIT $a2 + BRB_FIELD_EXIT $a3 + BRB_FIELD_EXIT $t0 + BRB_FIELD_EXIT $t1 + BRB_FIELD_EXIT $t2 + BRB_FIELD_EXIT $t3 + BRB_FIELD_EXIT $t4 + BRB_FIELD_EXIT $t5 + BRB_FIELD_EXIT $t6 + BRB_FIELD_EXIT $t7 + BRB_FIELD_EXIT_BREAK + BRB_FIELD_EXIT_BREAK + BRB_FIELD_EXIT $s2 + BRB_FIELD_EXIT $s3 + BRB_FIELD_EXIT $s4 + BRB_FIELD_EXIT $s5 + BRB_FIELD_EXIT $s6 + BRB_FIELD_EXIT $s7 + BRB_FIELD_EXIT_BREAK + BRB_FIELD_EXIT_BREAK + BRB_FIELD_EXIT_BREAK + BRB_FIELD_EXIT_BREAK + BRB_FIELD_EXIT_BREAK + BRB_FIELD_EXIT_BREAK + BRB_FIELD_EXIT $s8 + BRB_FIELD_EXIT_BREAK +END art_quick_read_barrier_mark_introspection + .extern artInvokePolymorphic ENTRY art_quick_invoke_polymorphic SETUP_SAVE_REFS_AND_ARGS_FRAME diff --git a/runtime/arch/mips64/asm_support_mips64.S b/runtime/arch/mips64/asm_support_mips64.S index ef82bd239d..a6b249ae56 100644 --- a/runtime/arch/mips64/asm_support_mips64.S +++ b/runtime/arch/mips64/asm_support_mips64.S @@ -83,6 +83,13 @@ #endif // USE_HEAP_POISONING .endm +// Byte size of the instructions (un)poisoning heap references. +#ifdef USE_HEAP_POISONING +#define HEAP_POISON_INSTR_SIZE 8 +#else +#define HEAP_POISON_INSTR_SIZE 0 +#endif // USE_HEAP_POISONING + // Based on contents of creg select the minimum integer // At the end of the macro the original value of creg is lost .macro MINint dreg,rreg,sreg,creg diff --git a/runtime/arch/mips64/asm_support_mips64.h b/runtime/arch/mips64/asm_support_mips64.h index 9063d20ecf..7185da550c 100644 --- a/runtime/arch/mips64/asm_support_mips64.h +++ b/runtime/arch/mips64/asm_support_mips64.h @@ -28,4 +28,24 @@ // $f0-$f31, $at, $v0-$v1, $a0-$a7, $t0-$t3, $s0-$s7, $t8-$t9, $gp, $s8, $ra + padding + method* #define FRAME_SIZE_SAVE_EVERYTHING 496 +// &art_quick_read_barrier_mark_introspection is the first of many entry points: +// 20 entry points for long field offsets, large array indices and variable array indices +// (see macro BRB_FIELD_LONG_OFFSET_ENTRY) +// 20 entry points for short field offsets and small array indices +// (see macro BRB_FIELD_SHORT_OFFSET_ENTRY) +// 20 entry points for GC roots +// (see macro BRB_GC_ROOT_ENTRY) + +// There are as many entry points of each kind as there are registers that +// can hold a reference: V0-V1, A0-A7, T0-T2, S2-S8. +#define BAKER_MARK_INTROSPECTION_REGISTER_COUNT 20 + +#define BAKER_MARK_INTROSPECTION_FIELD_ARRAY_ENTRY_SIZE (8 * 4) // 8 instructions in + // BRB_FIELD_*_OFFSET_ENTRY. + +#define BAKER_MARK_INTROSPECTION_GC_ROOT_ENTRIES_OFFSET \ + (2 * BAKER_MARK_INTROSPECTION_REGISTER_COUNT * BAKER_MARK_INTROSPECTION_FIELD_ARRAY_ENTRY_SIZE) + +#define BAKER_MARK_INTROSPECTION_GC_ROOT_ENTRY_SIZE (4 * 4) // 4 instructions in BRB_GC_ROOT_ENTRY. 
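+
+// Resulting layout, relative to &art_quick_read_barrier_mark_introspection:
+//   [0, 640):     20 long field/array entries, 32 bytes each
+//   [640, 1280):  20 short field/array entries, 32 bytes each
+//   [1280, 1600): 20 GC root entries, 16 bytes each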
+ #endif // ART_RUNTIME_ARCH_MIPS64_ASM_SUPPORT_MIPS64_H_ diff --git a/runtime/arch/mips64/entrypoints_init_mips64.cc b/runtime/arch/mips64/entrypoints_init_mips64.cc index 007f7b3915..5e588274d8 100644 --- a/runtime/arch/mips64/entrypoints_init_mips64.cc +++ b/runtime/arch/mips64/entrypoints_init_mips64.cc @@ -17,6 +17,7 @@ #include <math.h> #include <string.h> +#include "arch/mips64/asm_support_mips64.h" #include "atomic.h" #include "entrypoints/jni/jni_entrypoints.h" #include "entrypoints/quick/quick_alloc_entrypoints.h" @@ -59,6 +60,10 @@ extern "C" mirror::Object* art_quick_read_barrier_mark_reg21(mirror::Object*); extern "C" mirror::Object* art_quick_read_barrier_mark_reg22(mirror::Object*); extern "C" mirror::Object* art_quick_read_barrier_mark_reg29(mirror::Object*); +extern "C" mirror::Object* art_quick_read_barrier_mark_introspection(mirror::Object*); +extern "C" mirror::Object* art_quick_read_barrier_mark_introspection_gc_roots(mirror::Object*); +extern "C" void art_quick_read_barrier_mark_introspection_end_of_entries(void); + // Math entrypoints. extern int32_t CmpgDouble(double a, double b); extern int32_t CmplDouble(double a, double b); @@ -88,6 +93,21 @@ extern "C" int64_t __moddi3(int64_t, int64_t); // No read barrier entrypoints for marking registers. void UpdateReadBarrierEntrypoints(QuickEntryPoints* qpoints, bool is_active) { + intptr_t introspection_field_array_entries_size = + reinterpret_cast<intptr_t>(&art_quick_read_barrier_mark_introspection_gc_roots) - + reinterpret_cast<intptr_t>(&art_quick_read_barrier_mark_introspection); + static_assert( + BAKER_MARK_INTROSPECTION_GC_ROOT_ENTRIES_OFFSET == 2 * + BAKER_MARK_INTROSPECTION_REGISTER_COUNT * BAKER_MARK_INTROSPECTION_FIELD_ARRAY_ENTRY_SIZE, + "Expecting equal"); + DCHECK_EQ(introspection_field_array_entries_size, + BAKER_MARK_INTROSPECTION_GC_ROOT_ENTRIES_OFFSET); + intptr_t introspection_gc_root_entries_size = + reinterpret_cast<intptr_t>(&art_quick_read_barrier_mark_introspection_end_of_entries) - + reinterpret_cast<intptr_t>(&art_quick_read_barrier_mark_introspection_gc_roots); + DCHECK_EQ(introspection_gc_root_entries_size, + BAKER_MARK_INTROSPECTION_REGISTER_COUNT * BAKER_MARK_INTROSPECTION_GC_ROOT_ENTRY_SIZE); + qpoints->pReadBarrierMarkReg00 = is_active ? art_quick_read_barrier_mark_introspection : nullptr; qpoints->pReadBarrierMarkReg01 = is_active ? art_quick_read_barrier_mark_reg01 : nullptr; qpoints->pReadBarrierMarkReg02 = is_active ? art_quick_read_barrier_mark_reg02 : nullptr; qpoints->pReadBarrierMarkReg03 = is_active ? art_quick_read_barrier_mark_reg03 : nullptr; @@ -173,7 +193,6 @@ void InitEntryPoints(JniEntryPoints* jpoints, QuickEntryPoints* qpoints) { // Cannot use the following registers to pass arguments: // 0(ZERO), 1(AT), 15(T3), 16(S0), 17(S1), 24(T8), 25(T9), 26(K0), 27(K1), 28(GP), 29(SP), 31(RA). // Note that there are 30 entry points only: 00 for register 1(AT), ..., 29 for register 30(S8). 
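  // (pReadBarrierMarkReg00 is no longer nulled here: UpdateReadBarrierEntrypoints()
  // above owns this slot and installs art_quick_read_barrier_mark_introspection
  // while marking is active.)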
- qpoints->pReadBarrierMarkReg00 = nullptr; qpoints->pReadBarrierMarkReg14 = nullptr; qpoints->pReadBarrierMarkReg15 = nullptr; qpoints->pReadBarrierMarkReg16 = nullptr; diff --git a/runtime/arch/mips64/quick_entrypoints_mips64.S b/runtime/arch/mips64/quick_entrypoints_mips64.S index d427fe320b..fcbed0e2e1 100644 --- a/runtime/arch/mips64/quick_entrypoints_mips64.S +++ b/runtime/arch/mips64/quick_entrypoints_mips64.S @@ -847,7 +847,7 @@ ENTRY_NO_GP_CUSTOM_CFA art_quick_throw_null_pointer_exception_from_signal, FRAME dla $t9, artThrowNullPointerExceptionFromSignal jalr $zero, $t9 # artThrowNullPointerExceptionFromSignal(uinptr_t, Thread*) move $a1, rSELF # pass Thread::Current -END art_quick_throw_null_pointer_exception +END art_quick_throw_null_pointer_exception_from_signal /* * Called by managed code to create and deliver an ArithmeticException @@ -2567,6 +2567,375 @@ READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg22, $s7 READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg29, $s8 // RA (register 31) is reserved. +// Caller code: +// Short constant offset/index: +// ld $t9, pReadBarrierMarkReg00 +// beqzc $t9, skip_call +// nop +// jialc $t9, thunk_disp +// skip_call: +// lwu `out`, ofs(`obj`) +// [dsubu `out`, $zero, `out` +// dext `out`, `out`, 0, 32] # Unpoison reference. +.macro BRB_FIELD_SHORT_OFFSET_ENTRY obj + # Explicit null check. May be redundant (for array elements or when the field + # offset is larger than the page size, 4KB). + # $ra will be adjusted to point to lwu's stack map when throwing NPE. + beqzc \obj, .Lintrospection_throw_npe + lapc $t3, .Lintrospection_exits # $t3 = address of .Lintrospection_exits. + .set push + .set noat + lw $at, MIRROR_OBJECT_LOCK_WORD_OFFSET(\obj) + sll $at, $at, 31 - LOCK_WORD_READ_BARRIER_STATE_SHIFT # Move barrier state bit + # to sign bit. + bltz $at, .Lintrospection_field_array # If gray, load reference, mark. + move $t8, \obj # Move `obj` to $t8 for common code. + .set pop + jalr $zero, $ra # Otherwise, load-load barrier and return. + sync +.endm + +// Caller code: +// Long constant offset/index: | Variable index: +// ld $t9, pReadBarrierMarkReg00 +// beqz $t9, skip_call | beqz $t9, skip_call +// daui $t8, `obj`, ofs_hi | dlsa $t8, `index`, `obj`, 2 +// jialc $t9, thunk_disp | jialc $t9, thunk_disp +// skip_call: | skip_call: +// lwu `out`, ofs_lo($t8) | lwu `out`, ofs($t8) +// [dsubu `out`, $zero, `out` | [dsubu `out`, $zero, `out` +// dext `out`, `out`, 0, 32] | dext `out`, `out`, 0, 32] # Unpoison reference. +.macro BRB_FIELD_LONG_OFFSET_ENTRY obj + # No explicit null check for variable indices or large constant indices/offsets + # as it must have been done earlier. + lapc $t3, .Lintrospection_exits # $t3 = address of .Lintrospection_exits. + .set push + .set noat + lw $at, MIRROR_OBJECT_LOCK_WORD_OFFSET(\obj) + sll $at, $at, 31 - LOCK_WORD_READ_BARRIER_STATE_SHIFT # Move barrier state bit + # to sign bit. + bltzc $at, .Lintrospection_field_array # If gray, load reference, mark. + .set pop + sync # Otherwise, load-load barrier and return. + jic $ra, 0 + break # Padding to 8 instructions. + break +.endm + +.macro BRB_GC_ROOT_ENTRY root + lapc $t3, .Lintrospection_exit_\root # $t3 = exit point address. + bnez \root, .Lintrospection_common + move $t8, \root # Move reference to $t8 for common code. + jic $ra, 0 # Return if null. +.endm + +.macro BRB_FIELD_EXIT out +.Lintrospection_exit_\out: + jalr $zero, $ra + move \out, $t8 # Return reference in expected register. 
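+    # The move above executes in the delay slot of jalr; each exit stub is
+    # thus exactly two instructions (8 bytes), the stride assumed by the
+    # "dlsa $t3, $at, $t3, 3" dispatch in .Lintrospection_field_array.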
+.endm + +.macro BRB_FIELD_EXIT_BREAK + break + break +.endm + +ENTRY_NO_GP art_quick_read_barrier_mark_introspection + # Entry points for offsets/indices not fitting into int16_t and for variable indices. + BRB_FIELD_LONG_OFFSET_ENTRY $v0 + BRB_FIELD_LONG_OFFSET_ENTRY $v1 + BRB_FIELD_LONG_OFFSET_ENTRY $a0 + BRB_FIELD_LONG_OFFSET_ENTRY $a1 + BRB_FIELD_LONG_OFFSET_ENTRY $a2 + BRB_FIELD_LONG_OFFSET_ENTRY $a3 + BRB_FIELD_LONG_OFFSET_ENTRY $a4 + BRB_FIELD_LONG_OFFSET_ENTRY $a5 + BRB_FIELD_LONG_OFFSET_ENTRY $a6 + BRB_FIELD_LONG_OFFSET_ENTRY $a7 + BRB_FIELD_LONG_OFFSET_ENTRY $t0 + BRB_FIELD_LONG_OFFSET_ENTRY $t1 + BRB_FIELD_LONG_OFFSET_ENTRY $t2 + BRB_FIELD_LONG_OFFSET_ENTRY $s2 + BRB_FIELD_LONG_OFFSET_ENTRY $s3 + BRB_FIELD_LONG_OFFSET_ENTRY $s4 + BRB_FIELD_LONG_OFFSET_ENTRY $s5 + BRB_FIELD_LONG_OFFSET_ENTRY $s6 + BRB_FIELD_LONG_OFFSET_ENTRY $s7 + BRB_FIELD_LONG_OFFSET_ENTRY $s8 + + # Entry points for offsets/indices fitting into int16_t. + BRB_FIELD_SHORT_OFFSET_ENTRY $v0 + BRB_FIELD_SHORT_OFFSET_ENTRY $v1 + BRB_FIELD_SHORT_OFFSET_ENTRY $a0 + BRB_FIELD_SHORT_OFFSET_ENTRY $a1 + BRB_FIELD_SHORT_OFFSET_ENTRY $a2 + BRB_FIELD_SHORT_OFFSET_ENTRY $a3 + BRB_FIELD_SHORT_OFFSET_ENTRY $a4 + BRB_FIELD_SHORT_OFFSET_ENTRY $a5 + BRB_FIELD_SHORT_OFFSET_ENTRY $a6 + BRB_FIELD_SHORT_OFFSET_ENTRY $a7 + BRB_FIELD_SHORT_OFFSET_ENTRY $t0 + BRB_FIELD_SHORT_OFFSET_ENTRY $t1 + BRB_FIELD_SHORT_OFFSET_ENTRY $t2 + BRB_FIELD_SHORT_OFFSET_ENTRY $s2 + BRB_FIELD_SHORT_OFFSET_ENTRY $s3 + BRB_FIELD_SHORT_OFFSET_ENTRY $s4 + BRB_FIELD_SHORT_OFFSET_ENTRY $s5 + BRB_FIELD_SHORT_OFFSET_ENTRY $s6 + BRB_FIELD_SHORT_OFFSET_ENTRY $s7 + BRB_FIELD_SHORT_OFFSET_ENTRY $s8 + + .global art_quick_read_barrier_mark_introspection_gc_roots +art_quick_read_barrier_mark_introspection_gc_roots: + # Entry points for GC roots. + BRB_GC_ROOT_ENTRY $v0 + BRB_GC_ROOT_ENTRY $v1 + BRB_GC_ROOT_ENTRY $a0 + BRB_GC_ROOT_ENTRY $a1 + BRB_GC_ROOT_ENTRY $a2 + BRB_GC_ROOT_ENTRY $a3 + BRB_GC_ROOT_ENTRY $a4 + BRB_GC_ROOT_ENTRY $a5 + BRB_GC_ROOT_ENTRY $a6 + BRB_GC_ROOT_ENTRY $a7 + BRB_GC_ROOT_ENTRY $t0 + BRB_GC_ROOT_ENTRY $t1 + BRB_GC_ROOT_ENTRY $t2 + BRB_GC_ROOT_ENTRY $s2 + BRB_GC_ROOT_ENTRY $s3 + BRB_GC_ROOT_ENTRY $s4 + BRB_GC_ROOT_ENTRY $s5 + BRB_GC_ROOT_ENTRY $s6 + BRB_GC_ROOT_ENTRY $s7 + BRB_GC_ROOT_ENTRY $s8 + .global art_quick_read_barrier_mark_introspection_end_of_entries +art_quick_read_barrier_mark_introspection_end_of_entries: + +.Lintrospection_throw_npe: + b art_quick_throw_null_pointer_exception + daddiu $ra, $ra, 4 # Skip lwu, make $ra point to lwu's stack map. + + .set push + .set noat + + // Fields and array elements. + +.Lintrospection_field_array: + // Get the field/element address using $t8 and the offset from the lwu instruction. + lh $at, 0($ra) # $ra points to lwu: $at = low 16 bits of field/element offset. + daddiu $ra, $ra, 4 + HEAP_POISON_INSTR_SIZE # Skip lwu(+dsubu+dext). + daddu $t8, $t8, $at # $t8 = field/element address. + + // Calculate the address of the exit point, store it in $t3 and load the reference into $t8. + lb $at, (-HEAP_POISON_INSTR_SIZE - 2)($ra) # $ra-HEAP_POISON_INSTR_SIZE-4 points to + # "lwu `out`, ...". + andi $at, $at, 31 # Extract `out` from lwu. + + lwu $t8, 0($t8) # $t8 = reference. + UNPOISON_HEAP_REF $t8 + + // Return if null reference. + bnez $t8, .Lintrospection_common + dlsa $t3, $at, $t3, 3 # $t3 = address of the exit point + # (BRB_FIELD_EXIT* macro is 8 bytes). + + // Early return through the exit point. +.Lintrospection_return_early: + jic $t3, 0 # Move $t8 to `out` and return. 
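+
+// The exit point index used above is the raw destination register number
+// (0-31) taken from the rt field of the caller's lwu, so
+// .Lintrospection_exits holds one 8-byte stub per GPR; slots for registers
+// that can never hold the reference are filled with break-break pairs.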
+ + // Code common for GC roots, fields and array elements. + +.Lintrospection_common: + // Check lock word for mark bit, if marked return. + lw $t9, MIRROR_OBJECT_LOCK_WORD_OFFSET($t8) + sll $at, $t9, 31 - LOCK_WORD_MARK_BIT_SHIFT # Move mark bit to sign bit. + bltzc $at, .Lintrospection_return_early +#if (LOCK_WORD_STATE_SHIFT != 30) || (LOCK_WORD_STATE_FORWARDING_ADDRESS != 3) + // The below code depends on the lock word state being in the highest bits + // and the "forwarding address" state having all bits set. +#error "Unexpected lock word state shift or forwarding address state value." +#endif + // Test that both the forwarding state bits are 1. + sll $at, $t9, 1 + and $at, $at, $t9 # Sign bit = 1 IFF both bits are 1. + bgezc $at, .Lintrospection_mark + + .set pop + + // Shift left by the forwarding address shift. This clears out the state bits since they are + // in the top 2 bits of the lock word. + sll $t8, $t9, LOCK_WORD_STATE_FORWARDING_ADDRESS_SHIFT + jalr $zero, $t3 # Move $t8 to `out` and return. + dext $t8, $t8, 0, 32 # Make sure the address is zero-extended. + +.Lintrospection_mark: + // Partially set up the stack frame preserving only $ra. + daddiu $sp, $sp, -320 + .cfi_adjust_cfa_offset 320 + sd $ra, 312($sp) + .cfi_rel_offset 31, 312 + + // Set up $gp, clobbering $ra. + lapc $ra, 1f +1: + .cpsetup $ra, 304, 1b # Save old $gp in 304($sp). + + // Finalize the stack frame and call. + sd $t3, 296($sp) # Preserve the exit point address. + sd $t2, 288($sp) + .cfi_rel_offset 14, 288 + sd $t1, 280($sp) + .cfi_rel_offset 13, 280 + sd $t0, 272($sp) + .cfi_rel_offset 12, 272 + sd $a7, 264($sp) + .cfi_rel_offset 11, 264 + sd $a6, 256($sp) + .cfi_rel_offset 10, 256 + sd $a5, 248($sp) + .cfi_rel_offset 9, 248 + sd $a4, 240($sp) + .cfi_rel_offset 8, 240 + sd $a3, 232($sp) + .cfi_rel_offset 7, 232 + sd $a2, 224($sp) + .cfi_rel_offset 6, 224 + sd $a1, 216($sp) + .cfi_rel_offset 5, 216 + sd $a0, 208($sp) + .cfi_rel_offset 4, 208 + sd $v1, 200($sp) + .cfi_rel_offset 3, 200 + sd $v0, 192($sp) + .cfi_rel_offset 2, 192 + + dla $t9, artReadBarrierMark + + sdc1 $f23, 184($sp) + sdc1 $f22, 176($sp) + sdc1 $f21, 168($sp) + sdc1 $f20, 160($sp) + sdc1 $f19, 152($sp) + sdc1 $f18, 144($sp) + sdc1 $f17, 136($sp) + sdc1 $f16, 128($sp) + sdc1 $f15, 120($sp) + sdc1 $f14, 112($sp) + sdc1 $f13, 104($sp) + sdc1 $f12, 96($sp) + sdc1 $f11, 88($sp) + sdc1 $f10, 80($sp) + sdc1 $f9, 72($sp) + sdc1 $f8, 64($sp) + sdc1 $f7, 56($sp) + sdc1 $f6, 48($sp) + sdc1 $f5, 40($sp) + sdc1 $f4, 32($sp) + sdc1 $f3, 24($sp) + sdc1 $f2, 16($sp) + sdc1 $f1, 8($sp) + sdc1 $f0, 0($sp) + + jalr $t9 # $v0 <- artReadBarrierMark(reference) + move $a0, $t8 # Pass reference in $a0. + move $t8, $v0 + + ld $ra, 312($sp) + .cfi_restore 31 + .cpreturn # Restore old $gp from 304($sp). + ld $t3, 296($sp) # $t3 = address of the exit point. 
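+    # ($t3 is spilled and reloaded without CFI annotations: it is a
+    # temporary that only carries the exit point address across the call.)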
+ ld $t2, 288($sp) + .cfi_restore 14 + ld $t1, 280($sp) + .cfi_restore 13 + ld $t0, 272($sp) + .cfi_restore 12 + ld $a7, 264($sp) + .cfi_restore 11 + ld $a6, 256($sp) + .cfi_restore 10 + ld $a5, 248($sp) + .cfi_restore 9 + ld $a4, 240($sp) + .cfi_restore 8 + ld $a3, 232($sp) + .cfi_restore 7 + ld $a2, 224($sp) + .cfi_restore 6 + ld $a1, 216($sp) + .cfi_restore 5 + ld $a0, 208($sp) + .cfi_restore 4 + ld $v1, 200($sp) + .cfi_restore 3 + ld $v0, 192($sp) + .cfi_restore 2 + + ldc1 $f23, 184($sp) + ldc1 $f22, 176($sp) + ldc1 $f21, 168($sp) + ldc1 $f20, 160($sp) + ldc1 $f19, 152($sp) + ldc1 $f18, 144($sp) + ldc1 $f17, 136($sp) + ldc1 $f16, 128($sp) + ldc1 $f15, 120($sp) + ldc1 $f14, 112($sp) + ldc1 $f13, 104($sp) + ldc1 $f12, 96($sp) + ldc1 $f11, 88($sp) + ldc1 $f10, 80($sp) + ldc1 $f9, 72($sp) + ldc1 $f8, 64($sp) + ldc1 $f7, 56($sp) + ldc1 $f6, 48($sp) + ldc1 $f5, 40($sp) + ldc1 $f4, 32($sp) + ldc1 $f3, 24($sp) + ldc1 $f2, 16($sp) + ldc1 $f1, 8($sp) + ldc1 $f0, 0($sp) + + // Return through the exit point. + jalr $zero, $t3 # Move $t8 to `out` and return. + daddiu $sp, $sp, 320 + .cfi_adjust_cfa_offset -320 + +.Lintrospection_exits: + BRB_FIELD_EXIT_BREAK + BRB_FIELD_EXIT_BREAK + BRB_FIELD_EXIT $v0 + BRB_FIELD_EXIT $v1 + BRB_FIELD_EXIT $a0 + BRB_FIELD_EXIT $a1 + BRB_FIELD_EXIT $a2 + BRB_FIELD_EXIT $a3 + BRB_FIELD_EXIT $a4 + BRB_FIELD_EXIT $a5 + BRB_FIELD_EXIT $a6 + BRB_FIELD_EXIT $a7 + BRB_FIELD_EXIT $t0 + BRB_FIELD_EXIT $t1 + BRB_FIELD_EXIT $t2 + BRB_FIELD_EXIT_BREAK + BRB_FIELD_EXIT_BREAK + BRB_FIELD_EXIT_BREAK + BRB_FIELD_EXIT $s2 + BRB_FIELD_EXIT $s3 + BRB_FIELD_EXIT $s4 + BRB_FIELD_EXIT $s5 + BRB_FIELD_EXIT $s6 + BRB_FIELD_EXIT $s7 + BRB_FIELD_EXIT_BREAK + BRB_FIELD_EXIT_BREAK + BRB_FIELD_EXIT_BREAK + BRB_FIELD_EXIT_BREAK + BRB_FIELD_EXIT_BREAK + BRB_FIELD_EXIT_BREAK + BRB_FIELD_EXIT $s8 + BRB_FIELD_EXIT_BREAK +END art_quick_read_barrier_mark_introspection + .extern artInvokePolymorphic ENTRY art_quick_invoke_polymorphic SETUP_SAVE_REFS_AND_ARGS_FRAME diff --git a/runtime/oat.h b/runtime/oat.h index de4b942bad..c4a983e78b 100644 --- a/runtime/oat.h +++ b/runtime/oat.h @@ -32,8 +32,8 @@ class InstructionSetFeatures; class PACKED(4) OatHeader { public: static constexpr uint8_t kOatMagic[] = { 'o', 'a', 't', '\n' }; - // Last oat version changed reason: update kMultiDexSeparator from ':' to '!'. - static constexpr uint8_t kOatVersion[] = { '1', '3', '0', '\0' }; + // Last oat version changed reason: MIPS Baker thunks. + static constexpr uint8_t kOatVersion[] = { '1', '3', '1', '\0' }; static constexpr const char* kImageLocationKey = "image-location"; static constexpr const char* kDex2OatCmdLineKey = "dex2oat-cmdline"; |
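
To summarize the entry point layout above: a minimal C++ sketch of the
displacement computation implied by the asm_support_mips64.h constants.
GetThunkIndex() and GetThunkDisplacement() are hypothetical helper names, and
the register-to-index mapping is inferred from the order of the BRB_*_ENTRY
expansions in quick_entrypoints_mips64.S; the actual compiler-side logic
lives in code_generator_mips64.cc.

    #include <cstdint>

    // Constants mirroring asm_support_mips64.h.
    constexpr int32_t kRegisterCount       = 20;     // BAKER_MARK_INTROSPECTION_REGISTER_COUNT
    constexpr int32_t kFieldArrayEntrySize = 8 * 4;  // ..._FIELD_ARRAY_ENTRY_SIZE
    constexpr int32_t kGcRootEntrySize     = 4 * 4;  // ..._GC_ROOT_ENTRY_SIZE
    constexpr int32_t kGcRootEntriesOffset =
        2 * kRegisterCount * kFieldArrayEntrySize;   // ..._GC_ROOT_ENTRIES_OFFSET

    enum class Kind { kLongFieldOrArray, kShortFieldOrArray, kGcRoot };

    // Assumed mapping of GPR numbers to entry indices, read off the entry
    // point tables: V0($2)..T2($14) -> 0..12, S2($18)..S7($23) -> 13..18,
    // S8($30) -> 19.
    int32_t GetThunkIndex(int reg_no) {
      if (reg_no >= 2 && reg_no <= 14) return reg_no - 2;
      if (reg_no >= 18 && reg_no <= 23) return 13 + (reg_no - 18);
      if (reg_no == 30) return 19;
      return -1;  // Register cannot hold the reference.
    }

    // "thunk_disp" from the caller-code comments: displacement of the entry
    // point relative to art_quick_read_barrier_mark_introspection
    // (installed as pReadBarrierMarkReg00 while marking is active).
    int32_t GetThunkDisplacement(int reg_no, Kind kind) {
      const int32_t index = GetThunkIndex(reg_no);
      switch (kind) {
        case Kind::kLongFieldOrArray:
          return index * kFieldArrayEntrySize;
        case Kind::kShortFieldOrArray:
          return (kRegisterCount + index) * kFieldArrayEntrySize;
        case Kind::kGcRoot:
          return kGcRootEntriesOffset + index * kGcRootEntrySize;
      }
      return -1;
    }

For example, a reference loaded into A0 ($4) through a short field offset
would use GetThunkDisplacement(4, Kind::kShortFieldOrArray) = (20 + 2) * 32 =
704 bytes past the base entry point; the DCHECKs added in
entrypoints_init_mips64.cc verify that the assembly entry groups actually
occupy these sizes.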