Diffstat (limited to 'compiler/optimizing')
-rw-r--r-- | compiler/optimizing/code_generator_arm_vixl.cc | 40
-rw-r--r-- | compiler/optimizing/code_generator_mips.cc | 447
-rw-r--r-- | compiler/optimizing/code_generator_mips.h | 8
-rw-r--r-- | compiler/optimizing/code_generator_mips64.cc | 360
-rw-r--r-- | compiler/optimizing/code_generator_mips64.h | 5
-rw-r--r-- | compiler/optimizing/common_arm.h | 8
-rw-r--r-- | compiler/optimizing/instruction_builder.cc | 64
-rw-r--r-- | compiler/optimizing/instruction_simplifier.cc | 18
-rw-r--r-- | compiler/optimizing/instruction_simplifier_arm.cc | 4
-rw-r--r-- | compiler/optimizing/reference_type_propagation.cc | 4
-rw-r--r-- | compiler/optimizing/scheduler_arm.cc | 341
-rw-r--r-- | compiler/optimizing/scheduler_arm.h | 11
12 files changed, 1090 insertions, 220 deletions
diff --git a/compiler/optimizing/code_generator_arm_vixl.cc b/compiler/optimizing/code_generator_arm_vixl.cc index b9d4700511..430cdde1f7 100644 --- a/compiler/optimizing/code_generator_arm_vixl.cc +++ b/compiler/optimizing/code_generator_arm_vixl.cc @@ -8269,19 +8269,41 @@ void InstructionCodeGeneratorARMVIXL::VisitDataProcWithShifterOp( const HDataProcWithShifterOp::OpKind op_kind = instruction->GetOpKind(); if (instruction->GetType() == Primitive::kPrimInt) { - DCHECK(!HDataProcWithShifterOp::IsExtensionOp(op_kind)); - + const vixl32::Register first = InputRegisterAt(instruction, 0); + const vixl32::Register output = OutputRegister(instruction); const vixl32::Register second = instruction->InputAt(1)->GetType() == Primitive::kPrimLong ? LowRegisterFrom(locations->InAt(1)) : InputRegisterAt(instruction, 1); - GenerateDataProcInstruction(kind, - OutputRegister(instruction), - InputRegisterAt(instruction, 0), - Operand(second, - ShiftFromOpKind(op_kind), - instruction->GetShiftAmount()), - codegen_); + if (HDataProcWithShifterOp::IsExtensionOp(op_kind)) { + DCHECK_EQ(kind, HInstruction::kAdd); + + switch (op_kind) { + case HDataProcWithShifterOp::kUXTB: + __ Uxtab(output, first, second); + break; + case HDataProcWithShifterOp::kUXTH: + __ Uxtah(output, first, second); + break; + case HDataProcWithShifterOp::kSXTB: + __ Sxtab(output, first, second); + break; + case HDataProcWithShifterOp::kSXTH: + __ Sxtah(output, first, second); + break; + default: + LOG(FATAL) << "Unexpected operation kind: " << op_kind; + UNREACHABLE(); + } + } else { + GenerateDataProcInstruction(kind, + output, + first, + Operand(second, + ShiftFromOpKind(op_kind), + instruction->GetShiftAmount()), + codegen_); + } } else { DCHECK_EQ(instruction->GetType(), Primitive::kPrimLong); diff --git a/compiler/optimizing/code_generator_mips.cc b/compiler/optimizing/code_generator_mips.cc index 23d188d630..b6eb5c1d1d 100644 --- a/compiler/optimizing/code_generator_mips.cc +++ b/compiler/optimizing/code_generator_mips.cc @@ -16,6 +16,7 @@ #include "code_generator_mips.h" +#include "arch/mips/asm_support_mips.h" #include "arch/mips/entrypoints_direct_mips.h" #include "arch/mips/instruction_set_features_mips.h" #include "art_method.h" @@ -40,6 +41,11 @@ namespace mips { static constexpr int kCurrentMethodStackOffset = 0; static constexpr Register kMethodRegisterArgument = A0; +// Flags controlling the use of thunks for Baker read barriers. 
+constexpr bool kBakerReadBarrierThunksEnableForFields = true; +constexpr bool kBakerReadBarrierThunksEnableForArrays = true; +constexpr bool kBakerReadBarrierThunksEnableForGcRoots = true; + Location MipsReturnLocation(Primitive::Type return_type) { switch (return_type) { case Primitive::kPrimBoolean: @@ -1486,7 +1492,8 @@ void CodeGeneratorMIPS::MoveLocation(Location destination, __ Mfc1(dst_low, src); __ MoveFromFpuHigh(dst_high, src); } else { - DCHECK(source.IsDoubleStackSlot()) << "Cannot move from " << source << " to " << destination; + DCHECK(source.IsDoubleStackSlot()) + << "Cannot move from " << source << " to " << destination; int32_t off = source.GetStackIndex(); Register r = destination.AsRegisterPairLow<Register>(); __ LoadFromOffset(kLoadDoubleword, r, SP, off); @@ -1539,7 +1546,8 @@ void CodeGeneratorMIPS::MoveLocation(Location destination, } else if (source.IsFpuRegister()) { __ StoreDToOffset(source.AsFpuRegister<FRegister>(), SP, dst_offset); } else { - DCHECK(source.IsDoubleStackSlot()) << "Cannot move from " << source << " to " << destination; + DCHECK(source.IsDoubleStackSlot()) + << "Cannot move from " << source << " to " << destination; __ LoadFromOffset(kLoadWord, TMP, SP, source.GetStackIndex()); __ StoreToOffset(kStoreWord, TMP, SP, dst_offset); __ LoadFromOffset(kLoadWord, TMP, SP, source.GetStackIndex() + 4); @@ -1763,8 +1771,10 @@ void CodeGeneratorMIPS::EmitPcRelativeAddressPlaceholderHigh(PcRelativePatchInfo } // A following instruction will add the sign-extended low half of the 32-bit // offset to `out` (e.g. lw, jialc, addiu). - DCHECK_EQ(info_low->patch_info_high, info_high); - __ Bind(&info_low->label); + if (info_low != nullptr) { + DCHECK_EQ(info_low->patch_info_high, info_high); + __ Bind(&info_low->label); + } } CodeGeneratorMIPS::JitPatchInfo* CodeGeneratorMIPS::NewJitRootStringPatch( @@ -1791,25 +1801,26 @@ void CodeGeneratorMIPS::PatchJitRootUse(uint8_t* code, const uint8_t* roots_data, const CodeGeneratorMIPS::JitPatchInfo& info, uint64_t index_in_table) const { - uint32_t literal_offset = GetAssembler().GetLabelLocation(&info.high_label); + uint32_t high_literal_offset = GetAssembler().GetLabelLocation(&info.high_label); + uint32_t low_literal_offset = GetAssembler().GetLabelLocation(&info.low_label); uintptr_t address = reinterpret_cast<uintptr_t>(roots_data) + index_in_table * sizeof(GcRoot<mirror::Object>); uint32_t addr32 = dchecked_integral_cast<uint32_t>(address); // lui reg, addr32_high - DCHECK_EQ(code[literal_offset + 0], 0x34); - DCHECK_EQ(code[literal_offset + 1], 0x12); - DCHECK_EQ((code[literal_offset + 2] & 0xE0), 0x00); - DCHECK_EQ(code[literal_offset + 3], 0x3C); + DCHECK_EQ(code[high_literal_offset + 0], 0x34); + DCHECK_EQ(code[high_literal_offset + 1], 0x12); + DCHECK_EQ((code[high_literal_offset + 2] & 0xE0), 0x00); + DCHECK_EQ(code[high_literal_offset + 3], 0x3C); // instr reg, reg, addr32_low - DCHECK_EQ(code[literal_offset + 4], 0x78); - DCHECK_EQ(code[literal_offset + 5], 0x56); + DCHECK_EQ(code[low_literal_offset + 0], 0x78); + DCHECK_EQ(code[low_literal_offset + 1], 0x56); addr32 += (addr32 & 0x8000) << 1; // Account for sign extension in "instr reg, reg, addr32_low". 
// lui reg, addr32_high - code[literal_offset + 0] = static_cast<uint8_t>(addr32 >> 16); - code[literal_offset + 1] = static_cast<uint8_t>(addr32 >> 24); + code[high_literal_offset + 0] = static_cast<uint8_t>(addr32 >> 16); + code[high_literal_offset + 1] = static_cast<uint8_t>(addr32 >> 24); // instr reg, reg, addr32_low - code[literal_offset + 4] = static_cast<uint8_t>(addr32 >> 0); - code[literal_offset + 5] = static_cast<uint8_t>(addr32 >> 8); + code[low_literal_offset + 0] = static_cast<uint8_t>(addr32 >> 0); + code[low_literal_offset + 1] = static_cast<uint8_t>(addr32 >> 8); } void CodeGeneratorMIPS::EmitJitRootPatches(uint8_t* code, const uint8_t* roots_data) { @@ -2545,7 +2556,12 @@ void LocationsBuilderMIPS::VisitArrayGet(HArrayGet* instruction) { // We need a temporary register for the read barrier marking slow // path in CodeGeneratorMIPS::GenerateArrayLoadWithBakerReadBarrier. if (object_array_get_with_read_barrier && kUseBakerReadBarrier) { - locations->AddTemp(Location::RequiresRegister()); + bool temp_needed = instruction->GetIndex()->IsConstant() + ? !kBakerReadBarrierThunksEnableForFields + : !kBakerReadBarrierThunksEnableForArrays; + if (temp_needed) { + locations->AddTemp(Location::RequiresRegister()); + } } } @@ -2681,16 +2697,32 @@ void InstructionCodeGeneratorMIPS::VisitArrayGet(HArrayGet* instruction) { // /* HeapReference<Object> */ out = // *(obj + data_offset + index * sizeof(HeapReference<Object>)) if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) { - Location temp = locations->GetTemp(0); + bool temp_needed = index.IsConstant() + ? !kBakerReadBarrierThunksEnableForFields + : !kBakerReadBarrierThunksEnableForArrays; + Location temp = temp_needed ? locations->GetTemp(0) : Location::NoLocation(); // Note that a potential implicit null check is handled in this // CodeGeneratorMIPS::GenerateArrayLoadWithBakerReadBarrier call. - codegen_->GenerateArrayLoadWithBakerReadBarrier(instruction, - out_loc, - obj, - data_offset, - index, - temp, - /* needs_null_check */ true); + DCHECK(!instruction->CanDoImplicitNullCheckOn(instruction->InputAt(0))); + if (index.IsConstant()) { + // Array load with a constant index can be treated as a field load. + size_t offset = + (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_4) + data_offset; + codegen_->GenerateFieldLoadWithBakerReadBarrier(instruction, + out_loc, + obj, + offset, + temp, + /* needs_null_check */ false); + } else { + codegen_->GenerateArrayLoadWithBakerReadBarrier(instruction, + out_loc, + obj, + data_offset, + index, + temp, + /* needs_null_check */ false); + } } else { Register out = out_loc.AsRegister<Register>(); if (index.IsConstant()) { @@ -3093,6 +3125,7 @@ void InstructionCodeGeneratorMIPS::VisitBoundsCheck(HBoundsCheck* instruction) { // Temp is used for read barrier. static size_t NumberOfInstanceOfTemps(TypeCheckKind type_check_kind) { if (kEmitCompilerReadBarrier && + !(kUseBakerReadBarrier && kBakerReadBarrierThunksEnableForFields) && (kUseBakerReadBarrier || type_check_kind == TypeCheckKind::kAbstractClassCheck || type_check_kind == TypeCheckKind::kClassHierarchyCheck || @@ -6096,7 +6129,9 @@ void LocationsBuilderMIPS::HandleFieldGet(HInstruction* instruction, const Field if (object_field_get_with_read_barrier && kUseBakerReadBarrier) { // We need a temporary register for the read barrier marking slow // path in CodeGeneratorMIPS::GenerateFieldLoadWithBakerReadBarrier. 
- locations->AddTemp(Location::RequiresRegister()); + if (!kBakerReadBarrierThunksEnableForFields) { + locations->AddTemp(Location::RequiresRegister()); + } } } } @@ -6171,7 +6206,8 @@ void InstructionCodeGeneratorMIPS::HandleFieldGet(HInstruction* instruction, if (type == Primitive::kPrimNot) { // /* HeapReference<Object> */ dst = *(obj + offset) if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) { - Location temp_loc = locations->GetTemp(0); + Location temp_loc = + kBakerReadBarrierThunksEnableForFields ? Location::NoLocation() : locations->GetTemp(0); // Note that a potential implicit null check is handled in this // CodeGeneratorMIPS::GenerateFieldLoadWithBakerReadBarrier call. codegen_->GenerateFieldLoadWithBakerReadBarrier(instruction, @@ -6395,7 +6431,9 @@ void InstructionCodeGeneratorMIPS::GenerateReferenceLoadOneRegister( Register out_reg = out.AsRegister<Register>(); if (read_barrier_option == kWithReadBarrier) { CHECK(kEmitCompilerReadBarrier); - DCHECK(maybe_temp.IsRegister()) << maybe_temp; + if (!kUseBakerReadBarrier || !kBakerReadBarrierThunksEnableForFields) { + DCHECK(maybe_temp.IsRegister()) << maybe_temp; + } if (kUseBakerReadBarrier) { // Load with fast path based Baker's read barrier. // /* HeapReference<Object> */ out = *(out + offset) @@ -6435,7 +6473,9 @@ void InstructionCodeGeneratorMIPS::GenerateReferenceLoadTwoRegisters( if (read_barrier_option == kWithReadBarrier) { CHECK(kEmitCompilerReadBarrier); if (kUseBakerReadBarrier) { - DCHECK(maybe_temp.IsRegister()) << maybe_temp; + if (!kBakerReadBarrierThunksEnableForFields) { + DCHECK(maybe_temp.IsRegister()) << maybe_temp; + } // Load with fast path based Baker's read barrier. // /* HeapReference<Object> */ out = *(obj + offset) codegen_->GenerateFieldLoadWithBakerReadBarrier(instruction, @@ -6458,67 +6498,172 @@ void InstructionCodeGeneratorMIPS::GenerateReferenceLoadTwoRegisters( } } +static inline int GetBakerMarkThunkNumber(Register reg) { + static_assert(BAKER_MARK_INTROSPECTION_REGISTER_COUNT == 21, "Expecting equal"); + if (reg >= V0 && reg <= T7) { // 14 consequtive regs. + return reg - V0; + } else if (reg >= S2 && reg <= S7) { // 6 consequtive regs. + return 14 + (reg - S2); + } else if (reg == FP) { // One more. + return 20; + } + LOG(FATAL) << "Unexpected register " << reg; + UNREACHABLE(); +} + +static inline int GetBakerMarkFieldArrayThunkDisplacement(Register reg, bool short_offset) { + int num = GetBakerMarkThunkNumber(reg) + + (short_offset ? 
BAKER_MARK_INTROSPECTION_REGISTER_COUNT : 0); + return num * BAKER_MARK_INTROSPECTION_FIELD_ARRAY_ENTRY_SIZE; +} + +static inline int GetBakerMarkGcRootThunkDisplacement(Register reg) { + return GetBakerMarkThunkNumber(reg) * BAKER_MARK_INTROSPECTION_GC_ROOT_ENTRY_SIZE + + BAKER_MARK_INTROSPECTION_GC_ROOT_ENTRIES_OFFSET; +} + void InstructionCodeGeneratorMIPS::GenerateGcRootFieldLoad(HInstruction* instruction, Location root, Register obj, uint32_t offset, - ReadBarrierOption read_barrier_option) { + ReadBarrierOption read_barrier_option, + MipsLabel* label_low) { + bool reordering; + if (label_low != nullptr) { + DCHECK_EQ(offset, 0x5678u); + } Register root_reg = root.AsRegister<Register>(); if (read_barrier_option == kWithReadBarrier) { DCHECK(kEmitCompilerReadBarrier); if (kUseBakerReadBarrier) { // Fast path implementation of art::ReadBarrier::BarrierForRoot when // Baker's read barrier are used: - // - // root = obj.field; - // temp = Thread::Current()->pReadBarrierMarkReg ## root.reg() - // if (temp != null) { - // root = temp(root) - // } - - // /* GcRoot<mirror::Object> */ root = *(obj + offset) - __ LoadFromOffset(kLoadWord, root_reg, obj, offset); - static_assert( - sizeof(mirror::CompressedReference<mirror::Object>) == sizeof(GcRoot<mirror::Object>), - "art::mirror::CompressedReference<mirror::Object> and art::GcRoot<mirror::Object> " - "have different sizes."); - static_assert(sizeof(mirror::CompressedReference<mirror::Object>) == sizeof(int32_t), - "art::mirror::CompressedReference<mirror::Object> and int32_t " - "have different sizes."); - - // Slow path marking the GC root `root`. - Location temp = Location::RegisterLocation(T9); - SlowPathCodeMIPS* slow_path = - new (GetGraph()->GetArena()) ReadBarrierMarkSlowPathMIPS( - instruction, - root, - /*entrypoint*/ temp); - codegen_->AddSlowPath(slow_path); + if (kBakerReadBarrierThunksEnableForGcRoots) { + // Note that we do not actually check the value of `GetIsGcMarking()` + // to decide whether to mark the loaded GC root or not. Instead, we + // load into `temp` (T9) the read barrier mark introspection entrypoint. + // If `temp` is null, it means that `GetIsGcMarking()` is false, and + // vice versa. + // + // We use thunks for the slow path. That thunk checks the reference + // and jumps to the entrypoint if needed. + // + // temp = Thread::Current()->pReadBarrierMarkReg00 + // // AKA &art_quick_read_barrier_mark_introspection. + // GcRoot<mirror::Object> root = *(obj+offset); // Original reference load. + // if (temp != nullptr) { + // temp = &gc_root_thunk<root_reg> + // root = temp(root) + // } + + bool isR6 = codegen_->GetInstructionSetFeatures().IsR6(); + const int32_t entry_point_offset = + Thread::ReadBarrierMarkEntryPointsOffset<kMipsPointerSize>(0); + const int thunk_disp = GetBakerMarkGcRootThunkDisplacement(root_reg); + int16_t offset_low = Low16Bits(offset); + int16_t offset_high = High16Bits(offset - offset_low); // Accounts for sign + // extension in lw. + bool short_offset = IsInt<16>(static_cast<int32_t>(offset)); + Register base = short_offset ? obj : TMP; + // Loading the entrypoint does not require a load acquire since it is only changed when + // threads are suspended or running a checkpoint. + __ LoadFromOffset(kLoadWord, T9, TR, entry_point_offset); + reordering = __ SetReorder(false); + if (!short_offset) { + DCHECK(!label_low); + __ AddUpper(base, obj, offset_high); + } + __ Beqz(T9, (isR6 ? 2 : 4)); // Skip jialc / addiu+jalr+nop. 
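// Illustrative sketch (not part of this patch): the dense register-to-thunk
// mapping that GetBakerMarkThunkNumber() above implements, written against raw
// MIPS32 register numbers (V0=2 ... T7=15, S2=18 ... S7=23, FP=30). The entry
// sizes and the GC-root table offset below are placeholders standing in for the
// BAKER_MARK_INTROSPECTION_* constants defined in asm_support_mips.h.
#include <cassert>
namespace sketch {
constexpr int kRegisterCount = 21;       // BAKER_MARK_INTROSPECTION_REGISTER_COUNT.
constexpr int kFieldArrayEntrySize = 8;  // Placeholder entry size.
constexpr int kGcRootEntrySize = 8;      // Placeholder entry size.
constexpr int kGcRootEntriesOffset = 2 * kRegisterCount * kFieldArrayEntrySize;  // Placeholder.
inline int ThunkNumber(int reg) {
  if (reg >= 2 && reg <= 15) return reg - 2;           // V0..T7: 14 registers.
  if (reg >= 18 && reg <= 23) return 14 + (reg - 18);  // S2..S7: 6 registers.
  assert(reg == 30);                                   // FP is the 21st markable register.
  return 20;
}
inline int FieldArrayThunkDisp(int reg, bool short_offset) {
  // Short- and long-offset variants occupy two consecutive banks of entries.
  return (ThunkNumber(reg) + (short_offset ? kRegisterCount : 0)) * kFieldArrayEntrySize;
}
inline int GcRootThunkDisp(int reg) {
  return ThunkNumber(reg) * kGcRootEntrySize + kGcRootEntriesOffset;
}
}  // namespace sketch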
+ if (label_low != nullptr) { + DCHECK(short_offset); + __ Bind(label_low); + } + // /* GcRoot<mirror::Object> */ root = *(obj + offset) + __ LoadFromOffset(kLoadWord, root_reg, base, offset_low); // Single instruction + // in delay slot. + if (isR6) { + __ Jialc(T9, thunk_disp); + } else { + __ Addiu(T9, T9, thunk_disp); + __ Jalr(T9); + __ Nop(); + } + __ SetReorder(reordering); + } else { + // Note that we do not actually check the value of `GetIsGcMarking()` + // to decide whether to mark the loaded GC root or not. Instead, we + // load into `temp` (T9) the read barrier mark entry point corresponding + // to register `root`. If `temp` is null, it means that `GetIsGcMarking()` + // is false, and vice versa. + // + // GcRoot<mirror::Object> root = *(obj+offset); // Original reference load. + // temp = Thread::Current()->pReadBarrierMarkReg ## root.reg() + // if (temp != null) { + // root = temp(root) + // } + + if (label_low != nullptr) { + reordering = __ SetReorder(false); + __ Bind(label_low); + } + // /* GcRoot<mirror::Object> */ root = *(obj + offset) + __ LoadFromOffset(kLoadWord, root_reg, obj, offset); + if (label_low != nullptr) { + __ SetReorder(reordering); + } + static_assert( + sizeof(mirror::CompressedReference<mirror::Object>) == sizeof(GcRoot<mirror::Object>), + "art::mirror::CompressedReference<mirror::Object> and art::GcRoot<mirror::Object> " + "have different sizes."); + static_assert(sizeof(mirror::CompressedReference<mirror::Object>) == sizeof(int32_t), + "art::mirror::CompressedReference<mirror::Object> and int32_t " + "have different sizes."); + + // Slow path marking the GC root `root`. + Location temp = Location::RegisterLocation(T9); + SlowPathCodeMIPS* slow_path = + new (GetGraph()->GetArena()) ReadBarrierMarkSlowPathMIPS( + instruction, + root, + /*entrypoint*/ temp); + codegen_->AddSlowPath(slow_path); - // temp = Thread::Current()->pReadBarrierMarkReg ## root.reg() - const int32_t entry_point_offset = - Thread::ReadBarrierMarkEntryPointsOffset<kMipsPointerSize>(root.reg() - 1); - // Loading the entrypoint does not require a load acquire since it is only changed when - // threads are suspended or running a checkpoint. - __ LoadFromOffset(kLoadWord, temp.AsRegister<Register>(), TR, entry_point_offset); - // The entrypoint is null when the GC is not marking, this prevents one load compared to - // checking GetIsGcMarking. - __ Bnez(temp.AsRegister<Register>(), slow_path->GetEntryLabel()); - __ Bind(slow_path->GetExitLabel()); + const int32_t entry_point_offset = + Thread::ReadBarrierMarkEntryPointsOffset<kMipsPointerSize>(root.reg() - 1); + // Loading the entrypoint does not require a load acquire since it is only changed when + // threads are suspended or running a checkpoint. + __ LoadFromOffset(kLoadWord, temp.AsRegister<Register>(), TR, entry_point_offset); + __ Bnez(temp.AsRegister<Register>(), slow_path->GetEntryLabel()); + __ Bind(slow_path->GetExitLabel()); + } } else { + if (label_low != nullptr) { + reordering = __ SetReorder(false); + __ Bind(label_low); + } // GC root loaded through a slow path for read barriers other // than Baker's. // /* GcRoot<mirror::Object>* */ root = obj + offset __ Addiu32(root_reg, obj, offset); + if (label_low != nullptr) { + __ SetReorder(reordering); + } // /* mirror::Object* */ root = root->Read() codegen_->GenerateReadBarrierForRootSlow(instruction, root, root); } } else { + if (label_low != nullptr) { + reordering = __ SetReorder(false); + __ Bind(label_low); + } // Plain GC root load with no read barrier. 
// /* GcRoot<mirror::Object> */ root = *(obj + offset) __ LoadFromOffset(kLoadWord, root_reg, obj, offset); // Note that GC roots are not affected by heap poisoning, thus we // do not have to unpoison `root_reg` here. + if (label_low != nullptr) { + __ SetReorder(reordering); + } } } @@ -6531,6 +6676,88 @@ void CodeGeneratorMIPS::GenerateFieldLoadWithBakerReadBarrier(HInstruction* inst DCHECK(kEmitCompilerReadBarrier); DCHECK(kUseBakerReadBarrier); + if (kBakerReadBarrierThunksEnableForFields) { + // Note that we do not actually check the value of `GetIsGcMarking()` + // to decide whether to mark the loaded reference or not. Instead, we + // load into `temp` (T9) the read barrier mark introspection entrypoint. + // If `temp` is null, it means that `GetIsGcMarking()` is false, and + // vice versa. + // + // We use thunks for the slow path. That thunk checks the reference + // and jumps to the entrypoint if needed. If the holder is not gray, + // it issues a load-load memory barrier and returns to the original + // reference load. + // + // temp = Thread::Current()->pReadBarrierMarkReg00 + // // AKA &art_quick_read_barrier_mark_introspection. + // if (temp != nullptr) { + // temp = &field_array_thunk<holder_reg> + // temp() + // } + // not_gray_return_address: + // // If the offset is too large to fit into the lw instruction, we + // // use an adjusted base register (TMP) here. This register + // // receives bits 16 ... 31 of the offset before the thunk invocation + // // and the thunk benefits from it. + // HeapReference<mirror::Object> reference = *(obj+offset); // Original reference load. + // gray_return_address: + + DCHECK(temp.IsInvalid()); + bool isR6 = GetInstructionSetFeatures().IsR6(); + int16_t offset_low = Low16Bits(offset); + int16_t offset_high = High16Bits(offset - offset_low); // Accounts for sign extension in lw. + bool short_offset = IsInt<16>(static_cast<int32_t>(offset)); + bool reordering = __ SetReorder(false); + const int32_t entry_point_offset = + Thread::ReadBarrierMarkEntryPointsOffset<kMipsPointerSize>(0); + // There may have or may have not been a null check if the field offset is smaller than + // the page size. + // There must've been a null check in case it's actually a load from an array. + // We will, however, perform an explicit null check in the thunk as it's easier to + // do it than not. + if (instruction->IsArrayGet()) { + DCHECK(!needs_null_check); + } + const int thunk_disp = GetBakerMarkFieldArrayThunkDisplacement(obj, short_offset); + // Loading the entrypoint does not require a load acquire since it is only changed when + // threads are suspended or running a checkpoint. + __ LoadFromOffset(kLoadWord, T9, TR, entry_point_offset); + Register ref_reg = ref.AsRegister<Register>(); + Register base = short_offset ? obj : TMP; + if (short_offset) { + if (isR6) { + __ Beqzc(T9, 2); // Skip jialc. + __ Nop(); // In forbidden slot. + __ Jialc(T9, thunk_disp); + } else { + __ Beqz(T9, 3); // Skip jalr+nop. + __ Addiu(T9, T9, thunk_disp); // In delay slot. + __ Jalr(T9); + __ Nop(); // In delay slot. + } + } else { + if (isR6) { + __ Beqz(T9, 2); // Skip jialc. + __ Aui(base, obj, offset_high); // In delay slot. + __ Jialc(T9, thunk_disp); + } else { + __ Lui(base, offset_high); + __ Beqz(T9, 2); // Skip jalr. + __ Addiu(T9, T9, thunk_disp); // In delay slot. + __ Jalr(T9); + __ Addu(base, base, obj); // In delay slot. 
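// Illustrative sketch (not part of this patch): the high/low split used by the
// long-offset path above so that the final lw still reaches (obj + offset) even
// though lw sign-extends its 16-bit immediate. Worked example, offset = 0x00018000:
//   low  = Low16Bits(0x00018000)     = (int16_t)0x8000 = -32768
//   high = High16Bits(0x18000 - low) = High16Bits(0x20000) = 0x0002
//   base = obj + (0x0002 << 16);  lw ref, -32768(base)  ->  obj + 0x18000.
#include <cstdint>
inline void SplitLoadOffset(uint32_t offset, int16_t* low, int16_t* high) {
  *low = static_cast<int16_t>(offset & 0xFFFFu);
  // Subtracting the sign-extended low half pre-compensates for the lw immediate.
  *high = static_cast<int16_t>(
      (offset - static_cast<uint32_t>(static_cast<int32_t>(*low))) >> 16);
}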
+ } + } + // /* HeapReference<Object> */ ref = *(obj + offset) + __ LoadFromOffset(kLoadWord, ref_reg, base, offset_low); // Single instruction. + if (needs_null_check) { + MaybeRecordImplicitNullCheck(instruction); + } + __ MaybeUnpoisonHeapReference(ref_reg); + __ SetReorder(reordering); + return; + } + // /* HeapReference<Object> */ ref = *(obj + offset) Location no_index = Location::NoLocation(); ScaleFactor no_scale_factor = TIMES_1; @@ -6557,9 +6784,69 @@ void CodeGeneratorMIPS::GenerateArrayLoadWithBakerReadBarrier(HInstruction* inst static_assert( sizeof(mirror::HeapReference<mirror::Object>) == sizeof(int32_t), "art::mirror::HeapReference<art::mirror::Object> and int32_t have different sizes."); + ScaleFactor scale_factor = TIMES_4; + + if (kBakerReadBarrierThunksEnableForArrays) { + // Note that we do not actually check the value of `GetIsGcMarking()` + // to decide whether to mark the loaded reference or not. Instead, we + // load into `temp` (T9) the read barrier mark introspection entrypoint. + // If `temp` is null, it means that `GetIsGcMarking()` is false, and + // vice versa. + // + // We use thunks for the slow path. That thunk checks the reference + // and jumps to the entrypoint if needed. If the holder is not gray, + // it issues a load-load memory barrier and returns to the original + // reference load. + // + // temp = Thread::Current()->pReadBarrierMarkReg00 + // // AKA &art_quick_read_barrier_mark_introspection. + // if (temp != nullptr) { + // temp = &field_array_thunk<holder_reg> + // temp() + // } + // not_gray_return_address: + // // The element address is pre-calculated in the TMP register before the + // // thunk invocation and the thunk benefits from it. + // HeapReference<mirror::Object> reference = data[index]; // Original reference load. + // gray_return_address: + + DCHECK(temp.IsInvalid()); + DCHECK(index.IsValid()); + bool reordering = __ SetReorder(false); + const int32_t entry_point_offset = + Thread::ReadBarrierMarkEntryPointsOffset<kMipsPointerSize>(0); + // We will not do the explicit null check in the thunk as some form of a null check + // must've been done earlier. + DCHECK(!needs_null_check); + const int thunk_disp = GetBakerMarkFieldArrayThunkDisplacement(obj, /* short_offset */ false); + // Loading the entrypoint does not require a load acquire since it is only changed when + // threads are suspended or running a checkpoint. + __ LoadFromOffset(kLoadWord, T9, TR, entry_point_offset); + Register ref_reg = ref.AsRegister<Register>(); + Register index_reg = index.IsRegisterPair() + ? index.AsRegisterPairLow<Register>() + : index.AsRegister<Register>(); + if (GetInstructionSetFeatures().IsR6()) { + __ Beqz(T9, 2); // Skip jialc. + __ Lsa(TMP, index_reg, obj, scale_factor); // In delay slot. + __ Jialc(T9, thunk_disp); + } else { + __ Sll(TMP, index_reg, scale_factor); + __ Beqz(T9, 2); // Skip jalr. + __ Addiu(T9, T9, thunk_disp); // In delay slot. + __ Jalr(T9); + __ Addu(TMP, TMP, obj); // In delay slot. + } + // /* HeapReference<Object> */ ref = *(obj + data_offset + (index << scale_factor)) + DCHECK(IsInt<16>(static_cast<int32_t>(data_offset))) << data_offset; + __ LoadFromOffset(kLoadWord, ref_reg, TMP, data_offset); // Single instruction. 
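// Illustrative sketch (not part of this patch): the address the array thunk
// path above leaves in TMP before the single lw, assuming 4-byte heap
// references (scale TIMES_4, i.e. shift by 2) and a data_offset that the
// DCHECK guarantees fits the 16-bit lw immediate.
#include <cstdint>
inline uintptr_t ReferenceSlot(uintptr_t obj, uint32_t index, uint32_t data_offset) {
  uintptr_t tmp = obj + (static_cast<uintptr_t>(index) << 2);  // Lsa (R6) or Sll+Addu (R2).
  return tmp + data_offset;  // Folded into the immediate of the final lw.
}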
+ __ MaybeUnpoisonHeapReference(ref_reg); + __ SetReorder(reordering); + return; + } + // /* HeapReference<Object> */ ref = // *(obj + data_offset + index * sizeof(HeapReference<Object>)) - ScaleFactor scale_factor = TIMES_4; GenerateReferenceLoadWithBakerReadBarrier(instruction, ref, obj, @@ -7461,10 +7748,14 @@ void InstructionCodeGeneratorMIPS::VisitLoadClass(HLoadClass* cls) NO_THREAD_SAF bool reordering = __ SetReorder(false); codegen_->EmitPcRelativeAddressPlaceholderHigh(bss_info_high, temp, - base_or_current_method_reg, - info_low); - GenerateGcRootFieldLoad(cls, out_loc, temp, /* placeholder */ 0x5678, read_barrier_option); + base_or_current_method_reg); __ SetReorder(reordering); + GenerateGcRootFieldLoad(cls, + out_loc, + temp, + /* placeholder */ 0x5678, + read_barrier_option, + &info_low->label); generate_null_check = true; break; } @@ -7475,8 +7766,13 @@ void InstructionCodeGeneratorMIPS::VisitLoadClass(HLoadClass* cls) NO_THREAD_SAF bool reordering = __ SetReorder(false); __ Bind(&info->high_label); __ Lui(out, /* placeholder */ 0x1234); - GenerateGcRootFieldLoad(cls, out_loc, out, /* placeholder */ 0x5678, read_barrier_option); __ SetReorder(reordering); + GenerateGcRootFieldLoad(cls, + out_loc, + out, + /* placeholder */ 0x5678, + read_barrier_option, + &info->low_label); break; } case HLoadClass::LoadKind::kRuntimeCall: @@ -7623,14 +7919,14 @@ void InstructionCodeGeneratorMIPS::VisitLoadString(HLoadString* load) NO_THREAD_ bool reordering = __ SetReorder(false); codegen_->EmitPcRelativeAddressPlaceholderHigh(info_high, temp, - base_or_current_method_reg, - info_low); + base_or_current_method_reg); + __ SetReorder(reordering); GenerateGcRootFieldLoad(load, out_loc, temp, /* placeholder */ 0x5678, - kCompilerReadBarrierOption); - __ SetReorder(reordering); + kCompilerReadBarrierOption, + &info_low->label); SlowPathCodeMIPS* slow_path = new (GetGraph()->GetArena()) LoadStringSlowPathMIPS(load, info_high); codegen_->AddSlowPath(slow_path); @@ -7646,12 +7942,13 @@ void InstructionCodeGeneratorMIPS::VisitLoadString(HLoadString* load) NO_THREAD_ bool reordering = __ SetReorder(false); __ Bind(&info->high_label); __ Lui(out, /* placeholder */ 0x1234); + __ SetReorder(reordering); GenerateGcRootFieldLoad(load, out_loc, out, /* placeholder */ 0x5678, - kCompilerReadBarrierOption); - __ SetReorder(reordering); + kCompilerReadBarrierOption, + &info->low_label); return; } default: diff --git a/compiler/optimizing/code_generator_mips.h b/compiler/optimizing/code_generator_mips.h index 52ee852269..7195b9d89d 100644 --- a/compiler/optimizing/code_generator_mips.h +++ b/compiler/optimizing/code_generator_mips.h @@ -285,7 +285,8 @@ class InstructionCodeGeneratorMIPS : public InstructionCodeGenerator { Location root, Register obj, uint32_t offset, - ReadBarrierOption read_barrier_option); + ReadBarrierOption read_barrier_option, + MipsLabel* label_low = nullptr); void GenerateIntCompare(IfCondition cond, LocationSummary* locations); // When the function returns `false` it means that the condition holds if `dst` is non-zero @@ -637,7 +638,7 @@ class CodeGeneratorMIPS : public CodeGenerator { void EmitPcRelativeAddressPlaceholderHigh(PcRelativePatchInfo* info_high, Register out, Register base, - PcRelativePatchInfo* info_low); + PcRelativePatchInfo* info_low = nullptr); // The JitPatchInfo is used for JIT string and class loads. struct JitPatchInfo { @@ -649,8 +650,9 @@ class CodeGeneratorMIPS : public CodeGenerator { // String/type index. 
uint64_t index; // Label for the instruction loading the most significant half of the address. - // The least significant half is loaded with the instruction that follows immediately. MipsLabel high_label; + // Label for the instruction supplying the least significant half of the address. + MipsLabel low_label; }; void PatchJitRootUse(uint8_t* code, diff --git a/compiler/optimizing/code_generator_mips64.cc b/compiler/optimizing/code_generator_mips64.cc index 454a2ddc14..3e79f474b6 100644 --- a/compiler/optimizing/code_generator_mips64.cc +++ b/compiler/optimizing/code_generator_mips64.cc @@ -16,6 +16,7 @@ #include "code_generator_mips64.h" +#include "arch/mips64/asm_support_mips64.h" #include "art_method.h" #include "code_generator_utils.h" #include "compiled_method.h" @@ -38,6 +39,11 @@ namespace mips64 { static constexpr int kCurrentMethodStackOffset = 0; static constexpr GpuRegister kMethodRegisterArgument = A0; +// Flags controlling the use of thunks for Baker read barriers. +constexpr bool kBakerReadBarrierThunksEnableForFields = true; +constexpr bool kBakerReadBarrierThunksEnableForArrays = true; +constexpr bool kBakerReadBarrierThunksEnableForGcRoots = true; + Location Mips64ReturnLocation(Primitive::Type return_type) { switch (return_type) { case Primitive::kPrimBoolean: @@ -1649,8 +1655,10 @@ void CodeGeneratorMIPS64::EmitPcRelativeAddressPlaceholderHigh(PcRelativePatchIn __ Auipc(out, /* placeholder */ 0x1234); // A following instruction will add the sign-extended low half of the 32-bit // offset to `out` (e.g. ld, jialc, daddiu). - DCHECK_EQ(info_low->patch_info_high, info_high); - __ Bind(&info_low->label); + if (info_low != nullptr) { + DCHECK_EQ(info_low->patch_info_high, info_high); + __ Bind(&info_low->label); + } } Literal* CodeGeneratorMIPS64::DeduplicateJitStringLiteral(const DexFile& dex_file, @@ -2117,7 +2125,12 @@ void LocationsBuilderMIPS64::VisitArrayGet(HArrayGet* instruction) { // We need a temporary register for the read barrier marking slow // path in CodeGeneratorMIPS64::GenerateArrayLoadWithBakerReadBarrier. if (object_array_get_with_read_barrier && kUseBakerReadBarrier) { - locations->AddTemp(Location::RequiresRegister()); + bool temp_needed = instruction->GetIndex()->IsConstant() + ? !kBakerReadBarrierThunksEnableForFields + : !kBakerReadBarrierThunksEnableForArrays; + if (temp_needed) { + locations->AddTemp(Location::RequiresRegister()); + } } } @@ -2254,16 +2267,32 @@ void InstructionCodeGeneratorMIPS64::VisitArrayGet(HArrayGet* instruction) { // /* HeapReference<Object> */ out = // *(obj + data_offset + index * sizeof(HeapReference<Object>)) if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) { - Location temp = locations->GetTemp(0); + bool temp_needed = index.IsConstant() + ? !kBakerReadBarrierThunksEnableForFields + : !kBakerReadBarrierThunksEnableForArrays; + Location temp = temp_needed ? locations->GetTemp(0) : Location::NoLocation(); // Note that a potential implicit null check is handled in this // CodeGeneratorMIPS64::GenerateArrayLoadWithBakerReadBarrier call. - codegen_->GenerateArrayLoadWithBakerReadBarrier(instruction, - out_loc, - obj, - data_offset, - index, - temp, - /* needs_null_check */ true); + DCHECK(!instruction->CanDoImplicitNullCheckOn(instruction->InputAt(0))); + if (index.IsConstant()) { + // Array load with a constant index can be treated as a field load. 
+ size_t offset = + (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_4) + data_offset; + codegen_->GenerateFieldLoadWithBakerReadBarrier(instruction, + out_loc, + obj, + offset, + temp, + /* needs_null_check */ false); + } else { + codegen_->GenerateArrayLoadWithBakerReadBarrier(instruction, + out_loc, + obj, + data_offset, + index, + temp, + /* needs_null_check */ false); + } } else { GpuRegister out = out_loc.AsRegister<GpuRegister>(); if (index.IsConstant()) { @@ -2666,6 +2695,7 @@ void InstructionCodeGeneratorMIPS64::VisitBoundsCheck(HBoundsCheck* instruction) // Temp is used for read barrier. static size_t NumberOfInstanceOfTemps(TypeCheckKind type_check_kind) { if (kEmitCompilerReadBarrier && + !(kUseBakerReadBarrier && kBakerReadBarrierThunksEnableForFields) && (kUseBakerReadBarrier || type_check_kind == TypeCheckKind::kAbstractClassCheck || type_check_kind == TypeCheckKind::kClassHierarchyCheck || @@ -4118,7 +4148,9 @@ void LocationsBuilderMIPS64::HandleFieldGet(HInstruction* instruction, if (object_field_get_with_read_barrier && kUseBakerReadBarrier) { // We need a temporary register for the read barrier marking slow // path in CodeGeneratorMIPS64::GenerateFieldLoadWithBakerReadBarrier. - locations->AddTemp(Location::RequiresRegister()); + if (!kBakerReadBarrierThunksEnableForFields) { + locations->AddTemp(Location::RequiresRegister()); + } } } @@ -4168,7 +4200,8 @@ void InstructionCodeGeneratorMIPS64::HandleFieldGet(HInstruction* instruction, if (type == Primitive::kPrimNot) { // /* HeapReference<Object> */ dst = *(obj + offset) if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) { - Location temp_loc = locations->GetTemp(0); + Location temp_loc = + kBakerReadBarrierThunksEnableForFields ? Location::NoLocation() : locations->GetTemp(0); // Note that a potential implicit null check is handled in this // CodeGeneratorMIPS64::GenerateFieldLoadWithBakerReadBarrier call. codegen_->GenerateFieldLoadWithBakerReadBarrier(instruction, @@ -4318,7 +4351,9 @@ void InstructionCodeGeneratorMIPS64::GenerateReferenceLoadOneRegister( GpuRegister out_reg = out.AsRegister<GpuRegister>(); if (read_barrier_option == kWithReadBarrier) { CHECK(kEmitCompilerReadBarrier); - DCHECK(maybe_temp.IsRegister()) << maybe_temp; + if (!kUseBakerReadBarrier || !kBakerReadBarrierThunksEnableForFields) { + DCHECK(maybe_temp.IsRegister()) << maybe_temp; + } if (kUseBakerReadBarrier) { // Load with fast path based Baker's read barrier. // /* HeapReference<Object> */ out = *(out + offset) @@ -4358,7 +4393,9 @@ void InstructionCodeGeneratorMIPS64::GenerateReferenceLoadTwoRegisters( if (read_barrier_option == kWithReadBarrier) { CHECK(kEmitCompilerReadBarrier); if (kUseBakerReadBarrier) { - DCHECK(maybe_temp.IsRegister()) << maybe_temp; + if (!kBakerReadBarrierThunksEnableForFields) { + DCHECK(maybe_temp.IsRegister()) << maybe_temp; + } // Load with fast path based Baker's read barrier. // /* HeapReference<Object> */ out = *(obj + offset) codegen_->GenerateFieldLoadWithBakerReadBarrier(instruction, @@ -4381,55 +4418,134 @@ void InstructionCodeGeneratorMIPS64::GenerateReferenceLoadTwoRegisters( } } -void InstructionCodeGeneratorMIPS64::GenerateGcRootFieldLoad( - HInstruction* instruction, - Location root, - GpuRegister obj, - uint32_t offset, - ReadBarrierOption read_barrier_option) { +static inline int GetBakerMarkThunkNumber(GpuRegister reg) { + static_assert(BAKER_MARK_INTROSPECTION_REGISTER_COUNT == 20, "Expecting equal"); + if (reg >= V0 && reg <= T2) { // 13 consequtive regs. 
+ return reg - V0; + } else if (reg >= S2 && reg <= S7) { // 6 consequtive regs. + return 13 + (reg - S2); + } else if (reg == S8) { // One more. + return 19; + } + LOG(FATAL) << "Unexpected register " << reg; + UNREACHABLE(); +} + +static inline int GetBakerMarkFieldArrayThunkDisplacement(GpuRegister reg, bool short_offset) { + int num = GetBakerMarkThunkNumber(reg) + + (short_offset ? BAKER_MARK_INTROSPECTION_REGISTER_COUNT : 0); + return num * BAKER_MARK_INTROSPECTION_FIELD_ARRAY_ENTRY_SIZE; +} + +static inline int GetBakerMarkGcRootThunkDisplacement(GpuRegister reg) { + return GetBakerMarkThunkNumber(reg) * BAKER_MARK_INTROSPECTION_GC_ROOT_ENTRY_SIZE + + BAKER_MARK_INTROSPECTION_GC_ROOT_ENTRIES_OFFSET; +} + +void InstructionCodeGeneratorMIPS64::GenerateGcRootFieldLoad(HInstruction* instruction, + Location root, + GpuRegister obj, + uint32_t offset, + ReadBarrierOption read_barrier_option, + Mips64Label* label_low) { + if (label_low != nullptr) { + DCHECK_EQ(offset, 0x5678u); + } GpuRegister root_reg = root.AsRegister<GpuRegister>(); if (read_barrier_option == kWithReadBarrier) { DCHECK(kEmitCompilerReadBarrier); if (kUseBakerReadBarrier) { // Fast path implementation of art::ReadBarrier::BarrierForRoot when // Baker's read barrier are used: - // - // root = obj.field; - // temp = Thread::Current()->pReadBarrierMarkReg ## root.reg() - // if (temp != null) { - // root = temp(root) - // } - - // /* GcRoot<mirror::Object> */ root = *(obj + offset) - __ LoadFromOffset(kLoadUnsignedWord, root_reg, obj, offset); - static_assert( - sizeof(mirror::CompressedReference<mirror::Object>) == sizeof(GcRoot<mirror::Object>), - "art::mirror::CompressedReference<mirror::Object> and art::GcRoot<mirror::Object> " - "have different sizes."); - static_assert(sizeof(mirror::CompressedReference<mirror::Object>) == sizeof(int32_t), - "art::mirror::CompressedReference<mirror::Object> and int32_t " - "have different sizes."); - - // Slow path marking the GC root `root`. - Location temp = Location::RegisterLocation(T9); - SlowPathCodeMIPS64* slow_path = - new (GetGraph()->GetArena()) ReadBarrierMarkSlowPathMIPS64( - instruction, - root, - /*entrypoint*/ temp); - codegen_->AddSlowPath(slow_path); + if (kBakerReadBarrierThunksEnableForGcRoots) { + // Note that we do not actually check the value of `GetIsGcMarking()` + // to decide whether to mark the loaded GC root or not. Instead, we + // load into `temp` (T9) the read barrier mark introspection entrypoint. + // If `temp` is null, it means that `GetIsGcMarking()` is false, and + // vice versa. + // + // We use thunks for the slow path. That thunk checks the reference + // and jumps to the entrypoint if needed. + // + // temp = Thread::Current()->pReadBarrierMarkReg00 + // // AKA &art_quick_read_barrier_mark_introspection. + // GcRoot<mirror::Object> root = *(obj+offset); // Original reference load. + // if (temp != nullptr) { + // temp = &gc_root_thunk<root_reg> + // root = temp(root) + // } + + const int32_t entry_point_offset = + Thread::ReadBarrierMarkEntryPointsOffset<kMips64PointerSize>(0); + const int thunk_disp = GetBakerMarkGcRootThunkDisplacement(root_reg); + int16_t offset_low = Low16Bits(offset); + int16_t offset_high = High16Bits(offset - offset_low); // Accounts for sign + // extension in lwu. + bool short_offset = IsInt<16>(static_cast<int32_t>(offset)); + GpuRegister base = short_offset ? obj : TMP; + // Loading the entrypoint does not require a load acquire since it is only changed when + // threads are suspended or running a checkpoint. 
+ __ LoadFromOffset(kLoadDoubleword, T9, TR, entry_point_offset); + if (!short_offset) { + DCHECK(!label_low); + __ Daui(base, obj, offset_high); + } + __ Beqz(T9, 2); // Skip jialc. + if (label_low != nullptr) { + DCHECK(short_offset); + __ Bind(label_low); + } + // /* GcRoot<mirror::Object> */ root = *(obj + offset) + __ LoadFromOffset(kLoadUnsignedWord, root_reg, base, offset_low); // Single instruction + // in delay slot. + __ Jialc(T9, thunk_disp); + } else { + // Note that we do not actually check the value of `GetIsGcMarking()` + // to decide whether to mark the loaded GC root or not. Instead, we + // load into `temp` (T9) the read barrier mark entry point corresponding + // to register `root`. If `temp` is null, it means that `GetIsGcMarking()` + // is false, and vice versa. + // + // GcRoot<mirror::Object> root = *(obj+offset); // Original reference load. + // temp = Thread::Current()->pReadBarrierMarkReg ## root.reg() + // if (temp != null) { + // root = temp(root) + // } + + if (label_low != nullptr) { + __ Bind(label_low); + } + // /* GcRoot<mirror::Object> */ root = *(obj + offset) + __ LoadFromOffset(kLoadUnsignedWord, root_reg, obj, offset); + static_assert( + sizeof(mirror::CompressedReference<mirror::Object>) == sizeof(GcRoot<mirror::Object>), + "art::mirror::CompressedReference<mirror::Object> and art::GcRoot<mirror::Object> " + "have different sizes."); + static_assert(sizeof(mirror::CompressedReference<mirror::Object>) == sizeof(int32_t), + "art::mirror::CompressedReference<mirror::Object> and int32_t " + "have different sizes."); + + // Slow path marking the GC root `root`. + Location temp = Location::RegisterLocation(T9); + SlowPathCodeMIPS64* slow_path = + new (GetGraph()->GetArena()) ReadBarrierMarkSlowPathMIPS64( + instruction, + root, + /*entrypoint*/ temp); + codegen_->AddSlowPath(slow_path); - // temp = Thread::Current()->pReadBarrierMarkReg ## root.reg() - const int32_t entry_point_offset = - Thread::ReadBarrierMarkEntryPointsOffset<kMips64PointerSize>(root.reg() - 1); - // Loading the entrypoint does not require a load acquire since it is only changed when - // threads are suspended or running a checkpoint. - __ LoadFromOffset(kLoadDoubleword, temp.AsRegister<GpuRegister>(), TR, entry_point_offset); - // The entrypoint is null when the GC is not marking, this prevents one load compared to - // checking GetIsGcMarking. - __ Bnezc(temp.AsRegister<GpuRegister>(), slow_path->GetEntryLabel()); - __ Bind(slow_path->GetExitLabel()); + const int32_t entry_point_offset = + Thread::ReadBarrierMarkEntryPointsOffset<kMips64PointerSize>(root.reg() - 1); + // Loading the entrypoint does not require a load acquire since it is only changed when + // threads are suspended or running a checkpoint. + __ LoadFromOffset(kLoadDoubleword, temp.AsRegister<GpuRegister>(), TR, entry_point_offset); + __ Bnezc(temp.AsRegister<GpuRegister>(), slow_path->GetEntryLabel()); + __ Bind(slow_path->GetExitLabel()); + } } else { + if (label_low != nullptr) { + __ Bind(label_low); + } // GC root loaded through a slow path for read barriers other // than Baker's. // /* GcRoot<mirror::Object>* */ root = obj + offset @@ -4438,6 +4554,9 @@ void InstructionCodeGeneratorMIPS64::GenerateGcRootFieldLoad( codegen_->GenerateReadBarrierForRootSlow(instruction, root, root); } } else { + if (label_low != nullptr) { + __ Bind(label_low); + } // Plain GC root load with no read barrier. 
// /* GcRoot<mirror::Object> */ root = *(obj + offset) __ LoadFromOffset(kLoadUnsignedWord, root_reg, obj, offset); @@ -4455,6 +4574,71 @@ void CodeGeneratorMIPS64::GenerateFieldLoadWithBakerReadBarrier(HInstruction* in DCHECK(kEmitCompilerReadBarrier); DCHECK(kUseBakerReadBarrier); + if (kBakerReadBarrierThunksEnableForFields) { + // Note that we do not actually check the value of `GetIsGcMarking()` + // to decide whether to mark the loaded reference or not. Instead, we + // load into `temp` (T9) the read barrier mark introspection entrypoint. + // If `temp` is null, it means that `GetIsGcMarking()` is false, and + // vice versa. + // + // We use thunks for the slow path. That thunk checks the reference + // and jumps to the entrypoint if needed. If the holder is not gray, + // it issues a load-load memory barrier and returns to the original + // reference load. + // + // temp = Thread::Current()->pReadBarrierMarkReg00 + // // AKA &art_quick_read_barrier_mark_introspection. + // if (temp != nullptr) { + // temp = &field_array_thunk<holder_reg> + // temp() + // } + // not_gray_return_address: + // // If the offset is too large to fit into the lw instruction, we + // // use an adjusted base register (TMP) here. This register + // // receives bits 16 ... 31 of the offset before the thunk invocation + // // and the thunk benefits from it. + // HeapReference<mirror::Object> reference = *(obj+offset); // Original reference load. + // gray_return_address: + + DCHECK(temp.IsInvalid()); + bool short_offset = IsInt<16>(static_cast<int32_t>(offset)); + const int32_t entry_point_offset = + Thread::ReadBarrierMarkEntryPointsOffset<kMips64PointerSize>(0); + // There may have or may have not been a null check if the field offset is smaller than + // the page size. + // There must've been a null check in case it's actually a load from an array. + // We will, however, perform an explicit null check in the thunk as it's easier to + // do it than not. + if (instruction->IsArrayGet()) { + DCHECK(!needs_null_check); + } + const int thunk_disp = GetBakerMarkFieldArrayThunkDisplacement(obj, short_offset); + // Loading the entrypoint does not require a load acquire since it is only changed when + // threads are suspended or running a checkpoint. + __ LoadFromOffset(kLoadDoubleword, T9, TR, entry_point_offset); + GpuRegister ref_reg = ref.AsRegister<GpuRegister>(); + if (short_offset) { + __ Beqzc(T9, 2); // Skip jialc. + __ Nop(); // In forbidden slot. + __ Jialc(T9, thunk_disp); + // /* HeapReference<Object> */ ref = *(obj + offset) + __ LoadFromOffset(kLoadUnsignedWord, ref_reg, obj, offset); // Single instruction. + } else { + int16_t offset_low = Low16Bits(offset); + int16_t offset_high = High16Bits(offset - offset_low); // Accounts for sign extension in lwu. + __ Beqz(T9, 2); // Skip jialc. + __ Daui(TMP, obj, offset_high); // In delay slot. + __ Jialc(T9, thunk_disp); + // /* HeapReference<Object> */ ref = *(obj + offset) + __ LoadFromOffset(kLoadUnsignedWord, ref_reg, TMP, offset_low); // Single instruction. 
+ } + if (needs_null_check) { + MaybeRecordImplicitNullCheck(instruction); + } + __ MaybeUnpoisonHeapReference(ref_reg); + return; + } + // /* HeapReference<Object> */ ref = *(obj + offset) Location no_index = Location::NoLocation(); ScaleFactor no_scale_factor = TIMES_1; @@ -4481,9 +4665,57 @@ void CodeGeneratorMIPS64::GenerateArrayLoadWithBakerReadBarrier(HInstruction* in static_assert( sizeof(mirror::HeapReference<mirror::Object>) == sizeof(int32_t), "art::mirror::HeapReference<art::mirror::Object> and int32_t have different sizes."); + ScaleFactor scale_factor = TIMES_4; + + if (kBakerReadBarrierThunksEnableForArrays) { + // Note that we do not actually check the value of `GetIsGcMarking()` + // to decide whether to mark the loaded reference or not. Instead, we + // load into `temp` (T9) the read barrier mark introspection entrypoint. + // If `temp` is null, it means that `GetIsGcMarking()` is false, and + // vice versa. + // + // We use thunks for the slow path. That thunk checks the reference + // and jumps to the entrypoint if needed. If the holder is not gray, + // it issues a load-load memory barrier and returns to the original + // reference load. + // + // temp = Thread::Current()->pReadBarrierMarkReg00 + // // AKA &art_quick_read_barrier_mark_introspection. + // if (temp != nullptr) { + // temp = &field_array_thunk<holder_reg> + // temp() + // } + // not_gray_return_address: + // // The element address is pre-calculated in the TMP register before the + // // thunk invocation and the thunk benefits from it. + // HeapReference<mirror::Object> reference = data[index]; // Original reference load. + // gray_return_address: + + DCHECK(temp.IsInvalid()); + DCHECK(index.IsValid()); + const int32_t entry_point_offset = + Thread::ReadBarrierMarkEntryPointsOffset<kMips64PointerSize>(0); + // We will not do the explicit null check in the thunk as some form of a null check + // must've been done earlier. + DCHECK(!needs_null_check); + const int thunk_disp = GetBakerMarkFieldArrayThunkDisplacement(obj, /* short_offset */ false); + // Loading the entrypoint does not require a load acquire since it is only changed when + // threads are suspended or running a checkpoint. + __ LoadFromOffset(kLoadDoubleword, T9, TR, entry_point_offset); + __ Beqz(T9, 2); // Skip jialc. + GpuRegister ref_reg = ref.AsRegister<GpuRegister>(); + GpuRegister index_reg = index.AsRegister<GpuRegister>(); + __ Dlsa(TMP, index_reg, obj, scale_factor); // In delay slot. + __ Jialc(T9, thunk_disp); + // /* HeapReference<Object> */ ref = *(obj + data_offset + (index << scale_factor)) + DCHECK(IsInt<16>(static_cast<int32_t>(data_offset))) << data_offset; + __ LoadFromOffset(kLoadUnsignedWord, ref_reg, TMP, data_offset); // Single instruction. + __ MaybeUnpoisonHeapReference(ref_reg); + return; + } + // /* HeapReference<Object> */ ref = // *(obj + data_offset + index * sizeof(HeapReference<Object>)) - ScaleFactor scale_factor = TIMES_4; GenerateReferenceLoadWithBakerReadBarrier(instruction, ref, obj, @@ -5278,8 +5510,13 @@ void InstructionCodeGeneratorMIPS64::VisitLoadClass(HLoadClass* cls) NO_THREAD_S GpuRegister temp = non_baker_read_barrier ? 
out : locations->GetTemp(0).AsRegister<GpuRegister>(); - codegen_->EmitPcRelativeAddressPlaceholderHigh(bss_info_high, temp, info_low); - GenerateGcRootFieldLoad(cls, out_loc, temp, /* placeholder */ 0x5678, read_barrier_option); + codegen_->EmitPcRelativeAddressPlaceholderHigh(bss_info_high, temp); + GenerateGcRootFieldLoad(cls, + out_loc, + temp, + /* placeholder */ 0x5678, + read_barrier_option, + &info_low->label); generate_null_check = true; break; } @@ -5399,12 +5636,13 @@ void InstructionCodeGeneratorMIPS64::VisitLoadString(HLoadString* load) NO_THREA GpuRegister temp = non_baker_read_barrier ? out : locations->GetTemp(0).AsRegister<GpuRegister>(); - codegen_->EmitPcRelativeAddressPlaceholderHigh(info_high, temp, info_low); + codegen_->EmitPcRelativeAddressPlaceholderHigh(info_high, temp); GenerateGcRootFieldLoad(load, out_loc, temp, /* placeholder */ 0x5678, - kCompilerReadBarrierOption); + kCompilerReadBarrierOption, + &info_low->label); SlowPathCodeMIPS64* slow_path = new (GetGraph()->GetArena()) LoadStringSlowPathMIPS64(load, info_high); codegen_->AddSlowPath(slow_path); diff --git a/compiler/optimizing/code_generator_mips64.h b/compiler/optimizing/code_generator_mips64.h index c94cc93dad..d03a9eabd4 100644 --- a/compiler/optimizing/code_generator_mips64.h +++ b/compiler/optimizing/code_generator_mips64.h @@ -281,7 +281,8 @@ class InstructionCodeGeneratorMIPS64 : public InstructionCodeGenerator { Location root, GpuRegister obj, uint32_t offset, - ReadBarrierOption read_barrier_option); + ReadBarrierOption read_barrier_option, + Mips64Label* label_low = nullptr); void GenerateTestAndBranch(HInstruction* instruction, size_t condition_input_index, @@ -592,7 +593,7 @@ class CodeGeneratorMIPS64 : public CodeGenerator { void EmitPcRelativeAddressPlaceholderHigh(PcRelativePatchInfo* info_high, GpuRegister out, - PcRelativePatchInfo* info_low); + PcRelativePatchInfo* info_low = nullptr); void PatchJitRootUse(uint8_t* code, const uint8_t* roots_data, diff --git a/compiler/optimizing/common_arm.h b/compiler/optimizing/common_arm.h index 01304ac35b..8fcceedcf6 100644 --- a/compiler/optimizing/common_arm.h +++ b/compiler/optimizing/common_arm.h @@ -227,14 +227,6 @@ inline Location LocationFrom(const vixl::aarch32::SRegister& low, return Location::FpuRegisterPairLocation(low.GetCode(), high.GetCode()); } -inline bool ShifterOperandSupportsExtension(HInstruction* instruction) { - DCHECK(HasShifterOperand(instruction, kArm)); - // TODO: HAdd applied to the other integral types could make use of - // the SXTAB, SXTAH, UXTAB and UXTAH instructions. - return instruction->GetType() == Primitive::kPrimLong && - (instruction->IsAdd() || instruction->IsSub()); -} - } // namespace helpers } // namespace arm } // namespace art diff --git a/compiler/optimizing/instruction_builder.cc b/compiler/optimizing/instruction_builder.cc index 839f328a4f..8054140924 100644 --- a/compiler/optimizing/instruction_builder.cc +++ b/compiler/optimizing/instruction_builder.cc @@ -664,10 +664,7 @@ void HInstructionBuilder::BuildReturn(const Instruction& instruction, // TODO: remove redundant constructor fences (b/36656456). if (RequiresConstructorBarrier(dex_compilation_unit_, compiler_driver_)) { // Compiling instance constructor. 
- if (kIsDebugBuild) { - std::string method_name = graph_->GetMethodName(); - CHECK_EQ(std::string("<init>"), method_name); - } + DCHECK_STREQ("<init>", graph_->GetMethodName()); HInstruction* fence_target = current_this_parameter_; DCHECK(fence_target != nullptr); @@ -710,29 +707,18 @@ static InvokeType GetInvokeTypeFromOpCode(Instruction::Code opcode) { ArtMethod* HInstructionBuilder::ResolveMethod(uint16_t method_idx, InvokeType invoke_type) { ScopedObjectAccess soa(Thread::Current()); - StackHandleScope<2> hs(soa.Self()); ClassLinker* class_linker = dex_compilation_unit_->GetClassLinker(); Handle<mirror::ClassLoader> class_loader = dex_compilation_unit_->GetClassLoader(); - Handle<mirror::Class> compiling_class(hs.NewHandle(GetCompilingClass())); - // We fetch the referenced class eagerly (that is, the class pointed by in the MethodId - // at method_idx), as `CanAccessResolvedMethod` expects it be be in the dex cache. - Handle<mirror::Class> methods_class(hs.NewHandle(class_linker->ResolveReferencedClassOfMethod( - method_idx, dex_compilation_unit_->GetDexCache(), class_loader))); - - if (UNLIKELY(methods_class == nullptr)) { - // Clean up any exception left by type resolution. - soa.Self()->ClearException(); - return nullptr; - } - ArtMethod* resolved_method = class_linker->ResolveMethod<ClassLinker::kForceICCECheck>( - *dex_compilation_unit_->GetDexFile(), - method_idx, - dex_compilation_unit_->GetDexCache(), - class_loader, - /* referrer */ nullptr, - invoke_type); + ArtMethod* resolved_method = + class_linker->ResolveMethod<ClassLinker::ResolveMode::kCheckICCEAndIAE>( + *dex_compilation_unit_->GetDexFile(), + method_idx, + dex_compilation_unit_->GetDexCache(), + class_loader, + graph_->GetArtMethod(), + invoke_type); if (UNLIKELY(resolved_method == nullptr)) { // Clean up any exception left by type resolution. @@ -740,17 +726,14 @@ ArtMethod* HInstructionBuilder::ResolveMethod(uint16_t method_idx, InvokeType in return nullptr; } - // Check access. The class linker has a fast path for looking into the dex cache - // and does not check the access if it hits it. - if (compiling_class == nullptr) { + // The referrer may be unresolved for AOT if we're compiling a class that cannot be + // resolved because, for example, we don't find a superclass in the classpath. + if (graph_->GetArtMethod() == nullptr) { + // The class linker cannot check access without a referrer, so we have to do it. + // Fall back to HInvokeUnresolved if the method isn't public. if (!resolved_method->IsPublic()) { return nullptr; } - } else if (!compiling_class->CanAccessResolvedMethod(resolved_method->GetDeclaringClass(), - resolved_method, - dex_compilation_unit_->GetDexCache().Get(), - method_idx)) { - return nullptr; } // We have to special case the invoke-super case, as ClassLinker::ResolveMethod does not. @@ -758,19 +741,26 @@ ArtMethod* HInstructionBuilder::ResolveMethod(uint16_t method_idx, InvokeType in // make this an invoke-unresolved to handle cross-dex invokes or abstract super methods, both of // which require runtime handling. if (invoke_type == kSuper) { + ObjPtr<mirror::Class> compiling_class = GetCompilingClass(); if (compiling_class == nullptr) { // We could not determine the method's class we need to wait until runtime. 
DCHECK(Runtime::Current()->IsAotCompiler()); return nullptr; } - if (!methods_class->IsAssignableFrom(compiling_class.Get())) { + ObjPtr<mirror::Class> referenced_class = class_linker->LookupResolvedType( + *dex_compilation_unit_->GetDexFile(), + dex_compilation_unit_->GetDexFile()->GetMethodId(method_idx).class_idx_, + dex_compilation_unit_->GetDexCache().Get(), + class_loader.Get()); + DCHECK(referenced_class != nullptr); // We have already resolved a method from this class. + if (!referenced_class->IsAssignableFrom(compiling_class)) { // We cannot statically determine the target method. The runtime will throw a // NoSuchMethodError on this one. return nullptr; } ArtMethod* actual_method; - if (methods_class->IsInterface()) { - actual_method = methods_class->FindVirtualMethodForInterfaceSuper( + if (referenced_class->IsInterface()) { + actual_method = referenced_class->FindVirtualMethodForInterfaceSuper( resolved_method, class_linker->GetImagePointerSize()); } else { uint16_t vtable_index = resolved_method->GetMethodIndex(); @@ -797,12 +787,6 @@ ArtMethod* HInstructionBuilder::ResolveMethod(uint16_t method_idx, InvokeType in resolved_method = actual_method; } - // Check for incompatible class changes. The class linker has a fast path for - // looking into the dex cache and does not check incompatible class changes if it hits it. - if (resolved_method->CheckIncompatibleClassChange(invoke_type)) { - return nullptr; - } - return resolved_method; } diff --git a/compiler/optimizing/instruction_simplifier.cc b/compiler/optimizing/instruction_simplifier.cc index d14716601c..f2a8cc0333 100644 --- a/compiler/optimizing/instruction_simplifier.cc +++ b/compiler/optimizing/instruction_simplifier.cc @@ -1867,33 +1867,35 @@ void InstructionSimplifierVisitor::SimplifySystemArrayCopy(HInvoke* instruction) ArtMethod* method = nullptr; switch (source_component_type) { case Primitive::kPrimBoolean: - method = system->FindDeclaredDirectMethod("arraycopy", "([ZI[ZII)V", image_size); + method = system->FindClassMethod("arraycopy", "([ZI[ZII)V", image_size); break; case Primitive::kPrimByte: - method = system->FindDeclaredDirectMethod("arraycopy", "([BI[BII)V", image_size); + method = system->FindClassMethod("arraycopy", "([BI[BII)V", image_size); break; case Primitive::kPrimChar: - method = system->FindDeclaredDirectMethod("arraycopy", "([CI[CII)V", image_size); + method = system->FindClassMethod("arraycopy", "([CI[CII)V", image_size); break; case Primitive::kPrimShort: - method = system->FindDeclaredDirectMethod("arraycopy", "([SI[SII)V", image_size); + method = system->FindClassMethod("arraycopy", "([SI[SII)V", image_size); break; case Primitive::kPrimInt: - method = system->FindDeclaredDirectMethod("arraycopy", "([II[III)V", image_size); + method = system->FindClassMethod("arraycopy", "([II[III)V", image_size); break; case Primitive::kPrimFloat: - method = system->FindDeclaredDirectMethod("arraycopy", "([FI[FII)V", image_size); + method = system->FindClassMethod("arraycopy", "([FI[FII)V", image_size); break; case Primitive::kPrimLong: - method = system->FindDeclaredDirectMethod("arraycopy", "([JI[JII)V", image_size); + method = system->FindClassMethod("arraycopy", "([JI[JII)V", image_size); break; case Primitive::kPrimDouble: - method = system->FindDeclaredDirectMethod("arraycopy", "([DI[DII)V", image_size); + method = system->FindClassMethod("arraycopy", "([DI[DII)V", image_size); break; default: LOG(FATAL) << "Unreachable"; } DCHECK(method != nullptr); + DCHECK(method->IsStatic()); + 
DCHECK(method->GetDeclaringClass() == system); invoke->SetResolvedMethod(method); // Sharpen the new invoke. Note that we do not update the dex method index of // the invoke, as we would need to look it up in the current dex file, and it diff --git a/compiler/optimizing/instruction_simplifier_arm.cc b/compiler/optimizing/instruction_simplifier_arm.cc index fe22595258..a025fb10ce 100644 --- a/compiler/optimizing/instruction_simplifier_arm.cc +++ b/compiler/optimizing/instruction_simplifier_arm.cc @@ -29,8 +29,6 @@ using helpers::HasShifterOperand; namespace arm { -using helpers::ShifterOperandSupportsExtension; - bool InstructionSimplifierArmVisitor::TryMergeIntoShifterOperand(HInstruction* use, HInstruction* bitfield_op, bool do_merge) { @@ -76,7 +74,7 @@ bool InstructionSimplifierArmVisitor::TryMergeIntoShifterOperand(HInstruction* u : kMaxLongShiftDistance; if (HDataProcWithShifterOp::IsExtensionOp(op_kind)) { - if (!ShifterOperandSupportsExtension(use)) { + if (!use->IsAdd() && (!use->IsSub() || use->GetType() != Primitive::kPrimLong)) { return false; } // Shift by 1 is a special case that results in the same number and type of instructions diff --git a/compiler/optimizing/reference_type_propagation.cc b/compiler/optimizing/reference_type_propagation.cc index 98332d35fb..f172e16ff9 100644 --- a/compiler/optimizing/reference_type_propagation.cc +++ b/compiler/optimizing/reference_type_propagation.cc @@ -525,8 +525,8 @@ void ReferenceTypePropagation::RTPVisitor::SetClassAsTypeInfo(HInstruction* inst // Use a null loader. We should probably use the compiling method's class loader, // but then we would need to pass it to RTPVisitor just for this debug check. Since // the method is from the String class, the null loader is good enough. - Handle<mirror::ClassLoader> loader; - ArtMethod* method = cl->ResolveMethod<ClassLinker::kNoICCECheckForCache>( + Handle<mirror::ClassLoader> loader(hs.NewHandle<mirror::ClassLoader>(nullptr)); + ArtMethod* method = cl->ResolveMethod<ClassLinker::ResolveMode::kNoChecks>( dex_file, invoke->GetDexMethodIndex(), dex_cache, loader, nullptr, kDirect); DCHECK(method != nullptr); mirror::Class* declaring_class = method->GetDeclaringClass(); diff --git a/compiler/optimizing/scheduler_arm.cc b/compiler/optimizing/scheduler_arm.cc index e78cd78aa2..f025c0a2d4 100644 --- a/compiler/optimizing/scheduler_arm.cc +++ b/compiler/optimizing/scheduler_arm.cc @@ -167,22 +167,346 @@ void SchedulingLatencyVisitorARM::VisitUShr(HUShr* instr) { HandleShiftLatencies(instr); } -void SchedulingLatencyVisitorARM::VisitCondition(HCondition* instr) { - switch (instr->GetLeft()->GetType()) { - case Primitive::kPrimLong: - last_visited_internal_latency_ = 4 * kArmIntegerOpLatency; +void SchedulingLatencyVisitorARM::HandleGenerateConditionWithZero(IfCondition condition) { + switch (condition) { + case kCondEQ: + case kCondBE: + case kCondNE: + case kCondA: + last_visited_internal_latency_ += kArmIntegerOpLatency; + last_visited_latency_ = kArmIntegerOpLatency; break; - case Primitive::kPrimFloat: - case Primitive::kPrimDouble: - last_visited_internal_latency_ = 2 * kArmFloatingPointOpLatency; + case kCondGE: + // Mvn + last_visited_internal_latency_ += kArmIntegerOpLatency; + FALLTHROUGH_INTENDED; + case kCondLT: + // Lsr + last_visited_latency_ = kArmIntegerOpLatency; + break; + case kCondAE: + // Trivially true. + // Mov + last_visited_latency_ = kArmIntegerOpLatency; + break; + case kCondB: + // Trivially false. 
+      // Mov
+      last_visited_latency_ = kArmIntegerOpLatency;
       break;
     default:
-      last_visited_internal_latency_ = 2 * kArmIntegerOpLatency;
+      LOG(FATAL) << "Unexpected condition " << condition;
+      UNREACHABLE();
+  }
+}
+
+void SchedulingLatencyVisitorARM::HandleGenerateLongTestConstant(HCondition* condition) {
+  DCHECK_EQ(condition->GetLeft()->GetType(), Primitive::kPrimLong);
+
+  IfCondition cond = condition->GetCondition();
+
+  HInstruction* right = condition->InputAt(1);
+
+  int64_t value = Uint64ConstantFrom(right);
+
+  // Comparisons against 0 are common enough, so codegen has special handling for them.
+  if (value == 0) {
+    switch (cond) {
+      case kCondNE:
+      case kCondA:
+      case kCondEQ:
+      case kCondBE:
+        // Orrs
+        last_visited_internal_latency_ += kArmIntegerOpLatency;
+        return;
+      case kCondLT:
+      case kCondGE:
+        // Cmp
+        last_visited_internal_latency_ += kArmIntegerOpLatency;
+        return;
+      case kCondB:
+      case kCondAE:
+        // Cmp
+        last_visited_internal_latency_ += kArmIntegerOpLatency;
+        return;
+      default:
+        break;
+    }
+  }
+
+  switch (cond) {
+    case kCondEQ:
+    case kCondNE:
+    case kCondB:
+    case kCondBE:
+    case kCondA:
+    case kCondAE: {
+      // Cmp, IT, Cmp
+      last_visited_internal_latency_ += 3 * kArmIntegerOpLatency;
+      break;
+    }
+    case kCondLE:
+    case kCondGT:
+      // Trivially true or false.
+      if (value == std::numeric_limits<int64_t>::max()) {
+        // Cmp
+        last_visited_internal_latency_ += kArmIntegerOpLatency;
+        break;
+      }
+      FALLTHROUGH_INTENDED;
+    case kCondGE:
+    case kCondLT: {
+      // Cmp, Sbcs
+      last_visited_internal_latency_ += 2 * kArmIntegerOpLatency;
+      break;
+    }
+    default:
+      LOG(FATAL) << "Unreachable";
+      UNREACHABLE();
+  }
+}
+
+void SchedulingLatencyVisitorARM::HandleGenerateLongTest(HCondition* condition) {
+  DCHECK_EQ(condition->GetLeft()->GetType(), Primitive::kPrimLong);
+
+  IfCondition cond = condition->GetCondition();
+
+  switch (cond) {
+    case kCondEQ:
+    case kCondNE:
+    case kCondB:
+    case kCondBE:
+    case kCondA:
+    case kCondAE: {
+      // Cmp, IT, Cmp
+      last_visited_internal_latency_ += 3 * kArmIntegerOpLatency;
       break;
+    }
+    case kCondLE:
+    case kCondGT:
+    case kCondGE:
+    case kCondLT: {
+      // Cmp, Sbcs
+      last_visited_internal_latency_ += 2 * kArmIntegerOpLatency;
+      break;
+    }
+    default:
+      LOG(FATAL) << "Unreachable";
+      UNREACHABLE();
   }
+}
+
+// The GenerateTest series of functions are all counted as internal latency.
+void SchedulingLatencyVisitorARM::HandleGenerateTest(HCondition* condition) {
+  const Primitive::Type type = condition->GetLeft()->GetType();
+
+  if (type == Primitive::kPrimLong) {
+    condition->InputAt(1)->IsConstant()
+        ? HandleGenerateLongTestConstant(condition)
+        : HandleGenerateLongTest(condition);
+  } else if (Primitive::IsFloatingPointType(type)) {
+    // GenerateVcmp + Vmrs
+    last_visited_internal_latency_ += 2 * kArmFloatingPointOpLatency;
+  } else {
+    // Cmp
+    last_visited_internal_latency_ += kArmIntegerOpLatency;
+  }
+}
+
+bool SchedulingLatencyVisitorARM::CanGenerateTest(HCondition* condition) {
+  if (condition->GetLeft()->GetType() == Primitive::kPrimLong) {
+    HInstruction* right = condition->InputAt(1);
+
+    if (right->IsConstant()) {
+      IfCondition c = condition->GetCondition();
+      const uint64_t value = Uint64ConstantFrom(right);
+
+      if (c < kCondLT || c > kCondGE) {
+        if (value != 0) {
+          return false;
+        }
+      } else if (c == kCondLE || c == kCondGT) {
+        if (value < std::numeric_limits<int64_t>::max() &&
+            !codegen_->GetAssembler()->ShifterOperandCanHold(SBC, High32Bits(value + 1), kCcSet)) {
+          return false;
+        }
+      } else if (!codegen_->GetAssembler()->ShifterOperandCanHold(SBC, High32Bits(value), kCcSet)) {
+        return false;
+      }
+    }
+  }
+
+  return true;
+}
+
+void SchedulingLatencyVisitorARM::HandleGenerateConditionGeneric(HCondition* cond) {
+  HandleGenerateTest(cond);
+
+  // Unlike the codegen pass, we cannot check whether the 'out' register IsLow() here,
+  // because scheduling runs before liveness analysis (the location builder) and the
+  // register allocator, so we can only follow one codegen path by assuming out.IsLow()
+  // is true.
+  last_visited_internal_latency_ += 2 * kArmIntegerOpLatency;
   last_visited_latency_ = kArmIntegerOpLatency;
 }
 
+void SchedulingLatencyVisitorARM::HandleGenerateEqualLong(HCondition* cond) {
+  DCHECK_EQ(cond->GetLeft()->GetType(), Primitive::kPrimLong);
+
+  IfCondition condition = cond->GetCondition();
+
+  last_visited_internal_latency_ += 2 * kArmIntegerOpLatency;
+
+  if (condition == kCondNE) {
+    // Orrs, IT, Mov
+    last_visited_internal_latency_ += 3 * kArmIntegerOpLatency;
+  } else {
+    last_visited_internal_latency_ += kArmIntegerOpLatency;
+    HandleGenerateConditionWithZero(condition);
+  }
+}
+
+void SchedulingLatencyVisitorARM::HandleGenerateLongComparesAndJumps() {
+  last_visited_internal_latency_ += 4 * kArmIntegerOpLatency;
+  last_visited_internal_latency_ += kArmBranchLatency;
+}
+
+void SchedulingLatencyVisitorARM::HandleGenerateConditionLong(HCondition* cond) {
+  DCHECK_EQ(cond->GetLeft()->GetType(), Primitive::kPrimLong);
+
+  IfCondition condition = cond->GetCondition();
+  HInstruction* right = cond->InputAt(1);
+
+  if (right->IsConstant()) {
+    // Comparisons against 0 are common enough, so codegen has special handling for them.
+    if (Uint64ConstantFrom(right) == 0) {
+      switch (condition) {
+        case kCondNE:
+        case kCondA:
+        case kCondEQ:
+        case kCondBE:
+          // Orr
+          last_visited_internal_latency_ += kArmIntegerOpLatency;
+          HandleGenerateConditionWithZero(condition);
+          return;
+        case kCondLT:
+        case kCondGE:
+          FALLTHROUGH_INTENDED;
+        case kCondAE:
+        case kCondB:
+          HandleGenerateConditionWithZero(condition);
+          return;
+        case kCondLE:
+        case kCondGT:
+        default:
+          break;
+      }
+    }
+  }
+
+  if ((condition == kCondEQ || condition == kCondNE) &&
+      !CanGenerateTest(cond)) {
+    HandleGenerateEqualLong(cond);
+    return;
+  }
+
+  if (CanGenerateTest(cond)) {
+    HandleGenerateConditionGeneric(cond);
+    return;
+  }
+
+  HandleGenerateLongComparesAndJumps();
+
+  last_visited_internal_latency_ += kArmIntegerOpLatency;
+  last_visited_latency_ = kArmBranchLatency;
+}
+
+void SchedulingLatencyVisitorARM::HandleGenerateConditionIntegralOrNonPrimitive(HCondition* cond) {
+  const Primitive::Type type = cond->GetLeft()->GetType();
+
+  DCHECK(Primitive::IsIntegralType(type) || type == Primitive::kPrimNot) << type;
+
+  if (type == Primitive::kPrimLong) {
+    HandleGenerateConditionLong(cond);
+    return;
+  }
+
+  IfCondition condition = cond->GetCondition();
+  HInstruction* right = cond->InputAt(1);
+  int64_t value;
+
+  if (right->IsConstant()) {
+    value = Uint64ConstantFrom(right);
+
+    // Comparisons against 0 are common enough, so codegen has special handling for them.
+    if (value == 0) {
+      switch (condition) {
+        case kCondNE:
+        case kCondA:
+        case kCondEQ:
+        case kCondBE:
+        case kCondLT:
+        case kCondGE:
+        case kCondAE:
+        case kCondB:
+          HandleGenerateConditionWithZero(condition);
+          return;
+        case kCondLE:
+        case kCondGT:
+        default:
+          break;
+      }
+    }
+  }
+
+  if (condition == kCondEQ || condition == kCondNE) {
+    if (condition == kCondNE) {
+      // CMP, IT, MOV.ne
+      last_visited_internal_latency_ += 2 * kArmIntegerOpLatency;
+      last_visited_latency_ = kArmIntegerOpLatency;
+    } else {
+      last_visited_internal_latency_ += kArmIntegerOpLatency;
+      HandleGenerateConditionWithZero(condition);
+    }
+    return;
+  }
+
+  HandleGenerateConditionGeneric(cond);
+}
+
+void SchedulingLatencyVisitorARM::HandleCondition(HCondition* cond) {
+  if (cond->IsEmittedAtUseSite()) {
+    last_visited_latency_ = 0;
+    return;
+  }
+
+  const Primitive::Type type = cond->GetLeft()->GetType();
+
+  if (Primitive::IsFloatingPointType(type)) {
+    HandleGenerateConditionGeneric(cond);
+    return;
+  }
+
+  DCHECK(Primitive::IsIntegralType(type) || type == Primitive::kPrimNot) << type;
+
+  const IfCondition condition = cond->GetCondition();
+
+  if (type == Primitive::kPrimBoolean &&
+      cond->GetRight()->GetType() == Primitive::kPrimBoolean &&
+      (condition == kCondEQ || condition == kCondNE)) {
+    if (condition == kCondEQ) {
+      last_visited_internal_latency_ = kArmIntegerOpLatency;
+    }
+    last_visited_latency_ = kArmIntegerOpLatency;
+    return;
+  }
+
+  HandleGenerateConditionIntegralOrNonPrimitive(cond);
+}
+
+void SchedulingLatencyVisitorARM::VisitCondition(HCondition* instr) {
+  HandleCondition(instr);
+}
+
 void SchedulingLatencyVisitorARM::VisitCompare(HCompare* instr) {
   Primitive::Type type = instr->InputAt(0)->GetType();
   switch (type) {
@@ -269,7 +593,6 @@ void SchedulingLatencyVisitorARM::VisitDataProcWithShifterOp(HDataProcWithShifte
   const HDataProcWithShifterOp::OpKind op_kind = instruction->GetOpKind();
 
   if (instruction->GetType() == Primitive::kPrimInt) {
-    DCHECK(!HDataProcWithShifterOp::IsExtensionOp(op_kind));
     HandleGenerateDataProcInstruction();
   } else {
     DCHECK_EQ(instruction->GetType(), Primitive::kPrimLong);
diff --git a/compiler/optimizing/scheduler_arm.h b/compiler/optimizing/scheduler_arm.h
index a9f2295c35..fe274d29f9 100644
--- a/compiler/optimizing/scheduler_arm.h
+++ b/compiler/optimizing/scheduler_arm.h
@@ -109,6 +109,17 @@ class SchedulingLatencyVisitorARM : public SchedulingLatencyVisitor {
 #undef DECLARE_VISIT_INSTRUCTION
 
  private:
+  bool CanGenerateTest(HCondition* cond);
+  void HandleGenerateConditionWithZero(IfCondition cond);
+  void HandleGenerateLongTestConstant(HCondition* cond);
+  void HandleGenerateLongTest(HCondition* cond);
+  void HandleGenerateLongComparesAndJumps();
+  void HandleGenerateTest(HCondition* cond);
+  void HandleGenerateConditionGeneric(HCondition* cond);
+  void HandleGenerateEqualLong(HCondition* cond);
+  void HandleGenerateConditionLong(HCondition* cond);
+  void HandleGenerateConditionIntegralOrNonPrimitive(HCondition* cond);
+  void HandleCondition(HCondition* instr);
   void HandleBinaryOperationLantencies(HBinaryOperation* instr);
   void HandleBitwiseOperationLantencies(HBinaryOperation* instr);
   void HandleShiftLatencies(HBinaryOperation* instr);
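
Note on the scheduler hunks above: they estimate the cost of an HCondition by following the same instruction sequences the code generator would emit. Helper instructions (Cmp, Orrs, Sbcs, IT blocks) accumulate into last_visited_internal_latency_, while the instruction that materializes the result, or the final branch, supplies last_visited_latency_. The stand-alone C++ sketch below illustrates only that accumulation pattern; the LatencyModel type, the Cond enum, and the latency constants are hypothetical stand-ins for the example, not ART code.

// Hypothetical, simplified illustration of the latency-accumulation pattern
// used by SchedulingLatencyVisitorARM; not ART code.
#include <cstdint>
#include <iostream>

namespace sketch {

constexpr uint32_t kIntegerOpLatency = 2;  // stand-in for kArmIntegerOpLatency
constexpr uint32_t kBranchLatency = 4;     // stand-in for kArmBranchLatency

enum class Cond { kEQ, kNE, kLT, kGE };

struct LatencyModel {
  uint32_t internal_latency = 0;  // cost of helper instructions (Cmp, Sbcs, IT, ...)
  uint32_t result_latency = 0;    // cost of the instruction producing the result

  // Loosely mirrors the "compare a long against zero" special case: a single
  // Orrs/Cmp is charged as internal latency, kCondGE needs one extra Mvn, and
  // the final Lsr/Mov that materializes 0/1 supplies the result latency.
  void VisitLongCompareWithZero(Cond cond) {
    internal_latency += kIntegerOpLatency;    // Orrs or Cmp
    if (cond == Cond::kGE) {
      internal_latency += kIntegerOpLatency;  // extra Mvn before the shift
    }
    result_latency = kIntegerOpLatency;       // Lsr/Mov producing the value
  }

  // Loosely mirrors the generic long test: Cmp on the high words plus Sbcs,
  // then a conditional branch or select as the result-producing instruction.
  void VisitGenericLongCondition() {
    internal_latency += 2 * kIntegerOpLatency;  // Cmp + Sbcs
    result_latency = kBranchLatency;            // conditional branch/select
  }

  uint32_t Total() const { return internal_latency + result_latency; }
};

}  // namespace sketch

int main() {
  sketch::LatencyModel zero_case;
  zero_case.VisitLongCompareWithZero(sketch::Cond::kGE);

  sketch::LatencyModel generic_case;
  generic_case.VisitGenericLongCondition();

  std::cout << "x >= 0 (long): " << zero_case.Total() << " cycles (modelled)\n"
            << "generic long condition: " << generic_case.Total() << " cycles (modelled)\n";
  return 0;
}

Keeping the two counters separate lets a scheduling heuristic charge the whole helper sequence to the producing node while still exposing only the final-result latency to dependent instructions, which is the same split the visitor methods above maintain.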