Diffstat (limited to 'compiler/optimizing')
-rw-r--r--  compiler/optimizing/code_generator_arm_vixl.cc     |  40
-rw-r--r--  compiler/optimizing/code_generator_mips.cc         | 447
-rw-r--r--  compiler/optimizing/code_generator_mips.h          |   8
-rw-r--r--  compiler/optimizing/code_generator_mips64.cc       | 360
-rw-r--r--  compiler/optimizing/code_generator_mips64.h        |   5
-rw-r--r--  compiler/optimizing/common_arm.h                   |   8
-rw-r--r--  compiler/optimizing/instruction_builder.cc         |  64
-rw-r--r--  compiler/optimizing/instruction_simplifier.cc      |  18
-rw-r--r--  compiler/optimizing/instruction_simplifier_arm.cc  |   4
-rw-r--r--  compiler/optimizing/reference_type_propagation.cc  |   4
-rw-r--r--  compiler/optimizing/scheduler_arm.cc               | 341
-rw-r--r--  compiler/optimizing/scheduler_arm.h                |  11
12 files changed, 1090 insertions(+), 220 deletions(-)
diff --git a/compiler/optimizing/code_generator_arm_vixl.cc b/compiler/optimizing/code_generator_arm_vixl.cc
index b9d4700511..430cdde1f7 100644
--- a/compiler/optimizing/code_generator_arm_vixl.cc
+++ b/compiler/optimizing/code_generator_arm_vixl.cc
@@ -8269,19 +8269,41 @@ void InstructionCodeGeneratorARMVIXL::VisitDataProcWithShifterOp(
const HDataProcWithShifterOp::OpKind op_kind = instruction->GetOpKind();
if (instruction->GetType() == Primitive::kPrimInt) {
- DCHECK(!HDataProcWithShifterOp::IsExtensionOp(op_kind));
-
+ const vixl32::Register first = InputRegisterAt(instruction, 0);
+ const vixl32::Register output = OutputRegister(instruction);
const vixl32::Register second = instruction->InputAt(1)->GetType() == Primitive::kPrimLong
? LowRegisterFrom(locations->InAt(1))
: InputRegisterAt(instruction, 1);
- GenerateDataProcInstruction(kind,
- OutputRegister(instruction),
- InputRegisterAt(instruction, 0),
- Operand(second,
- ShiftFromOpKind(op_kind),
- instruction->GetShiftAmount()),
- codegen_);
+ if (HDataProcWithShifterOp::IsExtensionOp(op_kind)) {
+ DCHECK_EQ(kind, HInstruction::kAdd);
+
+ switch (op_kind) {
+ case HDataProcWithShifterOp::kUXTB:
+ __ Uxtab(output, first, second);
+ break;
+ case HDataProcWithShifterOp::kUXTH:
+ __ Uxtah(output, first, second);
+ break;
+ case HDataProcWithShifterOp::kSXTB:
+ __ Sxtab(output, first, second);
+ break;
+ case HDataProcWithShifterOp::kSXTH:
+ __ Sxtah(output, first, second);
+ break;
+ default:
+ LOG(FATAL) << "Unexpected operation kind: " << op_kind;
+ UNREACHABLE();
+ }
+ } else {
+ GenerateDataProcInstruction(kind,
+ output,
+ first,
+ Operand(second,
+ ShiftFromOpKind(op_kind),
+ instruction->GetShiftAmount()),
+ codegen_);
+ }
} else {
DCHECK_EQ(instruction->GetType(), Primitive::kPrimLong);
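
A minimal standalone sketch, not ART source, of the 32-bit pattern this new branch handles: when the right operand of an HAdd is a zero- or sign-extension, the extension is now folded into the add, so code like the function below can be emitted as a single UXTAB (out = first + (second & 0xff)) instead of a UXTB followed by an ADD.

#include <cstdint>
int AddUnsignedByte(int acc, int value) {
  // The uint8_t conversion is the UXTB that now folds into the add: uxtab out, acc, value.
  return acc + static_cast<uint8_t>(value);
}
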
diff --git a/compiler/optimizing/code_generator_mips.cc b/compiler/optimizing/code_generator_mips.cc
index 23d188d630..b6eb5c1d1d 100644
--- a/compiler/optimizing/code_generator_mips.cc
+++ b/compiler/optimizing/code_generator_mips.cc
@@ -16,6 +16,7 @@
#include "code_generator_mips.h"
+#include "arch/mips/asm_support_mips.h"
#include "arch/mips/entrypoints_direct_mips.h"
#include "arch/mips/instruction_set_features_mips.h"
#include "art_method.h"
@@ -40,6 +41,11 @@ namespace mips {
static constexpr int kCurrentMethodStackOffset = 0;
static constexpr Register kMethodRegisterArgument = A0;
+// Flags controlling the use of thunks for Baker read barriers.
+constexpr bool kBakerReadBarrierThunksEnableForFields = true;
+constexpr bool kBakerReadBarrierThunksEnableForArrays = true;
+constexpr bool kBakerReadBarrierThunksEnableForGcRoots = true;
+
Location MipsReturnLocation(Primitive::Type return_type) {
switch (return_type) {
case Primitive::kPrimBoolean:
@@ -1486,7 +1492,8 @@ void CodeGeneratorMIPS::MoveLocation(Location destination,
__ Mfc1(dst_low, src);
__ MoveFromFpuHigh(dst_high, src);
} else {
- DCHECK(source.IsDoubleStackSlot()) << "Cannot move from " << source << " to " << destination;
+ DCHECK(source.IsDoubleStackSlot())
+ << "Cannot move from " << source << " to " << destination;
int32_t off = source.GetStackIndex();
Register r = destination.AsRegisterPairLow<Register>();
__ LoadFromOffset(kLoadDoubleword, r, SP, off);
@@ -1539,7 +1546,8 @@ void CodeGeneratorMIPS::MoveLocation(Location destination,
} else if (source.IsFpuRegister()) {
__ StoreDToOffset(source.AsFpuRegister<FRegister>(), SP, dst_offset);
} else {
- DCHECK(source.IsDoubleStackSlot()) << "Cannot move from " << source << " to " << destination;
+ DCHECK(source.IsDoubleStackSlot())
+ << "Cannot move from " << source << " to " << destination;
__ LoadFromOffset(kLoadWord, TMP, SP, source.GetStackIndex());
__ StoreToOffset(kStoreWord, TMP, SP, dst_offset);
__ LoadFromOffset(kLoadWord, TMP, SP, source.GetStackIndex() + 4);
@@ -1763,8 +1771,10 @@ void CodeGeneratorMIPS::EmitPcRelativeAddressPlaceholderHigh(PcRelativePatchInfo
}
// A following instruction will add the sign-extended low half of the 32-bit
// offset to `out` (e.g. lw, jialc, addiu).
- DCHECK_EQ(info_low->patch_info_high, info_high);
- __ Bind(&info_low->label);
+ if (info_low != nullptr) {
+ DCHECK_EQ(info_low->patch_info_high, info_high);
+ __ Bind(&info_low->label);
+ }
}
CodeGeneratorMIPS::JitPatchInfo* CodeGeneratorMIPS::NewJitRootStringPatch(
@@ -1791,25 +1801,26 @@ void CodeGeneratorMIPS::PatchJitRootUse(uint8_t* code,
const uint8_t* roots_data,
const CodeGeneratorMIPS::JitPatchInfo& info,
uint64_t index_in_table) const {
- uint32_t literal_offset = GetAssembler().GetLabelLocation(&info.high_label);
+ uint32_t high_literal_offset = GetAssembler().GetLabelLocation(&info.high_label);
+ uint32_t low_literal_offset = GetAssembler().GetLabelLocation(&info.low_label);
uintptr_t address =
reinterpret_cast<uintptr_t>(roots_data) + index_in_table * sizeof(GcRoot<mirror::Object>);
uint32_t addr32 = dchecked_integral_cast<uint32_t>(address);
// lui reg, addr32_high
- DCHECK_EQ(code[literal_offset + 0], 0x34);
- DCHECK_EQ(code[literal_offset + 1], 0x12);
- DCHECK_EQ((code[literal_offset + 2] & 0xE0), 0x00);
- DCHECK_EQ(code[literal_offset + 3], 0x3C);
+ DCHECK_EQ(code[high_literal_offset + 0], 0x34);
+ DCHECK_EQ(code[high_literal_offset + 1], 0x12);
+ DCHECK_EQ((code[high_literal_offset + 2] & 0xE0), 0x00);
+ DCHECK_EQ(code[high_literal_offset + 3], 0x3C);
// instr reg, reg, addr32_low
- DCHECK_EQ(code[literal_offset + 4], 0x78);
- DCHECK_EQ(code[literal_offset + 5], 0x56);
+ DCHECK_EQ(code[low_literal_offset + 0], 0x78);
+ DCHECK_EQ(code[low_literal_offset + 1], 0x56);
addr32 += (addr32 & 0x8000) << 1; // Account for sign extension in "instr reg, reg, addr32_low".
// lui reg, addr32_high
- code[literal_offset + 0] = static_cast<uint8_t>(addr32 >> 16);
- code[literal_offset + 1] = static_cast<uint8_t>(addr32 >> 24);
+ code[high_literal_offset + 0] = static_cast<uint8_t>(addr32 >> 16);
+ code[high_literal_offset + 1] = static_cast<uint8_t>(addr32 >> 24);
// instr reg, reg, addr32_low
- code[literal_offset + 4] = static_cast<uint8_t>(addr32 >> 0);
- code[literal_offset + 5] = static_cast<uint8_t>(addr32 >> 8);
+ code[low_literal_offset + 0] = static_cast<uint8_t>(addr32 >> 0);
+ code[low_literal_offset + 1] = static_cast<uint8_t>(addr32 >> 8);
}
void CodeGeneratorMIPS::EmitJitRootPatches(uint8_t* code, const uint8_t* roots_data) {
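
The addr32 += (addr32 & 0x8000) << 1 adjustment above compensates for the sign extension that the low instruction (addiu, lw, etc.) applies to its 16-bit immediate: when bit 15 of the address is set, the value patched into lui must be one higher than the plain high half. A standalone sketch, not ART code, that checks the arithmetic:

#include <cstdint>
constexpr uint32_t addr32   = 0x1234ABCDu;                            // Example root address.
constexpr uint32_t adjusted = addr32 + ((addr32 & 0x8000u) << 1);
constexpr uint16_t lui_imm  = static_cast<uint16_t>(adjusted >> 16);  // 0x1235, not 0x1234.
constexpr int16_t  low_imm  = static_cast<int16_t>(addr32 & 0xFFFF);  // 0xABCD, sign-extends to -0x5433.
// lui reg, 0x1235; addiu reg, reg, 0xABCD  =>  0x12350000 - 0x5433 == 0x1234ABCD.
static_assert((static_cast<uint32_t>(lui_imm) << 16) + low_imm == addr32,
              "bumped high half cancels the sign extension of the low half");
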
@@ -2545,7 +2556,12 @@ void LocationsBuilderMIPS::VisitArrayGet(HArrayGet* instruction) {
// We need a temporary register for the read barrier marking slow
// path in CodeGeneratorMIPS::GenerateArrayLoadWithBakerReadBarrier.
if (object_array_get_with_read_barrier && kUseBakerReadBarrier) {
- locations->AddTemp(Location::RequiresRegister());
+ bool temp_needed = instruction->GetIndex()->IsConstant()
+ ? !kBakerReadBarrierThunksEnableForFields
+ : !kBakerReadBarrierThunksEnableForArrays;
+ if (temp_needed) {
+ locations->AddTemp(Location::RequiresRegister());
+ }
}
}
@@ -2681,16 +2697,32 @@ void InstructionCodeGeneratorMIPS::VisitArrayGet(HArrayGet* instruction) {
// /* HeapReference<Object> */ out =
// *(obj + data_offset + index * sizeof(HeapReference<Object>))
if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
- Location temp = locations->GetTemp(0);
+ bool temp_needed = index.IsConstant()
+ ? !kBakerReadBarrierThunksEnableForFields
+ : !kBakerReadBarrierThunksEnableForArrays;
+ Location temp = temp_needed ? locations->GetTemp(0) : Location::NoLocation();
// Note that a potential implicit null check is handled in this
// CodeGeneratorMIPS::GenerateArrayLoadWithBakerReadBarrier call.
- codegen_->GenerateArrayLoadWithBakerReadBarrier(instruction,
- out_loc,
- obj,
- data_offset,
- index,
- temp,
- /* needs_null_check */ true);
+ DCHECK(!instruction->CanDoImplicitNullCheckOn(instruction->InputAt(0)));
+ if (index.IsConstant()) {
+ // Array load with a constant index can be treated as a field load.
+ size_t offset =
+ (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_4) + data_offset;
+ codegen_->GenerateFieldLoadWithBakerReadBarrier(instruction,
+ out_loc,
+ obj,
+ offset,
+ temp,
+ /* needs_null_check */ false);
+ } else {
+ codegen_->GenerateArrayLoadWithBakerReadBarrier(instruction,
+ out_loc,
+ obj,
+ data_offset,
+ index,
+ temp,
+ /* needs_null_check */ false);
+ }
} else {
Register out = out_loc.AsRegister<Register>();
if (index.IsConstant()) {
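
Treating the constant-index case as a field load works because the element address is then a fixed offset from the array base, data_offset + (index << TIMES_4), so no index register and no marking temp are needed. A quick standalone sketch, assuming the usual 12-byte data offset for 32-bit heap references (the real value comes from mirror::Array):

#include <cstddef>
constexpr size_t kDataOffset = 12;  // Assumed offset of the first element.
constexpr size_t ElementOffset(int index) {
  return kDataOffset + (static_cast<size_t>(index) << 2);  // 4-byte heap references.
}
static_assert(ElementOffset(5) == 32, "a[5] sits at a fixed 32-byte offset from the array base");
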
@@ -3093,6 +3125,7 @@ void InstructionCodeGeneratorMIPS::VisitBoundsCheck(HBoundsCheck* instruction) {
// Temp is used for read barrier.
static size_t NumberOfInstanceOfTemps(TypeCheckKind type_check_kind) {
if (kEmitCompilerReadBarrier &&
+ !(kUseBakerReadBarrier && kBakerReadBarrierThunksEnableForFields) &&
(kUseBakerReadBarrier ||
type_check_kind == TypeCheckKind::kAbstractClassCheck ||
type_check_kind == TypeCheckKind::kClassHierarchyCheck ||
@@ -6096,7 +6129,9 @@ void LocationsBuilderMIPS::HandleFieldGet(HInstruction* instruction, const Field
if (object_field_get_with_read_barrier && kUseBakerReadBarrier) {
// We need a temporary register for the read barrier marking slow
// path in CodeGeneratorMIPS::GenerateFieldLoadWithBakerReadBarrier.
- locations->AddTemp(Location::RequiresRegister());
+ if (!kBakerReadBarrierThunksEnableForFields) {
+ locations->AddTemp(Location::RequiresRegister());
+ }
}
}
}
@@ -6171,7 +6206,8 @@ void InstructionCodeGeneratorMIPS::HandleFieldGet(HInstruction* instruction,
if (type == Primitive::kPrimNot) {
// /* HeapReference<Object> */ dst = *(obj + offset)
if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
- Location temp_loc = locations->GetTemp(0);
+ Location temp_loc =
+ kBakerReadBarrierThunksEnableForFields ? Location::NoLocation() : locations->GetTemp(0);
// Note that a potential implicit null check is handled in this
// CodeGeneratorMIPS::GenerateFieldLoadWithBakerReadBarrier call.
codegen_->GenerateFieldLoadWithBakerReadBarrier(instruction,
@@ -6395,7 +6431,9 @@ void InstructionCodeGeneratorMIPS::GenerateReferenceLoadOneRegister(
Register out_reg = out.AsRegister<Register>();
if (read_barrier_option == kWithReadBarrier) {
CHECK(kEmitCompilerReadBarrier);
- DCHECK(maybe_temp.IsRegister()) << maybe_temp;
+ if (!kUseBakerReadBarrier || !kBakerReadBarrierThunksEnableForFields) {
+ DCHECK(maybe_temp.IsRegister()) << maybe_temp;
+ }
if (kUseBakerReadBarrier) {
// Load with fast path based Baker's read barrier.
// /* HeapReference<Object> */ out = *(out + offset)
@@ -6435,7 +6473,9 @@ void InstructionCodeGeneratorMIPS::GenerateReferenceLoadTwoRegisters(
if (read_barrier_option == kWithReadBarrier) {
CHECK(kEmitCompilerReadBarrier);
if (kUseBakerReadBarrier) {
- DCHECK(maybe_temp.IsRegister()) << maybe_temp;
+ if (!kBakerReadBarrierThunksEnableForFields) {
+ DCHECK(maybe_temp.IsRegister()) << maybe_temp;
+ }
// Load with fast path based Baker's read barrier.
// /* HeapReference<Object> */ out = *(obj + offset)
codegen_->GenerateFieldLoadWithBakerReadBarrier(instruction,
@@ -6458,67 +6498,172 @@ void InstructionCodeGeneratorMIPS::GenerateReferenceLoadTwoRegisters(
}
}
+static inline int GetBakerMarkThunkNumber(Register reg) {
+ static_assert(BAKER_MARK_INTROSPECTION_REGISTER_COUNT == 21, "Expecting equal");
+ if (reg >= V0 && reg <= T7) { // 14 consecutive regs.
+ return reg - V0;
+ } else if (reg >= S2 && reg <= S7) { // 6 consecutive regs.
+ return 14 + (reg - S2);
+ } else if (reg == FP) { // One more.
+ return 20;
+ }
+ LOG(FATAL) << "Unexpected register " << reg;
+ UNREACHABLE();
+}
+
+static inline int GetBakerMarkFieldArrayThunkDisplacement(Register reg, bool short_offset) {
+ int num = GetBakerMarkThunkNumber(reg) +
+ (short_offset ? BAKER_MARK_INTROSPECTION_REGISTER_COUNT : 0);
+ return num * BAKER_MARK_INTROSPECTION_FIELD_ARRAY_ENTRY_SIZE;
+}
+
+static inline int GetBakerMarkGcRootThunkDisplacement(Register reg) {
+ return GetBakerMarkThunkNumber(reg) * BAKER_MARK_INTROSPECTION_GC_ROOT_ENTRY_SIZE +
+ BAKER_MARK_INTROSPECTION_GC_ROOT_ENTRIES_OFFSET;
+}
+
void InstructionCodeGeneratorMIPS::GenerateGcRootFieldLoad(HInstruction* instruction,
Location root,
Register obj,
uint32_t offset,
- ReadBarrierOption read_barrier_option) {
+ ReadBarrierOption read_barrier_option,
+ MipsLabel* label_low) {
+ bool reordering;
+ if (label_low != nullptr) {
+ DCHECK_EQ(offset, 0x5678u);
+ }
Register root_reg = root.AsRegister<Register>();
if (read_barrier_option == kWithReadBarrier) {
DCHECK(kEmitCompilerReadBarrier);
if (kUseBakerReadBarrier) {
// Fast path implementation of art::ReadBarrier::BarrierForRoot when
// Baker's read barrier are used:
- //
- // root = obj.field;
- // temp = Thread::Current()->pReadBarrierMarkReg ## root.reg()
- // if (temp != null) {
- // root = temp(root)
- // }
-
- // /* GcRoot<mirror::Object> */ root = *(obj + offset)
- __ LoadFromOffset(kLoadWord, root_reg, obj, offset);
- static_assert(
- sizeof(mirror::CompressedReference<mirror::Object>) == sizeof(GcRoot<mirror::Object>),
- "art::mirror::CompressedReference<mirror::Object> and art::GcRoot<mirror::Object> "
- "have different sizes.");
- static_assert(sizeof(mirror::CompressedReference<mirror::Object>) == sizeof(int32_t),
- "art::mirror::CompressedReference<mirror::Object> and int32_t "
- "have different sizes.");
-
- // Slow path marking the GC root `root`.
- Location temp = Location::RegisterLocation(T9);
- SlowPathCodeMIPS* slow_path =
- new (GetGraph()->GetArena()) ReadBarrierMarkSlowPathMIPS(
- instruction,
- root,
- /*entrypoint*/ temp);
- codegen_->AddSlowPath(slow_path);
+ if (kBakerReadBarrierThunksEnableForGcRoots) {
+ // Note that we do not actually check the value of `GetIsGcMarking()`
+ // to decide whether to mark the loaded GC root or not. Instead, we
+ // load into `temp` (T9) the read barrier mark introspection entrypoint.
+ // If `temp` is null, it means that `GetIsGcMarking()` is false, and
+ // vice versa.
+ //
+ // We use thunks for the slow path. That thunk checks the reference
+ // and jumps to the entrypoint if needed.
+ //
+ // temp = Thread::Current()->pReadBarrierMarkReg00
+ // // AKA &art_quick_read_barrier_mark_introspection.
+ // GcRoot<mirror::Object> root = *(obj+offset); // Original reference load.
+ // if (temp != nullptr) {
+ // temp = &gc_root_thunk<root_reg>
+ // root = temp(root)
+ // }
+
+ bool isR6 = codegen_->GetInstructionSetFeatures().IsR6();
+ const int32_t entry_point_offset =
+ Thread::ReadBarrierMarkEntryPointsOffset<kMipsPointerSize>(0);
+ const int thunk_disp = GetBakerMarkGcRootThunkDisplacement(root_reg);
+ int16_t offset_low = Low16Bits(offset);
+ int16_t offset_high = High16Bits(offset - offset_low); // Accounts for sign
+ // extension in lw.
+ bool short_offset = IsInt<16>(static_cast<int32_t>(offset));
+ Register base = short_offset ? obj : TMP;
+ // Loading the entrypoint does not require a load acquire since it is only changed when
+ // threads are suspended or running a checkpoint.
+ __ LoadFromOffset(kLoadWord, T9, TR, entry_point_offset);
+ reordering = __ SetReorder(false);
+ if (!short_offset) {
+ DCHECK(!label_low);
+ __ AddUpper(base, obj, offset_high);
+ }
+ __ Beqz(T9, (isR6 ? 2 : 4)); // Skip jialc / addiu+jalr+nop.
+ if (label_low != nullptr) {
+ DCHECK(short_offset);
+ __ Bind(label_low);
+ }
+ // /* GcRoot<mirror::Object> */ root = *(obj + offset)
+ __ LoadFromOffset(kLoadWord, root_reg, base, offset_low); // Single instruction
+ // in delay slot.
+ if (isR6) {
+ __ Jialc(T9, thunk_disp);
+ } else {
+ __ Addiu(T9, T9, thunk_disp);
+ __ Jalr(T9);
+ __ Nop();
+ }
+ __ SetReorder(reordering);
+ } else {
+ // Note that we do not actually check the value of `GetIsGcMarking()`
+ // to decide whether to mark the loaded GC root or not. Instead, we
+ // load into `temp` (T9) the read barrier mark entry point corresponding
+ // to register `root`. If `temp` is null, it means that `GetIsGcMarking()`
+ // is false, and vice versa.
+ //
+ // GcRoot<mirror::Object> root = *(obj+offset); // Original reference load.
+ // temp = Thread::Current()->pReadBarrierMarkReg ## root.reg()
+ // if (temp != null) {
+ // root = temp(root)
+ // }
+
+ if (label_low != nullptr) {
+ reordering = __ SetReorder(false);
+ __ Bind(label_low);
+ }
+ // /* GcRoot<mirror::Object> */ root = *(obj + offset)
+ __ LoadFromOffset(kLoadWord, root_reg, obj, offset);
+ if (label_low != nullptr) {
+ __ SetReorder(reordering);
+ }
+ static_assert(
+ sizeof(mirror::CompressedReference<mirror::Object>) == sizeof(GcRoot<mirror::Object>),
+ "art::mirror::CompressedReference<mirror::Object> and art::GcRoot<mirror::Object> "
+ "have different sizes.");
+ static_assert(sizeof(mirror::CompressedReference<mirror::Object>) == sizeof(int32_t),
+ "art::mirror::CompressedReference<mirror::Object> and int32_t "
+ "have different sizes.");
+
+ // Slow path marking the GC root `root`.
+ Location temp = Location::RegisterLocation(T9);
+ SlowPathCodeMIPS* slow_path =
+ new (GetGraph()->GetArena()) ReadBarrierMarkSlowPathMIPS(
+ instruction,
+ root,
+ /*entrypoint*/ temp);
+ codegen_->AddSlowPath(slow_path);
- // temp = Thread::Current()->pReadBarrierMarkReg ## root.reg()
- const int32_t entry_point_offset =
- Thread::ReadBarrierMarkEntryPointsOffset<kMipsPointerSize>(root.reg() - 1);
- // Loading the entrypoint does not require a load acquire since it is only changed when
- // threads are suspended or running a checkpoint.
- __ LoadFromOffset(kLoadWord, temp.AsRegister<Register>(), TR, entry_point_offset);
- // The entrypoint is null when the GC is not marking, this prevents one load compared to
- // checking GetIsGcMarking.
- __ Bnez(temp.AsRegister<Register>(), slow_path->GetEntryLabel());
- __ Bind(slow_path->GetExitLabel());
+ const int32_t entry_point_offset =
+ Thread::ReadBarrierMarkEntryPointsOffset<kMipsPointerSize>(root.reg() - 1);
+ // Loading the entrypoint does not require a load acquire since it is only changed when
+ // threads are suspended or running a checkpoint.
+ __ LoadFromOffset(kLoadWord, temp.AsRegister<Register>(), TR, entry_point_offset);
+ __ Bnez(temp.AsRegister<Register>(), slow_path->GetEntryLabel());
+ __ Bind(slow_path->GetExitLabel());
+ }
} else {
+ if (label_low != nullptr) {
+ reordering = __ SetReorder(false);
+ __ Bind(label_low);
+ }
// GC root loaded through a slow path for read barriers other
// than Baker's.
// /* GcRoot<mirror::Object>* */ root = obj + offset
__ Addiu32(root_reg, obj, offset);
+ if (label_low != nullptr) {
+ __ SetReorder(reordering);
+ }
// /* mirror::Object* */ root = root->Read()
codegen_->GenerateReadBarrierForRootSlow(instruction, root, root);
}
} else {
+ if (label_low != nullptr) {
+ reordering = __ SetReorder(false);
+ __ Bind(label_low);
+ }
// Plain GC root load with no read barrier.
// /* GcRoot<mirror::Object> */ root = *(obj + offset)
__ LoadFromOffset(kLoadWord, root_reg, obj, offset);
// Note that GC roots are not affected by heap poisoning, thus we
// do not have to unpoison `root_reg` here.
+ if (label_low != nullptr) {
+ __ SetReorder(reordering);
+ }
}
}
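
The three helpers added above map an eligible MIPS core register to its slot in the introspection thunk table: 21 registers (V0..T7, S2..S7, FP), each with a field/array thunk for long and for short offsets, plus a separate block of GC-root thunks. A rough standalone model of the displacement computation, with an assumed entry size since the real constants come from asm_support_mips.h:

constexpr int kRegisterCount = 21;         // Models BAKER_MARK_INTROSPECTION_REGISTER_COUNT.
constexpr int kFieldArrayEntrySize = 192;  // Assumed stand-in for the real per-thunk entry size.
constexpr int FieldArrayThunkDisp(int thunk_number, bool short_offset) {
  return (thunk_number + (short_offset ? kRegisterCount : 0)) * kFieldArrayEntrySize;
}
static_assert(FieldArrayThunkDisp(20, /* short_offset */ false) == 20 * 192, "last long-offset thunk");
static_assert(FieldArrayThunkDisp(0, /* short_offset */ true) == 21 * 192, "first short-offset thunk");
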
@@ -6531,6 +6676,88 @@ void CodeGeneratorMIPS::GenerateFieldLoadWithBakerReadBarrier(HInstruction* inst
DCHECK(kEmitCompilerReadBarrier);
DCHECK(kUseBakerReadBarrier);
+ if (kBakerReadBarrierThunksEnableForFields) {
+ // Note that we do not actually check the value of `GetIsGcMarking()`
+ // to decide whether to mark the loaded reference or not. Instead, we
+ // load into `temp` (T9) the read barrier mark introspection entrypoint.
+ // If `temp` is null, it means that `GetIsGcMarking()` is false, and
+ // vice versa.
+ //
+ // We use thunks for the slow path. That thunk checks the reference
+ // and jumps to the entrypoint if needed. If the holder is not gray,
+ // it issues a load-load memory barrier and returns to the original
+ // reference load.
+ //
+ // temp = Thread::Current()->pReadBarrierMarkReg00
+ // // AKA &art_quick_read_barrier_mark_introspection.
+ // if (temp != nullptr) {
+ // temp = &field_array_thunk<holder_reg>
+ // temp()
+ // }
+ // not_gray_return_address:
+ // // If the offset is too large to fit into the lw instruction, we
+ // // use an adjusted base register (TMP) here. This register
+ // // receives bits 16 ... 31 of the offset before the thunk invocation
+ // // and the thunk benefits from it.
+ // HeapReference<mirror::Object> reference = *(obj+offset); // Original reference load.
+ // gray_return_address:
+
+ DCHECK(temp.IsInvalid());
+ bool isR6 = GetInstructionSetFeatures().IsR6();
+ int16_t offset_low = Low16Bits(offset);
+ int16_t offset_high = High16Bits(offset - offset_low); // Accounts for sign extension in lw.
+ bool short_offset = IsInt<16>(static_cast<int32_t>(offset));
+ bool reordering = __ SetReorder(false);
+ const int32_t entry_point_offset =
+ Thread::ReadBarrierMarkEntryPointsOffset<kMipsPointerSize>(0);
+ // There may or may not have been a null check if the field offset is smaller than
+ // the page size.
+ // There must've been a null check in case it's actually a load from an array.
+ // We will, however, perform an explicit null check in the thunk as it's easier to
+ // do it than not.
+ if (instruction->IsArrayGet()) {
+ DCHECK(!needs_null_check);
+ }
+ const int thunk_disp = GetBakerMarkFieldArrayThunkDisplacement(obj, short_offset);
+ // Loading the entrypoint does not require a load acquire since it is only changed when
+ // threads are suspended or running a checkpoint.
+ __ LoadFromOffset(kLoadWord, T9, TR, entry_point_offset);
+ Register ref_reg = ref.AsRegister<Register>();
+ Register base = short_offset ? obj : TMP;
+ if (short_offset) {
+ if (isR6) {
+ __ Beqzc(T9, 2); // Skip jialc.
+ __ Nop(); // In forbidden slot.
+ __ Jialc(T9, thunk_disp);
+ } else {
+ __ Beqz(T9, 3); // Skip jalr+nop.
+ __ Addiu(T9, T9, thunk_disp); // In delay slot.
+ __ Jalr(T9);
+ __ Nop(); // In delay slot.
+ }
+ } else {
+ if (isR6) {
+ __ Beqz(T9, 2); // Skip jialc.
+ __ Aui(base, obj, offset_high); // In delay slot.
+ __ Jialc(T9, thunk_disp);
+ } else {
+ __ Lui(base, offset_high);
+ __ Beqz(T9, 2); // Skip jalr.
+ __ Addiu(T9, T9, thunk_disp); // In delay slot.
+ __ Jalr(T9);
+ __ Addu(base, base, obj); // In delay slot.
+ }
+ }
+ // /* HeapReference<Object> */ ref = *(obj + offset)
+ __ LoadFromOffset(kLoadWord, ref_reg, base, offset_low); // Single instruction.
+ if (needs_null_check) {
+ MaybeRecordImplicitNullCheck(instruction);
+ }
+ __ MaybeUnpoisonHeapReference(ref_reg);
+ __ SetReorder(reordering);
+ return;
+ }
+
// /* HeapReference<Object> */ ref = *(obj + offset)
Location no_index = Location::NoLocation();
ScaleFactor no_scale_factor = TIMES_1;
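
When the field offset does not fit the 16-bit immediate of lw, the code above applies the high half to TMP (aui on R6, lui+addu otherwise) and leaves the sign-extended low half to the final load. Computing the high half as High16Bits(offset - offset_low) bakes in the same sign-extension compensation used in PatchJitRootUse earlier. A standalone check of that split, not ART code:

#include <cstdint>
constexpr int16_t Low16(uint32_t v)  { return static_cast<int16_t>(v & 0xFFFF); }
constexpr int16_t High16(uint32_t v) { return static_cast<int16_t>(v >> 16); }
constexpr uint32_t offset      = 0x0012ABCDu;                  // Does not fit a 16-bit immediate.
constexpr int16_t  offset_low  = Low16(offset);                // Sign-extends to -0x5433 in lw.
constexpr int16_t  offset_high = High16(offset - offset_low);  // 0x0013, carry already baked in.
// aui base, obj, 0x0013; lw ref, -0x5433(base)  =>  obj + 0x130000 - 0x5433 == obj + 0x12ABCD.
static_assert((static_cast<uint32_t>(offset_high) << 16) + offset_low == offset,
              "high/low split reassembles the original offset");
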
@@ -6557,9 +6784,69 @@ void CodeGeneratorMIPS::GenerateArrayLoadWithBakerReadBarrier(HInstruction* inst
static_assert(
sizeof(mirror::HeapReference<mirror::Object>) == sizeof(int32_t),
"art::mirror::HeapReference<art::mirror::Object> and int32_t have different sizes.");
+ ScaleFactor scale_factor = TIMES_4;
+
+ if (kBakerReadBarrierThunksEnableForArrays) {
+ // Note that we do not actually check the value of `GetIsGcMarking()`
+ // to decide whether to mark the loaded reference or not. Instead, we
+ // load into `temp` (T9) the read barrier mark introspection entrypoint.
+ // If `temp` is null, it means that `GetIsGcMarking()` is false, and
+ // vice versa.
+ //
+ // We use thunks for the slow path. That thunk checks the reference
+ // and jumps to the entrypoint if needed. If the holder is not gray,
+ // it issues a load-load memory barrier and returns to the original
+ // reference load.
+ //
+ // temp = Thread::Current()->pReadBarrierMarkReg00
+ // // AKA &art_quick_read_barrier_mark_introspection.
+ // if (temp != nullptr) {
+ // temp = &field_array_thunk<holder_reg>
+ // temp()
+ // }
+ // not_gray_return_address:
+ // // The element address is pre-calculated in the TMP register before the
+ // // thunk invocation and the thunk benefits from it.
+ // HeapReference<mirror::Object> reference = data[index]; // Original reference load.
+ // gray_return_address:
+
+ DCHECK(temp.IsInvalid());
+ DCHECK(index.IsValid());
+ bool reordering = __ SetReorder(false);
+ const int32_t entry_point_offset =
+ Thread::ReadBarrierMarkEntryPointsOffset<kMipsPointerSize>(0);
+ // We will not do the explicit null check in the thunk as some form of a null check
+ // must've been done earlier.
+ DCHECK(!needs_null_check);
+ const int thunk_disp = GetBakerMarkFieldArrayThunkDisplacement(obj, /* short_offset */ false);
+ // Loading the entrypoint does not require a load acquire since it is only changed when
+ // threads are suspended or running a checkpoint.
+ __ LoadFromOffset(kLoadWord, T9, TR, entry_point_offset);
+ Register ref_reg = ref.AsRegister<Register>();
+ Register index_reg = index.IsRegisterPair()
+ ? index.AsRegisterPairLow<Register>()
+ : index.AsRegister<Register>();
+ if (GetInstructionSetFeatures().IsR6()) {
+ __ Beqz(T9, 2); // Skip jialc.
+ __ Lsa(TMP, index_reg, obj, scale_factor); // In delay slot.
+ __ Jialc(T9, thunk_disp);
+ } else {
+ __ Sll(TMP, index_reg, scale_factor);
+ __ Beqz(T9, 2); // Skip jalr.
+ __ Addiu(T9, T9, thunk_disp); // In delay slot.
+ __ Jalr(T9);
+ __ Addu(TMP, TMP, obj); // In delay slot.
+ }
+ // /* HeapReference<Object> */ ref = *(obj + data_offset + (index << scale_factor))
+ DCHECK(IsInt<16>(static_cast<int32_t>(data_offset))) << data_offset;
+ __ LoadFromOffset(kLoadWord, ref_reg, TMP, data_offset); // Single instruction.
+ __ MaybeUnpoisonHeapReference(ref_reg);
+ __ SetReorder(reordering);
+ return;
+ }
+
// /* HeapReference<Object> */ ref =
// *(obj + data_offset + index * sizeof(HeapReference<Object>))
- ScaleFactor scale_factor = TIMES_4;
GenerateReferenceLoadWithBakerReadBarrier(instruction,
ref,
obj,
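
In the array case the element address is precomputed into TMP (lsa on R6, sll+addu otherwise, sitting in the branch delay slot), so the final lw only needs the small data_offset immediate, and the thunk also relies on TMP holding that address. A quick standalone model, not ART code:

#include <cstdint>
constexpr uintptr_t ElementSlot(uintptr_t obj, uint32_t index, uint32_t data_offset) {
  // lsa TMP, index, obj, 2 precomputes obj + (index << 2); the final load is
  // then lw ref, data_offset(TMP) with a guaranteed 16-bit data_offset.
  return obj + (static_cast<uintptr_t>(index) << 2) + data_offset;
}
static_assert(ElementSlot(0x1000, 5, 12) == 0x1000 + 32, "element 5 of a reference array");
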
@@ -7461,10 +7748,14 @@ void InstructionCodeGeneratorMIPS::VisitLoadClass(HLoadClass* cls) NO_THREAD_SAF
bool reordering = __ SetReorder(false);
codegen_->EmitPcRelativeAddressPlaceholderHigh(bss_info_high,
temp,
- base_or_current_method_reg,
- info_low);
- GenerateGcRootFieldLoad(cls, out_loc, temp, /* placeholder */ 0x5678, read_barrier_option);
+ base_or_current_method_reg);
__ SetReorder(reordering);
+ GenerateGcRootFieldLoad(cls,
+ out_loc,
+ temp,
+ /* placeholder */ 0x5678,
+ read_barrier_option,
+ &info_low->label);
generate_null_check = true;
break;
}
@@ -7475,8 +7766,13 @@ void InstructionCodeGeneratorMIPS::VisitLoadClass(HLoadClass* cls) NO_THREAD_SAF
bool reordering = __ SetReorder(false);
__ Bind(&info->high_label);
__ Lui(out, /* placeholder */ 0x1234);
- GenerateGcRootFieldLoad(cls, out_loc, out, /* placeholder */ 0x5678, read_barrier_option);
__ SetReorder(reordering);
+ GenerateGcRootFieldLoad(cls,
+ out_loc,
+ out,
+ /* placeholder */ 0x5678,
+ read_barrier_option,
+ &info->low_label);
break;
}
case HLoadClass::LoadKind::kRuntimeCall:
@@ -7623,14 +7919,14 @@ void InstructionCodeGeneratorMIPS::VisitLoadString(HLoadString* load) NO_THREAD_
bool reordering = __ SetReorder(false);
codegen_->EmitPcRelativeAddressPlaceholderHigh(info_high,
temp,
- base_or_current_method_reg,
- info_low);
+ base_or_current_method_reg);
+ __ SetReorder(reordering);
GenerateGcRootFieldLoad(load,
out_loc,
temp,
/* placeholder */ 0x5678,
- kCompilerReadBarrierOption);
- __ SetReorder(reordering);
+ kCompilerReadBarrierOption,
+ &info_low->label);
SlowPathCodeMIPS* slow_path =
new (GetGraph()->GetArena()) LoadStringSlowPathMIPS(load, info_high);
codegen_->AddSlowPath(slow_path);
@@ -7646,12 +7942,13 @@ void InstructionCodeGeneratorMIPS::VisitLoadString(HLoadString* load) NO_THREAD_
bool reordering = __ SetReorder(false);
__ Bind(&info->high_label);
__ Lui(out, /* placeholder */ 0x1234);
+ __ SetReorder(reordering);
GenerateGcRootFieldLoad(load,
out_loc,
out,
/* placeholder */ 0x5678,
- kCompilerReadBarrierOption);
- __ SetReorder(reordering);
+ kCompilerReadBarrierOption,
+ &info->low_label);
return;
}
default:
diff --git a/compiler/optimizing/code_generator_mips.h b/compiler/optimizing/code_generator_mips.h
index 52ee852269..7195b9d89d 100644
--- a/compiler/optimizing/code_generator_mips.h
+++ b/compiler/optimizing/code_generator_mips.h
@@ -285,7 +285,8 @@ class InstructionCodeGeneratorMIPS : public InstructionCodeGenerator {
Location root,
Register obj,
uint32_t offset,
- ReadBarrierOption read_barrier_option);
+ ReadBarrierOption read_barrier_option,
+ MipsLabel* label_low = nullptr);
void GenerateIntCompare(IfCondition cond, LocationSummary* locations);
// When the function returns `false` it means that the condition holds if `dst` is non-zero
@@ -637,7 +638,7 @@ class CodeGeneratorMIPS : public CodeGenerator {
void EmitPcRelativeAddressPlaceholderHigh(PcRelativePatchInfo* info_high,
Register out,
Register base,
- PcRelativePatchInfo* info_low);
+ PcRelativePatchInfo* info_low = nullptr);
// The JitPatchInfo is used for JIT string and class loads.
struct JitPatchInfo {
@@ -649,8 +650,9 @@ class CodeGeneratorMIPS : public CodeGenerator {
// String/type index.
uint64_t index;
// Label for the instruction loading the most significant half of the address.
- // The least significant half is loaded with the instruction that follows immediately.
MipsLabel high_label;
+ // Label for the instruction supplying the least significant half of the address.
+ MipsLabel low_label;
};
void PatchJitRootUse(uint8_t* code,
diff --git a/compiler/optimizing/code_generator_mips64.cc b/compiler/optimizing/code_generator_mips64.cc
index 454a2ddc14..3e79f474b6 100644
--- a/compiler/optimizing/code_generator_mips64.cc
+++ b/compiler/optimizing/code_generator_mips64.cc
@@ -16,6 +16,7 @@
#include "code_generator_mips64.h"
+#include "arch/mips64/asm_support_mips64.h"
#include "art_method.h"
#include "code_generator_utils.h"
#include "compiled_method.h"
@@ -38,6 +39,11 @@ namespace mips64 {
static constexpr int kCurrentMethodStackOffset = 0;
static constexpr GpuRegister kMethodRegisterArgument = A0;
+// Flags controlling the use of thunks for Baker read barriers.
+constexpr bool kBakerReadBarrierThunksEnableForFields = true;
+constexpr bool kBakerReadBarrierThunksEnableForArrays = true;
+constexpr bool kBakerReadBarrierThunksEnableForGcRoots = true;
+
Location Mips64ReturnLocation(Primitive::Type return_type) {
switch (return_type) {
case Primitive::kPrimBoolean:
@@ -1649,8 +1655,10 @@ void CodeGeneratorMIPS64::EmitPcRelativeAddressPlaceholderHigh(PcRelativePatchIn
__ Auipc(out, /* placeholder */ 0x1234);
// A following instruction will add the sign-extended low half of the 32-bit
// offset to `out` (e.g. ld, jialc, daddiu).
- DCHECK_EQ(info_low->patch_info_high, info_high);
- __ Bind(&info_low->label);
+ if (info_low != nullptr) {
+ DCHECK_EQ(info_low->patch_info_high, info_high);
+ __ Bind(&info_low->label);
+ }
}
Literal* CodeGeneratorMIPS64::DeduplicateJitStringLiteral(const DexFile& dex_file,
@@ -2117,7 +2125,12 @@ void LocationsBuilderMIPS64::VisitArrayGet(HArrayGet* instruction) {
// We need a temporary register for the read barrier marking slow
// path in CodeGeneratorMIPS64::GenerateArrayLoadWithBakerReadBarrier.
if (object_array_get_with_read_barrier && kUseBakerReadBarrier) {
- locations->AddTemp(Location::RequiresRegister());
+ bool temp_needed = instruction->GetIndex()->IsConstant()
+ ? !kBakerReadBarrierThunksEnableForFields
+ : !kBakerReadBarrierThunksEnableForArrays;
+ if (temp_needed) {
+ locations->AddTemp(Location::RequiresRegister());
+ }
}
}
@@ -2254,16 +2267,32 @@ void InstructionCodeGeneratorMIPS64::VisitArrayGet(HArrayGet* instruction) {
// /* HeapReference<Object> */ out =
// *(obj + data_offset + index * sizeof(HeapReference<Object>))
if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
- Location temp = locations->GetTemp(0);
+ bool temp_needed = index.IsConstant()
+ ? !kBakerReadBarrierThunksEnableForFields
+ : !kBakerReadBarrierThunksEnableForArrays;
+ Location temp = temp_needed ? locations->GetTemp(0) : Location::NoLocation();
// Note that a potential implicit null check is handled in this
// CodeGeneratorMIPS64::GenerateArrayLoadWithBakerReadBarrier call.
- codegen_->GenerateArrayLoadWithBakerReadBarrier(instruction,
- out_loc,
- obj,
- data_offset,
- index,
- temp,
- /* needs_null_check */ true);
+ DCHECK(!instruction->CanDoImplicitNullCheckOn(instruction->InputAt(0)));
+ if (index.IsConstant()) {
+ // Array load with a constant index can be treated as a field load.
+ size_t offset =
+ (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_4) + data_offset;
+ codegen_->GenerateFieldLoadWithBakerReadBarrier(instruction,
+ out_loc,
+ obj,
+ offset,
+ temp,
+ /* needs_null_check */ false);
+ } else {
+ codegen_->GenerateArrayLoadWithBakerReadBarrier(instruction,
+ out_loc,
+ obj,
+ data_offset,
+ index,
+ temp,
+ /* needs_null_check */ false);
+ }
} else {
GpuRegister out = out_loc.AsRegister<GpuRegister>();
if (index.IsConstant()) {
@@ -2666,6 +2695,7 @@ void InstructionCodeGeneratorMIPS64::VisitBoundsCheck(HBoundsCheck* instruction)
// Temp is used for read barrier.
static size_t NumberOfInstanceOfTemps(TypeCheckKind type_check_kind) {
if (kEmitCompilerReadBarrier &&
+ !(kUseBakerReadBarrier && kBakerReadBarrierThunksEnableForFields) &&
(kUseBakerReadBarrier ||
type_check_kind == TypeCheckKind::kAbstractClassCheck ||
type_check_kind == TypeCheckKind::kClassHierarchyCheck ||
@@ -4118,7 +4148,9 @@ void LocationsBuilderMIPS64::HandleFieldGet(HInstruction* instruction,
if (object_field_get_with_read_barrier && kUseBakerReadBarrier) {
// We need a temporary register for the read barrier marking slow
// path in CodeGeneratorMIPS64::GenerateFieldLoadWithBakerReadBarrier.
- locations->AddTemp(Location::RequiresRegister());
+ if (!kBakerReadBarrierThunksEnableForFields) {
+ locations->AddTemp(Location::RequiresRegister());
+ }
}
}
@@ -4168,7 +4200,8 @@ void InstructionCodeGeneratorMIPS64::HandleFieldGet(HInstruction* instruction,
if (type == Primitive::kPrimNot) {
// /* HeapReference<Object> */ dst = *(obj + offset)
if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
- Location temp_loc = locations->GetTemp(0);
+ Location temp_loc =
+ kBakerReadBarrierThunksEnableForFields ? Location::NoLocation() : locations->GetTemp(0);
// Note that a potential implicit null check is handled in this
// CodeGeneratorMIPS64::GenerateFieldLoadWithBakerReadBarrier call.
codegen_->GenerateFieldLoadWithBakerReadBarrier(instruction,
@@ -4318,7 +4351,9 @@ void InstructionCodeGeneratorMIPS64::GenerateReferenceLoadOneRegister(
GpuRegister out_reg = out.AsRegister<GpuRegister>();
if (read_barrier_option == kWithReadBarrier) {
CHECK(kEmitCompilerReadBarrier);
- DCHECK(maybe_temp.IsRegister()) << maybe_temp;
+ if (!kUseBakerReadBarrier || !kBakerReadBarrierThunksEnableForFields) {
+ DCHECK(maybe_temp.IsRegister()) << maybe_temp;
+ }
if (kUseBakerReadBarrier) {
// Load with fast path based Baker's read barrier.
// /* HeapReference<Object> */ out = *(out + offset)
@@ -4358,7 +4393,9 @@ void InstructionCodeGeneratorMIPS64::GenerateReferenceLoadTwoRegisters(
if (read_barrier_option == kWithReadBarrier) {
CHECK(kEmitCompilerReadBarrier);
if (kUseBakerReadBarrier) {
- DCHECK(maybe_temp.IsRegister()) << maybe_temp;
+ if (!kBakerReadBarrierThunksEnableForFields) {
+ DCHECK(maybe_temp.IsRegister()) << maybe_temp;
+ }
// Load with fast path based Baker's read barrier.
// /* HeapReference<Object> */ out = *(obj + offset)
codegen_->GenerateFieldLoadWithBakerReadBarrier(instruction,
@@ -4381,55 +4418,134 @@ void InstructionCodeGeneratorMIPS64::GenerateReferenceLoadTwoRegisters(
}
}
-void InstructionCodeGeneratorMIPS64::GenerateGcRootFieldLoad(
- HInstruction* instruction,
- Location root,
- GpuRegister obj,
- uint32_t offset,
- ReadBarrierOption read_barrier_option) {
+static inline int GetBakerMarkThunkNumber(GpuRegister reg) {
+ static_assert(BAKER_MARK_INTROSPECTION_REGISTER_COUNT == 20, "Expecting equal");
+ if (reg >= V0 && reg <= T2) { // 13 consecutive regs.
+ return reg - V0;
+ } else if (reg >= S2 && reg <= S7) { // 6 consecutive regs.
+ return 13 + (reg - S2);
+ } else if (reg == S8) { // One more.
+ return 19;
+ }
+ LOG(FATAL) << "Unexpected register " << reg;
+ UNREACHABLE();
+}
+
+static inline int GetBakerMarkFieldArrayThunkDisplacement(GpuRegister reg, bool short_offset) {
+ int num = GetBakerMarkThunkNumber(reg) +
+ (short_offset ? BAKER_MARK_INTROSPECTION_REGISTER_COUNT : 0);
+ return num * BAKER_MARK_INTROSPECTION_FIELD_ARRAY_ENTRY_SIZE;
+}
+
+static inline int GetBakerMarkGcRootThunkDisplacement(GpuRegister reg) {
+ return GetBakerMarkThunkNumber(reg) * BAKER_MARK_INTROSPECTION_GC_ROOT_ENTRY_SIZE +
+ BAKER_MARK_INTROSPECTION_GC_ROOT_ENTRIES_OFFSET;
+}
+
+void InstructionCodeGeneratorMIPS64::GenerateGcRootFieldLoad(HInstruction* instruction,
+ Location root,
+ GpuRegister obj,
+ uint32_t offset,
+ ReadBarrierOption read_barrier_option,
+ Mips64Label* label_low) {
+ if (label_low != nullptr) {
+ DCHECK_EQ(offset, 0x5678u);
+ }
GpuRegister root_reg = root.AsRegister<GpuRegister>();
if (read_barrier_option == kWithReadBarrier) {
DCHECK(kEmitCompilerReadBarrier);
if (kUseBakerReadBarrier) {
// Fast path implementation of art::ReadBarrier::BarrierForRoot when
// Baker's read barrier are used:
- //
- // root = obj.field;
- // temp = Thread::Current()->pReadBarrierMarkReg ## root.reg()
- // if (temp != null) {
- // root = temp(root)
- // }
-
- // /* GcRoot<mirror::Object> */ root = *(obj + offset)
- __ LoadFromOffset(kLoadUnsignedWord, root_reg, obj, offset);
- static_assert(
- sizeof(mirror::CompressedReference<mirror::Object>) == sizeof(GcRoot<mirror::Object>),
- "art::mirror::CompressedReference<mirror::Object> and art::GcRoot<mirror::Object> "
- "have different sizes.");
- static_assert(sizeof(mirror::CompressedReference<mirror::Object>) == sizeof(int32_t),
- "art::mirror::CompressedReference<mirror::Object> and int32_t "
- "have different sizes.");
-
- // Slow path marking the GC root `root`.
- Location temp = Location::RegisterLocation(T9);
- SlowPathCodeMIPS64* slow_path =
- new (GetGraph()->GetArena()) ReadBarrierMarkSlowPathMIPS64(
- instruction,
- root,
- /*entrypoint*/ temp);
- codegen_->AddSlowPath(slow_path);
+ if (kBakerReadBarrierThunksEnableForGcRoots) {
+ // Note that we do not actually check the value of `GetIsGcMarking()`
+ // to decide whether to mark the loaded GC root or not. Instead, we
+ // load into `temp` (T9) the read barrier mark introspection entrypoint.
+ // If `temp` is null, it means that `GetIsGcMarking()` is false, and
+ // vice versa.
+ //
+ // We use thunks for the slow path. That thunk checks the reference
+ // and jumps to the entrypoint if needed.
+ //
+ // temp = Thread::Current()->pReadBarrierMarkReg00
+ // // AKA &art_quick_read_barrier_mark_introspection.
+ // GcRoot<mirror::Object> root = *(obj+offset); // Original reference load.
+ // if (temp != nullptr) {
+ // temp = &gc_root_thunk<root_reg>
+ // root = temp(root)
+ // }
+
+ const int32_t entry_point_offset =
+ Thread::ReadBarrierMarkEntryPointsOffset<kMips64PointerSize>(0);
+ const int thunk_disp = GetBakerMarkGcRootThunkDisplacement(root_reg);
+ int16_t offset_low = Low16Bits(offset);
+ int16_t offset_high = High16Bits(offset - offset_low); // Accounts for sign
+ // extension in lwu.
+ bool short_offset = IsInt<16>(static_cast<int32_t>(offset));
+ GpuRegister base = short_offset ? obj : TMP;
+ // Loading the entrypoint does not require a load acquire since it is only changed when
+ // threads are suspended or running a checkpoint.
+ __ LoadFromOffset(kLoadDoubleword, T9, TR, entry_point_offset);
+ if (!short_offset) {
+ DCHECK(!label_low);
+ __ Daui(base, obj, offset_high);
+ }
+ __ Beqz(T9, 2); // Skip jialc.
+ if (label_low != nullptr) {
+ DCHECK(short_offset);
+ __ Bind(label_low);
+ }
+ // /* GcRoot<mirror::Object> */ root = *(obj + offset)
+ __ LoadFromOffset(kLoadUnsignedWord, root_reg, base, offset_low); // Single instruction
+ // in delay slot.
+ __ Jialc(T9, thunk_disp);
+ } else {
+ // Note that we do not actually check the value of `GetIsGcMarking()`
+ // to decide whether to mark the loaded GC root or not. Instead, we
+ // load into `temp` (T9) the read barrier mark entry point corresponding
+ // to register `root`. If `temp` is null, it means that `GetIsGcMarking()`
+ // is false, and vice versa.
+ //
+ // GcRoot<mirror::Object> root = *(obj+offset); // Original reference load.
+ // temp = Thread::Current()->pReadBarrierMarkReg ## root.reg()
+ // if (temp != null) {
+ // root = temp(root)
+ // }
+
+ if (label_low != nullptr) {
+ __ Bind(label_low);
+ }
+ // /* GcRoot<mirror::Object> */ root = *(obj + offset)
+ __ LoadFromOffset(kLoadUnsignedWord, root_reg, obj, offset);
+ static_assert(
+ sizeof(mirror::CompressedReference<mirror::Object>) == sizeof(GcRoot<mirror::Object>),
+ "art::mirror::CompressedReference<mirror::Object> and art::GcRoot<mirror::Object> "
+ "have different sizes.");
+ static_assert(sizeof(mirror::CompressedReference<mirror::Object>) == sizeof(int32_t),
+ "art::mirror::CompressedReference<mirror::Object> and int32_t "
+ "have different sizes.");
+
+ // Slow path marking the GC root `root`.
+ Location temp = Location::RegisterLocation(T9);
+ SlowPathCodeMIPS64* slow_path =
+ new (GetGraph()->GetArena()) ReadBarrierMarkSlowPathMIPS64(
+ instruction,
+ root,
+ /*entrypoint*/ temp);
+ codegen_->AddSlowPath(slow_path);
- // temp = Thread::Current()->pReadBarrierMarkReg ## root.reg()
- const int32_t entry_point_offset =
- Thread::ReadBarrierMarkEntryPointsOffset<kMips64PointerSize>(root.reg() - 1);
- // Loading the entrypoint does not require a load acquire since it is only changed when
- // threads are suspended or running a checkpoint.
- __ LoadFromOffset(kLoadDoubleword, temp.AsRegister<GpuRegister>(), TR, entry_point_offset);
- // The entrypoint is null when the GC is not marking, this prevents one load compared to
- // checking GetIsGcMarking.
- __ Bnezc(temp.AsRegister<GpuRegister>(), slow_path->GetEntryLabel());
- __ Bind(slow_path->GetExitLabel());
+ const int32_t entry_point_offset =
+ Thread::ReadBarrierMarkEntryPointsOffset<kMips64PointerSize>(root.reg() - 1);
+ // Loading the entrypoint does not require a load acquire since it is only changed when
+ // threads are suspended or running a checkpoint.
+ __ LoadFromOffset(kLoadDoubleword, temp.AsRegister<GpuRegister>(), TR, entry_point_offset);
+ __ Bnezc(temp.AsRegister<GpuRegister>(), slow_path->GetEntryLabel());
+ __ Bind(slow_path->GetExitLabel());
+ }
} else {
+ if (label_low != nullptr) {
+ __ Bind(label_low);
+ }
// GC root loaded through a slow path for read barriers other
// than Baker's.
// /* GcRoot<mirror::Object>* */ root = obj + offset
@@ -4438,6 +4554,9 @@ void InstructionCodeGeneratorMIPS64::GenerateGcRootFieldLoad(
codegen_->GenerateReadBarrierForRootSlow(instruction, root, root);
}
} else {
+ if (label_low != nullptr) {
+ __ Bind(label_low);
+ }
// Plain GC root load with no read barrier.
// /* GcRoot<mirror::Object> */ root = *(obj + offset)
__ LoadFromOffset(kLoadUnsignedWord, root_reg, obj, offset);
@@ -4455,6 +4574,71 @@ void CodeGeneratorMIPS64::GenerateFieldLoadWithBakerReadBarrier(HInstruction* in
DCHECK(kEmitCompilerReadBarrier);
DCHECK(kUseBakerReadBarrier);
+ if (kBakerReadBarrierThunksEnableForFields) {
+ // Note that we do not actually check the value of `GetIsGcMarking()`
+ // to decide whether to mark the loaded reference or not. Instead, we
+ // load into `temp` (T9) the read barrier mark introspection entrypoint.
+ // If `temp` is null, it means that `GetIsGcMarking()` is false, and
+ // vice versa.
+ //
+ // We use thunks for the slow path. That thunk checks the reference
+ // and jumps to the entrypoint if needed. If the holder is not gray,
+ // it issues a load-load memory barrier and returns to the original
+ // reference load.
+ //
+ // temp = Thread::Current()->pReadBarrierMarkReg00
+ // // AKA &art_quick_read_barrier_mark_introspection.
+ // if (temp != nullptr) {
+ // temp = &field_array_thunk<holder_reg>
+ // temp()
+ // }
+ // not_gray_return_address:
+ // // If the offset is too large to fit into the lw instruction, we
+ // // use an adjusted base register (TMP) here. This register
+ // // receives bits 16 ... 31 of the offset before the thunk invocation
+ // // and the thunk benefits from it.
+ // HeapReference<mirror::Object> reference = *(obj+offset); // Original reference load.
+ // gray_return_address:
+
+ DCHECK(temp.IsInvalid());
+ bool short_offset = IsInt<16>(static_cast<int32_t>(offset));
+ const int32_t entry_point_offset =
+ Thread::ReadBarrierMarkEntryPointsOffset<kMips64PointerSize>(0);
+ // There may or may not have been a null check if the field offset is smaller than
+ // the page size.
+ // There must've been a null check in case it's actually a load from an array.
+ // We will, however, perform an explicit null check in the thunk as it's easier to
+ // do it than not.
+ if (instruction->IsArrayGet()) {
+ DCHECK(!needs_null_check);
+ }
+ const int thunk_disp = GetBakerMarkFieldArrayThunkDisplacement(obj, short_offset);
+ // Loading the entrypoint does not require a load acquire since it is only changed when
+ // threads are suspended or running a checkpoint.
+ __ LoadFromOffset(kLoadDoubleword, T9, TR, entry_point_offset);
+ GpuRegister ref_reg = ref.AsRegister<GpuRegister>();
+ if (short_offset) {
+ __ Beqzc(T9, 2); // Skip jialc.
+ __ Nop(); // In forbidden slot.
+ __ Jialc(T9, thunk_disp);
+ // /* HeapReference<Object> */ ref = *(obj + offset)
+ __ LoadFromOffset(kLoadUnsignedWord, ref_reg, obj, offset); // Single instruction.
+ } else {
+ int16_t offset_low = Low16Bits(offset);
+ int16_t offset_high = High16Bits(offset - offset_low); // Accounts for sign extension in lwu.
+ __ Beqz(T9, 2); // Skip jialc.
+ __ Daui(TMP, obj, offset_high); // In delay slot.
+ __ Jialc(T9, thunk_disp);
+ // /* HeapReference<Object> */ ref = *(obj + offset)
+ __ LoadFromOffset(kLoadUnsignedWord, ref_reg, TMP, offset_low); // Single instruction.
+ }
+ if (needs_null_check) {
+ MaybeRecordImplicitNullCheck(instruction);
+ }
+ __ MaybeUnpoisonHeapReference(ref_reg);
+ return;
+ }
+
// /* HeapReference<Object> */ ref = *(obj + offset)
Location no_index = Location::NoLocation();
ScaleFactor no_scale_factor = TIMES_1;
@@ -4481,9 +4665,57 @@ void CodeGeneratorMIPS64::GenerateArrayLoadWithBakerReadBarrier(HInstruction* in
static_assert(
sizeof(mirror::HeapReference<mirror::Object>) == sizeof(int32_t),
"art::mirror::HeapReference<art::mirror::Object> and int32_t have different sizes.");
+ ScaleFactor scale_factor = TIMES_4;
+
+ if (kBakerReadBarrierThunksEnableForArrays) {
+ // Note that we do not actually check the value of `GetIsGcMarking()`
+ // to decide whether to mark the loaded reference or not. Instead, we
+ // load into `temp` (T9) the read barrier mark introspection entrypoint.
+ // If `temp` is null, it means that `GetIsGcMarking()` is false, and
+ // vice versa.
+ //
+ // We use thunks for the slow path. That thunk checks the reference
+ // and jumps to the entrypoint if needed. If the holder is not gray,
+ // it issues a load-load memory barrier and returns to the original
+ // reference load.
+ //
+ // temp = Thread::Current()->pReadBarrierMarkReg00
+ // // AKA &art_quick_read_barrier_mark_introspection.
+ // if (temp != nullptr) {
+ // temp = &field_array_thunk<holder_reg>
+ // temp()
+ // }
+ // not_gray_return_address:
+ // // The element address is pre-calculated in the TMP register before the
+ // // thunk invocation and the thunk benefits from it.
+ // HeapReference<mirror::Object> reference = data[index]; // Original reference load.
+ // gray_return_address:
+
+ DCHECK(temp.IsInvalid());
+ DCHECK(index.IsValid());
+ const int32_t entry_point_offset =
+ Thread::ReadBarrierMarkEntryPointsOffset<kMips64PointerSize>(0);
+ // We will not do the explicit null check in the thunk as some form of a null check
+ // must've been done earlier.
+ DCHECK(!needs_null_check);
+ const int thunk_disp = GetBakerMarkFieldArrayThunkDisplacement(obj, /* short_offset */ false);
+ // Loading the entrypoint does not require a load acquire since it is only changed when
+ // threads are suspended or running a checkpoint.
+ __ LoadFromOffset(kLoadDoubleword, T9, TR, entry_point_offset);
+ __ Beqz(T9, 2); // Skip jialc.
+ GpuRegister ref_reg = ref.AsRegister<GpuRegister>();
+ GpuRegister index_reg = index.AsRegister<GpuRegister>();
+ __ Dlsa(TMP, index_reg, obj, scale_factor); // In delay slot.
+ __ Jialc(T9, thunk_disp);
+ // /* HeapReference<Object> */ ref = *(obj + data_offset + (index << scale_factor))
+ DCHECK(IsInt<16>(static_cast<int32_t>(data_offset))) << data_offset;
+ __ LoadFromOffset(kLoadUnsignedWord, ref_reg, TMP, data_offset); // Single instruction.
+ __ MaybeUnpoisonHeapReference(ref_reg);
+ return;
+ }
+
// /* HeapReference<Object> */ ref =
// *(obj + data_offset + index * sizeof(HeapReference<Object>))
- ScaleFactor scale_factor = TIMES_4;
GenerateReferenceLoadWithBakerReadBarrier(instruction,
ref,
obj,
@@ -5278,8 +5510,13 @@ void InstructionCodeGeneratorMIPS64::VisitLoadClass(HLoadClass* cls) NO_THREAD_S
GpuRegister temp = non_baker_read_barrier
? out
: locations->GetTemp(0).AsRegister<GpuRegister>();
- codegen_->EmitPcRelativeAddressPlaceholderHigh(bss_info_high, temp, info_low);
- GenerateGcRootFieldLoad(cls, out_loc, temp, /* placeholder */ 0x5678, read_barrier_option);
+ codegen_->EmitPcRelativeAddressPlaceholderHigh(bss_info_high, temp);
+ GenerateGcRootFieldLoad(cls,
+ out_loc,
+ temp,
+ /* placeholder */ 0x5678,
+ read_barrier_option,
+ &info_low->label);
generate_null_check = true;
break;
}
@@ -5399,12 +5636,13 @@ void InstructionCodeGeneratorMIPS64::VisitLoadString(HLoadString* load) NO_THREA
GpuRegister temp = non_baker_read_barrier
? out
: locations->GetTemp(0).AsRegister<GpuRegister>();
- codegen_->EmitPcRelativeAddressPlaceholderHigh(info_high, temp, info_low);
+ codegen_->EmitPcRelativeAddressPlaceholderHigh(info_high, temp);
GenerateGcRootFieldLoad(load,
out_loc,
temp,
/* placeholder */ 0x5678,
- kCompilerReadBarrierOption);
+ kCompilerReadBarrierOption,
+ &info_low->label);
SlowPathCodeMIPS64* slow_path =
new (GetGraph()->GetArena()) LoadStringSlowPathMIPS64(load, info_high);
codegen_->AddSlowPath(slow_path);
diff --git a/compiler/optimizing/code_generator_mips64.h b/compiler/optimizing/code_generator_mips64.h
index c94cc93dad..d03a9eabd4 100644
--- a/compiler/optimizing/code_generator_mips64.h
+++ b/compiler/optimizing/code_generator_mips64.h
@@ -281,7 +281,8 @@ class InstructionCodeGeneratorMIPS64 : public InstructionCodeGenerator {
Location root,
GpuRegister obj,
uint32_t offset,
- ReadBarrierOption read_barrier_option);
+ ReadBarrierOption read_barrier_option,
+ Mips64Label* label_low = nullptr);
void GenerateTestAndBranch(HInstruction* instruction,
size_t condition_input_index,
@@ -592,7 +593,7 @@ class CodeGeneratorMIPS64 : public CodeGenerator {
void EmitPcRelativeAddressPlaceholderHigh(PcRelativePatchInfo* info_high,
GpuRegister out,
- PcRelativePatchInfo* info_low);
+ PcRelativePatchInfo* info_low = nullptr);
void PatchJitRootUse(uint8_t* code,
const uint8_t* roots_data,
diff --git a/compiler/optimizing/common_arm.h b/compiler/optimizing/common_arm.h
index 01304ac35b..8fcceedcf6 100644
--- a/compiler/optimizing/common_arm.h
+++ b/compiler/optimizing/common_arm.h
@@ -227,14 +227,6 @@ inline Location LocationFrom(const vixl::aarch32::SRegister& low,
return Location::FpuRegisterPairLocation(low.GetCode(), high.GetCode());
}
-inline bool ShifterOperandSupportsExtension(HInstruction* instruction) {
- DCHECK(HasShifterOperand(instruction, kArm));
- // TODO: HAdd applied to the other integral types could make use of
- // the SXTAB, SXTAH, UXTAB and UXTAH instructions.
- return instruction->GetType() == Primitive::kPrimLong &&
- (instruction->IsAdd() || instruction->IsSub());
-}
-
} // namespace helpers
} // namespace arm
} // namespace art
diff --git a/compiler/optimizing/instruction_builder.cc b/compiler/optimizing/instruction_builder.cc
index 839f328a4f..8054140924 100644
--- a/compiler/optimizing/instruction_builder.cc
+++ b/compiler/optimizing/instruction_builder.cc
@@ -664,10 +664,7 @@ void HInstructionBuilder::BuildReturn(const Instruction& instruction,
// TODO: remove redundant constructor fences (b/36656456).
if (RequiresConstructorBarrier(dex_compilation_unit_, compiler_driver_)) {
// Compiling instance constructor.
- if (kIsDebugBuild) {
- std::string method_name = graph_->GetMethodName();
- CHECK_EQ(std::string("<init>"), method_name);
- }
+ DCHECK_STREQ("<init>", graph_->GetMethodName());
HInstruction* fence_target = current_this_parameter_;
DCHECK(fence_target != nullptr);
@@ -710,29 +707,18 @@ static InvokeType GetInvokeTypeFromOpCode(Instruction::Code opcode) {
ArtMethod* HInstructionBuilder::ResolveMethod(uint16_t method_idx, InvokeType invoke_type) {
ScopedObjectAccess soa(Thread::Current());
- StackHandleScope<2> hs(soa.Self());
ClassLinker* class_linker = dex_compilation_unit_->GetClassLinker();
Handle<mirror::ClassLoader> class_loader = dex_compilation_unit_->GetClassLoader();
- Handle<mirror::Class> compiling_class(hs.NewHandle(GetCompilingClass()));
- // We fetch the referenced class eagerly (that is, the class pointed by in the MethodId
- // at method_idx), as `CanAccessResolvedMethod` expects it be be in the dex cache.
- Handle<mirror::Class> methods_class(hs.NewHandle(class_linker->ResolveReferencedClassOfMethod(
- method_idx, dex_compilation_unit_->GetDexCache(), class_loader)));
-
- if (UNLIKELY(methods_class == nullptr)) {
- // Clean up any exception left by type resolution.
- soa.Self()->ClearException();
- return nullptr;
- }
- ArtMethod* resolved_method = class_linker->ResolveMethod<ClassLinker::kForceICCECheck>(
- *dex_compilation_unit_->GetDexFile(),
- method_idx,
- dex_compilation_unit_->GetDexCache(),
- class_loader,
- /* referrer */ nullptr,
- invoke_type);
+ ArtMethod* resolved_method =
+ class_linker->ResolveMethod<ClassLinker::ResolveMode::kCheckICCEAndIAE>(
+ *dex_compilation_unit_->GetDexFile(),
+ method_idx,
+ dex_compilation_unit_->GetDexCache(),
+ class_loader,
+ graph_->GetArtMethod(),
+ invoke_type);
if (UNLIKELY(resolved_method == nullptr)) {
// Clean up any exception left by type resolution.
@@ -740,17 +726,14 @@ ArtMethod* HInstructionBuilder::ResolveMethod(uint16_t method_idx, InvokeType in
return nullptr;
}
- // Check access. The class linker has a fast path for looking into the dex cache
- // and does not check the access if it hits it.
- if (compiling_class == nullptr) {
+ // The referrer may be unresolved for AOT if we're compiling a class that cannot be
+ // resolved because, for example, we don't find a superclass in the classpath.
+ if (graph_->GetArtMethod() == nullptr) {
+ // The class linker cannot check access without a referrer, so we have to do it.
+ // Fall back to HInvokeUnresolved if the method isn't public.
if (!resolved_method->IsPublic()) {
return nullptr;
}
- } else if (!compiling_class->CanAccessResolvedMethod(resolved_method->GetDeclaringClass(),
- resolved_method,
- dex_compilation_unit_->GetDexCache().Get(),
- method_idx)) {
- return nullptr;
}
// We have to special case the invoke-super case, as ClassLinker::ResolveMethod does not.
@@ -758,19 +741,26 @@ ArtMethod* HInstructionBuilder::ResolveMethod(uint16_t method_idx, InvokeType in
// make this an invoke-unresolved to handle cross-dex invokes or abstract super methods, both of
// which require runtime handling.
if (invoke_type == kSuper) {
+ ObjPtr<mirror::Class> compiling_class = GetCompilingClass();
if (compiling_class == nullptr) {
// We could not determine the method's class we need to wait until runtime.
DCHECK(Runtime::Current()->IsAotCompiler());
return nullptr;
}
- if (!methods_class->IsAssignableFrom(compiling_class.Get())) {
+ ObjPtr<mirror::Class> referenced_class = class_linker->LookupResolvedType(
+ *dex_compilation_unit_->GetDexFile(),
+ dex_compilation_unit_->GetDexFile()->GetMethodId(method_idx).class_idx_,
+ dex_compilation_unit_->GetDexCache().Get(),
+ class_loader.Get());
+ DCHECK(referenced_class != nullptr); // We have already resolved a method from this class.
+ if (!referenced_class->IsAssignableFrom(compiling_class)) {
// We cannot statically determine the target method. The runtime will throw a
// NoSuchMethodError on this one.
return nullptr;
}
ArtMethod* actual_method;
- if (methods_class->IsInterface()) {
- actual_method = methods_class->FindVirtualMethodForInterfaceSuper(
+ if (referenced_class->IsInterface()) {
+ actual_method = referenced_class->FindVirtualMethodForInterfaceSuper(
resolved_method, class_linker->GetImagePointerSize());
} else {
uint16_t vtable_index = resolved_method->GetMethodIndex();
@@ -797,12 +787,6 @@ ArtMethod* HInstructionBuilder::ResolveMethod(uint16_t method_idx, InvokeType in
resolved_method = actual_method;
}
- // Check for incompatible class changes. The class linker has a fast path for
- // looking into the dex cache and does not check incompatible class changes if it hits it.
- if (resolved_method->CheckIncompatibleClassChange(invoke_type)) {
- return nullptr;
- }
-
return resolved_method;
}
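
With access and ICCE/IAE checking now delegated to the class linker via the compiling method as referrer, the only check left in the builder is the public-method fallback taken when no referrer is available. A minimal sketch of that decision, using hypothetical simplified types rather than the real ART classes:

// Hedged sketch, not the ART API: the fallback applied when
// graph_->GetArtMethod() is null and no access check was performed.
struct MethodInfoSketch {
  bool is_public;
};

// Returns true if the resolved method may be kept; false means the builder
// falls back to HInvokeUnresolved and lets the runtime handle the call.
bool KeepResolvedMethod(bool have_referrer, const MethodInfoSketch& method) {
  if (!have_referrer) {
    // No referrer, so resolution could not check access;
    // only public methods are safe to use directly.
    return method.is_public;
  }
  return true;
}
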
diff --git a/compiler/optimizing/instruction_simplifier.cc b/compiler/optimizing/instruction_simplifier.cc
index d14716601c..f2a8cc0333 100644
--- a/compiler/optimizing/instruction_simplifier.cc
+++ b/compiler/optimizing/instruction_simplifier.cc
@@ -1867,33 +1867,35 @@ void InstructionSimplifierVisitor::SimplifySystemArrayCopy(HInvoke* instruction)
ArtMethod* method = nullptr;
switch (source_component_type) {
case Primitive::kPrimBoolean:
- method = system->FindDeclaredDirectMethod("arraycopy", "([ZI[ZII)V", image_size);
+ method = system->FindClassMethod("arraycopy", "([ZI[ZII)V", image_size);
break;
case Primitive::kPrimByte:
- method = system->FindDeclaredDirectMethod("arraycopy", "([BI[BII)V", image_size);
+ method = system->FindClassMethod("arraycopy", "([BI[BII)V", image_size);
break;
case Primitive::kPrimChar:
- method = system->FindDeclaredDirectMethod("arraycopy", "([CI[CII)V", image_size);
+ method = system->FindClassMethod("arraycopy", "([CI[CII)V", image_size);
break;
case Primitive::kPrimShort:
- method = system->FindDeclaredDirectMethod("arraycopy", "([SI[SII)V", image_size);
+ method = system->FindClassMethod("arraycopy", "([SI[SII)V", image_size);
break;
case Primitive::kPrimInt:
- method = system->FindDeclaredDirectMethod("arraycopy", "([II[III)V", image_size);
+ method = system->FindClassMethod("arraycopy", "([II[III)V", image_size);
break;
case Primitive::kPrimFloat:
- method = system->FindDeclaredDirectMethod("arraycopy", "([FI[FII)V", image_size);
+ method = system->FindClassMethod("arraycopy", "([FI[FII)V", image_size);
break;
case Primitive::kPrimLong:
- method = system->FindDeclaredDirectMethod("arraycopy", "([JI[JII)V", image_size);
+ method = system->FindClassMethod("arraycopy", "([JI[JII)V", image_size);
break;
case Primitive::kPrimDouble:
- method = system->FindDeclaredDirectMethod("arraycopy", "([DI[DII)V", image_size);
+ method = system->FindClassMethod("arraycopy", "([DI[DII)V", image_size);
break;
default:
LOG(FATAL) << "Unreachable";
}
DCHECK(method != nullptr);
+ DCHECK(method->IsStatic());
+ DCHECK(method->GetDeclaringClass() == system);
invoke->SetResolvedMethod(method);
// Sharpen the new invoke. Note that we do not update the dex method index of
// the invoke, as we would need to look it up in the current dex file, and it
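
The switch above selects the java.lang.System.arraycopy overload whose signature matches the source component type; each case differs only in the array descriptor character. A hedged sketch of that mapping as a standalone helper (hypothetical, not part of the simplifier):

#include <string>

// Hedged sketch (hypothetical helper): builds the "([XI[XII)V" arraycopy
// signature from a primitive array descriptor character, e.g. 'B' for byte
// gives "([BI[BII)V", matching the cases in the switch above.
std::string ArrayCopySignature(char descriptor) {
  std::string signature = "([";
  signature += descriptor;
  signature += "I[";
  signature += descriptor;
  signature += "II)V";
  return signature;
}
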
diff --git a/compiler/optimizing/instruction_simplifier_arm.cc b/compiler/optimizing/instruction_simplifier_arm.cc
index fe22595258..a025fb10ce 100644
--- a/compiler/optimizing/instruction_simplifier_arm.cc
+++ b/compiler/optimizing/instruction_simplifier_arm.cc
@@ -29,8 +29,6 @@ using helpers::HasShifterOperand;
namespace arm {
-using helpers::ShifterOperandSupportsExtension;
-
bool InstructionSimplifierArmVisitor::TryMergeIntoShifterOperand(HInstruction* use,
HInstruction* bitfield_op,
bool do_merge) {
@@ -76,7 +74,7 @@ bool InstructionSimplifierArmVisitor::TryMergeIntoShifterOperand(HInstruction* u
: kMaxLongShiftDistance;
if (HDataProcWithShifterOp::IsExtensionOp(op_kind)) {
- if (!ShifterOperandSupportsExtension(use)) {
+ if (!use->IsAdd() && (!use->IsSub() || use->GetType() != Primitive::kPrimLong)) {
return false;
}
// Shift by 1 is a special case that results in the same number and type of instructions
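
The inlined predicate above replaces ShifterOperandSupportsExtension: an extension op can be merged into the shifter operand only when the user is an Add, or a Sub whose type is long. Restated as a hedged standalone check (names are illustrative, not ART code):

// Hedged sketch: mirrors the inlined condition in TryMergeIntoShifterOperand.
// Returns true if a SXTB/SXTH/UXTB/UXTH extension may be folded into the
// shifter operand of the given user.
bool ExtensionCanMergeIntoUse(bool use_is_add, bool use_is_sub, bool use_is_long) {
  return use_is_add || (use_is_sub && use_is_long);
}
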
diff --git a/compiler/optimizing/reference_type_propagation.cc b/compiler/optimizing/reference_type_propagation.cc
index 98332d35fb..f172e16ff9 100644
--- a/compiler/optimizing/reference_type_propagation.cc
+++ b/compiler/optimizing/reference_type_propagation.cc
@@ -525,8 +525,8 @@ void ReferenceTypePropagation::RTPVisitor::SetClassAsTypeInfo(HInstruction* inst
// Use a null loader. We should probably use the compiling method's class loader,
// but then we would need to pass it to RTPVisitor just for this debug check. Since
// the method is from the String class, the null loader is good enough.
- Handle<mirror::ClassLoader> loader;
- ArtMethod* method = cl->ResolveMethod<ClassLinker::kNoICCECheckForCache>(
+ Handle<mirror::ClassLoader> loader(hs.NewHandle<mirror::ClassLoader>(nullptr));
+ ArtMethod* method = cl->ResolveMethod<ClassLinker::ResolveMode::kNoChecks>(
dex_file, invoke->GetDexMethodIndex(), dex_cache, loader, nullptr, kDirect);
DCHECK(method != nullptr);
mirror::Class* declaring_class = method->GetDeclaringClass();
diff --git a/compiler/optimizing/scheduler_arm.cc b/compiler/optimizing/scheduler_arm.cc
index e78cd78aa2..f025c0a2d4 100644
--- a/compiler/optimizing/scheduler_arm.cc
+++ b/compiler/optimizing/scheduler_arm.cc
@@ -167,22 +167,346 @@ void SchedulingLatencyVisitorARM::VisitUShr(HUShr* instr) {
HandleShiftLatencies(instr);
}
-void SchedulingLatencyVisitorARM::VisitCondition(HCondition* instr) {
- switch (instr->GetLeft()->GetType()) {
- case Primitive::kPrimLong:
- last_visited_internal_latency_ = 4 * kArmIntegerOpLatency;
+void SchedulingLatencyVisitorARM::HandleGenerateConditionWithZero(IfCondition condition) {
+ switch (condition) {
+ case kCondEQ:
+ case kCondBE:
+ case kCondNE:
+ case kCondA:
+ last_visited_internal_latency_ += kArmIntegerOpLatency;
+ last_visited_latency_ = kArmIntegerOpLatency;
break;
- case Primitive::kPrimFloat:
- case Primitive::kPrimDouble:
- last_visited_internal_latency_ = 2 * kArmFloatingPointOpLatency;
+ case kCondGE:
+ // Mvn
+ last_visited_internal_latency_ += kArmIntegerOpLatency;
+ FALLTHROUGH_INTENDED;
+ case kCondLT:
+ // Lsr
+ last_visited_latency_ = kArmIntegerOpLatency;
+ break;
+ case kCondAE:
+ // Trivially true.
+ // Mov
+ last_visited_latency_ = kArmIntegerOpLatency;
+ break;
+ case kCondB:
+ // Trivially false.
+ // Mov
+ last_visited_latency_ = kArmIntegerOpLatency;
break;
default:
- last_visited_internal_latency_ = 2 * kArmIntegerOpLatency;
+ LOG(FATAL) << "Unexpected condition " << condition;
+ UNREACHABLE();
+ }
+}
+
+void SchedulingLatencyVisitorARM::HandleGenerateLongTestConstant(HCondition* condition) {
+ DCHECK_EQ(condition->GetLeft()->GetType(), Primitive::kPrimLong);
+
+ IfCondition cond = condition->GetCondition();
+
+ HInstruction* right = condition->InputAt(1);
+
+ int64_t value = Uint64ConstantFrom(right);
+
+ // Comparisons against 0 are common enough, so codegen has special handling for them.
+ if (value == 0) {
+ switch (cond) {
+ case kCondNE:
+ case kCondA:
+ case kCondEQ:
+ case kCondBE:
+ // Orrs
+ last_visited_internal_latency_ += kArmIntegerOpLatency;
+ return;
+ case kCondLT:
+ case kCondGE:
+ // Cmp
+ last_visited_internal_latency_ += kArmIntegerOpLatency;
+ return;
+ case kCondB:
+ case kCondAE:
+ // Cmp
+ last_visited_internal_latency_ += kArmIntegerOpLatency;
+ return;
+ default:
+ break;
+ }
+ }
+
+ switch (cond) {
+ case kCondEQ:
+ case kCondNE:
+ case kCondB:
+ case kCondBE:
+ case kCondA:
+ case kCondAE: {
+ // Cmp, IT, Cmp
+ last_visited_internal_latency_ += 3 * kArmIntegerOpLatency;
+ break;
+ }
+ case kCondLE:
+ case kCondGT:
+ // Trivially true or false.
+ if (value == std::numeric_limits<int64_t>::max()) {
+ // Cmp
+ last_visited_internal_latency_ += kArmIntegerOpLatency;
+ break;
+ }
+ FALLTHROUGH_INTENDED;
+ case kCondGE:
+ case kCondLT: {
+ // Cmp, Sbcs
+ last_visited_internal_latency_ += 2 * kArmIntegerOpLatency;
+ break;
+ }
+ default:
+ LOG(FATAL) << "Unreachable";
+ UNREACHABLE();
+ }
+}
+
+void SchedulingLatencyVisitorARM::HandleGenerateLongTest(HCondition* condition) {
+ DCHECK_EQ(condition->GetLeft()->GetType(), Primitive::kPrimLong);
+
+ IfCondition cond = condition->GetCondition();
+
+ switch (cond) {
+ case kCondEQ:
+ case kCondNE:
+ case kCondB:
+ case kCondBE:
+ case kCondA:
+ case kCondAE: {
+ // Cmp, IT, Cmp
+ last_visited_internal_latency_ += 3 * kArmIntegerOpLatency;
break;
+ }
+ case kCondLE:
+ case kCondGT:
+ case kCondGE:
+ case kCondLT: {
+ // Cmp, Sbcs
+ last_visited_internal_latency_ += 2 * kArmIntegerOpLatency;
+ break;
+ }
+ default:
+ LOG(FATAL) << "Unreachable";
+ UNREACHABLE();
}
+}
+
+// The GenerateTest series of functions are all counted as internal latency.
+void SchedulingLatencyVisitorARM::HandleGenerateTest(HCondition* condition) {
+ const Primitive::Type type = condition->GetLeft()->GetType();
+
+ if (type == Primitive::kPrimLong) {
+ condition->InputAt(1)->IsConstant()
+ ? HandleGenerateLongTestConstant(condition)
+ : HandleGenerateLongTest(condition);
+ } else if (Primitive::IsFloatingPointType(type)) {
+ // GenerateVcmp + Vmrs
+ last_visited_internal_latency_ += 2 * kArmFloatingPointOpLatency;
+ } else {
+ // Cmp
+ last_visited_internal_latency_ += kArmIntegerOpLatency;
+ }
+}
+
+bool SchedulingLatencyVisitorARM::CanGenerateTest(HCondition* condition) {
+ if (condition->GetLeft()->GetType() == Primitive::kPrimLong) {
+ HInstruction* right = condition->InputAt(1);
+
+ if (right->IsConstant()) {
+ IfCondition c = condition->GetCondition();
+ const uint64_t value = Uint64ConstantFrom(right);
+
+ if (c < kCondLT || c > kCondGE) {
+ if (value != 0) {
+ return false;
+ }
+ } else if (c == kCondLE || c == kCondGT) {
+ if (value < std::numeric_limits<int64_t>::max() &&
+ !codegen_->GetAssembler()->ShifterOperandCanHold(SBC, High32Bits(value + 1), kCcSet)) {
+ return false;
+ }
+ } else if (!codegen_->GetAssembler()->ShifterOperandCanHold(SBC, High32Bits(value), kCcSet)) {
+ return false;
+ }
+ }
+ }
+
+ return true;
+}
+
+void SchedulingLatencyVisitorARM::HandleGenerateConditionGeneric(HCondition* cond) {
+ HandleGenerateTest(cond);
+
+ // Unlike the codegen pass, we cannot check whether the 'out' register IsLow() here,
+ // because scheduling runs before liveness analysis (the location builder) and the
+ // register allocator, so we can only follow one codegen path by assuming out.IsLow() is true.
+ last_visited_internal_latency_ += 2 * kArmIntegerOpLatency;
last_visited_latency_ = kArmIntegerOpLatency;
}
+void SchedulingLatencyVisitorARM::HandleGenerateEqualLong(HCondition* cond) {
+ DCHECK_EQ(cond->GetLeft()->GetType(), Primitive::kPrimLong);
+
+ IfCondition condition = cond->GetCondition();
+
+ last_visited_internal_latency_ += 2 * kArmIntegerOpLatency;
+
+ if (condition == kCondNE) {
+ // Orrs, IT, Mov
+ last_visited_internal_latency_ += 3 * kArmIntegerOpLatency;
+ } else {
+ last_visited_internal_latency_ += kArmIntegerOpLatency;
+ HandleGenerateConditionWithZero(condition);
+ }
+}
+
+void SchedulingLatencyVisitorARM::HandleGenerateLongComparesAndJumps() {
+ last_visited_internal_latency_ += 4 * kArmIntegerOpLatency;
+ last_visited_internal_latency_ += kArmBranchLatency;
+}
+
+void SchedulingLatencyVisitorARM::HandleGenerateConditionLong(HCondition* cond) {
+ DCHECK_EQ(cond->GetLeft()->GetType(), Primitive::kPrimLong);
+
+ IfCondition condition = cond->GetCondition();
+ HInstruction* right = cond->InputAt(1);
+
+ if (right->IsConstant()) {
+ // Comparisons against 0 are common enough, so codegen has special handling for them.
+ if (Uint64ConstantFrom(right) == 0) {
+ switch (condition) {
+ case kCondNE:
+ case kCondA:
+ case kCondEQ:
+ case kCondBE:
+ // Orr
+ last_visited_internal_latency_ += kArmIntegerOpLatency;
+ HandleGenerateConditionWithZero(condition);
+ return;
+ case kCondLT:
+ case kCondGE:
+ FALLTHROUGH_INTENDED;
+ case kCondAE:
+ case kCondB:
+ HandleGenerateConditionWithZero(condition);
+ return;
+ case kCondLE:
+ case kCondGT:
+ default:
+ break;
+ }
+ }
+ }
+
+ if ((condition == kCondEQ || condition == kCondNE) &&
+ !CanGenerateTest(cond)) {
+ HandleGenerateEqualLong(cond);
+ return;
+ }
+
+ if (CanGenerateTest(cond)) {
+ HandleGenerateConditionGeneric(cond);
+ return;
+ }
+
+ HandleGenerateLongComparesAndJumps();
+
+ last_visited_internal_latency_ += kArmIntegerOpLatency;
+ last_visited_latency_ = kArmBranchLatency;
+}
+
+void SchedulingLatencyVisitorARM::HandleGenerateConditionIntegralOrNonPrimitive(HCondition* cond) {
+ const Primitive::Type type = cond->GetLeft()->GetType();
+
+ DCHECK(Primitive::IsIntegralType(type) || type == Primitive::kPrimNot) << type;
+
+ if (type == Primitive::kPrimLong) {
+ HandleGenerateConditionLong(cond);
+ return;
+ }
+
+ IfCondition condition = cond->GetCondition();
+ HInstruction* right = cond->InputAt(1);
+ int64_t value;
+
+ if (right->IsConstant()) {
+ value = Uint64ConstantFrom(right);
+
+ // Comparisons against 0 are common enough, so codegen has special handling for them.
+ if (value == 0) {
+ switch (condition) {
+ case kCondNE:
+ case kCondA:
+ case kCondEQ:
+ case kCondBE:
+ case kCondLT:
+ case kCondGE:
+ case kCondAE:
+ case kCondB:
+ HandleGenerateConditionWithZero(condition);
+ return;
+ case kCondLE:
+ case kCondGT:
+ default:
+ break;
+ }
+ }
+ }
+
+ if (condition == kCondEQ || condition == kCondNE) {
+ if (condition == kCondNE) {
+ // CMP, IT, MOV.ne
+ last_visited_internal_latency_ += 2 * kArmIntegerOpLatency;
+ last_visited_latency_ = kArmIntegerOpLatency;
+ } else {
+ last_visited_internal_latency_ += kArmIntegerOpLatency;
+ HandleGenerateConditionWithZero(condition);
+ }
+ return;
+ }
+
+ HandleGenerateConditionGeneric(cond);
+}
+
+void SchedulingLatencyVisitorARM::HandleCondition(HCondition* cond) {
+ if (cond->IsEmittedAtUseSite()) {
+ last_visited_latency_ = 0;
+ return;
+ }
+
+ const Primitive::Type type = cond->GetLeft()->GetType();
+
+ if (Primitive::IsFloatingPointType(type)) {
+ HandleGenerateConditionGeneric(cond);
+ return;
+ }
+
+ DCHECK(Primitive::IsIntegralType(type) || type == Primitive::kPrimNot) << type;
+
+ const IfCondition condition = cond->GetCondition();
+
+ if (type == Primitive::kPrimBoolean &&
+ cond->GetRight()->GetType() == Primitive::kPrimBoolean &&
+ (condition == kCondEQ || condition == kCondNE)) {
+ if (condition == kCondEQ) {
+ last_visited_internal_latency_ = kArmIntegerOpLatency;
+ }
+ last_visited_latency_ = kArmIntegerOpLatency;
+ return;
+ }
+
+ HandleGenerateConditionIntegralOrNonPrimitive(cond);
+}
+
+void SchedulingLatencyVisitorARM::VisitCondition(HCondition* instr) {
+ HandleCondition(instr);
+}
+
void SchedulingLatencyVisitorARM::VisitCompare(HCompare* instr) {
Primitive::Type type = instr->InputAt(0)->GetType();
switch (type) {
@@ -269,7 +593,6 @@ void SchedulingLatencyVisitorARM::VisitDataProcWithShifterOp(HDataProcWithShifte
const HDataProcWithShifterOp::OpKind op_kind = instruction->GetOpKind();
if (instruction->GetType() == Primitive::kPrimInt) {
- DCHECK(!HDataProcWithShifterOp::IsExtensionOp(op_kind));
HandleGenerateDataProcInstruction();
} else {
DCHECK_EQ(instruction->GetType(), Primitive::kPrimLong);
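
The new condition handlers all follow the visitor's two-counter model: helper instructions (compares, IT blocks, conditional moves) accumulate into last_visited_internal_latency_, and only the instruction that materializes the result sets last_visited_latency_. A hedged, simplified sketch of how the generic path composes for a floating-point condition (the constants are stand-ins, not the real kArm* values):

// Hedged sketch (simplified, not the ART scheduler): how the two counters
// compose in HandleGenerateConditionGeneric for a floating-point condition.
constexpr unsigned kIntOpSketch = 1;  // stand-in for kArmIntegerOpLatency
constexpr unsigned kFpOpSketch = 3;   // stand-in for kArmFloatingPointOpLatency

struct LatencySketch {
  unsigned internal = 0;  // helper instructions emitted before the result
  unsigned last = 0;      // the instruction producing the final value
};

LatencySketch GenericFloatConditionLatency() {
  LatencySketch latency;
  latency.internal += 2 * kFpOpSketch;   // HandleGenerateTest: Vcmp + Vmrs
  latency.internal += 2 * kIntOpSketch;  // conservative path assuming out.IsLow()
  latency.last = kIntOpSketch;           // final Mov of the condition result
  return latency;
}
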
diff --git a/compiler/optimizing/scheduler_arm.h b/compiler/optimizing/scheduler_arm.h
index a9f2295c35..fe274d29f9 100644
--- a/compiler/optimizing/scheduler_arm.h
+++ b/compiler/optimizing/scheduler_arm.h
@@ -109,6 +109,17 @@ class SchedulingLatencyVisitorARM : public SchedulingLatencyVisitor {
#undef DECLARE_VISIT_INSTRUCTION
private:
+ bool CanGenerateTest(HCondition* cond);
+ void HandleGenerateConditionWithZero(IfCondition cond);
+ void HandleGenerateLongTestConstant(HCondition* cond);
+ void HandleGenerateLongTest(HCondition* cond);
+ void HandleGenerateLongComparesAndJumps();
+ void HandleGenerateTest(HCondition* cond);
+ void HandleGenerateConditionGeneric(HCondition* cond);
+ void HandleGenerateEqualLong(HCondition* cond);
+ void HandleGenerateConditionLong(HCondition* cond);
+ void HandleGenerateConditionIntegralOrNonPrimitive(HCondition* cond);
+ void HandleCondition(HCondition* instr);
void HandleBinaryOperationLantencies(HBinaryOperation* instr);
void HandleBitwiseOperationLantencies(HBinaryOperation* instr);
void HandleShiftLatencies(HBinaryOperation* instr);