ARM: VIXL32: Implement Invoke, LoadClass, LoadString dispatch.
Implemented dispatch optimizations for InvokeStaticOrDirect,
LoadClass, and LoadString (excluding cases that use Literals).
Performed a cleanup of the VIXL backend.
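
For reference, the PC-relative load kinds implemented below all build an
address with a movw/movt pair that the linker patches (see
EmitMovwMovtPlaceholder in the diff); the emitted Thumb2 sequence is roughly:

    movw rX, #0   @ placeholder, patched at link time
    movt rX, #0   @ placeholder, patched at link time
    add  rX, pc   @ pc reads as the address of this insn + 4
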
Test: export ART_USE_VIXL_ARM_BACKEND=true && \
mma test-art-host dist && mma test-art-target dist
Change-Id: Ib37a6b7e7657196b13caec999d190be747857c1d
diff --git a/compiler/optimizing/code_generator_arm_vixl.cc b/compiler/optimizing/code_generator_arm_vixl.cc
index 1ca439e..3b7751f 100644
--- a/compiler/optimizing/code_generator_arm_vixl.cc
+++ b/compiler/optimizing/code_generator_arm_vixl.cc
@@ -63,7 +63,9 @@
// We expect this for both core and fpu register pairs.
return ((location.low() & 1) == 0) && (location.low() + 1 == location.high());
}
-
+// Use a local definition to prevent copying mistakes.
+static constexpr size_t kArmWordSize = static_cast<size_t>(kArmPointerSize);
+static constexpr size_t kArmBitsPerWord = kArmWordSize * kBitsPerByte;
static constexpr int kCurrentMethodStackOffset = 0;
static constexpr size_t kArmInstrMaxSizeInBytes = 4u;
static constexpr uint32_t kPackedSwitchCompareJumpThreshold = 7;
@@ -438,6 +440,62 @@
DISALLOW_COPY_AND_ASSIGN(LoadClassSlowPathARMVIXL);
};
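+// Slow path for HLoadString with the kBssEntry load kind: it calls the
+// pResolveString entrypoint and stores the resolved String into the .bss
+// entry, so that subsequent loads of the same string take the fast path.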
+class LoadStringSlowPathARMVIXL : public SlowPathCodeARMVIXL {
+ public:
+ explicit LoadStringSlowPathARMVIXL(HLoadString* instruction)
+ : SlowPathCodeARMVIXL(instruction) {}
+
+ void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
+ LocationSummary* locations = instruction_->GetLocations();
+ DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(locations->Out().reg()));
+ HLoadString* load = instruction_->AsLoadString();
+ const uint32_t string_index = load->GetStringIndex().index_;
+ vixl32::Register out = OutputRegister(load);
+ vixl32::Register temp = RegisterFrom(locations->GetTemp(0));
+ constexpr bool call_saves_everything_except_r0 = (!kUseReadBarrier || kUseBakerReadBarrier);
+
+ CodeGeneratorARMVIXL* arm_codegen = down_cast<CodeGeneratorARMVIXL*>(codegen);
+ __ Bind(GetEntryLabel());
+ SaveLiveRegisters(codegen, locations);
+
+ InvokeRuntimeCallingConventionARMVIXL calling_convention;
+ // In the unlucky case that `temp` is R0, we preserve the address in `out` across
+ // the kSaveEverything call (or use `out` for the address after a non-kSaveEverything call).
+ bool temp_is_r0 = (temp.Is(calling_convention.GetRegisterAt(0)));
+ vixl32::Register entry_address = temp_is_r0 ? out : temp;
+ DCHECK(!entry_address.Is(calling_convention.GetRegisterAt(0)));
+ if (call_saves_everything_except_r0 && temp_is_r0) {
+ __ Mov(entry_address, temp);
+ }
+
+ __ Mov(calling_convention.GetRegisterAt(0), string_index);
+ arm_codegen->InvokeRuntime(kQuickResolveString, instruction_, instruction_->GetDexPc(), this);
+ CheckEntrypointTypes<kQuickResolveString, void*, uint32_t>();
+
+ // Store the resolved String to the .bss entry.
+ if (call_saves_everything_except_r0) {
+ // The string entry address was preserved in `entry_address` thanks to kSaveEverything.
+ __ Str(r0, MemOperand(entry_address));
+ } else {
+ // For non-Baker read barrier, we need to re-calculate the address of the string entry.
+ CodeGeneratorARMVIXL::PcRelativePatchInfo* labels =
+ arm_codegen->NewPcRelativeStringPatch(load->GetDexFile(), string_index);
+ arm_codegen->EmitMovwMovtPlaceholder(labels, out);
+ __ Str(r0, MemOperand(entry_address));
+ }
+
+ arm_codegen->Move32(locations->Out(), LocationFrom(r0));
+ RestoreLiveRegisters(codegen, locations);
+
+ __ B(GetExitLabel());
+ }
+
+ const char* GetDescription() const OVERRIDE { return "LoadStringSlowPathARMVIXL"; }
+
+ private:
+ DISALLOW_COPY_AND_ASSIGN(LoadStringSlowPathARMVIXL);
+};
+
class TypeCheckSlowPathARMVIXL : public SlowPathCodeARMVIXL {
public:
TypeCheckSlowPathARMVIXL(HInstruction* instruction, bool is_fatal)
@@ -630,9 +688,30 @@
return mask;
}
-size_t CodeGeneratorARMVIXL::RestoreFloatingPointRegister(size_t stack_index, uint32_t reg_id) {
- GetAssembler()->LoadSFromOffset(vixl32::SRegister(reg_id), sp, stack_index);
- return kArmWordSize;
+// Saves the register to the stack. Returns the size taken on the stack.
+size_t CodeGeneratorARMVIXL::SaveCoreRegister(size_t stack_index ATTRIBUTE_UNUSED,
+ uint32_t reg_id ATTRIBUTE_UNUSED) {
+ TODO_VIXL32(FATAL);
+ return 0;
+}
+
+// Restores the register from the stack. Returns the size taken on the stack.
+size_t CodeGeneratorARMVIXL::RestoreCoreRegister(size_t stack_index ATTRIBUTE_UNUSED,
+ uint32_t reg_id ATTRIBUTE_UNUSED) {
+ TODO_VIXL32(FATAL);
+ return 0;
+}
+
+size_t CodeGeneratorARMVIXL::SaveFloatingPointRegister(size_t stack_index ATTRIBUTE_UNUSED,
+ uint32_t reg_id ATTRIBUTE_UNUSED) {
+ TODO_VIXL32(FATAL);
+ return 0;
+}
+
+size_t CodeGeneratorARMVIXL::RestoreFloatingPointRegister(size_t stack_index ATTRIBUTE_UNUSED,
+ uint32_t reg_id ATTRIBUTE_UNUSED) {
+ TODO_VIXL32(FATAL);
+ return 0;
}
#undef __
@@ -655,7 +734,11 @@
instruction_visitor_(graph, this),
move_resolver_(graph->GetArena(), this),
assembler_(graph->GetArena()),
- isa_features_(isa_features) {
+ isa_features_(isa_features),
+ relative_call_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
+ pc_relative_dex_cache_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
+ pc_relative_string_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
+ pc_relative_type_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)) {
// Always save the LR register to mimic Quick.
AddAllocatedRegister(Location::RegisterLocation(LR));
// Give d14 and d15 as scratch registers to VIXL.
@@ -853,6 +936,116 @@
__ Bind(GetLabelOf(block));
}
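+// Assigns argument locations following the managed ARM calling convention
+// (r0 carries the ArtMethod*, so GP arguments start at r1). For example, a
+// signature (int, long, float, double) maps to: int -> r1, long -> r2/r3,
+// float -> s0, double -> s2/s3 (double_index_ is rounded up past s0/s1).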
+Location InvokeDexCallingConventionVisitorARMVIXL::GetNextLocation(Primitive::Type type) {
+ switch (type) {
+ case Primitive::kPrimBoolean:
+ case Primitive::kPrimByte:
+ case Primitive::kPrimChar:
+ case Primitive::kPrimShort:
+ case Primitive::kPrimInt:
+ case Primitive::kPrimNot: {
+ uint32_t index = gp_index_++;
+ uint32_t stack_index = stack_index_++;
+ if (index < calling_convention.GetNumberOfRegisters()) {
+ return LocationFrom(calling_convention.GetRegisterAt(index));
+ } else {
+ return Location::StackSlot(calling_convention.GetStackOffsetOf(stack_index));
+ }
+ }
+
+ case Primitive::kPrimLong: {
+ uint32_t index = gp_index_;
+ uint32_t stack_index = stack_index_;
+ gp_index_ += 2;
+ stack_index_ += 2;
+ if (index + 1 < calling_convention.GetNumberOfRegisters()) {
+ if (calling_convention.GetRegisterAt(index).Is(r1)) {
+ // Skip R1, and use R2_R3 instead.
+ gp_index_++;
+ index++;
+ }
+ }
+ if (index + 1 < calling_convention.GetNumberOfRegisters()) {
+ DCHECK_EQ(calling_convention.GetRegisterAt(index).GetCode() + 1,
+ calling_convention.GetRegisterAt(index + 1).GetCode());
+
+ return LocationFrom(calling_convention.GetRegisterAt(index),
+ calling_convention.GetRegisterAt(index + 1));
+ } else {
+ return Location::DoubleStackSlot(calling_convention.GetStackOffsetOf(stack_index));
+ }
+ }
+
+ case Primitive::kPrimFloat: {
+ uint32_t stack_index = stack_index_++;
+ if (float_index_ % 2 == 0) {
+ float_index_ = std::max(double_index_, float_index_);
+ }
+ if (float_index_ < calling_convention.GetNumberOfFpuRegisters()) {
+ return LocationFrom(calling_convention.GetFpuRegisterAt(float_index_++));
+ } else {
+ return Location::StackSlot(calling_convention.GetStackOffsetOf(stack_index));
+ }
+ }
+
+ case Primitive::kPrimDouble: {
+ double_index_ = std::max(double_index_, RoundUp(float_index_, 2));
+ uint32_t stack_index = stack_index_;
+ stack_index_ += 2;
+ if (double_index_ + 1 < calling_convention.GetNumberOfFpuRegisters()) {
+ uint32_t index = double_index_;
+ double_index_ += 2;
+ Location result = LocationFrom(
+ calling_convention.GetFpuRegisterAt(index),
+ calling_convention.GetFpuRegisterAt(index + 1));
+ DCHECK(ExpectedPairLayout(result));
+ return result;
+ } else {
+ return Location::DoubleStackSlot(calling_convention.GetStackOffsetOf(stack_index));
+ }
+ }
+
+ case Primitive::kPrimVoid:
+ LOG(FATAL) << "Unexpected parameter type " << type;
+ break;
+ }
+ return Location::NoLocation();
+}
+
+Location InvokeDexCallingConventionVisitorARMVIXL::GetReturnLocation(Primitive::Type type) const {
+ switch (type) {
+ case Primitive::kPrimBoolean:
+ case Primitive::kPrimByte:
+ case Primitive::kPrimChar:
+ case Primitive::kPrimShort:
+ case Primitive::kPrimInt:
+ case Primitive::kPrimNot: {
+ return LocationFrom(r0);
+ }
+
+ case Primitive::kPrimFloat: {
+ return LocationFrom(s0);
+ }
+
+ case Primitive::kPrimLong: {
+ return LocationFrom(r0, r1);
+ }
+
+ case Primitive::kPrimDouble: {
+ return LocationFrom(s0, s1);
+ }
+
+ case Primitive::kPrimVoid:
+ return Location::NoLocation();
+ }
+
+ UNREACHABLE();
+}
+
+Location InvokeDexCallingConventionVisitorARMVIXL::GetMethodLocation() const {
+ return LocationFrom(kMethodRegister);
+}
+
void CodeGeneratorARMVIXL::Move32(Location destination, Location source) {
if (source.Equals(destination)) {
return;
@@ -1270,6 +1463,19 @@
/* false_target */ nullptr);
}
+void LocationsBuilderARMVIXL::VisitShouldDeoptimizeFlag(HShouldDeoptimizeFlag* flag) {
+ LocationSummary* locations = new (GetGraph()->GetArena())
+ LocationSummary(flag, LocationSummary::kNoCall);
+ locations->SetOut(Location::RequiresRegister());
+}
+
+void InstructionCodeGeneratorARMVIXL::VisitShouldDeoptimizeFlag(HShouldDeoptimizeFlag* flag) {
+ GetAssembler()->LoadFromOffset(kLoadWord,
+ OutputRegister(flag),
+ sp,
+ codegen_->GetStackOffsetOfShouldDeoptimizeFlag());
+}
+
void LocationsBuilderARMVIXL::VisitSelect(HSelect* select) {
LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(select);
if (Primitive::IsFloatingPointType(select->GetType())) {
@@ -1575,7 +1781,10 @@
HandleInvoke(invoke);
- // TODO(VIXL): invoke->HasPcRelativeDexCache()
+ // For PC-relative dex cache accesses, the invoke has an extra input: the PC-relative address base.
+ if (invoke->HasPcRelativeDexCache()) {
+ invoke->GetLocations()->SetInAt(invoke->GetSpecialInputIndex(), Location::RequiresRegister());
+ }
}
static bool TryGenerateIntrinsicCode(HInvoke* invoke, CodeGeneratorARMVIXL* codegen) {
@@ -1597,15 +1806,13 @@
}
LocationSummary* locations = invoke->GetLocations();
- DCHECK(locations->HasTemps());
- codegen_->GenerateStaticOrDirectCall(invoke, locations->GetTemp(0));
- // TODO(VIXL): If necessary, use a scope to ensure we record the pc info immediately after the
- // previous instruction.
+ codegen_->GenerateStaticOrDirectCall(
+ invoke, locations->HasTemps() ? locations->GetTemp(0) : Location::NoLocation());
codegen_->RecordPcInfo(invoke, invoke->GetDexPc());
}
void LocationsBuilderARMVIXL::HandleInvoke(HInvoke* invoke) {
- InvokeDexCallingConventionVisitorARM calling_convention_visitor;
+ InvokeDexCallingConventionVisitorARMVIXL calling_convention_visitor;
CodeGenerator::CreateCommonInvokeLocationSummary(invoke, &calling_convention_visitor);
}
@@ -4985,12 +5192,37 @@
TODO_VIXL32(FATAL);
}
-// Check if the desired_class_load_kind is supported. If it is, return it,
-// otherwise return a fall-back kind that should be used instead.
HLoadClass::LoadKind CodeGeneratorARMVIXL::GetSupportedLoadClassKind(
- HLoadClass::LoadKind desired_class_load_kind ATTRIBUTE_UNUSED) {
- // TODO(VIXL): Implement optimized code paths.
- return HLoadClass::LoadKind::kDexCacheViaMethod;
+ HLoadClass::LoadKind desired_class_load_kind) {
+ switch (desired_class_load_kind) {
+ case HLoadClass::LoadKind::kReferrersClass:
+ break;
+ case HLoadClass::LoadKind::kBootImageLinkTimeAddress:
+ // TODO(VIXL): Re-enable this once literal pools are fixed in VIXL.
+ return HLoadClass::LoadKind::kDexCacheViaMethod;
+ case HLoadClass::LoadKind::kBootImageLinkTimePcRelative:
+ DCHECK(GetCompilerOptions().GetCompilePic());
+ break;
+ case HLoadClass::LoadKind::kBootImageAddress:
+ // TODO(VIXL): Re-enable this once literal pools are fixed in VIXL.
+ return HLoadClass::LoadKind::kDexCacheViaMethod;
+ case HLoadClass::LoadKind::kDexCacheAddress:
+ // TODO(VIXL): Re-enable this once literal pools are fixed in VIXL.
+ return HLoadClass::LoadKind::kDexCacheViaMethod;
+ case HLoadClass::LoadKind::kDexCachePcRelative:
+ DCHECK(!Runtime::Current()->UseJitCompilation());
+ // We disable pc-relative load when there is an irreducible loop, as the optimization
+ // is incompatible with it.
+ // TODO: Create as many ArmDexCacheArraysBase instructions as needed for methods
+ // with irreducible loops.
+ if (GetGraph()->HasIrreducibleLoops()) {
+ return HLoadClass::LoadKind::kDexCacheViaMethod;
+ }
+ break;
+ case HLoadClass::LoadKind::kDexCacheViaMethod:
+ break;
+ }
+ return desired_class_load_kind;
}
void LocationsBuilderARMVIXL::VisitLoadClass(HLoadClass* cls) {
@@ -5004,11 +5236,15 @@
return;
}
- // TODO(VIXL): read barrier code.
- LocationSummary::CallKind call_kind = (cls->NeedsEnvironment() || kEmitCompilerReadBarrier)
+ const bool requires_read_barrier = kEmitCompilerReadBarrier && !cls->IsInBootImage();
+ LocationSummary::CallKind call_kind = (cls->NeedsEnvironment() || requires_read_barrier)
? LocationSummary::kCallOnSlowPath
: LocationSummary::kNoCall;
LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(cls, call_kind);
+ if (kUseBakerReadBarrier && requires_read_barrier && !cls->NeedsEnvironment()) {
+ TODO_VIXL32(FATAL);
+ }
+
HLoadClass::LoadKind load_kind = cls->GetLoadKind();
if (load_kind == HLoadClass::LoadKind::kReferrersClass ||
load_kind == HLoadClass::LoadKind::kDexCacheViaMethod ||
@@ -5030,7 +5266,9 @@
Location out_loc = locations->Out();
vixl32::Register out = OutputRegister(cls);
- // TODO(VIXL): read barrier code.
+ const ReadBarrierOption read_barrier_option = cls->IsInBootImage()
+ ? kWithoutReadBarrier
+ : kCompilerReadBarrierOption;
bool generate_null_check = false;
switch (cls->GetLoadKind()) {
case HLoadClass::LoadKind::kReferrersClass: {
@@ -5042,7 +5280,35 @@
out_loc,
current_method,
ArtMethod::DeclaringClassOffset().Int32Value(),
- kEmitCompilerReadBarrier);
+ read_barrier_option);
+ break;
+ }
+ case HLoadClass::LoadKind::kBootImageLinkTimeAddress: {
+ TODO_VIXL32(FATAL);
+ break;
+ }
+ case HLoadClass::LoadKind::kBootImageLinkTimePcRelative: {
+ DCHECK_EQ(read_barrier_option, kWithoutReadBarrier);
+ CodeGeneratorARMVIXL::PcRelativePatchInfo* labels =
+ codegen_->NewPcRelativeTypePatch(cls->GetDexFile(), cls->GetTypeIndex());
+ codegen_->EmitMovwMovtPlaceholder(labels, out);
+ break;
+ }
+ case HLoadClass::LoadKind::kBootImageAddress: {
+ TODO_VIXL32(FATAL);
+ break;
+ }
+ case HLoadClass::LoadKind::kDexCacheAddress: {
+ TODO_VIXL32(FATAL);
+ break;
+ }
+ case HLoadClass::LoadKind::kDexCachePcRelative: {
+ vixl32::Register base_reg = InputRegisterAt(cls, 0);
+ HArmDexCacheArraysBase* base = cls->InputAt(0)->AsArmDexCacheArraysBase();
+ int32_t offset = cls->GetDexCacheElementOffset() - base->GetElementOffset();
+ // /* GcRoot<mirror::Class> */ out = *(dex_cache_arrays_base + offset)
+ GenerateGcRootFieldLoad(cls, out_loc, base_reg, offset, read_barrier_option);
+ generate_null_check = !cls->IsInDexCache();
break;
}
case HLoadClass::LoadKind::kDexCacheViaMethod: {
@@ -5054,7 +5320,7 @@
GetAssembler()->LoadFromOffset(kLoadWord, out, current_method, resolved_types_offset);
// /* GcRoot<mirror::Class> */ out = out[type_index]
size_t offset = CodeGenerator::GetCacheOffset(cls->GetTypeIndex().index_);
- GenerateGcRootFieldLoad(cls, out_loc, out, offset, kEmitCompilerReadBarrier);
+ GenerateGcRootFieldLoad(cls, out_loc, out, offset, read_barrier_option);
generate_null_check = !cls->IsInDexCache();
break;
}
@@ -5114,37 +5380,101 @@
__ Bind(slow_path->GetExitLabel());
}
-// Check if the desired_string_load_kind is supported. If it is, return it,
-// otherwise return a fall-back kind that should be used instead.
HLoadString::LoadKind CodeGeneratorARMVIXL::GetSupportedLoadStringKind(
- HLoadString::LoadKind desired_string_load_kind ATTRIBUTE_UNUSED) {
- // TODO(VIXL): Implement optimized code paths. For now we always use the simpler fallback code.
- return HLoadString::LoadKind::kDexCacheViaMethod;
+ HLoadString::LoadKind desired_string_load_kind) {
+ switch (desired_string_load_kind) {
+ case HLoadString::LoadKind::kBootImageLinkTimeAddress:
+ // TODO(VIXL): Implement missing optimization.
+ return HLoadString::LoadKind::kDexCacheViaMethod;
+ case HLoadString::LoadKind::kBootImageLinkTimePcRelative:
+ DCHECK(GetCompilerOptions().GetCompilePic());
+ break;
+ case HLoadString::LoadKind::kBootImageAddress:
+ // TODO(VIXL): Implement missing optimization.
+ return HLoadString::LoadKind::kDexCacheViaMethod;
+ case HLoadString::LoadKind::kBssEntry:
+ DCHECK(!Runtime::Current()->UseJitCompilation());
+ break;
+ case HLoadString::LoadKind::kJitTableAddress:
+ DCHECK(Runtime::Current()->UseJitCompilation());
+ // TODO(VIXL): Implement missing optimization.
+ return HLoadString::LoadKind::kDexCacheViaMethod;
+ case HLoadString::LoadKind::kDexCacheViaMethod:
+ break;
+ }
+ return desired_string_load_kind;
}
void LocationsBuilderARMVIXL::VisitLoadString(HLoadString* load) {
- LocationSummary::CallKind call_kind = load->NeedsEnvironment()
- ? LocationSummary::kCallOnMainOnly
- : LocationSummary::kNoCall;
+ LocationSummary::CallKind call_kind = CodeGenerator::GetLoadStringCallKind(load);
LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(load, call_kind);
-
- // TODO(VIXL): Implement optimized code paths.
- // See InstructionCodeGeneratorARMVIXL::VisitLoadString.
HLoadString::LoadKind load_kind = load->GetLoadKind();
if (load_kind == HLoadString::LoadKind::kDexCacheViaMethod) {
- locations->SetInAt(0, Location::RequiresRegister());
- // TODO(VIXL): Use InvokeRuntimeCallingConventionARMVIXL instead.
locations->SetOut(LocationFrom(r0));
} else {
locations->SetOut(Location::RequiresRegister());
+ if (load_kind == HLoadString::LoadKind::kBssEntry) {
+ if (!kUseReadBarrier || kUseBakerReadBarrier) {
+ // Rely on the pResolveString and/or marking to save everything, including temps.
+ // Note that IP may theoretically be clobbered by saving/restoring the live register
+ // (only one thanks to the custom calling convention), so we request a different temp.
+ locations->AddTemp(Location::RequiresRegister());
+ RegisterSet caller_saves = RegisterSet::Empty();
+ InvokeRuntimeCallingConventionARMVIXL calling_convention;
+ caller_saves.Add(LocationFrom(calling_convention.GetRegisterAt(0)));
+ // TODO: Add GetReturnLocation() to the calling convention so that we can DCHECK()
+ // that the kPrimNot result register is the same as the first argument register.
+ locations->SetCustomSlowPathCallerSaves(caller_saves);
+ } else {
+ // For non-Baker read barrier, we have a temp-clobbering call.
+ }
+ }
}
}
void InstructionCodeGeneratorARMVIXL::VisitLoadString(HLoadString* load) {
- // TODO(VIXL): Implement optimized code paths.
- // We implemented the simplest solution to get first ART tests passing, we deferred the
- // optimized path until later, we should implement it using ARM64 implementation as a
- // reference. The same related to LocationsBuilderARMVIXL::VisitLoadString.
+ LocationSummary* locations = load->GetLocations();
+ Location out_loc = locations->Out();
+ vixl32::Register out = OutputRegister(load);
+ HLoadString::LoadKind load_kind = load->GetLoadKind();
+
+ switch (load_kind) {
+ case HLoadString::LoadKind::kBootImageLinkTimeAddress: {
+ TODO_VIXL32(FATAL);
+ break;
+ }
+ case HLoadString::LoadKind::kBootImageLinkTimePcRelative: {
+ DCHECK(codegen_->GetCompilerOptions().IsBootImage());
+ CodeGeneratorARMVIXL::PcRelativePatchInfo* labels =
+ codegen_->NewPcRelativeStringPatch(load->GetDexFile(), load->GetStringIndex().index_);
+ codegen_->EmitMovwMovtPlaceholder(labels, out);
+ return; // No dex cache slow path.
+ }
+ case HLoadString::LoadKind::kBootImageAddress: {
+ TODO_VIXL32(FATAL);
+ break;
+ }
+ case HLoadString::LoadKind::kBssEntry: {
+ DCHECK(!codegen_->GetCompilerOptions().IsBootImage());
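+ // The fast path emitted below is roughly:
+ //   movw temp, #0      (patched at link time with the .bss entry address)
+ //   movt temp, #0
+ //   add  temp, pc
+ //   ldr  out, [temp]   (GC root load)
+ //   cbz  out, slow     (call pResolveString on first use)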
+ vixl32::Register temp = RegisterFrom(locations->GetTemp(0));
+ CodeGeneratorARMVIXL::PcRelativePatchInfo* labels =
+ codegen_->NewPcRelativeStringPatch(load->GetDexFile(), load->GetStringIndex().index_);
+ codegen_->EmitMovwMovtPlaceholder(labels, temp);
+ GenerateGcRootFieldLoad(load, out_loc, temp, /* offset */ 0, kCompilerReadBarrierOption);
+ LoadStringSlowPathARMVIXL* slow_path =
+ new (GetGraph()->GetArena()) LoadStringSlowPathARMVIXL(load);
+ codegen_->AddSlowPath(slow_path);
+ __ CompareAndBranchIfZero(out, slow_path->GetEntryLabel());
+ __ Bind(slow_path->GetExitLabel());
+ return;
+ }
+ case HLoadString::LoadKind::kJitTableAddress: {
+ TODO_VIXL32(FATAL);
+ break;
+ }
+ default:
+ break;
+ }
// TODO: Re-add the compiler code to do the string dex cache lookup.
DCHECK_EQ(load->GetLoadKind(), HLoadString::LoadKind::kDexCacheViaMethod);
@@ -5999,9 +6329,9 @@
Location root,
vixl32::Register obj,
uint32_t offset,
- bool requires_read_barrier) {
+ ReadBarrierOption read_barrier_option) {
vixl32::Register root_reg = RegisterFrom(root);
- if (requires_read_barrier) {
+ if (read_barrier_option == kWithReadBarrier) {
TODO_VIXL32(FATAL);
} else {
// Plain GC root load with no read barrier.
@@ -6062,15 +6392,51 @@
// Check if the desired_dispatch_info is supported. If it is, return it,
// otherwise return a fall-back info that should be used instead.
HInvokeStaticOrDirect::DispatchInfo CodeGeneratorARMVIXL::GetSupportedInvokeStaticOrDirectDispatch(
- const HInvokeStaticOrDirect::DispatchInfo& desired_dispatch_info ATTRIBUTE_UNUSED,
- HInvokeStaticOrDirect* invoke ATTRIBUTE_UNUSED) {
+ const HInvokeStaticOrDirect::DispatchInfo& desired_dispatch_info,
+ HInvokeStaticOrDirect* invoke) {
// TODO(VIXL): Implement optimized code paths.
- return {
- HInvokeStaticOrDirect::MethodLoadKind::kDexCacheViaMethod,
- HInvokeStaticOrDirect::CodePtrLocation::kCallArtMethod,
- 0u,
- 0u
- };
+ if (desired_dispatch_info.method_load_kind ==
+ HInvokeStaticOrDirect::MethodLoadKind::kDirectAddressWithFixup ||
+ desired_dispatch_info.code_ptr_location ==
+ HInvokeStaticOrDirect::CodePtrLocation::kCallDirectWithFixup) {
+ return {
+ HInvokeStaticOrDirect::MethodLoadKind::kDexCacheViaMethod,
+ HInvokeStaticOrDirect::CodePtrLocation::kCallArtMethod,
+ 0u,
+ 0u
+ };
+ }
+
+ HInvokeStaticOrDirect::DispatchInfo dispatch_info = desired_dispatch_info;
+ // We disable pc-relative load when there is an irreducible loop, as the optimization
+ // is incompatible with it.
+ // TODO: Create as many ArmDexCacheArraysBase instructions as needed for methods
+ // with irreducible loops.
+ if (GetGraph()->HasIrreducibleLoops() &&
+ (dispatch_info.method_load_kind ==
+ HInvokeStaticOrDirect::MethodLoadKind::kDexCachePcRelative)) {
+ dispatch_info.method_load_kind = HInvokeStaticOrDirect::MethodLoadKind::kDexCacheViaMethod;
+ }
+
+ if (dispatch_info.code_ptr_location == HInvokeStaticOrDirect::CodePtrLocation::kCallPCRelative) {
+ const DexFile& outer_dex_file = GetGraph()->GetDexFile();
+ if (&outer_dex_file != invoke->GetTargetMethod().dex_file) {
+ // Calls across dex files are more likely to exceed the available BL range,
+ // so use an absolute patch with fixup if available, and kCallArtMethod otherwise.
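+ // (A Thumb2 BL encodes a signed 25-bit offset, giving a range of about
+ // +/-16 MiB.)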
+ HInvokeStaticOrDirect::CodePtrLocation code_ptr_location =
+ (desired_dispatch_info.method_load_kind ==
+ HInvokeStaticOrDirect::MethodLoadKind::kDirectAddressWithFixup)
+ ? HInvokeStaticOrDirect::CodePtrLocation::kCallDirectWithFixup
+ : HInvokeStaticOrDirect::CodePtrLocation::kCallArtMethod;
+ return HInvokeStaticOrDirect::DispatchInfo {
+ dispatch_info.method_load_kind,
+ code_ptr_location,
+ dispatch_info.method_load_data,
+ 0u
+ };
+ }
+ }
+ return dispatch_info;
}
vixl32::Register CodeGeneratorARMVIXL::GetInvokeStaticOrDirectExtraParameter(
@@ -6101,59 +6467,107 @@
void CodeGeneratorARMVIXL::GenerateStaticOrDirectCall(
HInvokeStaticOrDirect* invoke, Location temp) {
- Location callee_method = temp; // For all kinds except kRecursive, callee will be in temp.
- vixl32::Register temp_reg = RegisterFrom(temp);
+ // For better instruction scheduling, we load the direct code pointer before the method pointer.
+ switch (invoke->GetCodePtrLocation()) {
+ case HInvokeStaticOrDirect::CodePtrLocation::kCallDirectWithFixup:
+ // LR = code address from literal pool with link-time patch.
+ TODO_VIXL32(FATAL);
+ break;
+ case HInvokeStaticOrDirect::CodePtrLocation::kCallDirect:
+ // LR = invoke->GetDirectCodePtr();
+ __ Mov(lr, Operand::From(invoke->GetDirectCodePtr()));
+ break;
+ default:
+ break;
+ }
+ Location callee_method = temp; // For all kinds except kRecursive, callee will be in temp.
switch (invoke->GetMethodLoadKind()) {
case HInvokeStaticOrDirect::MethodLoadKind::kStringInit: {
uint32_t offset =
GetThreadOffset<kArmPointerSize>(invoke->GetStringInitEntryPoint()).Int32Value();
// temp = thread->string_init_entrypoint
- GetAssembler()->LoadFromOffset(kLoadWord, temp_reg, tr, offset);
+ GetAssembler()->LoadFromOffset(kLoadWord, RegisterFrom(temp), tr, offset);
+ break;
+ }
+ case HInvokeStaticOrDirect::MethodLoadKind::kRecursive:
+ callee_method = invoke->GetLocations()->InAt(invoke->GetSpecialInputIndex());
+ break;
+ case HInvokeStaticOrDirect::MethodLoadKind::kDirectAddress:
+ __ Mov(RegisterFrom(temp), Operand::From(invoke->GetMethodAddress()));
+ break;
+ case HInvokeStaticOrDirect::MethodLoadKind::kDirectAddressWithFixup:
+ TODO_VIXL32(FATAL);
+ break;
+ case HInvokeStaticOrDirect::MethodLoadKind::kDexCachePcRelative: {
+ HArmDexCacheArraysBase* base =
+ invoke->InputAt(invoke->GetSpecialInputIndex())->AsArmDexCacheArraysBase();
+ vixl32::Register base_reg = GetInvokeStaticOrDirectExtraParameter(invoke, RegisterFrom(temp));
+ int32_t offset = invoke->GetDexCacheArrayOffset() - base->GetElementOffset();
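+ // The dex cache arrays base was materialized once per method by
+ // HArmDexCacheArraysBase, so the callee ArtMethod* is one load away.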
+ GetAssembler()->LoadFromOffset(kLoadWord, RegisterFrom(temp), base_reg, offset);
break;
}
case HInvokeStaticOrDirect::MethodLoadKind::kDexCacheViaMethod: {
Location current_method = invoke->GetLocations()->InAt(invoke->GetSpecialInputIndex());
vixl32::Register method_reg;
+ vixl32::Register reg = RegisterFrom(temp);
if (current_method.IsRegister()) {
method_reg = RegisterFrom(current_method);
} else {
DCHECK(invoke->GetLocations()->Intrinsified());
DCHECK(!current_method.IsValid());
- method_reg = temp_reg;
- GetAssembler()->LoadFromOffset(kLoadWord, temp_reg, sp, kCurrentMethodStackOffset);
+ method_reg = reg;
+ GetAssembler()->LoadFromOffset(kLoadWord, reg, sp, kCurrentMethodStackOffset);
}
// /* ArtMethod*[] */ temp = temp.ptr_sized_fields_->dex_cache_resolved_methods_;
GetAssembler()->LoadFromOffset(
kLoadWord,
- temp_reg,
+ reg,
method_reg,
ArtMethod::DexCacheResolvedMethodsOffset(kArmPointerSize).Int32Value());
// temp = temp[index_in_cache];
// Note: Don't use invoke->GetTargetMethod() as it may point to a different dex file.
uint32_t index_in_cache = invoke->GetDexMethodIndex();
GetAssembler()->LoadFromOffset(
- kLoadWord, temp_reg, temp_reg, CodeGenerator::GetCachePointerOffset(index_in_cache));
+ kLoadWord, reg, reg, CodeGenerator::GetCachePointerOffset(index_in_cache));
break;
}
- default:
- TODO_VIXL32(FATAL);
}
- // TODO(VIXL): Support `CodePtrLocation` values other than `kCallArtMethod`.
- if (invoke->GetCodePtrLocation() != HInvokeStaticOrDirect::CodePtrLocation::kCallArtMethod) {
- TODO_VIXL32(FATAL);
+ switch (invoke->GetCodePtrLocation()) {
+ case HInvokeStaticOrDirect::CodePtrLocation::kCallSelf:
+ __ Bl(GetFrameEntryLabel());
+ break;
+ case HInvokeStaticOrDirect::CodePtrLocation::kCallPCRelative:
+ relative_call_patches_.emplace_back(*invoke->GetTargetMethod().dex_file,
+ invoke->GetTargetMethod().dex_method_index);
+ {
+ AssemblerAccurateScope aas(GetVIXLAssembler(),
+ kArmInstrMaxSizeInBytes,
+ CodeBufferCheckScope::kMaximumSize);
+ __ bind(&relative_call_patches_.back().label);
+ // Arbitrarily branch to the BL itself; the target is overridden at link time.
+ __ bl(&relative_call_patches_.back().label);
+ }
+ break;
+ case HInvokeStaticOrDirect::CodePtrLocation::kCallDirectWithFixup:
+ case HInvokeStaticOrDirect::CodePtrLocation::kCallDirect:
+ // LR prepared above for better instruction scheduling.
+ // LR()
+ __ Blx(lr);
+ break;
+ case HInvokeStaticOrDirect::CodePtrLocation::kCallArtMethod:
+ // LR = callee_method->entry_point_from_quick_compiled_code_
+ GetAssembler()->LoadFromOffset(
+ kLoadWord,
+ lr,
+ RegisterFrom(callee_method),
+ ArtMethod::EntryPointFromQuickCompiledCodeOffset(kArmPointerSize).Int32Value());
+ // LR()
+ __ Blx(lr);
+ break;
}
- // LR = callee_method->entry_point_from_quick_compiled_code_
- GetAssembler()->LoadFromOffset(
- kLoadWord,
- lr,
- RegisterFrom(callee_method),
- ArtMethod::EntryPointFromQuickCompiledCodeOffset(kArmPointerSize).Int32Value());
- // LR()
- __ Blx(lr);
-
DCHECK(!IsLeafMethod());
}
@@ -6191,6 +6605,73 @@
__ Blx(lr);
}
+CodeGeneratorARMVIXL::PcRelativePatchInfo* CodeGeneratorARMVIXL::NewPcRelativeStringPatch(
+ const DexFile& dex_file, uint32_t string_index) {
+ return NewPcRelativePatch(dex_file, string_index, &pc_relative_string_patches_);
+}
+
+CodeGeneratorARMVIXL::PcRelativePatchInfo* CodeGeneratorARMVIXL::NewPcRelativeTypePatch(
+ const DexFile& dex_file, dex::TypeIndex type_index) {
+ return NewPcRelativePatch(dex_file, type_index.index_, &pc_relative_type_patches_);
+}
+
+CodeGeneratorARMVIXL::PcRelativePatchInfo* CodeGeneratorARMVIXL::NewPcRelativeDexCacheArrayPatch(
+ const DexFile& dex_file, uint32_t element_offset) {
+ return NewPcRelativePatch(dex_file, element_offset, &pc_relative_dex_cache_patches_);
+}
+
+CodeGeneratorARMVIXL::PcRelativePatchInfo* CodeGeneratorARMVIXL::NewPcRelativePatch(
+ const DexFile& dex_file, uint32_t offset_or_index, ArenaDeque<PcRelativePatchInfo>* patches) {
+ patches->emplace_back(dex_file, offset_or_index);
+ return &patches->back();
+}
+
+template <LinkerPatch (*Factory)(size_t, const DexFile*, uint32_t, uint32_t)>
+inline void CodeGeneratorARMVIXL::EmitPcRelativeLinkerPatches(
+ const ArenaDeque<PcRelativePatchInfo>& infos,
+ ArenaVector<LinkerPatch>* linker_patches) {
+ for (const PcRelativePatchInfo& info : infos) {
+ const DexFile& dex_file = info.target_dex_file;
+ size_t offset_or_index = info.offset_or_index;
+ DCHECK(info.add_pc_label.IsBound());
+ uint32_t add_pc_offset = dchecked_integral_cast<uint32_t>(info.add_pc_label.GetLocation());
+ // Add MOVW patch.
+ DCHECK(info.movw_label.IsBound());
+ uint32_t movw_offset = dchecked_integral_cast<uint32_t>(info.movw_label.GetLocation());
+ linker_patches->push_back(Factory(movw_offset, &dex_file, add_pc_offset, offset_or_index));
+ // Add MOVT patch.
+ DCHECK(info.movt_label.IsBound());
+ uint32_t movt_offset = dchecked_integral_cast<uint32_t>(info.movt_label.GetLocation());
+ linker_patches->push_back(Factory(movt_offset, &dex_file, add_pc_offset, offset_or_index));
+ }
+}
+
+void CodeGeneratorARMVIXL::EmitLinkerPatches(ArenaVector<LinkerPatch>* linker_patches) {
+ DCHECK(linker_patches->empty());
+ size_t size =
+ relative_call_patches_.size() +
+ /* MOVW+MOVT for each entry */ 2u * pc_relative_dex_cache_patches_.size() +
+ /* MOVW+MOVT for each entry */ 2u * pc_relative_string_patches_.size() +
+ /* MOVW+MOVT for each entry */ 2u * pc_relative_type_patches_.size();
+ linker_patches->reserve(size);
+ for (const PatchInfo<vixl32::Label>& info : relative_call_patches_) {
+ uint32_t literal_offset = info.label.GetLocation();
+ linker_patches->push_back(
+ LinkerPatch::RelativeCodePatch(literal_offset, &info.dex_file, info.index));
+ }
+ EmitPcRelativeLinkerPatches<LinkerPatch::DexCacheArrayPatch>(pc_relative_dex_cache_patches_,
+ linker_patches);
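+ // String patches target .bss entries for app compilation, and the image
+ // Strings directly when compiling the boot image.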
+ if (!GetCompilerOptions().IsBootImage()) {
+ EmitPcRelativeLinkerPatches<LinkerPatch::StringBssEntryPatch>(pc_relative_string_patches_,
+ linker_patches);
+ } else {
+ EmitPcRelativeLinkerPatches<LinkerPatch::RelativeStringPatch>(pc_relative_string_patches_,
+ linker_patches);
+ }
+ EmitPcRelativeLinkerPatches<LinkerPatch::RelativeTypePatch>(pc_relative_type_patches_,
+ linker_patches);
+}
+
void LocationsBuilderARMVIXL::VisitMultiplyAccumulate(HMultiplyAccumulate* instr) {
LocationSummary* locations =
new (GetGraph()->GetArena()) LocationSummary(instr, LocationSummary::kNoCall);
@@ -6315,6 +6796,17 @@
jump_table->EmitTable(codegen_);
}
}
+void LocationsBuilderARMVIXL::VisitArmDexCacheArraysBase(HArmDexCacheArraysBase* base) {
+ LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(base);
+ locations->SetOut(Location::RequiresRegister());
+}
+
+void InstructionCodeGeneratorARMVIXL::VisitArmDexCacheArraysBase(HArmDexCacheArraysBase* base) {
+ vixl32::Register base_reg = OutputRegister(base);
+ CodeGeneratorARMVIXL::PcRelativePatchInfo* labels =
+ codegen_->NewPcRelativeDexCacheArrayPatch(base->GetDexFile(), base->GetElementOffset());
+ codegen_->EmitMovwMovtPlaceholder(labels, base_reg);
+}
// Copy the result of a call into the given target.
void CodeGeneratorARMVIXL::MoveFromReturnRegister(Location trg, Primitive::Type type) {
@@ -6325,7 +6817,7 @@
DCHECK_NE(type, Primitive::kPrimVoid);
- Location return_loc = InvokeDexCallingConventionVisitorARM().GetReturnLocation(type);
+ Location return_loc = InvokeDexCallingConventionVisitorARMVIXL().GetReturnLocation(type);
if (return_loc.Equals(trg)) {
return;
}
@@ -6373,6 +6865,21 @@
}
}
+void CodeGeneratorARMVIXL::EmitMovwMovtPlaceholder(
+ CodeGeneratorARMVIXL::PcRelativePatchInfo* labels,
+ vixl32::Register out) {
+ AssemblerAccurateScope aas(GetVIXLAssembler(),
+ kArmInstrMaxSizeInBytes * 3,
+ CodeBufferCheckScope::kMaximumSize);
+ // TODO(VIXL): Think about using mov instead of movw.
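+ // The immediates below are placeholders; the linker rewrites the pair so
+ // that after the final `add out, pc` the register holds the target address
+ // (the pc read yields the address of the add instruction plus 4).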
+ __ bind(&labels->movw_label);
+ __ movw(out, /* placeholder */ 0u);
+ __ bind(&labels->movt_label);
+ __ movt(out, /* placeholder */ 0u);
+ __ bind(&labels->add_pc_label);
+ __ add(out, out, pc);
+}
+
#undef __
#undef QUICK_ENTRY_POINT
#undef TODO_VIXL32