path: root/compiler/optimizing
Diffstat (limited to 'compiler/optimizing')
-rw-r--r--  compiler/optimizing/code_generator_arm.cc        132
-rw-r--r--  compiler/optimizing/code_generator_arm64.cc      506
-rw-r--r--  compiler/optimizing/code_generator_arm64.h        13
-rw-r--r--  compiler/optimizing/code_generator_arm_vixl.cc   111
-rw-r--r--  compiler/optimizing/code_generator_mips.cc        58
-rw-r--r--  compiler/optimizing/code_generator_mips64.cc      62
-rw-r--r--  compiler/optimizing/code_generator_x86.cc         30
-rw-r--r--  compiler/optimizing/code_generator_x86_64.cc      39
-rw-r--r--  compiler/optimizing/intrinsics_arm64.cc           50
-rw-r--r--  compiler/optimizing/intrinsics_mips.cc            55
-rw-r--r--  compiler/optimizing/intrinsics_mips64.cc          49
-rw-r--r--  compiler/optimizing/stack_map_stream.cc          225
-rw-r--r--  compiler/optimizing/stack_map_stream.h            52
-rw-r--r--  compiler/optimizing/stack_map_test.cc             94
14 files changed, 1022 insertions, 454 deletions
diff --git a/compiler/optimizing/code_generator_arm.cc b/compiler/optimizing/code_generator_arm.cc
index 20cdae3619..759a951d6b 100644
--- a/compiler/optimizing/code_generator_arm.cc
+++ b/compiler/optimizing/code_generator_arm.cc
@@ -367,22 +367,37 @@ class BoundsCheckSlowPathARM : public SlowPathCodeARM {
class LoadClassSlowPathARM : public SlowPathCodeARM {
public:
- LoadClassSlowPathARM(HLoadClass* cls,
- HInstruction* at,
- uint32_t dex_pc,
- bool do_clinit)
+ LoadClassSlowPathARM(HLoadClass* cls, HInstruction* at, uint32_t dex_pc, bool do_clinit)
: SlowPathCodeARM(at), cls_(cls), dex_pc_(dex_pc), do_clinit_(do_clinit) {
DCHECK(at->IsLoadClass() || at->IsClinitCheck());
}
void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
LocationSummary* locations = instruction_->GetLocations();
+ Location out = locations->Out();
+ constexpr bool call_saves_everything_except_r0 = (!kUseReadBarrier || kUseBakerReadBarrier);
CodeGeneratorARM* arm_codegen = down_cast<CodeGeneratorARM*>(codegen);
__ Bind(GetEntryLabel());
SaveLiveRegisters(codegen, locations);
InvokeRuntimeCallingConvention calling_convention;
+ // For HLoadClass/kBssEntry/kSaveEverything, make sure we preserve the address of the entry.
+ DCHECK_EQ(instruction_->IsLoadClass(), cls_ == instruction_);
+ bool is_load_class_bss_entry =
+ (cls_ == instruction_) && (cls_->GetLoadKind() == HLoadClass::LoadKind::kBssEntry);
+ Register entry_address = kNoRegister;
+ if (is_load_class_bss_entry && call_saves_everything_except_r0) {
+ Register temp = locations->GetTemp(0).AsRegister<Register>();
+ // In the unlucky case that the `temp` is R0, we preserve the address in `out` across
+ // the kSaveEverything call.
+ bool temp_is_r0 = (temp == calling_convention.GetRegisterAt(0));
+ entry_address = temp_is_r0 ? out.AsRegister<Register>() : temp;
+ DCHECK_NE(entry_address, calling_convention.GetRegisterAt(0));
+ if (temp_is_r0) {
+ __ mov(entry_address, ShifterOperand(temp));
+ }
+ }
dex::TypeIndex type_index = cls_->GetTypeIndex();
__ LoadImmediate(calling_convention.GetRegisterAt(0), type_index.index_);
QuickEntrypointEnum entrypoint = do_clinit_ ? kQuickInitializeStaticStorage
@@ -394,30 +409,31 @@ class LoadClassSlowPathARM : public SlowPathCodeARM {
CheckEntrypointTypes<kQuickInitializeType, void*, uint32_t>();
}
+ // For HLoadClass/kBssEntry, store the resolved Class to the BSS entry.
+ if (is_load_class_bss_entry) {
+ if (call_saves_everything_except_r0) {
+ // The class entry address was preserved in `entry_address` thanks to kSaveEverything.
+ __ str(R0, Address(entry_address));
+ } else {
+ // For non-Baker read barrier, we need to re-calculate the address of the class entry.
+ Register temp = IP;
+ CodeGeneratorARM::PcRelativePatchInfo* labels =
+ arm_codegen->NewTypeBssEntryPatch(cls_->GetDexFile(), type_index);
+ __ BindTrackedLabel(&labels->movw_label);
+ __ movw(temp, /* placeholder */ 0u);
+ __ BindTrackedLabel(&labels->movt_label);
+ __ movt(temp, /* placeholder */ 0u);
+ __ BindTrackedLabel(&labels->add_pc_label);
+ __ add(temp, temp, ShifterOperand(PC));
+ __ str(R0, Address(temp));
+ }
+ }
// Move the class to the desired location.
- Location out = locations->Out();
if (out.IsValid()) {
DCHECK(out.IsRegister() && !locations->GetLiveRegisters()->ContainsCoreRegister(out.reg()));
arm_codegen->Move32(locations->Out(), Location::RegisterLocation(R0));
}
RestoreLiveRegisters(codegen, locations);
- // For HLoadClass/kBssEntry, store the resolved Class to the BSS entry.
- DCHECK_EQ(instruction_->IsLoadClass(), cls_ == instruction_);
- if (cls_ == instruction_ && cls_->GetLoadKind() == HLoadClass::LoadKind::kBssEntry) {
- DCHECK(out.IsValid());
- // TODO: Change art_quick_initialize_type/art_quick_initialize_static_storage to
- // kSaveEverything and use a temporary for the .bss entry address in the fast path,
- // so that we can avoid another calculation here.
- CodeGeneratorARM::PcRelativePatchInfo* labels =
- arm_codegen->NewTypeBssEntryPatch(cls_->GetDexFile(), type_index);
- __ BindTrackedLabel(&labels->movw_label);
- __ movw(IP, /* placeholder */ 0u);
- __ BindTrackedLabel(&labels->movt_label);
- __ movt(IP, /* placeholder */ 0u);
- __ BindTrackedLabel(&labels->add_pc_label);
- __ add(IP, IP, ShifterOperand(PC));
- __ str(locations->Out().AsRegister<Register>(), Address(IP));
- }
__ b(GetExitLabel());
}
@@ -441,12 +457,13 @@ class LoadStringSlowPathARM : public SlowPathCodeARM {
explicit LoadStringSlowPathARM(HLoadString* instruction) : SlowPathCodeARM(instruction) {}
void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
+ DCHECK(instruction_->IsLoadString());
+ DCHECK_EQ(instruction_->AsLoadString()->GetLoadKind(), HLoadString::LoadKind::kBssEntry);
LocationSummary* locations = instruction_->GetLocations();
DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(locations->Out().reg()));
HLoadString* load = instruction_->AsLoadString();
const dex::StringIndex string_index = load->GetStringIndex();
Register out = locations->Out().AsRegister<Register>();
- Register temp = locations->GetTemp(0).AsRegister<Register>();
constexpr bool call_saves_everything_except_r0 = (!kUseReadBarrier || kUseBakerReadBarrier);
CodeGeneratorARM* arm_codegen = down_cast<CodeGeneratorARM*>(codegen);
@@ -455,12 +472,16 @@ class LoadStringSlowPathARM : public SlowPathCodeARM {
InvokeRuntimeCallingConvention calling_convention;
// In the unlucky case that the `temp` is R0, we preserve the address in `out` across
- // the kSaveEverything call (or use `out` for the address after non-kSaveEverything call).
- bool temp_is_r0 = (temp == calling_convention.GetRegisterAt(0));
- Register entry_address = temp_is_r0 ? out : temp;
- DCHECK_NE(entry_address, calling_convention.GetRegisterAt(0));
- if (call_saves_everything_except_r0 && temp_is_r0) {
- __ mov(entry_address, ShifterOperand(temp));
+ // the kSaveEverything call.
+ Register entry_address = kNoRegister;
+ if (call_saves_everything_except_r0) {
+ Register temp = locations->GetTemp(0).AsRegister<Register>();
+ bool temp_is_r0 = (temp == calling_convention.GetRegisterAt(0));
+ entry_address = temp_is_r0 ? out : temp;
+ DCHECK_NE(entry_address, calling_convention.GetRegisterAt(0));
+ if (temp_is_r0) {
+ __ mov(entry_address, ShifterOperand(temp));
+ }
}
__ LoadImmediate(calling_convention.GetRegisterAt(0), string_index.index_);
@@ -473,15 +494,16 @@ class LoadStringSlowPathARM : public SlowPathCodeARM {
__ str(R0, Address(entry_address));
} else {
// For non-Baker read barrier, we need to re-calculate the address of the string entry.
+ Register temp = IP;
CodeGeneratorARM::PcRelativePatchInfo* labels =
arm_codegen->NewPcRelativeStringPatch(load->GetDexFile(), string_index);
__ BindTrackedLabel(&labels->movw_label);
- __ movw(entry_address, /* placeholder */ 0u);
+ __ movw(temp, /* placeholder */ 0u);
__ BindTrackedLabel(&labels->movt_label);
- __ movt(entry_address, /* placeholder */ 0u);
+ __ movt(temp, /* placeholder */ 0u);
__ BindTrackedLabel(&labels->add_pc_label);
- __ add(entry_address, entry_address, ShifterOperand(PC));
- __ str(R0, Address(entry_address));
+ __ add(temp, temp, ShifterOperand(PC));
+ __ str(R0, Address(temp));
}
arm_codegen->Move32(locations->Out(), Location::RegisterLocation(R0));
@@ -624,6 +646,10 @@ class ArraySetSlowPathARM : public SlowPathCodeARM {
// probably still be a from-space reference (unless it gets updated by
// another thread, or if another thread installed another object
// reference (different from `ref`) in `obj.field`).
+//
+// If `entrypoint` is a valid location it is assumed to already be
+// holding the entrypoint. The case where the entrypoint is passed in
+// is for the GcRoot read barrier.
class ReadBarrierMarkSlowPathARM : public SlowPathCodeARM {
public:
ReadBarrierMarkSlowPathARM(HInstruction* instruction,
@@ -5755,6 +5781,7 @@ void LocationsBuilderARM::VisitLoadClass(HLoadClass* cls) {
cls,
Location::RegisterLocation(calling_convention.GetRegisterAt(0)),
Location::RegisterLocation(R0));
+ DCHECK_EQ(calling_convention.GetRegisterAt(0), R0);
return;
}
DCHECK(!cls->NeedsAccessCheck());
@@ -5772,6 +5799,22 @@ void LocationsBuilderARM::VisitLoadClass(HLoadClass* cls) {
locations->SetInAt(0, Location::RequiresRegister());
}
locations->SetOut(Location::RequiresRegister());
+ if (load_kind == HLoadClass::LoadKind::kBssEntry) {
+ if (!kUseReadBarrier || kUseBakerReadBarrier) {
+ // Rely on the type resolution or initialization and marking to save everything we need.
+ // Note that IP may be clobbered by saving/restoring the live register (only one thanks
+ // to the custom calling convention) or by marking, so we request a different temp.
+ locations->AddTemp(Location::RequiresRegister());
+ RegisterSet caller_saves = RegisterSet::Empty();
+ InvokeRuntimeCallingConvention calling_convention;
+ caller_saves.Add(Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
+ // TODO: Add GetReturnLocation() to the calling convention so that we can DCHECK()
+ // that the kPrimNot result register is the same as the first argument register.
+ locations->SetCustomSlowPathCallerSaves(caller_saves);
+ } else {
+ // For non-Baker read barrier we have a temp-clobbering call.
+ }
+ }
}
// NO_THREAD_SAFETY_ANALYSIS as we manipulate handles whose internal object we know does not
@@ -5834,15 +5877,18 @@ void InstructionCodeGeneratorARM::VisitLoadClass(HLoadClass* cls) NO_THREAD_SAFE
break;
}
case HLoadClass::LoadKind::kBssEntry: {
+ Register temp = (!kUseReadBarrier || kUseBakerReadBarrier)
+ ? locations->GetTemp(0).AsRegister<Register>()
+ : out;
CodeGeneratorARM::PcRelativePatchInfo* labels =
codegen_->NewTypeBssEntryPatch(cls->GetDexFile(), cls->GetTypeIndex());
__ BindTrackedLabel(&labels->movw_label);
- __ movw(out, /* placeholder */ 0u);
+ __ movw(temp, /* placeholder */ 0u);
__ BindTrackedLabel(&labels->movt_label);
- __ movt(out, /* placeholder */ 0u);
+ __ movt(temp, /* placeholder */ 0u);
__ BindTrackedLabel(&labels->add_pc_label);
- __ add(out, out, ShifterOperand(PC));
- GenerateGcRootFieldLoad(cls, out_loc, out, 0, kCompilerReadBarrierOption);
+ __ add(temp, temp, ShifterOperand(PC));
+ GenerateGcRootFieldLoad(cls, out_loc, temp, /* offset */ 0, read_barrier_option);
generate_null_check = true;
break;
}
@@ -5851,7 +5897,7 @@ void InstructionCodeGeneratorARM::VisitLoadClass(HLoadClass* cls) NO_THREAD_SAFE
cls->GetTypeIndex(),
cls->GetClass()));
// /* GcRoot<mirror::Class> */ out = *out
- GenerateGcRootFieldLoad(cls, out_loc, out, /* offset */ 0, kCompilerReadBarrierOption);
+ GenerateGcRootFieldLoad(cls, out_loc, out, /* offset */ 0, read_barrier_option);
break;
}
case HLoadClass::LoadKind::kDexCacheViaMethod:
@@ -5938,9 +5984,9 @@ void LocationsBuilderARM::VisitLoadString(HLoadString* load) {
locations->SetOut(Location::RequiresRegister());
if (load_kind == HLoadString::LoadKind::kBssEntry) {
if (!kUseReadBarrier || kUseBakerReadBarrier) {
- // Rely on the pResolveString and/or marking to save everything, including temps.
- // Note that IP may theoretically be clobbered by saving/restoring the live register
- // (only one thanks to the custom calling convention), so we request a different temp.
+ // Rely on the pResolveString and marking to save everything we need, including temps.
+ // Note that IP may be clobbered by saving/restoring the live register (only one thanks
+ // to the custom calling convention) or by marking, so we request a different temp.
locations->AddTemp(Location::RequiresRegister());
RegisterSet caller_saves = RegisterSet::Empty();
InvokeRuntimeCallingConvention calling_convention;
@@ -5991,7 +6037,9 @@ void InstructionCodeGeneratorARM::VisitLoadString(HLoadString* load) NO_THREAD_S
}
case HLoadString::LoadKind::kBssEntry: {
DCHECK(!codegen_->GetCompilerOptions().IsBootImage());
- Register temp = locations->GetTemp(0).AsRegister<Register>();
+ Register temp = (!kUseReadBarrier || kUseBakerReadBarrier)
+ ? locations->GetTemp(0).AsRegister<Register>()
+ : out;
CodeGeneratorARM::PcRelativePatchInfo* labels =
codegen_->NewPcRelativeStringPatch(load->GetDexFile(), load->GetStringIndex());
__ BindTrackedLabel(&labels->movw_label);
diff --git a/compiler/optimizing/code_generator_arm64.cc b/compiler/optimizing/code_generator_arm64.cc
index 598be4715b..e6032d2381 100644
--- a/compiler/optimizing/code_generator_arm64.cc
+++ b/compiler/optimizing/code_generator_arm64.cc
@@ -34,6 +34,9 @@
#include "utils/stack_checks.h"
using namespace vixl::aarch64; // NOLINT(build/namespaces)
+using vixl::ExactAssemblyScope;
+using vixl::CodeBufferCheckScope;
+using vixl::EmissionCheckScope;
#ifdef __
#error "ARM64 Codegen VIXL macro-assembler macro already defined."
@@ -275,15 +278,37 @@ class LoadClassSlowPathARM64 : public SlowPathCodeARM64 {
LoadClassSlowPathARM64(HLoadClass* cls,
HInstruction* at,
uint32_t dex_pc,
- bool do_clinit)
- : SlowPathCodeARM64(at), cls_(cls), dex_pc_(dex_pc), do_clinit_(do_clinit) {
+ bool do_clinit,
+ vixl::aarch64::Register bss_entry_temp = vixl::aarch64::Register(),
+ vixl::aarch64::Label* bss_entry_adrp_label = nullptr)
+ : SlowPathCodeARM64(at),
+ cls_(cls),
+ dex_pc_(dex_pc),
+ do_clinit_(do_clinit),
+ bss_entry_temp_(bss_entry_temp),
+ bss_entry_adrp_label_(bss_entry_adrp_label) {
DCHECK(at->IsLoadClass() || at->IsClinitCheck());
}
void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
LocationSummary* locations = instruction_->GetLocations();
+ Location out = locations->Out();
+ constexpr bool call_saves_everything_except_r0_ip0 = (!kUseReadBarrier || kUseBakerReadBarrier);
CodeGeneratorARM64* arm64_codegen = down_cast<CodeGeneratorARM64*>(codegen);
+ // For HLoadClass/kBssEntry/kSaveEverything, make sure we preserve the page address of
+ // the entry which is in a scratch register. Make sure it's not used for saving/restoring
+ // registers. Exclude the scratch register also for non-Baker read barrier for simplicity.
+ DCHECK_EQ(instruction_->IsLoadClass(), cls_ == instruction_);
+ bool is_load_class_bss_entry =
+ (cls_ == instruction_) && (cls_->GetLoadKind() == HLoadClass::LoadKind::kBssEntry);
+ UseScratchRegisterScope temps(arm64_codegen->GetVIXLAssembler());
+ if (is_load_class_bss_entry) {
+ // This temp is a scratch register.
+ DCHECK(bss_entry_temp_.IsValid());
+ temps.Exclude(bss_entry_temp_);
+ }
+
__ Bind(GetEntryLabel());
SaveLiveRegisters(codegen, locations);
@@ -300,7 +325,6 @@ class LoadClassSlowPathARM64 : public SlowPathCodeARM64 {
}
// Move the class to the desired location.
- Location out = locations->Out();
if (out.IsValid()) {
DCHECK(out.IsRegister() && !locations->GetLiveRegisters()->ContainsCoreRegister(out.reg()));
Primitive::Type type = instruction_->GetType();
@@ -308,25 +332,23 @@ class LoadClassSlowPathARM64 : public SlowPathCodeARM64 {
}
RestoreLiveRegisters(codegen, locations);
// For HLoadClass/kBssEntry, store the resolved Class to the BSS entry.
- DCHECK_EQ(instruction_->IsLoadClass(), cls_ == instruction_);
- if (cls_ == instruction_ && cls_->GetLoadKind() == HLoadClass::LoadKind::kBssEntry) {
+ if (is_load_class_bss_entry) {
DCHECK(out.IsValid());
- UseScratchRegisterScope temps(arm64_codegen->GetVIXLAssembler());
- Register temp = temps.AcquireX();
const DexFile& dex_file = cls_->GetDexFile();
- // TODO: Change art_quick_initialize_type/art_quick_initialize_static_storage to
- // kSaveEverything and use a temporary for the ADRP in the fast path, so that we
- // can avoid the ADRP here.
- vixl::aarch64::Label* adrp_label =
- arm64_codegen->NewBssEntryTypePatch(dex_file, type_index);
- arm64_codegen->EmitAdrpPlaceholder(adrp_label, temp);
+ if (call_saves_everything_except_r0_ip0) {
+ // The class entry page address was preserved in bss_entry_temp_ thanks to kSaveEverything.
+ } else {
+ // For non-Baker read barrier, we need to re-calculate the address of the class entry page.
+ bss_entry_adrp_label_ = arm64_codegen->NewBssEntryTypePatch(dex_file, type_index);
+ arm64_codegen->EmitAdrpPlaceholder(bss_entry_adrp_label_, bss_entry_temp_);
+ }
vixl::aarch64::Label* strp_label =
- arm64_codegen->NewBssEntryTypePatch(dex_file, type_index, adrp_label);
+ arm64_codegen->NewBssEntryTypePatch(dex_file, type_index, bss_entry_adrp_label_);
{
SingleEmissionCheckScope guard(arm64_codegen->GetVIXLAssembler());
__ Bind(strp_label);
__ str(RegisterFrom(locations->Out(), Primitive::kPrimNot),
- MemOperand(temp, /* offset placeholder */ 0));
+ MemOperand(bss_entry_temp_, /* offset placeholder */ 0));
}
}
__ B(GetExitLabel());
@@ -344,6 +366,10 @@ class LoadClassSlowPathARM64 : public SlowPathCodeARM64 {
// Whether to initialize the class.
const bool do_clinit_;
+ // For HLoadClass/kBssEntry, the temp register and the label of the ADRP where it was loaded.
+ vixl::aarch64::Register bss_entry_temp_;
+ vixl::aarch64::Label* bss_entry_adrp_label_;
+
DISALLOW_COPY_AND_ASSIGN(LoadClassSlowPathARM64);
};
@@ -590,10 +616,9 @@ void JumpTableARM64::EmitTable(CodeGeneratorARM64* codegen) {
// We are about to use the assembler to place literals directly. Make sure we have enough
// underlying code buffer and we have generated the jump table with right size.
- vixl::CodeBufferCheckScope scope(codegen->GetVIXLAssembler(),
- num_entries * sizeof(int32_t),
- vixl::CodeBufferCheckScope::kReserveBufferSpace,
- vixl::CodeBufferCheckScope::kExactSize);
+ EmissionCheckScope scope(codegen->GetVIXLAssembler(),
+ num_entries * sizeof(int32_t),
+ CodeBufferCheckScope::kExactSize);
__ Bind(&table_start_);
const ArenaVector<HBasicBlock*>& successors = switch_instr_->GetBlock()->GetSuccessors();
@@ -619,8 +644,10 @@ void JumpTableARM64::EmitTable(CodeGeneratorARM64* codegen) {
// probably still be a from-space reference (unless it gets updated by
// another thread, or if another thread installed another object
// reference (different from `ref`) in `obj.field`).
-// If entrypoint is a valid location it is assumed to already be holding the entrypoint. The case
-// where the entrypoint is passed in is for the GcRoot read barrier.
+//
+// If `entrypoint` is a valid location it is assumed to already be
+// holding the entrypoint. The case where the entrypoint is passed in
+// is for the GcRoot read barrier.
class ReadBarrierMarkSlowPathARM64 : public SlowPathCodeARM64 {
public:
ReadBarrierMarkSlowPathARM64(HInstruction* instruction,
@@ -1254,7 +1281,6 @@ void ParallelMoveResolverARM64::EmitMove(size_t index) {
void CodeGeneratorARM64::GenerateFrameEntry() {
MacroAssembler* masm = GetVIXLAssembler();
- BlockPoolsScope block_pools(masm);
__ Bind(&frame_entry_label_);
bool do_overflow_check = FrameNeedsStackCheck(GetFrameSize(), kArm64) || !IsLeafMethod();
@@ -1263,8 +1289,14 @@ void CodeGeneratorARM64::GenerateFrameEntry() {
Register temp = temps.AcquireX();
DCHECK(GetCompilerOptions().GetImplicitStackOverflowChecks());
__ Sub(temp, sp, static_cast<int32_t>(GetStackOverflowReservedBytes(kArm64)));
- __ Ldr(wzr, MemOperand(temp, 0));
- RecordPcInfo(nullptr, 0);
+ {
+ // Ensure that between load and RecordPcInfo there are no pools emitted.
+ ExactAssemblyScope eas(GetVIXLAssembler(),
+ kInstructionSize,
+ CodeBufferCheckScope::kExactSize);
+ __ ldr(wzr, MemOperand(temp, 0));
+ RecordPcInfo(nullptr, 0);
+ }
}
if (!HasEmptyFrame()) {
@@ -1299,7 +1331,6 @@ void CodeGeneratorARM64::GenerateFrameEntry() {
}
void CodeGeneratorARM64::GenerateFrameExit() {
- BlockPoolsScope block_pools(GetVIXLAssembler());
GetAssembler()->cfi().RememberState();
if (!HasEmptyFrame()) {
int frame_size = GetFrameSize();
@@ -1626,7 +1657,6 @@ void CodeGeneratorARM64::LoadAcquire(HInstruction* instruction,
const MemOperand& src,
bool needs_null_check) {
MacroAssembler* masm = GetVIXLAssembler();
- BlockPoolsScope block_pools(masm);
UseScratchRegisterScope temps(masm);
Register temp_base = temps.AcquireX();
Primitive::Type type = instruction->GetType();
@@ -1636,58 +1666,79 @@ void CodeGeneratorARM64::LoadAcquire(HInstruction* instruction,
// TODO(vixl): Let the MacroAssembler handle MemOperand.
__ Add(temp_base, src.GetBaseRegister(), OperandFromMemOperand(src));
- MemOperand base = MemOperand(temp_base);
- switch (type) {
- case Primitive::kPrimBoolean:
- __ Ldarb(Register(dst), base);
- if (needs_null_check) {
- MaybeRecordImplicitNullCheck(instruction);
- }
- break;
- case Primitive::kPrimByte:
- __ Ldarb(Register(dst), base);
- if (needs_null_check) {
- MaybeRecordImplicitNullCheck(instruction);
- }
- __ Sbfx(Register(dst), Register(dst), 0, Primitive::ComponentSize(type) * kBitsPerByte);
- break;
- case Primitive::kPrimChar:
- __ Ldarh(Register(dst), base);
- if (needs_null_check) {
- MaybeRecordImplicitNullCheck(instruction);
- }
- break;
- case Primitive::kPrimShort:
- __ Ldarh(Register(dst), base);
- if (needs_null_check) {
- MaybeRecordImplicitNullCheck(instruction);
- }
- __ Sbfx(Register(dst), Register(dst), 0, Primitive::ComponentSize(type) * kBitsPerByte);
- break;
- case Primitive::kPrimInt:
- case Primitive::kPrimNot:
- case Primitive::kPrimLong:
- DCHECK_EQ(dst.Is64Bits(), Primitive::Is64BitType(type));
- __ Ldar(Register(dst), base);
- if (needs_null_check) {
- MaybeRecordImplicitNullCheck(instruction);
- }
- break;
- case Primitive::kPrimFloat:
- case Primitive::kPrimDouble: {
- DCHECK(dst.IsFPRegister());
- DCHECK_EQ(dst.Is64Bits(), Primitive::Is64BitType(type));
-
- Register temp = dst.Is64Bits() ? temps.AcquireX() : temps.AcquireW();
- __ Ldar(temp, base);
- if (needs_null_check) {
- MaybeRecordImplicitNullCheck(instruction);
+ {
+ // Ensure that between load and MaybeRecordImplicitNullCheck there are no pools emitted.
+ MemOperand base = MemOperand(temp_base);
+ switch (type) {
+ case Primitive::kPrimBoolean:
+ {
+ ExactAssemblyScope eas(masm, kInstructionSize, CodeBufferCheckScope::kExactSize);
+ __ ldarb(Register(dst), base);
+ if (needs_null_check) {
+ MaybeRecordImplicitNullCheck(instruction);
+ }
+ }
+ break;
+ case Primitive::kPrimByte:
+ {
+ ExactAssemblyScope eas(masm, kInstructionSize, CodeBufferCheckScope::kExactSize);
+ __ ldarb(Register(dst), base);
+ if (needs_null_check) {
+ MaybeRecordImplicitNullCheck(instruction);
+ }
+ }
+ __ Sbfx(Register(dst), Register(dst), 0, Primitive::ComponentSize(type) * kBitsPerByte);
+ break;
+ case Primitive::kPrimChar:
+ {
+ ExactAssemblyScope eas(masm, kInstructionSize, CodeBufferCheckScope::kExactSize);
+ __ ldarh(Register(dst), base);
+ if (needs_null_check) {
+ MaybeRecordImplicitNullCheck(instruction);
+ }
+ }
+ break;
+ case Primitive::kPrimShort:
+ {
+ ExactAssemblyScope eas(masm, kInstructionSize, CodeBufferCheckScope::kExactSize);
+ __ ldarh(Register(dst), base);
+ if (needs_null_check) {
+ MaybeRecordImplicitNullCheck(instruction);
+ }
+ }
+ __ Sbfx(Register(dst), Register(dst), 0, Primitive::ComponentSize(type) * kBitsPerByte);
+ break;
+ case Primitive::kPrimInt:
+ case Primitive::kPrimNot:
+ case Primitive::kPrimLong:
+ DCHECK_EQ(dst.Is64Bits(), Primitive::Is64BitType(type));
+ {
+ ExactAssemblyScope eas(masm, kInstructionSize, CodeBufferCheckScope::kExactSize);
+ __ ldar(Register(dst), base);
+ if (needs_null_check) {
+ MaybeRecordImplicitNullCheck(instruction);
+ }
+ }
+ break;
+ case Primitive::kPrimFloat:
+ case Primitive::kPrimDouble: {
+ DCHECK(dst.IsFPRegister());
+ DCHECK_EQ(dst.Is64Bits(), Primitive::Is64BitType(type));
+
+ Register temp = dst.Is64Bits() ? temps.AcquireX() : temps.AcquireW();
+ {
+ ExactAssemblyScope eas(masm, kInstructionSize, CodeBufferCheckScope::kExactSize);
+ __ ldar(temp, base);
+ if (needs_null_check) {
+ MaybeRecordImplicitNullCheck(instruction);
+ }
+ }
+ __ Fmov(FPRegister(dst), temp);
+ break;
}
- __ Fmov(FPRegister(dst), temp);
- break;
+ case Primitive::kPrimVoid:
+ LOG(FATAL) << "Unreachable type " << type;
}
- case Primitive::kPrimVoid:
- LOG(FATAL) << "Unreachable type " << type;
}
}
@@ -1716,9 +1767,12 @@ void CodeGeneratorARM64::Store(Primitive::Type type,
}
}
-void CodeGeneratorARM64::StoreRelease(Primitive::Type type,
+void CodeGeneratorARM64::StoreRelease(HInstruction* instruction,
+ Primitive::Type type,
CPURegister src,
- const MemOperand& dst) {
+ const MemOperand& dst,
+ bool needs_null_check) {
+ MacroAssembler* masm = GetVIXLAssembler();
UseScratchRegisterScope temps(GetVIXLAssembler());
Register temp_base = temps.AcquireX();
@@ -1729,20 +1783,39 @@ void CodeGeneratorARM64::StoreRelease(Primitive::Type type,
Operand op = OperandFromMemOperand(dst);
__ Add(temp_base, dst.GetBaseRegister(), op);
MemOperand base = MemOperand(temp_base);
+ // Ensure that between store and MaybeRecordImplicitNullCheck there are no pools emitted.
switch (type) {
case Primitive::kPrimBoolean:
case Primitive::kPrimByte:
- __ Stlrb(Register(src), base);
+ {
+ ExactAssemblyScope eas(masm, kInstructionSize, CodeBufferCheckScope::kExactSize);
+ __ stlrb(Register(src), base);
+ if (needs_null_check) {
+ MaybeRecordImplicitNullCheck(instruction);
+ }
+ }
break;
case Primitive::kPrimChar:
case Primitive::kPrimShort:
- __ Stlrh(Register(src), base);
+ {
+ ExactAssemblyScope eas(masm, kInstructionSize, CodeBufferCheckScope::kExactSize);
+ __ stlrh(Register(src), base);
+ if (needs_null_check) {
+ MaybeRecordImplicitNullCheck(instruction);
+ }
+ }
break;
case Primitive::kPrimInt:
case Primitive::kPrimNot:
case Primitive::kPrimLong:
DCHECK_EQ(src.Is64Bits(), Primitive::Is64BitType(type));
- __ Stlr(Register(src), base);
+ {
+ ExactAssemblyScope eas(masm, kInstructionSize, CodeBufferCheckScope::kExactSize);
+ __ stlr(Register(src), base);
+ if (needs_null_check) {
+ MaybeRecordImplicitNullCheck(instruction);
+ }
+ }
break;
case Primitive::kPrimFloat:
case Primitive::kPrimDouble: {
@@ -1756,8 +1829,13 @@ void CodeGeneratorARM64::StoreRelease(Primitive::Type type,
temp_src = src.Is64Bits() ? temps.AcquireX() : temps.AcquireW();
__ Fmov(temp_src, FPRegister(src));
}
-
- __ Stlr(temp_src, base);
+ {
+ ExactAssemblyScope eas(masm, kInstructionSize, CodeBufferCheckScope::kExactSize);
+ __ stlr(temp_src, base);
+ if (needs_null_check) {
+ MaybeRecordImplicitNullCheck(instruction);
+ }
+ }
break;
}
case Primitive::kPrimVoid:
@@ -1770,9 +1848,15 @@ void CodeGeneratorARM64::InvokeRuntime(QuickEntrypointEnum entrypoint,
uint32_t dex_pc,
SlowPathCode* slow_path) {
ValidateInvokeRuntime(entrypoint, instruction, slow_path);
- GenerateInvokeRuntime(GetThreadOffset<kArm64PointerSize>(entrypoint).Int32Value());
- if (EntrypointRequiresStackMap(entrypoint)) {
- RecordPcInfo(instruction, dex_pc, slow_path);
+
+ __ Ldr(lr, MemOperand(tr, GetThreadOffset<kArm64PointerSize>(entrypoint).Int32Value()));
+ {
+ // Ensure the pc position is recorded immediately after the `blr` instruction.
+ ExactAssemblyScope eas(GetVIXLAssembler(), kInstructionSize, CodeBufferCheckScope::kExactSize);
+ __ blr(lr);
+ if (EntrypointRequiresStackMap(entrypoint)) {
+ RecordPcInfo(instruction, dex_pc, slow_path);
+ }
}
}
@@ -1780,11 +1864,6 @@ void CodeGeneratorARM64::InvokeRuntimeWithoutRecordingPcInfo(int32_t entry_point
HInstruction* instruction,
SlowPathCode* slow_path) {
ValidateInvokeRuntimeWithoutRecordingPcInfo(instruction, slow_path);
- GenerateInvokeRuntime(entry_point_offset);
-}
-
-void CodeGeneratorARM64::GenerateInvokeRuntime(int32_t entry_point_offset) {
- BlockPoolsScope block_pools(GetVIXLAssembler());
__ Ldr(lr, MemOperand(tr, entry_point_offset));
__ Blr(lr);
}
@@ -1951,7 +2030,6 @@ void InstructionCodeGeneratorARM64::HandleFieldGet(HInstruction* instruction,
Location out = locations->Out();
uint32_t offset = field_info.GetFieldOffset().Uint32Value();
Primitive::Type field_type = field_info.GetFieldType();
- BlockPoolsScope block_pools(GetVIXLAssembler());
MemOperand field = HeapOperand(InputRegisterAt(instruction, 0), field_info.GetFieldOffset());
if (field_type == Primitive::kPrimNot && kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
@@ -1978,6 +2056,8 @@ void InstructionCodeGeneratorARM64::HandleFieldGet(HInstruction* instruction,
codegen_->LoadAcquire(
instruction, OutputCPURegister(instruction), field, /* needs_null_check */ true);
} else {
+ // Ensure that between load and MaybeRecordImplicitNullCheck there are no pools emitted.
+ EmissionCheckScope guard(GetVIXLAssembler(), kMaxMacroInstructionSizeInBytes);
codegen_->Load(field_type, OutputCPURegister(instruction), field);
codegen_->MaybeRecordImplicitNullCheck(instruction);
}
@@ -2007,7 +2087,6 @@ void InstructionCodeGeneratorARM64::HandleFieldSet(HInstruction* instruction,
const FieldInfo& field_info,
bool value_can_be_null) {
DCHECK(instruction->IsInstanceFieldSet() || instruction->IsStaticFieldSet());
- BlockPoolsScope block_pools(GetVIXLAssembler());
Register obj = InputRegisterAt(instruction, 0);
CPURegister value = InputCPURegisterOrZeroRegAt(instruction, 1);
@@ -2029,9 +2108,11 @@ void InstructionCodeGeneratorARM64::HandleFieldSet(HInstruction* instruction,
}
if (field_info.IsVolatile()) {
- codegen_->StoreRelease(field_type, source, HeapOperand(obj, offset));
- codegen_->MaybeRecordImplicitNullCheck(instruction);
+ codegen_->StoreRelease(
+ instruction, field_type, source, HeapOperand(obj, offset), /* needs_null_check */ true);
} else {
+ // Ensure that between store and MaybeRecordImplicitNullCheck there are no pools emitted.
+ EmissionCheckScope guard(GetVIXLAssembler(), kMaxMacroInstructionSizeInBytes);
codegen_->Store(field_type, source, HeapOperand(obj, offset));
codegen_->MaybeRecordImplicitNullCheck(instruction);
}
@@ -2317,10 +2398,7 @@ void InstructionCodeGeneratorARM64::VisitMultiplyAccumulate(HMultiplyAccumulate*
masm->GetCursorAddress<vixl::aarch64::Instruction*>() - kInstructionSize;
if (prev->IsLoadOrStore()) {
// Make sure we emit only exactly one nop.
- vixl::CodeBufferCheckScope scope(masm,
- kInstructionSize,
- vixl::CodeBufferCheckScope::kReserveBufferSpace,
- vixl::CodeBufferCheckScope::kExactSize);
+ ExactAssemblyScope scope(masm, kInstructionSize, CodeBufferCheckScope::kExactSize);
__ nop();
}
}
@@ -2376,8 +2454,6 @@ void InstructionCodeGeneratorARM64::VisitArrayGet(HArrayGet* instruction) {
instruction->IsStringCharAt();
MacroAssembler* masm = GetVIXLAssembler();
UseScratchRegisterScope temps(masm);
- // Block pools between `Load` and `MaybeRecordImplicitNullCheck`.
- BlockPoolsScope block_pools(masm);
// The read barrier instrumentation of object ArrayGet instructions
// does not support the HIntermediateAddress instruction.
@@ -2399,15 +2475,21 @@ void InstructionCodeGeneratorARM64::VisitArrayGet(HArrayGet* instruction) {
if (maybe_compressed_char_at) {
uint32_t count_offset = mirror::String::CountOffset().Uint32Value();
length = temps.AcquireW();
- if (instruction->GetArray()->IsIntermediateAddress()) {
- DCHECK_LT(count_offset, offset);
- int64_t adjusted_offset = static_cast<int64_t>(count_offset) - static_cast<int64_t>(offset);
- // Note that `adjusted_offset` is negative, so this will be a LDUR.
- __ Ldr(length, MemOperand(obj.X(), adjusted_offset));
- } else {
- __ Ldr(length, HeapOperand(obj, count_offset));
+ {
+ // Ensure that between load and MaybeRecordImplicitNullCheck there are no pools emitted.
+ EmissionCheckScope guard(GetVIXLAssembler(), kMaxMacroInstructionSizeInBytes);
+
+ if (instruction->GetArray()->IsIntermediateAddress()) {
+ DCHECK_LT(count_offset, offset);
+ int64_t adjusted_offset =
+ static_cast<int64_t>(count_offset) - static_cast<int64_t>(offset);
+ // Note that `adjusted_offset` is negative, so this will be a LDUR.
+ __ Ldr(length, MemOperand(obj.X(), adjusted_offset));
+ } else {
+ __ Ldr(length, HeapOperand(obj, count_offset));
+ }
+ codegen_->MaybeRecordImplicitNullCheck(instruction);
}
- codegen_->MaybeRecordImplicitNullCheck(instruction);
}
if (index.IsConstant()) {
if (maybe_compressed_char_at) {
@@ -2457,6 +2539,8 @@ void InstructionCodeGeneratorARM64::VisitArrayGet(HArrayGet* instruction) {
}
}
if (!maybe_compressed_char_at) {
+ // Ensure that between load and MaybeRecordImplicitNullCheck there are no pools emitted.
+ EmissionCheckScope guard(GetVIXLAssembler(), kMaxMacroInstructionSizeInBytes);
codegen_->Load(type, OutputCPURegister(instruction), source);
codegen_->MaybeRecordImplicitNullCheck(instruction);
}
@@ -2484,9 +2568,12 @@ void LocationsBuilderARM64::VisitArrayLength(HArrayLength* instruction) {
void InstructionCodeGeneratorARM64::VisitArrayLength(HArrayLength* instruction) {
uint32_t offset = CodeGenerator::GetArrayLengthOffset(instruction);
vixl::aarch64::Register out = OutputRegister(instruction);
- BlockPoolsScope block_pools(GetVIXLAssembler());
- __ Ldr(out, HeapOperand(InputRegisterAt(instruction, 0), offset));
- codegen_->MaybeRecordImplicitNullCheck(instruction);
+ {
+ // Ensure that between load and MaybeRecordImplicitNullCheck there are no pools emitted.
+ EmissionCheckScope guard(GetVIXLAssembler(), kMaxMacroInstructionSizeInBytes);
+ __ Ldr(out, HeapOperand(InputRegisterAt(instruction, 0), offset));
+ codegen_->MaybeRecordImplicitNullCheck(instruction);
+ }
// Mask out compression flag from String's array length.
if (mirror::kUseStringCompression && instruction->IsStringLength()) {
__ Lsr(out.W(), out.W(), 1u);
@@ -2527,7 +2614,6 @@ void InstructionCodeGeneratorARM64::VisitArraySet(HArraySet* instruction) {
size_t offset = mirror::Array::DataOffset(Primitive::ComponentSize(value_type)).Uint32Value();
MemOperand destination = HeapOperand(array);
MacroAssembler* masm = GetVIXLAssembler();
- BlockPoolsScope block_pools(masm);
if (!needs_write_barrier) {
DCHECK(!may_need_runtime_call_for_type_check);
@@ -2554,8 +2640,12 @@ void InstructionCodeGeneratorARM64::VisitArraySet(HArraySet* instruction) {
LSL,
Primitive::ComponentSizeShift(value_type));
}
- codegen_->Store(value_type, value, destination);
- codegen_->MaybeRecordImplicitNullCheck(instruction);
+ {
+ // Ensure that between store and MaybeRecordImplicitNullCheck there are no pools emitted.
+ EmissionCheckScope guard(GetVIXLAssembler(), kMaxMacroInstructionSizeInBytes);
+ codegen_->Store(value_type, value, destination);
+ codegen_->MaybeRecordImplicitNullCheck(instruction);
+ }
} else {
DCHECK(!instruction->GetArray()->IsIntermediateAddress());
vixl::aarch64::Label done;
@@ -2588,8 +2678,13 @@ void InstructionCodeGeneratorARM64::VisitArraySet(HArraySet* instruction) {
if (!index.IsConstant()) {
__ Add(temp, array, offset);
}
- __ Str(wzr, destination);
- codegen_->MaybeRecordImplicitNullCheck(instruction);
+ {
+ // Ensure that between store and MaybeRecordImplicitNullCheck there are no pools
+ // emitted.
+ EmissionCheckScope guard(GetVIXLAssembler(), kMaxMacroInstructionSizeInBytes);
+ __ Str(wzr, destination);
+ codegen_->MaybeRecordImplicitNullCheck(instruction);
+ }
__ B(&done);
__ Bind(&non_zero);
}
@@ -2604,8 +2699,12 @@ void InstructionCodeGeneratorARM64::VisitArraySet(HArraySet* instruction) {
Register temp2 = temps.AcquireSameSizeAs(array);
// /* HeapReference<Class> */ temp = array->klass_
- __ Ldr(temp, HeapOperand(array, class_offset));
- codegen_->MaybeRecordImplicitNullCheck(instruction);
+ {
+ // Ensure that between load and MaybeRecordImplicitNullCheck there are no pools emitted.
+ EmissionCheckScope guard(GetVIXLAssembler(), kMaxMacroInstructionSizeInBytes);
+ __ Ldr(temp, HeapOperand(array, class_offset));
+ codegen_->MaybeRecordImplicitNullCheck(instruction);
+ }
GetAssembler()->MaybeUnpoisonHeapReference(temp);
// /* HeapReference<Class> */ temp = temp->component_type_
@@ -2646,10 +2745,14 @@ void InstructionCodeGeneratorARM64::VisitArraySet(HArraySet* instruction) {
if (!index.IsConstant()) {
__ Add(temp, array, offset);
}
- __ Str(source, destination);
+ {
+ // Ensure that between store and MaybeRecordImplicitNullCheck there are no pools emitted.
+ EmissionCheckScope guard(GetVIXLAssembler(), kMaxMacroInstructionSizeInBytes);
+ __ Str(source, destination);
- if (!may_need_runtime_call_for_type_check) {
- codegen_->MaybeRecordImplicitNullCheck(instruction);
+ if (!may_need_runtime_call_for_type_check) {
+ codegen_->MaybeRecordImplicitNullCheck(instruction);
+ }
}
}
@@ -3944,19 +4047,25 @@ void InstructionCodeGeneratorARM64::VisitInvokeInterface(HInvokeInterface* invok
// art_quick_imt_conflict_trampoline, so prevent VIXL from using it.
MacroAssembler* masm = GetVIXLAssembler();
UseScratchRegisterScope scratch_scope(masm);
- BlockPoolsScope block_pools(masm);
scratch_scope.Exclude(ip1);
__ Mov(ip1, invoke->GetDexMethodIndex());
+ // Ensure that between load and MaybeRecordImplicitNullCheck there are no pools emitted.
if (receiver.IsStackSlot()) {
__ Ldr(temp.W(), StackOperandFrom(receiver));
- // /* HeapReference<Class> */ temp = temp->klass_
- __ Ldr(temp.W(), HeapOperand(temp.W(), class_offset));
+ {
+ EmissionCheckScope guard(GetVIXLAssembler(), kMaxMacroInstructionSizeInBytes);
+ // /* HeapReference<Class> */ temp = temp->klass_
+ __ Ldr(temp.W(), HeapOperand(temp.W(), class_offset));
+ codegen_->MaybeRecordImplicitNullCheck(invoke);
+ }
} else {
+ EmissionCheckScope guard(GetVIXLAssembler(), kMaxMacroInstructionSizeInBytes);
// /* HeapReference<Class> */ temp = receiver->klass_
__ Ldr(temp.W(), HeapOperandFrom(receiver, class_offset));
+ codegen_->MaybeRecordImplicitNullCheck(invoke);
}
- codegen_->MaybeRecordImplicitNullCheck(invoke);
+
// Instead of simply (possibly) unpoisoning `temp` here, we should
// emit a read barrier for the previous class reference load.
// However this is not required in practice, as this is an
@@ -3973,10 +4082,16 @@ void InstructionCodeGeneratorARM64::VisitInvokeInterface(HInvokeInterface* invok
__ Ldr(temp, MemOperand(temp, method_offset));
// lr = temp->GetEntryPoint();
__ Ldr(lr, MemOperand(temp, entry_point.Int32Value()));
- // lr();
- __ Blr(lr);
- DCHECK(!codegen_->IsLeafMethod());
- codegen_->RecordPcInfo(invoke, invoke->GetDexPc());
+
+ {
+ // Ensure the pc position is recorded immediately after the `blr` instruction.
+ ExactAssemblyScope eas(GetVIXLAssembler(), kInstructionSize, CodeBufferCheckScope::kExactSize);
+
+ // lr();
+ __ blr(lr);
+ DCHECK(!codegen_->IsLeafMethod());
+ codegen_->RecordPcInfo(invoke, invoke->GetDexPc());
+ }
}
void LocationsBuilderARM64::VisitInvokeVirtual(HInvokeVirtual* invoke) {
@@ -4088,8 +4203,16 @@ void CodeGeneratorARM64::GenerateStaticOrDirectCall(HInvokeStaticOrDirect* invok
__ Ldr(lr, MemOperand(
XRegisterFrom(callee_method),
ArtMethod::EntryPointFromQuickCompiledCodeOffset(kArm64PointerSize).Int32Value()));
- // lr()
- __ Blr(lr);
+ {
+ // To ensure that the pc position is recorded immediately after the `blr` instruction,
+ // BLR must be the last instruction emitted in this function.
+ // Recording the pc will occur right after returning from this function.
+ ExactAssemblyScope eas(GetVIXLAssembler(),
+ kInstructionSize,
+ CodeBufferCheckScope::kExactSize);
+ // lr()
+ __ blr(lr);
+ }
break;
}
@@ -4109,12 +4232,15 @@ void CodeGeneratorARM64::GenerateVirtualCall(HInvokeVirtual* invoke, Location te
Offset class_offset = mirror::Object::ClassOffset();
Offset entry_point = ArtMethod::EntryPointFromQuickCompiledCodeOffset(kArm64PointerSize);
- BlockPoolsScope block_pools(GetVIXLAssembler());
-
DCHECK(receiver.IsRegister());
- // /* HeapReference<Class> */ temp = receiver->klass_
- __ Ldr(temp.W(), HeapOperandFrom(LocationFrom(receiver), class_offset));
- MaybeRecordImplicitNullCheck(invoke);
+
+ {
+ // Ensure that between load and MaybeRecordImplicitNullCheck there are no pools emitted.
+ EmissionCheckScope guard(GetVIXLAssembler(), kMaxMacroInstructionSizeInBytes);
+ // /* HeapReference<Class> */ temp = receiver->klass_
+ __ Ldr(temp.W(), HeapOperandFrom(LocationFrom(receiver), class_offset));
+ MaybeRecordImplicitNullCheck(invoke);
+ }
// Instead of simply (possibly) unpoisoning `temp` here, we should
// emit a read barrier for the previous class reference load.
// intermediate/temporary reference and because the current
@@ -4126,8 +4252,14 @@ void CodeGeneratorARM64::GenerateVirtualCall(HInvokeVirtual* invoke, Location te
__ Ldr(temp, MemOperand(temp, method_offset));
// lr = temp->GetEntryPoint();
__ Ldr(lr, MemOperand(temp, entry_point.SizeValue()));
- // lr();
- __ Blr(lr);
+ {
+ // To ensure that the pc position is recorded immediately after the `blr` instruction,
+ // BLR should be the last instruction emitted in this function.
+ // Recording the pc will occur right after returning from this function.
+ ExactAssemblyScope eas(GetVIXLAssembler(), kInstructionSize, CodeBufferCheckScope::kExactSize);
+ // lr();
+ __ blr(lr);
+ }
}
void LocationsBuilderARM64::VisitInvokePolymorphic(HInvokePolymorphic* invoke) {
@@ -4340,7 +4472,9 @@ void InstructionCodeGeneratorARM64::VisitInvokeStaticOrDirect(HInvokeStaticOrDir
return;
}
- BlockPoolsScope block_pools(GetVIXLAssembler());
+ // Ensure that between the BLR (emitted by GenerateStaticOrDirectCall) and RecordPcInfo there
+ // are no pools emitted.
+ EmissionCheckScope guard(GetVIXLAssembler(), kInvokeCodeMarginSizeInBytes);
LocationSummary* locations = invoke->GetLocations();
codegen_->GenerateStaticOrDirectCall(
invoke, locations->HasTemps() ? locations->GetTemp(0) : Location::NoLocation());
@@ -4352,6 +4486,9 @@ void InstructionCodeGeneratorARM64::VisitInvokeVirtual(HInvokeVirtual* invoke) {
return;
}
+ // Ensure that between the BLR (emitted by GenerateVirtualCall) and RecordPcInfo there
+ // are no pools emitted.
+ EmissionCheckScope guard(GetVIXLAssembler(), kInvokeCodeMarginSizeInBytes);
codegen_->GenerateVirtualCall(invoke, invoke->GetLocations()->GetTemp(0));
DCHECK(!codegen_->IsLeafMethod());
codegen_->RecordPcInfo(invoke, invoke->GetDexPc());
@@ -4393,6 +4530,7 @@ void LocationsBuilderARM64::VisitLoadClass(HLoadClass* cls) {
cls,
LocationFrom(calling_convention.GetRegisterAt(0)),
LocationFrom(vixl::aarch64::x0));
+ DCHECK(calling_convention.GetRegisterAt(0).Is(vixl::aarch64::x0));
return;
}
DCHECK(!cls->NeedsAccessCheck());
@@ -4410,6 +4548,22 @@ void LocationsBuilderARM64::VisitLoadClass(HLoadClass* cls) {
locations->SetInAt(0, Location::RequiresRegister());
}
locations->SetOut(Location::RequiresRegister());
+ if (cls->GetLoadKind() == HLoadClass::LoadKind::kBssEntry) {
+ if (!kUseReadBarrier || kUseBakerReadBarrier) {
+ // Rely on the type resolution or initialization and marking to save everything we need.
+ // Note that IP0 may be clobbered by saving/restoring the live register (only one thanks
+ // to the custom calling convention) or by marking, so we shall use IP1.
+ RegisterSet caller_saves = RegisterSet::Empty();
+ InvokeRuntimeCallingConvention calling_convention;
+ caller_saves.Add(Location::RegisterLocation(calling_convention.GetRegisterAt(0).GetCode()));
+ DCHECK_EQ(calling_convention.GetRegisterAt(0).GetCode(),
+ RegisterFrom(calling_convention.GetReturnLocation(Primitive::kPrimNot),
+ Primitive::kPrimNot).GetCode());
+ locations->SetCustomSlowPathCallerSaves(caller_saves);
+ } else {
+ // For non-Baker read barrier we have a temp-clobbering call.
+ }
+ }
}
// NO_THREAD_SAFETY_ANALYSIS as we manipulate handles whose internal object we know does not
@@ -4424,6 +4578,8 @@ void InstructionCodeGeneratorARM64::VisitLoadClass(HLoadClass* cls) NO_THREAD_SA
Location out_loc = cls->GetLocations()->Out();
Register out = OutputRegister(cls);
+ Register bss_entry_temp;
+ vixl::aarch64::Label* bss_entry_adrp_label = nullptr;
const ReadBarrierOption read_barrier_option = cls->IsInBootImage()
? kWithoutReadBarrier
@@ -4473,18 +4629,23 @@ void InstructionCodeGeneratorARM64::VisitLoadClass(HLoadClass* cls) NO_THREAD_SA
// Add ADRP with its PC-relative Class .bss entry patch.
const DexFile& dex_file = cls->GetDexFile();
dex::TypeIndex type_index = cls->GetTypeIndex();
- vixl::aarch64::Label* adrp_label = codegen_->NewBssEntryTypePatch(dex_file, type_index);
- codegen_->EmitAdrpPlaceholder(adrp_label, out.X());
+ // We can go to slow path even with non-zero reference and in that case marking
+ // can clobber IP0, so we need to use IP1 which shall be preserved.
+ bss_entry_temp = ip1;
+ UseScratchRegisterScope temps(codegen_->GetVIXLAssembler());
+ temps.Exclude(bss_entry_temp);
+ bss_entry_adrp_label = codegen_->NewBssEntryTypePatch(dex_file, type_index);
+ codegen_->EmitAdrpPlaceholder(bss_entry_adrp_label, bss_entry_temp);
// Add LDR with its PC-relative Class patch.
vixl::aarch64::Label* ldr_label =
- codegen_->NewBssEntryTypePatch(dex_file, type_index, adrp_label);
+ codegen_->NewBssEntryTypePatch(dex_file, type_index, bss_entry_adrp_label);
// /* GcRoot<mirror::Class> */ out = *(base_address + offset) /* PC-relative */
GenerateGcRootFieldLoad(cls,
- cls->GetLocations()->Out(),
- out.X(),
- /* placeholder */ 0u,
+ out_loc,
+ bss_entry_temp,
+ /* offset placeholder */ 0u,
ldr_label,
- kCompilerReadBarrierOption);
+ read_barrier_option);
generate_null_check = true;
break;
}
@@ -4497,7 +4658,7 @@ void InstructionCodeGeneratorARM64::VisitLoadClass(HLoadClass* cls) NO_THREAD_SA
out.X(),
/* offset */ 0,
/* fixup_label */ nullptr,
- kCompilerReadBarrierOption);
+ read_barrier_option);
break;
}
case HLoadClass::LoadKind::kDexCacheViaMethod:
@@ -4506,10 +4667,11 @@ void InstructionCodeGeneratorARM64::VisitLoadClass(HLoadClass* cls) NO_THREAD_SA
UNREACHABLE();
}
- if (generate_null_check || cls->MustGenerateClinitCheck()) {
+ bool do_clinit = cls->MustGenerateClinitCheck();
+ if (generate_null_check || do_clinit) {
DCHECK(cls->CanCallRuntime());
SlowPathCodeARM64* slow_path = new (GetGraph()->GetArena()) LoadClassSlowPathARM64(
- cls, cls, cls->GetDexPc(), cls->MustGenerateClinitCheck());
+ cls, cls, cls->GetDexPc(), do_clinit, bss_entry_temp, bss_entry_adrp_label);
codegen_->AddSlowPath(slow_path);
if (generate_null_check) {
__ Cbz(out, slow_path->GetEntryLabel());
@@ -4577,7 +4739,9 @@ void LocationsBuilderARM64::VisitLoadString(HLoadString* load) {
locations->SetOut(Location::RequiresRegister());
if (load->GetLoadKind() == HLoadString::LoadKind::kBssEntry) {
if (!kUseReadBarrier || kUseBakerReadBarrier) {
- // Rely on the pResolveString and/or marking to save everything, including temps.
+ // Rely on the pResolveString and marking to save everything we need.
+ // Note that IP0 may be clobbered by saving/restoring the live register (only one thanks
+ // to the custom calling convention) or by marking, so we shall use IP1.
RegisterSet caller_saves = RegisterSet::Empty();
InvokeRuntimeCallingConvention calling_convention;
caller_saves.Add(Location::RegisterLocation(calling_convention.GetRegisterAt(0).GetCode()));
@@ -4628,8 +4792,11 @@ void InstructionCodeGeneratorARM64::VisitLoadString(HLoadString* load) NO_THREAD
const DexFile& dex_file = load->GetDexFile();
const dex::StringIndex string_index = load->GetStringIndex();
DCHECK(!codegen_->GetCompilerOptions().IsBootImage());
+ // We could use IP0 as the marking shall not clobber IP0 if the reference is null and
+ // that's when we need the slow path. But let's not rely on such details and use IP1.
+ Register temp = ip1;
UseScratchRegisterScope temps(codegen_->GetVIXLAssembler());
- Register temp = temps.AcquireX();
+ temps.Exclude(temp);
vixl::aarch64::Label* adrp_label = codegen_->NewPcRelativeStringPatch(dex_file, string_index);
codegen_->EmitAdrpPlaceholder(adrp_label, temp);
// Add LDR with its PC-relative String patch.
@@ -4817,8 +4984,15 @@ void InstructionCodeGeneratorARM64::VisitNewInstance(HNewInstance* instruction)
MemberOffset code_offset = ArtMethod::EntryPointFromQuickCompiledCodeOffset(kArm64PointerSize);
__ Ldr(XRegisterFrom(temp), MemOperand(tr, QUICK_ENTRY_POINT(pNewEmptyString)));
__ Ldr(lr, MemOperand(XRegisterFrom(temp), code_offset.Int32Value()));
- __ Blr(lr);
- codegen_->RecordPcInfo(instruction, instruction->GetDexPc());
+
+ {
+ // Ensure the pc position is recorded immediately after the `blr` instruction.
+ ExactAssemblyScope eas(GetVIXLAssembler(),
+ kInstructionSize,
+ CodeBufferCheckScope::kExactSize);
+ __ blr(lr);
+ codegen_->RecordPcInfo(instruction, instruction->GetDexPc());
+ }
} else {
codegen_->InvokeRuntime(instruction->GetEntrypoint(), instruction, instruction->GetDexPc());
CheckEntrypointTypes<kQuickAllocObjectWithChecks, void*, mirror::Class*>();
@@ -4862,11 +5036,13 @@ void CodeGeneratorARM64::GenerateImplicitNullCheck(HNullCheck* instruction) {
if (CanMoveNullCheckToUser(instruction)) {
return;
}
-
- BlockPoolsScope block_pools(GetVIXLAssembler());
- Location obj = instruction->GetLocations()->InAt(0);
- __ Ldr(wzr, HeapOperandFrom(obj, Offset(0)));
- RecordPcInfo(instruction, instruction->GetDexPc());
+ {
+ // Ensure that between load and MaybeRecordImplicitNullCheck there are no pools emitted.
+ EmissionCheckScope guard(GetVIXLAssembler(), kMaxMacroInstructionSizeInBytes);
+ Location obj = instruction->GetLocations()->InAt(0);
+ __ Ldr(wzr, HeapOperandFrom(obj, Offset(0)));
+ RecordPcInfo(instruction, instruction->GetDexPc());
+ }
}
void CodeGeneratorARM64::GenerateExplicitNullCheck(HNullCheck* instruction) {
@@ -5603,10 +5779,14 @@ void CodeGeneratorARM64::GenerateReferenceLoadWithBakerReadBarrier(HInstruction*
DCHECK(obj.IsW());
uint32_t monitor_offset = mirror::Object::MonitorOffset().Int32Value();
- // /* int32_t */ monitor = obj->monitor_
- __ Ldr(temp, HeapOperand(obj, monitor_offset));
- if (needs_null_check) {
- MaybeRecordImplicitNullCheck(instruction);
+ {
+ // Ensure that between load and MaybeRecordImplicitNullCheck there are no pools emitted.
+ EmissionCheckScope guard(GetVIXLAssembler(), kMaxMacroInstructionSizeInBytes);
+ // /* int32_t */ monitor = obj->monitor_
+ __ Ldr(temp, HeapOperand(obj, monitor_offset));
+ if (needs_null_check) {
+ MaybeRecordImplicitNullCheck(instruction);
+ }
}
// /* LockWord */ lock_word = LockWord(monitor)
static_assert(sizeof(LockWord) == sizeof(int32_t),
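Note on the scopes used throughout the ARM64 changes above: an EmissionCheckScope reserves a conservative byte margin so the VIXL macro-assembler will not emit a literal or veneer pool inside the guarded region, while an ExactAssemblyScope additionally requires the region to contain exactly the stated number of bytes, which is what RecordPcInfo/MaybeRecordImplicitNullCheck rely on right after a load or blr. Below is a minimal sketch of the ExactAssemblyScope pattern in plain VIXL, outside of ART; the helper name and the idea of returning the instruction offset are illustrative assumptions, not ART or VIXL API.

  #include "aarch64/macro-assembler-aarch64.h"

  using namespace vixl::aarch64;

  // Emit a single load whose exact PC must be known afterwards (e.g. to record
  // an implicit null check). The ExactAssemblyScope guarantees that VIXL emits
  // no literal or veneer pool between the `ldr` and the cursor query, and it
  // asserts that exactly kInstructionSize bytes were emitted in the scope.
  ptrdiff_t EmitLoadAndReturnItsOffset(MacroAssembler* masm,
                                       const Register& dst,
                                       const MemOperand& src) {
    vixl::ExactAssemblyScope scope(masm, kInstructionSize,
                                   vixl::CodeBufferCheckScope::kExactSize);
    masm->ldr(dst, src);  // Raw assembler mnemonic; macro instructions are not allowed here.
    return masm->GetCursorOffset() - static_cast<ptrdiff_t>(kInstructionSize);
  }

In the patch itself the same pattern appears inline, with RecordPcInfo or MaybeRecordImplicitNullCheck called inside the scope instead of returning an offset.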
diff --git a/compiler/optimizing/code_generator_arm64.h b/compiler/optimizing/code_generator_arm64.h
index f6cb90a63a..5faf29a90f 100644
--- a/compiler/optimizing/code_generator_arm64.h
+++ b/compiler/optimizing/code_generator_arm64.h
@@ -43,6 +43,11 @@ class CodeGeneratorARM64;
// Use a local definition to prevent copying mistakes.
static constexpr size_t kArm64WordSize = static_cast<size_t>(kArm64PointerSize);
+// These constants are used as an approximate margin when emission of veneer and literal pools
+// must be blocked.
+static constexpr int kMaxMacroInstructionSizeInBytes = 15 * vixl::aarch64::kInstructionSize;
+static constexpr int kInvokeCodeMarginSizeInBytes = 6 * kMaxMacroInstructionSizeInBytes;
+
static const vixl::aarch64::Register kParameterCoreRegisters[] = {
vixl::aarch64::x1,
vixl::aarch64::x2,
@@ -486,9 +491,11 @@ class CodeGeneratorARM64 : public CodeGenerator {
vixl::aarch64::CPURegister dst,
const vixl::aarch64::MemOperand& src,
bool needs_null_check);
- void StoreRelease(Primitive::Type type,
+ void StoreRelease(HInstruction* instruction,
+ Primitive::Type type,
vixl::aarch64::CPURegister src,
- const vixl::aarch64::MemOperand& dst);
+ const vixl::aarch64::MemOperand& dst,
+ bool needs_null_check);
// Generate code to invoke a runtime entry point.
void InvokeRuntime(QuickEntrypointEnum entrypoint,
@@ -502,8 +509,6 @@ class CodeGeneratorARM64 : public CodeGenerator {
HInstruction* instruction,
SlowPathCode* slow_path);
- void GenerateInvokeRuntime(int32_t entry_point_offset);
-
ParallelMoveResolverARM64* GetMoveResolver() OVERRIDE { return &move_resolver_; }
bool NeedsTwoRegisters(Primitive::Type type ATTRIBUTE_UNUSED) const OVERRIDE {
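The kBssEntry paths in the ARM64 change keep the page address of the .bss entry in IP1 (x17) and exclude it from the VIXL scratch-register pool, so that neither pool handling nor a nested UseScratchRegisterScope can hand it out before the slow path stores the resolved class through it. A minimal sketch of that reservation pattern in plain VIXL follows; the function name and the constant address are illustrative assumptions, not ART code.

  #include "aarch64/macro-assembler-aarch64.h"

  using namespace vixl::aarch64;

  // Keep a live value in x17 (ART's IP1) across subsequent macro instructions:
  // excluding it from the scratch pool means neither the macro-assembler nor a
  // nested UseScratchRegisterScope can clobber it while this scope is alive.
  void ComputeWithReservedIp1(MacroAssembler* masm) {
    UseScratchRegisterScope temps(masm);
    temps.Exclude(x17);                   // x17 now acts as a pinned temp.
    masm->Mov(x17, 0x12345000);           // e.g. a page address that must survive.
    Register scratch = temps.AcquireX();  // Never returns x17 in this scope.
    masm->Ldr(scratch, MemOperand(x17));  // Use the pinned address later.
  }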
diff --git a/compiler/optimizing/code_generator_arm_vixl.cc b/compiler/optimizing/code_generator_arm_vixl.cc
index e18960872e..5c4ca5bc17 100644
--- a/compiler/optimizing/code_generator_arm_vixl.cc
+++ b/compiler/optimizing/code_generator_arm_vixl.cc
@@ -400,12 +400,30 @@ class LoadClassSlowPathARMVIXL : public SlowPathCodeARMVIXL {
void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
LocationSummary* locations = instruction_->GetLocations();
+ Location out = locations->Out();
+ constexpr bool call_saves_everything_except_r0 = (!kUseReadBarrier || kUseBakerReadBarrier);
CodeGeneratorARMVIXL* arm_codegen = down_cast<CodeGeneratorARMVIXL*>(codegen);
__ Bind(GetEntryLabel());
SaveLiveRegisters(codegen, locations);
InvokeRuntimeCallingConventionARMVIXL calling_convention;
+ // For HLoadClass/kBssEntry/kSaveEverything, make sure we preserve the address of the entry.
+ DCHECK_EQ(instruction_->IsLoadClass(), cls_ == instruction_);
+ bool is_load_class_bss_entry =
+ (cls_ == instruction_) && (cls_->GetLoadKind() == HLoadClass::LoadKind::kBssEntry);
+ vixl32::Register entry_address;
+ if (is_load_class_bss_entry && call_saves_everything_except_r0) {
+ vixl32::Register temp = RegisterFrom(locations->GetTemp(0));
+ // In the unlucky case that the `temp` is R0, we preserve the address in `out` across
+ // the kSaveEverything call.
+ bool temp_is_r0 = temp.Is(calling_convention.GetRegisterAt(0));
+ entry_address = temp_is_r0 ? RegisterFrom(out) : temp;
+ DCHECK(!entry_address.Is(calling_convention.GetRegisterAt(0)));
+ if (temp_is_r0) {
+ __ Mov(entry_address, temp);
+ }
+ }
dex::TypeIndex type_index = cls_->GetTypeIndex();
__ Mov(calling_convention.GetRegisterAt(0), type_index.index_);
QuickEntrypointEnum entrypoint = do_clinit_ ? kQuickInitializeStaticStorage
@@ -417,27 +435,28 @@ class LoadClassSlowPathARMVIXL : public SlowPathCodeARMVIXL {
CheckEntrypointTypes<kQuickInitializeType, void*, uint32_t>();
}
+ // For HLoadClass/kBssEntry, store the resolved Class to the BSS entry.
+ if (is_load_class_bss_entry) {
+ if (call_saves_everything_except_r0) {
+ // The class entry address was preserved in `entry_address` thanks to kSaveEverything.
+ __ Str(r0, MemOperand(entry_address));
+ } else {
+ // For non-Baker read barrier, we need to re-calculate the address of the class entry.
+ UseScratchRegisterScope temps(
+ down_cast<CodeGeneratorARMVIXL*>(codegen)->GetVIXLAssembler());
+ vixl32::Register temp = temps.Acquire();
+ CodeGeneratorARMVIXL::PcRelativePatchInfo* labels =
+ arm_codegen->NewTypeBssEntryPatch(cls_->GetDexFile(), type_index);
+ arm_codegen->EmitMovwMovtPlaceholder(labels, temp);
+ __ Str(r0, MemOperand(temp));
+ }
+ }
// Move the class to the desired location.
- Location out = locations->Out();
if (out.IsValid()) {
DCHECK(out.IsRegister() && !locations->GetLiveRegisters()->ContainsCoreRegister(out.reg()));
arm_codegen->Move32(locations->Out(), LocationFrom(r0));
}
RestoreLiveRegisters(codegen, locations);
- // For HLoadClass/kBssEntry, store the resolved Class to the BSS entry.
- DCHECK_EQ(instruction_->IsLoadClass(), cls_ == instruction_);
- if (cls_ == instruction_ && cls_->GetLoadKind() == HLoadClass::LoadKind::kBssEntry) {
- DCHECK(out.IsValid());
- // TODO: Change art_quick_initialize_type/art_quick_initialize_static_storage to
- // kSaveEverything and use a temporary for the .bss entry address in the fast path,
- // so that we can avoid another calculation here.
- UseScratchRegisterScope temps(down_cast<CodeGeneratorARMVIXL*>(codegen)->GetVIXLAssembler());
- vixl32::Register temp = temps.Acquire();
- CodeGeneratorARMVIXL::PcRelativePatchInfo* labels =
- arm_codegen->NewTypeBssEntryPatch(cls_->GetDexFile(), type_index);
- arm_codegen->EmitMovwMovtPlaceholder(labels, temp);
- __ Str(OutputRegister(cls_), MemOperand(temp));
- }
__ B(GetExitLabel());
}
@@ -462,12 +481,13 @@ class LoadStringSlowPathARMVIXL : public SlowPathCodeARMVIXL {
: SlowPathCodeARMVIXL(instruction) {}
void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
+ DCHECK(instruction_->IsLoadString());
+ DCHECK_EQ(instruction_->AsLoadString()->GetLoadKind(), HLoadString::LoadKind::kBssEntry);
LocationSummary* locations = instruction_->GetLocations();
DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(locations->Out().reg()));
HLoadString* load = instruction_->AsLoadString();
const dex::StringIndex string_index = load->GetStringIndex();
vixl32::Register out = OutputRegister(load);
- vixl32::Register temp = RegisterFrom(locations->GetTemp(0));
constexpr bool call_saves_everything_except_r0 = (!kUseReadBarrier || kUseBakerReadBarrier);
CodeGeneratorARMVIXL* arm_codegen = down_cast<CodeGeneratorARMVIXL*>(codegen);
@@ -476,12 +496,16 @@ class LoadStringSlowPathARMVIXL : public SlowPathCodeARMVIXL {
InvokeRuntimeCallingConventionARMVIXL calling_convention;
// In the unlucky case that the `temp` is R0, we preserve the address in `out` across
- // the kSaveEverything call (or use `out` for the address after non-kSaveEverything call).
- bool temp_is_r0 = (temp.Is(calling_convention.GetRegisterAt(0)));
- vixl32::Register entry_address = temp_is_r0 ? out : temp;
- DCHECK(!entry_address.Is(calling_convention.GetRegisterAt(0)));
- if (call_saves_everything_except_r0 && temp_is_r0) {
- __ Mov(entry_address, temp);
+ // the kSaveEverything call.
+ vixl32::Register entry_address;
+ if (call_saves_everything_except_r0) {
+ vixl32::Register temp = RegisterFrom(locations->GetTemp(0));
+ bool temp_is_r0 = (temp.Is(calling_convention.GetRegisterAt(0)));
+ entry_address = temp_is_r0 ? out : temp;
+ DCHECK(!entry_address.Is(calling_convention.GetRegisterAt(0)));
+ if (temp_is_r0) {
+ __ Mov(entry_address, temp);
+ }
}
__ Mov(calling_convention.GetRegisterAt(0), string_index.index_);
@@ -494,10 +518,13 @@ class LoadStringSlowPathARMVIXL : public SlowPathCodeARMVIXL {
__ Str(r0, MemOperand(entry_address));
} else {
// For non-Baker read barrier, we need to re-calculate the address of the string entry.
+ UseScratchRegisterScope temps(
+ down_cast<CodeGeneratorARMVIXL*>(codegen)->GetVIXLAssembler());
+ vixl32::Register temp = temps.Acquire();
CodeGeneratorARMVIXL::PcRelativePatchInfo* labels =
arm_codegen->NewPcRelativeStringPatch(load->GetDexFile(), string_index);
- arm_codegen->EmitMovwMovtPlaceholder(labels, out);
- __ Str(r0, MemOperand(entry_address));
+ arm_codegen->EmitMovwMovtPlaceholder(labels, temp);
+ __ Str(r0, MemOperand(temp));
}
arm_codegen->Move32(locations->Out(), LocationFrom(r0));
@@ -5832,6 +5859,7 @@ void LocationsBuilderARMVIXL::VisitLoadClass(HLoadClass* cls) {
cls,
LocationFrom(calling_convention.GetRegisterAt(0)),
LocationFrom(r0));
+ DCHECK(calling_convention.GetRegisterAt(0).Is(r0));
return;
}
DCHECK(!cls->NeedsAccessCheck());
@@ -5849,6 +5877,22 @@ void LocationsBuilderARMVIXL::VisitLoadClass(HLoadClass* cls) {
locations->SetInAt(0, Location::RequiresRegister());
}
locations->SetOut(Location::RequiresRegister());
+ if (load_kind == HLoadClass::LoadKind::kBssEntry) {
+ if (!kUseReadBarrier || kUseBakerReadBarrier) {
+ // Rely on the type resolution or initialization and marking to save everything we need.
+ // Note that IP may be clobbered by saving/restoring the live register (only one thanks
+ // to the custom calling convention) or by marking, so we request a different temp.
+ locations->AddTemp(Location::RequiresRegister());
+ RegisterSet caller_saves = RegisterSet::Empty();
+ InvokeRuntimeCallingConventionARMVIXL calling_convention;
+ caller_saves.Add(LocationFrom(calling_convention.GetRegisterAt(0)));
+ // TODO: Add GetReturnLocation() to the calling convention so that we can DCHECK()
+      // that the kPrimNot result register is the same as the first argument register.
+ locations->SetCustomSlowPathCallerSaves(caller_saves);
+ } else {
+ // For non-Baker read barrier we have a temp-clobbering call.
+ }
+ }
}
// NO_THREAD_SAFETY_ANALYSIS as we manipulate handles whose internal object we know does not
@@ -5906,10 +5950,13 @@ void InstructionCodeGeneratorARMVIXL::VisitLoadClass(HLoadClass* cls) NO_THREAD_
break;
}
case HLoadClass::LoadKind::kBssEntry: {
+ vixl32::Register temp = (!kUseReadBarrier || kUseBakerReadBarrier)
+ ? RegisterFrom(locations->GetTemp(0))
+ : out;
CodeGeneratorARMVIXL::PcRelativePatchInfo* labels =
codegen_->NewTypeBssEntryPatch(cls->GetDexFile(), cls->GetTypeIndex());
- codegen_->EmitMovwMovtPlaceholder(labels, out);
- GenerateGcRootFieldLoad(cls, out_loc, out, 0, kCompilerReadBarrierOption);
+ codegen_->EmitMovwMovtPlaceholder(labels, temp);
+ GenerateGcRootFieldLoad(cls, out_loc, temp, /* offset */ 0, read_barrier_option);
generate_null_check = true;
break;
}
@@ -5918,7 +5965,7 @@ void InstructionCodeGeneratorARMVIXL::VisitLoadClass(HLoadClass* cls) NO_THREAD_
cls->GetTypeIndex(),
cls->GetClass()));
// /* GcRoot<mirror::Class> */ out = *out
- GenerateGcRootFieldLoad(cls, out_loc, out, /* offset */ 0, kCompilerReadBarrierOption);
+ GenerateGcRootFieldLoad(cls, out_loc, out, /* offset */ 0, read_barrier_option);
break;
}
case HLoadClass::LoadKind::kDexCacheViaMethod:
@@ -6012,9 +6059,9 @@ void LocationsBuilderARMVIXL::VisitLoadString(HLoadString* load) {
locations->SetOut(Location::RequiresRegister());
if (load_kind == HLoadString::LoadKind::kBssEntry) {
if (!kUseReadBarrier || kUseBakerReadBarrier) {
- // Rely on the pResolveString and/or marking to save everything, including temps.
- // Note that IP may theoretically be clobbered by saving/restoring the live register
- // (only one thanks to the custom calling convention), so we request a different temp.
+ // Rely on the pResolveString and marking to save everything we need, including temps.
+ // Note that IP may be clobbered by saving/restoring the live register (only one thanks
+ // to the custom calling convention) or by marking, so we request a different temp.
locations->AddTemp(Location::RequiresRegister());
RegisterSet caller_saves = RegisterSet::Empty();
InvokeRuntimeCallingConventionARMVIXL calling_convention;
@@ -6059,7 +6106,9 @@ void InstructionCodeGeneratorARMVIXL::VisitLoadString(HLoadString* load) NO_THRE
}
case HLoadString::LoadKind::kBssEntry: {
DCHECK(!codegen_->GetCompilerOptions().IsBootImage());
- vixl32::Register temp = RegisterFrom(locations->GetTemp(0));
+ vixl32::Register temp = (!kUseReadBarrier || kUseBakerReadBarrier)
+ ? RegisterFrom(locations->GetTemp(0))
+ : out;
CodeGeneratorARMVIXL::PcRelativePatchInfo* labels =
codegen_->NewPcRelativeStringPatch(load->GetDexFile(), load->GetStringIndex());
codegen_->EmitMovwMovtPlaceholder(labels, temp);
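The ARM/VIXL hunks above all implement one pattern: for HLoadClass/HLoadString with the kBssEntry load kind, the slow path keeps the .bss entry address alive across a kSaveEverything runtime call (which preserves every register except R0) and stores the resolved object straight into the entry, instead of re-emitting the MOVW/MOVT pair afterwards. The following stand-alone sketch models only the register-selection decision, with hypothetical register names and no real code generation; it is an illustration, not the codegen itself.

#include <cassert>
#include <cstdio>

// Hypothetical stand-ins for the ARM registers; the real code uses
// vixl32::Register and the ARMVIXL assembler.
enum Reg { kR0, kR1, kR2, kR3, kR4, kNoReg };

struct SlowPathSketch {
  bool call_saves_everything_except_r0;  // (!kUseReadBarrier || kUseBakerReadBarrier) in the diff.
  Reg temp;                              // Temp holding the .bss entry address on entry.
  Reg out;                               // Output register of the HLoadClass/HLoadString.

  // Returns the register that still holds the .bss entry address after the
  // runtime call, or kNoReg if the address has to be recomputed.
  Reg PickEntryAddress() const {
    if (!call_saves_everything_except_r0) {
      return kNoReg;  // Temp is clobbered by the call; re-emit MOVW/MOVT afterwards.
    }
    // R0 is the first argument (and return) register of the runtime call, so if
    // the temp happens to be R0, the address is moved to `out`, which the
    // kSaveEverything call preserves.
    return (temp == kR0) ? out : temp;
  }
};

int main() {
  SlowPathSketch normal{true, kR4, kR1};
  assert(normal.PickEntryAddress() == kR4);        // Keep the address in the temp.
  SlowPathSketch unlucky{true, kR0, kR1};
  assert(unlucky.PickEntryAddress() == kR1);       // Move it to `out` before the call.
  SlowPathSketch non_baker{false, kR4, kR1};
  assert(non_baker.PickEntryAddress() == kNoReg);  // Recompute with a scratch register.
  std::printf("entry-address selection sketch OK\n");
  return 0;
}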
diff --git a/compiler/optimizing/code_generator_mips.cc b/compiler/optimizing/code_generator_mips.cc
index 0677dad078..c9dde7cc55 100644
--- a/compiler/optimizing/code_generator_mips.cc
+++ b/compiler/optimizing/code_generator_mips.cc
@@ -1914,6 +1914,8 @@ void InstructionCodeGeneratorMIPS::VisitArrayGet(HArrayGet* instruction) {
auto null_checker = GetImplicitNullChecker(instruction);
Primitive::Type type = instruction->GetType();
+ const bool maybe_compressed_char_at = mirror::kUseStringCompression &&
+ instruction->IsStringCharAt();
switch (type) {
case Primitive::kPrimBoolean: {
Register out = locations->Out().AsRegister<Register>();
@@ -1957,14 +1959,54 @@ void InstructionCodeGeneratorMIPS::VisitArrayGet(HArrayGet* instruction) {
case Primitive::kPrimChar: {
Register out = locations->Out().AsRegister<Register>();
+ if (maybe_compressed_char_at) {
+ uint32_t count_offset = mirror::String::CountOffset().Uint32Value();
+ __ LoadFromOffset(kLoadWord, TMP, obj, count_offset, null_checker);
+ __ Sll(TMP, TMP, 31); // Extract compression flag into the most significant bit of TMP.
+ static_assert(static_cast<uint32_t>(mirror::StringCompressionFlag::kCompressed) == 0u,
+ "Expecting 0=compressed, 1=uncompressed");
+ }
if (index.IsConstant()) {
- size_t offset =
- (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_2) + data_offset;
- __ LoadFromOffset(kLoadUnsignedHalfword, out, obj, offset, null_checker);
+ int32_t const_index = index.GetConstant()->AsIntConstant()->GetValue();
+ if (maybe_compressed_char_at) {
+ MipsLabel uncompressed_load, done;
+ __ Bnez(TMP, &uncompressed_load);
+ __ LoadFromOffset(kLoadUnsignedByte,
+ out,
+ obj,
+ data_offset + (const_index << TIMES_1));
+ __ B(&done);
+ __ Bind(&uncompressed_load);
+ __ LoadFromOffset(kLoadUnsignedHalfword,
+ out,
+ obj,
+ data_offset + (const_index << TIMES_2));
+ __ Bind(&done);
+ } else {
+ __ LoadFromOffset(kLoadUnsignedHalfword,
+ out,
+ obj,
+ data_offset + (const_index << TIMES_2),
+ null_checker);
+ }
} else {
- __ Sll(TMP, index.AsRegister<Register>(), TIMES_2);
- __ Addu(TMP, obj, TMP);
- __ LoadFromOffset(kLoadUnsignedHalfword, out, TMP, data_offset, null_checker);
+ Register index_reg = index.AsRegister<Register>();
+ if (maybe_compressed_char_at) {
+ MipsLabel uncompressed_load, done;
+ __ Bnez(TMP, &uncompressed_load);
+ __ Addu(TMP, obj, index_reg);
+ __ LoadFromOffset(kLoadUnsignedByte, out, TMP, data_offset);
+ __ B(&done);
+ __ Bind(&uncompressed_load);
+ __ Sll(TMP, index_reg, TIMES_2);
+ __ Addu(TMP, obj, TMP);
+ __ LoadFromOffset(kLoadUnsignedHalfword, out, TMP, data_offset);
+ __ Bind(&done);
+ } else {
+ __ Sll(TMP, index_reg, TIMES_2);
+ __ Addu(TMP, obj, TMP);
+ __ LoadFromOffset(kLoadUnsignedHalfword, out, TMP, data_offset, null_checker);
+ }
}
break;
}
@@ -2046,6 +2088,10 @@ void InstructionCodeGeneratorMIPS::VisitArrayLength(HArrayLength* instruction) {
Register out = locations->Out().AsRegister<Register>();
__ LoadFromOffset(kLoadWord, out, obj, offset);
codegen_->MaybeRecordImplicitNullCheck(instruction);
+ // Mask out compression flag from String's array length.
+ if (mirror::kUseStringCompression && instruction->IsStringLength()) {
+ __ Srl(out, out, 1u);
+ }
}
Location LocationsBuilderMIPS::RegisterOrZeroConstant(HInstruction* instruction) {
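The MIPS String.charAt() and String.length() changes both decode the same `count` field layout: per the static_assert above, bit 0 is the compression flag (0 = compressed) and the remaining bits are the character count. A small host-side sketch of that decoding, assuming exactly that layout (the struct and method names are illustrative):

#include <cassert>
#include <cstdint>

// Assumed layout of mirror::String::count_ with string compression enabled:
// bit 0 = compression flag (0 = compressed, 1 = uncompressed), bits 1..31 = length.
struct CountField {
  uint32_t raw;
  uint32_t Length() const { return raw >> 1; }           // What VisitArrayLength emits as Srl(out, out, 1).
  bool IsCompressed() const { return (raw & 1u) == 0u; }
  // Byte offset of character `i` inside the value array, mirroring the
  // TIMES_1 / TIMES_2 choice in the charAt fast path above.
  uint32_t CharOffset(uint32_t i) const { return IsCompressed() ? i : i * 2u; }
};

int main() {
  CountField compressed{(5u << 1) | 0u};     // Five chars stored as 8-bit values.
  CountField uncompressed{(5u << 1) | 1u};   // Five 16-bit chars.
  assert(compressed.Length() == 5 && compressed.IsCompressed());
  assert(uncompressed.Length() == 5 && !uncompressed.IsCompressed());
  assert(compressed.CharOffset(3) == 3 && uncompressed.CharOffset(3) == 6);
  return 0;
}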
diff --git a/compiler/optimizing/code_generator_mips64.cc b/compiler/optimizing/code_generator_mips64.cc
index 4c8dabfede..5be0da4011 100644
--- a/compiler/optimizing/code_generator_mips64.cc
+++ b/compiler/optimizing/code_generator_mips64.cc
@@ -1490,6 +1490,8 @@ void InstructionCodeGeneratorMIPS64::VisitArrayGet(HArrayGet* instruction) {
uint32_t data_offset = CodeGenerator::GetArrayDataOffset(instruction);
Primitive::Type type = instruction->GetType();
+ const bool maybe_compressed_char_at = mirror::kUseStringCompression &&
+ instruction->IsStringCharAt();
switch (type) {
case Primitive::kPrimBoolean: {
GpuRegister out = locations->Out().AsRegister<GpuRegister>();
@@ -1533,14 +1535,54 @@ void InstructionCodeGeneratorMIPS64::VisitArrayGet(HArrayGet* instruction) {
case Primitive::kPrimChar: {
GpuRegister out = locations->Out().AsRegister<GpuRegister>();
+ if (maybe_compressed_char_at) {
+ uint32_t count_offset = mirror::String::CountOffset().Uint32Value();
+ __ LoadFromOffset(kLoadWord, TMP, obj, count_offset);
+ codegen_->MaybeRecordImplicitNullCheck(instruction);
+ __ Dext(TMP, TMP, 0, 1);
+ static_assert(static_cast<uint32_t>(mirror::StringCompressionFlag::kCompressed) == 0u,
+ "Expecting 0=compressed, 1=uncompressed");
+ }
if (index.IsConstant()) {
- size_t offset =
- (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_2) + data_offset;
- __ LoadFromOffset(kLoadUnsignedHalfword, out, obj, offset);
+ int32_t const_index = index.GetConstant()->AsIntConstant()->GetValue();
+ if (maybe_compressed_char_at) {
+ Mips64Label uncompressed_load, done;
+ __ Bnezc(TMP, &uncompressed_load);
+ __ LoadFromOffset(kLoadUnsignedByte,
+ out,
+ obj,
+ data_offset + (const_index << TIMES_1));
+ __ Bc(&done);
+ __ Bind(&uncompressed_load);
+ __ LoadFromOffset(kLoadUnsignedHalfword,
+ out,
+ obj,
+ data_offset + (const_index << TIMES_2));
+ __ Bind(&done);
+ } else {
+ __ LoadFromOffset(kLoadUnsignedHalfword,
+ out,
+ obj,
+ data_offset + (const_index << TIMES_2));
+ }
} else {
- __ Dsll(TMP, index.AsRegister<GpuRegister>(), TIMES_2);
- __ Daddu(TMP, obj, TMP);
- __ LoadFromOffset(kLoadUnsignedHalfword, out, TMP, data_offset);
+ GpuRegister index_reg = index.AsRegister<GpuRegister>();
+ if (maybe_compressed_char_at) {
+ Mips64Label uncompressed_load, done;
+ __ Bnezc(TMP, &uncompressed_load);
+ __ Daddu(TMP, obj, index_reg);
+ __ LoadFromOffset(kLoadUnsignedByte, out, TMP, data_offset);
+ __ Bc(&done);
+ __ Bind(&uncompressed_load);
+ __ Dsll(TMP, index_reg, TIMES_2);
+ __ Daddu(TMP, obj, TMP);
+ __ LoadFromOffset(kLoadUnsignedHalfword, out, TMP, data_offset);
+ __ Bind(&done);
+ } else {
+ __ Dsll(TMP, index_reg, TIMES_2);
+ __ Daddu(TMP, obj, TMP);
+ __ LoadFromOffset(kLoadUnsignedHalfword, out, TMP, data_offset);
+ }
}
break;
}
@@ -1608,7 +1650,9 @@ void InstructionCodeGeneratorMIPS64::VisitArrayGet(HArrayGet* instruction) {
LOG(FATAL) << "Unreachable type " << instruction->GetType();
UNREACHABLE();
}
- codegen_->MaybeRecordImplicitNullCheck(instruction);
+ if (!maybe_compressed_char_at) {
+ codegen_->MaybeRecordImplicitNullCheck(instruction);
+ }
}
void LocationsBuilderMIPS64::VisitArrayLength(HArrayLength* instruction) {
@@ -1624,6 +1668,10 @@ void InstructionCodeGeneratorMIPS64::VisitArrayLength(HArrayLength* instruction)
GpuRegister out = locations->Out().AsRegister<GpuRegister>();
__ LoadFromOffset(kLoadWord, out, obj, offset);
codegen_->MaybeRecordImplicitNullCheck(instruction);
+ // Mask out compression flag from String's array length.
+ if (mirror::kUseStringCompression && instruction->IsStringLength()) {
+ __ Srl(out, out, 1u);
+ }
}
void LocationsBuilderMIPS64::VisitArraySet(HArraySet* instruction) {
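The MIPS64 hunks mirror the MIPS ones; the extra twist is that when compression may be involved, the implicit null check is recorded on the `count` load (the first access to the object), so the generic record after the element load is skipped. A host-side emulation of the two charAt paths the branches select between, assuming a little-endian value array (names are illustrative):

#include <cassert>
#include <cstdint>
#include <vector>

// Host-side emulation of the compressed/uncompressed charAt paths emitted above
// (Bnezc on the flag, then LoadUnsignedByte vs LoadUnsignedHalfword).
uint16_t CharAt(const std::vector<uint8_t>& data, bool compressed, size_t index) {
  if (compressed) {
    return data[index];                                      // 8-bit load, TIMES_1 indexing.
  }
  return static_cast<uint16_t>(data[index * 2]) |            // 16-bit load, TIMES_2 indexing
         (static_cast<uint16_t>(data[index * 2 + 1]) << 8);  // (little-endian, as on mips64el).
}

int main() {
  std::vector<uint8_t> compressed = {'a', 'b', 'c'};
  std::vector<uint8_t> uncompressed = {'a', 0, 'b', 0, 'c', 0};
  assert(CharAt(compressed, true, 2) == 'c');
  assert(CharAt(uncompressed, false, 2) == 'c');
  return 0;
}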
diff --git a/compiler/optimizing/code_generator_x86.cc b/compiler/optimizing/code_generator_x86.cc
index 137b55423b..09612c8dbf 100644
--- a/compiler/optimizing/code_generator_x86.cc
+++ b/compiler/optimizing/code_generator_x86.cc
@@ -6057,6 +6057,7 @@ void LocationsBuilderX86::VisitLoadClass(HLoadClass* cls) {
cls,
Location::RegisterLocation(calling_convention.GetRegisterAt(0)),
Location::RegisterLocation(EAX));
+ DCHECK_EQ(calling_convention.GetRegisterAt(0), EAX);
return;
}
DCHECK(!cls->NeedsAccessCheck());
@@ -6076,6 +6077,17 @@ void LocationsBuilderX86::VisitLoadClass(HLoadClass* cls) {
locations->SetInAt(0, Location::RequiresRegister());
}
locations->SetOut(Location::RequiresRegister());
+ if (load_kind == HLoadClass::LoadKind::kBssEntry) {
+ if (!kUseReadBarrier || kUseBakerReadBarrier) {
+ // Rely on the type resolution and/or initialization to save everything.
+ RegisterSet caller_saves = RegisterSet::Empty();
+ InvokeRuntimeCallingConvention calling_convention;
+ caller_saves.Add(Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
+ locations->SetCustomSlowPathCallerSaves(caller_saves);
+ } else {
+ // For non-Baker read barrier we have a temp-clobbering call.
+ }
+ }
}
Label* CodeGeneratorX86::NewJitRootClassPatch(const DexFile& dex_file,
@@ -6158,7 +6170,7 @@ void InstructionCodeGeneratorX86::VisitLoadClass(HLoadClass* cls) NO_THREAD_SAFE
Label* fixup_label = codegen_->NewJitRootClassPatch(
cls->GetDexFile(), cls->GetTypeIndex(), cls->GetClass());
// /* GcRoot<mirror::Class> */ out = *address
- GenerateGcRootFieldLoad(cls, out_loc, address, fixup_label, kCompilerReadBarrierOption);
+ GenerateGcRootFieldLoad(cls, out_loc, address, fixup_label, read_barrier_option);
break;
}
case HLoadClass::LoadKind::kDexCacheViaMethod:
@@ -6250,7 +6262,7 @@ void LocationsBuilderX86::VisitLoadString(HLoadString* load) {
locations->SetOut(Location::RequiresRegister());
if (load_kind == HLoadString::LoadKind::kBssEntry) {
if (!kUseReadBarrier || kUseBakerReadBarrier) {
- // Rely on the pResolveString and/or marking to save everything.
+ // Rely on the pResolveString to save everything.
RegisterSet caller_saves = RegisterSet::Empty();
InvokeRuntimeCallingConvention calling_convention;
caller_saves.Add(Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
@@ -7136,9 +7148,10 @@ void InstructionCodeGeneratorX86::GenerateGcRootFieldLoad(
// Fast path implementation of art::ReadBarrier::BarrierForRoot when
// Baker's read barrier are used:
//
- // root = *address;
- // if (Thread::Current()->GetIsGcMarking()) {
- // root = ReadBarrier::Mark(root)
+ // root = obj.field;
+ // temp = Thread::Current()->pReadBarrierMarkReg ## root.reg()
+ // if (temp != null) {
+ // root = temp(root)
// }
// /* GcRoot<mirror::Object> */ root = *address
@@ -7159,8 +7172,11 @@ void InstructionCodeGeneratorX86::GenerateGcRootFieldLoad(
instruction, root, /* unpoison_ref_before_marking */ false);
codegen_->AddSlowPath(slow_path);
- __ fs()->cmpl(Address::Absolute(Thread::IsGcMarkingOffset<kX86PointerSize>().Int32Value()),
- Immediate(0));
+ // Test the entrypoint (`Thread::Current()->pReadBarrierMarkReg ## root.reg()`).
+ const int32_t entry_point_offset =
+ CodeGenerator::GetReadBarrierMarkEntryPointsOffset<kX86PointerSize>(root.reg());
+ __ fs()->cmpl(Address::Absolute(entry_point_offset), Immediate(0));
+ // The entrypoint is null when the GC is not marking.
__ j(kNotEqual, slow_path->GetEntryLabel());
__ Bind(slow_path->GetExitLabel());
} else {
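The GenerateGcRootFieldLoad change replaces the `Thread::IsGcMarking` test with a null test on the per-register `pReadBarrierMarkRegN` entrypoint, which doubles as the "is the GC marking" signal. A toy model of that dispatch, with an invented thread-local entrypoint table standing in for the real Thread fields:

#include <cassert>
#include <cstdint>

// Hypothetical stand-in for the per-register read barrier mark entrypoints
// stored in the Thread; a null slot means "GC is not marking".
using MarkFn = uintptr_t (*)(uintptr_t ref);

struct FakeThread {
  MarkFn read_barrier_mark_reg[16] = {};  // Indexed by the root's register number.
};

uintptr_t MarkTo42(uintptr_t) { return 42; }

// Mirrors the emitted sequence: load the root, test the entrypoint slot for the
// root's register, and only take the slow path when it is non-null.
uintptr_t LoadGcRoot(const FakeThread& self, uintptr_t root, int root_reg) {
  MarkFn mark = self.read_barrier_mark_reg[root_reg];
  if (mark != nullptr) {  // cmpl $0, %fs:entry_point_offset ; jne slow_path
    root = mark(root);
  }
  return root;
}

int main() {
  FakeThread not_marking;
  assert(LoadGcRoot(not_marking, 7, /*root_reg=*/3) == 7);  // Fast path, no call.
  FakeThread marking;
  marking.read_barrier_mark_reg[3] = MarkTo42;
  assert(LoadGcRoot(marking, 7, /*root_reg=*/3) == 42);     // Slow path invoked.
  return 0;
}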
diff --git a/compiler/optimizing/code_generator_x86_64.cc b/compiler/optimizing/code_generator_x86_64.cc
index c5367ce86e..0879992e32 100644
--- a/compiler/optimizing/code_generator_x86_64.cc
+++ b/compiler/optimizing/code_generator_x86_64.cc
@@ -245,9 +245,8 @@ class LoadClassSlowPathX86_64 : public SlowPathCode {
SaveLiveRegisters(codegen, locations);
- InvokeRuntimeCallingConvention calling_convention;
- __ movl(CpuRegister(calling_convention.GetRegisterAt(0)),
- Immediate(cls_->GetTypeIndex().index_));
+ // Custom calling convention: RAX serves as both input and output.
+ __ movl(CpuRegister(RAX), Immediate(cls_->GetTypeIndex().index_));
x86_64_codegen->InvokeRuntime(do_clinit_ ? kQuickInitializeStaticStorage : kQuickInitializeType,
instruction_,
dex_pc_,
@@ -5456,10 +5455,10 @@ HLoadClass::LoadKind CodeGeneratorX86_64::GetSupportedLoadClassKind(
void LocationsBuilderX86_64::VisitLoadClass(HLoadClass* cls) {
HLoadClass::LoadKind load_kind = cls->GetLoadKind();
if (load_kind == HLoadClass::LoadKind::kDexCacheViaMethod) {
- InvokeRuntimeCallingConvention calling_convention;
+ // Custom calling convention: RAX serves as both input and output.
CodeGenerator::CreateLoadClassRuntimeCallLocationSummary(
cls,
- Location::RegisterLocation(calling_convention.GetRegisterAt(0)),
+ Location::RegisterLocation(RAX),
Location::RegisterLocation(RAX));
return;
}
@@ -5478,6 +5477,17 @@ void LocationsBuilderX86_64::VisitLoadClass(HLoadClass* cls) {
locations->SetInAt(0, Location::RequiresRegister());
}
locations->SetOut(Location::RequiresRegister());
+ if (load_kind == HLoadClass::LoadKind::kBssEntry) {
+ if (!kUseReadBarrier || kUseBakerReadBarrier) {
+ // Rely on the type resolution and/or initialization to save everything.
+ // Custom calling convention: RAX serves as both input and output.
+ RegisterSet caller_saves = RegisterSet::Empty();
+ caller_saves.Add(Location::RegisterLocation(RAX));
+ locations->SetCustomSlowPathCallerSaves(caller_saves);
+ } else {
+ // For non-Baker read barrier we have a temp-clobbering call.
+ }
+ }
}
Label* CodeGeneratorX86_64::NewJitRootClassPatch(const DexFile& dex_file,
@@ -5553,7 +5563,7 @@ void InstructionCodeGeneratorX86_64::VisitLoadClass(HLoadClass* cls) NO_THREAD_S
Label* fixup_label =
codegen_->NewJitRootClassPatch(cls->GetDexFile(), cls->GetTypeIndex(), cls->GetClass());
// /* GcRoot<mirror::Class> */ out = *address
- GenerateGcRootFieldLoad(cls, out_loc, address, fixup_label, kCompilerReadBarrierOption);
+ GenerateGcRootFieldLoad(cls, out_loc, address, fixup_label, read_barrier_option);
break;
}
default:
@@ -5629,7 +5639,7 @@ void LocationsBuilderX86_64::VisitLoadString(HLoadString* load) {
locations->SetOut(Location::RequiresRegister());
if (load->GetLoadKind() == HLoadString::LoadKind::kBssEntry) {
if (!kUseReadBarrier || kUseBakerReadBarrier) {
- // Rely on the pResolveString and/or marking to save everything.
+ // Rely on the pResolveString to save everything.
// Custom calling convention: RAX serves as both input and output.
RegisterSet caller_saves = RegisterSet::Empty();
caller_saves.Add(Location::RegisterLocation(RAX));
@@ -6501,9 +6511,10 @@ void InstructionCodeGeneratorX86_64::GenerateGcRootFieldLoad(
// Fast path implementation of art::ReadBarrier::BarrierForRoot when
// Baker's read barrier are used:
//
- // root = *address;
- // if (Thread::Current()->GetIsGcMarking()) {
- // root = ReadBarrier::Mark(root)
+ // root = obj.field;
+ // temp = Thread::Current()->pReadBarrierMarkReg ## root.reg()
+ // if (temp != null) {
+ // root = temp(root)
// }
// /* GcRoot<mirror::Object> */ root = *address
@@ -6524,9 +6535,11 @@ void InstructionCodeGeneratorX86_64::GenerateGcRootFieldLoad(
instruction, root, /* unpoison_ref_before_marking */ false);
codegen_->AddSlowPath(slow_path);
- __ gs()->cmpl(Address::Absolute(Thread::IsGcMarkingOffset<kX86_64PointerSize>().Int32Value(),
- /* no_rip */ true),
- Immediate(0));
+ // Test the `Thread::Current()->pReadBarrierMarkReg ## root.reg()` entrypoint.
+ const int32_t entry_point_offset =
+ CodeGenerator::GetReadBarrierMarkEntryPointsOffset<kX86_64PointerSize>(root.reg());
+ __ gs()->cmpl(Address::Absolute(entry_point_offset, /* no_rip */ true), Immediate(0));
+ // The entrypoint is null when the GC is not marking.
__ j(kNotEqual, slow_path->GetEntryLabel());
__ Bind(slow_path->GetExitLabel());
} else {
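One plausible reading of SetCustomSlowPathCallerSaves(), consistent with the comments above, is that a kSaveEverything entrypoint preserves every register except those listed, so a slow path only needs to spill live registers that intersect the custom set; with only RAX (the result register) listed, there is typically nothing to save. A sketch of that intersection, under that assumption and with illustrative register numbers:

#include <bitset>
#include <cassert>

// Sketch of why SetCustomSlowPathCallerSaves() with only {RAX} helps: only live
// registers that fall inside the caller-save set must be spilled around the call.
using Regs = std::bitset<16>;

Regs RegistersToSpill(Regs live_at_slow_path, Regs caller_saves) {
  return live_at_slow_path & caller_saves;  // Everything else survives the call.
}

int main() {
  constexpr int kRax = 0, kRbx = 3, kRsi = 6;
  Regs live; live.set(kRbx); live.set(kRsi);          // Values the fast path still needs.
  Regs custom; custom.set(kRax);                      // Only RAX is clobbered (the result).
  assert(RegistersToSpill(live, custom).none());      // Nothing to save around the call.
  Regs full; full.set();                              // A conventional call clobbers much more.
  assert(RegistersToSpill(live, full).count() == 2);  // Would have to spill RBX and RSI.
  return 0;
}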
diff --git a/compiler/optimizing/intrinsics_arm64.cc b/compiler/optimizing/intrinsics_arm64.cc
index b107280bd5..86e54294ae 100644
--- a/compiler/optimizing/intrinsics_arm64.cc
+++ b/compiler/optimizing/intrinsics_arm64.cc
@@ -115,13 +115,18 @@ class IntrinsicSlowPathARM64 : public SlowPathCodeARM64 {
MoveArguments(invoke_, codegen);
- if (invoke_->IsInvokeStaticOrDirect()) {
- codegen->GenerateStaticOrDirectCall(invoke_->AsInvokeStaticOrDirect(),
- LocationFrom(kArtMethodRegister));
- } else {
- codegen->GenerateVirtualCall(invoke_->AsInvokeVirtual(), LocationFrom(kArtMethodRegister));
+ {
+ // Ensure that between the BLR (emitted by Generate*Call) and RecordPcInfo there
+ // are no pools emitted.
+ vixl::EmissionCheckScope guard(codegen->GetVIXLAssembler(), kInvokeCodeMarginSizeInBytes);
+ if (invoke_->IsInvokeStaticOrDirect()) {
+ codegen->GenerateStaticOrDirectCall(invoke_->AsInvokeStaticOrDirect(),
+ LocationFrom(kArtMethodRegister));
+ } else {
+ codegen->GenerateVirtualCall(invoke_->AsInvokeVirtual(), LocationFrom(kArtMethodRegister));
+ }
+ codegen->RecordPcInfo(invoke_, invoke_->GetDexPc(), this);
}
- codegen->RecordPcInfo(invoke_, invoke_->GetDexPc(), this);
// Copy the result back to the expected output.
Location out = invoke_->GetLocations()->Out();
@@ -980,11 +985,12 @@ void IntrinsicLocationsBuilderARM64::VisitUnsafePutLongVolatile(HInvoke* invoke)
CreateIntIntIntIntToVoid(arena_, invoke);
}
-static void GenUnsafePut(LocationSummary* locations,
+static void GenUnsafePut(HInvoke* invoke,
Primitive::Type type,
bool is_volatile,
bool is_ordered,
CodeGeneratorARM64* codegen) {
+ LocationSummary* locations = invoke->GetLocations();
MacroAssembler* masm = codegen->GetVIXLAssembler();
Register base = WRegisterFrom(locations->InAt(1)); // Object pointer.
@@ -1007,7 +1013,7 @@ static void GenUnsafePut(LocationSummary* locations,
}
if (is_volatile || is_ordered) {
- codegen->StoreRelease(type, source, mem_op);
+ codegen->StoreRelease(invoke, type, source, mem_op, /* needs_null_check */ false);
} else {
codegen->Store(type, source, mem_op);
}
@@ -1020,63 +1026,63 @@ static void GenUnsafePut(LocationSummary* locations,
}
void IntrinsicCodeGeneratorARM64::VisitUnsafePut(HInvoke* invoke) {
- GenUnsafePut(invoke->GetLocations(),
+ GenUnsafePut(invoke,
Primitive::kPrimInt,
/* is_volatile */ false,
/* is_ordered */ false,
codegen_);
}
void IntrinsicCodeGeneratorARM64::VisitUnsafePutOrdered(HInvoke* invoke) {
- GenUnsafePut(invoke->GetLocations(),
+ GenUnsafePut(invoke,
Primitive::kPrimInt,
/* is_volatile */ false,
/* is_ordered */ true,
codegen_);
}
void IntrinsicCodeGeneratorARM64::VisitUnsafePutVolatile(HInvoke* invoke) {
- GenUnsafePut(invoke->GetLocations(),
+ GenUnsafePut(invoke,
Primitive::kPrimInt,
/* is_volatile */ true,
/* is_ordered */ false,
codegen_);
}
void IntrinsicCodeGeneratorARM64::VisitUnsafePutObject(HInvoke* invoke) {
- GenUnsafePut(invoke->GetLocations(),
+ GenUnsafePut(invoke,
Primitive::kPrimNot,
/* is_volatile */ false,
/* is_ordered */ false,
codegen_);
}
void IntrinsicCodeGeneratorARM64::VisitUnsafePutObjectOrdered(HInvoke* invoke) {
- GenUnsafePut(invoke->GetLocations(),
+ GenUnsafePut(invoke,
Primitive::kPrimNot,
/* is_volatile */ false,
/* is_ordered */ true,
codegen_);
}
void IntrinsicCodeGeneratorARM64::VisitUnsafePutObjectVolatile(HInvoke* invoke) {
- GenUnsafePut(invoke->GetLocations(),
+ GenUnsafePut(invoke,
Primitive::kPrimNot,
/* is_volatile */ true,
/* is_ordered */ false,
codegen_);
}
void IntrinsicCodeGeneratorARM64::VisitUnsafePutLong(HInvoke* invoke) {
- GenUnsafePut(invoke->GetLocations(),
+ GenUnsafePut(invoke,
Primitive::kPrimLong,
/* is_volatile */ false,
/* is_ordered */ false,
codegen_);
}
void IntrinsicCodeGeneratorARM64::VisitUnsafePutLongOrdered(HInvoke* invoke) {
- GenUnsafePut(invoke->GetLocations(),
+ GenUnsafePut(invoke,
Primitive::kPrimLong,
/* is_volatile */ false,
/* is_ordered */ true,
codegen_);
}
void IntrinsicCodeGeneratorARM64::VisitUnsafePutLongVolatile(HInvoke* invoke) {
- GenUnsafePut(invoke->GetLocations(),
+ GenUnsafePut(invoke,
Primitive::kPrimLong,
/* is_volatile */ true,
/* is_ordered */ false,
@@ -2907,9 +2913,13 @@ void IntrinsicCodeGeneratorARM64::VisitReferenceGetReferent(HInvoke* invoke) {
}
__ Cbnz(temp0, slow_path->GetEntryLabel());
- // Fast path.
- __ Ldr(out, HeapOperand(obj, mirror::Reference::ReferentOffset().Int32Value()));
- codegen_->MaybeRecordImplicitNullCheck(invoke);
+ {
+ // Ensure that between load and MaybeRecordImplicitNullCheck there are no pools emitted.
+ vixl::EmissionCheckScope guard(codegen_->GetVIXLAssembler(), kMaxMacroInstructionSizeInBytes);
+ // Fast path.
+ __ Ldr(out, HeapOperand(obj, mirror::Reference::ReferentOffset().Int32Value()));
+ codegen_->MaybeRecordImplicitNullCheck(invoke);
+ }
codegen_->GetAssembler()->MaybeUnpoisonHeapReference(out);
__ Bind(slow_path->GetExitLabel());
}
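The intrinsics_arm64 changes wrap the call (or load) together with the PC-recording in a vixl::EmissionCheckScope, so no literal or veneer pool can be emitted between the instruction and RecordPcInfo/MaybeRecordImplicitNullCheck, keeping the recorded PC adjacent to the instruction it describes. A toy RAII model of that guard (not the VIXL implementation):

#include <cassert>
#include <cstddef>
#include <cstdio>

struct ToyAssembler {
  size_t cursor = 0;
  bool pools_blocked = false;
  void Emit(size_t bytes) { cursor += bytes; }
};

// On construction, blocks pool emission and reserves `max_size` bytes; on
// destruction, checks the code emitted inside the scope stayed within budget.
class ToyEmissionCheckScope {
 public:
  ToyEmissionCheckScope(ToyAssembler* masm, size_t max_size)
      : masm_(masm), start_(masm->cursor), max_size_(max_size) {
    masm_->pools_blocked = true;  // No literal/veneer pools inside the scope.
  }
  ~ToyEmissionCheckScope() {
    assert(masm_->cursor - start_ <= max_size_);
    masm_->pools_blocked = false;
  }
 private:
  ToyAssembler* masm_;
  size_t start_;
  size_t max_size_;
};

int main() {
  ToyAssembler masm;
  {
    ToyEmissionCheckScope guard(&masm, /*max_size=*/8);
    masm.Emit(4);  // e.g. the BLR / LDR
    // RecordPcInfo would run here; masm.cursor is guaranteed to point right
    // after the instruction because no pool could be emitted in between.
    masm.Emit(4);
  }
  std::printf("scope ok, emitted %zu bytes\n", masm.cursor);
  return 0;
}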
diff --git a/compiler/optimizing/intrinsics_mips.cc b/compiler/optimizing/intrinsics_mips.cc
index 6cf9b83d44..64a68403e9 100644
--- a/compiler/optimizing/intrinsics_mips.cc
+++ b/compiler/optimizing/intrinsics_mips.cc
@@ -2004,31 +2004,48 @@ void IntrinsicCodeGeneratorMIPS::VisitStringEquals(HInvoke* invoke) {
__ Lw(temp2, arg, class_offset);
__ Bne(temp1, temp2, &return_false);
- // Load lengths of this and argument strings.
+ // Load `count` fields of this and argument strings.
__ Lw(temp1, str, count_offset);
__ Lw(temp2, arg, count_offset);
- // Check if lengths are equal, return false if they're not.
+ // Check if `count` fields are equal, return false if they're not.
+  // Also compares the compression style; if it differs, return false.
__ Bne(temp1, temp2, &return_false);
- // Return true if both strings are empty.
+ // Return true if both strings are empty. Even with string compression `count == 0` means empty.
+ static_assert(static_cast<uint32_t>(mirror::StringCompressionFlag::kCompressed) == 0u,
+ "Expecting 0=compressed, 1=uncompressed");
__ Beqz(temp1, &return_true);
// Don't overwrite input registers
__ Move(TMP, str);
__ Move(temp3, arg);
- // Assertions that must hold in order to compare strings 2 characters at a time.
+ // Assertions that must hold in order to compare strings 4 bytes at a time.
DCHECK_ALIGNED(value_offset, 4);
static_assert(IsAligned<4>(kObjectAlignment), "String of odd length is not zero padded");
- // Loop to compare strings 2 characters at a time starting at the beginning of the string.
- // Ok to do this because strings are zero-padded.
+ // For string compression, calculate the number of bytes to compare (not chars).
+ if (mirror::kUseStringCompression) {
+ // Extract compression flag.
+ if (IsR2OrNewer()) {
+ __ Ext(temp2, temp1, 0, 1);
+ } else {
+ __ Sll(temp2, temp1, 31);
+ __ Srl(temp2, temp2, 31);
+ }
+ __ Srl(temp1, temp1, 1); // Extract length.
+ __ Sllv(temp1, temp1, temp2); // Double the byte count if uncompressed.
+ }
+
+ // Loop to compare strings 4 bytes at a time starting at the beginning of the string.
+ // Ok to do this because strings are zero-padded to kObjectAlignment.
__ Bind(&loop);
__ Lw(out, TMP, value_offset);
__ Lw(temp2, temp3, value_offset);
__ Bne(out, temp2, &return_false);
__ Addiu(TMP, TMP, 4);
__ Addiu(temp3, temp3, 4);
- __ Addiu(temp1, temp1, -2);
+ // With string compression, we have compared 4 bytes, otherwise 2 chars.
+ __ Addiu(temp1, temp1, mirror::kUseStringCompression ? -4 : -2);
__ Bgtz(temp1, &loop);
// Return true and exit the function.
@@ -2578,6 +2595,30 @@ void IntrinsicCodeGeneratorMIPS::VisitStringGetCharsNoCheck(HInvoke* invoke) {
__ Addu(dstPtr, dstPtr, AT);
}
+ if (mirror::kUseStringCompression) {
+ MipsLabel uncompressed_copy, compressed_loop;
+ const uint32_t count_offset = mirror::String::CountOffset().Uint32Value();
+ // Load count field and extract compression flag.
+ __ LoadFromOffset(kLoadWord, TMP, srcObj, count_offset);
+ __ Sll(TMP, TMP, 31);
+
+ // If string is uncompressed, use memcpy() path.
+ __ Bnez(TMP, &uncompressed_copy);
+
+    // Copy loop for compressed src, copying 1 character (8-bit to 16-bit) at a time.
+ __ Addu(srcPtr, srcObj, srcBegin);
+ __ Bind(&compressed_loop);
+ __ LoadFromOffset(kLoadUnsignedByte, TMP, srcPtr, value_offset);
+ __ StoreToOffset(kStoreHalfword, TMP, dstPtr, 0);
+ __ Addiu(numChrs, numChrs, -1);
+ __ Addiu(srcPtr, srcPtr, 1);
+ __ Addiu(dstPtr, dstPtr, 2);
+ __ Bnez(numChrs, &compressed_loop);
+
+ __ B(&done);
+ __ Bind(&uncompressed_copy);
+ }
+
// Calculate source address.
__ Addiu(srcPtr, srcObj, value_offset);
if (IsR6()) {
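The StringEquals changes rely on two properties of the `count` field, assuming bit 0 is the compression flag (1 = uncompressed): comparing the raw fields checks length and compression style in a single branch, and the number of value-array bytes to scan is `length << flag`, which is what the Ext/Srl/Sllv (or Sll/Srl on pre-R2) sequence computes. A host-side sketch:

#include <cassert>
#include <cstdint>

// Number of value-array bytes the comparison loop has to scan.
uint32_t BytesToCompare(uint32_t count) {
  uint32_t flag = count & 1u;    // Ext(temp2, temp1, 0, 1)
  uint32_t length = count >> 1;  // Srl(temp1, temp1, 1)
  return length << flag;         // Sllv(temp1, temp1, temp2)
}

int main() {
  assert(BytesToCompare((6u << 1) | 0u) == 6);   // compressed: 6 chars -> 6 bytes
  assert(BytesToCompare((6u << 1) | 1u) == 12);  // uncompressed: 6 chars -> 12 bytes
  // Same length but different compression -> raw counts differ -> strings not equal.
  assert(((6u << 1) | 0u) != ((6u << 1) | 1u));
  return 0;
}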
diff --git a/compiler/optimizing/intrinsics_mips64.cc b/compiler/optimizing/intrinsics_mips64.cc
index 00a1fa11bb..3888828722 100644
--- a/compiler/optimizing/intrinsics_mips64.cc
+++ b/compiler/optimizing/intrinsics_mips64.cc
@@ -1607,31 +1607,42 @@ void IntrinsicCodeGeneratorMIPS64::VisitStringEquals(HInvoke* invoke) {
__ Lw(temp2, arg, class_offset);
__ Bnec(temp1, temp2, &return_false);
- // Load lengths of this and argument strings.
+ // Load `count` fields of this and argument strings.
__ Lw(temp1, str, count_offset);
__ Lw(temp2, arg, count_offset);
- // Check if lengths are equal, return false if they're not.
+ // Check if `count` fields are equal, return false if they're not.
+  // Also compares the compression style; if it differs, return false.
__ Bnec(temp1, temp2, &return_false);
- // Return true if both strings are empty.
+ // Return true if both strings are empty. Even with string compression `count == 0` means empty.
+ static_assert(static_cast<uint32_t>(mirror::StringCompressionFlag::kCompressed) == 0u,
+ "Expecting 0=compressed, 1=uncompressed");
__ Beqzc(temp1, &return_true);
// Don't overwrite input registers
__ Move(TMP, str);
__ Move(temp3, arg);
- // Assertions that must hold in order to compare strings 4 characters at a time.
+ // Assertions that must hold in order to compare strings 8 bytes at a time.
DCHECK_ALIGNED(value_offset, 8);
static_assert(IsAligned<8>(kObjectAlignment), "String of odd length is not zero padded");
- // Loop to compare strings 4 characters at a time starting at the beginning of the string.
- // Ok to do this because strings are zero-padded to be 8-byte aligned.
+ if (mirror::kUseStringCompression) {
+ // For string compression, calculate the number of bytes to compare (not chars).
+ __ Dext(temp2, temp1, 0, 1); // Extract compression flag.
+ __ Srl(temp1, temp1, 1); // Extract length.
+ __ Sllv(temp1, temp1, temp2); // Double the byte count if uncompressed.
+ }
+
+ // Loop to compare strings 8 bytes at a time starting at the beginning of the string.
+ // Ok to do this because strings are zero-padded to kObjectAlignment.
__ Bind(&loop);
__ Ld(out, TMP, value_offset);
__ Ld(temp2, temp3, value_offset);
__ Bnec(out, temp2, &return_false);
__ Daddiu(TMP, TMP, 8);
__ Daddiu(temp3, temp3, 8);
- __ Addiu(temp1, temp1, -4);
+ // With string compression, we have compared 8 bytes, otherwise 4 chars.
+ __ Addiu(temp1, temp1, mirror::kUseStringCompression ? -8 : -4);
__ Bgtzc(temp1, &loop);
// Return true and exit the function.
@@ -1912,6 +1923,30 @@ void IntrinsicCodeGeneratorMIPS64::VisitStringGetCharsNoCheck(HInvoke* invoke) {
__ Daddiu(dstPtr, dstObj, data_offset);
__ Dlsa(dstPtr, dstBegin, dstPtr, char_shift);
+ if (mirror::kUseStringCompression) {
+ Mips64Label uncompressed_copy, compressed_loop;
+ const uint32_t count_offset = mirror::String::CountOffset().Uint32Value();
+ // Load count field and extract compression flag.
+ __ LoadFromOffset(kLoadWord, TMP, srcObj, count_offset);
+ __ Dext(TMP, TMP, 0, 1);
+
+ // If string is uncompressed, use memcpy() path.
+ __ Bnezc(TMP, &uncompressed_copy);
+
+    // Copy loop for compressed src, copying 1 character (8-bit to 16-bit) at a time.
+ __ Daddu(srcPtr, srcObj, srcBegin);
+ __ Bind(&compressed_loop);
+ __ LoadFromOffset(kLoadUnsignedByte, TMP, srcPtr, value_offset);
+ __ StoreToOffset(kStoreHalfword, TMP, dstPtr, 0);
+ __ Daddiu(numChrs, numChrs, -1);
+ __ Daddiu(srcPtr, srcPtr, 1);
+ __ Daddiu(dstPtr, dstPtr, 2);
+ __ Bnezc(numChrs, &compressed_loop);
+
+ __ Bc(&done);
+ __ Bind(&uncompressed_copy);
+ }
+
// Calculate source address.
__ Daddiu(srcPtr, srcObj, value_offset);
__ Dlsa(srcPtr, srcBegin, srcPtr, char_shift);
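The getCharsNoCheck addition handles a compressed source by widening one 8-bit character into a 16-bit destination slot per iteration, advancing the source pointer by 1 and the destination pointer by 2; uncompressed strings still take the memcpy() path. A host-side model of that loop (names are illustrative):

#include <cassert>
#include <cstdint>
#include <vector>

// Each 8-bit char is widened to a 16-bit char: LoadUnsignedByte + StoreHalfword.
void CopyCompressed(const uint8_t* src, uint16_t* dst, size_t num_chars) {
  while (num_chars != 0) {
    *dst++ = *src++;  // Advance src by 1 byte and dst by 2 bytes.
    --num_chars;      // Daddiu(numChrs, numChrs, -1), loop while non-zero.
  }
}

int main() {
  const uint8_t src[] = {'a', 'b', 'c', 'd'};
  std::vector<uint16_t> dst(4, 0);
  CopyCompressed(src, dst.data(), 4);
  assert(dst[0] == 'a' && dst[3] == 'd');
  return 0;
}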
diff --git a/compiler/optimizing/stack_map_stream.cc b/compiler/optimizing/stack_map_stream.cc
index f8e01b7537..1bcc8e1ace 100644
--- a/compiler/optimizing/stack_map_stream.cc
+++ b/compiler/optimizing/stack_map_stream.cc
@@ -38,19 +38,14 @@ void StackMapStream::BeginStackMapEntry(uint32_t dex_pc,
current_entry_.native_pc_code_offset = CodeOffset::FromOffset(native_pc_offset, instruction_set_);
current_entry_.register_mask = register_mask;
current_entry_.sp_mask = sp_mask;
- current_entry_.num_dex_registers = num_dex_registers;
current_entry_.inlining_depth = inlining_depth;
- current_entry_.dex_register_locations_start_index = dex_register_locations_.size();
current_entry_.inline_infos_start_index = inline_infos_.size();
- current_entry_.dex_register_map_hash = 0;
- current_entry_.same_dex_register_map_as_ = kNoSameDexMapFound;
current_entry_.stack_mask_index = 0;
- if (num_dex_registers != 0) {
- current_entry_.live_dex_registers_mask =
- ArenaBitVector::Create(allocator_, num_dex_registers, true, kArenaAllocStackMapStream);
- } else {
- current_entry_.live_dex_registers_mask = nullptr;
- }
+ current_entry_.dex_register_entry.num_dex_registers = num_dex_registers;
+ current_entry_.dex_register_entry.locations_start_index = dex_register_locations_.size();
+ current_entry_.dex_register_entry.live_dex_registers_mask = (num_dex_registers != 0)
+ ? ArenaBitVector::Create(allocator_, num_dex_registers, true, kArenaAllocStackMapStream)
+ : nullptr;
if (sp_mask != nullptr) {
stack_mask_max_ = std::max(stack_mask_max_, sp_mask->GetHighestBitSet());
@@ -65,7 +60,7 @@ void StackMapStream::BeginStackMapEntry(uint32_t dex_pc,
}
void StackMapStream::EndStackMapEntry() {
- current_entry_.same_dex_register_map_as_ = FindEntryWithTheSameDexMap();
+ current_entry_.dex_register_map_index = AddDexRegisterMapEntry(current_entry_.dex_register_entry);
stack_maps_.push_back(current_entry_);
current_entry_ = StackMapEntry();
}
@@ -91,19 +86,15 @@ void StackMapStream::AddDexRegisterEntry(DexRegisterLocation::Kind kind, int32_t
dex_register_locations_.push_back(index);
location_catalog_entries_indices_.Insert(std::make_pair(location, index));
}
-
- if (in_inline_frame_) {
- // TODO: Support sharing DexRegisterMap across InlineInfo.
- DCHECK_LT(current_dex_register_, current_inline_info_.num_dex_registers);
- current_inline_info_.live_dex_registers_mask->SetBit(current_dex_register_);
- } else {
- DCHECK_LT(current_dex_register_, current_entry_.num_dex_registers);
- current_entry_.live_dex_registers_mask->SetBit(current_dex_register_);
- current_entry_.dex_register_map_hash += (1 <<
- (current_dex_register_ % (sizeof(current_entry_.dex_register_map_hash) * kBitsPerByte)));
- current_entry_.dex_register_map_hash += static_cast<uint32_t>(value);
- current_entry_.dex_register_map_hash += static_cast<uint32_t>(kind);
- }
+ DexRegisterMapEntry* const entry = in_inline_frame_
+ ? &current_inline_info_.dex_register_entry
+ : &current_entry_.dex_register_entry;
+ DCHECK_LT(current_dex_register_, entry->num_dex_registers);
+ entry->live_dex_registers_mask->SetBit(current_dex_register_);
+ entry->hash += (1 <<
+ (current_dex_register_ % (sizeof(DexRegisterMapEntry::hash) * kBitsPerByte)));
+ entry->hash += static_cast<uint32_t>(value);
+ entry->hash += static_cast<uint32_t>(kind);
}
current_dex_register_++;
}
@@ -124,20 +115,19 @@ void StackMapStream::BeginInlineInfoEntry(ArtMethod* method,
current_inline_info_.method_index = method->GetDexMethodIndexUnchecked();
}
current_inline_info_.dex_pc = dex_pc;
- current_inline_info_.num_dex_registers = num_dex_registers;
- current_inline_info_.dex_register_locations_start_index = dex_register_locations_.size();
- if (num_dex_registers != 0) {
- current_inline_info_.live_dex_registers_mask =
- ArenaBitVector::Create(allocator_, num_dex_registers, true, kArenaAllocStackMapStream);
- } else {
- current_inline_info_.live_dex_registers_mask = nullptr;
- }
+ current_inline_info_.dex_register_entry.num_dex_registers = num_dex_registers;
+ current_inline_info_.dex_register_entry.locations_start_index = dex_register_locations_.size();
+ current_inline_info_.dex_register_entry.live_dex_registers_mask = (num_dex_registers != 0)
+ ? ArenaBitVector::Create(allocator_, num_dex_registers, true, kArenaAllocStackMapStream)
+ : nullptr;
current_dex_register_ = 0;
}
void StackMapStream::EndInlineInfoEntry() {
+ current_inline_info_.dex_register_map_index =
+ AddDexRegisterMapEntry(current_inline_info_.dex_register_entry);
DCHECK(in_inline_frame_);
- DCHECK_EQ(current_dex_register_, current_inline_info_.num_dex_registers)
+ DCHECK_EQ(current_dex_register_, current_inline_info_.dex_register_entry.num_dex_registers)
<< "Inline information contains less registers than expected";
in_inline_frame_ = false;
inline_infos_.push_back(current_inline_info_);
@@ -193,8 +183,7 @@ size_t StackMapStream::ComputeDexRegisterLocationCatalogSize() const {
return size;
}
-size_t StackMapStream::ComputeDexRegisterMapSize(uint32_t num_dex_registers,
- const BitVector* live_dex_registers_mask) const {
+size_t StackMapStream::DexRegisterMapEntry::ComputeSize(size_t catalog_size) const {
// For num_dex_registers == 0u live_dex_registers_mask may be null.
if (num_dex_registers == 0u) {
return 0u; // No register map will be emitted.
@@ -208,8 +197,7 @@ size_t StackMapStream::ComputeDexRegisterMapSize(uint32_t num_dex_registers,
// Compute the size of the set of live Dex register entries.
size_t number_of_live_dex_registers = live_dex_registers_mask->NumSetBits();
size_t map_entries_size_in_bits =
- DexRegisterMap::SingleEntrySizeInBits(location_catalog_entries_.size())
- * number_of_live_dex_registers;
+ DexRegisterMap::SingleEntrySizeInBits(catalog_size) * number_of_live_dex_registers;
size_t map_entries_size_in_bytes =
RoundUp(map_entries_size_in_bits, kBitsPerByte) / kBitsPerByte;
size += map_entries_size_in_bytes;
@@ -218,18 +206,8 @@ size_t StackMapStream::ComputeDexRegisterMapSize(uint32_t num_dex_registers,
size_t StackMapStream::ComputeDexRegisterMapsSize() const {
size_t size = 0;
- size_t inline_info_index = 0;
- for (const StackMapEntry& entry : stack_maps_) {
- if (entry.same_dex_register_map_as_ == kNoSameDexMapFound) {
- size += ComputeDexRegisterMapSize(entry.num_dex_registers, entry.live_dex_registers_mask);
- } else {
- // Entries with the same dex map will have the same offset.
- }
- for (size_t j = 0; j < entry.inlining_depth; ++j) {
- InlineInfoEntry inline_entry = inline_infos_[inline_info_index++];
- size += ComputeDexRegisterMapSize(inline_entry.num_dex_registers,
- inline_entry.live_dex_registers_mask);
- }
+ for (const DexRegisterMapEntry& entry : dex_register_entries_) {
+ size += entry.ComputeSize(location_catalog_entries_.size());
}
return size;
}
@@ -264,6 +242,30 @@ void StackMapStream::ComputeInlineInfoEncoding(InlineInfoEncoding* encoding,
encoding->SetFromSizes(method_index_max, dex_pc_max, extra_data_max, dex_register_maps_bytes);
}
+size_t StackMapStream::MaybeCopyDexRegisterMap(DexRegisterMapEntry& entry,
+ size_t* current_offset,
+ MemoryRegion dex_register_locations_region) {
+ DCHECK(current_offset != nullptr);
+ if ((entry.num_dex_registers == 0) || (entry.live_dex_registers_mask->NumSetBits() == 0)) {
+ // No dex register map needed.
+ return StackMap::kNoDexRegisterMap;
+ }
+ if (entry.offset == DexRegisterMapEntry::kOffsetUnassigned) {
+    // Not already copied, need to copy and assign an offset.
+ entry.offset = *current_offset;
+ const size_t entry_size = entry.ComputeSize(location_catalog_entries_.size());
+ DexRegisterMap dex_register_map(
+ dex_register_locations_region.Subregion(entry.offset, entry_size));
+ *current_offset += entry_size;
+ // Fill in the map since it was just added.
+ FillInDexRegisterMap(dex_register_map,
+ entry.num_dex_registers,
+ *entry.live_dex_registers_mask,
+ entry.locations_start_index);
+ }
+ return entry.offset;
+}
+
void StackMapStream::FillIn(MemoryRegion region) {
DCHECK_EQ(0u, current_entry_.dex_pc) << "EndStackMapEntry not called after BeginStackMapEntry";
DCHECK_NE(0u, needed_size_) << "PrepareForFillIn not called before FillIn";
@@ -311,35 +313,10 @@ void StackMapStream::FillIn(MemoryRegion region) {
stack_map.SetRegisterMaskIndex(encoding.stack_map.encoding, entry.register_mask_index);
stack_map.SetStackMaskIndex(encoding.stack_map.encoding, entry.stack_mask_index);
- if (entry.num_dex_registers == 0 || (entry.live_dex_registers_mask->NumSetBits() == 0)) {
- // No dex map available.
- stack_map.SetDexRegisterMapOffset(encoding.stack_map.encoding, StackMap::kNoDexRegisterMap);
- } else {
- // Search for an entry with the same dex map.
- if (entry.same_dex_register_map_as_ != kNoSameDexMapFound) {
- // If we have a hit reuse the offset.
- stack_map.SetDexRegisterMapOffset(
- encoding.stack_map.encoding,
- code_info.GetStackMapAt(entry.same_dex_register_map_as_, encoding)
- .GetDexRegisterMapOffset(encoding.stack_map.encoding));
- } else {
- // New dex registers maps should be added to the stack map.
- MemoryRegion register_region = dex_register_locations_region.Subregion(
- next_dex_register_map_offset,
- ComputeDexRegisterMapSize(entry.num_dex_registers, entry.live_dex_registers_mask));
- next_dex_register_map_offset += register_region.size();
- DexRegisterMap dex_register_map(register_region);
- stack_map.SetDexRegisterMapOffset(
- encoding.stack_map.encoding,
- register_region.begin() - dex_register_locations_region.begin());
-
- // Set the dex register location.
- FillInDexRegisterMap(dex_register_map,
- entry.num_dex_registers,
- *entry.live_dex_registers_mask,
- entry.dex_register_locations_start_index);
- }
- }
+ size_t offset = MaybeCopyDexRegisterMap(dex_register_entries_[entry.dex_register_map_index],
+ &next_dex_register_map_offset,
+ dex_register_locations_region);
+ stack_map.SetDexRegisterMapOffset(encoding.stack_map.encoding, offset);
// Set the inlining info.
if (entry.inlining_depth != 0) {
@@ -371,29 +348,13 @@ void StackMapStream::FillIn(MemoryRegion region) {
inline_info.SetExtraDataAtDepth(encoding.inline_info.encoding, depth, 1);
}
inline_info.SetDexPcAtDepth(encoding.inline_info.encoding, depth, inline_entry.dex_pc);
- if (inline_entry.num_dex_registers == 0) {
- // No dex map available.
- inline_info.SetDexRegisterMapOffsetAtDepth(encoding.inline_info.encoding,
- depth,
- StackMap::kNoDexRegisterMap);
- DCHECK(inline_entry.live_dex_registers_mask == nullptr);
- } else {
- MemoryRegion register_region = dex_register_locations_region.Subregion(
- next_dex_register_map_offset,
- ComputeDexRegisterMapSize(inline_entry.num_dex_registers,
- inline_entry.live_dex_registers_mask));
- next_dex_register_map_offset += register_region.size();
- DexRegisterMap dex_register_map(register_region);
- inline_info.SetDexRegisterMapOffsetAtDepth(
- encoding.inline_info.encoding,
- depth,
- register_region.begin() - dex_register_locations_region.begin());
-
- FillInDexRegisterMap(dex_register_map,
- inline_entry.num_dex_registers,
- *inline_entry.live_dex_registers_mask,
- inline_entry.dex_register_locations_start_index);
- }
+ size_t dex_register_map_offset = MaybeCopyDexRegisterMap(
+ dex_register_entries_[inline_entry.dex_register_map_index],
+ &next_dex_register_map_offset,
+ dex_register_locations_region);
+ inline_info.SetDexRegisterMapOffsetAtDepth(encoding.inline_info.encoding,
+ depth,
+ dex_register_map_offset);
}
} else if (encoding.stack_map.encoding.GetInlineInfoEncoding().BitSize() > 0) {
stack_map.SetInlineInfoIndex(encoding.stack_map.encoding, StackMap::kNoInlineInfo);
@@ -448,34 +409,31 @@ void StackMapStream::FillInDexRegisterMap(DexRegisterMap dex_register_map,
}
}
-size_t StackMapStream::FindEntryWithTheSameDexMap() {
- size_t current_entry_index = stack_maps_.size();
- auto entries_it = dex_map_hash_to_stack_map_indices_.find(current_entry_.dex_register_map_hash);
+size_t StackMapStream::AddDexRegisterMapEntry(const DexRegisterMapEntry& entry) {
+ const size_t current_entry_index = dex_register_entries_.size();
+ auto entries_it = dex_map_hash_to_stack_map_indices_.find(entry.hash);
if (entries_it == dex_map_hash_to_stack_map_indices_.end()) {
 // We don't have a perfect hash function, so we need a list to collect all stack maps
// which might have the same dex register map.
ArenaVector<uint32_t> stack_map_indices(allocator_->Adapter(kArenaAllocStackMapStream));
stack_map_indices.push_back(current_entry_index);
- dex_map_hash_to_stack_map_indices_.Put(current_entry_.dex_register_map_hash,
- std::move(stack_map_indices));
- return kNoSameDexMapFound;
- }
-
- // We might have collisions, so we need to check whether or not we really have a match.
- for (uint32_t test_entry_index : entries_it->second) {
- if (HaveTheSameDexMaps(GetStackMap(test_entry_index), current_entry_)) {
- return test_entry_index;
+ dex_map_hash_to_stack_map_indices_.Put(entry.hash, std::move(stack_map_indices));
+ } else {
+ // We might have collisions, so we need to check whether or not we really have a match.
+ for (uint32_t test_entry_index : entries_it->second) {
+ if (DexRegisterMapEntryEquals(dex_register_entries_[test_entry_index], entry)) {
+ return test_entry_index;
+ }
}
+ entries_it->second.push_back(current_entry_index);
}
- entries_it->second.push_back(current_entry_index);
- return kNoSameDexMapFound;
+ dex_register_entries_.push_back(entry);
+ return current_entry_index;
}
-bool StackMapStream::HaveTheSameDexMaps(const StackMapEntry& a, const StackMapEntry& b) const {
- if (a.live_dex_registers_mask == nullptr && b.live_dex_registers_mask == nullptr) {
- return true;
- }
- if (a.live_dex_registers_mask == nullptr || b.live_dex_registers_mask == nullptr) {
+bool StackMapStream::DexRegisterMapEntryEquals(const DexRegisterMapEntry& a,
+ const DexRegisterMapEntry& b) const {
+ if ((a.live_dex_registers_mask == nullptr) != (b.live_dex_registers_mask == nullptr)) {
return false;
}
if (a.num_dex_registers != b.num_dex_registers) {
@@ -489,12 +447,12 @@ bool StackMapStream::HaveTheSameDexMaps(const StackMapEntry& a, const StackMapEn
}
size_t number_of_live_dex_registers = a.live_dex_registers_mask->NumSetBits();
DCHECK_LE(number_of_live_dex_registers, dex_register_locations_.size());
- DCHECK_LE(a.dex_register_locations_start_index,
+ DCHECK_LE(a.locations_start_index,
dex_register_locations_.size() - number_of_live_dex_registers);
- DCHECK_LE(b.dex_register_locations_start_index,
+ DCHECK_LE(b.locations_start_index,
dex_register_locations_.size() - number_of_live_dex_registers);
- auto a_begin = dex_register_locations_.begin() + a.dex_register_locations_start_index;
- auto b_begin = dex_register_locations_.begin() + b.dex_register_locations_start_index;
+ auto a_begin = dex_register_locations_.begin() + a.locations_start_index;
+ auto b_begin = dex_register_locations_.begin() + b.locations_start_index;
if (!std::equal(a_begin, a_begin + number_of_live_dex_registers, b_begin)) {
return false;
}
@@ -597,10 +555,10 @@ void StackMapStream::CheckCodeInfo(MemoryRegion region) const {
CheckDexRegisterMap(code_info,
code_info.GetDexRegisterMapOf(
- stack_map, encoding, entry.num_dex_registers),
- entry.num_dex_registers,
- entry.live_dex_registers_mask,
- entry.dex_register_locations_start_index);
+ stack_map, encoding, entry.dex_register_entry.num_dex_registers),
+ entry.dex_register_entry.num_dex_registers,
+ entry.dex_register_entry.live_dex_registers_mask,
+ entry.dex_register_entry.locations_start_index);
// Check inline info.
DCHECK_EQ(stack_map.HasInlineInfo(stack_map_encoding), (entry.inlining_depth != 0));
@@ -623,10 +581,13 @@ void StackMapStream::CheckCodeInfo(MemoryRegion region) const {
CheckDexRegisterMap(code_info,
code_info.GetDexRegisterMapAtDepth(
- d, inline_info, encoding, inline_entry.num_dex_registers),
- inline_entry.num_dex_registers,
- inline_entry.live_dex_registers_mask,
- inline_entry.dex_register_locations_start_index);
+ d,
+ inline_info,
+ encoding,
+ inline_entry.dex_register_entry.num_dex_registers),
+ inline_entry.dex_register_entry.num_dex_registers,
+ inline_entry.dex_register_entry.live_dex_registers_mask,
+ inline_entry.dex_register_entry.locations_start_index);
}
}
}
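The stack_map_stream.cc rework factors dex register maps into shared DexRegisterMapEntry records: AddDexRegisterMapEntry() deduplicates each entry through a hash-to-candidate-indices map plus an exact equality check, and MaybeCopyDexRegisterMap() emits the map bytes only for the first user of an entry, so later stack maps and inline frames simply reuse the stored offset. A simplified stand-alone model of that flow (the hash, the container choices, and the names are illustrative, not the real ones):

#include <cassert>
#include <cstdint>
#include <map>
#include <vector>

struct Entry {
  std::vector<uint32_t> locations;  // Stands in for the live mask + catalog indices.
  size_t offset = SIZE_MAX;         // ~ kOffsetUnassigned.
  size_t Hash() const {
    size_t h = 0;
    for (uint32_t v : locations) h = h * 31 + v;
    return h;
  }
};

struct Dedup {
  std::vector<Entry> entries;
  std::multimap<size_t, size_t> hash_to_index;

  size_t Add(const Entry& e) {  // ~ AddDexRegisterMapEntry
    auto range = hash_to_index.equal_range(e.Hash());
    for (auto it = range.first; it != range.second; ++it) {
      if (entries[it->second].locations == e.locations) return it->second;  // Duplicate.
    }
    size_t index = entries.size();
    entries.push_back(e);
    hash_to_index.emplace(e.Hash(), index);
    return index;
  }

  size_t MaybeCopy(size_t index, size_t* current_offset) {  // ~ MaybeCopyDexRegisterMap
    Entry& e = entries[index];
    if (e.offset == SIZE_MAX) {
      e.offset = *current_offset;             // First use: emit the map here.
      *current_offset += e.locations.size();  // ComputeSize() stand-in.
    }
    return e.offset;                          // Later users just reuse the offset.
  }
};

int main() {
  Dedup d;
  Entry e1; e1.locations = {1, 2, 3};
  Entry e2; e2.locations = {1, 2, 3};  // Same contents as e1.
  Entry e3; e3.locations = {4, 5};
  size_t a = d.Add(e1);
  size_t b = d.Add(e2);                // Duplicate: same index returned.
  size_t c = d.Add(e3);
  assert(a == b && a != c);
  size_t offset = 0;
  assert(d.MaybeCopy(a, &offset) == d.MaybeCopy(b, &offset));  // Emitted once.
  assert(offset == 3);
  return 0;
}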
diff --git a/compiler/optimizing/stack_map_stream.h b/compiler/optimizing/stack_map_stream.h
index 08c1d3e3c0..bba3d51e62 100644
--- a/compiler/optimizing/stack_map_stream.h
+++ b/compiler/optimizing/stack_map_stream.h
@@ -70,6 +70,7 @@ class StackMapStream : public ValueObject {
inline_infos_(allocator->Adapter(kArenaAllocStackMapStream)),
stack_masks_(allocator->Adapter(kArenaAllocStackMapStream)),
register_masks_(allocator->Adapter(kArenaAllocStackMapStream)),
+ dex_register_entries_(allocator->Adapter(kArenaAllocStackMapStream)),
stack_mask_max_(-1),
dex_pc_max_(0),
register_mask_max_(0),
@@ -89,30 +90,42 @@ class StackMapStream : public ValueObject {
code_info_encoding_.reserve(16);
}
+  // A dex register map entry for a single stack map entry; it contains which registers are
+  // live as well as indices into the location catalog.
+ class DexRegisterMapEntry {
+ public:
+ static const size_t kOffsetUnassigned = -1;
+
+ BitVector* live_dex_registers_mask;
+ uint32_t num_dex_registers;
+ size_t locations_start_index;
+ // Computed fields
+ size_t hash = 0;
+ size_t offset = kOffsetUnassigned;
+
+ size_t ComputeSize(size_t catalog_size) const;
+ };
+
// See runtime/stack_map.h to know what these fields contain.
struct StackMapEntry {
uint32_t dex_pc;
CodeOffset native_pc_code_offset;
uint32_t register_mask;
BitVector* sp_mask;
- uint32_t num_dex_registers;
uint8_t inlining_depth;
- size_t dex_register_locations_start_index;
size_t inline_infos_start_index;
- BitVector* live_dex_registers_mask;
- uint32_t dex_register_map_hash;
- size_t same_dex_register_map_as_;
uint32_t stack_mask_index;
uint32_t register_mask_index;
+ DexRegisterMapEntry dex_register_entry;
+ size_t dex_register_map_index;
};
struct InlineInfoEntry {
uint32_t dex_pc; // DexFile::kDexNoIndex for intrinsified native methods.
ArtMethod* method;
uint32_t method_index;
- uint32_t num_dex_registers;
- BitVector* live_dex_registers_mask;
- size_t dex_register_locations_start_index;
+ DexRegisterMapEntry dex_register_entry;
+ size_t dex_register_map_index;
};
void BeginStackMapEntry(uint32_t dex_pc,
@@ -140,7 +153,8 @@ class StackMapStream : public ValueObject {
}
void SetStackMapNativePcOffset(size_t i, uint32_t native_pc_offset) {
- stack_maps_[i].native_pc_code_offset = CodeOffset::FromOffset(native_pc_offset, instruction_set_);
+ stack_maps_[i].native_pc_code_offset =
+ CodeOffset::FromOffset(native_pc_offset, instruction_set_);
}
// Prepares the stream to fill in a memory region. Must be called before FillIn.
@@ -150,8 +164,6 @@ class StackMapStream : public ValueObject {
private:
size_t ComputeDexRegisterLocationCatalogSize() const;
- size_t ComputeDexRegisterMapSize(uint32_t num_dex_registers,
- const BitVector* live_dex_registers_mask) const;
size_t ComputeDexRegisterMapsSize() const;
void ComputeInlineInfoEncoding(InlineInfoEncoding* encoding,
size_t dex_register_maps_bytes);
@@ -164,15 +176,24 @@ class StackMapStream : public ValueObject {
// Returns the number of unique register masks.
size_t PrepareRegisterMasks();
- // Returns the index of an entry with the same dex register map as the current_entry,
- // or kNoSameDexMapFound if no such entry exists.
- size_t FindEntryWithTheSameDexMap();
- bool HaveTheSameDexMaps(const StackMapEntry& a, const StackMapEntry& b) const;
+ // Deduplicate entry if possible and return the corresponding index into dex_register_entries_
+ // array. If entry is not a duplicate, a new entry is added to dex_register_entries_.
+ size_t AddDexRegisterMapEntry(const DexRegisterMapEntry& entry);
+
+ // Return true if the two dex register map entries are equal.
+ bool DexRegisterMapEntryEquals(const DexRegisterMapEntry& a, const DexRegisterMapEntry& b) const;
+
+ // Fill in the corresponding entries of a register map.
void FillInDexRegisterMap(DexRegisterMap dex_register_map,
uint32_t num_dex_registers,
const BitVector& live_dex_registers_mask,
uint32_t start_index_in_dex_register_locations) const;
+  // Returns the offset of the dex register map inside the dex register location region. See FillIn.
+ // Only copies the dex register map if the offset for the entry is not already assigned.
+ size_t MaybeCopyDexRegisterMap(DexRegisterMapEntry& entry,
+ size_t* current_offset,
+ MemoryRegion dex_register_locations_region);
void CheckDexRegisterMap(const CodeInfo& code_info,
const DexRegisterMap& dex_register_map,
size_t num_dex_registers,
@@ -199,6 +220,7 @@ class StackMapStream : public ValueObject {
ArenaVector<InlineInfoEntry> inline_infos_;
ArenaVector<uint8_t> stack_masks_;
ArenaVector<uint32_t> register_masks_;
+ ArenaVector<DexRegisterMapEntry> dex_register_entries_;
int stack_mask_max_;
uint32_t dex_pc_max_;
uint32_t register_mask_max_;
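DexRegisterMapEntry::ComputeSize() now owns the size computation previously done in StackMapStream::ComputeDexRegisterMapSize(): zero for an empty map, otherwise a live-register bit mask over num_dex_registers plus one packed catalog-index field per live register, whose width comes from DexRegisterMap::SingleEntrySizeInBits(catalog_size). A sketch of that arithmetic, with the per-index bit width passed in as a parameter (helper names are illustrative):

#include <cassert>
#include <cstddef>

constexpr size_t kBitsPerByte = 8;

size_t ComputeSizeSketch(size_t num_dex_registers, size_t live_registers, size_t bits_per_index) {
  if (num_dex_registers == 0) return 0;  // No map emitted.
  size_t size = (num_dex_registers + kBitsPerByte - 1) / kBitsPerByte;          // Live bit mask.
  size += (live_registers * bits_per_index + kBitsPerByte - 1) / kBitsPerByte;  // Packed indices.
  return size;
}

int main() {
  // Matches the new test below: 2 dex registers, both live, 2-bit catalog indices
  // -> 1 byte of live mask + 1 byte of packed indices.
  assert(ComputeSizeSketch(2, 2, /*bits_per_index=*/2) == 2);
  assert(ComputeSizeSketch(0, 0, /*bits_per_index=*/2) == 0);
  return 0;
}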
diff --git a/compiler/optimizing/stack_map_test.cc b/compiler/optimizing/stack_map_test.cc
index bd0aa6dea7..041695187b 100644
--- a/compiler/optimizing/stack_map_test.cc
+++ b/compiler/optimizing/stack_map_test.cc
@@ -410,6 +410,100 @@ TEST(StackMapTest, Test2) {
}
}
+TEST(StackMapTest, TestDeduplicateInlineInfoDexRegisterMap) {
+ ArenaPool pool;
+ ArenaAllocator arena(&pool);
+ StackMapStream stream(&arena, kRuntimeISA);
+ ArtMethod art_method;
+
+ ArenaBitVector sp_mask1(&arena, 0, true);
+ sp_mask1.SetBit(2);
+ sp_mask1.SetBit(4);
+ const size_t number_of_dex_registers = 2;
+ const size_t number_of_dex_registers_in_inline_info = 2;
+ stream.BeginStackMapEntry(0, 64, 0x3, &sp_mask1, number_of_dex_registers, 1);
+ stream.AddDexRegisterEntry(Kind::kInStack, 0); // Short location.
+ stream.AddDexRegisterEntry(Kind::kConstant, -2); // Large location.
+ stream.BeginInlineInfoEntry(&art_method, 3, number_of_dex_registers_in_inline_info);
+ stream.AddDexRegisterEntry(Kind::kInStack, 0); // Short location.
+ stream.AddDexRegisterEntry(Kind::kConstant, -2); // Large location.
+ stream.EndInlineInfoEntry();
+ stream.EndStackMapEntry();
+
+ size_t size = stream.PrepareForFillIn();
+ void* memory = arena.Alloc(size, kArenaAllocMisc);
+ MemoryRegion region(memory, size);
+ stream.FillIn(region);
+
+ CodeInfo code_info(region);
+ CodeInfoEncoding encoding = code_info.ExtractEncoding();
+ ASSERT_EQ(1u, code_info.GetNumberOfStackMaps(encoding));
+
+ uint32_t number_of_catalog_entries = code_info.GetNumberOfLocationCatalogEntries(encoding);
+ ASSERT_EQ(2u, number_of_catalog_entries);
+ DexRegisterLocationCatalog location_catalog = code_info.GetDexRegisterLocationCatalog(encoding);
+ // The Dex register location catalog contains:
+ // - one 1-byte short Dex register locations, and
+ // - one 5-byte large Dex register location.
+ const size_t expected_location_catalog_size = 1u + 5u;
+ ASSERT_EQ(expected_location_catalog_size, location_catalog.Size());
+
+ // First stack map.
+ {
+ StackMap stack_map = code_info.GetStackMapAt(0, encoding);
+ ASSERT_TRUE(stack_map.Equals(code_info.GetStackMapForDexPc(0, encoding)));
+ ASSERT_TRUE(stack_map.Equals(code_info.GetStackMapForNativePcOffset(64, encoding)));
+ ASSERT_EQ(0u, stack_map.GetDexPc(encoding.stack_map.encoding));
+ ASSERT_EQ(64u, stack_map.GetNativePcOffset(encoding.stack_map.encoding, kRuntimeISA));
+ ASSERT_EQ(0x3u, code_info.GetRegisterMaskOf(encoding, stack_map));
+
+ ASSERT_TRUE(CheckStackMask(code_info, encoding, stack_map, sp_mask1));
+
+ ASSERT_TRUE(stack_map.HasDexRegisterMap(encoding.stack_map.encoding));
+ DexRegisterMap map(code_info.GetDexRegisterMapOf(stack_map, encoding, number_of_dex_registers));
+ ASSERT_TRUE(map.IsDexRegisterLive(0));
+ ASSERT_TRUE(map.IsDexRegisterLive(1));
+ ASSERT_EQ(2u, map.GetNumberOfLiveDexRegisters(number_of_dex_registers));
+ // The Dex register map contains:
+ // - one 1-byte live bit mask, and
+ // - one 1-byte set of location catalog entry indices composed of two 2-bit values.
+ size_t expected_map_size = 1u + 1u;
+ ASSERT_EQ(expected_map_size, map.Size());
+
+ ASSERT_EQ(Kind::kInStack, map.GetLocationKind(0, number_of_dex_registers, code_info, encoding));
+ ASSERT_EQ(Kind::kConstant,
+ map.GetLocationKind(1, number_of_dex_registers, code_info, encoding));
+ ASSERT_EQ(Kind::kInStack,
+ map.GetLocationInternalKind(0, number_of_dex_registers, code_info, encoding));
+ ASSERT_EQ(Kind::kConstantLargeValue,
+ map.GetLocationInternalKind(1, number_of_dex_registers, code_info, encoding));
+ ASSERT_EQ(0, map.GetStackOffsetInBytes(0, number_of_dex_registers, code_info, encoding));
+ ASSERT_EQ(-2, map.GetConstant(1, number_of_dex_registers, code_info, encoding));
+
+ const size_t index0 =
+ map.GetLocationCatalogEntryIndex(0, number_of_dex_registers, number_of_catalog_entries);
+ const size_t index1 =
+ map.GetLocationCatalogEntryIndex(1, number_of_dex_registers, number_of_catalog_entries);
+ ASSERT_EQ(0u, index0);
+ ASSERT_EQ(1u, index1);
+ DexRegisterLocation location0 = location_catalog.GetDexRegisterLocation(index0);
+ DexRegisterLocation location1 = location_catalog.GetDexRegisterLocation(index1);
+ ASSERT_EQ(Kind::kInStack, location0.GetKind());
+ ASSERT_EQ(Kind::kConstant, location1.GetKind());
+ ASSERT_EQ(Kind::kInStack, location0.GetInternalKind());
+ ASSERT_EQ(Kind::kConstantLargeValue, location1.GetInternalKind());
+ ASSERT_EQ(0, location0.GetValue());
+ ASSERT_EQ(-2, location1.GetValue());
+
+    // Test that the inline info dex register map was deduplicated to the same offset as the
+    // stack map one.
+ ASSERT_TRUE(stack_map.HasInlineInfo(encoding.stack_map.encoding));
+ InlineInfo inline_info = code_info.GetInlineInfoOf(stack_map, encoding);
+ EXPECT_EQ(inline_info.GetDexRegisterMapOffsetAtDepth(encoding.inline_info.encoding, 0),
+ stack_map.GetDexRegisterMapOffset(encoding.stack_map.encoding));
+ }
+}
+
TEST(StackMapTest, TestNonLiveDexRegisters) {
ArenaPool pool;
ArenaAllocator arena(&pool);