Diffstat (limited to 'compiler/optimizing')
-rw-r--r--  compiler/optimizing/bounds_check_elimination.cc        |  14
-rw-r--r--  compiler/optimizing/cha_guard_optimization.cc          |   2
-rw-r--r--  compiler/optimizing/code_generator_arm.cc              | 560
-rw-r--r--  compiler/optimizing/code_generator_arm.h               |  47
-rw-r--r--  compiler/optimizing/code_generator_arm64.cc            | 162
-rw-r--r--  compiler/optimizing/code_generator_arm64.h             |  26
-rw-r--r--  compiler/optimizing/code_generator_arm_vixl.cc         | 616
-rw-r--r--  compiler/optimizing/code_generator_arm_vixl.h          |  48
-rw-r--r--  compiler/optimizing/code_generator_mips.cc             | 484
-rw-r--r--  compiler/optimizing/code_generator_mips.h              |   1
-rw-r--r--  compiler/optimizing/code_generator_mips64.cc           |  82
-rw-r--r--  compiler/optimizing/code_generator_x86.cc              |  22
-rw-r--r--  compiler/optimizing/code_generator_x86_64.cc           |  22
-rw-r--r--  compiler/optimizing/graph_checker.cc                   |  19
-rw-r--r--  compiler/optimizing/inliner.cc                         |  29
-rw-r--r--  compiler/optimizing/instruction_builder.cc             |  27
-rw-r--r--  compiler/optimizing/instruction_builder.h              |   6
-rw-r--r--  compiler/optimizing/instruction_simplifier.cc          |  14
-rw-r--r--  compiler/optimizing/intrinsics_arm.cc                  |  11
-rw-r--r--  compiler/optimizing/intrinsics_arm64.cc                |   9
-rw-r--r--  compiler/optimizing/intrinsics_arm_vixl.cc             |  11
-rw-r--r--  compiler/optimizing/intrinsics_mips.cc                 |   3
-rw-r--r--  compiler/optimizing/intrinsics_mips64.cc               |   3
-rw-r--r--  compiler/optimizing/load_store_elimination.cc          |  10
-rw-r--r--  compiler/optimizing/loop_optimization.cc               |  21
-rw-r--r--  compiler/optimizing/nodes.cc                           |  96
-rw-r--r--  compiler/optimizing/nodes.h                            | 173
-rw-r--r--  compiler/optimizing/prepare_for_register_allocation.cc |   7
-rw-r--r--  compiler/optimizing/prepare_for_register_allocation.h  |   1
-rw-r--r--  compiler/optimizing/ssa_liveness_analysis_test.cc      |   2
30 files changed, 2084 insertions(+), 444 deletions(-)
diff --git a/compiler/optimizing/bounds_check_elimination.cc b/compiler/optimizing/bounds_check_elimination.cc
index 476906a768..ed630cda91 100644
--- a/compiler/optimizing/bounds_check_elimination.cc
+++ b/compiler/optimizing/bounds_check_elimination.cc
@@ -1550,7 +1550,7 @@ class BCEVisitor : public HGraphVisitor {
HBasicBlock* block = GetPreHeader(loop, check);
HInstruction* cond =
new (GetGraph()->GetArena()) HEqual(array, GetGraph()->GetNullConstant());
- InsertDeoptInLoop(loop, block, cond);
+ InsertDeoptInLoop(loop, block, cond, /* is_null_check */ true);
ReplaceInstruction(check, array);
return true;
}
@@ -1616,11 +1616,16 @@ class BCEVisitor : public HGraphVisitor {
}
/** Inserts a deoptimization test in a loop preheader. */
- void InsertDeoptInLoop(HLoopInformation* loop, HBasicBlock* block, HInstruction* condition) {
+ void InsertDeoptInLoop(HLoopInformation* loop,
+ HBasicBlock* block,
+ HInstruction* condition,
+ bool is_null_check = false) {
HInstruction* suspend = loop->GetSuspendCheck();
block->InsertInstructionBefore(condition, block->GetLastInstruction());
+ DeoptimizationKind kind =
+ is_null_check ? DeoptimizationKind::kLoopNullBCE : DeoptimizationKind::kLoopBoundsBCE;
HDeoptimize* deoptimize = new (GetGraph()->GetArena()) HDeoptimize(
- GetGraph()->GetArena(), condition, HDeoptimize::Kind::kBCE, suspend->GetDexPc());
+ GetGraph()->GetArena(), condition, kind, suspend->GetDexPc());
block->InsertInstructionBefore(deoptimize, block->GetLastInstruction());
if (suspend->HasEnvironment()) {
deoptimize->CopyEnvironmentFromWithLoopPhiAdjustment(
@@ -1633,7 +1638,7 @@ class BCEVisitor : public HGraphVisitor {
HBasicBlock* block = bounds_check->GetBlock();
block->InsertInstructionBefore(condition, bounds_check);
HDeoptimize* deoptimize = new (GetGraph()->GetArena()) HDeoptimize(
- GetGraph()->GetArena(), condition, HDeoptimize::Kind::kBCE, bounds_check->GetDexPc());
+ GetGraph()->GetArena(), condition, DeoptimizationKind::kBlockBCE, bounds_check->GetDexPc());
block->InsertInstructionBefore(deoptimize, bounds_check);
deoptimize->CopyEnvironmentFrom(bounds_check->GetEnvironment());
}
@@ -1749,6 +1754,7 @@ class BCEVisitor : public HGraphVisitor {
phi = NewPhi(new_preheader, instruction, type);
}
user->ReplaceInput(phi, index); // Removes the use node from the list.
+ induction_range_.Replace(user, instruction, phi); // update induction
}
}
// Scan all environment uses of an instruction and replace each later use with a phi node.
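The hunks above split the old catch-all HDeoptimize::Kind::kBCE into distinct DeoptimizationKind values, so the runtime can report which optimization requested the deoptimization. A minimal standalone sketch of the new selection logic follows; the enumerator names come from this diff, while the underlying type and ordering are assumptions made only for illustration.

#include <cstdint>

// Assumed shape of the enum; only the enumerators visible in this diff are listed.
enum class DeoptimizationKind : uint32_t {
  kBlockBCE,       // bounds check eliminated within a block
  kCHA,            // class hierarchy analysis guard
  kLoopBoundsBCE,  // bounds check hoisted out of a loop
  kLoopNullBCE,    // null check hoisted out of a loop
};

// Mirrors the InsertDeoptInLoop change: hoisted null checks and hoisted
// bounds checks now report different kinds.
DeoptimizationKind LoopDeoptKind(bool is_null_check) {
  return is_null_check ? DeoptimizationKind::kLoopNullBCE
                       : DeoptimizationKind::kLoopBoundsBCE;
}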
diff --git a/compiler/optimizing/cha_guard_optimization.cc b/compiler/optimizing/cha_guard_optimization.cc
index 048073e37a..c806dbfef6 100644
--- a/compiler/optimizing/cha_guard_optimization.cc
+++ b/compiler/optimizing/cha_guard_optimization.cc
@@ -203,7 +203,7 @@ bool CHAGuardVisitor::HoistGuard(HShouldDeoptimizeFlag* flag,
// Need a new deoptimize instruction that copies the environment
// of the suspend instruction for the loop.
HDeoptimize* deoptimize = new (GetGraph()->GetArena()) HDeoptimize(
- GetGraph()->GetArena(), compare, HDeoptimize::Kind::kInline, suspend->GetDexPc());
+ GetGraph()->GetArena(), compare, DeoptimizationKind::kCHA, suspend->GetDexPc());
pre_header->InsertInstructionBefore(deoptimize, pre_header->GetLastInstruction());
deoptimize->CopyEnvironmentFromWithLoopPhiAdjustment(
suspend->GetEnvironment(), loop_info->GetHeader());
diff --git a/compiler/optimizing/code_generator_arm.cc b/compiler/optimizing/code_generator_arm.cc
index d7cc577580..cf2a391e8f 100644
--- a/compiler/optimizing/code_generator_arm.cc
+++ b/compiler/optimizing/code_generator_arm.cc
@@ -16,6 +16,7 @@
#include "code_generator_arm.h"
+#include "arch/arm/asm_support_arm.h"
#include "arch/arm/instruction_set_features_arm.h"
#include "art_method.h"
#include "code_generator_utils.h"
@@ -25,6 +26,7 @@
#include "gc/accounting/card_table.h"
#include "intrinsics.h"
#include "intrinsics_arm.h"
+#include "linker/arm/relative_patcher_thumb2.h"
#include "mirror/array-inl.h"
#include "mirror/class-inl.h"
#include "thread.h"
@@ -60,10 +62,45 @@ static constexpr DRegister DTMP = D31;
static constexpr uint32_t kPackedSwitchCompareJumpThreshold = 7;
+// Reference load (except object array loads) uses LDR Rt, [Rn, #offset], which can handle
+// offsets < 4KiB. For offsets >= 4KiB, the load must be emitted as two or more instructions.
+// For the Baker read barrier implementation using link-time generated thunks we need to split
+// the offset explicitly.
+constexpr uint32_t kReferenceLoadMinFarOffset = 4 * KB;
+
+// Flags controlling the use of link-time generated thunks for Baker read barriers.
+constexpr bool kBakerReadBarrierLinkTimeThunksEnableForFields = true;
+constexpr bool kBakerReadBarrierLinkTimeThunksEnableForArrays = true;
+constexpr bool kBakerReadBarrierLinkTimeThunksEnableForGcRoots = true;
+
+// The reserved entrypoint register for link-time generated thunks.
+const Register kBakerCcEntrypointRegister = R4;
+
// NOLINT on __ macro to suppress wrong warning/fix (misc-macro-parentheses) from clang-tidy.
#define __ down_cast<ArmAssembler*>(codegen->GetAssembler())-> // NOLINT
#define QUICK_ENTRY_POINT(x) QUICK_ENTRYPOINT_OFFSET(kArmPointerSize, x).Int32Value()
+static inline void CheckLastTempIsBakerCcEntrypointRegister(HInstruction* instruction) {
+ DCHECK_EQ(static_cast<uint32_t>(kBakerCcEntrypointRegister),
+ linker::Thumb2RelativePatcher::kBakerCcEntrypointRegister);
+ DCHECK_NE(instruction->GetLocations()->GetTempCount(), 0u);
+ DCHECK_EQ(kBakerCcEntrypointRegister,
+ instruction->GetLocations()->GetTemp(
+ instruction->GetLocations()->GetTempCount() - 1u).AsRegister<Register>());
+}
+
+static inline void EmitPlaceholderBne(CodeGeneratorARM* codegen, Label* bne_label) {
+ ScopedForce32Bit force_32bit(down_cast<Thumb2Assembler*>(codegen->GetAssembler()));
+ __ BindTrackedLabel(bne_label);
+ Label placeholder_label;
+ __ b(&placeholder_label, NE); // Placeholder, patched at link-time.
+ __ Bind(&placeholder_label);
+}
+
+static inline bool CanEmitNarrowLdr(Register rt, Register rn, uint32_t offset) {
+ return ArmAssembler::IsLowRegister(rt) && ArmAssembler::IsLowRegister(rn) && offset < 32u;
+}
+
static constexpr int kRegListThreshold = 4;
// SaveLiveRegisters and RestoreLiveRegisters from SlowPathCodeARM operate on sets of S registers,
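CanEmitNarrowLdr above decides whether the reference load can use a 16-bit Thumb LDR encoding, which requires both registers in r0-r7 and, for the thunk protocol here, an offset below 32 bytes. A self-contained sketch of the predicate; modeling registers as plain integers is an illustration-only assumption (ART queries its assembler for the low-register property).

#include <cstdint>

constexpr bool IsLowRegister(uint32_t reg) { return reg < 8u; }  // r0-r7

constexpr bool CanEmitNarrowLdr(uint32_t rt, uint32_t rn, uint32_t offset) {
  return IsLowRegister(rt) && IsLowRegister(rn) && offset < 32u;
}

static_assert(CanEmitNarrowLdr(0u, 1u, 28u), "low registers, small offset: narrow");
static_assert(!CanEmitNarrowLdr(8u, 1u, 28u), "r8 is not a low register");
static_assert(!CanEmitNarrowLdr(0u, 1u, 32u), "offset out of the narrow range");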
@@ -585,8 +622,13 @@ class DeoptimizationSlowPathARM : public SlowPathCodeARM {
void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
CodeGeneratorARM* arm_codegen = down_cast<CodeGeneratorARM*>(codegen);
__ Bind(GetEntryLabel());
+ LocationSummary* locations = instruction_->GetLocations();
+ SaveLiveRegisters(codegen, locations);
+ InvokeRuntimeCallingConvention calling_convention;
+ __ LoadImmediate(calling_convention.GetRegisterAt(0),
+ static_cast<uint32_t>(instruction_->AsDeoptimize()->GetDeoptimizationKind()));
arm_codegen->InvokeRuntime(kQuickDeoptimize, instruction_, instruction_->GetDexPc(), this);
- CheckEntrypointTypes<kQuickDeoptimize, void, void>();
+ CheckEntrypointTypes<kQuickDeoptimize, void, DeoptimizationKind>();
}
const char* GetDescription() const OVERRIDE { return "DeoptimizationSlowPathARM"; }
@@ -819,7 +861,7 @@ class LoadReferenceWithBakerReadBarrierSlowPathARM : public ReadBarrierMarkSlowP
// Baker's read barriers, we need to perform the load of
// mirror::Object::monitor_ *before* the original reference load.
// This load-load ordering is required by the read barrier.
- // The fast path/slow path (for Baker's algorithm) should look like:
+ // The slow path (for Baker's algorithm) should look like:
//
// uint32_t rb_state = Lockword(obj->monitor_).ReadBarrierState();
// lfence; // Load fence or artificial data dependency to prevent load-load reordering
@@ -954,6 +996,18 @@ class LoadReferenceWithBakerReadBarrierAndUpdateFieldSlowPathARM
__ Bind(GetEntryLabel());
+ // The implementation is similar to LoadReferenceWithBakerReadBarrierSlowPathARM's:
+ //
+ // uint32_t rb_state = Lockword(obj->monitor_).ReadBarrierState();
+ // lfence; // Load fence or artificial data dependency to prevent load-load reordering
+ // HeapReference<mirror::Object> ref = *src; // Original reference load.
+ // bool is_gray = (rb_state == ReadBarrier::GrayState());
+ // if (is_gray) {
+ // old_ref = ref;
+ // ref = entrypoint(ref); // ref = ReadBarrier::Mark(ref); // Runtime entry point call.
+ // compareAndSwapObject(obj, field_offset, old_ref, ref);
+ // }
+
// /* int32_t */ monitor = obj->monitor_
uint32_t monitor_offset = mirror::Object::MonitorOffset().Int32Value();
__ LoadFromOffset(kLoadWord, temp1_, obj_, monitor_offset);
@@ -1962,6 +2016,7 @@ CodeGeneratorARM::CodeGeneratorARM(HGraph* graph,
graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
pc_relative_type_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
type_bss_entry_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
+ baker_read_barrier_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
jit_string_patches_(StringReferenceValueComparator(),
graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
jit_class_patches_(TypeReferenceValueComparator(),
@@ -2672,7 +2727,10 @@ void InstructionCodeGeneratorARM::VisitIf(HIf* if_instr) {
void LocationsBuilderARM::VisitDeoptimize(HDeoptimize* deoptimize) {
LocationSummary* locations = new (GetGraph()->GetArena())
LocationSummary(deoptimize, LocationSummary::kCallOnSlowPath);
- locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty()); // No caller-save registers.
+ InvokeRuntimeCallingConvention calling_convention;
+ RegisterSet caller_saves = RegisterSet::Empty();
+ caller_saves.Add(Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
+ locations->SetCustomSlowPathCallerSaves(caller_saves);
if (IsBooleanValueOrMaterializedCondition(deoptimize->InputAt(0))) {
locations->SetInAt(0, Location::RequiresRegister());
}
@@ -3067,6 +3125,15 @@ void InstructionCodeGeneratorARM::VisitDoubleConstant(HDoubleConstant* constant
// Will be generated at use site.
}
+void LocationsBuilderARM::VisitConstructorFence(HConstructorFence* constructor_fence) {
+ constructor_fence->SetLocations(nullptr);
+}
+
+void InstructionCodeGeneratorARM::VisitConstructorFence(
+ HConstructorFence* constructor_fence ATTRIBUTE_UNUSED) {
+ codegen_->GenerateMemoryBarrier(MemBarrierKind::kStoreStore);
+}
+
void LocationsBuilderARM::VisitMemoryBarrier(HMemoryBarrier* memory_barrier) {
memory_barrier->SetLocations(nullptr);
}
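The new VisitConstructorFence above lowers HConstructorFence to a store-store barrier, ordering the constructor's field stores before the store that publishes the reference. A rough portable C++ analogue, using a release fence (which is stronger than a pure StoreStore barrier):

#include <atomic>

struct Widget { int payload = 0; };

std::atomic<Widget*> g_widget{nullptr};

void Publish(Widget* w) {
  w->payload = 42;                                      // constructor stores
  std::atomic_thread_fence(std::memory_order_release);  // ~ StoreStore barrier
  g_widget.store(w, std::memory_order_relaxed);         // publishing store
}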
@@ -5272,7 +5339,18 @@ void LocationsBuilderARM::HandleFieldGet(HInstruction* instruction, const FieldI
} else if (object_field_get_with_read_barrier && kUseBakerReadBarrier) {
// We need a temporary register for the read barrier marking slow
// path in CodeGeneratorARM::GenerateFieldLoadWithBakerReadBarrier.
- locations->AddTemp(Location::RequiresRegister());
+ if (kBakerReadBarrierLinkTimeThunksEnableForFields &&
+ !Runtime::Current()->UseJitCompilation()) {
+ // If link-time thunks for the Baker read barrier are enabled, for AOT
+ // loads we need a temporary only if the offset is too big.
+ if (field_info.GetFieldOffset().Uint32Value() >= kReferenceLoadMinFarOffset) {
+ locations->AddTemp(Location::RequiresRegister());
+ }
+ // And we always need the reserved entrypoint register.
+ locations->AddTemp(Location::RegisterLocation(kBakerCcEntrypointRegister));
+ } else {
+ locations->AddTemp(Location::RequiresRegister());
+ }
}
}
@@ -5738,11 +5816,35 @@ void LocationsBuilderARM::VisitArrayGet(HArrayGet* instruction) {
Location::RequiresRegister(),
object_array_get_with_read_barrier ? Location::kOutputOverlap : Location::kNoOutputOverlap);
}
- // We need a temporary register for the read barrier marking slow
- // path in CodeGeneratorARM::GenerateArrayLoadWithBakerReadBarrier.
- // Also need for String compression feature.
- if ((object_array_get_with_read_barrier && kUseBakerReadBarrier)
- || (mirror::kUseStringCompression && instruction->IsStringCharAt())) {
+ if (object_array_get_with_read_barrier && kUseBakerReadBarrier) {
+ // We need a temporary register for the read barrier marking slow
+ // path in CodeGeneratorARM::GenerateArrayLoadWithBakerReadBarrier.
+ if (kBakerReadBarrierLinkTimeThunksEnableForFields &&
+ !Runtime::Current()->UseJitCompilation() &&
+ instruction->GetIndex()->IsConstant()) {
+ // Array loads with constant index are treated as field loads.
+ // If link-time thunks for the Baker read barrier are enabled, for AOT
+ // constant index loads we need a temporary only if the offset is too big.
+ uint32_t offset = CodeGenerator::GetArrayDataOffset(instruction);
+ uint32_t index = instruction->GetIndex()->AsIntConstant()->GetValue();
+ offset += index << Primitive::ComponentSizeShift(Primitive::kPrimNot);
+ if (offset >= kReferenceLoadMinFarOffset) {
+ locations->AddTemp(Location::RequiresRegister());
+ }
+ // And we always need the reserved entrypoint register.
+ locations->AddTemp(Location::RegisterLocation(kBakerCcEntrypointRegister));
+ } else if (kBakerReadBarrierLinkTimeThunksEnableForArrays &&
+ !Runtime::Current()->UseJitCompilation() &&
+ !instruction->GetIndex()->IsConstant()) {
+ // We need a non-scratch temporary for the array data pointer.
+ locations->AddTemp(Location::RequiresRegister());
+ // And we always need the reserved entrypoint register.
+ locations->AddTemp(Location::RegisterLocation(kBakerCcEntrypointRegister));
+ } else {
+ locations->AddTemp(Location::RequiresRegister());
+ }
+ } else if (mirror::kUseStringCompression && instruction->IsStringCharAt()) {
+ // Also need a temporary for String compression feature.
locations->AddTemp(Location::RequiresRegister());
}
}
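For a constant index, the array load above is folded into a field load at offset data_offset + (index << ComponentSizeShift(kPrimNot)), and an extra temporary is reserved only when that offset reaches 4KiB. A worked sketch; the 12-byte data offset and the 4-byte reference size are assumptions used only for this illustration.

#include <cstdint>

constexpr uint32_t kReferenceLoadMinFarOffset = 4 * 1024;
constexpr uint32_t kObjectArrayDataOffset = 12;  // assumed 32-bit mirror::Array data offset
constexpr uint32_t kPrimNotSizeShift = 2;        // assumed 4-byte heap references

constexpr uint32_t ConstIndexOffset(uint32_t index) {
  return kObjectArrayDataOffset + (index << kPrimNotSizeShift);
}

constexpr bool NeedsOffsetTemp(uint32_t index) {
  return ConstIndexOffset(index) >= kReferenceLoadMinFarOffset;
}

static_assert(ConstIndexOffset(3) == 24, "small index: offset fits a single LDR");
static_assert(!NeedsOffsetTemp(3), "offset 24: no extra temporary needed");
static_assert(NeedsOffsetTemp(1021), "offset 4096: needs an adjusted base register");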
@@ -5854,8 +5956,20 @@ void InstructionCodeGeneratorARM::VisitArrayGet(HArrayGet* instruction) {
Location temp = locations->GetTemp(0);
// Note that a potential implicit null check is handled in this
// CodeGeneratorARM::GenerateArrayLoadWithBakerReadBarrier call.
- codegen_->GenerateArrayLoadWithBakerReadBarrier(
- instruction, out_loc, obj, data_offset, index, temp, /* needs_null_check */ true);
+ DCHECK(!instruction->CanDoImplicitNullCheckOn(instruction->InputAt(0)));
+ if (index.IsConstant()) {
+ // Array load with a constant index can be treated as a field load.
+ data_offset += helpers::Int32ConstantFrom(index) << Primitive::ComponentSizeShift(type);
+ codegen_->GenerateFieldLoadWithBakerReadBarrier(instruction,
+ out_loc,
+ obj,
+ data_offset,
+ locations->GetTemp(0),
+ /* needs_null_check */ false);
+ } else {
+ codegen_->GenerateArrayLoadWithBakerReadBarrier(
+ instruction, out_loc, obj, data_offset, index, temp, /* needs_null_check */ false);
+ }
} else {
Register out = out_loc.AsRegister<Register>();
if (index.IsConstant()) {
@@ -6692,6 +6806,13 @@ void LocationsBuilderARM::VisitLoadClass(HLoadClass* cls) {
// For non-Baker read barrier we have a temp-clobbering call.
}
}
+ if (kUseBakerReadBarrier && kBakerReadBarrierLinkTimeThunksEnableForGcRoots) {
+ if (load_kind == HLoadClass::LoadKind::kBssEntry ||
+ (load_kind == HLoadClass::LoadKind::kReferrersClass &&
+ !Runtime::Current()->UseJitCompilation())) {
+ locations->AddTemp(Location::RegisterLocation(kBakerCcEntrypointRegister));
+ }
+ }
}
// NO_THREAD_SAFETY_ANALYSIS as we manipulate handles whose internal object we know does not
@@ -6871,6 +6992,9 @@ void LocationsBuilderARM::VisitLoadString(HLoadString* load) {
// TODO: Add GetReturnLocation() to the calling convention so that we can DCHECK()
// that the kPrimNot result register is the same as the first argument register.
locations->SetCustomSlowPathCallerSaves(caller_saves);
+ if (kUseBakerReadBarrier && kBakerReadBarrierLinkTimeThunksEnableForGcRoots) {
+ locations->AddTemp(Location::RegisterLocation(kBakerCcEntrypointRegister));
+ }
} else {
// For non-Baker read barrier we have a temp-clobbering call.
}
@@ -7041,6 +7165,9 @@ void LocationsBuilderARM::VisitInstanceOf(HInstanceOf* instruction) {
// Note that TypeCheckSlowPathARM uses this register too.
locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap);
locations->AddRegisterTemps(NumberOfInstanceOfTemps(type_check_kind));
+ if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
+ codegen_->MaybeAddBakerCcEntrypointTempForFields(locations);
+ }
}
void InstructionCodeGeneratorARM::VisitInstanceOf(HInstanceOf* instruction) {
@@ -7914,48 +8041,96 @@ void InstructionCodeGeneratorARM::GenerateGcRootFieldLoad(HInstruction* instruct
if (kUseBakerReadBarrier) {
// Fast path implementation of art::ReadBarrier::BarrierForRoot when
// Baker's read barrier are used.
- //
- // Note that we do not actually check the value of
- // `GetIsGcMarking()` to decide whether to mark the loaded GC
- // root or not. Instead, we load into `temp` the read barrier
- // mark entry point corresponding to register `root`. If `temp`
- // is null, it means that `GetIsGcMarking()` is false, and vice
- // versa.
- //
- // temp = Thread::Current()->pReadBarrierMarkReg ## root.reg()
- // GcRoot<mirror::Object> root = *(obj+offset); // Original reference load.
- // if (temp != nullptr) { // <=> Thread::Current()->GetIsGcMarking()
- // // Slow path.
- // root = temp(root); // root = ReadBarrier::Mark(root); // Runtime entry point call.
- // }
-
- // Slow path marking the GC root `root`. The entrypoint will already be loaded in `temp`.
- Location temp = Location::RegisterLocation(LR);
- SlowPathCodeARM* slow_path = new (GetGraph()->GetArena()) ReadBarrierMarkSlowPathARM(
- instruction, root, /* entrypoint */ temp);
- codegen_->AddSlowPath(slow_path);
+ if (kBakerReadBarrierLinkTimeThunksEnableForGcRoots &&
+ !Runtime::Current()->UseJitCompilation()) {
+ // Note that we do not actually check the value of `GetIsGcMarking()`
+ // to decide whether to mark the loaded GC root or not. Instead, we
+ // load into `temp` (actually kBakerCcEntrypointRegister) the read
+ // barrier mark introspection entrypoint. If `temp` is null, it means
+ // that `GetIsGcMarking()` is false, and vice versa.
+ //
+ // We use link-time generated thunks for the slow path. That thunk
+ // checks the reference and jumps to the entrypoint if needed.
+ //
+ // temp = Thread::Current()->pReadBarrierMarkIntrospection
+ // lr = &return_address;
+ // GcRoot<mirror::Object> root = *(obj+offset); // Original reference load.
+ // if (temp != nullptr) {
+ // goto gc_root_thunk<root_reg>(lr)
+ // }
+ // return_address:
+
+ CheckLastTempIsBakerCcEntrypointRegister(instruction);
+ bool narrow = CanEmitNarrowLdr(root_reg, obj, offset);
+ uint32_t custom_data =
+ linker::Thumb2RelativePatcher::EncodeBakerReadBarrierGcRootData(root_reg, narrow);
+ Label* bne_label = codegen_->NewBakerReadBarrierPatch(custom_data);
+
+ // entrypoint_reg =
+ // Thread::Current()->pReadBarrierMarkReg12, i.e. pReadBarrierMarkIntrospection.
+ DCHECK_EQ(IP, 12);
+ const int32_t entry_point_offset =
+ CodeGenerator::GetReadBarrierMarkEntryPointsOffset<kArmPointerSize>(IP);
+ __ LoadFromOffset(kLoadWord, kBakerCcEntrypointRegister, TR, entry_point_offset);
+
+ Label return_address;
+ __ AdrCode(LR, &return_address);
+ __ CmpConstant(kBakerCcEntrypointRegister, 0);
+ // Currently the offset is always within range. If that changes,
+ // we shall have to split the load the same way as for fields.
+ DCHECK_LT(offset, kReferenceLoadMinFarOffset);
+ DCHECK(!down_cast<Thumb2Assembler*>(GetAssembler())->IsForced32Bit());
+ ScopedForce32Bit maybe_force_32bit(down_cast<Thumb2Assembler*>(GetAssembler()), !narrow);
+ int old_position = GetAssembler()->GetBuffer()->GetPosition();
+ __ LoadFromOffset(kLoadWord, root_reg, obj, offset);
+ EmitPlaceholderBne(codegen_, bne_label);
+ __ Bind(&return_address);
+ DCHECK_EQ(old_position - GetAssembler()->GetBuffer()->GetPosition(),
+ narrow ? BAKER_MARK_INTROSPECTION_GC_ROOT_LDR_NARROW_OFFSET
+ : BAKER_MARK_INTROSPECTION_GC_ROOT_LDR_WIDE_OFFSET);
+ } else {
+ // Note that we do not actually check the value of
+ // `GetIsGcMarking()` to decide whether to mark the loaded GC
+ // root or not. Instead, we load into `temp` the read barrier
+ // mark entry point corresponding to register `root`. If `temp`
+ // is null, it means that `GetIsGcMarking()` is false, and vice
+ // versa.
+ //
+ // temp = Thread::Current()->pReadBarrierMarkReg ## root.reg()
+ // GcRoot<mirror::Object> root = *(obj+offset); // Original reference load.
+ // if (temp != nullptr) { // <=> Thread::Current()->GetIsGcMarking()
+ // // Slow path.
+ // root = temp(root); // root = ReadBarrier::Mark(root); // Runtime entry point call.
+ // }
+
+ // Slow path marking the GC root `root`. The entrypoint will already be loaded in `temp`.
+ Location temp = Location::RegisterLocation(LR);
+ SlowPathCodeARM* slow_path = new (GetGraph()->GetArena()) ReadBarrierMarkSlowPathARM(
+ instruction, root, /* entrypoint */ temp);
+ codegen_->AddSlowPath(slow_path);
- // temp = Thread::Current()->pReadBarrierMarkReg ## root.reg()
- const int32_t entry_point_offset =
- CodeGenerator::GetReadBarrierMarkEntryPointsOffset<kArmPointerSize>(root.reg());
- // Loading the entrypoint does not require a load acquire since it is only changed when
- // threads are suspended or running a checkpoint.
- __ LoadFromOffset(kLoadWord, temp.AsRegister<Register>(), TR, entry_point_offset);
+ // temp = Thread::Current()->pReadBarrierMarkReg ## root.reg()
+ const int32_t entry_point_offset =
+ CodeGenerator::GetReadBarrierMarkEntryPointsOffset<kArmPointerSize>(root.reg());
+ // Loading the entrypoint does not require a load acquire since it is only changed when
+ // threads are suspended or running a checkpoint.
+ __ LoadFromOffset(kLoadWord, temp.AsRegister<Register>(), TR, entry_point_offset);
- // /* GcRoot<mirror::Object> */ root = *(obj + offset)
- __ LoadFromOffset(kLoadWord, root_reg, obj, offset);
- static_assert(
- sizeof(mirror::CompressedReference<mirror::Object>) == sizeof(GcRoot<mirror::Object>),
- "art::mirror::CompressedReference<mirror::Object> and art::GcRoot<mirror::Object> "
- "have different sizes.");
- static_assert(sizeof(mirror::CompressedReference<mirror::Object>) == sizeof(int32_t),
- "art::mirror::CompressedReference<mirror::Object> and int32_t "
- "have different sizes.");
-
- // The entrypoint is null when the GC is not marking, this prevents one load compared to
- // checking GetIsGcMarking.
- __ CompareAndBranchIfNonZero(temp.AsRegister<Register>(), slow_path->GetEntryLabel());
- __ Bind(slow_path->GetExitLabel());
+ // /* GcRoot<mirror::Object> */ root = *(obj + offset)
+ __ LoadFromOffset(kLoadWord, root_reg, obj, offset);
+ static_assert(
+ sizeof(mirror::CompressedReference<mirror::Object>) == sizeof(GcRoot<mirror::Object>),
+ "art::mirror::CompressedReference<mirror::Object> and art::GcRoot<mirror::Object> "
+ "have different sizes.");
+ static_assert(sizeof(mirror::CompressedReference<mirror::Object>) == sizeof(int32_t),
+ "art::mirror::CompressedReference<mirror::Object> and int32_t "
+ "have different sizes.");
+
+ // The entrypoint is null when the GC is not marking, this prevents one load compared to
+ // checking GetIsGcMarking.
+ __ CompareAndBranchIfNonZero(temp.AsRegister<Register>(), slow_path->GetEntryLabel());
+ __ Bind(slow_path->GetExitLabel());
+ }
} else {
// GC root loaded through a slow path for read barriers other
// than Baker's.
@@ -7973,6 +8148,16 @@ void InstructionCodeGeneratorARM::GenerateGcRootFieldLoad(HInstruction* instruct
}
}
+void CodeGeneratorARM::MaybeAddBakerCcEntrypointTempForFields(LocationSummary* locations) {
+ DCHECK(kEmitCompilerReadBarrier);
+ DCHECK(kUseBakerReadBarrier);
+ if (kBakerReadBarrierLinkTimeThunksEnableForFields) {
+ if (!Runtime::Current()->UseJitCompilation()) {
+ locations->AddTemp(Location::RegisterLocation(kBakerCcEntrypointRegister));
+ }
+ }
+}
+
void CodeGeneratorARM::GenerateFieldLoadWithBakerReadBarrier(HInstruction* instruction,
Location ref,
Register obj,
@@ -7982,6 +8167,76 @@ void CodeGeneratorARM::GenerateFieldLoadWithBakerReadBarrier(HInstruction* instr
DCHECK(kEmitCompilerReadBarrier);
DCHECK(kUseBakerReadBarrier);
+ if (kBakerReadBarrierLinkTimeThunksEnableForFields &&
+ !Runtime::Current()->UseJitCompilation()) {
+ // Note that we do not actually check the value of `GetIsGcMarking()`
+ // to decide whether to mark the loaded reference or not. Instead, we
+ // load into `temp` (actually kBakerCcEntrypointRegister) the read
+ // barrier mark introspection entrypoint. If `temp` is null, it means
+ // that `GetIsGcMarking()` is false, and vice versa.
+ //
+ // We use link-time generated thunks for the slow path. That thunk checks
+ // the holder and jumps to the entrypoint if needed. If the holder is not
+ // gray, it creates a fake dependency and returns to the LDR instruction.
+ //
+ // temp = Thread::Current()->pReadBarrierMarkIntrospection
+ // lr = &gray_return_address;
+ // if (temp != nullptr) {
+ // goto field_thunk<holder_reg, base_reg>(lr)
+ // }
+ // not_gray_return_address:
+ // // Original reference load. If the offset is too large to fit
+ // // into LDR, we use an adjusted base register here.
+ // HeapReference<mirror::Object> reference = *(obj+offset);
+ // gray_return_address:
+
+ DCHECK_ALIGNED(offset, sizeof(mirror::HeapReference<mirror::Object>));
+ Register ref_reg = ref.AsRegister<Register>();
+ bool narrow = CanEmitNarrowLdr(ref_reg, obj, offset);
+ Register base = obj;
+ if (offset >= kReferenceLoadMinFarOffset) {
+ base = temp.AsRegister<Register>();
+ DCHECK_NE(base, kBakerCcEntrypointRegister);
+ static_assert(IsPowerOfTwo(kReferenceLoadMinFarOffset), "Expecting a power of 2.");
+ __ AddConstant(base, obj, offset & ~(kReferenceLoadMinFarOffset - 1u));
+ offset &= (kReferenceLoadMinFarOffset - 1u);
+ // Use narrow LDR only for small offsets. Generating narrow encoding LDR for the large
+ // offsets with `(offset & (kReferenceLoadMinFarOffset - 1u)) < 32u` would most likely
+ // increase the overall code size when taking the generated thunks into account.
+ DCHECK(!narrow);
+ }
+ CheckLastTempIsBakerCcEntrypointRegister(instruction);
+ uint32_t custom_data =
+ linker::Thumb2RelativePatcher::EncodeBakerReadBarrierFieldData(base, obj, narrow);
+ Label* bne_label = NewBakerReadBarrierPatch(custom_data);
+
+ // entrypoint_reg =
+ // Thread::Current()->pReadBarrierMarkReg12, i.e. pReadBarrierMarkIntrospection.
+ DCHECK_EQ(IP, 12);
+ const int32_t entry_point_offset =
+ CodeGenerator::GetReadBarrierMarkEntryPointsOffset<kArmPointerSize>(IP);
+ __ LoadFromOffset(kLoadWord, kBakerCcEntrypointRegister, TR, entry_point_offset);
+
+ Label return_address;
+ __ AdrCode(LR, &return_address);
+ __ CmpConstant(kBakerCcEntrypointRegister, 0);
+ EmitPlaceholderBne(this, bne_label);
+ DCHECK_LT(offset, kReferenceLoadMinFarOffset);
+ DCHECK(!down_cast<Thumb2Assembler*>(GetAssembler())->IsForced32Bit());
+ ScopedForce32Bit maybe_force_32bit(down_cast<Thumb2Assembler*>(GetAssembler()), !narrow);
+ int old_position = GetAssembler()->GetBuffer()->GetPosition();
+ __ LoadFromOffset(kLoadWord, ref_reg, base, offset);
+ if (needs_null_check) {
+ MaybeRecordImplicitNullCheck(instruction);
+ }
+ GetAssembler()->MaybeUnpoisonHeapReference(ref_reg);
+ __ Bind(&return_address);
+ DCHECK_EQ(old_position - GetAssembler()->GetBuffer()->GetPosition(),
+ narrow ? BAKER_MARK_INTROSPECTION_FIELD_LDR_NARROW_OFFSET
+ : BAKER_MARK_INTROSPECTION_FIELD_LDR_WIDE_OFFSET);
+ return;
+ }
+
// /* HeapReference<Object> */ ref = *(obj + offset)
Location no_index = Location::NoLocation();
ScaleFactor no_scale_factor = TIMES_1;
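When the field offset does not fit the LDR immediate that the thunk can parse, the code above folds the 4KiB-aligned part of the offset into an adjusted base register and keeps only the remainder in the LDR. The masking arithmetic as plain C++:

#include <cstdint>

constexpr uint32_t kReferenceLoadMinFarOffset = 4 * 1024;  // must be a power of two

struct SplitOffset {
  uint32_t base_adjustment;  // folded into the adjusted base register
  uint32_t ldr_offset;       // remains in the LDR immediate
};

constexpr SplitOffset SplitFarOffset(uint32_t offset) {
  return { offset & ~(kReferenceLoadMinFarOffset - 1u),
           offset & (kReferenceLoadMinFarOffset - 1u) };
}

static_assert(SplitFarOffset(0x1234).base_adjustment == 0x1000u, "4KiB-aligned part");
static_assert(SplitFarOffset(0x1234).ldr_offset == 0x234u, "fits a single LDR");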
@@ -8002,9 +8257,67 @@ void CodeGeneratorARM::GenerateArrayLoadWithBakerReadBarrier(HInstruction* instr
static_assert(
sizeof(mirror::HeapReference<mirror::Object>) == sizeof(int32_t),
"art::mirror::HeapReference<art::mirror::Object> and int32_t have different sizes.");
+ ScaleFactor scale_factor = TIMES_4;
+
+ if (kBakerReadBarrierLinkTimeThunksEnableForArrays &&
+ !Runtime::Current()->UseJitCompilation()) {
+ // Note that we do not actually check the value of `GetIsGcMarking()`
+ // to decide whether to mark the loaded reference or not. Instead, we
+ // load into `temp` (actually kBakerCcEntrypointRegister) the read
+ // barrier mark introspection entrypoint. If `temp` is null, it means
+ // that `GetIsGcMarking()` is false, and vice versa.
+ //
+ // We use link-time generated thunks for the slow path. That thunk checks
+ // the holder and jumps to the entrypoint if needed. If the holder is not
+ // gray, it creates a fake dependency and returns to the LDR instruction.
+ //
+ // temp = Thread::Current()->pReadBarrierMarkIntrospection
+ // lr = &gray_return_address;
+ // if (temp != nullptr) {
+ // goto field_thunk<holder_reg, base_reg>(lr)
+ // }
+ // not_gray_return_address:
+ // // Original reference load. If the offset is too large to fit
+ // // into LDR, we use an adjusted base register here.
+ // HeapReference<mirror::Object> reference = data[index];
+ // gray_return_address:
+
+ DCHECK(index.IsValid());
+ Register index_reg = index.AsRegister<Register>();
+ Register ref_reg = ref.AsRegister<Register>();
+ Register data_reg = temp.AsRegister<Register>();
+ DCHECK_NE(data_reg, kBakerCcEntrypointRegister);
+
+ CheckLastTempIsBakerCcEntrypointRegister(instruction);
+ uint32_t custom_data =
+ linker::Thumb2RelativePatcher::EncodeBakerReadBarrierArrayData(data_reg);
+ Label* bne_label = NewBakerReadBarrierPatch(custom_data);
+
+ // entrypoint_reg =
+ // Thread::Current()->pReadBarrierMarkReg12, i.e. pReadBarrierMarkIntrospection.
+ DCHECK_EQ(IP, 12);
+ const int32_t entry_point_offset =
+ CodeGenerator::GetReadBarrierMarkEntryPointsOffset<kArmPointerSize>(IP);
+ __ LoadFromOffset(kLoadWord, kBakerCcEntrypointRegister, TR, entry_point_offset);
+ __ AddConstant(data_reg, obj, data_offset);
+
+ Label return_address;
+ __ AdrCode(LR, &return_address);
+ __ CmpConstant(kBakerCcEntrypointRegister, 0);
+ EmitPlaceholderBne(this, bne_label);
+ ScopedForce32Bit maybe_force_32bit(down_cast<Thumb2Assembler*>(GetAssembler()));
+ int old_position = GetAssembler()->GetBuffer()->GetPosition();
+ __ ldr(ref_reg, Address(data_reg, index_reg, LSL, scale_factor));
+ DCHECK(!needs_null_check); // The thunk cannot handle the null check.
+ GetAssembler()->MaybeUnpoisonHeapReference(ref_reg);
+ __ Bind(&return_address);
+ DCHECK_EQ(old_position - GetAssembler()->GetBuffer()->GetPosition(),
+ BAKER_MARK_INTROSPECTION_ARRAY_LDR_OFFSET);
+ return;
+ }
+
// /* HeapReference<Object> */ ref =
// *(obj + data_offset + index * sizeof(HeapReference<Object>))
- ScaleFactor scale_factor = TIMES_4;
GenerateReferenceLoadWithBakerReadBarrier(
instruction, ref, obj, data_offset, index, scale_factor, temp, needs_null_check);
}
@@ -8016,9 +8329,7 @@ void CodeGeneratorARM::GenerateReferenceLoadWithBakerReadBarrier(HInstruction* i
Location index,
ScaleFactor scale_factor,
Location temp,
- bool needs_null_check,
- bool always_update_field,
- Register* temp2) {
+ bool needs_null_check) {
DCHECK(kEmitCompilerReadBarrier);
DCHECK(kUseBakerReadBarrier);
@@ -8029,6 +8340,73 @@ void CodeGeneratorARM::GenerateReferenceLoadWithBakerReadBarrier(HInstruction* i
// not.
//
// Note that we do not actually check the value of `GetIsGcMarking()`;
+ // instead, we load into `temp2` the read barrier mark entry point
+ // corresponding to register `ref`. If `temp2` is null, it means
+ // that `GetIsGcMarking()` is false, and vice versa.
+ //
+ // temp2 = Thread::Current()->pReadBarrierMarkReg ## root.reg()
+ // if (temp2 != nullptr) { // <=> Thread::Current()->GetIsGcMarking()
+ // // Slow path.
+ // uint32_t rb_state = Lockword(obj->monitor_).ReadBarrierState();
+ // lfence; // Load fence or artificial data dependency to prevent load-load reordering
+ // HeapReference<mirror::Object> ref = *src; // Original reference load.
+ // bool is_gray = (rb_state == ReadBarrier::GrayState());
+ // if (is_gray) {
+ // ref = temp2(ref); // ref = ReadBarrier::Mark(ref); // Runtime entry point call.
+ // }
+ // } else {
+ // HeapReference<mirror::Object> ref = *src; // Original reference load.
+ // }
+
+ Register temp_reg = temp.AsRegister<Register>();
+
+ // Slow path marking the object `ref` when the GC is marking. The
+ // entrypoint will already be loaded in `temp2`.
+ Location temp2 = Location::RegisterLocation(LR);
+ SlowPathCodeARM* slow_path =
+ new (GetGraph()->GetArena()) LoadReferenceWithBakerReadBarrierSlowPathARM(
+ instruction,
+ ref,
+ obj,
+ offset,
+ index,
+ scale_factor,
+ needs_null_check,
+ temp_reg,
+ /* entrypoint */ temp2);
+ AddSlowPath(slow_path);
+
+ // temp2 = Thread::Current()->pReadBarrierMarkReg ## ref.reg()
+ const int32_t entry_point_offset =
+ CodeGenerator::GetReadBarrierMarkEntryPointsOffset<kArmPointerSize>(ref.reg());
+ // Loading the entrypoint does not require a load acquire since it is only changed when
+ // threads are suspended or running a checkpoint.
+ __ LoadFromOffset(kLoadWord, temp2.AsRegister<Register>(), TR, entry_point_offset);
+ // The entrypoint is null when the GC is not marking, this prevents one load compared to
+ // checking GetIsGcMarking.
+ __ CompareAndBranchIfNonZero(temp2.AsRegister<Register>(), slow_path->GetEntryLabel());
+ // Fast path: the GC is not marking: just load the reference.
+ GenerateRawReferenceLoad(instruction, ref, obj, offset, index, scale_factor, needs_null_check);
+ __ Bind(slow_path->GetExitLabel());
+}
+
+void CodeGeneratorARM::UpdateReferenceFieldWithBakerReadBarrier(HInstruction* instruction,
+ Location ref,
+ Register obj,
+ Location field_offset,
+ Location temp,
+ bool needs_null_check,
+ Register temp2) {
+ DCHECK(kEmitCompilerReadBarrier);
+ DCHECK(kUseBakerReadBarrier);
+
+ // Query `art::Thread::Current()->GetIsGcMarking()` to decide
+ // whether we need to enter the slow path to update the reference
+ // field within `obj`. Then, in the slow path, check the gray bit
+ // in the lock word of the reference's holder (`obj`) to decide
+ // whether to mark `ref` and update the field or not.
+ //
+ // Note that we do not actually check the value of `GetIsGcMarking()`;
// instead, we load into `temp3` the read barrier mark entry point
// corresponding to register `ref`. If `temp3` is null, it means
// that `GetIsGcMarking()` is false, and vice versa.
@@ -8041,52 +8419,30 @@ void CodeGeneratorARM::GenerateReferenceLoadWithBakerReadBarrier(HInstruction* i
// HeapReference<mirror::Object> ref = *src; // Original reference load.
// bool is_gray = (rb_state == ReadBarrier::GrayState());
// if (is_gray) {
+ // old_ref = ref;
// ref = temp3(ref); // ref = ReadBarrier::Mark(ref); // Runtime entry point call.
+ // compareAndSwapObject(obj, field_offset, old_ref, ref);
// }
- // } else {
- // HeapReference<mirror::Object> ref = *src; // Original reference load.
// }
Register temp_reg = temp.AsRegister<Register>();
- // Slow path marking the object `ref` when the GC is marking. The
- // entrypoint will already be loaded in `temp3`.
+ // Slow path updating the object reference at address `obj +
+ // field_offset` when the GC is marking. The entrypoint will already
+ // be loaded in `temp3`.
Location temp3 = Location::RegisterLocation(LR);
- SlowPathCodeARM* slow_path;
- if (always_update_field) {
- DCHECK(temp2 != nullptr);
- // LoadReferenceWithBakerReadBarrierAndUpdateFieldSlowPathARM only
- // supports address of the form `obj + field_offset`, where `obj`
- // is a register and `field_offset` is a register pair (of which
- // only the lower half is used). Thus `offset` and `scale_factor`
- // above are expected to be null in this code path.
- DCHECK_EQ(offset, 0u);
- DCHECK_EQ(scale_factor, ScaleFactor::TIMES_1);
- Location field_offset = index;
- slow_path =
- new (GetGraph()->GetArena()) LoadReferenceWithBakerReadBarrierAndUpdateFieldSlowPathARM(
- instruction,
- ref,
- obj,
- offset,
- /* index */ field_offset,
- scale_factor,
- needs_null_check,
- temp_reg,
- *temp2,
- /* entrypoint */ temp3);
- } else {
- slow_path = new (GetGraph()->GetArena()) LoadReferenceWithBakerReadBarrierSlowPathARM(
- instruction,
- ref,
- obj,
- offset,
- index,
- scale_factor,
- needs_null_check,
- temp_reg,
- /* entrypoint */ temp3);
- }
+ SlowPathCodeARM* slow_path =
+ new (GetGraph()->GetArena()) LoadReferenceWithBakerReadBarrierAndUpdateFieldSlowPathARM(
+ instruction,
+ ref,
+ obj,
+ /* offset */ 0u,
+ /* index */ field_offset,
+ /* scale_factor */ ScaleFactor::TIMES_1,
+ needs_null_check,
+ temp_reg,
+ temp2,
+ /* entrypoint */ temp3);
AddSlowPath(slow_path);
// temp3 = Thread::Current()->pReadBarrierMarkReg ## ref.reg()
@@ -8098,8 +8454,8 @@ void CodeGeneratorARM::GenerateReferenceLoadWithBakerReadBarrier(HInstruction* i
// The entrypoint is null when the GC is not marking, this prevents one load compared to
// checking GetIsGcMarking.
__ CompareAndBranchIfNonZero(temp3.AsRegister<Register>(), slow_path->GetEntryLabel());
- // Fast path: just load the reference.
- GenerateRawReferenceLoad(instruction, ref, obj, offset, index, scale_factor, needs_null_check);
+ // Fast path: the GC is not marking: nothing to do (the field is
+ // up-to-date, and we don't need to load the reference).
__ Bind(slow_path->GetExitLabel());
}
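After this refactoring the two helpers differ exactly in their fast paths: GenerateReferenceLoadWithBakerReadBarrier still performs a raw reference load when the GC is not marking, while UpdateReferenceFieldWithBakerReadBarrier does nothing in that case, since the field is already up to date. A control-flow model, not ART code:

#include <cstdio>

// What GenerateReferenceLoadWithBakerReadBarrier does at runtime:
void ReferenceLoadFastPath(bool gc_is_marking) {
  if (gc_is_marking) {
    std::puts("slow path: raw load, gray-bit check, maybe mark");
  } else {
    std::puts("fast path: raw reference load only");
  }
}

// What UpdateReferenceFieldWithBakerReadBarrier does at runtime:
void ReferenceFieldUpdateFastPath(bool gc_is_marking) {
  if (gc_is_marking) {
    std::puts("slow path: raw load, gray-bit check, maybe mark and CAS the field");
  }
  // Not marking: nothing to do - the field is up to date and `ref` is unused.
}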
@@ -8370,6 +8726,11 @@ CodeGeneratorARM::PcRelativePatchInfo* CodeGeneratorARM::NewPcRelativePatch(
return &patches->back();
}
+Label* CodeGeneratorARM::NewBakerReadBarrierPatch(uint32_t custom_data) {
+ baker_read_barrier_patches_.emplace_back(custom_data);
+ return &baker_read_barrier_patches_.back().label;
+}
+
Literal* CodeGeneratorARM::DeduplicateBootImageStringLiteral(const DexFile& dex_file,
dex::StringIndex string_index) {
return boot_image_string_patches_.GetOrCreate(
@@ -8436,7 +8797,8 @@ void CodeGeneratorARM::EmitLinkerPatches(ArenaVector<LinkerPatch>* linker_patche
/* MOVW+MOVT for each entry */ 2u * pc_relative_string_patches_.size() +
boot_image_type_patches_.size() +
/* MOVW+MOVT for each entry */ 2u * pc_relative_type_patches_.size() +
- /* MOVW+MOVT for each entry */ 2u * type_bss_entry_patches_.size();
+ /* MOVW+MOVT for each entry */ 2u * type_bss_entry_patches_.size() +
+ baker_read_barrier_patches_.size();
linker_patches->reserve(size);
EmitPcRelativeLinkerPatches<LinkerPatch::DexCacheArrayPatch>(pc_relative_dex_cache_patches_,
linker_patches);
@@ -8470,6 +8832,10 @@ void CodeGeneratorARM::EmitLinkerPatches(ArenaVector<LinkerPatch>* linker_patche
target_type.dex_file,
target_type.type_index.index_));
}
+ for (const BakerReadBarrierPatchInfo& info : baker_read_barrier_patches_) {
+ linker_patches->push_back(LinkerPatch::BakerReadBarrierBranchPatch(info.label.Position(),
+ info.custom_data));
+ }
DCHECK_EQ(size, linker_patches->size());
}
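NewBakerReadBarrierPatch above hands out a pointer to a label stored inside baker_read_barrier_patches_, so the container must keep element addresses stable as later patches are appended; ART's ArenaDeque, like std::deque, guarantees that, whereas a vector would not. A sketch with standard containers standing in for the arena-backed ones:

#include <cstdint>
#include <deque>

struct Label { int position = -1; };

struct BakerReadBarrierPatchInfo {
  explicit BakerReadBarrierPatchInfo(uint32_t data) : label(), custom_data(data) {}
  Label label;
  uint32_t custom_data;
};

std::deque<BakerReadBarrierPatchInfo> baker_read_barrier_patches;

Label* NewBakerReadBarrierPatch(uint32_t custom_data) {
  baker_read_barrier_patches.emplace_back(custom_data);
  // std::deque never relocates existing elements on emplace_back, so this
  // pointer stays valid while later patches are appended; a std::vector
  // would invalidate it on reallocation.
  return &baker_read_barrier_patches.back().label;
}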
diff --git a/compiler/optimizing/code_generator_arm.h b/compiler/optimizing/code_generator_arm.h
index 86f2f21df7..b94ee20d9d 100644
--- a/compiler/optimizing/code_generator_arm.h
+++ b/compiler/optimizing/code_generator_arm.h
@@ -488,6 +488,11 @@ class CodeGeneratorARM : public CodeGenerator {
PcRelativePatchInfo* NewTypeBssEntryPatch(const DexFile& dex_file, dex::TypeIndex type_index);
PcRelativePatchInfo* NewPcRelativeDexCacheArrayPatch(const DexFile& dex_file,
uint32_t element_offset);
+
+ // Add a new baker read barrier patch and return the label to be bound
+ // before the BNE instruction.
+ Label* NewBakerReadBarrierPatch(uint32_t custom_data);
+
Literal* DeduplicateBootImageStringLiteral(const DexFile& dex_file,
dex::StringIndex string_index);
Literal* DeduplicateBootImageTypeLiteral(const DexFile& dex_file, dex::TypeIndex type_index);
@@ -503,6 +508,10 @@ class CodeGeneratorARM : public CodeGenerator {
void EmitJitRootPatches(uint8_t* code, const uint8_t* roots_data) OVERRIDE;
+ // Maybe add the reserved entrypoint register as a temporary for field loads. This temp
+ // is added only for AOT compilation if link-time generated thunks for fields are enabled.
+ void MaybeAddBakerCcEntrypointTempForFields(LocationSummary* locations);
+
// Fast path implementation of ReadBarrier::Barrier for a heap
// reference field load when Baker's read barriers are used.
void GenerateFieldLoadWithBakerReadBarrier(HInstruction* instruction,
@@ -526,11 +535,6 @@ class CodeGeneratorARM : public CodeGenerator {
// Load the object reference located at the address
// `obj + offset + (index << scale_factor)`, held by object `obj`, into
// `ref`, and mark it if needed.
- //
- // If `always_update_field` is true, the value of the reference is
- // atomically updated in the holder (`obj`). This operation
- // requires an extra temporary register, which must be provided as a
- // non-null pointer (`temp2`).
void GenerateReferenceLoadWithBakerReadBarrier(HInstruction* instruction,
Location ref,
Register obj,
@@ -538,9 +542,27 @@ class CodeGeneratorARM : public CodeGenerator {
Location index,
ScaleFactor scale_factor,
Location temp,
- bool needs_null_check,
- bool always_update_field = false,
- Register* temp2 = nullptr);
+ bool needs_null_check);
+
+ // Generate code checking whether the reference field at the
+ // address `obj + field_offset`, held by object `obj`, needs to be
+ // marked, and if so, marking it and updating the field within `obj`
+ // with the marked value.
+ //
+ // This routine is used for the implementation of the
+ // UnsafeCASObject intrinsic with Baker read barriers.
+ //
+ // This method has a structure similar to
+ // GenerateReferenceLoadWithBakerReadBarrier, but note that argument
+ // `ref` is used only as a temporary here, and thus its value should not
+ // be used afterwards.
+ void UpdateReferenceFieldWithBakerReadBarrier(HInstruction* instruction,
+ Location ref,
+ Register obj,
+ Location field_offset,
+ Location temp,
+ bool needs_null_check,
+ Register temp2);
// Generate a heap reference load (with no read barrier).
void GenerateRawReferenceLoad(HInstruction* instruction,
@@ -616,6 +638,13 @@ class CodeGeneratorARM : public CodeGenerator {
Literal*,
TypeReferenceValueComparator>;
+ struct BakerReadBarrierPatchInfo {
+ explicit BakerReadBarrierPatchInfo(uint32_t data) : label(), custom_data(data) { }
+
+ Label label;
+ uint32_t custom_data;
+ };
+
Literal* DeduplicateUint32Literal(uint32_t value, Uint32ToLiteralMap* map);
Literal* DeduplicateMethodLiteral(MethodReference target_method, MethodToLiteralMap* map);
PcRelativePatchInfo* NewPcRelativePatch(const DexFile& dex_file,
@@ -648,6 +677,8 @@ class CodeGeneratorARM : public CodeGenerator {
ArenaDeque<PcRelativePatchInfo> pc_relative_type_patches_;
// PC-relative type patch info for kBssEntry.
ArenaDeque<PcRelativePatchInfo> type_bss_entry_patches_;
+ // Baker read barrier patch info.
+ ArenaDeque<BakerReadBarrierPatchInfo> baker_read_barrier_patches_;
// Patches for string literals in JIT compiled code.
StringToLiteralMap jit_string_patches_;
diff --git a/compiler/optimizing/code_generator_arm64.cc b/compiler/optimizing/code_generator_arm64.cc
index 0bc4bd7524..d59f8b435c 100644
--- a/compiler/optimizing/code_generator_arm64.cc
+++ b/compiler/optimizing/code_generator_arm64.cc
@@ -587,8 +587,13 @@ class DeoptimizationSlowPathARM64 : public SlowPathCodeARM64 {
void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
CodeGeneratorARM64* arm64_codegen = down_cast<CodeGeneratorARM64*>(codegen);
__ Bind(GetEntryLabel());
+ LocationSummary* locations = instruction_->GetLocations();
+ SaveLiveRegisters(codegen, locations);
+ InvokeRuntimeCallingConvention calling_convention;
+ __ Mov(calling_convention.GetRegisterAt(0),
+ static_cast<uint32_t>(instruction_->AsDeoptimize()->GetDeoptimizationKind()));
arm64_codegen->InvokeRuntime(kQuickDeoptimize, instruction_, instruction_->GetDexPc(), this);
- CheckEntrypointTypes<kQuickDeoptimize, void, void>();
+ CheckEntrypointTypes<kQuickDeoptimize, void, DeoptimizationKind>();
}
const char* GetDescription() const OVERRIDE { return "DeoptimizationSlowPathARM64"; }
@@ -851,7 +856,7 @@ class LoadReferenceWithBakerReadBarrierSlowPathARM64 : public ReadBarrierMarkSlo
// Baker's read barriers, we need to perform the load of
// mirror::Object::monitor_ *before* the original reference load.
// This load-load ordering is required by the read barrier.
- // The fast path/slow path (for Baker's algorithm) should look like:
+ // The slow path (for Baker's algorithm) should look like:
//
// uint32_t rb_state = Lockword(obj->monitor_).ReadBarrierState();
// lfence; // Load fence or artificial data dependency to prevent load-load reordering
@@ -1002,6 +1007,18 @@ class LoadReferenceWithBakerReadBarrierAndUpdateFieldSlowPathARM64
__ Bind(GetEntryLabel());
+ // The implementation is similar to LoadReferenceWithBakerReadBarrierSlowPathARM64's:
+ //
+ // uint32_t rb_state = Lockword(obj->monitor_).ReadBarrierState();
+ // lfence; // Load fence or artificial data dependency to prevent load-load reordering
+ // HeapReference<mirror::Object> ref = *src; // Original reference load.
+ // bool is_gray = (rb_state == ReadBarrier::GrayState());
+ // if (is_gray) {
+ // old_ref = ref;
+ // ref = entrypoint(ref); // ref = ReadBarrier::Mark(ref); // Runtime entry point call.
+ // compareAndSwapObject(obj, field_offset, old_ref, ref);
+ // }
+
// /* int32_t */ monitor = obj->monitor_
uint32_t monitor_offset = mirror::Object::MonitorOffset().Int32Value();
__ Ldr(temp_, HeapOperand(obj_, monitor_offset));
@@ -3693,7 +3710,10 @@ void InstructionCodeGeneratorARM64::VisitIf(HIf* if_instr) {
void LocationsBuilderARM64::VisitDeoptimize(HDeoptimize* deoptimize) {
LocationSummary* locations = new (GetGraph()->GetArena())
LocationSummary(deoptimize, LocationSummary::kCallOnSlowPath);
- locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty()); // No caller-save registers.
+ InvokeRuntimeCallingConvention calling_convention;
+ RegisterSet caller_saves = RegisterSet::Empty();
+ caller_saves.Add(Location::RegisterLocation(calling_convention.GetRegisterAt(0).GetCode()));
+ locations->SetCustomSlowPathCallerSaves(caller_saves);
if (IsBooleanValueOrMaterializedCondition(deoptimize->InputAt(0))) {
locations->SetInAt(0, Location::RequiresRegister());
}
@@ -5479,6 +5499,15 @@ void InstructionCodeGeneratorARM64::VisitRem(HRem* rem) {
}
}
+void LocationsBuilderARM64::VisitConstructorFence(HConstructorFence* constructor_fence) {
+ constructor_fence->SetLocations(nullptr);
+}
+
+void InstructionCodeGeneratorARM64::VisitConstructorFence(
+ HConstructorFence* constructor_fence ATTRIBUTE_UNUSED) {
+ codegen_->GenerateMemoryBarrier(MemBarrierKind::kStoreStore);
+}
+
void LocationsBuilderARM64::VisitMemoryBarrier(HMemoryBarrier* memory_barrier) {
memory_barrier->SetLocations(nullptr);
}
@@ -6073,7 +6102,7 @@ void CodeGeneratorARM64::GenerateFieldLoadWithBakerReadBarrier(HInstruction* ins
// not_gray_return_address:
// // Original reference load. If the offset is too large to fit
// // into LDR, we use an adjusted base register here.
- // GcRoot<mirror::Object> reference = *(obj+offset);
+ // HeapReference<mirror::Object> reference = *(obj+offset);
// gray_return_address:
DCHECK_ALIGNED(offset, sizeof(mirror::HeapReference<mirror::Object>));
@@ -6168,7 +6197,7 @@ void CodeGeneratorARM64::GenerateArrayLoadWithBakerReadBarrier(HInstruction* ins
// not_gray_return_address:
// // Original reference load. If the offset is too large to fit
// // into LDR, we use an adjusted base register here.
- // GcRoot<mirror::Object> reference = data[index];
+ // HeapReference<mirror::Object> reference = data[index];
// gray_return_address:
DCHECK(index.IsValid());
@@ -6230,8 +6259,7 @@ void CodeGeneratorARM64::GenerateReferenceLoadWithBakerReadBarrier(HInstruction*
size_t scale_factor,
Register temp,
bool needs_null_check,
- bool use_load_acquire,
- bool always_update_field) {
+ bool use_load_acquire) {
DCHECK(kEmitCompilerReadBarrier);
DCHECK(kUseBakerReadBarrier);
// If we are emitting an array load, we should not be using a
@@ -6268,41 +6296,18 @@ void CodeGeneratorARM64::GenerateReferenceLoadWithBakerReadBarrier(HInstruction*
// entrypoint will already be loaded in `temp2`.
Register temp2 = lr;
Location temp2_loc = LocationFrom(temp2);
- SlowPathCodeARM64* slow_path;
- if (always_update_field) {
- // LoadReferenceWithBakerReadBarrierAndUpdateFieldSlowPathARM64
- // only supports address of the form `obj + field_offset`, where
- // `obj` is a register and `field_offset` is a register. Thus
- // `offset` and `scale_factor` above are expected to be null in
- // this code path.
- DCHECK_EQ(offset, 0u);
- DCHECK_EQ(scale_factor, 0u); /* "times 1" */
- Location field_offset = index;
- slow_path =
- new (GetGraph()->GetArena()) LoadReferenceWithBakerReadBarrierAndUpdateFieldSlowPathARM64(
- instruction,
- ref,
- obj,
- offset,
- /* index */ field_offset,
- scale_factor,
- needs_null_check,
- use_load_acquire,
- temp,
- /* entrypoint */ temp2_loc);
- } else {
- slow_path = new (GetGraph()->GetArena()) LoadReferenceWithBakerReadBarrierSlowPathARM64(
- instruction,
- ref,
- obj,
- offset,
- index,
- scale_factor,
- needs_null_check,
- use_load_acquire,
- temp,
- /* entrypoint */ temp2_loc);
- }
+ SlowPathCodeARM64* slow_path =
+ new (GetGraph()->GetArena()) LoadReferenceWithBakerReadBarrierSlowPathARM64(
+ instruction,
+ ref,
+ obj,
+ offset,
+ index,
+ scale_factor,
+ needs_null_check,
+ use_load_acquire,
+ temp,
+ /* entrypoint */ temp2_loc);
AddSlowPath(slow_path);
// temp2 = Thread::Current()->pReadBarrierMarkReg ## ref.reg()
@@ -6314,12 +6319,83 @@ void CodeGeneratorARM64::GenerateReferenceLoadWithBakerReadBarrier(HInstruction*
// The entrypoint is null when the GC is not marking, this prevents one load compared to
// checking GetIsGcMarking.
__ Cbnz(temp2, slow_path->GetEntryLabel());
- // Fast path: just load the reference.
+ // Fast path: the GC is not marking: just load the reference.
GenerateRawReferenceLoad(
instruction, ref, obj, offset, index, scale_factor, needs_null_check, use_load_acquire);
__ Bind(slow_path->GetExitLabel());
}
+void CodeGeneratorARM64::UpdateReferenceFieldWithBakerReadBarrier(HInstruction* instruction,
+ Location ref,
+ Register obj,
+ Location field_offset,
+ Register temp,
+ bool needs_null_check,
+ bool use_load_acquire) {
+ DCHECK(kEmitCompilerReadBarrier);
+ DCHECK(kUseBakerReadBarrier);
+ // If we are emitting an array load, we should not be using a
+ // Load Acquire instruction. In other words:
+ // `instruction->IsArrayGet()` => `!use_load_acquire`.
+ DCHECK(!instruction->IsArrayGet() || !use_load_acquire);
+
+ // Query `art::Thread::Current()->GetIsGcMarking()` to decide
+ // whether we need to enter the slow path to update the reference
+ // field within `obj`. Then, in the slow path, check the gray bit
+ // in the lock word of the reference's holder (`obj`) to decide
+ // whether to mark `ref` and update the field or not.
+ //
+ // Note that we do not actually check the value of `GetIsGcMarking()`;
+ // instead, we load into `temp2` the read barrier mark entry point
+ // corresponding to register `ref`. If `temp2` is null, it means
+ // that `GetIsGcMarking()` is false, and vice versa.
+ //
+ // temp2 = Thread::Current()->pReadBarrierMarkReg ## root.reg()
+ // if (temp2 != nullptr) { // <=> Thread::Current()->GetIsGcMarking()
+ // // Slow path.
+ // uint32_t rb_state = Lockword(obj->monitor_).ReadBarrierState();
+ // lfence; // Load fence or artificial data dependency to prevent load-load reordering
+ // HeapReference<mirror::Object> ref = *(obj + field_offset); // Reference load.
+ // bool is_gray = (rb_state == ReadBarrier::GrayState());
+ // if (is_gray) {
+ // old_ref = ref;
+ // ref = temp2(ref); // ref = ReadBarrier::Mark(ref); // Runtime entry point call.
+ // compareAndSwapObject(obj, field_offset, old_ref, ref);
+ // }
+ // }
+
+ // Slow path updating the object reference at address `obj + field_offset`
+ // when the GC is marking. The entrypoint will already be loaded in `temp2`.
+ Register temp2 = lr;
+ Location temp2_loc = LocationFrom(temp2);
+ SlowPathCodeARM64* slow_path =
+ new (GetGraph()->GetArena()) LoadReferenceWithBakerReadBarrierAndUpdateFieldSlowPathARM64(
+ instruction,
+ ref,
+ obj,
+ /* offset */ 0u,
+ /* index */ field_offset,
+ /* scale_factor */ 0u /* "times 1" */,
+ needs_null_check,
+ use_load_acquire,
+ temp,
+ /* entrypoint */ temp2_loc);
+ AddSlowPath(slow_path);
+
+ // temp2 = Thread::Current()->pReadBarrierMarkReg ## ref.reg()
+ const int32_t entry_point_offset =
+ CodeGenerator::GetReadBarrierMarkEntryPointsOffset<kArm64PointerSize>(ref.reg());
+ // Loading the entrypoint does not require a load acquire since it is only changed when
+ // threads are suspended or running a checkpoint.
+ __ Ldr(temp2, MemOperand(tr, entry_point_offset));
+ // The entrypoint is null when the GC is not marking, this prevents one load compared to
+ // checking GetIsGcMarking.
+ __ Cbnz(temp2, slow_path->GetEntryLabel());
+ // Fast path: the GC is not marking: nothing to do (the field is
+ // up-to-date, and we don't need to load the reference).
+ __ Bind(slow_path->GetExitLabel());
+}
+
void CodeGeneratorARM64::GenerateRawReferenceLoad(HInstruction* instruction,
Location ref,
Register obj,
diff --git a/compiler/optimizing/code_generator_arm64.h b/compiler/optimizing/code_generator_arm64.h
index 3ded3e4412..f16f625b6c 100644
--- a/compiler/optimizing/code_generator_arm64.h
+++ b/compiler/optimizing/code_generator_arm64.h
@@ -635,9 +635,6 @@ class CodeGeneratorARM64 : public CodeGenerator {
// Load the object reference located at the address
// `obj + offset + (index << scale_factor)`, held by object `obj`, into
// `ref`, and mark it if needed.
- //
- // If `always_update_field` is true, the value of the reference is
- // atomically updated in the holder (`obj`).
void GenerateReferenceLoadWithBakerReadBarrier(HInstruction* instruction,
Location ref,
vixl::aarch64::Register obj,
@@ -646,8 +643,27 @@ class CodeGeneratorARM64 : public CodeGenerator {
size_t scale_factor,
vixl::aarch64::Register temp,
bool needs_null_check,
- bool use_load_acquire,
- bool always_update_field = false);
+ bool use_load_acquire);
+
+ // Generate code checking whether the reference field at the
+ // address `obj + field_offset`, held by object `obj`, needs to be
+ // marked, and if so, marking it and updating the field within `obj`
+ // with the marked value.
+ //
+ // This routine is used for the implementation of the
+ // UnsafeCASObject intrinsic with Baker read barriers.
+ //
+ // This method has a structure similar to
+ // GenerateReferenceLoadWithBakerReadBarrier, but note that argument
+ // `ref` is used only as a temporary here, and thus its value should not
+ // be used afterwards.
+ void UpdateReferenceFieldWithBakerReadBarrier(HInstruction* instruction,
+ Location ref,
+ vixl::aarch64::Register obj,
+ Location field_offset,
+ vixl::aarch64::Register temp,
+ bool needs_null_check,
+ bool use_load_acquire);
// Generate a heap reference load (with no read barrier).
void GenerateRawReferenceLoad(HInstruction* instruction,
diff --git a/compiler/optimizing/code_generator_arm_vixl.cc b/compiler/optimizing/code_generator_arm_vixl.cc
index b6678b03ef..9f03a39bd5 100644
--- a/compiler/optimizing/code_generator_arm_vixl.cc
+++ b/compiler/optimizing/code_generator_arm_vixl.cc
@@ -16,6 +16,7 @@
#include "code_generator_arm_vixl.h"
+#include "arch/arm/asm_support_arm.h"
#include "arch/arm/instruction_set_features_arm.h"
#include "art_method.h"
#include "code_generator_utils.h"
@@ -24,6 +25,7 @@
#include "entrypoints/quick/quick_entrypoints.h"
#include "gc/accounting/card_table.h"
#include "intrinsics_arm_vixl.h"
+#include "linker/arm/relative_patcher_thumb2.h"
#include "mirror/array-inl.h"
#include "mirror/class-inl.h"
#include "thread.h"
@@ -77,6 +79,20 @@ static constexpr size_t kArmBitsPerWord = kArmWordSize * kBitsPerByte;
static constexpr int kCurrentMethodStackOffset = 0;
static constexpr uint32_t kPackedSwitchCompareJumpThreshold = 7;
+// Reference loads (except object array loads) use LDR Rt, [Rn, #offset], which can handle
+// offsets < 4KiB. For offsets >= 4KiB, the load must be emitted as two or more instructions.
+// For the Baker read barrier implementation using link-time generated thunks, we need to
+// split the offset explicitly.
+constexpr uint32_t kReferenceLoadMinFarOffset = 4 * KB;
+
+// Flags controlling the use of link-time generated thunks for Baker read barriers.
+constexpr bool kBakerReadBarrierLinkTimeThunksEnableForFields = true;
+constexpr bool kBakerReadBarrierLinkTimeThunksEnableForArrays = true;
+constexpr bool kBakerReadBarrierLinkTimeThunksEnableForGcRoots = true;
+
+// The reserved entrypoint register for link-time generated thunks.
+const vixl32::Register kBakerCcEntrypointRegister = r4;
+
#ifdef __
#error "ARM Codegen VIXL macro-assembler macro already defined."
#endif
@@ -88,6 +104,60 @@ static constexpr uint32_t kPackedSwitchCompareJumpThreshold = 7;
// Marker that code is yet to be, and must, be implemented.
#define TODO_VIXL32(level) LOG(level) << __PRETTY_FUNCTION__ << " unimplemented "
+static inline void ExcludeIPAndBakerCcEntrypointRegister(UseScratchRegisterScope* temps,
+ HInstruction* instruction) {
+ DCHECK(temps->IsAvailable(ip));
+ temps->Exclude(ip);
+ DCHECK(!temps->IsAvailable(kBakerCcEntrypointRegister));
+ DCHECK_EQ(kBakerCcEntrypointRegister.GetCode(),
+ linker::Thumb2RelativePatcher::kBakerCcEntrypointRegister);
+ DCHECK_NE(instruction->GetLocations()->GetTempCount(), 0u);
+ DCHECK(RegisterFrom(instruction->GetLocations()->GetTemp(
+ instruction->GetLocations()->GetTempCount() - 1u)).Is(kBakerCcEntrypointRegister));
+}
+
+static inline void EmitPlaceholderBne(CodeGeneratorARMVIXL* codegen, vixl32::Label* patch_label) {
+ ExactAssemblyScope eas(codegen->GetVIXLAssembler(), kMaxInstructionSizeInBytes);
+ __ bind(patch_label);
+ vixl32::Label placeholder_label;
+ __ b(ne, EncodingSize(Wide), &placeholder_label); // Placeholder, patched at link-time.
+ __ bind(&placeholder_label);
+}
+
+static inline bool CanEmitNarrowLdr(vixl32::Register rt, vixl32::Register rn, uint32_t offset) {
+ return rt.IsLow() && rn.IsLow() && offset < 32u;
+}
+
+class EmitAdrCode {
+ public:
+ EmitAdrCode(ArmVIXLMacroAssembler* assembler, vixl32::Register rd, vixl32::Label* label)
+ : assembler_(assembler), rd_(rd), label_(label) {
+ ExactAssemblyScope aas(assembler, kMaxInstructionSizeInBytes);
+ adr_location_ = assembler->GetCursorOffset();
+ assembler->adr(EncodingSize(Wide), rd, label);
+ }
+
+ ~EmitAdrCode() {
+ DCHECK(label_->IsBound());
+ // The ADR emitted by the assembler does not set the Thumb mode bit we need.
+ // TODO: Maybe extend VIXL to allow ADR for return address?
+ uint8_t* raw_adr = assembler_->GetBuffer()->GetOffsetAddress<uint8_t*>(adr_location_);
+ // Expecting ADR encoding T3 with `(offset & 1) == 0`.
+ DCHECK_EQ(raw_adr[1] & 0xfbu, 0xf2u); // Check bits 24-31, except 26.
+ DCHECK_EQ(raw_adr[0] & 0xffu, 0x0fu); // Check bits 16-23.
+ DCHECK_EQ(raw_adr[3] & 0x8fu, rd_.GetCode()); // Check bits 8-11 and 15.
+ DCHECK_EQ(raw_adr[2] & 0x01u, 0x00u); // Check bit 0, i.e. the `offset & 1`.
+ // Add the Thumb mode bit.
+ raw_adr[2] |= 0x01u;
+ }
+
+ private:
+ ArmVIXLMacroAssembler* const assembler_;
+ vixl32::Register rd_;
+ vixl32::Label* const label_;
+ int32_t adr_location_;
+};
+
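
EmitAdrCode wraps an emit-then-patch pattern: the constructor records where the
ADR landed, and the destructor rewrites the encoding once the label is bound.
A stripped-down sketch of the same RAII shape over a plain byte buffer
(illustrative only, no VIXL types):

#include <cstdint>
#include <cstdio>
#include <vector>

class PatchLowBitOnExit {
 public:
  PatchLowBitOnExit(std::vector<uint8_t>* buf, uint8_t value)
      : buf_(buf), offset_(buf->size()) {
    buf_->push_back(value);  // Emit the raw byte now...
  }
  ~PatchLowBitOnExit() {
    (*buf_)[offset_] |= 0x01u;  // ...and patch it when leaving scope, the way
                                // EmitAdrCode adds the Thumb mode bit.
  }
 private:
  std::vector<uint8_t>* const buf_;
  size_t offset_;
};

int main() {
  std::vector<uint8_t> code;
  {
    PatchLowBitOnExit adr(&code, 0x10);  // Emits 0x10, patched to 0x11.
  }
  std::printf("0x%02x\n", static_cast<unsigned>(code[0]));  // Prints 0x11.
}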
// SaveLiveRegisters and RestoreLiveRegisters from SlowPathCodeARM operate on sets of S registers,
// for each live D registers they treat two corresponding S registers as live ones.
//
@@ -608,8 +678,14 @@ class DeoptimizationSlowPathARMVIXL : public SlowPathCodeARMVIXL {
void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
CodeGeneratorARMVIXL* arm_codegen = down_cast<CodeGeneratorARMVIXL*>(codegen);
__ Bind(GetEntryLabel());
+ LocationSummary* locations = instruction_->GetLocations();
+ SaveLiveRegisters(codegen, locations);
+ InvokeRuntimeCallingConventionARMVIXL calling_convention;
+ __ Mov(calling_convention.GetRegisterAt(0),
+ static_cast<uint32_t>(instruction_->AsDeoptimize()->GetDeoptimizationKind()));
+
arm_codegen->InvokeRuntime(kQuickDeoptimize, instruction_, instruction_->GetDexPc(), this);
- CheckEntrypointTypes<kQuickDeoptimize, void, void>();
+ CheckEntrypointTypes<kQuickDeoptimize, void, DeoptimizationKind>();
}
const char* GetDescription() const OVERRIDE { return "DeoptimizationSlowPathARMVIXL"; }
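
The reworked slow path materializes the deoptimization kind into the first
argument register before calling the runtime, so one entrypoint can attribute
the deoptimization to its cause. A standalone sketch of that convention (the
entrypoint body and kind values here are hypothetical stand-ins):

#include <cstdint>
#include <cstdio>

enum class DeoptKind : uint32_t { kKindA = 0, kKindB = 1 };  // Hypothetical.

// Stand-in for the kQuickDeoptimize runtime entrypoint, which now takes the
// kind as its single argument.
void QuickDeoptimize(DeoptKind kind) {
  std::printf("deoptimize, kind=%u\n", static_cast<unsigned>(kind));
}

void EmitDeoptSlowPath(DeoptKind kind) {
  // Corresponds to: Mov(r0, kind); InvokeRuntime(kQuickDeoptimize, ...).
  QuickDeoptimize(kind);
}

int main() { EmitDeoptSlowPath(DeoptKind::kKindB); }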
@@ -845,7 +921,7 @@ class LoadReferenceWithBakerReadBarrierSlowPathARMVIXL : public ReadBarrierMarkS
// Baker's read barriers, we need to perform the load of
// mirror::Object::monitor_ *before* the original reference load.
// This load-load ordering is required by the read barrier.
- // The fast path/slow path (for Baker's algorithm) should look like:
+ // The slow path (for Baker's algorithm) should look like:
//
// uint32_t rb_state = Lockword(obj->monitor_).ReadBarrierState();
// lfence; // Load fence or artificial data dependency to prevent load-load reordering
@@ -987,6 +1063,18 @@ class LoadReferenceWithBakerReadBarrierAndUpdateFieldSlowPathARMVIXL
__ Bind(GetEntryLabel());
+ // The implementation is similar to LoadReferenceWithBakerReadBarrierSlowPathARMVIXL's:
+ //
+ // uint32_t rb_state = Lockword(obj->monitor_).ReadBarrierState();
+ // lfence; // Load fence or artificial data dependency to prevent load-load reordering
+ // HeapReference<mirror::Object> ref = *src; // Original reference load.
+ // bool is_gray = (rb_state == ReadBarrier::GrayState());
+ // if (is_gray) {
+ // old_ref = ref;
+ // ref = entrypoint(ref); // ref = ReadBarrier::Mark(ref); // Runtime entry point call.
+ // compareAndSwapObject(obj, field_offset, old_ref, ref);
+ // }
+
CodeGeneratorARMVIXL* arm_codegen = down_cast<CodeGeneratorARMVIXL*>(codegen);
// /* int32_t */ monitor = obj->monitor_
@@ -2012,6 +2100,7 @@ CodeGeneratorARMVIXL::CodeGeneratorARMVIXL(HGraph* graph,
graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
pc_relative_type_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
type_bss_entry_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
+ baker_read_barrier_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
jit_string_patches_(StringReferenceValueComparator(),
graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
jit_class_patches_(TypeReferenceValueComparator(),
@@ -2704,7 +2793,10 @@ void InstructionCodeGeneratorARMVIXL::VisitIf(HIf* if_instr) {
void LocationsBuilderARMVIXL::VisitDeoptimize(HDeoptimize* deoptimize) {
LocationSummary* locations = new (GetGraph()->GetArena())
LocationSummary(deoptimize, LocationSummary::kCallOnSlowPath);
- locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty()); // No caller-save registers.
+ InvokeRuntimeCallingConventionARMVIXL calling_convention;
+ RegisterSet caller_saves = RegisterSet::Empty();
+ caller_saves.Add(LocationFrom(calling_convention.GetRegisterAt(0)));
+ locations->SetCustomSlowPathCallerSaves(caller_saves);
if (IsBooleanValueOrMaterializedCondition(deoptimize->InputAt(0))) {
locations->SetInAt(0, Location::RequiresRegister());
}
@@ -3103,6 +3195,15 @@ void InstructionCodeGeneratorARMVIXL::VisitDoubleConstant(
// Will be generated at use site.
}
+void LocationsBuilderARMVIXL::VisitConstructorFence(HConstructorFence* constructor_fence) {
+ constructor_fence->SetLocations(nullptr);
+}
+
+void InstructionCodeGeneratorARMVIXL::VisitConstructorFence(
+ HConstructorFence* constructor_fence ATTRIBUTE_UNUSED) {
+ codegen_->GenerateMemoryBarrier(MemBarrierKind::kStoreStore);
+}
+
void LocationsBuilderARMVIXL::VisitMemoryBarrier(HMemoryBarrier* memory_barrier) {
memory_barrier->SetLocations(nullptr);
}
@@ -5280,7 +5381,18 @@ void LocationsBuilderARMVIXL::HandleFieldGet(HInstruction* instruction,
} else if (object_field_get_with_read_barrier && kUseBakerReadBarrier) {
// We need a temporary register for the read barrier marking slow
// path in CodeGeneratorARMVIXL::GenerateFieldLoadWithBakerReadBarrier.
- locations->AddTemp(Location::RequiresRegister());
+ if (kBakerReadBarrierLinkTimeThunksEnableForFields &&
+ !Runtime::Current()->UseJitCompilation()) {
+ // If link-time thunks for the Baker read barrier are enabled, for AOT
+ // loads we need a temporary only if the offset is too big.
+ if (field_info.GetFieldOffset().Uint32Value() >= kReferenceLoadMinFarOffset) {
+ locations->AddTemp(Location::RequiresRegister());
+ }
+ // And we always need the reserved entrypoint register.
+ locations->AddTemp(Location::RegisterLocation(kBakerCcEntrypointRegister.GetCode()));
+ } else {
+ locations->AddTemp(Location::RequiresRegister());
+ }
}
}
@@ -5747,11 +5859,35 @@ void LocationsBuilderARMVIXL::VisitArrayGet(HArrayGet* instruction) {
Location::RequiresRegister(),
object_array_get_with_read_barrier ? Location::kOutputOverlap : Location::kNoOutputOverlap);
}
- // We need a temporary register for the read barrier marking slow
- // path in CodeGeneratorARMVIXL::GenerateArrayLoadWithBakerReadBarrier.
- // Also need for String compression feature.
- if ((object_array_get_with_read_barrier && kUseBakerReadBarrier)
- || (mirror::kUseStringCompression && instruction->IsStringCharAt())) {
+ if (object_array_get_with_read_barrier && kUseBakerReadBarrier) {
+ // We need a temporary register for the read barrier marking slow
+ // path in CodeGeneratorARMVIXL::GenerateArrayLoadWithBakerReadBarrier.
+ if (kBakerReadBarrierLinkTimeThunksEnableForFields &&
+ !Runtime::Current()->UseJitCompilation() &&
+ instruction->GetIndex()->IsConstant()) {
+ // Array loads with constant index are treated as field loads.
+ // If link-time thunks for the Baker read barrier are enabled, for AOT
+ // constant index loads we need a temporary only if the offset is too big.
+ uint32_t offset = CodeGenerator::GetArrayDataOffset(instruction);
+ uint32_t index = instruction->GetIndex()->AsIntConstant()->GetValue();
+ offset += index << Primitive::ComponentSizeShift(Primitive::kPrimNot);
+ if (offset >= kReferenceLoadMinFarOffset) {
+ locations->AddTemp(Location::RequiresRegister());
+ }
+ // And we always need the reserved entrypoint register.
+ locations->AddTemp(Location::RegisterLocation(kBakerCcEntrypointRegister.GetCode()));
+ } else if (kBakerReadBarrierLinkTimeThunksEnableForArrays &&
+ !Runtime::Current()->UseJitCompilation() &&
+ !instruction->GetIndex()->IsConstant()) {
+ // We need a non-scratch temporary for the array data pointer.
+ locations->AddTemp(Location::RequiresRegister());
+ // And we always need the reserved entrypoint register.
+ locations->AddTemp(Location::RegisterLocation(kBakerCcEntrypointRegister.GetCode()));
+ } else {
+ locations->AddTemp(Location::RequiresRegister());
+ }
+ } else if (mirror::kUseStringCompression && instruction->IsStringCharAt()) {
+    // Also need a temporary for the String compression feature.
locations->AddTemp(Location::RequiresRegister());
}
}
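
The constant-index case above folds the index into a fixed, field-style offset
and only reserves the extra temporary once that offset stops fitting the LDR
immediate range. A small sketch of the decision (header size and component
shift are illustrative stand-ins, not ART's exact values):

#include <cstdint>
#include <cstdio>

constexpr uint32_t kReferenceLoadMinFarOffset = 4 * 1024;
constexpr uint32_t kArrayDataOffset = 12;   // Hypothetical array header size.
constexpr uint32_t kRefComponentShift = 2;  // 4-byte heap references.

bool NeedsExtraTemp(uint32_t const_index) {
  // offset = data_offset + (index << ComponentSizeShift(kPrimNot))
  uint32_t offset = kArrayDataOffset + (const_index << kRefComponentShift);
  return offset >= kReferenceLoadMinFarOffset;
}

int main() {
  std::printf("index 10   -> extra temp: %d\n", NeedsExtraTemp(10));    // 0
  std::printf("index 2000 -> extra temp: %d\n", NeedsExtraTemp(2000));  // 1
}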
@@ -5862,8 +5998,20 @@ void InstructionCodeGeneratorARMVIXL::VisitArrayGet(HArrayGet* instruction) {
Location temp = locations->GetTemp(0);
// Note that a potential implicit null check is handled in this
// CodeGeneratorARMVIXL::GenerateArrayLoadWithBakerReadBarrier call.
- codegen_->GenerateArrayLoadWithBakerReadBarrier(
- instruction, out_loc, obj, data_offset, index, temp, /* needs_null_check */ true);
+ DCHECK(!instruction->CanDoImplicitNullCheckOn(instruction->InputAt(0)));
+ if (index.IsConstant()) {
+ // Array load with a constant index can be treated as a field load.
+ data_offset += Int32ConstantFrom(index) << Primitive::ComponentSizeShift(type);
+ codegen_->GenerateFieldLoadWithBakerReadBarrier(instruction,
+ out_loc,
+ obj,
+ data_offset,
+ locations->GetTemp(0),
+ /* needs_null_check */ false);
+ } else {
+ codegen_->GenerateArrayLoadWithBakerReadBarrier(
+ instruction, out_loc, obj, data_offset, index, temp, /* needs_null_check */ false);
+ }
} else {
vixl32::Register out = OutputRegister(instruction);
if (index.IsConstant()) {
@@ -6753,6 +6901,13 @@ void LocationsBuilderARMVIXL::VisitLoadClass(HLoadClass* cls) {
// For non-Baker read barrier we have a temp-clobbering call.
}
}
+ if (kUseBakerReadBarrier && kBakerReadBarrierLinkTimeThunksEnableForGcRoots) {
+ if (load_kind == HLoadClass::LoadKind::kBssEntry ||
+ (load_kind == HLoadClass::LoadKind::kReferrersClass &&
+ !Runtime::Current()->UseJitCompilation())) {
+ locations->AddTemp(Location::RegisterLocation(kBakerCcEntrypointRegister.GetCode()));
+ }
+ }
}
// NO_THREAD_SAFETY_ANALYSIS as we manipulate handles whose internal object we know does not
@@ -6929,6 +7084,9 @@ void LocationsBuilderARMVIXL::VisitLoadString(HLoadString* load) {
// TODO: Add GetReturnLocation() to the calling convention so that we can DCHECK()
// that the the kPrimNot result register is the same as the first argument register.
locations->SetCustomSlowPathCallerSaves(caller_saves);
+ if (kUseBakerReadBarrier && kBakerReadBarrierLinkTimeThunksEnableForGcRoots) {
+ locations->AddTemp(Location::RegisterLocation(kBakerCcEntrypointRegister.GetCode()));
+ }
} else {
// For non-Baker read barrier we have a temp-clobbering call.
}
@@ -7091,6 +7249,9 @@ void LocationsBuilderARMVIXL::VisitInstanceOf(HInstanceOf* instruction) {
// Note that TypeCheckSlowPathARM uses this register too.
locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap);
locations->AddRegisterTemps(NumberOfInstanceOfTemps(type_check_kind));
+ if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
+ codegen_->MaybeAddBakerCcEntrypointTempForFields(locations);
+ }
}
void InstructionCodeGeneratorARMVIXL::VisitInstanceOf(HInstanceOf* instruction) {
@@ -7989,48 +8150,98 @@ void InstructionCodeGeneratorARMVIXL::GenerateGcRootFieldLoad(
if (kUseBakerReadBarrier) {
// Fast path implementation of art::ReadBarrier::BarrierForRoot when
// Baker's read barrier are used.
- //
- // Note that we do not actually check the value of
- // `GetIsGcMarking()` to decide whether to mark the loaded GC
- // root or not. Instead, we load into `temp` the read barrier
- // mark entry point corresponding to register `root`. If `temp`
- // is null, it means that `GetIsGcMarking()` is false, and vice
- // versa.
- //
- // temp = Thread::Current()->pReadBarrierMarkReg ## root.reg()
- // GcRoot<mirror::Object> root = *(obj+offset); // Original reference load.
- // if (temp != nullptr) { // <=> Thread::Current()->GetIsGcMarking()
- // // Slow path.
- // root = temp(root); // root = ReadBarrier::Mark(root); // Runtime entry point call.
- // }
-
- // Slow path marking the GC root `root`. The entrypoint will already be loaded in `temp`.
- Location temp = LocationFrom(lr);
- SlowPathCodeARMVIXL* slow_path = new (GetGraph()->GetArena()) ReadBarrierMarkSlowPathARMVIXL(
- instruction, root, /* entrypoint */ temp);
- codegen_->AddSlowPath(slow_path);
+ if (kBakerReadBarrierLinkTimeThunksEnableForGcRoots &&
+ !Runtime::Current()->UseJitCompilation()) {
+ // Note that we do not actually check the value of `GetIsGcMarking()`
+ // to decide whether to mark the loaded GC root or not. Instead, we
+ // load into `temp` (actually kBakerCcEntrypointRegister) the read
+ // barrier mark introspection entrypoint. If `temp` is null, it means
+ // that `GetIsGcMarking()` is false, and vice versa.
+ //
+ // We use link-time generated thunks for the slow path. That thunk
+ // checks the reference and jumps to the entrypoint if needed.
+ //
+ // temp = Thread::Current()->pReadBarrierMarkIntrospection
+ // lr = &return_address;
+ // GcRoot<mirror::Object> root = *(obj+offset); // Original reference load.
+ // if (temp != nullptr) {
+ // goto gc_root_thunk<root_reg>(lr)
+ // }
+ // return_address:
- // temp = Thread::Current()->pReadBarrierMarkReg ## root.reg()
- const int32_t entry_point_offset =
- CodeGenerator::GetReadBarrierMarkEntryPointsOffset<kArmPointerSize>(root.reg());
- // Loading the entrypoint does not require a load acquire since it is only changed when
- // threads are suspended or running a checkpoint.
- GetAssembler()->LoadFromOffset(kLoadWord, RegisterFrom(temp), tr, entry_point_offset);
+ UseScratchRegisterScope temps(GetVIXLAssembler());
+ ExcludeIPAndBakerCcEntrypointRegister(&temps, instruction);
+ bool narrow = CanEmitNarrowLdr(root_reg, obj, offset);
+ uint32_t custom_data = linker::Thumb2RelativePatcher::EncodeBakerReadBarrierGcRootData(
+ root_reg.GetCode(), narrow);
+ vixl32::Label* bne_label = codegen_->NewBakerReadBarrierPatch(custom_data);
+
+ // entrypoint_reg =
+ // Thread::Current()->pReadBarrierMarkReg12, i.e. pReadBarrierMarkIntrospection.
+ DCHECK_EQ(ip.GetCode(), 12u);
+ const int32_t entry_point_offset =
+ CodeGenerator::GetReadBarrierMarkEntryPointsOffset<kArmPointerSize>(ip.GetCode());
+ __ Ldr(kBakerCcEntrypointRegister, MemOperand(tr, entry_point_offset));
+
+ vixl::EmissionCheckScope guard(GetVIXLAssembler(),
+ 4 * vixl32::kMaxInstructionSizeInBytes);
+ vixl32::Label return_address;
+ EmitAdrCode adr(GetVIXLAssembler(), lr, &return_address);
+ __ cmp(kBakerCcEntrypointRegister, Operand(0));
+ // Currently the offset is always within range. If that changes,
+ // we shall have to split the load the same way as for fields.
+ DCHECK_LT(offset, kReferenceLoadMinFarOffset);
+ ptrdiff_t old_offset = GetVIXLAssembler()->GetBuffer()->GetCursorOffset();
+ __ ldr(EncodingSize(narrow ? Narrow : Wide), root_reg, MemOperand(obj, offset));
+ EmitPlaceholderBne(codegen_, bne_label);
+ __ Bind(&return_address);
+ DCHECK_EQ(old_offset - GetVIXLAssembler()->GetBuffer()->GetCursorOffset(),
+ narrow ? BAKER_MARK_INTROSPECTION_GC_ROOT_LDR_NARROW_OFFSET
+ : BAKER_MARK_INTROSPECTION_GC_ROOT_LDR_WIDE_OFFSET);
+ } else {
+ // Note that we do not actually check the value of
+ // `GetIsGcMarking()` to decide whether to mark the loaded GC
+ // root or not. Instead, we load into `temp` the read barrier
+ // mark entry point corresponding to register `root`. If `temp`
+ // is null, it means that `GetIsGcMarking()` is false, and vice
+ // versa.
+ //
+ // temp = Thread::Current()->pReadBarrierMarkReg ## root.reg()
+ // GcRoot<mirror::Object> root = *(obj+offset); // Original reference load.
+ // if (temp != nullptr) { // <=> Thread::Current()->GetIsGcMarking()
+ // // Slow path.
+ // root = temp(root); // root = ReadBarrier::Mark(root); // Runtime entry point call.
+ // }
+
+ // Slow path marking the GC root `root`. The entrypoint will already be loaded in `temp`.
+ Location temp = LocationFrom(lr);
+ SlowPathCodeARMVIXL* slow_path =
+ new (GetGraph()->GetArena()) ReadBarrierMarkSlowPathARMVIXL(
+ instruction, root, /* entrypoint */ temp);
+ codegen_->AddSlowPath(slow_path);
- // /* GcRoot<mirror::Object> */ root = *(obj + offset)
- GetAssembler()->LoadFromOffset(kLoadWord, root_reg, obj, offset);
- static_assert(
- sizeof(mirror::CompressedReference<mirror::Object>) == sizeof(GcRoot<mirror::Object>),
- "art::mirror::CompressedReference<mirror::Object> and art::GcRoot<mirror::Object> "
- "have different sizes.");
- static_assert(sizeof(mirror::CompressedReference<mirror::Object>) == sizeof(int32_t),
- "art::mirror::CompressedReference<mirror::Object> and int32_t "
- "have different sizes.");
-
- // The entrypoint is null when the GC is not marking, this prevents one load compared to
- // checking GetIsGcMarking.
- __ CompareAndBranchIfNonZero(RegisterFrom(temp), slow_path->GetEntryLabel());
- __ Bind(slow_path->GetExitLabel());
+ // temp = Thread::Current()->pReadBarrierMarkReg ## root.reg()
+ const int32_t entry_point_offset =
+ CodeGenerator::GetReadBarrierMarkEntryPointsOffset<kArmPointerSize>(root.reg());
+ // Loading the entrypoint does not require a load acquire since it is only changed when
+ // threads are suspended or running a checkpoint.
+ GetAssembler()->LoadFromOffset(kLoadWord, RegisterFrom(temp), tr, entry_point_offset);
+
+ // /* GcRoot<mirror::Object> */ root = *(obj + offset)
+ GetAssembler()->LoadFromOffset(kLoadWord, root_reg, obj, offset);
+ static_assert(
+ sizeof(mirror::CompressedReference<mirror::Object>) == sizeof(GcRoot<mirror::Object>),
+ "art::mirror::CompressedReference<mirror::Object> and art::GcRoot<mirror::Object> "
+ "have different sizes.");
+ static_assert(sizeof(mirror::CompressedReference<mirror::Object>) == sizeof(int32_t),
+ "art::mirror::CompressedReference<mirror::Object> and int32_t "
+ "have different sizes.");
+
+      // The entrypoint is null when the GC is not marking; this saves one load
+      // compared to checking GetIsGcMarking().
+ __ CompareAndBranchIfNonZero(RegisterFrom(temp), slow_path->GetEntryLabel());
+ __ Bind(slow_path->GetExitLabel());
+ }
} else {
// GC root loaded through a slow path for read barriers other
// than Baker's.
@@ -8048,6 +8259,16 @@ void InstructionCodeGeneratorARMVIXL::GenerateGcRootFieldLoad(
}
}
+void CodeGeneratorARMVIXL::MaybeAddBakerCcEntrypointTempForFields(LocationSummary* locations) {
+ DCHECK(kEmitCompilerReadBarrier);
+ DCHECK(kUseBakerReadBarrier);
+ if (kBakerReadBarrierLinkTimeThunksEnableForFields) {
+ if (!Runtime::Current()->UseJitCompilation()) {
+ locations->AddTemp(Location::RegisterLocation(kBakerCcEntrypointRegister.GetCode()));
+ }
+ }
+}
+
void CodeGeneratorARMVIXL::GenerateFieldLoadWithBakerReadBarrier(HInstruction* instruction,
Location ref,
vixl32::Register obj,
@@ -8057,6 +8278,85 @@ void CodeGeneratorARMVIXL::GenerateFieldLoadWithBakerReadBarrier(HInstruction* i
DCHECK(kEmitCompilerReadBarrier);
DCHECK(kUseBakerReadBarrier);
+ if (kBakerReadBarrierLinkTimeThunksEnableForFields &&
+ !Runtime::Current()->UseJitCompilation()) {
+ // Note that we do not actually check the value of `GetIsGcMarking()`
+ // to decide whether to mark the loaded reference or not. Instead, we
+ // load into `temp` (actually kBakerCcEntrypointRegister) the read
+ // barrier mark introspection entrypoint. If `temp` is null, it means
+ // that `GetIsGcMarking()` is false, and vice versa.
+ //
+ // We use link-time generated thunks for the slow path. That thunk checks
+ // the holder and jumps to the entrypoint if needed. If the holder is not
+ // gray, it creates a fake dependency and returns to the LDR instruction.
+ //
+ // temp = Thread::Current()->pReadBarrierMarkIntrospection
+ // lr = &gray_return_address;
+ // if (temp != nullptr) {
+ // goto field_thunk<holder_reg, base_reg>(lr)
+ // }
+ // not_gray_return_address:
+ // // Original reference load. If the offset is too large to fit
+ // // into LDR, we use an adjusted base register here.
+ // HeapReference<mirror::Object> reference = *(obj+offset);
+ // gray_return_address:
+
+ DCHECK_ALIGNED(offset, sizeof(mirror::HeapReference<mirror::Object>));
+ vixl32::Register ref_reg = RegisterFrom(ref, Primitive::kPrimNot);
+ bool narrow = CanEmitNarrowLdr(ref_reg, obj, offset);
+ vixl32::Register base = obj;
+ if (offset >= kReferenceLoadMinFarOffset) {
+ base = RegisterFrom(temp);
+ DCHECK(!base.Is(kBakerCcEntrypointRegister));
+ static_assert(IsPowerOfTwo(kReferenceLoadMinFarOffset), "Expecting a power of 2.");
+ __ Add(base, obj, Operand(offset & ~(kReferenceLoadMinFarOffset - 1u)));
+ offset &= (kReferenceLoadMinFarOffset - 1u);
+ // Use narrow LDR only for small offsets. Generating narrow encoding LDR for the large
+ // offsets with `(offset & (kReferenceLoadMinFarOffset - 1u)) < 32u` would most likely
+ // increase the overall code size when taking the generated thunks into account.
+ DCHECK(!narrow);
+ }
+ UseScratchRegisterScope temps(GetVIXLAssembler());
+ ExcludeIPAndBakerCcEntrypointRegister(&temps, instruction);
+ uint32_t custom_data = linker::Thumb2RelativePatcher::EncodeBakerReadBarrierFieldData(
+ base.GetCode(), obj.GetCode(), narrow);
+ vixl32::Label* bne_label = NewBakerReadBarrierPatch(custom_data);
+
+ // entrypoint_reg =
+ // Thread::Current()->pReadBarrierMarkReg12, i.e. pReadBarrierMarkIntrospection.
+ DCHECK_EQ(ip.GetCode(), 12u);
+ const int32_t entry_point_offset =
+ CodeGenerator::GetReadBarrierMarkEntryPointsOffset<kArmPointerSize>(ip.GetCode());
+ __ Ldr(kBakerCcEntrypointRegister, MemOperand(tr, entry_point_offset));
+
+ vixl::EmissionCheckScope guard(
+ GetVIXLAssembler(),
+ (kPoisonHeapReferences ? 5u : 4u) * vixl32::kMaxInstructionSizeInBytes);
+ vixl32::Label return_address;
+ EmitAdrCode adr(GetVIXLAssembler(), lr, &return_address);
+ __ cmp(kBakerCcEntrypointRegister, Operand(0));
+ EmitPlaceholderBne(this, bne_label);
+ ptrdiff_t old_offset = GetVIXLAssembler()->GetBuffer()->GetCursorOffset();
+ __ ldr(EncodingSize(narrow ? Narrow : Wide), ref_reg, MemOperand(base, offset));
+ if (needs_null_check) {
+ MaybeRecordImplicitNullCheck(instruction);
+ }
+ // Note: We need a specific width for the unpoisoning NEG.
+ if (kPoisonHeapReferences) {
+ if (narrow) {
+ // The only 16-bit encoding is T1 which sets flags outside IT block (i.e. RSBS, not RSB).
+ __ rsbs(EncodingSize(Narrow), ref_reg, ref_reg, Operand(0));
+ } else {
+ __ rsb(EncodingSize(Wide), ref_reg, ref_reg, Operand(0));
+ }
+ }
+ __ Bind(&return_address);
+ DCHECK_EQ(old_offset - GetVIXLAssembler()->GetBuffer()->GetCursorOffset(),
+ narrow ? BAKER_MARK_INTROSPECTION_FIELD_LDR_NARROW_OFFSET
+ : BAKER_MARK_INTROSPECTION_FIELD_LDR_WIDE_OFFSET);
+ return;
+ }
+
// /* HeapReference<Object> */ ref = *(obj + offset)
Location no_index = Location::NoLocation();
ScaleFactor no_scale_factor = TIMES_1;
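
For far offsets the code above splits the address: the 4 KiB-aligned part is
added into an adjusted base register and only the remainder stays in the LDR
immediate. A worked sketch of that split, assuming only that
kReferenceLoadMinFarOffset is a power of two:

#include <cstdint>
#include <cstdio>

constexpr uint32_t kReferenceLoadMinFarOffset = 4 * 1024;

struct SplitOffset {
  uint32_t base_add;  // Folded into the base: ADD base, obj, #base_add.
  uint32_t ldr_imm;   // Immediate kept in the LDR itself (< 4 KiB).
};

SplitOffset Split(uint32_t offset) {
  static_assert((kReferenceLoadMinFarOffset & (kReferenceLoadMinFarOffset - 1u)) == 0u,
                "Expecting a power of 2.");
  return {offset & ~(kReferenceLoadMinFarOffset - 1u),
          offset & (kReferenceLoadMinFarOffset - 1u)};
}

int main() {
  SplitOffset s = Split(0x1234);
  // 0x1234 -> ADD #0x1000, LDR #0x234.
  std::printf("base_add=0x%x ldr_imm=0x%x\n",
              static_cast<unsigned>(s.base_add), static_cast<unsigned>(s.ldr_imm));
}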
@@ -8077,9 +8377,73 @@ void CodeGeneratorARMVIXL::GenerateArrayLoadWithBakerReadBarrier(HInstruction* i
static_assert(
sizeof(mirror::HeapReference<mirror::Object>) == sizeof(int32_t),
"art::mirror::HeapReference<art::mirror::Object> and int32_t have different sizes.");
+ ScaleFactor scale_factor = TIMES_4;
+
+ if (kBakerReadBarrierLinkTimeThunksEnableForArrays &&
+ !Runtime::Current()->UseJitCompilation()) {
+ // Note that we do not actually check the value of `GetIsGcMarking()`
+ // to decide whether to mark the loaded reference or not. Instead, we
+ // load into `temp` (actually kBakerCcEntrypointRegister) the read
+ // barrier mark introspection entrypoint. If `temp` is null, it means
+ // that `GetIsGcMarking()` is false, and vice versa.
+ //
+ // We use link-time generated thunks for the slow path. That thunk checks
+ // the holder and jumps to the entrypoint if needed. If the holder is not
+ // gray, it creates a fake dependency and returns to the LDR instruction.
+ //
+ // temp = Thread::Current()->pReadBarrierMarkIntrospection
+ // lr = &gray_return_address;
+ // if (temp != nullptr) {
+ // goto field_thunk<holder_reg, base_reg>(lr)
+ // }
+ // not_gray_return_address:
+ // // Original reference load. If the offset is too large to fit
+ // // into LDR, we use an adjusted base register here.
+ // HeapReference<mirror::Object> reference = data[index];
+ // gray_return_address:
+
+ DCHECK(index.IsValid());
+ vixl32::Register index_reg = RegisterFrom(index, Primitive::kPrimInt);
+ vixl32::Register ref_reg = RegisterFrom(ref, Primitive::kPrimNot);
+ vixl32::Register data_reg = RegisterFrom(temp, Primitive::kPrimInt); // Raw pointer.
+ DCHECK(!data_reg.Is(kBakerCcEntrypointRegister));
+
+ UseScratchRegisterScope temps(GetVIXLAssembler());
+ ExcludeIPAndBakerCcEntrypointRegister(&temps, instruction);
+ uint32_t custom_data =
+ linker::Thumb2RelativePatcher::EncodeBakerReadBarrierArrayData(data_reg.GetCode());
+ vixl32::Label* bne_label = NewBakerReadBarrierPatch(custom_data);
+
+ // entrypoint_reg =
+    //     Thread::Current()->pReadBarrierMarkReg12, i.e. pReadBarrierMarkIntrospection.
+ DCHECK_EQ(ip.GetCode(), 12u);
+ const int32_t entry_point_offset =
+ CodeGenerator::GetReadBarrierMarkEntryPointsOffset<kArmPointerSize>(ip.GetCode());
+ __ Ldr(kBakerCcEntrypointRegister, MemOperand(tr, entry_point_offset));
+ __ Add(data_reg, obj, Operand(data_offset));
+
+ vixl::EmissionCheckScope guard(
+ GetVIXLAssembler(),
+ (kPoisonHeapReferences ? 5u : 4u) * vixl32::kMaxInstructionSizeInBytes);
+ vixl32::Label return_address;
+ EmitAdrCode adr(GetVIXLAssembler(), lr, &return_address);
+ __ cmp(kBakerCcEntrypointRegister, Operand(0));
+ EmitPlaceholderBne(this, bne_label);
+ ptrdiff_t old_offset = GetVIXLAssembler()->GetBuffer()->GetCursorOffset();
+ __ ldr(ref_reg, MemOperand(data_reg, index_reg, vixl32::LSL, scale_factor));
+ DCHECK(!needs_null_check); // The thunk cannot handle the null check.
+ // Note: We need a Wide NEG for the unpoisoning.
+ if (kPoisonHeapReferences) {
+ __ rsb(EncodingSize(Wide), ref_reg, ref_reg, Operand(0));
+ }
+ __ Bind(&return_address);
+ DCHECK_EQ(old_offset - GetVIXLAssembler()->GetBuffer()->GetCursorOffset(),
+ BAKER_MARK_INTROSPECTION_ARRAY_LDR_OFFSET);
+ return;
+ }
+
// /* HeapReference<Object> */ ref =
// *(obj + data_offset + index * sizeof(HeapReference<Object>))
- ScaleFactor scale_factor = TIMES_4;
GenerateReferenceLoadWithBakerReadBarrier(
instruction, ref, obj, data_offset, index, scale_factor, temp, needs_null_check);
}
@@ -8091,9 +8455,7 @@ void CodeGeneratorARMVIXL::GenerateReferenceLoadWithBakerReadBarrier(HInstructio
Location index,
ScaleFactor scale_factor,
Location temp,
- bool needs_null_check,
- bool always_update_field,
- vixl32::Register* temp2) {
+ bool needs_null_check) {
DCHECK(kEmitCompilerReadBarrier);
DCHECK(kUseBakerReadBarrier);
@@ -8104,6 +8466,73 @@ void CodeGeneratorARMVIXL::GenerateReferenceLoadWithBakerReadBarrier(HInstructio
// not.
//
// Note that we do not actually check the value of `GetIsGcMarking()`;
+ // instead, we load into `temp2` the read barrier mark entry point
+ // corresponding to register `ref`. If `temp2` is null, it means
+ // that `GetIsGcMarking()` is false, and vice versa.
+ //
+ // temp2 = Thread::Current()->pReadBarrierMarkReg ## root.reg()
+ // if (temp2 != nullptr) { // <=> Thread::Current()->GetIsGcMarking()
+ // // Slow path.
+ // uint32_t rb_state = Lockword(obj->monitor_).ReadBarrierState();
+ // lfence; // Load fence or artificial data dependency to prevent load-load reordering
+ // HeapReference<mirror::Object> ref = *src; // Original reference load.
+ // bool is_gray = (rb_state == ReadBarrier::GrayState());
+ // if (is_gray) {
+ // ref = temp2(ref); // ref = ReadBarrier::Mark(ref); // Runtime entry point call.
+ // }
+ // } else {
+ // HeapReference<mirror::Object> ref = *src; // Original reference load.
+ // }
+
+ vixl32::Register temp_reg = RegisterFrom(temp);
+
+ // Slow path marking the object `ref` when the GC is marking. The
+ // entrypoint will already be loaded in `temp2`.
+ Location temp2 = LocationFrom(lr);
+ SlowPathCodeARMVIXL* slow_path =
+ new (GetGraph()->GetArena()) LoadReferenceWithBakerReadBarrierSlowPathARMVIXL(
+ instruction,
+ ref,
+ obj,
+ offset,
+ index,
+ scale_factor,
+ needs_null_check,
+ temp_reg,
+ /* entrypoint */ temp2);
+ AddSlowPath(slow_path);
+
+ // temp2 = Thread::Current()->pReadBarrierMarkReg ## ref.reg()
+ const int32_t entry_point_offset =
+ CodeGenerator::GetReadBarrierMarkEntryPointsOffset<kArmPointerSize>(ref.reg());
+ // Loading the entrypoint does not require a load acquire since it is only changed when
+ // threads are suspended or running a checkpoint.
+ GetAssembler()->LoadFromOffset(kLoadWord, RegisterFrom(temp2), tr, entry_point_offset);
+  // The entrypoint is null when the GC is not marking; this saves one load
+  // compared to checking GetIsGcMarking().
+ __ CompareAndBranchIfNonZero(RegisterFrom(temp2), slow_path->GetEntryLabel());
+ // Fast path: the GC is not marking: just load the reference.
+ GenerateRawReferenceLoad(instruction, ref, obj, offset, index, scale_factor, needs_null_check);
+ __ Bind(slow_path->GetExitLabel());
+}
+
+void CodeGeneratorARMVIXL::UpdateReferenceFieldWithBakerReadBarrier(HInstruction* instruction,
+ Location ref,
+ vixl32::Register obj,
+ Location field_offset,
+ Location temp,
+ bool needs_null_check,
+ vixl32::Register temp2) {
+ DCHECK(kEmitCompilerReadBarrier);
+ DCHECK(kUseBakerReadBarrier);
+
+ // Query `art::Thread::Current()->GetIsGcMarking()` to decide
+ // whether we need to enter the slow path to update the reference
+ // field within `obj`. Then, in the slow path, check the gray bit
+ // in the lock word of the reference's holder (`obj`) to decide
+ // whether to mark `ref` and update the field or not.
+ //
+ // Note that we do not actually check the value of `GetIsGcMarking()`;
// instead, we load into `temp3` the read barrier mark entry point
// corresponding to register `ref`. If `temp3` is null, it means
// that `GetIsGcMarking()` is false, and vice versa.
@@ -8113,55 +8542,32 @@ void CodeGeneratorARMVIXL::GenerateReferenceLoadWithBakerReadBarrier(HInstructio
// // Slow path.
// uint32_t rb_state = Lockword(obj->monitor_).ReadBarrierState();
// lfence; // Load fence or artificial data dependency to prevent load-load reordering
- // HeapReference<mirror::Object> ref = *src; // Original reference load.
+ // HeapReference<mirror::Object> ref = *(obj + field_offset); // Reference load.
// bool is_gray = (rb_state == ReadBarrier::GrayState());
// if (is_gray) {
+ // old_ref = ref;
// ref = temp3(ref); // ref = ReadBarrier::Mark(ref); // Runtime entry point call.
+ // compareAndSwapObject(obj, field_offset, old_ref, ref);
// }
- // } else {
- // HeapReference<mirror::Object> ref = *src; // Original reference load.
// }
vixl32::Register temp_reg = RegisterFrom(temp);
- // Slow path marking the object `ref` when the GC is marking. The
- // entrypoint will already be loaded in `temp3`.
+ // Slow path updating the object reference at address `obj + field_offset`
+ // when the GC is marking. The entrypoint will already be loaded in `temp3`.
Location temp3 = LocationFrom(lr);
- SlowPathCodeARMVIXL* slow_path;
- if (always_update_field) {
- DCHECK(temp2 != nullptr);
- // LoadReferenceWithBakerReadBarrierAndUpdateFieldSlowPathARMVIXL
- // only supports address of the form `obj + field_offset`, where
- // `obj` is a register and `field_offset` is a register pair (of
- // which only the lower half is used). Thus `offset` and
- // `scale_factor` above are expected to be null in this code path.
- DCHECK_EQ(offset, 0u);
- DCHECK_EQ(scale_factor, ScaleFactor::TIMES_1);
- Location field_offset = index;
- slow_path =
- new (GetGraph()->GetArena()) LoadReferenceWithBakerReadBarrierAndUpdateFieldSlowPathARMVIXL(
- instruction,
- ref,
- obj,
- offset,
- /* index */ field_offset,
- scale_factor,
- needs_null_check,
- temp_reg,
- *temp2,
- /* entrypoint */ temp3);
- } else {
- slow_path = new (GetGraph()->GetArena()) LoadReferenceWithBakerReadBarrierSlowPathARMVIXL(
- instruction,
- ref,
- obj,
- offset,
- index,
- scale_factor,
- needs_null_check,
- temp_reg,
- /* entrypoint */ temp3);
- }
+ SlowPathCodeARMVIXL* slow_path =
+ new (GetGraph()->GetArena()) LoadReferenceWithBakerReadBarrierAndUpdateFieldSlowPathARMVIXL(
+ instruction,
+ ref,
+ obj,
+ /* offset */ 0u,
+ /* index */ field_offset,
+ /* scale_factor */ ScaleFactor::TIMES_1,
+ needs_null_check,
+ temp_reg,
+ temp2,
+ /* entrypoint */ temp3);
AddSlowPath(slow_path);
// temp3 = Thread::Current()->pReadBarrierMarkReg ## ref.reg()
@@ -8173,8 +8579,8 @@ void CodeGeneratorARMVIXL::GenerateReferenceLoadWithBakerReadBarrier(HInstructio
// The entrypoint is null when the GC is not marking, this prevents one load compared to
// checking GetIsGcMarking.
__ CompareAndBranchIfNonZero(RegisterFrom(temp3), slow_path->GetEntryLabel());
- // Fast path: just load the reference.
- GenerateRawReferenceLoad(instruction, ref, obj, offset, index, scale_factor, needs_null_check);
+ // Fast path: the GC is not marking: nothing to do (the field is
+ // up-to-date, and we don't need to load the reference).
__ Bind(slow_path->GetExitLabel());
}
@@ -8488,6 +8894,11 @@ CodeGeneratorARMVIXL::PcRelativePatchInfo* CodeGeneratorARMVIXL::NewPcRelativePa
return &patches->back();
}
+vixl::aarch32::Label* CodeGeneratorARMVIXL::NewBakerReadBarrierPatch(uint32_t custom_data) {
+ baker_read_barrier_patches_.emplace_back(custom_data);
+ return &baker_read_barrier_patches_.back().label;
+}
+
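
NewBakerReadBarrierPatch relies on the deque's pointer stability: each call
appends a (label, custom_data) record and returns a pointer to the label so
the emitter can bind it later. A container-level sketch of that bookkeeping
(FakeLabel stands in for vixl::aarch32::Label):

#include <cstdint>
#include <cstdio>
#include <deque>

struct FakeLabel { int location = -1; };

struct PatchInfo {
  explicit PatchInfo(uint32_t data) : custom_data(data) {}
  FakeLabel label;
  uint32_t custom_data;
};

std::deque<PatchInfo> patches;  // Deque: pointers survive emplace_back.

FakeLabel* NewPatch(uint32_t custom_data) {
  patches.emplace_back(custom_data);
  return &patches.back().label;
}

int main() {
  FakeLabel* l = NewPatch(0x42u);
  l->location = 100;  // "Bound" by the assembler at code offset 100.
  for (const PatchInfo& p : patches) {
    std::printf("patch at %d, data 0x%x\n",
                p.label.location, static_cast<unsigned>(p.custom_data));
  }
}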
VIXLUInt32Literal* CodeGeneratorARMVIXL::DeduplicateBootImageStringLiteral(
const DexFile& dex_file,
dex::StringIndex string_index) {
@@ -8512,10 +8923,6 @@ VIXLUInt32Literal* CodeGeneratorARMVIXL::DeduplicateBootImageAddressLiteral(uint
return DeduplicateUint32Literal(dchecked_integral_cast<uint32_t>(address), &uint32_literals_);
}
-VIXLUInt32Literal* CodeGeneratorARMVIXL::DeduplicateDexCacheAddressLiteral(uint32_t address) {
- return DeduplicateUint32Literal(address, &uint32_literals_);
-}
-
VIXLUInt32Literal* CodeGeneratorARMVIXL::DeduplicateJitStringLiteral(
const DexFile& dex_file,
dex::StringIndex string_index,
@@ -8569,7 +8976,8 @@ void CodeGeneratorARMVIXL::EmitLinkerPatches(ArenaVector<LinkerPatch>* linker_pa
/* MOVW+MOVT for each entry */ 2u * pc_relative_string_patches_.size() +
boot_image_type_patches_.size() +
/* MOVW+MOVT for each entry */ 2u * pc_relative_type_patches_.size() +
- /* MOVW+MOVT for each entry */ 2u * type_bss_entry_patches_.size();
+ /* MOVW+MOVT for each entry */ 2u * type_bss_entry_patches_.size() +
+ baker_read_barrier_patches_.size();
linker_patches->reserve(size);
EmitPcRelativeLinkerPatches<LinkerPatch::DexCacheArrayPatch>(pc_relative_dex_cache_patches_,
linker_patches);
@@ -8603,6 +9011,10 @@ void CodeGeneratorARMVIXL::EmitLinkerPatches(ArenaVector<LinkerPatch>* linker_pa
target_type.dex_file,
target_type.type_index.index_));
}
+ for (const BakerReadBarrierPatchInfo& info : baker_read_barrier_patches_) {
+ linker_patches->push_back(LinkerPatch::BakerReadBarrierBranchPatch(info.label.GetLocation(),
+ info.custom_data));
+ }
DCHECK_EQ(size, linker_patches->size());
}
diff --git a/compiler/optimizing/code_generator_arm_vixl.h b/compiler/optimizing/code_generator_arm_vixl.h
index 1e9669dc38..657d3c134f 100644
--- a/compiler/optimizing/code_generator_arm_vixl.h
+++ b/compiler/optimizing/code_generator_arm_vixl.h
@@ -572,12 +572,16 @@ class CodeGeneratorARMVIXL : public CodeGenerator {
PcRelativePatchInfo* NewTypeBssEntryPatch(const DexFile& dex_file, dex::TypeIndex type_index);
PcRelativePatchInfo* NewPcRelativeDexCacheArrayPatch(const DexFile& dex_file,
uint32_t element_offset);
+
+  // Add a new Baker read barrier patch and return the label to be bound
+ // before the BNE instruction.
+ vixl::aarch32::Label* NewBakerReadBarrierPatch(uint32_t custom_data);
+
VIXLUInt32Literal* DeduplicateBootImageStringLiteral(const DexFile& dex_file,
dex::StringIndex string_index);
VIXLUInt32Literal* DeduplicateBootImageTypeLiteral(const DexFile& dex_file,
dex::TypeIndex type_index);
VIXLUInt32Literal* DeduplicateBootImageAddressLiteral(uint32_t address);
- VIXLUInt32Literal* DeduplicateDexCacheAddressLiteral(uint32_t address);
VIXLUInt32Literal* DeduplicateJitStringLiteral(const DexFile& dex_file,
dex::StringIndex string_index,
Handle<mirror::String> handle);
@@ -589,6 +593,10 @@ class CodeGeneratorARMVIXL : public CodeGenerator {
void EmitJitRootPatches(uint8_t* code, const uint8_t* roots_data) OVERRIDE;
+  // Maybe add the reserved entrypoint register as a temporary for field loads. This temp
+ // is added only for AOT compilation if link-time generated thunks for fields are enabled.
+ void MaybeAddBakerCcEntrypointTempForFields(LocationSummary* locations);
+
// Fast path implementation of ReadBarrier::Barrier for a heap
// reference field load when Baker's read barriers are used.
void GenerateFieldLoadWithBakerReadBarrier(HInstruction* instruction,
@@ -612,11 +620,6 @@ class CodeGeneratorARMVIXL : public CodeGenerator {
// Load the object reference located at the address
// `obj + offset + (index << scale_factor)`, held by object `obj`, into
// `ref`, and mark it if needed.
- //
- // If `always_update_field` is true, the value of the reference is
- // atomically updated in the holder (`obj`). This operation
- // requires an extra temporary register, which must be provided as a
- // non-null pointer (`temp2`).
void GenerateReferenceLoadWithBakerReadBarrier(HInstruction* instruction,
Location ref,
vixl::aarch32::Register obj,
@@ -624,9 +627,27 @@ class CodeGeneratorARMVIXL : public CodeGenerator {
Location index,
ScaleFactor scale_factor,
Location temp,
- bool needs_null_check,
- bool always_update_field = false,
- vixl::aarch32::Register* temp2 = nullptr);
+ bool needs_null_check);
+
+  // Generate code checking whether the reference field at the
+ // address `obj + field_offset`, held by object `obj`, needs to be
+ // marked, and if so, marking it and updating the field within `obj`
+ // with the marked value.
+ //
+ // This routine is used for the implementation of the
+ // UnsafeCASObject intrinsic with Baker read barriers.
+ //
+ // This method has a structure similar to
+ // GenerateReferenceLoadWithBakerReadBarrier, but note that argument
+  // `ref` is only used as a temporary here, and thus its value should not
+ // be used afterwards.
+ void UpdateReferenceFieldWithBakerReadBarrier(HInstruction* instruction,
+ Location ref,
+ vixl::aarch32::Register obj,
+ Location field_offset,
+ Location temp,
+ bool needs_null_check,
+ vixl::aarch32::Register temp2);
// Generate a heap reference load (with no read barrier).
void GenerateRawReferenceLoad(HInstruction* instruction,
@@ -713,6 +734,13 @@ class CodeGeneratorARMVIXL : public CodeGenerator {
VIXLUInt32Literal*,
TypeReferenceValueComparator>;
+ struct BakerReadBarrierPatchInfo {
+ explicit BakerReadBarrierPatchInfo(uint32_t data) : label(), custom_data(data) { }
+
+ vixl::aarch32::Label label;
+ uint32_t custom_data;
+ };
+
VIXLUInt32Literal* DeduplicateUint32Literal(uint32_t value, Uint32ToLiteralMap* map);
VIXLUInt32Literal* DeduplicateMethodLiteral(MethodReference target_method,
MethodToLiteralMap* map);
@@ -750,6 +778,8 @@ class CodeGeneratorARMVIXL : public CodeGenerator {
ArenaDeque<PcRelativePatchInfo> pc_relative_type_patches_;
// PC-relative type patch info for kBssEntry.
ArenaDeque<PcRelativePatchInfo> type_bss_entry_patches_;
+ // Baker read barrier patch info.
+ ArenaDeque<BakerReadBarrierPatchInfo> baker_read_barrier_patches_;
// Patches for string literals in JIT compiled code.
StringToLiteralMap jit_string_patches_;
diff --git a/compiler/optimizing/code_generator_mips.cc b/compiler/optimizing/code_generator_mips.cc
index aa030b279c..e9870acff4 100644
--- a/compiler/optimizing/code_generator_mips.cc
+++ b/compiler/optimizing/code_generator_mips.cc
@@ -219,15 +219,33 @@ class LoadClassSlowPathMIPS : public SlowPathCodeMIPS {
void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
LocationSummary* locations = instruction_->GetLocations();
+ Location out = locations->Out();
CodeGeneratorMIPS* mips_codegen = down_cast<CodeGeneratorMIPS*>(codegen);
-
+ const bool isR6 = mips_codegen->GetInstructionSetFeatures().IsR6();
+ const bool r2_baker_or_no_read_barriers = !isR6 && (!kUseReadBarrier || kUseBakerReadBarrier);
+ InvokeRuntimeCallingConvention calling_convention;
+ DCHECK_EQ(instruction_->IsLoadClass(), cls_ == instruction_);
+ const bool is_load_class_bss_entry =
+ (cls_ == instruction_) && (cls_->GetLoadKind() == HLoadClass::LoadKind::kBssEntry);
__ Bind(GetEntryLabel());
SaveLiveRegisters(codegen, locations);
- InvokeRuntimeCallingConvention calling_convention;
+ // For HLoadClass/kBssEntry/kSaveEverything, make sure we preserve the address of the entry.
+ Register entry_address = kNoRegister;
+ if (is_load_class_bss_entry && r2_baker_or_no_read_barriers) {
+ Register temp = locations->GetTemp(0).AsRegister<Register>();
+ bool temp_is_a0 = (temp == calling_convention.GetRegisterAt(0));
+ // In the unlucky case that `temp` is A0, we preserve the address in `out` across the
+ // kSaveEverything call.
+ entry_address = temp_is_a0 ? out.AsRegister<Register>() : temp;
+ DCHECK_NE(entry_address, calling_convention.GetRegisterAt(0));
+ if (temp_is_a0) {
+ __ Move(entry_address, temp);
+ }
+ }
+
dex::TypeIndex type_index = cls_->GetTypeIndex();
__ LoadConst32(calling_convention.GetRegisterAt(0), type_index.index_);
-
QuickEntrypointEnum entrypoint = do_clinit_ ? kQuickInitializeStaticStorage
: kQuickInitializeType;
mips_codegen->InvokeRuntime(entrypoint, instruction_, dex_pc_, this);
@@ -237,25 +255,27 @@ class LoadClassSlowPathMIPS : public SlowPathCodeMIPS {
CheckEntrypointTypes<kQuickInitializeType, void*, uint32_t>();
}
+ // For HLoadClass/kBssEntry, store the resolved class to the BSS entry.
+ if (is_load_class_bss_entry && r2_baker_or_no_read_barriers) {
+ // The class entry address was preserved in `entry_address` thanks to kSaveEverything.
+ __ StoreToOffset(kStoreWord, calling_convention.GetRegisterAt(0), entry_address, 0);
+ }
+
// Move the class to the desired location.
- Location out = locations->Out();
if (out.IsValid()) {
DCHECK(out.IsRegister() && !locations->GetLiveRegisters()->ContainsCoreRegister(out.reg()));
Primitive::Type type = instruction_->GetType();
- mips_codegen->MoveLocation(out, calling_convention.GetReturnLocation(type), type);
+ mips_codegen->MoveLocation(out,
+ Location::RegisterLocation(calling_convention.GetRegisterAt(0)),
+ type);
}
-
RestoreLiveRegisters(codegen, locations);
- // For HLoadClass/kBssEntry, store the resolved Class to the BSS entry.
- DCHECK_EQ(instruction_->IsLoadClass(), cls_ == instruction_);
- if (cls_ == instruction_ && cls_->GetLoadKind() == HLoadClass::LoadKind::kBssEntry) {
- DCHECK(out.IsValid());
- // TODO: Change art_quick_initialize_type/art_quick_initialize_static_storage to
- // kSaveEverything and use a temporary for the .bss entry address in the fast path,
- // so that we can avoid another calculation here.
- bool isR6 = mips_codegen->GetInstructionSetFeatures().IsR6();
+
+ // For HLoadClass/kBssEntry, store the resolved class to the BSS entry.
+ if (is_load_class_bss_entry && !r2_baker_or_no_read_barriers) {
+ // For non-Baker read barriers (or on R6), we need to re-calculate the address of
+ // the class entry.
Register base = isR6 ? ZERO : locations->InAt(0).AsRegister<Register>();
- DCHECK_NE(out.AsRegister<Register>(), AT);
CodeGeneratorMIPS::PcRelativePatchInfo* info =
mips_codegen->NewTypeBssEntryPatch(cls_->GetDexFile(), type_index);
bool reordering = __ SetReorder(false);
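
The kSaveEverything rework has to keep the .bss entry address live across the
runtime call, stepping aside when the chosen temp collides with A0. A tiny
register-selection sketch of that collision handling (register values are
illustrative):

#include <cassert>

enum Reg { V0 = 2, A0 = 4, T0 = 8 };

Reg PickEntryAddressReg(Reg temp, Reg out, Reg first_arg) {
  // If `temp` is the first argument register, it is about to be clobbered by
  // the call setup, so park the entry address in `out` instead.
  return (temp == first_arg) ? out : temp;
}

int main() {
  assert(PickEntryAddressReg(T0, V0, A0) == T0);  // Usual case: keep temp.
  assert(PickEntryAddressReg(A0, V0, A0) == V0);  // Unlucky case: use out.
  return 0;
}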
@@ -286,40 +306,62 @@ class LoadStringSlowPathMIPS : public SlowPathCodeMIPS {
explicit LoadStringSlowPathMIPS(HLoadString* instruction) : SlowPathCodeMIPS(instruction) {}
void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
+ DCHECK(instruction_->IsLoadString());
+ DCHECK_EQ(instruction_->AsLoadString()->GetLoadKind(), HLoadString::LoadKind::kBssEntry);
LocationSummary* locations = instruction_->GetLocations();
DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(locations->Out().reg()));
+ HLoadString* load = instruction_->AsLoadString();
+ const dex::StringIndex string_index = load->GetStringIndex();
+ Register out = locations->Out().AsRegister<Register>();
CodeGeneratorMIPS* mips_codegen = down_cast<CodeGeneratorMIPS*>(codegen);
-
+ const bool isR6 = mips_codegen->GetInstructionSetFeatures().IsR6();
+ const bool r2_baker_or_no_read_barriers = !isR6 && (!kUseReadBarrier || kUseBakerReadBarrier);
+ InvokeRuntimeCallingConvention calling_convention;
__ Bind(GetEntryLabel());
SaveLiveRegisters(codegen, locations);
- InvokeRuntimeCallingConvention calling_convention;
- HLoadString* load = instruction_->AsLoadString();
- const dex::StringIndex string_index = load->GetStringIndex();
+ // For HLoadString/kBssEntry/kSaveEverything, make sure we preserve the address of the entry.
+ Register entry_address = kNoRegister;
+ if (r2_baker_or_no_read_barriers) {
+ Register temp = locations->GetTemp(0).AsRegister<Register>();
+ bool temp_is_a0 = (temp == calling_convention.GetRegisterAt(0));
+ // In the unlucky case that `temp` is A0, we preserve the address in `out` across the
+ // kSaveEverything call.
+ entry_address = temp_is_a0 ? out : temp;
+ DCHECK_NE(entry_address, calling_convention.GetRegisterAt(0));
+ if (temp_is_a0) {
+ __ Move(entry_address, temp);
+ }
+ }
+
__ LoadConst32(calling_convention.GetRegisterAt(0), string_index.index_);
mips_codegen->InvokeRuntime(kQuickResolveString, instruction_, instruction_->GetDexPc(), this);
CheckEntrypointTypes<kQuickResolveString, void*, uint32_t>();
+
+ // Store the resolved string to the BSS entry.
+ if (r2_baker_or_no_read_barriers) {
+ // The string entry address was preserved in `entry_address` thanks to kSaveEverything.
+ __ StoreToOffset(kStoreWord, calling_convention.GetRegisterAt(0), entry_address, 0);
+ }
+
Primitive::Type type = instruction_->GetType();
mips_codegen->MoveLocation(locations->Out(),
- calling_convention.GetReturnLocation(type),
+ Location::RegisterLocation(calling_convention.GetRegisterAt(0)),
type);
-
RestoreLiveRegisters(codegen, locations);
- // Store the resolved String to the BSS entry.
- // TODO: Change art_quick_resolve_string to kSaveEverything and use a temporary for the
- // .bss entry address in the fast path, so that we can avoid another calculation here.
- bool isR6 = mips_codegen->GetInstructionSetFeatures().IsR6();
- Register base = isR6 ? ZERO : locations->InAt(0).AsRegister<Register>();
- Register out = locations->Out().AsRegister<Register>();
- DCHECK_NE(out, AT);
- CodeGeneratorMIPS::PcRelativePatchInfo* info =
- mips_codegen->NewPcRelativeStringPatch(load->GetDexFile(), string_index);
- bool reordering = __ SetReorder(false);
- mips_codegen->EmitPcRelativeAddressPlaceholderHigh(info, TMP, base);
- __ StoreToOffset(kStoreWord, out, TMP, /* placeholder */ 0x5678);
- __ SetReorder(reordering);
-
+ // Store the resolved string to the BSS entry.
+ if (!r2_baker_or_no_read_barriers) {
+ // For non-Baker read barriers (or on R6), we need to re-calculate the address of
+ // the string entry.
+ Register base = isR6 ? ZERO : locations->InAt(0).AsRegister<Register>();
+ CodeGeneratorMIPS::PcRelativePatchInfo* info =
+ mips_codegen->NewPcRelativeStringPatch(load->GetDexFile(), string_index);
+ bool reordering = __ SetReorder(false);
+ mips_codegen->EmitPcRelativeAddressPlaceholderHigh(info, TMP, base);
+ __ StoreToOffset(kStoreWord, out, TMP, /* placeholder */ 0x5678);
+ __ SetReorder(reordering);
+ }
__ B(GetExitLabel());
}
@@ -451,8 +493,13 @@ class DeoptimizationSlowPathMIPS : public SlowPathCodeMIPS {
void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
CodeGeneratorMIPS* mips_codegen = down_cast<CodeGeneratorMIPS*>(codegen);
__ Bind(GetEntryLabel());
+ LocationSummary* locations = instruction_->GetLocations();
+ SaveLiveRegisters(codegen, locations);
+ InvokeRuntimeCallingConvention calling_convention;
+ __ LoadConst32(calling_convention.GetRegisterAt(0),
+ static_cast<uint32_t>(instruction_->AsDeoptimize()->GetDeoptimizationKind()));
mips_codegen->InvokeRuntime(kQuickDeoptimize, instruction_, instruction_->GetDexPc(), this);
- CheckEntrypointTypes<kQuickDeoptimize, void, void>();
+ CheckEntrypointTypes<kQuickDeoptimize, void, DeoptimizationKind>();
}
const char* GetDescription() const OVERRIDE { return "DeoptimizationSlowPathMIPS"; }
@@ -1719,15 +1766,14 @@ void CodeGeneratorMIPS::PatchJitRootUse(uint8_t* code,
DCHECK_EQ(code[literal_offset + 1], 0x12);
DCHECK_EQ((code[literal_offset + 2] & 0xE0), 0x00);
DCHECK_EQ(code[literal_offset + 3], 0x3C);
- // lw reg, reg, addr32_low
+ // instr reg, reg, addr32_low
DCHECK_EQ(code[literal_offset + 4], 0x78);
DCHECK_EQ(code[literal_offset + 5], 0x56);
- DCHECK_EQ((code[literal_offset + 7] & 0xFC), 0x8C);
- addr32 += (addr32 & 0x8000) << 1; // Account for sign extension in "lw reg, reg, addr32_low".
+ addr32 += (addr32 & 0x8000) << 1; // Account for sign extension in "instr reg, reg, addr32_low".
// lui reg, addr32_high
code[literal_offset + 0] = static_cast<uint8_t>(addr32 >> 16);
code[literal_offset + 1] = static_cast<uint8_t>(addr32 >> 24);
- // lw reg, reg, addr32_low
+ // instr reg, reg, addr32_low
code[literal_offset + 4] = static_cast<uint8_t>(addr32 >> 0);
code[literal_offset + 5] = static_cast<uint8_t>(addr32 >> 8);
}
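
The `addr32 += (addr32 & 0x8000) << 1` adjustment pre-corrects the high half
for the sign extension the paired instruction applies to its 16-bit immediate:
when bit 15 of the address is set, the high half must be one larger. A worked
standalone sketch of the split and reassembly:

#include <cstdint>
#include <cstdio>

void SplitMipsHiLo(uint32_t addr32, uint16_t* high, uint16_t* low) {
  *low = static_cast<uint16_t>(addr32);         // Immediate of the low instr.
  addr32 += (addr32 & 0x8000) << 1;             // Compensate sign extension.
  *high = static_cast<uint16_t>(addr32 >> 16);  // Immediate of LUI.
}

int main() {
  uint16_t hi, lo;
  SplitMipsHiLo(0x1234ABCDu, &hi, &lo);
  // Reassemble the way the hardware does: LUI, then a sign-extended add.
  uint32_t rebuilt = (static_cast<uint32_t>(hi) << 16) +
      static_cast<uint32_t>(static_cast<int32_t>(static_cast<int16_t>(lo)));
  // Prints: hi=0x1235 lo=0xabcd rebuilt=0x1234abcd
  std::printf("hi=0x%04x lo=0x%04x rebuilt=0x%08x\n",
              static_cast<unsigned>(hi), static_cast<unsigned>(lo),
              static_cast<unsigned>(rebuilt));
}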
@@ -2436,6 +2482,9 @@ void LocationsBuilderMIPS::VisitArrayGet(HArrayGet* instruction) {
object_array_get_with_read_barrier
? LocationSummary::kCallOnSlowPath
: LocationSummary::kNoCall);
+ if (object_array_get_with_read_barrier && kUseBakerReadBarrier) {
+ locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty()); // No caller-save registers.
+ }
locations->SetInAt(0, Location::RequiresRegister());
locations->SetInAt(1, Location::RegisterOrConstant(instruction->InputAt(1)));
if (Primitive::IsFloatingPointType(type)) {
@@ -3438,8 +3487,6 @@ void InstructionCodeGeneratorMIPS::HandleCondition(HCondition* instruction) {
Primitive::Type type = instruction->InputAt(0)->GetType();
LocationSummary* locations = instruction->GetLocations();
- Register dst = locations->Out().AsRegister<Register>();
- MipsLabel true_label;
switch (type) {
default:
@@ -3448,27 +3495,14 @@ void InstructionCodeGeneratorMIPS::HandleCondition(HCondition* instruction) {
return;
case Primitive::kPrimLong:
- // TODO: don't use branches.
- GenerateLongCompareAndBranch(instruction->GetCondition(), locations, &true_label);
- break;
+ GenerateLongCompare(instruction->GetCondition(), locations);
+ return;
case Primitive::kPrimFloat:
case Primitive::kPrimDouble:
GenerateFpCompare(instruction->GetCondition(), instruction->IsGtBias(), type, locations);
return;
}
-
- // Convert the branches into the result.
- MipsLabel done;
-
- // False case: result = 0.
- __ LoadConst32(dst, 0);
- __ B(&done);
-
- // True case: result = 1.
- __ Bind(&true_label);
- __ LoadConst32(dst, 1);
- __ Bind(&done);
}
void InstructionCodeGeneratorMIPS::DivRemOneOrMinusOne(HBinaryOperation* instruction) {
@@ -4238,6 +4272,221 @@ void InstructionCodeGeneratorMIPS::GenerateIntCompareAndBranch(IfCondition cond,
}
}
+void InstructionCodeGeneratorMIPS::GenerateLongCompare(IfCondition cond,
+ LocationSummary* locations) {
+ Register dst = locations->Out().AsRegister<Register>();
+ Register lhs_high = locations->InAt(0).AsRegisterPairHigh<Register>();
+ Register lhs_low = locations->InAt(0).AsRegisterPairLow<Register>();
+ Location rhs_location = locations->InAt(1);
+ Register rhs_high = ZERO;
+ Register rhs_low = ZERO;
+ int64_t imm = 0;
+ uint32_t imm_high = 0;
+ uint32_t imm_low = 0;
+ bool use_imm = rhs_location.IsConstant();
+ if (use_imm) {
+ imm = rhs_location.GetConstant()->AsLongConstant()->GetValue();
+ imm_high = High32Bits(imm);
+ imm_low = Low32Bits(imm);
+ } else {
+ rhs_high = rhs_location.AsRegisterPairHigh<Register>();
+ rhs_low = rhs_location.AsRegisterPairLow<Register>();
+ }
+ if (use_imm && imm == 0) {
+ switch (cond) {
+ case kCondEQ:
+ case kCondBE: // <= 0 if zero
+ __ Or(dst, lhs_high, lhs_low);
+ __ Sltiu(dst, dst, 1);
+ break;
+ case kCondNE:
+ case kCondA: // > 0 if non-zero
+ __ Or(dst, lhs_high, lhs_low);
+ __ Sltu(dst, ZERO, dst);
+ break;
+ case kCondLT:
+ __ Slt(dst, lhs_high, ZERO);
+ break;
+ case kCondGE:
+ __ Slt(dst, lhs_high, ZERO);
+ __ Xori(dst, dst, 1);
+ break;
+ case kCondLE:
+ __ Or(TMP, lhs_high, lhs_low);
+ __ Sra(AT, lhs_high, 31);
+ __ Sltu(dst, AT, TMP);
+ __ Xori(dst, dst, 1);
+ break;
+ case kCondGT:
+ __ Or(TMP, lhs_high, lhs_low);
+ __ Sra(AT, lhs_high, 31);
+ __ Sltu(dst, AT, TMP);
+ break;
+ case kCondB: // always false
+ __ Andi(dst, dst, 0);
+ break;
+ case kCondAE: // always true
+ __ Ori(dst, ZERO, 1);
+ break;
+ }
+ } else if (use_imm) {
+ // TODO: more efficient comparison with constants without loading them into TMP/AT.
+ switch (cond) {
+ case kCondEQ:
+ __ LoadConst32(TMP, imm_high);
+ __ Xor(TMP, TMP, lhs_high);
+ __ LoadConst32(AT, imm_low);
+ __ Xor(AT, AT, lhs_low);
+ __ Or(dst, TMP, AT);
+ __ Sltiu(dst, dst, 1);
+ break;
+ case kCondNE:
+ __ LoadConst32(TMP, imm_high);
+ __ Xor(TMP, TMP, lhs_high);
+ __ LoadConst32(AT, imm_low);
+ __ Xor(AT, AT, lhs_low);
+ __ Or(dst, TMP, AT);
+ __ Sltu(dst, ZERO, dst);
+ break;
+ case kCondLT:
+ case kCondGE:
+ if (dst == lhs_low) {
+ __ LoadConst32(TMP, imm_low);
+ __ Sltu(dst, lhs_low, TMP);
+ }
+ __ LoadConst32(TMP, imm_high);
+ __ Slt(AT, lhs_high, TMP);
+ __ Slt(TMP, TMP, lhs_high);
+ if (dst != lhs_low) {
+ __ LoadConst32(dst, imm_low);
+ __ Sltu(dst, lhs_low, dst);
+ }
+ __ Slt(dst, TMP, dst);
+ __ Or(dst, dst, AT);
+ if (cond == kCondGE) {
+ __ Xori(dst, dst, 1);
+ }
+ break;
+ case kCondGT:
+ case kCondLE:
+ if (dst == lhs_low) {
+ __ LoadConst32(TMP, imm_low);
+ __ Sltu(dst, TMP, lhs_low);
+ }
+ __ LoadConst32(TMP, imm_high);
+ __ Slt(AT, TMP, lhs_high);
+ __ Slt(TMP, lhs_high, TMP);
+ if (dst != lhs_low) {
+ __ LoadConst32(dst, imm_low);
+ __ Sltu(dst, dst, lhs_low);
+ }
+ __ Slt(dst, TMP, dst);
+ __ Or(dst, dst, AT);
+ if (cond == kCondLE) {
+ __ Xori(dst, dst, 1);
+ }
+ break;
+ case kCondB:
+ case kCondAE:
+ if (dst == lhs_low) {
+ __ LoadConst32(TMP, imm_low);
+ __ Sltu(dst, lhs_low, TMP);
+ }
+ __ LoadConst32(TMP, imm_high);
+ __ Sltu(AT, lhs_high, TMP);
+ __ Sltu(TMP, TMP, lhs_high);
+ if (dst != lhs_low) {
+ __ LoadConst32(dst, imm_low);
+ __ Sltu(dst, lhs_low, dst);
+ }
+ __ Slt(dst, TMP, dst);
+ __ Or(dst, dst, AT);
+ if (cond == kCondAE) {
+ __ Xori(dst, dst, 1);
+ }
+ break;
+ case kCondA:
+ case kCondBE:
+ if (dst == lhs_low) {
+ __ LoadConst32(TMP, imm_low);
+ __ Sltu(dst, TMP, lhs_low);
+ }
+ __ LoadConst32(TMP, imm_high);
+ __ Sltu(AT, TMP, lhs_high);
+ __ Sltu(TMP, lhs_high, TMP);
+ if (dst != lhs_low) {
+ __ LoadConst32(dst, imm_low);
+ __ Sltu(dst, dst, lhs_low);
+ }
+ __ Slt(dst, TMP, dst);
+ __ Or(dst, dst, AT);
+ if (cond == kCondBE) {
+ __ Xori(dst, dst, 1);
+ }
+ break;
+ }
+ } else {
+ switch (cond) {
+ case kCondEQ:
+ __ Xor(TMP, lhs_high, rhs_high);
+ __ Xor(AT, lhs_low, rhs_low);
+ __ Or(dst, TMP, AT);
+ __ Sltiu(dst, dst, 1);
+ break;
+ case kCondNE:
+ __ Xor(TMP, lhs_high, rhs_high);
+ __ Xor(AT, lhs_low, rhs_low);
+ __ Or(dst, TMP, AT);
+ __ Sltu(dst, ZERO, dst);
+ break;
+ case kCondLT:
+ case kCondGE:
+ __ Slt(TMP, rhs_high, lhs_high);
+ __ Sltu(AT, lhs_low, rhs_low);
+ __ Slt(TMP, TMP, AT);
+ __ Slt(AT, lhs_high, rhs_high);
+ __ Or(dst, AT, TMP);
+ if (cond == kCondGE) {
+ __ Xori(dst, dst, 1);
+ }
+ break;
+ case kCondGT:
+ case kCondLE:
+ __ Slt(TMP, lhs_high, rhs_high);
+ __ Sltu(AT, rhs_low, lhs_low);
+ __ Slt(TMP, TMP, AT);
+ __ Slt(AT, rhs_high, lhs_high);
+ __ Or(dst, AT, TMP);
+ if (cond == kCondLE) {
+ __ Xori(dst, dst, 1);
+ }
+ break;
+ case kCondB:
+ case kCondAE:
+ __ Sltu(TMP, rhs_high, lhs_high);
+ __ Sltu(AT, lhs_low, rhs_low);
+ __ Slt(TMP, TMP, AT);
+ __ Sltu(AT, lhs_high, rhs_high);
+ __ Or(dst, AT, TMP);
+ if (cond == kCondAE) {
+ __ Xori(dst, dst, 1);
+ }
+ break;
+ case kCondA:
+ case kCondBE:
+ __ Sltu(TMP, lhs_high, rhs_high);
+ __ Sltu(AT, rhs_low, lhs_low);
+ __ Slt(TMP, TMP, AT);
+ __ Sltu(AT, rhs_high, lhs_high);
+ __ Or(dst, AT, TMP);
+ if (cond == kCondBE) {
+ __ Xori(dst, dst, 1);
+ }
+ break;
+ }
+ }
+}
+
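For readers tracing the Slt/Sltu/Or sequences above: each 64-bit comparison is composed branch-free from 32-bit set-on-less-than results. A minimal C++ sketch of the kCondLT register-register case (illustrative only; tmp/at mirror the TMP/AT scratch registers, nothing here is ART API):

    #include <cstdint>

    // dst = (lhs_high < rhs_high)
    //     | (!(lhs_high > rhs_high) & (lhs_low <u rhs_low))
    // The middle term tests "high parts not greater" instead of an explicit
    // equality check; that is what the Slt(TMP, TMP, AT) step computes.
    bool LongLessThan(int32_t lhs_high, uint32_t lhs_low,
                      int32_t rhs_high, uint32_t rhs_low) {
      int tmp = (rhs_high < lhs_high) ? 1 : 0;  // Slt  TMP, rhs_high, lhs_high
      int at  = (lhs_low < rhs_low) ? 1 : 0;    // Sltu AT,  lhs_low,  rhs_low
      tmp = (tmp < at) ? 1 : 0;                 // 1 iff high not greater and low <
      at  = (lhs_high < rhs_high) ? 1 : 0;      // Slt  AT,  lhs_high, rhs_high
      return (at | tmp) != 0;                   // Or   dst, AT, TMP
    }

The same skeleton serves kCondGE (Xori the result with 1) and, with Sltu on the high words, the unsigned kCondB/kCondAE variants.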
void InstructionCodeGeneratorMIPS::GenerateLongCompareAndBranch(IfCondition cond,
LocationSummary* locations,
MipsLabel* label) {
@@ -5155,7 +5404,10 @@ void InstructionCodeGeneratorMIPS::VisitIf(HIf* if_instr) {
void LocationsBuilderMIPS::VisitDeoptimize(HDeoptimize* deoptimize) {
LocationSummary* locations = new (GetGraph()->GetArena())
LocationSummary(deoptimize, LocationSummary::kCallOnSlowPath);
- locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty()); // No caller-save registers.
+ InvokeRuntimeCallingConvention calling_convention;
+ RegisterSet caller_saves = RegisterSet::Empty();
+ caller_saves.Add(Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
+ locations->SetCustomSlowPathCallerSaves(caller_saves);
if (IsBooleanValueOrMaterializedCondition(deoptimize->InputAt(0))) {
locations->SetInAt(0, Location::RequiresRegister());
}
@@ -5767,6 +6019,9 @@ void LocationsBuilderMIPS::HandleFieldGet(HInstruction* instruction, const Field
? LocationSummary::kCallOnSlowPath
: LocationSummary::kNoCall));
+ if (object_field_get_with_read_barrier && kUseBakerReadBarrier) {
+ locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty()); // No caller-save registers.
+ }
locations->SetInAt(0, Location::RequiresRegister());
if (generate_volatile) {
InvokeRuntimeCallingConvention calling_convention;
@@ -6445,6 +6700,7 @@ void CodeGeneratorMIPS::GenerateReadBarrierForRootSlow(HInstruction* instruction
void LocationsBuilderMIPS::VisitInstanceOf(HInstanceOf* instruction) {
LocationSummary::CallKind call_kind = LocationSummary::kNoCall;
TypeCheckKind type_check_kind = instruction->GetTypeCheckKind();
+ bool baker_read_barrier_slow_path = false;
switch (type_check_kind) {
case TypeCheckKind::kExactCheck:
case TypeCheckKind::kAbstractClassCheck:
@@ -6452,6 +6708,7 @@ void LocationsBuilderMIPS::VisitInstanceOf(HInstanceOf* instruction) {
case TypeCheckKind::kArrayObjectCheck:
call_kind =
kEmitCompilerReadBarrier ? LocationSummary::kCallOnSlowPath : LocationSummary::kNoCall;
+ baker_read_barrier_slow_path = kUseBakerReadBarrier;
break;
case TypeCheckKind::kArrayCheck:
case TypeCheckKind::kUnresolvedCheck:
@@ -6461,6 +6718,9 @@ void LocationsBuilderMIPS::VisitInstanceOf(HInstanceOf* instruction) {
}
LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction, call_kind);
+ if (baker_read_barrier_slow_path) {
+ locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty()); // No caller-save registers.
+ }
locations->SetInAt(0, Location::RequiresRegister());
locations->SetInAt(1, Location::RequiresRegister());
// The output does overlap inputs.
@@ -7048,26 +7308,27 @@ void LocationsBuilderMIPS::VisitLoadClass(HLoadClass* cls) {
HLoadClass::LoadKind load_kind = cls->GetLoadKind();
if (load_kind == HLoadClass::LoadKind::kDexCacheViaMethod) {
InvokeRuntimeCallingConvention calling_convention;
- CodeGenerator::CreateLoadClassRuntimeCallLocationSummary(
- cls,
- Location::RegisterLocation(calling_convention.GetRegisterAt(0)),
- calling_convention.GetReturnLocation(Primitive::kPrimNot));
+ Location loc = Location::RegisterLocation(calling_convention.GetRegisterAt(0));
+ CodeGenerator::CreateLoadClassRuntimeCallLocationSummary(cls, loc, loc);
return;
}
DCHECK(!cls->NeedsAccessCheck());
-
+ const bool isR6 = codegen_->GetInstructionSetFeatures().IsR6();
const bool requires_read_barrier = kEmitCompilerReadBarrier && !cls->IsInBootImage();
LocationSummary::CallKind call_kind = (cls->NeedsEnvironment() || requires_read_barrier)
? LocationSummary::kCallOnSlowPath
: LocationSummary::kNoCall;
LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(cls, call_kind);
+ if (kUseBakerReadBarrier && requires_read_barrier && !cls->NeedsEnvironment()) {
+ locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty()); // No caller-save registers.
+ }
switch (load_kind) {
// We need an extra register for PC-relative literals on R2.
case HLoadClass::LoadKind::kBootImageLinkTimeAddress:
case HLoadClass::LoadKind::kBootImageLinkTimePcRelative:
case HLoadClass::LoadKind::kBootImageAddress:
case HLoadClass::LoadKind::kBssEntry:
- if (codegen_->GetInstructionSetFeatures().IsR6()) {
+ if (isR6) {
break;
}
FALLTHROUGH_INTENDED;
@@ -7078,6 +7339,22 @@ void LocationsBuilderMIPS::VisitLoadClass(HLoadClass* cls) {
break;
}
locations->SetOut(Location::RequiresRegister());
+ if (load_kind == HLoadClass::LoadKind::kBssEntry) {
+ if (!kUseReadBarrier || kUseBakerReadBarrier) {
+ // Rely on the type resolution or initialization and marking to save everything we need.
+ // Request a temp to hold the BSS entry location for the slow path on R2
+ // (no benefit for R6).
+ if (!isR6) {
+ locations->AddTemp(Location::RequiresRegister());
+ }
+ RegisterSet caller_saves = RegisterSet::Empty();
+ InvokeRuntimeCallingConvention calling_convention;
+ caller_saves.Add(Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
+ locations->SetCustomSlowPathCallerSaves(caller_saves);
+ } else {
+ // For non-Baker read barriers we have a temp-clobbering call.
+ }
+ }
}
// NO_THREAD_SAFETY_ANALYSIS as we manipulate handles whose internal object we know does not
@@ -7160,10 +7437,22 @@ void InstructionCodeGeneratorMIPS::VisitLoadClass(HLoadClass* cls) NO_THREAD_SAF
case HLoadClass::LoadKind::kBssEntry: {
CodeGeneratorMIPS::PcRelativePatchInfo* info =
codegen_->NewTypeBssEntryPatch(cls->GetDexFile(), cls->GetTypeIndex());
- bool reordering = __ SetReorder(false);
- codegen_->EmitPcRelativeAddressPlaceholderHigh(info, out, base_or_current_method_reg);
- GenerateGcRootFieldLoad(cls, out_loc, out, /* placeholder */ 0x5678, read_barrier_option);
- __ SetReorder(reordering);
+ constexpr bool non_baker_read_barrier = kUseReadBarrier && !kUseBakerReadBarrier;
+ if (isR6 || non_baker_read_barrier) {
+ bool reordering = __ SetReorder(false);
+ codegen_->EmitPcRelativeAddressPlaceholderHigh(info, out, base_or_current_method_reg);
+ GenerateGcRootFieldLoad(cls, out_loc, out, /* placeholder */ 0x5678, read_barrier_option);
+ __ SetReorder(reordering);
+ } else {
+ // On R2 save the BSS entry address in a temporary register instead of
+ // recalculating it in the slow path.
+ Register temp = locations->GetTemp(0).AsRegister<Register>();
+ bool reordering = __ SetReorder(false);
+ codegen_->EmitPcRelativeAddressPlaceholderHigh(info, temp, base_or_current_method_reg);
+ __ Addiu(temp, temp, /* placeholder */ 0x5678);
+ __ SetReorder(reordering);
+ GenerateGcRootFieldLoad(cls, out_loc, temp, /* offset */ 0, read_barrier_option);
+ }
generate_null_check = true;
break;
}
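A rough picture of the new R2 fast path (a sketch under assumptions: the 0x5678 immediates are placeholders patched at link time, and the exact expansion of EmitPcRelativeAddressPlaceholderHigh is simplified here):

    // base = PC-relative base register on R2:
    //   lui   temp, %hi(type_bss_entry)   // EmitPcRelativeAddressPlaceholderHigh
    //   addu  temp, temp, base
    //   addiu temp, temp, %lo(type_bss_entry)
    //   lw    out,  0(temp)               // GenerateGcRootFieldLoad, offset 0
    // If out is null, the slow path resolves the type and can write the result
    // back through temp, rather than re-materializing the BSS entry address.

On R6, PC-relative sequences are cheap enough that holding the address in a temp buys nothing, which is why the temp is only requested for R2.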
@@ -7227,13 +7516,14 @@ void LocationsBuilderMIPS::VisitLoadString(HLoadString* load) {
LocationSummary::CallKind call_kind = CodeGenerator::GetLoadStringCallKind(load);
LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(load, call_kind);
HLoadString::LoadKind load_kind = load->GetLoadKind();
+ const bool isR6 = codegen_->GetInstructionSetFeatures().IsR6();
switch (load_kind) {
// We need an extra register for PC-relative literals on R2.
case HLoadString::LoadKind::kBootImageLinkTimeAddress:
case HLoadString::LoadKind::kBootImageAddress:
case HLoadString::LoadKind::kBootImageLinkTimePcRelative:
case HLoadString::LoadKind::kBssEntry:
- if (codegen_->GetInstructionSetFeatures().IsR6()) {
+ if (isR6) {
break;
}
FALLTHROUGH_INTENDED;
@@ -7246,9 +7536,25 @@ void LocationsBuilderMIPS::VisitLoadString(HLoadString* load) {
}
if (load_kind == HLoadString::LoadKind::kDexCacheViaMethod) {
InvokeRuntimeCallingConvention calling_convention;
- locations->SetOut(calling_convention.GetReturnLocation(load->GetType()));
+ locations->SetOut(Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
} else {
locations->SetOut(Location::RequiresRegister());
+ if (load_kind == HLoadString::LoadKind::kBssEntry) {
+ if (!kUseReadBarrier || kUseBakerReadBarrier) {
+ // Rely on the pResolveString and marking to save everything we need.
+ // Request a temp to hold the BSS entry location for the slow path on R2
+ // (no benefit for R6).
+ if (!isR6) {
+ locations->AddTemp(Location::RequiresRegister());
+ }
+ RegisterSet caller_saves = RegisterSet::Empty();
+ InvokeRuntimeCallingConvention calling_convention;
+ caller_saves.Add(Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
+ locations->SetCustomSlowPathCallerSaves(caller_saves);
+ } else {
+ // For non-Baker read barriers we have a temp-clobbering call.
+ }
+ }
}
}
@@ -7305,14 +7611,26 @@ void InstructionCodeGeneratorMIPS::VisitLoadString(HLoadString* load) NO_THREAD_
DCHECK(!codegen_->GetCompilerOptions().IsBootImage());
CodeGeneratorMIPS::PcRelativePatchInfo* info =
codegen_->NewPcRelativeStringPatch(load->GetDexFile(), load->GetStringIndex());
- bool reordering = __ SetReorder(false);
- codegen_->EmitPcRelativeAddressPlaceholderHigh(info, out, base_or_current_method_reg);
- GenerateGcRootFieldLoad(load,
- out_loc,
- out,
- /* placeholder */ 0x5678,
- kCompilerReadBarrierOption);
- __ SetReorder(reordering);
+ constexpr bool non_baker_read_barrier = kUseReadBarrier && !kUseBakerReadBarrier;
+ if (isR6 || non_baker_read_barrier) {
+ bool reordering = __ SetReorder(false);
+ codegen_->EmitPcRelativeAddressPlaceholderHigh(info, out, base_or_current_method_reg);
+ GenerateGcRootFieldLoad(load,
+ out_loc,
+ out,
+ /* placeholder */ 0x5678,
+ kCompilerReadBarrierOption);
+ __ SetReorder(reordering);
+ } else {
+ // On R2 save the BSS entry address in a temporary register instead of
+ // recalculating it in the slow path.
+ Register temp = locations->GetTemp(0).AsRegister<Register>();
+ bool reordering = __ SetReorder(false);
+ codegen_->EmitPcRelativeAddressPlaceholderHigh(info, temp, base_or_current_method_reg);
+ __ Addiu(temp, temp, /* placeholder */ 0x5678);
+ __ SetReorder(reordering);
+ GenerateGcRootFieldLoad(load, out_loc, temp, /* offset */ 0, kCompilerReadBarrierOption);
+ }
SlowPathCodeMIPS* slow_path = new (GetGraph()->GetArena()) LoadStringSlowPathMIPS(load);
codegen_->AddSlowPath(slow_path);
__ Beqz(out, slow_path->GetEntryLabel());
@@ -7342,6 +7660,7 @@ void InstructionCodeGeneratorMIPS::VisitLoadString(HLoadString* load) NO_THREAD_
// TODO: Re-add the compiler code to do string dex cache lookup again.
DCHECK(load_kind == HLoadString::LoadKind::kDexCacheViaMethod);
InvokeRuntimeCallingConvention calling_convention;
+ DCHECK_EQ(calling_convention.GetRegisterAt(0), out);
__ LoadConst32(calling_convention.GetRegisterAt(0), load->GetStringIndex().index_);
codegen_->InvokeRuntime(kQuickResolveString, load, load->GetDexPc());
CheckEntrypointTypes<kQuickResolveString, void*, uint32_t>();
@@ -7766,6 +8085,15 @@ void InstructionCodeGeneratorMIPS::VisitRem(HRem* instruction) {
}
}
+void LocationsBuilderMIPS::VisitConstructorFence(HConstructorFence* constructor_fence) {
+ constructor_fence->SetLocations(nullptr);
+}
+
+void InstructionCodeGeneratorMIPS::VisitConstructorFence(
+ HConstructorFence* constructor_fence ATTRIBUTE_UNUSED) {
+ GenerateMemoryBarrier(MemBarrierKind::kStoreStore);
+}
+
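The new visitor pair lowers HConstructorFence to the same StoreStore barrier previously emitted for HMemoryBarrier(kStoreStore). As a portable mental model (a sketch, not ART code; a C++ release fence is at least as strong as a StoreStore barrier):

    #include <atomic>

    struct HasFinal { int field; };
    std::atomic<HasFinal*> published{nullptr};

    void Construct(HasFinal* obj) {
      obj->field = 123;                                     // final-field store
      std::atomic_thread_fence(std::memory_order_release);  // ~ StoreStore
      published.store(obj, std::memory_order_relaxed);      // publish the object
    }

Any thread that observes the published pointer (with appropriate ordering on its own side) is then guaranteed to observe field == 123.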
void LocationsBuilderMIPS::VisitMemoryBarrier(HMemoryBarrier* memory_barrier) {
memory_barrier->SetLocations(nullptr);
}
diff --git a/compiler/optimizing/code_generator_mips.h b/compiler/optimizing/code_generator_mips.h
index 03939e3530..5ad1f12f8a 100644
--- a/compiler/optimizing/code_generator_mips.h
+++ b/compiler/optimizing/code_generator_mips.h
@@ -295,6 +295,7 @@ class InstructionCodeGeneratorMIPS : public InstructionCodeGenerator {
void GenerateIntCompareAndBranch(IfCondition cond,
LocationSummary* locations,
MipsLabel* label);
+ void GenerateLongCompare(IfCondition cond, LocationSummary* locations);
void GenerateLongCompareAndBranch(IfCondition cond,
LocationSummary* locations,
MipsLabel* label);
diff --git a/compiler/optimizing/code_generator_mips64.cc b/compiler/optimizing/code_generator_mips64.cc
index 19250c64e3..f04e3841f5 100644
--- a/compiler/optimizing/code_generator_mips64.cc
+++ b/compiler/optimizing/code_generator_mips64.cc
@@ -141,7 +141,8 @@ class BoundsCheckSlowPathMIPS64 : public SlowPathCodeMIPS64 {
class DivZeroCheckSlowPathMIPS64 : public SlowPathCodeMIPS64 {
public:
- explicit DivZeroCheckSlowPathMIPS64(HDivZeroCheck* instruction) : SlowPathCodeMIPS64(instruction) {}
+ explicit DivZeroCheckSlowPathMIPS64(HDivZeroCheck* instruction)
+ : SlowPathCodeMIPS64(instruction) {}
void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
CodeGeneratorMIPS64* mips64_codegen = down_cast<CodeGeneratorMIPS64*>(codegen);
@@ -192,7 +193,9 @@ class LoadClassSlowPathMIPS64 : public SlowPathCodeMIPS64 {
if (out.IsValid()) {
DCHECK(out.IsRegister() && !locations->GetLiveRegisters()->ContainsCoreRegister(out.reg()));
Primitive::Type type = instruction_->GetType();
- mips64_codegen->MoveLocation(out, calling_convention.GetReturnLocation(type), type);
+ mips64_codegen->MoveLocation(out,
+ Location::RegisterLocation(calling_convention.GetRegisterAt(0)),
+ type);
}
RestoreLiveRegisters(codegen, locations);
@@ -200,10 +203,6 @@ class LoadClassSlowPathMIPS64 : public SlowPathCodeMIPS64 {
DCHECK_EQ(instruction_->IsLoadClass(), cls_ == instruction_);
if (cls_ == instruction_ && cls_->GetLoadKind() == HLoadClass::LoadKind::kBssEntry) {
DCHECK(out.IsValid());
- // TODO: Change art_quick_initialize_type/art_quick_initialize_static_storage to
- // kSaveEverything and use a temporary for the .bss entry address in the fast path,
- // so that we can avoid another calculation here.
- DCHECK_NE(out.AsRegister<GpuRegister>(), AT);
CodeGeneratorMIPS64::PcRelativePatchInfo* info =
mips64_codegen->NewTypeBssEntryPatch(cls_->GetDexFile(), type_index);
mips64_codegen->EmitPcRelativeAddressPlaceholderHigh(info, AT);
@@ -250,16 +249,13 @@ class LoadStringSlowPathMIPS64 : public SlowPathCodeMIPS64 {
CheckEntrypointTypes<kQuickResolveString, void*, uint32_t>();
Primitive::Type type = instruction_->GetType();
mips64_codegen->MoveLocation(locations->Out(),
- calling_convention.GetReturnLocation(type),
+ Location::RegisterLocation(calling_convention.GetRegisterAt(0)),
type);
RestoreLiveRegisters(codegen, locations);
// Store the resolved String to the BSS entry.
- // TODO: Change art_quick_resolve_string to kSaveEverything and use a temporary for the
- // .bss entry address in the fast path, so that we can avoid another calculation here.
GpuRegister out = locations->Out().AsRegister<GpuRegister>();
- DCHECK_NE(out, AT);
CodeGeneratorMIPS64::PcRelativePatchInfo* info =
mips64_codegen->NewPcRelativeStringPatch(load->GetDexFile(), string_index);
mips64_codegen->EmitPcRelativeAddressPlaceholderHigh(info, AT);
@@ -397,8 +393,13 @@ class DeoptimizationSlowPathMIPS64 : public SlowPathCodeMIPS64 {
void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
CodeGeneratorMIPS64* mips64_codegen = down_cast<CodeGeneratorMIPS64*>(codegen);
__ Bind(GetEntryLabel());
+ LocationSummary* locations = instruction_->GetLocations();
+ SaveLiveRegisters(codegen, locations);
+ InvokeRuntimeCallingConvention calling_convention;
+ __ LoadConst32(calling_convention.GetRegisterAt(0),
+ static_cast<uint32_t>(instruction_->AsDeoptimize()->GetDeoptimizationKind()));
mips64_codegen->InvokeRuntime(kQuickDeoptimize, instruction_, instruction_->GetDexPc(), this);
- CheckEntrypointTypes<kQuickDeoptimize, void, void>();
+ CheckEntrypointTypes<kQuickDeoptimize, void, DeoptimizationKind>();
}
const char* GetDescription() const OVERRIDE { return "DeoptimizationSlowPathMIPS64"; }
@@ -1986,6 +1987,9 @@ void LocationsBuilderMIPS64::VisitArrayGet(HArrayGet* instruction) {
object_array_get_with_read_barrier
? LocationSummary::kCallOnSlowPath
: LocationSummary::kNoCall);
+ if (object_array_get_with_read_barrier && kUseBakerReadBarrier) {
+ locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty()); // No caller-save registers.
+ }
locations->SetInAt(0, Location::RequiresRegister());
locations->SetInAt(1, Location::RegisterOrConstant(instruction->InputAt(1)));
if (Primitive::IsFloatingPointType(type)) {
@@ -3906,7 +3910,10 @@ void InstructionCodeGeneratorMIPS64::VisitIf(HIf* if_instr) {
void LocationsBuilderMIPS64::VisitDeoptimize(HDeoptimize* deoptimize) {
LocationSummary* locations = new (GetGraph()->GetArena())
LocationSummary(deoptimize, LocationSummary::kCallOnSlowPath);
- locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty()); // No caller-save registers.
+ InvokeRuntimeCallingConvention calling_convention;
+ RegisterSet caller_saves = RegisterSet::Empty();
+ caller_saves.Add(Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
+ locations->SetCustomSlowPathCallerSaves(caller_saves);
if (IsBooleanValueOrMaterializedCondition(deoptimize->InputAt(0))) {
locations->SetInAt(0, Location::RequiresRegister());
}
@@ -3982,6 +3989,9 @@ void LocationsBuilderMIPS64::HandleFieldGet(HInstruction* instruction,
object_field_get_with_read_barrier
? LocationSummary::kCallOnSlowPath
: LocationSummary::kNoCall);
+ if (object_field_get_with_read_barrier && kUseBakerReadBarrier) {
+ locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty()); // No caller-save registers.
+ }
locations->SetInAt(0, Location::RequiresRegister());
if (Primitive::IsFloatingPointType(instruction->GetType())) {
locations->SetOut(Location::RequiresFpuRegister());
@@ -4544,6 +4554,7 @@ void CodeGeneratorMIPS64::GenerateReadBarrierForRootSlow(HInstruction* instructi
void LocationsBuilderMIPS64::VisitInstanceOf(HInstanceOf* instruction) {
LocationSummary::CallKind call_kind = LocationSummary::kNoCall;
TypeCheckKind type_check_kind = instruction->GetTypeCheckKind();
+ bool baker_read_barrier_slow_path = false;
switch (type_check_kind) {
case TypeCheckKind::kExactCheck:
case TypeCheckKind::kAbstractClassCheck:
@@ -4551,6 +4562,7 @@ void LocationsBuilderMIPS64::VisitInstanceOf(HInstanceOf* instruction) {
case TypeCheckKind::kArrayObjectCheck:
call_kind =
kEmitCompilerReadBarrier ? LocationSummary::kCallOnSlowPath : LocationSummary::kNoCall;
+ baker_read_barrier_slow_path = kUseBakerReadBarrier;
break;
case TypeCheckKind::kArrayCheck:
case TypeCheckKind::kUnresolvedCheck:
@@ -4560,6 +4572,9 @@ void LocationsBuilderMIPS64::VisitInstanceOf(HInstanceOf* instruction) {
}
LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction, call_kind);
+ if (baker_read_barrier_slow_path) {
+ locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty()); // No caller-save registers.
+ }
locations->SetInAt(0, Location::RequiresRegister());
locations->SetInAt(1, Location::RequiresRegister());
// The output does overlap inputs.
@@ -5077,10 +5092,8 @@ void LocationsBuilderMIPS64::VisitLoadClass(HLoadClass* cls) {
HLoadClass::LoadKind load_kind = cls->GetLoadKind();
if (load_kind == HLoadClass::LoadKind::kDexCacheViaMethod) {
InvokeRuntimeCallingConvention calling_convention;
- CodeGenerator::CreateLoadClassRuntimeCallLocationSummary(
- cls,
- Location::RegisterLocation(calling_convention.GetRegisterAt(0)),
- calling_convention.GetReturnLocation(Primitive::kPrimNot));
+ Location loc = Location::RegisterLocation(calling_convention.GetRegisterAt(0));
+ CodeGenerator::CreateLoadClassRuntimeCallLocationSummary(cls, loc, loc);
return;
}
DCHECK(!cls->NeedsAccessCheck());
@@ -5090,10 +5103,24 @@ void LocationsBuilderMIPS64::VisitLoadClass(HLoadClass* cls) {
? LocationSummary::kCallOnSlowPath
: LocationSummary::kNoCall;
LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(cls, call_kind);
+ if (kUseBakerReadBarrier && requires_read_barrier && !cls->NeedsEnvironment()) {
+ locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty()); // No caller-save registers.
+ }
if (load_kind == HLoadClass::LoadKind::kReferrersClass) {
locations->SetInAt(0, Location::RequiresRegister());
}
locations->SetOut(Location::RequiresRegister());
+ if (load_kind == HLoadClass::LoadKind::kBssEntry) {
+ if (!kUseReadBarrier || kUseBakerReadBarrier) {
+ // Rely on the type resolution or initialization and marking to save everything we need.
+ RegisterSet caller_saves = RegisterSet::Empty();
+ InvokeRuntimeCallingConvention calling_convention;
+ caller_saves.Add(Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
+ locations->SetCustomSlowPathCallerSaves(caller_saves);
+ } else {
+ // For non-Baker read barriers we have a temp-clobbering call.
+ }
+ }
}
// NO_THREAD_SAFETY_ANALYSIS as we manipulate handles whose internal object we know does not
@@ -5224,9 +5251,20 @@ void LocationsBuilderMIPS64::VisitLoadString(HLoadString* load) {
LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(load, call_kind);
if (load_kind == HLoadString::LoadKind::kDexCacheViaMethod) {
InvokeRuntimeCallingConvention calling_convention;
- locations->SetOut(calling_convention.GetReturnLocation(load->GetType()));
+ locations->SetOut(Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
} else {
locations->SetOut(Location::RequiresRegister());
+ if (load_kind == HLoadString::LoadKind::kBssEntry) {
+ if (!kUseReadBarrier || kUseBakerReadBarrier) {
+ // Rely on the pResolveString and marking to save everything we need.
+ RegisterSet caller_saves = RegisterSet::Empty();
+ InvokeRuntimeCallingConvention calling_convention;
+ caller_saves.Add(Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
+ locations->SetCustomSlowPathCallerSaves(caller_saves);
+ } else {
+ // For non-Baker read barriers we have a temp-clobbering call.
+ }
+ }
}
}
@@ -5294,6 +5332,7 @@ void InstructionCodeGeneratorMIPS64::VisitLoadString(HLoadString* load) NO_THREA
// TODO: Re-add the compiler code to do string dex cache lookup again.
DCHECK(load_kind == HLoadString::LoadKind::kDexCacheViaMethod);
InvokeRuntimeCallingConvention calling_convention;
+ DCHECK_EQ(calling_convention.GetRegisterAt(0), out);
__ LoadConst32(calling_convention.GetRegisterAt(0), load->GetStringIndex().index_);
codegen_->InvokeRuntime(kQuickResolveString, load, load->GetDexPc());
CheckEntrypointTypes<kQuickResolveString, void*, uint32_t>();
@@ -5653,6 +5692,15 @@ void InstructionCodeGeneratorMIPS64::VisitRem(HRem* instruction) {
}
}
+void LocationsBuilderMIPS64::VisitConstructorFence(HConstructorFence* constructor_fence) {
+ constructor_fence->SetLocations(nullptr);
+}
+
+void InstructionCodeGeneratorMIPS64::VisitConstructorFence(
+ HConstructorFence* constructor_fence ATTRIBUTE_UNUSED) {
+ GenerateMemoryBarrier(MemBarrierKind::kStoreStore);
+}
+
void LocationsBuilderMIPS64::VisitMemoryBarrier(HMemoryBarrier* memory_barrier) {
memory_barrier->SetLocations(nullptr);
}
diff --git a/compiler/optimizing/code_generator_x86.cc b/compiler/optimizing/code_generator_x86.cc
index 08a752f1d2..cf2d5cbee3 100644
--- a/compiler/optimizing/code_generator_x86.cc
+++ b/compiler/optimizing/code_generator_x86.cc
@@ -384,8 +384,14 @@ class DeoptimizationSlowPathX86 : public SlowPathCode {
void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
CodeGeneratorX86* x86_codegen = down_cast<CodeGeneratorX86*>(codegen);
__ Bind(GetEntryLabel());
+ LocationSummary* locations = instruction_->GetLocations();
+ SaveLiveRegisters(codegen, locations);
+ InvokeRuntimeCallingConvention calling_convention;
+ x86_codegen->Load32BitValue(
+ calling_convention.GetRegisterAt(0),
+ static_cast<uint32_t>(instruction_->AsDeoptimize()->GetDeoptimizationKind()));
x86_codegen->InvokeRuntime(kQuickDeoptimize, instruction_, instruction_->GetDexPc(), this);
- CheckEntrypointTypes<kQuickDeoptimize, void, void>();
+ CheckEntrypointTypes<kQuickDeoptimize, void, DeoptimizationKind>();
}
const char* GetDescription() const OVERRIDE { return "DeoptimizationSlowPathX86"; }
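The same three-step pattern recurs in every back end touched by this change (ARM, ARM64, MIPS, MIPS64, X86, X86-64): the slow path saves live registers, materializes the DeoptimizationKind into the first runtime-call argument register, and only then calls kQuickDeoptimize, whose checked signature goes from (void) to (DeoptimizationKind). That argument register is also why VisitDeoptimize below must list it as a custom caller-save instead of the old RegisterSet::Empty(). A hedged sketch of the enum's shape, listing only the kinds visible in this change (the real definition lives in deoptimization_kind.h and may contain more values):

    // Sketch, not the verbatim runtime header:
    enum class DeoptimizationKind : uint32_t {
      kAotInlineCache,  // inliner: AOT type guard
      kJitInlineCache,  // inliner: JIT type guard
      kJitSameTarget,   // inliner: polymorphic same-target guard
      kLoopBoundsBCE,   // BCE: hoisted loop bounds test
      kLoopNullBCE,     // BCE: hoisted null test
      kCHA,             // class-hierarchy-analysis guard
      kLast = kCHA      // assumption: the real kLast may differ
    };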
@@ -1688,7 +1694,10 @@ void InstructionCodeGeneratorX86::VisitIf(HIf* if_instr) {
void LocationsBuilderX86::VisitDeoptimize(HDeoptimize* deoptimize) {
LocationSummary* locations = new (GetGraph()->GetArena())
LocationSummary(deoptimize, LocationSummary::kCallOnSlowPath);
- locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty()); // No caller-save registers.
+ InvokeRuntimeCallingConvention calling_convention;
+ RegisterSet caller_saves = RegisterSet::Empty();
+ caller_saves.Add(Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
+ locations->SetCustomSlowPathCallerSaves(caller_saves);
if (IsBooleanValueOrMaterializedCondition(deoptimize->InputAt(0))) {
locations->SetInAt(0, Location::Any());
}
@@ -2057,6 +2066,15 @@ void InstructionCodeGeneratorX86::VisitDoubleConstant(HDoubleConstant* constant
// Will be generated at use site.
}
+void LocationsBuilderX86::VisitConstructorFence(HConstructorFence* constructor_fence) {
+ constructor_fence->SetLocations(nullptr);
+}
+
+void InstructionCodeGeneratorX86::VisitConstructorFence(
+ HConstructorFence* constructor_fence ATTRIBUTE_UNUSED) {
+ codegen_->GenerateMemoryBarrier(MemBarrierKind::kStoreStore);
+}
+
void LocationsBuilderX86::VisitMemoryBarrier(HMemoryBarrier* memory_barrier) {
memory_barrier->SetLocations(nullptr);
}
diff --git a/compiler/optimizing/code_generator_x86_64.cc b/compiler/optimizing/code_generator_x86_64.cc
index ff6e099d12..f2ed52b5a5 100644
--- a/compiler/optimizing/code_generator_x86_64.cc
+++ b/compiler/optimizing/code_generator_x86_64.cc
@@ -397,8 +397,14 @@ class DeoptimizationSlowPathX86_64 : public SlowPathCode {
void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
CodeGeneratorX86_64* x86_64_codegen = down_cast<CodeGeneratorX86_64*>(codegen);
__ Bind(GetEntryLabel());
+ LocationSummary* locations = instruction_->GetLocations();
+ SaveLiveRegisters(codegen, locations);
+ InvokeRuntimeCallingConvention calling_convention;
+ x86_64_codegen->Load32BitValue(
+ CpuRegister(calling_convention.GetRegisterAt(0)),
+ static_cast<uint32_t>(instruction_->AsDeoptimize()->GetDeoptimizationKind()));
x86_64_codegen->InvokeRuntime(kQuickDeoptimize, instruction_, instruction_->GetDexPc(), this);
- CheckEntrypointTypes<kQuickDeoptimize, void, void>();
+ CheckEntrypointTypes<kQuickDeoptimize, void, DeoptimizationKind>();
}
const char* GetDescription() const OVERRIDE { return "DeoptimizationSlowPathX86_64"; }
@@ -1710,7 +1716,10 @@ void InstructionCodeGeneratorX86_64::VisitIf(HIf* if_instr) {
void LocationsBuilderX86_64::VisitDeoptimize(HDeoptimize* deoptimize) {
LocationSummary* locations = new (GetGraph()->GetArena())
LocationSummary(deoptimize, LocationSummary::kCallOnSlowPath);
- locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty()); // No caller-save registers.
+ InvokeRuntimeCallingConvention calling_convention;
+ RegisterSet caller_saves = RegisterSet::Empty();
+ caller_saves.Add(Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
+ locations->SetCustomSlowPathCallerSaves(caller_saves);
if (IsBooleanValueOrMaterializedCondition(deoptimize->InputAt(0))) {
locations->SetInAt(0, Location::Any());
}
@@ -2165,6 +2174,15 @@ void InstructionCodeGeneratorX86_64::VisitDoubleConstant(
// Will be generated at use site.
}
+void LocationsBuilderX86_64::VisitConstructorFence(HConstructorFence* constructor_fence) {
+ constructor_fence->SetLocations(nullptr);
+}
+
+void InstructionCodeGeneratorX86_64::VisitConstructorFence(
+ HConstructorFence* constructor_fence ATTRIBUTE_UNUSED) {
+ codegen_->GenerateMemoryBarrier(MemBarrierKind::kStoreStore);
+}
+
void LocationsBuilderX86_64::VisitMemoryBarrier(HMemoryBarrier* memory_barrier) {
memory_barrier->SetLocations(nullptr);
}
diff --git a/compiler/optimizing/graph_checker.cc b/compiler/optimizing/graph_checker.cc
index 12340b416d..aea901dec7 100644
--- a/compiler/optimizing/graph_checker.cc
+++ b/compiler/optimizing/graph_checker.cc
@@ -338,14 +338,21 @@ void GraphChecker::VisitInstruction(HInstruction* instruction) {
// Ensure the inputs of `instruction` are defined in a block of the graph.
for (HInstruction* input : instruction->GetInputs()) {
- const HInstructionList& list = input->IsPhi()
- ? input->GetBlock()->GetPhis()
- : input->GetBlock()->GetInstructions();
- if (!list.Contains(input)) {
- AddError(StringPrintf("Input %d of instruction %d is not defined "
- "in a basic block of the control-flow graph.",
+ if (input->GetBlock() == nullptr) {
+ AddError(StringPrintf("Input %d of instruction %d is not in any "
+ "basic block of the control-flow graph.",
input->GetId(),
instruction->GetId()));
+ } else {
+ const HInstructionList& list = input->IsPhi()
+ ? input->GetBlock()->GetPhis()
+ : input->GetBlock()->GetInstructions();
+ if (!list.Contains(input)) {
+ AddError(StringPrintf("Input %d of instruction %d is not defined "
+ "in a basic block of the control-flow graph.",
+ input->GetId(),
+ instruction->GetId()));
+ }
}
}
diff --git a/compiler/optimizing/inliner.cc b/compiler/optimizing/inliner.cc
index 92d0f3c032..8674e727bb 100644
--- a/compiler/optimizing/inliner.cc
+++ b/compiler/optimizing/inliner.cc
@@ -783,7 +783,7 @@ void HInliner::AddCHAGuard(HInstruction* invoke_instruction,
HInstruction* compare = new (graph_->GetArena()) HNotEqual(
deopt_flag, graph_->GetIntConstant(0, dex_pc));
HInstruction* deopt = new (graph_->GetArena()) HDeoptimize(
- graph_->GetArena(), compare, HDeoptimize::Kind::kInline, dex_pc);
+ graph_->GetArena(), compare, DeoptimizationKind::kCHA, dex_pc);
if (cursor != nullptr) {
bb_cursor->InsertInstructionAfter(deopt_flag, cursor);
@@ -817,7 +817,17 @@ HInstruction* HInliner::AddTypeGuard(HInstruction* receiver,
}
const DexFile& caller_dex_file = *caller_compilation_unit_.GetDexFile();
- bool is_referrer = (klass.Get() == outermost_graph_->GetArtMethod()->GetDeclaringClass());
+ bool is_referrer;
+ ArtMethod* outermost_art_method = outermost_graph_->GetArtMethod();
+ if (outermost_art_method == nullptr) {
+ DCHECK(Runtime::Current()->IsAotCompiler());
+ // We are in AOT mode and we don't have an ART method to determine
+ // if the inlined method belongs to the referrer. Assume it doesn't.
+ is_referrer = false;
+ } else {
+ is_referrer = klass.Get() == outermost_art_method->GetDeclaringClass();
+ }
+
// Note that we will just compare the classes, so we don't need Java semantics access checks.
// Note that the type index and the dex file are relative to the method this type guard is
// inlined into.
@@ -850,7 +860,9 @@ HInstruction* HInliner::AddTypeGuard(HInstruction* receiver,
graph_->GetArena(),
compare,
receiver,
- HDeoptimize::Kind::kInline,
+ Runtime::Current()->IsAotCompiler()
+ ? DeoptimizationKind::kAotInlineCache
+ : DeoptimizationKind::kJitInlineCache,
invoke_instruction->GetDexPc());
bb_cursor->InsertInstructionAfter(deoptimize, compare);
deoptimize->CopyEnvironmentFrom(invoke_instruction->GetEnvironment());
@@ -1137,7 +1149,7 @@ bool HInliner::TryInlinePolymorphicCallToSameTarget(
graph_->GetArena(),
compare,
receiver,
- HDeoptimize::Kind::kInline,
+ DeoptimizationKind::kJitSameTarget,
invoke_instruction->GetDexPc());
bb_cursor->InsertInstructionAfter(deoptimize, compare);
deoptimize->CopyEnvironmentFrom(invoke_instruction->GetEnvironment());
@@ -1470,8 +1482,13 @@ bool HInliner::TryPatternSubstitution(HInvoke* invoke_instruction,
}
}
if (needs_constructor_barrier) {
- HMemoryBarrier* barrier = new (graph_->GetArena()) HMemoryBarrier(kStoreStore, kNoDexPc);
- invoke_instruction->GetBlock()->InsertInstructionBefore(barrier, invoke_instruction);
+ // See CompilerDriver::RequiresConstructorBarrier for more details.
+ DCHECK(obj != nullptr) << "only non-static methods can have a constructor fence";
+
+ HConstructorFence* constructor_fence =
+ new (graph_->GetArena()) HConstructorFence(obj, kNoDexPc, graph_->GetArena());
+ invoke_instruction->GetBlock()->InsertInstructionBefore(constructor_fence,
+ invoke_instruction);
}
*return_replacement = nullptr;
break;
diff --git a/compiler/optimizing/instruction_builder.cc b/compiler/optimizing/instruction_builder.cc
index 978c6a2d71..8b79da8c73 100644
--- a/compiler/optimizing/instruction_builder.cc
+++ b/compiler/optimizing/instruction_builder.cc
@@ -451,10 +451,13 @@ void HInstructionBuilder::InitializeParameters() {
referrer_method_id.class_idx_,
parameter_index++,
Primitive::kPrimNot,
- true);
+ /* is_this */ true);
AppendInstruction(parameter);
UpdateLocal(locals_index++, parameter);
number_of_parameters--;
+ current_this_parameter_ = parameter;
+ } else {
+ DCHECK(current_this_parameter_ == nullptr);
}
const DexFile::ProtoId& proto = dex_file_->GetMethodPrototype(referrer_method_id);
@@ -465,7 +468,7 @@ void HInstructionBuilder::InitializeParameters() {
arg_types->GetTypeItem(shorty_pos - 1).type_idx_,
parameter_index++,
Primitive::GetType(shorty[shorty_pos]),
- false);
+ /* is_this */ false);
++shorty_pos;
AppendInstruction(parameter);
// Store the parameter value in the local that the dex code will use
@@ -588,6 +591,8 @@ void HInstructionBuilder::Binop_22b(const Instruction& instruction, bool reverse
UpdateLocal(instruction.VRegA(), current_block_->GetLastInstruction());
}
+// Does the method being compiled need any constructor barriers inserted?
+// (Always 'false' for methods that aren't <init>.)
static bool RequiresConstructorBarrier(const DexCompilationUnit* cu, CompilerDriver* driver) {
// Can be null in unit tests only.
if (UNLIKELY(cu == nullptr)) {
@@ -596,6 +601,11 @@ static bool RequiresConstructorBarrier(const DexCompilationUnit* cu, CompilerDri
Thread* self = Thread::Current();
return cu->IsConstructor()
+ && !cu->IsStatic()
+ // RequiresConstructorBarrier must only be queried for <init> methods;
+ // it's effectively "false" for every other method.
+ //
+ // See CompilerDriver::RequiresConstructorBarrier for more explanation.
&& driver->RequiresConstructorBarrier(self, cu->GetDexFile(), cu->GetClassDefIndex());
}
@@ -639,13 +649,24 @@ void HInstructionBuilder::BuildReturn(const Instruction& instruction,
Primitive::Type type,
uint32_t dex_pc) {
if (type == Primitive::kPrimVoid) {
+ // Only <init> (which is a return-void) could possibly have a constructor fence.
// This may insert additional redundant constructor fences from the super constructors.
// TODO: remove redundant constructor fences (b/36656456).
if (RequiresConstructorBarrier(dex_compilation_unit_, compiler_driver_)) {
- AppendInstruction(new (arena_) HMemoryBarrier(kStoreStore, dex_pc));
+ // Compiling instance constructor.
+ if (kIsDebugBuild) {
+ std::string method_name = graph_->GetMethodName();
+ CHECK_EQ(std::string("<init>"), method_name);
+ }
+
+ HInstruction* fence_target = current_this_parameter_;
+ DCHECK(fence_target != nullptr);
+
+ AppendInstruction(new (arena_) HConstructorFence(fence_target, dex_pc, arena_));
}
AppendInstruction(new (arena_) HReturnVoid(dex_pc));
} else {
+ DCHECK(!RequiresConstructorBarrier(dex_compilation_unit_, compiler_driver_));
HInstruction* value = LoadLocal(instruction.VRegA(), type);
AppendInstruction(new (arena_) HReturn(value, dex_pc));
}
diff --git a/compiler/optimizing/instruction_builder.h b/compiler/optimizing/instruction_builder.h
index 7fdc1883ca..2fb5c7b94d 100644
--- a/compiler/optimizing/instruction_builder.h
+++ b/compiler/optimizing/instruction_builder.h
@@ -62,6 +62,7 @@ class HInstructionBuilder : public ValueObject {
current_block_(nullptr),
current_locals_(nullptr),
latest_result_(nullptr),
+ current_this_parameter_(nullptr),
compiler_driver_(driver),
code_generator_(code_generator),
dex_compilation_unit_(dex_compilation_unit),
@@ -325,6 +326,11 @@ class HInstructionBuilder : public ValueObject {
HBasicBlock* current_block_;
ArenaVector<HInstruction*>* current_locals_;
HInstruction* latest_result_;
+ // Current "this" parameter.
+ // Valid only after InitializeParameters() finishes.
+ // * Null for static methods.
+ // * Non-null for instance methods.
+ HParameterValue* current_this_parameter_;
CompilerDriver* const compiler_driver_;
diff --git a/compiler/optimizing/instruction_simplifier.cc b/compiler/optimizing/instruction_simplifier.cc
index 2dcc12e294..2cedde900e 100644
--- a/compiler/optimizing/instruction_simplifier.cc
+++ b/compiler/optimizing/instruction_simplifier.cc
@@ -257,7 +257,8 @@ void InstructionSimplifierVisitor::VisitShift(HBinaryOperation* instruction) {
if (shift_amount->IsConstant()) {
int64_t cst = Int64FromConstant(shift_amount->AsConstant());
- if ((cst & implicit_mask) == 0) {
+ int64_t masked_cst = cst & implicit_mask;
+ if (masked_cst == 0) {
// Replace code looking like
// SHL dst, value, 0
// with
@@ -266,6 +267,17 @@ void InstructionSimplifierVisitor::VisitShift(HBinaryOperation* instruction) {
instruction->GetBlock()->RemoveInstruction(instruction);
RecordSimplification();
return;
+ } else if (masked_cst != cst) {
+ // Replace code looking like
+ // SHL dst, value, cst
+ // where cst exceeds maximum distance with the equivalent
+ // SHL dst, value, cst & implicit_mask
+ // (as defined by shift semantics). This ensures other
+ // optimizations do not need to special-case such situations.
+ DCHECK_EQ(shift_amount->GetType(), Primitive::kPrimInt);
+ instruction->ReplaceInput(GetGraph()->GetIntConstant(masked_cst), /* index */ 1);
+ RecordSimplification();
+ return;
}
}
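Concretely, Java shift semantics consult only the low 5 bits of the distance for int shifts and the low 6 bits for long shifts, so an oversized constant distance can be canonicalized once, up front. A toy C++ illustration of the masking the rewrite bakes in:

    #include <cstdint>

    int32_t ShlInt(int32_t value, int32_t distance) {
      return value << (distance & 31);  // int: implicit_mask == 31
    }
    int64_t ShlLong(int64_t value, int32_t distance) {
      return value << (distance & 63);  // long: implicit_mask == 63
    }
    // After this change, "SHL dst, value, 33" (int) is rewritten once into
    // "SHL dst, value, 1", so later passes never see out-of-range distances.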
diff --git a/compiler/optimizing/intrinsics_arm.cc b/compiler/optimizing/intrinsics_arm.cc
index 750f9cc213..69cf9a126f 100644
--- a/compiler/optimizing/intrinsics_arm.cc
+++ b/compiler/optimizing/intrinsics_arm.cc
@@ -1010,17 +1010,14 @@ static void GenCas(HInvoke* invoke, Primitive::Type type, CodeGeneratorARM* code
if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
// Need to make sure the reference stored in the field is a to-space
// one before attempting the CAS or the CAS could fail incorrectly.
- codegen->GenerateReferenceLoadWithBakerReadBarrier(
+ codegen->UpdateReferenceFieldWithBakerReadBarrier(
invoke,
out_loc, // Unused, used only as a "temporary" within the read barrier.
base,
- /* offset */ 0u,
- /* index */ offset_loc,
- ScaleFactor::TIMES_1,
+ /* field_offset */ offset_loc,
tmp_ptr_loc,
/* needs_null_check */ false,
- /* always_update_field */ true,
- &tmp);
+ tmp);
}
}
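The reason for the pre-CAS fixup: under a concurrent-copying collector the field may still hold a from-space reference while `expected` is the to-space copy, so a raw compare-and-swap would fail spuriously even though both refer to the same logical object. A toy model (Canonical() stands in for the read-barrier mark step; none of these names are ART API):

    struct Obj {};
    Obj from_copy, to_copy;  // two physical copies of one logical object

    Obj* Canonical(Obj* ref) {  // what "marking" conceptually returns
      return ref == &from_copy ? &to_copy : ref;
    }

    bool CasField(Obj** field, Obj* expected, Obj* value) {
      *field = Canonical(*field);  // the UpdateReferenceField... fixup
      if (*field != expected) return false;
      *field = value;              // real code uses an atomic LL/SC loop
      return true;
    }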
@@ -1648,6 +1645,8 @@ void IntrinsicLocationsBuilderARM::VisitSystemArrayCopy(HInvoke* invoke) {
// is clobbered by ReadBarrierMarkRegX entry points). Get an extra
// temporary register from the register allocator.
locations->AddTemp(Location::RequiresRegister());
+ CodeGeneratorARM* arm_codegen = down_cast<CodeGeneratorARM*>(codegen_);
+ arm_codegen->MaybeAddBakerCcEntrypointTempForFields(locations);
}
}
diff --git a/compiler/optimizing/intrinsics_arm64.cc b/compiler/optimizing/intrinsics_arm64.cc
index 4d360158a2..65a82229e9 100644
--- a/compiler/optimizing/intrinsics_arm64.cc
+++ b/compiler/optimizing/intrinsics_arm64.cc
@@ -1154,17 +1154,14 @@ static void GenCas(HInvoke* invoke, Primitive::Type type, CodeGeneratorARM64* co
Register temp = WRegisterFrom(locations->GetTemp(0));
// Need to make sure the reference stored in the field is a to-space
// one before attempting the CAS or the CAS could fail incorrectly.
- codegen->GenerateReferenceLoadWithBakerReadBarrier(
+ codegen->UpdateReferenceFieldWithBakerReadBarrier(
invoke,
out_loc, // Unused, used only as a "temporary" within the read barrier.
base,
- /* offset */ 0u,
- /* index */ offset_loc,
- /* scale_factor */ 0u,
+ /* field_offset */ offset_loc,
temp,
/* needs_null_check */ false,
- /* use_load_acquire */ false,
- /* always_update_field */ true);
+ /* use_load_acquire */ false);
}
}
diff --git a/compiler/optimizing/intrinsics_arm_vixl.cc b/compiler/optimizing/intrinsics_arm_vixl.cc
index fd8a37ae05..356d5bcb0c 100644
--- a/compiler/optimizing/intrinsics_arm_vixl.cc
+++ b/compiler/optimizing/intrinsics_arm_vixl.cc
@@ -1347,17 +1347,14 @@ static void GenCas(HInvoke* invoke, Primitive::Type type, CodeGeneratorARMVIXL*
if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
// Need to make sure the reference stored in the field is a to-space
// one before attempting the CAS or the CAS could fail incorrectly.
- codegen->GenerateReferenceLoadWithBakerReadBarrier(
+ codegen->UpdateReferenceFieldWithBakerReadBarrier(
invoke,
out_loc, // Unused, used only as a "temporary" within the read barrier.
base,
- /* offset */ 0u,
- /* index */ offset_loc,
- ScaleFactor::TIMES_1,
+ /* field_offset */ offset_loc,
tmp_ptr_loc,
/* needs_null_check */ false,
- /* always_update_field */ true,
- &tmp);
+ tmp);
}
}
@@ -2026,6 +2023,8 @@ void IntrinsicLocationsBuilderARMVIXL::VisitSystemArrayCopy(HInvoke* invoke) {
// is clobbered by ReadBarrierMarkRegX entry points). Get an extra
// temporary register from the register allocator.
locations->AddTemp(Location::RequiresRegister());
+ CodeGeneratorARMVIXL* arm_codegen = down_cast<CodeGeneratorARMVIXL*>(codegen_);
+ arm_codegen->MaybeAddBakerCcEntrypointTempForFields(locations);
}
}
diff --git a/compiler/optimizing/intrinsics_mips.cc b/compiler/optimizing/intrinsics_mips.cc
index bfe04f5ae0..abf5b122c8 100644
--- a/compiler/optimizing/intrinsics_mips.cc
+++ b/compiler/optimizing/intrinsics_mips.cc
@@ -1525,6 +1525,9 @@ static void CreateIntIntIntToIntLocations(ArenaAllocator* arena,
? LocationSummary::kCallOnSlowPath
: LocationSummary::kNoCall),
kIntrinsified);
+ if (can_call && kUseBakerReadBarrier) {
+ locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty()); // No caller-save registers.
+ }
locations->SetInAt(0, Location::NoLocation()); // Unused receiver.
locations->SetInAt(1, Location::RequiresRegister());
locations->SetInAt(2, Location::RequiresRegister());
diff --git a/compiler/optimizing/intrinsics_mips64.cc b/compiler/optimizing/intrinsics_mips64.cc
index c5e116046f..9dce59b2af 100644
--- a/compiler/optimizing/intrinsics_mips64.cc
+++ b/compiler/optimizing/intrinsics_mips64.cc
@@ -1168,6 +1168,9 @@ static void CreateIntIntIntToIntLocations(ArenaAllocator* arena,
? LocationSummary::kCallOnSlowPath
: LocationSummary::kNoCall),
kIntrinsified);
+ if (can_call && kUseBakerReadBarrier) {
+ locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty()); // No caller-save registers.
+ }
locations->SetInAt(0, Location::NoLocation()); // Unused receiver.
locations->SetInAt(1, Location::RequiresRegister());
locations->SetInAt(2, Location::RequiresRegister());
diff --git a/compiler/optimizing/load_store_elimination.cc b/compiler/optimizing/load_store_elimination.cc
index 48699b33ae..8d8cc93b9b 100644
--- a/compiler/optimizing/load_store_elimination.cc
+++ b/compiler/optimizing/load_store_elimination.cc
@@ -566,14 +566,22 @@ class LSEVisitor : public HGraphVisitor {
store->GetBlock()->RemoveInstruction(store);
}
- // Eliminate allocations that are not used.
+ // Eliminate singleton-classified instructions:
+ //   * Constructor fences (they never escape this thread).
+ //   * Allocations (if they are unused).
for (HInstruction* new_instance : singleton_new_instances_) {
+ HConstructorFence::RemoveConstructorFences(new_instance);
+
if (!new_instance->HasNonEnvironmentUses()) {
new_instance->RemoveEnvironmentUsers();
new_instance->GetBlock()->RemoveInstruction(new_instance);
}
}
for (HInstruction* new_array : singleton_new_arrays_) {
+ // TODO: Delete constructor fences for new-array
+ // In the future, HNewArray instructions will have HConstructorFences for them.
+ // HConstructorFence::RemoveConstructorFences(new_array);
+
if (!new_array->HasNonEnvironmentUses()) {
new_array->RemoveEnvironmentUsers();
new_array->GetBlock()->RemoveInstruction(new_array);
diff --git a/compiler/optimizing/loop_optimization.cc b/compiler/optimizing/loop_optimization.cc
index bbc55dd16f..881802d714 100644
--- a/compiler/optimizing/loop_optimization.cc
+++ b/compiler/optimizing/loop_optimization.cc
@@ -71,7 +71,7 @@ static bool IsSignExtensionAndGet(HInstruction* instruction,
// extension when represented in the *width* of the given narrower data type
// (the fact that char normally zero extends does not matter here).
int64_t value = 0;
- if (IsInt64AndGet(instruction, &value)) {
+ if (IsInt64AndGet(instruction, /*out*/ &value)) {
switch (type) {
case Primitive::kPrimByte:
if (std::numeric_limits<int8_t>::min() <= value &&
@@ -119,7 +119,7 @@ static bool IsZeroExtensionAndGet(HInstruction* instruction,
// extension when represented in the *width* of the given narrower data type
// (the fact that byte/short normally sign extend does not matter here).
int64_t value = 0;
- if (IsInt64AndGet(instruction, &value)) {
+ if (IsInt64AndGet(instruction, /*out*/ &value)) {
switch (type) {
case Primitive::kPrimByte:
if (std::numeric_limits<uint8_t>::min() <= value &&
@@ -833,19 +833,14 @@ bool HLoopOptimization::VectorizeUse(LoopNode* node,
// TODO: accept symbolic, albeit loop invariant shift factors.
HInstruction* opa = instruction->InputAt(0);
HInstruction* opb = instruction->InputAt(1);
- int64_t value = 0;
- if (VectorizeUse(node, opa, generate_code, type, restrictions) && IsInt64AndGet(opb, &value)) {
- // Make sure shift distance only looks at lower bits, as defined for sequential shifts.
- int64_t mask = (instruction->GetType() == Primitive::kPrimLong)
- ? kMaxLongShiftDistance
- : kMaxIntShiftDistance;
- int64_t distance = value & mask;
+ int64_t distance = 0;
+ if (VectorizeUse(node, opa, generate_code, type, restrictions) &&
+ IsInt64AndGet(opb, /*out*/ &distance)) {
// Restrict shift distance to packed data type width.
int64_t max_distance = Primitive::ComponentSize(type) * 8;
if (0 <= distance && distance < max_distance) {
if (generate_code) {
- HInstruction* s = graph_->GetIntConstant(distance);
- GenerateVecOp(instruction, vector_map_->Get(opa), s, type);
+ GenerateVecOp(instruction, vector_map_->Get(opa), opb, type);
}
return true;
}
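Note the interplay with the instruction-simplifier change above: constant shift distances now reach the vectorizer already masked into the scalar range, so the remaining question is only whether they also fit the packed lane width. A toy version of the acceptance test (lane sizes assumed; not ART API):

    #include <cstddef>
    #include <cstdint>

    bool ShiftDistanceVectorizable(int64_t distance, size_t lane_size_bytes) {
      int64_t max_distance = static_cast<int64_t>(lane_size_bytes) * 8;
      return 0 <= distance && distance < max_distance;
    }
    // Byte lanes: ShiftDistanceVectorizable(7, 1) -> true, while
    // ShiftDistanceVectorizable(9, 1) -> false, even though the scalar int
    // shift would have happily executed the distance 9 as (9 & 31) == 9.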
@@ -1177,14 +1172,14 @@ bool HLoopOptimization::VectorizeHalvingAddIdiom(LoopNode* node,
int64_t value = 0;
if ((instruction->IsShr() ||
instruction->IsUShr()) &&
- IsInt64AndGet(instruction->InputAt(1), &value) && value == 1) {
+ IsInt64AndGet(instruction->InputAt(1), /*out*/ &value) && value == 1) {
//
// TODO: make following code less sensitive to associativity and commutativity differences.
//
HInstruction* x = instruction->InputAt(0);
// Test for an optional rounding part (x + 1) >> 1.
bool is_rounded = false;
- if (x->IsAdd() && IsInt64AndGet(x->InputAt(1), &value) && value == 1) {
+ if (x->IsAdd() && IsInt64AndGet(x->InputAt(1), /*out*/ &value) && value == 1) {
x = x->InputAt(0);
is_rounded = true;
}
diff --git a/compiler/optimizing/nodes.cc b/compiler/optimizing/nodes.cc
index ca953a1a7e..a8bfe610de 100644
--- a/compiler/optimizing/nodes.cc
+++ b/compiler/optimizing/nodes.cc
@@ -528,6 +528,15 @@ HCurrentMethod* HGraph::GetCurrentMethod() {
return cached_current_method_;
}
+const char* HGraph::GetMethodName() const {
+ const DexFile::MethodId& method_id = dex_file_.GetMethodId(method_idx_);
+ return dex_file_.GetMethodName(method_id);
+}
+
+std::string HGraph::PrettyMethod(bool with_signature) const {
+ return dex_file_.PrettyMethod(method_idx_, with_signature);
+}
+
HConstant* HGraph::GetConstant(Primitive::Type type, int64_t value, uint32_t dex_pc) {
switch (type) {
case Primitive::Type::kPrimBoolean:
@@ -1150,6 +1159,81 @@ void HVariableInputSizeInstruction::RemoveInputAt(size_t index) {
}
}
+void HVariableInputSizeInstruction::RemoveAllInputs() {
+ RemoveAsUserOfAllInputs();
+ DCHECK(!HasNonEnvironmentUses());
+
+ inputs_.clear();
+ DCHECK_EQ(0u, InputCount());
+}
+
+void HConstructorFence::RemoveConstructorFences(HInstruction* instruction) {
+ DCHECK(instruction->GetBlock() != nullptr);
+ // Removing constructor fences only makes sense for instructions with an object return type.
+ DCHECK_EQ(Primitive::kPrimNot, instruction->GetType());
+
+ // Efficient implementation that simultaneously (in one pass):
+ // * Scans the uses list for all constructor fences.
+ // * Deletes that constructor fence from the uses list of `instruction`.
+ // * Deletes `instruction` from the constructor fence's inputs.
+ // * Deletes the constructor fence if it now has 0 inputs.
+
+ const HUseList<HInstruction*>& uses = instruction->GetUses();
+ // Warning: Although this is "const", we might mutate the list when calling RemoveInputAt.
+ for (auto it = uses.begin(), end = uses.end(); it != end; ) {
+ const HUseListNode<HInstruction*>& use_node = *it;
+ HInstruction* const use_instruction = use_node.GetUser();
+
+ // Advance the iterator immediately once we fetch the use_node.
+ // Warning: If the input is removed, the current iterator becomes invalid.
+ ++it;
+
+ if (use_instruction->IsConstructorFence()) {
+ HConstructorFence* ctor_fence = use_instruction->AsConstructorFence();
+ size_t input_index = use_node.GetIndex();
+
+ // Process the candidate instruction for removal
+ // from the graph.
+
+ // Constructor fence instructions are never
+ // used by other instructions.
+ //
+ // If we wanted to make this more generic, it
+ // could be a runtime if statement.
+ DCHECK(!ctor_fence->HasUses());
+
+ // A constructor fence's return type is "kPrimVoid"
+ // and therefore it can't have any environment uses.
+ DCHECK(!ctor_fence->HasEnvironmentUses());
+
+ // Remove the inputs first, otherwise removing the instruction
+ // will try to remove its uses while we are already removing uses
+ // and this operation will fail.
+ DCHECK_EQ(instruction, ctor_fence->InputAt(input_index));
+
+ // Removing the input will also remove the `use_node`.
+ // (Do not look at `use_node` after this; it will be a dangling reference.)
+ ctor_fence->RemoveInputAt(input_index);
+
+ // Once all inputs are removed, the fence is considered dead and
+ // is removed.
+ if (ctor_fence->InputCount() == 0u) {
+ ctor_fence->GetBlock()->RemoveInstruction(ctor_fence);
+ }
+ }
+ }
+
+ if (kIsDebugBuild) {
+ // Post-condition checks:
+ // * None of the uses of `instruction` are a constructor fence.
+ // * The `instruction` itself did not get removed from a block.
+ for (const HUseListNode<HInstruction*>& use_node : instruction->GetUses()) {
+ CHECK(!use_node.GetUser()->IsConstructorFence());
+ }
+ CHECK(instruction->GetBlock() != nullptr);
+ }
+}
+
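RemoveConstructorFences relies on advancing the iterator before mutating the use list, because removing the input destroys the very node the iterator was built from. The same pattern in portable form (std::list stands in for ART's intrusive use list; illustrative only):

    #include <list>

    void RemoveEvens(std::list<int>& values) {
      for (auto it = values.begin(); it != values.end(); ) {
        auto current = it++;      // advance first: erase() below invalidates
                                  // only iterators to the erased node
        if (*current % 2 == 0) {
          values.erase(current);  // safe: `it` already points past it
        }
      }
    }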
#define DEFINE_ACCEPT(name, super) \
void H##name::Accept(HGraphVisitor* visitor) { \
visitor->Visit##name(this); \
@@ -1338,18 +1422,6 @@ std::ostream& operator<<(std::ostream& os, const ComparisonBias& rhs) {
}
}
-std::ostream& operator<<(std::ostream& os, const HDeoptimize::Kind& rhs) {
- switch (rhs) {
- case HDeoptimize::Kind::kBCE:
- return os << "bce";
- case HDeoptimize::Kind::kInline:
- return os << "inline";
- default:
- LOG(FATAL) << "Unknown Deoptimization kind: " << static_cast<int>(rhs);
- UNREACHABLE();
- }
-}
-
bool HCondition::IsBeforeWhenDisregardMoves(HInstruction* instruction) const {
return this == instruction->GetPreviousDisregardingMoves();
}
diff --git a/compiler/optimizing/nodes.h b/compiler/optimizing/nodes.h
index 36c7df70ce..b4da20b558 100644
--- a/compiler/optimizing/nodes.h
+++ b/compiler/optimizing/nodes.h
@@ -30,6 +30,7 @@
#include "base/transform_array_ref.h"
#include "dex_file.h"
#include "dex_file_types.h"
+#include "deoptimization_kind.h"
#include "entrypoints/quick/quick_entrypoints_enum.h"
#include "handle.h"
#include "handle_scope.h"
@@ -46,6 +47,7 @@ namespace art {
class GraphChecker;
class HBasicBlock;
+class HConstructorFence;
class HCurrentMethod;
class HDoubleConstant;
class HEnvironment;
@@ -57,6 +59,7 @@ class HIntConstant;
class HInvoke;
class HLongConstant;
class HNullConstant;
+class HParameterValue;
class HPhi;
class HSuspendCheck;
class HTryBoundary;
@@ -537,6 +540,12 @@ class HGraph : public ArenaObject<kArenaAllocGraph> {
return method_idx_;
}
+ // Get the method name (without the signature), e.g. "<init>".
+ const char* GetMethodName() const;
+
+ // Get the pretty method name (class + name + optionally signature).
+ std::string PrettyMethod(bool with_signature = true) const;
+
InvokeType GetInvokeType() const {
return invoke_type_;
}
@@ -1297,6 +1306,7 @@ class HLoopInformationOutwardIterator : public ValueObject {
M(ClearException, Instruction) \
M(ClinitCheck, Instruction) \
M(Compare, BinaryOperation) \
+ M(ConstructorFence, Instruction) \
M(CurrentMethod, Instruction) \
M(ShouldDeoptimizeFlag, Instruction) \
M(Deoptimize, Instruction) \
@@ -1476,8 +1486,11 @@ FOR_EACH_INSTRUCTION(FORWARD_DECLARATION)
template <typename T>
class HUseListNode : public ArenaObject<kArenaAllocUseListNode> {
public:
+ // Get the instruction which has this use as one of the inputs.
T GetUser() const { return user_; }
+ // Get the position of the input record that this use corresponds to.
size_t GetIndex() const { return index_; }
+ // Set the position of the input record that this use corresponds to.
void SetIndex(size_t index) { index_ = index; }
// Hook for the IntrusiveForwardList<>.
@@ -2037,7 +2050,8 @@ class HInstruction : public ArenaObject<kArenaAllocInstruction> {
!IsNativeDebugInfo() &&
!IsParameterValue() &&
// If we added an explicit barrier then we should keep it.
- !IsMemoryBarrier();
+ !IsMemoryBarrier() &&
+ !IsConstructorFence();
}
bool IsDeadAndRemovable() const {
@@ -2431,6 +2445,11 @@ class HVariableInputSizeInstruction : public HInstruction {
void InsertInputAt(size_t index, HInstruction* input);
void RemoveInputAt(size_t index);
+ // Removes all the inputs.
+ // Also removes this instruction from each input's use list
+ // (for non-environment uses only).
+ void RemoveAllInputs();
+
protected:
HVariableInputSizeInstruction(SideEffects side_effects,
uint32_t dex_pc,
@@ -2973,15 +2992,9 @@ class HTryBoundary FINAL : public HTemplateInstruction<0> {
// Deoptimize to interpreter, upon checking a condition.
class HDeoptimize FINAL : public HVariableInputSizeInstruction {
public:
- enum class Kind {
- kBCE,
- kInline,
- kLast = kInline
- };
-
// Use this constructor when the `HDeoptimize` acts as a barrier, where no code can move
// across.
- HDeoptimize(ArenaAllocator* arena, HInstruction* cond, Kind kind, uint32_t dex_pc)
+ HDeoptimize(ArenaAllocator* arena, HInstruction* cond, DeoptimizationKind kind, uint32_t dex_pc)
: HVariableInputSizeInstruction(
SideEffects::All(),
dex_pc,
@@ -3001,7 +3014,7 @@ class HDeoptimize FINAL : public HVariableInputSizeInstruction {
HDeoptimize(ArenaAllocator* arena,
HInstruction* cond,
HInstruction* guard,
- Kind kind,
+ DeoptimizationKind kind,
uint32_t dex_pc)
: HVariableInputSizeInstruction(
SideEffects::CanTriggerGC(),
@@ -3025,7 +3038,7 @@ class HDeoptimize FINAL : public HVariableInputSizeInstruction {
bool CanThrow() const OVERRIDE { return true; }
- Kind GetKind() const { return GetPackedField<DeoptimizeKindField>(); }
+ DeoptimizationKind GetDeoptimizationKind() const { return GetPackedField<DeoptimizeKindField>(); }
Primitive::Type GetType() const OVERRIDE {
return GuardsAnInput() ? GuardedInput()->GetType() : Primitive::kPrimVoid;
@@ -3050,18 +3063,17 @@ class HDeoptimize FINAL : public HVariableInputSizeInstruction {
static constexpr size_t kFieldCanBeMoved = kNumberOfGenericPackedBits;
static constexpr size_t kFieldDeoptimizeKind = kNumberOfGenericPackedBits + 1;
static constexpr size_t kFieldDeoptimizeKindSize =
- MinimumBitsToStore(static_cast<size_t>(Kind::kLast));
+ MinimumBitsToStore(static_cast<size_t>(DeoptimizationKind::kLast));
static constexpr size_t kNumberOfDeoptimizePackedBits =
kFieldDeoptimizeKind + kFieldDeoptimizeKindSize;
static_assert(kNumberOfDeoptimizePackedBits <= kMaxNumberOfPackedBits,
"Too many packed fields.");
- using DeoptimizeKindField = BitField<Kind, kFieldDeoptimizeKind, kFieldDeoptimizeKindSize>;
+ using DeoptimizeKindField =
+ BitField<DeoptimizationKind, kFieldDeoptimizeKind, kFieldDeoptimizeKindSize>;
DISALLOW_COPY_AND_ASSIGN(HDeoptimize);
};
-std::ostream& operator<<(std::ostream& os, const HDeoptimize::Kind& rhs);
-
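The packed-field arithmetic above follows ART's usual BitField idiom; here is a standalone illustration with made-up enumerators and bit offset (only the MinimumBitsToStore/BitField pattern mirrors nodes.h):

    // Illustrative only: a two-value kind enum needs
    // MinimumBitsToStore(1) == 1 packed bit.
    enum class DeoptimizationKind : uint32_t {
      kBlockBCE,
      kInline,
      kLast = kInline
    };
    static constexpr size_t kFieldDeoptimizeKind = 2;  // First free packed bit (assumed).
    static constexpr size_t kFieldDeoptimizeKindSize =
        MinimumBitsToStore(static_cast<size_t>(DeoptimizationKind::kLast));
    using DeoptimizeKindField =
        BitField<DeoptimizationKind, kFieldDeoptimizeKind, kFieldDeoptimizeKindSize>;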
// Represents a should_deoptimize flag. Currently used for CHA-based devirtualization.
// The compiled code checks this flag value in a guard before devirtualized call and
// if it's true, starts to do deoptimization.
@@ -5069,7 +5081,7 @@ class HParameterValue FINAL : public HExpression<0> {
const DexFile& GetDexFile() const { return dex_file_; }
dex::TypeIndex GetTypeIndex() const { return type_index_; }
uint8_t GetIndex() const { return index_; }
- bool IsThis() const ATTRIBUTE_UNUSED { return GetPackedFlag<kFlagIsThis>(); }
+ bool IsThis() const { return GetPackedFlag<kFlagIsThis>(); }
bool CanBeNull() const OVERRIDE { return GetPackedFlag<kFlagCanBeNull>(); }
void SetCanBeNull(bool can_be_null) { SetPackedFlag<kFlagCanBeNull>(can_be_null); }
@@ -6507,6 +6519,137 @@ class HMemoryBarrier FINAL : public HTemplateInstruction<0> {
DISALLOW_COPY_AND_ASSIGN(HMemoryBarrier);
};
+// A constructor fence orders all prior stores to fields that could be accessed via a final field of
+// the specified object(s), with respect to any subsequent store that might "publish"
+// (i.e. make visible) the specified object to another thread.
+//
+// JLS 17.5.1 "Semantics of final fields" states that a freeze action happens
+// for all final fields (that were set) at the end of the invoked constructor.
+//
+// The constructor fence models the freeze actions for the final fields of an object
+// being constructed (semantically at the end of the constructor). Constructor fences
+// have a per-object affinity; two separate objects being constructed get two separate
+// constructor fences.
+//
+// (Note: if calling a super-constructor or forwarding to another constructor,
+// the freezes happen at the end of *that* invoked constructor instead.)
+//
+// The memory model guarantees that when the object being constructed is "published" after
+// constructor completion (i.e. escapes the current thread via a store), then any final field
+// writes must be observable on other threads (once they observe that publication).
+//
+// Further, anything written before the freeze, and read by dereferencing through the final field,
+// must also be visible (a final object field could itself reference an object with non-final fields;
+// the freeze must extend to those as well).
+//
+// Constructor example:
+//
+// class HasFinal {
+// final int field; Optimizing IR for <init>()V:
+// HasFinal() {
+// field = 123; HInstanceFieldSet(this, HasFinal.field, 123)
+// // freeze(this.field); HConstructorFence(this)
+// } HReturn
+// }
+//
+// HConstructorFence can serve double duty as a fence for new-instance/new-array allocations of
+// already-initialized classes; in that case the allocation must act as a "default-initializer"
+// of the object, which effectively writes the class pointer "final field".
+//
+// For example, we can model default-initialization roughly as the equivalent of the following:
+//
+// class Object {
+// private final Class header;
+// }
+//
+// Java code: Optimizing IR:
+//
+// T new_instance<T>() {
+// Object obj = allocate_memory(T.class.size); obj = HInvoke(art_quick_alloc_object, T)
+// obj.header = T.class; // header write is done by above call.
+// // freeze(obj.header) HConstructorFence(obj)
+// return (T)obj;
+// }
+//
+// See also:
+// * CompilerDriver::RequiresConstructorBarrier
+// * QuasiAtomic::ThreadFenceForConstructor
+//
+class HConstructorFence FINAL : public HVariableInputSizeInstruction {
+ // A fence has variable inputs because the inputs can be removed
+ // after the prepare_for_register_allocation phase.
+ // (TODO: In the future a fence could freeze multiple objects
+ // after merging two fences together.)
+ public:
+ // `fence_object` is the reference that needs to be protected for correct publication.
+ //
+ // It makes sense in the following situations:
+ // * For <init> constructors, it is the "this" parameter (i.e. an HParameterValue with IsThis() == true).
+ // * For new-instance-like instructions, it is the return value (i.e. the HNewInstance).
+ //
+ // After construction the `fence_object` becomes the 0th input.
+ // This is not an input in a real sense, but just a convenient place to stash the information
+ // about the associated object.
+ HConstructorFence(HInstruction* fence_object,
+ uint32_t dex_pc,
+ ArenaAllocator* arena)
+ // We strongly suspect there is no more accurate way to express the fine-grained reordering
+ // constraints from the class comment above. We claim that these SideEffects constraints
+ // enforce a superset of the real constraints.
+ //
+ // The ordering described above is conservatively modeled with SideEffects as follows:
+ //
+ // * To prevent reordering of the publication stores:
+ // ----> "Reads of objects" is the initial SideEffect.
+ // * For every primitive final field store in the constructor:
+ // ----> Union that field's type as a read (e.g. "Read of T") into the SideEffect.
+ // * If there are any stores to reference final fields in the constructor:
+ // ----> Use a more conservative "AllReads" SideEffect because any stores to any references
+ //         that are reachable from `fence_object` also need to be prevented from reordering
+ // (and we do not want to do alias analysis to figure out what those stores are).
+ //
+ // In the implementation, this initially starts out as an "all reads" side effect; this is an
+ // even more conservative approach than the one described above, and prevents all of the
+ // above reordering without analyzing any of the instructions in the constructor.
+ //
+ // If in a later phase we discover that there are no writes to reference final fields,
+ // we can refine the side effect to a smaller set of type reads (see above constraints).
+ : HVariableInputSizeInstruction(SideEffects::AllReads(),
+ dex_pc,
+ arena,
+ /* number_of_inputs */ 1,
+ kArenaAllocConstructorFenceInputs) {
+ DCHECK(fence_object != nullptr);
+ SetRawInputAt(0, fence_object);
+ }
+
+ // The object associated with this constructor fence.
+ //
+ // (Note: This will be null after the prepare_for_register_allocation phase,
+ // as all constructor fence inputs are removed there).
+ HInstruction* GetFenceObject() const {
+ return InputAt(0);
+ }
+
+ // Find all the HConstructorFence uses (`fence_use`) of `instruction` and:
+ // - Delete `fence_use` from `instruction`'s use list.
+ // - Delete `instruction` from `fence_use`'s input list.
+ // - If the `fence_use` becomes dead, remove it from the graph.
+ //
+ // A fence is considered dead once it no longer has any uses
+ // and all of the inputs are dead.
+ //
+ // This must *not* be called during/after prepare_for_register_allocation,
+ // because that phase removes all the inputs to the fences
+ // while the fences themselves are still considered live.
+ static void RemoveConstructorFences(HInstruction* instruction);
+
+ DECLARE_INSTRUCTION(ConstructorFence);
+
+ private:
+ DISALLOW_COPY_AND_ASSIGN(HConstructorFence);
+};
+
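The removal algorithm that RemoveConstructorFences() documents might be sketched as follows. This is an assumed shape built from the comment and from public HInstruction APIs, not the body in nodes.cc:

    // Hypothetical sketch: detach `instruction` from every fence using it,
    // deleting fences that become dead in the process.
    void HConstructorFence::RemoveConstructorFences(HInstruction* instruction) {
      const HUseList<HInstruction*>& uses = instruction->GetUses();
      for (auto it = uses.begin(); it != uses.end(); /* advanced manually */) {
        HInstruction* user = it->GetUser();
        size_t index = it->GetIndex();
        ++it;  // Advance first: RemoveInputAt() below deletes this use node.
        if (user->IsConstructorFence()) {
          HConstructorFence* fence = user->AsConstructorFence();
          fence->RemoveInputAt(index);
          if (fence->InputCount() == 0u && !fence->HasUses()) {
            fence->GetBlock()->RemoveInstruction(fence);  // Fence is dead.
          }
        }
      }
    }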
class HMonitorOperation FINAL : public HTemplateInstruction<1> {
public:
enum class OperationKind {
diff --git a/compiler/optimizing/prepare_for_register_allocation.cc b/compiler/optimizing/prepare_for_register_allocation.cc
index 66bfea9860..c3c141bff7 100644
--- a/compiler/optimizing/prepare_for_register_allocation.cc
+++ b/compiler/optimizing/prepare_for_register_allocation.cc
@@ -167,6 +167,13 @@ void PrepareForRegisterAllocation::VisitCondition(HCondition* condition) {
}
}
+void PrepareForRegisterAllocation::VisitConstructorFence(HConstructorFence* constructor_fence) {
+ // Delete all the inputs to the constructor fence;
+ // they aren't used by the InstructionCodeGenerator and this lets us avoid creating a
+ // LocationSummary in the LocationsBuilder.
+ constructor_fence->RemoveAllInputs();
+}
+
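With the inputs stripped here, each backend can handle the fence without any LocationSummary; the ARM flavor plausibly reduces to a store-store barrier. The following is a sketch of the expected shape, not quoted from this change's code_generator files:

    // Sketch of per-backend handling once inputs are removed: nothing to
    // allocate in the LocationsBuilder, and codegen emits only a StoreStore
    // barrier to order prior field writes before any publishing store.
    void LocationsBuilderARM::VisitConstructorFence(HConstructorFence* constructor_fence) {
      constructor_fence->SetLocations(nullptr);  // Avoids a LocationSummary entirely.
    }

    void InstructionCodeGeneratorARM::VisitConstructorFence(
        HConstructorFence* constructor_fence ATTRIBUTE_UNUSED) {
      codegen_->GenerateMemoryBarrier(MemBarrierKind::kStoreStore);
    }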
void PrepareForRegisterAllocation::VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) {
if (invoke->IsStaticWithExplicitClinitCheck()) {
HLoadClass* last_input = invoke->GetInputs().back()->AsLoadClass();
diff --git a/compiler/optimizing/prepare_for_register_allocation.h b/compiler/optimizing/prepare_for_register_allocation.h
index 7ffbe44ef6..395d4ba2ee 100644
--- a/compiler/optimizing/prepare_for_register_allocation.h
+++ b/compiler/optimizing/prepare_for_register_allocation.h
@@ -43,6 +43,7 @@ class PrepareForRegisterAllocation : public HGraphDelegateVisitor {
void VisitArraySet(HArraySet* instruction) OVERRIDE;
void VisitClinitCheck(HClinitCheck* check) OVERRIDE;
void VisitCondition(HCondition* condition) OVERRIDE;
+ void VisitConstructorFence(HConstructorFence* constructor_fence) OVERRIDE;
void VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) OVERRIDE;
void VisitDeoptimize(HDeoptimize* deoptimize) OVERRIDE;
diff --git a/compiler/optimizing/ssa_liveness_analysis_test.cc b/compiler/optimizing/ssa_liveness_analysis_test.cc
index a1016d1d47..029eb4ba61 100644
--- a/compiler/optimizing/ssa_liveness_analysis_test.cc
+++ b/compiler/optimizing/ssa_liveness_analysis_test.cc
@@ -190,7 +190,7 @@ TEST_F(SsaLivenessAnalysisTest, TestDeoptimize) {
HInstruction* ae = new (&allocator_) HAboveOrEqual(index, length);
block->AddInstruction(ae);
HInstruction* deoptimize =
- new(&allocator_) HDeoptimize(&allocator_, ae, HDeoptimize::Kind::kBCE, /* dex_pc */ 0u);
+ new(&allocator_) HDeoptimize(&allocator_, ae, DeoptimizationKind::kBlockBCE, /* dex_pc */ 0u);
block->AddInstruction(deoptimize);
HEnvironment* deoptimize_env = new (&allocator_) HEnvironment(&allocator_,
/* number_of_vregs */ 5,