Diffstat (limited to 'compiler/optimizing')
-rw-r--r--  compiler/optimizing/bounds_check_elimination.cc | 11
-rw-r--r--  compiler/optimizing/code_generator.cc | 6
-rw-r--r--  compiler/optimizing/code_generator_arm.cc | 247
-rw-r--r--  compiler/optimizing/code_generator_arm.h | 20
-rw-r--r--  compiler/optimizing/code_generator_arm64.cc | 257
-rw-r--r--  compiler/optimizing/code_generator_arm64.h | 16
-rw-r--r--  compiler/optimizing/code_generator_arm_vixl.cc | 5
-rw-r--r--  compiler/optimizing/code_generator_arm_vixl.h | 2
-rw-r--r--  compiler/optimizing/code_generator_x86.cc | 236
-rw-r--r--  compiler/optimizing/code_generator_x86.h | 23
-rw-r--r--  compiler/optimizing/code_generator_x86_64.cc | 253
-rw-r--r--  compiler/optimizing/code_generator_x86_64.h | 24
-rw-r--r--  compiler/optimizing/dead_code_elimination.cc | 38
-rw-r--r--  compiler/optimizing/gvn.cc | 4
-rw-r--r--  compiler/optimizing/induction_var_analysis.cc | 3
-rw-r--r--  compiler/optimizing/inliner.cc | 7
-rw-r--r--  compiler/optimizing/instruction_builder.cc | 7
-rw-r--r--  compiler/optimizing/instruction_simplifier.cc | 16
-rw-r--r--  compiler/optimizing/intrinsics.cc | 3
-rw-r--r--  compiler/optimizing/intrinsics_arm.cc | 130
-rw-r--r--  compiler/optimizing/intrinsics_arm64.cc | 97
-rw-r--r--  compiler/optimizing/intrinsics_x86.cc | 93
-rw-r--r--  compiler/optimizing/intrinsics_x86_64.cc | 91
-rw-r--r--  compiler/optimizing/licm.cc | 4
-rw-r--r--  compiler/optimizing/linear_order.cc | 3
-rw-r--r--  compiler/optimizing/linear_order.h | 6
-rw-r--r--  compiler/optimizing/liveness_test.cc | 3
-rw-r--r--  compiler/optimizing/load_store_elimination.cc | 8
-rw-r--r--  compiler/optimizing/nodes.cc | 36
-rw-r--r--  compiler/optimizing/nodes.h | 66
-rw-r--r--  compiler/optimizing/prepare_for_register_allocation.cc | 3
-rw-r--r--  compiler/optimizing/reference_type_propagation.cc | 7
-rw-r--r--  compiler/optimizing/register_allocator_graph_color.cc | 2
-rw-r--r--  compiler/optimizing/register_allocator_linear_scan.cc | 2
-rw-r--r--  compiler/optimizing/select_generator.cc | 3
-rw-r--r--  compiler/optimizing/side_effects_analysis.cc | 7
-rw-r--r--  compiler/optimizing/ssa_builder.cc | 14
-rw-r--r--  compiler/optimizing/ssa_liveness_analysis.cc | 10
-rw-r--r--  compiler/optimizing/ssa_phi_elimination.cc | 9
39 files changed, 1290 insertions, 482 deletions
diff --git a/compiler/optimizing/bounds_check_elimination.cc b/compiler/optimizing/bounds_check_elimination.cc
index 529fc9e261..d2357a5d05 100644
--- a/compiler/optimizing/bounds_check_elimination.cc
+++ b/compiler/optimizing/bounds_check_elimination.cc
@@ -1845,8 +1845,8 @@ void BoundsCheckElimination::Run() {
// that value dominated by that instruction fits in that range. Range of that
// value can be narrowed further down in the dominator tree.
BCEVisitor visitor(graph_, side_effects_, induction_analysis_);
- for (HReversePostOrderIterator it(*graph_); !it.Done(); it.Advance()) {
- HBasicBlock* current = it.Current();
+ for (size_t i = 0, size = graph_->GetReversePostOrder().size(); i != size; ++i) {
+ HBasicBlock* current = graph_->GetReversePostOrder()[i];
if (visitor.IsAddedBlock(current)) {
// Skip added blocks. Their effects are already taken care of.
continue;
@@ -1855,8 +1855,11 @@ void BoundsCheckElimination::Run() {
// Skip forward to the current block in case new basic blocks were inserted
// (which always appear earlier in reverse post order) to avoid visiting the
// same basic block twice.
- for ( ; !it.Done() && it.Current() != current; it.Advance()) {
- }
+ size_t new_size = graph_->GetReversePostOrder().size();
+ DCHECK_GE(new_size, size);
+ i += new_size - size;
+ DCHECK_EQ(current, graph_->GetReversePostOrder()[i]);
+ size = new_size;
}
// Perform cleanup.
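
Editor's note: the new index-based traversal above tolerates blocks being inserted while the pass runs, because freshly added blocks always appear earlier in reverse post order; advancing the index by the growth delta lands back on the block that was just visited. A minimal sketch of the pattern (not part of the patch; `Block` and `VisitAndMaybeInsert` are hypothetical stand-ins):

#include <cassert>
#include <cstddef>
#include <vector>

struct Block {};

// Stand-in for BCEVisitor::VisitBasicBlock; the real visitor may insert new
// blocks into `rpo` at positions before `current`.
static void VisitAndMaybeInsert(std::vector<Block*>& rpo, Block* current) {
  (void)rpo;
  (void)current;
}

void Traverse(std::vector<Block*>& rpo) {
  for (size_t i = 0, size = rpo.size(); i != size; ++i) {
    Block* current = rpo[i];
    VisitAndMaybeInsert(rpo, current);  // May grow `rpo` ahead of index `i`.
    size_t new_size = rpo.size();
    assert(new_size >= size);
    i += new_size - size;               // `current` shifted forward by the growth delta.
    assert(rpo[i] == current);
    size = new_size;
  }
}
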
diff --git a/compiler/optimizing/code_generator.cc b/compiler/optimizing/code_generator.cc
index 0f8cdbb19b..8b450e11dc 100644
--- a/compiler/optimizing/code_generator.cc
+++ b/compiler/optimizing/code_generator.cc
@@ -671,9 +671,9 @@ static void CheckLoopEntriesCanBeUsedForOsr(const HGraph& graph,
return;
}
ArenaVector<HSuspendCheck*> loop_headers(graph.GetArena()->Adapter(kArenaAllocMisc));
- for (HReversePostOrderIterator it(graph); !it.Done(); it.Advance()) {
- if (it.Current()->IsLoopHeader()) {
- HSuspendCheck* suspend_check = it.Current()->GetLoopInformation()->GetSuspendCheck();
+ for (HBasicBlock* block : graph.GetReversePostOrder()) {
+ if (block->IsLoopHeader()) {
+ HSuspendCheck* suspend_check = block->GetLoopInformation()->GetSuspendCheck();
if (!suspend_check->GetEnvironment()->IsFromInlinedInvoke()) {
loop_headers.push_back(suspend_check);
}
diff --git a/compiler/optimizing/code_generator_arm.cc b/compiler/optimizing/code_generator_arm.cc
index 9f92b20929..be65f89ef1 100644
--- a/compiler/optimizing/code_generator_arm.cc
+++ b/compiler/optimizing/code_generator_arm.cc
@@ -601,11 +601,21 @@ class ArraySetSlowPathARM : public SlowPathCodeARM {
DISALLOW_COPY_AND_ASSIGN(ArraySetSlowPathARM);
};
-// Slow path marking an object during a read barrier.
+// Slow path marking an object reference `ref` during a read
+// barrier. The field `obj.field` in the object `obj` holding this
+// reference does not get updated by this slow path after marking (see
+// ReadBarrierMarkAndUpdateFieldSlowPathARM below for that).
+//
+// This means that after the execution of this slow path, `ref` will
+// always be up-to-date, but `obj.field` may not; i.e., after the
+// flip, `ref` will be a to-space reference, but `obj.field` will
+// probably still be a from-space reference (unless it gets updated by
+// another thread, or if another thread installed another object
+// reference (different from `ref`) in `obj.field`).
class ReadBarrierMarkSlowPathARM : public SlowPathCodeARM {
public:
- ReadBarrierMarkSlowPathARM(HInstruction* instruction, Location obj)
- : SlowPathCodeARM(instruction), obj_(obj) {
+ ReadBarrierMarkSlowPathARM(HInstruction* instruction, Location ref)
+ : SlowPathCodeARM(instruction), ref_(ref) {
DCHECK(kEmitCompilerReadBarrier);
}
@@ -613,9 +623,9 @@ class ReadBarrierMarkSlowPathARM : public SlowPathCodeARM {
void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
LocationSummary* locations = instruction_->GetLocations();
- Register reg = obj_.AsRegister<Register>();
+ Register ref_reg = ref_.AsRegister<Register>();
DCHECK(locations->CanCall());
- DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(reg));
+ DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(ref_reg)) << ref_reg;
DCHECK(instruction_->IsInstanceFieldGet() ||
instruction_->IsStaticFieldGet() ||
instruction_->IsArrayGet() ||
@@ -634,40 +644,213 @@ class ReadBarrierMarkSlowPathARM : public SlowPathCodeARM {
// entrypoint. Also, there is no need to update the stack mask,
// as this runtime call will not trigger a garbage collection.
CodeGeneratorARM* arm_codegen = down_cast<CodeGeneratorARM*>(codegen);
- DCHECK_NE(reg, SP);
- DCHECK_NE(reg, LR);
- DCHECK_NE(reg, PC);
+ DCHECK_NE(ref_reg, SP);
+ DCHECK_NE(ref_reg, LR);
+ DCHECK_NE(ref_reg, PC);
// IP is used internally by the ReadBarrierMarkRegX entry point
// as a temporary, it cannot be the entry point's input/output.
- DCHECK_NE(reg, IP);
- DCHECK(0 <= reg && reg < kNumberOfCoreRegisters) << reg;
+ DCHECK_NE(ref_reg, IP);
+ DCHECK(0 <= ref_reg && ref_reg < kNumberOfCoreRegisters) << ref_reg;
// "Compact" slow path, saving two moves.
//
// Instead of using the standard runtime calling convention (input
// and output in R0):
//
- // R0 <- obj
+ // R0 <- ref
// R0 <- ReadBarrierMark(R0)
- // obj <- R0
+ // ref <- R0
//
- // we just use rX (the register holding `obj`) as input and output
+ // we just use rX (the register containing `ref`) as input and output
// of a dedicated entrypoint:
//
// rX <- ReadBarrierMarkRegX(rX)
//
int32_t entry_point_offset =
- CodeGenerator::GetReadBarrierMarkEntryPointsOffset<kArmPointerSize>(reg);
+ CodeGenerator::GetReadBarrierMarkEntryPointsOffset<kArmPointerSize>(ref_reg);
// This runtime call does not require a stack map.
arm_codegen->InvokeRuntimeWithoutRecordingPcInfo(entry_point_offset, instruction_, this);
__ b(GetExitLabel());
}
private:
- const Location obj_;
+ // The location (register) of the marked object reference.
+ const Location ref_;
DISALLOW_COPY_AND_ASSIGN(ReadBarrierMarkSlowPathARM);
};
+// Slow path marking an object reference `ref` during a read barrier,
+// and if needed, atomically updating the field `obj.field` in the
+// object `obj` holding this reference after marking (contrary to
+// ReadBarrierMarkSlowPathARM above, which never tries to update
+// `obj.field`).
+//
+// This means that after the execution of this slow path, both `ref`
+// and `obj.field` will be up-to-date; i.e., after the flip, both will
+// hold the same to-space reference (unless another thread installed
+// another object reference (different from `ref`) in `obj.field`).
+class ReadBarrierMarkAndUpdateFieldSlowPathARM : public SlowPathCodeARM {
+ public:
+ ReadBarrierMarkAndUpdateFieldSlowPathARM(HInstruction* instruction,
+ Location ref,
+ Register obj,
+ Location field_offset,
+ Register temp1,
+ Register temp2)
+ : SlowPathCodeARM(instruction),
+ ref_(ref),
+ obj_(obj),
+ field_offset_(field_offset),
+ temp1_(temp1),
+ temp2_(temp2) {
+ DCHECK(kEmitCompilerReadBarrier);
+ }
+
+ const char* GetDescription() const OVERRIDE { return "ReadBarrierMarkAndUpdateFieldSlowPathARM"; }
+
+ void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
+ LocationSummary* locations = instruction_->GetLocations();
+ Register ref_reg = ref_.AsRegister<Register>();
+ DCHECK(locations->CanCall());
+ DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(ref_reg)) << ref_reg;
+ // This slow path is only used by the UnsafeCASObject intrinsic.
+ DCHECK((instruction_->IsInvokeVirtual() && instruction_->GetLocations()->Intrinsified()))
+ << "Unexpected instruction in read barrier marking and field updating slow path: "
+ << instruction_->DebugName();
+ DCHECK(instruction_->GetLocations()->Intrinsified());
+ DCHECK_EQ(instruction_->AsInvoke()->GetIntrinsic(), Intrinsics::kUnsafeCASObject);
+ DCHECK(field_offset_.IsRegisterPair()) << field_offset_;
+
+ __ Bind(GetEntryLabel());
+
+ // Save the old reference.
+ // Note that we cannot use IP to save the old reference, as IP is
+ // used internally by the ReadBarrierMarkRegX entry point, and we
+ // need the old reference after the call to that entry point.
+ DCHECK_NE(temp1_, IP);
+ __ Mov(temp1_, ref_reg);
+
+ // No need to save live registers; it's taken care of by the
+ // entrypoint. Also, there is no need to update the stack mask,
+ // as this runtime call will not trigger a garbage collection.
+ CodeGeneratorARM* arm_codegen = down_cast<CodeGeneratorARM*>(codegen);
+ DCHECK_NE(ref_reg, SP);
+ DCHECK_NE(ref_reg, LR);
+ DCHECK_NE(ref_reg, PC);
+ // IP is used internally by the ReadBarrierMarkRegX entry point
+ // as a temporary, it cannot be the entry point's input/output.
+ DCHECK_NE(ref_reg, IP);
+ DCHECK(0 <= ref_reg && ref_reg < kNumberOfCoreRegisters) << ref_reg;
+ // "Compact" slow path, saving two moves.
+ //
+ // Instead of using the standard runtime calling convention (input
+ // and output in R0):
+ //
+ // R0 <- ref
+ // R0 <- ReadBarrierMark(R0)
+ // ref <- R0
+ //
+ // we just use rX (the register containing `ref`) as input and output
+ // of a dedicated entrypoint:
+ //
+ // rX <- ReadBarrierMarkRegX(rX)
+ //
+ int32_t entry_point_offset =
+ CodeGenerator::GetReadBarrierMarkEntryPointsOffset<kArmPointerSize>(ref_reg);
+ // This runtime call does not require a stack map.
+ arm_codegen->InvokeRuntimeWithoutRecordingPcInfo(entry_point_offset, instruction_, this);
+
+ // If the new reference is different from the old reference,
+ // update the field in the holder (`*(obj_ + field_offset_)`).
+ //
+ // Note that this field could also hold a different object, if
+ // another thread had concurrently changed it. In that case, the
+ // LDREX/SUBS/ITNE sequence of instructions in the compare-and-set
+ // (CAS) operation below would abort the CAS, leaving the field
+ // as-is.
+ Label done;
+ __ cmp(temp1_, ShifterOperand(ref_reg));
+ __ b(&done, EQ);
+
+    // Update the holder's field atomically. This may fail if the
+    // mutator updates it before us, but it's OK. This is achieved
+ // using a strong compare-and-set (CAS) operation with relaxed
+ // memory synchronization ordering, where the expected value is
+ // the old reference and the desired value is the new reference.
+
+ // Convenience aliases.
+ Register base = obj_;
+ // The UnsafeCASObject intrinsic uses a register pair as field
+ // offset ("long offset"), of which only the low part contains
+ // data.
+ Register offset = field_offset_.AsRegisterPairLow<Register>();
+ Register expected = temp1_;
+ Register value = ref_reg;
+ Register tmp_ptr = IP; // Pointer to actual memory.
+ Register tmp = temp2_; // Value in memory.
+
+ __ add(tmp_ptr, base, ShifterOperand(offset));
+
+ if (kPoisonHeapReferences) {
+ __ PoisonHeapReference(expected);
+ if (value == expected) {
+ // Do not poison `value`, as it is the same register as
+ // `expected`, which has just been poisoned.
+ } else {
+ __ PoisonHeapReference(value);
+ }
+ }
+
+ // do {
+ // tmp = [r_ptr] - expected;
+ // } while (tmp == 0 && failure([r_ptr] <- r_new_value));
+
+ Label loop_head, exit_loop;
+ __ Bind(&loop_head);
+
+ __ ldrex(tmp, tmp_ptr);
+
+ __ subs(tmp, tmp, ShifterOperand(expected));
+
+ __ it(NE);
+ __ clrex(NE);
+
+ __ b(&exit_loop, NE);
+
+ __ strex(tmp, value, tmp_ptr);
+ __ cmp(tmp, ShifterOperand(1));
+ __ b(&loop_head, EQ);
+
+ __ Bind(&exit_loop);
+
+ if (kPoisonHeapReferences) {
+ __ UnpoisonHeapReference(expected);
+ if (value == expected) {
+ // Do not unpoison `value`, as it is the same register as
+ // `expected`, which has just been unpoisoned.
+ } else {
+ __ UnpoisonHeapReference(value);
+ }
+ }
+
+ __ Bind(&done);
+ __ b(GetExitLabel());
+ }
+
+ private:
+ // The location (register) of the marked object reference.
+ const Location ref_;
+ // The register containing the object holding the marked object reference field.
+ const Register obj_;
+ // The location of the offset of the marked reference field within `obj_`.
+ Location field_offset_;
+
+ const Register temp1_;
+ const Register temp2_;
+
+ DISALLOW_COPY_AND_ASSIGN(ReadBarrierMarkAndUpdateFieldSlowPathARM);
+};
+
// Slow path generating a read barrier for a heap reference.
class ReadBarrierForHeapReferenceSlowPathARM : public SlowPathCodeARM {
public:
@@ -5773,7 +5956,7 @@ void InstructionCodeGeneratorARM::VisitLoadString(HLoadString* load) {
__ movt(temp, /* placeholder */ 0u);
__ BindTrackedLabel(&labels->add_pc_label);
__ add(temp, temp, ShifterOperand(PC));
- GenerateGcRootFieldLoad(load, out_loc, temp, 0);
+ GenerateGcRootFieldLoad(load, out_loc, temp, /* offset */ 0, kEmitCompilerReadBarrier);
SlowPathCode* slow_path = new (GetGraph()->GetArena()) LoadStringSlowPathARM(load);
codegen_->AddSlowPath(slow_path);
__ CompareAndBranchIfZero(out, slow_path->GetEntryLabel());
@@ -6644,7 +6827,9 @@ void CodeGeneratorARM::GenerateReferenceLoadWithBakerReadBarrier(HInstruction* i
Location index,
ScaleFactor scale_factor,
Location temp,
- bool needs_null_check) {
+ bool needs_null_check,
+ bool always_update_field,
+ Register* temp2) {
DCHECK(kEmitCompilerReadBarrier);
DCHECK(kUseBakerReadBarrier);
@@ -6689,8 +6874,9 @@ void CodeGeneratorARM::GenerateReferenceLoadWithBakerReadBarrier(HInstruction* i
// The actual reference load.
if (index.IsValid()) {
- // Load types involving an "index": ArrayGet and
- // UnsafeGetObject/UnsafeGetObjectVolatile intrinsics.
+ // Load types involving an "index": ArrayGet,
+ // UnsafeGetObject/UnsafeGetObjectVolatile and UnsafeCASObject
+ // intrinsics.
// /* HeapReference<Object> */ ref = *(obj + offset + (index << scale_factor))
if (index.IsConstant()) {
size_t computed_offset =
@@ -6698,9 +6884,9 @@ void CodeGeneratorARM::GenerateReferenceLoadWithBakerReadBarrier(HInstruction* i
__ LoadFromOffset(kLoadWord, ref_reg, obj, computed_offset);
} else {
// Handle the special case of the
- // UnsafeGetObject/UnsafeGetObjectVolatile intrinsics, which use
- // a register pair as index ("long offset"), of which only the low
- // part contains data.
+ // UnsafeGetObject/UnsafeGetObjectVolatile and UnsafeCASObject
+ // intrinsics, which use a register pair as index ("long
+ // offset"), of which only the low part contains data.
Register index_reg = index.IsRegisterPair()
? index.AsRegisterPairLow<Register>()
: index.AsRegister<Register>();
@@ -6716,8 +6902,21 @@ void CodeGeneratorARM::GenerateReferenceLoadWithBakerReadBarrier(HInstruction* i
__ MaybeUnpoisonHeapReference(ref_reg);
// Slow path marking the object `ref` when it is gray.
- SlowPathCodeARM* slow_path =
- new (GetGraph()->GetArena()) ReadBarrierMarkSlowPathARM(instruction, ref);
+ SlowPathCodeARM* slow_path;
+ if (always_update_field) {
+ DCHECK(temp2 != nullptr);
+ // ReadBarrierMarkAndUpdateFieldSlowPathARM only supports address
+ // of the form `obj + field_offset`, where `obj` is a register and
+ // `field_offset` is a register pair (of which only the lower half
+ // is used). Thus `offset` and `scale_factor` above are expected
+ // to be null in this code path.
+ DCHECK_EQ(offset, 0u);
+ DCHECK_EQ(scale_factor, ScaleFactor::TIMES_1);
+ slow_path = new (GetGraph()->GetArena()) ReadBarrierMarkAndUpdateFieldSlowPathARM(
+ instruction, ref, obj, /* field_offset */ index, temp_reg, *temp2);
+ } else {
+ slow_path = new (GetGraph()->GetArena()) ReadBarrierMarkSlowPathARM(instruction, ref);
+ }
AddSlowPath(slow_path);
// if (rb_state == ReadBarrier::gray_ptr_)
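
Editor's note: the new ARM slow path boils down to "mark the reference, and if marking produced a different (to-space) pointer, try once to write it back into the holder's field with a strong, relaxed-ordering compare-and-set, giving up silently if another thread got there first". A minimal sketch of that logic (not ART code; `field`, `old_ref` and `new_ref` are illustrative names, and heap references are modeled as 32-bit words):

#include <atomic>
#include <cstdint>

void UpdateFieldAfterMarking(std::atomic<uint32_t>& field,
                             uint32_t old_ref,
                             uint32_t new_ref) {
  if (new_ref == old_ref) {
    return;  // Marking did not move the object; nothing to write back.
  }
  // Strong CAS with relaxed ordering: succeed only if the field still holds
  // `old_ref`; if another thread installed a different reference in the
  // meantime, leave the field as-is.
  uint32_t expected = old_ref;
  field.compare_exchange_strong(expected, new_ref, std::memory_order_relaxed);
}
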
diff --git a/compiler/optimizing/code_generator_arm.h b/compiler/optimizing/code_generator_arm.h
index 4d59b47861..3d46aab31f 100644
--- a/compiler/optimizing/code_generator_arm.h
+++ b/compiler/optimizing/code_generator_arm.h
@@ -283,12 +283,12 @@ class InstructionCodeGeneratorARM : public InstructionCodeGenerator {
//
// root <- *(obj + offset)
//
- // while honoring read barriers if requires_read_barrier is true.
+ // while honoring read barriers if `requires_read_barrier` is true.
void GenerateGcRootFieldLoad(HInstruction* instruction,
Location root,
Register obj,
uint32_t offset,
- bool requires_read_barrier = kEmitCompilerReadBarrier);
+ bool requires_read_barrier);
void GenerateTestAndBranch(HInstruction* instruction,
size_t condition_input_index,
Label* true_target,
@@ -508,6 +508,18 @@ class CodeGeneratorARM : public CodeGenerator {
bool needs_null_check);
// Factored implementation used by GenerateFieldLoadWithBakerReadBarrier
// and GenerateArrayLoadWithBakerReadBarrier.
+
+ // Factored implementation, used by GenerateFieldLoadWithBakerReadBarrier,
+ // GenerateArrayLoadWithBakerReadBarrier and some intrinsics.
+ //
+ // Load the object reference located at the address
+ // `obj + offset + (index << scale_factor)`, held by object `obj`, into
+ // `ref`, and mark it if needed.
+ //
+ // If `always_update_field` is true, the value of the reference is
+ // atomically updated in the holder (`obj`). This operation
+ // requires an extra temporary register, which must be provided as a
+ // non-null pointer (`temp2`).
void GenerateReferenceLoadWithBakerReadBarrier(HInstruction* instruction,
Location ref,
Register obj,
@@ -515,7 +527,9 @@ class CodeGeneratorARM : public CodeGenerator {
Location index,
ScaleFactor scale_factor,
Location temp,
- bool needs_null_check);
+ bool needs_null_check,
+ bool always_update_field = false,
+ Register* temp2 = nullptr);
// Generate a read barrier for a heap reference within `instruction`
// using a slow path.
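
Editor's note: the address these Baker read barrier helpers load from is `obj + offset + (index << scale_factor)`; when `always_update_field` is set, `offset` and `scale_factor` must be zero, so the address degenerates to `obj + index`. A tiny illustrative helper (not ART code) spelling out that computation:

#include <cstdint>

// Effective address of the reference field: base object plus a byte offset
// plus a scaled index (the scale is a log2 element size).
inline uintptr_t ReferenceFieldAddress(uintptr_t obj,
                                       uint32_t offset,
                                       uintptr_t index,
                                       unsigned scale_factor) {
  return obj + offset + (index << scale_factor);
}
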
diff --git a/compiler/optimizing/code_generator_arm64.cc b/compiler/optimizing/code_generator_arm64.cc
index 9e59d8cc38..b53750966d 100644
--- a/compiler/optimizing/code_generator_arm64.cc
+++ b/compiler/optimizing/code_generator_arm64.cc
@@ -589,11 +589,21 @@ void JumpTableARM64::EmitTable(CodeGeneratorARM64* codegen) {
}
}
-// Slow path marking an object during a read barrier.
+// Slow path marking an object reference `ref` during a read
+// barrier. The field `obj.field` in the object `obj` holding this
+// reference does not get updated by this slow path after marking (see
+// ReadBarrierMarkAndUpdateFieldSlowPathARM64 below for that).
+//
+// This means that after the execution of this slow path, `ref` will
+// always be up-to-date, but `obj.field` may not; i.e., after the
+// flip, `ref` will be a to-space reference, but `obj.field` will
+// probably still be a from-space reference (unless it gets updated by
+// another thread, or if another thread installed another object
+// reference (different from `ref`) in `obj.field`).
class ReadBarrierMarkSlowPathARM64 : public SlowPathCodeARM64 {
public:
- ReadBarrierMarkSlowPathARM64(HInstruction* instruction, Location obj)
- : SlowPathCodeARM64(instruction), obj_(obj) {
+ ReadBarrierMarkSlowPathARM64(HInstruction* instruction, Location ref)
+ : SlowPathCodeARM64(instruction), ref_(ref) {
DCHECK(kEmitCompilerReadBarrier);
}
@@ -602,7 +612,8 @@ class ReadBarrierMarkSlowPathARM64 : public SlowPathCodeARM64 {
void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
LocationSummary* locations = instruction_->GetLocations();
DCHECK(locations->CanCall());
- DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(obj_.reg()));
+ DCHECK(ref_.IsRegister()) << ref_;
+ DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(ref_.reg())) << ref_.reg();
DCHECK(instruction_->IsInstanceFieldGet() ||
instruction_->IsStaticFieldGet() ||
instruction_->IsArrayGet() ||
@@ -621,40 +632,207 @@ class ReadBarrierMarkSlowPathARM64 : public SlowPathCodeARM64 {
// entrypoint. Also, there is no need to update the stack mask,
// as this runtime call will not trigger a garbage collection.
CodeGeneratorARM64* arm64_codegen = down_cast<CodeGeneratorARM64*>(codegen);
- DCHECK_NE(obj_.reg(), LR);
- DCHECK_NE(obj_.reg(), WSP);
- DCHECK_NE(obj_.reg(), WZR);
+ DCHECK_NE(ref_.reg(), LR);
+ DCHECK_NE(ref_.reg(), WSP);
+ DCHECK_NE(ref_.reg(), WZR);
// IP0 is used internally by the ReadBarrierMarkRegX entry point
// as a temporary, it cannot be the entry point's input/output.
- DCHECK_NE(obj_.reg(), IP0);
- DCHECK(0 <= obj_.reg() && obj_.reg() < kNumberOfWRegisters) << obj_.reg();
+ DCHECK_NE(ref_.reg(), IP0);
+ DCHECK(0 <= ref_.reg() && ref_.reg() < kNumberOfWRegisters) << ref_.reg();
// "Compact" slow path, saving two moves.
//
// Instead of using the standard runtime calling convention (input
// and output in W0):
//
- // W0 <- obj
+ // W0 <- ref
// W0 <- ReadBarrierMark(W0)
- // obj <- W0
+ // ref <- W0
//
- // we just use rX (the register holding `obj`) as input and output
+ // we just use rX (the register containing `ref`) as input and output
// of a dedicated entrypoint:
//
// rX <- ReadBarrierMarkRegX(rX)
//
int32_t entry_point_offset =
- CodeGenerator::GetReadBarrierMarkEntryPointsOffset<kArm64PointerSize>(obj_.reg());
+ CodeGenerator::GetReadBarrierMarkEntryPointsOffset<kArm64PointerSize>(ref_.reg());
// This runtime call does not require a stack map.
arm64_codegen->InvokeRuntimeWithoutRecordingPcInfo(entry_point_offset, instruction_, this);
__ B(GetExitLabel());
}
private:
- const Location obj_;
+ // The location (register) of the marked object reference.
+ const Location ref_;
DISALLOW_COPY_AND_ASSIGN(ReadBarrierMarkSlowPathARM64);
};
+// Slow path marking an object reference `ref` during a read barrier,
+// and if needed, atomically updating the field `obj.field` in the
+// object `obj` holding this reference after marking (contrary to
+// ReadBarrierMarkSlowPathARM64 above, which never tries to update
+// `obj.field`).
+//
+// This means that after the execution of this slow path, both `ref`
+// and `obj.field` will be up-to-date; i.e., after the flip, both will
+// hold the same to-space reference (unless another thread installed
+// another object reference (different from `ref`) in `obj.field`).
+class ReadBarrierMarkAndUpdateFieldSlowPathARM64 : public SlowPathCodeARM64 {
+ public:
+ ReadBarrierMarkAndUpdateFieldSlowPathARM64(HInstruction* instruction,
+ Location ref,
+ Register obj,
+ Location field_offset,
+ Register temp)
+ : SlowPathCodeARM64(instruction),
+ ref_(ref),
+ obj_(obj),
+ field_offset_(field_offset),
+ temp_(temp) {
+ DCHECK(kEmitCompilerReadBarrier);
+ }
+
+ const char* GetDescription() const OVERRIDE {
+ return "ReadBarrierMarkAndUpdateFieldSlowPathARM64";
+ }
+
+ void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
+ LocationSummary* locations = instruction_->GetLocations();
+ Register ref_reg = WRegisterFrom(ref_);
+ DCHECK(locations->CanCall());
+ DCHECK(ref_.IsRegister()) << ref_;
+ DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(ref_.reg())) << ref_.reg();
+ // This slow path is only used by the UnsafeCASObject intrinsic.
+ DCHECK((instruction_->IsInvokeVirtual() && instruction_->GetLocations()->Intrinsified()))
+ << "Unexpected instruction in read barrier marking and field updating slow path: "
+ << instruction_->DebugName();
+ DCHECK(instruction_->GetLocations()->Intrinsified());
+ DCHECK_EQ(instruction_->AsInvoke()->GetIntrinsic(), Intrinsics::kUnsafeCASObject);
+ DCHECK(field_offset_.IsRegister()) << field_offset_;
+
+ __ Bind(GetEntryLabel());
+
+ // Save the old reference.
+ // Note that we cannot use IP to save the old reference, as IP is
+ // used internally by the ReadBarrierMarkRegX entry point, and we
+ // need the old reference after the call to that entry point.
+ DCHECK_NE(LocationFrom(temp_).reg(), IP0);
+ __ Mov(temp_.W(), ref_reg);
+
+ // No need to save live registers; it's taken care of by the
+ // entrypoint. Also, there is no need to update the stack mask,
+ // as this runtime call will not trigger a garbage collection.
+ CodeGeneratorARM64* arm64_codegen = down_cast<CodeGeneratorARM64*>(codegen);
+ DCHECK_NE(ref_.reg(), LR);
+ DCHECK_NE(ref_.reg(), WSP);
+ DCHECK_NE(ref_.reg(), WZR);
+ // IP0 is used internally by the ReadBarrierMarkRegX entry point
+ // as a temporary, it cannot be the entry point's input/output.
+ DCHECK_NE(ref_.reg(), IP0);
+ DCHECK(0 <= ref_.reg() && ref_.reg() < kNumberOfWRegisters) << ref_.reg();
+ // "Compact" slow path, saving two moves.
+ //
+ // Instead of using the standard runtime calling convention (input
+ // and output in W0):
+ //
+ // W0 <- ref
+ // W0 <- ReadBarrierMark(W0)
+ // ref <- W0
+ //
+ // we just use rX (the register containing `ref`) as input and output
+ // of a dedicated entrypoint:
+ //
+ // rX <- ReadBarrierMarkRegX(rX)
+ //
+ int32_t entry_point_offset =
+ CodeGenerator::GetReadBarrierMarkEntryPointsOffset<kArm64PointerSize>(ref_.reg());
+ // This runtime call does not require a stack map.
+ arm64_codegen->InvokeRuntimeWithoutRecordingPcInfo(entry_point_offset, instruction_, this);
+
+ // If the new reference is different from the old reference,
+ // update the field in the holder (`*(obj_ + field_offset_)`).
+ //
+ // Note that this field could also hold a different object, if
+ // another thread had concurrently changed it. In that case, the
+ // LDXR/CMP/BNE sequence of instructions in the compare-and-set
+ // (CAS) operation below would abort the CAS, leaving the field
+ // as-is.
+ vixl::aarch64::Label done;
+ __ Cmp(temp_.W(), ref_reg);
+ __ B(eq, &done);
+
+    // Update the holder's field atomically. This may fail if the
+    // mutator updates it before us, but it's OK. This is achieved
+ // using a strong compare-and-set (CAS) operation with relaxed
+ // memory synchronization ordering, where the expected value is
+ // the old reference and the desired value is the new reference.
+
+ MacroAssembler* masm = arm64_codegen->GetVIXLAssembler();
+ UseScratchRegisterScope temps(masm);
+
+ // Convenience aliases.
+ Register base = obj_.W();
+ Register offset = XRegisterFrom(field_offset_);
+ Register expected = temp_.W();
+ Register value = ref_reg;
+ Register tmp_ptr = temps.AcquireX(); // Pointer to actual memory.
+ Register tmp_value = temps.AcquireW(); // Value in memory.
+
+ __ Add(tmp_ptr, base.X(), Operand(offset));
+
+ if (kPoisonHeapReferences) {
+ arm64_codegen->GetAssembler()->PoisonHeapReference(expected);
+ if (value.Is(expected)) {
+ // Do not poison `value`, as it is the same register as
+ // `expected`, which has just been poisoned.
+ } else {
+ arm64_codegen->GetAssembler()->PoisonHeapReference(value);
+ }
+ }
+
+ // do {
+ // tmp_value = [tmp_ptr] - expected;
+ // } while (tmp_value == 0 && failure([tmp_ptr] <- r_new_value));
+
+ vixl::aarch64::Label loop_head, comparison_failed, exit_loop;
+ __ Bind(&loop_head);
+ __ Ldxr(tmp_value, MemOperand(tmp_ptr));
+ __ Cmp(tmp_value, expected);
+ __ B(&comparison_failed, ne);
+ __ Stxr(tmp_value, value, MemOperand(tmp_ptr));
+ __ Cbnz(tmp_value, &loop_head);
+ __ B(&exit_loop);
+ __ Bind(&comparison_failed);
+ __ Clrex();
+ __ Bind(&exit_loop);
+
+ if (kPoisonHeapReferences) {
+ arm64_codegen->GetAssembler()->UnpoisonHeapReference(expected);
+ if (value.Is(expected)) {
+ // Do not unpoison `value`, as it is the same register as
+ // `expected`, which has just been unpoisoned.
+ } else {
+ arm64_codegen->GetAssembler()->UnpoisonHeapReference(value);
+ }
+ }
+
+ __ Bind(&done);
+ __ B(GetExitLabel());
+ }
+
+ private:
+ // The location (register) of the marked object reference.
+ const Location ref_;
+ // The register containing the object holding the marked object reference field.
+ const Register obj_;
+ // The location of the offset of the marked reference field within `obj_`.
+ Location field_offset_;
+
+ const Register temp_;
+
+ DISALLOW_COPY_AND_ASSIGN(ReadBarrierMarkAndUpdateFieldSlowPathARM64);
+};
+
// Slow path generating a read barrier for a heap reference.
class ReadBarrierForHeapReferenceSlowPathARM64 : public SlowPathCodeARM64 {
public:
@@ -768,7 +946,7 @@ class ReadBarrierForHeapReferenceSlowPathARM64 : public SlowPathCodeARM64 {
DCHECK((instruction_->AsInvoke()->GetIntrinsic() == Intrinsics::kUnsafeGetObject) ||
(instruction_->AsInvoke()->GetIntrinsic() == Intrinsics::kUnsafeGetObjectVolatile))
<< instruction_->AsInvoke()->GetIntrinsic();
- DCHECK_EQ(offset_, 0U);
+ DCHECK_EQ(offset_, 0u);
DCHECK(index_.IsRegister());
}
}
@@ -4098,7 +4276,7 @@ void InstructionCodeGeneratorARM64::VisitLoadClass(HLoadClass* cls) {
out_loc,
current_method,
ArtMethod::DeclaringClassOffset().Int32Value(),
- /*fixup_label*/ nullptr,
+ /* fixup_label */ nullptr,
requires_read_barrier);
break;
}
@@ -4143,7 +4321,7 @@ void InstructionCodeGeneratorARM64::VisitLoadClass(HLoadClass* cls) {
out_loc,
out.X(),
offset,
- /*fixup_label*/ nullptr,
+ /* fixup_label */ nullptr,
requires_read_barrier);
generate_null_check = !cls->IsInDexCache();
break;
@@ -4180,7 +4358,7 @@ void InstructionCodeGeneratorARM64::VisitLoadClass(HLoadClass* cls) {
out_loc,
out.X(),
CodeGenerator::GetCacheOffset(cls->GetTypeIndex()),
- /*fixup_label*/ nullptr,
+ /* fixup_label */ nullptr,
requires_read_barrier);
generate_null_check = !cls->IsInDexCache();
break;
@@ -4319,8 +4497,9 @@ void InstructionCodeGeneratorARM64::VisitLoadString(HLoadString* load) {
GenerateGcRootFieldLoad(load,
load->GetLocations()->Out(),
temp,
- /* placeholder */ 0u,
- ldr_label);
+ /* offset placeholder */ 0u,
+ ldr_label,
+ kEmitCompilerReadBarrier);
SlowPathCodeARM64* slow_path =
new (GetGraph()->GetArena()) LoadStringSlowPathARM64(load, temp, adrp_label);
codegen_->AddSlowPath(slow_path);
@@ -5174,7 +5353,7 @@ void CodeGeneratorARM64::GenerateFieldLoadWithBakerReadBarrier(HInstruction* ins
// /* HeapReference<Object> */ ref = *(obj + offset)
Location no_index = Location::NoLocation();
- size_t no_scale_factor = 0U;
+ size_t no_scale_factor = 0u;
GenerateReferenceLoadWithBakerReadBarrier(instruction,
ref,
obj,
@@ -5225,7 +5404,8 @@ void CodeGeneratorARM64::GenerateReferenceLoadWithBakerReadBarrier(HInstruction*
size_t scale_factor,
Register temp,
bool needs_null_check,
- bool use_load_acquire) {
+ bool use_load_acquire,
+ bool always_update_field) {
DCHECK(kEmitCompilerReadBarrier);
DCHECK(kUseBakerReadBarrier);
// If we are emitting an array load, we should not be using a
@@ -5278,7 +5458,9 @@ void CodeGeneratorARM64::GenerateReferenceLoadWithBakerReadBarrier(HInstruction*
// The actual reference load.
if (index.IsValid()) {
- // Load types involving an "index".
+ // Load types involving an "index": ArrayGet,
+ // UnsafeGetObject/UnsafeGetObjectVolatile and UnsafeCASObject
+ // intrinsics.
if (use_load_acquire) {
// UnsafeGetObjectVolatile intrinsic case.
// Register `index` is not an index in an object array, but an
@@ -5287,9 +5469,9 @@ void CodeGeneratorARM64::GenerateReferenceLoadWithBakerReadBarrier(HInstruction*
DCHECK(instruction->GetLocations()->Intrinsified());
DCHECK(instruction->AsInvoke()->GetIntrinsic() == Intrinsics::kUnsafeGetObjectVolatile)
<< instruction->AsInvoke()->GetIntrinsic();
- DCHECK_EQ(offset, 0U);
- DCHECK_EQ(scale_factor, 0U);
- DCHECK_EQ(needs_null_check, 0U);
+ DCHECK_EQ(offset, 0u);
+ DCHECK_EQ(scale_factor, 0u);
+ DCHECK_EQ(needs_null_check, 0u);
// /* HeapReference<Object> */ ref = *(obj + index)
MemOperand field = HeapOperand(obj, XRegisterFrom(index));
LoadAcquire(instruction, ref_reg, field, /* needs_null_check */ false);
@@ -5300,10 +5482,10 @@ void CodeGeneratorARM64::GenerateReferenceLoadWithBakerReadBarrier(HInstruction*
uint32_t computed_offset = offset + (Int64ConstantFrom(index) << scale_factor);
Load(type, ref_reg, HeapOperand(obj, computed_offset));
} else {
- Register temp2 = temps.AcquireW();
- __ Add(temp2, obj, offset);
- Load(type, ref_reg, HeapOperand(temp2, XRegisterFrom(index), LSL, scale_factor));
- temps.Release(temp2);
+ Register temp3 = temps.AcquireW();
+ __ Add(temp3, obj, offset);
+ Load(type, ref_reg, HeapOperand(temp3, XRegisterFrom(index), LSL, scale_factor));
+ temps.Release(temp3);
}
}
} else {
@@ -5320,8 +5502,19 @@ void CodeGeneratorARM64::GenerateReferenceLoadWithBakerReadBarrier(HInstruction*
GetAssembler()->MaybeUnpoisonHeapReference(ref_reg);
// Slow path marking the object `ref` when it is gray.
- SlowPathCodeARM64* slow_path =
- new (GetGraph()->GetArena()) ReadBarrierMarkSlowPathARM64(instruction, ref);
+ SlowPathCodeARM64* slow_path;
+ if (always_update_field) {
+ // ReadBarrierMarkAndUpdateFieldSlowPathARM64 only supports
+ // address of the form `obj + field_offset`, where `obj` is a
+ // register and `field_offset` is a register. Thus `offset` and
+ // `scale_factor` above are expected to be null in this code path.
+ DCHECK_EQ(offset, 0u);
+ DCHECK_EQ(scale_factor, 0u); /* "times 1" */
+ slow_path = new (GetGraph()->GetArena()) ReadBarrierMarkAndUpdateFieldSlowPathARM64(
+ instruction, ref, obj, /* field_offset */ index, temp);
+ } else {
+ slow_path = new (GetGraph()->GetArena()) ReadBarrierMarkSlowPathARM64(instruction, ref);
+ }
AddSlowPath(slow_path);
// if (rb_state == ReadBarrier::gray_ptr_)
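
Editor's note: the ARM64 version hand-rolls the compare-and-set as an LDXR/STXR loop; a failed STXR (store-exclusive) can be spurious, so only the store is retried, while a value mismatch takes the CLREX path and leaves the field untouched. A rough C++ equivalent (a sketch, not ART code; compare_exchange_weak plays the role of the store-exclusive that may fail spuriously):

#include <atomic>
#include <cstdint>

bool CasFieldRelaxed(std::atomic<uint32_t>& field,
                     uint32_t expected,
                     uint32_t desired) {
  uint32_t observed = expected;
  do {
    if (observed != expected) {
      return false;  // A different reference was installed concurrently (CLREX path).
    }
    // compare_exchange_weak may fail spuriously, like STXR; retry in that case.
  } while (!field.compare_exchange_weak(observed, desired,
                                        std::memory_order_relaxed,
                                        std::memory_order_relaxed));
  return true;
}
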
diff --git a/compiler/optimizing/code_generator_arm64.h b/compiler/optimizing/code_generator_arm64.h
index eb28ecb427..7f54b4b6b2 100644
--- a/compiler/optimizing/code_generator_arm64.h
+++ b/compiler/optimizing/code_generator_arm64.h
@@ -289,13 +289,13 @@ class InstructionCodeGeneratorARM64 : public InstructionCodeGenerator {
//
// root <- *(obj + offset)
//
- // while honoring read barriers (if any).
+ // while honoring read barriers if `requires_read_barrier` is true.
void GenerateGcRootFieldLoad(HInstruction* instruction,
Location root,
vixl::aarch64::Register obj,
uint32_t offset,
- vixl::aarch64::Label* fixup_label = nullptr,
- bool requires_read_barrier = kEmitCompilerReadBarrier);
+ vixl::aarch64::Label* fixup_label,
+ bool requires_read_barrier);
// Generate a floating-point comparison.
void GenerateFcmp(HInstruction* instruction);
@@ -594,6 +594,13 @@ class CodeGeneratorARM64 : public CodeGenerator {
bool needs_null_check);
// Factored implementation used by GenerateFieldLoadWithBakerReadBarrier
// and GenerateArrayLoadWithBakerReadBarrier.
+ //
+ // Load the object reference located at the address
+ // `obj + offset + (index << scale_factor)`, held by object `obj`, into
+ // `ref`, and mark it if needed.
+ //
+ // If `always_update_field` is true, the value of the reference is
+ // atomically updated in the holder (`obj`).
void GenerateReferenceLoadWithBakerReadBarrier(HInstruction* instruction,
Location ref,
vixl::aarch64::Register obj,
@@ -602,7 +609,8 @@ class CodeGeneratorARM64 : public CodeGenerator {
size_t scale_factor,
vixl::aarch64::Register temp,
bool needs_null_check,
- bool use_load_acquire);
+ bool use_load_acquire,
+ bool always_update_field = false);
// Generate a read barrier for a heap reference within `instruction`
// using a slow path.
diff --git a/compiler/optimizing/code_generator_arm_vixl.cc b/compiler/optimizing/code_generator_arm_vixl.cc
index cac0543da3..b9e049ae48 100644
--- a/compiler/optimizing/code_generator_arm_vixl.cc
+++ b/compiler/optimizing/code_generator_arm_vixl.cc
@@ -3357,7 +3357,8 @@ void InstructionCodeGeneratorARMVIXL::VisitLoadClass(HLoadClass* cls) {
GenerateGcRootFieldLoad(cls,
out_loc,
current_method,
- ArtMethod::DeclaringClassOffset().Int32Value());
+ ArtMethod::DeclaringClassOffset().Int32Value(),
+ kEmitCompilerReadBarrier);
break;
}
case HLoadClass::LoadKind::kDexCacheViaMethod: {
@@ -3369,7 +3370,7 @@ void InstructionCodeGeneratorARMVIXL::VisitLoadClass(HLoadClass* cls) {
GetAssembler()->LoadFromOffset(kLoadWord, out, current_method, resolved_types_offset);
// /* GcRoot<mirror::Class> */ out = out[type_index]
size_t offset = CodeGenerator::GetCacheOffset(cls->GetTypeIndex());
- GenerateGcRootFieldLoad(cls, out_loc, out, offset);
+ GenerateGcRootFieldLoad(cls, out_loc, out, offset, kEmitCompilerReadBarrier);
generate_null_check = !cls->IsInDexCache();
break;
}
diff --git a/compiler/optimizing/code_generator_arm_vixl.h b/compiler/optimizing/code_generator_arm_vixl.h
index 1cd6184dd4..b0fa03899b 100644
--- a/compiler/optimizing/code_generator_arm_vixl.h
+++ b/compiler/optimizing/code_generator_arm_vixl.h
@@ -339,7 +339,7 @@ class InstructionCodeGeneratorARMVIXL : public InstructionCodeGenerator {
Location root,
vixl::aarch32::Register obj,
uint32_t offset,
- bool requires_read_barrier = kEmitCompilerReadBarrier);
+ bool requires_read_barrier);
void GenerateTestAndBranch(HInstruction* instruction,
size_t condition_input_index,
vixl::aarch32::Label* true_target,
diff --git a/compiler/optimizing/code_generator_x86.cc b/compiler/optimizing/code_generator_x86.cc
index 02c1c3b69f..efd33c7025 100644
--- a/compiler/optimizing/code_generator_x86.cc
+++ b/compiler/optimizing/code_generator_x86.cc
@@ -426,11 +426,25 @@ class ArraySetSlowPathX86 : public SlowPathCode {
DISALLOW_COPY_AND_ASSIGN(ArraySetSlowPathX86);
};
-// Slow path marking an object during a read barrier.
+// Slow path marking an object reference `ref` during a read
+// barrier. The field `obj.field` in the object `obj` holding this
+// reference does not get updated by this slow path after marking (see
+// ReadBarrierMarkAndUpdateFieldSlowPathX86 below for that).
+//
+// This means that after the execution of this slow path, `ref` will
+// always be up-to-date, but `obj.field` may not; i.e., after the
+// flip, `ref` will be a to-space reference, but `obj.field` will
+// probably still be a from-space reference (unless it gets updated by
+// another thread, or if another thread installed another object
+// reference (different from `ref`) in `obj.field`).
class ReadBarrierMarkSlowPathX86 : public SlowPathCode {
public:
- ReadBarrierMarkSlowPathX86(HInstruction* instruction, Location obj, bool unpoison)
- : SlowPathCode(instruction), obj_(obj), unpoison_(unpoison) {
+ ReadBarrierMarkSlowPathX86(HInstruction* instruction,
+ Location ref,
+ bool unpoison_ref_before_marking)
+ : SlowPathCode(instruction),
+ ref_(ref),
+ unpoison_ref_before_marking_(unpoison_ref_before_marking) {
DCHECK(kEmitCompilerReadBarrier);
}
@@ -438,9 +452,9 @@ class ReadBarrierMarkSlowPathX86 : public SlowPathCode {
void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
LocationSummary* locations = instruction_->GetLocations();
- Register reg = obj_.AsRegister<Register>();
+ Register ref_reg = ref_.AsRegister<Register>();
DCHECK(locations->CanCall());
- DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(reg));
+ DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(ref_reg)) << ref_reg;
DCHECK(instruction_->IsInstanceFieldGet() ||
instruction_->IsStaticFieldGet() ||
instruction_->IsArrayGet() ||
@@ -455,44 +469,211 @@ class ReadBarrierMarkSlowPathX86 : public SlowPathCode {
<< instruction_->DebugName();
__ Bind(GetEntryLabel());
- if (unpoison_) {
+ if (unpoison_ref_before_marking_) {
// Object* ref = ref_addr->AsMirrorPtr()
- __ MaybeUnpoisonHeapReference(reg);
+ __ MaybeUnpoisonHeapReference(ref_reg);
}
// No need to save live registers; it's taken care of by the
// entrypoint. Also, there is no need to update the stack mask,
// as this runtime call will not trigger a garbage collection.
CodeGeneratorX86* x86_codegen = down_cast<CodeGeneratorX86*>(codegen);
- DCHECK_NE(reg, ESP);
- DCHECK(0 <= reg && reg < kNumberOfCpuRegisters) << reg;
+ DCHECK_NE(ref_reg, ESP);
+ DCHECK(0 <= ref_reg && ref_reg < kNumberOfCpuRegisters) << ref_reg;
// "Compact" slow path, saving two moves.
//
// Instead of using the standard runtime calling convention (input
// and output in EAX):
//
- // EAX <- obj
+ // EAX <- ref
// EAX <- ReadBarrierMark(EAX)
- // obj <- EAX
+ // ref <- EAX
//
- // we just use rX (the register holding `obj`) as input and output
+ // we just use rX (the register containing `ref`) as input and output
// of a dedicated entrypoint:
//
// rX <- ReadBarrierMarkRegX(rX)
//
int32_t entry_point_offset =
- CodeGenerator::GetReadBarrierMarkEntryPointsOffset<kX86PointerSize>(reg);
+ CodeGenerator::GetReadBarrierMarkEntryPointsOffset<kX86PointerSize>(ref_reg);
// This runtime call does not require a stack map.
x86_codegen->InvokeRuntimeWithoutRecordingPcInfo(entry_point_offset, instruction_, this);
__ jmp(GetExitLabel());
}
private:
- const Location obj_;
- const bool unpoison_;
+ // The location (register) of the marked object reference.
+ const Location ref_;
+ // Should the reference in `ref_` be unpoisoned prior to marking it?
+ const bool unpoison_ref_before_marking_;
DISALLOW_COPY_AND_ASSIGN(ReadBarrierMarkSlowPathX86);
};
+// Slow path marking an object reference `ref` during a read barrier,
+// and if needed, atomically updating the field `obj.field` in the
+// object `obj` holding this reference after marking (contrary to
+// ReadBarrierMarkSlowPathX86 above, which never tries to update
+// `obj.field`).
+//
+// This means that after the execution of this slow path, both `ref`
+// and `obj.field` will be up-to-date; i.e., after the flip, both will
+// hold the same to-space reference (unless another thread installed
+// another object reference (different from `ref`) in `obj.field`).
+class ReadBarrierMarkAndUpdateFieldSlowPathX86 : public SlowPathCode {
+ public:
+ ReadBarrierMarkAndUpdateFieldSlowPathX86(HInstruction* instruction,
+ Location ref,
+ Register obj,
+ const Address& field_addr,
+ bool unpoison_ref_before_marking,
+ Register temp)
+ : SlowPathCode(instruction),
+ ref_(ref),
+ obj_(obj),
+ field_addr_(field_addr),
+ unpoison_ref_before_marking_(unpoison_ref_before_marking),
+ temp_(temp) {
+ DCHECK(kEmitCompilerReadBarrier);
+ }
+
+ const char* GetDescription() const OVERRIDE { return "ReadBarrierMarkAndUpdateFieldSlowPathX86"; }
+
+ void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
+ LocationSummary* locations = instruction_->GetLocations();
+ Register ref_reg = ref_.AsRegister<Register>();
+ DCHECK(locations->CanCall());
+ DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(ref_reg)) << ref_reg;
+ // This slow path is only used by the UnsafeCASObject intrinsic.
+ DCHECK((instruction_->IsInvokeVirtual() && instruction_->GetLocations()->Intrinsified()))
+ << "Unexpected instruction in read barrier marking and field updating slow path: "
+ << instruction_->DebugName();
+ DCHECK(instruction_->GetLocations()->Intrinsified());
+ DCHECK_EQ(instruction_->AsInvoke()->GetIntrinsic(), Intrinsics::kUnsafeCASObject);
+
+ __ Bind(GetEntryLabel());
+ if (unpoison_ref_before_marking_) {
+ // Object* ref = ref_addr->AsMirrorPtr()
+ __ MaybeUnpoisonHeapReference(ref_reg);
+ }
+
+ // Save the old (unpoisoned) reference.
+ __ movl(temp_, ref_reg);
+
+ // No need to save live registers; it's taken care of by the
+ // entrypoint. Also, there is no need to update the stack mask,
+ // as this runtime call will not trigger a garbage collection.
+ CodeGeneratorX86* x86_codegen = down_cast<CodeGeneratorX86*>(codegen);
+ DCHECK_NE(ref_reg, ESP);
+ DCHECK(0 <= ref_reg && ref_reg < kNumberOfCpuRegisters) << ref_reg;
+ // "Compact" slow path, saving two moves.
+ //
+ // Instead of using the standard runtime calling convention (input
+ // and output in EAX):
+ //
+ // EAX <- ref
+ // EAX <- ReadBarrierMark(EAX)
+ // ref <- EAX
+ //
+ // we just use rX (the register containing `ref`) as input and output
+ // of a dedicated entrypoint:
+ //
+ // rX <- ReadBarrierMarkRegX(rX)
+ //
+ int32_t entry_point_offset =
+ CodeGenerator::GetReadBarrierMarkEntryPointsOffset<kX86PointerSize>(ref_reg);
+ // This runtime call does not require a stack map.
+ x86_codegen->InvokeRuntimeWithoutRecordingPcInfo(entry_point_offset, instruction_, this);
+
+ // If the new reference is different from the old reference,
+ // update the field in the holder (`*field_addr`).
+ //
+ // Note that this field could also hold a different object, if
+ // another thread had concurrently changed it. In that case, the
+ // LOCK CMPXCHGL instruction in the compare-and-set (CAS)
+ // operation below would abort the CAS, leaving the field as-is.
+ NearLabel done;
+ __ cmpl(temp_, ref_reg);
+ __ j(kEqual, &done);
+
+    // Update the holder's field atomically. This may fail if the
+    // mutator updates it before us, but it's OK. This is achieved
+ // using a strong compare-and-set (CAS) operation with relaxed
+ // memory synchronization ordering, where the expected value is
+ // the old reference and the desired value is the new reference.
+    // This operation is implemented with a 32-bit LOCK CMPXCHG
+ // instruction, which requires the expected value (the old
+ // reference) to be in EAX. Save EAX beforehand, and move the
+ // expected value (stored in `temp_`) into EAX.
+ __ pushl(EAX);
+ __ movl(EAX, temp_);
+
+ // Convenience aliases.
+ Register base = obj_;
+ Register expected = EAX;
+ Register value = ref_reg;
+
+ bool base_equals_value = (base == value);
+ if (kPoisonHeapReferences) {
+ if (base_equals_value) {
+ // If `base` and `value` are the same register location, move
+ // `value` to a temporary register. This way, poisoning
+ // `value` won't invalidate `base`.
+ value = temp_;
+ __ movl(value, base);
+ }
+
+ // Check that the register allocator did not assign the location
+ // of `expected` (EAX) to `value` nor to `base`, so that heap
+ // poisoning (when enabled) works as intended below.
+ // - If `value` were equal to `expected`, both references would
+ // be poisoned twice, meaning they would not be poisoned at
+ // all, as heap poisoning uses address negation.
+ // - If `base` were equal to `expected`, poisoning `expected`
+ // would invalidate `base`.
+ DCHECK_NE(value, expected);
+ DCHECK_NE(base, expected);
+
+ __ PoisonHeapReference(expected);
+ __ PoisonHeapReference(value);
+ }
+
+ __ LockCmpxchgl(field_addr_, value);
+
+ // If heap poisoning is enabled, we need to unpoison the values
+ // that were poisoned earlier.
+ if (kPoisonHeapReferences) {
+ if (base_equals_value) {
+ // `value` has been moved to a temporary register, no need
+ // to unpoison it.
+ } else {
+ __ UnpoisonHeapReference(value);
+ }
+      // No need to unpoison `expected` (EAX), as it is overwritten below.
+ }
+
+ // Restore EAX.
+ __ popl(EAX);
+
+ __ Bind(&done);
+ __ jmp(GetExitLabel());
+ }
+
+ private:
+ // The location (register) of the marked object reference.
+ const Location ref_;
+ // The register containing the object holding the marked object reference field.
+ const Register obj_;
+ // The address of the marked reference field. The base of this address must be `obj_`.
+ const Address field_addr_;
+
+ // Should the reference in `ref_` be unpoisoned prior to marking it?
+ const bool unpoison_ref_before_marking_;
+
+ const Register temp_;
+
+ DISALLOW_COPY_AND_ASSIGN(ReadBarrierMarkAndUpdateFieldSlowPathX86);
+};
+
// Slow path generating a read barrier for a heap reference.
class ReadBarrierForHeapReferenceSlowPathX86 : public SlowPathCode {
public:
@@ -5897,7 +6078,7 @@ void InstructionCodeGeneratorX86::VisitLoadClass(HLoadClass* cls) {
cls,
out_loc,
Address(current_method, ArtMethod::DeclaringClassOffset().Int32Value()),
- /*fixup_label*/ nullptr,
+ /* fixup_label */ nullptr,
requires_read_barrier);
break;
}
@@ -5929,7 +6110,7 @@ void InstructionCodeGeneratorX86::VisitLoadClass(HLoadClass* cls) {
GenerateGcRootFieldLoad(cls,
out_loc,
Address::Absolute(address),
- /*fixup_label*/ nullptr,
+ /* fixup_label */ nullptr,
requires_read_barrier);
generate_null_check = !cls->IsInDexCache();
break;
@@ -5957,7 +6138,7 @@ void InstructionCodeGeneratorX86::VisitLoadClass(HLoadClass* cls) {
GenerateGcRootFieldLoad(cls,
out_loc,
Address(out, CodeGenerator::GetCacheOffset(cls->GetTypeIndex())),
- /*fixup_label*/ nullptr,
+ /* fixup_label */ nullptr,
requires_read_barrier);
generate_null_check = !cls->IsInDexCache();
break;
@@ -6099,7 +6280,7 @@ void InstructionCodeGeneratorX86::VisitLoadString(HLoadString* load) {
Address address = Address(method_address, CodeGeneratorX86::kDummy32BitOffset);
Label* fixup_label = codegen_->NewStringBssEntryPatch(load);
// /* GcRoot<mirror::Class> */ out = *address /* PC-relative */
- GenerateGcRootFieldLoad(load, out_loc, address, fixup_label);
+ GenerateGcRootFieldLoad(load, out_loc, address, fixup_label, kEmitCompilerReadBarrier);
SlowPathCode* slow_path = new (GetGraph()->GetArena()) LoadStringSlowPathX86(load);
codegen_->AddSlowPath(slow_path);
__ testl(out, out);
@@ -6831,7 +7012,7 @@ void InstructionCodeGeneratorX86::GenerateGcRootFieldLoad(HInstruction* instruct
// Slow path marking the GC root `root`.
SlowPathCode* slow_path = new (GetGraph()->GetArena()) ReadBarrierMarkSlowPathX86(
- instruction, root, /* unpoison */ false);
+ instruction, root, /* unpoison_ref_before_marking */ false);
codegen_->AddSlowPath(slow_path);
__ fs()->cmpl(Address::Absolute(Thread::IsGcMarkingOffset<kX86PointerSize>().Int32Value()),
@@ -6896,7 +7077,9 @@ void CodeGeneratorX86::GenerateReferenceLoadWithBakerReadBarrier(HInstruction* i
Location ref,
Register obj,
const Address& src,
- bool needs_null_check) {
+ bool needs_null_check,
+ bool always_update_field,
+ Register* temp) {
DCHECK(kEmitCompilerReadBarrier);
DCHECK(kUseBakerReadBarrier);
@@ -6953,8 +7136,15 @@ void CodeGeneratorX86::GenerateReferenceLoadWithBakerReadBarrier(HInstruction* i
// Note: Reference unpoisoning modifies the flags, so we need to delay it after the branch.
// Slow path marking the object `ref` when it is gray.
- SlowPathCode* slow_path = new (GetGraph()->GetArena()) ReadBarrierMarkSlowPathX86(
- instruction, ref, /* unpoison */ true);
+ SlowPathCode* slow_path;
+ if (always_update_field) {
+ DCHECK(temp != nullptr);
+ slow_path = new (GetGraph()->GetArena()) ReadBarrierMarkAndUpdateFieldSlowPathX86(
+ instruction, ref, obj, src, /* unpoison_ref_before_marking */ true, *temp);
+ } else {
+ slow_path = new (GetGraph()->GetArena()) ReadBarrierMarkSlowPathX86(
+ instruction, ref, /* unpoison_ref_before_marking */ true);
+ }
AddSlowPath(slow_path);
// We have done the "if" of the gray bit check above, now branch based on the flags.
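
Editor's note: the DCHECK_NE checks in the x86 slow path above guard the poisoning sequence. Heap reference poisoning is address negation (as the patch's own comment states), so if `value` aliased `expected` the two PoisonHeapReference calls would cancel each other out, and if `base` aliased `expected`, poisoning `expected` would corrupt the base address. A small self-contained illustration (not ART code; negation of a 32-bit reference is assumed):

#include <cassert>
#include <cstdint>

inline uint32_t PoisonHeapReference(uint32_t ref) { return 0u - ref; }

int main() {
  uint32_t ref = 0x12345678u;
  // Poisoning twice cancels out: the reference ends up unpoisoned, which is
  // why the slow path must not poison the same register via two aliases.
  assert(PoisonHeapReference(PoisonHeapReference(ref)) == ref);
  return 0;
}
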
diff --git a/compiler/optimizing/code_generator_x86.h b/compiler/optimizing/code_generator_x86.h
index e7d9a43f58..1b51999546 100644
--- a/compiler/optimizing/code_generator_x86.h
+++ b/compiler/optimizing/code_generator_x86.h
@@ -259,12 +259,12 @@ class InstructionCodeGeneratorX86 : public InstructionCodeGenerator {
//
// root <- *address
//
- // while honoring read barriers (if any).
+ // while honoring read barriers if `requires_read_barrier` is true.
void GenerateGcRootFieldLoad(HInstruction* instruction,
Location root,
const Address& address,
- Label* fixup_label = nullptr,
- bool requires_read_barrier = kEmitCompilerReadBarrier);
+ Label* fixup_label,
+ bool requires_read_barrier);
// Push value to FPU stack. `is_fp` specifies whether the value is floating point or not.
// `is_wide` specifies whether it is long/double or not.
@@ -499,13 +499,24 @@ class CodeGeneratorX86 : public CodeGenerator {
uint32_t data_offset,
Location index,
bool needs_null_check);
- // Factored implementation used by GenerateFieldLoadWithBakerReadBarrier
- // and GenerateArrayLoadWithBakerReadBarrier.
+ // Factored implementation, used by GenerateFieldLoadWithBakerReadBarrier,
+ // GenerateArrayLoadWithBakerReadBarrier and some intrinsics.
+ //
+ // Load the object reference located at address `src`, held by
+ // object `obj`, into `ref`, and mark it if needed. The base of
+ // address `src` must be `obj`.
+ //
+ // If `always_update_field` is true, the value of the reference is
+ // atomically updated in the holder (`obj`). This operation
+ // requires a temporary register, which must be provided as a
+ // non-null pointer (`temp`).
void GenerateReferenceLoadWithBakerReadBarrier(HInstruction* instruction,
Location ref,
Register obj,
const Address& src,
- bool needs_null_check);
+ bool needs_null_check,
+ bool always_update_field = false,
+ Register* temp = nullptr);
// Generate a read barrier for a heap reference within `instruction`
// using a slow path.
diff --git a/compiler/optimizing/code_generator_x86_64.cc b/compiler/optimizing/code_generator_x86_64.cc
index 4b64c1b6ff..fcabeeae5d 100644
--- a/compiler/optimizing/code_generator_x86_64.cc
+++ b/compiler/optimizing/code_generator_x86_64.cc
@@ -445,11 +445,25 @@ class ArraySetSlowPathX86_64 : public SlowPathCode {
DISALLOW_COPY_AND_ASSIGN(ArraySetSlowPathX86_64);
};
-// Slow path marking an object during a read barrier.
+// Slow path marking an object reference `ref` during a read
+// barrier. The field `obj.field` in the object `obj` holding this
+// reference does not get updated by this slow path after marking (see
+// ReadBarrierMarkAndUpdateFieldSlowPathX86_64 below for that).
+//
+// This means that after the execution of this slow path, `ref` will
+// always be up-to-date, but `obj.field` may not; i.e., after the
+// flip, `ref` will be a to-space reference, but `obj.field` will
+// probably still be a from-space reference (unless it gets updated by
+// another thread, or if another thread installed another object
+// reference (different from `ref`) in `obj.field`).
class ReadBarrierMarkSlowPathX86_64 : public SlowPathCode {
public:
- ReadBarrierMarkSlowPathX86_64(HInstruction* instruction, Location obj, bool unpoison)
- : SlowPathCode(instruction), obj_(obj), unpoison_(unpoison) {
+ ReadBarrierMarkSlowPathX86_64(HInstruction* instruction,
+ Location ref,
+ bool unpoison_ref_before_marking)
+ : SlowPathCode(instruction),
+ ref_(ref),
+ unpoison_ref_before_marking_(unpoison_ref_before_marking) {
DCHECK(kEmitCompilerReadBarrier);
}
@@ -457,10 +471,10 @@ class ReadBarrierMarkSlowPathX86_64 : public SlowPathCode {
void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
LocationSummary* locations = instruction_->GetLocations();
- CpuRegister cpu_reg = obj_.AsRegister<CpuRegister>();
- Register reg = cpu_reg.AsRegister();
+ CpuRegister ref_cpu_reg = ref_.AsRegister<CpuRegister>();
+ Register ref_reg = ref_cpu_reg.AsRegister();
DCHECK(locations->CanCall());
- DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(reg));
+ DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(ref_reg)) << ref_reg;
DCHECK(instruction_->IsInstanceFieldGet() ||
instruction_->IsStaticFieldGet() ||
instruction_->IsArrayGet() ||
@@ -475,44 +489,218 @@ class ReadBarrierMarkSlowPathX86_64 : public SlowPathCode {
<< instruction_->DebugName();
__ Bind(GetEntryLabel());
- if (unpoison_) {
+ if (unpoison_ref_before_marking_) {
// Object* ref = ref_addr->AsMirrorPtr()
- __ MaybeUnpoisonHeapReference(cpu_reg);
+ __ MaybeUnpoisonHeapReference(ref_cpu_reg);
}
// No need to save live registers; it's taken care of by the
// entrypoint. Also, there is no need to update the stack mask,
// as this runtime call will not trigger a garbage collection.
CodeGeneratorX86_64* x86_64_codegen = down_cast<CodeGeneratorX86_64*>(codegen);
- DCHECK_NE(reg, RSP);
- DCHECK(0 <= reg && reg < kNumberOfCpuRegisters) << reg;
+ DCHECK_NE(ref_reg, RSP);
+ DCHECK(0 <= ref_reg && ref_reg < kNumberOfCpuRegisters) << ref_reg;
// "Compact" slow path, saving two moves.
//
// Instead of using the standard runtime calling convention (input
// and output in R0):
//
- // RDI <- obj
+ // RDI <- ref
// RAX <- ReadBarrierMark(RDI)
- // obj <- RAX
+ // ref <- RAX
//
- // we just use rX (the register holding `obj`) as input and output
+ // we just use rX (the register containing `ref`) as input and output
// of a dedicated entrypoint:
//
// rX <- ReadBarrierMarkRegX(rX)
//
int32_t entry_point_offset =
- CodeGenerator::GetReadBarrierMarkEntryPointsOffset<kX86_64PointerSize>(reg);
+ CodeGenerator::GetReadBarrierMarkEntryPointsOffset<kX86_64PointerSize>(ref_reg);
// This runtime call does not require a stack map.
x86_64_codegen->InvokeRuntimeWithoutRecordingPcInfo(entry_point_offset, instruction_, this);
__ jmp(GetExitLabel());
}
private:
- const Location obj_;
- const bool unpoison_;
+ // The location (register) of the marked object reference.
+ const Location ref_;
+ // Should the reference in `ref_` be unpoisoned prior to marking it?
+ const bool unpoison_ref_before_marking_;
DISALLOW_COPY_AND_ASSIGN(ReadBarrierMarkSlowPathX86_64);
};
+// Slow path marking an object reference `ref` during a read barrier,
+// and if needed, atomically updating the field `obj.field` in the
+// object `obj` holding this reference after marking (contrary to
+// ReadBarrierMarkSlowPathX86_64 above, which never tries to update
+// `obj.field`).
+//
+// This means that after the execution of this slow path, both `ref`
+// and `obj.field` will be up-to-date; i.e., after the flip, both will
+// hold the same to-space reference (unless another thread installed
+// another object reference (different from `ref`) in `obj.field`).
+class ReadBarrierMarkAndUpdateFieldSlowPathX86_64 : public SlowPathCode {
+ public:
+ ReadBarrierMarkAndUpdateFieldSlowPathX86_64(HInstruction* instruction,
+ Location ref,
+ CpuRegister obj,
+ const Address& field_addr,
+ bool unpoison_ref_before_marking,
+ CpuRegister temp1,
+ CpuRegister temp2)
+ : SlowPathCode(instruction),
+ ref_(ref),
+ obj_(obj),
+ field_addr_(field_addr),
+ unpoison_ref_before_marking_(unpoison_ref_before_marking),
+ temp1_(temp1),
+ temp2_(temp2) {
+ DCHECK(kEmitCompilerReadBarrier);
+ }
+
+ const char* GetDescription() const OVERRIDE {
+ return "ReadBarrierMarkAndUpdateFieldSlowPathX86_64";
+ }
+
+ void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
+ LocationSummary* locations = instruction_->GetLocations();
+ CpuRegister ref_cpu_reg = ref_.AsRegister<CpuRegister>();
+ Register ref_reg = ref_cpu_reg.AsRegister();
+ DCHECK(locations->CanCall());
+ DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(ref_reg)) << ref_reg;
+ // This slow path is only used by the UnsafeCASObject intrinsic.
+ DCHECK((instruction_->IsInvokeVirtual() && instruction_->GetLocations()->Intrinsified()))
+ << "Unexpected instruction in read barrier marking and field updating slow path: "
+ << instruction_->DebugName();
+ DCHECK(instruction_->GetLocations()->Intrinsified());
+ DCHECK_EQ(instruction_->AsInvoke()->GetIntrinsic(), Intrinsics::kUnsafeCASObject);
+
+ __ Bind(GetEntryLabel());
+ if (unpoison_ref_before_marking_) {
+ // Object* ref = ref_addr->AsMirrorPtr()
+ __ MaybeUnpoisonHeapReference(ref_cpu_reg);
+ }
+
+ // Save the old (unpoisoned) reference.
+ __ movl(temp1_, ref_cpu_reg);
+
+ // No need to save live registers; it's taken care of by the
+ // entrypoint. Also, there is no need to update the stack mask,
+ // as this runtime call will not trigger a garbage collection.
+ CodeGeneratorX86_64* x86_64_codegen = down_cast<CodeGeneratorX86_64*>(codegen);
+ DCHECK_NE(ref_reg, RSP);
+ DCHECK(0 <= ref_reg && ref_reg < kNumberOfCpuRegisters) << ref_reg;
+ // "Compact" slow path, saving two moves.
+ //
+ // Instead of using the standard runtime calling convention (input
+ // and output in R0):
+ //
+ // RDI <- ref
+ // RAX <- ReadBarrierMark(RDI)
+ // ref <- RAX
+ //
+ // we just use rX (the register containing `ref`) as input and output
+ // of a dedicated entrypoint:
+ //
+ // rX <- ReadBarrierMarkRegX(rX)
+ //
+ int32_t entry_point_offset =
+ CodeGenerator::GetReadBarrierMarkEntryPointsOffset<kX86_64PointerSize>(ref_reg);
+ // This runtime call does not require a stack map.
+ x86_64_codegen->InvokeRuntimeWithoutRecordingPcInfo(entry_point_offset, instruction_, this);
+
+ // If the new reference is different from the old reference,
+ // update the field in the holder (`*field_addr`).
+ //
+ // Note that this field could also hold a different object, if
+ // another thread had concurrently changed it. In that case, the
+ // LOCK CMPXCHGL instruction in the compare-and-set (CAS)
+ // operation below would abort the CAS, leaving the field as-is.
+ NearLabel done;
+ __ cmpl(temp1_, ref_cpu_reg);
+ __ j(kEqual, &done);
+
+ // Update the holder's field atomically. This may fail if the
+ // mutator updates the field before us, but that is OK. This is achieved
+ // using a strong compare-and-set (CAS) operation with relaxed
+ // memory synchronization ordering, where the expected value is
+ // the old reference and the desired value is the new reference.
+ // This operation is implemented with a 32-bit LOCK CMPXCHGL
+ // instruction, which requires the expected value (the old
+ // reference) to be in EAX. Save RAX beforehand, and move the
+ // expected value (stored in `temp1_`) into EAX.
+ __ movq(temp2_, CpuRegister(RAX));
+ __ movl(CpuRegister(RAX), temp1_);
+
+ // Convenience aliases.
+ CpuRegister base = obj_;
+ CpuRegister expected = CpuRegister(RAX);
+ CpuRegister value = ref_cpu_reg;
+
+ bool base_equals_value = (base.AsRegister() == value.AsRegister());
+ Register value_reg = ref_reg;
+ if (kPoisonHeapReferences) {
+ if (base_equals_value) {
+ // If `base` and `value` are the same register location, move
+ // `value_reg` to a temporary register. This way, poisoning
+ // `value_reg` won't invalidate `base`.
+ value_reg = temp1_.AsRegister();
+ __ movl(CpuRegister(value_reg), base);
+ }
+
+ // Check that the register allocator did not assign the location
+ // of `expected` (RAX) to `value` nor to `base`, so that heap
+ // poisoning (when enabled) works as intended below.
+ // - If `value` were equal to `expected`, both references would
+ // be poisoned twice, meaning they would not be poisoned at
+ // all, as heap poisoning uses address negation.
+ // - If `base` were equal to `expected`, poisoning `expected`
+ // would invalidate `base`.
+ DCHECK_NE(value_reg, expected.AsRegister());
+ DCHECK_NE(base.AsRegister(), expected.AsRegister());
+
+ __ PoisonHeapReference(expected);
+ __ PoisonHeapReference(CpuRegister(value_reg));
+ }
+
+ __ LockCmpxchgl(field_addr_, CpuRegister(value_reg));
+
+ // If heap poisoning is enabled, we need to unpoison the values
+ // that were poisoned earlier.
+ if (kPoisonHeapReferences) {
+ if (base_equals_value) {
+ // `value_reg` has been moved to a temporary register, no need
+ // to unpoison it.
+ } else {
+ __ UnpoisonHeapReference(CpuRegister(value_reg));
+ }
+ // No need to unpoison `expected` (RAX), as it will be overwritten below.
+ }
+
+ // Restore RAX.
+ __ movq(CpuRegister(RAX), temp2_);
+
+ __ Bind(&done);
+ __ jmp(GetExitLabel());
+ }
+
+ private:
+ // The location (register) of the marked object reference.
+ const Location ref_;
+ // The register containing the object holding the marked object reference field.
+ const CpuRegister obj_;
+ // The address of the marked reference field. The base of this address must be `obj_`.
+ const Address field_addr_;
+
+ // Should the reference in `ref_` be unpoisoned prior to marking it?
+ const bool unpoison_ref_before_marking_;
+
+ const CpuRegister temp1_;
+ const CpuRegister temp2_;
+
+ DISALLOW_COPY_AND_ASSIGN(ReadBarrierMarkAndUpdateFieldSlowPathX86_64);
+};
+
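The two slow-path comments above describe the mark-and-update protocol at a high level. As a rough, self-contained C++ sketch (not ART code; `mark` stands in for the per-register ReadBarrierMarkRegX entrypoint and the field is modeled as a 32-bit std::atomic), the logic emitted by ReadBarrierMarkAndUpdateFieldSlowPathX86_64 amounts to:

#include <atomic>
#include <cstdint>

// Sketch only: mark `ref`, then try once to swing `*field` from the old
// reference to the marked one. If another thread has already installed a
// different reference, the CAS fails and the field is left as that thread
// wrote it, matching the behavior described in the comment above.
template <typename MarkFn>
uint32_t MarkAndUpdateField(std::atomic<uint32_t>* field, uint32_t ref, MarkFn mark) {
  const uint32_t old_ref = ref;
  ref = mark(ref);
  if (ref != old_ref) {
    uint32_t expected = old_ref;
    // Strong CAS with relaxed ordering, mirroring the 32-bit LOCK CMPXCHGL.
    field->compare_exchange_strong(expected, ref, std::memory_order_relaxed);
  }
  return ref;
}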
// Slow path generating a read barrier for a heap reference.
class ReadBarrierForHeapReferenceSlowPathX86_64 : public SlowPathCode {
public:
@@ -4122,7 +4310,7 @@ void InstructionCodeGeneratorX86_64::HandleFieldGet(HInstruction* instruction,
// /* HeapReference<Object> */ out = *(base + offset)
if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
// Note that a potential implicit null check is handled in this
- // CodeGeneratorX86::GenerateFieldLoadWithBakerReadBarrier call.
+ // CodeGeneratorX86_64::GenerateFieldLoadWithBakerReadBarrier call.
codegen_->GenerateFieldLoadWithBakerReadBarrier(
instruction, out, base, offset, /* needs_null_check */ true);
if (is_volatile) {
@@ -4569,7 +4757,7 @@ void InstructionCodeGeneratorX86_64::VisitArrayGet(HArrayGet* instruction) {
// *(obj + data_offset + index * sizeof(HeapReference<Object>))
if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
// Note that a potential implicit null check is handled in this
- // CodeGeneratorX86::GenerateArrayLoadWithBakerReadBarrier call.
+ // CodeGeneratorX86_64::GenerateArrayLoadWithBakerReadBarrier call.
codegen_->GenerateArrayLoadWithBakerReadBarrier(
instruction, out_loc, obj, data_offset, index, /* needs_null_check */ true);
} else {
@@ -5318,7 +5506,7 @@ void InstructionCodeGeneratorX86_64::VisitLoadClass(HLoadClass* cls) {
cls,
out_loc,
Address(current_method, ArtMethod::DeclaringClassOffset().Int32Value()),
- /*fixup_label*/nullptr,
+ /* fixup_label */ nullptr,
requires_read_barrier);
break;
}
@@ -5343,7 +5531,7 @@ void InstructionCodeGeneratorX86_64::VisitLoadClass(HLoadClass* cls) {
GenerateGcRootFieldLoad(cls,
out_loc,
address,
- /*fixup_label*/nullptr,
+ /* fixup_label */ nullptr,
requires_read_barrier);
} else {
// TODO: Consider using opcode A1, i.e. movl eax, moff32 (with 64-bit address).
@@ -5351,7 +5539,7 @@ void InstructionCodeGeneratorX86_64::VisitLoadClass(HLoadClass* cls) {
GenerateGcRootFieldLoad(cls,
out_loc,
Address(out, 0),
- /*fixup_label*/nullptr,
+ /* fixup_label */ nullptr,
requires_read_barrier);
}
generate_null_check = !cls->IsInDexCache();
@@ -5379,7 +5567,7 @@ void InstructionCodeGeneratorX86_64::VisitLoadClass(HLoadClass* cls) {
cls,
out_loc,
Address(out, CodeGenerator::GetCacheOffset(cls->GetTypeIndex())),
- /*fixup_label*/nullptr,
+ /* fixup_label */ nullptr,
requires_read_barrier);
generate_null_check = !cls->IsInDexCache();
break;
@@ -5496,7 +5684,7 @@ void InstructionCodeGeneratorX86_64::VisitLoadString(HLoadString* load) {
/* no_rip */ false);
Label* fixup_label = codegen_->NewStringBssEntryPatch(load);
// /* GcRoot<mirror::Class> */ out = *address /* PC-relative */
- GenerateGcRootFieldLoad(load, out_loc, address, fixup_label);
+ GenerateGcRootFieldLoad(load, out_loc, address, fixup_label, kEmitCompilerReadBarrier);
SlowPathCode* slow_path = new (GetGraph()->GetArena()) LoadStringSlowPathX86_64(load);
codegen_->AddSlowPath(slow_path);
__ testl(out, out);
@@ -6264,7 +6452,7 @@ void InstructionCodeGeneratorX86_64::GenerateGcRootFieldLoad(HInstruction* instr
// Slow path marking the GC root `root`.
SlowPathCode* slow_path = new (GetGraph()->GetArena()) ReadBarrierMarkSlowPathX86_64(
- instruction, root, /* unpoison */ false);
+ instruction, root, /* unpoison_ref_before_marking */ false);
codegen_->AddSlowPath(slow_path);
__ gs()->cmpl(Address::Absolute(Thread::IsGcMarkingOffset<kX86_64PointerSize>().Int32Value(),
@@ -6330,7 +6518,10 @@ void CodeGeneratorX86_64::GenerateReferenceLoadWithBakerReadBarrier(HInstruction
Location ref,
CpuRegister obj,
const Address& src,
- bool needs_null_check) {
+ bool needs_null_check,
+ bool always_update_field,
+ CpuRegister* temp1,
+ CpuRegister* temp2) {
DCHECK(kEmitCompilerReadBarrier);
DCHECK(kUseBakerReadBarrier);
@@ -6387,8 +6578,16 @@ void CodeGeneratorX86_64::GenerateReferenceLoadWithBakerReadBarrier(HInstruction
// Note: Reference unpoisoning modifies the flags, so we need to delay it after the branch.
// Slow path marking the object `ref` when it is gray.
- SlowPathCode* slow_path = new (GetGraph()->GetArena()) ReadBarrierMarkSlowPathX86_64(
- instruction, ref, /* unpoison */ true);
+ SlowPathCode* slow_path;
+ if (always_update_field) {
+ DCHECK(temp1 != nullptr);
+ DCHECK(temp2 != nullptr);
+ slow_path = new (GetGraph()->GetArena()) ReadBarrierMarkAndUpdateFieldSlowPathX86_64(
+ instruction, ref, obj, src, /* unpoison_ref_before_marking */ true, *temp1, *temp2);
+ } else {
+ slow_path = new (GetGraph()->GetArena()) ReadBarrierMarkSlowPathX86_64(
+ instruction, ref, /* unpoison_ref_before_marking */ true);
+ }
AddSlowPath(slow_path);
// We have done the "if" of the gray bit check above, now branch based on the flags.
diff --git a/compiler/optimizing/code_generator_x86_64.h b/compiler/optimizing/code_generator_x86_64.h
index 57ef83f621..8b19dad0d0 100644
--- a/compiler/optimizing/code_generator_x86_64.h
+++ b/compiler/optimizing/code_generator_x86_64.h
@@ -253,12 +253,12 @@ class InstructionCodeGeneratorX86_64 : public InstructionCodeGenerator {
//
// root <- *address
//
- // while honoring read barriers (if any).
+ // while honoring read barriers if `requires_read_barrier` is true.
void GenerateGcRootFieldLoad(HInstruction* instruction,
Location root,
const Address& address,
- Label* fixup_label = nullptr,
- bool requires_read_barrier = kEmitCompilerReadBarrier);
+ Label* fixup_label,
+ bool requires_read_barrier);
void PushOntoFPStack(Location source, uint32_t temp_offset,
uint32_t stack_adjustment, bool is_float);
@@ -434,13 +434,25 @@ class CodeGeneratorX86_64 : public CodeGenerator {
uint32_t data_offset,
Location index,
bool needs_null_check);
- // Factored implementation used by GenerateFieldLoadWithBakerReadBarrier
- // and GenerateArrayLoadWithBakerReadBarrier.
+ // Factored implementation, used by GenerateFieldLoadWithBakerReadBarrier,
+ // GenerateArrayLoadWithBakerReadBarrier and some intrinsics.
+ //
+ // Load the object reference located at address `src`, held by
+ // object `obj`, into `ref`, and mark it if needed. The base of
+ // address `src` must be `obj`.
+ //
+ // If `always_update_field` is true, the value of the reference is
+ // atomically updated in the holder (`obj`). This operation
+ // requires two temporary registers, which must be provided as
+ // non-null pointers (`temp1` and `temp2`).
void GenerateReferenceLoadWithBakerReadBarrier(HInstruction* instruction,
Location ref,
CpuRegister obj,
const Address& src,
- bool needs_null_check);
+ bool needs_null_check,
+ bool always_update_field = false,
+ CpuRegister* temp1 = nullptr,
+ CpuRegister* temp2 = nullptr);
// Generate a read barrier for a heap reference within `instruction`
// using a slow path.
diff --git a/compiler/optimizing/dead_code_elimination.cc b/compiler/optimizing/dead_code_elimination.cc
index adfe09ba9f..9de521ad8d 100644
--- a/compiler/optimizing/dead_code_elimination.cc
+++ b/compiler/optimizing/dead_code_elimination.cc
@@ -18,6 +18,7 @@
#include "base/array_ref.h"
#include "base/bit_vector-inl.h"
+#include "base/stl_util.h"
#include "ssa_phi_elimination.h"
namespace art {
@@ -168,8 +169,7 @@ bool HDeadCodeElimination::SimplifyIfs() {
bool simplified_one_or_more_ifs = false;
bool rerun_dominance_and_loop_analysis = false;
- for (HReversePostOrderIterator it(*graph_); !it.Done(); it.Advance()) {
- HBasicBlock* block = it.Current();
+ for (HBasicBlock* block : graph_->GetReversePostOrder()) {
HInstruction* last = block->GetLastInstruction();
HInstruction* first = block->GetFirstInstruction();
if (last->IsIf() &&
@@ -271,20 +271,22 @@ bool HDeadCodeElimination::SimplifyIfs() {
}
void HDeadCodeElimination::ConnectSuccessiveBlocks() {
- // Order does not matter.
- for (HReversePostOrderIterator it(*graph_); !it.Done();) {
- HBasicBlock* block = it.Current();
- if (block->IsEntryBlock() || !block->GetLastInstruction()->IsGoto()) {
- it.Advance();
- continue;
- }
- HBasicBlock* successor = block->GetSingleSuccessor();
- if (successor->IsExitBlock() || successor->GetPredecessors().size() != 1u) {
- it.Advance();
- continue;
+ // Order does not matter. Skip the entry block by starting at index 1 in reverse post order.
+ for (size_t i = 1u, size = graph_->GetReversePostOrder().size(); i != size; ++i) {
+ HBasicBlock* block = graph_->GetReversePostOrder()[i];
+ DCHECK(!block->IsEntryBlock());
+ while (block->GetLastInstruction()->IsGoto()) {
+ HBasicBlock* successor = block->GetSingleSuccessor();
+ if (successor->IsExitBlock() || successor->GetPredecessors().size() != 1u) {
+ break;
+ }
+ DCHECK_LT(i, IndexOfElement(graph_->GetReversePostOrder(), successor));
+ block->MergeWith(successor);
+ --size;
+ DCHECK_EQ(size, graph_->GetReversePostOrder().size());
+ DCHECK_EQ(block, graph_->GetReversePostOrder()[i]);
+ // Reiterate on this block in case it can be merged with its new successor.
}
- block->MergeWith(successor);
- // Reiterate on this block in case it can be merged with its new successor.
}
}
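The rewritten ConnectSuccessiveBlocks above keeps the index `i` valid while merging because MergeWith removes the successor, which sits at a strictly later position in reverse post order (hence the DCHECK_LT). A minimal sketch of that pattern over a plain std::vector, with hypothetical Block fields standing in for the HBasicBlock queries:

#include <algorithm>
#include <cstddef>
#include <vector>

struct Block {
  Block* goto_successor = nullptr;  // Non-null iff the last instruction is a Goto.
  std::size_t predecessor_count = 1;
  bool is_exit = false;
};

// Sketch only. Erasing the successor never disturbs entries at or before `i`,
// so the loop can keep re-checking the same block after each merge.
void ConnectSuccessiveBlocksSketch(std::vector<Block*>* order) {
  for (std::size_t i = 1u; i != order->size(); ++i) {  // Index 0 is the entry block.
    Block* block = (*order)[i];
    while (block->goto_successor != nullptr) {
      Block* successor = block->goto_successor;
      if (successor->is_exit || successor->predecessor_count != 1u) {
        break;
      }
      // Stand-in for HBasicBlock::MergeWith(): absorb the successor and
      // drop it from the order, then re-check the same block.
      block->goto_successor = successor->goto_successor;
      order->erase(std::find(order->begin(), order->end(), successor));
    }
  }
}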
@@ -300,8 +302,7 @@ bool HDeadCodeElimination::RemoveDeadBlocks() {
// Remove all dead blocks. Iterate in post order because removal needs the
// block's chain of dominators and nested loops need to be updated from the
// inside out.
- for (HPostOrderIterator it(*graph_); !it.Done(); it.Advance()) {
- HBasicBlock* block = it.Current();
+ for (HBasicBlock* block : graph_->GetPostOrder()) {
int id = block->GetBlockId();
if (!live_blocks.IsBitSet(id)) {
MaybeRecordDeadBlock(block);
@@ -332,8 +333,7 @@ bool HDeadCodeElimination::RemoveDeadBlocks() {
void HDeadCodeElimination::RemoveDeadInstructions() {
// Process basic blocks in post-order in the dominator tree, so that
// a dead instruction depending on another dead instruction is removed.
- for (HPostOrderIterator b(*graph_); !b.Done(); b.Advance()) {
- HBasicBlock* block = b.Current();
+ for (HBasicBlock* block : graph_->GetPostOrder()) {
// Traverse this block's instructions in backward order and remove
// the unused ones.
HBackwardInstructionIterator i(block->GetInstructions());
diff --git a/compiler/optimizing/gvn.cc b/compiler/optimizing/gvn.cc
index 1e86b75075..f5931a2f81 100644
--- a/compiler/optimizing/gvn.cc
+++ b/compiler/optimizing/gvn.cc
@@ -411,8 +411,8 @@ void GlobalValueNumberer::Run() {
// Use the reverse post order to ensure the non back-edge predecessors of a block are
// visited before the block itself.
- for (HReversePostOrderIterator it(*graph_); !it.Done(); it.Advance()) {
- VisitBasicBlock(it.Current());
+ for (HBasicBlock* block : graph_->GetReversePostOrder()) {
+ VisitBasicBlock(block);
}
}
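This hunk and the following ones all swap the hand-written HReversePostOrderIterator for a range-based for over GetReversePostOrder(). A trivial sketch of why that compiles, with a plain std::vector standing in for ArenaVector<HBasicBlock*> and hypothetical type names:

#include <vector>

struct Block {};
void VisitBasicBlock(Block*) {}

class Graph {
 public:
  // Range-based for only needs begin()/end(), so exposing the stored order
  // directly is enough to replace the old iterator class.
  const std::vector<Block*>& GetReversePostOrder() const { return reverse_post_order_; }

 private:
  std::vector<Block*> reverse_post_order_;
};

void Run(const Graph& graph) {
  for (Block* block : graph.GetReversePostOrder()) {
    VisitBasicBlock(block);
  }
}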
diff --git a/compiler/optimizing/induction_var_analysis.cc b/compiler/optimizing/induction_var_analysis.cc
index 1d1921a246..f2602fbf8c 100644
--- a/compiler/optimizing/induction_var_analysis.cc
+++ b/compiler/optimizing/induction_var_analysis.cc
@@ -103,8 +103,7 @@ void HInductionVarAnalysis::Run() {
// Detects sequence variables (generalized induction variables) during an outer to inner
// traversal of all loops using Gerlek's algorithm. The order is important to enable
// range analysis on outer loop while visiting inner loops.
- for (HReversePostOrderIterator it_graph(*graph_); !it_graph.Done(); it_graph.Advance()) {
- HBasicBlock* graph_block = it_graph.Current();
+ for (HBasicBlock* graph_block : graph_->GetReversePostOrder()) {
// Don't analyze irreducible loops.
if (graph_block->IsLoopHeader() && !graph_block->GetLoopInformation()->IsIrreducible()) {
VisitLoop(graph_block->GetLoopInformation());
diff --git a/compiler/optimizing/inliner.cc b/compiler/optimizing/inliner.cc
index 9faa98a388..cc420b3260 100644
--- a/compiler/optimizing/inliner.cc
+++ b/compiler/optimizing/inliner.cc
@@ -1219,16 +1219,13 @@ bool HInliner::TryBuildAndInlineHelper(HInvoke* invoke_instruction,
return false;
}
- HReversePostOrderIterator it(*callee_graph);
- it.Advance(); // Past the entry block, it does not contain instructions that prevent inlining.
size_t number_of_instructions = 0;
bool can_inline_environment =
total_number_of_dex_registers_ < kMaximumNumberOfCumulatedDexRegisters;
- for (; !it.Done(); it.Advance()) {
- HBasicBlock* block = it.Current();
-
+ // Skip the entry block; it does not contain instructions that prevent inlining.
+ for (HBasicBlock* block : callee_graph->GetReversePostOrderSkipEntryBlock()) {
if (block->IsLoopHeader() && block->GetLoopInformation()->IsIrreducible()) {
// Don't inline methods with irreducible loops, they could prevent some
// optimizations to run.
diff --git a/compiler/optimizing/instruction_builder.cc b/compiler/optimizing/instruction_builder.cc
index 613e00843f..c8c4ca76fd 100644
--- a/compiler/optimizing/instruction_builder.cc
+++ b/compiler/optimizing/instruction_builder.cc
@@ -81,8 +81,7 @@ void HInstructionBuilder::InitializeBlockLocals() {
// locals (guaranteed by HGraphBuilder) and that all try blocks have been
// visited already (from HTryBoundary scoping and reverse post order).
bool catch_block_visited = false;
- for (HReversePostOrderIterator it(*graph_); !it.Done(); it.Advance()) {
- HBasicBlock* current = it.Current();
+ for (HBasicBlock* current : graph_->GetReversePostOrder()) {
if (current == current_block_) {
catch_block_visited = true;
} else if (current->IsTryBlock()) {
@@ -276,8 +275,8 @@ bool HInstructionBuilder::Build() {
FindNativeDebugInfoLocations(native_debug_info_locations);
}
- for (HReversePostOrderIterator block_it(*graph_); !block_it.Done(); block_it.Advance()) {
- current_block_ = block_it.Current();
+ for (HBasicBlock* block : graph_->GetReversePostOrder()) {
+ current_block_ = block;
uint32_t block_dex_pc = current_block_->GetDexPc();
InitializeBlockLocals();
diff --git a/compiler/optimizing/instruction_simplifier.cc b/compiler/optimizing/instruction_simplifier.cc
index 3bb1c1dc21..e4d280f26d 100644
--- a/compiler/optimizing/instruction_simplifier.cc
+++ b/compiler/optimizing/instruction_simplifier.cc
@@ -124,20 +124,16 @@ void InstructionSimplifier::Run() {
void InstructionSimplifierVisitor::Run() {
// Iterate in reverse post order to open up more simplifications to users
// of instructions that got simplified.
- for (HReversePostOrderIterator it(*GetGraph()); !it.Done();) {
+ for (HBasicBlock* block : GetGraph()->GetReversePostOrder()) {
// The simplification of an instruction to another instruction may yield
// possibilities for other simplifications. So although we perform a reverse
// post order visit, we sometimes need to revisit an instruction index.
- simplification_occurred_ = false;
- VisitBasicBlock(it.Current());
- if (simplification_occurred_ &&
- (simplifications_at_current_position_ < kMaxSamePositionSimplifications)) {
- // New simplifications may be applicable to the instruction at the
- // current index, so don't advance the iterator.
- continue;
- }
+ do {
+ simplification_occurred_ = false;
+ VisitBasicBlock(block);
+ } while (simplification_occurred_ &&
+ (simplifications_at_current_position_ < kMaxSamePositionSimplifications));
simplifications_at_current_position_ = 0;
- it.Advance();
}
}
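The reshaped loop above revisits the same block until the visitor stops reporting simplifications or a per-position cap is hit. A compact sketch of that fixed-point pattern (hypothetical helper names; the real cap is the kMaxSamePositionSimplifications constant used above):

// Sketch only: re-run `simplify` on one block while it keeps making
// progress, bounded so a ping-ponging pair of rewrites cannot loop forever.
template <typename BlockT, typename SimplifyFn>
void SimplifyBlockToFixpoint(BlockT* block, SimplifyFn simplify, int max_rounds) {
  int rounds = 0;
  bool changed;
  do {
    changed = simplify(block);  // Returns true if anything was rewritten.
  } while (changed && ++rounds < max_rounds);
}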
diff --git a/compiler/optimizing/intrinsics.cc b/compiler/optimizing/intrinsics.cc
index 8327a4c244..fc6ff7b197 100644
--- a/compiler/optimizing/intrinsics.cc
+++ b/compiler/optimizing/intrinsics.cc
@@ -133,8 +133,7 @@ static bool CheckInvokeType(Intrinsics intrinsic, HInvoke* invoke) {
void IntrinsicsRecognizer::Run() {
ScopedObjectAccess soa(Thread::Current());
- for (HReversePostOrderIterator it(*graph_); !it.Done(); it.Advance()) {
- HBasicBlock* block = it.Current();
+ for (HBasicBlock* block : graph_->GetReversePostOrder()) {
for (HInstructionIterator inst_it(block->GetInstructions()); !inst_it.Done();
inst_it.Advance()) {
HInstruction* inst = inst_it.Current();
diff --git a/compiler/optimizing/intrinsics_arm.cc b/compiler/optimizing/intrinsics_arm.cc
index 96a6ecbee9..8790c1e4f1 100644
--- a/compiler/optimizing/intrinsics_arm.cc
+++ b/compiler/optimizing/intrinsics_arm.cc
@@ -652,9 +652,9 @@ static void CreateIntIntIntToIntLocations(ArenaAllocator* arena,
(invoke->GetIntrinsic() == Intrinsics::kUnsafeGetObject ||
invoke->GetIntrinsic() == Intrinsics::kUnsafeGetObjectVolatile);
LocationSummary* locations = new (arena) LocationSummary(invoke,
- can_call ?
- LocationSummary::kCallOnSlowPath :
- LocationSummary::kNoCall,
+ (can_call
+ ? LocationSummary::kCallOnSlowPath
+ : LocationSummary::kNoCall),
kIntrinsified);
if (can_call && kUseBakerReadBarrier) {
locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty()); // No caller-save registers.
@@ -663,7 +663,7 @@ static void CreateIntIntIntToIntLocations(ArenaAllocator* arena,
locations->SetInAt(1, Location::RequiresRegister());
locations->SetInAt(2, Location::RequiresRegister());
locations->SetOut(Location::RequiresRegister(),
- can_call ? Location::kOutputOverlap : Location::kNoOutputOverlap);
+ (can_call ? Location::kOutputOverlap : Location::kNoOutputOverlap));
if (type == Primitive::kPrimNot && kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
// We need a temporary register for the read barrier marking slow
// path in InstructionCodeGeneratorARM::GenerateReferenceLoadWithBakerReadBarrier.
@@ -891,8 +891,13 @@ void IntrinsicCodeGeneratorARM::VisitUnsafePutLongVolatile(HInvoke* invoke) {
static void CreateIntIntIntIntIntToIntPlusTemps(ArenaAllocator* arena,
HInvoke* invoke,
Primitive::Type type) {
+ bool can_call = kEmitCompilerReadBarrier &&
+ kUseBakerReadBarrier &&
+ (invoke->GetIntrinsic() == Intrinsics::kUnsafeCASObject);
LocationSummary* locations = new (arena) LocationSummary(invoke,
- LocationSummary::kNoCall,
+ (can_call
+ ? LocationSummary::kCallOnSlowPath
+ : LocationSummary::kNoCall),
kIntrinsified);
locations->SetInAt(0, Location::NoLocation()); // Unused receiver.
locations->SetInAt(1, Location::RequiresRegister());
@@ -901,36 +906,65 @@ static void CreateIntIntIntIntIntToIntPlusTemps(ArenaAllocator* arena,
locations->SetInAt(4, Location::RequiresRegister());
// If heap poisoning is enabled, we don't want the unpoisoning
- // operations to potentially clobber the output.
- Location::OutputOverlap overlaps = (kPoisonHeapReferences && type == Primitive::kPrimNot)
+ // operations to potentially clobber the output. Likewise when
+ // emitting a (Baker) read barrier, which may call.
+ Location::OutputOverlap overlaps =
+ ((kPoisonHeapReferences && type == Primitive::kPrimNot) || can_call)
? Location::kOutputOverlap
: Location::kNoOutputOverlap;
locations->SetOut(Location::RequiresRegister(), overlaps);
+ // Temporary registers used in CAS. In the object case
+ // (UnsafeCASObject intrinsic), these are also used for
+ // card-marking, and possibly for (Baker) read barrier.
locations->AddTemp(Location::RequiresRegister()); // Pointer.
locations->AddTemp(Location::RequiresRegister()); // Temp 1.
}
-static void GenCas(LocationSummary* locations, Primitive::Type type, CodeGeneratorARM* codegen) {
+static void GenCas(HInvoke* invoke, Primitive::Type type, CodeGeneratorARM* codegen) {
DCHECK_NE(type, Primitive::kPrimLong);
ArmAssembler* assembler = codegen->GetAssembler();
+ LocationSummary* locations = invoke->GetLocations();
- Register out = locations->Out().AsRegister<Register>(); // Boolean result.
+ Location out_loc = locations->Out();
+ Register out = out_loc.AsRegister<Register>(); // Boolean result.
- Register base = locations->InAt(1).AsRegister<Register>(); // Object pointer.
- Register offset = locations->InAt(2).AsRegisterPairLow<Register>(); // Offset (discard high 4B).
- Register expected_lo = locations->InAt(3).AsRegister<Register>(); // Expected.
- Register value_lo = locations->InAt(4).AsRegister<Register>(); // Value.
+ Register base = locations->InAt(1).AsRegister<Register>(); // Object pointer.
+ Location offset_loc = locations->InAt(2);
+ Register offset = offset_loc.AsRegisterPairLow<Register>(); // Offset (discard high 4B).
+ Register expected = locations->InAt(3).AsRegister<Register>(); // Expected.
+ Register value = locations->InAt(4).AsRegister<Register>(); // Value.
- Register tmp_ptr = locations->GetTemp(0).AsRegister<Register>(); // Pointer to actual memory.
- Register tmp_lo = locations->GetTemp(1).AsRegister<Register>(); // Value in memory.
+ Location tmp_ptr_loc = locations->GetTemp(0);
+ Register tmp_ptr = tmp_ptr_loc.AsRegister<Register>(); // Pointer to actual memory.
+ Register tmp = locations->GetTemp(1).AsRegister<Register>(); // Value in memory.
if (type == Primitive::kPrimNot) {
+ // The only read barrier implementation supporting the
+ // UnsafeCASObject intrinsic is the Baker-style read barriers.
+ DCHECK(!kEmitCompilerReadBarrier || kUseBakerReadBarrier);
+
// Mark card for object assuming new value is stored. Worst case we will mark an unchanged
// object and scan the receiver at the next GC for nothing.
bool value_can_be_null = true; // TODO: Worth finding out this information?
- codegen->MarkGCCard(tmp_ptr, tmp_lo, base, value_lo, value_can_be_null);
+ codegen->MarkGCCard(tmp_ptr, tmp, base, value, value_can_be_null);
+
+ if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
+ // Need to make sure the reference stored in the field is a to-space
+ // one before attempting the CAS or the CAS could fail incorrectly.
+ codegen->GenerateReferenceLoadWithBakerReadBarrier(
+ invoke,
+ out_loc, // Unused, used only as a "temporary" within the read barrier.
+ base,
+ /* offset */ 0u,
+ /* index */ offset_loc,
+ ScaleFactor::TIMES_1,
+ tmp_ptr_loc,
+ /* needs_null_check */ false,
+ /* always_update_field */ true,
+ &tmp);
+ }
}
// Prevent reordering with prior memory operations.
@@ -942,12 +976,12 @@ static void GenCas(LocationSummary* locations, Primitive::Type type, CodeGenerat
__ add(tmp_ptr, base, ShifterOperand(offset));
if (kPoisonHeapReferences && type == Primitive::kPrimNot) {
- codegen->GetAssembler()->PoisonHeapReference(expected_lo);
- if (value_lo == expected_lo) {
- // Do not poison `value_lo`, as it is the same register as
- // `expected_lo`, which has just been poisoned.
+ __ PoisonHeapReference(expected);
+ if (value == expected) {
+ // Do not poison `value`, as it is the same register as
+ // `expected`, which has just been poisoned.
} else {
- codegen->GetAssembler()->PoisonHeapReference(value_lo);
+ __ PoisonHeapReference(value);
}
}
@@ -959,37 +993,29 @@ static void GenCas(LocationSummary* locations, Primitive::Type type, CodeGenerat
Label loop_head;
__ Bind(&loop_head);
- // TODO: When `type == Primitive::kPrimNot`, add a read barrier for
- // the reference stored in the object before attempting the CAS,
- // similar to the one in the art::Unsafe_compareAndSwapObject JNI
- // implementation.
- //
- // Note that this code is not (yet) used when read barriers are
- // enabled (see IntrinsicLocationsBuilderARM::VisitUnsafeCASObject).
- DCHECK(!(type == Primitive::kPrimNot && kEmitCompilerReadBarrier));
- __ ldrex(tmp_lo, tmp_ptr);
+ __ ldrex(tmp, tmp_ptr);
- __ subs(tmp_lo, tmp_lo, ShifterOperand(expected_lo));
+ __ subs(tmp, tmp, ShifterOperand(expected));
__ it(EQ, ItState::kItT);
- __ strex(tmp_lo, value_lo, tmp_ptr, EQ);
- __ cmp(tmp_lo, ShifterOperand(1), EQ);
+ __ strex(tmp, value, tmp_ptr, EQ);
+ __ cmp(tmp, ShifterOperand(1), EQ);
__ b(&loop_head, EQ);
__ dmb(ISH);
- __ rsbs(out, tmp_lo, ShifterOperand(1));
+ __ rsbs(out, tmp, ShifterOperand(1));
__ it(CC);
__ mov(out, ShifterOperand(0), CC);
if (kPoisonHeapReferences && type == Primitive::kPrimNot) {
- codegen->GetAssembler()->UnpoisonHeapReference(expected_lo);
- if (value_lo == expected_lo) {
- // Do not unpoison `value_lo`, as it is the same register as
- // `expected_lo`, which has just been unpoisoned.
+ __ UnpoisonHeapReference(expected);
+ if (value == expected) {
+ // Do not unpoison `value`, as it is the same register as
+ // `expected`, which has just been unpoisoned.
} else {
- codegen->GetAssembler()->UnpoisonHeapReference(value_lo);
+ __ UnpoisonHeapReference(value);
}
}
}
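The ldrex/strex sequence above is a standard load-linked/store-conditional retry loop: the store-exclusive can fail spuriously, in which case the code branches back to loop_head, while a genuine value mismatch falls through with the result derived from the flags. A C++-level analogue (a sketch, not what the compiler emits) uses a weak CAS, whose spurious failures play the role of a failing strex:

#include <atomic>
#include <cstdint>

bool CasWord(std::atomic<int32_t>* addr, int32_t expected, int32_t value) {
  int32_t observed = expected;
  // compare_exchange_weak may fail spuriously, like strex; retry only while
  // the observed value still matches `expected`.
  while (!addr->compare_exchange_weak(observed, value, std::memory_order_seq_cst)) {
    if (observed != expected) {
      return false;  // Genuine mismatch: give up, like falling out of the loop.
    }
  }
  return true;
}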
@@ -998,33 +1024,23 @@ void IntrinsicLocationsBuilderARM::VisitUnsafeCASInt(HInvoke* invoke) {
CreateIntIntIntIntIntToIntPlusTemps(arena_, invoke, Primitive::kPrimInt);
}
void IntrinsicLocationsBuilderARM::VisitUnsafeCASObject(HInvoke* invoke) {
- // The UnsafeCASObject intrinsic is missing a read barrier, and
- // therefore sometimes does not work as expected (b/25883050).
- // Turn it off temporarily as a quick fix, until the read barrier is
- // implemented (see TODO in GenCAS).
- //
- // TODO(rpl): Implement read barrier support in GenCAS and re-enable
- // this intrinsic.
- if (kEmitCompilerReadBarrier) {
+ // The only read barrier implementation supporting the
+ // UnsafeCASObject intrinsic is the Baker-style read barriers.
+ if (kEmitCompilerReadBarrier && !kUseBakerReadBarrier) {
return;
}
CreateIntIntIntIntIntToIntPlusTemps(arena_, invoke, Primitive::kPrimNot);
}
void IntrinsicCodeGeneratorARM::VisitUnsafeCASInt(HInvoke* invoke) {
- GenCas(invoke->GetLocations(), Primitive::kPrimInt, codegen_);
+ GenCas(invoke, Primitive::kPrimInt, codegen_);
}
void IntrinsicCodeGeneratorARM::VisitUnsafeCASObject(HInvoke* invoke) {
- // The UnsafeCASObject intrinsic is missing a read barrier, and
- // therefore sometimes does not work as expected (b/25883050).
- // Turn it off temporarily as a quick fix, until the read barrier is
- // implemented (see TODO in GenCAS).
- //
- // TODO(rpl): Implement read barrier support in GenCAS and re-enable
- // this intrinsic.
- DCHECK(!kEmitCompilerReadBarrier);
+ // The only read barrier implementation supporting the
+ // UnsafeCASObject intrinsic is the Baker-style read barriers.
+ DCHECK(!kEmitCompilerReadBarrier || kUseBakerReadBarrier);
- GenCas(invoke->GetLocations(), Primitive::kPrimNot, codegen_);
+ GenCas(invoke, Primitive::kPrimNot, codegen_);
}
void IntrinsicLocationsBuilderARM::VisitStringCompareTo(HInvoke* invoke) {
diff --git a/compiler/optimizing/intrinsics_arm64.cc b/compiler/optimizing/intrinsics_arm64.cc
index e2c1802fdc..db1c022868 100644
--- a/compiler/optimizing/intrinsics_arm64.cc
+++ b/compiler/optimizing/intrinsics_arm64.cc
@@ -863,9 +863,9 @@ static void GenUnsafeGet(HInvoke* invoke,
codegen->GenerateReferenceLoadWithBakerReadBarrier(invoke,
trg_loc,
base,
- /* offset */ 0U,
+ /* offset */ 0u,
/* index */ offset_loc,
- /* scale_factor */ 0U,
+ /* scale_factor */ 0u,
temp,
/* needs_null_check */ false,
is_volatile);
@@ -880,7 +880,7 @@ static void GenUnsafeGet(HInvoke* invoke,
if (type == Primitive::kPrimNot) {
DCHECK(trg.IsW());
- codegen->MaybeGenerateReadBarrierSlow(invoke, trg_loc, trg_loc, base_loc, 0U, offset_loc);
+ codegen->MaybeGenerateReadBarrierSlow(invoke, trg_loc, trg_loc, base_loc, 0u, offset_loc);
}
}
}
@@ -890,9 +890,9 @@ static void CreateIntIntIntToIntLocations(ArenaAllocator* arena, HInvoke* invoke
(invoke->GetIntrinsic() == Intrinsics::kUnsafeGetObject ||
invoke->GetIntrinsic() == Intrinsics::kUnsafeGetObjectVolatile);
LocationSummary* locations = new (arena) LocationSummary(invoke,
- can_call ?
- LocationSummary::kCallOnSlowPath :
- LocationSummary::kNoCall,
+ (can_call
+ ? LocationSummary::kCallOnSlowPath
+ : LocationSummary::kNoCall),
kIntrinsified);
if (can_call && kUseBakerReadBarrier) {
locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty()); // No caller-save registers.
@@ -901,7 +901,7 @@ static void CreateIntIntIntToIntLocations(ArenaAllocator* arena, HInvoke* invoke
locations->SetInAt(1, Location::RequiresRegister());
locations->SetInAt(2, Location::RequiresRegister());
locations->SetOut(Location::RequiresRegister(),
- can_call ? Location::kOutputOverlap : Location::kNoOutputOverlap);
+ (can_call ? Location::kOutputOverlap : Location::kNoOutputOverlap));
}
void IntrinsicLocationsBuilderARM64::VisitUnsafeGet(HInvoke* invoke) {
@@ -1086,8 +1086,13 @@ void IntrinsicCodeGeneratorARM64::VisitUnsafePutLongVolatile(HInvoke* invoke) {
static void CreateIntIntIntIntIntToInt(ArenaAllocator* arena,
HInvoke* invoke,
Primitive::Type type) {
+ bool can_call = kEmitCompilerReadBarrier &&
+ kUseBakerReadBarrier &&
+ (invoke->GetIntrinsic() == Intrinsics::kUnsafeCASObject);
LocationSummary* locations = new (arena) LocationSummary(invoke,
- LocationSummary::kNoCall,
+ (can_call
+ ? LocationSummary::kCallOnSlowPath
+ : LocationSummary::kNoCall),
kIntrinsified);
locations->SetInAt(0, Location::NoLocation()); // Unused receiver.
locations->SetInAt(1, Location::RequiresRegister());
@@ -1096,20 +1101,29 @@ static void CreateIntIntIntIntIntToInt(ArenaAllocator* arena,
locations->SetInAt(4, Location::RequiresRegister());
// If heap poisoning is enabled, we don't want the unpoisoning
- // operations to potentially clobber the output.
- Location::OutputOverlap overlaps = (kPoisonHeapReferences && type == Primitive::kPrimNot)
+ // operations to potentially clobber the output. Likewise when
+ // emitting a (Baker) read barrier, which may call.
+ Location::OutputOverlap overlaps =
+ ((kPoisonHeapReferences && type == Primitive::kPrimNot) || can_call)
? Location::kOutputOverlap
: Location::kNoOutputOverlap;
locations->SetOut(Location::RequiresRegister(), overlaps);
+ if (type == Primitive::kPrimNot && kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
+ // Temporary register for (Baker) read barrier.
+ locations->AddTemp(Location::RequiresRegister());
+ }
}
-static void GenCas(LocationSummary* locations, Primitive::Type type, CodeGeneratorARM64* codegen) {
+static void GenCas(HInvoke* invoke, Primitive::Type type, CodeGeneratorARM64* codegen) {
MacroAssembler* masm = codegen->GetVIXLAssembler();
+ LocationSummary* locations = invoke->GetLocations();
- Register out = WRegisterFrom(locations->Out()); // Boolean result.
+ Location out_loc = locations->Out();
+ Register out = WRegisterFrom(out_loc); // Boolean result.
Register base = WRegisterFrom(locations->InAt(1)); // Object pointer.
- Register offset = XRegisterFrom(locations->InAt(2)); // Long offset.
+ Location offset_loc = locations->InAt(2);
+ Register offset = XRegisterFrom(offset_loc); // Long offset.
Register expected = RegisterFrom(locations->InAt(3), type); // Expected.
Register value = RegisterFrom(locations->InAt(4), type); // Value.
@@ -1118,6 +1132,27 @@ static void GenCas(LocationSummary* locations, Primitive::Type type, CodeGenerat
// Mark card for object assuming new value is stored.
bool value_can_be_null = true; // TODO: Worth finding out this information?
codegen->MarkGCCard(base, value, value_can_be_null);
+
+ // The only read barrier implementation supporting the
+ // UnsafeCASObject intrinsic is the Baker-style read barriers.
+ DCHECK(!kEmitCompilerReadBarrier || kUseBakerReadBarrier);
+
+ if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
+ Register temp = WRegisterFrom(locations->GetTemp(0));
+ // Need to make sure the reference stored in the field is a to-space
+ // one before attempting the CAS or the CAS could fail incorrectly.
+ codegen->GenerateReferenceLoadWithBakerReadBarrier(
+ invoke,
+ out_loc, // Unused, used only as a "temporary" within the read barrier.
+ base,
+ /* offset */ 0u,
+ /* index */ offset_loc,
+ /* scale_factor */ 0u,
+ temp,
+ /* needs_null_check */ false,
+ /* use_load_acquire */ false,
+ /* always_update_field */ true);
+ }
}
UseScratchRegisterScope temps(masm);
@@ -1145,14 +1180,6 @@ static void GenCas(LocationSummary* locations, Primitive::Type type, CodeGenerat
vixl::aarch64::Label loop_head, exit_loop;
__ Bind(&loop_head);
- // TODO: When `type == Primitive::kPrimNot`, add a read barrier for
- // the reference stored in the object before attempting the CAS,
- // similar to the one in the art::Unsafe_compareAndSwapObject JNI
- // implementation.
- //
- // Note that this code is not (yet) used when read barriers are
- // enabled (see IntrinsicLocationsBuilderARM64::VisitUnsafeCASObject).
- DCHECK(!(type == Primitive::kPrimNot && kEmitCompilerReadBarrier));
__ Ldaxr(tmp_value, MemOperand(tmp_ptr));
__ Cmp(tmp_value, expected);
__ B(&exit_loop, ne);
@@ -1179,14 +1206,9 @@ void IntrinsicLocationsBuilderARM64::VisitUnsafeCASLong(HInvoke* invoke) {
CreateIntIntIntIntIntToInt(arena_, invoke, Primitive::kPrimLong);
}
void IntrinsicLocationsBuilderARM64::VisitUnsafeCASObject(HInvoke* invoke) {
- // The UnsafeCASObject intrinsic is missing a read barrier, and
- // therefore sometimes does not work as expected (b/25883050).
- // Turn it off temporarily as a quick fix, until the read barrier is
- // implemented (see TODO in GenCAS).
- //
- // TODO(rpl): Implement read barrier support in GenCAS and re-enable
- // this intrinsic.
- if (kEmitCompilerReadBarrier) {
+ // The only read barrier implementation supporting the
+ // UnsafeCASObject intrinsic is the Baker-style read barriers.
+ if (kEmitCompilerReadBarrier && !kUseBakerReadBarrier) {
return;
}
@@ -1194,22 +1216,17 @@ void IntrinsicLocationsBuilderARM64::VisitUnsafeCASObject(HInvoke* invoke) {
}
void IntrinsicCodeGeneratorARM64::VisitUnsafeCASInt(HInvoke* invoke) {
- GenCas(invoke->GetLocations(), Primitive::kPrimInt, codegen_);
+ GenCas(invoke, Primitive::kPrimInt, codegen_);
}
void IntrinsicCodeGeneratorARM64::VisitUnsafeCASLong(HInvoke* invoke) {
- GenCas(invoke->GetLocations(), Primitive::kPrimLong, codegen_);
+ GenCas(invoke, Primitive::kPrimLong, codegen_);
}
void IntrinsicCodeGeneratorARM64::VisitUnsafeCASObject(HInvoke* invoke) {
- // The UnsafeCASObject intrinsic is missing a read barrier, and
- // therefore sometimes does not work as expected (b/25883050).
- // Turn it off temporarily as a quick fix, until the read barrier is
- // implemented (see TODO in GenCAS).
- //
- // TODO(rpl): Implement read barrier support in GenCAS and re-enable
- // this intrinsic.
- DCHECK(!kEmitCompilerReadBarrier);
+ // The only read barrier implementation supporting the
+ // UnsafeCASObject intrinsic is the Baker-style read barriers.
+ DCHECK(!kEmitCompilerReadBarrier || kUseBakerReadBarrier);
- GenCas(invoke->GetLocations(), Primitive::kPrimNot, codegen_);
+ GenCas(invoke, Primitive::kPrimNot, codegen_);
}
void IntrinsicLocationsBuilderARM64::VisitStringCompareTo(HInvoke* invoke) {
diff --git a/compiler/optimizing/intrinsics_x86.cc b/compiler/optimizing/intrinsics_x86.cc
index f41e4d95b5..aae3899847 100644
--- a/compiler/optimizing/intrinsics_x86.cc
+++ b/compiler/optimizing/intrinsics_x86.cc
@@ -2056,9 +2056,9 @@ static void CreateIntIntIntToIntLocations(ArenaAllocator* arena,
(invoke->GetIntrinsic() == Intrinsics::kUnsafeGetObject ||
invoke->GetIntrinsic() == Intrinsics::kUnsafeGetObjectVolatile);
LocationSummary* locations = new (arena) LocationSummary(invoke,
- can_call ?
- LocationSummary::kCallOnSlowPath :
- LocationSummary::kNoCall,
+ (can_call
+ ? LocationSummary::kCallOnSlowPath
+ : LocationSummary::kNoCall),
kIntrinsified);
if (can_call && kUseBakerReadBarrier) {
locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty()); // No caller-save registers.
@@ -2076,7 +2076,7 @@ static void CreateIntIntIntToIntLocations(ArenaAllocator* arena,
}
} else {
locations->SetOut(Location::RequiresRegister(),
- can_call ? Location::kOutputOverlap : Location::kNoOutputOverlap);
+ (can_call ? Location::kOutputOverlap : Location::kNoOutputOverlap));
}
}
@@ -2255,10 +2255,16 @@ void IntrinsicCodeGeneratorX86::VisitUnsafePutLongVolatile(HInvoke* invoke) {
GenUnsafePut(invoke->GetLocations(), Primitive::kPrimLong, /* is_volatile */ true, codegen_);
}
-static void CreateIntIntIntIntIntToInt(ArenaAllocator* arena, Primitive::Type type,
+static void CreateIntIntIntIntIntToInt(ArenaAllocator* arena,
+ Primitive::Type type,
HInvoke* invoke) {
+ bool can_call = kEmitCompilerReadBarrier &&
+ kUseBakerReadBarrier &&
+ (invoke->GetIntrinsic() == Intrinsics::kUnsafeCASObject);
LocationSummary* locations = new (arena) LocationSummary(invoke,
- LocationSummary::kNoCall,
+ (can_call
+ ? LocationSummary::kCallOnSlowPath
+ : LocationSummary::kNoCall),
kIntrinsified);
locations->SetInAt(0, Location::NoLocation()); // Unused receiver.
locations->SetInAt(1, Location::RequiresRegister());
@@ -2278,7 +2284,8 @@ static void CreateIntIntIntIntIntToInt(ArenaAllocator* arena, Primitive::Type ty
// Force a byte register for the output.
locations->SetOut(Location::RegisterLocation(EAX));
if (type == Primitive::kPrimNot) {
- // Need temp registers for card-marking.
+ // Need temporary registers for card-marking, and possibly for
+ // (Baker) read barrier.
locations->AddTemp(Location::RequiresRegister()); // Possibly used for reference poisoning too.
// Need a byte register for marking.
locations->AddTemp(Location::RegisterLocation(ECX));
@@ -2294,14 +2301,9 @@ void IntrinsicLocationsBuilderX86::VisitUnsafeCASLong(HInvoke* invoke) {
}
void IntrinsicLocationsBuilderX86::VisitUnsafeCASObject(HInvoke* invoke) {
- // The UnsafeCASObject intrinsic is missing a read barrier, and
- // therefore sometimes does not work as expected (b/25883050).
- // Turn it off temporarily as a quick fix, until the read barrier is
- // implemented (see TODO in GenCAS).
- //
- // TODO(rpl): Implement read barrier support in GenCAS and re-enable
- // this intrinsic.
- if (kEmitCompilerReadBarrier) {
+ // The only read barrier implementation supporting the
+ // UnsafeCASObject intrinsic is the Baker-style read barriers.
+ if (kEmitCompilerReadBarrier && !kUseBakerReadBarrier) {
return;
}
@@ -2317,7 +2319,18 @@ static void GenCAS(Primitive::Type type, HInvoke* invoke, CodeGeneratorX86* code
Location out = locations->Out();
DCHECK_EQ(out.AsRegister<Register>(), EAX);
+ // The address of the field within the holding object.
+ Address field_addr(base, offset, ScaleFactor::TIMES_1, 0);
+
if (type == Primitive::kPrimNot) {
+ // The only read barrier implementation supporting the
+ // UnsafeCASObject intrinsic is the Baker-style read barriers.
+ DCHECK(!kEmitCompilerReadBarrier || kUseBakerReadBarrier);
+
+ Location temp1_loc = locations->GetTemp(0);
+ Register temp1 = temp1_loc.AsRegister<Register>();
+ Register temp2 = locations->GetTemp(1).AsRegister<Register>();
+
Register expected = locations->InAt(3).AsRegister<Register>();
// Ensure `expected` is in EAX (required by the CMPXCHG instruction).
DCHECK_EQ(expected, EAX);
@@ -2325,11 +2338,20 @@ static void GenCAS(Primitive::Type type, HInvoke* invoke, CodeGeneratorX86* code
// Mark card for object assuming new value is stored.
bool value_can_be_null = true; // TODO: Worth finding out this information?
- codegen->MarkGCCard(locations->GetTemp(0).AsRegister<Register>(),
- locations->GetTemp(1).AsRegister<Register>(),
- base,
- value,
- value_can_be_null);
+ codegen->MarkGCCard(temp1, temp2, base, value, value_can_be_null);
+
+ if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
+ // Need to make sure the reference stored in the field is a to-space
+ // one before attempting the CAS or the CAS could fail incorrectly.
+ codegen->GenerateReferenceLoadWithBakerReadBarrier(
+ invoke,
+ temp1_loc, // Unused, used only as a "temporary" within the read barrier.
+ base,
+ field_addr,
+ /* needs_null_check */ false,
+ /* always_update_field */ true,
+ &temp2);
+ }
bool base_equals_value = (base == value);
if (kPoisonHeapReferences) {
@@ -2337,7 +2359,7 @@ static void GenCAS(Primitive::Type type, HInvoke* invoke, CodeGeneratorX86* code
// If `base` and `value` are the same register location, move
// `value` to a temporary register. This way, poisoning
// `value` won't invalidate `base`.
- value = locations->GetTemp(0).AsRegister<Register>();
+ value = temp1;
__ movl(value, base);
}
@@ -2356,19 +2378,12 @@ static void GenCAS(Primitive::Type type, HInvoke* invoke, CodeGeneratorX86* code
__ PoisonHeapReference(value);
}
- // TODO: Add a read barrier for the reference stored in the object
- // before attempting the CAS, similar to the one in the
- // art::Unsafe_compareAndSwapObject JNI implementation.
- //
- // Note that this code is not (yet) used when read barriers are
- // enabled (see IntrinsicLocationsBuilderX86::VisitUnsafeCASObject).
- DCHECK(!kEmitCompilerReadBarrier);
- __ LockCmpxchgl(Address(base, offset, TIMES_1, 0), value);
+ __ LockCmpxchgl(field_addr, value);
// LOCK CMPXCHG has full barrier semantics, and we don't need
// scheduling barriers at this time.
- // Convert ZF into the boolean result.
+ // Convert ZF into the Boolean result.
__ setb(kZero, out.AsRegister<Register>());
__ movzxb(out.AsRegister<Register>(), out.AsRegister<ByteRegister>());
@@ -2392,8 +2407,7 @@ static void GenCAS(Primitive::Type type, HInvoke* invoke, CodeGeneratorX86* code
// Ensure the expected value is in EAX (required by the CMPXCHG
// instruction).
DCHECK_EQ(locations->InAt(3).AsRegister<Register>(), EAX);
- __ LockCmpxchgl(Address(base, offset, TIMES_1, 0),
- locations->InAt(4).AsRegister<Register>());
+ __ LockCmpxchgl(field_addr, locations->InAt(4).AsRegister<Register>());
} else if (type == Primitive::kPrimLong) {
// Ensure the expected value is in EAX:EDX and that the new
// value is in EBX:ECX (required by the CMPXCHG8B instruction).
@@ -2401,7 +2415,7 @@ static void GenCAS(Primitive::Type type, HInvoke* invoke, CodeGeneratorX86* code
DCHECK_EQ(locations->InAt(3).AsRegisterPairHigh<Register>(), EDX);
DCHECK_EQ(locations->InAt(4).AsRegisterPairLow<Register>(), EBX);
DCHECK_EQ(locations->InAt(4).AsRegisterPairHigh<Register>(), ECX);
- __ LockCmpxchg8b(Address(base, offset, TIMES_1, 0));
+ __ LockCmpxchg8b(field_addr);
} else {
LOG(FATAL) << "Unexpected CAS type " << type;
}
@@ -2409,7 +2423,7 @@ static void GenCAS(Primitive::Type type, HInvoke* invoke, CodeGeneratorX86* code
// LOCK CMPXCHG/LOCK CMPXCHG8B have full barrier semantics, and we
// don't need scheduling barriers at this time.
- // Convert ZF into the boolean result.
+ // Convert ZF into the Boolean result.
__ setb(kZero, out.AsRegister<Register>());
__ movzxb(out.AsRegister<Register>(), out.AsRegister<ByteRegister>());
}
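The poisoning dance above relies on the property noted in the earlier slow-path comment: heap reference poisoning is address negation, so poisoning twice is the identity, which is why `value` and `expected` must not end up in the same register. A one-line sketch of that property (illustrative only, not ART's PoisonHeapReference):

#include <cstdint>

// Negation-based poisoning; applying it twice restores the original value.
constexpr uint32_t Poison(uint32_t ref) { return 0u - ref; }
static_assert(Poison(Poison(0x12345678u)) == 0x12345678u,
              "double poisoning cancels out");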
@@ -2424,14 +2438,9 @@ void IntrinsicCodeGeneratorX86::VisitUnsafeCASLong(HInvoke* invoke) {
}
void IntrinsicCodeGeneratorX86::VisitUnsafeCASObject(HInvoke* invoke) {
- // The UnsafeCASObject intrinsic is missing a read barrier, and
- // therefore sometimes does not work as expected (b/25883050).
- // Turn it off temporarily as a quick fix, until the read barrier is
- // implemented (see TODO in GenCAS).
- //
- // TODO(rpl): Implement read barrier support in GenCAS and re-enable
- // this intrinsic.
- DCHECK(!kEmitCompilerReadBarrier);
+ // The only read barrier implementation supporting the
+ // UnsafeCASObject intrinsic is the Baker-style read barriers.
+ DCHECK(!kEmitCompilerReadBarrier || kUseBakerReadBarrier);
GenCAS(Primitive::kPrimNot, invoke, codegen_);
}
diff --git a/compiler/optimizing/intrinsics_x86_64.cc b/compiler/optimizing/intrinsics_x86_64.cc
index 4b0afca122..cdef22f6de 100644
--- a/compiler/optimizing/intrinsics_x86_64.cc
+++ b/compiler/optimizing/intrinsics_x86_64.cc
@@ -2172,9 +2172,9 @@ static void CreateIntIntIntToIntLocations(ArenaAllocator* arena, HInvoke* invoke
(invoke->GetIntrinsic() == Intrinsics::kUnsafeGetObject ||
invoke->GetIntrinsic() == Intrinsics::kUnsafeGetObjectVolatile);
LocationSummary* locations = new (arena) LocationSummary(invoke,
- can_call ?
- LocationSummary::kCallOnSlowPath :
- LocationSummary::kNoCall,
+ (can_call
+ ? LocationSummary::kCallOnSlowPath
+ : LocationSummary::kNoCall),
kIntrinsified);
if (can_call && kUseBakerReadBarrier) {
locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty()); // No caller-save registers.
@@ -2183,7 +2183,7 @@ static void CreateIntIntIntToIntLocations(ArenaAllocator* arena, HInvoke* invoke
locations->SetInAt(1, Location::RequiresRegister());
locations->SetInAt(2, Location::RequiresRegister());
locations->SetOut(Location::RequiresRegister(),
- can_call ? Location::kOutputOverlap : Location::kNoOutputOverlap);
+ (can_call ? Location::kOutputOverlap : Location::kNoOutputOverlap));
}
void IntrinsicLocationsBuilderX86_64::VisitUnsafeGet(HInvoke* invoke) {
@@ -2333,10 +2333,16 @@ void IntrinsicCodeGeneratorX86_64::VisitUnsafePutLongVolatile(HInvoke* invoke) {
GenUnsafePut(invoke->GetLocations(), Primitive::kPrimLong, /* is_volatile */ true, codegen_);
}
-static void CreateIntIntIntIntIntToInt(ArenaAllocator* arena, Primitive::Type type,
+static void CreateIntIntIntIntIntToInt(ArenaAllocator* arena,
+ Primitive::Type type,
HInvoke* invoke) {
+ bool can_call = kEmitCompilerReadBarrier &&
+ kUseBakerReadBarrier &&
+ (invoke->GetIntrinsic() == Intrinsics::kUnsafeCASObject);
LocationSummary* locations = new (arena) LocationSummary(invoke,
- LocationSummary::kNoCall,
+ (can_call
+ ? LocationSummary::kCallOnSlowPath
+ : LocationSummary::kNoCall),
kIntrinsified);
locations->SetInAt(0, Location::NoLocation()); // Unused receiver.
locations->SetInAt(1, Location::RequiresRegister());
@@ -2347,7 +2353,8 @@ static void CreateIntIntIntIntIntToInt(ArenaAllocator* arena, Primitive::Type ty
locations->SetOut(Location::RequiresRegister());
if (type == Primitive::kPrimNot) {
- // Need temp registers for card-marking.
+ // Need temporary registers for card-marking, and possibly for
+ // (Baker) read barrier.
locations->AddTemp(Location::RequiresRegister()); // Possibly used for reference poisoning too.
locations->AddTemp(Location::RequiresRegister());
}
@@ -2362,14 +2369,9 @@ void IntrinsicLocationsBuilderX86_64::VisitUnsafeCASLong(HInvoke* invoke) {
}
void IntrinsicLocationsBuilderX86_64::VisitUnsafeCASObject(HInvoke* invoke) {
- // The UnsafeCASObject intrinsic is missing a read barrier, and
- // therefore sometimes does not work as expected (b/25883050).
- // Turn it off temporarily as a quick fix, until the read barrier is
- // implemented (see TODO in GenCAS).
- //
- // TODO(rpl): Implement read barrier support in GenCAS and re-enable
- // this intrinsic.
- if (kEmitCompilerReadBarrier) {
+ // The only read barrier implementation supporting the
+ // UnsafeCASObject intrinsic is the Baker-style read barriers.
+ if (kEmitCompilerReadBarrier && !kUseBakerReadBarrier) {
return;
}
@@ -2386,16 +2388,37 @@ static void GenCAS(Primitive::Type type, HInvoke* invoke, CodeGeneratorX86_64* c
// Ensure `expected` is in RAX (required by the CMPXCHG instruction).
DCHECK_EQ(expected.AsRegister(), RAX);
CpuRegister value = locations->InAt(4).AsRegister<CpuRegister>();
- CpuRegister out = locations->Out().AsRegister<CpuRegister>();
+ Location out_loc = locations->Out();
+ CpuRegister out = out_loc.AsRegister<CpuRegister>();
if (type == Primitive::kPrimNot) {
+ // The only read barrier implementation supporting the
+ // UnsafeCASObject intrinsic is the Baker-style read barriers.
+ DCHECK(!kEmitCompilerReadBarrier || kUseBakerReadBarrier);
+
+ CpuRegister temp1 = locations->GetTemp(0).AsRegister<CpuRegister>();
+ CpuRegister temp2 = locations->GetTemp(1).AsRegister<CpuRegister>();
+
// Mark card for object assuming new value is stored.
bool value_can_be_null = true; // TODO: Worth finding out this information?
- codegen->MarkGCCard(locations->GetTemp(0).AsRegister<CpuRegister>(),
- locations->GetTemp(1).AsRegister<CpuRegister>(),
- base,
- value,
- value_can_be_null);
+ codegen->MarkGCCard(temp1, temp2, base, value, value_can_be_null);
+
+ // The address of the field within the holding object.
+ Address field_addr(base, offset, ScaleFactor::TIMES_1, 0);
+
+ if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
+ // Need to make sure the reference stored in the field is a to-space
+ // one before attempting the CAS or the CAS could fail incorrectly.
+ codegen->GenerateReferenceLoadWithBakerReadBarrier(
+ invoke,
+ out_loc, // Unused, used only as a "temporary" within the read barrier.
+ base,
+ field_addr,
+ /* needs_null_check */ false,
+ /* always_update_field */ true,
+ &temp1,
+ &temp2);
+ }
bool base_equals_value = (base.AsRegister() == value.AsRegister());
Register value_reg = value.AsRegister();
@@ -2404,7 +2427,7 @@ static void GenCAS(Primitive::Type type, HInvoke* invoke, CodeGeneratorX86_64* c
// If `base` and `value` are the same register location, move
// `value_reg` to a temporary register. This way, poisoning
// `value_reg` won't invalidate `base`.
- value_reg = locations->GetTemp(0).AsRegister<CpuRegister>().AsRegister();
+ value_reg = temp1.AsRegister();
__ movl(CpuRegister(value_reg), base);
}
@@ -2423,19 +2446,12 @@ static void GenCAS(Primitive::Type type, HInvoke* invoke, CodeGeneratorX86_64* c
__ PoisonHeapReference(CpuRegister(value_reg));
}
- // TODO: Add a read barrier for the reference stored in the object
- // before attempting the CAS, similar to the one in the
- // art::Unsafe_compareAndSwapObject JNI implementation.
- //
- // Note that this code is not (yet) used when read barriers are
- // enabled (see IntrinsicLocationsBuilderX86_64::VisitUnsafeCASObject).
- DCHECK(!kEmitCompilerReadBarrier);
- __ LockCmpxchgl(Address(base, offset, TIMES_1, 0), CpuRegister(value_reg));
+ __ LockCmpxchgl(field_addr, CpuRegister(value_reg));
// LOCK CMPXCHG has full barrier semantics, and we don't need
// scheduling barriers at this time.
- // Convert ZF into the boolean result.
+ // Convert ZF into the Boolean result.
__ setcc(kZero, out);
__ movzxb(out, out);
@@ -2468,7 +2484,7 @@ static void GenCAS(Primitive::Type type, HInvoke* invoke, CodeGeneratorX86_64* c
// LOCK CMPXCHG has full barrier semantics, and we don't need
// scheduling barriers at this time.
- // Convert ZF into the boolean result.
+ // Convert ZF into the Boolean result.
__ setcc(kZero, out);
__ movzxb(out, out);
}
@@ -2483,14 +2499,9 @@ void IntrinsicCodeGeneratorX86_64::VisitUnsafeCASLong(HInvoke* invoke) {
}
void IntrinsicCodeGeneratorX86_64::VisitUnsafeCASObject(HInvoke* invoke) {
- // The UnsafeCASObject intrinsic is missing a read barrier, and
- // therefore sometimes does not work as expected (b/25883050).
- // Turn it off temporarily as a quick fix, until the read barrier is
- // implemented (see TODO in GenCAS).
- //
- // TODO(rpl): Implement read barrier support in GenCAS and re-enable
- // this intrinsic.
- DCHECK(!kEmitCompilerReadBarrier);
+ // The only read barrier implementation supporting the
+  // UnsafeCASObject intrinsic is the Baker-style read barrier.
+ DCHECK(!kEmitCompilerReadBarrier || kUseBakerReadBarrier);
GenCAS(Primitive::kPrimNot, invoke, codegen_);
}
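For orientation, the boolean result that the LOCK CMPXCHG / setcc / movzxb sequence above materializes corresponds to the following portable C++ sketch of an object-field CAS. This is an illustration only: the atomic type and field representation are stand-ins rather than ART's code, and heap-reference poisoning is omitted.

    #include <atomic>
    #include <cstdint>

    // Minimal sketch of a boolean-returning CAS on a 32-bit reference slot.
    // With Baker read barriers enabled, the reference currently stored in the
    // field must first be converted to a to-space reference (the
    // GenerateReferenceLoadWithBakerReadBarrier call above, with
    // always_update_field), otherwise comparing it against the to-space
    // `expected` value could fail spuriously.
    inline bool CasObjectFieldSketch(std::atomic<uint32_t>* field,
                                     uint32_t expected,
                                     uint32_t new_value) {
      uint32_t old_value = expected;
      // compare_exchange_strong is the portable counterpart of LOCK CMPXCHG;
      // its default sequentially consistent ordering matches the "full barrier
      // semantics" note above, and its bool result plays the role of ZF.
      return field->compare_exchange_strong(old_value, new_value);
    }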
diff --git a/compiler/optimizing/licm.cc b/compiler/optimizing/licm.cc
index a0ded74d6d..eb2d18dd88 100644
--- a/compiler/optimizing/licm.cc
+++ b/compiler/optimizing/licm.cc
@@ -15,6 +15,7 @@
*/
#include "licm.h"
+
#include "side_effects_analysis.h"
namespace art {
@@ -90,8 +91,7 @@ void LICM::Run() {
}
  // Post order visit, so that inner loops are visited before outer loops.
- for (HPostOrderIterator it(*graph_); !it.Done(); it.Advance()) {
- HBasicBlock* block = it.Current();
+ for (HBasicBlock* block : graph_->GetPostOrder()) {
if (!block->IsLoopHeader()) {
// Only visit the loop when we reach the header.
continue;
diff --git a/compiler/optimizing/linear_order.cc b/compiler/optimizing/linear_order.cc
index 3af212fa48..80cecd41dc 100644
--- a/compiler/optimizing/linear_order.cc
+++ b/compiler/optimizing/linear_order.cc
@@ -94,8 +94,7 @@ void LinearizeGraph(const HGraph* graph,
// for it.
ArenaVector<uint32_t> forward_predecessors(graph->GetBlocks().size(),
allocator->Adapter(kArenaAllocLinearOrder));
- for (HReversePostOrderIterator it(*graph); !it.Done(); it.Advance()) {
- HBasicBlock* block = it.Current();
+ for (HBasicBlock* block : graph->GetReversePostOrder()) {
size_t number_of_forward_predecessors = block->GetPredecessors().size();
if (block->IsLoopHeader()) {
number_of_forward_predecessors -= block->GetLoopInformation()->NumberOfBackEdges();
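As a factored-out sketch of the bookkeeping in this hunk (same accessors as in the loop above, helper name is illustrative): a loop header does not wait for its back edges, so it becomes eligible for linearization once every predecessor outside the loop has been emitted.

    // Number of predecessors that must be emitted before `block` in the
    // linear order; back edges of a loop header are excluded because they
    // come from blocks emitted after the header.
    size_t CountForwardPredecessors(const HBasicBlock* block) {
      size_t count = block->GetPredecessors().size();
      if (block->IsLoopHeader()) {
        count -= block->GetLoopInformation()->NumberOfBackEdges();
      }
      return count;
    }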
diff --git a/compiler/optimizing/linear_order.h b/compiler/optimizing/linear_order.h
index cdbdd0714b..7122d67be9 100644
--- a/compiler/optimizing/linear_order.h
+++ b/compiler/optimizing/linear_order.h
@@ -30,16 +30,12 @@ namespace art {
//
// for (HBasicBlock* block : linear_order) // linear order
//
-// for (HBasicBlock* block : LinearPostOrder(linear_order)) // linear post order
+// for (HBasicBlock* block : ReverseRange(linear_order)) // linear post order
//
void LinearizeGraph(const HGraph* graph,
ArenaAllocator* allocator,
ArenaVector<HBasicBlock*>* linear_order);
-inline auto LinearPostOrder(const ArenaVector<HBasicBlock*>& linear_order) {
- return MakeIterationRange(linear_order.rbegin(), linear_order.rend());
-}
-
} // namespace art
#endif // ART_COMPILER_OPTIMIZING_LINEAR_ORDER_H_
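The removed LinearPostOrder() helper is subsumed by the generic ReverseRange() adapter from base/iteration_range.h; the idea can be sketched in self-contained C++ as follows (names here are illustrative, not the ART utility itself):

    // A reverse view over a container: iterating it yields elements from back
    // to front, so a reverse-post-order vector is seen in post order and a
    // linear-order vector in linear post order.
    template <typename Container>
    class ReverseViewSketch {
     public:
      explicit ReverseViewSketch(const Container& container) : container_(container) {}
      auto begin() const { return container_.rbegin(); }
      auto end() const { return container_.rend(); }

     private:
      const Container& container_;
    };

    template <typename Container>
    ReverseViewSketch<Container> MakeReverseView(const Container& container) {
      return ReverseViewSketch<Container>(container);
    }

Usage mirrors the hunks above: for (HBasicBlock* block : MakeReverseView(graph->GetLinearOrder())) visits blocks in linear post order.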
diff --git a/compiler/optimizing/liveness_test.cc b/compiler/optimizing/liveness_test.cc
index bd74368e17..37b58ded59 100644
--- a/compiler/optimizing/liveness_test.cc
+++ b/compiler/optimizing/liveness_test.cc
@@ -56,8 +56,7 @@ static void TestCode(const uint16_t* data, const char* expected) {
liveness.Analyze();
std::ostringstream buffer;
- for (HInsertionOrderIterator it(*graph); !it.Done(); it.Advance()) {
- HBasicBlock* block = it.Current();
+ for (HBasicBlock* block : graph->GetBlocks()) {
buffer << "Block " << block->GetBlockId() << std::endl;
size_t ssa_values = liveness.GetNumberOfSsaValues();
BitVector* live_in = liveness.GetLiveInSet(*block);
diff --git a/compiler/optimizing/load_store_elimination.cc b/compiler/optimizing/load_store_elimination.cc
index 820fa29597..b91e9e6868 100644
--- a/compiler/optimizing/load_store_elimination.cc
+++ b/compiler/optimizing/load_store_elimination.cc
@@ -1046,8 +1046,8 @@ void LoadStoreElimination::Run() {
return;
}
HeapLocationCollector heap_location_collector(graph_);
- for (HReversePostOrderIterator it(*graph_); !it.Done(); it.Advance()) {
- heap_location_collector.VisitBasicBlock(it.Current());
+ for (HBasicBlock* block : graph_->GetReversePostOrder()) {
+ heap_location_collector.VisitBasicBlock(block);
}
if (heap_location_collector.GetNumberOfHeapLocations() > kMaxNumberOfHeapLocations) {
// Bail out if there are too many heap locations to deal with.
@@ -1065,8 +1065,8 @@ void LoadStoreElimination::Run() {
}
heap_location_collector.BuildAliasingMatrix();
LSEVisitor lse_visitor(graph_, heap_location_collector, side_effects_);
- for (HReversePostOrderIterator it(*graph_); !it.Done(); it.Advance()) {
- lse_visitor.VisitBasicBlock(it.Current());
+ for (HBasicBlock* block : graph_->GetReversePostOrder()) {
+ lse_visitor.VisitBasicBlock(block);
}
lse_visitor.RemoveInstructions();
}
diff --git a/compiler/optimizing/nodes.cc b/compiler/optimizing/nodes.cc
index 59cc0091bf..45c7eb1a46 100644
--- a/compiler/optimizing/nodes.cc
+++ b/compiler/optimizing/nodes.cc
@@ -179,16 +179,16 @@ GraphAnalysisResult HGraph::BuildDominatorTree() {
}
void HGraph::ClearDominanceInformation() {
- for (HReversePostOrderIterator it(*this); !it.Done(); it.Advance()) {
- it.Current()->ClearDominanceInformation();
+ for (HBasicBlock* block : GetReversePostOrder()) {
+ block->ClearDominanceInformation();
}
reverse_post_order_.clear();
}
void HGraph::ClearLoopInformation() {
SetHasIrreducibleLoops(false);
- for (HReversePostOrderIterator it(*this); !it.Done(); it.Advance()) {
- it.Current()->SetLoopInformation(nullptr);
+ for (HBasicBlock* block : GetReversePostOrder()) {
+ block->SetLoopInformation(nullptr);
}
}
@@ -275,8 +275,7 @@ void HGraph::ComputeDominanceInformation() {
bool update_occurred = true;
while (update_occurred) {
update_occurred = false;
- for (HReversePostOrderIterator it(*this); !it.Done(); it.Advance()) {
- HBasicBlock* block = it.Current();
+ for (HBasicBlock* block : GetReversePostOrder()) {
for (HBasicBlock* successor : block->GetSuccessors()) {
update_occurred |= UpdateDominatorOfSuccessor(block, successor);
}
@@ -287,8 +286,7 @@ void HGraph::ComputeDominanceInformation() {
// Make sure that there are no remaining blocks whose dominator information
// needs to be updated.
if (kIsDebugBuild) {
- for (HReversePostOrderIterator it(*this); !it.Done(); it.Advance()) {
- HBasicBlock* block = it.Current();
+ for (HBasicBlock* block : GetReversePostOrder()) {
for (HBasicBlock* successor : block->GetSuccessors()) {
DCHECK(!UpdateDominatorOfSuccessor(block, successor));
}
@@ -297,8 +295,7 @@ void HGraph::ComputeDominanceInformation() {
// Populate `dominated_blocks_` information after computing all dominators.
  // The potential presence of irreducible loops requires doing it afterwards.
- for (HReversePostOrderIterator it(*this); !it.Done(); it.Advance()) {
- HBasicBlock* block = it.Current();
+ for (HBasicBlock* block : GetReversePostOrder()) {
if (!block->IsEntryBlock()) {
block->GetDominator()->AddDominatedBlock(block);
}
@@ -375,8 +372,7 @@ void HGraph::SimplifyLoop(HBasicBlock* header) {
void HGraph::ComputeTryBlockInformation() {
// Iterate in reverse post order to propagate try membership information from
// predecessors to their successors.
- for (HReversePostOrderIterator it(*this); !it.Done(); it.Advance()) {
- HBasicBlock* block = it.Current();
+ for (HBasicBlock* block : GetReversePostOrder()) {
if (block->IsEntryBlock() || block->IsCatchBlock()) {
// Catch blocks after simplification have only exceptional predecessors
// and hence are never in tries.
@@ -446,8 +442,7 @@ GraphAnalysisResult HGraph::AnalyzeLoops() const {
// We iterate post order to ensure we visit inner loops before outer loops.
// `PopulateRecursive` needs this guarantee to know whether a natural loop
// contains an irreducible loop.
- for (HPostOrderIterator it(*this); !it.Done(); it.Advance()) {
- HBasicBlock* block = it.Current();
+ for (HBasicBlock* block : GetPostOrder()) {
if (block->IsLoopHeader()) {
if (block->IsCatchBlock()) {
// TODO: Dealing with exceptional back edges could be tricky because
@@ -1134,8 +1129,8 @@ void HGraphVisitor::VisitInsertionOrder() {
}
void HGraphVisitor::VisitReversePostOrder() {
- for (HReversePostOrderIterator it(*graph_); !it.Done(); it.Advance()) {
- VisitBasicBlock(it.Current());
+ for (HBasicBlock* block : graph_->GetReversePostOrder()) {
+ VisitBasicBlock(block);
}
}
@@ -1986,10 +1981,8 @@ HInstruction* HGraph::InlineInto(HGraph* outer_graph, HInvoke* invoke) {
// Update the environments in this graph to have the invoke's environment
// as parent.
{
- HReversePostOrderIterator it(*this);
- it.Advance(); // Skip the entry block, we do not need to update the entry's suspend check.
- for (; !it.Done(); it.Advance()) {
- HBasicBlock* block = it.Current();
+    // Skip the entry block; we do not need to update the entry's suspend check.
+ for (HBasicBlock* block : GetReversePostOrderSkipEntryBlock()) {
for (HInstructionIterator instr_it(block->GetInstructions());
!instr_it.Done();
instr_it.Advance()) {
@@ -2070,8 +2063,7 @@ HInstruction* HGraph::InlineInto(HGraph* outer_graph, HInvoke* invoke) {
  // Do a reverse post order visit of the blocks in the callee and apply (1), (2),
  // (3) and (4) to the blocks where applicable.
- for (HReversePostOrderIterator it(*this); !it.Done(); it.Advance()) {
- HBasicBlock* current = it.Current();
+ for (HBasicBlock* current : GetReversePostOrder()) {
if (current != exit_block_ && current != entry_block_ && current != first) {
DCHECK(current->GetTryCatchInformation() == nullptr);
DCHECK(current->GetGraph() == this);
diff --git a/compiler/optimizing/nodes.h b/compiler/optimizing/nodes.h
index 257ccea799..6a45149509 100644
--- a/compiler/optimizing/nodes.h
+++ b/compiler/optimizing/nodes.h
@@ -25,6 +25,7 @@
#include "base/arena_containers.h"
#include "base/arena_object.h"
#include "base/array_ref.h"
+#include "base/iteration_range.h"
#include "base/stl_util.h"
#include "base/transform_array_ref.h"
#include "dex_file.h"
@@ -460,10 +461,23 @@ class HGraph : public ArenaObject<kArenaAllocGraph> {
return reverse_post_order_;
}
+ ArrayRef<HBasicBlock* const> GetReversePostOrderSkipEntryBlock() {
+ DCHECK(GetReversePostOrder()[0] == entry_block_);
+ return ArrayRef<HBasicBlock* const>(GetReversePostOrder()).SubArray(1);
+ }
+
+ IterationRange<ArenaVector<HBasicBlock*>::const_reverse_iterator> GetPostOrder() const {
+ return ReverseRange(GetReversePostOrder());
+ }
+
const ArenaVector<HBasicBlock*>& GetLinearOrder() const {
return linear_order_;
}
+ IterationRange<ArenaVector<HBasicBlock*>::const_reverse_iterator> GetLinearPostOrder() const {
+ return ReverseRange(GetLinearOrder());
+ }
+
bool HasBoundsChecks() const {
return has_bounds_checks_;
}
@@ -6618,58 +6632,6 @@ class HGraphDelegateVisitor : public HGraphVisitor {
DISALLOW_COPY_AND_ASSIGN(HGraphDelegateVisitor);
};
-class HInsertionOrderIterator : public ValueObject {
- public:
- explicit HInsertionOrderIterator(const HGraph& graph) : graph_(graph), index_(0) {}
-
- bool Done() const { return index_ == graph_.GetBlocks().size(); }
- HBasicBlock* Current() const { return graph_.GetBlocks()[index_]; }
- void Advance() { ++index_; }
-
- private:
- const HGraph& graph_;
- size_t index_;
-
- DISALLOW_COPY_AND_ASSIGN(HInsertionOrderIterator);
-};
-
-class HReversePostOrderIterator : public ValueObject {
- public:
- explicit HReversePostOrderIterator(const HGraph& graph) : graph_(graph), index_(0) {
- // Check that reverse post order of the graph has been built.
- DCHECK(!graph.GetReversePostOrder().empty());
- }
-
- bool Done() const { return index_ == graph_.GetReversePostOrder().size(); }
- HBasicBlock* Current() const { return graph_.GetReversePostOrder()[index_]; }
- void Advance() { ++index_; }
-
- private:
- const HGraph& graph_;
- size_t index_;
-
- DISALLOW_COPY_AND_ASSIGN(HReversePostOrderIterator);
-};
-
-class HPostOrderIterator : public ValueObject {
- public:
- explicit HPostOrderIterator(const HGraph& graph)
- : graph_(graph), index_(graph_.GetReversePostOrder().size()) {
- // Check that reverse post order of the graph has been built.
- DCHECK(!graph.GetReversePostOrder().empty());
- }
-
- bool Done() const { return index_ == 0; }
- HBasicBlock* Current() const { return graph_.GetReversePostOrder()[index_ - 1u]; }
- void Advance() { --index_; }
-
- private:
- const HGraph& graph_;
- size_t index_;
-
- DISALLOW_COPY_AND_ASSIGN(HPostOrderIterator);
-};
-
// Iterator over the blocks that are part of the loop, including blocks that
// are part of an inner loop. The blocks are iterated in the order of their
// block ids.
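A compilable stand-alone sketch of what the removed iterator classes reduce to with the new accessors (the block/graph types below are stand-ins, not ART's):

    #include <cassert>
    #include <utility>
    #include <vector>

    struct BlockSketch {
      bool is_entry = false;
    };

    class GraphSketch {
     public:
      explicit GraphSketch(std::vector<BlockSketch*> rpo)
          : reverse_post_order_(std::move(rpo)) {}

      // HReversePostOrderIterator becomes a range-based loop over this vector.
      const std::vector<BlockSketch*>& GetReversePostOrder() const {
        return reverse_post_order_;
      }

      // HPostOrderIterator becomes a loop over the reverse view of the same
      // vector, which is what GetPostOrder() expresses via ReverseRange().
      std::vector<BlockSketch*>::const_reverse_iterator PostOrderBegin() const {
        return reverse_post_order_.rbegin();
      }
      std::vector<BlockSketch*>::const_reverse_iterator PostOrderEnd() const {
        return reverse_post_order_.rend();
      }

      // Counterpart of the added GetReversePostOrderSkipEntryBlock(): the entry
      // block is always the first element of the reverse post order, so a view
      // starting at index 1 skips it without copying.
      std::vector<BlockSketch*>::const_iterator SkipEntryBegin() const {
        assert(!reverse_post_order_.empty() && reverse_post_order_[0]->is_entry);
        return reverse_post_order_.begin() + 1;
      }

     private:
      std::vector<BlockSketch*> reverse_post_order_;
    };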
diff --git a/compiler/optimizing/prepare_for_register_allocation.cc b/compiler/optimizing/prepare_for_register_allocation.cc
index 7b66ef3627..0db60882db 100644
--- a/compiler/optimizing/prepare_for_register_allocation.cc
+++ b/compiler/optimizing/prepare_for_register_allocation.cc
@@ -20,8 +20,7 @@ namespace art {
void PrepareForRegisterAllocation::Run() {
// Order does not matter.
- for (HReversePostOrderIterator it(*GetGraph()); !it.Done(); it.Advance()) {
- HBasicBlock* block = it.Current();
+ for (HBasicBlock* block : GetGraph()->GetReversePostOrder()) {
// No need to visit the phis.
for (HInstructionIterator inst_it(block->GetInstructions()); !inst_it.Done();
inst_it.Advance()) {
diff --git a/compiler/optimizing/reference_type_propagation.cc b/compiler/optimizing/reference_type_propagation.cc
index d93c9ddc39..d588deaace 100644
--- a/compiler/optimizing/reference_type_propagation.cc
+++ b/compiler/optimizing/reference_type_propagation.cc
@@ -123,8 +123,7 @@ void ReferenceTypePropagation::ValidateTypes() {
// TODO: move this to the graph checker.
if (kIsDebugBuild) {
ScopedObjectAccess soa(Thread::Current());
- for (HReversePostOrderIterator it(*graph_); !it.Done(); it.Advance()) {
- HBasicBlock* block = it.Current();
+ for (HBasicBlock* block : graph_->GetReversePostOrder()) {
for (HInstructionIterator iti(block->GetInstructions()); !iti.Done(); iti.Advance()) {
HInstruction* instr = iti.Current();
if (instr->GetType() == Primitive::kPrimNot) {
@@ -158,8 +157,8 @@ void ReferenceTypePropagation::Run() {
// To properly propagate type info we need to visit in the dominator-based order.
// Reverse post order guarantees a node's dominators are visited first.
// We take advantage of this order in `VisitBasicBlock`.
- for (HReversePostOrderIterator it(*graph_); !it.Done(); it.Advance()) {
- VisitBasicBlock(it.Current());
+ for (HBasicBlock* block : graph_->GetReversePostOrder()) {
+ VisitBasicBlock(block);
}
ProcessWorklist();
diff --git a/compiler/optimizing/register_allocator_graph_color.cc b/compiler/optimizing/register_allocator_graph_color.cc
index 961077419e..aa0d3710fa 100644
--- a/compiler/optimizing/register_allocator_graph_color.cc
+++ b/compiler/optimizing/register_allocator_graph_color.cc
@@ -758,7 +758,7 @@ bool RegisterAllocatorGraphColor::Validate(bool log_fatal_on_failure) {
}
void RegisterAllocatorGraphColor::ProcessInstructions() {
- for (HBasicBlock* block : LinearPostOrder(codegen_->GetGraph()->GetLinearOrder())) {
+ for (HBasicBlock* block : codegen_->GetGraph()->GetLinearPostOrder()) {
// Note that we currently depend on this ordering, since some helper
// code is designed for linear scan register allocation.
for (HBackwardInstructionIterator instr_it(block->GetInstructions());
diff --git a/compiler/optimizing/register_allocator_linear_scan.cc b/compiler/optimizing/register_allocator_linear_scan.cc
index 4e69bc8999..1a391ce9bb 100644
--- a/compiler/optimizing/register_allocator_linear_scan.cc
+++ b/compiler/optimizing/register_allocator_linear_scan.cc
@@ -163,7 +163,7 @@ void RegisterAllocatorLinearScan::BlockRegisters(size_t start, size_t end, bool
void RegisterAllocatorLinearScan::AllocateRegistersInternal() {
// Iterate post-order, to ensure the list is sorted, and the last added interval
// is the one with the lowest start position.
- for (HBasicBlock* block : LinearPostOrder(codegen_->GetGraph()->GetLinearOrder())) {
+ for (HBasicBlock* block : codegen_->GetGraph()->GetLinearPostOrder()) {
for (HBackwardInstructionIterator back_it(block->GetInstructions()); !back_it.Done();
back_it.Advance()) {
ProcessInstruction(back_it.Current());
diff --git a/compiler/optimizing/select_generator.cc b/compiler/optimizing/select_generator.cc
index e409035d9d..46d0d0eb65 100644
--- a/compiler/optimizing/select_generator.cc
+++ b/compiler/optimizing/select_generator.cc
@@ -76,8 +76,7 @@ void HSelectGenerator::Run() {
// Iterate in post order in the unlikely case that removing one occurrence of
// the selection pattern empties a branch block of another occurrence.
// Otherwise the order does not matter.
- for (HPostOrderIterator it(*graph_); !it.Done(); it.Advance()) {
- HBasicBlock* block = it.Current();
+ for (HBasicBlock* block : graph_->GetPostOrder()) {
if (!block->EndsWithIf()) continue;
// Find elements of the diamond pattern.
diff --git a/compiler/optimizing/side_effects_analysis.cc b/compiler/optimizing/side_effects_analysis.cc
index 1dc69867b4..6d82e8e06d 100644
--- a/compiler/optimizing/side_effects_analysis.cc
+++ b/compiler/optimizing/side_effects_analysis.cc
@@ -26,8 +26,7 @@ void SideEffectsAnalysis::Run() {
// In DEBUG mode, ensure side effects are properly initialized to empty.
if (kIsDebugBuild) {
- for (HReversePostOrderIterator it(*graph_); !it.Done(); it.Advance()) {
- HBasicBlock* block = it.Current();
+ for (HBasicBlock* block : graph_->GetReversePostOrder()) {
SideEffects effects = GetBlockEffects(block);
DCHECK(effects.DoesNothing());
if (block->IsLoopHeader()) {
@@ -38,9 +37,7 @@ void SideEffectsAnalysis::Run() {
}
// Do a post order visit to ensure we visit a loop header after its loop body.
- for (HPostOrderIterator it(*graph_); !it.Done(); it.Advance()) {
- HBasicBlock* block = it.Current();
-
+ for (HBasicBlock* block : graph_->GetPostOrder()) {
SideEffects effects = SideEffects::None();
// Update `effects` with the side effects of all instructions in this block.
for (HInstructionIterator inst_it(block->GetInstructions()); !inst_it.Done();
diff --git a/compiler/optimizing/ssa_builder.cc b/compiler/optimizing/ssa_builder.cc
index 03807ba1ee..ae1e369999 100644
--- a/compiler/optimizing/ssa_builder.cc
+++ b/compiler/optimizing/ssa_builder.cc
@@ -25,8 +25,8 @@ namespace art {
void SsaBuilder::FixNullConstantType() {
// The order doesn't matter here.
- for (HReversePostOrderIterator itb(*graph_); !itb.Done(); itb.Advance()) {
- for (HInstructionIterator it(itb.Current()->GetInstructions()); !it.Done(); it.Advance()) {
+ for (HBasicBlock* block : graph_->GetReversePostOrder()) {
+ for (HInstructionIterator it(block->GetInstructions()); !it.Done(); it.Advance()) {
HInstruction* equality_instr = it.Current();
if (!equality_instr->IsEqual() && !equality_instr->IsNotEqual()) {
continue;
@@ -57,8 +57,8 @@ void SsaBuilder::FixNullConstantType() {
void SsaBuilder::EquivalentPhisCleanup() {
// The order doesn't matter here.
- for (HReversePostOrderIterator itb(*graph_); !itb.Done(); itb.Advance()) {
- for (HInstructionIterator it(itb.Current()->GetPhis()); !it.Done(); it.Advance()) {
+ for (HBasicBlock* block : graph_->GetReversePostOrder()) {
+ for (HInstructionIterator it(block->GetPhis()); !it.Done(); it.Advance()) {
HPhi* phi = it.Current()->AsPhi();
HPhi* next = phi->GetNextEquivalentPhiWithSameType();
if (next != nullptr) {
@@ -79,8 +79,7 @@ void SsaBuilder::EquivalentPhisCleanup() {
}
void SsaBuilder::FixEnvironmentPhis() {
- for (HReversePostOrderIterator it(*graph_); !it.Done(); it.Advance()) {
- HBasicBlock* block = it.Current();
+ for (HBasicBlock* block : graph_->GetReversePostOrder()) {
for (HInstructionIterator it_phis(block->GetPhis()); !it_phis.Done(); it_phis.Advance()) {
HPhi* phi = it_phis.Current()->AsPhi();
// If the phi is not dead, or has no environment uses, there is nothing to do.
@@ -228,8 +227,7 @@ bool SsaBuilder::UpdatePrimitiveType(HPhi* phi, ArenaVector<HPhi*>* worklist) {
void SsaBuilder::RunPrimitiveTypePropagation() {
ArenaVector<HPhi*> worklist(graph_->GetArena()->Adapter(kArenaAllocGraphBuilder));
- for (HReversePostOrderIterator it(*graph_); !it.Done(); it.Advance()) {
- HBasicBlock* block = it.Current();
+ for (HBasicBlock* block : graph_->GetReversePostOrder()) {
if (block->IsLoopHeader()) {
for (HInstructionIterator phi_it(block->GetPhis()); !phi_it.Done(); phi_it.Advance()) {
HPhi* phi = phi_it.Current()->AsPhi();
diff --git a/compiler/optimizing/ssa_liveness_analysis.cc b/compiler/optimizing/ssa_liveness_analysis.cc
index 76cf8fe1ae..e8e12e1a55 100644
--- a/compiler/optimizing/ssa_liveness_analysis.cc
+++ b/compiler/optimizing/ssa_liveness_analysis.cc
@@ -139,7 +139,7 @@ static void RecursivelyProcessInputs(HInstruction* current,
void SsaLivenessAnalysis::ComputeLiveRanges() {
// Do a post order visit, adding inputs of instructions live in the block where
// that instruction is defined, and killing instructions that are being visited.
- for (HBasicBlock* block : LinearPostOrder(graph_->GetLinearOrder())) {
+ for (HBasicBlock* block : ReverseRange(graph_->GetLinearOrder())) {
BitVector* kill = GetKillSet(*block);
BitVector* live_in = GetLiveInSet(*block);
@@ -256,15 +256,13 @@ void SsaLivenessAnalysis::ComputeLiveInAndLiveOutSets() {
do {
changed = false;
- for (HPostOrderIterator it(*graph_); !it.Done(); it.Advance()) {
- const HBasicBlock& block = *it.Current();
-
+ for (const HBasicBlock* block : graph_->GetPostOrder()) {
// The live_in set depends on the kill set (which does not
// change in this loop), and the live_out set. If the live_out
// set does not change, there is no need to update the live_in set.
- if (UpdateLiveOut(block) && UpdateLiveIn(block)) {
+ if (UpdateLiveOut(*block) && UpdateLiveIn(*block)) {
if (kIsDebugBuild) {
- CheckNoLiveInIrreducibleLoop(block);
+ CheckNoLiveInIrreducibleLoop(*block);
}
changed = true;
}
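The fixpoint driven by the loop above is the standard backward liveness computation; a self-contained sketch with std::set standing in for ART's BitVector (illustrative only):

    #include <set>
    #include <utility>
    #include <vector>

    // Illustrative liveness block: `gen` holds values used before being
    // defined in the block, `kill` holds values defined in the block.
    struct LivenessBlock {
      std::set<int> gen, kill, live_in, live_out;
      std::vector<size_t> successors;  // indices into the same vector
    };

    // Backward data-flow fixpoint, visiting blocks in post order as above:
    //   live_out(B) = union of live_in(S) over successors S
    //   live_in(B)  = gen(B) union (live_out(B) minus kill(B))
    inline void ComputeLiveInAndOut(std::vector<LivenessBlock>& blocks_post_order) {
      bool changed = true;
      while (changed) {
        changed = false;
        for (LivenessBlock& block : blocks_post_order) {
          std::set<int> new_out;
          for (size_t s : block.successors) {
            const std::set<int>& succ_in = blocks_post_order[s].live_in;
            new_out.insert(succ_in.begin(), succ_in.end());
          }
          std::set<int> new_in = block.gen;
          for (int value : new_out) {
            if (block.kill.count(value) == 0) {
              new_in.insert(value);
            }
          }
          if (new_out != block.live_out || new_in != block.live_in) {
            block.live_out = std::move(new_out);
            block.live_in = std::move(new_in);
            changed = true;
          }
        }
      }
    }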
diff --git a/compiler/optimizing/ssa_phi_elimination.cc b/compiler/optimizing/ssa_phi_elimination.cc
index b1ec99ab8e..aec7a3c555 100644
--- a/compiler/optimizing/ssa_phi_elimination.cc
+++ b/compiler/optimizing/ssa_phi_elimination.cc
@@ -34,8 +34,7 @@ void SsaDeadPhiElimination::MarkDeadPhis() {
ArenaSet<HPhi*> initially_live(graph_->GetArena()->Adapter(kArenaAllocSsaPhiElimination));
// Add to the worklist phis referenced by non-phi instructions.
- for (HReversePostOrderIterator it(*graph_); !it.Done(); it.Advance()) {
- HBasicBlock* block = it.Current();
+ for (HBasicBlock* block : graph_->GetReversePostOrder()) {
for (HInstructionIterator inst_it(block->GetPhis()); !inst_it.Done(); inst_it.Advance()) {
HPhi* phi = inst_it.Current()->AsPhi();
if (phi->IsDead()) {
@@ -84,8 +83,7 @@ void SsaDeadPhiElimination::EliminateDeadPhis() {
// Remove phis that are not live. Visit in post order so that phis
// that are not inputs of loop phis can be removed when they have
// no users left (dead phis might use dead phis).
- for (HPostOrderIterator it(*graph_); !it.Done(); it.Advance()) {
- HBasicBlock* block = it.Current();
+ for (HBasicBlock* block : graph_->GetPostOrder()) {
HInstruction* current = block->GetFirstPhi();
HInstruction* next = nullptr;
HPhi* phi;
@@ -119,8 +117,7 @@ void SsaDeadPhiElimination::EliminateDeadPhis() {
void SsaRedundantPhiElimination::Run() {
  // Add all phis to the worklist. Order does not matter for correctness, and
  // no particular order necessarily converges faster.
- for (HReversePostOrderIterator it(*graph_); !it.Done(); it.Advance()) {
- HBasicBlock* block = it.Current();
+ for (HBasicBlock* block : graph_->GetReversePostOrder()) {
for (HInstructionIterator inst_it(block->GetPhis()); !inst_it.Done(); inst_it.Advance()) {
worklist_.push_back(inst_it.Current()->AsPhi());
}
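For context on what the worklist above will be used for: a phi is redundant when all of its inputs are either the phi itself or one single other value, in which case it can be replaced by that value. A minimal detection sketch with a stand-in value type (not ART's HPhi):

    #include <vector>

    struct ValueSketch {
      std::vector<ValueSketch*> inputs;  // the phi operands
    };

    // Returns the unique non-self input if `phi` is redundant, nullptr otherwise.
    inline ValueSketch* FindRedundantPhiReplacement(ValueSketch* phi) {
      ValueSketch* candidate = nullptr;
      for (ValueSketch* input : phi->inputs) {
        if (input == phi) {
          continue;  // a self-reference through a loop back edge
        }
        if (candidate == nullptr) {
          candidate = input;
        } else if (candidate != input) {
          return nullptr;  // two distinct non-self inputs: not redundant
        }
      }
      return candidate;
    }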