Add support for Baker read barriers in SystemArrayCopy intrinsics.
Benchmarks (ARM64) score variations on Nexus 5X with CPU
cores clamped at 960000 Hz (aosp_bullhead-userdebug build):
- Ritzperf - average (lower is better): -3.03% (slightly better)
- CaffeineMark - average (higher is better): +1.26% (slightly better)
- DeltaBlue (lower is better): -10.50% (better)
- Richards - average (lower is better): -3.36% (slightly better)
- SciMark2 - average (higher is better): +0.26% (virtually unchanged)
Details about Ritzperf benchmarks with meaningful variations
(lower is better):
- FormulaEvaluationActions.EvaluateAndApplyChanges: -13.26% (better)
- FormulaEvaluationActions.EvaluateCascadingSums: -10.94% (better)
- FormulaEvaluationActions.EvaluateComplexFormulas: -15.50% (better)
- FormulaEvaluationActions.EvaluateFibonacci: -10.41% (better)
- FormulaEvaluationActions.EvaluateLargeSums: +6.02% (worse)
Boot image code size variation on Nexus 5X
(aosp_bullhead-userdebug build):
- total ARM64 framework Oat files size change:
107047632 bytes -> 107154128 bytes (+0.10%)
- total ARM framework Oat files size change:
90932028 bytes -> 91009852 bytes (+0.09%)
Test: ART host and target (ARM, ARM64) tests + Nexus 5X boot.
Bug: 29516905
Bug: 29506760
Bug: 12687968
Change-Id: I85431368d09965687a0301ae2eb3c991f276ce5d
diff --git a/compiler/optimizing/code_generator.cc b/compiler/optimizing/code_generator.cc
index 5152075..c532e72 100644
--- a/compiler/optimizing/code_generator.cc
+++ b/compiler/optimizing/code_generator.cc
@@ -1228,7 +1228,8 @@
instruction->IsLoadString() ||
instruction->IsInstanceOf() ||
instruction->IsCheckCast() ||
- (instruction->IsInvokeVirtual() && instruction->GetLocations()->Intrinsified()))
+ (instruction->IsInvokeVirtual() && instruction->GetLocations()->Intrinsified()) ||
+ (instruction->IsInvokeStaticOrDirect() && instruction->GetLocations()->Intrinsified()))
<< "instruction->DebugName()=" << instruction->DebugName()
<< " slow_path->GetDescription()=" << slow_path->GetDescription();
}
diff --git a/compiler/optimizing/code_generator_arm.cc b/compiler/optimizing/code_generator_arm.cc
index 4c4128c..2d95235 100644
--- a/compiler/optimizing/code_generator_arm.cc
+++ b/compiler/optimizing/code_generator_arm.cc
@@ -429,7 +429,8 @@
instruction_->IsLoadString() ||
instruction_->IsInstanceOf() ||
instruction_->IsCheckCast() ||
- (instruction_->IsInvokeVirtual()) && instruction_->GetLocations()->Intrinsified())
+ (instruction_->IsInvokeVirtual() && instruction_->GetLocations()->Intrinsified()) ||
+ (instruction_->IsInvokeStaticOrDirect() && instruction_->GetLocations()->Intrinsified()))
<< "Unexpected instruction in read barrier marking slow path: "
<< instruction_->DebugName();
@@ -441,6 +442,9 @@
DCHECK_NE(reg, SP);
DCHECK_NE(reg, LR);
DCHECK_NE(reg, PC);
+ // IP is used internally by the ReadBarrierMarkRegX entry point
+ // as a temporary, it cannot be the entry point's input/output.
+ DCHECK_NE(reg, IP);
DCHECK(0 <= reg && reg < kNumberOfCoreRegisters) << reg;
// "Compact" slow path, saving two moves.
//
@@ -6522,7 +6526,8 @@
// Introduce a dependency on the lock_word including the rb_state,
// which shall prevent load-load reordering without using
// a memory barrier (which would be more expensive).
- // obj is unchanged by this operation, but its value now depends on temp_reg.
+ // `obj` is unchanged by this operation, but its value now depends
+ // on `temp_reg`.
__ add(obj, obj, ShifterOperand(temp_reg, LSR, 32));
// The actual reference load.
diff --git a/compiler/optimizing/code_generator_arm64.cc b/compiler/optimizing/code_generator_arm64.cc
index d95e7df..14f5b24 100644
--- a/compiler/optimizing/code_generator_arm64.cc
+++ b/compiler/optimizing/code_generator_arm64.cc
@@ -595,7 +595,8 @@
instruction_->IsLoadString() ||
instruction_->IsInstanceOf() ||
instruction_->IsCheckCast() ||
- (instruction_->IsInvokeVirtual()) && instruction_->GetLocations()->Intrinsified())
+ (instruction_->IsInvokeVirtual() && instruction_->GetLocations()->Intrinsified()) ||
+ (instruction_->IsInvokeStaticOrDirect() && instruction_->GetLocations()->Intrinsified()))
<< "Unexpected instruction in read barrier marking slow path: "
<< instruction_->DebugName();
@@ -607,7 +608,8 @@
DCHECK_NE(obj_.reg(), LR);
DCHECK_NE(obj_.reg(), WSP);
DCHECK_NE(obj_.reg(), WZR);
- // WIP0 is used by the slow path as a temp, it can not be the object register.
+ // IP0 is used internally by the ReadBarrierMarkRegX entry point
+ // as a temporary, it cannot be the entry point's input/output.
DCHECK_NE(obj_.reg(), IP0);
DCHECK(0 <= obj_.reg() && obj_.reg() < kNumberOfWRegisters) << obj_.reg();
// "Compact" slow path, saving two moves.
@@ -5239,7 +5241,8 @@
// Introduce a dependency on the lock_word including rb_state,
// to prevent load-load reordering, and without using
// a memory barrier (which would be more expensive).
- // obj is unchanged by this operation, but its value now depends on temp.
+ // `obj` is unchanged by this operation, but its value now depends
+ // on `temp`.
__ Add(obj.X(), obj.X(), Operand(temp.X(), LSR, 32));
// The actual reference load.
diff --git a/compiler/optimizing/code_generator_x86.cc b/compiler/optimizing/code_generator_x86.cc
index 7a561bb..87853a6 100644
--- a/compiler/optimizing/code_generator_x86.cc
+++ b/compiler/optimizing/code_generator_x86.cc
@@ -464,7 +464,8 @@
instruction_->IsLoadString() ||
instruction_->IsInstanceOf() ||
instruction_->IsCheckCast() ||
- (instruction_->IsInvokeVirtual()) && instruction_->GetLocations()->Intrinsified())
+ (instruction_->IsInvokeVirtual() && instruction_->GetLocations()->Intrinsified()) ||
+ (instruction_->IsInvokeStaticOrDirect() && instruction_->GetLocations()->Intrinsified()))
<< "Unexpected instruction in read barrier marking slow path: "
<< instruction_->DebugName();
@@ -1578,15 +1579,15 @@
locations->SetOut(Location::SameAsFirstInput());
}
-void InstructionCodeGeneratorX86::GenerateIntCompare(Location lhs, Location rhs) {
+void CodeGeneratorX86::GenerateIntCompare(Location lhs, Location rhs) {
Register lhs_reg = lhs.AsRegister<Register>();
if (rhs.IsConstant()) {
int32_t value = CodeGenerator::GetInt32ValueOf(rhs.GetConstant());
- codegen_->Compare32BitValue(lhs_reg, value);
+ Compare32BitValue(lhs_reg, value);
} else if (rhs.IsStackSlot()) {
- __ cmpl(lhs_reg, Address(ESP, rhs.GetStackIndex()));
+ assembler_.cmpl(lhs_reg, Address(ESP, rhs.GetStackIndex()));
} else {
- __ cmpl(lhs_reg, rhs.AsRegister<Register>());
+ assembler_.cmpl(lhs_reg, rhs.AsRegister<Register>());
}
}
@@ -1619,7 +1620,7 @@
DCHECK_NE(condition->InputAt(0)->GetType(), Primitive::kPrimLong);
DCHECK(!Primitive::IsFloatingPointType(condition->InputAt(0)->GetType()));
LocationSummary* cond_locations = condition->GetLocations();
- GenerateIntCompare(cond_locations->InAt(0), cond_locations->InAt(1));
+ codegen_->GenerateIntCompare(cond_locations->InAt(0), cond_locations->InAt(1));
cond = X86Condition(condition->GetCondition());
}
} else {
@@ -1728,7 +1729,7 @@
// Clear output register: setb only sets the low byte.
__ xorl(reg, reg);
- GenerateIntCompare(lhs, rhs);
+ codegen_->GenerateIntCompare(lhs, rhs);
__ setb(X86Condition(cond->GetCondition()), reg);
return;
}
@@ -4210,7 +4211,7 @@
case Primitive::kPrimShort:
case Primitive::kPrimChar:
case Primitive::kPrimInt: {
- GenerateIntCompare(left, right);
+ codegen_->GenerateIntCompare(left, right);
break;
}
case Primitive::kPrimLong: {
diff --git a/compiler/optimizing/code_generator_x86.h b/compiler/optimizing/code_generator_x86.h
index f306b33..894f2e8 100644
--- a/compiler/optimizing/code_generator_x86.h
+++ b/compiler/optimizing/code_generator_x86.h
@@ -295,7 +295,6 @@
HBasicBlock* default_block);
void GenerateFPCompare(Location lhs, Location rhs, HInstruction* insn, bool is_double);
- void GenerateIntCompare(Location lhs, Location rhs);
X86Assembler* const assembler_;
CodeGeneratorX86* const codegen_;
@@ -431,6 +430,8 @@
Register value,
bool value_can_be_null);
+ void GenerateIntCompare(Location lhs, Location rhs);
+
void GenerateMemoryBarrier(MemBarrierKind kind);
Label* GetLabelOf(HBasicBlock* block) const {
diff --git a/compiler/optimizing/code_generator_x86_64.cc b/compiler/optimizing/code_generator_x86_64.cc
index cf01a79..253eae6 100644
--- a/compiler/optimizing/code_generator_x86_64.cc
+++ b/compiler/optimizing/code_generator_x86_64.cc
@@ -485,7 +485,8 @@
instruction_->IsLoadString() ||
instruction_->IsInstanceOf() ||
instruction_->IsCheckCast() ||
- (instruction_->IsInvokeVirtual()) && instruction_->GetLocations()->Intrinsified())
+ (instruction_->IsInvokeVirtual() && instruction_->GetLocations()->Intrinsified()) ||
+ (instruction_->IsInvokeStaticOrDirect() && instruction_->GetLocations()->Intrinsified()))
<< "Unexpected instruction in read barrier marking slow path: "
<< instruction_->DebugName();
diff --git a/compiler/optimizing/intrinsics_arm.cc b/compiler/optimizing/intrinsics_arm.cc
index 27d9d48..0bbc0e5 100644
--- a/compiler/optimizing/intrinsics_arm.cc
+++ b/compiler/optimizing/intrinsics_arm.cc
@@ -41,6 +41,92 @@
using IntrinsicSlowPathARM = IntrinsicSlowPath<InvokeDexCallingConventionVisitorARM>;
+// NOLINT on __ macro to suppress wrong warning/fix (misc-macro-parentheses) from clang-tidy.
+#define __ down_cast<ArmAssembler*>(codegen->GetAssembler())-> // NOLINT
+
+// Slow path implementing the SystemArrayCopy intrinsic copy loop with read barriers.
+class ReadBarrierSystemArrayCopySlowPathARM : public SlowPathCode {
+ public:
+ explicit ReadBarrierSystemArrayCopySlowPathARM(HInstruction* instruction)
+ : SlowPathCode(instruction) {
+ DCHECK(kEmitCompilerReadBarrier);
+ DCHECK(kUseBakerReadBarrier);
+ }
+
+ void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
+ CodeGeneratorARM* arm_codegen = down_cast<CodeGeneratorARM*>(codegen);
+ LocationSummary* locations = instruction_->GetLocations();
+ DCHECK(locations->CanCall());
+ DCHECK(instruction_->IsInvokeStaticOrDirect())
+ << "Unexpected instruction in read barrier arraycopy slow path: "
+ << instruction_->DebugName();
+ DCHECK(instruction_->GetLocations()->Intrinsified());
+ DCHECK_EQ(instruction_->AsInvoke()->GetIntrinsic(), Intrinsics::kSystemArrayCopy);
+
+ int32_t element_size = Primitive::ComponentSize(Primitive::kPrimNot);
+ uint32_t element_size_shift = Primitive::ComponentSizeShift(Primitive::kPrimNot);
+ uint32_t offset = mirror::Array::DataOffset(element_size).Uint32Value();
+
+ Register dest = locations->InAt(2).AsRegister<Register>();
+ Location dest_pos = locations->InAt(3);
+ Register src_curr_addr = locations->GetTemp(0).AsRegister<Register>();
+ Register dst_curr_addr = locations->GetTemp(1).AsRegister<Register>();
+ Register src_stop_addr = locations->GetTemp(2).AsRegister<Register>();
+ Register tmp = locations->GetTemp(3).AsRegister<Register>();
+
+ __ Bind(GetEntryLabel());
+ // Compute the base destination address in `dst_curr_addr`.
+ if (dest_pos.IsConstant()) {
+ int32_t constant = dest_pos.GetConstant()->AsIntConstant()->GetValue();
+ __ AddConstant(dst_curr_addr, dest, element_size * constant + offset);
+ } else {
+ __ add(dst_curr_addr,
+ dest,
+ ShifterOperand(dest_pos.AsRegister<Register>(), LSL, element_size_shift));
+ __ AddConstant(dst_curr_addr, offset);
+ }
+
+ Label loop;
+ __ Bind(&loop);
+ __ ldr(tmp, Address(src_curr_addr, element_size, Address::PostIndex));
+ __ MaybeUnpoisonHeapReference(tmp);
+ // TODO: Inline the mark bit check before calling the runtime?
+ // tmp = ReadBarrier::Mark(tmp);
+ // No need to save live registers; it's taken care of by the
+ // entrypoint. Also, there is no need to update the stack mask,
+ // as this runtime call will not trigger a garbage collection.
+ // (See ReadBarrierMarkSlowPathARM::EmitNativeCode for more
+ // explanations.)
+ DCHECK_NE(tmp, SP);
+ DCHECK_NE(tmp, LR);
+ DCHECK_NE(tmp, PC);
+ // IP is used internally by the ReadBarrierMarkRegX entry point
+ // as a temporary (and not preserved). It thus cannot be used by
+ // any live register in this slow path.
+ DCHECK_NE(src_curr_addr, IP);
+ DCHECK_NE(dst_curr_addr, IP);
+ DCHECK_NE(src_stop_addr, IP);
+ DCHECK_NE(tmp, IP);
+ DCHECK(0 <= tmp && tmp < kNumberOfCoreRegisters) << tmp;
+ int32_t entry_point_offset =
+ CodeGenerator::GetReadBarrierMarkEntryPointsOffset<kArmPointerSize>(tmp);
+ // This runtime call does not require a stack map.
+ arm_codegen->InvokeRuntimeWithoutRecordingPcInfo(entry_point_offset, instruction_, this);
+ __ MaybePoisonHeapReference(tmp);
+ __ str(tmp, Address(dst_curr_addr, element_size, Address::PostIndex));
+ __ cmp(src_curr_addr, ShifterOperand(src_stop_addr));
+ __ b(&loop, NE);
+ __ b(GetExitLabel());
+ }
+
+ const char* GetDescription() const OVERRIDE { return "ReadBarrierSystemArrayCopySlowPathARM"; }
+
+ private:
+ DISALLOW_COPY_AND_ASSIGN(ReadBarrierSystemArrayCopySlowPathARM);
+};
+
+#undef __
+
bool IntrinsicLocationsBuilderARM::TryDispatch(HInvoke* invoke) {
Dispatch(invoke);
LocationSummary* res = invoke->GetLocations();
@@ -1337,9 +1423,9 @@
}
void IntrinsicLocationsBuilderARM::VisitSystemArrayCopy(HInvoke* invoke) {
- // TODO(rpl): Implement read barriers in the SystemArrayCopy
- // intrinsic and re-enable it (b/29516905).
- if (kEmitCompilerReadBarrier) {
+ // The only read barrier implementation supporting the
+ // SystemArrayCopy intrinsic is the Baker-style read barriers.
+ if (kEmitCompilerReadBarrier && !kUseBakerReadBarrier) {
return;
}
@@ -1362,6 +1448,13 @@
if (length != nullptr && !assembler_->ShifterOperandCanAlwaysHold(length->GetValue())) {
locations->SetInAt(4, Location::RequiresRegister());
}
+ if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
+ // Temporary register IP cannot be used in
+ // ReadBarrierSystemArrayCopySlowPathARM64 (because that register
+ // is clobbered by ReadBarrierMarkRegX entry points). Get an extra
+ // temporary register from the register allocator.
+ locations->AddTemp(Location::RequiresRegister());
+ }
}
static void CheckPosition(ArmAssembler* assembler,
@@ -1427,9 +1520,9 @@
}
void IntrinsicCodeGeneratorARM::VisitSystemArrayCopy(HInvoke* invoke) {
- // TODO(rpl): Implement read barriers in the SystemArrayCopy
- // intrinsic and re-enable it (b/29516905).
- DCHECK(!kEmitCompilerReadBarrier);
+ // The only read barrier implementation supporting the
+ // SystemArrayCopy intrinsic is the Baker-style read barriers.
+ DCHECK(!kEmitCompilerReadBarrier || kUseBakerReadBarrier);
ArmAssembler* assembler = GetAssembler();
LocationSummary* locations = invoke->GetLocations();
@@ -1438,18 +1531,22 @@
uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value();
uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value();
uint32_t primitive_offset = mirror::Class::PrimitiveTypeOffset().Int32Value();
+ uint32_t monitor_offset = mirror::Object::MonitorOffset().Int32Value();
Register src = locations->InAt(0).AsRegister<Register>();
Location src_pos = locations->InAt(1);
Register dest = locations->InAt(2).AsRegister<Register>();
Location dest_pos = locations->InAt(3);
Location length = locations->InAt(4);
- Register temp1 = locations->GetTemp(0).AsRegister<Register>();
- Register temp2 = locations->GetTemp(1).AsRegister<Register>();
- Register temp3 = locations->GetTemp(2).AsRegister<Register>();
+ Location temp1_loc = locations->GetTemp(0);
+ Register temp1 = temp1_loc.AsRegister<Register>();
+ Location temp2_loc = locations->GetTemp(1);
+ Register temp2 = temp2_loc.AsRegister<Register>();
+ Location temp3_loc = locations->GetTemp(2);
+ Register temp3 = temp3_loc.AsRegister<Register>();
- SlowPathCode* slow_path = new (GetAllocator()) IntrinsicSlowPathARM(invoke);
- codegen_->AddSlowPath(slow_path);
+ SlowPathCode* intrinsic_slow_path = new (GetAllocator()) IntrinsicSlowPathARM(invoke);
+ codegen_->AddSlowPath(intrinsic_slow_path);
Label conditions_on_positions_validated;
SystemArrayCopyOptimizations optimizations(invoke);
@@ -1465,7 +1562,7 @@
DCHECK_GE(src_pos_constant, dest_pos_constant);
} else if (src_pos_constant < dest_pos_constant) {
__ cmp(src, ShifterOperand(dest));
- __ b(slow_path->GetEntryLabel(), EQ);
+ __ b(intrinsic_slow_path->GetEntryLabel(), EQ);
}
// Checked when building locations.
@@ -1477,7 +1574,7 @@
__ b(&conditions_on_positions_validated, NE);
}
__ cmp(dest_pos.AsRegister<Register>(), ShifterOperand(src_pos_constant));
- __ b(slow_path->GetEntryLabel(), GT);
+ __ b(intrinsic_slow_path->GetEntryLabel(), GT);
}
} else {
if (!optimizations.GetDestinationIsSource()) {
@@ -1490,19 +1587,19 @@
} else {
__ cmp(src_pos.AsRegister<Register>(), ShifterOperand(dest_pos.AsRegister<Register>()));
}
- __ b(slow_path->GetEntryLabel(), LT);
+ __ b(intrinsic_slow_path->GetEntryLabel(), LT);
}
__ Bind(&conditions_on_positions_validated);
if (!optimizations.GetSourceIsNotNull()) {
// Bail out if the source is null.
- __ CompareAndBranchIfZero(src, slow_path->GetEntryLabel());
+ __ CompareAndBranchIfZero(src, intrinsic_slow_path->GetEntryLabel());
}
if (!optimizations.GetDestinationIsNotNull() && !optimizations.GetDestinationIsSource()) {
// Bail out if the destination is null.
- __ CompareAndBranchIfZero(dest, slow_path->GetEntryLabel());
+ __ CompareAndBranchIfZero(dest, intrinsic_slow_path->GetEntryLabel());
}
// If the length is negative, bail out.
@@ -1511,7 +1608,7 @@
!optimizations.GetCountIsSourceLength() &&
!optimizations.GetCountIsDestinationLength()) {
__ cmp(length.AsRegister<Register>(), ShifterOperand(0));
- __ b(slow_path->GetEntryLabel(), LT);
+ __ b(intrinsic_slow_path->GetEntryLabel(), LT);
}
// Validity checks: source.
@@ -1519,7 +1616,7 @@
src_pos,
src,
length,
- slow_path,
+ intrinsic_slow_path,
temp1,
optimizations.GetCountIsSourceLength());
@@ -1528,7 +1625,7 @@
dest_pos,
dest,
length,
- slow_path,
+ intrinsic_slow_path,
temp1,
optimizations.GetCountIsDestinationLength());
@@ -1537,112 +1634,287 @@
// type of the destination array. We do two checks: the classes are the same,
// or the destination is Object[]. If none of these checks succeed, we go to the
// slow path.
- __ LoadFromOffset(kLoadWord, temp1, dest, class_offset);
- __ LoadFromOffset(kLoadWord, temp2, src, class_offset);
- bool did_unpoison = false;
- if (!optimizations.GetDestinationIsNonPrimitiveArray() ||
- !optimizations.GetSourceIsNonPrimitiveArray()) {
- // One or two of the references need to be unpoisoned. Unpoison them
- // both to make the identity check valid.
- __ MaybeUnpoisonHeapReference(temp1);
- __ MaybeUnpoisonHeapReference(temp2);
- did_unpoison = true;
- }
- if (!optimizations.GetDestinationIsNonPrimitiveArray()) {
- // Bail out if the destination is not a non primitive array.
- // /* HeapReference<Class> */ temp3 = temp1->component_type_
- __ LoadFromOffset(kLoadWord, temp3, temp1, component_offset);
- __ CompareAndBranchIfZero(temp3, slow_path->GetEntryLabel());
- __ MaybeUnpoisonHeapReference(temp3);
- __ LoadFromOffset(kLoadUnsignedHalfword, temp3, temp3, primitive_offset);
- static_assert(Primitive::kPrimNot == 0, "Expected 0 for kPrimNot");
- __ CompareAndBranchIfNonZero(temp3, slow_path->GetEntryLabel());
- }
-
- if (!optimizations.GetSourceIsNonPrimitiveArray()) {
- // Bail out if the source is not a non primitive array.
- // /* HeapReference<Class> */ temp3 = temp2->component_type_
- __ LoadFromOffset(kLoadWord, temp3, temp2, component_offset);
- __ CompareAndBranchIfZero(temp3, slow_path->GetEntryLabel());
- __ MaybeUnpoisonHeapReference(temp3);
- __ LoadFromOffset(kLoadUnsignedHalfword, temp3, temp3, primitive_offset);
- static_assert(Primitive::kPrimNot == 0, "Expected 0 for kPrimNot");
- __ CompareAndBranchIfNonZero(temp3, slow_path->GetEntryLabel());
- }
-
- __ cmp(temp1, ShifterOperand(temp2));
-
- if (optimizations.GetDestinationIsTypedObjectArray()) {
- Label do_copy;
- __ b(&do_copy, EQ);
- if (!did_unpoison) {
- __ MaybeUnpoisonHeapReference(temp1);
+ if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
+ if (!optimizations.GetSourceIsNonPrimitiveArray()) {
+ // /* HeapReference<Class> */ temp1 = src->klass_
+ codegen_->GenerateFieldLoadWithBakerReadBarrier(
+ invoke, temp1_loc, src, class_offset, temp2_loc, /* needs_null_check */ false);
+ // Bail out if the source is not a non primitive array.
+ // /* HeapReference<Class> */ temp1 = temp1->component_type_
+ codegen_->GenerateFieldLoadWithBakerReadBarrier(
+ invoke, temp1_loc, temp1, component_offset, temp2_loc, /* needs_null_check */ false);
+ __ CompareAndBranchIfZero(temp1, intrinsic_slow_path->GetEntryLabel());
+ // If heap poisoning is enabled, `temp1` has been unpoisoned
+ // by the the previous call to GenerateFieldLoadWithBakerReadBarrier.
+ // /* uint16_t */ temp1 = static_cast<uint16>(temp1->primitive_type_);
+ __ LoadFromOffset(kLoadUnsignedHalfword, temp1, temp1, primitive_offset);
+ static_assert(Primitive::kPrimNot == 0, "Expected 0 for kPrimNot");
+ __ CompareAndBranchIfNonZero(temp1, intrinsic_slow_path->GetEntryLabel());
}
- // /* HeapReference<Class> */ temp1 = temp1->component_type_
- __ LoadFromOffset(kLoadWord, temp1, temp1, component_offset);
- __ MaybeUnpoisonHeapReference(temp1);
- // /* HeapReference<Class> */ temp1 = temp1->super_class_
- __ LoadFromOffset(kLoadWord, temp1, temp1, super_offset);
- // No need to unpoison the result, we're comparing against null.
- __ CompareAndBranchIfNonZero(temp1, slow_path->GetEntryLabel());
- __ Bind(&do_copy);
+
+ // /* HeapReference<Class> */ temp1 = dest->klass_
+ codegen_->GenerateFieldLoadWithBakerReadBarrier(
+ invoke, temp1_loc, dest, class_offset, temp2_loc, /* needs_null_check */ false);
+
+ if (!optimizations.GetDestinationIsNonPrimitiveArray()) {
+ // Bail out if the destination is not a non primitive array.
+ //
+ // Register `temp1` is not trashed by the read barrier emitted
+ // by GenerateFieldLoadWithBakerReadBarrier below, as that
+ // method produces a call to a ReadBarrierMarkRegX entry point,
+ // which saves all potentially live registers, including
+ // temporaries such a `temp1`.
+ // /* HeapReference<Class> */ temp2 = temp1->component_type_
+ codegen_->GenerateFieldLoadWithBakerReadBarrier(
+ invoke, temp2_loc, temp1, component_offset, temp3_loc, /* needs_null_check */ false);
+ __ CompareAndBranchIfZero(temp2, intrinsic_slow_path->GetEntryLabel());
+ // If heap poisoning is enabled, `temp2` has been unpoisoned
+ // by the the previous call to GenerateFieldLoadWithBakerReadBarrier.
+ // /* uint16_t */ temp2 = static_cast<uint16>(temp2->primitive_type_);
+ __ LoadFromOffset(kLoadUnsignedHalfword, temp2, temp2, primitive_offset);
+ static_assert(Primitive::kPrimNot == 0, "Expected 0 for kPrimNot");
+ __ CompareAndBranchIfNonZero(temp2, intrinsic_slow_path->GetEntryLabel());
+ }
+
+ // For the same reason given earlier, `temp1` is not trashed by the
+ // read barrier emitted by GenerateFieldLoadWithBakerReadBarrier below.
+ // /* HeapReference<Class> */ temp2 = src->klass_
+ codegen_->GenerateFieldLoadWithBakerReadBarrier(
+ invoke, temp2_loc, src, class_offset, temp3_loc, /* needs_null_check */ false);
+ // Note: if heap poisoning is on, we are comparing two unpoisoned references here.
+ __ cmp(temp1, ShifterOperand(temp2));
+
+ if (optimizations.GetDestinationIsTypedObjectArray()) {
+ Label do_copy;
+ __ b(&do_copy, EQ);
+ // /* HeapReference<Class> */ temp1 = temp1->component_type_
+ codegen_->GenerateFieldLoadWithBakerReadBarrier(
+ invoke, temp1_loc, temp1, component_offset, temp2_loc, /* needs_null_check */ false);
+ // /* HeapReference<Class> */ temp1 = temp1->super_class_
+ // We do not need to emit a read barrier for the following
+ // heap reference load, as `temp1` is only used in a
+ // comparison with null below, and this reference is not
+ // kept afterwards.
+ __ LoadFromOffset(kLoadWord, temp1, temp1, super_offset);
+ __ CompareAndBranchIfNonZero(temp1, intrinsic_slow_path->GetEntryLabel());
+ __ Bind(&do_copy);
+ } else {
+ __ b(intrinsic_slow_path->GetEntryLabel(), NE);
+ }
} else {
- __ b(slow_path->GetEntryLabel(), NE);
+ // Non read barrier code.
+
+ // /* HeapReference<Class> */ temp1 = dest->klass_
+ __ LoadFromOffset(kLoadWord, temp1, dest, class_offset);
+ // /* HeapReference<Class> */ temp2 = src->klass_
+ __ LoadFromOffset(kLoadWord, temp2, src, class_offset);
+ bool did_unpoison = false;
+ if (!optimizations.GetDestinationIsNonPrimitiveArray() ||
+ !optimizations.GetSourceIsNonPrimitiveArray()) {
+ // One or two of the references need to be unpoisoned. Unpoison them
+ // both to make the identity check valid.
+ __ MaybeUnpoisonHeapReference(temp1);
+ __ MaybeUnpoisonHeapReference(temp2);
+ did_unpoison = true;
+ }
+
+ if (!optimizations.GetDestinationIsNonPrimitiveArray()) {
+ // Bail out if the destination is not a non primitive array.
+ // /* HeapReference<Class> */ temp3 = temp1->component_type_
+ __ LoadFromOffset(kLoadWord, temp3, temp1, component_offset);
+ __ CompareAndBranchIfZero(temp3, intrinsic_slow_path->GetEntryLabel());
+ __ MaybeUnpoisonHeapReference(temp3);
+ // /* uint16_t */ temp3 = static_cast<uint16>(temp3->primitive_type_);
+ __ LoadFromOffset(kLoadUnsignedHalfword, temp3, temp3, primitive_offset);
+ static_assert(Primitive::kPrimNot == 0, "Expected 0 for kPrimNot");
+ __ CompareAndBranchIfNonZero(temp3, intrinsic_slow_path->GetEntryLabel());
+ }
+
+ if (!optimizations.GetSourceIsNonPrimitiveArray()) {
+ // Bail out if the source is not a non primitive array.
+ // /* HeapReference<Class> */ temp3 = temp2->component_type_
+ __ LoadFromOffset(kLoadWord, temp3, temp2, component_offset);
+ __ CompareAndBranchIfZero(temp3, intrinsic_slow_path->GetEntryLabel());
+ __ MaybeUnpoisonHeapReference(temp3);
+ // /* uint16_t */ temp3 = static_cast<uint16>(temp3->primitive_type_);
+ __ LoadFromOffset(kLoadUnsignedHalfword, temp3, temp3, primitive_offset);
+ static_assert(Primitive::kPrimNot == 0, "Expected 0 for kPrimNot");
+ __ CompareAndBranchIfNonZero(temp3, intrinsic_slow_path->GetEntryLabel());
+ }
+
+ __ cmp(temp1, ShifterOperand(temp2));
+
+ if (optimizations.GetDestinationIsTypedObjectArray()) {
+ Label do_copy;
+ __ b(&do_copy, EQ);
+ if (!did_unpoison) {
+ __ MaybeUnpoisonHeapReference(temp1);
+ }
+ // /* HeapReference<Class> */ temp1 = temp1->component_type_
+ __ LoadFromOffset(kLoadWord, temp1, temp1, component_offset);
+ __ MaybeUnpoisonHeapReference(temp1);
+ // /* HeapReference<Class> */ temp1 = temp1->super_class_
+ __ LoadFromOffset(kLoadWord, temp1, temp1, super_offset);
+ // No need to unpoison the result, we're comparing against null.
+ __ CompareAndBranchIfNonZero(temp1, intrinsic_slow_path->GetEntryLabel());
+ __ Bind(&do_copy);
+ } else {
+ __ b(intrinsic_slow_path->GetEntryLabel(), NE);
+ }
}
} else if (!optimizations.GetSourceIsNonPrimitiveArray()) {
DCHECK(optimizations.GetDestinationIsNonPrimitiveArray());
// Bail out if the source is not a non primitive array.
- // /* HeapReference<Class> */ temp1 = src->klass_
- __ LoadFromOffset(kLoadWord, temp1, src, class_offset);
- __ MaybeUnpoisonHeapReference(temp1);
- // /* HeapReference<Class> */ temp3 = temp1->component_type_
- __ LoadFromOffset(kLoadWord, temp3, temp1, component_offset);
- __ CompareAndBranchIfZero(temp3, slow_path->GetEntryLabel());
- __ MaybeUnpoisonHeapReference(temp3);
+ if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
+ // /* HeapReference<Class> */ temp1 = src->klass_
+ codegen_->GenerateFieldLoadWithBakerReadBarrier(
+ invoke, temp1_loc, src, class_offset, temp2_loc, /* needs_null_check */ false);
+ // /* HeapReference<Class> */ temp3 = temp1->component_type_
+ codegen_->GenerateFieldLoadWithBakerReadBarrier(
+ invoke, temp3_loc, temp1, component_offset, temp2_loc, /* needs_null_check */ false);
+ __ CompareAndBranchIfZero(temp3, intrinsic_slow_path->GetEntryLabel());
+ // If heap poisoning is enabled, `temp3` has been unpoisoned
+ // by the the previous call to GenerateFieldLoadWithBakerReadBarrier.
+ } else {
+ // /* HeapReference<Class> */ temp1 = src->klass_
+ __ LoadFromOffset(kLoadWord, temp1, src, class_offset);
+ __ MaybeUnpoisonHeapReference(temp1);
+ // /* HeapReference<Class> */ temp3 = temp1->component_type_
+ __ LoadFromOffset(kLoadWord, temp3, temp1, component_offset);
+ __ CompareAndBranchIfZero(temp3, intrinsic_slow_path->GetEntryLabel());
+ __ MaybeUnpoisonHeapReference(temp3);
+ }
+ // /* uint16_t */ temp3 = static_cast<uint16>(temp3->primitive_type_);
__ LoadFromOffset(kLoadUnsignedHalfword, temp3, temp3, primitive_offset);
static_assert(Primitive::kPrimNot == 0, "Expected 0 for kPrimNot");
- __ CompareAndBranchIfNonZero(temp3, slow_path->GetEntryLabel());
+ __ CompareAndBranchIfNonZero(temp3, intrinsic_slow_path->GetEntryLabel());
}
- // Compute base source address, base destination address, and end source address.
-
int32_t element_size = Primitive::ComponentSize(Primitive::kPrimNot);
+ uint32_t element_size_shift = Primitive::ComponentSizeShift(Primitive::kPrimNot);
uint32_t offset = mirror::Array::DataOffset(element_size).Uint32Value();
+
+ // Compute the base source address in `temp1`.
if (src_pos.IsConstant()) {
int32_t constant = src_pos.GetConstant()->AsIntConstant()->GetValue();
__ AddConstant(temp1, src, element_size * constant + offset);
} else {
- __ add(temp1, src, ShifterOperand(src_pos.AsRegister<Register>(), LSL, 2));
+ __ add(temp1, src, ShifterOperand(src_pos.AsRegister<Register>(), LSL, element_size_shift));
__ AddConstant(temp1, offset);
}
- if (dest_pos.IsConstant()) {
- int32_t constant = dest_pos.GetConstant()->AsIntConstant()->GetValue();
- __ AddConstant(temp2, dest, element_size * constant + offset);
- } else {
- __ add(temp2, dest, ShifterOperand(dest_pos.AsRegister<Register>(), LSL, 2));
- __ AddConstant(temp2, offset);
- }
-
+ // Compute the end source address in `temp3`.
if (length.IsConstant()) {
int32_t constant = length.GetConstant()->AsIntConstant()->GetValue();
__ AddConstant(temp3, temp1, element_size * constant);
} else {
- __ add(temp3, temp1, ShifterOperand(length.AsRegister<Register>(), LSL, 2));
+ __ add(temp3, temp1, ShifterOperand(length.AsRegister<Register>(), LSL, element_size_shift));
}
- // Iterate over the arrays and do a raw copy of the objects. We don't need to
- // poison/unpoison.
- Label loop, done;
- __ cmp(temp1, ShifterOperand(temp3));
- __ b(&done, EQ);
- __ Bind(&loop);
- __ ldr(IP, Address(temp1, element_size, Address::PostIndex));
- __ str(IP, Address(temp2, element_size, Address::PostIndex));
- __ cmp(temp1, ShifterOperand(temp3));
- __ b(&loop, NE);
- __ Bind(&done);
+ if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
+ // The base destination address is computed later, as `temp2` is
+ // used for intermediate computations.
+
+ // SystemArrayCopy implementation for Baker read barriers (see
+ // also CodeGeneratorARM::GenerateReferenceLoadWithBakerReadBarrier):
+ //
+ // if (src_ptr != end_ptr) {
+ // uint32_t rb_state = Lockword(src->monitor_).ReadBarrierState();
+ // lfence; // Load fence or artificial data dependency to prevent load-load reordering
+ // bool is_gray = (rb_state == ReadBarrier::gray_ptr_);
+ // if (is_gray) {
+ // // Slow-path copy.
+ // do {
+ // *dest_ptr++ = MaybePoison(ReadBarrier::Mark(MaybeUnpoison(*src_ptr++)));
+ // } while (src_ptr != end_ptr)
+ // } else {
+ // // Fast-path copy.
+ // do {
+ // *dest_ptr++ = *src_ptr++;
+ // } while (src_ptr != end_ptr)
+ // }
+ // }
+
+ Label loop, done;
+
+ // Don't enter copy loop if `length == 0`.
+ __ cmp(temp1, ShifterOperand(temp3));
+ __ b(&done, EQ);
+
+ // /* int32_t */ monitor = src->monitor_
+ __ LoadFromOffset(kLoadWord, temp2, src, monitor_offset);
+ // /* LockWord */ lock_word = LockWord(monitor)
+ static_assert(sizeof(LockWord) == sizeof(int32_t),
+ "art::LockWord and int32_t have different sizes.");
+
+ // Introduce a dependency on the lock_word including the rb_state,
+ // which shall prevent load-load reordering without using
+ // a memory barrier (which would be more expensive).
+ // `src` is unchanged by this operation, but its value now depends
+ // on `temp2`.
+ __ add(src, src, ShifterOperand(temp2, LSR, 32));
+
+ // Slow path used to copy array when `src` is gray.
+ SlowPathCode* read_barrier_slow_path =
+ new (GetAllocator()) ReadBarrierSystemArrayCopySlowPathARM(invoke);
+ codegen_->AddSlowPath(read_barrier_slow_path);
+
+ // Given the numeric representation, it's enough to check the low bit of the
+ // rb_state. We do that by shifting the bit out of the lock word with LSRS
+ // which can be a 16-bit instruction unlike the TST immediate.
+ static_assert(ReadBarrier::white_ptr_ == 0, "Expecting white to have value 0");
+ static_assert(ReadBarrier::gray_ptr_ == 1, "Expecting gray to have value 1");
+ static_assert(ReadBarrier::black_ptr_ == 2, "Expecting black to have value 2");
+ __ Lsrs(temp2, temp2, LockWord::kReadBarrierStateShift + 1);
+ // Carry flag is the last bit shifted out by LSRS.
+ __ b(read_barrier_slow_path->GetEntryLabel(), CS);
+
+ // Fast-path copy.
+
+ // Compute the base destination address in `temp2`.
+ if (dest_pos.IsConstant()) {
+ int32_t constant = dest_pos.GetConstant()->AsIntConstant()->GetValue();
+ __ AddConstant(temp2, dest, element_size * constant + offset);
+ } else {
+ __ add(temp2, dest, ShifterOperand(dest_pos.AsRegister<Register>(), LSL, element_size_shift));
+ __ AddConstant(temp2, offset);
+ }
+
+ // Iterate over the arrays and do a raw copy of the objects. We don't need to
+ // poison/unpoison.
+ __ Bind(&loop);
+ __ ldr(IP, Address(temp1, element_size, Address::PostIndex));
+ __ str(IP, Address(temp2, element_size, Address::PostIndex));
+ __ cmp(temp1, ShifterOperand(temp3));
+ __ b(&loop, NE);
+
+ __ Bind(read_barrier_slow_path->GetExitLabel());
+ __ Bind(&done);
+ } else {
+ // Non read barrier code.
+
+ // Compute the base destination address in `temp2`.
+ if (dest_pos.IsConstant()) {
+ int32_t constant = dest_pos.GetConstant()->AsIntConstant()->GetValue();
+ __ AddConstant(temp2, dest, element_size * constant + offset);
+ } else {
+ __ add(temp2, dest, ShifterOperand(dest_pos.AsRegister<Register>(), LSL, element_size_shift));
+ __ AddConstant(temp2, offset);
+ }
+
+ // Iterate over the arrays and do a raw copy of the objects. We don't need to
+ // poison/unpoison.
+ Label loop, done;
+ __ cmp(temp1, ShifterOperand(temp3));
+ __ b(&done, EQ);
+ __ Bind(&loop);
+ __ ldr(IP, Address(temp1, element_size, Address::PostIndex));
+ __ str(IP, Address(temp2, element_size, Address::PostIndex));
+ __ cmp(temp1, ShifterOperand(temp3));
+ __ b(&loop, NE);
+ __ Bind(&done);
+ }
// We only need one card marking on the destination array.
codegen_->MarkGCCard(temp1,
@@ -1651,7 +1923,7 @@
Register(kNoRegister),
/* value_can_be_null */ false);
- __ Bind(slow_path->GetExitLabel());
+ __ Bind(intrinsic_slow_path->GetExitLabel());
}
static void CreateFPToFPCallLocations(ArenaAllocator* arena, HInvoke* invoke) {
diff --git a/compiler/optimizing/intrinsics_arm64.cc b/compiler/optimizing/intrinsics_arm64.cc
index 9cfe3ce..91374b3 100644
--- a/compiler/optimizing/intrinsics_arm64.cc
+++ b/compiler/optimizing/intrinsics_arm64.cc
@@ -144,6 +144,73 @@
DISALLOW_COPY_AND_ASSIGN(IntrinsicSlowPathARM64);
};
+// Slow path implementing the SystemArrayCopy intrinsic copy loop with read barriers.
+class ReadBarrierSystemArrayCopySlowPathARM64 : public SlowPathCodeARM64 {
+ public:
+ ReadBarrierSystemArrayCopySlowPathARM64(HInstruction* instruction, Location tmp)
+ : SlowPathCodeARM64(instruction), tmp_(tmp) {
+ DCHECK(kEmitCompilerReadBarrier);
+ DCHECK(kUseBakerReadBarrier);
+ }
+
+ void EmitNativeCode(CodeGenerator* codegen_in) OVERRIDE {
+ CodeGeneratorARM64* codegen = down_cast<CodeGeneratorARM64*>(codegen_in);
+ LocationSummary* locations = instruction_->GetLocations();
+ DCHECK(locations->CanCall());
+ DCHECK(instruction_->IsInvokeStaticOrDirect())
+ << "Unexpected instruction in read barrier arraycopy slow path: "
+ << instruction_->DebugName();
+ DCHECK(instruction_->GetLocations()->Intrinsified());
+ DCHECK_EQ(instruction_->AsInvoke()->GetIntrinsic(), Intrinsics::kSystemArrayCopy);
+
+ const int32_t element_size = Primitive::ComponentSize(Primitive::kPrimNot);
+
+ Register src_curr_addr = XRegisterFrom(locations->GetTemp(0));
+ Register dst_curr_addr = XRegisterFrom(locations->GetTemp(1));
+ Register src_stop_addr = XRegisterFrom(locations->GetTemp(2));
+ Register tmp_reg = WRegisterFrom(tmp_);
+
+ __ Bind(GetEntryLabel());
+ vixl::aarch64::Label slow_copy_loop;
+ __ Bind(&slow_copy_loop);
+ __ Ldr(tmp_reg, MemOperand(src_curr_addr, element_size, PostIndex));
+ codegen->GetAssembler()->MaybeUnpoisonHeapReference(tmp_reg);
+ // TODO: Inline the mark bit check before calling the runtime?
+ // tmp_reg = ReadBarrier::Mark(tmp_reg);
+ // No need to save live registers; it's taken care of by the
+ // entrypoint. Also, there is no need to update the stack mask,
+ // as this runtime call will not trigger a garbage collection.
+ // (See ReadBarrierMarkSlowPathARM64::EmitNativeCode for more
+ // explanations.)
+ DCHECK_NE(tmp_.reg(), LR);
+ DCHECK_NE(tmp_.reg(), WSP);
+ DCHECK_NE(tmp_.reg(), WZR);
+ // IP0 is used internally by the ReadBarrierMarkRegX entry point
+ // as a temporary (and not preserved). It thus cannot be used by
+ // any live register in this slow path.
+ DCHECK_NE(LocationFrom(src_curr_addr).reg(), IP0);
+ DCHECK_NE(LocationFrom(dst_curr_addr).reg(), IP0);
+ DCHECK_NE(LocationFrom(src_stop_addr).reg(), IP0);
+ DCHECK_NE(tmp_.reg(), IP0);
+ DCHECK(0 <= tmp_.reg() && tmp_.reg() < kNumberOfWRegisters) << tmp_.reg();
+ int32_t entry_point_offset =
+ CodeGenerator::GetReadBarrierMarkEntryPointsOffset<kArm64PointerSize>(tmp_.reg());
+ // This runtime call does not require a stack map.
+ codegen->InvokeRuntimeWithoutRecordingPcInfo(entry_point_offset, instruction_, this);
+ codegen->GetAssembler()->MaybePoisonHeapReference(tmp_reg);
+ __ Str(tmp_reg, MemOperand(dst_curr_addr, element_size, PostIndex));
+ __ Cmp(src_curr_addr, src_stop_addr);
+ __ B(&slow_copy_loop, ne);
+ __ B(GetExitLabel());
+ }
+
+ const char* GetDescription() const OVERRIDE { return "ReadBarrierSystemArrayCopySlowPathARM64"; }
+
+ private:
+ Location tmp_;
+
+ DISALLOW_COPY_AND_ASSIGN(ReadBarrierSystemArrayCopySlowPathARM64);
+};
#undef __
bool IntrinsicLocationsBuilderARM64::TryDispatch(HInvoke* invoke) {
@@ -2035,9 +2102,9 @@
// We want to use two temporary registers in order to reduce the register pressure in arm64.
// So we don't use the CodeGenerator::CreateSystemArrayCopyLocationSummary.
void IntrinsicLocationsBuilderARM64::VisitSystemArrayCopy(HInvoke* invoke) {
- // TODO(rpl): Implement read barriers in the SystemArrayCopy
- // intrinsic and re-enable it (b/29516905).
- if (kEmitCompilerReadBarrier) {
+ // The only read barrier implementation supporting the
+ // SystemArrayCopy intrinsic is the Baker-style read barriers.
+ if (kEmitCompilerReadBarrier && !kUseBakerReadBarrier) {
return;
}
@@ -2090,12 +2157,20 @@
locations->AddTemp(Location::RequiresRegister());
locations->AddTemp(Location::RequiresRegister());
+ if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
+ // Temporary register IP0, obtained from the VIXL scratch register
+ // pool, cannot be used in ReadBarrierSystemArrayCopySlowPathARM64
+ // (because that register is clobbered by ReadBarrierMarkRegX
+ // entry points). Get an extra temporary register from the
+ // register allocator.
+ locations->AddTemp(Location::RequiresRegister());
+ }
}
void IntrinsicCodeGeneratorARM64::VisitSystemArrayCopy(HInvoke* invoke) {
- // TODO(rpl): Implement read barriers in the SystemArrayCopy
- // intrinsic and re-enable it (b/29516905).
- DCHECK(!kEmitCompilerReadBarrier);
+ // The only read barrier implementation supporting the
+ // SystemArrayCopy intrinsic is the Baker-style read barriers.
+ DCHECK(!kEmitCompilerReadBarrier || kUseBakerReadBarrier);
MacroAssembler* masm = GetVIXLAssembler();
LocationSummary* locations = invoke->GetLocations();
@@ -2104,6 +2179,7 @@
uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value();
uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value();
uint32_t primitive_offset = mirror::Class::PrimitiveTypeOffset().Int32Value();
+ uint32_t monitor_offset = mirror::Object::MonitorOffset().Int32Value();
Register src = XRegisterFrom(locations->InAt(0));
Location src_pos = locations->InAt(1);
@@ -2111,10 +2187,12 @@
Location dest_pos = locations->InAt(3);
Location length = locations->InAt(4);
Register temp1 = WRegisterFrom(locations->GetTemp(0));
+ Location temp1_loc = LocationFrom(temp1);
Register temp2 = WRegisterFrom(locations->GetTemp(1));
+ Location temp2_loc = LocationFrom(temp2);
- SlowPathCodeARM64* slow_path = new (GetAllocator()) IntrinsicSlowPathARM64(invoke);
- codegen_->AddSlowPath(slow_path);
+ SlowPathCodeARM64* intrinsic_slow_path = new (GetAllocator()) IntrinsicSlowPathARM64(invoke);
+ codegen_->AddSlowPath(intrinsic_slow_path);
vixl::aarch64::Label conditions_on_positions_validated;
SystemArrayCopyOptimizations optimizations(invoke);
@@ -2130,7 +2208,7 @@
DCHECK_GE(src_pos_constant, dest_pos_constant);
} else if (src_pos_constant < dest_pos_constant) {
__ Cmp(src, dest);
- __ B(slow_path->GetEntryLabel(), eq);
+ __ B(intrinsic_slow_path->GetEntryLabel(), eq);
}
// Checked when building locations.
DCHECK(!optimizations.GetDestinationIsSource()
@@ -2141,7 +2219,7 @@
__ B(&conditions_on_positions_validated, ne);
}
__ Cmp(WRegisterFrom(dest_pos), src_pos_constant);
- __ B(slow_path->GetEntryLabel(), gt);
+ __ B(intrinsic_slow_path->GetEntryLabel(), gt);
}
} else {
if (!optimizations.GetDestinationIsSource()) {
@@ -2150,19 +2228,19 @@
}
__ Cmp(RegisterFrom(src_pos, invoke->InputAt(1)->GetType()),
OperandFrom(dest_pos, invoke->InputAt(3)->GetType()));
- __ B(slow_path->GetEntryLabel(), lt);
+ __ B(intrinsic_slow_path->GetEntryLabel(), lt);
}
__ Bind(&conditions_on_positions_validated);
if (!optimizations.GetSourceIsNotNull()) {
// Bail out if the source is null.
- __ Cbz(src, slow_path->GetEntryLabel());
+ __ Cbz(src, intrinsic_slow_path->GetEntryLabel());
}
if (!optimizations.GetDestinationIsNotNull() && !optimizations.GetDestinationIsSource()) {
// Bail out if the destination is null.
- __ Cbz(dest, slow_path->GetEntryLabel());
+ __ Cbz(dest, intrinsic_slow_path->GetEntryLabel());
}
// We have already checked in the LocationsBuilder for the constant case.
@@ -2170,17 +2248,17 @@
!optimizations.GetCountIsSourceLength() &&
!optimizations.GetCountIsDestinationLength()) {
// If the length is negative, bail out.
- __ Tbnz(WRegisterFrom(length), kWRegSize - 1, slow_path->GetEntryLabel());
+ __ Tbnz(WRegisterFrom(length), kWRegSize - 1, intrinsic_slow_path->GetEntryLabel());
// If the length >= 128 then (currently) prefer native implementation.
__ Cmp(WRegisterFrom(length), kSystemArrayCopyThreshold);
- __ B(slow_path->GetEntryLabel(), ge);
+ __ B(intrinsic_slow_path->GetEntryLabel(), ge);
}
// Validity checks: source.
CheckSystemArrayCopyPosition(masm,
src_pos,
src,
length,
- slow_path,
+ intrinsic_slow_path,
temp1,
optimizations.GetCountIsSourceLength());
@@ -2189,90 +2267,236 @@
dest_pos,
dest,
length,
- slow_path,
+ intrinsic_slow_path,
temp1,
optimizations.GetCountIsDestinationLength());
{
// We use a block to end the scratch scope before the write barrier, thus
// freeing the temporary registers so they can be used in `MarkGCCard`.
UseScratchRegisterScope temps(masm);
+ // Note: Because it is acquired from VIXL's scratch register pool,
+ // `temp3` might be IP0, and thus cannot be used as `ref` argument
+ // of CodeGeneratorARM64::GenerateFieldLoadWithBakerReadBarrier
+ // calls below (see ReadBarrierMarkSlowPathARM64 for more details).
Register temp3 = temps.AcquireW();
+
if (!optimizations.GetDoesNotNeedTypeCheck()) {
// Check whether all elements of the source array are assignable to the component
// type of the destination array. We do two checks: the classes are the same,
// or the destination is Object[]. If none of these checks succeed, we go to the
// slow path.
- __ Ldr(temp1, MemOperand(dest, class_offset));
- __ Ldr(temp2, MemOperand(src, class_offset));
- bool did_unpoison = false;
- if (!optimizations.GetDestinationIsNonPrimitiveArray() ||
- !optimizations.GetSourceIsNonPrimitiveArray()) {
- // One or two of the references need to be unpoisoned. Unpoison them
- // both to make the identity check valid.
- codegen_->GetAssembler()->MaybeUnpoisonHeapReference(temp1);
- codegen_->GetAssembler()->MaybeUnpoisonHeapReference(temp2);
- did_unpoison = true;
- }
- if (!optimizations.GetDestinationIsNonPrimitiveArray()) {
- // Bail out if the destination is not a non primitive array.
- // /* HeapReference<Class> */ temp3 = temp1->component_type_
- __ Ldr(temp3, HeapOperand(temp1, component_offset));
- __ Cbz(temp3, slow_path->GetEntryLabel());
- codegen_->GetAssembler()->MaybeUnpoisonHeapReference(temp3);
- __ Ldrh(temp3, HeapOperand(temp3, primitive_offset));
- static_assert(Primitive::kPrimNot == 0, "Expected 0 for kPrimNot");
- __ Cbnz(temp3, slow_path->GetEntryLabel());
- }
-
- if (!optimizations.GetSourceIsNonPrimitiveArray()) {
- // Bail out if the source is not a non primitive array.
- // /* HeapReference<Class> */ temp3 = temp2->component_type_
- __ Ldr(temp3, HeapOperand(temp2, component_offset));
- __ Cbz(temp3, slow_path->GetEntryLabel());
- codegen_->GetAssembler()->MaybeUnpoisonHeapReference(temp3);
- __ Ldrh(temp3, HeapOperand(temp3, primitive_offset));
- static_assert(Primitive::kPrimNot == 0, "Expected 0 for kPrimNot");
- __ Cbnz(temp3, slow_path->GetEntryLabel());
- }
-
- __ Cmp(temp1, temp2);
-
- if (optimizations.GetDestinationIsTypedObjectArray()) {
- vixl::aarch64::Label do_copy;
- __ B(&do_copy, eq);
- if (!did_unpoison) {
- codegen_->GetAssembler()->MaybeUnpoisonHeapReference(temp1);
+ if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
+ if (!optimizations.GetSourceIsNonPrimitiveArray()) {
+ // /* HeapReference<Class> */ temp1 = src->klass_
+ codegen_->GenerateFieldLoadWithBakerReadBarrier(invoke,
+ temp1_loc,
+ src.W(),
+ class_offset,
+ temp2,
+ /* needs_null_check */ false,
+ /* use_load_acquire */ false);
+ // Bail out if the source is not a non primitive array.
+ // /* HeapReference<Class> */ temp1 = temp1->component_type_
+ codegen_->GenerateFieldLoadWithBakerReadBarrier(invoke,
+ temp1_loc,
+ temp1,
+ component_offset,
+ temp2,
+ /* needs_null_check */ false,
+ /* use_load_acquire */ false);
+ __ Cbz(temp1, intrinsic_slow_path->GetEntryLabel());
+ // If heap poisoning is enabled, `temp1` has been unpoisoned
+ // by the the previous call to GenerateFieldLoadWithBakerReadBarrier.
+ // /* uint16_t */ temp1 = static_cast<uint16>(temp1->primitive_type_);
+ __ Ldrh(temp1, HeapOperand(temp1, primitive_offset));
+ static_assert(Primitive::kPrimNot == 0, "Expected 0 for kPrimNot");
+ __ Cbnz(temp1, intrinsic_slow_path->GetEntryLabel());
}
- // /* HeapReference<Class> */ temp1 = temp1->component_type_
- __ Ldr(temp1, HeapOperand(temp1, component_offset));
- codegen_->GetAssembler()->MaybeUnpoisonHeapReference(temp1);
- // /* HeapReference<Class> */ temp1 = temp1->super_class_
- __ Ldr(temp1, HeapOperand(temp1, super_offset));
- // No need to unpoison the result, we're comparing against null.
- __ Cbnz(temp1, slow_path->GetEntryLabel());
- __ Bind(&do_copy);
+
+ // /* HeapReference<Class> */ temp1 = dest->klass_
+ codegen_->GenerateFieldLoadWithBakerReadBarrier(invoke,
+ temp1_loc,
+ dest.W(),
+ class_offset,
+ temp2,
+ /* needs_null_check */ false,
+ /* use_load_acquire */ false);
+
+ if (!optimizations.GetDestinationIsNonPrimitiveArray()) {
+ // Bail out if the destination is not a non primitive array.
+ //
+ // Register `temp1` is not trashed by the read barrier emitted
+ // by GenerateFieldLoadWithBakerReadBarrier below, as that
+ // method produces a call to a ReadBarrierMarkRegX entry point,
+ // which saves all potentially live registers, including
+ // temporaries such a `temp1`.
+ // /* HeapReference<Class> */ temp2 = temp1->component_type_
+ codegen_->GenerateFieldLoadWithBakerReadBarrier(invoke,
+ temp2_loc,
+ temp1,
+ component_offset,
+ temp3,
+ /* needs_null_check */ false,
+ /* use_load_acquire */ false);
+ __ Cbz(temp2, intrinsic_slow_path->GetEntryLabel());
+ // If heap poisoning is enabled, `temp2` has been unpoisoned
+ // by the the previous call to GenerateFieldLoadWithBakerReadBarrier.
+ // /* uint16_t */ temp2 = static_cast<uint16>(temp2->primitive_type_);
+ __ Ldrh(temp2, HeapOperand(temp2, primitive_offset));
+ static_assert(Primitive::kPrimNot == 0, "Expected 0 for kPrimNot");
+ __ Cbnz(temp2, intrinsic_slow_path->GetEntryLabel());
+ }
+
+ // For the same reason given earlier, `temp1` is not trashed by the
+ // read barrier emitted by GenerateFieldLoadWithBakerReadBarrier below.
+ // /* HeapReference<Class> */ temp2 = src->klass_
+ codegen_->GenerateFieldLoadWithBakerReadBarrier(invoke,
+ temp2_loc,
+ src.W(),
+ class_offset,
+ temp3,
+ /* needs_null_check */ false,
+ /* use_load_acquire */ false);
+ // Note: if heap poisoning is on, we are comparing two unpoisoned references here.
+ __ Cmp(temp1, temp2);
+
+ if (optimizations.GetDestinationIsTypedObjectArray()) {
+ vixl::aarch64::Label do_copy;
+ __ B(&do_copy, eq);
+ // /* HeapReference<Class> */ temp1 = temp1->component_type_
+ codegen_->GenerateFieldLoadWithBakerReadBarrier(invoke,
+ temp1_loc,
+ temp1,
+ component_offset,
+ temp2,
+ /* needs_null_check */ false,
+ /* use_load_acquire */ false);
+ // /* HeapReference<Class> */ temp1 = temp1->super_class_
+ // We do not need to emit a read barrier for the following
+ // heap reference load, as `temp1` is only used in a
+ // comparison with null below, and this reference is not
+ // kept afterwards.
+ __ Ldr(temp1, HeapOperand(temp1, super_offset));
+ __ Cbnz(temp1, intrinsic_slow_path->GetEntryLabel());
+ __ Bind(&do_copy);
+ } else {
+ __ B(intrinsic_slow_path->GetEntryLabel(), ne);
+ }
} else {
- __ B(slow_path->GetEntryLabel(), ne);
+ // Non read barrier code.
+
+ // /* HeapReference<Class> */ temp1 = dest->klass_
+ __ Ldr(temp1, MemOperand(dest, class_offset));
+ // /* HeapReference<Class> */ temp2 = src->klass_
+ __ Ldr(temp2, MemOperand(src, class_offset));
+ bool did_unpoison = false;
+ if (!optimizations.GetDestinationIsNonPrimitiveArray() ||
+ !optimizations.GetSourceIsNonPrimitiveArray()) {
+ // One or two of the references need to be unpoisoned. Unpoison them
+ // both to make the identity check valid.
+ codegen_->GetAssembler()->MaybeUnpoisonHeapReference(temp1);
+ codegen_->GetAssembler()->MaybeUnpoisonHeapReference(temp2);
+ did_unpoison = true;
+ }
+
+ if (!optimizations.GetDestinationIsNonPrimitiveArray()) {
+ // Bail out if the destination is not a non primitive array.
+ // /* HeapReference<Class> */ temp3 = temp1->component_type_
+ __ Ldr(temp3, HeapOperand(temp1, component_offset));
+ __ Cbz(temp3, intrinsic_slow_path->GetEntryLabel());
+ codegen_->GetAssembler()->MaybeUnpoisonHeapReference(temp3);
+ // /* uint16_t */ temp3 = static_cast<uint16>(temp3->primitive_type_);
+ __ Ldrh(temp3, HeapOperand(temp3, primitive_offset));
+ static_assert(Primitive::kPrimNot == 0, "Expected 0 for kPrimNot");
+ __ Cbnz(temp3, intrinsic_slow_path->GetEntryLabel());
+ }
+
+ if (!optimizations.GetSourceIsNonPrimitiveArray()) {
+ // Bail out if the source is not a non primitive array.
+ // /* HeapReference<Class> */ temp3 = temp2->component_type_
+ __ Ldr(temp3, HeapOperand(temp2, component_offset));
+ __ Cbz(temp3, intrinsic_slow_path->GetEntryLabel());
+ codegen_->GetAssembler()->MaybeUnpoisonHeapReference(temp3);
+ // /* uint16_t */ temp3 = static_cast<uint16>(temp3->primitive_type_);
+ __ Ldrh(temp3, HeapOperand(temp3, primitive_offset));
+ static_assert(Primitive::kPrimNot == 0, "Expected 0 for kPrimNot");
+ __ Cbnz(temp3, intrinsic_slow_path->GetEntryLabel());
+ }
+
+ __ Cmp(temp1, temp2);
+
+ if (optimizations.GetDestinationIsTypedObjectArray()) {
+ vixl::aarch64::Label do_copy;
+ __ B(&do_copy, eq);
+ if (!did_unpoison) {
+ codegen_->GetAssembler()->MaybeUnpoisonHeapReference(temp1);
+ }
+ // /* HeapReference<Class> */ temp1 = temp1->component_type_
+ __ Ldr(temp1, HeapOperand(temp1, component_offset));
+ codegen_->GetAssembler()->MaybeUnpoisonHeapReference(temp1);
+ // /* HeapReference<Class> */ temp1 = temp1->super_class_
+ __ Ldr(temp1, HeapOperand(temp1, super_offset));
+ // No need to unpoison the result, we're comparing against null.
+ __ Cbnz(temp1, intrinsic_slow_path->GetEntryLabel());
+ __ Bind(&do_copy);
+ } else {
+ __ B(intrinsic_slow_path->GetEntryLabel(), ne);
+ }
}
} else if (!optimizations.GetSourceIsNonPrimitiveArray()) {
DCHECK(optimizations.GetDestinationIsNonPrimitiveArray());
// Bail out if the source is not a non primitive array.
- // /* HeapReference<Class> */ temp1 = src->klass_
- __ Ldr(temp1, HeapOperand(src.W(), class_offset));
- codegen_->GetAssembler()->MaybeUnpoisonHeapReference(temp1);
- // /* HeapReference<Class> */ temp3 = temp1->component_type_
- __ Ldr(temp3, HeapOperand(temp1, component_offset));
- __ Cbz(temp3, slow_path->GetEntryLabel());
- codegen_->GetAssembler()->MaybeUnpoisonHeapReference(temp3);
- __ Ldrh(temp3, HeapOperand(temp3, primitive_offset));
+ if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
+ // /* HeapReference<Class> */ temp1 = src->klass_
+ codegen_->GenerateFieldLoadWithBakerReadBarrier(invoke,
+ temp1_loc,
+ src.W(),
+ class_offset,
+ temp2,
+ /* needs_null_check */ false,
+ /* use_load_acquire */ false);
+ // /* HeapReference<Class> */ temp2 = temp1->component_type_
+ codegen_->GenerateFieldLoadWithBakerReadBarrier(invoke,
+ temp2_loc,
+ temp1,
+ component_offset,
+ temp3,
+ /* needs_null_check */ false,
+ /* use_load_acquire */ false);
+ __ Cbz(temp2, intrinsic_slow_path->GetEntryLabel());
+ // If heap poisoning is enabled, `temp2` has been unpoisoned
+ // by the the previous call to GenerateFieldLoadWithBakerReadBarrier.
+ } else {
+ // /* HeapReference<Class> */ temp1 = src->klass_
+ __ Ldr(temp1, HeapOperand(src.W(), class_offset));
+ codegen_->GetAssembler()->MaybeUnpoisonHeapReference(temp1);
+ // /* HeapReference<Class> */ temp2 = temp1->component_type_
+ __ Ldr(temp2, HeapOperand(temp1, component_offset));
+ __ Cbz(temp2, intrinsic_slow_path->GetEntryLabel());
+ codegen_->GetAssembler()->MaybeUnpoisonHeapReference(temp2);
+ }
+ // /* uint16_t */ temp2 = static_cast<uint16>(temp2->primitive_type_);
+ __ Ldrh(temp2, HeapOperand(temp2, primitive_offset));
static_assert(Primitive::kPrimNot == 0, "Expected 0 for kPrimNot");
- __ Cbnz(temp3, slow_path->GetEntryLabel());
+ __ Cbnz(temp2, intrinsic_slow_path->GetEntryLabel());
}
Register src_curr_addr = temp1.X();
Register dst_curr_addr = temp2.X();
- Register src_stop_addr = temp3.X();
+ Register src_stop_addr;
+ if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
+ // Temporary register IP0, obtained from the VIXL scratch
+ // register pool as `temp3`, cannot be used in
+ // ReadBarrierSystemArrayCopySlowPathARM64 (because that
+ // register is clobbered by ReadBarrierMarkRegX entry points).
+ // So another temporary register allocated by the register
+ // allocator instead.
+ DCHECK_EQ(LocationFrom(temp3).reg(), IP0);
+ src_stop_addr = XRegisterFrom(locations->GetTemp(2));
+ } else {
+ src_stop_addr = temp3.X();
+ }
GenSystemArrayCopyAddresses(masm,
Primitive::kPrimNot,
@@ -2285,25 +2509,98 @@
dst_curr_addr,
src_stop_addr);
- // Iterate over the arrays and do a raw copy of the objects. We don't need to
- // poison/unpoison.
- vixl::aarch64::Label loop, done;
const int32_t element_size = Primitive::ComponentSize(Primitive::kPrimNot);
- __ Bind(&loop);
- __ Cmp(src_curr_addr, src_stop_addr);
- __ B(&done, eq);
- {
+
+ if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
+ // SystemArrayCopy implementation for Baker read barriers (see
+ // also CodeGeneratorARM::GenerateReferenceLoadWithBakerReadBarrier):
+ //
+ // if (src_ptr != end_ptr) {
+ // uint32_t rb_state = Lockword(src->monitor_).ReadBarrierState();
+ // lfence; // Load fence or artificial data dependency to prevent load-load reordering
+ // bool is_gray = (rb_state == ReadBarrier::gray_ptr_);
+ // if (is_gray) {
+ // // Slow-path copy.
+ // do {
+ // *dest_ptr++ = MaybePoison(ReadBarrier::Mark(MaybeUnpoison(*src_ptr++)));
+ // } while (src_ptr != end_ptr)
+ // } else {
+ // // Fast-path copy.
+ // do {
+ // *dest_ptr++ = *src_ptr++;
+ // } while (src_ptr != end_ptr)
+ // }
+ // }
+
+ vixl::aarch64::Label loop, done;
+
+ // Don't enter copy loop if `length == 0`.
+ __ Cmp(src_curr_addr, src_stop_addr);
+ __ B(&done, eq);
+
Register tmp = temps.AcquireW();
+ // Make sure `tmp` is not IP0, as it is clobbered by
+ // ReadBarrierMarkRegX entry points in
+ // ReadBarrierSystemArrayCopySlowPathARM64.
+ DCHECK_NE(LocationFrom(tmp).reg(), IP0);
+
+ // /* int32_t */ monitor = src->monitor_
+ __ Ldr(tmp, HeapOperand(src.W(), monitor_offset));
+ // /* LockWord */ lock_word = LockWord(monitor)
+ static_assert(sizeof(LockWord) == sizeof(int32_t),
+ "art::LockWord and int32_t have different sizes.");
+
+ // Introduce a dependency on the lock_word including rb_state,
+ // to prevent load-load reordering, and without using
+ // a memory barrier (which would be more expensive).
+ // `src` is unchanged by this operation, but its value now depends
+ // on `tmp`.
+ __ Add(src.X(), src.X(), Operand(tmp.X(), LSR, 32));
+
+ // Slow path used to copy array when `src` is gray.
+ SlowPathCodeARM64* read_barrier_slow_path =
+ new (GetAllocator()) ReadBarrierSystemArrayCopySlowPathARM64(invoke, LocationFrom(tmp));
+ codegen_->AddSlowPath(read_barrier_slow_path);
+
+ // Given the numeric representation, it's enough to check the low bit of the rb_state.
+ static_assert(ReadBarrier::white_ptr_ == 0, "Expecting white to have value 0");
+ static_assert(ReadBarrier::gray_ptr_ == 1, "Expecting gray to have value 1");
+ static_assert(ReadBarrier::black_ptr_ == 2, "Expecting black to have value 2");
+ __ Tbnz(tmp, LockWord::kReadBarrierStateShift, read_barrier_slow_path->GetEntryLabel());
+
+ // Fast-path copy.
+ // Iterate over the arrays and do a raw copy of the objects. We don't need to
+ // poison/unpoison.
+ __ Bind(&loop);
__ Ldr(tmp, MemOperand(src_curr_addr, element_size, PostIndex));
__ Str(tmp, MemOperand(dst_curr_addr, element_size, PostIndex));
+ __ Cmp(src_curr_addr, src_stop_addr);
+ __ B(&loop, ne);
+
+ __ Bind(read_barrier_slow_path->GetExitLabel());
+ __ Bind(&done);
+ } else {
+ // Non read barrier code.
+
+ // Iterate over the arrays and do a raw copy of the objects. We don't need to
+ // poison/unpoison.
+ vixl::aarch64::Label loop, done;
+ __ Bind(&loop);
+ __ Cmp(src_curr_addr, src_stop_addr);
+ __ B(&done, eq);
+ {
+ Register tmp = temps.AcquireW();
+ __ Ldr(tmp, MemOperand(src_curr_addr, element_size, PostIndex));
+ __ Str(tmp, MemOperand(dst_curr_addr, element_size, PostIndex));
+ }
+ __ B(&loop);
+ __ Bind(&done);
}
- __ B(&loop);
- __ Bind(&done);
}
// We only need one card marking on the destination array.
codegen_->MarkGCCard(dest.W(), Register(), /* value_can_be_null */ false);
- __ Bind(slow_path->GetExitLabel());
+ __ Bind(intrinsic_slow_path->GetExitLabel());
}
static void GenIsInfinite(LocationSummary* locations,
diff --git a/compiler/optimizing/intrinsics_x86.cc b/compiler/optimizing/intrinsics_x86.cc
index 22f4181..49d6c19 100644
--- a/compiler/optimizing/intrinsics_x86.cc
+++ b/compiler/optimizing/intrinsics_x86.cc
@@ -70,6 +70,105 @@
using IntrinsicSlowPathX86 = IntrinsicSlowPath<InvokeDexCallingConventionVisitorX86>;
+// NOLINT on __ macro to suppress wrong warning/fix (misc-macro-parentheses) from clang-tidy.
+#define __ down_cast<X86Assembler*>(codegen->GetAssembler())-> // NOLINT
+
+// Slow path implementing the SystemArrayCopy intrinsic copy loop with read barriers.
+class ReadBarrierSystemArrayCopySlowPathX86 : public SlowPathCode {
+ public:
+ explicit ReadBarrierSystemArrayCopySlowPathX86(HInstruction* instruction)
+ : SlowPathCode(instruction) {
+ DCHECK(kEmitCompilerReadBarrier);
+ DCHECK(kUseBakerReadBarrier);
+ }
+
+ void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
+ CodeGeneratorX86* x86_codegen = down_cast<CodeGeneratorX86*>(codegen);
+ LocationSummary* locations = instruction_->GetLocations();
+ DCHECK(locations->CanCall());
+ DCHECK(instruction_->IsInvokeStaticOrDirect())
+ << "Unexpected instruction in read barrier arraycopy slow path: "
+ << instruction_->DebugName();
+ DCHECK(instruction_->GetLocations()->Intrinsified());
+ DCHECK_EQ(instruction_->AsInvoke()->GetIntrinsic(), Intrinsics::kSystemArrayCopy);
+
+ int32_t element_size = Primitive::ComponentSize(Primitive::kPrimNot);
+ uint32_t offset = mirror::Array::DataOffset(element_size).Uint32Value();
+
+ Register src = locations->InAt(0).AsRegister<Register>();
+ Location src_pos = locations->InAt(1);
+ Register dest = locations->InAt(2).AsRegister<Register>();
+ Location dest_pos = locations->InAt(3);
+ Location length = locations->InAt(4);
+ Location temp1_loc = locations->GetTemp(0);
+ Register temp1 = temp1_loc.AsRegister<Register>();
+ Register temp2 = locations->GetTemp(1).AsRegister<Register>();
+ Register temp3 = locations->GetTemp(2).AsRegister<Register>();
+
+ __ Bind(GetEntryLabel());
+ // In this code path, registers `temp1`, `temp2`, and `temp3`
+ // (resp.) are not used for the base source address, the base
+ // destination address, and the end source address (resp.), as in
+ // other SystemArrayCopy intrinsic code paths. Instead they are
+ // (resp.) used for:
+ // - the loop index (`i`);
+ // - the source index (`src_index`) and the loaded (source)
+ // reference (`value`); and
+ // - the destination index (`dest_index`).
+
+ // i = 0
+ __ xorl(temp1, temp1);
+ NearLabel loop;
+ __ Bind(&loop);
+ // value = src_array[i + src_pos]
+ if (src_pos.IsConstant()) {
+ int32_t constant = src_pos.GetConstant()->AsIntConstant()->GetValue();
+ int32_t adjusted_offset = offset + constant * element_size;
+ __ movl(temp2, Address(src, temp1, ScaleFactor::TIMES_4, adjusted_offset));
+ } else {
+ __ leal(temp2, Address(src_pos.AsRegister<Register>(), temp1, ScaleFactor::TIMES_1, 0));
+ __ movl(temp2, Address(src, temp2, ScaleFactor::TIMES_4, offset));
+ }
+ __ MaybeUnpoisonHeapReference(temp2);
+ // TODO: Inline the mark bit check before calling the runtime?
+ // value = ReadBarrier::Mark(value)
+ // No need to save live registers; it's taken care of by the
+ // entrypoint. Also, there is no need to update the stack mask,
+ // as this runtime call will not trigger a garbage collection.
+ // (See ReadBarrierMarkSlowPathX86::EmitNativeCode for more
+ // explanations.)
+ DCHECK_NE(temp2, ESP);
+ DCHECK(0 <= temp2 && temp2 < kNumberOfCpuRegisters) << temp2;
+ int32_t entry_point_offset =
+ CodeGenerator::GetReadBarrierMarkEntryPointsOffset<kX86PointerSize>(temp2);
+ // This runtime call does not require a stack map.
+ x86_codegen->InvokeRuntimeWithoutRecordingPcInfo(entry_point_offset, instruction_, this);
+ __ MaybePoisonHeapReference(temp2);
+ // dest_array[i + dest_pos] = value
+ if (dest_pos.IsConstant()) {
+ int32_t constant = dest_pos.GetConstant()->AsIntConstant()->GetValue();
+ int32_t adjusted_offset = offset + constant * element_size;
+ __ movl(Address(dest, temp1, ScaleFactor::TIMES_4, adjusted_offset), temp2);
+ } else {
+ __ leal(temp3, Address(dest_pos.AsRegister<Register>(), temp1, ScaleFactor::TIMES_1, 0));
+ __ movl(Address(dest, temp3, ScaleFactor::TIMES_4, offset), temp2);
+ }
+ // ++i
+ __ addl(temp1, Immediate(1));
+ // if (i != length) goto loop
+ x86_codegen->GenerateIntCompare(temp1_loc, length);
+ __ j(kNotEqual, &loop);
+ __ jmp(GetExitLabel());
+ }
+
+ const char* GetDescription() const OVERRIDE { return "ReadBarrierSystemArrayCopySlowPathX86"; }
+
+ private:
+ DISALLOW_COPY_AND_ASSIGN(ReadBarrierSystemArrayCopySlowPathX86);
+};
+
+#undef __
+
#define __ assembler->
static void CreateFPToIntLocations(ArenaAllocator* arena, HInvoke* invoke, bool is64bit) {
@@ -2678,9 +2777,9 @@
}
void IntrinsicLocationsBuilderX86::VisitSystemArrayCopy(HInvoke* invoke) {
- // TODO(rpl): Implement read barriers in the SystemArrayCopy
- // intrinsic and re-enable it (b/29516905).
- if (kEmitCompilerReadBarrier) {
+ // The only read barrier implementation supporting the
+ // SystemArrayCopy intrinsic is the Baker-style read barriers.
+ if (kEmitCompilerReadBarrier && !kUseBakerReadBarrier) {
return;
}
@@ -2710,9 +2809,9 @@
}
void IntrinsicCodeGeneratorX86::VisitSystemArrayCopy(HInvoke* invoke) {
- // TODO(rpl): Implement read barriers in the SystemArrayCopy
- // intrinsic and re-enable it (b/29516905).
- DCHECK(!kEmitCompilerReadBarrier);
+ // The only read barrier implementation supporting the
+ // SystemArrayCopy intrinsic is the Baker-style read barriers.
+ DCHECK(!kEmitCompilerReadBarrier || kUseBakerReadBarrier);
X86Assembler* assembler = GetAssembler();
LocationSummary* locations = invoke->GetLocations();
@@ -2721,17 +2820,21 @@
uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value();
uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value();
uint32_t primitive_offset = mirror::Class::PrimitiveTypeOffset().Int32Value();
+ uint32_t monitor_offset = mirror::Object::MonitorOffset().Int32Value();
Register src = locations->InAt(0).AsRegister<Register>();
Location src_pos = locations->InAt(1);
Register dest = locations->InAt(2).AsRegister<Register>();
Location dest_pos = locations->InAt(3);
- Location length = locations->InAt(4);
- Register temp1 = locations->GetTemp(0).AsRegister<Register>();
- Register temp2 = locations->GetTemp(1).AsRegister<Register>();
+ Location length_arg = locations->InAt(4);
+ Location length = length_arg;
+ Location temp1_loc = locations->GetTemp(0);
+ Register temp1 = temp1_loc.AsRegister<Register>();
+ Location temp2_loc = locations->GetTemp(1);
+ Register temp2 = temp2_loc.AsRegister<Register>();
- SlowPathCode* slow_path = new (GetAllocator()) IntrinsicSlowPathX86(invoke);
- codegen_->AddSlowPath(slow_path);
+ SlowPathCode* intrinsic_slow_path = new (GetAllocator()) IntrinsicSlowPathX86(invoke);
+ codegen_->AddSlowPath(intrinsic_slow_path);
NearLabel conditions_on_positions_validated;
SystemArrayCopyOptimizations optimizations(invoke);
@@ -2747,7 +2850,7 @@
DCHECK_GE(src_pos_constant, dest_pos_constant);
} else if (src_pos_constant < dest_pos_constant) {
__ cmpl(src, dest);
- __ j(kEqual, slow_path->GetEntryLabel());
+ __ j(kEqual, intrinsic_slow_path->GetEntryLabel());
}
} else {
if (!optimizations.GetDestinationIsSource()) {
@@ -2755,7 +2858,7 @@
__ j(kNotEqual, &conditions_on_positions_validated);
}
__ cmpl(dest_pos.AsRegister<Register>(), Immediate(src_pos_constant));
- __ j(kGreater, slow_path->GetEntryLabel());
+ __ j(kGreater, intrinsic_slow_path->GetEntryLabel());
}
} else {
if (!optimizations.GetDestinationIsSource()) {
@@ -2765,10 +2868,10 @@
if (dest_pos.IsConstant()) {
int32_t dest_pos_constant = dest_pos.GetConstant()->AsIntConstant()->GetValue();
__ cmpl(src_pos.AsRegister<Register>(), Immediate(dest_pos_constant));
- __ j(kLess, slow_path->GetEntryLabel());
+ __ j(kLess, intrinsic_slow_path->GetEntryLabel());
} else {
__ cmpl(src_pos.AsRegister<Register>(), dest_pos.AsRegister<Register>());
- __ j(kLess, slow_path->GetEntryLabel());
+ __ j(kLess, intrinsic_slow_path->GetEntryLabel());
}
}
@@ -2777,16 +2880,17 @@
if (!optimizations.GetSourceIsNotNull()) {
// Bail out if the source is null.
__ testl(src, src);
- __ j(kEqual, slow_path->GetEntryLabel());
+ __ j(kEqual, intrinsic_slow_path->GetEntryLabel());
}
if (!optimizations.GetDestinationIsNotNull() && !optimizations.GetDestinationIsSource()) {
// Bail out if the destination is null.
__ testl(dest, dest);
- __ j(kEqual, slow_path->GetEntryLabel());
+ __ j(kEqual, intrinsic_slow_path->GetEntryLabel());
}
- Register temp3 = locations->GetTemp(2).AsRegister<Register>();
+ Location temp3_loc = locations->GetTemp(2);
+ Register temp3 = temp3_loc.AsRegister<Register>();
if (length.IsStackSlot()) {
__ movl(temp3, Address(ESP, length.GetStackIndex()));
length = Location::RegisterLocation(temp3);
@@ -2798,7 +2902,7 @@
!optimizations.GetCountIsSourceLength() &&
!optimizations.GetCountIsDestinationLength()) {
__ testl(length.AsRegister<Register>(), length.AsRegister<Register>());
- __ j(kLess, slow_path->GetEntryLabel());
+ __ j(kLess, intrinsic_slow_path->GetEntryLabel());
}
// Validity checks: source.
@@ -2806,7 +2910,7 @@
src_pos,
src,
length,
- slow_path,
+ intrinsic_slow_path,
temp1,
optimizations.GetCountIsSourceLength());
@@ -2815,7 +2919,7 @@
dest_pos,
dest,
length,
- slow_path,
+ intrinsic_slow_path,
temp1,
optimizations.GetCountIsDestinationLength());
@@ -2824,72 +2928,159 @@
// type of the destination array. We do two checks: the classes are the same,
// or the destination is Object[]. If none of these checks succeed, we go to the
// slow path.
+
if (!optimizations.GetSourceIsNonPrimitiveArray()) {
- // /* HeapReference<Class> */ temp1 = temp1->klass_
- __ movl(temp1, Address(src, class_offset));
- __ MaybeUnpoisonHeapReference(temp1);
- // Bail out if the source is not a non primitive array.
- // /* HeapReference<Class> */ temp1 = temp1->component_type_
- __ movl(temp1, Address(temp1, component_offset));
- __ testl(temp1, temp1);
- __ j(kEqual, slow_path->GetEntryLabel());
- __ MaybeUnpoisonHeapReference(temp1);
+ if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
+ // /* HeapReference<Class> */ temp1 = src->klass_
+ codegen_->GenerateFieldLoadWithBakerReadBarrier(
+ invoke, temp1_loc, src, class_offset, temp2_loc, /* needs_null_check */ false);
+ // Bail out if the source is not a non primitive array.
+ // /* HeapReference<Class> */ temp1 = temp1->component_type_
+ codegen_->GenerateFieldLoadWithBakerReadBarrier(
+ invoke, temp1_loc, temp1, component_offset, temp2_loc, /* needs_null_check */ false);
+ __ testl(temp1, temp1);
+ __ j(kEqual, intrinsic_slow_path->GetEntryLabel());
+ // If heap poisoning is enabled, `temp1` has been unpoisoned
+ // by the the previous call to GenerateFieldLoadWithBakerReadBarrier.
+ } else {
+ // /* HeapReference<Class> */ temp1 = src->klass_
+ __ movl(temp1, Address(src, class_offset));
+ __ MaybeUnpoisonHeapReference(temp1);
+ // Bail out if the source is not a non primitive array.
+ // /* HeapReference<Class> */ temp1 = temp1->component_type_
+ __ movl(temp1, Address(temp1, component_offset));
+ __ testl(temp1, temp1);
+ __ j(kEqual, intrinsic_slow_path->GetEntryLabel());
+ __ MaybeUnpoisonHeapReference(temp1);
+ }
__ cmpw(Address(temp1, primitive_offset), Immediate(Primitive::kPrimNot));
- __ j(kNotEqual, slow_path->GetEntryLabel());
+ __ j(kNotEqual, intrinsic_slow_path->GetEntryLabel());
}
- if (!optimizations.GetDestinationIsNonPrimitiveArray()) {
- // /* HeapReference<Class> */ temp1 = temp1->klass_
- __ movl(temp1, Address(dest, class_offset));
- __ MaybeUnpoisonHeapReference(temp1);
- // Bail out if the destination is not a non primitive array.
- // /* HeapReference<Class> */ temp2 = temp1->component_type_
- __ movl(temp2, Address(temp1, component_offset));
- __ testl(temp2, temp2);
- __ j(kEqual, slow_path->GetEntryLabel());
- __ MaybeUnpoisonHeapReference(temp2);
- __ cmpw(Address(temp2, primitive_offset), Immediate(Primitive::kPrimNot));
- __ j(kNotEqual, slow_path->GetEntryLabel());
- // Re-poison the heap reference to make the compare instruction below
- // compare two poisoned references.
- __ PoisonHeapReference(temp1);
- } else {
- // /* HeapReference<Class> */ temp1 = temp1->klass_
- __ movl(temp1, Address(dest, class_offset));
- }
+ if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
+ if (length.Equals(Location::RegisterLocation(temp3))) {
+ // When Baker read barriers are enabled, register `temp3`,
+ // which in the present case contains the `length` parameter,
+ // will be overwritten below. Make the `length` location
+ // reference the original stack location; it will be moved
+ // back to `temp3` later if necessary.
+ DCHECK(length_arg.IsStackSlot());
+ length = length_arg;
+ }
- // Note: if poisoning is on, we are here comparing two poisoned references.
- __ cmpl(temp1, Address(src, class_offset));
+ // /* HeapReference<Class> */ temp1 = dest->klass_
+ codegen_->GenerateFieldLoadWithBakerReadBarrier(
+ invoke, temp1_loc, dest, class_offset, temp2_loc, /* needs_null_check */ false);
- if (optimizations.GetDestinationIsTypedObjectArray()) {
- NearLabel do_copy;
- __ j(kEqual, &do_copy);
- __ MaybeUnpoisonHeapReference(temp1);
- // /* HeapReference<Class> */ temp1 = temp1->component_type_
- __ movl(temp1, Address(temp1, component_offset));
- __ MaybeUnpoisonHeapReference(temp1);
- __ cmpl(Address(temp1, super_offset), Immediate(0));
- __ j(kNotEqual, slow_path->GetEntryLabel());
- __ Bind(&do_copy);
+ if (!optimizations.GetDestinationIsNonPrimitiveArray()) {
+ // Bail out if the destination is not a non primitive array.
+ //
+ // Register `temp1` is not trashed by the read barrier emitted
+ // by GenerateFieldLoadWithBakerReadBarrier below, as that
+ // method produces a call to a ReadBarrierMarkRegX entry point,
+ // which saves all potentially live registers, including
+ // temporaries such a `temp1`.
+ // /* HeapReference<Class> */ temp2 = temp1->component_type_
+ codegen_->GenerateFieldLoadWithBakerReadBarrier(
+ invoke, temp2_loc, temp1, component_offset, temp3_loc, /* needs_null_check */ false);
+ __ testl(temp2, temp2);
+ __ j(kEqual, intrinsic_slow_path->GetEntryLabel());
+ // If heap poisoning is enabled, `temp2` has been unpoisoned
+ // by the the previous call to GenerateFieldLoadWithBakerReadBarrier.
+ __ cmpw(Address(temp2, primitive_offset), Immediate(Primitive::kPrimNot));
+ __ j(kNotEqual, intrinsic_slow_path->GetEntryLabel());
+ }
+
+ // For the same reason given earlier, `temp1` is not trashed by the
+ // read barrier emitted by GenerateFieldLoadWithBakerReadBarrier below.
+ // /* HeapReference<Class> */ temp2 = src->klass_
+ codegen_->GenerateFieldLoadWithBakerReadBarrier(
+ invoke, temp2_loc, src, class_offset, temp3_loc, /* needs_null_check */ false);
+ // Note: if heap poisoning is on, we are comparing two unpoisoned references here.
+ __ cmpl(temp1, temp2);
+
+ if (optimizations.GetDestinationIsTypedObjectArray()) {
+ NearLabel do_copy;
+ __ j(kEqual, &do_copy);
+ // /* HeapReference<Class> */ temp1 = temp1->component_type_
+ codegen_->GenerateFieldLoadWithBakerReadBarrier(
+ invoke, temp1_loc, temp1, component_offset, temp2_loc, /* needs_null_check */ false);
+ // We do not need to emit a read barrier for the following
+ // heap reference load, as `temp1` is only used in a
+ // comparison with null below, and this reference is not
+ // kept afterwards.
+ __ cmpl(Address(temp1, super_offset), Immediate(0));
+ __ j(kNotEqual, intrinsic_slow_path->GetEntryLabel());
+ __ Bind(&do_copy);
+ } else {
+ __ j(kNotEqual, intrinsic_slow_path->GetEntryLabel());
+ }
} else {
- __ j(kNotEqual, slow_path->GetEntryLabel());
+ // Non read barrier code.
+
+ // /* HeapReference<Class> */ temp1 = dest->klass_
+ __ movl(temp1, Address(dest, class_offset));
+ if (!optimizations.GetDestinationIsNonPrimitiveArray()) {
+ __ MaybeUnpoisonHeapReference(temp1);
+ // Bail out if the destination is not a non primitive array.
+ // /* HeapReference<Class> */ temp2 = temp1->component_type_
+ __ movl(temp2, Address(temp1, component_offset));
+ __ testl(temp2, temp2);
+ __ j(kEqual, intrinsic_slow_path->GetEntryLabel());
+ __ MaybeUnpoisonHeapReference(temp2);
+ __ cmpw(Address(temp2, primitive_offset), Immediate(Primitive::kPrimNot));
+ __ j(kNotEqual, intrinsic_slow_path->GetEntryLabel());
+ // Re-poison the heap reference to make the compare instruction below
+ // compare two poisoned references.
+ __ PoisonHeapReference(temp1);
+ }
+
+ // Note: if heap poisoning is on, we are comparing two poisoned references here.
+ __ cmpl(temp1, Address(src, class_offset));
+
+ if (optimizations.GetDestinationIsTypedObjectArray()) {
+ NearLabel do_copy;
+ __ j(kEqual, &do_copy);
+ __ MaybeUnpoisonHeapReference(temp1);
+ // /* HeapReference<Class> */ temp1 = temp1->component_type_
+ __ movl(temp1, Address(temp1, component_offset));
+ __ MaybeUnpoisonHeapReference(temp1);
+ __ cmpl(Address(temp1, super_offset), Immediate(0));
+ __ j(kNotEqual, intrinsic_slow_path->GetEntryLabel());
+ __ Bind(&do_copy);
+ } else {
+ __ j(kNotEqual, intrinsic_slow_path->GetEntryLabel());
+ }
}
} else if (!optimizations.GetSourceIsNonPrimitiveArray()) {
DCHECK(optimizations.GetDestinationIsNonPrimitiveArray());
// Bail out if the source is not a non primitive array.
- // /* HeapReference<Class> */ temp1 = src->klass_
- __ movl(temp1, Address(src, class_offset));
- __ MaybeUnpoisonHeapReference(temp1);
- // /* HeapReference<Class> */ temp1 = temp1->component_type_
- __ movl(temp1, Address(temp1, component_offset));
- __ testl(temp1, temp1);
- __ j(kEqual, slow_path->GetEntryLabel());
- __ MaybeUnpoisonHeapReference(temp1);
+ if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
+ // /* HeapReference<Class> */ temp1 = src->klass_
+ codegen_->GenerateFieldLoadWithBakerReadBarrier(
+ invoke, temp1_loc, src, class_offset, temp2_loc, /* needs_null_check */ false);
+ // /* HeapReference<Class> */ temp1 = temp1->component_type_
+ codegen_->GenerateFieldLoadWithBakerReadBarrier(
+ invoke, temp1_loc, temp1, component_offset, temp2_loc, /* needs_null_check */ false);
+ __ testl(temp1, temp1);
+ __ j(kEqual, intrinsic_slow_path->GetEntryLabel());
+ // If heap poisoning is enabled, `temp1` has been unpoisoned
+ // by the the previous call to GenerateFieldLoadWithBakerReadBarrier.
+ } else {
+ // /* HeapReference<Class> */ temp1 = src->klass_
+ __ movl(temp1, Address(src, class_offset));
+ __ MaybeUnpoisonHeapReference(temp1);
+ // /* HeapReference<Class> */ temp1 = temp1->component_type_
+ __ movl(temp1, Address(temp1, component_offset));
+ __ testl(temp1, temp1);
+ __ j(kEqual, intrinsic_slow_path->GetEntryLabel());
+ __ MaybeUnpoisonHeapReference(temp1);
+ }
__ cmpw(Address(temp1, primitive_offset), Immediate(Primitive::kPrimNot));
- __ j(kNotEqual, slow_path->GetEntryLabel());
+ __ j(kNotEqual, intrinsic_slow_path->GetEntryLabel());
}
- // Compute base source address, base destination address, and end source address.
+ // Compute the base source address in `temp1`.
int32_t element_size = Primitive::ComponentSize(Primitive::kPrimNot);
DCHECK_EQ(element_size, 4);
uint32_t offset = mirror::Array::DataOffset(element_size).Uint32Value();
@@ -2900,35 +3091,136 @@
__ leal(temp1, Address(src, src_pos.AsRegister<Register>(), ScaleFactor::TIMES_4, offset));
}
- if (dest_pos.IsConstant()) {
- int32_t constant = dest_pos.GetConstant()->AsIntConstant()->GetValue();
- __ leal(temp2, Address(dest, element_size * constant + offset));
- } else {
- __ leal(temp2, Address(dest, dest_pos.AsRegister<Register>(), ScaleFactor::TIMES_4, offset));
- }
+ if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
+ // If it is needed (in the case of the fast-path loop), the base
+ // destination address is computed later, as `temp2` is used for
+ // intermediate computations.
- if (length.IsConstant()) {
- int32_t constant = length.GetConstant()->AsIntConstant()->GetValue();
- __ leal(temp3, Address(temp1, element_size * constant));
- } else {
- __ leal(temp3, Address(temp1, length.AsRegister<Register>(), ScaleFactor::TIMES_4, 0));
- }
+ // Compute the end source address in `temp3`.
+ if (length.IsConstant()) {
+ int32_t constant = length.GetConstant()->AsIntConstant()->GetValue();
+ __ leal(temp3, Address(temp1, element_size * constant));
+ } else {
+ if (length.IsStackSlot()) {
+ // Location `length` is again pointing at a stack slot, as
+ // register `temp3` (which was containing the length parameter
+ // earlier) has been overwritten; restore it now
+ DCHECK(length.Equals(length_arg));
+ __ movl(temp3, Address(ESP, length.GetStackIndex()));
+ length = Location::RegisterLocation(temp3);
+ }
+ __ leal(temp3, Address(temp1, length.AsRegister<Register>(), ScaleFactor::TIMES_4, 0));
+ }
- // Iterate over the arrays and do a raw copy of the objects. We don't need to
- // poison/unpoison.
- NearLabel loop, done;
- __ cmpl(temp1, temp3);
- __ j(kEqual, &done);
- __ Bind(&loop);
- __ pushl(Address(temp1, 0));
- __ cfi().AdjustCFAOffset(4);
- __ popl(Address(temp2, 0));
- __ cfi().AdjustCFAOffset(-4);
- __ addl(temp1, Immediate(element_size));
- __ addl(temp2, Immediate(element_size));
- __ cmpl(temp1, temp3);
- __ j(kNotEqual, &loop);
- __ Bind(&done);
+ // SystemArrayCopy implementation for Baker read barriers (see
+ // also CodeGeneratorX86::GenerateReferenceLoadWithBakerReadBarrier):
+ //
+ // if (src_ptr != end_ptr) {
+ // uint32_t rb_state = Lockword(src->monitor_).ReadBarrierState();
+ // lfence; // Load fence or artificial data dependency to prevent load-load reordering
+ // bool is_gray = (rb_state == ReadBarrier::gray_ptr_);
+ // if (is_gray) {
+ // // Slow-path copy.
+ // for (size_t i = 0; i != length; ++i) {
+ // dest_array[dest_pos + i] =
+ // MaybePoison(ReadBarrier::Mark(MaybeUnpoison(src_array[src_pos + i])));
+ // }
+ // } else {
+ // // Fast-path copy.
+ // do {
+ // *dest_ptr++ = *src_ptr++;
+ // } while (src_ptr != end_ptr)
+ // }
+ // }
+
+ NearLabel loop, done;
+
+ // Don't enter copy loop if `length == 0`.
+ __ cmpl(temp1, temp3);
+ __ j(kEqual, &done);
+
+ // /* int32_t */ monitor = src->monitor_
+ __ movl(temp2, Address(src, monitor_offset));
+ // /* LockWord */ lock_word = LockWord(monitor)
+ static_assert(sizeof(LockWord) == sizeof(int32_t),
+ "art::LockWord and int32_t have different sizes.");
+
+ // Load fence to prevent load-load reordering.
+ // Note that this is a no-op, thanks to the x86 memory model.
+ codegen_->GenerateMemoryBarrier(MemBarrierKind::kLoadAny);
+
+ // Slow path used to copy array when `src` is gray.
+ SlowPathCode* read_barrier_slow_path =
+ new (GetAllocator()) ReadBarrierSystemArrayCopySlowPathX86(invoke);
+ codegen_->AddSlowPath(read_barrier_slow_path);
+
+ // Given the numeric representation, it's enough to check the low bit of the
+ // rb_state. We do that by shifting the bit out of the lock word with SHR.
+ static_assert(ReadBarrier::white_ptr_ == 0, "Expecting white to have value 0");
+ static_assert(ReadBarrier::gray_ptr_ == 1, "Expecting gray to have value 1");
+ static_assert(ReadBarrier::black_ptr_ == 2, "Expecting black to have value 2");
+ __ shrl(temp2, Immediate(LockWord::kReadBarrierStateShift + 1));
+ __ j(kCarrySet, read_barrier_slow_path->GetEntryLabel());
+
+ // Fast-path copy.
+
+ // Set the base destination address in `temp2`.
+ if (dest_pos.IsConstant()) {
+ int32_t constant = dest_pos.GetConstant()->AsIntConstant()->GetValue();
+ __ leal(temp2, Address(dest, element_size * constant + offset));
+ } else {
+ __ leal(temp2, Address(dest, dest_pos.AsRegister<Register>(), ScaleFactor::TIMES_4, offset));
+ }
+
+ // Iterate over the arrays and do a raw copy of the objects. We don't need to
+ // poison/unpoison.
+ __ Bind(&loop);
+ __ pushl(Address(temp1, 0));
+ __ cfi().AdjustCFAOffset(4);
+ __ popl(Address(temp2, 0));
+ __ cfi().AdjustCFAOffset(-4);
+ __ addl(temp1, Immediate(element_size));
+ __ addl(temp2, Immediate(element_size));
+ __ cmpl(temp1, temp3);
+ __ j(kNotEqual, &loop);
+
+ __ Bind(read_barrier_slow_path->GetExitLabel());
+ __ Bind(&done);
+ } else {
+ // Non read barrier code.
+
+ // Compute the base destination address in `temp2`.
+ if (dest_pos.IsConstant()) {
+ int32_t constant = dest_pos.GetConstant()->AsIntConstant()->GetValue();
+ __ leal(temp2, Address(dest, element_size * constant + offset));
+ } else {
+ __ leal(temp2, Address(dest, dest_pos.AsRegister<Register>(), ScaleFactor::TIMES_4, offset));
+ }
+
+ // Compute the end source address in `temp3`.
+ if (length.IsConstant()) {
+ int32_t constant = length.GetConstant()->AsIntConstant()->GetValue();
+ __ leal(temp3, Address(temp1, element_size * constant));
+ } else {
+ __ leal(temp3, Address(temp1, length.AsRegister<Register>(), ScaleFactor::TIMES_4, 0));
+ }
+
+ // Iterate over the arrays and do a raw copy of the objects. We don't need to
+ // poison/unpoison.
+ NearLabel loop, done;
+ __ cmpl(temp1, temp3);
+ __ j(kEqual, &done);
+ __ Bind(&loop);
+ __ pushl(Address(temp1, 0));
+ __ cfi().AdjustCFAOffset(4);
+ __ popl(Address(temp2, 0));
+ __ cfi().AdjustCFAOffset(-4);
+ __ addl(temp1, Immediate(element_size));
+ __ addl(temp2, Immediate(element_size));
+ __ cmpl(temp1, temp3);
+ __ j(kNotEqual, &loop);
+ __ Bind(&done);
+ }
// We only need one card marking on the destination array.
codegen_->MarkGCCard(temp1,
@@ -2937,7 +3229,7 @@
Register(kNoRegister),
/* value_can_be_null */ false);
- __ Bind(slow_path->GetExitLabel());
+ __ Bind(intrinsic_slow_path->GetExitLabel());
}
UNIMPLEMENTED_INTRINSIC(X86, MathRoundDouble)
diff --git a/compiler/optimizing/intrinsics_x86_64.cc b/compiler/optimizing/intrinsics_x86_64.cc
index ab8b05c..311e1cd 100644
--- a/compiler/optimizing/intrinsics_x86_64.cc
+++ b/compiler/optimizing/intrinsics_x86_64.cc
@@ -64,6 +64,65 @@
using IntrinsicSlowPathX86_64 = IntrinsicSlowPath<InvokeDexCallingConventionVisitorX86_64>;
+// NOLINT on __ macro to suppress wrong warning/fix (misc-macro-parentheses) from clang-tidy.
+#define __ down_cast<X86_64Assembler*>(codegen->GetAssembler())-> // NOLINT
+
+// Slow path implementing the SystemArrayCopy intrinsic copy loop with read barriers.
+class ReadBarrierSystemArrayCopySlowPathX86_64 : public SlowPathCode {
+ public:
+ explicit ReadBarrierSystemArrayCopySlowPathX86_64(HInstruction* instruction)
+ : SlowPathCode(instruction) {
+ DCHECK(kEmitCompilerReadBarrier);
+ DCHECK(kUseBakerReadBarrier);
+ }
+
+ void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
+ CodeGeneratorX86_64* x86_64_codegen = down_cast<CodeGeneratorX86_64*>(codegen);
+ LocationSummary* locations = instruction_->GetLocations();
+ DCHECK(locations->CanCall());
+ DCHECK(instruction_->IsInvokeStaticOrDirect())
+ << "Unexpected instruction in read barrier arraycopy slow path: "
+ << instruction_->DebugName();
+ DCHECK(instruction_->GetLocations()->Intrinsified());
+ DCHECK_EQ(instruction_->AsInvoke()->GetIntrinsic(), Intrinsics::kSystemArrayCopy);
+
+ int32_t element_size = Primitive::ComponentSize(Primitive::kPrimNot);
+
+ CpuRegister src_curr_addr = locations->GetTemp(0).AsRegister<CpuRegister>();
+ CpuRegister dst_curr_addr = locations->GetTemp(1).AsRegister<CpuRegister>();
+ CpuRegister src_stop_addr = locations->GetTemp(2).AsRegister<CpuRegister>();
+
+ __ Bind(GetEntryLabel());
+ NearLabel loop;
+ __ Bind(&loop);
+ __ movl(CpuRegister(TMP), Address(src_curr_addr, 0));
+ __ MaybeUnpoisonHeapReference(CpuRegister(TMP));
+ // TODO: Inline the mark bit check before calling the runtime?
+ // TMP = ReadBarrier::Mark(TMP);
+ // No need to save live registers; it's taken care of by the
+ // entrypoint. Also, there is no need to update the stack mask,
+ // as this runtime call will not trigger a garbage collection.
+ int32_t entry_point_offset =
+ CodeGenerator::GetReadBarrierMarkEntryPointsOffset<kX86_64PointerSize>(TMP);
+ // This runtime call does not require a stack map.
+ x86_64_codegen->InvokeRuntimeWithoutRecordingPcInfo(entry_point_offset, instruction_, this);
+ __ MaybePoisonHeapReference(CpuRegister(TMP));
+ __ movl(Address(dst_curr_addr, 0), CpuRegister(TMP));
+ __ addl(src_curr_addr, Immediate(element_size));
+ __ addl(dst_curr_addr, Immediate(element_size));
+ __ cmpl(src_curr_addr, src_stop_addr);
+ __ j(kNotEqual, &loop);
+ __ jmp(GetExitLabel());
+ }
+
+ const char* GetDescription() const OVERRIDE { return "ReadBarrierSystemArrayCopySlowPathX86_64"; }
+
+ private:
+ DISALLOW_COPY_AND_ASSIGN(ReadBarrierSystemArrayCopySlowPathX86_64);
+};
+
+#undef __
+
#define __ assembler->
static void CreateFPToIntLocations(ArenaAllocator* arena, HInvoke* invoke) {
@@ -1053,9 +1112,9 @@
void IntrinsicLocationsBuilderX86_64::VisitSystemArrayCopy(HInvoke* invoke) {
- // TODO(rpl): Implement read barriers in the SystemArrayCopy
- // intrinsic and re-enable it (b/29516905).
- if (kEmitCompilerReadBarrier) {
+ // The only read barrier implementation supporting the
+ // SystemArrayCopy intrinsic is the Baker-style read barriers.
+ if (kEmitCompilerReadBarrier && !kUseBakerReadBarrier) {
return;
}
@@ -1063,9 +1122,9 @@
}
void IntrinsicCodeGeneratorX86_64::VisitSystemArrayCopy(HInvoke* invoke) {
- // TODO(rpl): Implement read barriers in the SystemArrayCopy
- // intrinsic and re-enable it (b/29516905).
- DCHECK(!kEmitCompilerReadBarrier);
+ // The only read barrier implementation supporting the
+ // SystemArrayCopy intrinsic is the Baker-style read barriers.
+ DCHECK(!kEmitCompilerReadBarrier || kUseBakerReadBarrier);
X86_64Assembler* assembler = GetAssembler();
LocationSummary* locations = invoke->GetLocations();
@@ -1074,18 +1133,23 @@
uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value();
uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value();
uint32_t primitive_offset = mirror::Class::PrimitiveTypeOffset().Int32Value();
+ uint32_t monitor_offset = mirror::Object::MonitorOffset().Int32Value();
CpuRegister src = locations->InAt(0).AsRegister<CpuRegister>();
Location src_pos = locations->InAt(1);
CpuRegister dest = locations->InAt(2).AsRegister<CpuRegister>();
Location dest_pos = locations->InAt(3);
Location length = locations->InAt(4);
- CpuRegister temp1 = locations->GetTemp(0).AsRegister<CpuRegister>();
- CpuRegister temp2 = locations->GetTemp(1).AsRegister<CpuRegister>();
- CpuRegister temp3 = locations->GetTemp(2).AsRegister<CpuRegister>();
+ Location temp1_loc = locations->GetTemp(0);
+ CpuRegister temp1 = temp1_loc.AsRegister<CpuRegister>();
+ Location temp2_loc = locations->GetTemp(1);
+ CpuRegister temp2 = temp2_loc.AsRegister<CpuRegister>();
+ Location temp3_loc = locations->GetTemp(2);
+ CpuRegister temp3 = temp3_loc.AsRegister<CpuRegister>();
+ Location TMP_loc = Location::RegisterLocation(TMP);
- SlowPathCode* slow_path = new (GetAllocator()) IntrinsicSlowPathX86_64(invoke);
- codegen_->AddSlowPath(slow_path);
+ SlowPathCode* intrinsic_slow_path = new (GetAllocator()) IntrinsicSlowPathX86_64(invoke);
+ codegen_->AddSlowPath(intrinsic_slow_path);
NearLabel conditions_on_positions_validated;
SystemArrayCopyOptimizations optimizations(invoke);
@@ -1101,7 +1165,7 @@
DCHECK_GE(src_pos_constant, dest_pos_constant);
} else if (src_pos_constant < dest_pos_constant) {
__ cmpl(src, dest);
- __ j(kEqual, slow_path->GetEntryLabel());
+ __ j(kEqual, intrinsic_slow_path->GetEntryLabel());
}
} else {
if (!optimizations.GetDestinationIsSource()) {
@@ -1109,7 +1173,7 @@
__ j(kNotEqual, &conditions_on_positions_validated);
}
__ cmpl(dest_pos.AsRegister<CpuRegister>(), Immediate(src_pos_constant));
- __ j(kGreater, slow_path->GetEntryLabel());
+ __ j(kGreater, intrinsic_slow_path->GetEntryLabel());
}
} else {
if (!optimizations.GetDestinationIsSource()) {
@@ -1119,10 +1183,10 @@
if (dest_pos.IsConstant()) {
int32_t dest_pos_constant = dest_pos.GetConstant()->AsIntConstant()->GetValue();
__ cmpl(src_pos.AsRegister<CpuRegister>(), Immediate(dest_pos_constant));
- __ j(kLess, slow_path->GetEntryLabel());
+ __ j(kLess, intrinsic_slow_path->GetEntryLabel());
} else {
__ cmpl(src_pos.AsRegister<CpuRegister>(), dest_pos.AsRegister<CpuRegister>());
- __ j(kLess, slow_path->GetEntryLabel());
+ __ j(kLess, intrinsic_slow_path->GetEntryLabel());
}
}
@@ -1131,13 +1195,13 @@
if (!optimizations.GetSourceIsNotNull()) {
// Bail out if the source is null.
__ testl(src, src);
- __ j(kEqual, slow_path->GetEntryLabel());
+ __ j(kEqual, intrinsic_slow_path->GetEntryLabel());
}
if (!optimizations.GetDestinationIsNotNull() && !optimizations.GetDestinationIsSource()) {
// Bail out if the destination is null.
__ testl(dest, dest);
- __ j(kEqual, slow_path->GetEntryLabel());
+ __ j(kEqual, intrinsic_slow_path->GetEntryLabel());
}
// If the length is negative, bail out.
@@ -1146,7 +1210,7 @@
!optimizations.GetCountIsSourceLength() &&
!optimizations.GetCountIsDestinationLength()) {
__ testl(length.AsRegister<CpuRegister>(), length.AsRegister<CpuRegister>());
- __ j(kLess, slow_path->GetEntryLabel());
+ __ j(kLess, intrinsic_slow_path->GetEntryLabel());
}
// Validity checks: source.
@@ -1154,7 +1218,7 @@
src_pos,
src,
length,
- slow_path,
+ intrinsic_slow_path,
temp1,
optimizations.GetCountIsSourceLength());
@@ -1163,7 +1227,7 @@
dest_pos,
dest,
length,
- slow_path,
+ intrinsic_slow_path,
temp1,
optimizations.GetCountIsDestinationLength());
@@ -1172,38 +1236,80 @@
// type of the destination array. We do two checks: the classes are the same,
// or the destination is Object[]. If none of these checks succeed, we go to the
// slow path.
- __ movl(temp1, Address(dest, class_offset));
- __ movl(temp2, Address(src, class_offset));
+
bool did_unpoison = false;
- if (!optimizations.GetDestinationIsNonPrimitiveArray() ||
- !optimizations.GetSourceIsNonPrimitiveArray()) {
- // One or two of the references need to be unpoisoned. Unpoison them
- // both to make the identity check valid.
- __ MaybeUnpoisonHeapReference(temp1);
- __ MaybeUnpoisonHeapReference(temp2);
- did_unpoison = true;
+ if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
+ // /* HeapReference<Class> */ temp1 = dest->klass_
+ codegen_->GenerateFieldLoadWithBakerReadBarrier(
+ invoke, temp1_loc, dest, class_offset, temp3_loc, /* needs_null_check */ false);
+ // Register `temp1` is not trashed by the read barrier emitted
+ // by GenerateFieldLoadWithBakerReadBarrier below, as that
+ // method produces a call to a ReadBarrierMarkRegX entry point,
+ // which saves all potentially live registers, including
+ // temporaries such a `temp1`.
+ // /* HeapReference<Class> */ temp2 = src->klass_
+ codegen_->GenerateFieldLoadWithBakerReadBarrier(
+ invoke, temp2_loc, src, class_offset, temp3_loc, /* needs_null_check */ false);
+ // If heap poisoning is enabled, `temp1` and `temp2` have been
+ // unpoisoned by the the previous calls to
+ // GenerateFieldLoadWithBakerReadBarrier.
+ } else {
+ // /* HeapReference<Class> */ temp1 = dest->klass_
+ __ movl(temp1, Address(dest, class_offset));
+ // /* HeapReference<Class> */ temp2 = src->klass_
+ __ movl(temp2, Address(src, class_offset));
+ if (!optimizations.GetDestinationIsNonPrimitiveArray() ||
+ !optimizations.GetSourceIsNonPrimitiveArray()) {
+ // One or two of the references need to be unpoisoned. Unpoison them
+ // both to make the identity check valid.
+ __ MaybeUnpoisonHeapReference(temp1);
+ __ MaybeUnpoisonHeapReference(temp2);
+ did_unpoison = true;
+ }
}
if (!optimizations.GetDestinationIsNonPrimitiveArray()) {
// Bail out if the destination is not a non primitive array.
- // /* HeapReference<Class> */ TMP = temp1->component_type_
- __ movl(CpuRegister(TMP), Address(temp1, component_offset));
- __ testl(CpuRegister(TMP), CpuRegister(TMP));
- __ j(kEqual, slow_path->GetEntryLabel());
- __ MaybeUnpoisonHeapReference(CpuRegister(TMP));
+ if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
+ // /* HeapReference<Class> */ TMP = temp1->component_type_
+ codegen_->GenerateFieldLoadWithBakerReadBarrier(
+ invoke, TMP_loc, temp1, component_offset, temp3_loc, /* needs_null_check */ false);
+ __ testl(CpuRegister(TMP), CpuRegister(TMP));
+ __ j(kEqual, intrinsic_slow_path->GetEntryLabel());
+ // If heap poisoning is enabled, `TMP` has been unpoisoned by
+ // the the previous call to GenerateFieldLoadWithBakerReadBarrier.
+ } else {
+ // /* HeapReference<Class> */ TMP = temp1->component_type_
+ __ movl(CpuRegister(TMP), Address(temp1, component_offset));
+ __ testl(CpuRegister(TMP), CpuRegister(TMP));
+ __ j(kEqual, intrinsic_slow_path->GetEntryLabel());
+ __ MaybeUnpoisonHeapReference(CpuRegister(TMP));
+ }
__ cmpw(Address(CpuRegister(TMP), primitive_offset), Immediate(Primitive::kPrimNot));
- __ j(kNotEqual, slow_path->GetEntryLabel());
+ __ j(kNotEqual, intrinsic_slow_path->GetEntryLabel());
}
if (!optimizations.GetSourceIsNonPrimitiveArray()) {
// Bail out if the source is not a non primitive array.
- // /* HeapReference<Class> */ TMP = temp2->component_type_
- __ movl(CpuRegister(TMP), Address(temp2, component_offset));
- __ testl(CpuRegister(TMP), CpuRegister(TMP));
- __ j(kEqual, slow_path->GetEntryLabel());
- __ MaybeUnpoisonHeapReference(CpuRegister(TMP));
+ if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
+ // For the same reason given earlier, `temp1` is not trashed by the
+ // read barrier emitted by GenerateFieldLoadWithBakerReadBarrier below.
+ // /* HeapReference<Class> */ TMP = temp2->component_type_
+ codegen_->GenerateFieldLoadWithBakerReadBarrier(
+ invoke, TMP_loc, temp2, component_offset, temp3_loc, /* needs_null_check */ false);
+ __ testl(CpuRegister(TMP), CpuRegister(TMP));
+ __ j(kEqual, intrinsic_slow_path->GetEntryLabel());
+ // If heap poisoning is enabled, `TMP` has been unpoisoned by
+ // the the previous call to GenerateFieldLoadWithBakerReadBarrier.
+ } else {
+ // /* HeapReference<Class> */ TMP = temp2->component_type_
+ __ movl(CpuRegister(TMP), Address(temp2, component_offset));
+ __ testl(CpuRegister(TMP), CpuRegister(TMP));
+ __ j(kEqual, intrinsic_slow_path->GetEntryLabel());
+ __ MaybeUnpoisonHeapReference(CpuRegister(TMP));
+ }
__ cmpw(Address(CpuRegister(TMP), primitive_offset), Immediate(Primitive::kPrimNot));
- __ j(kNotEqual, slow_path->GetEntryLabel());
+ __ j(kNotEqual, intrinsic_slow_path->GetEntryLabel());
}
__ cmpl(temp1, temp2);
@@ -1211,34 +1317,56 @@
if (optimizations.GetDestinationIsTypedObjectArray()) {
NearLabel do_copy;
__ j(kEqual, &do_copy);
- if (!did_unpoison) {
+ if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
+ // /* HeapReference<Class> */ temp1 = temp1->component_type_
+ codegen_->GenerateFieldLoadWithBakerReadBarrier(
+ invoke, temp1_loc, temp1, component_offset, temp3_loc, /* needs_null_check */ false);
+ // We do not need to emit a read barrier for the following
+ // heap reference load, as `temp1` is only used in a
+ // comparison with null below, and this reference is not
+ // kept afterwards.
+ __ cmpl(Address(temp1, super_offset), Immediate(0));
+ } else {
+ if (!did_unpoison) {
+ __ MaybeUnpoisonHeapReference(temp1);
+ }
+ // /* HeapReference<Class> */ temp1 = temp1->component_type_
+ __ movl(temp1, Address(temp1, component_offset));
__ MaybeUnpoisonHeapReference(temp1);
+ // No need to unpoison the following heap reference load, as
+ // we're comparing against null.
+ __ cmpl(Address(temp1, super_offset), Immediate(0));
}
- // /* HeapReference<Class> */ temp1 = temp1->component_type_
- __ movl(temp1, Address(temp1, component_offset));
- __ MaybeUnpoisonHeapReference(temp1);
- // /* HeapReference<Class> */ temp1 = temp1->super_class_
- __ movl(temp1, Address(temp1, super_offset));
- // No need to unpoison the result, we're comparing against null.
- __ testl(temp1, temp1);
- __ j(kNotEqual, slow_path->GetEntryLabel());
+ __ j(kNotEqual, intrinsic_slow_path->GetEntryLabel());
__ Bind(&do_copy);
} else {
- __ j(kNotEqual, slow_path->GetEntryLabel());
+ __ j(kNotEqual, intrinsic_slow_path->GetEntryLabel());
}
} else if (!optimizations.GetSourceIsNonPrimitiveArray()) {
DCHECK(optimizations.GetDestinationIsNonPrimitiveArray());
// Bail out if the source is not a non primitive array.
- // /* HeapReference<Class> */ temp1 = src->klass_
- __ movl(temp1, Address(src, class_offset));
- __ MaybeUnpoisonHeapReference(temp1);
- // /* HeapReference<Class> */ TMP = temp1->component_type_
- __ movl(CpuRegister(TMP), Address(temp1, component_offset));
- __ testl(CpuRegister(TMP), CpuRegister(TMP));
- __ j(kEqual, slow_path->GetEntryLabel());
- __ MaybeUnpoisonHeapReference(CpuRegister(TMP));
+ if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
+ // /* HeapReference<Class> */ temp1 = src->klass_
+ codegen_->GenerateFieldLoadWithBakerReadBarrier(
+ invoke, temp1_loc, src, class_offset, temp3_loc, /* needs_null_check */ false);
+ // /* HeapReference<Class> */ TMP = temp1->component_type_
+ codegen_->GenerateFieldLoadWithBakerReadBarrier(
+ invoke, TMP_loc, temp1, component_offset, temp3_loc, /* needs_null_check */ false);
+ __ testl(CpuRegister(TMP), CpuRegister(TMP));
+ __ j(kEqual, intrinsic_slow_path->GetEntryLabel());
+ } else {
+ // /* HeapReference<Class> */ temp1 = src->klass_
+ __ movl(temp1, Address(src, class_offset));
+ __ MaybeUnpoisonHeapReference(temp1);
+ // /* HeapReference<Class> */ TMP = temp1->component_type_
+ __ movl(CpuRegister(TMP), Address(temp1, component_offset));
+ // No need to unpoison `TMP` now, as we're comparing against null.
+ __ testl(CpuRegister(TMP), CpuRegister(TMP));
+ __ j(kEqual, intrinsic_slow_path->GetEntryLabel());
+ __ MaybeUnpoisonHeapReference(CpuRegister(TMP));
+ }
__ cmpw(Address(CpuRegister(TMP), primitive_offset), Immediate(Primitive::kPrimNot));
- __ j(kNotEqual, slow_path->GetEntryLabel());
+ __ j(kNotEqual, intrinsic_slow_path->GetEntryLabel());
}
// Compute base source address, base destination address, and end source address.
@@ -1266,19 +1394,86 @@
__ leal(temp3, Address(temp1, length.AsRegister<CpuRegister>(), ScaleFactor::TIMES_4, 0));
}
- // Iterate over the arrays and do a raw copy of the objects. We don't need to
- // poison/unpoison.
- NearLabel loop, done;
- __ cmpl(temp1, temp3);
- __ j(kEqual, &done);
- __ Bind(&loop);
- __ movl(CpuRegister(TMP), Address(temp1, 0));
- __ movl(Address(temp2, 0), CpuRegister(TMP));
- __ addl(temp1, Immediate(element_size));
- __ addl(temp2, Immediate(element_size));
- __ cmpl(temp1, temp3);
- __ j(kNotEqual, &loop);
- __ Bind(&done);
+ if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
+ // SystemArrayCopy implementation for Baker read barriers (see
+ // also CodeGeneratorX86_64::GenerateReferenceLoadWithBakerReadBarrier):
+ //
+ // if (src_ptr != end_ptr) {
+ // uint32_t rb_state = Lockword(src->monitor_).ReadBarrierState();
+ // lfence; // Load fence or artificial data dependency to prevent load-load reordering
+ // bool is_gray = (rb_state == ReadBarrier::gray_ptr_);
+ // if (is_gray) {
+ // // Slow-path copy.
+ // do {
+ // *dest_ptr++ = MaybePoison(ReadBarrier::Mark(MaybeUnpoison(*src_ptr++)));
+ // } while (src_ptr != end_ptr)
+ // } else {
+ // // Fast-path copy.
+ // do {
+ // *dest_ptr++ = *src_ptr++;
+ // } while (src_ptr != end_ptr)
+ // }
+ // }
+
+ NearLabel loop, done;
+
+ // Don't enter copy loop if `length == 0`.
+ __ cmpl(temp1, temp3);
+ __ j(kEqual, &done);
+
+ // /* int32_t */ monitor = src->monitor_
+ __ movl(CpuRegister(TMP), Address(src, monitor_offset));
+ // /* LockWord */ lock_word = LockWord(monitor)
+ static_assert(sizeof(LockWord) == sizeof(int32_t),
+ "art::LockWord and int32_t have different sizes.");
+
+ // Load fence to prevent load-load reordering.
+ // Note that this is a no-op, thanks to the x86-64 memory model.
+ codegen_->GenerateMemoryBarrier(MemBarrierKind::kLoadAny);
+
+ // Slow path used to copy array when `src` is gray.
+ SlowPathCode* read_barrier_slow_path =
+ new (GetAllocator()) ReadBarrierSystemArrayCopySlowPathX86_64(invoke);
+ codegen_->AddSlowPath(read_barrier_slow_path);
+
+ // Given the numeric representation, it's enough to check the low bit of the
+ // rb_state. We do that by shifting the bit out of the lock word with SHR.
+ static_assert(ReadBarrier::white_ptr_ == 0, "Expecting white to have value 0");
+ static_assert(ReadBarrier::gray_ptr_ == 1, "Expecting gray to have value 1");
+ static_assert(ReadBarrier::black_ptr_ == 2, "Expecting black to have value 2");
+ __ shrl(CpuRegister(TMP), Immediate(LockWord::kReadBarrierStateShift + 1));
+ __ j(kCarrySet, read_barrier_slow_path->GetEntryLabel());
+
+ // Fast-path copy.
+ // Iterate over the arrays and do a raw copy of the objects. We don't need to
+ // poison/unpoison.
+ __ Bind(&loop);
+ __ movl(CpuRegister(TMP), Address(temp1, 0));
+ __ movl(Address(temp2, 0), CpuRegister(TMP));
+ __ addl(temp1, Immediate(element_size));
+ __ addl(temp2, Immediate(element_size));
+ __ cmpl(temp1, temp3);
+ __ j(kNotEqual, &loop);
+
+ __ Bind(read_barrier_slow_path->GetExitLabel());
+ __ Bind(&done);
+ } else {
+ // Non read barrier code.
+
+ // Iterate over the arrays and do a raw copy of the objects. We don't need to
+ // poison/unpoison.
+ NearLabel loop, done;
+ __ cmpl(temp1, temp3);
+ __ j(kEqual, &done);
+ __ Bind(&loop);
+ __ movl(CpuRegister(TMP), Address(temp1, 0));
+ __ movl(Address(temp2, 0), CpuRegister(TMP));
+ __ addl(temp1, Immediate(element_size));
+ __ addl(temp2, Immediate(element_size));
+ __ cmpl(temp1, temp3);
+ __ j(kNotEqual, &loop);
+ __ Bind(&done);
+ }
// We only need one card marking on the destination array.
codegen_->MarkGCCard(temp1,
@@ -1287,7 +1482,7 @@
CpuRegister(kNoRegister),
/* value_can_be_null */ false);
- __ Bind(slow_path->GetExitLabel());
+ __ Bind(intrinsic_slow_path->GetExitLabel());
}
void IntrinsicLocationsBuilderX86_64::VisitStringCompareTo(HInvoke* invoke) {
diff --git a/compiler/utils/arm/assembler_arm.h b/compiler/utils/arm/assembler_arm.h
index 86548e1..c52a5a9 100644
--- a/compiler/utils/arm/assembler_arm.h
+++ b/compiler/utils/arm/assembler_arm.h
@@ -908,6 +908,12 @@
// reg = -reg.
rsb(reg, reg, ShifterOperand(0));
}
+ // Poison a heap reference contained in `reg` if heap poisoning is enabled.
+ void MaybePoisonHeapReference(Register reg) {
+ if (kPoisonHeapReferences) {
+ PoisonHeapReference(reg);
+ }
+ }
// Unpoison a heap reference contained in `reg` if heap poisoning is enabled.
void MaybeUnpoisonHeapReference(Register reg) {
if (kPoisonHeapReferences) {
diff --git a/compiler/utils/arm64/assembler_arm64.cc b/compiler/utils/arm64/assembler_arm64.cc
index 19450b3..f91bcfa 100644
--- a/compiler/utils/arm64/assembler_arm64.cc
+++ b/compiler/utils/arm64/assembler_arm64.cc
@@ -146,6 +146,12 @@
___ Neg(reg, Operand(reg));
}
+void Arm64Assembler::MaybePoisonHeapReference(Register reg) {
+ if (kPoisonHeapReferences) {
+ PoisonHeapReference(reg);
+ }
+}
+
void Arm64Assembler::MaybeUnpoisonHeapReference(Register reg) {
if (kPoisonHeapReferences) {
UnpoisonHeapReference(reg);
diff --git a/compiler/utils/arm64/assembler_arm64.h b/compiler/utils/arm64/assembler_arm64.h
index 2847cb8..66a7fed 100644
--- a/compiler/utils/arm64/assembler_arm64.h
+++ b/compiler/utils/arm64/assembler_arm64.h
@@ -93,6 +93,8 @@
void PoisonHeapReference(vixl::aarch64::Register reg);
// Unpoison a heap reference contained in `reg`.
void UnpoisonHeapReference(vixl::aarch64::Register reg);
+ // Poison a heap reference contained in `reg` if heap poisoning is enabled.
+ void MaybePoisonHeapReference(vixl::aarch64::Register reg);
// Unpoison a heap reference contained in `reg` if heap poisoning is enabled.
void MaybeUnpoisonHeapReference(vixl::aarch64::Register reg);
diff --git a/compiler/utils/x86/assembler_x86.h b/compiler/utils/x86/assembler_x86.h
index 92a92a5..63aa4a4 100644
--- a/compiler/utils/x86/assembler_x86.h
+++ b/compiler/utils/x86/assembler_x86.h
@@ -639,6 +639,12 @@
void PoisonHeapReference(Register reg) { negl(reg); }
// Unpoison a heap reference contained in `reg`.
void UnpoisonHeapReference(Register reg) { negl(reg); }
+ // Poison a heap reference contained in `reg` if heap poisoning is enabled.
+ void MaybePoisonHeapReference(Register reg) {
+ if (kPoisonHeapReferences) {
+ PoisonHeapReference(reg);
+ }
+ }
// Unpoison a heap reference contained in `reg` if heap poisoning is enabled.
void MaybeUnpoisonHeapReference(Register reg) {
if (kPoisonHeapReferences) {
diff --git a/compiler/utils/x86_64/assembler_x86_64.h b/compiler/utils/x86_64/assembler_x86_64.h
index 370f49c..a4166f9 100644
--- a/compiler/utils/x86_64/assembler_x86_64.h
+++ b/compiler/utils/x86_64/assembler_x86_64.h
@@ -741,6 +741,12 @@
void PoisonHeapReference(CpuRegister reg) { negl(reg); }
// Unpoison a heap reference contained in `reg`.
void UnpoisonHeapReference(CpuRegister reg) { negl(reg); }
+ // Poison a heap reference contained in `reg` if heap poisoning is enabled.
+ void MaybePoisonHeapReference(CpuRegister reg) {
+ if (kPoisonHeapReferences) {
+ PoisonHeapReference(reg);
+ }
+ }
// Unpoison a heap reference contained in `reg` if heap poisoning is enabled.
void MaybeUnpoisonHeapReference(CpuRegister reg) {
if (kPoisonHeapReferences) {