Re-enable the ArraySet fast path with Baker read barriers.
Benchmarks (ARM64) score variations on Nexus 5X with CPU
cores clamped at 960000 Hz (aosp_bullhead-userdebug build):
- Ritzperf - average (lower is better): -0.95% (virtually unchanged)
- CaffeineMark - average (higher is better): +2.50% (slightly better)
- DeltaBlue (lower is better): -0.55% (virtually unchanged)
- Richards - average (lower is better): +0.67% (virtually unchanged)
- SciMark2 - average (higher is better): -0.10% (virtually unchanged)
Details about Ritzperf benchmarks with meaningful variations
(lower is better):
- GenericCalcActions.MemAllocTest: -5.05% (better)
Details about CaffeineMark benchmarks with meaningful variations
(higher is better):
- Method: +16.88% (better)
Details about Richards benchmarks with meaningful variations
(lower is better):
- deutsch_acc_interface: +9.86% (worse)
Boot image code size variation on Nexus 5X
(aosp_bullhead-userdebug build):
- total ARM64 framework Oat files size change:
105933472 bytes -> 106027680 bytes (+0.09%)
- total ARM framework Oat files size change:
89157936 bytes -> 89239856 bytes (+0.09%)
Test: ART host and target (ARM, ARM64) tests.
Bug: 29516974
Bug: 29506760
Bug: 12687968
Change-Id: Ib9e9709712295e17804b8888ac10e3d518ff2e70
diff --git a/compiler/optimizing/code_generator_x86.cc b/compiler/optimizing/code_generator_x86.cc
index f50eb5c..e8740a5 100644
--- a/compiler/optimizing/code_generator_x86.cc
+++ b/compiler/optimizing/code_generator_x86.cc
@@ -460,6 +460,7 @@
DCHECK(instruction_->IsInstanceFieldGet() ||
instruction_->IsStaticFieldGet() ||
instruction_->IsArrayGet() ||
+ instruction_->IsArraySet() ||
instruction_->IsLoadClass() ||
instruction_->IsLoadString() ||
instruction_->IsInstanceOf() ||
@@ -5279,6 +5280,7 @@
}
if (needs_write_barrier) {
// Temporary registers for the write barrier.
+ // These registers may be used for Baker read barriers too.
locations->AddTemp(Location::RequiresRegister()); // Possibly used for ref. poisoning too.
// Ensure the card is in a byte register.
locations->AddTemp(Location::RegisterLocation(ECX));
@@ -5349,9 +5351,13 @@
DCHECK(needs_write_barrier);
Register register_value = value.AsRegister<Register>();
- NearLabel done, not_null, do_put;
+ // We cannot use a NearLabel for `done`, as its range may be too
+ // short when Baker read barriers are enabled.
+ Label done;
+ NearLabel not_null, do_put;
SlowPathCode* slow_path = nullptr;
- Register temp = locations->GetTemp(0).AsRegister<Register>();
+ Location temp_loc = locations->GetTemp(0);
+ Register temp = temp_loc.AsRegister<Register>();
if (may_need_runtime_call_for_type_check) {
slow_path = new (GetGraph()->GetArena()) ArraySetSlowPathX86(instruction);
codegen_->AddSlowPath(slow_path);
@@ -5365,33 +5371,77 @@
}
if (kEmitCompilerReadBarrier) {
- // When read barriers are enabled, the type checking
- // instrumentation requires two read barriers:
- //
- // __ movl(temp2, temp);
- // // /* HeapReference<Class> */ temp = temp->component_type_
- // __ movl(temp, Address(temp, component_offset));
- // codegen_->GenerateReadBarrierSlow(
- // instruction, temp_loc, temp_loc, temp2_loc, component_offset);
- //
- // // /* HeapReference<Class> */ temp2 = register_value->klass_
- // __ movl(temp2, Address(register_value, class_offset));
- // codegen_->GenerateReadBarrierSlow(
- // instruction, temp2_loc, temp2_loc, value, class_offset, temp_loc);
- //
- // __ cmpl(temp, temp2);
- //
- // However, the second read barrier may trash `temp`, as it
- // is a temporary register, and as such would not be saved
- // along with live registers before calling the runtime (nor
- // restored afterwards). So in this case, we bail out and
- // delegate the work to the array set slow path.
- //
- // TODO: Extend the register allocator to support a new
- // "(locally) live temp" location so as to avoid always
- // going into the slow path when read barriers are enabled.
- __ jmp(slow_path->GetEntryLabel());
+ if (!kUseBakerReadBarrier) {
+ // When (non-Baker) read barriers are enabled, the type
+ // checking instrumentation requires two read barriers
+ // generated by CodeGeneratorX86::GenerateReadBarrierSlow:
+ //
+ // __ movl(temp2, temp);
+ // // /* HeapReference<Class> */ temp = temp->component_type_
+ // __ movl(temp, Address(temp, component_offset));
+ // codegen_->GenerateReadBarrierSlow(
+ // instruction, temp_loc, temp_loc, temp2_loc, component_offset);
+ //
+ // // /* HeapReference<Class> */ temp2 = register_value->klass_
+ // __ movl(temp2, Address(register_value, class_offset));
+ // codegen_->GenerateReadBarrierSlow(
+ // instruction, temp2_loc, temp2_loc, value, class_offset, temp_loc);
+ //
+ // __ cmpl(temp, temp2);
+ //
+ // However, the second read barrier may trash `temp`, as it
+ // is a temporary register, and as such would not be saved
+ // along with live registers before calling the runtime (nor
+ // restored afterwards). So in this case, we bail out and
+ // delegate the work to the array set slow path.
+ //
+ // TODO: Extend the register allocator to support a new
+ // "(locally) live temp" location so as to avoid always
+ // going into the slow path when read barriers are enabled?
+ //
+ // There is no such problem with Baker read barriers (see below).
+ __ jmp(slow_path->GetEntryLabel());
+ } else {
+ Location temp2_loc = locations->GetTemp(1);
+ Register temp2 = temp2_loc.AsRegister<Register>();
+ // /* HeapReference<Class> */ temp = array->klass_
+ codegen_->GenerateFieldLoadWithBakerReadBarrier(
+ instruction, temp_loc, array, class_offset, /* needs_null_check */ true);
+
+ // /* HeapReference<Class> */ temp = temp->component_type_
+ codegen_->GenerateFieldLoadWithBakerReadBarrier(
+ instruction, temp_loc, temp, component_offset, /* needs_null_check */ false);
+ // Register `temp` is not trashed by the read barrier
+ // emitted by GenerateFieldLoadWithBakerReadBarrier below,
+ // as that method produces a call to a ReadBarrierMarkRegX
+ // entry point, which saves all potentially live registers,
+ // including temporaries such a `temp`.
+ // /* HeapReference<Class> */ temp2 = register_value->klass_
+ codegen_->GenerateFieldLoadWithBakerReadBarrier(
+ instruction, temp2_loc, register_value, class_offset, /* needs_null_check */ false);
+ // If heap poisoning is enabled, `temp` and `temp2` have
+ // been unpoisoned by the the previous calls to
+ // CodeGeneratorX86::GenerateFieldLoadWithBakerReadBarrier.
+ __ cmpl(temp, temp2);
+
+ if (instruction->StaticTypeOfArrayIsObjectArray()) {
+ __ j(kEqual, &do_put);
+ // We do not need to emit a read barrier for the
+ // following heap reference load, as `temp` is only used
+ // in a comparison with null below, and this reference
+ // is not kept afterwards. Also, if heap poisoning is
+ // enabled, there is no need to unpoison that heap
+ // reference for the same reason (comparison with null).
+ __ cmpl(Address(temp, super_offset), Immediate(0));
+ __ j(kNotEqual, slow_path->GetEntryLabel());
+ __ Bind(&do_put);
+ } else {
+ __ j(kNotEqual, slow_path->GetEntryLabel());
+ }
+ }
} else {
+ // Non read barrier code.
+
// /* HeapReference<Class> */ temp = array->klass_
__ movl(temp, Address(array, class_offset));
codegen_->MaybeRecordImplicitNullCheck(instruction);
@@ -5410,11 +5460,10 @@
// not been unpoisoned yet; unpoison it now.
__ MaybeUnpoisonHeapReference(temp);
- // /* HeapReference<Class> */ temp = temp->super_class_
- __ movl(temp, Address(temp, super_offset));
- // If heap poisoning is enabled, no need to unpoison
- // `temp`, as we are comparing against null below.
- __ testl(temp, temp);
+ // If heap poisoning is enabled, no need to unpoison the
+ // heap reference loaded below, as it is only used for a
+ // comparison with null.
+ __ cmpl(Address(temp, super_offset), Immediate(0));
__ j(kNotEqual, slow_path->GetEntryLabel());
__ Bind(&do_put);
} else {