Remove unused DMB code paths in the ARM64 Optimizing Compiler
All ARM64 CPUs now use the acquire-release code paths for volatile loads
and stores. This patch removes the instruction set feature
PreferAcquireRelease() as well as all of the now-unused explicit DMB code
paths.
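
As an illustration (a sketch only, with placeholder register/operand names,
not compilable in isolation), a volatile field load that was previously
lowered with an explicit barrier on the non-acquire-release path:

    __ Ldr(dst, HeapOperand(obj, offset));
    __ Dmb(InnerShareable, BarrierReads);   // separate DMB ISHLD after the load

is now always emitted via CodeGeneratorARM64::LoadAcquire, roughly:

    __ Add(temp, obj, offset);              // materialize the address for LDAR
    __ Ldar(dst, HeapOperand(temp));        // ordering carried by LDAR itself

Volatile stores likewise go through StoreRelease() (STLR) instead of the
DMB + STR + DMB sequence.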
Change-Id: I61c320d6d685f96c9e260f25eac3593907793830
Signed-off-by: Serban Constantinescu <serban.constantinescu@linaro.org>
diff --git a/compiler/optimizing/code_generator_arm64.cc b/compiler/optimizing/code_generator_arm64.cc
index c0e3959..d56de0b 100644
--- a/compiler/optimizing/code_generator_arm64.cc
+++ b/compiler/optimizing/code_generator_arm64.cc
@@ -1556,21 +1556,13 @@
UseScratchRegisterScope temps(GetVIXLAssembler());
Register temp = temps.AcquireW();
size_t status_offset = mirror::Class::StatusOffset().SizeValue();
- bool use_acquire_release = codegen_->GetInstructionSetFeatures().PreferAcquireRelease();
// Even if the initialized flag is set, we need to ensure consistent memory ordering.
- if (use_acquire_release) {
- // TODO(vixl): Let the MacroAssembler handle MemOperand.
- __ Add(temp, class_reg, status_offset);
- __ Ldar(temp, HeapOperand(temp));
- __ Cmp(temp, mirror::Class::kStatusInitialized);
- __ B(lt, slow_path->GetEntryLabel());
- } else {
- __ Ldr(temp, HeapOperand(class_reg, status_offset));
- __ Cmp(temp, mirror::Class::kStatusInitialized);
- __ B(lt, slow_path->GetEntryLabel());
- __ Dmb(InnerShareable, BarrierReads);
- }
+ // TODO(vixl): Let the MacroAssembler handle MemOperand.
+ __ Add(temp, class_reg, status_offset);
+ __ Ldar(temp, HeapOperand(temp));
+ __ Cmp(temp, mirror::Class::kStatusInitialized);
+ __ B(lt, slow_path->GetEntryLabel());
__ Bind(slow_path->GetExitLabel());
}
@@ -1716,9 +1708,7 @@
uint32_t offset = field_info.GetFieldOffset().Uint32Value();
Primitive::Type field_type = field_info.GetFieldType();
BlockPoolsScope block_pools(GetVIXLAssembler());
-
MemOperand field = HeapOperand(InputRegisterAt(instruction, 0), field_info.GetFieldOffset());
- bool use_acquire_release = codegen_->GetInstructionSetFeatures().PreferAcquireRelease();
if (field_type == Primitive::kPrimNot && kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
// Object FieldGet with Baker's read barrier case.
@@ -1736,26 +1726,15 @@
offset,
temp,
/* needs_null_check */ true,
- field_info.IsVolatile() && use_acquire_release);
- if (field_info.IsVolatile() && !use_acquire_release) {
- // For IRIW sequential consistency kLoadAny is not sufficient.
- codegen_->GenerateMemoryBarrier(MemBarrierKind::kAnyAny);
- }
+ field_info.IsVolatile());
} else {
// General case.
if (field_info.IsVolatile()) {
- if (use_acquire_release) {
- // Note that a potential implicit null check is handled in this
- // CodeGeneratorARM64::LoadAcquire call.
- // NB: LoadAcquire will record the pc info if needed.
- codegen_->LoadAcquire(
- instruction, OutputCPURegister(instruction), field, /* needs_null_check */ true);
- } else {
- codegen_->Load(field_type, OutputCPURegister(instruction), field);
- codegen_->MaybeRecordImplicitNullCheck(instruction);
- // For IRIW sequential consistency kLoadAny is not sufficient.
- codegen_->GenerateMemoryBarrier(MemBarrierKind::kAnyAny);
- }
+ // Note that a potential implicit null check is handled in this
+ // CodeGeneratorARM64::LoadAcquire call.
+ // NB: LoadAcquire will record the pc info if needed.
+ codegen_->LoadAcquire(
+ instruction, OutputCPURegister(instruction), field, /* needs_null_check */ true);
} else {
codegen_->Load(field_type, OutputCPURegister(instruction), field);
codegen_->MaybeRecordImplicitNullCheck(instruction);
@@ -1791,7 +1770,6 @@
CPURegister source = value;
Offset offset = field_info.GetFieldOffset();
Primitive::Type field_type = field_info.GetFieldType();
- bool use_acquire_release = codegen_->GetInstructionSetFeatures().PreferAcquireRelease();
{
// We use a block to end the scratch scope before the write barrier, thus
@@ -1807,15 +1785,8 @@
}
if (field_info.IsVolatile()) {
- if (use_acquire_release) {
- codegen_->StoreRelease(field_type, source, HeapOperand(obj, offset));
- codegen_->MaybeRecordImplicitNullCheck(instruction);
- } else {
- codegen_->GenerateMemoryBarrier(MemBarrierKind::kAnyStore);
- codegen_->Store(field_type, source, HeapOperand(obj, offset));
- codegen_->MaybeRecordImplicitNullCheck(instruction);
- codegen_->GenerateMemoryBarrier(MemBarrierKind::kAnyAny);
- }
+ codegen_->StoreRelease(field_type, source, HeapOperand(obj, offset));
+ codegen_->MaybeRecordImplicitNullCheck(instruction);
} else {
codegen_->Store(field_type, source, HeapOperand(obj, offset));
codegen_->MaybeRecordImplicitNullCheck(instruction);
diff --git a/compiler/optimizing/intrinsics_arm64.cc b/compiler/optimizing/intrinsics_arm64.cc
index c888f01..dbd7688 100644
--- a/compiler/optimizing/intrinsics_arm64.cc
+++ b/compiler/optimizing/intrinsics_arm64.cc
@@ -750,7 +750,6 @@
Register offset = XRegisterFrom(offset_loc); // Long offset.
Location trg_loc = locations->Out();
Register trg = RegisterFrom(trg_loc, type);
- bool use_acquire_release = codegen->GetInstructionSetFeatures().PreferAcquireRelease();
if (type == Primitive::kPrimNot && kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
// UnsafeGetObject/UnsafeGetObjectVolatile with Baker's read barrier case.
@@ -758,19 +757,11 @@
Register temp = temps.AcquireW();
codegen->GenerateArrayLoadWithBakerReadBarrier(
invoke, trg_loc, base, 0U, offset_loc, temp, /* needs_null_check */ false);
- if (is_volatile && !use_acquire_release) {
- __ Dmb(InnerShareable, BarrierReads);
- }
} else {
// Other cases.
MemOperand mem_op(base.X(), offset);
if (is_volatile) {
- if (use_acquire_release) {
- codegen->LoadAcquire(invoke, trg, mem_op, /* needs_null_check */ true);
- } else {
- codegen->Load(type, trg, mem_op);
- __ Dmb(InnerShareable, BarrierReads);
- }
+ codegen->LoadAcquire(invoke, trg, mem_op, /* needs_null_check */ true);
} else {
codegen->Load(type, trg, mem_op);
}
@@ -884,8 +875,6 @@
Register offset = XRegisterFrom(locations->InAt(2)); // Long offset.
Register value = RegisterFrom(locations->InAt(3), type);
Register source = value;
- bool use_acquire_release = codegen->GetInstructionSetFeatures().PreferAcquireRelease();
-
MemOperand mem_op(base.X(), offset);
{
@@ -902,15 +891,7 @@
}
if (is_volatile || is_ordered) {
- if (use_acquire_release) {
- codegen->StoreRelease(type, source, mem_op);
- } else {
- __ Dmb(InnerShareable, BarrierAll);
- codegen->Store(type, source, mem_op);
- if (is_volatile) {
- __ Dmb(InnerShareable, BarrierReads);
- }
- }
+ codegen->StoreRelease(type, source, mem_op);
} else {
codegen->Store(type, source, mem_op);
}
@@ -1000,7 +981,6 @@
}
static void GenCas(LocationSummary* locations, Primitive::Type type, CodeGeneratorARM64* codegen) {
- bool use_acquire_release = codegen->GetInstructionSetFeatures().PreferAcquireRelease();
vixl::MacroAssembler* masm = codegen->GetAssembler()->vixl_masm_;
Register out = WRegisterFrom(locations->Out()); // Boolean result.
@@ -1036,43 +1016,20 @@
// result = tmp_value != 0;
vixl::Label loop_head, exit_loop;
- if (use_acquire_release) {
- __ Bind(&loop_head);
- // TODO: When `type == Primitive::kPrimNot`, add a read barrier for
- // the reference stored in the object before attempting the CAS,
- // similar to the one in the art::Unsafe_compareAndSwapObject JNI
- // implementation.
- //
- // Note that this code is not (yet) used when read barriers are
- // enabled (see IntrinsicLocationsBuilderARM64::VisitUnsafeCASObject).
- DCHECK(!(type == Primitive::kPrimNot && kEmitCompilerReadBarrier));
- __ Ldaxr(tmp_value, MemOperand(tmp_ptr));
- __ Cmp(tmp_value, expected);
- __ B(&exit_loop, ne);
- __ Stlxr(tmp_32, value, MemOperand(tmp_ptr));
- __ Cbnz(tmp_32, &loop_head);
- } else {
- // Emit a `Dmb(InnerShareable, BarrierAll)` (DMB ISH) instruction
- // instead of a `Dmb(InnerShareable, BarrierWrites)` (DMB ISHST)
- // one, as the latter allows a preceding load to be delayed past
- // the STXR instruction below.
- __ Dmb(InnerShareable, BarrierAll);
- __ Bind(&loop_head);
- // TODO: When `type == Primitive::kPrimNot`, add a read barrier for
- // the reference stored in the object before attempting the CAS,
- // similar to the one in the art::Unsafe_compareAndSwapObject JNI
- // implementation.
- //
- // Note that this code is not (yet) used when read barriers are
- // enabled (see IntrinsicLocationsBuilderARM64::VisitUnsafeCASObject).
- DCHECK(!(type == Primitive::kPrimNot && kEmitCompilerReadBarrier));
- __ Ldxr(tmp_value, MemOperand(tmp_ptr));
- __ Cmp(tmp_value, expected);
- __ B(&exit_loop, ne);
- __ Stxr(tmp_32, value, MemOperand(tmp_ptr));
- __ Cbnz(tmp_32, &loop_head);
- __ Dmb(InnerShareable, BarrierAll);
- }
+ __ Bind(&loop_head);
+ // TODO: When `type == Primitive::kPrimNot`, add a read barrier for
+ // the reference stored in the object before attempting the CAS,
+ // similar to the one in the art::Unsafe_compareAndSwapObject JNI
+ // implementation.
+ //
+ // Note that this code is not (yet) used when read barriers are
+ // enabled (see IntrinsicLocationsBuilderARM64::VisitUnsafeCASObject).
+ DCHECK(!(type == Primitive::kPrimNot && kEmitCompilerReadBarrier));
+ __ Ldaxr(tmp_value, MemOperand(tmp_ptr));
+ __ Cmp(tmp_value, expected);
+ __ B(&exit_loop, ne);
+ __ Stlxr(tmp_32, value, MemOperand(tmp_ptr));
+ __ Cbnz(tmp_32, &loop_head);
__ Bind(&exit_loop);
__ Cset(out, eq);
diff --git a/runtime/arch/arm64/instruction_set_features_arm64.h b/runtime/arch/arm64/instruction_set_features_arm64.h
index 805131f..abd7e83 100644
--- a/runtime/arch/arm64/instruction_set_features_arm64.h
+++ b/runtime/arch/arm64/instruction_set_features_arm64.h
@@ -66,14 +66,6 @@
return fix_cortex_a53_843419_;
}
- // NOTE: This flag can be tunned on a CPU basis. In general all ARMv8 CPUs
- // should prefer the Acquire-Release semantics over the explicit DMBs when
- // handling load/store-volatile. For a specific use case see the ARM64
- // Optimizing backend.
- bool PreferAcquireRelease() const {
- return true;
- }
-
virtual ~Arm64InstructionSetFeatures() {}
protected:
diff --git a/runtime/arch/arm64/instruction_set_features_arm64_test.cc b/runtime/arch/arm64/instruction_set_features_arm64_test.cc
index 599f24e..027e59c 100644
--- a/runtime/arch/arm64/instruction_set_features_arm64_test.cc
+++ b/runtime/arch/arm64/instruction_set_features_arm64_test.cc
@@ -30,8 +30,6 @@
EXPECT_TRUE(arm64_features->Equals(arm64_features.get()));
EXPECT_STREQ("smp,a53", arm64_features->GetFeatureString().c_str());
EXPECT_EQ(arm64_features->AsBitmap(), 3U);
- // See the comments in instruction_set_features_arm64.h.
- EXPECT_TRUE(arm64_features->AsArm64InstructionSetFeatures()->PreferAcquireRelease());
}
} // namespace art