Remove unused DMB code paths in the ARM64 Optimizing Compiler

All ARM64 CPUs now use the acquire-release code paths, so the explicit-DMB
alternatives are dead code. This patch removes the PreferAcquireRelease()
instruction set feature along with all of the now-unused DMB code paths.
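
For context, a minimal standalone C++ sketch (plain std::atomic, not ART/VIXL
code) of the two lowering strategies being consolidated. The fence-based form
is analogous to the removed paths, which bracketed plain loads/stores with DMB
barriers (BarrierReads/BarrierAll); the seq_cst accesses mirror what the
backend now always emits, since AArch64 compilers typically lower them to
LDAR/STLR. The sketch uses full fences for simplicity, so it is an
illustration of the idea, not the exact barrier placement ART used.

    #include <atomic>

    std::atomic<int> field{0};

    // Acquire-release style: a single LDAR / STLR is sufficient on ARMv8.
    int load_acq_rel() {
      return field.load(std::memory_order_seq_cst);           // ldar
    }
    void store_acq_rel(int v) {
      field.store(v, std::memory_order_seq_cst);              // stlr
    }

    // Explicit-barrier style, analogous to the removed DMB code paths:
    // a plain access bracketed by full fences (DMB ISH on AArch64).
    int load_fenced() {
      int v = field.load(std::memory_order_relaxed);          // ldr
      std::atomic_thread_fence(std::memory_order_seq_cst);    // dmb ish
      return v;
    }
    void store_fenced(int v) {
      std::atomic_thread_fence(std::memory_order_seq_cst);    // dmb ish
      field.store(v, std::memory_order_relaxed);              // str
      std::atomic_thread_fence(std::memory_order_seq_cst);    // dmb ish
    }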

Change-Id: I61c320d6d685f96c9e260f25eac3593907793830
Signed-off-by: Serban Constantinescu <serban.constantinescu@linaro.org>
diff --git a/compiler/optimizing/code_generator_arm64.cc b/compiler/optimizing/code_generator_arm64.cc
index c0e3959..d56de0b 100644
--- a/compiler/optimizing/code_generator_arm64.cc
+++ b/compiler/optimizing/code_generator_arm64.cc
@@ -1556,21 +1556,13 @@
   UseScratchRegisterScope temps(GetVIXLAssembler());
   Register temp = temps.AcquireW();
   size_t status_offset = mirror::Class::StatusOffset().SizeValue();
-  bool use_acquire_release = codegen_->GetInstructionSetFeatures().PreferAcquireRelease();
 
   // Even if the initialized flag is set, we need to ensure consistent memory ordering.
-  if (use_acquire_release) {
-    // TODO(vixl): Let the MacroAssembler handle MemOperand.
-    __ Add(temp, class_reg, status_offset);
-    __ Ldar(temp, HeapOperand(temp));
-    __ Cmp(temp, mirror::Class::kStatusInitialized);
-    __ B(lt, slow_path->GetEntryLabel());
-  } else {
-    __ Ldr(temp, HeapOperand(class_reg, status_offset));
-    __ Cmp(temp, mirror::Class::kStatusInitialized);
-    __ B(lt, slow_path->GetEntryLabel());
-    __ Dmb(InnerShareable, BarrierReads);
-  }
+  // TODO(vixl): Let the MacroAssembler handle MemOperand.
+  __ Add(temp, class_reg, status_offset);
+  __ Ldar(temp, HeapOperand(temp));
+  __ Cmp(temp, mirror::Class::kStatusInitialized);
+  __ B(lt, slow_path->GetEntryLabel());
   __ Bind(slow_path->GetExitLabel());
 }
 
@@ -1716,9 +1708,7 @@
   uint32_t offset = field_info.GetFieldOffset().Uint32Value();
   Primitive::Type field_type = field_info.GetFieldType();
   BlockPoolsScope block_pools(GetVIXLAssembler());
-
   MemOperand field = HeapOperand(InputRegisterAt(instruction, 0), field_info.GetFieldOffset());
-  bool use_acquire_release = codegen_->GetInstructionSetFeatures().PreferAcquireRelease();
 
   if (field_type == Primitive::kPrimNot && kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
     // Object FieldGet with Baker's read barrier case.
@@ -1736,26 +1726,15 @@
         offset,
         temp,
         /* needs_null_check */ true,
-        field_info.IsVolatile() && use_acquire_release);
-    if (field_info.IsVolatile() && !use_acquire_release) {
-      // For IRIW sequential consistency kLoadAny is not sufficient.
-      codegen_->GenerateMemoryBarrier(MemBarrierKind::kAnyAny);
-    }
+        field_info.IsVolatile());
   } else {
     // General case.
     if (field_info.IsVolatile()) {
-      if (use_acquire_release) {
-        // Note that a potential implicit null check is handled in this
-        // CodeGeneratorARM64::LoadAcquire call.
-        // NB: LoadAcquire will record the pc info if needed.
-        codegen_->LoadAcquire(
-            instruction, OutputCPURegister(instruction), field, /* needs_null_check */ true);
-      } else {
-        codegen_->Load(field_type, OutputCPURegister(instruction), field);
-        codegen_->MaybeRecordImplicitNullCheck(instruction);
-        // For IRIW sequential consistency kLoadAny is not sufficient.
-        codegen_->GenerateMemoryBarrier(MemBarrierKind::kAnyAny);
-      }
+      // Note that a potential implicit null check is handled in this
+      // CodeGeneratorARM64::LoadAcquire call.
+      // NB: LoadAcquire will record the pc info if needed.
+      codegen_->LoadAcquire(
+          instruction, OutputCPURegister(instruction), field, /* needs_null_check */ true);
     } else {
       codegen_->Load(field_type, OutputCPURegister(instruction), field);
       codegen_->MaybeRecordImplicitNullCheck(instruction);
@@ -1791,7 +1770,6 @@
   CPURegister source = value;
   Offset offset = field_info.GetFieldOffset();
   Primitive::Type field_type = field_info.GetFieldType();
-  bool use_acquire_release = codegen_->GetInstructionSetFeatures().PreferAcquireRelease();
 
   {
     // We use a block to end the scratch scope before the write barrier, thus
@@ -1807,15 +1785,8 @@
     }
 
     if (field_info.IsVolatile()) {
-      if (use_acquire_release) {
-        codegen_->StoreRelease(field_type, source, HeapOperand(obj, offset));
-        codegen_->MaybeRecordImplicitNullCheck(instruction);
-      } else {
-        codegen_->GenerateMemoryBarrier(MemBarrierKind::kAnyStore);
-        codegen_->Store(field_type, source, HeapOperand(obj, offset));
-        codegen_->MaybeRecordImplicitNullCheck(instruction);
-        codegen_->GenerateMemoryBarrier(MemBarrierKind::kAnyAny);
-      }
+      codegen_->StoreRelease(field_type, source, HeapOperand(obj, offset));
+      codegen_->MaybeRecordImplicitNullCheck(instruction);
     } else {
       codegen_->Store(field_type, source, HeapOperand(obj, offset));
       codegen_->MaybeRecordImplicitNullCheck(instruction);
diff --git a/compiler/optimizing/intrinsics_arm64.cc b/compiler/optimizing/intrinsics_arm64.cc
index c888f01..dbd7688 100644
--- a/compiler/optimizing/intrinsics_arm64.cc
+++ b/compiler/optimizing/intrinsics_arm64.cc
@@ -750,7 +750,6 @@
   Register offset = XRegisterFrom(offset_loc);  // Long offset.
   Location trg_loc = locations->Out();
   Register trg = RegisterFrom(trg_loc, type);
-  bool use_acquire_release = codegen->GetInstructionSetFeatures().PreferAcquireRelease();
 
   if (type == Primitive::kPrimNot && kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
     // UnsafeGetObject/UnsafeGetObjectVolatile with Baker's read barrier case.
@@ -758,19 +757,11 @@
     Register temp = temps.AcquireW();
     codegen->GenerateArrayLoadWithBakerReadBarrier(
         invoke, trg_loc, base, 0U, offset_loc, temp, /* needs_null_check */ false);
-    if (is_volatile && !use_acquire_release) {
-      __ Dmb(InnerShareable, BarrierReads);
-    }
   } else {
     // Other cases.
     MemOperand mem_op(base.X(), offset);
     if (is_volatile) {
-      if (use_acquire_release) {
-        codegen->LoadAcquire(invoke, trg, mem_op, /* needs_null_check */ true);
-      } else {
-        codegen->Load(type, trg, mem_op);
-        __ Dmb(InnerShareable, BarrierReads);
-      }
+      codegen->LoadAcquire(invoke, trg, mem_op, /* needs_null_check */ true);
     } else {
       codegen->Load(type, trg, mem_op);
     }
@@ -884,8 +875,6 @@
   Register offset = XRegisterFrom(locations->InAt(2));  // Long offset.
   Register value = RegisterFrom(locations->InAt(3), type);
   Register source = value;
-  bool use_acquire_release = codegen->GetInstructionSetFeatures().PreferAcquireRelease();
-
   MemOperand mem_op(base.X(), offset);
 
   {
@@ -902,15 +891,7 @@
     }
 
     if (is_volatile || is_ordered) {
-      if (use_acquire_release) {
-        codegen->StoreRelease(type, source, mem_op);
-      } else {
-        __ Dmb(InnerShareable, BarrierAll);
-        codegen->Store(type, source, mem_op);
-        if (is_volatile) {
-          __ Dmb(InnerShareable, BarrierReads);
-        }
-      }
+      codegen->StoreRelease(type, source, mem_op);
     } else {
       codegen->Store(type, source, mem_op);
     }
@@ -1000,7 +981,6 @@
 }
 
 static void GenCas(LocationSummary* locations, Primitive::Type type, CodeGeneratorARM64* codegen) {
-  bool use_acquire_release = codegen->GetInstructionSetFeatures().PreferAcquireRelease();
   vixl::MacroAssembler* masm = codegen->GetAssembler()->vixl_masm_;
 
   Register out = WRegisterFrom(locations->Out());                  // Boolean result.
@@ -1036,43 +1016,20 @@
   // result = tmp_value != 0;
 
   vixl::Label loop_head, exit_loop;
-  if (use_acquire_release) {
-    __ Bind(&loop_head);
-    // TODO: When `type == Primitive::kPrimNot`, add a read barrier for
-    // the reference stored in the object before attempting the CAS,
-    // similar to the one in the art::Unsafe_compareAndSwapObject JNI
-    // implementation.
-    //
-    // Note that this code is not (yet) used when read barriers are
-    // enabled (see IntrinsicLocationsBuilderARM64::VisitUnsafeCASObject).
-    DCHECK(!(type == Primitive::kPrimNot && kEmitCompilerReadBarrier));
-    __ Ldaxr(tmp_value, MemOperand(tmp_ptr));
-    __ Cmp(tmp_value, expected);
-    __ B(&exit_loop, ne);
-    __ Stlxr(tmp_32, value, MemOperand(tmp_ptr));
-    __ Cbnz(tmp_32, &loop_head);
-  } else {
-    // Emit a `Dmb(InnerShareable, BarrierAll)` (DMB ISH) instruction
-    // instead of a `Dmb(InnerShareable, BarrierWrites)` (DMB ISHST)
-    // one, as the latter allows a preceding load to be delayed past
-    // the STXR instruction below.
-    __ Dmb(InnerShareable, BarrierAll);
-    __ Bind(&loop_head);
-    // TODO: When `type == Primitive::kPrimNot`, add a read barrier for
-    // the reference stored in the object before attempting the CAS,
-    // similar to the one in the art::Unsafe_compareAndSwapObject JNI
-    // implementation.
-    //
-    // Note that this code is not (yet) used when read barriers are
-    // enabled (see IntrinsicLocationsBuilderARM64::VisitUnsafeCASObject).
-    DCHECK(!(type == Primitive::kPrimNot && kEmitCompilerReadBarrier));
-    __ Ldxr(tmp_value, MemOperand(tmp_ptr));
-    __ Cmp(tmp_value, expected);
-    __ B(&exit_loop, ne);
-    __ Stxr(tmp_32, value, MemOperand(tmp_ptr));
-    __ Cbnz(tmp_32, &loop_head);
-    __ Dmb(InnerShareable, BarrierAll);
-  }
+  __ Bind(&loop_head);
+  // TODO: When `type == Primitive::kPrimNot`, add a read barrier for
+  // the reference stored in the object before attempting the CAS,
+  // similar to the one in the art::Unsafe_compareAndSwapObject JNI
+  // implementation.
+  //
+  // Note that this code is not (yet) used when read barriers are
+  // enabled (see IntrinsicLocationsBuilderARM64::VisitUnsafeCASObject).
+  DCHECK(!(type == Primitive::kPrimNot && kEmitCompilerReadBarrier));
+  __ Ldaxr(tmp_value, MemOperand(tmp_ptr));
+  __ Cmp(tmp_value, expected);
+  __ B(&exit_loop, ne);
+  __ Stlxr(tmp_32, value, MemOperand(tmp_ptr));
+  __ Cbnz(tmp_32, &loop_head);
   __ Bind(&exit_loop);
   __ Cset(out, eq);
 
diff --git a/runtime/arch/arm64/instruction_set_features_arm64.h b/runtime/arch/arm64/instruction_set_features_arm64.h
index 805131f..abd7e83 100644
--- a/runtime/arch/arm64/instruction_set_features_arm64.h
+++ b/runtime/arch/arm64/instruction_set_features_arm64.h
@@ -66,14 +66,6 @@
       return fix_cortex_a53_843419_;
   }
 
-  // NOTE: This flag can be tunned on a CPU basis. In general all ARMv8 CPUs
-  // should prefer the Acquire-Release semantics over the explicit DMBs when
-  // handling load/store-volatile. For a specific use case see the ARM64
-  // Optimizing backend.
-  bool PreferAcquireRelease() const {
-    return true;
-  }
-
   virtual ~Arm64InstructionSetFeatures() {}
 
  protected:
diff --git a/runtime/arch/arm64/instruction_set_features_arm64_test.cc b/runtime/arch/arm64/instruction_set_features_arm64_test.cc
index 599f24e..027e59c 100644
--- a/runtime/arch/arm64/instruction_set_features_arm64_test.cc
+++ b/runtime/arch/arm64/instruction_set_features_arm64_test.cc
@@ -30,8 +30,6 @@
   EXPECT_TRUE(arm64_features->Equals(arm64_features.get()));
   EXPECT_STREQ("smp,a53", arm64_features->GetFeatureString().c_str());
   EXPECT_EQ(arm64_features->AsBitmap(), 3U);
-  // See the comments in instruction_set_features_arm64.h.
-  EXPECT_TRUE(arm64_features->AsArm64InstructionSetFeatures()->PreferAcquireRelease());
 }
 
 }  // namespace art