Revert "Revert "ART: Split out more cases of Load/StoreRef, volatile as parameter""

This reverts commit de68676b24f61a55adc0b22fe828f036a5925c41.

Fixes an API comment and differentiates between inserting and appending.
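
For reference, a minimal sketch of how call sites change with the relanded
API (kNotVolatile is assumed here as the non-volatile enumerator; it does
not appear in this hunk):

    // Before: separate *Volatile entry points per operation.
    LIR* load = LoadBaseDispVolatile(r_base, displacement, r_dest, k64);

    // After: a single entry point; volatility is passed explicitly.
    LIR* load = LoadBaseDisp(r_base, displacement, r_dest, k64, kVolatile);
    StoreBaseDisp(r_base, displacement, r_src, k32, kNotVolatile);  // assumed enumerator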

Change-Id: I0e9a21bb1d25766e3cbd802d8b48633ae251a6bf
diff --git a/compiler/dex/quick/arm/utility_arm.cc b/compiler/dex/quick/arm/utility_arm.cc
index b236f99..bc8f95b 100644
--- a/compiler/dex/quick/arm/utility_arm.cc
+++ b/compiler/dex/quick/arm/utility_arm.cc
@@ -961,31 +961,37 @@
   return load;
 }
 
-LIR* ArmMir2Lir::LoadBaseDispVolatile(RegStorage r_base, int displacement, RegStorage r_dest,
-                                      OpSize size) {
-  // Only 64-bit load needs special handling.
-  if (UNLIKELY(size == k64 || size == kDouble)) {
-    DCHECK(!r_dest.IsFloat());  // See RegClassForFieldLoadSave().
-    // If the cpu supports LPAE, aligned LDRD is atomic - fall through to LoadBaseDisp().
-    if (!cu_->compiler_driver->GetInstructionSetFeatures().HasLpae()) {
-      // Use LDREXD for the atomic load. (Expect displacement > 0, don't optimize for == 0.)
-      RegStorage r_ptr = AllocTemp();
-      OpRegRegImm(kOpAdd, r_ptr, r_base, displacement);
-      LIR* lir = NewLIR3(kThumb2Ldrexd, r_dest.GetLowReg(), r_dest.GetHighReg(), r_ptr.GetReg());
-      FreeTemp(r_ptr);
-      return lir;
-    }
-  }
-  return LoadBaseDisp(r_base, displacement, r_dest, size);
-}
-
 LIR* ArmMir2Lir::LoadBaseDisp(RegStorage r_base, int displacement, RegStorage r_dest,
-                              OpSize size) {
+                              OpSize size, VolatileKind is_volatile) {
   // TODO: base this on target.
   if (size == kWord) {
     size = k32;
   }
-  return LoadBaseDispBody(r_base, displacement, r_dest, size);
+  LIR* load;
+  if (UNLIKELY(is_volatile == kVolatile &&
+               (size == k64 || size == kDouble) &&
+               !cu_->compiler_driver->GetInstructionSetFeatures().HasLpae())) {
+    // Only a 64-bit load needs special handling.
+    // If the CPU supports LPAE, an aligned LDRD is atomic, so LoadBaseDispBody() is used instead.
+    DCHECK(!r_dest.IsFloat());  // See RegClassForFieldLoadSave().
+    // Use LDREXD for the atomic load. (Expect displacement > 0; don't optimize for == 0.)
+    RegStorage r_ptr = AllocTemp();
+    OpRegRegImm(kOpAdd, r_ptr, r_base, displacement);
+    LIR* lir = NewLIR3(kThumb2Ldrexd, r_dest.GetLowReg(), r_dest.GetHighReg(), r_ptr.GetReg());
+    FreeTemp(r_ptr);
+    return lir;
+  } else {
+    load = LoadBaseDispBody(r_base, displacement, r_dest, size);
+  }
+
+  if (UNLIKELY(is_volatile == kVolatile)) {
+    // Without context-sensitive analysis, we must issue the most conservative barriers.
+    // In this case, either a load or a store may follow, so we issue both barriers.
+    GenMemBarrier(kLoadLoad);
+    GenMemBarrier(kLoadStore);
+  }
+
+  return load;
 }
 
 
@@ -1081,49 +1087,58 @@
   return store;
 }
 
-LIR* ArmMir2Lir::StoreBaseDispVolatile(RegStorage r_base, int displacement, RegStorage r_src,
-                                       OpSize size) {
-  // Only 64-bit store needs special handling.
-  if (UNLIKELY(size == k64 || size == kDouble)) {
-    DCHECK(!r_src.IsFloat());  // See RegClassForFieldLoadSave().
-    // If the cpu supports LPAE, aligned STRD is atomic - fall through to StoreBaseDisp().
-    if (!cu_->compiler_driver->GetInstructionSetFeatures().HasLpae()) {
-      // Use STREXD for the atomic store. (Expect displacement > 0, don't optimize for == 0.)
-      RegStorage r_ptr = AllocTemp();
-      OpRegRegImm(kOpAdd, r_ptr, r_base, displacement);
-      LIR* fail_target = NewLIR0(kPseudoTargetLabel);
-      // We have only 5 temporary registers available and if r_base, r_src and r_ptr already
-      // take 4, we can't directly allocate 2 more for LDREXD temps. In that case clobber r_ptr
-      // in LDREXD and recalculate it from r_base.
-      RegStorage r_temp = AllocTemp();
-      RegStorage r_temp_high = AllocFreeTemp();  // We may not have another temp.
-      if (r_temp_high.Valid()) {
-        NewLIR3(kThumb2Ldrexd, r_temp.GetReg(), r_temp_high.GetReg(), r_ptr.GetReg());
-        FreeTemp(r_temp_high);
-        FreeTemp(r_temp);
-      } else {
-        // If we don't have another temp, clobber r_ptr in LDREXD and reload it.
-        NewLIR3(kThumb2Ldrexd, r_temp.GetReg(), r_ptr.GetReg(), r_ptr.GetReg());
-        FreeTemp(r_temp);  // May need the temp for kOpAdd.
-        OpRegRegImm(kOpAdd, r_ptr, r_base, displacement);
-      }
-      LIR* lir = NewLIR4(kThumb2Strexd, r_temp.GetReg(), r_src.GetLowReg(), r_src.GetHighReg(),
-                         r_ptr.GetReg());
-      OpCmpImmBranch(kCondNe, r_temp, 0, fail_target);
-      FreeTemp(r_ptr);
-      return lir;
-    }
-  }
-  return StoreBaseDisp(r_base, displacement, r_src, size);
-}
-
 LIR* ArmMir2Lir::StoreBaseDisp(RegStorage r_base, int displacement, RegStorage r_src,
-                               OpSize size) {
-  // TODO: base this on target.
-  if (size == kWord) {
-    size = k32;
+                               OpSize size, VolatileKind is_volatile) {
+  if (UNLIKELY(is_volatile == kVolatile)) {
+    // There might have been a store before this volatile one, so insert a StoreStore barrier.
+    GenMemBarrier(kStoreStore);
   }
-  return StoreBaseDispBody(r_base, displacement, r_src, size);
+
+  LIR* store;
+  if (UNLIKELY(is_volatile == kVolatile &&
+               (size == k64 || size == kDouble) &&
+               !cu_->compiler_driver->GetInstructionSetFeatures().HasLpae())) {
+    // Only a 64-bit store needs special handling.
+    // If the CPU supports LPAE, an aligned STRD is atomic, so StoreBaseDispBody() is used instead.
+    // Use STREXD for the atomic store. (Expect displacement > 0; don't optimize for == 0.)
+    DCHECK(!r_src.IsFloat());  // See RegClassForFieldLoadSave().
+    RegStorage r_ptr = AllocTemp();
+    OpRegRegImm(kOpAdd, r_ptr, r_base, displacement);
+    LIR* fail_target = NewLIR0(kPseudoTargetLabel);
+    // We have only 5 temporary registers available, and if r_base, r_src and r_ptr already
+    // take 4, we can't directly allocate 2 more for the LDREXD temps. In that case, clobber
+    // r_ptr in the LDREXD and recalculate it from r_base.
+    RegStorage r_temp = AllocTemp();
+    RegStorage r_temp_high = AllocFreeTemp();  // We may not have another temp.
+    if (r_temp_high.Valid()) {
+      NewLIR3(kThumb2Ldrexd, r_temp.GetReg(), r_temp_high.GetReg(), r_ptr.GetReg());
+      FreeTemp(r_temp_high);
+      FreeTemp(r_temp);
+    } else {
+      // If we don't have another temp, clobber r_ptr in LDREXD and reload it.
+      NewLIR3(kThumb2Ldrexd, r_temp.GetReg(), r_ptr.GetReg(), r_ptr.GetReg());
+      FreeTemp(r_temp);  // May need the temp for kOpAdd.
+      OpRegRegImm(kOpAdd, r_ptr, r_base, displacement);
+    }
+    store = NewLIR4(kThumb2Strexd, r_temp.GetReg(), r_src.GetLowReg(), r_src.GetHighReg(),
+                    r_ptr.GetReg());
+    OpCmpImmBranch(kCondNe, r_temp, 0, fail_target);
+    FreeTemp(r_ptr);
+  } else {
+    // TODO: base this on target.
+    if (size == kWord) {
+      size = k32;
+    }
+
+    store = StoreBaseDispBody(r_base, displacement, r_src, size);
+  }
+
+  if (UNLIKELY(is_volatile == kVolatile)) {
+    // A load might follow the volatile store, so insert a StoreLoad barrier.
+    GenMemBarrier(kStoreLoad);
+  }
+
+  return store;
 }
 
 LIR* ArmMir2Lir::OpFpRegCopy(RegStorage r_dest, RegStorage r_src) {