Merge "Opt Compiler: ARM64: Add support for rem-float, rem-double and volatile."
diff --git a/compiler/optimizing/builder.cc b/compiler/optimizing/builder.cc
index dde0dfe..f6ca6c7 100644
--- a/compiler/optimizing/builder.cc
+++ b/compiler/optimizing/builder.cc
@@ -671,13 +671,6 @@
     return false;
   }
 
-#if defined(__aarch64__)
-  if (resolved_field->IsVolatile()) {
-    MaybeRecordStat(MethodCompilationStat::kNotCompiledVolatile);
-    return false;
-  }
-#endif
-
   Primitive::Type field_type = resolved_field->GetTypeAsPrimitiveType();
 
   HInstruction* object = LoadLocal(obj_reg, Primitive::kPrimNot);
@@ -706,8 +699,6 @@
   return true;
 }
 
-
-
 bool HGraphBuilder::BuildStaticFieldAccess(const Instruction& instruction,
                                            uint32_t dex_pc,
                                            bool is_put) {
@@ -728,13 +719,6 @@
     return false;
   }
 
-#if defined(__aarch64__)
-  if (resolved_field->IsVolatile()) {
-    MaybeRecordStat(MethodCompilationStat::kNotCompiledVolatile);
-    return false;
-  }
-#endif
-
   Handle<mirror::Class> referrer_class(hs.NewHandle(compiler_driver_->ResolveCompilingMethodsClass(
       soa, dex_cache, class_loader, outer_compilation_unit_)));
 
diff --git a/compiler/optimizing/code_generator_arm64.cc b/compiler/optimizing/code_generator_arm64.cc
index c7517d3..e10b47c 100644
--- a/compiler/optimizing/code_generator_arm64.cc
+++ b/compiler/optimizing/code_generator_arm64.cc
@@ -38,6 +38,9 @@
 
 namespace arm64 {
 
+// TODO: Tune the use of Load-Acquire, Store-Release vs Data Memory Barriers.
+// For now we prefer the use of load-acquire, store-release over explicit memory barriers.
+static constexpr bool kUseAcquireRelease = true;
 static constexpr bool kExplicitStackOverflowCheck = false;
 static constexpr size_t kHeapRefSize = sizeof(mirror::HeapReference<mirror::Object>);
 static constexpr int kCurrentMethodStackOffset = 0;
@@ -233,8 +236,9 @@
 static const Register kRuntimeParameterCoreRegisters[] = { x0, x1, x2, x3, x4, x5, x6, x7 };
 static constexpr size_t kRuntimeParameterCoreRegistersLength =
     arraysize(kRuntimeParameterCoreRegisters);
-static const FPRegister kRuntimeParameterFpuRegisters[] = { };
-static constexpr size_t kRuntimeParameterFpuRegistersLength = 0;
+static const FPRegister kRuntimeParameterFpuRegisters[] = { d0, d1, d2, d3, d4, d5, d6, d7 };
+static constexpr size_t kRuntimeParameterFpuRegistersLength =
+    arraysize(kRuntimeParameterFpuRegisters);
 
 class InvokeRuntimeCallingConvention : public CallingConvention<Register, FPRegister> {
  public:
@@ -949,8 +953,8 @@
 }
 
 void CodeGeneratorARM64::Load(Primitive::Type type,
-                              vixl::CPURegister dst,
-                              const vixl::MemOperand& src) {
+                              CPURegister dst,
+                              const MemOperand& src) {
   switch (type) {
     case Primitive::kPrimBoolean:
       __ Ldrb(Register(dst), src);
@@ -969,7 +973,7 @@
     case Primitive::kPrimLong:
     case Primitive::kPrimFloat:
     case Primitive::kPrimDouble:
-      DCHECK(dst.Is64Bits() == Is64BitType(type));
+      DCHECK_EQ(dst.Is64Bits(), Is64BitType(type));
       __ Ldr(dst, src);
       break;
     case Primitive::kPrimVoid:
@@ -977,31 +981,123 @@
   }
 }
 
+void CodeGeneratorARM64::LoadAcquire(Primitive::Type type,
+                                     CPURegister dst,
+                                     const MemOperand& src) {
+  UseScratchRegisterScope temps(GetVIXLAssembler());
+  Register temp_base = temps.AcquireX();
+
+  DCHECK(!src.IsRegisterOffset());
+  DCHECK(!src.IsPreIndex());
+  DCHECK(!src.IsPostIndex());
+
+  // TODO(vixl): Let the MacroAssembler handle MemOperand.
+  __ Add(temp_base, src.base(), src.offset());
+  MemOperand base = MemOperand(temp_base);
+  switch (type) {
+    case Primitive::kPrimBoolean:
+      __ Ldarb(Register(dst), base);
+      break;
+    case Primitive::kPrimByte:
+      __ Ldarb(Register(dst), base);
+      __ Sbfx(Register(dst), Register(dst), 0, Primitive::ComponentSize(type) * kBitsPerByte);
+      break;
+    case Primitive::kPrimChar:
+      __ Ldarh(Register(dst), base);
+      break;
+    case Primitive::kPrimShort:
+      __ Ldarh(Register(dst), base);
+      __ Sbfx(Register(dst), Register(dst), 0, Primitive::ComponentSize(type) * kBitsPerByte);
+      break;
+    case Primitive::kPrimInt:
+    case Primitive::kPrimNot:
+    case Primitive::kPrimLong:
+      DCHECK_EQ(dst.Is64Bits(), Is64BitType(type));
+      __ Ldar(Register(dst), base);
+      break;
+    case Primitive::kPrimFloat:
+    case Primitive::kPrimDouble: {
+      DCHECK(dst.IsFPRegister());
+      DCHECK_EQ(dst.Is64Bits(), Is64BitType(type));
+
+      Register temp = dst.Is64Bits() ? temps.AcquireX() : temps.AcquireW();
+      __ Ldar(temp, base);
+      __ Fmov(FPRegister(dst), temp);
+      break;
+    }
+    case Primitive::kPrimVoid:
+      LOG(FATAL) << "Unreachable type " << type;
+  }
+}
+
 void CodeGeneratorARM64::Store(Primitive::Type type,
-                               vixl::CPURegister rt,
-                               const vixl::MemOperand& dst) {
+                               CPURegister src,
+                               const MemOperand& dst) {
   switch (type) {
     case Primitive::kPrimBoolean:
     case Primitive::kPrimByte:
-      __ Strb(Register(rt), dst);
+      __ Strb(Register(src), dst);
       break;
     case Primitive::kPrimChar:
     case Primitive::kPrimShort:
-      __ Strh(Register(rt), dst);
+      __ Strh(Register(src), dst);
       break;
     case Primitive::kPrimInt:
     case Primitive::kPrimNot:
     case Primitive::kPrimLong:
     case Primitive::kPrimFloat:
     case Primitive::kPrimDouble:
-      DCHECK(rt.Is64Bits() == Is64BitType(type));
-      __ Str(rt, dst);
+      DCHECK_EQ(src.Is64Bits(), Is64BitType(type));
+      __ Str(src, dst);
       break;
     case Primitive::kPrimVoid:
       LOG(FATAL) << "Unreachable type " << type;
   }
 }
 
+void CodeGeneratorARM64::StoreRelease(Primitive::Type type,
+                                      CPURegister src,
+                                      const MemOperand& dst) {
+  UseScratchRegisterScope temps(GetVIXLAssembler());
+  Register temp_base = temps.AcquireX();
+
+  DCHECK(!dst.IsRegisterOffset());
+  DCHECK(!dst.IsPreIndex());
+  DCHECK(!dst.IsPostIndex());
+
+  // TODO(vixl): Let the MacroAssembler handle this.
+  __ Add(temp_base, dst.base(), dst.offset());
+  MemOperand base = MemOperand(temp_base);
+  switch (type) {
+    case Primitive::kPrimBoolean:
+    case Primitive::kPrimByte:
+      __ Stlrb(Register(src), base);
+      break;
+    case Primitive::kPrimChar:
+    case Primitive::kPrimShort:
+      __ Stlrh(Register(src), base);
+      break;
+    case Primitive::kPrimInt:
+    case Primitive::kPrimNot:
+    case Primitive::kPrimLong:
+      DCHECK_EQ(src.Is64Bits(), Is64BitType(type));
+      __ Stlr(Register(src), base);
+      break;
+    case Primitive::kPrimFloat:
+    case Primitive::kPrimDouble: {
+      DCHECK(src.IsFPRegister());
+      DCHECK_EQ(src.Is64Bits(), Is64BitType(type));
+
+      Register temp = src.Is64Bits() ? temps.AcquireX() : temps.AcquireW();
+      __ Fmov(temp, FPRegister(src));
+      __ Stlr(temp, base);
+      break;
+    }
+    case Primitive::kPrimVoid:
+      LOG(FATAL) << "Unreachable type " << type;
+  }
+}
+
 void CodeGeneratorARM64::LoadCurrentMethod(vixl::Register current_method) {
   DCHECK(current_method.IsW());
   __ Ldr(current_method, MemOperand(sp, kCurrentMethodStackOffset));
@@ -1026,14 +1122,47 @@
                                                                      vixl::Register class_reg) {
   UseScratchRegisterScope temps(GetVIXLAssembler());
   Register temp = temps.AcquireW();
-  __ Ldr(temp, HeapOperand(class_reg, mirror::Class::StatusOffset()));
-  __ Cmp(temp, mirror::Class::kStatusInitialized);
-  __ B(lt, slow_path->GetEntryLabel());
+  size_t status_offset = mirror::Class::StatusOffset().SizeValue();
+
   // Even if the initialized flag is set, we need to ensure consistent memory ordering.
-  __ Dmb(InnerShareable, BarrierReads);
+  if (kUseAcquireRelease) {
+    // TODO(vixl): Let the MacroAssembler handle MemOperand.
+    __ Add(temp, class_reg, status_offset);
+    __ Ldar(temp, HeapOperand(temp));
+    __ Cmp(temp, mirror::Class::kStatusInitialized);
+    __ B(lt, slow_path->GetEntryLabel());
+  } else {
+    __ Ldr(temp, HeapOperand(class_reg, status_offset));
+    __ Cmp(temp, mirror::Class::kStatusInitialized);
+    __ B(lt, slow_path->GetEntryLabel());
+    __ Dmb(InnerShareable, BarrierReads);
+  }
   __ Bind(slow_path->GetExitLabel());
 }
 
+void InstructionCodeGeneratorARM64::GenerateMemoryBarrier(MemBarrierKind kind) {
+  BarrierType type = BarrierAll;
+
+  switch (kind) {
+    case MemBarrierKind::kAnyAny:
+    case MemBarrierKind::kAnyStore: {
+      type = BarrierAll;
+      break;
+    }
+    case MemBarrierKind::kLoadAny: {
+      type = BarrierReads;
+      break;
+    }
+    case MemBarrierKind::kStoreStore: {
+      type = BarrierWrites;
+      break;
+    }
+    default:
+      LOG(FATAL) << "Unexpected memory barrier " << kind;
+  }
+  __ Dmb(InnerShareable, type);
+}
+
 void InstructionCodeGeneratorARM64::GenerateSuspendCheck(HSuspendCheck* instruction,
                                                          HBasicBlock* successor) {
   SuspendCheckSlowPathARM64* slow_path =
@@ -1660,28 +1789,54 @@
 }
 
 void LocationsBuilderARM64::VisitInstanceFieldGet(HInstanceFieldGet* instruction) {
-  LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction);
+  LocationSummary* locations =
+      new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall);
   locations->SetInAt(0, Location::RequiresRegister());
   locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
 }
 
 void InstructionCodeGeneratorARM64::VisitInstanceFieldGet(HInstanceFieldGet* instruction) {
   MemOperand field = HeapOperand(InputRegisterAt(instruction, 0), instruction->GetFieldOffset());
-  codegen_->Load(instruction->GetType(), OutputCPURegister(instruction), field);
+
+  if (instruction->IsVolatile()) {
+    if (kUseAcquireRelease) {
+      codegen_->LoadAcquire(instruction->GetType(), OutputCPURegister(instruction), field);
+    } else {
+      codegen_->Load(instruction->GetType(), OutputCPURegister(instruction), field);
+      // For IRIW sequential consistency kLoadAny is not sufficient.
+      GenerateMemoryBarrier(MemBarrierKind::kAnyAny);
+    }
+  } else {
+    codegen_->Load(instruction->GetType(), OutputCPURegister(instruction), field);
+  }
 }
 
 void LocationsBuilderARM64::VisitInstanceFieldSet(HInstanceFieldSet* instruction) {
-  LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction);
+  LocationSummary* locations =
+      new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall);
   locations->SetInAt(0, Location::RequiresRegister());
   locations->SetInAt(1, Location::RequiresRegister());
 }
 
 void InstructionCodeGeneratorARM64::VisitInstanceFieldSet(HInstanceFieldSet* instruction) {
-  Primitive::Type field_type = instruction->GetFieldType();
-  CPURegister value = InputCPURegisterAt(instruction, 1);
   Register obj = InputRegisterAt(instruction, 0);
-  codegen_->Store(field_type, value, HeapOperand(obj, instruction->GetFieldOffset()));
-  if (field_type == Primitive::kPrimNot) {
+  CPURegister value = InputCPURegisterAt(instruction, 1);
+  Offset offset = instruction->GetFieldOffset();
+  Primitive::Type field_type = instruction->GetFieldType();
+
+  if (instruction->IsVolatile()) {
+    if (kUseAcquireRelease) {
+      codegen_->StoreRelease(field_type, value, HeapOperand(obj, offset));
+    } else {
+      GenerateMemoryBarrier(MemBarrierKind::kAnyStore);
+      codegen_->Store(field_type, value, HeapOperand(obj, offset));
+      GenerateMemoryBarrier(MemBarrierKind::kAnyAny);
+    }
+  } else {
+    codegen_->Store(field_type, value, HeapOperand(obj, offset));
+  }
+
+  if (CodeGenerator::StoreNeedsWriteBarrier(field_type, instruction->InputAt(1))) {
     codegen_->MarkGCCard(obj, Register(value));
   }
 }
@@ -2175,9 +2330,12 @@
 }
 
 void LocationsBuilderARM64::VisitRem(HRem* rem) {
-  LocationSummary* locations =
-      new (GetGraph()->GetArena()) LocationSummary(rem, LocationSummary::kNoCall);
-  switch (rem->GetResultType()) {
+  Primitive::Type type = rem->GetResultType();
+  LocationSummary::CallKind call_kind = IsFPType(type) ? LocationSummary::kCall
+                                                       : LocationSummary::kNoCall;
+  LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(rem, call_kind);
+
+  switch (type) {
     case Primitive::kPrimInt:
     case Primitive::kPrimLong:
       locations->SetInAt(0, Location::RequiresRegister());
@@ -2185,13 +2343,24 @@
       locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
       break;
 
+    case Primitive::kPrimFloat:
+    case Primitive::kPrimDouble: {
+      InvokeRuntimeCallingConvention calling_convention;
+      locations->SetInAt(0, LocationFrom(calling_convention.GetFpuRegisterAt(0)));
+      locations->SetInAt(1, LocationFrom(calling_convention.GetFpuRegisterAt(1)));
+      locations->SetOut(calling_convention.GetReturnLocation(type));
+
+      break;
+    }
+
     default:
-      LOG(FATAL) << "Unexpected rem type " << rem->GetResultType();
+      LOG(FATAL) << "Unexpected rem type " << type;
   }
 }
 
 void InstructionCodeGeneratorARM64::VisitRem(HRem* rem) {
   Primitive::Type type = rem->GetResultType();
+
   switch (type) {
     case Primitive::kPrimInt:
     case Primitive::kPrimLong: {
@@ -2206,6 +2375,14 @@
       break;
     }
 
+    case Primitive::kPrimFloat:
+    case Primitive::kPrimDouble: {
+      int32_t entry_offset = (type == Primitive::kPrimFloat) ? QUICK_ENTRY_POINT(pFmodf)
+                                                             : QUICK_ENTRY_POINT(pFmod);
+      codegen_->InvokeRuntime(entry_offset, rem, rem->GetDexPc());
+      break;
+    }
+
     default:
       LOG(FATAL) << "Unexpected rem type " << type;
   }
@@ -2294,7 +2471,18 @@
 
 void InstructionCodeGeneratorARM64::VisitStaticFieldGet(HStaticFieldGet* instruction) {
   MemOperand field = HeapOperand(InputRegisterAt(instruction, 0), instruction->GetFieldOffset());
-  codegen_->Load(instruction->GetType(), OutputCPURegister(instruction), field);
+
+  if (instruction->IsVolatile()) {
+    if (kUseAcquireRelease) {
+      codegen_->LoadAcquire(instruction->GetType(), OutputCPURegister(instruction), field);
+    } else {
+      codegen_->Load(instruction->GetType(), OutputCPURegister(instruction), field);
+      // For IRIW sequential consistency kLoadAny is not sufficient.
+      GenerateMemoryBarrier(MemBarrierKind::kAnyAny);
+    }
+  } else {
+    codegen_->Load(instruction->GetType(), OutputCPURegister(instruction), field);
+  }
 }
 
 void LocationsBuilderARM64::VisitStaticFieldSet(HStaticFieldSet* instruction) {
@@ -2305,13 +2493,24 @@
 }
 
 void InstructionCodeGeneratorARM64::VisitStaticFieldSet(HStaticFieldSet* instruction) {
-  CPURegister value = InputCPURegisterAt(instruction, 1);
   Register cls = InputRegisterAt(instruction, 0);
+  CPURegister value = InputCPURegisterAt(instruction, 1);
   Offset offset = instruction->GetFieldOffset();
   Primitive::Type field_type = instruction->GetFieldType();
 
-  codegen_->Store(field_type, value, HeapOperand(cls, offset));
-  if (field_type == Primitive::kPrimNot) {
+  if (instruction->IsVolatile()) {
+    if (kUseAcquireRelease) {
+      codegen_->StoreRelease(field_type, value, HeapOperand(cls, offset));
+    } else {
+      GenerateMemoryBarrier(MemBarrierKind::kAnyStore);
+      codegen_->Store(field_type, value, HeapOperand(cls, offset));
+      GenerateMemoryBarrier(MemBarrierKind::kAnyAny);
+    }
+  } else {
+    codegen_->Store(field_type, value, HeapOperand(cls, offset));
+  }
+
+  if (CodeGenerator::StoreNeedsWriteBarrier(field_type, instruction->InputAt(1))) {
     codegen_->MarkGCCard(cls, Register(value));
   }
 }
diff --git a/compiler/optimizing/code_generator_arm64.h b/compiler/optimizing/code_generator_arm64.h
index 1d5bfb7..19488a4 100644
--- a/compiler/optimizing/code_generator_arm64.h
+++ b/compiler/optimizing/code_generator_arm64.h
@@ -18,6 +18,7 @@
 #define ART_COMPILER_OPTIMIZING_CODE_GENERATOR_ARM64_H_
 
 #include "code_generator.h"
+#include "dex/compiler_enums.h"
 #include "nodes.h"
 #include "parallel_move_resolver.h"
 #include "utils/arm64/assembler_arm64.h"
@@ -108,6 +109,7 @@
 
  private:
   void GenerateClassInitializationCheck(SlowPathCodeARM64* slow_path, vixl::Register class_reg);
+  void GenerateMemoryBarrier(MemBarrierKind kind);
   void GenerateSuspendCheck(HSuspendCheck* instruction, HBasicBlock* successor);
   void HandleBinaryOp(HBinaryOperation* instr);
   void HandleShift(HBinaryOperation* instr);
@@ -257,6 +259,8 @@
   void Load(Primitive::Type type, vixl::CPURegister dst, const vixl::MemOperand& src);
   void Store(Primitive::Type type, vixl::CPURegister rt, const vixl::MemOperand& dst);
   void LoadCurrentMethod(vixl::Register current_method);
+  void LoadAcquire(Primitive::Type type, vixl::CPURegister dst, const vixl::MemOperand& src);
+  void StoreRelease(Primitive::Type type, vixl::CPURegister rt, const vixl::MemOperand& dst);
 
   // Generate code to invoke a runtime entry point.
   void InvokeRuntime(int32_t offset, HInstruction* instruction, uint32_t dex_pc);
diff --git a/compiler/optimizing/optimizing_compiler_stats.h b/compiler/optimizing/optimizing_compiler_stats.h
index 7993b19..cc2723d 100644
--- a/compiler/optimizing/optimizing_compiler_stats.h
+++ b/compiler/optimizing/optimizing_compiler_stats.h
@@ -38,7 +38,6 @@
   kNotCompiledUnresolvedMethod,
   kNotCompiledUnresolvedField,
   kNotCompiledNonSequentialRegPair,
-  kNotCompiledVolatile,
   kNotOptimizedTryCatch,
   kNotOptimizedDisabled,
   kNotCompiledCantAccesType,
@@ -92,7 +91,6 @@
       case kNotCompiledUnresolvedMethod : return "kNotCompiledUnresolvedMethod";
       case kNotCompiledUnresolvedField : return "kNotCompiledUnresolvedField";
       case kNotCompiledNonSequentialRegPair : return "kNotCompiledNonSequentialRegPair";
-      case kNotCompiledVolatile : return "kNotCompiledVolatile";
       case kNotOptimizedDisabled : return "kNotOptimizedDisabled";
       case kNotOptimizedTryCatch : return "kNotOptimizedTryCatch";
       case kNotCompiledCantAccesType : return "kNotCompiledCantAccesType";
diff --git a/test/436-rem-float/src/Main.java b/test/436-rem-float/src/Main.java
index e20c21f..cc6341a 100644
--- a/test/436-rem-float/src/Main.java
+++ b/test/436-rem-float/src/Main.java
@@ -22,13 +22,44 @@
   }
 
   private static void remFloat() {
-    expectApproxEquals(2F, $opt$RemConst(6F));
+    expectApproxEquals(1.98F, $opt$Rem(1.98F, 2F));
+    expectApproxEquals(0F, $opt$Rem(2F, 0.5F));
+    expectApproxEquals(0.09999F, $opt$Rem(1.0F, 0.1F));
+    expectApproxEquals(1.9F, $opt$Rem(6.5F, 2.3F));
+    expectApproxEquals(0.48F, $opt$Rem(1.98F, 1.5F));
+    expectApproxEquals(0.9999F, $opt$Rem(0.9999F, 1.222F));
+    expectApproxEquals(0.9999F, $opt$Rem(0.9999F, 1.0001F));
+    expectApproxEquals(-1.98F, $opt$Rem(-1.98F, 2F));
+    expectApproxEquals(-0F, $opt$Rem(-2F, 0.5F));
+    expectApproxEquals(-0.09999F, $opt$Rem(-1.0F, 0.1F));
+    expectApproxEquals(-1.9F, $opt$Rem(-6.5F, 2.3F));
+    expectApproxEquals(-0.48F, $opt$Rem(-1.98F, 1.5F));
+    expectApproxEquals(-0.9999F, $opt$Rem(-0.9999F, 1.222F));
+    expectApproxEquals(-0.9999F, $opt$Rem(-0.9999F, 1.0001F));
+    expectApproxEquals(1.98F, $opt$Rem(1.98F, -2F));
+    expectApproxEquals(0F, $opt$Rem(2F, -0.5F));
+    expectApproxEquals(0.09999F, $opt$Rem(1.0F, -0.1F));
+    expectApproxEquals(1.9F, $opt$Rem(6.5F, -2.3F));
+    expectApproxEquals(0.48F, $opt$Rem(1.98F, -1.5F));
+    expectApproxEquals(0.9999F, $opt$Rem(0.9999F, -1.222F));
+    expectApproxEquals(0.9999F, $opt$Rem(0.9999F, -1.0001F));
+    expectApproxEquals(-1.98F, $opt$Rem(-1.98F, -2F));
+    expectApproxEquals(-0F, $opt$Rem(-2F, -0.5F));
+    expectApproxEquals(-0.09999F, $opt$Rem(-1.0F, -0.1F));
+    expectApproxEquals(-1.9F, $opt$Rem(-6.5F, -2.3F));
+    expectApproxEquals(-0.48F, $opt$Rem(-1.98F, -1.5F));
+    expectApproxEquals(-0.9999F, $opt$Rem(-0.9999F, -1.222F));
+    expectApproxEquals(-0.9999F, $opt$Rem(-0.9999F, -1.0001F));
 
+    expectApproxEquals(1.68267e-18F, $opt$Rem(61615.2F, -2.48699e-17F));
+    expectApproxEquals(-8.63819e-09F, $opt$Rem(-1.73479e+14F, 3.11154e-08F));
+    expectApproxEquals(1.10911e-12F, $opt$Rem(338122F, 4.57572e-12F));
+
+    expectApproxEquals(2F, $opt$RemConst(6F));
     expectApproxEquals(2F, $opt$Rem(5.1F, 3.1F));
     expectApproxEquals(2.1F, $opt$Rem(5.1F, 3F));
     expectApproxEquals(-2F, $opt$Rem(-5.1F, 3.1F));
     expectApproxEquals(-2.1F, $opt$Rem(-5.1F, -3F));
-
     expectApproxEquals(2F, $opt$Rem(6F, 4F));
     expectApproxEquals(2F, $opt$Rem(6F, -4F));
     expectApproxEquals(0F, $opt$Rem(6F, 3F));
@@ -43,34 +74,87 @@
     expectApproxEquals(7F, $opt$Rem(7F, -9F));
     expectApproxEquals(-7F, $opt$Rem(-7F, 9F));
     expectApproxEquals(-7F, $opt$Rem(-7F, -9F));
-
     expectApproxEquals(0F, $opt$Rem(Float.MAX_VALUE, 1F));
     expectApproxEquals(0F, $opt$Rem(Float.MAX_VALUE, -1F));
     expectApproxEquals(0F, $opt$Rem(Float.MIN_VALUE, 1F));
     expectApproxEquals(0F, $opt$Rem(Float.MIN_VALUE, -1F));
-
     expectApproxEquals(0F, $opt$Rem(0F, 7F));
     expectApproxEquals(0F, $opt$Rem(0F, Float.MAX_VALUE));
     expectApproxEquals(0F, $opt$Rem(0F, Float.MIN_VALUE));
+    expectApproxEquals(0F, $opt$Rem(0F, Float.POSITIVE_INFINITY));
+    expectApproxEquals(0F, $opt$Rem(0F, Float.NEGATIVE_INFINITY));
+    expectApproxEquals(4F, $opt$Rem(4F, Float.POSITIVE_INFINITY));
+    expectApproxEquals(4F, $opt$Rem(4F, Float.NEGATIVE_INFINITY));
+    expectApproxEquals(-4F, $opt$Rem(-4F, Float.POSITIVE_INFINITY));
+    expectApproxEquals(-4F, $opt$Rem(-4F, Float.NEGATIVE_INFINITY));
+    expectApproxEquals(0F, $opt$Rem(Float.MIN_NORMAL, Float.MIN_VALUE));
+    expectApproxEquals(0F, $opt$Rem(Float.MIN_NORMAL, Float.MIN_NORMAL));
+    expectApproxEquals(0F, $opt$Rem(Float.MIN_VALUE, Float.MIN_VALUE));
+    expectApproxEquals(0F, $opt$Rem(Float.MAX_VALUE, Float.MIN_VALUE));
+    expectApproxEquals(0F, $opt$Rem(Float.MAX_VALUE, Float.MAX_VALUE));
+    expectApproxEquals(0F, $opt$Rem(Float.MAX_VALUE, Float.MIN_NORMAL));
+    expectApproxEquals(Float.MIN_NORMAL, $opt$Rem(Float.MIN_NORMAL, Float.MAX_VALUE));
+    expectApproxEquals(Float.MIN_NORMAL, $opt$Rem(Float.MIN_NORMAL, Float.NEGATIVE_INFINITY));
+    expectApproxEquals(Float.MIN_NORMAL, $opt$Rem(Float.MIN_NORMAL, Float.POSITIVE_INFINITY));
+    expectApproxEquals(Float.MIN_VALUE, $opt$Rem(Float.MIN_VALUE, Float.MAX_VALUE));
+    expectApproxEquals(Float.MIN_VALUE, $opt$Rem(Float.MIN_VALUE, Float.MIN_NORMAL));
+    expectApproxEquals(Float.MIN_VALUE, $opt$Rem(Float.MIN_VALUE, Float.NEGATIVE_INFINITY));
+    expectApproxEquals(Float.MIN_VALUE, $opt$Rem(Float.MIN_VALUE, Float.POSITIVE_INFINITY));
+    expectApproxEquals(Float.MAX_VALUE, $opt$Rem(Float.MAX_VALUE, Float.NEGATIVE_INFINITY));
+    expectApproxEquals(Float.MAX_VALUE, $opt$Rem(Float.MAX_VALUE, Float.POSITIVE_INFINITY));
 
     expectNaN($opt$Rem(Float.NaN, 3F));
     expectNaN($opt$Rem(3F, Float.NaN));
-    expectNaN($opt$Rem(Float.POSITIVE_INFINITY, Float.NEGATIVE_INFINITY));
-    expectNaN($opt$Rem(Float.NEGATIVE_INFINITY, Float.POSITIVE_INFINITY));
     expectNaN($opt$Rem(3F, 0F));
-
-    expectApproxEquals(4F, $opt$Rem(4F, Float.POSITIVE_INFINITY));
-    expectApproxEquals(4F, $opt$Rem(4F, Float.NEGATIVE_INFINITY));
+    expectNaN($opt$Rem(1F, 0F));
+    expectNaN($opt$Rem(-1F, 0F));
+    expectNaN($opt$Rem(Float.NEGATIVE_INFINITY, Float.MIN_VALUE));
+    expectNaN($opt$Rem(Float.NEGATIVE_INFINITY, Float.MAX_VALUE));
+    expectNaN($opt$Rem(Float.NEGATIVE_INFINITY, Float.MIN_NORMAL));
+    expectNaN($opt$Rem(Float.NEGATIVE_INFINITY, Float.NEGATIVE_INFINITY));
+    expectNaN($opt$Rem(Float.NEGATIVE_INFINITY, Float.POSITIVE_INFINITY));
+    expectNaN($opt$Rem(Float.POSITIVE_INFINITY, Float.MIN_VALUE));
+    expectNaN($opt$Rem(Float.POSITIVE_INFINITY, Float.MAX_VALUE));
+    expectNaN($opt$Rem(Float.POSITIVE_INFINITY, Float.MIN_NORMAL));
+    expectNaN($opt$Rem(Float.POSITIVE_INFINITY, Float.NEGATIVE_INFINITY));
+    expectNaN($opt$Rem(Float.POSITIVE_INFINITY, Float.POSITIVE_INFINITY));
   }
 
   private static void remDouble() {
-    expectApproxEquals(2D, $opt$RemConst(6D));
+    expectApproxEquals(1.98D, $opt$Rem(1.98D, 2D));
+    expectApproxEquals(0D, $opt$Rem(2D, 0.5D));
+    expectApproxEquals(0.09999D, $opt$Rem(1.0D, 0.1D));
+    expectApproxEquals(1.9D, $opt$Rem(6.5D, 2.3D));
+    expectApproxEquals(0.48D, $opt$Rem(1.98D, 1.5D));
+    expectApproxEquals(0.9999D, $opt$Rem(0.9999D, 1.222D));
+    expectApproxEquals(0.9999D, $opt$Rem(0.9999D, 1.0001D));
+    expectApproxEquals(-1.98D, $opt$Rem(-1.98D, 2D));
+    expectApproxEquals(-0D, $opt$Rem(-2D, 0.5D));
+    expectApproxEquals(-0.09999D, $opt$Rem(-1.0D, 0.1D));
+    expectApproxEquals(-1.9D, $opt$Rem(-6.5D, 2.3D));
+    expectApproxEquals(-0.48D, $opt$Rem(-1.98D, 1.5D));
+    expectApproxEquals(-0.9999D, $opt$Rem(-0.9999D, 1.222D));
+    expectApproxEquals(-0.9999D, $opt$Rem(-0.9999D, 1.0001D));
+    expectApproxEquals(1.98D, $opt$Rem(1.98D, -2D));
+    expectApproxEquals(0D, $opt$Rem(2D, -0.5D));
+    expectApproxEquals(0.09999D, $opt$Rem(1.0D, -0.1D));
+    expectApproxEquals(1.9D, $opt$Rem(6.5D, -2.3D));
+    expectApproxEquals(0.48D, $opt$Rem(1.98D, -1.5D));
+    expectApproxEquals(0.9999D, $opt$Rem(0.9999D, -1.222D));
+    expectApproxEquals(0.9999D, $opt$Rem(0.9999D, -1.0001D));
+    expectApproxEquals(-1.98D, $opt$Rem(-1.98D, -2D));
+    expectApproxEquals(-0D, $opt$Rem(-2D, -0.5D));
+    expectApproxEquals(-0.09999D, $opt$Rem(-1.0D, -0.1D));
+    expectApproxEquals(-1.9D, $opt$Rem(-6.5D, -2.3D));
+    expectApproxEquals(-0.48D, $opt$Rem(-1.98D, -1.5D));
+    expectApproxEquals(-0.9999D, $opt$Rem(-0.9999D, -1.222D));
+    expectApproxEquals(-0.9999D, $opt$Rem(-0.9999D, -1.0001D));
 
+    expectApproxEquals(2D, $opt$RemConst(6D));
     expectApproxEquals(2D, $opt$Rem(5.1D, 3.1D));
     expectApproxEquals(2.1D, $opt$Rem(5.1D, 3D));
     expectApproxEquals(-2D, $opt$Rem(-5.1D, 3.1D));
     expectApproxEquals(-2.1D, $opt$Rem(-5.1D, -3D));
-
     expectApproxEquals(2D, $opt$Rem(6D, 4D));
     expectApproxEquals(2D, $opt$Rem(6D, -4D));
     expectApproxEquals(0D, $opt$Rem(6D, 3D));
@@ -85,24 +169,50 @@
     expectApproxEquals(7D, $opt$Rem(7D, -9D));
     expectApproxEquals(-7D, $opt$Rem(-7D, 9D));
     expectApproxEquals(-7D, $opt$Rem(-7D, -9D));
-
     expectApproxEquals(0D, $opt$Rem(Double.MAX_VALUE, 1D));
     expectApproxEquals(0D, $opt$Rem(Double.MAX_VALUE, -1D));
     expectApproxEquals(0D, $opt$Rem(Double.MIN_VALUE, 1D));
     expectApproxEquals(0D, $opt$Rem(Double.MIN_VALUE, -1D));
-
     expectApproxEquals(0D, $opt$Rem(0D, 7D));
     expectApproxEquals(0D, $opt$Rem(0D, Double.MAX_VALUE));
     expectApproxEquals(0D, $opt$Rem(0D, Double.MIN_VALUE));
+    expectApproxEquals(0D, $opt$Rem(0D, Double.POSITIVE_INFINITY));
+    expectApproxEquals(0D, $opt$Rem(0D, Double.NEGATIVE_INFINITY));
+    expectApproxEquals(4D, $opt$Rem(4D, Double.POSITIVE_INFINITY));
+    expectApproxEquals(4D, $opt$Rem(4D, Double.NEGATIVE_INFINITY));
+    expectApproxEquals(-4D, $opt$Rem(-4D, Double.POSITIVE_INFINITY));
+    expectApproxEquals(-4D, $opt$Rem(-4D, Double.NEGATIVE_INFINITY));
+    expectApproxEquals(0D, $opt$Rem(Double.MIN_NORMAL, Double.MIN_VALUE));
+    expectApproxEquals(0D, $opt$Rem(Double.MIN_NORMAL, Double.MIN_NORMAL));
+    expectApproxEquals(0D, $opt$Rem(Double.MIN_VALUE, Double.MIN_VALUE));
+    expectApproxEquals(0D, $opt$Rem(Double.MAX_VALUE, Double.MIN_VALUE));
+    expectApproxEquals(0D, $opt$Rem(Double.MAX_VALUE, Double.MAX_VALUE));
+    expectApproxEquals(0D, $opt$Rem(Double.MAX_VALUE, Double.MIN_NORMAL));
+    expectApproxEquals(Double.MIN_NORMAL, $opt$Rem(Double.MIN_NORMAL, Double.MAX_VALUE));
+    expectApproxEquals(Double.MIN_NORMAL, $opt$Rem(Double.MIN_NORMAL, Double.NEGATIVE_INFINITY));
+    expectApproxEquals(Double.MIN_NORMAL, $opt$Rem(Double.MIN_NORMAL, Double.POSITIVE_INFINITY));
+    expectApproxEquals(Double.MIN_VALUE, $opt$Rem(Double.MIN_VALUE, Double.MAX_VALUE));
+    expectApproxEquals(Double.MIN_VALUE, $opt$Rem(Double.MIN_VALUE, Double.MIN_NORMAL));
+    expectApproxEquals(Double.MIN_VALUE, $opt$Rem(Double.MIN_VALUE, Double.NEGATIVE_INFINITY));
+    expectApproxEquals(Double.MIN_VALUE, $opt$Rem(Double.MIN_VALUE, Double.POSITIVE_INFINITY));
+    expectApproxEquals(Double.MAX_VALUE, $opt$Rem(Double.MAX_VALUE, Double.NEGATIVE_INFINITY));
+    expectApproxEquals(Double.MAX_VALUE, $opt$Rem(Double.MAX_VALUE, Double.POSITIVE_INFINITY));
 
     expectNaN($opt$Rem(Double.NaN, 3D));
     expectNaN($opt$Rem(3D, Double.NaN));
-    expectNaN($opt$Rem(Double.POSITIVE_INFINITY, Double.NEGATIVE_INFINITY));
-    expectNaN($opt$Rem(Double.NEGATIVE_INFINITY, Double.POSITIVE_INFINITY));
     expectNaN($opt$Rem(3D, 0D));
-
-    expectApproxEquals(4D, $opt$Rem(4D, Double.POSITIVE_INFINITY));
-    expectApproxEquals(4D, $opt$Rem(4D, Double.NEGATIVE_INFINITY));
+    expectNaN($opt$Rem(1D, 0D));
+    expectNaN($opt$Rem(-1D, 0D));
+    expectNaN($opt$Rem(Double.NEGATIVE_INFINITY, Double.MIN_VALUE));
+    expectNaN($opt$Rem(Double.NEGATIVE_INFINITY, Double.MAX_VALUE));
+    expectNaN($opt$Rem(Double.NEGATIVE_INFINITY, Double.MIN_NORMAL));
+    expectNaN($opt$Rem(Double.NEGATIVE_INFINITY, Double.NEGATIVE_INFINITY));
+    expectNaN($opt$Rem(Double.NEGATIVE_INFINITY, Double.POSITIVE_INFINITY));
+    expectNaN($opt$Rem(Double.POSITIVE_INFINITY, Double.MIN_VALUE));
+    expectNaN($opt$Rem(Double.POSITIVE_INFINITY, Double.MAX_VALUE));
+    expectNaN($opt$Rem(Double.POSITIVE_INFINITY, Double.MIN_NORMAL));
+    expectNaN($opt$Rem(Double.POSITIVE_INFINITY, Double.NEGATIVE_INFINITY));
+    expectNaN($opt$Rem(Double.POSITIVE_INFINITY, Double.POSITIVE_INFINITY));
   }
 
   static float $opt$Rem(float a, float b) {
diff --git a/test/438-volatile/expected.txt b/test/438-volatile/expected.txt
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/test/438-volatile/expected.txt
diff --git a/test/438-volatile/info.txt b/test/438-volatile/info.txt
new file mode 100644
index 0000000..7a4c81a
--- /dev/null
+++ b/test/438-volatile/info.txt
@@ -0,0 +1 @@
+Tests basic operations (set/get) on volatiles.
diff --git a/test/438-volatile/src/Main.java b/test/438-volatile/src/Main.java
new file mode 100644
index 0000000..a870e4c
--- /dev/null
+++ b/test/438-volatile/src/Main.java
@@ -0,0 +1,53 @@
+/*
+ * Copyright (C) 2014 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+public class Main {
+  static volatile long long_volatile;
+  static volatile double double_volatile;
+
+  public static void main(String[] args) {
+    checkVolatileUpdate(0L);
+    checkVolatileUpdate(Long.MAX_VALUE);
+    checkVolatileUpdate(Long.MIN_VALUE);
+
+    checkVolatileUpdate(0.0);
+    checkVolatileUpdate(Double.MAX_VALUE);
+    checkVolatileUpdate(-Double.MAX_VALUE);
+  }
+
+  public static long $opt$update(long a) {
+     long_volatile = a;
+     return long_volatile;
+  }
+
+  public static double $opt$update(double a) {
+     double_volatile = a;
+     return double_volatile;
+  }
+
+  public static void checkVolatileUpdate(long value) {
+    if (value != $opt$update(value)) {
+      throw new RuntimeException("Volatile update failed for long:" + value);
+    }
+  }
+
+  public static void checkVolatileUpdate(double value) {
+    if (value != $opt$update(value)) {
+      throw new RuntimeException("Volatile update failed for double:" + value);
+    }
+  }
+
+}
diff --git a/test/Android.run-test.mk b/test/Android.run-test.mk
index e085d3f..170ec31 100644
--- a/test/Android.run-test.mk
+++ b/test/Android.run-test.mk
@@ -297,10 +297,7 @@
 TEST_ART_BROKEN_DEFAULT_RUN_TESTS :=
 
 # Known broken tests for the arm64 optimizing compiler backend.
-TEST_ART_BROKEN_OPTIMIZING_ARM64_RUN_TESTS := \
-  003-omnibus-opcodes64 \
-  012-math64 \
-  436-rem-float64
+TEST_ART_BROKEN_OPTIMIZING_ARM64_RUN_TESTS :=
 
 ifneq (,$(filter optimizing,$(COMPILER_TYPES)))
   ART_TEST_KNOWN_BROKEN += $(call all-run-test-names,target,$(RUN_TYPES),$(PREBUILD_TYPES), \