Unify ART's various implementations of bit_cast.

ART had three implementations of art::bit_cast:

1. one in runtime/base/casts.h, declared as:

   template <class Dest, class Source>
   inline Dest bit_cast(const Source& source);

2. another one in runtime/utils.h, declared as:

   template<typename U, typename V>
   static inline V bit_cast(U in);

3. and a third, local version in runtime/memory_region.h,
   similar to the previous one:

   template<typename Source, typename Destination>
   static Destination MemoryRegion::local_bit_cast(Source in);

This CL removes versions 2. and 3. and changes their callers
to use version 1. instead.  That version was chosen over the
others because:
- it was the oldest one in the code base; and
- its syntax was closer to that of the standard C++ cast
  operators, as it supports the following use:

    bit_cast<Destination>(source)

  since `Source' can be deduced from `source'.
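
The retained version takes its template parameters in the order
<Destination, Source>, while the two removed versions took them in
the opposite order (source first); hence the swapped template
arguments at every call site touched by this CL, plus an explicit
static_cast where the old calls relied on the removed versions'
size-widening behaviour.  As a rough illustration (not the actual
code in runtime/base/casts.h), a memcpy-based bit_cast with that
parameter order and an equal-size requirement could be sketched as:

    #include <cstdint>
    #include <cstring>

    // Reinterpret the bits of `source' as a value of type `Dest'.
    // Sketch only; the actual ART implementation may differ.
    template <class Dest, class Source>
    inline Dest bit_cast(const Source& source) {
      static_assert(sizeof(Dest) == sizeof(Source),
                    "bit_cast requires types of equal size");
      Dest dest;
      std::memcpy(&dest, &source, sizeof(dest));
      return dest;
    }

    int main() {
      // `Source' is deduced from the argument, so callers only
      // name the destination type, mirroring the standard C++
      // cast operators:
      uint32_t bits = bit_cast<uint32_t>(1.0f);
      float f = bit_cast<float>(bits);
      return (f == 1.0f) ? 0 : 1;
    }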

Change-Id: I7334fd5d55bf0b8a0c52cb33cfbae6894ff83633
diff --git a/compiler/dex/quick/arm64/fp_arm64.cc b/compiler/dex/quick/arm64/fp_arm64.cc
index a8ec6c0..49b15fe 100644
--- a/compiler/dex/quick/arm64/fp_arm64.cc
+++ b/compiler/dex/quick/arm64/fp_arm64.cc
@@ -449,7 +449,7 @@
 }
 
 bool Arm64Mir2Lir::GenInlinedRound(CallInfo* info, bool is_double) {
-  int32_t encoded_imm = EncodeImmSingle(bit_cast<float, uint32_t>(0.5f));
+  int32_t encoded_imm = EncodeImmSingle(bit_cast<uint32_t, float>(0.5f));
   A64Opcode wide = (is_double) ? WIDE(0) : UNWIDE(0);
   RegLocation rl_src = info->args[0];
   RegLocation rl_dest = (is_double) ? InlineTargetWide(info) : InlineTarget(info);
diff --git a/compiler/dex/quick/gen_common.cc b/compiler/dex/quick/gen_common.cc
index b80fd74..2bcaaca 100644
--- a/compiler/dex/quick/gen_common.cc
+++ b/compiler/dex/quick/gen_common.cc
@@ -1874,8 +1874,8 @@
     int32_t divisor = mir_graph_->ConstantValue(rl_src2);
     if (CanDivideByReciprocalMultiplyFloat(divisor)) {
       // Generate multiply by reciprocal instead of div.
-      float recip = 1.0f/bit_cast<int32_t, float>(divisor);
-      GenMultiplyByConstantFloat(rl_dest, rl_src1, bit_cast<float, int32_t>(recip));
+      float recip = 1.0f/bit_cast<float, int32_t>(divisor);
+      GenMultiplyByConstantFloat(rl_dest, rl_src1, bit_cast<int32_t, float>(recip));
       return true;
     }
   } else {
@@ -1883,7 +1883,7 @@
     if (CanDivideByReciprocalMultiplyDouble(divisor)) {
       // Generate multiply by reciprocal instead of div.
       double recip = 1.0/bit_cast<double, int64_t>(divisor);
-      GenMultiplyByConstantDouble(rl_dest, rl_src1, bit_cast<double, int64_t>(recip));
+      GenMultiplyByConstantDouble(rl_dest, rl_src1, bit_cast<int64_t, double>(recip));
       return true;
     }
   }
diff --git a/compiler/jni/jni_compiler_test.cc b/compiler/jni/jni_compiler_test.cc
index f513ea8..70bfb81 100644
--- a/compiler/jni/jni_compiler_test.cc
+++ b/compiler/jni/jni_compiler_test.cc
@@ -1510,25 +1510,25 @@
   EXPECT_EQ(i9, 9);
   EXPECT_EQ(i10, 10);
 
-  jint i11 = bit_cast<jfloat, jint>(f1);
+  jint i11 = bit_cast<jint, jfloat>(f1);
   EXPECT_EQ(i11, 11);
-  jint i12 = bit_cast<jfloat, jint>(f2);
+  jint i12 = bit_cast<jint, jfloat>(f2);
   EXPECT_EQ(i12, 12);
-  jint i13 = bit_cast<jfloat, jint>(f3);
+  jint i13 = bit_cast<jint, jfloat>(f3);
   EXPECT_EQ(i13, 13);
-  jint i14 = bit_cast<jfloat, jint>(f4);
+  jint i14 = bit_cast<jint, jfloat>(f4);
   EXPECT_EQ(i14, 14);
-  jint i15 = bit_cast<jfloat, jint>(f5);
+  jint i15 = bit_cast<jint, jfloat>(f5);
   EXPECT_EQ(i15, 15);
-  jint i16 = bit_cast<jfloat, jint>(f6);
+  jint i16 = bit_cast<jint, jfloat>(f6);
   EXPECT_EQ(i16, 16);
-  jint i17 = bit_cast<jfloat, jint>(f7);
+  jint i17 = bit_cast<jint, jfloat>(f7);
   EXPECT_EQ(i17, 17);
-  jint i18 = bit_cast<jfloat, jint>(f8);
+  jint i18 = bit_cast<jint, jfloat>(f8);
   EXPECT_EQ(i18, 18);
-  jint i19 = bit_cast<jfloat, jint>(f9);
+  jint i19 = bit_cast<jint, jfloat>(f9);
   EXPECT_EQ(i19, 19);
-  jint i20 = bit_cast<jfloat, jint>(f10);
+  jint i20 = bit_cast<jint, jfloat>(f10);
   EXPECT_EQ(i20, 20);
 }
 
@@ -1547,16 +1547,16 @@
   jint i9 = 9;
   jint i10 = 10;
 
-  jfloat f1 = bit_cast<jint, jfloat>(11);
-  jfloat f2 = bit_cast<jint, jfloat>(12);
-  jfloat f3 = bit_cast<jint, jfloat>(13);
-  jfloat f4 = bit_cast<jint, jfloat>(14);
-  jfloat f5 = bit_cast<jint, jfloat>(15);
-  jfloat f6 = bit_cast<jint, jfloat>(16);
-  jfloat f7 = bit_cast<jint, jfloat>(17);
-  jfloat f8 = bit_cast<jint, jfloat>(18);
-  jfloat f9 = bit_cast<jint, jfloat>(19);
-  jfloat f10 = bit_cast<jint, jfloat>(20);
+  jfloat f1 = bit_cast<jfloat, jint>(11);
+  jfloat f2 = bit_cast<jfloat, jint>(12);
+  jfloat f3 = bit_cast<jfloat, jint>(13);
+  jfloat f4 = bit_cast<jfloat, jint>(14);
+  jfloat f5 = bit_cast<jfloat, jint>(15);
+  jfloat f6 = bit_cast<jfloat, jint>(16);
+  jfloat f7 = bit_cast<jfloat, jint>(17);
+  jfloat f8 = bit_cast<jfloat, jint>(18);
+  jfloat f9 = bit_cast<jfloat, jint>(19);
+  jfloat f10 = bit_cast<jfloat, jint>(20);
 
   env_->CallStaticVoidMethod(jklass_, jmethod_, i1, i2, i3, i4, i5, i6, i7, i8, i9, i10, f1, f2,
                              f3, f4, f5, f6, f7, f8, f9, f10);
@@ -1580,25 +1580,25 @@
   EXPECT_EQ(i9, 9);
   EXPECT_EQ(i10, 10);
 
-  jint i11 = bit_cast<jfloat, jint>(f1);
+  jint i11 = bit_cast<jint, jfloat>(f1);
   EXPECT_EQ(i11, 11);
-  jint i12 = bit_cast<jfloat, jint>(f2);
+  jint i12 = bit_cast<jint, jfloat>(f2);
   EXPECT_EQ(i12, 12);
-  jint i13 = bit_cast<jfloat, jint>(f3);
+  jint i13 = bit_cast<jint, jfloat>(f3);
   EXPECT_EQ(i13, 13);
-  jint i14 = bit_cast<jfloat, jint>(f4);
+  jint i14 = bit_cast<jint, jfloat>(f4);
   EXPECT_EQ(i14, 14);
-  jint i15 = bit_cast<jfloat, jint>(f5);
+  jint i15 = bit_cast<jint, jfloat>(f5);
   EXPECT_EQ(i15, 15);
-  jint i16 = bit_cast<jfloat, jint>(f6);
+  jint i16 = bit_cast<jint, jfloat>(f6);
   EXPECT_EQ(i16, 16);
-  jint i17 = bit_cast<jfloat, jint>(f7);
+  jint i17 = bit_cast<jint, jfloat>(f7);
   EXPECT_EQ(i17, 17);
-  jint i18 = bit_cast<jfloat, jint>(f8);
+  jint i18 = bit_cast<jint, jfloat>(f8);
   EXPECT_EQ(i18, 18);
-  jint i19 = bit_cast<jfloat, jint>(f9);
+  jint i19 = bit_cast<jint, jfloat>(f9);
   EXPECT_EQ(i19, 19);
-  jint i20 = bit_cast<jfloat, jint>(f10);
+  jint i20 = bit_cast<jint, jfloat>(f10);
   EXPECT_EQ(i20, 20);
 }
 
@@ -1617,16 +1617,16 @@
   jint i9 = 9;
   jint i10 = 10;
 
-  jfloat f1 = bit_cast<jint, jfloat>(11);
-  jfloat f2 = bit_cast<jint, jfloat>(12);
-  jfloat f3 = bit_cast<jint, jfloat>(13);
-  jfloat f4 = bit_cast<jint, jfloat>(14);
-  jfloat f5 = bit_cast<jint, jfloat>(15);
-  jfloat f6 = bit_cast<jint, jfloat>(16);
-  jfloat f7 = bit_cast<jint, jfloat>(17);
-  jfloat f8 = bit_cast<jint, jfloat>(18);
-  jfloat f9 = bit_cast<jint, jfloat>(19);
-  jfloat f10 = bit_cast<jint, jfloat>(20);
+  jfloat f1 = bit_cast<jfloat, jint>(11);
+  jfloat f2 = bit_cast<jfloat, jint>(12);
+  jfloat f3 = bit_cast<jfloat, jint>(13);
+  jfloat f4 = bit_cast<jfloat, jint>(14);
+  jfloat f5 = bit_cast<jfloat, jint>(15);
+  jfloat f6 = bit_cast<jfloat, jint>(16);
+  jfloat f7 = bit_cast<jfloat, jint>(17);
+  jfloat f8 = bit_cast<jfloat, jint>(18);
+  jfloat f9 = bit_cast<jfloat, jint>(19);
+  jfloat f10 = bit_cast<jfloat, jint>(20);
 
   env_->CallStaticVoidMethod(jklass_, jmethod_, f1, f2, f3, f4, f5, f6, f7, f8, f9, f10, i1, i2, i3,
                              i4, i5, i6, i7, i8, i9, i10);
@@ -1649,25 +1649,25 @@
   EXPECT_EQ(i9, 9);
   EXPECT_EQ(i10, 10);
 
-  jint i11 = bit_cast<jfloat, jint>(f1);
+  jint i11 = bit_cast<jint, jfloat>(f1);
   EXPECT_EQ(i11, 11);
-  jint i12 = bit_cast<jfloat, jint>(f2);
+  jint i12 = bit_cast<jint, jfloat>(f2);
   EXPECT_EQ(i12, 12);
-  jint i13 = bit_cast<jfloat, jint>(f3);
+  jint i13 = bit_cast<jint, jfloat>(f3);
   EXPECT_EQ(i13, 13);
-  jint i14 = bit_cast<jfloat, jint>(f4);
+  jint i14 = bit_cast<jint, jfloat>(f4);
   EXPECT_EQ(i14, 14);
-  jint i15 = bit_cast<jfloat, jint>(f5);
+  jint i15 = bit_cast<jint, jfloat>(f5);
   EXPECT_EQ(i15, 15);
-  jint i16 = bit_cast<jfloat, jint>(f6);
+  jint i16 = bit_cast<jint, jfloat>(f6);
   EXPECT_EQ(i16, 16);
-  jint i17 = bit_cast<jfloat, jint>(f7);
+  jint i17 = bit_cast<jint, jfloat>(f7);
   EXPECT_EQ(i17, 17);
-  jint i18 = bit_cast<jfloat, jint>(f8);
+  jint i18 = bit_cast<jint, jfloat>(f8);
   EXPECT_EQ(i18, 18);
-  jint i19 = bit_cast<jfloat, jint>(f9);
+  jint i19 = bit_cast<jint, jfloat>(f9);
   EXPECT_EQ(i19, 19);
-  jint i20 = bit_cast<jfloat, jint>(f10);
+  jint i20 = bit_cast<jint, jfloat>(f10);
   EXPECT_EQ(i20, 20);
 }
 
@@ -1686,16 +1686,16 @@
   jint i9 = 9;
   jint i10 = 10;
 
-  jfloat f1 = bit_cast<jint, jfloat>(11);
-  jfloat f2 = bit_cast<jint, jfloat>(12);
-  jfloat f3 = bit_cast<jint, jfloat>(13);
-  jfloat f4 = bit_cast<jint, jfloat>(14);
-  jfloat f5 = bit_cast<jint, jfloat>(15);
-  jfloat f6 = bit_cast<jint, jfloat>(16);
-  jfloat f7 = bit_cast<jint, jfloat>(17);
-  jfloat f8 = bit_cast<jint, jfloat>(18);
-  jfloat f9 = bit_cast<jint, jfloat>(19);
-  jfloat f10 = bit_cast<jint, jfloat>(20);
+  jfloat f1 = bit_cast<jfloat, jint>(11);
+  jfloat f2 = bit_cast<jfloat, jint>(12);
+  jfloat f3 = bit_cast<jfloat, jint>(13);
+  jfloat f4 = bit_cast<jfloat, jint>(14);
+  jfloat f5 = bit_cast<jfloat, jint>(15);
+  jfloat f6 = bit_cast<jfloat, jint>(16);
+  jfloat f7 = bit_cast<jfloat, jint>(17);
+  jfloat f8 = bit_cast<jfloat, jint>(18);
+  jfloat f9 = bit_cast<jfloat, jint>(19);
+  jfloat f10 = bit_cast<jfloat, jint>(20);
 
   env_->CallStaticVoidMethod(jklass_, jmethod_, i1, f1, i2, f2, i3, f3, i4, f4, i5, f5, i6, f6, i7,
                              f7, i8, f8, i9, f9, i10, f10);
diff --git a/compiler/optimizing/code_generator.cc b/compiler/optimizing/code_generator.cc
index 561dcb7..787a170 100644
--- a/compiler/optimizing/code_generator.cc
+++ b/compiler/optimizing/code_generator.cc
@@ -628,7 +628,7 @@
               ++i, DexRegisterLocation::Kind::kConstant, High32Bits(value));
           DCHECK_LT(i, environment_size);
         } else if (current->IsDoubleConstant()) {
-          int64_t value = bit_cast<double, int64_t>(current->AsDoubleConstant()->GetValue());
+          int64_t value = bit_cast<int64_t, double>(current->AsDoubleConstant()->GetValue());
           stack_map_stream_.AddDexRegisterEntry(
               i, DexRegisterLocation::Kind::kConstant, Low32Bits(value));
           stack_map_stream_.AddDexRegisterEntry(
@@ -641,7 +641,7 @@
           stack_map_stream_.AddDexRegisterEntry(i, DexRegisterLocation::Kind::kConstant, 0);
         } else {
           DCHECK(current->IsFloatConstant()) << current->DebugName();
-          int32_t value = bit_cast<float, int32_t>(current->AsFloatConstant()->GetValue());
+          int32_t value = bit_cast<int32_t, float>(current->AsFloatConstant()->GetValue());
           stack_map_stream_.AddDexRegisterEntry(i, DexRegisterLocation::Kind::kConstant, value);
         }
         break;
diff --git a/compiler/optimizing/code_generator.h b/compiler/optimizing/code_generator.h
index ecaa6f0..07ca6b1 100644
--- a/compiler/optimizing/code_generator.h
+++ b/compiler/optimizing/code_generator.h
@@ -271,7 +271,7 @@
       return 0;
     } else {
       DCHECK(constant->IsFloatConstant());
-      return bit_cast<float, int32_t>(constant->AsFloatConstant()->GetValue());
+      return bit_cast<int32_t, float>(constant->AsFloatConstant()->GetValue());
     }
   }
 
@@ -281,12 +281,12 @@
     } else if (constant->IsNullConstant()) {
       return 0;
     } else if (constant->IsFloatConstant()) {
-      return bit_cast<float, int32_t>(constant->AsFloatConstant()->GetValue());
+      return bit_cast<int32_t, float>(constant->AsFloatConstant()->GetValue());
     } else if (constant->IsLongConstant()) {
       return constant->AsLongConstant()->GetValue();
     } else {
       DCHECK(constant->IsDoubleConstant());
-      return bit_cast<double, int64_t>(constant->AsDoubleConstant()->GetValue());
+      return bit_cast<int64_t, double>(constant->AsDoubleConstant()->GetValue());
     }
   }
 
diff --git a/compiler/optimizing/code_generator_x86.cc b/compiler/optimizing/code_generator_x86.cc
index 02b9b32..e808a12 100644
--- a/compiler/optimizing/code_generator_x86.cc
+++ b/compiler/optimizing/code_generator_x86.cc
@@ -680,7 +680,7 @@
         value = constant->AsLongConstant()->GetValue();
       } else {
         DCHECK(constant->IsDoubleConstant());
-        value = bit_cast<double, int64_t>(constant->AsDoubleConstant()->GetValue());
+        value = bit_cast<int64_t, double>(constant->AsDoubleConstant()->GetValue());
       }
       __ movl(Address(ESP, destination.GetStackIndex()), Immediate(Low32Bits(value)));
       __ movl(Address(ESP, destination.GetHighStackIndex(kX86WordSize)), Immediate(High32Bits(value)));
@@ -3665,7 +3665,7 @@
       }
     } else if (constant->IsFloatConstant()) {
       float fp_value = constant->AsFloatConstant()->GetValue();
-      int32_t value = bit_cast<float, int32_t>(fp_value);
+      int32_t value = bit_cast<int32_t, float>(fp_value);
       Immediate imm(value);
       if (destination.IsFpuRegister()) {
         XmmRegister dest = destination.AsFpuRegister<XmmRegister>();
@@ -3699,7 +3699,7 @@
     } else {
       DCHECK(constant->IsDoubleConstant());
       double dbl_value = constant->AsDoubleConstant()->GetValue();
-      int64_t value = bit_cast<double, int64_t>(dbl_value);
+      int64_t value = bit_cast<int64_t, double>(dbl_value);
       int32_t low_value = Low32Bits(value);
       int32_t high_value = High32Bits(value);
       Immediate low(low_value);
diff --git a/compiler/optimizing/code_generator_x86_64.cc b/compiler/optimizing/code_generator_x86_64.cc
index d09c8f8..b12f57e 100644
--- a/compiler/optimizing/code_generator_x86_64.cc
+++ b/compiler/optimizing/code_generator_x86_64.cc
@@ -625,7 +625,7 @@
       HConstant* constant = source.GetConstant();
       int64_t value = constant->AsLongConstant()->GetValue();
       if (constant->IsDoubleConstant()) {
-        value = bit_cast<double, int64_t>(constant->AsDoubleConstant()->GetValue());
+        value = bit_cast<int64_t, double>(constant->AsDoubleConstant()->GetValue());
       } else {
         DCHECK(constant->IsLongConstant());
         value = constant->AsLongConstant()->GetValue();
@@ -3344,7 +3344,7 @@
       }
     } else if (constant->IsFloatConstant()) {
       float fp_value = constant->AsFloatConstant()->GetValue();
-      int32_t value = bit_cast<float, int32_t>(fp_value);
+      int32_t value = bit_cast<int32_t, float>(fp_value);
       Immediate imm(value);
       if (destination.IsFpuRegister()) {
         XmmRegister dest = destination.AsFpuRegister<XmmRegister>();
@@ -3362,7 +3362,7 @@
     } else {
       DCHECK(constant->IsDoubleConstant()) << constant->DebugName();
       double fp_value =  constant->AsDoubleConstant()->GetValue();
-      int64_t value = bit_cast<double, int64_t>(fp_value);
+      int64_t value = bit_cast<int64_t, double>(fp_value);
       Immediate imm(value);
       if (destination.IsFpuRegister()) {
         XmmRegister dest = destination.AsFpuRegister<XmmRegister>();
diff --git a/compiler/optimizing/nodes.h b/compiler/optimizing/nodes.h
index 07ff8ba..df847aa 100644
--- a/compiler/optimizing/nodes.h
+++ b/compiler/optimizing/nodes.h
@@ -1875,20 +1875,22 @@
   float GetValue() const { return value_; }
 
   bool InstructionDataEquals(HInstruction* other) const OVERRIDE {
-    return bit_cast<float, int32_t>(other->AsFloatConstant()->value_) ==
-        bit_cast<float, int32_t>(value_);
+    return bit_cast<uint32_t, float>(other->AsFloatConstant()->value_) ==
+        bit_cast<uint32_t, float>(value_);
   }
 
   size_t ComputeHashCode() const OVERRIDE { return static_cast<size_t>(GetValue()); }
 
   bool IsMinusOne() const OVERRIDE {
-    return bit_cast<uint32_t>(AsFloatConstant()->GetValue()) == bit_cast<uint32_t>((-1.0f));
+    return bit_cast<uint32_t, float>(AsFloatConstant()->GetValue()) ==
+        bit_cast<uint32_t, float>((-1.0f));
   }
   bool IsZero() const OVERRIDE {
     return AsFloatConstant()->GetValue() == 0.0f;
   }
   bool IsOne() const OVERRIDE {
-    return bit_cast<uint32_t>(AsFloatConstant()->GetValue()) == bit_cast<uint32_t>(1.0f);
+    return bit_cast<uint32_t, float>(AsFloatConstant()->GetValue()) ==
+        bit_cast<uint32_t, float>(1.0f);
   }
 
   DECLARE_INSTRUCTION(FloatConstant);
@@ -1906,20 +1908,22 @@
   double GetValue() const { return value_; }
 
   bool InstructionDataEquals(HInstruction* other) const OVERRIDE {
-    return bit_cast<double, int64_t>(other->AsDoubleConstant()->value_) ==
-        bit_cast<double, int64_t>(value_);
+    return bit_cast<uint64_t, double>(other->AsDoubleConstant()->value_) ==
+        bit_cast<uint64_t, double>(value_);
   }
 
   size_t ComputeHashCode() const OVERRIDE { return static_cast<size_t>(GetValue()); }
 
   bool IsMinusOne() const OVERRIDE {
-    return bit_cast<uint64_t>(AsDoubleConstant()->GetValue()) == bit_cast<uint64_t>((-1.0));
+    return bit_cast<uint64_t, double>(AsDoubleConstant()->GetValue()) ==
+        bit_cast<uint64_t, double>((-1.0));
   }
   bool IsZero() const OVERRIDE {
     return AsDoubleConstant()->GetValue() == 0.0;
   }
   bool IsOne() const OVERRIDE {
-    return bit_cast<uint64_t>(AsDoubleConstant()->GetValue()) == bit_cast<uint64_t>(1.0);
+    return bit_cast<uint64_t, double>(AsDoubleConstant()->GetValue()) ==
+        bit_cast<uint64_t, double>(1.0);
   }
 
   DECLARE_INSTRUCTION(DoubleConstant);
diff --git a/compiler/optimizing/ssa_builder.cc b/compiler/optimizing/ssa_builder.cc
index ba11e90..ae6bf16 100644
--- a/compiler/optimizing/ssa_builder.cc
+++ b/compiler/optimizing/ssa_builder.cc
@@ -359,12 +359,12 @@
   if (result == nullptr) {
     HGraph* graph = constant->GetBlock()->GetGraph();
     ArenaAllocator* allocator = graph->GetArena();
-    result = new (allocator) HFloatConstant(bit_cast<int32_t, float>(constant->GetValue()));
+    result = new (allocator) HFloatConstant(bit_cast<float, int32_t>(constant->GetValue()));
     constant->GetBlock()->InsertInstructionBefore(result, constant->GetNext());
   } else {
     // If there is already a constant with the expected type, we know it is
     // the floating point equivalent of this constant.
-    DCHECK_EQ((bit_cast<float, int32_t>(result->GetValue())), constant->GetValue());
+    DCHECK_EQ((bit_cast<int32_t, float>(result->GetValue())), constant->GetValue());
   }
   return result;
 }
@@ -381,12 +381,12 @@
   if (result == nullptr) {
     HGraph* graph = constant->GetBlock()->GetGraph();
     ArenaAllocator* allocator = graph->GetArena();
-    result = new (allocator) HDoubleConstant(bit_cast<int64_t, double>(constant->GetValue()));
+    result = new (allocator) HDoubleConstant(bit_cast<double, int64_t>(constant->GetValue()));
     constant->GetBlock()->InsertInstructionBefore(result, constant->GetNext());
   } else {
     // If there is already a constant with the expected type, we know it is
     // the floating point equivalent of this constant.
-    DCHECK_EQ((bit_cast<double, int64_t>(result->GetValue())), constant->GetValue());
+    DCHECK_EQ((bit_cast<int64_t, double>(result->GetValue())), constant->GetValue());
   }
   return result;
 }
diff --git a/runtime/entrypoints/quick/quick_trampoline_entrypoints.cc b/runtime/entrypoints/quick/quick_trampoline_entrypoints.cc
index 5eb97d8..8351e22 100644
--- a/runtime/entrypoints/quick/quick_trampoline_entrypoints.cc
+++ b/runtime/entrypoints/quick/quick_trampoline_entrypoints.cc
@@ -1184,7 +1184,7 @@
       gpr_index_--;
       if (kMultiGPRegistersWidened) {
         DCHECK_EQ(sizeof(uintptr_t), sizeof(int64_t));
-        PushGpr(static_cast<int64_t>(bit_cast<uint32_t, int32_t>(val)));
+        PushGpr(static_cast<int64_t>(bit_cast<int32_t, uint32_t>(val)));
       } else {
         PushGpr(val);
       }
@@ -1192,7 +1192,7 @@
       stack_entries_++;
       if (kMultiGPRegistersWidened) {
         DCHECK_EQ(sizeof(uintptr_t), sizeof(int64_t));
-        PushStack(static_cast<int64_t>(bit_cast<uint32_t, int32_t>(val)));
+        PushStack(static_cast<int64_t>(bit_cast<int32_t, uint32_t>(val)));
       } else {
         PushStack(val);
       }
@@ -1252,16 +1252,16 @@
 
   void AdvanceFloat(float val) {
     if (kNativeSoftFloatAbi) {
-      AdvanceInt(bit_cast<float, uint32_t>(val));
+      AdvanceInt(bit_cast<uint32_t, float>(val));
     } else {
       if (HaveFloatFpr()) {
         fpr_index_--;
         if (kRegistersNeededForDouble == 1) {
           if (kMultiFPRegistersWidened) {
-            PushFpr8(bit_cast<double, uint64_t>(val));
+            PushFpr8(bit_cast<uint64_t, double>(val));
           } else {
             // No widening, just use the bits.
-            PushFpr8(bit_cast<float, uint64_t>(val));
+            PushFpr8(static_cast<uint64_t>(bit_cast<uint32_t, float>(val)));
           }
         } else {
           PushFpr4(val);
@@ -1272,9 +1272,9 @@
           // Need to widen before storing: Note the "double" in the template instantiation.
           // Note: We need to jump through those hoops to make the compiler happy.
           DCHECK_EQ(sizeof(uintptr_t), sizeof(uint64_t));
-          PushStack(static_cast<uintptr_t>(bit_cast<double, uint64_t>(val)));
+          PushStack(static_cast<uintptr_t>(bit_cast<uint64_t, double>(val)));
         } else {
-          PushStack(bit_cast<float, uintptr_t>(val));
+          PushStack(static_cast<uintptr_t>(bit_cast<uint32_t, float>(val)));
         }
         fpr_index_ = 0;
       }
@@ -1908,8 +1908,8 @@
       case 'F': {
         if (kRuntimeISA == kX86) {
           // Convert back the result to float.
-          double d = bit_cast<uint64_t, double>(result_f);
-          return bit_cast<float, uint32_t>(static_cast<float>(d));
+          double d = bit_cast<double, uint64_t>(result_f);
+          return bit_cast<uint32_t, float>(static_cast<float>(d));
         } else {
           return result_f;
         }
diff --git a/runtime/interpreter/unstarted_runtime.cc b/runtime/interpreter/unstarted_runtime.cc
index fbbc863..98dfdbd 100644
--- a/runtime/interpreter/unstarted_runtime.cc
+++ b/runtime/interpreter/unstarted_runtime.cc
@@ -460,7 +460,7 @@
 static void UnstartedDoubleDoubleToRawLongBits(
     Thread* self ATTRIBUTE_UNUSED, ShadowFrame* shadow_frame, JValue* result, size_t arg_offset) {
   double in = shadow_frame->GetVRegDouble(arg_offset);
-  result->SetJ(bit_cast<int64_t>(in));
+  result->SetJ(bit_cast<int64_t, double>(in));
 }
 
 static mirror::Object* GetDexFromDexCache(Thread* self, mirror::DexCache* dex_cache)
diff --git a/runtime/memory_region.h b/runtime/memory_region.h
index 939a1a9..f867f6a 100644
--- a/runtime/memory_region.h
+++ b/runtime/memory_region.h
@@ -19,6 +19,7 @@
 
 #include <stdint.h>
 
+#include "base/casts.h"
 #include "base/logging.h"
 #include "base/macros.h"
 #include "base/value_object.h"
@@ -60,23 +61,6 @@
     *ComputeInternalPointer<T>(offset) = value;
   }
 
-  // TODO: Local hack to prevent name clashes between two conflicting
-  // implementations of bit_cast:
-  // - art::bit_cast<Destination, Source> runtime/base/casts.h, and
-  // - art::bit_cast<Source, Destination> from runtime/utils.h.
-  // Remove this when these routines have been merged.
-  template<typename Source, typename Destination>
-  static Destination local_bit_cast(Source in) {
-    static_assert(sizeof(Source) <= sizeof(Destination),
-                  "Size of Source not <= size of Destination");
-    union {
-      Source u;
-      Destination v;
-    } tmp;
-    tmp.u = in;
-    return tmp.v;
-  }
-
   // Load value of type `T` at `offset`.  The memory address corresponding
   // to `offset` does not need to be word-aligned.
   template<typename T> T LoadUnaligned(uintptr_t offset) const {
@@ -88,7 +72,7 @@
       equivalent_unsigned_integer_value +=
           *ComputeInternalPointer<uint8_t>(offset + i) << (i * kBitsPerByte);
     }
-    return local_bit_cast<U, T>(equivalent_unsigned_integer_value);
+    return bit_cast<T, U>(equivalent_unsigned_integer_value);
   }
 
   // Store `value` (of type `T`) at `offset`.  The memory address
@@ -96,7 +80,7 @@
   template<typename T> void StoreUnaligned(uintptr_t offset, T value) const {
     // Equivalent unsigned integer type corresponding to T.
     typedef typename UnsignedIntegerType<sizeof(T)>::type U;
-    U equivalent_unsigned_integer_value = local_bit_cast<T, U>(value);
+    U equivalent_unsigned_integer_value = bit_cast<U, T>(value);
     // Write the value byte by byte in a little-endian fashion.
     for (size_t i = 0; i < sizeof(U); ++i) {
       *ComputeInternalPointer<uint8_t>(offset + i) =
diff --git a/runtime/utils.h b/runtime/utils.h
index 9a9f51a..e20412e 100644
--- a/runtime/utils.h
+++ b/runtime/utils.h
@@ -311,19 +311,6 @@
   return (ch < ' ' || ch > '~');
 }
 
-// Interpret the bit pattern of input (type U) as type V. Requires the size
-// of V >= size of U (compile-time checked).
-template<typename U, typename V>
-static inline V bit_cast(U in) {
-  static_assert(sizeof(U) <= sizeof(V), "Size of U not <= size of V");
-  union {
-    U u;
-    V v;
-  } tmp;
-  tmp.u = in;
-  return tmp.v;
-}
-
 std::string PrintableChar(uint16_t ch);
 
 // Returns an ASCII string corresponding to the given UTF-8 string.
diff --git a/test/454-get-vreg/get_vreg_jni.cc b/test/454-get-vreg/get_vreg_jni.cc
index 937d2fe..6b4bc11 100644
--- a/test/454-get-vreg/get_vreg_jni.cc
+++ b/test/454-get-vreg/get_vreg_jni.cc
@@ -55,7 +55,7 @@
       CHECK_EQ(value, 1u);
 
       CHECK(GetVReg(m, 5, kFloatVReg, &value));
-      uint32_t cast = bit_cast<float, uint32_t>(1.0f);
+      uint32_t cast = bit_cast<uint32_t, float>(1.0f);
       CHECK_EQ(value, cast);
 
       CHECK(GetVReg(m, 6, kIntVReg, &value));
@@ -95,7 +95,7 @@
       CHECK_EQ(value, 0u);
 
       CHECK(GetVRegPair(m, 13, kDoubleLoVReg, kDoubleHiVReg, &value));
-      uint64_t cast = bit_cast<double, uint64_t>(2.0);
+      uint64_t cast = bit_cast<uint64_t, double>(2.0);
       CHECK_EQ(value, cast);
     }
 
diff --git a/test/455-set-vreg/set_vreg_jni.cc b/test/455-set-vreg/set_vreg_jni.cc
index 24d7832..0a83ac0 100644
--- a/test/455-set-vreg/set_vreg_jni.cc
+++ b/test/455-set-vreg/set_vreg_jni.cc
@@ -60,21 +60,21 @@
       CHECK(GetVReg(m, 1, kReferenceVReg, &value));
       CHECK_EQ(reinterpret_cast<mirror::Object*>(value), this_value_);
 
-      CHECK(SetVReg(m, 2, bit_cast<float, uint32_t>(5.0f), kFloatVReg));
-      CHECK(SetVReg(m, 3, bit_cast<float, uint32_t>(4.0f), kFloatVReg));
-      CHECK(SetVReg(m, 4, bit_cast<float, uint32_t>(3.0f), kFloatVReg));
-      CHECK(SetVReg(m, 5, bit_cast<float, uint32_t>(2.0f), kFloatVReg));
-      CHECK(SetVReg(m, 6, bit_cast<float, uint32_t>(1.0f), kFloatVReg));
+      CHECK(SetVReg(m, 2, bit_cast<uint32_t, float>(5.0f), kFloatVReg));
+      CHECK(SetVReg(m, 3, bit_cast<uint32_t, float>(4.0f), kFloatVReg));
+      CHECK(SetVReg(m, 4, bit_cast<uint32_t, float>(3.0f), kFloatVReg));
+      CHECK(SetVReg(m, 5, bit_cast<uint32_t, float>(2.0f), kFloatVReg));
+      CHECK(SetVReg(m, 6, bit_cast<uint32_t, float>(1.0f), kFloatVReg));
     } else if (m_name.compare("testDoubleVReg") == 0) {
       uint32_t value = 0;
       CHECK(GetVReg(m, 3, kReferenceVReg, &value));
       CHECK_EQ(reinterpret_cast<mirror::Object*>(value), this_value_);
 
-      CHECK(SetVRegPair(m, 4, bit_cast<double, uint64_t>(5.0), kDoubleLoVReg, kDoubleHiVReg));
-      CHECK(SetVRegPair(m, 6, bit_cast<double, uint64_t>(4.0), kDoubleLoVReg, kDoubleHiVReg));
-      CHECK(SetVRegPair(m, 8, bit_cast<double, uint64_t>(3.0), kDoubleLoVReg, kDoubleHiVReg));
-      CHECK(SetVRegPair(m, 10, bit_cast<double, uint64_t>(2.0), kDoubleLoVReg, kDoubleHiVReg));
-      CHECK(SetVRegPair(m, 12, bit_cast<double, uint64_t>(1.0), kDoubleLoVReg, kDoubleHiVReg));
+      CHECK(SetVRegPair(m, 4, bit_cast<uint64_t, double>(5.0), kDoubleLoVReg, kDoubleHiVReg));
+      CHECK(SetVRegPair(m, 6, bit_cast<uint64_t, double>(4.0), kDoubleLoVReg, kDoubleHiVReg));
+      CHECK(SetVRegPair(m, 8, bit_cast<uint64_t, double>(3.0), kDoubleLoVReg, kDoubleHiVReg));
+      CHECK(SetVRegPair(m, 10, bit_cast<uint64_t, double>(2.0), kDoubleLoVReg, kDoubleHiVReg));
+      CHECK(SetVRegPair(m, 12, bit_cast<uint64_t, double>(1.0), kDoubleLoVReg, kDoubleHiVReg));
     }
 
     return true;
diff --git a/test/457-regs/regs_jni.cc b/test/457-regs/regs_jni.cc
index ce701e8..1b32348 100644
--- a/test/457-regs/regs_jni.cc
+++ b/test/457-regs/regs_jni.cc
@@ -69,7 +69,7 @@
       CHECK_EQ(value, 1u);
 
       CHECK(GetVReg(m, 4, kFloatVReg, &value));
-      uint32_t cast = bit_cast<float, uint32_t>(4.0f);
+      uint32_t cast = bit_cast<uint32_t, float>(4.0f);
       CHECK_EQ(value, cast);
       did_check_ = true;
     } else if (m_name.compare("phiEquivalent") == 0) {
@@ -138,7 +138,7 @@
   std::unique_ptr<Context> context(Context::Create());
   CHECK(soa.Decode<mirror::Object*>(main) == nullptr);
   CHECK_EQ(int_value, 0);
-  int32_t cast = bit_cast<float, int32_t>(float_value);
+  int32_t cast = bit_cast<int32_t, float>(float_value);
   CHECK_EQ(cast, 0);
   TestVisitor visitor(soa.Self(), context.get());
   visitor.WalkStack();